New upstream version 18.11-rc1 (tag: upstream/18.11-rc1)
author    Luca Boccassi <luca.boccassi@gmail.com>
          Thu, 1 Nov 2018 11:59:50 +0000 (11:59 +0000)
committer Luca Boccassi <luca.boccassi@gmail.com>
          Thu, 1 Nov 2018 12:00:19 +0000 (12:00 +0000)
Change-Id: Iaa71986dd6332e878d8f4bf493101b2bbc6313bb
Signed-off-by: Luca Boccassi <luca.boccassi@gmail.com>
1190 files changed:
MAINTAINERS
app/meson.build
app/pdump/main.c
app/pdump/meson.build
app/proc-info/meson.build
app/test-bbdev/meson.build
app/test-bbdev/test_bbdev_perf.c
app/test-crypto-perf/cperf_test_vectors.c
app/test-crypto-perf/main.c
app/test-crypto-perf/meson.build
app/test-eventdev/test_perf_common.c
app/test-eventdev/test_pipeline_atq.c
app/test-eventdev/test_pipeline_common.c
app/test-eventdev/test_pipeline_common.h
app/test-eventdev/test_pipeline_queue.c
app/test-pmd/Makefile
app/test-pmd/cmdline.c
app/test-pmd/cmdline_flow.c
app/test-pmd/cmdline_mtr.c
app/test-pmd/cmdline_tm.c
app/test-pmd/cmdline_tm.h
app/test-pmd/config.c
app/test-pmd/csumonly.c
app/test-pmd/meson.build
app/test-pmd/noisy_vnf.c [new file with mode: 0644]
app/test-pmd/parameters.c
app/test-pmd/rxonly.c
app/test-pmd/testpmd.c
app/test-pmd/testpmd.h
app/test-pmd/util.c [new file with mode: 0644]
buildtools/auto-config-h.sh
buildtools/check-experimental-syms.sh
buildtools/pmdinfogen/pmdinfogen.c
buildtools/symlink-drivers-solibs.sh
config/arm/meson.build
config/common_armv8a_linuxapp
config/common_base
config/common_linuxapp
config/defconfig_arm64-dpaa-linuxapp-gcc
config/defconfig_arm64-thunderx-linuxapp-gcc
config/defconfig_ppc_64-power8-linuxapp-gcc
config/meson.build
config/ppc_64/meson.build [new file with mode: 0644]
config/rte_config.h
devtools/check-forbidden-tokens.awk [new file with mode: 0755]
devtools/check-git-log.sh
devtools/check-includes.sh
devtools/check-symbol-change.sh
devtools/checkpatches.sh
devtools/cocci.sh
devtools/test-build.sh
devtools/test-meson-builds.sh
doc/api/doxy-api-index.md
doc/api/doxy-api.conf [deleted file]
doc/api/doxy-api.conf.in [new file with mode: 0644]
doc/api/generate_doxygen.sh [new file with mode: 0755]
doc/api/generate_examples.sh [new file with mode: 0755]
doc/api/meson.build [new file with mode: 0644]
doc/build-sdk-meson.txt
doc/guides/compressdevs/features/octeontx.ini
doc/guides/compressdevs/features/qat.ini
doc/guides/compressdevs/octeontx.rst
doc/guides/compressdevs/qat_comp.rst
doc/guides/contributing/coding_style.rst
doc/guides/contributing/documentation.rst
doc/guides/cryptodevs/aesni_mb.rst
doc/guides/cryptodevs/caam_jr.rst [new file with mode: 0644]
doc/guides/cryptodevs/features/caam_jr.ini [new file with mode: 0644]
doc/guides/cryptodevs/features/default.ini
doc/guides/cryptodevs/features/mvsam.ini
doc/guides/cryptodevs/features/octeontx.ini [new file with mode: 0644]
doc/guides/cryptodevs/features/qat.ini
doc/guides/cryptodevs/index.rst
doc/guides/cryptodevs/mvsam.rst
doc/guides/cryptodevs/octeontx.rst [new file with mode: 0644]
doc/guides/cryptodevs/overview.rst
doc/guides/cryptodevs/qat.rst
doc/guides/eventdevs/dpaa.rst
doc/guides/eventdevs/dsw.rst [new file with mode: 0644]
doc/guides/eventdevs/index.rst
doc/guides/eventdevs/octeontx.rst
doc/guides/eventdevs/opdl.rst
doc/guides/howto/index.rst
doc/guides/howto/rte_flow.rst
doc/guides/howto/telemetry.rst [new file with mode: 0644]
doc/guides/howto/virtio_user_as_exceptional_path.rst
doc/guides/howto/virtio_user_for_container_networking.rst
doc/guides/mempool/octeontx.rst
doc/guides/meson.build [new file with mode: 0644]
doc/guides/nics/atlantic.rst [new file with mode: 0644]
doc/guides/nics/axgbe.rst
doc/guides/nics/dpaa2.rst
doc/guides/nics/ena.rst
doc/guides/nics/enetc.rst [new file with mode: 0644]
doc/guides/nics/enic.rst
doc/guides/nics/features.rst
doc/guides/nics/features/atlantic.ini [new file with mode: 0644]
doc/guides/nics/features/ena.ini
doc/guides/nics/features/enetc.ini [new file with mode: 0644]
doc/guides/nics/features/failsafe.ini
doc/guides/nics/features/mvneta.ini [new file with mode: 0644]
doc/guides/nics/features/netvsc.ini
doc/guides/nics/features/sfc_efx.ini
doc/guides/nics/fm10k.rst
doc/guides/nics/i40e.rst
doc/guides/nics/img/mvpp2_tm.svg [new file with mode: 0644]
doc/guides/nics/index.rst
doc/guides/nics/ixgbe.rst
doc/guides/nics/liquidio.rst
doc/guides/nics/mlx5.rst
doc/guides/nics/mvneta.rst [new file with mode: 0644]
doc/guides/nics/mvpp2.rst
doc/guides/nics/netvsc.rst
doc/guides/nics/octeontx.rst
doc/guides/nics/pcap_ring.rst
doc/guides/nics/sfc_efx.rst
doc/guides/nics/softnic.rst
doc/guides/nics/tap.rst
doc/guides/nics/vhost.rst
doc/guides/nics/virtio.rst
doc/guides/platform/octeontx.rst
doc/guides/prog_guide/env_abstraction_layer.rst
doc/guides/prog_guide/event_ethernet_tx_adapter.rst [new file with mode: 0644]
doc/guides/prog_guide/hash_lib.rst
doc/guides/prog_guide/index.rst
doc/guides/prog_guide/kernel_nic_interface.rst
doc/guides/prog_guide/packet_framework.rst
doc/guides/prog_guide/port_hotplug_framework.rst [deleted file]
doc/guides/prog_guide/power_man.rst
doc/guides/prog_guide/profile_app.rst
doc/guides/prog_guide/rte_flow.rst
doc/guides/prog_guide/rte_security.rst
doc/guides/prog_guide/vhost_lib.rst
doc/guides/rel_notes/deprecation.rst
doc/guides/rel_notes/index.rst
doc/guides/rel_notes/rel_description.rst [deleted file]
doc/guides/rel_notes/release_18_08.rst
doc/guides/rel_notes/release_18_11.rst [new file with mode: 0644]
doc/guides/sample_app_ug/flow_filtering.rst
doc/guides/sample_app_ug/index.rst
doc/guides/sample_app_ug/ip_pipeline.rst
doc/guides/sample_app_ug/ipsec_secgw.rst
doc/guides/sample_app_ug/kernel_nic_interface.rst
doc/guides/sample_app_ug/l3_forward_power_man.rst
doc/guides/sample_app_ug/link_status_intr.rst
doc/guides/sample_app_ug/vdpa.rst [new file with mode: 0644]
doc/guides/sample_app_ug/vhost.rst
doc/guides/sample_app_ug/vhost_crypto.rst
doc/guides/sample_app_ug/vm_power_management.rst
doc/guides/testpmd_app_ug/run_app.rst
doc/guides/testpmd_app_ug/testpmd_funcs.rst
doc/guides/tools/img/eventdev_pipeline_atq_test_generic.svg
doc/guides/tools/img/eventdev_pipeline_atq_test_internal_port.svg [moved from doc/guides/tools/img/eventdev_pipeline_atq_test_lockfree.svg with 99% similarity]
doc/guides/tools/img/eventdev_pipeline_queue_test_generic.svg
doc/guides/tools/img/eventdev_pipeline_queue_test_internal_port.svg [moved from doc/guides/tools/img/eventdev_pipeline_queue_test_lockfree.svg with 99% similarity]
doc/guides/tools/testeventdev.rst
doc/meson.build [new file with mode: 0644]
drivers/Makefile
drivers/baseband/meson.build [new file with mode: 0644]
drivers/baseband/null/meson.build [new file with mode: 0644]
drivers/bus/dpaa/Makefile
drivers/bus/dpaa/base/fman/netcfg_layer.c
drivers/bus/dpaa/base/qbman/bman_driver.c
drivers/bus/dpaa/base/qbman/qman.c
drivers/bus/dpaa/base/qbman/qman_driver.c
drivers/bus/dpaa/dpaa_bus.c
drivers/bus/dpaa/include/compat.h
drivers/bus/dpaa/include/fsl_fman_crc64.h
drivers/bus/dpaa/include/fsl_qman.h
drivers/bus/dpaa/include/fsl_usd.h
drivers/bus/dpaa/meson.build
drivers/bus/dpaa/rte_bus_dpaa_version.map
drivers/bus/dpaa/rte_dpaa_bus.h
drivers/bus/fslmc/Makefile
drivers/bus/fslmc/fslmc_bus.c
drivers/bus/fslmc/fslmc_vfio.c
drivers/bus/fslmc/mc/dpbp.c
drivers/bus/fslmc/mc/dpci.c
drivers/bus/fslmc/mc/dpcon.c
drivers/bus/fslmc/mc/dpdmai.c
drivers/bus/fslmc/mc/dpio.c
drivers/bus/fslmc/mc/fsl_dpbp.h
drivers/bus/fslmc/mc/fsl_dpbp_cmd.h
drivers/bus/fslmc/mc/fsl_dpci.h
drivers/bus/fslmc/mc/fsl_dpci_cmd.h
drivers/bus/fslmc/mc/fsl_dpcon.h
drivers/bus/fslmc/mc/fsl_dpdmai.h
drivers/bus/fslmc/mc/fsl_dpdmai_cmd.h
drivers/bus/fslmc/mc/fsl_dpmng.h
drivers/bus/fslmc/mc/fsl_dpopr.h [new file with mode: 0644]
drivers/bus/fslmc/meson.build
drivers/bus/fslmc/portal/dpaa2_hw_dpbp.c
drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
drivers/bus/fslmc/portal/dpaa2_hw_dpio.h
drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
drivers/bus/fslmc/qbman/include/compat.h
drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
drivers/bus/fslmc/qbman/qbman_portal.c
drivers/bus/fslmc/qbman/qbman_portal.h
drivers/bus/fslmc/qbman/qbman_sys.h
drivers/bus/fslmc/qbman/qbman_sys_decl.h
drivers/bus/fslmc/rte_bus_fslmc_version.map
drivers/bus/ifpga/Makefile
drivers/bus/ifpga/ifpga_bus.c
drivers/bus/ifpga/meson.build
drivers/bus/ifpga/rte_bus_ifpga.h
drivers/bus/pci/Makefile
drivers/bus/pci/bsd/pci.c
drivers/bus/pci/linux/Makefile
drivers/bus/pci/linux/pci.c
drivers/bus/pci/linux/pci_vfio.c
drivers/bus/pci/meson.build
drivers/bus/pci/pci_common.c
drivers/bus/pci/pci_common_uio.c
drivers/bus/pci/pci_params.c [new file with mode: 0644]
drivers/bus/pci/private.h
drivers/bus/pci/rte_bus_pci.h
drivers/bus/vdev/Makefile
drivers/bus/vdev/meson.build
drivers/bus/vdev/vdev.c
drivers/bus/vdev/vdev_params.c [new file with mode: 0644]
drivers/bus/vdev/vdev_private.h [new file with mode: 0644]
drivers/bus/vmbus/Makefile
drivers/bus/vmbus/linux/vmbus_bus.c
drivers/bus/vmbus/meson.build
drivers/bus/vmbus/private.h
drivers/bus/vmbus/rte_bus_vmbus.h
drivers/bus/vmbus/rte_bus_vmbus_version.map
drivers/bus/vmbus/vmbus_channel.c
drivers/bus/vmbus/vmbus_common.c
drivers/common/Makefile
drivers/common/cpt/Makefile [new file with mode: 0644]
drivers/common/cpt/cpt_common.h [new file with mode: 0644]
drivers/common/cpt/cpt_hw_types.h [new file with mode: 0644]
drivers/common/cpt/cpt_mcode_defines.h [new file with mode: 0644]
drivers/common/cpt/cpt_pmd_logs.h [new file with mode: 0644]
drivers/common/cpt/cpt_pmd_ops_helper.c [new file with mode: 0644]
drivers/common/cpt/cpt_pmd_ops_helper.h [new file with mode: 0644]
drivers/common/cpt/cpt_request_mgr.h [new file with mode: 0644]
drivers/common/cpt/cpt_ucode.h [new file with mode: 0644]
drivers/common/cpt/meson.build [new file with mode: 0644]
drivers/common/cpt/rte_common_cpt_version.map [new file with mode: 0644]
drivers/common/dpaax/Makefile [new file with mode: 0644]
drivers/common/dpaax/dpaax_iova_table.c [new file with mode: 0644]
drivers/common/dpaax/dpaax_iova_table.h [new file with mode: 0644]
drivers/common/dpaax/dpaax_logs.h [new file with mode: 0644]
drivers/common/dpaax/meson.build [new file with mode: 0644]
drivers/common/dpaax/rte_common_dpaax_version.map [new file with mode: 0644]
drivers/common/meson.build
drivers/common/mvep/Makefile [new file with mode: 0644]
drivers/common/mvep/meson.build [new file with mode: 0644]
drivers/common/mvep/mvep_common.c [new file with mode: 0644]
drivers/common/mvep/rte_common_mvep_version.map [new file with mode: 0644]
drivers/common/mvep/rte_mvep_common.h [new file with mode: 0644]
drivers/common/qat/qat_common.h
drivers/common/qat/qat_device.c
drivers/common/qat/qat_device.h
drivers/common/qat/qat_qp.c
drivers/common/qat/qat_qp.h
drivers/compress/octeontx/include/zip_regs.h
drivers/compress/octeontx/otx_zip.h
drivers/compress/octeontx/otx_zip_pmd.c
drivers/compress/qat/qat_comp.c
drivers/compress/qat/qat_comp.h
drivers/compress/qat/qat_comp_pmd.c
drivers/compress/qat/qat_comp_pmd.h
drivers/crypto/Makefile
drivers/crypto/aesni_gcm/aesni_gcm_pmd.c
drivers/crypto/aesni_gcm/aesni_gcm_pmd_ops.c
drivers/crypto/aesni_gcm/aesni_gcm_pmd_private.h
drivers/crypto/aesni_gcm/meson.build [new file with mode: 0644]
drivers/crypto/aesni_mb/aesni_mb_ops.h
drivers/crypto/aesni_mb/meson.build [new file with mode: 0644]
drivers/crypto/aesni_mb/rte_aesni_mb_pmd.c
drivers/crypto/aesni_mb/rte_aesni_mb_pmd_ops.c
drivers/crypto/aesni_mb/rte_aesni_mb_pmd_private.h
drivers/crypto/caam_jr/Makefile [new file with mode: 0644]
drivers/crypto/caam_jr/caam_jr.c [new file with mode: 0644]
drivers/crypto/caam_jr/caam_jr_capabilities.c [new file with mode: 0644]
drivers/crypto/caam_jr/caam_jr_capabilities.h [new file with mode: 0644]
drivers/crypto/caam_jr/caam_jr_config.h [new file with mode: 0644]
drivers/crypto/caam_jr/caam_jr_desc.h [new file with mode: 0644]
drivers/crypto/caam_jr/caam_jr_hw.c [new file with mode: 0644]
drivers/crypto/caam_jr/caam_jr_hw_specific.h [new file with mode: 0644]
drivers/crypto/caam_jr/caam_jr_log.h [new file with mode: 0644]
drivers/crypto/caam_jr/caam_jr_pvt.h [new file with mode: 0644]
drivers/crypto/caam_jr/caam_jr_uio.c [new file with mode: 0644]
drivers/crypto/caam_jr/meson.build [new file with mode: 0644]
drivers/crypto/caam_jr/rte_pmd_caam_jr_version.map [new file with mode: 0644]
drivers/crypto/dpaa2_sec/Makefile
drivers/crypto/dpaa2_sec/dpaa2_sec_dpseci.c
drivers/crypto/dpaa2_sec/dpaa2_sec_event.h [new file with mode: 0644]
drivers/crypto/dpaa2_sec/dpaa2_sec_priv.h
drivers/crypto/dpaa2_sec/hw/desc.h
drivers/crypto/dpaa2_sec/hw/desc/algo.h
drivers/crypto/dpaa2_sec/hw/desc/ipsec.h
drivers/crypto/dpaa2_sec/hw/desc/pdcp.h [new file with mode: 0644]
drivers/crypto/dpaa2_sec/hw/rta/protocol_cmd.h
drivers/crypto/dpaa2_sec/hw/rta/sec_run_time_asm.h
drivers/crypto/dpaa2_sec/mc/dpseci.c
drivers/crypto/dpaa2_sec/mc/fsl_dpseci.h
drivers/crypto/dpaa2_sec/mc/fsl_dpseci_cmd.h
drivers/crypto/dpaa2_sec/meson.build
drivers/crypto/dpaa2_sec/rte_pmd_dpaa2_sec_version.map
drivers/crypto/dpaa_sec/Makefile
drivers/crypto/dpaa_sec/dpaa_sec.c
drivers/crypto/dpaa_sec/dpaa_sec.h
drivers/crypto/kasumi/meson.build [new file with mode: 0644]
drivers/crypto/meson.build
drivers/crypto/mvsam/Makefile
drivers/crypto/mvsam/meson.build
drivers/crypto/mvsam/rte_mrvl_pmd.c
drivers/crypto/mvsam/rte_mrvl_pmd_ops.c
drivers/crypto/mvsam/rte_mrvl_pmd_private.h
drivers/crypto/null/null_crypto_pmd_ops.c
drivers/crypto/octeontx/Makefile [new file with mode: 0644]
drivers/crypto/octeontx/meson.build [new file with mode: 0644]
drivers/crypto/octeontx/otx_cryptodev.c [new file with mode: 0644]
drivers/crypto/octeontx/otx_cryptodev.h [new file with mode: 0644]
drivers/crypto/octeontx/otx_cryptodev_capabilities.c [new file with mode: 0644]
drivers/crypto/octeontx/otx_cryptodev_capabilities.h [new file with mode: 0644]
drivers/crypto/octeontx/otx_cryptodev_hw_access.c [new file with mode: 0644]
drivers/crypto/octeontx/otx_cryptodev_hw_access.h [new file with mode: 0644]
drivers/crypto/octeontx/otx_cryptodev_mbox.c [new file with mode: 0644]
drivers/crypto/octeontx/otx_cryptodev_mbox.h [new file with mode: 0644]
drivers/crypto/octeontx/otx_cryptodev_ops.c [new file with mode: 0644]
drivers/crypto/octeontx/otx_cryptodev_ops.h [new file with mode: 0644]
drivers/crypto/octeontx/rte_pmd_octeontx_crypto_version.map [new file with mode: 0644]
drivers/crypto/openssl/compat.h
drivers/crypto/openssl/rte_openssl_pmd.c
drivers/crypto/openssl/rte_openssl_pmd_ops.c
drivers/crypto/qat/qat_sym_capabilities.h
drivers/crypto/qat/qat_sym_pmd.c
drivers/crypto/qat/qat_sym_pmd.h
drivers/crypto/qat/qat_sym_session.c
drivers/crypto/qat/qat_sym_session.h
drivers/crypto/scheduler/meson.build [new file with mode: 0644]
drivers/crypto/scheduler/rte_cryptodev_scheduler.c
drivers/crypto/scheduler/rte_cryptodev_scheduler.h
drivers/crypto/scheduler/scheduler_failover.c
drivers/crypto/scheduler/scheduler_multicore.c
drivers/crypto/scheduler/scheduler_pkt_size_distr.c
drivers/crypto/scheduler/scheduler_pmd.c
drivers/crypto/scheduler/scheduler_pmd_ops.c
drivers/crypto/scheduler/scheduler_pmd_private.h
drivers/crypto/scheduler/scheduler_roundrobin.c
drivers/crypto/zuc/meson.build [new file with mode: 0644]
drivers/event/Makefile
drivers/event/dpaa/Makefile
drivers/event/dpaa/dpaa_eventdev.c
drivers/event/dpaa/dpaa_eventdev.h
drivers/event/dpaa2/Makefile
drivers/event/dpaa2/dpaa2_eventdev.c
drivers/event/dpaa2/dpaa2_eventdev.h
drivers/event/dpaa2/meson.build
drivers/event/dsw/Makefile [new file with mode: 0644]
drivers/event/dsw/dsw_evdev.c [new file with mode: 0644]
drivers/event/dsw/dsw_evdev.h [new file with mode: 0644]
drivers/event/dsw/dsw_event.c [new file with mode: 0644]
drivers/event/dsw/dsw_sort.h [new file with mode: 0644]
drivers/event/dsw/dsw_xstats.c [new file with mode: 0644]
drivers/event/dsw/meson.build [new file with mode: 0644]
drivers/event/dsw/rte_pmd_dsw_event_version.map [new file with mode: 0644]
drivers/event/meson.build
drivers/event/octeontx/Makefile
drivers/event/octeontx/ssovf_evdev.c
drivers/event/octeontx/ssovf_evdev.h
drivers/event/octeontx/ssovf_worker.c
drivers/event/octeontx/ssovf_worker.h
drivers/event/opdl/Makefile
drivers/event/opdl/meson.build [new file with mode: 0644]
drivers/event/opdl/rte_pmd_opdl_event_version.map [moved from drivers/event/opdl/rte_pmd_evdev_opdl_version.map with 100% similarity]
drivers/event/sw/sw_evdev.c
drivers/event/sw/sw_evdev.h
drivers/event/sw/sw_evdev_scheduler.c
drivers/event/sw/sw_evdev_selftest.c
drivers/mempool/dpaa/Makefile
drivers/mempool/dpaa/dpaa_mempool.c
drivers/mempool/dpaa/dpaa_mempool.h
drivers/mempool/dpaa2/Makefile
drivers/mempool/dpaa2/dpaa2_hw_mempool.c
drivers/mempool/dpaa2/meson.build
drivers/mempool/dpaa2/rte_mempool_dpaa2_version.map
drivers/mempool/octeontx/octeontx_fpavf.h
drivers/meson.build
drivers/net/Makefile
drivers/net/af_packet/rte_eth_af_packet.c
drivers/net/ark/ark_ddm.c
drivers/net/ark/ark_ddm.h
drivers/net/ark/ark_ethdev.c
drivers/net/ark/ark_ethdev_rx.c
drivers/net/ark/ark_ethdev_tx.c
drivers/net/atlantic/Makefile [new file with mode: 0644]
drivers/net/atlantic/atl_common.h [new file with mode: 0644]
drivers/net/atlantic/atl_ethdev.c [new file with mode: 0644]
drivers/net/atlantic/atl_ethdev.h [new file with mode: 0644]
drivers/net/atlantic/atl_hw_regs.c [new file with mode: 0644]
drivers/net/atlantic/atl_hw_regs.h [new file with mode: 0644]
drivers/net/atlantic/atl_logs.h [new file with mode: 0644]
drivers/net/atlantic/atl_rxtx.c [new file with mode: 0644]
drivers/net/atlantic/atl_types.h [new file with mode: 0644]
drivers/net/atlantic/hw_atl/hw_atl_b0.c [new file with mode: 0644]
drivers/net/atlantic/hw_atl/hw_atl_b0.h [new file with mode: 0644]
drivers/net/atlantic/hw_atl/hw_atl_b0_internal.h [new file with mode: 0644]
drivers/net/atlantic/hw_atl/hw_atl_llh.c [new file with mode: 0644]
drivers/net/atlantic/hw_atl/hw_atl_llh.h [new file with mode: 0644]
drivers/net/atlantic/hw_atl/hw_atl_llh_internal.h [new file with mode: 0644]
drivers/net/atlantic/hw_atl/hw_atl_utils.c [new file with mode: 0644]
drivers/net/atlantic/hw_atl/hw_atl_utils.h [new file with mode: 0644]
drivers/net/atlantic/hw_atl/hw_atl_utils_fw2x.c [new file with mode: 0644]
drivers/net/atlantic/meson.build [new file with mode: 0644]
drivers/net/atlantic/rte_pmd_atlantic_version.map [new file with mode: 0644]
drivers/net/avf/Makefile
drivers/net/avf/avf_ethdev.c
drivers/net/avf/avf_rxtx.c
drivers/net/avf/avf_rxtx.h
drivers/net/avf/avf_rxtx_vec_sse.c
drivers/net/avf/avf_vchnl.c
drivers/net/avf/base/avf_osdep.h
drivers/net/avf/base/meson.build [new file with mode: 0644]
drivers/net/avf/meson.build [new file with mode: 0644]
drivers/net/avp/avp_ethdev.c
drivers/net/avp/meson.build
drivers/net/axgbe/axgbe_ethdev.c
drivers/net/axgbe/axgbe_rxtx.c
drivers/net/bnx2x/bnx2x.c
drivers/net/bnx2x/bnx2x.h
drivers/net/bnx2x/bnx2x_ethdev.c
drivers/net/bnx2x/bnx2x_logs.h
drivers/net/bnx2x/bnx2x_rxtx.c
drivers/net/bnx2x/bnx2x_stats.c
drivers/net/bnx2x/bnx2x_vfpf.c
drivers/net/bnx2x/ecore_hsi.h
drivers/net/bnx2x/ecore_init.h
drivers/net/bnx2x/ecore_init_ops.h
drivers/net/bnx2x/ecore_reg.h
drivers/net/bnx2x/ecore_sp.c
drivers/net/bnx2x/ecore_sp.h
drivers/net/bnx2x/elink.c
drivers/net/bnx2x/elink.h
drivers/net/bnxt/bnxt.h
drivers/net/bnxt/bnxt_cpr.c
drivers/net/bnxt/bnxt_ethdev.c
drivers/net/bnxt/bnxt_filter.c
drivers/net/bnxt/bnxt_flow.c
drivers/net/bnxt/bnxt_hwrm.c
drivers/net/bnxt/bnxt_hwrm.h
drivers/net/bnxt/bnxt_rxq.c
drivers/net/bnxt/bnxt_stats.c
drivers/net/bnxt/bnxt_txr.c
drivers/net/bnxt/bnxt_vnic.c
drivers/net/bnxt/hsi_struct_def_dpdk.h
drivers/net/bonding/Makefile
drivers/net/bonding/meson.build
drivers/net/bonding/rte_eth_bond_8023ad.c
drivers/net/bonding/rte_eth_bond_8023ad_private.h
drivers/net/bonding/rte_eth_bond_api.c
drivers/net/bonding/rte_eth_bond_flow.c
drivers/net/bonding/rte_eth_bond_pmd.c
drivers/net/bonding/rte_eth_bond_private.h
drivers/net/cxgbe/Makefile
drivers/net/cxgbe/base/adapter.h
drivers/net/cxgbe/base/common.h
drivers/net/cxgbe/base/t4_hw.c
drivers/net/cxgbe/base/t4_msg.h
drivers/net/cxgbe/base/t4_regs.h
drivers/net/cxgbe/base/t4_tcb.h
drivers/net/cxgbe/base/t4fw_interface.h
drivers/net/cxgbe/cxgbe.h
drivers/net/cxgbe/cxgbe_ethdev.c
drivers/net/cxgbe/cxgbe_filter.c
drivers/net/cxgbe/cxgbe_filter.h
drivers/net/cxgbe/cxgbe_flow.c
drivers/net/cxgbe/cxgbe_flow.h
drivers/net/cxgbe/cxgbe_main.c
drivers/net/cxgbe/cxgbevf_main.c
drivers/net/cxgbe/l2t.c [new file with mode: 0644]
drivers/net/cxgbe/l2t.h [new file with mode: 0644]
drivers/net/cxgbe/meson.build
drivers/net/cxgbe/mps_tcam.c [new file with mode: 0644]
drivers/net/cxgbe/mps_tcam.h [new file with mode: 0644]
drivers/net/cxgbe/sge.c
drivers/net/dpaa/Makefile
drivers/net/dpaa/dpaa_ethdev.c
drivers/net/dpaa/dpaa_ethdev.h
drivers/net/dpaa/dpaa_rxtx.c
drivers/net/dpaa/dpaa_rxtx.h
drivers/net/dpaa2/Makefile
drivers/net/dpaa2/base/dpaa2_hw_dpni_annot.h
drivers/net/dpaa2/dpaa2_ethdev.c
drivers/net/dpaa2/dpaa2_rxtx.c
drivers/net/dpaa2/mc/dpni.c
drivers/net/dpaa2/mc/fsl_dpkg.h
drivers/net/dpaa2/mc/fsl_dpni.h
drivers/net/dpaa2/mc/fsl_dpni_cmd.h
drivers/net/dpaa2/mc/fsl_net.h
drivers/net/dpaa2/meson.build
drivers/net/e1000/Makefile
drivers/net/e1000/base/e1000_82571.c
drivers/net/e1000/base/e1000_osdep.h
drivers/net/e1000/base/meson.build
drivers/net/e1000/e1000_ethdev.h
drivers/net/e1000/em_ethdev.c
drivers/net/e1000/em_rxtx.c
drivers/net/e1000/igb_ethdev.c
drivers/net/e1000/igb_flow.c
drivers/net/e1000/igb_rxtx.c
drivers/net/e1000/meson.build
drivers/net/ena/ena_ethdev.c
drivers/net/enetc/Makefile [new file with mode: 0644]
drivers/net/enetc/base/enetc_hw.h [new file with mode: 0644]
drivers/net/enetc/enetc.h [new file with mode: 0644]
drivers/net/enetc/enetc_ethdev.c [new file with mode: 0644]
drivers/net/enetc/enetc_logs.h [new file with mode: 0644]
drivers/net/enetc/enetc_rxtx.c [new file with mode: 0644]
drivers/net/enetc/meson.build [new file with mode: 0644]
drivers/net/enetc/rte_pmd_enetc_version.map [new file with mode: 0644]
drivers/net/enic/Makefile
drivers/net/enic/base/vnic_dev.c
drivers/net/enic/base/vnic_dev.h
drivers/net/enic/base/vnic_devcmd.h
drivers/net/enic/enic.h
drivers/net/enic/enic_ethdev.c
drivers/net/enic/enic_flow.c
drivers/net/enic/enic_main.c
drivers/net/enic/enic_res.c
drivers/net/enic/enic_rxtx.c
drivers/net/enic/enic_rxtx_common.h [new file with mode: 0644]
drivers/net/enic/enic_rxtx_vec_avx2.c [new file with mode: 0644]
drivers/net/enic/meson.build
drivers/net/failsafe/failsafe.c
drivers/net/failsafe/failsafe_args.c
drivers/net/failsafe/failsafe_eal.c
drivers/net/failsafe/failsafe_ether.c
drivers/net/failsafe/failsafe_flow.c
drivers/net/failsafe/failsafe_intr.c
drivers/net/failsafe/failsafe_ops.c
drivers/net/failsafe/failsafe_private.h
drivers/net/failsafe/failsafe_rxtx.c
drivers/net/fm10k/base/meson.build
drivers/net/fm10k/fm10k_ethdev.c
drivers/net/i40e/base/README
drivers/net/i40e/base/i40e_adminq.c
drivers/net/i40e/base/i40e_adminq.h
drivers/net/i40e/base/i40e_adminq_cmd.h
drivers/net/i40e/base/i40e_alloc.h
drivers/net/i40e/base/i40e_common.c
drivers/net/i40e/base/i40e_dcb.c
drivers/net/i40e/base/i40e_dcb.h
drivers/net/i40e/base/i40e_devids.h
drivers/net/i40e/base/i40e_diag.c
drivers/net/i40e/base/i40e_diag.h
drivers/net/i40e/base/i40e_hmc.c
drivers/net/i40e/base/i40e_hmc.h
drivers/net/i40e/base/i40e_lan_hmc.c
drivers/net/i40e/base/i40e_lan_hmc.h
drivers/net/i40e/base/i40e_nvm.c
drivers/net/i40e/base/i40e_osdep.h
drivers/net/i40e/base/i40e_prototype.h
drivers/net/i40e/base/i40e_register.h
drivers/net/i40e/base/i40e_status.h
drivers/net/i40e/base/i40e_type.h
drivers/net/i40e/base/meson.build
drivers/net/i40e/base/virtchnl.h
drivers/net/i40e/i40e_ethdev.c
drivers/net/i40e/i40e_ethdev.h
drivers/net/i40e/i40e_ethdev_vf.c
drivers/net/i40e/i40e_flow.c
drivers/net/i40e/i40e_rxtx.c
drivers/net/i40e/i40e_rxtx_vec_common.h
drivers/net/i40e/i40e_vf_representor.c
drivers/net/i40e/rte_pmd_i40e.c
drivers/net/ifc/base/ifcvf.c
drivers/net/ifc/base/ifcvf.h
drivers/net/ifc/base/ifcvf_osdep.h
drivers/net/ifc/ifcvf_vdpa.c
drivers/net/ixgbe/base/README
drivers/net/ixgbe/base/ixgbe_82598.c
drivers/net/ixgbe/base/ixgbe_82598.h
drivers/net/ixgbe/base/ixgbe_82599.c
drivers/net/ixgbe/base/ixgbe_82599.h
drivers/net/ixgbe/base/ixgbe_api.c
drivers/net/ixgbe/base/ixgbe_api.h
drivers/net/ixgbe/base/ixgbe_common.c
drivers/net/ixgbe/base/ixgbe_common.h
drivers/net/ixgbe/base/ixgbe_dcb.c
drivers/net/ixgbe/base/ixgbe_dcb.h
drivers/net/ixgbe/base/ixgbe_dcb_82598.c
drivers/net/ixgbe/base/ixgbe_dcb_82598.h
drivers/net/ixgbe/base/ixgbe_dcb_82599.c
drivers/net/ixgbe/base/ixgbe_dcb_82599.h
drivers/net/ixgbe/base/ixgbe_hv_vf.c
drivers/net/ixgbe/base/ixgbe_hv_vf.h
drivers/net/ixgbe/base/ixgbe_mbx.c
drivers/net/ixgbe/base/ixgbe_mbx.h
drivers/net/ixgbe/base/ixgbe_osdep.h
drivers/net/ixgbe/base/ixgbe_phy.c
drivers/net/ixgbe/base/ixgbe_phy.h
drivers/net/ixgbe/base/ixgbe_type.h
drivers/net/ixgbe/base/ixgbe_vf.c
drivers/net/ixgbe/base/ixgbe_vf.h
drivers/net/ixgbe/base/ixgbe_x540.c
drivers/net/ixgbe/base/ixgbe_x540.h
drivers/net/ixgbe/base/ixgbe_x550.c
drivers/net/ixgbe/base/ixgbe_x550.h
drivers/net/ixgbe/base/meson.build
drivers/net/ixgbe/ixgbe_ethdev.c
drivers/net/ixgbe/ixgbe_flow.c
drivers/net/ixgbe/ixgbe_ipsec.c
drivers/net/ixgbe/ixgbe_rxtx.c
drivers/net/ixgbe/ixgbe_vf_representor.c
drivers/net/ixgbe/meson.build
drivers/net/kni/rte_eth_kni.c
drivers/net/liquidio/lio_ethdev.c
drivers/net/meson.build
drivers/net/mlx4/meson.build [new file with mode: 0644]
drivers/net/mlx4/mlx4.c
drivers/net/mlx4/mlx4_mr.c
drivers/net/mlx4/mlx4_rxq.c
drivers/net/mlx4/mlx4_rxtx.h
drivers/net/mlx5/Makefile
drivers/net/mlx5/meson.build [new file with mode: 0644]
drivers/net/mlx5/mlx5.c
drivers/net/mlx5/mlx5.h
drivers/net/mlx5/mlx5_ethdev.c
drivers/net/mlx5/mlx5_flow.c
drivers/net/mlx5/mlx5_flow.h [new file with mode: 0644]
drivers/net/mlx5/mlx5_flow_dv.c [new file with mode: 0644]
drivers/net/mlx5/mlx5_flow_tcf.c [new file with mode: 0644]
drivers/net/mlx5/mlx5_flow_verbs.c [new file with mode: 0644]
drivers/net/mlx5/mlx5_glue.c
drivers/net/mlx5/mlx5_glue.h
drivers/net/mlx5/mlx5_mac.c
drivers/net/mlx5/mlx5_mr.c
drivers/net/mlx5/mlx5_nl_flow.c [deleted file]
drivers/net/mlx5/mlx5_prm.h
drivers/net/mlx5/mlx5_rxq.c
drivers/net/mlx5/mlx5_rxtx.c
drivers/net/mlx5/mlx5_rxtx.h
drivers/net/mlx5/mlx5_rxtx_vec.c
drivers/net/mlx5/mlx5_rxtx_vec.h
drivers/net/mlx5/mlx5_rxtx_vec_neon.h
drivers/net/mlx5/mlx5_rxtx_vec_sse.h
drivers/net/mlx5/mlx5_socket.c
drivers/net/mlx5/mlx5_stats.c
drivers/net/mlx5/mlx5_txq.c
drivers/net/mvneta/Makefile [new file with mode: 0644]
drivers/net/mvneta/meson.build [new file with mode: 0644]
drivers/net/mvneta/mvneta_ethdev.c [new file with mode: 0644]
drivers/net/mvneta/mvneta_ethdev.h [new file with mode: 0644]
drivers/net/mvneta/mvneta_rxtx.c [new file with mode: 0644]
drivers/net/mvneta/mvneta_rxtx.h [new file with mode: 0644]
drivers/net/mvneta/rte_pmd_mvneta_version.map [new file with mode: 0644]
drivers/net/mvpp2/Makefile
drivers/net/mvpp2/meson.build
drivers/net/mvpp2/mrvl_ethdev.c
drivers/net/mvpp2/mrvl_ethdev.h
drivers/net/mvpp2/mrvl_flow.c
drivers/net/mvpp2/mrvl_flow.h [new file with mode: 0644]
drivers/net/mvpp2/mrvl_mtr.c [new file with mode: 0644]
drivers/net/mvpp2/mrvl_mtr.h [new file with mode: 0644]
drivers/net/mvpp2/mrvl_qos.c
drivers/net/mvpp2/mrvl_qos.h
drivers/net/mvpp2/mrvl_tm.c [new file with mode: 0644]
drivers/net/mvpp2/mrvl_tm.h [new file with mode: 0644]
drivers/net/netvsc/Makefile
drivers/net/netvsc/hn_ethdev.c
drivers/net/netvsc/hn_nvs.c
drivers/net/netvsc/hn_nvs.h
drivers/net/netvsc/hn_rndis.c
drivers/net/netvsc/hn_rndis.h
drivers/net/netvsc/hn_rxtx.c
drivers/net/netvsc/hn_var.h
drivers/net/netvsc/hn_vf.c [new file with mode: 0644]
drivers/net/netvsc/meson.build
drivers/net/nfp/Makefile
drivers/net/nfp/meson.build
drivers/net/nfp/nfp_net.c
drivers/net/nfp/nfp_net_pmd.h
drivers/net/null/rte_eth_null.c
drivers/net/octeontx/base/meson.build
drivers/net/octeontx/base/octeontx_io.h
drivers/net/octeontx/octeontx_ethdev.c
drivers/net/octeontx/octeontx_ethdev.h
drivers/net/octeontx/octeontx_rxtx.c
drivers/net/octeontx/octeontx_rxtx.h
drivers/net/pcap/rte_eth_pcap.c
drivers/net/qede/Makefile
drivers/net/qede/base/bcm_osal.c
drivers/net/qede/base/bcm_osal.h
drivers/net/qede/base/common_hsi.h
drivers/net/qede/base/ecore.h
drivers/net/qede/base/ecore_cxt.c
drivers/net/qede/base/ecore_dcbx.c
drivers/net/qede/base/ecore_dcbx_api.h
drivers/net/qede/base/ecore_dev.c
drivers/net/qede/base/ecore_dev_api.h
drivers/net/qede/base/ecore_hsi_common.h
drivers/net/qede/base/ecore_hsi_debug_tools.h
drivers/net/qede/base/ecore_hsi_eth.h
drivers/net/qede/base/ecore_hw.c
drivers/net/qede/base/ecore_hw.h
drivers/net/qede/base/ecore_init_fw_funcs.c
drivers/net/qede/base/ecore_init_fw_funcs.h
drivers/net/qede/base/ecore_init_ops.c
drivers/net/qede/base/ecore_int.c
drivers/net/qede/base/ecore_int.h
drivers/net/qede/base/ecore_int_api.h
drivers/net/qede/base/ecore_iov_api.h
drivers/net/qede/base/ecore_iro.h
drivers/net/qede/base/ecore_iro_values.h
drivers/net/qede/base/ecore_l2.c
drivers/net/qede/base/ecore_l2_api.h
drivers/net/qede/base/ecore_mcp.c
drivers/net/qede/base/ecore_mcp.h
drivers/net/qede/base/ecore_mcp_api.h
drivers/net/qede/base/ecore_rt_defs.h
drivers/net/qede/base/ecore_sp_commands.c
drivers/net/qede/base/ecore_sp_commands.h
drivers/net/qede/base/ecore_spq.c
drivers/net/qede/base/ecore_sriov.c
drivers/net/qede/base/ecore_vf.c
drivers/net/qede/base/ecore_vfpf_if.h
drivers/net/qede/base/eth_common.h
drivers/net/qede/base/mcp_public.h
drivers/net/qede/base/meson.build [new file with mode: 0644]
drivers/net/qede/base/reg_addr.h
drivers/net/qede/meson.build [new file with mode: 0644]
drivers/net/qede/qede_ethdev.c
drivers/net/qede/qede_ethdev.h
drivers/net/qede/qede_fdir.c [deleted file]
drivers/net/qede/qede_filter.c [new file with mode: 0644]
drivers/net/qede/qede_main.c
drivers/net/qede/qede_rxtx.c
drivers/net/qede/qede_rxtx.h
drivers/net/ring/rte_eth_ring.c
drivers/net/sfc/base/ef10_ev.c
drivers/net/sfc/base/ef10_filter.c
drivers/net/sfc/base/ef10_image.c
drivers/net/sfc/base/ef10_impl.h
drivers/net/sfc/base/ef10_intr.c
drivers/net/sfc/base/ef10_mac.c
drivers/net/sfc/base/ef10_nic.c
drivers/net/sfc/base/ef10_nvram.c
drivers/net/sfc/base/ef10_phy.c
drivers/net/sfc/base/ef10_rx.c
drivers/net/sfc/base/ef10_signed_image_layout.h
drivers/net/sfc/base/ef10_tx.c
drivers/net/sfc/base/efx.h
drivers/net/sfc/base/efx_annote.h [new file with mode: 0644]
drivers/net/sfc/base/efx_bootcfg.c
drivers/net/sfc/base/efx_filter.c
drivers/net/sfc/base/efx_impl.h
drivers/net/sfc/base/efx_lic.c
drivers/net/sfc/base/efx_mcdi.c
drivers/net/sfc/base/efx_mcdi.h
drivers/net/sfc/base/efx_mon.c
drivers/net/sfc/base/efx_nic.c
drivers/net/sfc/base/efx_nvram.c
drivers/net/sfc/base/efx_phy.c
drivers/net/sfc/base/efx_port.c
drivers/net/sfc/base/efx_rx.c
drivers/net/sfc/base/efx_tunnel.c
drivers/net/sfc/base/efx_tx.c
drivers/net/sfc/base/hunt_nic.c
drivers/net/sfc/base/mc_driver_pcol_strs.h [new file with mode: 0644]
drivers/net/sfc/base/mcdi_mon.c
drivers/net/sfc/base/mcdi_mon.h
drivers/net/sfc/base/medford2_nic.c
drivers/net/sfc/base/medford_nic.c
drivers/net/sfc/base/meson.build
drivers/net/sfc/base/siena_mac.c
drivers/net/sfc/base/siena_nic.c
drivers/net/sfc/base/siena_nvram.c
drivers/net/sfc/base/siena_phy.c
drivers/net/sfc/efsys.h
drivers/net/sfc/sfc_dp_tx.h
drivers/net/sfc/sfc_ef10_essb_rx.c
drivers/net/sfc/sfc_ef10_rx.c
drivers/net/sfc/sfc_ef10_tx.c
drivers/net/sfc/sfc_ethdev.c
drivers/net/sfc/sfc_rx.c
drivers/net/sfc/sfc_tso.c
drivers/net/sfc/sfc_tso.h [new file with mode: 0644]
drivers/net/sfc/sfc_tx.c
drivers/net/softnic/Makefile
drivers/net/softnic/conn.c
drivers/net/softnic/hash_func.h [deleted file]
drivers/net/softnic/hash_func_arm64.h [deleted file]
drivers/net/softnic/meson.build
drivers/net/softnic/rte_eth_softnic.c
drivers/net/softnic/rte_eth_softnic_action.c
drivers/net/softnic/rte_eth_softnic_cli.c
drivers/net/softnic/rte_eth_softnic_cryptodev.c [new file with mode: 0644]
drivers/net/softnic/rte_eth_softnic_flow.c [new file with mode: 0644]
drivers/net/softnic/rte_eth_softnic_internals.h
drivers/net/softnic/rte_eth_softnic_meter.c [new file with mode: 0644]
drivers/net/softnic/rte_eth_softnic_pipeline.c
drivers/net/softnic/rte_eth_softnic_thread.c
drivers/net/szedata2/rte_eth_szedata2.c
drivers/net/tap/Makefile
drivers/net/tap/meson.build [new file with mode: 0644]
drivers/net/tap/rte_eth_tap.c
drivers/net/tap/rte_eth_tap.h
drivers/net/tap/tap_bpf_insns.h
drivers/net/tap/tap_flow.c
drivers/net/tap/tap_intr.c
drivers/net/thunderx/base/meson.build
drivers/net/thunderx/nicvf_ethdev.c
drivers/net/thunderx/nicvf_ethdev.h
drivers/net/vdev_netvsc/meson.build [new file with mode: 0644]
drivers/net/vdev_netvsc/vdev_netvsc.c
drivers/net/vhost/rte_eth_vhost.c
drivers/net/virtio/virtio_ethdev.c
drivers/net/virtio/virtio_ethdev.h
drivers/net/virtio/virtio_pci.c
drivers/net/virtio/virtio_rxtx_simple.c
drivers/net/virtio/virtio_user/vhost.h
drivers/net/virtio/virtio_user/vhost_kernel.c
drivers/net/virtio/virtio_user/vhost_kernel_tap.c
drivers/net/virtio/virtio_user/vhost_kernel_tap.h
drivers/net/virtio/virtio_user/vhost_user.c
drivers/net/virtio/virtio_user/virtio_user_dev.c
drivers/net/virtio/virtio_user_ethdev.c
drivers/net/vmxnet3/meson.build [new file with mode: 0644]
drivers/net/vmxnet3/vmxnet3_ethdev.c
drivers/net/vmxnet3/vmxnet3_ethdev.h
drivers/raw/dpaa2_cmdif/Makefile
drivers/raw/dpaa2_cmdif/meson.build
drivers/raw/dpaa2_qdma/Makefile
drivers/raw/dpaa2_qdma/dpaa2_qdma.c
drivers/raw/dpaa2_qdma/dpaa2_qdma.h
drivers/raw/dpaa2_qdma/meson.build
drivers/raw/ifpga_rawdev/base/ifpga_enumerate.c
drivers/raw/ifpga_rawdev/base/ifpga_feature_dev.h
drivers/raw/ifpga_rawdev/base/ifpga_port.c
drivers/raw/ifpga_rawdev/base/ifpga_port_error.c
drivers/raw/ifpga_rawdev/base/meson.build
drivers/raw/ifpga_rawdev/ifpga_rawdev.c
drivers/raw/skeleton_rawdev/skeleton_rawdev.c
drivers/raw/skeleton_rawdev/skeleton_rawdev_test.c
examples/Makefile
examples/bbdev_app/main.c
examples/bond/main.c
examples/cmdline/Makefile
examples/ethtool/ethtool-app/Makefile
examples/eventdev_pipeline/main.c
examples/eventdev_pipeline/pipeline_common.h
examples/eventdev_pipeline/pipeline_worker_generic.c
examples/eventdev_pipeline/pipeline_worker_tx.c
examples/exception_path/main.c
examples/flow_filtering/main.c
examples/ip_fragmentation/main.c
examples/ip_pipeline/Makefile
examples/ip_pipeline/action.c
examples/ip_pipeline/action.h
examples/ip_pipeline/cli.c
examples/ip_pipeline/conn.c
examples/ip_pipeline/cryptodev.c [new file with mode: 0644]
examples/ip_pipeline/cryptodev.h [new file with mode: 0644]
examples/ip_pipeline/examples/flow_crypto.cli [new file with mode: 0644]
examples/ip_pipeline/hash_func.h [deleted file]
examples/ip_pipeline/hash_func_arm64.h [deleted file]
examples/ip_pipeline/link.c
examples/ip_pipeline/main.c
examples/ip_pipeline/meson.build
examples/ip_pipeline/pipeline.c
examples/ip_pipeline/pipeline.h
examples/ip_pipeline/thread.c
examples/ip_reassembly/main.c
examples/ipsec-secgw/esp.c
examples/ipsec-secgw/ipsec-secgw.c
examples/ipsec-secgw/sa.c
examples/ipv4_multicast/main.c
examples/kni/Makefile
examples/kni/main.c
examples/kni/meson.build
examples/l2fwd-cat/Makefile
examples/l2fwd-cat/meson.build
examples/l2fwd-crypto/main.c
examples/l2fwd-jobstats/main.c
examples/l2fwd-keepalive/main.c
examples/l2fwd/main.c
examples/l3fwd-acl/main.c
examples/l3fwd-power/Makefile
examples/l3fwd-power/main.c
examples/l3fwd-power/meson.build
examples/l3fwd-vf/main.c
examples/l3fwd/main.c
examples/link_status_interrupt/main.c
examples/load_balancer/Makefile
examples/load_balancer/init.c
examples/meson.build
examples/multi_process/Makefile
examples/multi_process/hotplug_mp/Makefile [new file with mode: 0644]
examples/multi_process/hotplug_mp/commands.c [new file with mode: 0644]
examples/multi_process/hotplug_mp/commands.h [new file with mode: 0644]
examples/multi_process/hotplug_mp/main.c [new file with mode: 0644]
examples/multi_process/symmetric_mp/main.c
examples/netmap_compat/bridge/bridge.c
examples/performance-thread/l3fwd-thread/main.c
examples/performance-thread/pthread_shim/main.c
examples/performance-thread/pthread_shim/pthread_shim.c
examples/qos_meter/main.c
examples/qos_sched/Makefile
examples/qos_sched/init.c
examples/quota_watermark/qw/init.c
examples/service_cores/main.c
examples/tep_termination/Makefile
examples/tep_termination/vxlan_setup.c
examples/vdpa/Makefile [new file with mode: 0644]
examples/vdpa/main.c [new file with mode: 0644]
examples/vdpa/meson.build [new file with mode: 0644]
examples/vhost/Makefile
examples/vhost/main.c
examples/vhost_crypto/Makefile
examples/vhost_crypto/main.c
examples/vhost_crypto/meson.build
examples/vhost_scsi/Makefile
examples/vhost_scsi/meson.build
examples/vm_power_manager/Makefile
examples/vm_power_manager/channel_manager.c
examples/vm_power_manager/channel_manager.h
examples/vm_power_manager/channel_monitor.c
examples/vm_power_manager/guest_cli/meson.build [new file with mode: 0644]
examples/vm_power_manager/guest_cli/vm_power_cli_guest.c
examples/vm_power_manager/main.c
examples/vm_power_manager/meson.build
kernel/linux/igb_uio/igb_uio.c
kernel/linux/igb_uio/meson.build
kernel/linux/kni/Kbuild [new file with mode: 0644]
kernel/linux/kni/ethtool/igb/igb_ethtool.c
kernel/linux/kni/ethtool/igb/meson.build [new file with mode: 0644]
kernel/linux/kni/ethtool/ixgbe/ixgbe.h
kernel/linux/kni/ethtool/ixgbe/ixgbe_ethtool.c
kernel/linux/kni/ethtool/ixgbe/meson.build [new file with mode: 0644]
kernel/linux/kni/ethtool/meson.build [new file with mode: 0644]
kernel/linux/kni/kni_dev.h
kernel/linux/kni/kni_ethtool.c
kernel/linux/kni/kni_fifo.h
kernel/linux/kni/kni_misc.c
kernel/linux/kni/kni_net.c
kernel/linux/kni/meson.build [new file with mode: 0644]
kernel/linux/meson.build
lib/Makefile
lib/librte_acl/rte_acl.c
lib/librte_acl/rte_acl.h
lib/librte_bpf/bpf_load.c
lib/librte_bpf/rte_bpf_ethdev.h
lib/librte_cmdline/Makefile
lib/librte_cmdline/cmdline.c
lib/librte_cmdline/meson.build
lib/librte_compressdev/rte_comp.c
lib/librte_compressdev/rte_comp.h
lib/librte_compressdev/rte_compressdev.c
lib/librte_compressdev/rte_compressdev_pmd.c
lib/librte_compressdev/rte_compressdev_pmd.h
lib/librte_cryptodev/Makefile
lib/librte_cryptodev/meson.build
lib/librte_cryptodev/rte_cryptodev.c
lib/librte_cryptodev/rte_cryptodev_pmd.c
lib/librte_cryptodev/rte_cryptodev_pmd.h
lib/librte_eal/bsdapp/eal/Makefile
lib/librte_eal/bsdapp/eal/eal.c
lib/librte_eal/bsdapp/eal/eal_dev.c
lib/librte_eal/bsdapp/eal/eal_memalloc.c
lib/librte_eal/bsdapp/eal/eal_memory.c
lib/librte_eal/common/Makefile
lib/librte_eal/common/arch/arm/meson.build
lib/librte_eal/common/arch/ppc_64/meson.build [new file with mode: 0644]
lib/librte_eal/common/arch/x86/meson.build
lib/librte_eal/common/eal_common_bus.c
lib/librte_eal/common/eal_common_class.c
lib/librte_eal/common/eal_common_dev.c
lib/librte_eal/common/eal_common_devargs.c
lib/librte_eal/common/eal_common_fbarray.c
lib/librte_eal/common/eal_common_memory.c
lib/librte_eal/common/eal_common_memzone.c
lib/librte_eal/common/eal_common_options.c
lib/librte_eal/common/eal_common_proc.c
lib/librte_eal/common/eal_common_string_fns.c
lib/librte_eal/common/eal_common_timer.c
lib/librte_eal/common/eal_filesystem.h
lib/librte_eal/common/eal_internal_cfg.h
lib/librte_eal/common/eal_memalloc.h
lib/librte_eal/common/eal_options.h
lib/librte_eal/common/eal_private.h
lib/librte_eal/common/hotplug_mp.c [new file with mode: 0644]
lib/librte_eal/common/hotplug_mp.h [new file with mode: 0644]
lib/librte_eal/common/include/arch/arm/rte_cycles_32.h
lib/librte_eal/common/include/arch/ppc_64/meson.build [new file with mode: 0644]
lib/librte_eal/common/include/arch/ppc_64/rte_pause.h
lib/librte_eal/common/include/generic/rte_cycles.h
lib/librte_eal/common/include/rte_bitmap.h
lib/librte_eal/common/include/rte_bus.h
lib/librte_eal/common/include/rte_common.h
lib/librte_eal/common/include/rte_dev.h
lib/librte_eal/common/include/rte_devargs.h
lib/librte_eal/common/include/rte_eal.h
lib/librte_eal/common/include/rte_eal_interrupts.h
lib/librte_eal/common/include/rte_eal_memconfig.h
lib/librte_eal/common/include/rte_malloc.h
lib/librte_eal/common/include/rte_malloc_heap.h
lib/librte_eal/common/include/rte_memory.h
lib/librte_eal/common/include/rte_option.h [new file with mode: 0644]
lib/librte_eal/common/include/rte_string_fns.h
lib/librte_eal/common/include/rte_version.h
lib/librte_eal/common/include/rte_vfio.h
lib/librte_eal/common/malloc_elem.c
lib/librte_eal/common/malloc_heap.c
lib/librte_eal/common/malloc_heap.h
lib/librte_eal/common/malloc_mp.c
lib/librte_eal/common/meson.build
lib/librte_eal/common/rte_malloc.c
lib/librte_eal/common/rte_option.c [new file with mode: 0644]
lib/librte_eal/linuxapp/eal/Makefile
lib/librte_eal/linuxapp/eal/eal.c
lib/librte_eal/linuxapp/eal/eal_dev.c
lib/librte_eal/linuxapp/eal/eal_hugepage_info.c
lib/librte_eal/linuxapp/eal/eal_interrupts.c
lib/librte_eal/linuxapp/eal/eal_memalloc.c
lib/librte_eal/linuxapp/eal/eal_memory.c
lib/librte_eal/linuxapp/eal/eal_thread.c
lib/librte_eal/linuxapp/eal/eal_timer.c
lib/librte_eal/linuxapp/eal/eal_vfio.c
lib/librte_eal/linuxapp/eal/eal_vfio.h
lib/librte_eal/linuxapp/eal/eal_vfio_mp_sync.c
lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h
lib/librte_eal/meson.build
lib/librte_eal/rte_eal_version.map
lib/librte_ethdev/Makefile
lib/librte_ethdev/ethdev_private.c [new file with mode: 0644]
lib/librte_ethdev/ethdev_private.h [new file with mode: 0644]
lib/librte_ethdev/ethdev_profile.c
lib/librte_ethdev/ethdev_profile.h
lib/librte_ethdev/meson.build
lib/librte_ethdev/rte_class_eth.c [new file with mode: 0644]
lib/librte_ethdev/rte_ethdev.c
lib/librte_ethdev/rte_ethdev.h
lib/librte_ethdev/rte_ethdev_core.h
lib/librte_ethdev/rte_ethdev_driver.h
lib/librte_ethdev/rte_ethdev_pci.h
lib/librte_ethdev/rte_ethdev_version.map
lib/librte_ethdev/rte_flow.c
lib/librte_ethdev/rte_flow.h
lib/librte_ethdev/rte_tm.h
lib/librte_eventdev/Makefile
lib/librte_eventdev/meson.build
lib/librte_eventdev/rte_event_eth_rx_adapter.c
lib/librte_eventdev/rte_event_eth_rx_adapter.h
lib/librte_eventdev/rte_event_eth_tx_adapter.c [new file with mode: 0644]
lib/librte_eventdev/rte_event_eth_tx_adapter.h [new file with mode: 0644]
lib/librte_eventdev/rte_eventdev.c
lib/librte_eventdev/rte_eventdev.h
lib/librte_eventdev/rte_eventdev_pmd.h
lib/librte_eventdev/rte_eventdev_version.map
lib/librte_flow_classify/rte_flow_classify.c
lib/librte_hash/rte_cuckoo_hash.c
lib/librte_hash/rte_cuckoo_hash.h
lib/librte_hash/rte_hash.h
lib/librte_hash/rte_hash_version.map
lib/librte_ip_frag/ip_frag_common.h
lib/librte_ip_frag/ip_frag_internal.c
lib/librte_ip_frag/rte_ip_frag.h
lib/librte_ip_frag/rte_ip_frag_common.c
lib/librte_ip_frag/rte_ip_frag_version.map
lib/librte_kni/rte_kni.c
lib/librte_kni/rte_kni.h
lib/librte_kni/rte_kni_fifo.h
lib/librte_kni/rte_kni_version.map
lib/librte_kvargs/rte_kvargs.c
lib/librte_kvargs/rte_kvargs.h
lib/librte_latencystats/rte_latencystats.c
lib/librte_lpm/Makefile
lib/librte_lpm/meson.build
lib/librte_lpm/rte_lpm6.c
lib/librte_mbuf/meson.build
lib/librte_mbuf/rte_mbuf.c
lib/librte_mbuf/rte_mbuf.h
lib/librte_mbuf/rte_mbuf_ptype.c
lib/librte_mbuf/rte_mbuf_ptype.h
lib/librte_mempool/rte_mempool.c
lib/librte_net/Makefile
lib/librte_net/meson.build
lib/librte_net/net_crc_sse.h
lib/librte_net/rte_ether.h
lib/librte_net/rte_mpls.h [new file with mode: 0644]
lib/librte_net/rte_net.c
lib/librte_net/rte_net.h
lib/librte_pdump/Makefile
lib/librte_pipeline/Makefile
lib/librte_pipeline/meson.build
lib/librte_pipeline/rte_pipeline.c
lib/librte_pipeline/rte_pipeline_version.map
lib/librte_pipeline/rte_table_action.c
lib/librte_pipeline/rte_table_action.h
lib/librte_port/Makefile
lib/librte_port/meson.build
lib/librte_port/rte_port_sym_crypto.c [new file with mode: 0644]
lib/librte_port/rte_port_sym_crypto.h [new file with mode: 0644]
lib/librte_port/rte_port_version.map
lib/librte_power/Makefile
lib/librte_power/channel_commands.h
lib/librte_power/meson.build
lib/librte_power/rte_power_empty_poll.c [new file with mode: 0644]
lib/librte_power/rte_power_empty_poll.h [new file with mode: 0644]
lib/librte_power/rte_power_version.map
lib/librte_rawdev/rte_rawdev.c
lib/librte_rawdev/rte_rawdev_pmd.h
lib/librte_ring/meson.build
lib/librte_ring/rte_ring.h
lib/librte_sched/Makefile
lib/librte_sched/rte_sched.c
lib/librte_security/rte_security.c
lib/librte_security/rte_security.h
lib/librte_table/Makefile
lib/librte_table/meson.build
lib/librte_table/rte_table_hash_func.h [new file with mode: 0644]
lib/librte_table/rte_table_hash_func_arm64.h [new file with mode: 0644]
lib/librte_telemetry/Makefile [new file with mode: 0644]
lib/librte_telemetry/meson.build [new file with mode: 0644]
lib/librte_telemetry/rte_telemetry.c [new file with mode: 0644]
lib/librte_telemetry/rte_telemetry.h [new file with mode: 0644]
lib/librte_telemetry/rte_telemetry_internal.h [new file with mode: 0644]
lib/librte_telemetry/rte_telemetry_parser.c [new file with mode: 0644]
lib/librte_telemetry/rte_telemetry_parser.h [new file with mode: 0644]
lib/librte_telemetry/rte_telemetry_parser_test.c [new file with mode: 0644]
lib/librte_telemetry/rte_telemetry_parser_test.h [new file with mode: 0644]
lib/librte_telemetry/rte_telemetry_socket_tests.h [new file with mode: 0644]
lib/librte_telemetry/rte_telemetry_version.map [new file with mode: 0644]
lib/librte_vhost/Makefile
lib/librte_vhost/meson.build
lib/librte_vhost/rte_vdpa.h
lib/librte_vhost/rte_vhost.h
lib/librte_vhost/rte_vhost_version.map
lib/librte_vhost/socket.c
lib/librte_vhost/vdpa.c
lib/librte_vhost/vhost.c
lib/librte_vhost/vhost.h
lib/librte_vhost/vhost_crypto.c
lib/librte_vhost/vhost_user.c
lib/librte_vhost/vhost_user.h
lib/librte_vhost/virtio_net.c
lib/meson.build
meson.build
meson_options.txt
mk/internal/rte.compile-pre.mk
mk/rte.app.mk
mk/rte.cpuflags.mk
mk/rte.sdkdoc.mk
mk/target/generic/rte.vars.mk
mk/toolchain/gcc/rte.toolchain-compat.mk
test/test-pipeline/init.c
test/test/Makefile
test/test/autotest_data.py
test/test/meson.build
test/test/test.c
test/test/test_acl.h
test/test/test_alarm.c
test/test/test_bitmap.c
test/test/test_cryptodev.c
test/test/test_cryptodev.h
test/test/test_cryptodev_aes_test_vectors.h
test/test/test_cryptodev_blockcipher.c
test/test/test_cryptodev_blockcipher.h
test/test/test_cryptodev_des_test_vectors.h
test/test/test_cryptodev_hash_test_vectors.h
test/test/test_cycles.c
test/test/test_event_eth_rx_adapter.c
test/test/test_event_eth_tx_adapter.c [new file with mode: 0644]
test/test/test_event_timer_adapter.c
test/test/test_eventdev.c
test/test/test_external_mem.c [new file with mode: 0644]
test/test/test_hash.c
test/test/test_hash_multiwriter.c
test/test/test_hash_perf.c
test/test/test_hash_readwrite.c
test/test/test_hash_readwrite_lf.c [new file with mode: 0644]
test/test/test_hash_scaling.c [deleted file]
test/test/test_kni.c
test/test/test_kvargs.c
test/test/test_link_bonding_mode4.c
test/test/test_malloc.c
test/test/test_memzone.c
test/test/test_metrics.c [new file with mode: 0644]
test/test/test_pmd_perf.c
test/test/test_pmd_ring.c
test/test/test_timer_racecond.c
test/test/virtual_pmd.c
usertools/dpdk-telemetry-client.py [new file with mode: 0644]

diff --git a/MAINTAINERS b/MAINTAINERS
index 9fd258f..e60379d 100644
@@ -81,6 +81,7 @@ M: Thomas Monjalon <thomas@monjalon.net>
 F: MAINTAINERS
 F: devtools/check-dup-includes.sh
 F: devtools/check-maintainers.sh
+F: devtools/check-forbidden-tokens.awk
 F: devtools/check-git-log.sh
 F: devtools/check-includes.sh
 F: devtools/check-symbol-maps.sh
@@ -177,6 +178,7 @@ F: lib/librte_eal/common/eal_hugepages.h
 F: lib/librte_eal/linuxapp/eal/eal_mem*
 F: lib/librte_eal/bsdapp/eal/eal_mem*
 F: doc/guides/prog_guide/env_abstraction_layer.rst
+F: test/test/test_external_mem.c
 F: test/test/test_func_reentrancy.c
 F: test/test/test_fbarray.c
 F: test/test/test_malloc.c
@@ -377,6 +379,13 @@ F: lib/librte_eventdev/*eth_rx_adapter*
 F: test/test/test_event_eth_rx_adapter.c
 F: doc/guides/prog_guide/event_ethernet_rx_adapter.rst
 
+Eventdev Ethdev Tx Adapter API - EXPERIMENTAL
+M: Nikhil Rao <nikhil.rao@intel.com>
+T: git://dpdk.org/next/dpdk-next-eventdev
+F: lib/librte_eventdev/*eth_tx_adapter*
+F: test/test/test_event_eth_tx_adapter.c
+F: doc/guides/prog_guide/event_ethernet_tx_adapter.rst
+
 Eventdev Timer Adapter API - EXPERIMENTAL
 M: Erik Gabriel Carrillo <erik.g.carrillo@intel.com>
 T: git://dpdk.org/next/dpdk-next-eventdev
@@ -419,6 +428,7 @@ F: drivers/bus/ifpga/
 NXP buses
 M: Hemant Agrawal <hemant.agrawal@nxp.com>
 M: Shreyansh Jain <shreyansh.jain@nxp.com>
+F: drivers/common/dpaax/
 F: drivers/bus/dpaa/
 F: drivers/bus/fslmc/
 
@@ -477,6 +487,13 @@ F: drivers/net/axgbe/
 F: doc/guides/nics/axgbe.rst
 F: doc/guides/nics/features/axgbe.ini
 
+Aquantia atlantic
+M: Igor Russkikh <igor.russkikh@aquantia.com>
+M: Pavel Belous <pavel.belous@aquantia.com>
+F: drivers/net/atlantic/
+F: doc/guides/nics/atlantic.rst
+F: doc/guides/nics/features/atlantic.ini
+
 Atomic Rules ARK
 M: Shepard Siegel <shepard.siegel@atomicrules.com>
 M: Ed Czeck <ed.czeck@atomicrules.com>
@@ -506,8 +523,7 @@ F: drivers/net/liquidio/
 F: doc/guides/nics/liquidio.rst
 F: doc/guides/nics/features/liquidio.ini
 
-Cavium OcteonTX
-M: Santosh Shukla <santosh.shukla@caviumnetworks.com>
+Cavium OCTEON TX
 M: Jerin Jacob <jerin.jacob@caviumnetworks.com>
 F: drivers/common/octeontx/
 F: drivers/mempool/octeontx/
@@ -581,10 +597,19 @@ Marvell mvpp2
 M: Tomasz Duszynski <tdu@semihalf.com>
 M: Dmitri Epshtein <dima@marvell.com>
 M: Natalie Samsonov <nsamsono@marvell.com>
+F: drivers/common/mvep/
 F: drivers/net/mvpp2/
 F: doc/guides/nics/mvpp2.rst
 F: doc/guides/nics/features/mvpp2.ini
 
+Marvell mvneta
+M: Zyta Szpak <zr@semihalf.com>
+M: Dmitri Epshtein <dima@marvell.com>
+M: Natalie Samsonov <nsamsono@marvell.com>
+F: drivers/net/mvneta/
+F: doc/guides/nics/mvneta.rst
+F: doc/guides/nics/features/mvneta.ini
+
 Mellanox mlx4
 M: Matan Azrad <matan@mellanox.com>
 M: Shahaf Shuler <shahafs@mellanox.com>
@@ -643,6 +668,13 @@ F: drivers/net/dpaa2/
 F: doc/guides/nics/dpaa2.rst
 F: doc/guides/nics/features/dpaa2.ini
 
+NXP enetc
+M: Gagandeep Singh <g.singh@nxp.com>
+M: Pankaj Chauhan <pankaj.chauhan@nxp.com>
+F: drivers/net/enetc/
+F: doc/guides/nics/enetc.rst
+F: doc/guides/nics/features/enetc.ini
+
 QLogic bnx2x
 M: Harish Patil <harish.patil@cavium.com>
 M: Rasesh Mody <rasesh.mody@cavium.com>
@@ -682,6 +714,8 @@ F: doc/guides/sample_app_ug/vhost.rst
 F: examples/vhost_scsi/
 F: doc/guides/sample_app_ug/vhost_scsi.rst
 F: examples/vhost_crypto/
+F: examples/vdpa/
+F: doc/guides/sample_app_ug/vdpa.rst
 
 Vhost PMD
 M: Maxime Coquelin <maxime.coquelin@redhat.com>
@@ -771,6 +805,13 @@ F: drivers/crypto/armv8/
 F: doc/guides/cryptodevs/armv8.rst
 F: doc/guides/cryptodevs/features/armv8.ini
 
+Cavium OCTEON TX crypto
+M: Anoob Joseph <anoob.joseph@caviumnetworks.com>
+F: drivers/common/cpt/
+F: drivers/crypto/octeontx/
+F: doc/guides/cryptodevs/octeontx.rst
+F: doc/guides/cryptodevs/features/octeontx.ini
+
 Crypto Scheduler
 M: Fan Zhang <roy.fan.zhang@intel.com>
 F: drivers/crypto/scheduler/
@@ -817,6 +858,13 @@ F: drivers/crypto/null/
 F: doc/guides/cryptodevs/null.rst
 F: doc/guides/cryptodevs/features/null.ini
 
+NXP CAAM JR
+M: Gagandeep Singh <g.singh@nxp.com>
+M: Hemant Agrawal <hemant.agrawal@nxp.com>
+F: drivers/crypto/caam_jr/
+F: doc/guides/cryptodevs/caam_jr.rst
+F: doc/guides/cryptodevs/features/caam_jr.ini
+
 NXP DPAA_SEC
 M: Akhil Goyal <akhil.goyal@nxp.com>
 M: Hemant Agrawal <hemant.agrawal@nxp.com>
@@ -861,7 +909,7 @@ Compression Drivers
 M: Pablo de Lara <pablo.de.lara.guarch@intel.com>
 T: git://dpdk.org/next/dpdk-next-crypto
 
-Cavium OCTEONTX zipvf
+Cavium OCTEON TX zipvf
 M: Ashish Gupta <ashish.gupta@cavium.com>
 F: drivers/compress/octeontx/
 F: doc/guides/compressdevs/octeontx.rst
@@ -890,13 +938,12 @@ Eventdev Drivers
 M: Jerin Jacob <jerin.jacob@caviumnetworks.com>
 T: git://dpdk.org/next/dpdk-next-eventdev
 
-Cavium OCTEONTX ssovf
+Cavium OCTEON TX ssovf
 M: Jerin Jacob <jerin.jacob@caviumnetworks.com>
-M: Santosh Shukla <santosh.shukla@caviumnetworks.com>
 F: drivers/event/octeontx/
 F: doc/guides/eventdevs/octeontx.rst
 
-Cavium OCTEONTX timvf
+Cavium OCTEON TX timvf
 M: Pavan Nikhilesh <pbhagavatula@caviumnetworks.com>
 F: drivers/event/octeontx/timvf_*
 
@@ -919,6 +966,11 @@ F: doc/guides/eventdevs/sw.rst
 F: examples/eventdev_pipeline/
 F: doc/guides/sample_app_ug/eventdev_pipeline.rst
 
+Distributed Software Eventdev PMD
+M: Mattias Rönnblom <mattias.ronnblom@ericsson.com>
+F: drivers/event/dsw/
+F: doc/guides/eventdevs/dsw.rst
+
 Software OPDL Eventdev PMD
 M: Liang Ma <liang.j.ma@intel.com>
 M: Peter Mccarthy <peter.mccarthy@intel.com>
@@ -1140,6 +1192,7 @@ F: doc/guides/sample_app_ug/l2_forward_job_stats.rst
 Metrics
 M: Remy Horton <remy.horton@intel.com>
 F: lib/librte_metrics/
+F: test/test/test_metrics.c
 
 Bit-rate statistics
 M: Remy Horton <remy.horton@intel.com>
@@ -1149,6 +1202,12 @@ Latency statistics
 M: Reshma Pattan <reshma.pattan@intel.com>
 F: lib/librte_latencystats/
 
+Telemetry - EXPERIMENTAL
+M: Kevin Laatz <kevin.laatz@intel.com>
+F: lib/librte_telemetry/
+F: usertools/dpdk-telemetry-client.py
+F: doc/guides/howto/telemetry.rst
+
 BPF - EXPERIMENTAL
 M: Konstantin Ananyev <konstantin.ananyev@intel.com>
 F: lib/librte_bpf/
diff --git a/app/meson.build b/app/meson.build
index 99e0b93..a9a026b 100644
@@ -11,20 +11,25 @@ apps = ['pdump',
 # for BSD only
 lib_execinfo = cc.find_library('execinfo', required: false)
 
+default_cflags = machine_args
+
+# specify -D_GNU_SOURCE unconditionally
+default_cflags += '-D_GNU_SOURCE'
+
 foreach app:apps
        build = true
        name = app
        allow_experimental_apis = false
        sources = []
        includes = []
-       cflags = machine_args
+       cflags = default_cflags
        objs = [] # other object files to link against, used e.g. for
                  # instruction-set optimized versions of code
 
        # use "deps" for internal DPDK dependencies, and "ext_deps" for
        # external package/library requirements
        ext_deps = []
-       deps = []
+       deps = dpdk_app_link_libraries
 
        subdir(name)
 
@@ -38,7 +43,7 @@ foreach app:apps
 
                link_libs = []
                if get_option('default_library') == 'static'
-                       link_libs = dpdk_drivers
+                       link_libs = dpdk_static_libraries + dpdk_drivers
                endif
 
                if allow_experimental_apis
diff --git a/app/pdump/main.c b/app/pdump/main.c
index ac22871..d96556e 100644
@@ -81,7 +81,7 @@ enum pdump_by {
        DEVICE_ID = 2
 };
 
-const char *valid_pdump_arguments[] = {
+static const char * const valid_pdump_arguments[] = {
        PDUMP_PORT_ARG,
        PDUMP_PCI_ARG,
        PDUMP_QUEUE_ARG,
@@ -136,9 +136,9 @@ struct parse_val {
        uint64_t val;
 };
 
-int num_tuples;
+static int num_tuples;
 static struct rte_eth_conf port_conf_default;
-volatile uint8_t quit_signal;
+static volatile uint8_t quit_signal;
 
 /**< display usage */
 static void
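The main.c hunks above are a linkage cleanup: the file-scope variables and the argument table gain "static" (and the table an extra "const"). A minimal stand-alone C illustration of why, with names borrowed from the patch and the table contents invented for the example:

#include <stdint.h>

/* Without "static" these definitions have external linkage and can
 * silently collide with same-named symbols in other object files;
 * "static" keeps them private to this translation unit, and the extra
 * "const" lets the argument table live in read-only storage. */
static const char * const valid_pdump_arguments[] = { "port", "queue" };
static int num_tuples;
static volatile uint8_t quit_signal;

int
main(void)
{
	(void)valid_pdump_arguments;
	return num_tuples + quit_signal;
}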
diff --git a/app/pdump/meson.build b/app/pdump/meson.build
index 988cb4e..116c27f 100644
@@ -3,4 +3,4 @@
 
 sources = files('main.c')
 allow_experimental_apis = true
-deps = ['ethdev', 'kvargs', 'pdump']
+deps += ['ethdev', 'kvargs', 'pdump']
diff --git a/app/proc-info/meson.build b/app/proc-info/meson.build
index 9c148e3..a52b2ee 100644
@@ -3,4 +3,4 @@
 
 sources = files('main.c')
 allow_experimental_apis = true
-deps = ['ethdev', 'metrics']
+deps += ['ethdev', 'metrics']
diff --git a/app/test-bbdev/meson.build b/app/test-bbdev/meson.build
index 653907d..eb8cc04 100644
@@ -6,4 +6,4 @@ sources = files('main.c',
                'test_bbdev_perf.c',
                'test_bbdev_vector.c')
 allow_experimental_apis = true
-deps = ['bbdev', 'bus_vdev']
+deps += ['bbdev', 'bus_vdev']
diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index 6861edc..fbe6cc9 100644
@@ -267,12 +267,13 @@ create_mbuf_pool(struct op_data_entries *entries, uint8_t dev_id,
 
 static int
 create_mempools(struct active_device *ad, int socket_id,
-               enum rte_bbdev_op_type op_type, uint16_t num_ops)
+               enum rte_bbdev_op_type org_op_type, uint16_t num_ops)
 {
        struct rte_mempool *mp;
        unsigned int ops_pool_size, mbuf_pool_size = 0;
        char pool_name[RTE_MEMPOOL_NAMESIZE];
        const char *op_type_str;
+       enum rte_bbdev_op_type op_type = org_op_type;
 
        struct op_data_entries *in = &test_vector.entries[DATA_INPUT];
        struct op_data_entries *hard_out =
@@ -289,6 +290,9 @@ create_mempools(struct active_device *ad, int socket_id,
                                        OPS_CACHE_SIZE + 1)),
                        OPS_POOL_SIZE_MIN));
 
+       if (org_op_type == RTE_BBDEV_OP_NONE)
+               op_type = RTE_BBDEV_OP_TURBO_ENC;
+
        op_type_str = rte_bbdev_op_type_str(op_type);
        TEST_ASSERT_NOT_NULL(op_type_str, "Invalid op type: %u", op_type);
 
@@ -303,6 +307,10 @@ create_mempools(struct active_device *ad, int socket_id,
                        socket_id);
        ad->ops_mempool = mp;
 
+       /* Do not create inputs and outputs mbufs for BaseBand Null Device */
+       if (org_op_type == RTE_BBDEV_OP_NONE)
+               return TEST_SUCCESS;
+
        /* Inputs */
        mbuf_pool_size = optimal_mempool_size(ops_pool_size * in->nb_segments);
        mp = create_mbuf_pool(in, ad->dev_id, socket_id, mbuf_pool_size, "in");
@@ -1058,14 +1066,14 @@ run_test_case_on_device(test_case_function *test_case_func, uint8_t dev_id,
        rte_bbdev_info_get(ad->dev_id, &info);
        socket_id = GET_SOCKET(info.socket_id);
 
-       if (op_type == RTE_BBDEV_OP_NONE)
-               op_type = RTE_BBDEV_OP_TURBO_ENC;
        f_ret = create_mempools(ad, socket_id, op_type,
                        get_num_ops());
        if (f_ret != TEST_SUCCESS) {
                printf("Couldn't create mempools");
                goto fail;
        }
+       if (op_type == RTE_BBDEV_OP_NONE)
+               op_type = RTE_BBDEV_OP_TURBO_ENC;
 
        f_ret = init_test_op_params(op_params, test_vector.op_type,
                        test_vector.expected_status,
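
The moved NONE check matters: create_mempools() must still see the original op type so it can skip the mbuf pools for the BaseBand Null Device, while RTE_BBDEV_OP_TURBO_ENC is borrowed only for the ops-pool name and type. A condensed sketch of the resulting flow (not the full function; names as in the hunks above):

	enum rte_bbdev_op_type op_type = org_op_type;

	if (org_op_type == RTE_BBDEV_OP_NONE)
		op_type = RTE_BBDEV_OP_TURBO_ENC;  /* pool naming/type only */

	/* ... create the ops mempool for op_type ... */

	if (org_op_type == RTE_BBDEV_OP_NONE)
		return TEST_SUCCESS;  /* null device: no input/output mbufs */

	/* ... create the input and output mbuf pools ... */
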
diff --git a/app/test-crypto-perf/cperf_test_vectors.c b/app/test-crypto-perf/cperf_test_vectors.c
index 907a995..1af9524 100644
@@ -419,13 +419,19 @@ cperf_test_vector_get_dummy(struct cperf_options *options)
                        t_vec->cipher_key.length = 0;
                        t_vec->ciphertext.data = plaintext;
                        t_vec->cipher_key.data = NULL;
-                       t_vec->cipher_iv.data = NULL;
                } else {
                        t_vec->cipher_key.length = options->cipher_key_sz;
                        t_vec->ciphertext.data = ciphertext;
                        t_vec->cipher_key.data = cipher_key;
-                       t_vec->cipher_iv.data = rte_malloc(NULL, options->cipher_iv_sz,
-                                       16);
+               }
+
+               /* Init IV data ptr */
+               t_vec->cipher_iv.data = NULL;
+
+               if (options->cipher_iv_sz != 0) {
+                       /* Set IV parameters */
+                       t_vec->cipher_iv.data = rte_malloc(NULL,
+                                       options->cipher_iv_sz, 16);
                        if (t_vec->cipher_iv.data == NULL) {
                                rte_free(t_vec);
                                return NULL;
@@ -433,17 +439,7 @@ cperf_test_vector_get_dummy(struct cperf_options *options)
                        memcpy(t_vec->cipher_iv.data, iv, options->cipher_iv_sz);
                }
                t_vec->ciphertext.length = options->max_buffer_size;
-
-               /* Set IV parameters */
-               t_vec->cipher_iv.data = rte_malloc(NULL, options->cipher_iv_sz,
-                               16);
-               if (options->cipher_iv_sz && t_vec->cipher_iv.data == NULL) {
-                       rte_free(t_vec);
-                       return NULL;
-               }
-               memcpy(t_vec->cipher_iv.data, iv, options->cipher_iv_sz);
                t_vec->cipher_iv.length = options->cipher_iv_sz;
-
                t_vec->data.cipher_offset = 0;
                t_vec->data.cipher_length = options->max_buffer_size;
 
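The rework above pairs with the relaxed check in main.c below: an IV buffer now exists if and only if cipher_iv_sz is non-zero. Stated as assertions (hedged sketch, assumes <assert.h>; not code from the patch):

	if (options->cipher_iv_sz != 0)
		assert(t_vec->cipher_iv.data != NULL);  /* allocated + copied */
	else
		assert(t_vec->cipher_iv.data == NULL);  /* never allocated */
	assert(t_vec->cipher_iv.length == options->cipher_iv_sz);
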
diff --git a/app/test-crypto-perf/main.c b/app/test-crypto-perf/main.c
index 5c7dadb..953e058 100644
@@ -342,7 +342,9 @@ cperf_check_test_vector(struct cperf_options *opts,
                                return -1;
                        if (test_vec->ciphertext.length < opts->max_buffer_size)
                                return -1;
-                       if (test_vec->cipher_iv.data == NULL)
+                       /* Cipher IV is only required for some algorithms */
+                       if (opts->cipher_iv_sz &&
+                                       test_vec->cipher_iv.data == NULL)
                                return -1;
                        if (test_vec->cipher_iv.length != opts->cipher_iv_sz)
                                return -1;
@@ -357,7 +359,9 @@ cperf_check_test_vector(struct cperf_options *opts,
                                return -1;
                        if (test_vec->plaintext.length < opts->max_buffer_size)
                                return -1;
-                       if (test_vec->auth_key.data == NULL)
+                       /* Auth key is only required for some algorithms */
+                       if (opts->auth_key_sz &&
+                                       test_vec->auth_key.data == NULL)
                                return -1;
                        if (test_vec->auth_key.length != opts->auth_key_sz)
                                return -1;
@@ -421,6 +425,10 @@ cperf_check_test_vector(struct cperf_options *opts,
                        return -1;
                if (test_vec->ciphertext.length < opts->max_buffer_size)
                        return -1;
+               if (test_vec->aead_key.data == NULL)
+                       return -1;
+               if (test_vec->aead_key.length != opts->aead_key_sz)
+                       return -1;
                if (test_vec->aead_iv.data == NULL)
                        return -1;
                if (test_vec->aead_iv.length != opts->aead_iv_sz)
diff --git a/app/test-crypto-perf/meson.build b/app/test-crypto-perf/meson.build
index eacd7a0..d735b18 100644
@@ -12,4 +12,4 @@ sources = files('cperf_ops.c',
                'cperf_test_vectors.c',
                'cperf_test_verify.c',
                'main.c')
-deps = ['cryptodev']
+deps += ['cryptodev']
diff --git a/app/test-eventdev/test_perf_common.c b/app/test-eventdev/test_perf_common.c
index d33cb2c..8618775 100644
@@ -680,7 +680,6 @@ perf_ethdev_setup(struct evt_test *test, struct evt_options *opt)
                        .mq_mode = ETH_MQ_RX_RSS,
                        .max_rx_pkt_len = ETHER_MAX_LEN,
                        .split_hdr_size = 0,
-                       .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
                },
                .rx_adv_conf = {
                        .rss_conf = {
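
The dropped DEV_RX_OFFLOAD_CRC_STRIP flag here (and in test_pipeline_common.c below) reflects 18.11 making CRC stripping the default ethdev behavior; keeping the CRC now requires an explicit opt-in. A minimal sketch of the resulting rxmode, with DEV_RX_OFFLOAD_KEEP_CRC named as the assumed opt-out flag:

	struct rte_eth_conf conf = {
		.rxmode = {
			.mq_mode = ETH_MQ_RX_RSS,
			.max_rx_pkt_len = ETHER_MAX_LEN,
			.split_hdr_size = 0,
			/* CRC stripping is implicit; to keep the CRC instead:
			 * .offloads = DEV_RX_OFFLOAD_KEEP_CRC,
			 */
		},
	};
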
diff --git a/app/test-eventdev/test_pipeline_atq.c b/app/test-eventdev/test_pipeline_atq.c
index 26dc79f..c60635b 100644
@@ -15,10 +15,10 @@ pipeline_atq_nb_event_queues(struct evt_options *opt)
        return rte_eth_dev_count_avail();
 }
 
-static int
+static __rte_noinline int
 pipeline_atq_worker_single_stage_tx(void *arg)
 {
-       PIPELINE_WROKER_SINGLE_STAGE_INIT;
+       PIPELINE_WORKER_SINGLE_STAGE_INIT;
 
        while (t->done == false) {
                uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
@@ -28,23 +28,18 @@ pipeline_atq_worker_single_stage_tx(void *arg)
                        continue;
                }
 
-               if (ev.sched_type == RTE_SCHED_TYPE_ATOMIC) {
-                       pipeline_tx_pkt(ev.mbuf);
-                       w->processed_pkts++;
-                       continue;
-               }
-               pipeline_fwd_event(&ev, RTE_SCHED_TYPE_ATOMIC);
-               pipeline_event_enqueue(dev, port, &ev);
+               pipeline_event_tx(dev, port, &ev);
+               w->processed_pkts++;
        }
 
        return 0;
 }
 
-static int
+static __rte_noinline int
 pipeline_atq_worker_single_stage_fwd(void *arg)
 {
-       PIPELINE_WROKER_SINGLE_STAGE_INIT;
-       const uint8_t tx_queue = t->tx_service.queue_id;
+       PIPELINE_WORKER_SINGLE_STAGE_INIT;
+       const uint8_t *tx_queue = t->tx_evqueue_id;
 
        while (t->done == false) {
                uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
@@ -54,19 +49,19 @@ pipeline_atq_worker_single_stage_fwd(void *arg)
                        continue;
                }
 
-               w->processed_pkts++;
-               ev.queue_id = tx_queue;
+               ev.queue_id = tx_queue[ev.mbuf->port];
                pipeline_fwd_event(&ev, RTE_SCHED_TYPE_ATOMIC);
                pipeline_event_enqueue(dev, port, &ev);
+               w->processed_pkts++;
        }
 
        return 0;
 }
 
-static int
+static __rte_noinline int
 pipeline_atq_worker_single_stage_burst_tx(void *arg)
 {
-       PIPELINE_WROKER_SINGLE_STAGE_BURST_INIT;
+       PIPELINE_WORKER_SINGLE_STAGE_BURST_INIT;
 
        while (t->done == false) {
                uint16_t nb_rx = rte_event_dequeue_burst(dev, port, ev,
@@ -79,27 +74,21 @@ pipeline_atq_worker_single_stage_burst_tx(void *arg)
 
                for (i = 0; i < nb_rx; i++) {
                        rte_prefetch0(ev[i + 1].mbuf);
-                       if (ev[i].sched_type == RTE_SCHED_TYPE_ATOMIC) {
-
-                               pipeline_tx_pkt(ev[i].mbuf);
-                               ev[i].op = RTE_EVENT_OP_RELEASE;
-                               w->processed_pkts++;
-                       } else
-                               pipeline_fwd_event(&ev[i],
-                                               RTE_SCHED_TYPE_ATOMIC);
+                       rte_event_eth_tx_adapter_txq_set(ev[i].mbuf, 0);
                }
 
-               pipeline_event_enqueue_burst(dev, port, ev, nb_rx);
+               pipeline_event_tx_burst(dev, port, ev, nb_rx);
+               w->processed_pkts += nb_rx;
        }
 
        return 0;
 }
 
-static int
+static __rte_noinline int
 pipeline_atq_worker_single_stage_burst_fwd(void *arg)
 {
-       PIPELINE_WROKER_SINGLE_STAGE_BURST_INIT;
-       const uint8_t tx_queue = t->tx_service.queue_id;
+       PIPELINE_WORKER_SINGLE_STAGE_BURST_INIT;
+       const uint8_t *tx_queue = t->tx_evqueue_id;
 
        while (t->done == false) {
                uint16_t nb_rx = rte_event_dequeue_burst(dev, port, ev,
@@ -112,23 +101,22 @@ pipeline_atq_worker_single_stage_burst_fwd(void *arg)
 
                for (i = 0; i < nb_rx; i++) {
                        rte_prefetch0(ev[i + 1].mbuf);
-                       ev[i].queue_id = tx_queue;
+                       rte_event_eth_tx_adapter_txq_set(ev[i].mbuf, 0);
+                       ev[i].queue_id = tx_queue[ev[i].mbuf->port];
                        pipeline_fwd_event(&ev[i], RTE_SCHED_TYPE_ATOMIC);
-                       w->processed_pkts++;
                }
 
                pipeline_event_enqueue_burst(dev, port, ev, nb_rx);
+               w->processed_pkts += nb_rx;
        }
 
        return 0;
 }
 
-static int
+static __rte_noinline int
 pipeline_atq_worker_multi_stage_tx(void *arg)
 {
-       PIPELINE_WROKER_MULTI_STAGE_INIT;
-       const uint8_t nb_stages = t->opt->nb_stages;
-
+       PIPELINE_WORKER_MULTI_STAGE_INIT;
 
        while (t->done == false) {
                uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
@@ -141,29 +129,24 @@ pipeline_atq_worker_multi_stage_tx(void *arg)
                cq_id = ev.sub_event_type % nb_stages;
 
                if (cq_id == last_queue) {
-                       if (ev.sched_type == RTE_SCHED_TYPE_ATOMIC) {
-
-                               pipeline_tx_pkt(ev.mbuf);
-                               w->processed_pkts++;
-                               continue;
-                       }
-                       pipeline_fwd_event(&ev, RTE_SCHED_TYPE_ATOMIC);
-               } else {
-                       ev.sub_event_type++;
-                       pipeline_fwd_event(&ev, sched_type_list[cq_id]);
+                       pipeline_event_tx(dev, port, &ev);
+                       w->processed_pkts++;
+                       continue;
                }
 
+               ev.sub_event_type++;
+               pipeline_fwd_event(&ev, sched_type_list[cq_id]);
                pipeline_event_enqueue(dev, port, &ev);
        }
+
        return 0;
 }
 
-static int
+static __rte_noinline int
 pipeline_atq_worker_multi_stage_fwd(void *arg)
 {
-       PIPELINE_WROKER_MULTI_STAGE_INIT;
-       const uint8_t nb_stages = t->opt->nb_stages;
-       const uint8_t tx_queue = t->tx_service.queue_id;
+       PIPELINE_WORKER_MULTI_STAGE_INIT;
+       const uint8_t *tx_queue = t->tx_evqueue_id;
 
        while (t->done == false) {
                uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
@@ -176,9 +159,9 @@ pipeline_atq_worker_multi_stage_fwd(void *arg)
                cq_id = ev.sub_event_type % nb_stages;
 
                if (cq_id == last_queue) {
-                       w->processed_pkts++;
-                       ev.queue_id = tx_queue;
+                       ev.queue_id = tx_queue[ev.mbuf->port];
                        pipeline_fwd_event(&ev, RTE_SCHED_TYPE_ATOMIC);
+                       w->processed_pkts++;
                } else {
                        ev.sub_event_type++;
                        pipeline_fwd_event(&ev, sched_type_list[cq_id]);
@@ -186,14 +169,14 @@ pipeline_atq_worker_multi_stage_fwd(void *arg)
 
                pipeline_event_enqueue(dev, port, &ev);
        }
+
        return 0;
 }
 
-static int
+static __rte_noinline int
 pipeline_atq_worker_multi_stage_burst_tx(void *arg)
 {
-       PIPELINE_WROKER_MULTI_STAGE_BURST_INIT;
-       const uint8_t nb_stages = t->opt->nb_stages;
+       PIPELINE_WORKER_MULTI_STAGE_BURST_INIT;
 
        while (t->done == false) {
                uint16_t nb_rx = rte_event_dequeue_burst(dev, port, ev,
@@ -209,34 +192,27 @@ pipeline_atq_worker_multi_stage_burst_tx(void *arg)
                        cq_id = ev[i].sub_event_type % nb_stages;
 
                        if (cq_id == last_queue) {
-                               if (ev[i].sched_type == RTE_SCHED_TYPE_ATOMIC) {
-
-                                       pipeline_tx_pkt(ev[i].mbuf);
-                                       ev[i].op = RTE_EVENT_OP_RELEASE;
-                                       w->processed_pkts++;
-                                       continue;
-                               }
-
-                               pipeline_fwd_event(&ev[i],
-                                               RTE_SCHED_TYPE_ATOMIC);
-                       } else {
-                               ev[i].sub_event_type++;
-                               pipeline_fwd_event(&ev[i],
-                                               sched_type_list[cq_id]);
+                               pipeline_event_tx(dev, port, &ev[i]);
+                               ev[i].op = RTE_EVENT_OP_RELEASE;
+                               w->processed_pkts++;
+                               continue;
                        }
+
+                       ev[i].sub_event_type++;
+                       pipeline_fwd_event(&ev[i], sched_type_list[cq_id]);
                }
 
                pipeline_event_enqueue_burst(dev, port, ev, nb_rx);
        }
+
        return 0;
 }
 
-static int
+static __rte_noinline int
 pipeline_atq_worker_multi_stage_burst_fwd(void *arg)
 {
-       PIPELINE_WROKER_MULTI_STAGE_BURST_INIT;
-       const uint8_t nb_stages = t->opt->nb_stages;
-       const uint8_t tx_queue = t->tx_service.queue_id;
+       PIPELINE_WORKER_MULTI_STAGE_BURST_INIT;
+       const uint8_t *tx_queue = t->tx_evqueue_id;
 
        while (t->done == false) {
                uint16_t nb_rx = rte_event_dequeue_burst(dev, port, ev,
@@ -253,7 +229,7 @@ pipeline_atq_worker_multi_stage_burst_fwd(void *arg)
 
                        if (cq_id == last_queue) {
                                w->processed_pkts++;
-                               ev[i].queue_id = tx_queue;
+                               ev[i].queue_id = tx_queue[ev[i].mbuf->port];
                                pipeline_fwd_event(&ev[i],
                                                RTE_SCHED_TYPE_ATOMIC);
                        } else {
@@ -265,6 +241,7 @@ pipeline_atq_worker_multi_stage_burst_fwd(void *arg)
 
                pipeline_event_enqueue_burst(dev, port, ev, nb_rx);
        }
+
        return 0;
 }
 
@@ -274,39 +251,36 @@ worker_wrapper(void *arg)
        struct worker_data *w  = arg;
        struct evt_options *opt = w->t->opt;
        const bool burst = evt_has_burst_mode(w->dev_id);
-       const bool mt_safe = !w->t->mt_unsafe;
+       const bool internal_port = w->t->internal_port;
        const uint8_t nb_stages = opt->nb_stages;
        RTE_SET_USED(opt);
 
        if (nb_stages == 1) {
-               if (!burst && mt_safe)
+               if (!burst && internal_port)
                        return pipeline_atq_worker_single_stage_tx(arg);
-               else if (!burst && !mt_safe)
+               else if (!burst && !internal_port)
                        return pipeline_atq_worker_single_stage_fwd(arg);
-               else if (burst && mt_safe)
+               else if (burst && internal_port)
                        return pipeline_atq_worker_single_stage_burst_tx(arg);
-               else if (burst && !mt_safe)
+               else if (burst && !internal_port)
                        return pipeline_atq_worker_single_stage_burst_fwd(arg);
        } else {
-               if (!burst && mt_safe)
+               if (!burst && internal_port)
                        return pipeline_atq_worker_multi_stage_tx(arg);
-               else if (!burst && !mt_safe)
+               else if (!burst && !internal_port)
                        return pipeline_atq_worker_multi_stage_fwd(arg);
-               if (burst && mt_safe)
+               if (burst && internal_port)
                        return pipeline_atq_worker_multi_stage_burst_tx(arg);
-               else if (burst && !mt_safe)
+               else if (burst && !internal_port)
                        return pipeline_atq_worker_multi_stage_burst_fwd(arg);
        }
+
        rte_panic("invalid worker\n");
 }
 
 static int
 pipeline_atq_launch_lcores(struct evt_test *test, struct evt_options *opt)
 {
-       struct test_pipeline *t = evt_test_priv(test);
-
-       if (t->mt_unsafe)
-               rte_service_component_runstate_set(t->tx_service.service_id, 1);
        return pipeline_launch_lcores(test, opt, worker_wrapper);
 }
 
@@ -317,34 +291,38 @@ pipeline_atq_eventdev_setup(struct evt_test *test, struct evt_options *opt)
        int nb_ports;
        int nb_queues;
        uint8_t queue;
-       struct rte_event_dev_info info;
-       struct test_pipeline *t = evt_test_priv(test);
-       uint8_t tx_evqueue_id = 0;
+       uint8_t tx_evqueue_id[RTE_MAX_ETHPORTS];
        uint8_t queue_arr[RTE_EVENT_MAX_QUEUES_PER_DEV];
        uint8_t nb_worker_queues = 0;
+       uint8_t tx_evport_id = 0;
+       uint16_t prod = 0;
+       struct rte_event_dev_info info;
+       struct test_pipeline *t = evt_test_priv(test);
 
        nb_ports = evt_nr_active_lcores(opt->wlcores);
        nb_queues = rte_eth_dev_count_avail();
 
-       /* One extra port and queueu for Tx service */
-       if (t->mt_unsafe) {
-               tx_evqueue_id = nb_queues;
-               nb_ports++;
-               nb_queues++;
+       memset(tx_evqueue_id, 0, sizeof(uint8_t) * RTE_MAX_ETHPORTS);
+       memset(queue_arr, 0, sizeof(uint8_t) * RTE_EVENT_MAX_QUEUES_PER_DEV);
+       /* One queue for Tx adapter per port */
+       if (!t->internal_port) {
+               RTE_ETH_FOREACH_DEV(prod) {
+                       tx_evqueue_id[prod] = nb_queues;
+                       nb_queues++;
+               }
        }
 
-
        rte_event_dev_info_get(opt->dev_id, &info);
 
        const struct rte_event_dev_config config = {
-                       .nb_event_queues = nb_queues,
-                       .nb_event_ports = nb_ports,
-                       .nb_events_limit  = info.max_num_events,
-                       .nb_event_queue_flows = opt->nb_flows,
-                       .nb_event_port_dequeue_depth =
-                               info.max_event_port_dequeue_depth,
-                       .nb_event_port_enqueue_depth =
-                               info.max_event_port_enqueue_depth,
+               .nb_event_queues = nb_queues,
+               .nb_event_ports = nb_ports,
+               .nb_events_limit  = info.max_num_events,
+               .nb_event_queue_flows = opt->nb_flows,
+               .nb_event_port_dequeue_depth =
+                       info.max_event_port_dequeue_depth,
+               .nb_event_port_enqueue_depth =
+                       info.max_event_port_enqueue_depth,
        };
        ret = rte_event_dev_configure(opt->dev_id, &config);
        if (ret) {
@@ -353,21 +331,23 @@ pipeline_atq_eventdev_setup(struct evt_test *test, struct evt_options *opt)
        }
 
        struct rte_event_queue_conf q_conf = {
-                       .priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
-                       .nb_atomic_flows = opt->nb_flows,
-                       .nb_atomic_order_sequences = opt->nb_flows,
+               .priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
+               .nb_atomic_flows = opt->nb_flows,
+               .nb_atomic_order_sequences = opt->nb_flows,
        };
        /* queue configurations */
        for (queue = 0; queue < nb_queues; queue++) {
                q_conf.event_queue_cfg = RTE_EVENT_QUEUE_CFG_ALL_TYPES;
 
-               if (t->mt_unsafe) {
-                       if (queue == tx_evqueue_id) {
-                               q_conf.event_queue_cfg =
-                                       RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
-                       } else {
-                               queue_arr[nb_worker_queues] = queue;
-                               nb_worker_queues++;
+               if (!t->internal_port) {
+                       RTE_ETH_FOREACH_DEV(prod) {
+                               if (queue == tx_evqueue_id[prod]) {
+                                       q_conf.event_queue_cfg =
+                                               RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
+                               } else {
+                                       queue_arr[nb_worker_queues] = queue;
+                                       nb_worker_queues++;
+                               }
                        }
                }
 
@@ -383,20 +363,15 @@ pipeline_atq_eventdev_setup(struct evt_test *test, struct evt_options *opt)
 
        /* port configuration */
        const struct rte_event_port_conf p_conf = {
-                       .dequeue_depth = opt->wkr_deq_dep,
-                       .enqueue_depth = info.max_event_port_dequeue_depth,
-                       .new_event_threshold = info.max_num_events,
+               .dequeue_depth = opt->wkr_deq_dep,
+               .enqueue_depth = info.max_event_port_dequeue_depth,
+               .new_event_threshold = info.max_num_events,
        };
 
-       if (t->mt_unsafe) {
+       if (!t->internal_port)
                ret = pipeline_event_port_setup(test, opt, queue_arr,
                                nb_worker_queues, p_conf);
-               if (ret)
-                       return ret;
-
-               ret = pipeline_event_tx_service_setup(test, opt, tx_evqueue_id,
-                               nb_ports - 1, p_conf);
-       } else
+       else
                ret = pipeline_event_port_setup(test, opt, NULL, nb_queues,
                                p_conf);
 
@@ -408,30 +383,32 @@ pipeline_atq_eventdev_setup(struct evt_test *test, struct evt_options *opt)
         *
         * eth_dev_count = 2, nb_stages = 2, atq mode
         *
-        * Multi thread safe :
+        * eth0, eth1 have internal port capability:
         *      queues = 2
         *      stride = 1
         *
         *      event queue pipelines:
-        *      eth0 -> q0 ->tx
-        *      eth1 -> q1 ->tx
+        *      eth0 -> q0 -> Tx
+        *      eth1 -> q1 -> Tx
         *
         *      q0, q1 are configured as ATQ so, all the different stages can
         *      be enqueued on the same queue.
         *
-        * Multi thread unsafe :
-        *      queues = 3
+        * eth0, eth1 use the Tx adapter service core:
+        *      queues = 4
         *      stride = 1
         *
         *      event queue pipelines:
-        *      eth0 -> q0
-        *                } (q3->tx) Tx service
-        *      eth1 -> q1
+        *      eth0 -> q0 -> q2 -> Tx
+        *      eth1 -> q1 -> q3 -> Tx
         *
-        *      q0,q1 are configured as stated above.
-        *      q3 configured as SINGLE_LINK|ATOMIC.
+        *      q0, q1 are configured as stated above.
+        *      q2, q3 configured as SINGLE_LINK.
         */
        ret = pipeline_event_rx_adapter_setup(opt, 1, p_conf);
+       if (ret)
+               return ret;
+       ret = pipeline_event_tx_adapter_setup(opt, p_conf);
        if (ret)
                return ret;
 
@@ -445,12 +422,58 @@ pipeline_atq_eventdev_setup(struct evt_test *test, struct evt_options *opt)
                }
        }
 
+       /* Connect the tx_evqueue_id to the Tx adapter port */
+       if (!t->internal_port) {
+               RTE_ETH_FOREACH_DEV(prod) {
+                       ret = rte_event_eth_tx_adapter_event_port_get(prod,
+                                       &tx_evport_id);
+                       if (ret) {
+                               evt_err("Unable to get Tx adapter[%d] event port",
+                                               prod);
+                               return ret;
+                       }
+
+                       if (rte_event_port_link(opt->dev_id, tx_evport_id,
+                                               &tx_evqueue_id[prod],
+                                               NULL, 1) != 1) {
+                               evt_err("Unable to link Tx adptr[%d] evprt[%d]",
+                                               prod, tx_evport_id);
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       RTE_ETH_FOREACH_DEV(prod) {
+               ret = rte_eth_dev_start(prod);
+               if (ret) {
+                       evt_err("Ethernet dev [%d] failed to start", prod);
+                       return ret;
+               }
+       }
+
        ret = rte_event_dev_start(opt->dev_id);
        if (ret) {
                evt_err("failed to start eventdev %d", opt->dev_id);
                return ret;
        }
 
+       RTE_ETH_FOREACH_DEV(prod) {
+               ret = rte_event_eth_rx_adapter_start(prod);
+               if (ret) {
+                       evt_err("Rx adapter[%d] start failed", prod);
+                       return ret;
+               }
+
+               ret = rte_event_eth_tx_adapter_start(prod);
+               if (ret) {
+                       evt_err("Tx adapter[%d] start failed", prod);
+                       return ret;
+               }
+       }
+
+       memcpy(t->tx_evqueue_id, tx_evqueue_id, sizeof(uint8_t) *
+                       RTE_MAX_ETHPORTS);
+
        return 0;
 }
 
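With the Tx service gone, this file now drives transmission entirely through the event eth Tx adapter API. A condensed, hedged sketch of the bring-up order implemented above (dev_id, eth_port, port_conf and tx_evqueue_id are placeholders; error handling trimmed):

	uint32_t caps = 0;
	uint8_t tx_evport_id;

	/* 1. Does this ethdev/eventdev pair expose an internal Tx port? */
	rte_event_eth_tx_adapter_caps_get(dev_id, eth_port, &caps);

	/* 2. One adapter per ethdev; adapter id == eth port id in this test. */
	rte_event_eth_tx_adapter_create(eth_port, dev_id, &port_conf);
	rte_event_eth_tx_adapter_queue_add(eth_port, eth_port, -1 /* all */);

	/* 3. No internal port: link the adapter's event port to the
	 *    SINGLE_LINK queue that workers forward finished events to.
	 */
	if (!(caps & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT)) {
		rte_event_eth_tx_adapter_event_port_get(eth_port,
				&tx_evport_id);
		rte_event_port_link(dev_id, tx_evport_id,
				&tx_evqueue_id[eth_port], NULL, 1);
	}

	/* 4. Start ethdev and eventdev, then the Rx and Tx adapters. */
	rte_event_eth_tx_adapter_start(eth_port);
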
diff --git a/app/test-eventdev/test_pipeline_common.c b/app/test-eventdev/test_pipeline_common.c
index a54068d..d07fa88 100644
@@ -5,58 +5,6 @@
 
 #include "test_pipeline_common.h"
 
-static int32_t
-pipeline_event_tx_burst_service_func(void *args)
-{
-
-       int i;
-       struct tx_service_data *tx = args;
-       const uint8_t dev = tx->dev_id;
-       const uint8_t port = tx->port_id;
-       struct rte_event ev[BURST_SIZE + 1];
-
-       uint16_t nb_rx = rte_event_dequeue_burst(dev, port, ev, BURST_SIZE, 0);
-
-       if (!nb_rx) {
-               for (i = 0; i < tx->nb_ethports; i++)
-                       rte_eth_tx_buffer_flush(i, 0, tx->tx_buf[i]);
-               return 0;
-       }
-
-       for (i = 0; i < nb_rx; i++) {
-               struct rte_mbuf *m = ev[i].mbuf;
-               rte_eth_tx_buffer(m->port, 0, tx->tx_buf[m->port], m);
-       }
-       tx->processed_pkts += nb_rx;
-
-       return 0;
-}
-
-static int32_t
-pipeline_event_tx_service_func(void *args)
-{
-
-       int i;
-       struct tx_service_data *tx = args;
-       const uint8_t dev = tx->dev_id;
-       const uint8_t port = tx->port_id;
-       struct rte_event ev;
-
-       uint16_t nb_rx = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
-
-       if (!nb_rx) {
-               for (i = 0; i < tx->nb_ethports; i++)
-                       rte_eth_tx_buffer_flush(i, 0, tx->tx_buf[i]);
-               return 0;
-       }
-
-       struct rte_mbuf *m = ev.mbuf;
-       rte_eth_tx_buffer(m->port, 0, tx->tx_buf[m->port], m);
-       tx->processed_pkts++;
-
-       return 0;
-}
-
 int
 pipeline_test_result(struct evt_test *test, struct evt_options *opt)
 {
@@ -65,12 +13,12 @@ pipeline_test_result(struct evt_test *test, struct evt_options *opt)
        uint64_t total = 0;
        struct test_pipeline *t = evt_test_priv(test);
 
-       printf("Packet distribution across worker cores :\n");
+       evt_info("Packet distribution across worker cores:");
        for (i = 0; i < t->nb_workers; i++)
                total += t->worker[i].processed_pkts;
        for (i = 0; i < t->nb_workers; i++)
-               printf("Worker %d packets: "CLGRN"%"PRIx64" "CLNRM"percentage:"
-                               CLGRN" %3.2f\n"CLNRM, i,
+               evt_info("Worker %d packets: "CLGRN"%"PRIx64""CLNRM" percentage:"
+                               CLGRN" %3.2f"CLNRM, i,
                                t->worker[i].processed_pkts,
                                (((double)t->worker[i].processed_pkts)/total)
                                * 100);
@@ -97,11 +45,8 @@ processed_pkts(struct test_pipeline *t)
        uint64_t total = 0;
 
        rte_smp_rmb();
-       if (t->mt_unsafe)
-               total = t->tx_service.processed_pkts;
-       else
-               for (i = 0; i < t->nb_workers; i++)
-                       total += t->worker[i].processed_pkts;
+       for (i = 0; i < t->nb_workers; i++)
+               total += t->worker[i].processed_pkts;
 
        return total;
 }
@@ -215,14 +160,12 @@ pipeline_ethdev_setup(struct evt_test *test, struct evt_options *opt)
 {
        uint16_t i;
        uint8_t nb_queues = 1;
-       uint8_t mt_state = 0;
        struct test_pipeline *t = evt_test_priv(test);
        struct rte_eth_rxconf rx_conf;
        struct rte_eth_conf port_conf = {
                .rxmode = {
                        .mq_mode = ETH_MQ_RX_RSS,
                        .max_rx_pkt_len = ETHER_MAX_LEN,
-                       .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
                },
                .rx_adv_conf = {
                        .rss_conf = {
@@ -234,17 +177,21 @@ pipeline_ethdev_setup(struct evt_test *test, struct evt_options *opt)
 
        RTE_SET_USED(opt);
        if (!rte_eth_dev_count_avail()) {
-               evt_err("No ethernet ports found.\n");
+               evt_err("No ethernet ports found.");
                return -ENODEV;
        }
 
+       t->internal_port = 1;
        RTE_ETH_FOREACH_DEV(i) {
                struct rte_eth_dev_info dev_info;
                struct rte_eth_conf local_port_conf = port_conf;
+               uint32_t caps = 0;
+
+               rte_event_eth_tx_adapter_caps_get(opt->dev_id, i, &caps);
+               if (!(caps & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT))
+                       t->internal_port = 0;
 
                rte_eth_dev_info_get(i, &dev_info);
-               mt_state = !(dev_info.tx_offload_capa &
-                               DEV_TX_OFFLOAD_MT_LOCKFREE);
                rx_conf = dev_info.default_rxconf;
                rx_conf.offloads = port_conf.rxmode.offloads;
 
@@ -253,7 +200,7 @@ pipeline_ethdev_setup(struct evt_test *test, struct evt_options *opt)
                if (local_port_conf.rx_adv_conf.rss_conf.rss_hf !=
                                port_conf.rx_adv_conf.rss_conf.rss_hf) {
                        evt_info("Port %u modified RSS hash function based on hardware support,"
-                               "requested:%#"PRIx64" configured:%#"PRIx64"\n",
+                               "requested:%#"PRIx64" configured:%#"PRIx64"",
                                i,
                                port_conf.rx_adv_conf.rss_conf.rss_hf,
                                local_port_conf.rx_adv_conf.rss_conf.rss_hf);
@@ -262,28 +209,23 @@ pipeline_ethdev_setup(struct evt_test *test, struct evt_options *opt)
                if (rte_eth_dev_configure(i, nb_queues, nb_queues,
                                        &local_port_conf)
                                < 0) {
-                       evt_err("Failed to configure eth port [%d]\n", i);
+                       evt_err("Failed to configure eth port [%d]", i);
                        return -EINVAL;
                }
 
                if (rte_eth_rx_queue_setup(i, 0, NB_RX_DESC,
                                rte_socket_id(), &rx_conf, t->pool) < 0) {
-                       evt_err("Failed to setup eth port [%d] rx_queue: %d.\n",
+                       evt_err("Failed to setup eth port [%d] rx_queue: %d.",
                                        i, 0);
                        return -EINVAL;
                }
                if (rte_eth_tx_queue_setup(i, 0, NB_TX_DESC,
                                        rte_socket_id(), NULL) < 0) {
-                       evt_err("Failed to setup eth port [%d] tx_queue: %d.\n",
+                       evt_err("Failed to setup eth port [%d] tx_queue: %d.",
                                        i, 0);
                        return -EINVAL;
                }
 
-               t->mt_unsafe |= mt_state;
-               t->tx_service.tx_buf[i] =
-                       rte_malloc(NULL, RTE_ETH_TX_BUFFER_SIZE(BURST_SIZE), 0);
-               if (t->tx_service.tx_buf[i] == NULL)
-                       rte_panic("Unable to allocate Tx buffer memory.");
                rte_eth_promiscuous_enable(i);
        }
 
@@ -295,7 +237,6 @@ pipeline_event_port_setup(struct evt_test *test, struct evt_options *opt,
                uint8_t *queue_arr, uint8_t nb_queues,
                const struct rte_event_port_conf p_conf)
 {
-       int i;
        int ret;
        uint8_t port;
        struct test_pipeline *t = evt_test_priv(test);
@@ -316,23 +257,15 @@ pipeline_event_port_setup(struct evt_test *test, struct evt_options *opt,
                        return ret;
                }
 
-               if (queue_arr == NULL) {
-                       if (rte_event_port_link(opt->dev_id, port, NULL, NULL,
-                                               0) != nb_queues)
-                               goto link_fail;
-               } else {
-                       for (i = 0; i < nb_queues; i++) {
-                               if (rte_event_port_link(opt->dev_id, port,
-                                               &queue_arr[i], NULL, 1) != 1)
-                                       goto link_fail;
-                       }
-               }
+               if (rte_event_port_link(opt->dev_id, port, queue_arr, NULL,
+                                       nb_queues) != nb_queues)
+                       goto link_fail;
        }
 
        return 0;
 
 link_fail:
-       evt_err("failed to link all queues to port %d", port);
+       evt_err("failed to link queues to port %d", port);
        return -EINVAL;
 }
 
@@ -380,85 +313,69 @@ pipeline_event_rx_adapter_setup(struct evt_options *opt, uint8_t stride,
                        ret = evt_service_setup(service_id);
                        if (ret) {
                                evt_err("Failed to setup service core"
-                                               " for Rx adapter\n");
+                                               " for Rx adapter");
                                return ret;
                        }
                }
 
-               ret = rte_eth_dev_start(prod);
-               if (ret) {
-                       evt_err("Ethernet dev [%d] failed to start."
-                                       " Using synthetic producer", prod);
-                       return ret;
-               }
-
-               ret = rte_event_eth_rx_adapter_start(prod);
-               if (ret) {
-                       evt_err("Rx adapter[%d] start failed", prod);
-                       return ret;
-               }
-               printf("%s: Port[%d] using Rx adapter[%d] started\n", __func__,
-                               prod, prod);
+               evt_info("Port[%d] using Rx adapter[%d] configured", prod,
+                               prod);
        }
 
        return ret;
 }
 
 int
-pipeline_event_tx_service_setup(struct evt_test *test, struct evt_options *opt,
-               uint8_t tx_queue_id, uint8_t tx_port_id,
-               const struct rte_event_port_conf p_conf)
+pipeline_event_tx_adapter_setup(struct evt_options *opt,
+               struct rte_event_port_conf port_conf)
 {
        int ret;
-       struct rte_service_spec serv;
-       struct test_pipeline *t = evt_test_priv(test);
-       struct tx_service_data *tx = &t->tx_service;
+       uint16_t consm;
 
-       ret = rte_event_port_setup(opt->dev_id, tx_port_id, &p_conf);
-       if (ret) {
-               evt_err("failed to setup port %d", tx_port_id);
-               return ret;
-       }
+       RTE_ETH_FOREACH_DEV(consm) {
+               uint32_t cap;
 
-       if (rte_event_port_link(opt->dev_id, tx_port_id, &tx_queue_id,
-                               NULL, 1) != 1) {
-               evt_err("failed to link queues to port %d", tx_port_id);
-               return -EINVAL;
-       }
+               ret = rte_event_eth_tx_adapter_caps_get(opt->dev_id,
+                               consm, &cap);
+               if (ret) {
+                       evt_err("failed to get event tx adapter[%d] caps",
+                                       consm);
+                       return ret;
+               }
 
-       tx->dev_id = opt->dev_id;
-       tx->queue_id = tx_queue_id;
-       tx->port_id = tx_port_id;
-       tx->nb_ethports = rte_eth_dev_count_avail();
-       tx->t = t;
-
-       /* Register Tx service */
-       memset(&serv, 0, sizeof(struct rte_service_spec));
-       snprintf(serv.name, sizeof(serv.name), "Tx_service");
-
-       if (evt_has_burst_mode(opt->dev_id))
-               serv.callback = pipeline_event_tx_burst_service_func;
-       else
-               serv.callback = pipeline_event_tx_service_func;
-
-       serv.callback_userdata = (void *)tx;
-       ret = rte_service_component_register(&serv, &tx->service_id);
-       if (ret) {
-               evt_err("failed to register Tx service");
-               return ret;
-       }
+               ret = rte_event_eth_tx_adapter_create(consm, opt->dev_id,
+                               &port_conf);
+               if (ret) {
+                       evt_err("failed to create tx adapter[%d]", consm);
+                       return ret;
+               }
 
-       ret = evt_service_setup(tx->service_id);
-       if (ret) {
-               evt_err("Failed to setup service core for Tx service\n");
-               return ret;
-       }
+               ret = rte_event_eth_tx_adapter_queue_add(consm, consm, -1);
+               if (ret) {
+                       evt_err("failed to add tx queues to adapter[%d]",
+                                       consm);
+                       return ret;
+               }
 
-       rte_service_runstate_set(tx->service_id, 1);
+               if (!(cap & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT)) {
+                       uint32_t service_id;
 
-       return 0;
-}
+                       rte_event_eth_tx_adapter_service_id_get(consm,
+                                       &service_id);
+                       ret = evt_service_setup(service_id);
+                       if (ret) {
+                               evt_err("Failed to setup service core"
+                                               " for Tx adapter");
+                               return ret;
+                       }
+               }
+
+               evt_info("Port[%d] using Tx adapter[%d] configured", consm,
+                               consm);
+       }
 
+       return ret;
+}
 
 void
 pipeline_ethdev_destroy(struct evt_test *test, struct evt_options *opt)
@@ -466,16 +383,10 @@ pipeline_ethdev_destroy(struct evt_test *test, struct evt_options *opt)
        uint16_t i;
        RTE_SET_USED(test);
        RTE_SET_USED(opt);
-       struct test_pipeline *t = evt_test_priv(test);
-
-       if (t->mt_unsafe) {
-               rte_service_component_runstate_set(t->tx_service.service_id, 0);
-               rte_service_runstate_set(t->tx_service.service_id, 0);
-               rte_service_component_unregister(t->tx_service.service_id);
-       }
 
        RTE_ETH_FOREACH_DEV(i) {
                rte_event_eth_rx_adapter_stop(i);
+               rte_event_eth_tx_adapter_stop(i);
                rte_eth_dev_stop(i);
        }
 }
@@ -485,7 +396,6 @@ pipeline_eventdev_destroy(struct evt_test *test, struct evt_options *opt)
 {
        RTE_SET_USED(test);
 
-       rte_event_dev_stop(opt->dev_id);
        rte_event_dev_close(opt->dev_id);
 }
 
diff --git a/app/test-eventdev/test_pipeline_common.h b/app/test-eventdev/test_pipeline_common.h
index 5fb9160..0440b9e 100644
@@ -14,6 +14,7 @@
 #include <rte_ethdev.h>
 #include <rte_eventdev.h>
 #include <rte_event_eth_rx_adapter.h>
+#include <rte_event_eth_tx_adapter.h>
 #include <rte_lcore.h>
 #include <rte_malloc.h>
 #include <rte_mempool.h>
@@ -35,52 +36,41 @@ struct worker_data {
        struct test_pipeline *t;
 } __rte_cache_aligned;
 
-struct tx_service_data {
-       uint8_t dev_id;
-       uint8_t queue_id;
-       uint8_t port_id;
-       uint32_t service_id;
-       uint64_t processed_pkts;
-       uint16_t nb_ethports;
-       struct rte_eth_dev_tx_buffer *tx_buf[RTE_MAX_ETHPORTS];
-       struct test_pipeline *t;
-} __rte_cache_aligned;
-
 struct test_pipeline {
        /* Don't change the offset of "done". Signal handler use this memory
         * to terminate all lcores work.
         */
        int done;
        uint8_t nb_workers;
-       uint8_t mt_unsafe;
+       uint8_t internal_port;
+       uint8_t tx_evqueue_id[RTE_MAX_ETHPORTS];
        enum evt_test_result result;
        uint32_t nb_flows;
        uint64_t outstand_pkts;
        struct rte_mempool *pool;
        struct worker_data worker[EVT_MAX_PORTS];
-       struct tx_service_data tx_service;
        struct evt_options *opt;
        uint8_t sched_type_list[EVT_MAX_STAGES] __rte_cache_aligned;
 } __rte_cache_aligned;
 
 #define BURST_SIZE 16
 
-#define PIPELINE_WROKER_SINGLE_STAGE_INIT \
+#define PIPELINE_WORKER_SINGLE_STAGE_INIT \
        struct worker_data *w  = arg;     \
        struct test_pipeline *t = w->t;   \
        const uint8_t dev = w->dev_id;    \
        const uint8_t port = w->port_id;  \
-       struct rte_event ev
+       struct rte_event ev __rte_cache_aligned
 
-#define PIPELINE_WROKER_SINGLE_STAGE_BURST_INIT \
+#define PIPELINE_WORKER_SINGLE_STAGE_BURST_INIT \
        int i;                                  \
        struct worker_data *w  = arg;           \
        struct test_pipeline *t = w->t;         \
        const uint8_t dev = w->dev_id;          \
        const uint8_t port = w->port_id;        \
-       struct rte_event ev[BURST_SIZE + 1]
+       struct rte_event ev[BURST_SIZE + 1] __rte_cache_aligned
 
-#define PIPELINE_WROKER_MULTI_STAGE_INIT                         \
+#define PIPELINE_WORKER_MULTI_STAGE_INIT                         \
        struct worker_data *w  = arg;                            \
        struct test_pipeline *t = w->t;                          \
        uint8_t cq_id;                                           \
@@ -88,10 +78,11 @@ struct test_pipeline {
        const uint8_t port = w->port_id;                         \
        const uint8_t last_queue = t->opt->nb_stages - 1;        \
        uint8_t *const sched_type_list = &t->sched_type_list[0]; \
-       struct rte_event ev
+       const uint8_t nb_stages = t->opt->nb_stages + 1;         \
+       struct rte_event ev __rte_cache_aligned
 
-#define PIPELINE_WROKER_MULTI_STAGE_BURST_INIT                   \
-       int i;                                  \
+#define PIPELINE_WORKER_MULTI_STAGE_BURST_INIT                   \
+       int i;                                                   \
        struct worker_data *w  = arg;                            \
        struct test_pipeline *t = w->t;                          \
        uint8_t cq_id;                                           \
@@ -99,7 +90,8 @@ struct test_pipeline {
        const uint8_t port = w->port_id;                         \
        const uint8_t last_queue = t->opt->nb_stages - 1;        \
        uint8_t *const sched_type_list = &t->sched_type_list[0]; \
-       struct rte_event ev[BURST_SIZE + 1]
+       const uint8_t nb_stages = t->opt->nb_stages + 1;         \
+       struct rte_event ev[BURST_SIZE + 1] __rte_cache_aligned
 
 static __rte_always_inline void
 pipeline_fwd_event(struct rte_event *ev, uint8_t sched)
@@ -109,6 +101,28 @@ pipeline_fwd_event(struct rte_event *ev, uint8_t sched)
        ev->sched_type = sched;
 }
 
+static __rte_always_inline void
+pipeline_event_tx(const uint8_t dev, const uint8_t port,
+               struct rte_event * const ev)
+{
+       rte_event_eth_tx_adapter_txq_set(ev->mbuf, 0);
+       while (!rte_event_eth_tx_adapter_enqueue(dev, port, ev, 1))
+               rte_pause();
+}
+
+static __rte_always_inline void
+pipeline_event_tx_burst(const uint8_t dev, const uint8_t port,
+               struct rte_event *ev, const uint16_t nb_rx)
+{
+       uint16_t enq;
+
+       enq = rte_event_eth_tx_adapter_enqueue(dev, port, ev, nb_rx);
+       while (enq < nb_rx) {
+               enq += rte_event_eth_tx_adapter_enqueue(dev, port,
+                               ev + enq, nb_rx - enq);
+       }
+}
+
 static __rte_always_inline void
 pipeline_event_enqueue(const uint8_t dev, const uint8_t port,
                struct rte_event *ev)
@@ -130,13 +144,6 @@ pipeline_event_enqueue_burst(const uint8_t dev, const uint8_t port,
        }
 }
 
-static __rte_always_inline void
-pipeline_tx_pkt(struct rte_mbuf *mbuf)
-{
-       while (rte_eth_tx_burst(mbuf->port, 0, &mbuf, 1) != 1)
-               rte_pause();
-}
-
 static inline int
 pipeline_nb_event_ports(struct evt_options *opt)
 {
@@ -149,9 +156,8 @@ int pipeline_test_setup(struct evt_test *test, struct evt_options *opt);
 int pipeline_ethdev_setup(struct evt_test *test, struct evt_options *opt);
 int pipeline_event_rx_adapter_setup(struct evt_options *opt, uint8_t stride,
                struct rte_event_port_conf prod_conf);
-int pipeline_event_tx_service_setup(struct evt_test *test,
-               struct evt_options *opt, uint8_t tx_queue_id,
-               uint8_t tx_port_id, const struct rte_event_port_conf p_conf);
+int pipeline_event_tx_adapter_setup(struct evt_options *opt,
+               struct rte_event_port_conf prod_conf);
 int pipeline_mempool_setup(struct evt_test *test, struct evt_options *opt);
 int pipeline_event_port_setup(struct evt_test *test, struct evt_options *opt,
                uint8_t *queue_arr, uint8_t nb_queues,
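
On the worker side, pipeline_event_tx()/pipeline_event_tx_burst() above replace the removed pipeline_tx_pkt(). A hedged single-event worker loop using the names from this header (mirrors the *_single_stage_tx workers):

	while (t->done == false) {
		uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);

		if (!event) {
			rte_pause();
			continue;
		}

		/* txq_set() targets Tx queue 0; the enqueue retries until
		 * the adapter accepts the event.
		 */
		pipeline_event_tx(dev, port, &ev);
		w->processed_pkts++;
	}
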
diff --git a/app/test-eventdev/test_pipeline_queue.c b/app/test-eventdev/test_pipeline_queue.c
index ca5f457..2521700 100644
@@ -15,10 +15,10 @@ pipeline_queue_nb_event_queues(struct evt_options *opt)
        return (eth_count * opt->nb_stages) + eth_count;
 }
 
-static int
+static __rte_noinline int
 pipeline_queue_worker_single_stage_tx(void *arg)
 {
-       PIPELINE_WROKER_SINGLE_STAGE_INIT;
+       PIPELINE_WORKER_SINGLE_STAGE_INIT;
 
        while (t->done == false) {
                uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
@@ -29,7 +29,7 @@ pipeline_queue_worker_single_stage_tx(void *arg)
                }
 
                if (ev.sched_type == RTE_SCHED_TYPE_ATOMIC) {
-                       pipeline_tx_pkt(ev.mbuf);
+                       pipeline_event_tx(dev, port, &ev);
                        w->processed_pkts++;
                } else {
                        ev.queue_id++;
@@ -41,11 +41,11 @@ pipeline_queue_worker_single_stage_tx(void *arg)
        return 0;
 }
 
-static int
+static __rte_noinline int
 pipeline_queue_worker_single_stage_fwd(void *arg)
 {
-       PIPELINE_WROKER_SINGLE_STAGE_INIT;
-       const uint8_t tx_queue = t->tx_service.queue_id;
+       PIPELINE_WORKER_SINGLE_STAGE_INIT;
+       const uint8_t *tx_queue = t->tx_evqueue_id;
 
        while (t->done == false) {
                uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
@@ -55,7 +55,8 @@ pipeline_queue_worker_single_stage_fwd(void *arg)
                        continue;
                }
 
-               ev.queue_id = tx_queue;
+               ev.queue_id = tx_queue[ev.mbuf->port];
+               rte_event_eth_tx_adapter_txq_set(ev.mbuf, 0);
                pipeline_fwd_event(&ev, RTE_SCHED_TYPE_ATOMIC);
                pipeline_event_enqueue(dev, port, &ev);
                w->processed_pkts++;
@@ -64,10 +65,10 @@ pipeline_queue_worker_single_stage_fwd(void *arg)
        return 0;
 }
 
-static int
+static __rte_noinline int
 pipeline_queue_worker_single_stage_burst_tx(void *arg)
 {
-       PIPELINE_WROKER_SINGLE_STAGE_BURST_INIT;
+       PIPELINE_WORKER_SINGLE_STAGE_BURST_INIT;
 
        while (t->done == false) {
                uint16_t nb_rx = rte_event_dequeue_burst(dev, port, ev,
@@ -81,8 +82,7 @@ pipeline_queue_worker_single_stage_burst_tx(void *arg)
                for (i = 0; i < nb_rx; i++) {
                        rte_prefetch0(ev[i + 1].mbuf);
                        if (ev[i].sched_type == RTE_SCHED_TYPE_ATOMIC) {
-
-                               pipeline_tx_pkt(ev[i].mbuf);
+                               pipeline_event_tx(dev, port, &ev[i]);
                                ev[i].op = RTE_EVENT_OP_RELEASE;
                                w->processed_pkts++;
                        } else {
@@ -98,11 +98,11 @@ pipeline_queue_worker_single_stage_burst_tx(void *arg)
        return 0;
 }
 
-static int
+static __rte_noinline int
 pipeline_queue_worker_single_stage_burst_fwd(void *arg)
 {
-       PIPELINE_WROKER_SINGLE_STAGE_BURST_INIT;
-       const uint8_t tx_queue = t->tx_service.queue_id;
+       PIPELINE_WORKER_SINGLE_STAGE_BURST_INIT;
+       const uint8_t *tx_queue = t->tx_evqueue_id;
 
        while (t->done == false) {
                uint16_t nb_rx = rte_event_dequeue_burst(dev, port, ev,
@@ -115,23 +115,24 @@ pipeline_queue_worker_single_stage_burst_fwd(void *arg)
 
                for (i = 0; i < nb_rx; i++) {
                        rte_prefetch0(ev[i + 1].mbuf);
-                       ev[i].queue_id = tx_queue;
+                       ev[i].queue_id = tx_queue[ev[i].mbuf->port];
+                       rte_event_eth_tx_adapter_txq_set(ev[i].mbuf, 0);
                        pipeline_fwd_event(&ev[i], RTE_SCHED_TYPE_ATOMIC);
-                       w->processed_pkts++;
                }
 
                pipeline_event_enqueue_burst(dev, port, ev, nb_rx);
+               w->processed_pkts += nb_rx;
        }
 
        return 0;
 }
 
 
-static int
+static __rte_noinline int
 pipeline_queue_worker_multi_stage_tx(void *arg)
 {
-       PIPELINE_WROKER_MULTI_STAGE_INIT;
-       const uint8_t nb_stages = t->opt->nb_stages + 1;
+       PIPELINE_WORKER_MULTI_STAGE_INIT;
+       const uint8_t *tx_queue = t->tx_evqueue_id;
 
        while (t->done == false) {
                uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
@@ -143,31 +144,27 @@ pipeline_queue_worker_multi_stage_tx(void *arg)
 
                cq_id = ev.queue_id % nb_stages;
 
-               if (cq_id >= last_queue) {
-                       if (ev.sched_type == RTE_SCHED_TYPE_ATOMIC) {
-
-                               pipeline_tx_pkt(ev.mbuf);
-                               w->processed_pkts++;
-                               continue;
-                       }
-                       ev.queue_id += (cq_id == last_queue) ? 1 : 0;
-                       pipeline_fwd_event(&ev, RTE_SCHED_TYPE_ATOMIC);
-               } else {
-                       ev.queue_id++;
-                       pipeline_fwd_event(&ev, sched_type_list[cq_id]);
+               if (ev.queue_id == tx_queue[ev.mbuf->port]) {
+                       pipeline_event_tx(dev, port, &ev);
+                       w->processed_pkts++;
+                       continue;
                }
 
+               ev.queue_id++;
+               pipeline_fwd_event(&ev, cq_id != last_queue ?
+                               sched_type_list[cq_id] :
+                               RTE_SCHED_TYPE_ATOMIC);
                pipeline_event_enqueue(dev, port, &ev);
        }
+
        return 0;
 }
 
-static int
+static __rte_noinline int
 pipeline_queue_worker_multi_stage_fwd(void *arg)
 {
-       PIPELINE_WROKER_MULTI_STAGE_INIT;
-       const uint8_t nb_stages = t->opt->nb_stages + 1;
-       const uint8_t tx_queue = t->tx_service.queue_id;
+       PIPELINE_WORKER_MULTI_STAGE_INIT;
+       const uint8_t *tx_queue = t->tx_evqueue_id;
 
        while (t->done == false) {
                uint16_t event = rte_event_dequeue_burst(dev, port, &ev, 1, 0);
@@ -180,7 +177,8 @@ pipeline_queue_worker_multi_stage_fwd(void *arg)
                cq_id = ev.queue_id % nb_stages;
 
                if (cq_id == last_queue) {
-                       ev.queue_id = tx_queue;
+                       ev.queue_id = tx_queue[ev.mbuf->port];
+                       rte_event_eth_tx_adapter_txq_set(ev.mbuf, 0);
                        pipeline_fwd_event(&ev, RTE_SCHED_TYPE_ATOMIC);
                        w->processed_pkts++;
                } else {
@@ -190,14 +188,15 @@ pipeline_queue_worker_multi_stage_fwd(void *arg)
 
                pipeline_event_enqueue(dev, port, &ev);
        }
+
        return 0;
 }
 
-static int
+static __rte_noinline int
 pipeline_queue_worker_multi_stage_burst_tx(void *arg)
 {
-       PIPELINE_WROKER_MULTI_STAGE_BURST_INIT;
-       const uint8_t nb_stages = t->opt->nb_stages + 1;
+       PIPELINE_WORKER_MULTI_STAGE_BURST_INIT;
+       const uint8_t *tx_queue = t->tx_evqueue_id;
 
        while (t->done == false) {
                uint16_t nb_rx = rte_event_dequeue_burst(dev, port, ev,
@@ -212,37 +211,30 @@ pipeline_queue_worker_multi_stage_burst_tx(void *arg)
                        rte_prefetch0(ev[i + 1].mbuf);
                        cq_id = ev[i].queue_id % nb_stages;
 
-                       if (cq_id >= last_queue) {
-                               if (ev[i].sched_type == RTE_SCHED_TYPE_ATOMIC) {
-
-                                       pipeline_tx_pkt(ev[i].mbuf);
-                                       ev[i].op = RTE_EVENT_OP_RELEASE;
-                                       w->processed_pkts++;
-                                       continue;
-                               }
-
-                               ev[i].queue_id += (cq_id == last_queue) ? 1 : 0;
-                               pipeline_fwd_event(&ev[i],
-                                               RTE_SCHED_TYPE_ATOMIC);
-                       } else {
-                               ev[i].queue_id++;
-                               pipeline_fwd_event(&ev[i],
-                                               sched_type_list[cq_id]);
+                       if (ev[i].queue_id == tx_queue[ev[i].mbuf->port]) {
+                               pipeline_event_tx(dev, port, &ev[i]);
+                               ev[i].op = RTE_EVENT_OP_RELEASE;
+                               w->processed_pkts++;
+                               continue;
                        }
 
+                       ev[i].queue_id++;
+                       pipeline_fwd_event(&ev[i], cq_id != last_queue ?
+                                       sched_type_list[cq_id] :
+                                       RTE_SCHED_TYPE_ATOMIC);
                }
 
                pipeline_event_enqueue_burst(dev, port, ev, nb_rx);
        }
+
        return 0;
 }
 
-static int
+static __rte_noinline int
 pipeline_queue_worker_multi_stage_burst_fwd(void *arg)
 {
-       PIPELINE_WROKER_MULTI_STAGE_BURST_INIT;
-       const uint8_t nb_stages = t->opt->nb_stages + 1;
-       const uint8_t tx_queue = t->tx_service.queue_id;
+       PIPELINE_WORKER_MULTI_STAGE_BURST_INIT;
+       const uint8_t *tx_queue = t->tx_evqueue_id;
 
        while (t->done == false) {
                uint16_t nb_rx = rte_event_dequeue_burst(dev, port, ev,
@@ -258,7 +250,8 @@ pipeline_queue_worker_multi_stage_burst_fwd(void *arg)
                        cq_id = ev[i].queue_id % nb_stages;
 
                        if (cq_id == last_queue) {
-                               ev[i].queue_id = tx_queue;
+                               ev[i].queue_id = tx_queue[ev[i].mbuf->port];
+                               rte_event_eth_tx_adapter_txq_set(ev[i].mbuf, 0);
                                pipeline_fwd_event(&ev[i],
                                                RTE_SCHED_TYPE_ATOMIC);
                                w->processed_pkts++;
@@ -271,6 +264,7 @@ pipeline_queue_worker_multi_stage_burst_fwd(void *arg)
 
                pipeline_event_enqueue_burst(dev, port, ev, nb_rx);
        }
+
        return 0;
 }
 
@@ -280,28 +274,28 @@ worker_wrapper(void *arg)
        struct worker_data *w  = arg;
        struct evt_options *opt = w->t->opt;
        const bool burst = evt_has_burst_mode(w->dev_id);
-       const bool mt_safe = !w->t->mt_unsafe;
+       const bool internal_port = w->t->internal_port;
        const uint8_t nb_stages = opt->nb_stages;
        RTE_SET_USED(opt);
 
        if (nb_stages == 1) {
-               if (!burst && mt_safe)
+               if (!burst && internal_port)
                        return pipeline_queue_worker_single_stage_tx(arg);
-               else if (!burst && !mt_safe)
+               else if (!burst && !internal_port)
                        return pipeline_queue_worker_single_stage_fwd(arg);
-               else if (burst && mt_safe)
+               else if (burst && internal_port)
                        return pipeline_queue_worker_single_stage_burst_tx(arg);
-               else if (burst && !mt_safe)
+               else if (burst && !internal_port)
                        return pipeline_queue_worker_single_stage_burst_fwd(
                                        arg);
        } else {
-               if (!burst && mt_safe)
+               if (!burst && internal_port)
                        return pipeline_queue_worker_multi_stage_tx(arg);
-               else if (!burst && !mt_safe)
+               else if (!burst && !internal_port)
                        return pipeline_queue_worker_multi_stage_fwd(arg);
-               else if (burst && mt_safe)
+               else if (burst && internal_port)
                        return pipeline_queue_worker_multi_stage_burst_tx(arg);
-               else if (burst && !mt_safe)
+               else if (burst && !internal_port)
                        return pipeline_queue_worker_multi_stage_burst_fwd(arg);
 
        }
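
worker_wrapper() now keys the worker choice on whether the eventdev/ethdev pair provides an internal Tx port, instead of on MT safety of the ethdev. A minimal sketch of how such a flag can be derived, assuming every port must agree (the helper is ours, not the patch's):

    #include <stdbool.h>
    #include <rte_ethdev.h>
    #include <rte_event_eth_tx_adapter.h>

    /* True only if every ethdev can be driven through an internal Tx
     * adapter port of the given event device. */
    static bool
    all_ports_have_internal_tx(uint8_t evdev_id)
    {
            uint32_t caps;
            uint16_t port;

            RTE_ETH_FOREACH_DEV(port) {
                    if (rte_event_eth_tx_adapter_caps_get(evdev_id, port,
                                    &caps) ||
                        !(caps & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT))
                            return false;
            }
            return true;
    }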
@@ -311,10 +305,6 @@ worker_wrapper(void *arg)
 static int
 pipeline_queue_launch_lcores(struct evt_test *test, struct evt_options *opt)
 {
-       struct test_pipeline *t = evt_test_priv(test);
-
-       if (t->mt_unsafe)
-               rte_service_component_runstate_set(t->tx_service.service_id, 1);
        return pipeline_launch_lcores(test, opt, worker_wrapper);
 }
 
@@ -326,25 +316,24 @@ pipeline_queue_eventdev_setup(struct evt_test *test, struct evt_options *opt)
        int nb_queues;
        int nb_stages = opt->nb_stages;
        uint8_t queue;
-       struct rte_event_dev_info info;
-       struct test_pipeline *t = evt_test_priv(test);
-       uint8_t tx_evqueue_id = 0;
+       uint8_t tx_evport_id = 0;
+       uint8_t tx_evqueue_id[RTE_MAX_ETHPORTS];
        uint8_t queue_arr[RTE_EVENT_MAX_QUEUES_PER_DEV];
        uint8_t nb_worker_queues = 0;
+       uint16_t prod = 0;
+       struct rte_event_dev_info info;
+       struct test_pipeline *t = evt_test_priv(test);
 
        nb_ports = evt_nr_active_lcores(opt->wlcores);
        nb_queues = rte_eth_dev_count_avail() * (nb_stages);
 
-       /* Extra port for Tx service. */
-       if (t->mt_unsafe) {
-               tx_evqueue_id = nb_queues;
-               nb_ports++;
-               nb_queues++;
-       } else
-               nb_queues += rte_eth_dev_count_avail();
+       /* One queue for Tx adapter per port */
+       nb_queues += rte_eth_dev_count_avail();
 
-       rte_event_dev_info_get(opt->dev_id, &info);
+       memset(tx_evqueue_id, 0, sizeof(uint8_t) * RTE_MAX_ETHPORTS);
+       memset(queue_arr, 0, sizeof(uint8_t) * RTE_EVENT_MAX_QUEUES_PER_DEV);
 
+       rte_event_dev_info_get(opt->dev_id, &info);
        const struct rte_event_dev_config config = {
                        .nb_event_queues = nb_queues,
                        .nb_event_ports = nb_ports,
@@ -370,24 +359,19 @@ pipeline_queue_eventdev_setup(struct evt_test *test, struct evt_options *opt)
        for (queue = 0; queue < nb_queues; queue++) {
                uint8_t slot;
 
-               if (!t->mt_unsafe) {
-                       slot = queue % (nb_stages + 1);
-                       q_conf.schedule_type = slot == nb_stages ?
-                               RTE_SCHED_TYPE_ATOMIC :
-                               opt->sched_type_list[slot];
-               } else {
-                       slot = queue % nb_stages;
-
-                       if (queue == tx_evqueue_id) {
-                               q_conf.schedule_type = RTE_SCHED_TYPE_ATOMIC;
+               q_conf.event_queue_cfg = 0;
+               slot = queue % (nb_stages + 1);
+               if (slot == nb_stages) {
+                       q_conf.schedule_type = RTE_SCHED_TYPE_ATOMIC;
+                       if (!t->internal_port) {
                                q_conf.event_queue_cfg =
                                        RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
-                       } else {
-                               q_conf.schedule_type =
-                                       opt->sched_type_list[slot];
-                               queue_arr[nb_worker_queues] = queue;
-                               nb_worker_queues++;
                        }
+                       tx_evqueue_id[prod++] = queue;
+               } else {
+                       q_conf.schedule_type = opt->sched_type_list[slot];
+                       queue_arr[nb_worker_queues] = queue;
+                       nb_worker_queues++;
                }
 
                ret = rte_event_queue_setup(opt->dev_id, queue, &q_conf);
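
The queue layout is now uniform: each ethdev owns a stride of nb_stages + 1 event queues, and the last slot of every stride is the ATOMIC Tx-stage queue (additionally SINGLE_LINK when no internal Tx port is available). The slot arithmetic above, restated in isolation:

    /* Illustrative restatement of the mapping used in the loop above. */
    static inline int
    is_tx_stage_queue(uint8_t queue, uint8_t nb_stages)
    {
            return (queue % (nb_stages + 1)) == nb_stages;
    }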
@@ -407,19 +391,11 @@ pipeline_queue_eventdev_setup(struct evt_test *test, struct evt_options *opt)
                        .new_event_threshold = info.max_num_events,
        };
 
-       /*
-        * If tx is multi thread safe then allow workers to do Tx else use Tx
-        * service to Tx packets.
-        */
-       if (t->mt_unsafe) {
+       if (!t->internal_port) {
                ret = pipeline_event_port_setup(test, opt, queue_arr,
                                nb_worker_queues, p_conf);
                if (ret)
                        return ret;
-
-               ret = pipeline_event_tx_service_setup(test, opt, tx_evqueue_id,
-                               nb_ports - 1, p_conf);
-
        } else
                ret = pipeline_event_port_setup(test, opt, NULL, nb_queues,
                                p_conf);
@@ -431,7 +407,6 @@ pipeline_queue_eventdev_setup(struct evt_test *test, struct evt_options *opt)
         *
         * eth_dev_count = 2, nb_stages = 2.
         *
-        * Multi thread safe :
         *      queues = 6
         *      stride = 3
         *
@@ -439,21 +414,14 @@ pipeline_queue_eventdev_setup(struct evt_test *test, struct evt_options *opt)
         *      eth0 -> q0 -> q1 -> (q2->tx)
         *      eth1 -> q3 -> q4 -> (q5->tx)
         *
-        *      q2, q5 configured as ATOMIC
-        *
-        * Multi thread unsafe :
-        *      queues = 5
-        *      stride = 2
-        *
-        *      event queue pipelines:
-        *      eth0 -> q0 -> q1
-        *                      } (q4->tx) Tx service
-        *      eth1 -> q2 -> q3
+        *      q2, q5 configured as ATOMIC | SINGLE_LINK
         *
-        *      q4 configured as SINGLE_LINK|ATOMIC
         */
-       ret = pipeline_event_rx_adapter_setup(opt,
-                       t->mt_unsafe ? nb_stages : nb_stages + 1, p_conf);
+       ret = pipeline_event_rx_adapter_setup(opt, nb_stages + 1, p_conf);
+       if (ret)
+               return ret;
+
+       ret = pipeline_event_tx_adapter_setup(opt, p_conf);
        if (ret)
                return ret;
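
pipeline_event_tx_adapter_setup() is defined in test_pipeline_common.c and not shown in this hunk. A minimal sketch of the per-port bring-up it is expected to perform with the standard adapter API; the helper and the adapter-id choice are assumptions:

    #include <rte_event_eth_tx_adapter.h>

    /* Create one Tx adapter per ethdev (adapter id == port id here) and
     * register all of the port's Tx queues with it. The adapter is
     * started later, after rte_event_dev_start(). */
    static int
    tx_adapter_setup_one(uint8_t evdev_id, uint16_t eth_port,
                    struct rte_event_port_conf *port_conf)
    {
            int ret;

            ret = rte_event_eth_tx_adapter_create(eth_port, evdev_id,
                            port_conf);
            if (ret)
                    return ret;
            /* queue id -1 adds every Tx queue of the ethdev */
            return rte_event_eth_tx_adapter_queue_add(eth_port, eth_port, -1);
    }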
 
@@ -467,12 +435,60 @@ pipeline_queue_eventdev_setup(struct evt_test *test, struct evt_options *opt)
                }
        }
 
+       /* Connect the tx_evqueue_id to the Tx adapter port */
+       if (!t->internal_port) {
+               RTE_ETH_FOREACH_DEV(prod) {
+                       ret = rte_event_eth_tx_adapter_event_port_get(prod,
+                                       &tx_evport_id);
+                       if (ret) {
+                               evt_err("Unable to get Tx adptr[%d] evprt[%d]",
+                                               prod, tx_evport_id);
+                               return ret;
+                       }
+
+                       if (rte_event_port_link(opt->dev_id, tx_evport_id,
+                                               &tx_evqueue_id[prod],
+                                               NULL, 1) != 1) {
+                               evt_err("Unable to link Tx adptr[%d] evprt[%d]",
+                                               prod, tx_evport_id);
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       RTE_ETH_FOREACH_DEV(prod) {
+               ret = rte_eth_dev_start(prod);
+               if (ret) {
+                       evt_err("Ethernet dev [%d] failed to start", prod);
+                       return ret;
+               }
+       }
+
        ret = rte_event_dev_start(opt->dev_id);
        if (ret) {
                evt_err("failed to start eventdev %d", opt->dev_id);
                return ret;
        }
 
+       RTE_ETH_FOREACH_DEV(prod) {
+               ret = rte_event_eth_rx_adapter_start(prod);
+               if (ret) {
+                       evt_err("Rx adapter[%d] start failed", prod);
+                       return ret;
+               }
+
+               ret = rte_event_eth_tx_adapter_start(prod);
+               if (ret) {
+                       evt_err("Tx adapter[%d] start failed", prod);
+                       return ret;
+               }
+       }
+
+       memcpy(t->tx_evqueue_id, tx_evqueue_id, sizeof(uint8_t) *
+                       RTE_MAX_ETHPORTS);
+
        return 0;
 }
 
index 2b4d604..d5258ea 100644
--- a/app/test-pmd/Makefile
+++ b/app/test-pmd/Makefile
@@ -33,8 +33,10 @@ SRCS-y += rxonly.c
 SRCS-y += txonly.c
 SRCS-y += csumonly.c
 SRCS-y += icmpecho.c
+SRCS-y += noisy_vnf.c
 SRCS-$(CONFIG_RTE_LIBRTE_IEEE1588) += ieee1588fwd.c
 SRCS-$(CONFIG_RTE_LIBRTE_BPF) += bpf_cmd.c
+SRCS-y += util.c
 
 ifeq ($(CONFIG_RTE_LIBRTE_PMD_SOFTNIC), y)
 SRCS-y += softnicfwd.c
@@ -70,8 +72,6 @@ endif
 
 endif
 
-CFLAGS_cmdline.o := -D_GNU_SOURCE
-
 include $(RTE_SDK)/mk/rte.app.mk
 
 endif
index 589121d..1050fde 100644
--- a/app/test-pmd/cmdline.c
+++ b/app/test-pmd/cmdline.c
@@ -167,7 +167,7 @@ static void cmd_help_long_parsed(void *parsed_result,
                        "Display:\n"
                        "--------\n\n"
 
-                       "show port (info|stats|xstats|fdir|stat_qmap|dcb_tc|cap) (port_id|all)\n"
+                       "show port (info|stats|summary|xstats|fdir|stat_qmap|dcb_tc|cap) (port_id|all)\n"
                        "    Display information for port_id, or all.\n\n"
 
                        "show port X rss reta (size) (mask0,mask1,...)\n"
@@ -175,11 +175,8 @@ static void cmd_help_long_parsed(void *parsed_result,
                        " by masks on port X. size is used to indicate the"
                        " hardware supported reta size\n\n"
 
-                       "show port rss-hash ipv4|ipv4-frag|ipv4-tcp|ipv4-udp|"
-                       "ipv4-sctp|ipv4-other|ipv6|ipv6-frag|ipv6-tcp|ipv6-udp|ipv6-sctp|"
-                       "ipv6-other|l2-payload|ipv6-ex|ipv6-tcp-ex|ipv6-udp-ex [key]\n"
-                       "    Display the RSS hash functions and RSS hash key"
-                       " of port X\n\n"
+                       "show port (port_id) rss-hash [key]\n"
+                       "    Display the RSS hash functions and RSS hash key of port\n\n"
 
                        "clear port (info|stats|xstats|fdir|stat_qmap) (port_id|all)\n"
                        "    Clear information for port_id, or all.\n\n"
@@ -283,6 +280,9 @@ static void cmd_help_long_parsed(void *parsed_result,
                        "set portlist (x[,y]*)\n"
                        "    Set the list of forwarding ports.\n\n"
 
+                       "set port setup on (iterator|event)\n"
+                       "    Select how an attached port is retrieved for setup.\n\n"
+
                        "set tx loopback (port_id) (on|off)\n"
                        "    Enable or disable tx loopback.\n\n"
 
@@ -397,12 +397,13 @@ static void cmd_help_long_parsed(void *parsed_result,
                        "    Disable hardware insertion of a VLAN header in"
                        " packets sent on a port.\n\n"
 
-                       "csum set (ip|udp|tcp|sctp|outer-ip) (hw|sw) (port_id)\n"
+                       "csum set (ip|udp|tcp|sctp|outer-ip|outer-udp) (hw|sw) (port_id)\n"
                        "    Select hardware or software calculation of the"
                        " checksum when transmitting a packet using the"
                        " csum forward engine.\n"
                        "    ip|udp|tcp|sctp always concern the inner layer.\n"
                        "    outer-ip concerns the outer IP layer in"
+                       "    outer-udp concerns the outer UDP layer in"
                        " case the packet is recognized as a tunnel packet by"
                        " the forward engine (vxlan, gre and ipip are supported)\n"
                        "    Please check the NIC datasheet for HW limits.\n\n"
@@ -883,6 +884,10 @@ static void cmd_help_long_parsed(void *parsed_result,
                        "    Start/stop a rx/tx queue of port X. Only take effect"
                        " when port X is started\n\n"
 
+                       "port (port_id) (rxq|txq) (queue_id) deferred_start (on|off)\n"
+                       "    Switch on/off a deferred start of port X rx/tx queue. Only"
+                       " takes effect when port X is stopped.\n\n"
+
                        "port (port_id) (rxq|txq) (queue_id) setup\n"
                        "    Setup a rx/tx queue of port X.\n\n"
 
@@ -1247,6 +1252,59 @@ cmdline_parse_inst_t cmd_operate_specific_port = {
        },
 };
 
+/* *** enable port setup (after attach) via iterator or event *** */
+struct cmd_set_port_setup_on_result {
+       cmdline_fixed_string_t set;
+       cmdline_fixed_string_t port;
+       cmdline_fixed_string_t setup;
+       cmdline_fixed_string_t on;
+       cmdline_fixed_string_t mode;
+};
+
+static void cmd_set_port_setup_on_parsed(void *parsed_result,
+                               __attribute__((unused)) struct cmdline *cl,
+                               __attribute__((unused)) void *data)
+{
+       struct cmd_set_port_setup_on_result *res = parsed_result;
+
+       if (strcmp(res->mode, "event") == 0)
+               setup_on_probe_event = true;
+       else if (strcmp(res->mode, "iterator") == 0)
+               setup_on_probe_event = false;
+       else
+               printf("Unknown mode\n");
+}
+
+cmdline_parse_token_string_t cmd_set_port_setup_on_set =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_port_setup_on_result,
+                       set, "set");
+cmdline_parse_token_string_t cmd_set_port_setup_on_port =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_port_setup_on_result,
+                       port, "port");
+cmdline_parse_token_string_t cmd_set_port_setup_on_setup =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_port_setup_on_result,
+                       setup, "setup");
+cmdline_parse_token_string_t cmd_set_port_setup_on_on =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_port_setup_on_result,
+                       on, "on");
+cmdline_parse_token_string_t cmd_set_port_setup_on_mode =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_port_setup_on_result,
+                       mode, "iterator#event");
+
+cmdline_parse_inst_t cmd_set_port_setup_on = {
+       .f = cmd_set_port_setup_on_parsed,
+       .data = NULL,
+       .help_str = "set port setup on iterator|event",
+       .tokens = {
+               (void *)&cmd_set_port_setup_on_set,
+               (void *)&cmd_set_port_setup_on_port,
+               (void *)&cmd_set_port_setup_on_setup,
+               (void *)&cmd_set_port_setup_on_on,
+               (void *)&cmd_set_port_setup_on_mode,
+               NULL,
+       },
+};
+
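+
When "event" mode is selected, testpmd defers per-port setup to its RTE_ETH_EVENT_NEW handler instead of iterating over ports after attach. A minimal sketch of such a hook; the callback body and function names are illustrative:

    #include <stdio.h>
    #include <rte_common.h>
    #include <rte_ethdev.h>

    /* Illustrative RTE_ETH_EVENT_NEW hook: a newly probed port is set
     * up from here rather than from an iterator after attach. */
    static int
    port_new_event_cb(uint16_t port_id, enum rte_eth_event_type type,
                    void *cb_arg, void *ret_param)
    {
            RTE_SET_USED(type);
            RTE_SET_USED(cb_arg);
            RTE_SET_USED(ret_param);
            printf("port %u probed, setting it up from the handler\n",
                            port_id);
            return 0;
    }

    static void
    register_new_port_hook(void)
    {
            /* RTE_ETH_ALL registers for current and future ports */
            rte_eth_dev_callback_register(RTE_ETH_ALL, RTE_ETH_EVENT_NEW,
                            port_new_event_cb, NULL);
    }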
 /* *** attach a specified port *** */
 struct cmd_operate_attach_port_result {
        cmdline_fixed_string_t port;
@@ -1303,7 +1361,7 @@ static void cmd_operate_detach_port_parsed(void *parsed_result,
        struct cmd_operate_detach_port_result *res = parsed_result;
 
        if (!strcmp(res->keyword, "detach"))
-               detach_port(res->port_id);
+               detach_port_device(res->port_id);
        else
                printf("Unknown parameter\n");
 }
@@ -1898,11 +1956,9 @@ cmd_config_rx_mode_flag_parsed(void *parsed_result,
                rx_offloads = port->dev_conf.rxmode.offloads;
                if (!strcmp(res->name, "crc-strip")) {
                        if (!strcmp(res->value, "on")) {
-                               rx_offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
                                rx_offloads &= ~DEV_RX_OFFLOAD_KEEP_CRC;
                        } else if (!strcmp(res->value, "off")) {
                                rx_offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
-                               rx_offloads &= ~DEV_RX_OFFLOAD_CRC_STRIP;
                        } else {
                                printf("Unknown parameter\n");
                                return;
@@ -2441,6 +2497,92 @@ cmdline_parse_inst_t cmd_config_rxtx_queue = {
        },
 };
 
+/* *** configure port rxq/txq deferred start on/off *** */
+struct cmd_config_deferred_start_rxtx_queue {
+       cmdline_fixed_string_t port;
+       portid_t port_id;
+       cmdline_fixed_string_t rxtxq;
+       uint16_t qid;
+       cmdline_fixed_string_t opname;
+       cmdline_fixed_string_t state;
+};
+
+static void
+cmd_config_deferred_start_rxtx_queue_parsed(void *parsed_result,
+                       __attribute__((unused)) struct cmdline *cl,
+                       __attribute__((unused)) void *data)
+{
+       struct cmd_config_deferred_start_rxtx_queue *res = parsed_result;
+       struct rte_port *port;
+       uint8_t isrx;
+       uint8_t ison;
+       uint8_t needreconfig = 0;
+
+       if (port_id_is_invalid(res->port_id, ENABLED_WARN))
+               return;
+
+       if (port_is_started(res->port_id) != 0) {
+               printf("Please stop port %u first\n", res->port_id);
+               return;
+       }
+
+       port = &ports[res->port_id];
+
+       isrx = !strcmp(res->rxtxq, "rxq");
+
+       if (isrx && rx_queue_id_is_invalid(res->qid))
+               return;
+       else if (!isrx && tx_queue_id_is_invalid(res->qid))
+               return;
+
+       ison = !strcmp(res->state, "on");
+
+       if (isrx && port->rx_conf[res->qid].rx_deferred_start != ison) {
+               port->rx_conf[res->qid].rx_deferred_start = ison;
+               needreconfig = 1;
+       } else if (!isrx && port->tx_conf[res->qid].tx_deferred_start != ison) {
+               port->tx_conf[res->qid].tx_deferred_start = ison;
+               needreconfig = 1;
+       }
+
+       if (needreconfig)
+               cmd_reconfig_device_queue(res->port_id, 0, 1);
+}
+
+cmdline_parse_token_string_t cmd_config_deferred_start_rxtx_queue_port =
+       TOKEN_STRING_INITIALIZER(struct cmd_config_deferred_start_rxtx_queue,
+                                               port, "port");
+cmdline_parse_token_num_t cmd_config_deferred_start_rxtx_queue_port_id =
+       TOKEN_NUM_INITIALIZER(struct cmd_config_deferred_start_rxtx_queue,
+                                               port_id, UINT16);
+cmdline_parse_token_string_t cmd_config_deferred_start_rxtx_queue_rxtxq =
+       TOKEN_STRING_INITIALIZER(struct cmd_config_deferred_start_rxtx_queue,
+                                               rxtxq, "rxq#txq");
+cmdline_parse_token_num_t cmd_config_deferred_start_rxtx_queue_qid =
+       TOKEN_NUM_INITIALIZER(struct cmd_config_deferred_start_rxtx_queue,
+                                               qid, UINT16);
+cmdline_parse_token_string_t cmd_config_deferred_start_rxtx_queue_opname =
+       TOKEN_STRING_INITIALIZER(struct cmd_config_deferred_start_rxtx_queue,
+                                               opname, "deferred_start");
+cmdline_parse_token_string_t cmd_config_deferred_start_rxtx_queue_state =
+       TOKEN_STRING_INITIALIZER(struct cmd_config_deferred_start_rxtx_queue,
+                                               state, "on#off");
+
+cmdline_parse_inst_t cmd_config_deferred_start_rxtx_queue = {
+       .f = cmd_config_deferred_start_rxtx_queue_parsed,
+       .data = NULL,
+       .help_str = "port <port_id> rxq|txq <queue_id> deferred_start on|off",
+       .tokens = {
+               (void *)&cmd_config_deferred_start_rxtx_queue_port,
+               (void *)&cmd_config_deferred_start_rxtx_queue_port_id,
+               (void *)&cmd_config_deferred_start_rxtx_queue_rxtxq,
+               (void *)&cmd_config_deferred_start_rxtx_queue_qid,
+               (void *)&cmd_config_deferred_start_rxtx_queue_opname,
+               (void *)&cmd_config_deferred_start_rxtx_queue_state,
+               NULL,
+       },
+};
+
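A queue flagged this way stays stopped across rte_eth_dev_start() until started explicitly; in testpmd that is "port 0 rxq 0 deferred_start on" (with the port stopped), then "port 0 rxq 0 start" once traffic should begin. A minimal sketch of the underlying ethdev usage (ring size 512 and queue 0 are arbitrary choices):

    #include <rte_ethdev.h>
    #include <rte_mempool.h>

    /* Configure Rx queue 0 of "port" as deferred, then start it on
     * demand once the port itself is running. */
    static int
    rx_queue_deferred_demo(uint16_t port, struct rte_mempool *mp)
    {
            struct rte_eth_dev_info info;
            struct rte_eth_rxconf rxconf;
            int ret;

            rte_eth_dev_info_get(port, &info);
            rxconf = info.default_rxconf;
            rxconf.rx_deferred_start = 1;

            ret = rte_eth_rx_queue_setup(port, 0, 512,
                            rte_eth_dev_socket_id(port), &rxconf, mp);
            if (ret)
                    return ret;
            /* ... rte_eth_dev_start(port) leaves this queue stopped ... */
            return rte_eth_dev_rx_queue_start(port, 0);
    }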
 /* *** configure port rxq/txq setup *** */
 struct cmd_setup_rxtx_queue {
        cmdline_fixed_string_t port;
@@ -2816,8 +2958,7 @@ static void cmd_showport_rss_hash_parsed(void *parsed_result,
 {
        struct cmd_showport_rss_hash *res = parsed_result;
 
-       port_rss_hash_conf_show(res->port_id, res->rss_type,
-                               show_rss_key != NULL);
+       port_rss_hash_conf_show(res->port_id, show_rss_key != NULL);
 }
 
 cmdline_parse_token_string_t cmd_showport_rss_hash_show =
@@ -2829,28 +2970,18 @@ cmdline_parse_token_num_t cmd_showport_rss_hash_port_id =
 cmdline_parse_token_string_t cmd_showport_rss_hash_rss_hash =
        TOKEN_STRING_INITIALIZER(struct cmd_showport_rss_hash, rss_hash,
                                 "rss-hash");
-cmdline_parse_token_string_t cmd_showport_rss_hash_rss_hash_info =
-       TOKEN_STRING_INITIALIZER(struct cmd_showport_rss_hash, rss_type,
-                                "ipv4#ipv4-frag#ipv4-tcp#ipv4-udp#ipv4-sctp#"
-                                "ipv4-other#ipv6#ipv6-frag#ipv6-tcp#ipv6-udp#"
-                                "ipv6-sctp#ipv6-other#l2-payload#ipv6-ex#"
-                                "ipv6-tcp-ex#ipv6-udp-ex");
 cmdline_parse_token_string_t cmd_showport_rss_hash_rss_key =
        TOKEN_STRING_INITIALIZER(struct cmd_showport_rss_hash, key, "key");
 
 cmdline_parse_inst_t cmd_showport_rss_hash = {
        .f = cmd_showport_rss_hash_parsed,
        .data = NULL,
-       .help_str = "show port <port_id> rss-hash "
-               "ipv4|ipv4-frag|ipv4-tcp|ipv4-udp|ipv4-sctp|ipv4-other|"
-               "ipv6|ipv6-frag|ipv6-tcp|ipv6-udp|ipv6-sctp|ipv6-other|"
-               "l2-payload|ipv6-ex|ipv6-tcp-ex|ipv6-udp-ex",
+       .help_str = "show port <port_id> rss-hash",
        .tokens = {
                (void *)&cmd_showport_rss_hash_show,
                (void *)&cmd_showport_rss_hash_port,
                (void *)&cmd_showport_rss_hash_port_id,
                (void *)&cmd_showport_rss_hash_rss_hash,
-               (void *)&cmd_showport_rss_hash_rss_hash_info,
                NULL,
        },
 };
@@ -2858,16 +2989,12 @@ cmdline_parse_inst_t cmd_showport_rss_hash = {
 cmdline_parse_inst_t cmd_showport_rss_hash_key = {
        .f = cmd_showport_rss_hash_parsed,
        .data = (void *)1,
-       .help_str = "show port <port_id> rss-hash "
-               "ipv4|ipv4-frag|ipv4-tcp|ipv4-udp|ipv4-sctp|ipv4-other|"
-               "ipv6|ipv6-frag|ipv6-tcp|ipv6-udp|ipv6-sctp|ipv6-other|"
-               "l2-payload|ipv6-ex|ipv6-tcp-ex|ipv6-udp-ex key",
+       .help_str = "show port <port_id> rss-hash key",
        .tokens = {
                (void *)&cmd_showport_rss_hash_show,
                (void *)&cmd_showport_rss_hash_port,
                (void *)&cmd_showport_rss_hash_port_id,
                (void *)&cmd_showport_rss_hash_rss_hash,
-               (void *)&cmd_showport_rss_hash_rss_hash_info,
                (void *)&cmd_showport_rss_hash_rss_key,
                NULL,
        },
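
The per-RSS-type tokens are gone because the configuration is now read back per port. A minimal sketch of the query behind the simplified command; the 64-byte key buffer is an assumption covering common 40/52-byte NIC keys:

    #include <stdio.h>
    #include <inttypes.h>
    #include <rte_ethdev.h>

    static void
    show_rss_conf(uint16_t port, int show_key)
    {
            uint8_t key[64];
            struct rte_eth_rss_conf conf = {
                    .rss_key = key,
                    .rss_key_len = sizeof(key),
            };
            unsigned int i;

            if (rte_eth_dev_rss_hash_conf_get(port, &conf) != 0)
                    return;
            printf("RSS functions: 0x%" PRIx64 "\n", conf.rss_hf);
            if (show_key)
                    for (i = 0; i < conf.rss_key_len; i++)
                            printf("%02X", key[i]);
    }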
@@ -4089,6 +4216,8 @@ csum_show(int port_id)
                (tx_offloads & DEV_TX_OFFLOAD_SCTP_CKSUM) ? "hw" : "sw");
        printf("Outer-Ip checksum offload is %s\n",
                (tx_offloads & DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) ? "hw" : "sw");
+       printf("Outer-Udp checksum offload is %s\n",
+               (tx_offloads & DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) ? "hw" : "sw");
 
        /* display warnings if configuration is not supported by the NIC */
        rte_eth_dev_info_get(port_id, &dev_info);
@@ -4117,6 +4246,12 @@ csum_show(int port_id)
                printf("Warning: hardware outer IP checksum enabled but not "
                        "supported by port %d\n", port_id);
        }
+       if ((tx_offloads & DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) &&
+               (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_OUTER_UDP_CKSUM)
+                       == 0) {
+               printf("Warning: hardware outer UDP checksum enabled but not "
+                       "supported by port %d\n", port_id);
+       }
 }
 
 static void
@@ -4185,6 +4320,15 @@ cmd_csum_parsed(void *parsed_result,
                                printf("Outer IP checksum offload is not "
                                       "supported by port %u\n", res->port_id);
                        }
+               } else if (!strcmp(res->proto, "outer-udp")) {
+                       if (hw == 0 || (dev_info.tx_offload_capa &
+                                       DEV_TX_OFFLOAD_OUTER_UDP_CKSUM)) {
+                               csum_offloads |=
+                                               DEV_TX_OFFLOAD_OUTER_UDP_CKSUM;
+                       } else {
+                               printf("Outer UDP checksum offload is not "
+                                      "supported by port %u\n", res->port_id);
+                       }
                }
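
Outside testpmd, an application gates the new offload on the port capability in the usual way; a minimal sketch (helper name illustrative):

    #include <errno.h>
    #include <rte_ethdev.h>

    /* Enable outer UDP Tx checksum only if the port advertises it,
     * mirroring the capability check above. */
    static int
    enable_outer_udp_cksum(uint16_t port, struct rte_eth_conf *conf)
    {
            struct rte_eth_dev_info info;

            rte_eth_dev_info_get(port, &info);
            if (!(info.tx_offload_capa & DEV_TX_OFFLOAD_OUTER_UDP_CKSUM))
                    return -ENOTSUP;
            conf->txmode.offloads |= DEV_TX_OFFLOAD_OUTER_UDP_CKSUM;
            return 0;
    }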
 
                if (hw) {
@@ -4208,7 +4352,7 @@ cmdline_parse_token_string_t cmd_csum_mode =
                                mode, "set");
 cmdline_parse_token_string_t cmd_csum_proto =
        TOKEN_STRING_INITIALIZER(struct cmd_csum_result,
-                               proto, "ip#tcp#udp#sctp#outer-ip");
+                               proto, "ip#tcp#udp#sctp#outer-ip#outer-udp");
 cmdline_parse_token_string_t cmd_csum_hwsw =
        TOKEN_STRING_INITIALIZER(struct cmd_csum_result,
                                hwsw, "hw#sw");
@@ -4219,7 +4363,7 @@ cmdline_parse_token_num_t cmd_csum_portid =
 cmdline_parse_inst_t cmd_csum_set = {
        .f = cmd_csum_parsed,
        .data = NULL,
-       .help_str = "csum set ip|tcp|udp|sctp|outer-ip hw|sw <port_id>: "
+       .help_str = "csum set ip|tcp|udp|sctp|outer-ip|outer-udp hw|sw <port_id>: "
                "Enable/Disable hardware calculation of L3/L4 checksum when "
                "using csum forward engine",
        .tokens = {
@@ -4279,7 +4423,7 @@ cmdline_parse_token_string_t cmd_csum_tunnel_csum =
                                csum, "csum");
 cmdline_parse_token_string_t cmd_csum_tunnel_parse =
        TOKEN_STRING_INITIALIZER(struct cmd_csum_tunnel_result,
-                               parse, "parse_tunnel");
+                               parse, "parse-tunnel");
 cmdline_parse_token_string_t cmd_csum_tunnel_onoff =
        TOKEN_STRING_INITIALIZER(struct cmd_csum_tunnel_result,
                                onoff, "on#off");
@@ -4290,7 +4434,7 @@ cmdline_parse_token_num_t cmd_csum_tunnel_portid =
 cmdline_parse_inst_t cmd_csum_tunnel = {
        .f = cmd_csum_tunnel_parsed,
        .data = NULL,
-       .help_str = "csum parse_tunnel on|off <port_id>: "
+       .help_str = "csum parse-tunnel on|off <port_id>: "
                "Enable/Disable parsing of tunnels for csum engine",
        .tokens = {
                (void *)&cmd_csum_tunnel_csum,
@@ -7073,6 +7217,11 @@ static void cmd_showportall_parsed(void *parsed_result,
        } else if (!strcmp(res->what, "info"))
                RTE_ETH_FOREACH_DEV(i)
                        port_infos_display(i);
+       else if (!strcmp(res->what, "summary")) {
+               port_summary_header_display();
+               RTE_ETH_FOREACH_DEV(i)
+                       port_summary_display(i);
+       }
        else if (!strcmp(res->what, "stats"))
                RTE_ETH_FOREACH_DEV(i)
                        nic_stats_display(i);
@@ -7100,14 +7249,14 @@ cmdline_parse_token_string_t cmd_showportall_port =
        TOKEN_STRING_INITIALIZER(struct cmd_showportall_result, port, "port");
 cmdline_parse_token_string_t cmd_showportall_what =
        TOKEN_STRING_INITIALIZER(struct cmd_showportall_result, what,
-                                "info#stats#xstats#fdir#stat_qmap#dcb_tc#cap");
+                                "info#summary#stats#xstats#fdir#stat_qmap#dcb_tc#cap");
 cmdline_parse_token_string_t cmd_showportall_all =
        TOKEN_STRING_INITIALIZER(struct cmd_showportall_result, all, "all");
 cmdline_parse_inst_t cmd_showportall = {
        .f = cmd_showportall_parsed,
        .data = NULL,
        .help_str = "show|clear port "
-               "info|stats|xstats|fdir|stat_qmap|dcb_tc|cap all",
+               "info|summary|stats|xstats|fdir|stat_qmap|dcb_tc|cap all",
        .tokens = {
                (void *)&cmd_showportall_show,
                (void *)&cmd_showportall_port,
@@ -7137,6 +7286,10 @@ static void cmd_showport_parsed(void *parsed_result,
                        nic_xstats_clear(res->portnum);
        } else if (!strcmp(res->what, "info"))
                port_infos_display(res->portnum);
+       else if (!strcmp(res->what, "summary")) {
+               port_summary_header_display();
+               port_summary_display(res->portnum);
+       }
        else if (!strcmp(res->what, "stats"))
                nic_stats_display(res->portnum);
        else if (!strcmp(res->what, "xstats"))
@@ -7158,7 +7311,7 @@ cmdline_parse_token_string_t cmd_showport_port =
        TOKEN_STRING_INITIALIZER(struct cmd_showport_result, port, "port");
 cmdline_parse_token_string_t cmd_showport_what =
        TOKEN_STRING_INITIALIZER(struct cmd_showport_result, what,
-                                "info#stats#xstats#fdir#stat_qmap#dcb_tc#cap");
+                                "info#summary#stats#xstats#fdir#stat_qmap#dcb_tc#cap");
 cmdline_parse_token_num_t cmd_showport_portnum =
        TOKEN_NUM_INITIALIZER(struct cmd_showport_result, portnum, UINT16);
 
@@ -7166,7 +7319,7 @@ cmdline_parse_inst_t cmd_showport = {
        .f = cmd_showport_parsed,
        .data = NULL,
        .help_str = "show|clear port "
-               "info|stats|xstats|fdir|stat_qmap|dcb_tc|cap "
+               "info|summary|stats|xstats|fdir|stat_qmap|dcb_tc|cap "
                "<port_id>",
        .tokens = {
                (void *)&cmd_showport_show,
@@ -7573,7 +7726,6 @@ static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result,
                            struct cmdline *cl,
                            __attribute__((unused)) void *data)
 {
-       pmd_test_exit();
        cmdline_quit(cl);
 }
 
@@ -15186,6 +15338,631 @@ cmdline_parse_inst_t cmd_set_nvgre_with_vlan = {
        },
 };
 
+/** Set L2 encapsulation details */
+struct cmd_set_l2_encap_result {
+       cmdline_fixed_string_t set;
+       cmdline_fixed_string_t l2_encap;
+       cmdline_fixed_string_t pos_token;
+       cmdline_fixed_string_t ip_version;
+       uint32_t vlan_present:1;
+       uint16_t tci;
+       struct ether_addr eth_src;
+       struct ether_addr eth_dst;
+};
+
+cmdline_parse_token_string_t cmd_set_l2_encap_set =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_l2_encap_result, set, "set");
+cmdline_parse_token_string_t cmd_set_l2_encap_l2_encap =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_l2_encap_result, l2_encap, "l2_encap");
+cmdline_parse_token_string_t cmd_set_l2_encap_l2_encap_with_vlan =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_l2_encap_result, l2_encap,
+                                "l2_encap-with-vlan");
+cmdline_parse_token_string_t cmd_set_l2_encap_ip_version =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_l2_encap_result, pos_token,
+                                "ip-version");
+cmdline_parse_token_string_t cmd_set_l2_encap_ip_version_value =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_l2_encap_result, ip_version,
+                                "ipv4#ipv6");
+cmdline_parse_token_string_t cmd_set_l2_encap_vlan =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_l2_encap_result, pos_token,
+                                "vlan-tci");
+cmdline_parse_token_num_t cmd_set_l2_encap_vlan_value =
+       TOKEN_NUM_INITIALIZER(struct cmd_set_l2_encap_result, tci, UINT16);
+cmdline_parse_token_string_t cmd_set_l2_encap_eth_src =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_l2_encap_result, pos_token,
+                                "eth-src");
+cmdline_parse_token_etheraddr_t cmd_set_l2_encap_eth_src_value =
+       TOKEN_ETHERADDR_INITIALIZER(struct cmd_set_l2_encap_result, eth_src);
+cmdline_parse_token_string_t cmd_set_l2_encap_eth_dst =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_l2_encap_result, pos_token,
+                                "eth-dst");
+cmdline_parse_token_etheraddr_t cmd_set_l2_encap_eth_dst_value =
+       TOKEN_ETHERADDR_INITIALIZER(struct cmd_set_l2_encap_result, eth_dst);
+
+static void cmd_set_l2_encap_parsed(void *parsed_result,
+       __attribute__((unused)) struct cmdline *cl,
+       __attribute__((unused)) void *data)
+{
+       struct cmd_set_l2_encap_result *res = parsed_result;
+
+       if (strcmp(res->l2_encap, "l2_encap") == 0)
+               l2_encap_conf.select_vlan = 0;
+       else if (strcmp(res->l2_encap, "l2_encap-with-vlan") == 0)
+               l2_encap_conf.select_vlan = 1;
+       if (strcmp(res->ip_version, "ipv4") == 0)
+               l2_encap_conf.select_ipv4 = 1;
+       else if (strcmp(res->ip_version, "ipv6") == 0)
+               l2_encap_conf.select_ipv4 = 0;
+       else
+               return;
+       if (l2_encap_conf.select_vlan)
+               l2_encap_conf.vlan_tci = rte_cpu_to_be_16(res->tci);
+       rte_memcpy(l2_encap_conf.eth_src, res->eth_src.addr_bytes,
+                  ETHER_ADDR_LEN);
+       rte_memcpy(l2_encap_conf.eth_dst, res->eth_dst.addr_bytes,
+                  ETHER_ADDR_LEN);
+}
+
+cmdline_parse_inst_t cmd_set_l2_encap = {
+       .f = cmd_set_l2_encap_parsed,
+       .data = NULL,
+       .help_str = "set l2_encap ip-version ipv4|ipv6"
+               " eth-src <eth-src> eth-dst <eth-dst>",
+       .tokens = {
+               (void *)&cmd_set_l2_encap_set,
+               (void *)&cmd_set_l2_encap_l2_encap,
+               (void *)&cmd_set_l2_encap_ip_version,
+               (void *)&cmd_set_l2_encap_ip_version_value,
+               (void *)&cmd_set_l2_encap_eth_src,
+               (void *)&cmd_set_l2_encap_eth_src_value,
+               (void *)&cmd_set_l2_encap_eth_dst,
+               (void *)&cmd_set_l2_encap_eth_dst_value,
+               NULL,
+       },
+};
+
+cmdline_parse_inst_t cmd_set_l2_encap_with_vlan = {
+       .f = cmd_set_l2_encap_parsed,
+       .data = NULL,
+       .help_str = "set l2_encap-with-vlan ip-version ipv4|ipv6"
+               " vlan-tci <vlan-tci> eth-src <eth-src> eth-dst <eth-dst>",
+       .tokens = {
+               (void *)&cmd_set_l2_encap_set,
+               (void *)&cmd_set_l2_encap_l2_encap_with_vlan,
+               (void *)&cmd_set_l2_encap_ip_version,
+               (void *)&cmd_set_l2_encap_ip_version_value,
+               (void *)&cmd_set_l2_encap_vlan,
+               (void *)&cmd_set_l2_encap_vlan_value,
+               (void *)&cmd_set_l2_encap_eth_src,
+               (void *)&cmd_set_l2_encap_eth_src_value,
+               (void *)&cmd_set_l2_encap_eth_dst,
+               (void *)&cmd_set_l2_encap_eth_dst_value,
+               NULL,
+       },
+};
+
+/** Set L2 decapsulation details */
+struct cmd_set_l2_decap_result {
+       cmdline_fixed_string_t set;
+       cmdline_fixed_string_t l2_decap;
+       cmdline_fixed_string_t pos_token;
+       uint32_t vlan_present:1;
+};
+
+cmdline_parse_token_string_t cmd_set_l2_decap_set =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_l2_decap_result, set, "set");
+cmdline_parse_token_string_t cmd_set_l2_decap_l2_decap =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_l2_decap_result, l2_decap,
+                                "l2_decap");
+cmdline_parse_token_string_t cmd_set_l2_decap_l2_decap_with_vlan =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_l2_decap_result, l2_decap,
+                                "l2_decap-with-vlan");
+
+static void cmd_set_l2_decap_parsed(void *parsed_result,
+       __attribute__((unused)) struct cmdline *cl,
+       __attribute__((unused)) void *data)
+{
+       struct cmd_set_l2_decap_result *res = parsed_result;
+
+       if (strcmp(res->l2_decap, "l2_decap") == 0)
+               l2_decap_conf.select_vlan = 0;
+       else if (strcmp(res->l2_decap, "l2_decap-with-vlan") == 0)
+               l2_decap_conf.select_vlan = 1;
+}
+
+cmdline_parse_inst_t cmd_set_l2_decap = {
+       .f = cmd_set_l2_decap_parsed,
+       .data = NULL,
+       .help_str = "set l2_decap",
+       .tokens = {
+               (void *)&cmd_set_l2_decap_set,
+               (void *)&cmd_set_l2_decap_l2_decap,
+               NULL,
+       },
+};
+
+cmdline_parse_inst_t cmd_set_l2_decap_with_vlan = {
+       .f = cmd_set_l2_decap_parsed,
+       .data = NULL,
+       .help_str = "set l2_decap-with-vlan",
+       .tokens = {
+               (void *)&cmd_set_l2_decap_set,
+               (void *)&cmd_set_l2_decap_l2_decap_with_vlan,
+               NULL,
+       },
+};
+
+/** Set MPLSoGRE encapsulation details */
+struct cmd_set_mplsogre_encap_result {
+       cmdline_fixed_string_t set;
+       cmdline_fixed_string_t mplsogre;
+       cmdline_fixed_string_t pos_token;
+       cmdline_fixed_string_t ip_version;
+       uint32_t vlan_present:1;
+       uint32_t label;
+       cmdline_ipaddr_t ip_src;
+       cmdline_ipaddr_t ip_dst;
+       uint16_t tci;
+       struct ether_addr eth_src;
+       struct ether_addr eth_dst;
+};
+
+cmdline_parse_token_string_t cmd_set_mplsogre_encap_set =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_encap_result, set,
+                                "set");
+cmdline_parse_token_string_t cmd_set_mplsogre_encap_mplsogre_encap =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_encap_result, mplsogre,
+                                "mplsogre_encap");
+cmdline_parse_token_string_t cmd_set_mplsogre_encap_mplsogre_encap_with_vlan =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_encap_result,
+                                mplsogre, "mplsogre_encap-with-vlan");
+cmdline_parse_token_string_t cmd_set_mplsogre_encap_ip_version =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_encap_result,
+                                pos_token, "ip-version");
+cmdline_parse_token_string_t cmd_set_mplsogre_encap_ip_version_value =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_encap_result,
+                                ip_version, "ipv4#ipv6");
+cmdline_parse_token_string_t cmd_set_mplsogre_encap_label =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_encap_result,
+                                pos_token, "label");
+cmdline_parse_token_num_t cmd_set_mplsogre_encap_label_value =
+       TOKEN_NUM_INITIALIZER(struct cmd_set_mplsogre_encap_result, label,
+                             UINT32);
+cmdline_parse_token_string_t cmd_set_mplsogre_encap_ip_src =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_encap_result,
+                                pos_token, "ip-src");
+cmdline_parse_token_ipaddr_t cmd_set_mplsogre_encap_ip_src_value =
+       TOKEN_IPADDR_INITIALIZER(struct cmd_set_mplsogre_encap_result, ip_src);
+cmdline_parse_token_string_t cmd_set_mplsogre_encap_ip_dst =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_encap_result,
+                                pos_token, "ip-dst");
+cmdline_parse_token_ipaddr_t cmd_set_mplsogre_encap_ip_dst_value =
+       TOKEN_IPADDR_INITIALIZER(struct cmd_set_mplsogre_encap_result, ip_dst);
+cmdline_parse_token_string_t cmd_set_mplsogre_encap_vlan =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_encap_result,
+                                pos_token, "vlan-tci");
+cmdline_parse_token_num_t cmd_set_mplsogre_encap_vlan_value =
+       TOKEN_NUM_INITIALIZER(struct cmd_set_mplsogre_encap_result, tci,
+                             UINT16);
+cmdline_parse_token_string_t cmd_set_mplsogre_encap_eth_src =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_encap_result,
+                                pos_token, "eth-src");
+cmdline_parse_token_etheraddr_t cmd_set_mplsogre_encap_eth_src_value =
+       TOKEN_ETHERADDR_INITIALIZER(struct cmd_set_mplsogre_encap_result,
+                                   eth_src);
+cmdline_parse_token_string_t cmd_set_mplsogre_encap_eth_dst =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_encap_result,
+                                pos_token, "eth-dst");
+cmdline_parse_token_etheraddr_t cmd_set_mplsogre_encap_eth_dst_value =
+       TOKEN_ETHERADDR_INITIALIZER(struct cmd_set_mplsogre_encap_result,
+                                   eth_dst);
+
+static void cmd_set_mplsogre_encap_parsed(void *parsed_result,
+       __attribute__((unused)) struct cmdline *cl,
+       __attribute__((unused)) void *data)
+{
+       struct cmd_set_mplsogre_encap_result *res = parsed_result;
+       union {
+               uint32_t mplsogre_label;
+               uint8_t label[3];
+       } id = {
+               .mplsogre_label =
+                       rte_cpu_to_be_32(res->label) & RTE_BE32(0x00ffffff),
+       };
+
+       if (strcmp(res->mplsogre, "mplsogre_encap") == 0)
+               mplsogre_encap_conf.select_vlan = 0;
+       else if (strcmp(res->mplsogre, "mplsogre_encap-with-vlan") == 0)
+               mplsogre_encap_conf.select_vlan = 1;
+       if (strcmp(res->ip_version, "ipv4") == 0)
+               mplsogre_encap_conf.select_ipv4 = 1;
+       else if (strcmp(res->ip_version, "ipv6") == 0)
+               mplsogre_encap_conf.select_ipv4 = 0;
+       else
+               return;
+       rte_memcpy(mplsogre_encap_conf.label, &id.label[1], 3);
+       if (mplsogre_encap_conf.select_ipv4) {
+               IPV4_ADDR_TO_UINT(res->ip_src, mplsogre_encap_conf.ipv4_src);
+               IPV4_ADDR_TO_UINT(res->ip_dst, mplsogre_encap_conf.ipv4_dst);
+       } else {
+               IPV6_ADDR_TO_ARRAY(res->ip_src, mplsogre_encap_conf.ipv6_src);
+               IPV6_ADDR_TO_ARRAY(res->ip_dst, mplsogre_encap_conf.ipv6_dst);
+       }
+       if (mplsogre_encap_conf.select_vlan)
+               mplsogre_encap_conf.vlan_tci = rte_cpu_to_be_16(res->tci);
+       rte_memcpy(mplsogre_encap_conf.eth_src, res->eth_src.addr_bytes,
+                  ETHER_ADDR_LEN);
+       rte_memcpy(mplsogre_encap_conf.eth_dst, res->eth_dst.addr_bytes,
+                  ETHER_ADDR_LEN);
+}
+
+cmdline_parse_inst_t cmd_set_mplsogre_encap = {
+       .f = cmd_set_mplsogre_encap_parsed,
+       .data = NULL,
+       .help_str = "set mplsogre_encap ip-version ipv4|ipv6 label <label>"
+               " ip-src <ip-src> ip-dst <ip-dst> eth-src <eth-src>"
+               " eth-dst <eth-dst>",
+       .tokens = {
+               (void *)&cmd_set_mplsogre_encap_set,
+               (void *)&cmd_set_mplsogre_encap_mplsogre_encap,
+               (void *)&cmd_set_mplsogre_encap_ip_version,
+               (void *)&cmd_set_mplsogre_encap_ip_version_value,
+               (void *)&cmd_set_mplsogre_encap_label,
+               (void *)&cmd_set_mplsogre_encap_label_value,
+               (void *)&cmd_set_mplsogre_encap_ip_src,
+               (void *)&cmd_set_mplsogre_encap_ip_src_value,
+               (void *)&cmd_set_mplsogre_encap_ip_dst,
+               (void *)&cmd_set_mplsogre_encap_ip_dst_value,
+               (void *)&cmd_set_mplsogre_encap_eth_src,
+               (void *)&cmd_set_mplsogre_encap_eth_src_value,
+               (void *)&cmd_set_mplsogre_encap_eth_dst,
+               (void *)&cmd_set_mplsogre_encap_eth_dst_value,
+               NULL,
+       },
+};
+
+cmdline_parse_inst_t cmd_set_mplsogre_encap_with_vlan = {
+       .f = cmd_set_mplsogre_encap_parsed,
+       .data = NULL,
+       .help_str = "set mplsogre_encap-with-vlan ip-version ipv4|ipv6"
+               " label <label> ip-src <ip-src> ip-dst <ip-dst>"
+               " vlan-tci <vlan-tci> eth-src <eth-src> eth-dst <eth-dst>",
+       .tokens = {
+               (void *)&cmd_set_mplsogre_encap_set,
+               (void *)&cmd_set_mplsogre_encap_mplsogre_encap_with_vlan,
+               (void *)&cmd_set_mplsogre_encap_ip_version,
+               (void *)&cmd_set_mplsogre_encap_ip_version_value,
+               (void *)&cmd_set_mplsogre_encap_label,
+               (void *)&cmd_set_mplsogre_encap_label_value,
+               (void *)&cmd_set_mplsogre_encap_ip_src,
+               (void *)&cmd_set_mplsogre_encap_ip_src_value,
+               (void *)&cmd_set_mplsogre_encap_ip_dst,
+               (void *)&cmd_set_mplsogre_encap_ip_dst_value,
+               (void *)&cmd_set_mplsogre_encap_vlan,
+               (void *)&cmd_set_mplsogre_encap_vlan_value,
+               (void *)&cmd_set_mplsogre_encap_eth_src,
+               (void *)&cmd_set_mplsogre_encap_eth_src_value,
+               (void *)&cmd_set_mplsogre_encap_eth_dst,
+               (void *)&cmd_set_mplsogre_encap_eth_dst_value,
+               NULL,
+       },
+};
+
+/** Set MPLSoGRE decapsulation details */
+struct cmd_set_mplsogre_decap_result {
+       cmdline_fixed_string_t set;
+       cmdline_fixed_string_t mplsogre;
+       cmdline_fixed_string_t pos_token;
+       cmdline_fixed_string_t ip_version;
+       uint32_t vlan_present:1;
+};
+
+cmdline_parse_token_string_t cmd_set_mplsogre_decap_set =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_decap_result, set,
+                                "set");
+cmdline_parse_token_string_t cmd_set_mplsogre_decap_mplsogre_decap =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_decap_result, mplsogre,
+                                "mplsogre_decap");
+cmdline_parse_token_string_t cmd_set_mplsogre_decap_mplsogre_decap_with_vlan =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_decap_result,
+                                mplsogre, "mplsogre_decap-with-vlan");
+cmdline_parse_token_string_t cmd_set_mplsogre_decap_ip_version =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_decap_result,
+                                pos_token, "ip-version");
+cmdline_parse_token_string_t cmd_set_mplsogre_decap_ip_version_value =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsogre_decap_result,
+                                ip_version, "ipv4#ipv6");
+
+static void cmd_set_mplsogre_decap_parsed(void *parsed_result,
+       __attribute__((unused)) struct cmdline *cl,
+       __attribute__((unused)) void *data)
+{
+       struct cmd_set_mplsogre_decap_result *res = parsed_result;
+
+       if (strcmp(res->mplsogre, "mplsogre_decap") == 0)
+               mplsogre_decap_conf.select_vlan = 0;
+       else if (strcmp(res->mplsogre, "mplsogre_decap-with-vlan") == 0)
+               mplsogre_decap_conf.select_vlan = 1;
+       if (strcmp(res->ip_version, "ipv4") == 0)
+               mplsogre_decap_conf.select_ipv4 = 1;
+       else if (strcmp(res->ip_version, "ipv6") == 0)
+               mplsogre_decap_conf.select_ipv4 = 0;
+}
+
+cmdline_parse_inst_t cmd_set_mplsogre_decap = {
+       .f = cmd_set_mplsogre_decap_parsed,
+       .data = NULL,
+       .help_str = "set mplsogre_decap ip-version ipv4|ipv6",
+       .tokens = {
+               (void *)&cmd_set_mplsogre_decap_set,
+               (void *)&cmd_set_mplsogre_decap_mplsogre_decap,
+               (void *)&cmd_set_mplsogre_decap_ip_version,
+               (void *)&cmd_set_mplsogre_decap_ip_version_value,
+               NULL,
+       },
+};
+
+cmdline_parse_inst_t cmd_set_mplsogre_decap_with_vlan = {
+       .f = cmd_set_mplsogre_decap_parsed,
+       .data = NULL,
+       .help_str = "set mplsogre_decap-with-vlan ip-version ipv4|ipv6",
+       .tokens = {
+               (void *)&cmd_set_mplsogre_decap_set,
+               (void *)&cmd_set_mplsogre_decap_mplsogre_decap_with_vlan,
+               (void *)&cmd_set_mplsogre_decap_ip_version,
+               (void *)&cmd_set_mplsogre_decap_ip_version_value,
+               NULL,
+       },
+};
+
+/** Set MPLSoUDP encapsulation details */
+struct cmd_set_mplsoudp_encap_result {
+       cmdline_fixed_string_t set;
+       cmdline_fixed_string_t mplsoudp;
+       cmdline_fixed_string_t pos_token;
+       cmdline_fixed_string_t ip_version;
+       uint32_t vlan_present:1;
+       uint32_t label;
+       uint16_t udp_src;
+       uint16_t udp_dst;
+       cmdline_ipaddr_t ip_src;
+       cmdline_ipaddr_t ip_dst;
+       uint16_t tci;
+       struct ether_addr eth_src;
+       struct ether_addr eth_dst;
+};
+
+cmdline_parse_token_string_t cmd_set_mplsoudp_encap_set =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_encap_result, set,
+                                "set");
+cmdline_parse_token_string_t cmd_set_mplsoudp_encap_mplsoudp_encap =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_encap_result, mplsoudp,
+                                "mplsoudp_encap");
+cmdline_parse_token_string_t cmd_set_mplsoudp_encap_mplsoudp_encap_with_vlan =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_encap_result,
+                                mplsoudp, "mplsoudp_encap-with-vlan");
+cmdline_parse_token_string_t cmd_set_mplsoudp_encap_ip_version =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_encap_result,
+                                pos_token, "ip-version");
+cmdline_parse_token_string_t cmd_set_mplsoudp_encap_ip_version_value =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_encap_result,
+                                ip_version, "ipv4#ipv6");
+cmdline_parse_token_string_t cmd_set_mplsoudp_encap_label =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_encap_result,
+                                pos_token, "label");
+cmdline_parse_token_num_t cmd_set_mplsoudp_encap_label_value =
+       TOKEN_NUM_INITIALIZER(struct cmd_set_mplsoudp_encap_result, label,
+                             UINT32);
+cmdline_parse_token_string_t cmd_set_mplsoudp_encap_udp_src =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_encap_result,
+                                pos_token, "udp-src");
+cmdline_parse_token_num_t cmd_set_mplsoudp_encap_udp_src_value =
+       TOKEN_NUM_INITIALIZER(struct cmd_set_mplsoudp_encap_result, udp_src,
+                             UINT16);
+cmdline_parse_token_string_t cmd_set_mplsoudp_encap_udp_dst =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_encap_result,
+                                pos_token, "udp-dst");
+cmdline_parse_token_num_t cmd_set_mplsoudp_encap_udp_dst_value =
+       TOKEN_NUM_INITIALIZER(struct cmd_set_mplsoudp_encap_result, udp_dst,
+                             UINT16);
+cmdline_parse_token_string_t cmd_set_mplsoudp_encap_ip_src =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_encap_result,
+                                pos_token, "ip-src");
+cmdline_parse_token_ipaddr_t cmd_set_mplsoudp_encap_ip_src_value =
+       TOKEN_IPADDR_INITIALIZER(struct cmd_set_mplsoudp_encap_result, ip_src);
+cmdline_parse_token_string_t cmd_set_mplsoudp_encap_ip_dst =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_encap_result,
+                                pos_token, "ip-dst");
+cmdline_parse_token_ipaddr_t cmd_set_mplsoudp_encap_ip_dst_value =
+       TOKEN_IPADDR_INITIALIZER(struct cmd_set_mplsoudp_encap_result, ip_dst);
+cmdline_parse_token_string_t cmd_set_mplsoudp_encap_vlan =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_encap_result,
+                                pos_token, "vlan-tci");
+cmdline_parse_token_num_t cmd_set_mplsoudp_encap_vlan_value =
+       TOKEN_NUM_INITIALIZER(struct cmd_set_mplsoudp_encap_result, tci,
+                             UINT16);
+cmdline_parse_token_string_t cmd_set_mplsoudp_encap_eth_src =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_encap_result,
+                                pos_token, "eth-src");
+cmdline_parse_token_etheraddr_t cmd_set_mplsoudp_encap_eth_src_value =
+       TOKEN_ETHERADDR_INITIALIZER(struct cmd_set_mplsoudp_encap_result,
+                                   eth_src);
+cmdline_parse_token_string_t cmd_set_mplsoudp_encap_eth_dst =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_encap_result,
+                                pos_token, "eth-dst");
+cmdline_parse_token_etheraddr_t cmd_set_mplsoudp_encap_eth_dst_value =
+       TOKEN_ETHERADDR_INITIALIZER(struct cmd_set_mplsoudp_encap_result,
+                                   eth_dst);
+
+static void cmd_set_mplsoudp_encap_parsed(void *parsed_result,
+       __attribute__((unused)) struct cmdline *cl,
+       __attribute__((unused)) void *data)
+{
+       struct cmd_set_mplsoudp_encap_result *res = parsed_result;
+       union {
+               uint32_t mplsoudp_label;
+               uint8_t label[3];
+       } id = {
+               .mplsoudp_label =
+                       rte_cpu_to_be_32(res->label) & RTE_BE32(0x00ffffff),
+       };
+
+       if (strcmp(res->mplsoudp, "mplsoudp_encap") == 0)
+               mplsoudp_encap_conf.select_vlan = 0;
+       else if (strcmp(res->mplsoudp, "mplsoudp_encap-with-vlan") == 0)
+               mplsoudp_encap_conf.select_vlan = 1;
+       if (strcmp(res->ip_version, "ipv4") == 0)
+               mplsoudp_encap_conf.select_ipv4 = 1;
+       else if (strcmp(res->ip_version, "ipv6") == 0)
+               mplsoudp_encap_conf.select_ipv4 = 0;
+       else
+               return;
+       rte_memcpy(mplsoudp_encap_conf.label, &id.label[1], 3);
+       mplsoudp_encap_conf.udp_src = rte_cpu_to_be_16(res->udp_src);
+       mplsoudp_encap_conf.udp_dst = rte_cpu_to_be_16(res->udp_dst);
+       if (mplsoudp_encap_conf.select_ipv4) {
+               IPV4_ADDR_TO_UINT(res->ip_src, mplsoudp_encap_conf.ipv4_src);
+               IPV4_ADDR_TO_UINT(res->ip_dst, mplsoudp_encap_conf.ipv4_dst);
+       } else {
+               IPV6_ADDR_TO_ARRAY(res->ip_src, mplsoudp_encap_conf.ipv6_src);
+               IPV6_ADDR_TO_ARRAY(res->ip_dst, mplsoudp_encap_conf.ipv6_dst);
+       }
+       if (mplsoudp_encap_conf.select_vlan)
+               mplsoudp_encap_conf.vlan_tci = rte_cpu_to_be_16(res->tci);
+       rte_memcpy(mplsoudp_encap_conf.eth_src, res->eth_src.addr_bytes,
+                  ETHER_ADDR_LEN);
+       rte_memcpy(mplsoudp_encap_conf.eth_dst, res->eth_dst.addr_bytes,
+                  ETHER_ADDR_LEN);
+}
+
+cmdline_parse_inst_t cmd_set_mplsoudp_encap = {
+       .f = cmd_set_mplsoudp_encap_parsed,
+       .data = NULL,
+       .help_str = "set mplsoudp_encap ip-version ipv4|ipv6 label <label>"
+               " udp-src <udp-src> udp-dst <udp-dst> ip-src <ip-src>"
+               " ip-dst <ip-dst> eth-src <eth-src> eth-dst <eth-dst>",
+       .tokens = {
+               (void *)&cmd_set_mplsoudp_encap_set,
+               (void *)&cmd_set_mplsoudp_encap_mplsoudp_encap,
+               (void *)&cmd_set_mplsoudp_encap_ip_version,
+               (void *)&cmd_set_mplsoudp_encap_ip_version_value,
+               (void *)&cmd_set_mplsoudp_encap_label,
+               (void *)&cmd_set_mplsoudp_encap_label_value,
+               (void *)&cmd_set_mplsoudp_encap_udp_src,
+               (void *)&cmd_set_mplsoudp_encap_udp_src_value,
+               (void *)&cmd_set_mplsoudp_encap_udp_dst,
+               (void *)&cmd_set_mplsoudp_encap_udp_dst_value,
+               (void *)&cmd_set_mplsoudp_encap_ip_src,
+               (void *)&cmd_set_mplsoudp_encap_ip_src_value,
+               (void *)&cmd_set_mplsoudp_encap_ip_dst,
+               (void *)&cmd_set_mplsoudp_encap_ip_dst_value,
+               (void *)&cmd_set_mplsoudp_encap_eth_src,
+               (void *)&cmd_set_mplsoudp_encap_eth_src_value,
+               (void *)&cmd_set_mplsoudp_encap_eth_dst,
+               (void *)&cmd_set_mplsoudp_encap_eth_dst_value,
+               NULL,
+       },
+};
+
+cmdline_parse_inst_t cmd_set_mplsoudp_encap_with_vlan = {
+       .f = cmd_set_mplsoudp_encap_parsed,
+       .data = NULL,
+       .help_str = "set mplsoudp_encap-with-vlan ip-version ipv4|ipv6"
+               " label <label> udp-src <udp-src> udp-dst <udp-dst>"
+               " ip-src <ip-src> ip-dst <ip-dst> vlan-tci <vlan-tci>"
+               " eth-src <eth-src> eth-dst <eth-dst>",
+       .tokens = {
+               (void *)&cmd_set_mplsoudp_encap_set,
+               (void *)&cmd_set_mplsoudp_encap_mplsoudp_encap_with_vlan,
+               (void *)&cmd_set_mplsoudp_encap_ip_version,
+               (void *)&cmd_set_mplsoudp_encap_ip_version_value,
+               (void *)&cmd_set_mplsoudp_encap_label,
+               (void *)&cmd_set_mplsoudp_encap_label_value,
+               (void *)&cmd_set_mplsoudp_encap_udp_src,
+               (void *)&cmd_set_mplsoudp_encap_udp_src_value,
+               (void *)&cmd_set_mplsoudp_encap_udp_dst,
+               (void *)&cmd_set_mplsoudp_encap_udp_dst_value,
+               (void *)&cmd_set_mplsoudp_encap_ip_src,
+               (void *)&cmd_set_mplsoudp_encap_ip_src_value,
+               (void *)&cmd_set_mplsoudp_encap_ip_dst,
+               (void *)&cmd_set_mplsoudp_encap_ip_dst_value,
+               (void *)&cmd_set_mplsoudp_encap_vlan,
+               (void *)&cmd_set_mplsoudp_encap_vlan_value,
+               (void *)&cmd_set_mplsoudp_encap_eth_src,
+               (void *)&cmd_set_mplsoudp_encap_eth_src_value,
+               (void *)&cmd_set_mplsoudp_encap_eth_dst,
+               (void *)&cmd_set_mplsoudp_encap_eth_dst_value,
+               NULL,
+       },
+};
+
+/** Set MPLSoUDP decapsulation details */
+struct cmd_set_mplsoudp_decap_result {
+       cmdline_fixed_string_t set;
+       cmdline_fixed_string_t mplsoudp;
+       cmdline_fixed_string_t pos_token;
+       cmdline_fixed_string_t ip_version;
+       uint32_t vlan_present:1;
+};
+
+cmdline_parse_token_string_t cmd_set_mplsoudp_decap_set =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_decap_result, set,
+                                "set");
+cmdline_parse_token_string_t cmd_set_mplsoudp_decap_mplsoudp_decap =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_decap_result, mplsoudp,
+                                "mplsoudp_decap");
+cmdline_parse_token_string_t cmd_set_mplsoudp_decap_mplsoudp_decap_with_vlan =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_decap_result,
+                                mplsoudp, "mplsoudp_decap-with-vlan");
+cmdline_parse_token_string_t cmd_set_mplsoudp_decap_ip_version =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_decap_result,
+                                pos_token, "ip-version");
+cmdline_parse_token_string_t cmd_set_mplsoudp_decap_ip_version_value =
+       TOKEN_STRING_INITIALIZER(struct cmd_set_mplsoudp_decap_result,
+                                ip_version, "ipv4#ipv6");
+
+static void cmd_set_mplsoudp_decap_parsed(void *parsed_result,
+       __attribute__((unused)) struct cmdline *cl,
+       __attribute__((unused)) void *data)
+{
+       struct cmd_set_mplsoudp_decap_result *res = parsed_result;
+
+       if (strcmp(res->mplsoudp, "mplsoudp_decap") == 0)
+               mplsoudp_decap_conf.select_vlan = 0;
+       else if (strcmp(res->mplsoudp, "mplsoudp_decap-with-vlan") == 0)
+               mplsoudp_decap_conf.select_vlan = 1;
+       if (strcmp(res->ip_version, "ipv4") == 0)
+               mplsoudp_decap_conf.select_ipv4 = 1;
+       else if (strcmp(res->ip_version, "ipv6") == 0)
+               mplsoudp_decap_conf.select_ipv4 = 0;
+}
+
+cmdline_parse_inst_t cmd_set_mplsoudp_decap = {
+       .f = cmd_set_mplsoudp_decap_parsed,
+       .data = NULL,
+       .help_str = "set mplsoudp_decap ip-version ipv4|ipv6",
+       .tokens = {
+               (void *)&cmd_set_mplsoudp_decap_set,
+               (void *)&cmd_set_mplsoudp_decap_mplsoudp_decap,
+               (void *)&cmd_set_mplsoudp_decap_ip_version,
+               (void *)&cmd_set_mplsoudp_decap_ip_version_value,
+               NULL,
+       },
+};
+
+cmdline_parse_inst_t cmd_set_mplsoudp_decap_with_vlan = {
+       .f = cmd_set_mplsoudp_decap_parsed,
+       .data = NULL,
+       .help_str = "set mplsoudp_decap-with-vlan ip-version ipv4|ipv6",
+       .tokens = {
+               (void *)&cmd_set_mplsoudp_decap_set,
+               (void *)&cmd_set_mplsoudp_decap_mplsoudp_decap_with_vlan,
+               (void *)&cmd_set_mplsoudp_decap_ip_version,
+               (void *)&cmd_set_mplsoudp_decap_ip_version_value,
+               NULL,
+       },
+};
+
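Every command in this file follows the same librte_cmdline pattern: a result
struct, one TOKEN_*_INITIALIZER per keyword or argument, a *_parsed() callback,
and a cmdline_parse_inst_t tying them together. A minimal, self-contained
sketch of that pattern (the "greet" command and greet_ctx names are
illustrative, not part of this patch):

    #include <cmdline.h>
    #include <cmdline_parse_string.h>
    #include <cmdline_socket.h>

    struct cmd_greet_result {
            cmdline_fixed_string_t greet;   /* fixed keyword "greet" */
            cmdline_fixed_string_t name;    /* free-form string argument */
    };

    static void
    cmd_greet_parsed(void *parsed_result, struct cmdline *cl,
                     __attribute__((unused)) void *data)
    {
            struct cmd_greet_result *res = parsed_result;

            cmdline_printf(cl, "hello, %s\n", res->name);
    }

    static cmdline_parse_token_string_t cmd_greet_greet =
            TOKEN_STRING_INITIALIZER(struct cmd_greet_result, greet, "greet");
    /* A NULL string accepts any single word. */
    static cmdline_parse_token_string_t cmd_greet_name =
            TOKEN_STRING_INITIALIZER(struct cmd_greet_result, name, NULL);

    static cmdline_parse_inst_t cmd_greet = {
            .f = cmd_greet_parsed,
            .data = NULL,
            .help_str = "greet <name>",
            .tokens = {
                    (void *)&cmd_greet_greet,
                    (void *)&cmd_greet_name,
                    NULL,
            },
    };

    static cmdline_parse_ctx_t greet_ctx[] = {
            (cmdline_parse_inst_t *)&cmd_greet,
            NULL,
    };

    static void
    run_prompt(void)
    {
            /* Same calls testpmd uses to drive its interactive prompt. */
            struct cmdline *cl = cmdline_stdin_new(greet_ctx, "example> ");

            if (cl == NULL)
                    return;
            cmdline_interact(cl);
            cmdline_stdin_exit(cl);
    }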
 /* Strict link priority scheduling mode setting */
 static void
 cmd_strict_link_prio_parsed(
@@ -17403,7 +18180,8 @@ cmdline_parse_token_string_t cmd_config_per_port_tx_offload_result_offload =
                          "sctp_cksum#tcp_tso#udp_tso#outer_ipv4_cksum#"
                          "qinq_insert#vxlan_tnl_tso#gre_tnl_tso#"
                          "ipip_tnl_tso#geneve_tnl_tso#macsec_insert#"
-                         "mt_lockfree#multi_segs#mbuf_fast_free#security");
+                         "mt_lockfree#multi_segs#mbuf_fast_free#security#"
+                         "match_metadata");
 cmdline_parse_token_string_t cmd_config_per_port_tx_offload_result_on_off =
        TOKEN_STRING_INITIALIZER
                (struct cmd_config_per_port_tx_offload_result,
@@ -17484,8 +18262,8 @@ cmdline_parse_inst_t cmd_config_per_port_tx_offload = {
                    "sctp_cksum|tcp_tso|udp_tso|outer_ipv4_cksum|"
                    "qinq_insert|vxlan_tnl_tso|gre_tnl_tso|"
                    "ipip_tnl_tso|geneve_tnl_tso|macsec_insert|"
-                   "mt_lockfree|multi_segs|mbuf_fast_free|security "
-                   "on|off",
+                   "mt_lockfree|multi_segs|mbuf_fast_free|security|"
+                   "match_metadata on|off",
        .tokens = {
                (void *)&cmd_config_per_port_tx_offload_result_port,
                (void *)&cmd_config_per_port_tx_offload_result_config,
@@ -17602,6 +18380,113 @@ cmdline_parse_inst_t cmd_config_per_queue_tx_offload = {
        }
 };
 
+/* *** configure tx_metadata for specific port *** */
+struct cmd_config_tx_metadata_specific_result {
+       cmdline_fixed_string_t port;
+       cmdline_fixed_string_t keyword;
+       uint16_t port_id;
+       cmdline_fixed_string_t item;
+       uint32_t value;
+};
+
+static void
+cmd_config_tx_metadata_specific_parsed(void *parsed_result,
+                               __attribute__((unused)) struct cmdline *cl,
+                               __attribute__((unused)) void *data)
+{
+       struct cmd_config_tx_metadata_specific_result *res = parsed_result;
+
+       if (port_id_is_invalid(res->port_id, ENABLED_WARN))
+               return;
+       ports[res->port_id].tx_metadata = rte_cpu_to_be_32(res->value);
+       /* Add/remove callback to insert valid metadata in every Tx packet. */
+       if (ports[res->port_id].tx_metadata)
+               add_tx_md_callback(res->port_id);
+       else
+               remove_tx_md_callback(res->port_id);
+}
+
+cmdline_parse_token_string_t cmd_config_tx_metadata_specific_port =
+       TOKEN_STRING_INITIALIZER(struct cmd_config_tx_metadata_specific_result,
+                       port, "port");
+cmdline_parse_token_string_t cmd_config_tx_metadata_specific_keyword =
+       TOKEN_STRING_INITIALIZER(struct cmd_config_tx_metadata_specific_result,
+                       keyword, "config");
+cmdline_parse_token_num_t cmd_config_tx_metadata_specific_id =
+       TOKEN_NUM_INITIALIZER(struct cmd_config_tx_metadata_specific_result,
+                       port_id, UINT16);
+cmdline_parse_token_string_t cmd_config_tx_metadata_specific_item =
+       TOKEN_STRING_INITIALIZER(struct cmd_config_tx_metadata_specific_result,
+                       item, "tx_metadata");
+cmdline_parse_token_num_t cmd_config_tx_metadata_specific_value =
+       TOKEN_NUM_INITIALIZER(struct cmd_config_tx_metadata_specific_result,
+                       value, UINT32);
+
+cmdline_parse_inst_t cmd_config_tx_metadata_specific = {
+       .f = cmd_config_tx_metadata_specific_parsed,
+       .data = NULL,
+       .help_str = "port config <port_id> tx_metadata <value>",
+       .tokens = {
+               (void *)&cmd_config_tx_metadata_specific_port,
+               (void *)&cmd_config_tx_metadata_specific_keyword,
+               (void *)&cmd_config_tx_metadata_specific_id,
+               (void *)&cmd_config_tx_metadata_specific_item,
+               (void *)&cmd_config_tx_metadata_specific_value,
+               NULL,
+       },
+};
+
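The add_tx_md_callback()/remove_tx_md_callback() helpers above (implemented
elsewhere in this patch) wrap the generic ethdev Tx callback mechanism. A
hedged sketch of how such a callback can stamp every outgoing packet, assuming
the 18.11 mbuf layout where the tx_metadata field plus the PKT_TX_METADATA
flag hand the value to the PMD:

    #include <rte_ethdev.h>
    #include <rte_mbuf.h>

    /* Per-port metadata, stored big-endian exactly like the command above. */
    static uint32_t tx_md[RTE_MAX_ETHPORTS];

    static uint16_t
    stamp_tx_metadata(uint16_t port_id, __attribute__((unused)) uint16_t queue,
                      struct rte_mbuf *pkts[], uint16_t nb_pkts,
                      __attribute__((unused)) void *user)
    {
            uint16_t i;

            for (i = 0; i < nb_pkts; i++) {
                    /* 18.11 mbuf: tx_metadata + PKT_TX_METADATA (assumed). */
                    pkts[i]->tx_metadata = tx_md[port_id];
                    pkts[i]->ol_flags |= PKT_TX_METADATA;
            }
            return nb_pkts;
    }

    /* Register on one Tx queue; testpmd registers on every queue of the
     * port and keeps the returned handle so the callback can be removed.
     */
    static const struct rte_eth_rxtx_callback *
    enable_stamping(uint16_t port_id, uint16_t queue)
    {
            return rte_eth_add_tx_callback(port_id, queue,
                                           stamp_tx_metadata, NULL);
    }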
+/* *** display tx_metadata per port configuration *** */
+struct cmd_show_tx_metadata_result {
+       cmdline_fixed_string_t cmd_show;
+       cmdline_fixed_string_t cmd_port;
+       cmdline_fixed_string_t cmd_keyword;
+       portid_t cmd_pid;
+};
+
+static void
+cmd_show_tx_metadata_parsed(void *parsed_result,
+               __attribute__((unused)) struct cmdline *cl,
+               __attribute__((unused)) void *data)
+{
+       struct cmd_show_tx_metadata_result *res = parsed_result;
+
+       if (!rte_eth_dev_is_valid_port(res->cmd_pid)) {
+               printf("invalid port id %u\n", res->cmd_pid);
+               return;
+       }
+       if (!strcmp(res->cmd_keyword, "tx_metadata")) {
+               printf("Port %u tx_metadata: %u\n", res->cmd_pid,
+                      rte_be_to_cpu_32(ports[res->cmd_pid].tx_metadata));
+       }
+}
+
+cmdline_parse_token_string_t cmd_show_tx_metadata_show =
+       TOKEN_STRING_INITIALIZER(struct cmd_show_tx_metadata_result,
+                       cmd_show, "show");
+cmdline_parse_token_string_t cmd_show_tx_metadata_port =
+       TOKEN_STRING_INITIALIZER(struct cmd_show_tx_metadata_result,
+                       cmd_port, "port");
+cmdline_parse_token_num_t cmd_show_tx_metadata_pid =
+       TOKEN_NUM_INITIALIZER(struct cmd_show_tx_metadata_result,
+                       cmd_pid, UINT16);
+cmdline_parse_token_string_t cmd_show_tx_metadata_keyword =
+       TOKEN_STRING_INITIALIZER(struct cmd_show_tx_metadata_result,
+                       cmd_keyword, "tx_metadata");
+
+cmdline_parse_inst_t cmd_show_tx_metadata = {
+       .f = cmd_show_tx_metadata_parsed,
+       .data = NULL,
+       .help_str = "show port <port_id> tx_metadata",
+       .tokens = {
+               (void *)&cmd_show_tx_metadata_show,
+               (void *)&cmd_show_tx_metadata_port,
+               (void *)&cmd_show_tx_metadata_pid,
+               (void *)&cmd_show_tx_metadata_keyword,
+               NULL,
+       },
+};
+
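Together with the new "meta" pattern item in the cmdline_flow.c hunks later in
this patch, these two commands exercise egress metadata matching end to end.
For reference, a sketch of the equivalent rule built directly on the 18.11
rte_flow API (META is egress-only at this point; error handling trimmed):

    #include <rte_byteorder.h>
    #include <rte_flow.h>

    /* Drop egress packets whose mbuf tx_metadata equals md. */
    static struct rte_flow *
    meta_drop_rule(uint16_t port_id, uint32_t md, struct rte_flow_error *err)
    {
            struct rte_flow_attr attr = { .egress = 1 };
            struct rte_flow_item_meta spec = {
                    .data = rte_cpu_to_be_32(md),
            };
            /* NULL mask selects rte_flow_item_meta_mask (all bits). */
            struct rte_flow_item pattern[] = {
                    { .type = RTE_FLOW_ITEM_TYPE_META, .spec = &spec },
                    { .type = RTE_FLOW_ITEM_TYPE_END },
            };
            struct rte_flow_action actions[] = {
                    { .type = RTE_FLOW_ACTION_TYPE_DROP },
                    { .type = RTE_FLOW_ACTION_TYPE_END },
            };

            return rte_flow_create(port_id, &attr, pattern, actions, err);
    }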
 /* ******************************************************************************** */
 
 /* list of instructions */
@@ -17700,6 +18585,7 @@ cmdline_parse_ctx_t main_ctx[] = {
        (cmdline_parse_inst_t *)&cmd_operate_specific_port,
        (cmdline_parse_inst_t *)&cmd_operate_attach_port,
        (cmdline_parse_inst_t *)&cmd_operate_detach_port,
+       (cmdline_parse_inst_t *)&cmd_set_port_setup_on,
        (cmdline_parse_inst_t *)&cmd_config_speed_all,
        (cmdline_parse_inst_t *)&cmd_config_speed_specific,
        (cmdline_parse_inst_t *)&cmd_config_loopback_all,
@@ -17711,6 +18597,7 @@ cmdline_parse_ctx_t main_ctx[] = {
        (cmdline_parse_inst_t *)&cmd_config_rss,
        (cmdline_parse_inst_t *)&cmd_config_rxtx_ring_size,
        (cmdline_parse_inst_t *)&cmd_config_rxtx_queue,
+       (cmdline_parse_inst_t *)&cmd_config_deferred_start_rxtx_queue,
        (cmdline_parse_inst_t *)&cmd_setup_rxtx_queue,
        (cmdline_parse_inst_t *)&cmd_config_rss_reta,
        (cmdline_parse_inst_t *)&cmd_showport_reta,
@@ -17814,6 +18701,18 @@ cmdline_parse_ctx_t main_ctx[] = {
        (cmdline_parse_inst_t *)&cmd_set_vxlan_with_vlan,
        (cmdline_parse_inst_t *)&cmd_set_nvgre,
        (cmdline_parse_inst_t *)&cmd_set_nvgre_with_vlan,
+       (cmdline_parse_inst_t *)&cmd_set_l2_encap,
+       (cmdline_parse_inst_t *)&cmd_set_l2_encap_with_vlan,
+       (cmdline_parse_inst_t *)&cmd_set_l2_decap,
+       (cmdline_parse_inst_t *)&cmd_set_l2_decap_with_vlan,
+       (cmdline_parse_inst_t *)&cmd_set_mplsogre_encap,
+       (cmdline_parse_inst_t *)&cmd_set_mplsogre_encap_with_vlan,
+       (cmdline_parse_inst_t *)&cmd_set_mplsogre_decap,
+       (cmdline_parse_inst_t *)&cmd_set_mplsogre_decap_with_vlan,
+       (cmdline_parse_inst_t *)&cmd_set_mplsoudp_encap,
+       (cmdline_parse_inst_t *)&cmd_set_mplsoudp_encap_with_vlan,
+       (cmdline_parse_inst_t *)&cmd_set_mplsoudp_decap,
+       (cmdline_parse_inst_t *)&cmd_set_mplsoudp_decap_with_vlan,
        (cmdline_parse_inst_t *)&cmd_ddp_add,
        (cmdline_parse_inst_t *)&cmd_ddp_del,
        (cmdline_parse_inst_t *)&cmd_ddp_get_list,
@@ -17854,6 +18753,9 @@ cmdline_parse_ctx_t main_ctx[] = {
        (cmdline_parse_inst_t *)&cmd_suspend_port_tm_node,
        (cmdline_parse_inst_t *)&cmd_resume_port_tm_node,
        (cmdline_parse_inst_t *)&cmd_port_tm_hierarchy_commit,
+       (cmdline_parse_inst_t *)&cmd_port_tm_mark_ip_ecn,
+       (cmdline_parse_inst_t *)&cmd_port_tm_mark_ip_dscp,
+       (cmdline_parse_inst_t *)&cmd_port_tm_mark_vlan_dei,
        (cmdline_parse_inst_t *)&cmd_cfg_tunnel_udp_port,
        (cmdline_parse_inst_t *)&cmd_rx_offload_get_capa,
        (cmdline_parse_inst_t *)&cmd_rx_offload_get_configuration,
@@ -17867,6 +18769,8 @@ cmdline_parse_ctx_t main_ctx[] = {
        (cmdline_parse_inst_t *)&cmd_operate_bpf_ld_parse,
        (cmdline_parse_inst_t *)&cmd_operate_bpf_unld_parse,
 #endif
+       (cmdline_parse_inst_t *)&cmd_config_tx_metadata_specific,
+       (cmdline_parse_inst_t *)&cmd_show_tx_metadata,
        NULL,
 };
 
index f926060..23ea7cc 100644 (file)
@@ -178,6 +178,8 @@ enum index {
        ITEM_ICMP6_ND_OPT_SLA_ETH_SLA,
        ITEM_ICMP6_ND_OPT_TLA_ETH,
        ITEM_ICMP6_ND_OPT_TLA_ETH_TLA,
+       ITEM_META,
+       ITEM_META_DATA,
 
        /* Validate/create actions. */
        ACTIONS,
@@ -243,6 +245,32 @@ enum index {
        ACTION_VXLAN_DECAP,
        ACTION_NVGRE_ENCAP,
        ACTION_NVGRE_DECAP,
+       ACTION_L2_ENCAP,
+       ACTION_L2_DECAP,
+       ACTION_MPLSOGRE_ENCAP,
+       ACTION_MPLSOGRE_DECAP,
+       ACTION_MPLSOUDP_ENCAP,
+       ACTION_MPLSOUDP_DECAP,
+       ACTION_SET_IPV4_SRC,
+       ACTION_SET_IPV4_SRC_IPV4_SRC,
+       ACTION_SET_IPV4_DST,
+       ACTION_SET_IPV4_DST_IPV4_DST,
+       ACTION_SET_IPV6_SRC,
+       ACTION_SET_IPV6_SRC_IPV6_SRC,
+       ACTION_SET_IPV6_DST,
+       ACTION_SET_IPV6_DST_IPV6_DST,
+       ACTION_SET_TP_SRC,
+       ACTION_SET_TP_SRC_TP_SRC,
+       ACTION_SET_TP_DST,
+       ACTION_SET_TP_DST_TP_DST,
+       ACTION_MAC_SWAP,
+       ACTION_DEC_TTL,
+       ACTION_SET_TTL,
+       ACTION_SET_TTL_TTL,
+       ACTION_SET_MAC_SRC,
+       ACTION_SET_MAC_SRC_MAC_SRC,
+       ACTION_SET_MAC_DST,
+       ACTION_SET_MAC_DST_MAC_DST,
 };
 
 /** Maximum size for pattern in struct rte_flow_item_raw. */
@@ -295,6 +323,22 @@ struct action_nvgre_encap_data {
        struct rte_flow_item_nvgre item_nvgre;
 };
 
+/** Maximum data size in struct rte_flow_action_raw_encap. */
+#define ACTION_RAW_ENCAP_MAX_DATA 128
+
+/** Storage for struct rte_flow_action_raw_encap including external data. */
+struct action_raw_encap_data {
+       struct rte_flow_action_raw_encap conf;
+       uint8_t data[ACTION_RAW_ENCAP_MAX_DATA];
+       uint8_t preserve[ACTION_RAW_ENCAP_MAX_DATA];
+};
+
+/** Storage for struct rte_flow_action_raw_decap including external data. */
+struct action_raw_decap_data {
+       struct rte_flow_action_raw_decap conf;
+       uint8_t data[ACTION_RAW_ENCAP_MAX_DATA];
+};
+
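These wrappers exist because rte_flow_action_raw_encap/raw_decap only carry a
pointer and a size; the header bytes themselves need storage that outlives the
parse. The parsers added below fill data header by header and record the used
length in conf.size, along the lines of this illustrative fragment:

    /* Illustrative only: build a bare Ethernet header into the storage and
     * point conf at it, the same shape parse_vc_action_l2_encap() uses below.
     */
    static void
    fill_eth_encap(struct action_raw_encap_data *d,
                   const struct rte_flow_item_eth *eth)
    {
            uint8_t *p = d->data;

            memcpy(p, eth, sizeof(*eth));
            p += sizeof(*eth);
            d->conf.data = d->data;
            d->conf.preserve = NULL;
            d->conf.size = p - d->data;
    }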
 /** Maximum number of subsequent tokens and arguments on the stack. */
 #define CTX_STACK_SIZE 16
 
@@ -564,6 +608,7 @@ static const enum index next_item[] = {
        ITEM_ICMP6_ND_OPT,
        ITEM_ICMP6_ND_OPT_SLA_ETH,
        ITEM_ICMP6_ND_OPT_TLA_ETH,
+       ITEM_META,
        ZERO,
 };
 
@@ -784,6 +829,12 @@ static const enum index item_icmp6_nd_opt_tla_eth[] = {
        ZERO,
 };
 
+static const enum index item_meta[] = {
+       ITEM_META_DATA,
+       ITEM_NEXT,
+       ZERO,
+};
+
 static const enum index next_action[] = {
        ACTION_END,
        ACTION_VOID,
@@ -816,6 +867,23 @@ static const enum index next_action[] = {
        ACTION_VXLAN_DECAP,
        ACTION_NVGRE_ENCAP,
        ACTION_NVGRE_DECAP,
+       ACTION_L2_ENCAP,
+       ACTION_L2_DECAP,
+       ACTION_MPLSOGRE_ENCAP,
+       ACTION_MPLSOGRE_DECAP,
+       ACTION_MPLSOUDP_ENCAP,
+       ACTION_MPLSOUDP_DECAP,
+       ACTION_SET_IPV4_SRC,
+       ACTION_SET_IPV4_DST,
+       ACTION_SET_IPV6_SRC,
+       ACTION_SET_IPV6_DST,
+       ACTION_SET_TP_SRC,
+       ACTION_SET_TP_DST,
+       ACTION_MAC_SWAP,
+       ACTION_DEC_TTL,
+       ACTION_SET_TTL,
+       ACTION_SET_MAC_SRC,
+       ACTION_SET_MAC_DST,
        ZERO,
 };
 
@@ -918,12 +986,66 @@ static const enum index action_of_push_mpls[] = {
        ZERO,
 };
 
+static const enum index action_set_ipv4_src[] = {
+       ACTION_SET_IPV4_SRC_IPV4_SRC,
+       ACTION_NEXT,
+       ZERO,
+};
+
+static const enum index action_set_mac_src[] = {
+       ACTION_SET_MAC_SRC_MAC_SRC,
+       ACTION_NEXT,
+       ZERO,
+};
+
+static const enum index action_set_ipv4_dst[] = {
+       ACTION_SET_IPV4_DST_IPV4_DST,
+       ACTION_NEXT,
+       ZERO,
+};
+
+static const enum index action_set_ipv6_src[] = {
+       ACTION_SET_IPV6_SRC_IPV6_SRC,
+       ACTION_NEXT,
+       ZERO,
+};
+
+static const enum index action_set_ipv6_dst[] = {
+       ACTION_SET_IPV6_DST_IPV6_DST,
+       ACTION_NEXT,
+       ZERO,
+};
+
+static const enum index action_set_tp_src[] = {
+       ACTION_SET_TP_SRC_TP_SRC,
+       ACTION_NEXT,
+       ZERO,
+};
+
+static const enum index action_set_tp_dst[] = {
+       ACTION_SET_TP_DST_TP_DST,
+       ACTION_NEXT,
+       ZERO,
+};
+
+static const enum index action_set_ttl[] = {
+       ACTION_SET_TTL_TTL,
+       ACTION_NEXT,
+       ZERO,
+};
+
 static const enum index action_jump[] = {
        ACTION_JUMP_GROUP,
        ACTION_NEXT,
        ZERO,
 };
 
+static const enum index action_set_mac_dst[] = {
+       ACTION_SET_MAC_DST_MAC_DST,
+       ACTION_NEXT,
+       ZERO,
+};
+
 static int parse_init(struct context *, const struct token *,
                      const char *, unsigned int,
                      void *, unsigned int);
@@ -952,6 +1074,24 @@ static int parse_vc_action_vxlan_encap(struct context *, const struct token *,
 static int parse_vc_action_nvgre_encap(struct context *, const struct token *,
                                       const char *, unsigned int, void *,
                                       unsigned int);
+static int parse_vc_action_l2_encap(struct context *, const struct token *,
+                                   const char *, unsigned int, void *,
+                                   unsigned int);
+static int parse_vc_action_l2_decap(struct context *, const struct token *,
+                                   const char *, unsigned int, void *,
+                                   unsigned int);
+static int parse_vc_action_mplsogre_encap(struct context *,
+                                         const struct token *, const char *,
+                                         unsigned int, void *, unsigned int);
+static int parse_vc_action_mplsogre_decap(struct context *,
+                                         const struct token *, const char *,
+                                         unsigned int, void *, unsigned int);
+static int parse_vc_action_mplsoudp_encap(struct context *,
+                                         const struct token *, const char *,
+                                         unsigned int, void *, unsigned int);
+static int parse_vc_action_mplsoudp_decap(struct context *,
+                                         const struct token *, const char *,
+                                         unsigned int, void *, unsigned int);
 static int parse_destroy(struct context *, const struct token *,
                         const char *, unsigned int,
                         void *, unsigned int);
@@ -1985,6 +2125,20 @@ static const struct token token_list[] = {
                .args = ARGS(ARGS_ENTRY_HTON
                             (struct rte_flow_item_icmp6_nd_opt_tla_eth, tla)),
        },
+       [ITEM_META] = {
+               .name = "meta",
+               .help = "match metadata header",
+               .priv = PRIV_ITEM(META, sizeof(struct rte_flow_item_meta)),
+               .next = NEXT(item_meta),
+               .call = parse_vc,
+       },
+       [ITEM_META_DATA] = {
+               .name = "data",
+               .help = "metadata value",
+               .next = NEXT(item_meta, NEXT_ENTRY(UNSIGNED), item_param),
+               .args = ARGS(ARGS_ENTRY_MASK_HTON(struct rte_flow_item_meta,
+                                                 data, "\xff\xff\xff\xff")),
+       },
 
        /* Validate/create actions. */
        [ACTIONS] = {
@@ -2470,6 +2624,225 @@ static const struct token token_list[] = {
                .next = NEXT(NEXT_ENTRY(ACTION_NEXT)),
                .call = parse_vc,
        },
+       [ACTION_L2_ENCAP] = {
+               .name = "l2_encap",
+               .help = "l2 encap, uses configuration set by"
+                       " \"set l2_encap\"",
+               .priv = PRIV_ACTION(RAW_ENCAP,
+                                   sizeof(struct action_raw_encap_data)),
+               .next = NEXT(NEXT_ENTRY(ACTION_NEXT)),
+               .call = parse_vc_action_l2_encap,
+       },
+       [ACTION_L2_DECAP] = {
+               .name = "l2_decap",
+               .help = "l2 decap, uses configuration set by"
+                       " \"set l2_decap\"",
+               .priv = PRIV_ACTION(RAW_DECAP,
+                                   sizeof(struct action_raw_decap_data)),
+               .next = NEXT(NEXT_ENTRY(ACTION_NEXT)),
+               .call = parse_vc_action_l2_decap,
+       },
+       [ACTION_MPLSOGRE_ENCAP] = {
+               .name = "mplsogre_encap",
+               .help = "mplsogre encapsulation, uses configuration set by"
+                       " \"set mplsogre_encap\"",
+               .priv = PRIV_ACTION(RAW_ENCAP,
+                                   sizeof(struct action_raw_encap_data)),
+               .next = NEXT(NEXT_ENTRY(ACTION_NEXT)),
+               .call = parse_vc_action_mplsogre_encap,
+       },
+       [ACTION_MPLSOGRE_DECAP] = {
+               .name = "mplsogre_decap",
+               .help = "mplsogre decapsulation, uses configuration set by"
+                       " \"set mplsogre_decap\"",
+               .priv = PRIV_ACTION(RAW_DECAP,
+                                   sizeof(struct action_raw_decap_data)),
+               .next = NEXT(NEXT_ENTRY(ACTION_NEXT)),
+               .call = parse_vc_action_mplsogre_decap,
+       },
+       [ACTION_MPLSOUDP_ENCAP] = {
+               .name = "mplsoudp_encap",
+               .help = "mplsoudp encapsulation, uses configuration set by"
+                       " \"set mplsoudp_encap\"",
+               .priv = PRIV_ACTION(RAW_ENCAP,
+                                   sizeof(struct action_raw_encap_data)),
+               .next = NEXT(NEXT_ENTRY(ACTION_NEXT)),
+               .call = parse_vc_action_mplsoudp_encap,
+       },
+       [ACTION_MPLSOUDP_DECAP] = {
+               .name = "mplsoudp_decap",
+               .help = "mplsoudp decapsulation, uses configuration set by"
+                       " \"set mplsoudp_decap\"",
+               .priv = PRIV_ACTION(RAW_DECAP,
+                                   sizeof(struct action_raw_decap_data)),
+               .next = NEXT(NEXT_ENTRY(ACTION_NEXT)),
+               .call = parse_vc_action_mplsoudp_decap,
+       },
+       [ACTION_SET_IPV4_SRC] = {
+               .name = "set_ipv4_src",
+               .help = "Set a new IPv4 source address in the outermost"
+                       " IPv4 header",
+               .priv = PRIV_ACTION(SET_IPV4_SRC,
+                       sizeof(struct rte_flow_action_set_ipv4)),
+               .next = NEXT(action_set_ipv4_src),
+               .call = parse_vc,
+       },
+       [ACTION_SET_IPV4_SRC_IPV4_SRC] = {
+               .name = "ipv4_addr",
+               .help = "new IPv4 source address to set",
+               .next = NEXT(action_set_ipv4_src, NEXT_ENTRY(IPV4_ADDR)),
+               .args = ARGS(ARGS_ENTRY_HTON
+                       (struct rte_flow_action_set_ipv4, ipv4_addr)),
+               .call = parse_vc_conf,
+       },
+       [ACTION_SET_IPV4_DST] = {
+               .name = "set_ipv4_dst",
+               .help = "Set a new IPv4 destination address in the outermost"
+                       " IPv4 header",
+               .priv = PRIV_ACTION(SET_IPV4_DST,
+                       sizeof(struct rte_flow_action_set_ipv4)),
+               .next = NEXT(action_set_ipv4_dst),
+               .call = parse_vc,
+       },
+       [ACTION_SET_IPV4_DST_IPV4_DST] = {
+               .name = "ipv4_addr",
+               .help = "new IPv4 destination address to set",
+               .next = NEXT(action_set_ipv4_dst, NEXT_ENTRY(IPV4_ADDR)),
+               .args = ARGS(ARGS_ENTRY_HTON
+                       (struct rte_flow_action_set_ipv4, ipv4_addr)),
+               .call = parse_vc_conf,
+       },
+       [ACTION_SET_IPV6_SRC] = {
+               .name = "set_ipv6_src",
+               .help = "Set a new IPv6 source address in the outermost"
+                       " IPv6 header",
+               .priv = PRIV_ACTION(SET_IPV6_SRC,
+                       sizeof(struct rte_flow_action_set_ipv6)),
+               .next = NEXT(action_set_ipv6_src),
+               .call = parse_vc,
+       },
+       [ACTION_SET_IPV6_SRC_IPV6_SRC] = {
+               .name = "ipv6_addr",
+               .help = "new IPv6 source address to set",
+               .next = NEXT(action_set_ipv6_src, NEXT_ENTRY(IPV6_ADDR)),
+               .args = ARGS(ARGS_ENTRY_HTON
+                       (struct rte_flow_action_set_ipv6, ipv6_addr)),
+               .call = parse_vc_conf,
+       },
+       [ACTION_SET_IPV6_DST] = {
+               .name = "set_ipv6_dst",
+               .help = "Set a new IPv6 destination address in the outermost"
+                       " IPv6 header",
+               .priv = PRIV_ACTION(SET_IPV6_DST,
+                       sizeof(struct rte_flow_action_set_ipv6)),
+               .next = NEXT(action_set_ipv6_dst),
+               .call = parse_vc,
+       },
+       [ACTION_SET_IPV6_DST_IPV6_DST] = {
+               .name = "ipv6_addr",
+               .help = "new IPv6 destination address to set",
+               .next = NEXT(action_set_ipv6_dst, NEXT_ENTRY(IPV6_ADDR)),
+               .args = ARGS(ARGS_ENTRY_HTON
+                       (struct rte_flow_action_set_ipv6, ipv6_addr)),
+               .call = parse_vc_conf,
+       },
+       [ACTION_SET_TP_SRC] = {
+               .name = "set_tp_src",
+               .help = "set a new source port number in the outermost"
+                       " TCP/UDP header",
+               .priv = PRIV_ACTION(SET_TP_SRC,
+                       sizeof(struct rte_flow_action_set_tp)),
+               .next = NEXT(action_set_tp_src),
+               .call = parse_vc,
+       },
+       [ACTION_SET_TP_SRC_TP_SRC] = {
+               .name = "port",
+               .help = "new source port number to set",
+               .next = NEXT(action_set_tp_src, NEXT_ENTRY(UNSIGNED)),
+               .args = ARGS(ARGS_ENTRY_HTON
+                            (struct rte_flow_action_set_tp, port)),
+               .call = parse_vc_conf,
+       },
+       [ACTION_SET_TP_DST] = {
+               .name = "set_tp_dst",
+               .help = "set a new destination port number in the outermost"
+                       " TCP/UDP header",
+               .priv = PRIV_ACTION(SET_TP_DST,
+                       sizeof(struct rte_flow_action_set_tp)),
+               .next = NEXT(action_set_tp_dst),
+               .call = parse_vc,
+       },
+       [ACTION_SET_TP_DST_TP_DST] = {
+               .name = "port",
+               .help = "new destination port number to set",
+               .next = NEXT(action_set_tp_dst, NEXT_ENTRY(UNSIGNED)),
+               .args = ARGS(ARGS_ENTRY_HTON
+                            (struct rte_flow_action_set_tp, port)),
+               .call = parse_vc_conf,
+       },
+       [ACTION_MAC_SWAP] = {
+               .name = "mac_swap",
+               .help = "Swap the source and destination MAC addresses"
+                       " in the outermost Ethernet header",
+               .priv = PRIV_ACTION(MAC_SWAP, 0),
+               .next = NEXT(NEXT_ENTRY(ACTION_NEXT)),
+               .call = parse_vc,
+       },
+       [ACTION_DEC_TTL] = {
+               .name = "dec_ttl",
+               .help = "decrease network TTL if available",
+               .priv = PRIV_ACTION(DEC_TTL, 0),
+               .next = NEXT(NEXT_ENTRY(ACTION_NEXT)),
+               .call = parse_vc,
+       },
+       [ACTION_SET_TTL] = {
+               .name = "set_ttl",
+               .help = "set ttl value",
+               .priv = PRIV_ACTION(SET_TTL,
+                       sizeof(struct rte_flow_action_set_ttl)),
+               .next = NEXT(action_set_ttl),
+               .call = parse_vc,
+       },
+       [ACTION_SET_TTL_TTL] = {
+               .name = "ttl_value",
+               .help = "new ttl value to set",
+               .next = NEXT(action_set_ttl, NEXT_ENTRY(UNSIGNED)),
+               .args = ARGS(ARGS_ENTRY_HTON
+                            (struct rte_flow_action_set_ttl, ttl_value)),
+               .call = parse_vc_conf,
+       },
+       [ACTION_SET_MAC_SRC] = {
+               .name = "set_mac_src",
+               .help = "set source mac address",
+               .priv = PRIV_ACTION(SET_MAC_SRC,
+                       sizeof(struct rte_flow_action_set_mac)),
+               .next = NEXT(action_set_mac_src),
+               .call = parse_vc,
+       },
+       [ACTION_SET_MAC_SRC_MAC_SRC] = {
+               .name = "mac_addr",
+               .help = "new source mac address to set",
+               .next = NEXT(action_set_mac_src, NEXT_ENTRY(MAC_ADDR)),
+               .args = ARGS(ARGS_ENTRY_HTON
+                            (struct rte_flow_action_set_mac, mac_addr)),
+               .call = parse_vc_conf,
+       },
+       [ACTION_SET_MAC_DST] = {
+               .name = "set_mac_dst",
+               .help = "set destination mac address",
+               .priv = PRIV_ACTION(SET_MAC_DST,
+                       sizeof(struct rte_flow_action_set_mac)),
+               .next = NEXT(action_set_mac_dst),
+               .call = parse_vc,
+       },
+       [ACTION_SET_MAC_DST_MAC_DST] = {
+               .name = "mac_addr",
+               .help = "new destination mac address to set",
+               .next = NEXT(action_set_mac_dst, NEXT_ENTRY(MAC_ADDR)),
+               .args = ARGS(ARGS_ENTRY_HTON
+                            (struct rte_flow_action_set_mac, mac_addr)),
+               .call = parse_vc_conf,
+       },
 };
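The header-rewrite tokens above map one-to-one onto rte_flow actions
introduced in 18.11. A hedged sketch of composing set_ipv4_src with dec_ttl
directly in C (build_rewrite_actions is an illustrative name):

    #include <arpa/inet.h>
    #include <rte_flow.h>

    /* Fill out[] with: rewrite outer IPv4 source, decrement TTL, end. */
    static int
    build_rewrite_actions(struct rte_flow_action out[3], const char *new_src)
    {
            /* Static so the conf outlives this call; rte_flow_create()
             * reads it when the rule is created.
             */
            static struct rte_flow_action_set_ipv4 conf;

            /* inet_pton() already yields the big-endian value expected
             * in ipv4_addr.
             */
            if (inet_pton(AF_INET, new_src, &conf.ipv4_addr) != 1)
                    return -1;
            out[0] = (struct rte_flow_action){
                    .type = RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC,
                    .conf = &conf,
            };
            out[1] = (struct rte_flow_action){
                    .type = RTE_FLOW_ACTION_TYPE_DEC_TTL,
            };
            out[2] = (struct rte_flow_action){
                    .type = RTE_FLOW_ACTION_TYPE_END,
            };
            return 0;
    }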
 
 /** Remove and return last entry from argument stack. */
@@ -3225,6 +3598,503 @@ parse_vc_action_nvgre_encap(struct context *ctx, const struct token *token,
        return ret;
 }
 
+/** Parse l2 encap action. */
+static int
+parse_vc_action_l2_encap(struct context *ctx, const struct token *token,
+                        const char *str, unsigned int len,
+                        void *buf, unsigned int size)
+{
+       struct buffer *out = buf;
+       struct rte_flow_action *action;
+       struct action_raw_encap_data *action_encap_data;
+       struct rte_flow_item_eth eth = { .type = 0, };
+       struct rte_flow_item_vlan vlan = {
+               .tci = l2_encap_conf.vlan_tci,
+               .inner_type = 0,
+       };
+       uint8_t *header;
+       int ret;
+
+       ret = parse_vc(ctx, token, str, len, buf, size);
+       if (ret < 0)
+               return ret;
+       /* Nothing else to do if there is no buffer. */
+       if (!out)
+               return ret;
+       if (!out->args.vc.actions_n)
+               return -1;
+       action = &out->args.vc.actions[out->args.vc.actions_n - 1];
+       /* Point to selected object. */
+       ctx->object = out->args.vc.data;
+       ctx->objmask = NULL;
+       /* Copy the headers to the buffer. */
+       action_encap_data = ctx->object;
+       *action_encap_data = (struct action_raw_encap_data) {
+               .conf = (struct rte_flow_action_raw_encap){
+                       .data = action_encap_data->data,
+               },
+               .data = {},
+       };
+       header = action_encap_data->data;
+       if (l2_encap_conf.select_vlan)
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
+       else if (l2_encap_conf.select_ipv4)
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+       else
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+       memcpy(eth.dst.addr_bytes,
+              l2_encap_conf.eth_dst, ETHER_ADDR_LEN);
+       memcpy(eth.src.addr_bytes,
+              l2_encap_conf.eth_src, ETHER_ADDR_LEN);
+       memcpy(header, &eth, sizeof(eth));
+       header += sizeof(eth);
+       if (l2_encap_conf.select_vlan) {
+               if (l2_encap_conf.select_ipv4)
+                       vlan.inner_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+               else
+                       vlan.inner_type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+               memcpy(header, &vlan, sizeof(vlan));
+               header += sizeof(vlan);
+       }
+       action_encap_data->conf.size = header -
+               action_encap_data->data;
+       action->conf = &action_encap_data->conf;
+       return ret;
+}
+
+/** Parse l2 decap action. */
+static int
+parse_vc_action_l2_decap(struct context *ctx, const struct token *token,
+                        const char *str, unsigned int len,
+                        void *buf, unsigned int size)
+{
+       struct buffer *out = buf;
+       struct rte_flow_action *action;
+       struct action_raw_decap_data *action_decap_data;
+       struct rte_flow_item_eth eth = { .type = 0, };
+       struct rte_flow_item_vlan vlan = {
+               .tci = 0, /* TCI value is unused in the decap header spec */
+               .inner_type = 0,
+       };
+       uint8_t *header;
+       int ret;
+
+       ret = parse_vc(ctx, token, str, len, buf, size);
+       if (ret < 0)
+               return ret;
+       /* Nothing else to do if there is no buffer. */
+       if (!out)
+               return ret;
+       if (!out->args.vc.actions_n)
+               return -1;
+       action = &out->args.vc.actions[out->args.vc.actions_n - 1];
+       /* Point to selected object. */
+       ctx->object = out->args.vc.data;
+       ctx->objmask = NULL;
+       /* Copy the headers to the buffer. */
+       action_decap_data = ctx->object;
+       *action_decap_data = (struct action_raw_decap_data) {
+               .conf = (struct rte_flow_action_raw_decap){
+                       .data = action_decap_data->data,
+               },
+               .data = {},
+       };
+       header = action_decap_data->data;
+       if (l2_decap_conf.select_vlan)
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
+       memcpy(header, &eth, sizeof(eth));
+       header += sizeof(eth);
+       if (l2_decap_conf.select_vlan) {
+               memcpy(header, &vlan, sizeof(vlan));
+               header += sizeof(vlan);
+       }
+       action_decap_data->conf.size = header -
+               action_decap_data->data;
+       action->conf = &action_decap_data->conf;
+       return ret;
+}
+
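+/* IEEE-assigned EtherType for MPLS unicast frames. */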
+#define ETHER_TYPE_MPLS_UNICAST 0x8847
+
+/** Parse MPLSOGRE encap action. */
+static int
+parse_vc_action_mplsogre_encap(struct context *ctx, const struct token *token,
+                              const char *str, unsigned int len,
+                              void *buf, unsigned int size)
+{
+       struct buffer *out = buf;
+       struct rte_flow_action *action;
+       struct action_raw_encap_data *action_encap_data;
+       struct rte_flow_item_eth eth = { .type = 0, };
+       struct rte_flow_item_vlan vlan = {
+               .tci = mplsogre_encap_conf.vlan_tci,
+               .inner_type = 0,
+       };
+       struct rte_flow_item_ipv4 ipv4 = {
+               .hdr =  {
+                       .src_addr = mplsogre_encap_conf.ipv4_src,
+                       .dst_addr = mplsogre_encap_conf.ipv4_dst,
+                       .next_proto_id = IPPROTO_GRE,
+               },
+       };
+       struct rte_flow_item_ipv6 ipv6 = {
+               .hdr =  {
+                       .proto = IPPROTO_GRE,
+               },
+       };
+       struct rte_flow_item_gre gre = {
+               .protocol = rte_cpu_to_be_16(ETHER_TYPE_MPLS_UNICAST),
+       };
+       struct rte_flow_item_mpls mpls;
+       uint8_t *header;
+       int ret;
+
+       ret = parse_vc(ctx, token, str, len, buf, size);
+       if (ret < 0)
+               return ret;
+       /* Nothing else to do if there is no buffer. */
+       if (!out)
+               return ret;
+       if (!out->args.vc.actions_n)
+               return -1;
+       action = &out->args.vc.actions[out->args.vc.actions_n - 1];
+       /* Point to selected object. */
+       ctx->object = out->args.vc.data;
+       ctx->objmask = NULL;
+       /* Copy the headers to the buffer. */
+       action_encap_data = ctx->object;
+       *action_encap_data = (struct action_raw_encap_data) {
+               .conf = (struct rte_flow_action_raw_encap){
+                       .data = action_encap_data->data,
+               },
+               .data = {},
+               .preserve = {},
+       };
+       header = action_encap_data->data;
+       if (mplsogre_encap_conf.select_vlan)
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
+       else if (mplsogre_encap_conf.select_ipv4)
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+       else
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+       memcpy(eth.dst.addr_bytes,
+              mplsogre_encap_conf.eth_dst, ETHER_ADDR_LEN);
+       memcpy(eth.src.addr_bytes,
+              mplsogre_encap_conf.eth_src, ETHER_ADDR_LEN);
+       memcpy(header, &eth, sizeof(eth));
+       header += sizeof(eth);
+       if (mplsogre_encap_conf.select_vlan) {
+               if (mplsogre_encap_conf.select_ipv4)
+                       vlan.inner_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+               else
+                       vlan.inner_type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+               memcpy(header, &vlan, sizeof(vlan));
+               header += sizeof(vlan);
+       }
+       if (mplsogre_encap_conf.select_ipv4) {
+               memcpy(header, &ipv4, sizeof(ipv4));
+               header += sizeof(ipv4);
+       } else {
+               memcpy(&ipv6.hdr.src_addr,
+                      &mplsogre_encap_conf.ipv6_src,
+                      sizeof(mplsogre_encap_conf.ipv6_src));
+               memcpy(&ipv6.hdr.dst_addr,
+                      &mplsogre_encap_conf.ipv6_dst,
+                      sizeof(mplsogre_encap_conf.ipv6_dst));
+               memcpy(header, &ipv6, sizeof(ipv6));
+               header += sizeof(ipv6);
+       }
+       memcpy(header, &gre, sizeof(gre));
+       header += sizeof(gre);
+       memset(&mpls, 0, sizeof(mpls));
+       memcpy(mpls.label_tc_s, mplsogre_encap_conf.label,
+              RTE_DIM(mplsogre_encap_conf.label));
+       memcpy(header, &mpls, sizeof(mpls));
+       header += sizeof(mpls);
+       action_encap_data->conf.size = header -
+               action_encap_data->data;
+       action->conf = &action_encap_data->conf;
+       return ret;
+}
+
+/** Parse MPLSOGRE decap action. */
+static int
+parse_vc_action_mplsogre_decap(struct context *ctx, const struct token *token,
+                              const char *str, unsigned int len,
+                              void *buf, unsigned int size)
+{
+       struct buffer *out = buf;
+       struct rte_flow_action *action;
+       struct action_raw_decap_data *action_decap_data;
+       struct rte_flow_item_eth eth = { .type = 0, };
+       struct rte_flow_item_vlan vlan = {.tci = 0};
+       struct rte_flow_item_ipv4 ipv4 = {
+               .hdr =  {
+                       .next_proto_id = IPPROTO_GRE,
+               },
+       };
+       struct rte_flow_item_ipv6 ipv6 = {
+               .hdr =  {
+                       .proto = IPPROTO_GRE,
+               },
+       };
+       struct rte_flow_item_gre gre = {
+               .protocol = rte_cpu_to_be_16(ETHER_TYPE_MPLS_UNICAST),
+       };
+       struct rte_flow_item_mpls mpls;
+       uint8_t *header;
+       int ret;
+
+       ret = parse_vc(ctx, token, str, len, buf, size);
+       if (ret < 0)
+               return ret;
+       /* Nothing else to do if there is no buffer. */
+       if (!out)
+               return ret;
+       if (!out->args.vc.actions_n)
+               return -1;
+       action = &out->args.vc.actions[out->args.vc.actions_n - 1];
+       /* Point to selected object. */
+       ctx->object = out->args.vc.data;
+       ctx->objmask = NULL;
+       /* Copy the headers to the buffer. */
+       action_decap_data = ctx->object;
+       *action_decap_data = (struct action_raw_decap_data) {
+               .conf = (struct rte_flow_action_raw_decap){
+                       .data = action_decap_data->data,
+               },
+               .data = {},
+       };
+       header = action_decap_data->data;
+       if (mplsogre_decap_conf.select_vlan)
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
+       else if (mplsogre_decap_conf.select_ipv4)
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+       else
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+       memcpy(eth.dst.addr_bytes,
+              mplsogre_encap_conf.eth_dst, ETHER_ADDR_LEN);
+       memcpy(eth.src.addr_bytes,
+              mplsogre_encap_conf.eth_src, ETHER_ADDR_LEN);
+       memcpy(header, &eth, sizeof(eth));
+       header += sizeof(eth);
+       if (mplsogre_decap_conf.select_vlan) {
+               if (mplsogre_decap_conf.select_ipv4)
+                       vlan.inner_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+               else
+                       vlan.inner_type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+               memcpy(header, &vlan, sizeof(vlan));
+               header += sizeof(vlan);
+       }
+       if (mplsogre_decap_conf.select_ipv4) {
+               memcpy(header, &ipv4, sizeof(ipv4));
+               header += sizeof(ipv4);
+       } else {
+               memcpy(header, &ipv6, sizeof(ipv6));
+               header += sizeof(ipv6);
+       }
+       memcpy(header, &gre, sizeof(gre));
+       header += sizeof(gre);
+       memset(&mpls, 0, sizeof(mpls));
+       memcpy(header, &mpls, sizeof(mpls));
+       header += sizeof(mpls);
+       action_decap_data->conf.size = header -
+               action_decap_data->data;
+       action->conf = &action_decap_data->conf;
+       return ret;
+}
+
+/** Parse MPLSOUDP encap action. */
+static int
+parse_vc_action_mplsoudp_encap(struct context *ctx, const struct token *token,
+                              const char *str, unsigned int len,
+                              void *buf, unsigned int size)
+{
+       struct buffer *out = buf;
+       struct rte_flow_action *action;
+       struct action_raw_encap_data *action_encap_data;
+       struct rte_flow_item_eth eth = { .type = 0, };
+       struct rte_flow_item_vlan vlan = {
+               .tci = mplsoudp_encap_conf.vlan_tci,
+               .inner_type = 0,
+       };
+       struct rte_flow_item_ipv4 ipv4 = {
+               .hdr =  {
+                       .src_addr = mplsoudp_encap_conf.ipv4_src,
+                       .dst_addr = mplsoudp_encap_conf.ipv4_dst,
+                       .next_proto_id = IPPROTO_UDP,
+               },
+       };
+       struct rte_flow_item_ipv6 ipv6 = {
+               .hdr =  {
+                       .proto = IPPROTO_UDP,
+               },
+       };
+       struct rte_flow_item_udp udp = {
+               .hdr = {
+                       .src_port = mplsoudp_encap_conf.udp_src,
+                       .dst_port = mplsoudp_encap_conf.udp_dst,
+               },
+       };
+       struct rte_flow_item_mpls mpls;
+       uint8_t *header;
+       int ret;
+
+       ret = parse_vc(ctx, token, str, len, buf, size);
+       if (ret < 0)
+               return ret;
+       /* Nothing else to do if there is no buffer. */
+       if (!out)
+               return ret;
+       if (!out->args.vc.actions_n)
+               return -1;
+       action = &out->args.vc.actions[out->args.vc.actions_n - 1];
+       /* Point to selected object. */
+       ctx->object = out->args.vc.data;
+       ctx->objmask = NULL;
+       /* Copy the headers to the buffer. */
+       action_encap_data = ctx->object;
+       *action_encap_data = (struct action_raw_encap_data) {
+               .conf = (struct rte_flow_action_raw_encap){
+                       .data = action_encap_data->data,
+               },
+               .data = {},
+               .preserve = {},
+       };
+       header = action_encap_data->data;
+       if (mplsoudp_encap_conf.select_vlan)
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
+       else if (mplsoudp_encap_conf.select_ipv4)
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+       else
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+       memcpy(eth.dst.addr_bytes,
+              mplsoudp_encap_conf.eth_dst, ETHER_ADDR_LEN);
+       memcpy(eth.src.addr_bytes,
+              mplsoudp_encap_conf.eth_src, ETHER_ADDR_LEN);
+       memcpy(header, &eth, sizeof(eth));
+       header += sizeof(eth);
+       if (mplsoudp_encap_conf.select_vlan) {
+               if (mplsoudp_encap_conf.select_ipv4)
+                       vlan.inner_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+               else
+                       vlan.inner_type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+               memcpy(header, &vlan, sizeof(vlan));
+               header += sizeof(vlan);
+       }
+       if (mplsoudp_encap_conf.select_ipv4) {
+               memcpy(header, &ipv4, sizeof(ipv4));
+               header += sizeof(ipv4);
+       } else {
+               memcpy(&ipv6.hdr.src_addr,
+                      &mplsoudp_encap_conf.ipv6_src,
+                      sizeof(mplsoudp_encap_conf.ipv6_src));
+               memcpy(&ipv6.hdr.dst_addr,
+                      &mplsoudp_encap_conf.ipv6_dst,
+                      sizeof(mplsoudp_encap_conf.ipv6_dst));
+               memcpy(header, &ipv6, sizeof(ipv6));
+               header += sizeof(ipv6);
+       }
+       memcpy(header, &udp, sizeof(udp));
+       header += sizeof(udp);
+       memset(&mpls, 0, sizeof(mpls));
+       memcpy(mpls.label_tc_s, mplsoudp_encap_conf.label,
+              RTE_DIM(mplsoudp_encap_conf.label));
+       memcpy(header, &mpls, sizeof(mpls));
+       header += sizeof(mpls);
+       action_encap_data->conf.size = header -
+               action_encap_data->data;
+       action->conf = &action_encap_data->conf;
+       return ret;
+}
+
+/** Parse MPLSOUDP decap action. */
+static int
+parse_vc_action_mplsoudp_decap(struct context *ctx, const struct token *token,
+                              const char *str, unsigned int len,
+                              void *buf, unsigned int size)
+{
+       struct buffer *out = buf;
+       struct rte_flow_action *action;
+       struct action_raw_decap_data *action_decap_data;
+       struct rte_flow_item_eth eth = { .type = 0, };
+       struct rte_flow_item_vlan vlan = {.tci = 0};
+       struct rte_flow_item_ipv4 ipv4 = {
+               .hdr =  {
+                       .next_proto_id = IPPROTO_UDP,
+               },
+       };
+       struct rte_flow_item_ipv6 ipv6 = {
+               .hdr =  {
+                       .proto = IPPROTO_UDP,
+               },
+       };
+       struct rte_flow_item_udp udp = {
+               .hdr = {
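+                       /* 6635 is the IANA-assigned MPLS-over-UDP port (RFC 7510). */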
+                       .dst_port = rte_cpu_to_be_16(6635),
+               },
+       };
+       struct rte_flow_item_mpls mpls;
+       uint8_t *header;
+       int ret;
+
+       ret = parse_vc(ctx, token, str, len, buf, size);
+       if (ret < 0)
+               return ret;
+       /* Nothing else to do if there is no buffer. */
+       if (!out)
+               return ret;
+       if (!out->args.vc.actions_n)
+               return -1;
+       action = &out->args.vc.actions[out->args.vc.actions_n - 1];
+       /* Point to selected object. */
+       ctx->object = out->args.vc.data;
+       ctx->objmask = NULL;
+       /* Copy the headers to the buffer. */
+       action_decap_data = ctx->object;
+       *action_decap_data = (struct action_raw_decap_data) {
+               .conf = (struct rte_flow_action_raw_decap){
+                       .data = action_decap_data->data,
+               },
+               .data = {},
+       };
+       header = action_decap_data->data;
+       if (mplsoudp_decap_conf.select_vlan)
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
+       else if (mplsoudp_decap_conf.select_ipv4)
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+       else
+               eth.type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+       memcpy(eth.dst.addr_bytes,
+              mplsoudp_encap_conf.eth_dst, ETHER_ADDR_LEN);
+       memcpy(eth.src.addr_bytes,
+              mplsoudp_encap_conf.eth_src, ETHER_ADDR_LEN);
+       memcpy(header, &eth, sizeof(eth));
+       header += sizeof(eth);
+       if (mplsoudp_decap_conf.select_vlan) {
+               if (mplsoudp_decap_conf.select_ipv4)
+                       vlan.inner_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+               else
+                       vlan.inner_type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
+               memcpy(header, &vlan, sizeof(vlan));
+               header += sizeof(vlan);
+       }
+       if (mplsoudp_decap_conf.select_ipv4) {
+               memcpy(header, &ipv4, sizeof(ipv4));
+               header += sizeof(ipv4);
+       } else {
+               memcpy(header, &ipv6, sizeof(ipv6));
+               header += sizeof(ipv6);
+       }
+       memcpy(header, &udp, sizeof(udp));
+       header += sizeof(udp);
+       memset(&mpls, 0, sizeof(mpls));
+       memcpy(header, &mpls, sizeof(mpls));
+       header += sizeof(mpls);
+       action_decap_data->conf.size = header -
+               action_decap_data->data;
+       action->conf = &action_decap_data->conf;
+       return ret;
+}
+
 /** Parse tokens for destroy command. */
 static int
 parse_destroy(struct context *ctx, const struct token *token,
index f908fb3..32a4730 100644 (file)
@@ -414,9 +414,9 @@ cmdline_parse_inst_t cmd_add_port_meter_profile_srtcm = {
                (void *)&cmd_add_port_meter_profile_srtcm_port,
                (void *)&cmd_add_port_meter_profile_srtcm_meter,
                (void *)&cmd_add_port_meter_profile_srtcm_profile,
+               (void *)&cmd_add_port_meter_profile_srtcm_srtcm_rfc2697,
                (void *)&cmd_add_port_meter_profile_srtcm_port_id,
                (void *)&cmd_add_port_meter_profile_srtcm_profile_id,
-               (void *)&cmd_add_port_meter_profile_srtcm_srtcm_rfc2697,
                (void *)&cmd_add_port_meter_profile_srtcm_cir,
                (void *)&cmd_add_port_meter_profile_srtcm_cbs,
                (void *)&cmd_add_port_meter_profile_srtcm_ebs,
@@ -521,9 +521,9 @@ cmdline_parse_inst_t cmd_add_port_meter_profile_trtcm = {
                (void *)&cmd_add_port_meter_profile_trtcm_port,
                (void *)&cmd_add_port_meter_profile_trtcm_meter,
                (void *)&cmd_add_port_meter_profile_trtcm_profile,
+               (void *)&cmd_add_port_meter_profile_trtcm_trtcm_rfc2698,
                (void *)&cmd_add_port_meter_profile_trtcm_port_id,
                (void *)&cmd_add_port_meter_profile_trtcm_profile_id,
-               (void *)&cmd_add_port_meter_profile_trtcm_trtcm_rfc2698,
                (void *)&cmd_add_port_meter_profile_trtcm_cir,
                (void *)&cmd_add_port_meter_profile_trtcm_pir,
                (void *)&cmd_add_port_meter_profile_trtcm_cbs,
@@ -633,9 +633,9 @@ cmdline_parse_inst_t cmd_add_port_meter_profile_trtcm_rfc4115 = {
                (void *)&cmd_add_port_meter_profile_trtcm_rfc4115_port,
                (void *)&cmd_add_port_meter_profile_trtcm_rfc4115_meter,
                (void *)&cmd_add_port_meter_profile_trtcm_rfc4115_profile,
+               (void *)&cmd_add_port_meter_profile_trtcm_rfc4115_trtcm_rfc4115,
                (void *)&cmd_add_port_meter_profile_trtcm_rfc4115_port_id,
                (void *)&cmd_add_port_meter_profile_trtcm_rfc4115_profile_id,
-               (void *)&cmd_add_port_meter_profile_trtcm_rfc4115_trtcm_rfc4115,
                (void *)&cmd_add_port_meter_profile_trtcm_rfc4115_cir,
                (void *)&cmd_add_port_meter_profile_trtcm_rfc4115_eir,
                (void *)&cmd_add_port_meter_profile_trtcm_rfc4115_cbs,
index 631f179..b430797 100644 (file)
@@ -2187,3 +2187,263 @@ cmdline_parse_inst_t cmd_port_tm_hierarchy_commit = {
                NULL,
        },
 };
+
+/* *** Port TM Mark IP ECN *** */
+struct cmd_port_tm_mark_ip_ecn_result {
+       cmdline_fixed_string_t set;
+       cmdline_fixed_string_t port;
+       cmdline_fixed_string_t tm;
+       cmdline_fixed_string_t mark;
+       cmdline_fixed_string_t ip_ecn;
+       uint16_t port_id;
+       uint16_t green;
+       uint16_t yellow;
+       uint16_t red;
+};
+
+cmdline_parse_token_string_t cmd_port_tm_mark_ip_ecn_set =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_ip_ecn_result,
+                                set, "set");
+
+cmdline_parse_token_string_t cmd_port_tm_mark_ip_ecn_port =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_ip_ecn_result,
+                                port, "port");
+
+cmdline_parse_token_string_t cmd_port_tm_mark_ip_ecn_tm =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_ip_ecn_result, tm,
+                                "tm");
+
+cmdline_parse_token_string_t cmd_port_tm_mark_ip_ecn_mark =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_ip_ecn_result,
+                                mark, "mark");
+
+cmdline_parse_token_string_t cmd_port_tm_mark_ip_ecn_ip_ecn =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_ip_ecn_result,
+                                ip_ecn, "ip_ecn");
+cmdline_parse_token_num_t cmd_port_tm_mark_ip_ecn_port_id =
+       TOKEN_NUM_INITIALIZER(struct cmd_port_tm_mark_ip_ecn_result,
+                             port_id, UINT16);
+
+cmdline_parse_token_num_t cmd_port_tm_mark_ip_ecn_green =
+       TOKEN_NUM_INITIALIZER(struct cmd_port_tm_mark_ip_ecn_result,
+                             green, UINT16);
+cmdline_parse_token_num_t cmd_port_tm_mark_ip_ecn_yellow =
+       TOKEN_NUM_INITIALIZER(struct cmd_port_tm_mark_ip_ecn_result,
+                             yellow, UINT16);
+cmdline_parse_token_num_t cmd_port_tm_mark_ip_ecn_red =
+       TOKEN_NUM_INITIALIZER(struct cmd_port_tm_mark_ip_ecn_result,
+                               red, UINT16);
+
+static void cmd_port_tm_mark_ip_ecn_parsed(void *parsed_result,
+       __attribute__((unused)) struct cmdline *cl,
+       __attribute__((unused)) void *data)
+{
+       struct cmd_port_tm_mark_ip_ecn_result *res = parsed_result;
+       struct rte_tm_error error;
+       portid_t port_id = res->port_id;
+       int green = res->green;
+       int yellow = res->yellow;
+       int red = res->red;
+       int ret;
+       if (port_id_is_invalid(port_id, ENABLED_WARN))
+               return;
+
+       memset(&error, 0, sizeof(struct rte_tm_error));
+       ret = rte_tm_mark_ip_ecn(port_id, green, yellow, red, &error);
+       if (ret != 0) {
+               print_err_msg(&error);
+               return;
+       }
+}
+
+cmdline_parse_inst_t cmd_port_tm_mark_ip_ecn = {
+       .f = cmd_port_tm_mark_ip_ecn_parsed,
+       .data = NULL,
+       .help_str = "set port tm mark ip_ecn <port> <green> <yellow> <red>",
+       .tokens = {
+               (void *)&cmd_port_tm_mark_ip_ecn_set,
+               (void *)&cmd_port_tm_mark_ip_ecn_port,
+               (void *)&cmd_port_tm_mark_ip_ecn_tm,
+               (void *)&cmd_port_tm_mark_ip_ecn_mark,
+               (void *)&cmd_port_tm_mark_ip_ecn_ip_ecn,
+               (void *)&cmd_port_tm_mark_ip_ecn_port_id,
+               (void *)&cmd_port_tm_mark_ip_ecn_green,
+               (void *)&cmd_port_tm_mark_ip_ecn_yellow,
+               (void *)&cmd_port_tm_mark_ip_ecn_red,
+               NULL,
+       },
+};
+
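Each of the three mark commands in this file reduces to a single rte_tm call
plus token parsing; the green/yellow/red arguments are per-color enable flags.
Direct use of the API looks like this sketch:

    #include <stdio.h>
    #include <rte_tm.h>

    /* Enable ECN marking for green and yellow packets on a port. */
    static int
    enable_ecn_marking(uint16_t port_id)
    {
            struct rte_tm_error error = { 0 };
            int ret;

            ret = rte_tm_mark_ip_ecn(port_id, 1 /* green */, 1 /* yellow */,
                                     0 /* red */, &error);
            if (ret != 0)
                    printf("ECN marking failed: %s\n",
                           error.message ? error.message : "(unspecified)");
            return ret;
    }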
+
+/* *** Port TM Mark IP DSCP *** */
+struct cmd_port_tm_mark_ip_dscp_result {
+       cmdline_fixed_string_t set;
+       cmdline_fixed_string_t port;
+       cmdline_fixed_string_t tm;
+       cmdline_fixed_string_t mark;
+       cmdline_fixed_string_t ip_dscp;
+       uint16_t port_id;
+       uint16_t green;
+       uint16_t yellow;
+       uint16_t red;
+};
+
+cmdline_parse_token_string_t cmd_port_tm_mark_ip_dscp_set =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_ip_dscp_result,
+                                set, "set");
+
+cmdline_parse_token_string_t cmd_port_tm_mark_ip_dscp_port =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_ip_dscp_result,
+                                port, "port");
+
+cmdline_parse_token_string_t cmd_port_tm_mark_ip_dscp_tm =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_ip_dscp_result, tm,
+                                "tm");
+
+cmdline_parse_token_string_t cmd_port_tm_mark_ip_dscp_mark =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_ip_dscp_result,
+                                mark, "mark");
+
+cmdline_parse_token_string_t cmd_port_tm_mark_ip_dscp_ip_dscp =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_ip_dscp_result,
+                                ip_dscp, "ip_dscp");
+cmdline_parse_token_num_t cmd_port_tm_mark_ip_dscp_port_id =
+       TOKEN_NUM_INITIALIZER(struct cmd_port_tm_mark_ip_dscp_result,
+                             port_id, UINT16);
+
+cmdline_parse_token_num_t cmd_port_tm_mark_ip_dscp_green =
+       TOKEN_NUM_INITIALIZER(struct cmd_port_tm_mark_ip_dscp_result,
+                               green, UINT16);
+cmdline_parse_token_num_t cmd_port_tm_mark_ip_dscp_yellow =
+       TOKEN_NUM_INITIALIZER(struct cmd_port_tm_mark_ip_dscp_result,
+                               yellow, UINT16);
+cmdline_parse_token_num_t cmd_port_tm_mark_ip_dscp_red =
+       TOKEN_NUM_INITIALIZER(struct cmd_port_tm_mark_ip_dscp_result,
+                               red, UINT16);
+
+static void cmd_port_tm_mark_ip_dscp_parsed(void *parsed_result,
+       __attribute__((unused)) struct cmdline *cl,
+       __attribute__((unused)) void *data)
+{
+       struct cmd_port_tm_mark_ip_dscp_result *res = parsed_result;
+       struct rte_tm_error error;
+       portid_t port_id = res->port_id;
+       int green = res->green;
+       int yellow = res->yellow;
+       int red = res->red;
+       int ret;
+       if (port_id_is_invalid(port_id, ENABLED_WARN))
+               return;
+
+       memset(&error, 0, sizeof(struct rte_tm_error));
+       ret = rte_tm_mark_ip_dscp(port_id, green, yellow, red, &error);
+       if (ret != 0) {
+               print_err_msg(&error);
+               return;
+       }
+}
+
+cmdline_parse_inst_t cmd_port_tm_mark_ip_dscp = {
+       .f = cmd_port_tm_mark_ip_dscp_parsed,
+       .data = NULL,
+       .help_str = "set port tm mark ip_dscp <port> <green> <yellow> <red>",
+       .tokens = {
+               (void *)&cmd_port_tm_mark_ip_dscp_set,
+               (void *)&cmd_port_tm_mark_ip_dscp_port,
+               (void *)&cmd_port_tm_mark_ip_dscp_tm,
+               (void *)&cmd_port_tm_mark_ip_dscp_mark,
+               (void *)&cmd_port_tm_mark_ip_dscp_ip_dscp,
+               (void *)&cmd_port_tm_mark_ip_dscp_port_id,
+               (void *)&cmd_port_tm_mark_ip_dscp_green,
+               (void *)&cmd_port_tm_mark_ip_dscp_yellow,
+               (void *)&cmd_port_tm_mark_ip_dscp_red,
+               NULL,
+       },
+};
+
+
+/* *** Port TM Mark VLAN_DEI *** */
+struct cmd_port_tm_mark_vlan_dei_result {
+       cmdline_fixed_string_t set;
+       cmdline_fixed_string_t port;
+       cmdline_fixed_string_t tm;
+       cmdline_fixed_string_t mark;
+       cmdline_fixed_string_t vlan_dei;
+       uint16_t port_id;
+       uint16_t green;
+       uint16_t yellow;
+       uint16_t red;
+};
+
+cmdline_parse_token_string_t cmd_port_tm_mark_vlan_dei_set =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_vlan_dei_result,
+                                set, "set");
+
+cmdline_parse_token_string_t cmd_port_tm_mark_vlan_dei_port =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_vlan_dei_result,
+                                port, "port");
+
+cmdline_parse_token_string_t cmd_port_tm_mark_vlan_dei_tm =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_vlan_dei_result, tm,
+                                "tm");
+
+cmdline_parse_token_string_t cmd_port_tm_mark_vlan_dei_mark =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_vlan_dei_result,
+                                mark, "mark");
+
+cmdline_parse_token_string_t cmd_port_tm_mark_vlan_dei_vlan_dei =
+       TOKEN_STRING_INITIALIZER(struct cmd_port_tm_mark_vlan_dei_result,
+                                vlan_dei, "vlan_dei");
+cmdline_parse_token_num_t cmd_port_tm_mark_vlan_dei_port_id =
+       TOKEN_NUM_INITIALIZER(struct cmd_port_tm_mark_vlan_dei_result,
+                             port_id, UINT16);
+
+cmdline_parse_token_num_t cmd_port_tm_mark_vlan_dei_green =
+       TOKEN_NUM_INITIALIZER(struct cmd_port_tm_mark_vlan_dei_result,
+                               green, UINT16);
+cmdline_parse_token_num_t cmd_port_tm_mark_vlan_dei_yellow =
+       TOKEN_NUM_INITIALIZER(struct cmd_port_tm_mark_vlan_dei_result,
+                               yellow, UINT16);
+cmdline_parse_token_num_t cmd_port_tm_mark_vlan_dei_red =
+       TOKEN_NUM_INITIALIZER(struct cmd_port_tm_mark_vlan_dei_result,
+                               red, UINT16);
+
+static void cmd_port_tm_mark_vlan_dei_parsed(void *parsed_result,
+       __attribute__((unused)) struct cmdline *cl,
+       __attribute__((unused)) void *data)
+{
+       struct cmd_port_tm_mark_vlan_dei_result *res = parsed_result;
+       struct rte_tm_error error;
+       portid_t port_id = res->port_id;
+       int green = res->green;
+       int yellow = res->yellow;
+       int red = res->red;
+       int ret;
+       if (port_id_is_invalid(port_id, ENABLED_WARN))
+               return;
+
+       memset(&error, 0, sizeof(struct rte_tm_error));
+       ret = rte_tm_mark_vlan_dei(port_id, green, yellow, red, &error);
+       if (ret != 0) {
+               print_err_msg(&error);
+               return;
+       }
+}
+
+cmdline_parse_inst_t cmd_port_tm_mark_vlan_dei = {
+       .f = cmd_port_tm_mark_vlan_dei_parsed,
+       .data = NULL,
+       .help_str = "set port tm mark vlan_dei <port> <green> <yellow> <red>",
+       .tokens = {
+               (void *)&cmd_port_tm_mark_vlan_dei_set,
+               (void *)&cmd_port_tm_mark_vlan_dei_port,
+               (void *)&cmd_port_tm_mark_vlan_dei_tm,
+               (void *)&cmd_port_tm_mark_vlan_dei_mark,
+               (void *)&cmd_port_tm_mark_vlan_dei_vlan_dei,
+               (void *)&cmd_port_tm_mark_vlan_dei_port_id,
+               (void *)&cmd_port_tm_mark_vlan_dei_green,
+               (void *)&cmd_port_tm_mark_vlan_dei_yellow,
+               (void *)&cmd_port_tm_mark_vlan_dei_red,
+               NULL,
+       },
+};
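
All three marking commands share one shape: per-color (green/yellow/red) enable
flags that are forwarded verbatim to the matching rte_tm_mark_*() API call. A
hedged interactive example, assuming the syntax given by the help strings above
(port number and flag values are illustrative):

    testpmd> set port tm mark ip_ecn 0 1 1 1
    testpmd> set port tm mark ip_dscp 0 1 1 0
    testpmd> set port tm mark vlan_dei 0 0 1 1
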
index b3a14ad..950cb75 100644 (file)
@@ -25,5 +25,8 @@ extern cmdline_parse_inst_t cmd_set_port_tm_node_parent;
 extern cmdline_parse_inst_t cmd_suspend_port_tm_node;
 extern cmdline_parse_inst_t cmd_resume_port_tm_node;
 extern cmdline_parse_inst_t cmd_port_tm_hierarchy_commit;
+extern cmdline_parse_inst_t cmd_port_tm_mark_vlan_dei;
+extern cmdline_parse_inst_t cmd_port_tm_mark_ip_ecn;
+extern cmdline_parse_inst_t cmd_port_tm_mark_ip_dscp;
 
 #endif /* _CMDLINE_TM_H_ */
index 14ccd68..b9e5dd9 100644 (file)
@@ -50,6 +50,7 @@
 #endif
 #include <rte_gro.h>
 #include <cmdline_parse_etheraddr.h>
+#include <rte_config.h>
 
 #include "testpmd.h"
 
@@ -74,6 +75,10 @@ static const struct {
 };
 
 const struct rss_type_info rss_type_table[] = {
+       { "all", ETH_RSS_IP | ETH_RSS_TCP |
+                       ETH_RSS_UDP | ETH_RSS_SCTP |
+                       ETH_RSS_L2_PAYLOAD },
+       { "none", 0 },
        { "ipv4", ETH_RSS_IPV4 },
        { "ipv4-frag", ETH_RSS_FRAG_IPV4 },
        { "ipv4-tcp", ETH_RSS_NONFRAG_IPV4_TCP },
@@ -410,6 +415,8 @@ port_infos_display(portid_t port_id)
        rte_eth_dev_get_name_by_port(port_id, name);
        printf("\nDevice name: %s", name);
        printf("\nDriver name: %s", dev_info.driver_name);
+       if (dev_info.device->devargs && dev_info.device->devargs->args)
+               printf("\nDevargs: %s", dev_info.device->devargs->args);
        printf("\nConnect to socket: %u", port->socket_id);
 
        if (port_numa[port_id] != NUMA_NO_CONFIG) {
@@ -461,12 +468,12 @@ port_infos_display(portid_t port_id)
        if (dev_info.reta_size > 0)
                printf("Redirection table size: %u\n", dev_info.reta_size);
        if (!dev_info.flow_type_rss_offloads)
-               printf("No flow type is supported.\n");
+               printf("No RSS offload flow type is supported.\n");
        else {
                uint16_t i;
                char *p;
 
-               printf("Supported flow types:\n");
+               printf("Supported RSS offload flow types:\n");
                for (i = RTE_ETH_FLOW_UNKNOWN + 1;
                     i < sizeof(dev_info.flow_type_rss_offloads) * CHAR_BIT; i++) {
                        if (!(dev_info.flow_type_rss_offloads & (1ULL << i)))
@@ -517,6 +524,43 @@ port_infos_display(portid_t port_id)
        }
 }
 
+void
+port_summary_header_display(void)
+{
+       uint16_t port_number;
+
+       port_number = rte_eth_dev_count_avail();
+       printf("Number of available ports: %i\n", port_number);
+       printf("%-4s %-17s %-12s %-14s %-8s %s\n", "Port", "MAC Address", "Name",
+                       "Driver", "Status", "Link");
+}
+
+void
+port_summary_display(portid_t port_id)
+{
+       struct ether_addr mac_addr;
+       struct rte_eth_link link;
+       struct rte_eth_dev_info dev_info;
+       char name[RTE_ETH_NAME_MAX_LEN];
+
+       if (port_id_is_invalid(port_id, ENABLED_WARN)) {
+               print_valid_ports();
+               return;
+       }
+
+       rte_eth_link_get_nowait(port_id, &link);
+       rte_eth_dev_info_get(port_id, &dev_info);
+       rte_eth_dev_get_name_by_port(port_id, name);
+       rte_eth_macaddr_get(port_id, &mac_addr);
+
+       printf("%-4d %02X:%02X:%02X:%02X:%02X:%02X %-12s %-14s %-8s %uMbps\n",
+               port_id, mac_addr.addr_bytes[0], mac_addr.addr_bytes[1],
+               mac_addr.addr_bytes[2], mac_addr.addr_bytes[3],
+               mac_addr.addr_bytes[4], mac_addr.addr_bytes[5], name,
+               dev_info.driver_name, (link.link_status) ? ("up") : ("down"),
+               (unsigned int) link.link_speed);
+}
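+
+/*
+ * Illustrative (made-up) output of the two helpers above; MAC addresses,
+ * device names and link speeds are placeholders:
+ *
+ * Number of available ports: 2
+ * Port MAC Address       Name         Driver         Status   Link
+ * 0    40:A6:B7:0D:02:F0 0000:3b:00.0 net_i40e       up       10000Mbps
+ * 1    40:A6:B7:0D:02:F1 0000:3b:00.1 net_i40e       down     0Mbps
+ */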
+
 void
 port_offload_cap_display(portid_t port_id)
 {
@@ -543,7 +587,7 @@ port_offload_cap_display(portid_t port_id)
        if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_QINQ_STRIP) {
                printf("Double VLANs stripped:         ");
                if (ports[port_id].dev_conf.rxmode.offloads &
-                   DEV_RX_OFFLOAD_VLAN_EXTEND)
+                   DEV_RX_OFFLOAD_QINQ_STRIP)
                        printf("on\n");
                else
                        printf("off\n");
@@ -576,8 +620,17 @@ port_offload_cap_display(portid_t port_id)
                        printf("off\n");
        }
 
+       if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SCTP_CKSUM) {
+               printf("RX SCTP checksum:              ");
+               if (ports[port_id].dev_conf.rxmode.offloads &
+                   DEV_RX_OFFLOAD_SCTP_CKSUM)
+                       printf("on\n");
+               else
+                       printf("off\n");
+       }
+
        if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM) {
-               printf("RX Outer IPv4 checksum:               ");
+               printf("RX Outer IPv4 checksum:        ");
                if (ports[port_id].dev_conf.rxmode.offloads &
                    DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM)
                        printf("on\n");
@@ -585,19 +638,19 @@ port_offload_cap_display(portid_t port_id)
                        printf("off\n");
        }
 
-       if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO) {
-               printf("Large receive offload:         ");
+       if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_OUTER_UDP_CKSUM) {
+               printf("RX Outer UDP checksum:         ");
                if (ports[port_id].dev_conf.rxmode.offloads &
-                   DEV_RX_OFFLOAD_TCP_LRO)
+                   DEV_RX_OFFLOAD_OUTER_UDP_CKSUM)
                        printf("on\n");
                else
                        printf("off\n");
        }
 
-       if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT) {
-               printf("VLAN insert:                   ");
-               if (ports[port_id].dev_conf.txmode.offloads &
-                   DEV_TX_OFFLOAD_VLAN_INSERT)
+       if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_TCP_LRO) {
+               printf("Large receive offload:         ");
+               if (ports[port_id].dev_conf.rxmode.offloads &
+                   DEV_RX_OFFLOAD_TCP_LRO)
                        printf("on\n");
                else
                        printf("off\n");
@@ -612,6 +665,33 @@ port_offload_cap_display(portid_t port_id)
                        printf("off\n");
        }
 
+       if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_KEEP_CRC) {
+               printf("Rx Keep CRC:                   ");
+               if (ports[port_id].dev_conf.rxmode.offloads &
+                   DEV_RX_OFFLOAD_KEEP_CRC)
+                       printf("on\n");
+               else
+                       printf("off\n");
+       }
+
+       if (dev_info.rx_offload_capa & DEV_RX_OFFLOAD_SECURITY) {
+               printf("RX offload security:           ");
+               if (ports[port_id].dev_conf.rxmode.offloads &
+                   DEV_RX_OFFLOAD_SECURITY)
+                       printf("on\n");
+               else
+                       printf("off\n");
+       }
+
+       if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_VLAN_INSERT) {
+               printf("VLAN insert:                   ");
+               if (ports[port_id].dev_conf.txmode.offloads &
+                   DEV_TX_OFFLOAD_VLAN_INSERT)
+                       printf("on\n");
+               else
+                       printf("off\n");
+       }
+
        if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_QINQ_INSERT) {
                printf("Double VLANs insert:           ");
                if (ports[port_id].dev_conf.txmode.offloads &
@@ -737,6 +817,16 @@ port_offload_cap_display(portid_t port_id)
                else
                        printf("off\n");
        }
+
+       if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) {
+               printf("TX Outer UDP checksum:         ");
+               if (ports[port_id].dev_conf.txmode.offloads &
+                   DEV_TX_OFFLOAD_OUTER_UDP_CKSUM)
+                       printf("on\n");
+               else
+                       printf("off\n");
+       }
+
 }
 
 int
@@ -984,324 +1074,35 @@ port_mtu_set(portid_t port_id, uint16_t mtu)
 
 /* Generic flow management functions. */
 
-/** Generate flow_item[] entry. */
-#define MK_FLOW_ITEM(t, s) \
-       [RTE_FLOW_ITEM_TYPE_ ## t] = { \
-               .name = # t, \
-               .size = s, \
-       }
-
-/** Information about known flow pattern items. */
-static const struct {
-       const char *name;
-       size_t size;
-} flow_item[] = {
-       MK_FLOW_ITEM(END, 0),
-       MK_FLOW_ITEM(VOID, 0),
-       MK_FLOW_ITEM(INVERT, 0),
-       MK_FLOW_ITEM(ANY, sizeof(struct rte_flow_item_any)),
-       MK_FLOW_ITEM(PF, 0),
-       MK_FLOW_ITEM(VF, sizeof(struct rte_flow_item_vf)),
-       MK_FLOW_ITEM(PHY_PORT, sizeof(struct rte_flow_item_phy_port)),
-       MK_FLOW_ITEM(PORT_ID, sizeof(struct rte_flow_item_port_id)),
-       MK_FLOW_ITEM(RAW, sizeof(struct rte_flow_item_raw)),
-       MK_FLOW_ITEM(ETH, sizeof(struct rte_flow_item_eth)),
-       MK_FLOW_ITEM(VLAN, sizeof(struct rte_flow_item_vlan)),
-       MK_FLOW_ITEM(IPV4, sizeof(struct rte_flow_item_ipv4)),
-       MK_FLOW_ITEM(IPV6, sizeof(struct rte_flow_item_ipv6)),
-       MK_FLOW_ITEM(ICMP, sizeof(struct rte_flow_item_icmp)),
-       MK_FLOW_ITEM(UDP, sizeof(struct rte_flow_item_udp)),
-       MK_FLOW_ITEM(TCP, sizeof(struct rte_flow_item_tcp)),
-       MK_FLOW_ITEM(SCTP, sizeof(struct rte_flow_item_sctp)),
-       MK_FLOW_ITEM(VXLAN, sizeof(struct rte_flow_item_vxlan)),
-       MK_FLOW_ITEM(E_TAG, sizeof(struct rte_flow_item_e_tag)),
-       MK_FLOW_ITEM(NVGRE, sizeof(struct rte_flow_item_nvgre)),
-       MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)),
-       MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)),
-       MK_FLOW_ITEM(FUZZY, sizeof(struct rte_flow_item_fuzzy)),
-       MK_FLOW_ITEM(GTP, sizeof(struct rte_flow_item_gtp)),
-       MK_FLOW_ITEM(GTPC, sizeof(struct rte_flow_item_gtp)),
-       MK_FLOW_ITEM(GTPU, sizeof(struct rte_flow_item_gtp)),
-       MK_FLOW_ITEM(GENEVE, sizeof(struct rte_flow_item_geneve)),
-       MK_FLOW_ITEM(VXLAN_GPE, sizeof(struct rte_flow_item_vxlan_gpe)),
-       MK_FLOW_ITEM(ARP_ETH_IPV4, sizeof(struct rte_flow_item_arp_eth_ipv4)),
-       MK_FLOW_ITEM(IPV6_EXT, sizeof(struct rte_flow_item_ipv6_ext)),
-       MK_FLOW_ITEM(ICMP6, sizeof(struct rte_flow_item_icmp6)),
-       MK_FLOW_ITEM(ICMP6_ND_NS, sizeof(struct rte_flow_item_icmp6_nd_ns)),
-       MK_FLOW_ITEM(ICMP6_ND_NA, sizeof(struct rte_flow_item_icmp6_nd_na)),
-       MK_FLOW_ITEM(ICMP6_ND_OPT, sizeof(struct rte_flow_item_icmp6_nd_opt)),
-       MK_FLOW_ITEM(ICMP6_ND_OPT_SLA_ETH,
-                    sizeof(struct rte_flow_item_icmp6_nd_opt_sla_eth)),
-       MK_FLOW_ITEM(ICMP6_ND_OPT_TLA_ETH,
-                    sizeof(struct rte_flow_item_icmp6_nd_opt_tla_eth)),
-};
-
-/** Pattern item specification types. */
-enum item_spec_type {
-       ITEM_SPEC,
-       ITEM_LAST,
-       ITEM_MASK,
-};
-
-/** Compute storage space needed by item specification and copy it. */
-static size_t
-flow_item_spec_copy(void *buf, const struct rte_flow_item *item,
-                   enum item_spec_type type)
-{
-       size_t size = 0;
-       const void *data =
-               type == ITEM_SPEC ? item->spec :
-               type == ITEM_LAST ? item->last :
-               type == ITEM_MASK ? item->mask :
-               NULL;
-
-       if (!item->spec || !data)
-               goto empty;
-       switch (item->type) {
-               union {
-                       const struct rte_flow_item_raw *raw;
-               } spec;
-               union {
-                       const struct rte_flow_item_raw *raw;
-               } last;
-               union {
-                       const struct rte_flow_item_raw *raw;
-               } mask;
-               union {
-                       const struct rte_flow_item_raw *raw;
-               } src;
-               union {
-                       struct rte_flow_item_raw *raw;
-               } dst;
-               size_t off;
-
-       case RTE_FLOW_ITEM_TYPE_RAW:
-               spec.raw = item->spec;
-               last.raw = item->last ? item->last : item->spec;
-               mask.raw = item->mask ? item->mask : &rte_flow_item_raw_mask;
-               src.raw = data;
-               dst.raw = buf;
-               off = RTE_ALIGN_CEIL(sizeof(struct rte_flow_item_raw),
-                                    sizeof(*src.raw->pattern));
-               if (type == ITEM_SPEC ||
-                   (type == ITEM_MASK &&
-                    ((spec.raw->length & mask.raw->length) >=
-                     (last.raw->length & mask.raw->length))))
-                       size = spec.raw->length & mask.raw->length;
-               else
-                       size = last.raw->length & mask.raw->length;
-               size = off + size * sizeof(*src.raw->pattern);
-               if (dst.raw) {
-                       memcpy(dst.raw, src.raw, sizeof(*src.raw));
-                       dst.raw->pattern = memcpy((uint8_t *)dst.raw + off,
-                                                 src.raw->pattern,
-                                                 size - off);
-               }
-               break;
-       default:
-               size = flow_item[item->type].size;
-               if (buf)
-                       memcpy(buf, data, size);
-               break;
-       }
-empty:
-       return RTE_ALIGN_CEIL(size, sizeof(double));
-}
-
-/** Generate flow_action[] entry. */
-#define MK_FLOW_ACTION(t, s) \
-       [RTE_FLOW_ACTION_TYPE_ ## t] = { \
-               .name = # t, \
-               .size = s, \
-       }
-
-/** Information about known flow actions. */
-static const struct {
-       const char *name;
-       size_t size;
-} flow_action[] = {
-       MK_FLOW_ACTION(END, 0),
-       MK_FLOW_ACTION(VOID, 0),
-       MK_FLOW_ACTION(PASSTHRU, 0),
-       MK_FLOW_ACTION(MARK, sizeof(struct rte_flow_action_mark)),
-       MK_FLOW_ACTION(FLAG, 0),
-       MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)),
-       MK_FLOW_ACTION(DROP, 0),
-       MK_FLOW_ACTION(COUNT, sizeof(struct rte_flow_action_count)),
-       MK_FLOW_ACTION(RSS, sizeof(struct rte_flow_action_rss)),
-       MK_FLOW_ACTION(PF, 0),
-       MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)),
-       MK_FLOW_ACTION(PHY_PORT, sizeof(struct rte_flow_action_phy_port)),
-       MK_FLOW_ACTION(PORT_ID, sizeof(struct rte_flow_action_port_id)),
-       MK_FLOW_ACTION(METER, sizeof(struct rte_flow_action_meter)),
-       MK_FLOW_ACTION(OF_SET_MPLS_TTL,
-                      sizeof(struct rte_flow_action_of_set_mpls_ttl)),
-       MK_FLOW_ACTION(OF_DEC_MPLS_TTL, 0),
-       MK_FLOW_ACTION(OF_SET_NW_TTL,
-                      sizeof(struct rte_flow_action_of_set_nw_ttl)),
-       MK_FLOW_ACTION(OF_DEC_NW_TTL, 0),
-       MK_FLOW_ACTION(OF_COPY_TTL_OUT, 0),
-       MK_FLOW_ACTION(OF_COPY_TTL_IN, 0),
-       MK_FLOW_ACTION(OF_POP_VLAN, 0),
-       MK_FLOW_ACTION(OF_PUSH_VLAN,
-                      sizeof(struct rte_flow_action_of_push_vlan)),
-       MK_FLOW_ACTION(OF_SET_VLAN_VID,
-                      sizeof(struct rte_flow_action_of_set_vlan_vid)),
-       MK_FLOW_ACTION(OF_SET_VLAN_PCP,
-                      sizeof(struct rte_flow_action_of_set_vlan_pcp)),
-       MK_FLOW_ACTION(OF_POP_MPLS,
-                      sizeof(struct rte_flow_action_of_pop_mpls)),
-       MK_FLOW_ACTION(OF_PUSH_MPLS,
-                      sizeof(struct rte_flow_action_of_push_mpls)),
-};
-
-/** Compute storage space needed by action configuration and copy it. */
-static size_t
-flow_action_conf_copy(void *buf, const struct rte_flow_action *action)
-{
-       size_t size = 0;
-
-       if (!action->conf)
-               goto empty;
-       switch (action->type) {
-               union {
-                       const struct rte_flow_action_rss *rss;
-               } src;
-               union {
-                       struct rte_flow_action_rss *rss;
-               } dst;
-               size_t off;
-
-       case RTE_FLOW_ACTION_TYPE_RSS:
-               src.rss = action->conf;
-               dst.rss = buf;
-               off = 0;
-               if (dst.rss)
-                       *dst.rss = (struct rte_flow_action_rss){
-                               .func = src.rss->func,
-                               .level = src.rss->level,
-                               .types = src.rss->types,
-                               .key_len = src.rss->key_len,
-                               .queue_num = src.rss->queue_num,
-                       };
-               off += sizeof(*src.rss);
-               if (src.rss->key_len) {
-                       off = RTE_ALIGN_CEIL(off, sizeof(double));
-                       size = sizeof(*src.rss->key) * src.rss->key_len;
-                       if (dst.rss)
-                               dst.rss->key = memcpy
-                                       ((void *)((uintptr_t)dst.rss + off),
-                                        src.rss->key, size);
-                       off += size;
-               }
-               if (src.rss->queue_num) {
-                       off = RTE_ALIGN_CEIL(off, sizeof(double));
-                       size = sizeof(*src.rss->queue) * src.rss->queue_num;
-                       if (dst.rss)
-                               dst.rss->queue = memcpy
-                                       ((void *)((uintptr_t)dst.rss + off),
-                                        src.rss->queue, size);
-                       off += size;
-               }
-               size = off;
-               break;
-       default:
-               size = flow_action[action->type].size;
-               if (buf)
-                       memcpy(buf, action->conf, size);
-               break;
-       }
-empty:
-       return RTE_ALIGN_CEIL(size, sizeof(double));
-}
-
 /** Generate a port_flow entry from attributes/pattern/actions. */
 static struct port_flow *
 port_flow_new(const struct rte_flow_attr *attr,
              const struct rte_flow_item *pattern,
-             const struct rte_flow_action *actions)
-{
-       const struct rte_flow_item *item;
-       const struct rte_flow_action *action;
-       struct port_flow *pf = NULL;
-       size_t tmp;
-       size_t off1 = 0;
-       size_t off2 = 0;
-       int err = ENOTSUP;
-
-store:
-       item = pattern;
-       if (pf)
-               pf->pattern = (void *)&pf->data[off1];
-       do {
-               struct rte_flow_item *dst = NULL;
-
-               if ((unsigned int)item->type >= RTE_DIM(flow_item) ||
-                   !flow_item[item->type].name)
-                       goto notsup;
-               if (pf)
-                       dst = memcpy(pf->data + off1, item, sizeof(*item));
-               off1 += sizeof(*item);
-               if (item->spec) {
-                       if (pf)
-                               dst->spec = pf->data + off2;
-                       off2 += flow_item_spec_copy
-                               (pf ? pf->data + off2 : NULL, item, ITEM_SPEC);
-               }
-               if (item->last) {
-                       if (pf)
-                               dst->last = pf->data + off2;
-                       off2 += flow_item_spec_copy
-                               (pf ? pf->data + off2 : NULL, item, ITEM_LAST);
-               }
-               if (item->mask) {
-                       if (pf)
-                               dst->mask = pf->data + off2;
-                       off2 += flow_item_spec_copy
-                               (pf ? pf->data + off2 : NULL, item, ITEM_MASK);
-               }
-               off2 = RTE_ALIGN_CEIL(off2, sizeof(double));
-       } while ((item++)->type != RTE_FLOW_ITEM_TYPE_END);
-       off1 = RTE_ALIGN_CEIL(off1, sizeof(double));
-       action = actions;
-       if (pf)
-               pf->actions = (void *)&pf->data[off1];
-       do {
-               struct rte_flow_action *dst = NULL;
-
-               if ((unsigned int)action->type >= RTE_DIM(flow_action) ||
-                   !flow_action[action->type].name)
-                       goto notsup;
-               if (pf)
-                       dst = memcpy(pf->data + off1, action, sizeof(*action));
-               off1 += sizeof(*action);
-               if (action->conf) {
-                       if (pf)
-                               dst->conf = pf->data + off2;
-                       off2 += flow_action_conf_copy
-                               (pf ? pf->data + off2 : NULL, action);
-               }
-               off2 = RTE_ALIGN_CEIL(off2, sizeof(double));
-       } while ((action++)->type != RTE_FLOW_ACTION_TYPE_END);
-       if (pf != NULL)
+             const struct rte_flow_action *actions,
+             struct rte_flow_error *error)
+{
+       const struct rte_flow_conv_rule rule = {
+               .attr_ro = attr,
+               .pattern_ro = pattern,
+               .actions_ro = actions,
+       };
+       struct port_flow *pf;
+       int ret;
+
+       ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, NULL, 0, &rule, error);
+       if (ret < 0)
+               return NULL;
+       pf = calloc(1, offsetof(struct port_flow, rule) + ret);
+       if (!pf) {
+               rte_flow_error_set
+                       (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                        "calloc() failed");
+               return NULL;
+       }
+       if (rte_flow_conv(RTE_FLOW_CONV_OP_RULE, &pf->rule, ret, &rule,
+                         error) >= 0)
                return pf;
-       off1 = RTE_ALIGN_CEIL(off1, sizeof(double));
-       tmp = RTE_ALIGN_CEIL(offsetof(struct port_flow, data), sizeof(double));
-       pf = calloc(1, tmp + off1 + off2);
-       if (pf == NULL)
-               err = errno;
-       else {
-               *pf = (const struct port_flow){
-                       .size = tmp + off1 + off2,
-                       .attr = *attr,
-               };
-               tmp -= offsetof(struct port_flow, data);
-               off2 = tmp + off1;
-               off1 = tmp;
-               goto store;
-       }
-notsup:
-       rte_errno = err;
+       free(pf);
        return NULL;
 }
 
@@ -1337,11 +1138,12 @@ port_flow_complain(struct rte_flow_error *error)
                errstr = "unknown type";
        else
                errstr = errstrlist[error->type];
-       printf("Caught error type %d (%s): %s%s\n",
+       printf("Caught error type %d (%s): %s%s: %s\n",
               error->type, errstr,
               error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
                                        error->cause), buf) : "",
-              error->message ? error->message : "(no stated reason)");
+              error->message ? error->message : "(no stated reason)",
+              rte_strerror(err));
        return -err;
 }
 
@@ -1391,13 +1193,10 @@ port_flow_create(portid_t port_id,
                id = port->flow_list->id + 1;
        } else
                id = 0;
-       pf = port_flow_new(attr, pattern, actions);
+       pf = port_flow_new(attr, pattern, actions, &error);
        if (!pf) {
-               int err = rte_errno;
-
-               printf("Cannot allocate flow: %s\n", rte_strerror(err));
                rte_flow_destroy(port_id, flow, NULL);
-               return -err;
+               return port_flow_complain(&error);
        }
        pf->next = port->flow_list;
        pf->id = id;
@@ -1489,6 +1288,7 @@ port_flow_query(portid_t port_id, uint32_t rule,
        union {
                struct rte_flow_query_count count;
        } query;
+       int ret;
 
        if (port_id_is_invalid(port_id, ENABLED_WARN) ||
            port_id == (portid_t)RTE_PORT_ALL)
@@ -1501,11 +1301,11 @@ port_flow_query(portid_t port_id, uint32_t rule,
                printf("Flow rule #%u not found\n", rule);
                return -ENOENT;
        }
-       if ((unsigned int)action->type >= RTE_DIM(flow_action) ||
-           !flow_action[action->type].name)
-               name = "unknown";
-       else
-               name = flow_action[action->type].name;
+       ret = rte_flow_conv(RTE_FLOW_CONV_OP_ACTION_NAME_PTR,
+                           &name, sizeof(name),
+                           (void *)(uintptr_t)action->type, &error);
+       if (ret < 0)
+               return port_flow_complain(&error);
        switch (action->type) {
        case RTE_FLOW_ACTION_TYPE_COUNT:
                break;
@@ -1558,48 +1358,63 @@ port_flow_list(portid_t port_id, uint32_t n, const uint32_t group[n])
        /* Sort flows by group, priority and ID. */
        for (pf = port->flow_list; pf != NULL; pf = pf->next) {
                struct port_flow **tmp;
+               const struct rte_flow_attr *curr = pf->rule.attr;
 
                if (n) {
                        /* Filter out unwanted groups. */
                        for (i = 0; i != n; ++i)
-                               if (pf->attr.group == group[i])
+                               if (curr->group == group[i])
                                        break;
                        if (i == n)
                                continue;
                }
-               tmp = &list;
-               while (*tmp &&
-                      (pf->attr.group > (*tmp)->attr.group ||
-                       (pf->attr.group == (*tmp)->attr.group &&
-                        pf->attr.priority > (*tmp)->attr.priority) ||
-                       (pf->attr.group == (*tmp)->attr.group &&
-                        pf->attr.priority == (*tmp)->attr.priority &&
-                        pf->id > (*tmp)->id)))
-                       tmp = &(*tmp)->tmp;
+               for (tmp = &list; *tmp; tmp = &(*tmp)->tmp) {
+                       const struct rte_flow_attr *comp = (*tmp)->rule.attr;
+
+                       if (curr->group > comp->group ||
+                           (curr->group == comp->group &&
+                            curr->priority > comp->priority) ||
+                           (curr->group == comp->group &&
+                            curr->priority == comp->priority &&
+                            pf->id > (*tmp)->id))
+                               continue;
+                       break;
+               }
                pf->tmp = *tmp;
                *tmp = pf;
        }
        printf("ID\tGroup\tPrio\tAttr\tRule\n");
        for (pf = list; pf != NULL; pf = pf->tmp) {
-               const struct rte_flow_item *item = pf->pattern;
-               const struct rte_flow_action *action = pf->actions;
+               const struct rte_flow_item *item = pf->rule.pattern;
+               const struct rte_flow_action *action = pf->rule.actions;
+               const char *name;
 
                printf("%" PRIu32 "\t%" PRIu32 "\t%" PRIu32 "\t%c%c%c\t",
                       pf->id,
-                      pf->attr.group,
-                      pf->attr.priority,
-                      pf->attr.ingress ? 'i' : '-',
-                      pf->attr.egress ? 'e' : '-',
-                      pf->attr.transfer ? 't' : '-');
+                      pf->rule.attr->group,
+                      pf->rule.attr->priority,
+                      pf->rule.attr->ingress ? 'i' : '-',
+                      pf->rule.attr->egress ? 'e' : '-',
+                      pf->rule.attr->transfer ? 't' : '-');
                while (item->type != RTE_FLOW_ITEM_TYPE_END) {
+                       if (rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR,
+                                         &name, sizeof(name),
+                                         (void *)(uintptr_t)item->type,
+                                         NULL) <= 0)
+                               name = "[UNKNOWN]";
                        if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
-                               printf("%s ", flow_item[item->type].name);
+                               printf("%s ", name);
                        ++item;
                }
                printf("=>");
                while (action->type != RTE_FLOW_ACTION_TYPE_END) {
+                       if (rte_flow_conv(RTE_FLOW_CONV_OP_ACTION_NAME_PTR,
+                                         &name, sizeof(name),
+                                         (void *)(uintptr_t)action->type,
+                                         NULL) <= 0)
+                               name = "[UNKNOWN]";
                        if (action->type != RTE_FLOW_ACTION_TYPE_VOID)
-                               printf(" %s", flow_action[action->type].name);
+                               printf(" %s", name);
                        ++action;
                }
                printf("\n");
@@ -1669,8 +1484,8 @@ ring_dma_zone_lookup(const char *ring_name, portid_t port_id, uint16_t q_id)
        char mz_name[RTE_MEMZONE_NAMESIZE];
        const struct rte_memzone *mz;
 
-       snprintf(mz_name, sizeof(mz_name), "%s_%s_%d_%d",
-                ports[port_id].dev_info.driver_name, ring_name, port_id, q_id);
+       snprintf(mz_name, sizeof(mz_name), "eth_p%d_q%d_%s",
+                       port_id, q_id, ring_name);
        mz = rte_memzone_lookup(mz_name);
        if (mz == NULL)
                printf("%s ring memory zoneof (port %d, queue %d) not"
@@ -1927,9 +1742,9 @@ port_rss_reta_info(portid_t port_id,
  * key of the port.
  */
 void
-port_rss_hash_conf_show(portid_t port_id, char rss_info[], int show_rss_key)
+port_rss_hash_conf_show(portid_t port_id, int show_rss_key)
 {
-       struct rte_eth_rss_conf rss_conf;
+       struct rte_eth_rss_conf rss_conf = {0};
        uint8_t rss_key[RSS_HASH_KEY_LENGTH];
        uint64_t rss_hf;
        uint8_t i;
@@ -1940,7 +1755,6 @@ port_rss_hash_conf_show(portid_t port_id, char rss_info[], int show_rss_key)
        if (port_id_is_invalid(port_id, ENABLED_WARN))
                return;
 
-       memset(&dev_info, 0, sizeof(dev_info));
        rte_eth_dev_info_get(port_id, &dev_info);
        if (dev_info.hash_key_size > 0 &&
                        dev_info.hash_key_size <= sizeof(rss_key))
@@ -1950,12 +1764,6 @@ port_rss_hash_conf_show(portid_t port_id, char rss_info[], int show_rss_key)
                return;
        }
 
-       rss_conf.rss_hf = 0;
-       for (i = 0; rss_type_table[i].str; i++) {
-               if (!strcmp(rss_info, rss_type_table[i].str))
-                       rss_conf.rss_hf = rss_type_table[i].rss_type;
-       }
-
        /* Get RSS hash key if asked to display it */
        rss_conf.rss_key = (show_rss_key) ? rss_key : NULL;
        rss_conf.rss_key_len = hash_key_size;
@@ -2403,6 +2211,23 @@ fwd_config_setup(void)
                simple_fwd_config_setup();
 }
 
+static const char *
+mp_alloc_to_str(uint8_t mode)
+{
+       switch (mode) {
+       case MP_ALLOC_NATIVE:
+               return "native";
+       case MP_ALLOC_ANON:
+               return "anon";
+       case MP_ALLOC_XMEM:
+               return "xmem";
+       case MP_ALLOC_XMEM_HUGE:
+               return "xmemhuge";
+       default:
+               return "invalid";
+       }
+}
+
 void
 pkt_fwd_config_display(struct fwd_config *cfg)
 {
@@ -2411,12 +2236,12 @@ pkt_fwd_config_display(struct fwd_config *cfg)
        streamid_t sm_id;
 
        printf("%s packet forwarding%s - ports=%d - cores=%d - streams=%d - "
-               "NUMA support %s, MP over anonymous pages %s\n",
+               "NUMA support %s, MP allocation mode: %s\n",
                cfg->fwd_eng->fwd_mode_name,
                retry_enabled == 0 ? "" : " with retry",
                cfg->nb_fwd_ports, cfg->nb_fwd_lcores, cfg->nb_fwd_streams,
                numa_support == 1 ? "enabled" : "disabled",
-               mp_anon != 0 ? "enabled" : "disabled");
+               mp_alloc_to_str(mp_alloc_type));
 
        if (retry_enabled)
                printf("TX retry num: %u, delay between TX retries: %uus\n",
@@ -2885,12 +2710,103 @@ set_pkt_forwarding_mode(const char *fwd_mode_name)
        printf("Invalid %s packet forwarding mode\n", fwd_mode_name);
 }
 
+void
+add_rx_dump_callbacks(portid_t portid)
+{
+       struct rte_eth_dev_info dev_info;
+       uint16_t queue;
+
+       if (port_id_is_invalid(portid, ENABLED_WARN))
+               return;
+
+       rte_eth_dev_info_get(portid, &dev_info);
+       for (queue = 0; queue < dev_info.nb_rx_queues; queue++)
+               if (!ports[portid].rx_dump_cb[queue])
+                       ports[portid].rx_dump_cb[queue] =
+                               rte_eth_add_rx_callback(portid, queue,
+                                       dump_rx_pkts, NULL);
+}
+
+void
+add_tx_dump_callbacks(portid_t portid)
+{
+       struct rte_eth_dev_info dev_info;
+       uint16_t queue;
+
+       if (port_id_is_invalid(portid, ENABLED_WARN))
+               return;
+       rte_eth_dev_info_get(portid, &dev_info);
+       for (queue = 0; queue < dev_info.nb_tx_queues; queue++)
+               if (!ports[portid].tx_dump_cb[queue])
+                       ports[portid].tx_dump_cb[queue] =
+                               rte_eth_add_tx_callback(portid, queue,
+                                                       dump_tx_pkts, NULL);
+}
+
+void
+remove_rx_dump_callbacks(portid_t portid)
+{
+       struct rte_eth_dev_info dev_info;
+       uint16_t queue;
+
+       if (port_id_is_invalid(portid, ENABLED_WARN))
+               return;
+       rte_eth_dev_info_get(portid, &dev_info);
+       for (queue = 0; queue < dev_info.nb_rx_queues; queue++)
+               if (ports[portid].rx_dump_cb[queue]) {
+                       rte_eth_remove_rx_callback(portid, queue,
+                               ports[portid].rx_dump_cb[queue]);
+                       ports[portid].rx_dump_cb[queue] = NULL;
+               }
+}
+
+void
+remove_tx_dump_callbacks(portid_t portid)
+{
+       struct rte_eth_dev_info dev_info;
+       uint16_t queue;
+
+       if (port_id_is_invalid(portid, ENABLED_WARN))
+               return;
+       rte_eth_dev_info_get(portid, &dev_info);
+       for (queue = 0; queue < dev_info.nb_tx_queues; queue++)
+               if (ports[portid].tx_dump_cb[queue]) {
+                       rte_eth_remove_tx_callback(portid, queue,
+                               ports[portid].tx_dump_cb[queue]);
+                       ports[portid].tx_dump_cb[queue] = NULL;
+               }
+}
+
+void
+configure_rxtx_dump_callbacks(uint16_t verbose)
+{
+       portid_t portid;
+
+#ifndef RTE_ETHDEV_RXTX_CALLBACKS
+	TESTPMD_LOG(ERR, "setting rxtx callbacks is not enabled\n");
+	return;
+#endif
+
+	RTE_ETH_FOREACH_DEV(portid) {
+               if (verbose == 1 || verbose > 2)
+                       add_rx_dump_callbacks(portid);
+               else
+                       remove_rx_dump_callbacks(portid);
+               if (verbose >= 2)
+                       add_tx_dump_callbacks(portid);
+               else
+                       remove_tx_dump_callbacks(portid);
+       }
+}
+
 void
 set_verbose_level(uint16_t vb_level)
 {
        printf("Change verbose level from %u to %u\n",
               (unsigned int) verbose_level, (unsigned int) vb_level);
        verbose_level = vb_level;
+       configure_rxtx_dump_callbacks(verbose_level);
 }
 
 void
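
Net effect on the verbose knob: level 1 installs only the Rx dump callback,
level 2 only the Tx one, and level 3 or higher installs both; dropping back to
0 removes them again. In an interactive session this is driven by the existing
command (level value illustrative):

    testpmd> set verbose 3
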
index 4948292..dce4b9b 100644 (file)
@@ -468,10 +468,15 @@ process_outer_cksums(void *outer_l3_hdr, struct testpmd_offload_info *info,
        if (info->outer_l4_proto != IPPROTO_UDP)
                return ol_flags;
 
+       /* Skip SW outer UDP checksum generation if HW supports it */
+       if (tx_offloads & DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) {
+               ol_flags |= PKT_TX_OUTER_UDP_CKSUM;
+               return ol_flags;
+       }
+
        udp_hdr = (struct udp_hdr *)((char *)outer_l3_hdr + info->outer_l3_len);
 
-       /* outer UDP checksum is done in software as we have no hardware
-        * supporting it today, and no API for it. In the other side, for
+       /* outer UDP checksum is done in software. On the other hand, for
         * UDP tunneling, like VXLAN or Geneve, outer UDP checksum can be
         * set to zero.
         *
@@ -696,6 +701,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
        uint32_t retry;
        uint32_t rx_bad_ip_csum;
        uint32_t rx_bad_l4_csum;
+       uint32_t rx_bad_outer_l4_csum;
        struct testpmd_offload_info info;
        uint16_t nb_segments = 0;
        int ret;
@@ -721,6 +727,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
        fs->rx_packets += nb_rx;
        rx_bad_ip_csum = 0;
        rx_bad_l4_csum = 0;
+       rx_bad_outer_l4_csum = 0;
        gro_enable = gro_ports[fs->rx_port].enable;
 
        txp = &ports[fs->tx_port];
@@ -748,6 +755,8 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
                        rx_bad_ip_csum += 1;
                if ((rx_ol_flags & PKT_RX_L4_CKSUM_MASK) == PKT_RX_L4_CKSUM_BAD)
                        rx_bad_l4_csum += 1;
+               if (rx_ol_flags & PKT_RX_OUTER_L4_CKSUM_BAD)
+                       rx_bad_outer_l4_csum += 1;
 
                /* step 1: dissect packet, parsing optional vlan, ip4/ip6, vxlan
                 * and inner headers */
@@ -826,6 +835,8 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
                        if (info.tunnel_tso_segsz ||
                            (tx_offloads &
                             DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) ||
+                           (tx_offloads &
+                            DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) ||
                            (tx_ol_flags & PKT_TX_OUTER_IPV6)) {
                                m->outer_l2_len = info.outer_l2_len;
                                m->outer_l3_len = info.outer_l3_len;
@@ -898,6 +909,8 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
                        if (info.is_tunnel == 1) {
                                if ((tx_offloads &
                                    DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) ||
+                                   (tx_offloads &
+                                   DEV_TX_OFFLOAD_OUTER_UDP_CKSUM) ||
                                    (tx_ol_flags & PKT_TX_OUTER_IPV6))
                                        printf("tx: m->outer_l2_len=%d "
                                                "m->outer_l3_len=%d\n",
@@ -982,6 +995,7 @@ pkt_burst_checksum_forward(struct fwd_stream *fs)
        fs->tx_packets += nb_tx;
        fs->rx_bad_ip_csum += rx_bad_ip_csum;
        fs->rx_bad_l4_csum += rx_bad_l4_csum;
+       fs->rx_bad_outer_l4_csum += rx_bad_outer_l4_csum;
 
 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
        fs->tx_burst_stats.pkt_burst_spread[nb_tx]++;
index a0b3be0..6006c60 100644 (file)
@@ -17,12 +17,14 @@ sources = files('cmdline.c',
        'iofwd.c',
        'macfwd.c',
        'macswap.c',
+       'noisy_vnf.c',
        'parameters.c',
        'rxonly.c',
        'testpmd.c',
-       'txonly.c')
+       'txonly.c',
+       'util.c')
 
-deps = ['ethdev', 'gro', 'gso', 'cmdline', 'metrics', 'meter', 'bus_pci']
+deps += ['ethdev', 'gro', 'gso', 'cmdline', 'metrics', 'meter', 'bus_pci']
 if dpdk_conf.has('RTE_LIBRTE_PDUMP')
        deps += 'pdump'
 endif
diff --git a/app/test-pmd/noisy_vnf.c b/app/test-pmd/noisy_vnf.c
new file mode 100644 (file)
index 0000000..58c4ee9
--- /dev/null
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Red Hat Corp.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <sys/queue.h>
+#include <sys/stat.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_cycles.h>
+#include <rte_memory.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_memcpy.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+#include <rte_flow.h>
+#include <rte_malloc.h>
+
+#include "testpmd.h"
+
+struct noisy_config {
+       struct rte_ring *f;
+       uint64_t prev_time;
+       char *vnf_mem;
+       bool do_buffering;
+       bool do_flush;
+       bool do_sim;
+};
+
+struct noisy_config *noisy_cfg[RTE_MAX_ETHPORTS];
+
+static inline void
+do_write(char *vnf_mem)
+{
+       uint64_t i = rte_rand();
+       uint64_t w = rte_rand();
+
+       vnf_mem[i % ((noisy_lkup_mem_sz * 1024 * 1024) /
+                       RTE_CACHE_LINE_SIZE)] = w;
+}
+
+static inline void
+do_read(char *vnf_mem)
+{
+       uint64_t i = rte_rand();
+       uint64_t r;
+
+       r = vnf_mem[i % ((noisy_lkup_mem_sz * 1024 * 1024) /
+                       RTE_CACHE_LINE_SIZE)];
+       r++;
+}
+
+static inline void
+do_readwrite(char *vnf_mem)
+{
+       do_read(vnf_mem);
+       do_write(vnf_mem);
+}
+
+/*
+ * Simulate route lookups as defined by commandline parameters
+ */
+static void
+sim_memory_lookups(struct noisy_config *ncf, uint16_t nb_pkts)
+{
+       uint16_t i, j;
+
+       if (!ncf->do_sim)
+               return;
+
+       for (i = 0; i < nb_pkts; i++) {
+               for (j = 0; j < noisy_lkup_num_writes; j++)
+                       do_write(ncf->vnf_mem);
+               for (j = 0; j < noisy_lkup_num_reads; j++)
+                       do_read(ncf->vnf_mem);
+               for (j = 0; j < noisy_lkup_num_reads_writes; j++)
+                       do_readwrite(ncf->vnf_mem);
+       }
+}
+
+static uint16_t
+do_retry(uint16_t nb_rx, uint16_t nb_tx, struct rte_mbuf **pkts,
+        struct fwd_stream *fs)
+{
+       uint32_t retry = 0;
+
+       while (nb_tx < nb_rx && retry++ < burst_tx_retry_num) {
+               rte_delay_us(burst_tx_delay_time);
+               nb_tx += rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
+                               &pkts[nb_tx], nb_rx - nb_tx);
+       }
+
+       return nb_tx;
+}
+
+static uint32_t
+drop_pkts(struct rte_mbuf **pkts, uint16_t nb_rx, uint16_t nb_tx)
+{
+	/* Remember the drop count up front: the loop below advances nb_tx. */
+	uint32_t nb_dropped = (nb_tx < nb_rx) ? (uint32_t)(nb_rx - nb_tx) : 0;
+
+	while (nb_tx < nb_rx)
+		rte_pktmbuf_free(pkts[nb_tx++]);
+
+	return nb_dropped;
+}
+
+/*
+ * Forwarding of packets in noisy VNF mode.  Forward packets but perform
+ * memory operations first as specified on cmdline.
+ *
+ * Depending on which commandline parameters are specified we have
+ * different cases to handle:
+ *
+ * 1. No FIFO size was given, so we don't do buffering of incoming
+ *    packets.  This case is pretty much what iofwd does, except that we
+ *    also simulate memory accesses (depending on which parameters were
+ *    specified for it).
+ * 2. User wants to buffer packets in a FIFO and send out overflowing
+ *    packets.
+ * 3. User wants a FIFO and specifies a time in ms to flush all packets
+ *    out of the FIFO.
+ * 4. Cases 2 and 3 combined
+ */
+static void
+pkt_burst_noisy_vnf(struct fwd_stream *fs)
+{
+       const uint64_t freq_khz = rte_get_timer_hz() / 1000;
+       struct noisy_config *ncf = noisy_cfg[fs->rx_port];
+       struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+       struct rte_mbuf *tmp_pkts[MAX_PKT_BURST];
+       uint16_t nb_deqd = 0;
+       uint16_t nb_rx = 0;
+       uint16_t nb_tx = 0;
+       uint16_t nb_enqd;
+       unsigned int fifo_free;
+       uint64_t delta_ms;
+       bool needs_flush = false;
+       uint64_t now;
+
+       nb_rx = rte_eth_rx_burst(fs->rx_port, fs->rx_queue,
+                       pkts_burst, nb_pkt_per_burst);
+       if (unlikely(nb_rx == 0))
+               goto flush;
+       fs->rx_packets += nb_rx;
+
+       if (!ncf->do_buffering) {
+               sim_memory_lookups(ncf, nb_rx);
+               nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
+                               pkts_burst, nb_rx);
+		if (unlikely(nb_tx < nb_rx) && fs->retry_enabled)
+			nb_tx = do_retry(nb_rx, nb_tx, pkts_burst, fs);
+               fs->tx_packets += nb_tx;
+               fs->fwd_dropped += drop_pkts(pkts_burst, nb_rx, nb_tx);
+               return;
+       }
+
+       fifo_free = rte_ring_free_count(ncf->f);
+       if (fifo_free >= nb_rx) {
+               nb_enqd = rte_ring_enqueue_burst(ncf->f,
+                               (void **) pkts_burst, nb_rx, NULL);
+               if (nb_enqd < nb_rx)
+                       fs->fwd_dropped += drop_pkts(pkts_burst,
+                                                    nb_rx, nb_enqd);
+       } else {
+               nb_deqd = rte_ring_dequeue_burst(ncf->f,
+                               (void **) tmp_pkts, nb_rx, NULL);
+               nb_enqd = rte_ring_enqueue_burst(ncf->f,
+                               (void **) pkts_burst, nb_deqd, NULL);
+               if (nb_deqd > 0) {
+                       nb_tx = rte_eth_tx_burst(fs->tx_port,
+                                       fs->tx_queue, tmp_pkts,
+                                       nb_deqd);
+			if (unlikely(nb_tx < nb_deqd) && fs->retry_enabled)
+				nb_tx = do_retry(nb_deqd, nb_tx, tmp_pkts, fs);
+                       fs->fwd_dropped += drop_pkts(tmp_pkts, nb_deqd, nb_tx);
+               }
+       }
+
+       sim_memory_lookups(ncf, nb_enqd);
+
+flush:
+       if (ncf->do_flush) {
+               if (!ncf->prev_time)
+                       now = ncf->prev_time = rte_get_timer_cycles();
+               else
+                       now = rte_get_timer_cycles();
+               delta_ms = (now - ncf->prev_time) / freq_khz;
+               needs_flush = delta_ms >= noisy_tx_sw_buf_flush_time &&
+                               noisy_tx_sw_buf_flush_time > 0 && !nb_tx;
+       }
+       while (needs_flush && !rte_ring_empty(ncf->f)) {
+               unsigned int sent;
+               nb_deqd = rte_ring_dequeue_burst(ncf->f, (void **)tmp_pkts,
+                               MAX_PKT_BURST, NULL);
+               sent = rte_eth_tx_burst(fs->tx_port, fs->tx_queue,
+                                        tmp_pkts, nb_deqd);
+		if (unlikely(sent < nb_deqd) && fs->retry_enabled)
+			sent = do_retry(nb_deqd, sent, tmp_pkts, fs);
+               fs->fwd_dropped += drop_pkts(tmp_pkts, nb_deqd, sent);
+               ncf->prev_time = rte_get_timer_cycles();
+       }
+}
+
+#define NOISY_STRSIZE 256
+#define NOISY_RING "noisy_ring_%d"
+
+static void
+noisy_fwd_end(portid_t pi)
+{
+       rte_ring_free(noisy_cfg[pi]->f);
+       rte_free(noisy_cfg[pi]->vnf_mem);
+       rte_free(noisy_cfg[pi]);
+}
+
+static void
+noisy_fwd_begin(portid_t pi)
+{
+       struct noisy_config *n;
+       char name[NOISY_STRSIZE];
+
+       noisy_cfg[pi] = rte_zmalloc("testpmd noisy fifo and timers",
+                               sizeof(struct noisy_config),
+                               RTE_CACHE_LINE_SIZE);
+       if (noisy_cfg[pi] == NULL) {
+               rte_exit(EXIT_FAILURE,
+			 "rte_zmalloc(port %d, struct noisy_config) failed\n",
+                        (int) pi);
+       }
+       n = noisy_cfg[pi];
+       n->do_buffering = noisy_tx_sw_bufsz > 0;
+       n->do_sim = noisy_lkup_num_writes + noisy_lkup_num_reads +
+                   noisy_lkup_num_reads_writes;
+       n->do_flush = noisy_tx_sw_buf_flush_time > 0;
+
+       if (n->do_buffering) {
+               snprintf(name, NOISY_STRSIZE, NOISY_RING, pi);
+               n->f = rte_ring_create(name, noisy_tx_sw_bufsz,
+                               rte_socket_id(), 0);
+               if (!n->f)
+                       rte_exit(EXIT_FAILURE,
+				 "rte_ring_create(port %d, size %d) failed\n",
+                                (int) pi,
+                                noisy_tx_sw_bufsz);
+       }
+       if (noisy_lkup_mem_sz > 0) {
+               n->vnf_mem = (char *) rte_zmalloc("vnf sim memory",
+                                noisy_lkup_mem_sz * 1024 * 1024,
+                                RTE_CACHE_LINE_SIZE);
+               if (!n->vnf_mem)
+                       rte_exit(EXIT_FAILURE,
+			   "rte_zmalloc(%" PRIu64 " MB) for vnf memory failed\n",
+                          noisy_lkup_mem_sz);
+       } else if (n->do_sim) {
+               rte_exit(EXIT_FAILURE,
+                        "--noisy-lkup-memory-size must be > 0\n");
+       }
+}
+
+struct fwd_engine noisy_vnf_engine = {
+       .fwd_mode_name  = "noisy",
+       .port_fwd_begin = noisy_fwd_begin,
+       .port_fwd_end   = noisy_fwd_end,
+       .packet_fwd     = pkt_burst_noisy_vnf,
+};
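
Putting the pieces together, a hedged example invocation exercising case 4
from the file-header comment (FIFO buffering plus timed flush plus simulated
lookups; core list, buffer sizes and lookup counts are illustrative):

    ./testpmd -l 0-3 -n 4 -- -i --forward-mode=noisy \
        --noisy-tx-sw-buffer-size=512 --noisy-tx-sw-buffer-flushtime=10 \
        --noisy-lkup-memory=128 --noisy-lkup-num-reads=20 \
        --noisy-lkup-num-writes=20
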
index 962fad7..38b4197 100644 (file)
@@ -190,6 +190,17 @@ usage(char* progname)
        printf("  --vxlan-gpe-port=N: UPD port of tunnel VXLAN-GPE\n");
        printf("  --mlockall: lock all memory\n");
        printf("  --no-mlockall: do not lock all memory\n");
+       printf("  --mp-alloc <native|anon|xmem|xmemhuge>: mempool allocation method.\n"
+              "    native: use regular DPDK memory to create and populate mempool\n"
+              "    anon: use regular DPDK memory to create and anonymous memory to populate mempool\n"
+              "    xmem: use anonymous memory to create and populate mempool\n"
+              "    xmemhuge: use anonymous hugepage memory to create and populate mempool\n");
+       printf("  --noisy-tx-sw-buffer-size=N: size of FIFO buffer\n");
+       printf("  --noisy-tx-sw-buffer-flushtime=N: flush FIFO after N ms\n");
+       printf("  --noisy-lkup-memory=N: allocate N MB of VNF memory\n");
+       printf("  --noisy-lkup-num-writes=N: do N random writes per packet\n");
+       printf("  --noisy-lkup-num-reads=N: do N random reads per packet\n");
+       printf("  --noisy-lkup-num-reads-writes=N: do N random reads and writes per packet\n");
 }
 
 #ifdef RTE_LIBRTE_CMDLINE
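
The new --mp-alloc option generalizes the old --mp-anon flag, which the
parsing hunk further below now simply maps onto MP_ALLOC_ANON. An illustrative
invocation (EAL arguments are placeholders):

    ./testpmd -l 0-1 -n 4 -- -i --mp-alloc=xmemhuge
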
@@ -405,8 +416,11 @@ parse_portnuma_config(const char *q_arg)
                }
                socket_id = (uint8_t)int_fld[FLD_SOCKET];
                if (new_socket_id(socket_id)) {
-                       print_invalid_socket_id_error();
-                       return -1;
+                       if (num_sockets >= RTE_MAX_NUMA_NODES) {
+                               print_invalid_socket_id_error();
+                               return -1;
+                       }
+                       socket_ids[num_sockets++] = socket_id;
                }
                port_numa[port_id] = socket_id;
        }
@@ -462,8 +476,11 @@ parse_ringnuma_config(const char *q_arg)
                }
                socket_id = (uint8_t)int_fld[FLD_SOCKET];
                if (new_socket_id(socket_id)) {
-                       print_invalid_socket_id_error();
-                       return -1;
+                       if (num_sockets >= RTE_MAX_NUMA_NODES) {
+                               print_invalid_socket_id_error();
+                               return -1;
+                       }
+                       socket_ids[num_sockets++] = socket_id;
                }
                ring_flag = (uint8_t)int_fld[FLD_FLAG];
                if ((ring_flag < RX_RING_ONLY) || (ring_flag > RXTX_RING)) {
@@ -625,6 +642,13 @@ launch_args_parse(int argc, char** argv)
                { "vxlan-gpe-port",             1, 0, 0 },
                { "mlockall",                   0, 0, 0 },
                { "no-mlockall",                0, 0, 0 },
+               { "mp-alloc",                   1, 0, 0 },
+               { "noisy-tx-sw-buffer-size",    1, 0, 0 },
+               { "noisy-tx-sw-buffer-flushtime", 1, 0, 0 },
+               { "noisy-lkup-memory",          1, 0, 0 },
+               { "noisy-lkup-num-writes",      1, 0, 0 },
+               { "noisy-lkup-num-reads",       1, 0, 0 },
+               { "noisy-lkup-num-reads-writes", 1, 0, 0 },
                { 0, 0, 0, 0 },
        };
 
@@ -743,7 +767,22 @@ launch_args_parse(int argc, char** argv)
                        if (!strcmp(lgopts[opt_idx].name, "numa"))
                                numa_support = 1;
                        if (!strcmp(lgopts[opt_idx].name, "mp-anon")) {
-                               mp_anon = 1;
+                               mp_alloc_type = MP_ALLOC_ANON;
+                       }
+                       if (!strcmp(lgopts[opt_idx].name, "mp-alloc")) {
+                               if (!strcmp(optarg, "native"))
+                                       mp_alloc_type = MP_ALLOC_NATIVE;
+                               else if (!strcmp(optarg, "anon"))
+                                       mp_alloc_type = MP_ALLOC_ANON;
+                               else if (!strcmp(optarg, "xmem"))
+                                       mp_alloc_type = MP_ALLOC_XMEM;
+                               else if (!strcmp(optarg, "xmemhuge"))
+                                       mp_alloc_type = MP_ALLOC_XMEM_HUGE;
+                               else
+                                       rte_exit(EXIT_FAILURE,
+                                               "mp-alloc %s invalid - must be: "
+                                               "native, anon, xmem or xmemhuge\n",
+                                                optarg);
                        }
                        if (!strcmp(lgopts[opt_idx].name, "port-numa-config")) {
                                if (parse_portnuma_config(optarg))
@@ -878,10 +917,8 @@ launch_args_parse(int argc, char** argv)
                                                 " must be >= 0\n", n);
                        }
 #endif
-                       if (!strcmp(lgopts[opt_idx].name, "disable-crc-strip")) {
-                               rx_offloads &= ~DEV_RX_OFFLOAD_CRC_STRIP;
+                       if (!strcmp(lgopts[opt_idx].name, "disable-crc-strip"))
                                rx_offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
-                       }
                        if (!strcmp(lgopts[opt_idx].name, "enable-lro"))
                                rx_offloads |= DEV_RX_OFFLOAD_TCP_LRO;
                        if (!strcmp(lgopts[opt_idx].name, "enable-scatter"))
@@ -1147,6 +1184,60 @@ launch_args_parse(int argc, char** argv)
                                do_mlockall = 1;
                        if (!strcmp(lgopts[opt_idx].name, "no-mlockall"))
                                do_mlockall = 0;
+                       if (!strcmp(lgopts[opt_idx].name,
+                                   "noisy-tx-sw-buffer-size")) {
+                               n = atoi(optarg);
+                               if (n >= 0)
+                                       noisy_tx_sw_bufsz = n;
+                               else
+                                       rte_exit(EXIT_FAILURE,
+                                               "noisy-tx-sw-buffer-size must be >= 0\n");
+                       }
+                       if (!strcmp(lgopts[opt_idx].name,
+                                   "noisy-tx-sw-buffer-flushtime")) {
+                               n = atoi(optarg);
+                               if (n >= 0)
+                                       noisy_tx_sw_buf_flush_time = n;
+                               else
+                                       rte_exit(EXIT_FAILURE,
+                                                "noisy-tx-sw-buffer-flushtime must be >= 0\n");
+                       }
+                       if (!strcmp(lgopts[opt_idx].name,
+                                   "noisy-lkup-memory")) {
+                               n = atoi(optarg);
+                               if (n >= 0)
+                                       noisy_lkup_mem_sz = n;
+                               else
+                                       rte_exit(EXIT_FAILURE,
+                                                "noisy-lkup-memory must be >= 0\n");
+                       }
+                       if (!strcmp(lgopts[opt_idx].name,
+                                   "noisy-lkup-num-writes")) {
+                               n = atoi(optarg);
+                               if (n >= 0)
+                                       noisy_lkup_num_writes = n;
+                               else
+                                       rte_exit(EXIT_FAILURE,
+                                                "noisy-lkup-num-writes must be >= 0\n");
+                       }
+                       if (!strcmp(lgopts[opt_idx].name,
+                                   "noisy-lkup-num-reads")) {
+                               n = atoi(optarg);
+                               if (n >= 0)
+                                       noisy_lkup_num_reads = n;
+                               else
+                                       rte_exit(EXIT_FAILURE,
+                                                "noisy-lkup-num-reads must be >= 0\n");
+                       }
+                       if (!strcmp(lgopts[opt_idx].name,
+                                   "noisy-lkup-num-reads-writes")) {
+                               n = atoi(optarg);
+                               if (n >= 0)
+                                       noisy_lkup_num_reads_writes = n;
+                               else
+                                       rte_exit(EXIT_FAILURE,
+                                                "noisy-lkup-num-reads-writes must be >= 0\n");
+                       }
                        break;
                case 'h':
                        usage(argv[0]);
index a93d806..5c65fc4 100644 (file)
 
 #include "testpmd.h"
 
-static inline void
-print_ether_addr(const char *what, struct ether_addr *eth_addr)
-{
-       char buf[ETHER_ADDR_FMT_SIZE];
-       ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
-       printf("%s%s", what, buf);
-}
-
 /*
  * Received a burst of packets.
  */
@@ -55,16 +47,8 @@ static void
 pkt_burst_receive(struct fwd_stream *fs)
 {
        struct rte_mbuf  *pkts_burst[MAX_PKT_BURST];
-       struct rte_mbuf  *mb;
-       struct ether_hdr *eth_hdr;
-       uint16_t eth_type;
-       uint64_t ol_flags;
        uint16_t nb_rx;
-       uint16_t i, packet_type;
-       uint16_t is_encapsulation;
-       char buf[256];
-       struct rte_net_hdr_lens hdr_lens;
-       uint32_t sw_packet_type;
+       uint16_t i;
 
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
        uint64_t start_tsc;
@@ -86,124 +70,8 @@ pkt_burst_receive(struct fwd_stream *fs)
        fs->rx_burst_stats.pkt_burst_spread[nb_rx]++;
 #endif
        fs->rx_packets += nb_rx;
-
-       /*
-        * Dump each received packet if verbose_level > 0.
-        */
-       if (verbose_level > 0)
-               printf("port %u/queue %u: received %u packets\n",
-                      fs->rx_port,
-                      (unsigned) fs->rx_queue,
-                      (unsigned) nb_rx);
-       for (i = 0; i < nb_rx; i++) {
-               mb = pkts_burst[i];
-               if (verbose_level == 0) {
-                       rte_pktmbuf_free(mb);
-                       continue;
-               }
-               eth_hdr = rte_pktmbuf_mtod(mb, struct ether_hdr *);
-               eth_type = RTE_BE_TO_CPU_16(eth_hdr->ether_type);
-               ol_flags = mb->ol_flags;
-               packet_type = mb->packet_type;
-               is_encapsulation = RTE_ETH_IS_TUNNEL_PKT(packet_type);
-
-               print_ether_addr("  src=", &eth_hdr->s_addr);
-               print_ether_addr(" - dst=", &eth_hdr->d_addr);
-               printf(" - type=0x%04x - length=%u - nb_segs=%d",
-                      eth_type, (unsigned) mb->pkt_len,
-                      (int)mb->nb_segs);
-               if (ol_flags & PKT_RX_RSS_HASH) {
-                       printf(" - RSS hash=0x%x", (unsigned) mb->hash.rss);
-                       printf(" - RSS queue=0x%x",(unsigned) fs->rx_queue);
-               }
-               if (ol_flags & PKT_RX_FDIR) {
-                       printf(" - FDIR matched ");
-                       if (ol_flags & PKT_RX_FDIR_ID)
-                               printf("ID=0x%x",
-                                      mb->hash.fdir.hi);
-                       else if (ol_flags & PKT_RX_FDIR_FLX)
-                               printf("flex bytes=0x%08x %08x",
-                                      mb->hash.fdir.hi, mb->hash.fdir.lo);
-                       else
-                               printf("hash=0x%x ID=0x%x ",
-                                      mb->hash.fdir.hash, mb->hash.fdir.id);
-               }
-               if (ol_flags & PKT_RX_TIMESTAMP)
-                       printf(" - timestamp %"PRIu64" ", mb->timestamp);
-               if (ol_flags & PKT_RX_VLAN_STRIPPED)
-                       printf(" - VLAN tci=0x%x", mb->vlan_tci);
-               if (ol_flags & PKT_RX_QINQ_STRIPPED)
-                       printf(" - QinQ VLAN tci=0x%x, VLAN tci outer=0x%x",
-                                       mb->vlan_tci, mb->vlan_tci_outer);
-               if (mb->packet_type) {
-                       rte_get_ptype_name(mb->packet_type, buf, sizeof(buf));
-                       printf(" - hw ptype: %s", buf);
-               }
-               sw_packet_type = rte_net_get_ptype(mb, &hdr_lens,
-                       RTE_PTYPE_ALL_MASK);
-               rte_get_ptype_name(sw_packet_type, buf, sizeof(buf));
-               printf(" - sw ptype: %s", buf);
-               if (sw_packet_type & RTE_PTYPE_L2_MASK)
-                       printf(" - l2_len=%d", hdr_lens.l2_len);
-               if (sw_packet_type & RTE_PTYPE_L3_MASK)
-                       printf(" - l3_len=%d", hdr_lens.l3_len);
-               if (sw_packet_type & RTE_PTYPE_L4_MASK)
-                       printf(" - l4_len=%d", hdr_lens.l4_len);
-               if (sw_packet_type & RTE_PTYPE_TUNNEL_MASK)
-                       printf(" - tunnel_len=%d", hdr_lens.tunnel_len);
-               if (sw_packet_type & RTE_PTYPE_INNER_L2_MASK)
-                       printf(" - inner_l2_len=%d", hdr_lens.inner_l2_len);
-               if (sw_packet_type & RTE_PTYPE_INNER_L3_MASK)
-                       printf(" - inner_l3_len=%d", hdr_lens.inner_l3_len);
-               if (sw_packet_type & RTE_PTYPE_INNER_L4_MASK)
-                       printf(" - inner_l4_len=%d", hdr_lens.inner_l4_len);
-               if (is_encapsulation) {
-                       struct ipv4_hdr *ipv4_hdr;
-                       struct ipv6_hdr *ipv6_hdr;
-                       struct udp_hdr *udp_hdr;
-                       uint8_t l2_len;
-                       uint8_t l3_len;
-                       uint8_t l4_len;
-                       uint8_t l4_proto;
-                       struct  vxlan_hdr *vxlan_hdr;
-
-                       l2_len  = sizeof(struct ether_hdr);
-
-                        /* Do not support ipv4 option field */
-                       if (RTE_ETH_IS_IPV4_HDR(packet_type)) {
-                               l3_len = sizeof(struct ipv4_hdr);
-                               ipv4_hdr = rte_pktmbuf_mtod_offset(mb,
-                                                                  struct ipv4_hdr *,
-                                                                  l2_len);
-                               l4_proto = ipv4_hdr->next_proto_id;
-                       } else {
-                               l3_len = sizeof(struct ipv6_hdr);
-                               ipv6_hdr = rte_pktmbuf_mtod_offset(mb,
-                                                                  struct ipv6_hdr *,
-                                                                  l2_len);
-                               l4_proto = ipv6_hdr->proto;
-                       }
-                       if (l4_proto == IPPROTO_UDP) {
-                               udp_hdr = rte_pktmbuf_mtod_offset(mb,
-                                                                 struct udp_hdr *,
-                                                                 l2_len + l3_len);
-                               l4_len = sizeof(struct udp_hdr);
-                               vxlan_hdr = rte_pktmbuf_mtod_offset(mb,
-                                                                   struct vxlan_hdr *,
-                                                                   l2_len + l3_len + l4_len);
-
-                               printf(" - VXLAN packet: packet type =%d, "
-                                       "Destination UDP port =%d, VNI = %d",
-                                       packet_type, RTE_BE_TO_CPU_16(udp_hdr->dst_port),
-                                       rte_be_to_cpu_32(vxlan_hdr->vx_vni) >> 8);
-                       }
-               }
-               printf(" - Receive queue=0x%x", (unsigned) fs->rx_queue);
-               printf("\n");
-               rte_get_rx_ol_flag_list(mb->ol_flags, buf, sizeof(buf));
-               printf("  ol_flags: %s\n", buf);
-               rte_pktmbuf_free(mb);
-       }
+       for (i = 0; i < nb_rx; i++)
+               rte_pktmbuf_free(pkts_burst[i]);
 
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
        end_tsc = rte_rdtsc();
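The verbose per-packet dump removed above is not lost: it moves to the new
shared app/test-pmd/util.c and is attached per queue through the
configure_rxtx_dump_callbacks() calls added to start_port() below. A hedged
sketch of how such a dump hook attaches to an Rx queue (illustrative callback
body; the real one lives in util.c):

	#include <stdio.h>
	#include <rte_ethdev.h>
	#include <rte_mbuf.h>

	/* Matches rte_rx_callback_fn; runs on every received burst. */
	static uint16_t
	dump_rx(uint16_t port, uint16_t queue, struct rte_mbuf *pkts[],
		uint16_t nb_pkts, uint16_t max_pkts __rte_unused,
		void *user __rte_unused)
	{
		uint16_t i;

		for (i = 0; i < nb_pkts; i++)
			printf("port %u/queue %u: pkt_len=%u\n",
			       port, queue, pkts[i]->pkt_len);
		return nb_pkts; /* pass the whole burst on unchanged */
	}

	/* attached once per queue, e.g.:
	 * rte_eth_add_rx_callback(port_id, queue_id, dump_rx, NULL);
	 */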
index ee48db2..9c0edca 100644 (file)
@@ -27,6 +27,7 @@
 #include <rte_log.h>
 #include <rte_debug.h>
 #include <rte_cycles.h>
+#include <rte_malloc_heap.h>
 #include <rte_memory.h>
 #include <rte_memcpy.h>
 #include <rte_launch.h>
 
 #include "testpmd.h"
 
+#ifndef MAP_HUGETLB
+/* FreeBSD may not have MAP_HUGETLB (in fact, it probably doesn't) */
+#define HUGE_FLAG (0x40000)
+#else
+#define HUGE_FLAG MAP_HUGETLB
+#endif
+
+#ifndef MAP_HUGE_SHIFT
+/* older kernels (or FreeBSD) will not have this define */
+#define HUGE_SHIFT (26)
+#else
+#define HUGE_SHIFT MAP_HUGE_SHIFT
+#endif
+
+#define EXTMEM_HEAP_NAME "extmem"
+
 uint16_t verbose_level = 0; /**< Silent by default. */
 int testpmd_logtype; /**< Log type for testpmd logs */
 
@@ -88,9 +105,13 @@ uint8_t numa_support = 1; /**< numa enabled by default */
 uint8_t socket_num = UMA_NO_CONFIG;
 
 /*
- * Use ANONYMOUS mapped memory (might be not physically continuous) for mbufs.
+ * Select mempool allocation type:
+ * - native: use regular DPDK memory
+ * - anon: use regular DPDK memory to create mempool, but populate using
+ *         anonymous memory (may not be IOVA-contiguous)
+ * - xmem: use externally allocated memory, registered with DPDK as an
+ *         external malloc heap (xmemhuge backs it with anonymous hugepages)
  */
-uint8_t mp_anon = 0;
+uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
 
 /*
  * Store specified sockets on which memory pool to be used by ports
@@ -157,6 +178,7 @@ struct fwd_engine * fwd_engines[] = {
        &tx_only_engine,
        &csum_fwd_engine,
        &icmp_echo_engine,
+       &noisy_vnf_engine,
 #if defined RTE_LIBRTE_PMD_SOFTNIC
        &softnic_fwd_engine,
 #endif
@@ -252,6 +274,40 @@ int16_t tx_free_thresh = RTE_PMD_PARAM_UNSET;
  */
 int16_t tx_rs_thresh = RTE_PMD_PARAM_UNSET;
 
+/*
+ * Configurable value of buffered packets before sending.
+ */
+uint16_t noisy_tx_sw_bufsz;
+
+/*
+ * Configurable value of the buffered packet flush timeout (in ms).
+ */
+uint16_t noisy_tx_sw_buf_flush_time;
+
+/*
+ * Configurable value for size of VNF internal memory area
+ * used for simulating noisy neighbour behaviour
+ */
+uint64_t noisy_lkup_mem_sz;
+
+/*
+ * Configurable value of number of random writes done in
+ * VNF simulation memory area.
+ */
+uint64_t noisy_lkup_num_writes;
+
+/*
+ * Configurable value of number of random reads done in
+ * VNF simulation memory area.
+ */
+uint64_t noisy_lkup_num_reads;
+
+/*
+ * Configurable value of number of random reads/writes done in
+ * VNF simulation memory area.
+ */
+uint64_t noisy_lkup_num_reads_writes;
+
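These knobs drive the noisy-neighbour simulation: per forwarded burst, the
engine performs the configured number of random reads, writes and
read-modify-writes into the vnf_mem area sized by noisy_lkup_mem_sz. A minimal
sketch of such a loop, with illustrative names (the real implementation lives
in the new app/test-pmd/noisy_vnf.c):

	#include <rte_random.h>

	static void
	sim_lookups(char *vnf_mem, uint64_t mem_sz,
		    uint64_t nb_writes, uint64_t nb_reads)
	{
		volatile char sink = 0;
		uint64_t i;

		for (i = 0; i < nb_writes; i++)
			vnf_mem[rte_rand() % mem_sz] = (char)i; /* random write */
		for (i = 0; i < nb_reads; i++)
			sink += vnf_mem[rte_rand() % mem_sz];   /* random read */
		(void)sink; /* keep the reads from being optimized away */
	}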
 /*
  * Receive Side Scaling (RSS) configuration.
  */
@@ -289,6 +345,24 @@ uint8_t rmv_interrupt = 1; /* enabled by default */
 
 uint8_t hot_plug = 0; /**< hotplug disabled by default. */
 
+/* After attach, port setup is called on event or by iterator */
+bool setup_on_probe_event = true;
+
+/* Pretty printing of ethdev events */
+static const char * const eth_event_desc[] = {
+       [RTE_ETH_EVENT_UNKNOWN] = "unknown",
+       [RTE_ETH_EVENT_INTR_LSC] = "link state change",
+       [RTE_ETH_EVENT_QUEUE_STATE] = "queue state",
+       [RTE_ETH_EVENT_INTR_RESET] = "reset",
+       [RTE_ETH_EVENT_VF_MBOX] = "VF mbox",
+       [RTE_ETH_EVENT_IPSEC] = "IPsec",
+       [RTE_ETH_EVENT_MACSEC] = "MACsec",
+       [RTE_ETH_EVENT_INTR_RMV] = "device removal",
+       [RTE_ETH_EVENT_NEW] = "device probed",
+       [RTE_ETH_EVENT_DESTROY] = "device released",
+       [RTE_ETH_EVENT_MAX] = NULL,
+};
+
 /*
  * Display or mask ether events
  * Default to all events except VF_MBOX
@@ -334,7 +408,6 @@ lcoreid_t latencystats_lcore_id = -1;
  */
 struct rte_eth_rxmode rx_mode = {
        .max_rx_pkt_len = ETHER_MAX_LEN, /**< Default maximum frame length. */
-       .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
 };
 
 struct rte_eth_txmode tx_mode = {
@@ -426,18 +499,16 @@ struct nvgre_encap_conf nvgre_encap_conf = {
 };
 
 /* Forward function declarations */
+static void setup_attached_port(portid_t pi);
 static void map_port_queue_stats_mapping_registers(portid_t pi,
                                                   struct rte_port *port);
 static void check_all_ports_link_status(uint32_t port_mask);
 static int eth_event_callback(portid_t port_id,
                              enum rte_eth_event_type type,
                              void *param, void *ret_param);
-static void eth_dev_event_callback(char *device_name,
+static void eth_dev_event_callback(const char *device_name,
                                enum rte_dev_event_type type,
                                void *param);
-static int eth_dev_event_callback_register(void);
-static int eth_dev_event_callback_unregister(void);
-
 
 /*
  * Check if all the ports are started.
@@ -476,6 +547,8 @@ set_default_fwd_lcores_config(void)
 
        nb_lc = 0;
        for (i = 0; i < RTE_MAX_LCORE; i++) {
+               if (!rte_lcore_is_enabled(i))
+                       continue;
                sock_num = rte_lcore_to_socket_id(i);
                if (new_socket_id(sock_num)) {
                        if (num_sockets >= RTE_MAX_NUMA_NODES) {
@@ -485,8 +558,6 @@ set_default_fwd_lcores_config(void)
                        }
                        socket_ids[num_sockets++] = sock_num;
                }
-               if (!rte_lcore_is_enabled(i))
-                       continue;
                if (i == rte_get_master_lcore())
                        continue;
                fwd_lcores_cpuids[nb_lc++] = i;
@@ -513,9 +584,21 @@ set_default_fwd_ports_config(void)
        portid_t pt_id;
        int i = 0;
 
-       RTE_ETH_FOREACH_DEV(pt_id)
+       RTE_ETH_FOREACH_DEV(pt_id) {
                fwd_ports_ids[i++] = pt_id;
 
+               /* Update sockets info according to the attached device */
+               int socket_id = rte_eth_dev_socket_id(pt_id);
+               if (socket_id >= 0 && new_socket_id(socket_id)) {
+                       if (num_sockets >= RTE_MAX_NUMA_NODES) {
+                               rte_exit(EXIT_FAILURE,
+                                        "Total sockets greater than %u\n",
+                                        RTE_MAX_NUMA_NODES);
+                       }
+                       socket_ids[num_sockets++] = socket_id;
+               }
+       }
+
        nb_cfg_ports = nb_ports;
        nb_fwd_ports = nb_ports;
 }
@@ -528,6 +611,236 @@ set_def_fwd_config(void)
        set_default_fwd_ports_config();
 }
 
+/* extremely pessimistic estimation of memory required to create a mempool */
+static int
+calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
+{
+       unsigned int n_pages, mbuf_per_pg, leftover;
+       uint64_t total_mem, mbuf_mem, obj_sz;
+
+       /* there is no good way to predict how much space the mempool will
+        * occupy because it will allocate chunks on the fly, and some of those
+        * will come from default DPDK memory while some will come from our
+        * external memory, so just assume 128MB will be enough for everyone.
+        */
+       uint64_t hdr_mem = 128 << 20;
+
+       /* account for possible non-contiguousness */
+       obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
+       if (obj_sz > pgsz) {
+               TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
+               return -1;
+       }
+
+       mbuf_per_pg = pgsz / obj_sz;
+       leftover = (nb_mbufs % mbuf_per_pg) > 0;
+       n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
+
+       mbuf_mem = n_pages * pgsz;
+
+       total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
+
+       if (total_mem > SIZE_MAX) {
+               TESTPMD_LOG(ERR, "Memory size too big\n");
+               return -1;
+       }
+       *out = (size_t)total_mem;
+
+       return 0;
+}
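To make the estimate concrete: with 2176-byte mbuf segments, assume
rte_mempool_calc_obj_size() rounds each object to roughly 2432 bytes once the
header and alignment are added. A 2MB page then fits 862 objects, so 262144
mbufs need 305 pages (~610MB) plus the assumed 128MB of mempool header space.
A quick sanity check under those assumptions:

	#include <inttypes.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t obj_sz = 2432;     /* assumed rounded object size */
		uint64_t pgsz = 2ULL << 20; /* 2MB page */
		uint64_t nb_mbufs = 262144;
		uint64_t per_pg = pgsz / obj_sz;                   /* 862 */
		uint64_t pages = (nb_mbufs + per_pg - 1) / per_pg; /* 305 */
		uint64_t total = (128ULL << 20) + pages * pgsz;

		printf("%" PRIu64 " pages, ~%" PRIu64 " MB\n",
		       pages, total >> 20);
		return 0;
	}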
+
+static inline uint32_t
+bsf64(uint64_t v)
+{
+       return (uint32_t)__builtin_ctzll(v);
+}
+
+static inline uint32_t
+log2_u64(uint64_t v)
+{
+       if (v == 0)
+               return 0;
+       v = rte_align64pow2(v);
+       return bsf64(v);
+}
+
+static int
+pagesz_flags(uint64_t page_sz)
+{
+       /* as per the mmap() manpage, a huge page size is requested as the
+        * log2 of the page size, shifted left by MAP_HUGE_SHIFT
+        */
+       int log2 = log2_u64(page_sz);
+
+       return (log2 << HUGE_SHIFT);
+}
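As the comment notes, mmap() encodes the requested huge page size as the log2
of the page size shifted left by MAP_HUGE_SHIFT (26). The flag for a 2MB page
is therefore 21 << 26 and for a 1GB page 30 << 26, matching the kernel's
MAP_HUGE_2MB and MAP_HUGE_1GB constants. A tiny check:

	#include <stdio.h>

	int main(void)
	{
		printf("2MB page flag: 0x%x\n", 21 << 26); /* == MAP_HUGE_2MB */
		printf("1GB page flag: 0x%x\n", 30 << 26); /* == MAP_HUGE_1GB */
		return 0;
	}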
+
+static void *
+alloc_mem(size_t memsz, size_t pgsz, bool huge)
+{
+       void *addr;
+       int flags;
+
+       /* allocate anonymous hugepages */
+       flags = MAP_ANONYMOUS | MAP_PRIVATE;
+       if (huge)
+               flags |= HUGE_FLAG | pagesz_flags(pgsz);
+
+       addr = mmap(NULL, memsz, PROT_READ | PROT_WRITE, flags, -1, 0);
+       if (addr == MAP_FAILED)
+               return NULL;
+
+       return addr;
+}
+
+struct extmem_param {
+       void *addr;
+       size_t len;
+       size_t pgsz;
+       rte_iova_t *iova_table;
+       unsigned int iova_table_len;
+};
+
+static int
+create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
+               bool huge)
+{
+       uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
+                       RTE_PGSIZE_16M, RTE_PGSIZE_16G};    /* POWER */
+       unsigned int cur_page, n_pages, pgsz_idx;
+       size_t mem_sz, cur_pgsz;
+       rte_iova_t *iovas = NULL;
+       void *addr;
+       int ret;
+
+       for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
+               /* skip anything that is too big */
+               if (pgsizes[pgsz_idx] > SIZE_MAX)
+                       continue;
+
+               cur_pgsz = pgsizes[pgsz_idx];
+
+               /* if we were told not to allocate hugepages, override */
+               if (!huge)
+                       cur_pgsz = sysconf(_SC_PAGESIZE);
+
+               ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
+               if (ret < 0) {
+                       TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
+                       return -1;
+               }
+
+               /* allocate our memory */
+               addr = alloc_mem(mem_sz, cur_pgsz, huge);
+
+               /* if we couldn't allocate memory with a specified page size,
+                * that doesn't mean we can't do it with other page sizes, so
+                * try another one.
+                */
+               if (addr == NULL)
+                       continue;
+
+               /* store IOVA addresses for every page in this memory area */
+               n_pages = mem_sz / cur_pgsz;
+
+               iovas = malloc(sizeof(*iovas) * n_pages);
+
+               if (iovas == NULL) {
+                       TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
+                       goto fail;
+               }
+               /* lock memory if it's not huge pages */
+               if (!huge)
+                       mlock(addr, mem_sz);
+
+               /* populate IOVA addresses */
+               for (cur_page = 0; cur_page < n_pages; cur_page++) {
+                       rte_iova_t iova;
+                       size_t offset;
+                       void *cur;
+
+                       offset = cur_pgsz * cur_page;
+                       cur = RTE_PTR_ADD(addr, offset);
+
+                       /* touch the page before getting its IOVA */
+                       *(volatile char *)cur = 0;
+
+                       iova = rte_mem_virt2iova(cur);
+
+                       iovas[cur_page] = iova;
+               }
+
+               break;
+       }
+       /* if we couldn't allocate anything */
+       if (iovas == NULL)
+               return -1;
+
+       param->addr = addr;
+       param->len = mem_sz;
+       param->pgsz = cur_pgsz;
+       param->iova_table = iovas;
+       param->iova_table_len = n_pages;
+
+       return 0;
+fail:
+       if (iovas)
+               free(iovas);
+       if (addr)
+               munmap(addr, mem_sz);
+
+       return -1;
+}
+
+static int
+setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
+{
+       struct extmem_param param;
+       int socket_id, ret;
+
+       memset(&param, 0, sizeof(param));
+
+       /* check if our heap exists */
+       socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
+       if (socket_id < 0) {
+               /* create our heap */
+               ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
+               if (ret < 0) {
+                       TESTPMD_LOG(ERR, "Cannot create heap\n");
+                       return -1;
+               }
+       }
+
+       ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
+       if (ret < 0) {
+               TESTPMD_LOG(ERR, "Cannot create memory area\n");
+               return -1;
+       }
+
+       /* we now have a valid memory area, so add it to heap */
+       ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
+                       param.addr, param.len, param.iova_table,
+                       param.iova_table_len, param.pgsz);
+
+       /* when using VFIO, memory is automatically mapped for DMA by EAL */
+
+       /* not needed any more */
+       free(param.iova_table);
+
+       if (ret < 0) {
+               TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
+               munmap(param.addr, param.len);
+               return -1;
+       }
+
+       /* success */
+
+       TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
+                       param.len >> 20);
+
+       return 0;
+}
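Once registered, the external area behaves like any other malloc heap: its
synthetic socket ID can be passed to the regular allocation APIs, which is
exactly what mbuf_pool_create() does below for the xmem modes. A hedged usage
sketch (function name is illustrative):

	#include <rte_malloc.h>

	static void *
	alloc_from_extmem(size_t sz)
	{
		int sock = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);

		if (sock < 0)
			return NULL;
		return rte_malloc_socket("example", sz, 0, sock);
	}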
+
 /*
  * Configuration initialisation done once at init time.
  */
@@ -546,27 +859,59 @@ mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
                "create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
                pool_name, nb_mbuf, mbuf_seg_size, socket_id);
 
-       if (mp_anon != 0) {
-               rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
-                       mb_size, (unsigned) mb_mempool_cache,
-                       sizeof(struct rte_pktmbuf_pool_private),
-                       socket_id, 0);
-               if (rte_mp == NULL)
-                       goto err;
-
-               if (rte_mempool_populate_anon(rte_mp) == 0) {
-                       rte_mempool_free(rte_mp);
-                       rte_mp = NULL;
-                       goto err;
-               }
-               rte_pktmbuf_pool_init(rte_mp, NULL);
-               rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
-       } else {
-               /* wrapper to rte_mempool_create() */
-               TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
-                               rte_mbuf_best_mempool_ops());
-               rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
-                       mb_mempool_cache, 0, mbuf_seg_size, socket_id);
+       switch (mp_alloc_type) {
+       case MP_ALLOC_NATIVE:
+               {
+                       /* wrapper to rte_mempool_create() */
+                       TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
+                                       rte_mbuf_best_mempool_ops());
+                       rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
+                               mb_mempool_cache, 0, mbuf_seg_size, socket_id);
+                       break;
+               }
+       case MP_ALLOC_ANON:
+               {
+                       rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
+                               mb_size, (unsigned int) mb_mempool_cache,
+                               sizeof(struct rte_pktmbuf_pool_private),
+                               socket_id, 0);
+                       if (rte_mp == NULL)
+                               goto err;
+
+                       if (rte_mempool_populate_anon(rte_mp) == 0) {
+                               rte_mempool_free(rte_mp);
+                               rte_mp = NULL;
+                               goto err;
+                       }
+                       rte_pktmbuf_pool_init(rte_mp, NULL);
+                       rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
+                       break;
+               }
+       case MP_ALLOC_XMEM:
+       case MP_ALLOC_XMEM_HUGE:
+               {
+                       int heap_socket;
+                       bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
+
+                       if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
+                               rte_exit(EXIT_FAILURE, "Could not create external memory\n");
+
+                       heap_socket =
+                               rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
+                       if (heap_socket < 0)
+                               rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
+
+                       TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
+                                       rte_mbuf_best_mempool_ops());
+                       rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
+                                       mb_mempool_cache, 0, mbuf_seg_size,
+                                       heap_socket);
+                       break;
+               }
+       default:
+               {
+                       rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
+               }
        }
 
 err:
@@ -707,12 +1052,6 @@ init_config(void)
 
        memset(port_per_socket,0,RTE_MAX_NUMA_NODES);
 
-       if (numa_support) {
-               memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
-               memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
-               memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
-       }
-
        /* Configuration of logical cores. */
        fwd_lcores = rte_zmalloc("testpmd: fwd_lcores",
                                sizeof(struct fwd_lcore *) * nb_lcores,
@@ -739,23 +1078,26 @@ init_config(void)
                port->dev_conf.rxmode = rx_mode;
                rte_eth_dev_info_get(pid, &port->dev_info);
 
-               if (!(port->dev_info.rx_offload_capa &
-                                       DEV_RX_OFFLOAD_CRC_STRIP))
-                       port->dev_conf.rxmode.offloads &=
-                               ~DEV_RX_OFFLOAD_CRC_STRIP;
                if (!(port->dev_info.tx_offload_capa &
                      DEV_TX_OFFLOAD_MBUF_FAST_FREE))
                        port->dev_conf.txmode.offloads &=
                                ~DEV_TX_OFFLOAD_MBUF_FAST_FREE;
+               if (!(port->dev_info.tx_offload_capa &
+                       DEV_TX_OFFLOAD_MATCH_METADATA))
+                       port->dev_conf.txmode.offloads &=
+                               ~DEV_TX_OFFLOAD_MATCH_METADATA;
                if (numa_support) {
                        if (port_numa[pid] != NUMA_NO_CONFIG)
                                port_per_socket[port_numa[pid]]++;
                        else {
                                uint32_t socket_id = rte_eth_dev_socket_id(pid);
 
-                               /* if socket_id is invalid, set to 0 */
+                               /*
+                                * if socket_id is invalid,
+                                * set to the first available socket.
+                                */
                                if (check_socket_id(socket_id) < 0)
-                                       socket_id = 0;
+                                       socket_id = socket_ids[0];
                                port_per_socket[socket_id]++;
                        }
                }
@@ -772,6 +1114,7 @@ init_config(void)
                /* set flag to initialize port/queue */
                port->need_reconfig = 1;
                port->need_reconfig_queues = 1;
+               port->tx_metadata = 0;
        }
 
        /*
@@ -911,9 +1254,12 @@ init_fwd_streams(void)
                        else {
                                port->socket_id = rte_eth_dev_socket_id(pid);
 
-                               /* if socket_id is invalid, set to 0 */
+                               /*
+                                * if socket_id is invalid,
+                                * set to the first available socket.
+                                */
                                if (check_socket_id(port->socket_id) < 0)
-                                       port->socket_id = 0;
+                                       port->socket_id = socket_ids[0];
                        }
                }
                else {
@@ -1045,8 +1391,9 @@ fwd_port_stats_display(portid_t port_id, struct rte_eth_stats *stats)
                       (uint64_t) (stats->ipackets + stats->imissed));
 
                if (cur_fwd_eng == &csum_fwd_engine)
-                       printf("  Bad-ipcsum: %-14"PRIu64" Bad-l4csum: %-14"PRIu64" \n",
-                              port->rx_bad_ip_csum, port->rx_bad_l4_csum);
+                       printf("  Bad-ipcsum: %-14"PRIu64" Bad-l4csum: %-14"PRIu64"Bad-outer-l4csum: %-14"PRIu64"\n",
+                              port->rx_bad_ip_csum, port->rx_bad_l4_csum,
+                              port->rx_bad_outer_l4_csum);
                if ((stats->ierrors + stats->rx_nombuf) > 0) {
                        printf("  RX-error: %-"PRIu64"\n",  stats->ierrors);
                        printf("  RX-nombufs: %-14"PRIu64"\n", stats->rx_nombuf);
@@ -1064,8 +1411,9 @@ fwd_port_stats_display(portid_t port_id, struct rte_eth_stats *stats)
                       (uint64_t) (stats->ipackets + stats->imissed));
 
                if (cur_fwd_eng == &csum_fwd_engine)
-                       printf("  Bad-ipcsum:%14"PRIu64"    Bad-l4csum:%14"PRIu64"\n",
-                              port->rx_bad_ip_csum, port->rx_bad_l4_csum);
+                       printf("  Bad-ipcsum:%14"PRIu64"    Bad-l4csum:%14"PRIu64"    Bad-outer-l4csum: %-14"PRIu64"\n",
+                              port->rx_bad_ip_csum, port->rx_bad_l4_csum,
+                              port->rx_bad_outer_l4_csum);
                if ((stats->ierrors + stats->rx_nombuf) > 0) {
                        printf("  RX-error:%"PRIu64"\n", stats->ierrors);
                        printf("  RX-nombufs:             %14"PRIu64"\n",
@@ -1129,7 +1477,9 @@ fwd_stream_stats_display(streamid_t stream_id)
        /* if checksum mode */
        if (cur_fwd_eng == &csum_fwd_engine) {
               printf("  RX- bad IP checksum: %-14u  Rx- bad L4 checksum: "
-                       "%-14u\n", fs->rx_bad_ip_csum, fs->rx_bad_l4_csum);
+                       "%-14u Rx- bad outer L4 checksum: %-14u\n",
+                       fs->rx_bad_ip_csum, fs->rx_bad_l4_csum,
+                       fs->rx_bad_outer_l4_csum);
        }
 
 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
@@ -1282,31 +1632,6 @@ launch_packet_forwarding(lcore_function_t *pkt_fwd_on_lcore)
        }
 }
 
-/*
- * Update the forward ports list.
- */
-void
-update_fwd_ports(portid_t new_pid)
-{
-       unsigned int i;
-       unsigned int new_nb_fwd_ports = 0;
-       int move = 0;
-
-       for (i = 0; i < nb_fwd_ports; ++i) {
-               if (port_id_is_invalid(fwd_ports_ids[i], DISABLED_WARN))
-                       move = 1;
-               else if (move)
-                       fwd_ports_ids[new_nb_fwd_ports++] = fwd_ports_ids[i];
-               else
-                       new_nb_fwd_ports++;
-       }
-       if (new_pid < RTE_MAX_ETHPORTS)
-               fwd_ports_ids[new_nb_fwd_ports++] = new_pid;
-
-       nb_fwd_ports = new_nb_fwd_ports;
-       nb_cfg_ports = new_nb_fwd_ports;
-}
-
 /*
  * Launch packet forwarding configuration.
  */
@@ -1383,6 +1708,7 @@ start_packet_forwarding(int with_tx_first)
                fwd_streams[sm_id]->fwd_dropped = 0;
                fwd_streams[sm_id]->rx_bad_ip_csum = 0;
                fwd_streams[sm_id]->rx_bad_l4_csum = 0;
+               fwd_streams[sm_id]->rx_bad_outer_l4_csum = 0;
 
 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
                memset(&fwd_streams[sm_id]->rx_burst_stats, 0,
@@ -1488,6 +1814,9 @@ stop_packet_forwarding(void)
                ports[fwd_streams[sm_id]->rx_port].rx_bad_l4_csum =
                                                        rx_bad_l4_csum;
 
+               ports[fwd_streams[sm_id]->rx_port].rx_bad_outer_l4_csum +=
+                               fwd_streams[sm_id]->rx_bad_outer_l4_csum;
+
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
                fwd_cycles = (uint64_t) (fwd_cycles +
                                         fwd_streams[sm_id]->core_cycles);
@@ -1620,18 +1949,6 @@ port_is_started(portid_t port_id)
        return 1;
 }
 
-static int
-port_is_closed(portid_t port_id)
-{
-       if (port_id_is_invalid(port_id, ENABLED_WARN))
-               return 0;
-
-       if (ports[port_id].port_status != RTE_PORT_CLOSED)
-               return 0;
-
-       return 1;
-}
-
 int
 start_port(portid_t pid)
 {
@@ -1640,7 +1957,6 @@ start_port(portid_t pid)
        queueid_t qi;
        struct rte_port *port;
        struct ether_addr mac_addr;
-       enum rte_eth_event_type event_type;
 
        if (port_id_is_invalid(pid, ENABLED_WARN))
                return 0;
@@ -1670,7 +1986,7 @@ start_port(portid_t pid)
                                        return -1;
                                }
                        }
-
+                       configure_rxtx_dump_callbacks(0);
                        printf("Configuring Port %d (socket %u)\n", pi,
                                        port->socket_id);
                        /* configure port */
@@ -1769,7 +2085,7 @@ start_port(portid_t pid)
                                return -1;
                        }
                }
-
+               configure_rxtx_dump_callbacks(verbose_level);
                /* start port */
                if (rte_eth_dev_start(pi) < 0) {
                        printf("Fail to start port %d\n", pi);
@@ -1796,20 +2112,6 @@ start_port(portid_t pid)
                need_check_link_status = 1;
        }
 
-       for (event_type = RTE_ETH_EVENT_UNKNOWN;
-            event_type < RTE_ETH_EVENT_MAX;
-            event_type++) {
-               diag = rte_eth_dev_callback_register(RTE_ETH_ALL,
-                                               event_type,
-                                               eth_event_callback,
-                                               NULL);
-               if (diag) {
-                       printf("Failed to setup even callback for event %d\n",
-                               event_type);
-                       return -1;
-               }
-       }
-
        if (need_check_link_status == 1 && !no_link_check)
                check_all_ports_link_status(RTE_PORT_ALL);
        else if (need_check_link_status == 0)
@@ -1868,6 +2170,28 @@ stop_port(portid_t pid)
        printf("Done\n");
 }
 
+static void
+remove_invalid_ports_in(portid_t *array, portid_t *total)
+{
+       portid_t i;
+       portid_t new_total = 0;
+
+       for (i = 0; i < *total; i++)
+               if (!port_id_is_invalid(array[i], DISABLED_WARN)) {
+                       array[new_total] = array[i];
+                       new_total++;
+               }
+       *total = new_total;
+}
+
+static void
+remove_invalid_ports(void)
+{
+       remove_invalid_ports_in(ports_ids, &nb_ports);
+       remove_invalid_ports_in(fwd_ports_ids, &nb_fwd_ports);
+       nb_cfg_ports = nb_fwd_ports;
+}
+
 void
 close_port(portid_t pid)
 {
@@ -1910,6 +2234,8 @@ close_port(portid_t pid)
                        port_flow_flush(pi);
                rte_eth_dev_close(pi);
 
+               remove_invalid_ports();
+
                if (rte_atomic16_cmpset(&(port->port_status),
                        RTE_PORT_HANDLING, RTE_PORT_CLOSED) == 0)
                        printf("Port %d cannot be set to closed\n", pi);
@@ -1959,44 +2285,11 @@ reset_port(portid_t pid)
        printf("Done\n");
 }
 
-static int
-eth_dev_event_callback_register(void)
-{
-       int ret;
-
-       /* register the device event callback */
-       ret = rte_dev_event_callback_register(NULL,
-               eth_dev_event_callback, NULL);
-       if (ret) {
-               printf("Failed to register device event callback\n");
-               return -1;
-       }
-
-       return 0;
-}
-
-
-static int
-eth_dev_event_callback_unregister(void)
-{
-       int ret;
-
-       /* unregister the device event callback */
-       ret = rte_dev_event_callback_unregister(NULL,
-               eth_dev_event_callback, NULL);
-       if (ret < 0) {
-               printf("Failed to unregister device event callback\n");
-               return -1;
-       }
-
-       return 0;
-}
-
 void
 attach_port(char *identifier)
 {
-       portid_t pi = 0;
-       unsigned int socket_id;
+       portid_t pi;
+       struct rte_dev_iterator iterator;
 
        printf("Attaching a new port...\n");
 
@@ -2005,61 +2298,97 @@ attach_port(char *identifier)
                return;
        }
 
-       if (rte_eth_dev_attach(identifier, &pi))
+       if (rte_dev_probe(identifier) != 0) {
+               TESTPMD_LOG(ERR, "Failed to attach port %s\n", identifier);
                return;
+       }
+
+       /* first attach mode: event */
+       if (setup_on_probe_event) {
+               /* new ports are detected on RTE_ETH_EVENT_NEW event */
+               for (pi = 0; pi < RTE_MAX_ETHPORTS; pi++)
+                       if (ports[pi].port_status == RTE_PORT_HANDLING &&
+                                       ports[pi].need_setup != 0)
+                               setup_attached_port(pi);
+               return;
+       }
+
+       /* second attach mode: iterator */
+       RTE_ETH_FOREACH_MATCHING_DEV(pi, identifier, &iterator) {
+               /* setup ports matching the devargs used for probing */
+               if (port_is_forwarding(pi))
+                       continue; /* port was already attached before */
+               setup_attached_port(pi);
+       }
+}
+
+static void
+setup_attached_port(portid_t pi)
+{
+       unsigned int socket_id;
 
        socket_id = (unsigned)rte_eth_dev_socket_id(pi);
-       /* if socket_id is invalid, set to 0 */
+       /* if socket_id is invalid, set to the first available socket. */
        if (check_socket_id(socket_id) < 0)
-               socket_id = 0;
+               socket_id = socket_ids[0];
        reconfig(pi, socket_id);
        rte_eth_promiscuous_enable(pi);
 
-       ports_ids[nb_ports] = pi;
-       nb_ports = rte_eth_dev_count_avail();
-
+       ports_ids[nb_ports++] = pi;
+       fwd_ports_ids[nb_fwd_ports++] = pi;
+       nb_cfg_ports = nb_fwd_ports;
+       ports[pi].need_setup = 0;
        ports[pi].port_status = RTE_PORT_STOPPED;
 
-       update_fwd_ports(pi);
-
        printf("Port %d is attached. Now total ports is %d\n", pi, nb_ports);
        printf("Done\n");
 }
 
 void
-detach_port(portid_t port_id)
+detach_port_device(portid_t port_id)
 {
-       char name[RTE_ETH_NAME_MAX_LEN];
-       uint16_t i;
+       struct rte_device *dev;
+       portid_t sibling;
 
-       printf("Detaching a port...\n");
+       printf("Removing a device...\n");
 
-       if (!port_is_closed(port_id)) {
-               printf("Please close port first\n");
+       dev = rte_eth_devices[port_id].device;
+       if (dev == NULL) {
+               printf("Device already removed\n");
                return;
        }
 
-       if (ports[port_id].flow_list)
-               port_flow_flush(port_id);
+       if (ports[port_id].port_status != RTE_PORT_CLOSED) {
+               if (ports[port_id].port_status != RTE_PORT_STOPPED) {
+                       printf("Port not stopped\n");
+                       return;
+               }
+               printf("Port was not closed\n");
+               if (ports[port_id].flow_list)
+                       port_flow_flush(port_id);
+       }
 
-       if (rte_eth_dev_detach(port_id, name)) {
-               TESTPMD_LOG(ERR, "Failed to detach port %u\n", port_id);
+       if (rte_dev_remove(dev) != 0) {
+               TESTPMD_LOG(ERR, "Failed to detach device %s\n", dev->name);
                return;
        }
 
-       for (i = 0; i < nb_ports; i++) {
-               if (ports_ids[i] == port_id) {
-                       ports_ids[i] = ports_ids[nb_ports-1];
-                       ports_ids[nb_ports-1] = 0;
-                       break;
+       for (sibling = 0; sibling < RTE_MAX_ETHPORTS; sibling++) {
+               if (rte_eth_devices[sibling].device != dev)
+                       continue;
+               /* reset mapping between old ports and removed device */
+               rte_eth_devices[sibling].device = NULL;
+               if (ports[sibling].port_status != RTE_PORT_CLOSED) {
+                       /* sibling ports are forced to be closed */
+                       ports[sibling].port_status = RTE_PORT_CLOSED;
+                       printf("Port %u is closed\n", sibling);
                }
        }
-       nb_ports = rte_eth_dev_count_avail();
 
-       update_fwd_ports(RTE_MAX_ETHPORTS);
+       remove_invalid_ports();
 
-       printf("Port %u is detached. Now total ports is %d\n",
-                       port_id, nb_ports);
+       printf("Device of port %u is detached\n", port_id);
+       printf("Now total ports is %d\n", nb_ports);
        printf("Done\n");
        return;
 }
@@ -2092,20 +2421,32 @@ pmd_test_exit(void)
                         */
                        device = rte_eth_devices[pt_id].device;
                        if (device && !strcmp(device->driver->name, "net_virtio_user"))
-                               detach_port(pt_id);
+                               detach_port_device(pt_id);
                }
        }
 
        if (hot_plug) {
                ret = rte_dev_event_monitor_stop();
-               if (ret)
+               if (ret) {
                        RTE_LOG(ERR, EAL,
                                "fail to stop device event monitor.");
+                       return;
+               }
 
-               ret = eth_dev_event_callback_unregister();
-               if (ret)
+               ret = rte_dev_event_callback_unregister(NULL,
+                       eth_dev_event_callback, NULL);
+               if (ret < 0) {
+                       RTE_LOG(ERR, EAL,
+                               "fail to unregister device event callback.\n");
+                       return;
+               }
+
+               ret = rte_dev_hotplug_handle_disable();
+               if (ret) {
                        RTE_LOG(ERR, EAL,
-                               "fail to unregister all event callbacks.");
+                               "fail to disable hotplug handling.\n");
+                       return;
+               }
        }
 
        printf("\nBye...\n");
@@ -2192,7 +2533,7 @@ rmv_event_callback(void *arg)
        stop_port(port_id);
        no_link_check = org_no_link_check;
        close_port(port_id);
-       detach_port(port_id);
+       detach_port_device(port_id);
        if (need_to_start)
                start_packet_forwarding(0);
 }
@@ -2202,38 +2543,27 @@ static int
 eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
                  void *ret_param)
 {
-       static const char * const event_desc[] = {
-               [RTE_ETH_EVENT_UNKNOWN] = "Unknown",
-               [RTE_ETH_EVENT_INTR_LSC] = "LSC",
-               [RTE_ETH_EVENT_QUEUE_STATE] = "Queue state",
-               [RTE_ETH_EVENT_INTR_RESET] = "Interrupt reset",
-               [RTE_ETH_EVENT_VF_MBOX] = "VF Mbox",
-               [RTE_ETH_EVENT_IPSEC] = "IPsec",
-               [RTE_ETH_EVENT_MACSEC] = "MACsec",
-               [RTE_ETH_EVENT_INTR_RMV] = "device removal",
-               [RTE_ETH_EVENT_NEW] = "device probed",
-               [RTE_ETH_EVENT_DESTROY] = "device released",
-               [RTE_ETH_EVENT_MAX] = NULL,
-       };
-
        RTE_SET_USED(param);
        RTE_SET_USED(ret_param);
 
        if (type >= RTE_ETH_EVENT_MAX) {
-               fprintf(stderr, "\nPort %" PRIu8 ": %s called upon invalid event %d\n",
+               fprintf(stderr, "\nPort %" PRIu16 ": %s called upon invalid event %d\n",
                        port_id, __func__, type);
                fflush(stderr);
        } else if (event_print_mask & (UINT32_C(1) << type)) {
-               printf("\nPort %" PRIu8 ": %s event\n", port_id,
-                       event_desc[type]);
+               printf("\nPort %" PRIu16 ": %s event\n", port_id,
+                       eth_event_desc[type]);
                fflush(stdout);
        }
 
-       if (port_id_is_invalid(port_id, DISABLED_WARN))
-               return 0;
-
        switch (type) {
+       case RTE_ETH_EVENT_NEW:
+               ports[port_id].need_setup = 1;
+               ports[port_id].port_status = RTE_PORT_HANDLING;
+               break;
        case RTE_ETH_EVENT_INTR_RMV:
+               if (port_id_is_invalid(port_id, DISABLED_WARN))
+                       break;
                if (rte_eal_alarm_set(100000,
                                rmv_event_callback, (void *)(intptr_t)port_id))
                        fprintf(stderr, "Could not set up deferred device removal\n");
@@ -2244,11 +2574,36 @@ eth_event_callback(portid_t port_id, enum rte_eth_event_type type, void *param,
        return 0;
 }
 
+static int
+register_eth_event_callback(void)
+{
+       int ret;
+       enum rte_eth_event_type event;
+
+       for (event = RTE_ETH_EVENT_UNKNOWN;
+                       event < RTE_ETH_EVENT_MAX; event++) {
+               ret = rte_eth_dev_callback_register(RTE_ETH_ALL,
+                               event,
+                               eth_event_callback,
+                               NULL);
+               if (ret != 0) {
+                       TESTPMD_LOG(ERR, "Failed to register callback for "
+                                       "%s event\n", eth_event_desc[event]);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
 /* This function is used by the interrupt thread */
 static void
-eth_dev_event_callback(char *device_name, enum rte_dev_event_type type,
+eth_dev_event_callback(const char *device_name, enum rte_dev_event_type type,
                             __rte_unused void *arg)
 {
+       uint16_t port_id;
+       int ret;
+
        if (type >= RTE_DEV_EVENT_MAX) {
                fprintf(stderr, "%s called upon invalid event %d\n",
                        __func__, type);
@@ -2259,9 +2614,13 @@ eth_dev_event_callback(char *device_name, enum rte_dev_event_type type,
        case RTE_DEV_EVENT_REMOVE:
                RTE_LOG(ERR, EAL, "The device: %s has been removed!\n",
                        device_name);
-               /* TODO: After finish failure handle, begin to stop
-                * packet forward, stop port, close port, detach port.
-                */
+               ret = rte_eth_dev_get_port_by_name(device_name, &port_id);
+               if (ret) {
+                       RTE_LOG(ERR, EAL, "can not get port by device %s!\n",
+                               device_name);
+                       return;
+               }
+               rmv_event_callback((void *)(intptr_t)port_id);
                break;
        case RTE_DEV_EVENT_ADD:
                RTE_LOG(ERR, EAL, "The device: %s has been added!\n",
@@ -2650,6 +3009,11 @@ init_port(void)
                                "rte_zmalloc(%d struct rte_port) failed\n",
                                RTE_MAX_ETHPORTS);
        }
+
+       /* Initialize ports NUMA structures */
+       memset(port_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
+       memset(rxring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
+       memset(txring_numa, NUMA_NO_CONFIG, RTE_MAX_ETHPORTS);
 }
 
 static void
@@ -2716,6 +3080,10 @@ main(int argc, char** argv)
                rte_panic("Cannot register log type");
        rte_log_set_level(testpmd_logtype, RTE_LOG_DEBUG);
 
+       ret = register_eth_event_callback();
+       if (ret != 0)
+               rte_panic("Cannot register for ethdev events");
+
 #ifdef RTE_LIBRTE_PDUMP
        /* initialize packet capture framework */
        rte_pdump_init(NULL);
@@ -2784,14 +3152,27 @@ main(int argc, char** argv)
        init_config();
 
        if (hot_plug) {
-               /* enable hot plug monitoring */
+               ret = rte_dev_hotplug_handle_enable();
+               if (ret) {
+                       RTE_LOG(ERR, EAL,
+                               "fail to enable hotplug handling.");
+                       return -1;
+               }
+
                ret = rte_dev_event_monitor_start();
                if (ret) {
-                       rte_errno = EINVAL;
+                       RTE_LOG(ERR, EAL,
+                               "fail to start device event monitoring.");
                        return -1;
                }
-               eth_dev_event_callback_register();
 
+               ret = rte_dev_event_callback_register(NULL,
+                       eth_dev_event_callback, NULL);
+               if (ret) {
+                       RTE_LOG(ERR, EAL,
+                               "fail  to register device event callback\n");
+                       return -1;
+               }
        }
 
        if (start_port(RTE_PORT_ALL) != 0)
index a1f6614..3ff11e6 100644 (file)
@@ -5,6 +5,8 @@
 #ifndef _TESTPMD_H_
 #define _TESTPMD_H_
 
+#include <stdbool.h>
+
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 #include <rte_gro.h>
@@ -69,6 +71,16 @@ enum {
        PORT_TOPOLOGY_LOOP,
 };
 
+enum {
+       MP_ALLOC_NATIVE, /**< allocate and populate mempool natively */
+       MP_ALLOC_ANON,
+       /**< allocate mempool natively, but populate using anonymous memory */
+       MP_ALLOC_XMEM,
+       /**< allocate and populate mempool using anonymous memory */
+       MP_ALLOC_XMEM_HUGE
+       /**< allocate and populate mempool using anonymous hugepage memory */
+};
+
 #ifdef RTE_TEST_PMD_RECORD_BURST_STATS
 /**
  * The data structure associated with RX and TX packet burst statistics
@@ -112,6 +124,8 @@ struct fwd_stream {
        unsigned int fwd_dropped; /**< received packets not forwarded */
        unsigned int rx_bad_ip_csum ; /**< received packets has bad ip checksum */
        unsigned int rx_bad_l4_csum ; /**< received packets has bad l4 checksum */
+       unsigned int rx_bad_outer_l4_csum;
+       /**< received packets with bad outer l4 checksum */
        unsigned int gro_times; /**< GRO operation times */
 #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
        uint64_t     core_cycles; /**< used for RX and TX processing */
@@ -124,15 +138,12 @@ struct fwd_stream {
 
 /** Descriptor for a single flow. */
 struct port_flow {
-       size_t size; /**< Allocated space including data[]. */
        struct port_flow *next; /**< Next flow in list. */
        struct port_flow *tmp; /**< Temporary linking. */
        uint32_t id; /**< Flow rule ID. */
        struct rte_flow *flow; /**< Opaque flow object returned by PMD. */
-       struct rte_flow_attr attr; /**< Attributes. */
-       struct rte_flow_item *pattern; /**< Pattern. */
-       struct rte_flow_action *actions; /**< Actions. */
-       uint8_t data[]; /**< Storage for pattern/actions. */
+       struct rte_flow_conv_rule rule; /**< Saved flow rule description. */
+       uint8_t data[]; /**< Storage for flow rule description. */
 };
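With the flexible array member, a flow rule is now stored in a single
allocation: the size of the converted rule description is added to the struct
size and the description lands inline in data[]. A hedged sketch of the
allocation pattern (rule_size would come from rte_flow_conv(); the helper name
is illustrative):

	#include <stdlib.h>

	static struct port_flow *
	port_flow_alloc(uint32_t id, size_t rule_size)
	{
		struct port_flow *pf = calloc(1, sizeof(*pf) + rule_size);

		if (pf != NULL)
			pf->id = id;
		return pf;
	}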
 
 #ifdef SOFTNIC
@@ -165,9 +176,12 @@ struct rte_port {
        void                    *fwd_ctx;   /**< Forwarding mode context */
        uint64_t                rx_bad_ip_csum; /**< rx pkts with bad ip checksum  */
        uint64_t                rx_bad_l4_csum; /**< rx pkts with bad l4 checksum */
+       uint64_t                rx_bad_outer_l4_csum;
+       /**< rx pkts with bad outer l4 checksum */
        uint8_t                 tx_queue_stats_mapping_enabled;
        uint8_t                 rx_queue_stats_mapping_enabled;
        volatile uint16_t        port_status;    /**< port started or not */
+       uint8_t                 need_setup;     /**< port just attached */
        uint8_t                 need_reconfig;  /**< need reconfiguring port or not */
        uint8_t                 need_reconfig_queues; /**< need reconfiguring queues or not */
        uint8_t                 rss_flag;   /**< enable rss or not */
@@ -180,9 +194,14 @@ struct rte_port {
        uint32_t                mc_addr_nb; /**< nb. of addr. in mc_addr_pool */
        uint8_t                 slave_flag; /**< bonding slave port */
        struct port_flow        *flow_list; /**< Associated flows. */
+       const struct rte_eth_rxtx_callback *rx_dump_cb[MAX_QUEUE_ID+1];
+       const struct rte_eth_rxtx_callback *tx_dump_cb[MAX_QUEUE_ID+1];
 #ifdef SOFTNIC
        struct softnic_port     softport;  /**< softnic params */
 #endif
+       /** Metadata value to insert in Tx packets. */
+       rte_be32_t              tx_metadata;
+       const struct rte_eth_rxtx_callback *tx_set_md_cb[MAX_QUEUE_ID+1];
 };
 
 /**
@@ -243,6 +262,7 @@ extern struct fwd_engine rx_only_engine;
 extern struct fwd_engine tx_only_engine;
 extern struct fwd_engine csum_fwd_engine;
 extern struct fwd_engine icmp_echo_engine;
+extern struct fwd_engine noisy_vnf_engine;
 #ifdef SOFTNIC
 extern struct fwd_engine softnic_fwd_engine;
 #endif
@@ -304,13 +324,15 @@ extern uint8_t  numa_support; /**< set by "--numa" parameter */
 extern uint16_t port_topology; /**< set by "--port-topology" parameter */
 extern uint8_t no_flush_rx; /**<set by "--no-flush-rx" parameter */
 extern uint8_t flow_isolate_all; /**< set by "--flow-isolate-all */
-extern uint8_t  mp_anon; /**< set by "--mp-anon" parameter */
+extern uint8_t  mp_alloc_type;
+/**< set by "--mp-anon" or "--mp-alloc" parameter */
 extern uint8_t no_link_check; /**<set by "--disable-link-check" parameter */
 extern volatile int test_done; /* stop packet forwarding when set to 1. */
 extern uint8_t lsc_interrupt; /**< disabled by "--no-lsc-interrupt" parameter */
 extern uint8_t rmv_interrupt; /**< disabled by "--no-rmv-interrupt" parameter */
 extern uint32_t event_print_mask;
 /**< set by "--print-event xxxx" and "--mask-event xxxx parameters */
+extern bool setup_on_probe_event; /**< disabled by port setup-on iterator */
 extern uint8_t hot_plug; /**< enable by "--hot-plug" parameter */
 extern int do_mlockall; /**< set by "--mlockall" or "--no-mlockall" parameter */
 
@@ -375,6 +397,13 @@ extern int8_t rx_drop_en;
 extern int16_t tx_free_thresh;
 extern int16_t tx_rs_thresh;
 
+extern uint16_t noisy_tx_sw_bufsz;
+extern uint16_t noisy_tx_sw_buf_flush_time;
+extern uint64_t noisy_lkup_mem_sz;
+extern uint64_t noisy_lkup_num_writes;
+extern uint64_t noisy_lkup_num_reads;
+extern uint64_t noisy_lkup_num_reads_writes;
+
 extern uint8_t dcb_config;
 extern uint8_t dcb_test;
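
The noisy_* knobs above parameterize the new noisy_vnf forwarding engine. A
rough sketch of the memory churn they are meant to drive (illustrative
only; the real logic lives in app/test-pmd/noisy_vnf.c):

    #include <rte_random.h>

    static void
    sim_memory_lookups(char *vnf_mem, uint64_t mem_sz)
    {
            uint64_t i;

            if (vnf_mem == NULL || mem_sz == 0)
                    return;
            /* noisy_lkup_num_writes/reads are the externs declared above. */
            for (i = 0; i < noisy_lkup_num_writes; i++)
                    vnf_mem[rte_rand() % mem_sz] = (char)i;
            for (i = 0; i < noisy_lkup_num_reads; i++)
                    (void)vnf_mem[rte_rand() % mem_sz];
    }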
 
@@ -487,6 +516,68 @@ struct nvgre_encap_conf {
 };
 struct nvgre_encap_conf nvgre_encap_conf;
 
+/* L2 encap parameters. */
+struct l2_encap_conf {
+       uint32_t select_ipv4:1;
+       uint32_t select_vlan:1;
+       rte_be16_t vlan_tci;
+       uint8_t eth_src[ETHER_ADDR_LEN];
+       uint8_t eth_dst[ETHER_ADDR_LEN];
+};
+struct l2_encap_conf l2_encap_conf;
+
+/* L2 decap parameters. */
+struct l2_decap_conf {
+       uint32_t select_vlan:1;
+};
+struct l2_decap_conf l2_decap_conf;
+
+/* MPLSoGRE encap parameters. */
+struct mplsogre_encap_conf {
+       uint32_t select_ipv4:1;
+       uint32_t select_vlan:1;
+       uint8_t label[3];
+       rte_be32_t ipv4_src;
+       rte_be32_t ipv4_dst;
+       uint8_t ipv6_src[16];
+       uint8_t ipv6_dst[16];
+       rte_be16_t vlan_tci;
+       uint8_t eth_src[ETHER_ADDR_LEN];
+       uint8_t eth_dst[ETHER_ADDR_LEN];
+};
+struct mplsogre_encap_conf mplsogre_encap_conf;
+
+/* MPLSoGRE decap parameters. */
+struct mplsogre_decap_conf {
+       uint32_t select_ipv4:1;
+       uint32_t select_vlan:1;
+};
+struct mplsogre_decap_conf mplsogre_decap_conf;
+
+/* MPLSoUDP encap parameters. */
+struct mplsoudp_encap_conf {
+       uint32_t select_ipv4:1;
+       uint32_t select_vlan:1;
+       uint8_t label[3];
+       rte_be16_t udp_src;
+       rte_be16_t udp_dst;
+       rte_be32_t ipv4_src;
+       rte_be32_t ipv4_dst;
+       uint8_t ipv6_src[16];
+       uint8_t ipv6_dst[16];
+       rte_be16_t vlan_tci;
+       uint8_t eth_src[ETHER_ADDR_LEN];
+       uint8_t eth_dst[ETHER_ADDR_LEN];
+};
+struct mplsoudp_encap_conf mplsoudp_encap_conf;
+
+/* MPLSoUDP decap parameters. */
+struct mplsoudp_decap_conf {
+       uint32_t select_ipv4:1;
+       uint32_t select_vlan:1;
+};
+struct mplsoudp_decap_conf mplsoudp_decap_conf;
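
A sketch of filling one of these shared encap configurations before
creating a flow rule that references it (the MAC addresses and VLAN TCI are
placeholder values):

    #include <string.h>
    #include <rte_byteorder.h>

    static void
    l2_encap_example(void)
    {
            static const uint8_t src[ETHER_ADDR_LEN] = { 0x02, 0, 0, 0, 0, 1 };
            static const uint8_t dst[ETHER_ADDR_LEN] = { 0x02, 0, 0, 0, 0, 2 };

            l2_encap_conf.select_ipv4 = 1;
            l2_encap_conf.select_vlan = 1;
            l2_encap_conf.vlan_tci = rte_cpu_to_be_16(100);
            memcpy(l2_encap_conf.eth_src, src, ETHER_ADDR_LEN);
            memcpy(l2_encap_conf.eth_dst, dst, ETHER_ADDR_LEN);
    }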
+
 static inline unsigned int
 lcore_num(void)
 {
@@ -594,6 +685,8 @@ void nic_xstats_display(portid_t port_id);
 void nic_xstats_clear(portid_t port_id);
 void nic_stats_mapping_display(portid_t port_id);
 void port_infos_display(portid_t port_id);
+void port_summary_display(portid_t port_id);
+void port_summary_header_display(void);
 void port_offload_cap_display(portid_t port_id);
 void rx_queue_infos_display(portid_t port_idi, uint16_t queue_id);
 void tx_queue_infos_display(portid_t port_idi, uint16_t queue_id);
@@ -688,7 +781,7 @@ void stop_port(portid_t pid);
 void close_port(portid_t pid);
 void reset_port(portid_t pid);
 void attach_port(char *identifier);
-void detach_port(portid_t port_id);
+void detach_port_device(portid_t port_id);
 int all_ports_stopped(void);
 int port_is_stopped(portid_t port_id);
 int port_is_started(portid_t port_id);
@@ -708,8 +801,7 @@ int set_queue_rate_limit(portid_t port_id, uint16_t queue_idx, uint16_t rate);
 int set_vf_rate_limit(portid_t port_id, uint16_t vf, uint16_t rate,
                                uint64_t q_msk);
 
-void port_rss_hash_conf_show(portid_t port_id, char rss_info[],
-                            int show_rss_key);
+void port_rss_hash_conf_show(portid_t port_id, int show_rss_key);
 void port_rss_hash_key_update(portid_t port_id, char rss_type[],
                              uint8_t *hash_key, uint hash_key_len);
 int rx_queue_id_is_invalid(queueid_t rxq_id);
@@ -743,6 +835,25 @@ int check_nb_rxq(queueid_t rxq);
 queueid_t get_allowed_max_nb_txq(portid_t *pid);
 int check_nb_txq(queueid_t txq);
 
+uint16_t dump_rx_pkts(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
+                     uint16_t nb_pkts, __rte_unused uint16_t max_pkts,
+                     __rte_unused void *user_param);
+
+uint16_t dump_tx_pkts(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
+                     uint16_t nb_pkts, __rte_unused void *user_param);
+
+void add_rx_dump_callbacks(portid_t portid);
+void remove_rx_dump_callbacks(portid_t portid);
+void add_tx_dump_callbacks(portid_t portid);
+void remove_tx_dump_callbacks(portid_t portid);
+void configure_rxtx_dump_callbacks(uint16_t verbose);
+
+uint16_t tx_pkt_set_md(uint16_t port_id, __rte_unused uint16_t queue,
+                      struct rte_mbuf *pkts[], uint16_t nb_pkts,
+                      __rte_unused void *user_param);
+void add_tx_md_callback(portid_t portid);
+void remove_tx_md_callback(portid_t portid);
+
 /*
  * Work-around of a compilation error with ICC on invocations of the
  * rte_be_to_cpu_16() function.
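
The dump and metadata callback helpers declared above are driven from
testpmd's verbosity level. A sketch of the wiring (the exact thresholds are
an assumption; see the configure_rxtx_dump_callbacks() definition in
testpmd.c for the real policy):

    void
    configure_rxtx_dump_callbacks(uint16_t verbose)
    {
            portid_t portid;

            RTE_ETH_FOREACH_DEV(portid) {
                    /* verbose 1: Rx only, 2: Tx only, >2: both */
                    if (verbose == 1 || verbose > 2)
                            add_rx_dump_callbacks(portid);
                    else
                            remove_rx_dump_callbacks(portid);
                    if (verbose >= 2)
                            add_tx_dump_callbacks(portid);
                    else
                            remove_tx_dump_callbacks(portid);
            }
    }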
diff --git a/app/test-pmd/util.c b/app/test-pmd/util.c
new file mode 100644 (file)
index 0000000..687bfa4
--- /dev/null
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ * Copyright(c) 2018 Mellanox Technology
+ */
+
+#include <stdio.h>
+
+#include <rte_net.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_flow.h>
+
+#include "testpmd.h"
+
+static inline void
+print_ether_addr(const char *what, struct ether_addr *eth_addr)
+{
+       char buf[ETHER_ADDR_FMT_SIZE];
+       ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
+       printf("%s%s", what, buf);
+}
+
+static inline void
+dump_pkt_burst(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
+             uint16_t nb_pkts, int is_rx)
+{
+       struct rte_mbuf  *mb;
+       struct ether_hdr *eth_hdr;
+       uint16_t eth_type;
+       uint64_t ol_flags;
+       uint16_t i, packet_type;
+       uint16_t is_encapsulation;
+       char buf[256];
+       struct rte_net_hdr_lens hdr_lens;
+       uint32_t sw_packet_type;
+       uint16_t udp_port;
+       uint32_t vx_vni;
+
+       if (!nb_pkts)
+               return;
+       printf("port %u/queue %u: %s %u packets\n",
+              port_id, queue,
+              is_rx ? "received" : "sent",
+              (unsigned int) nb_pkts);
+       for (i = 0; i < nb_pkts; i++) {
+               mb = pkts[i];
+               eth_hdr = rte_pktmbuf_mtod(mb, struct ether_hdr *);
+               eth_type = RTE_BE_TO_CPU_16(eth_hdr->ether_type);
+               ol_flags = mb->ol_flags;
+               packet_type = mb->packet_type;
+               is_encapsulation = RTE_ETH_IS_TUNNEL_PKT(packet_type);
+
+               print_ether_addr("  src=", &eth_hdr->s_addr);
+               print_ether_addr(" - dst=", &eth_hdr->d_addr);
+               printf(" - type=0x%04x - length=%u - nb_segs=%d",
+                      eth_type, (unsigned int) mb->pkt_len,
+                      (int)mb->nb_segs);
+               if (ol_flags & PKT_RX_RSS_HASH) {
+                       printf(" - RSS hash=0x%x", (unsigned int) mb->hash.rss);
+                       printf(" - RSS queue=0x%x", (unsigned int) queue);
+               }
+               if (ol_flags & PKT_RX_FDIR) {
+                       printf(" - FDIR matched ");
+                       if (ol_flags & PKT_RX_FDIR_ID)
+                               printf("ID=0x%x",
+                                      mb->hash.fdir.hi);
+                       else if (ol_flags & PKT_RX_FDIR_FLX)
+                               printf("flex bytes=0x%08x %08x",
+                                      mb->hash.fdir.hi, mb->hash.fdir.lo);
+                       else
+                               printf("hash=0x%x ID=0x%x ",
+                                      mb->hash.fdir.hash, mb->hash.fdir.id);
+               }
+               if (ol_flags & PKT_RX_TIMESTAMP)
+                       printf(" - timestamp %"PRIu64" ", mb->timestamp);
+               if (ol_flags & PKT_RX_QINQ)
+                       printf(" - QinQ VLAN tci=0x%x, VLAN tci outer=0x%x",
+                              mb->vlan_tci, mb->vlan_tci_outer);
+               else if (ol_flags & PKT_RX_VLAN)
+                       printf(" - VLAN tci=0x%x", mb->vlan_tci);
+               if (mb->packet_type) {
+                       rte_get_ptype_name(mb->packet_type, buf, sizeof(buf));
+                       printf(" - hw ptype: %s", buf);
+               }
+               sw_packet_type = rte_net_get_ptype(mb, &hdr_lens,
+                                       RTE_PTYPE_ALL_MASK);
+               rte_get_ptype_name(sw_packet_type, buf, sizeof(buf));
+               printf(" - sw ptype: %s", buf);
+               if (sw_packet_type & RTE_PTYPE_L2_MASK)
+                       printf(" - l2_len=%d", hdr_lens.l2_len);
+               if (sw_packet_type & RTE_PTYPE_L3_MASK)
+                       printf(" - l3_len=%d", hdr_lens.l3_len);
+               if (sw_packet_type & RTE_PTYPE_L4_MASK)
+                       printf(" - l4_len=%d", hdr_lens.l4_len);
+               if (sw_packet_type & RTE_PTYPE_TUNNEL_MASK)
+                       printf(" - tunnel_len=%d", hdr_lens.tunnel_len);
+               if (sw_packet_type & RTE_PTYPE_INNER_L2_MASK)
+                       printf(" - inner_l2_len=%d", hdr_lens.inner_l2_len);
+               if (sw_packet_type & RTE_PTYPE_INNER_L3_MASK)
+                       printf(" - inner_l3_len=%d", hdr_lens.inner_l3_len);
+               if (sw_packet_type & RTE_PTYPE_INNER_L4_MASK)
+                       printf(" - inner_l4_len=%d", hdr_lens.inner_l4_len);
+               if (is_encapsulation) {
+                       struct ipv4_hdr *ipv4_hdr;
+                       struct ipv6_hdr *ipv6_hdr;
+                       struct udp_hdr *udp_hdr;
+                       uint8_t l2_len;
+                       uint8_t l3_len;
+                       uint8_t l4_len;
+                       uint8_t l4_proto;
+                       struct  vxlan_hdr *vxlan_hdr;
+
+                       l2_len  = sizeof(struct ether_hdr);
+
+                       /* IPv4 options are not supported; assume a fixed-size header */
+                       if (RTE_ETH_IS_IPV4_HDR(packet_type)) {
+                               l3_len = sizeof(struct ipv4_hdr);
+                               ipv4_hdr = rte_pktmbuf_mtod_offset(mb,
+                               struct ipv4_hdr *,
+                               l2_len);
+                               l4_proto = ipv4_hdr->next_proto_id;
+                       } else {
+                               l3_len = sizeof(struct ipv6_hdr);
+                               ipv6_hdr = rte_pktmbuf_mtod_offset(mb,
+                               struct ipv6_hdr *,
+                               l2_len);
+                               l4_proto = ipv6_hdr->proto;
+                       }
+                       if (l4_proto == IPPROTO_UDP) {
+                               udp_hdr = rte_pktmbuf_mtod_offset(mb,
+                               struct udp_hdr *,
+                               l2_len + l3_len);
+                               l4_len = sizeof(struct udp_hdr);
+                               vxlan_hdr = rte_pktmbuf_mtod_offset(mb,
+                               struct vxlan_hdr *,
+                               l2_len + l3_len + l4_len);
+                               udp_port = RTE_BE_TO_CPU_16(udp_hdr->dst_port);
+                               vx_vni = rte_be_to_cpu_32(vxlan_hdr->vx_vni);
+                               printf(" - VXLAN packet: packet type=%d, "
+                                      "destination UDP port=%d, VNI=%d",
+                                      packet_type, udp_port, vx_vni >> 8);
+                       }
+               }
+               printf(" - %s queue=0x%x", is_rx ? "Receive" : "Send",
+                       (unsigned int) queue);
+               printf("\n");
+               rte_get_rx_ol_flag_list(mb->ol_flags, buf, sizeof(buf));
+               printf("  ol_flags: %s\n", buf);
+       }
+}
+
+uint16_t
+dump_rx_pkts(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
+            uint16_t nb_pkts, __rte_unused uint16_t max_pkts,
+            __rte_unused void *user_param)
+{
+       dump_pkt_burst(port_id, queue, pkts, nb_pkts, 1);
+       return nb_pkts;
+}
+
+uint16_t
+dump_tx_pkts(uint16_t port_id, uint16_t queue, struct rte_mbuf *pkts[],
+            uint16_t nb_pkts, __rte_unused void *user_param)
+{
+       dump_pkt_burst(port_id, queue, pkts, nb_pkts, 0);
+       return nb_pkts;
+}
+
+uint16_t
+tx_pkt_set_md(uint16_t port_id, __rte_unused uint16_t queue,
+             struct rte_mbuf *pkts[], uint16_t nb_pkts,
+             __rte_unused void *user_param)
+{
+       uint16_t i = 0;
+
+       /*
+        * Add metadata value to every Tx packet,
+        * and set ol_flags accordingly.
+        */
+       for (i = 0; i < nb_pkts; i++) {
+               pkts[i]->tx_metadata = ports[port_id].tx_metadata;
+               pkts[i]->ol_flags |= PKT_TX_METADATA;
+       }
+       return nb_pkts;
+}
+
+void
+add_tx_md_callback(portid_t portid)
+{
+       struct rte_eth_dev_info dev_info;
+       uint16_t queue;
+
+       if (port_id_is_invalid(portid, ENABLED_WARN))
+               return;
+       rte_eth_dev_info_get(portid, &dev_info);
+       for (queue = 0; queue < dev_info.nb_tx_queues; queue++)
+               if (!ports[portid].tx_set_md_cb[queue])
+                       ports[portid].tx_set_md_cb[queue] =
+                               rte_eth_add_tx_callback(portid, queue,
+                                                       tx_pkt_set_md, NULL);
+}
+
+void
+remove_tx_md_callback(portid_t portid)
+{
+       struct rte_eth_dev_info dev_info;
+       uint16_t queue;
+
+       if (port_id_is_invalid(portid, ENABLED_WARN))
+               return;
+       rte_eth_dev_info_get(portid, &dev_info);
+       for (queue = 0; queue < dev_info.nb_tx_queues; queue++)
+               if (ports[portid].tx_set_md_cb[queue]) {
+                       rte_eth_remove_tx_callback(portid, queue,
+                               ports[portid].tx_set_md_cb[queue]);
+                       ports[portid].tx_set_md_cb[queue] = NULL;
+               }
+}
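
Usage sketch for the Tx metadata helpers above; the port number and
metadata value are examples:

    static void
    tx_md_example(void)
    {
            /* Every packet sent on port 0 now carries metadata 0xcafe. */
            ports[0].tx_metadata = rte_cpu_to_be_32(0xcafe);
            add_tx_md_callback(0);
            /* ... run traffic; mbufs get PKT_TX_METADATA set ... */
            remove_tx_md_callback(0);
    }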
index d28a5c3..5b613c3 100755 (executable)
@@ -23,7 +23,7 @@ name=${5:?define/type/function name required}
 
 : ${CC:=cc}
 
-temp=/tmp/${0##*/}.$$.c
+temp=$(mktemp -t dpdk.${0##*/}.c.XXXXXX)
 
 case $type in
 define)
@@ -86,7 +86,7 @@ printf "\
 " "$include" "$code" > "${temp}" &&
 if ${CC} ${CPPFLAGS} ${EXTRA_CPPFLAGS} ${CFLAGS} ${EXTRA_CFLAGS} \
        ${AUTO_CONFIG_CFLAGS} \
-       -c -o ${temp}.o "${temp}" 1>&${out} 2>&${err}
+       -xc -c -o ${temp}.o "${temp}" 1>&${out} 2>&${err}
 then
        rm -f "${temp}" "${temp}.o"
        printf "\
index 5bc8cda..d091510 100755 (executable)
@@ -16,9 +16,9 @@ for i in `awk 'BEGIN {found=0}
                /.*;/ {if (found == 1) print $1}' $MAPFILE`
 do
        SYM=`echo $i | sed -e"s/;//"`
-       objdump -t $OBJFILE | grep -q "\.text.*$SYM"
+       objdump -t $OBJFILE | grep -q "\.text.*$SYM$"
        IN_TEXT=$?
-       objdump -t $OBJFILE | grep -q "\.text\.experimental.*$SYM"
+       objdump -t $OBJFILE | grep -q "\.text\.experimental.*$SYM$"
        IN_EXP=$?
        if [ $IN_TEXT -eq 0 -a $IN_EXP -ne 0 ]
        then
index 0f35ca4..dc0b6d5 100644 (file)
@@ -8,7 +8,6 @@
  *
  */
 
-#define _GNU_SOURCE
 #include <stdio.h>
 #include <ctype.h>
 #include <string.h>
index 803dfec..9826c6a 100644 (file)
@@ -9,4 +9,4 @@
 # parameters to script are paths relative to install prefix:
 # 1. directory containing driver files e.g. lib64/dpdk/drivers
 # 2. directory for installed regular libs e.g. lib64
-ln -sf ${DESTDIR}/${MESON_INSTALL_PREFIX}/$1/* ${DESTDIR}/${MESON_INSTALL_PREFIX}/$2
+ln -rsf ${DESTDIR}/${MESON_INSTALL_PREFIX}/$1/* ${DESTDIR}/${MESON_INSTALL_PREFIX}/$2
index 40dbc87..4b23b39 100644 (file)
@@ -53,7 +53,7 @@ flags_cavium = [
        ['RTE_MAX_NUMA_NODES', 2],
        ['RTE_MAX_LCORE', 96],
        ['RTE_MAX_VFIO_GROUPS', 128],
-       ['RTE_RING_USE_C11_MEM_MODEL', false]]
+       ['RTE_USE_C11_MEM_MODEL', false]]
 flags_dpaa = [
        ['RTE_MACHINE', '"dpaa"'],
        ['RTE_CACHE_LINE_SIZE', 64],
@@ -157,7 +157,8 @@ else
 endif
 message(machine_args)
 
-if cc.get_define('__ARM_NEON', args: machine_args) != ''
+if (cc.get_define('__ARM_NEON', args: machine_args) != '' or
+    cc.get_define('__aarch64__', args: machine_args) != '')
        dpdk_conf.set('RTE_MACHINE_CPUFLAG_NEON', 1)
        compile_time_cpuflags += ['RTE_CPUFLAG_NEON']
 endif
index 111c005..ad88a37 100644 (file)
@@ -17,6 +17,8 @@ CONFIG_RTE_FORCE_INTRINSICS=y
 # to address minimum DMA alignment across all arm64 implementations.
 CONFIG_RTE_CACHE_LINE_SIZE=128
 
+CONFIG_RTE_USE_C11_MEM_MODEL=y
+
 # Accelarate rte_memcpy. Be sure to run unit test (memcpy_perf_autotest)
 # to determine the best threshold in code. Refer to notes in source file
 # (lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h) for more info.
@@ -29,8 +31,6 @@ CONFIG_RTE_ARCH_ARM64_MEMCPY=n
 #CONFIG_RTE_ARM64_MEMCPY_ALIGN_MASK=0xF
 #CONFIG_RTE_ARM64_MEMCPY_STRICT_ALIGN=n
 
-CONFIG_RTE_RING_USE_C11_MEM_MODEL=y
-
 CONFIG_RTE_LIBRTE_FM10K_PMD=n
 CONFIG_RTE_LIBRTE_SFC_EFX_PMD=n
 CONFIG_RTE_LIBRTE_AVP_PMD=n
index 4bcbaf9..d12ae98 100644 (file)
@@ -55,12 +55,18 @@ CONFIG_RTE_MAJOR_ABI=
 #
 CONFIG_RTE_CACHE_LINE_SIZE=64
 
+#
+# Memory model
+#
+CONFIG_RTE_USE_C11_MEM_MODEL=n
+
 #
 # Compile Environment Abstraction Layer
 #
 CONFIG_RTE_LIBRTE_EAL=y
 CONFIG_RTE_MAX_LCORE=128
 CONFIG_RTE_MAX_NUMA_NODES=8
+CONFIG_RTE_MAX_HEAPS=32
 CONFIG_RTE_MAX_MEMSEG_LISTS=64
 # each memseg list will be limited to either RTE_MAX_MEMSEG_PER_LIST pages
 # or RTE_MAX_MEM_MB_PER_LIST megabytes worth of memory, whichever is smaller
@@ -128,7 +134,7 @@ CONFIG_RTE_MAX_QUEUES_PER_PORT=1024
 CONFIG_RTE_LIBRTE_IEEE1588=n
 CONFIG_RTE_ETHDEV_QUEUE_STAT_CNTRS=16
 CONFIG_RTE_ETHDEV_RXTX_CALLBACKS=y
-CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS=n
+CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE=n
 
 #
 # Turn off Tx preparation stage
@@ -138,6 +144,11 @@ CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS=n
 #
 CONFIG_RTE_ETHDEV_TX_PREPARE_NOOP=n
 
+#
+# Common libraries, before Bus/PMDs
+#
+CONFIG_RTE_LIBRTE_COMMON_DPAAX=n
+
 #
 # Compile the Intel FPGA bus
 #
@@ -163,6 +174,11 @@ CONFIG_RTE_LIBRTE_ARK_DEBUG_TX=n
 CONFIG_RTE_LIBRTE_ARK_DEBUG_STATS=n
 CONFIG_RTE_LIBRTE_ARK_DEBUG_TRACE=n
 
+#
+# Compile Aquantia Atlantic PMD driver
+#
+CONFIG_RTE_LIBRTE_ATLANTIC_PMD=y
+
 #
 # Compile AMD PMD
 #
@@ -217,6 +233,11 @@ CONFIG_RTE_LIBRTE_DPAA2_USE_PHYS_IOVA=y
 CONFIG_RTE_LIBRTE_DPAA2_PMD=n
 CONFIG_RTE_LIBRTE_DPAA2_DEBUG_DRIVER=n
 
+#
+# Compile NXP ENETC PMD Driver
+#
+CONFIG_RTE_LIBRTE_ENETC_PMD=n
+
 #
 # Compile burst-oriented Amazon ENA PMD driver
 #
@@ -399,6 +420,11 @@ CONFIG_RTE_LIBRTE_PMD_FAILSAFE=y
 #
 CONFIG_RTE_LIBRTE_MVPP2_PMD=n
 
+#
+# Compile Marvell MVNETA PMD driver
+#
+CONFIG_RTE_LIBRTE_MVNETA_PMD=n
+
 #
 # Compile support for VMBus library
 #
@@ -479,6 +505,12 @@ CONFIG_RTE_CRYPTO_MAX_DEVS=64
 CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO=n
 CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO_DEBUG=n
 
+#
+# Compile NXP CAAM JR crypto Driver
+#
+CONFIG_RTE_LIBRTE_PMD_CAAM_JR=n
+CONFIG_RTE_LIBRTE_PMD_CAAM_JR_BE=n
+
 #
 # Compile NXP DPAA2 crypto sec driver for CAAM HW
 #
@@ -490,6 +522,11 @@ CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC=n
 CONFIG_RTE_LIBRTE_PMD_DPAA_SEC=n
 CONFIG_RTE_LIBRTE_DPAA_MAX_CRYPTODEV=4
 
+#
+# Compile PMD for Cavium OCTEON TX crypto device
+#
+CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO=y
+
 #
 # Compile PMD for QuickAssist based devices - see docs for details
 #
@@ -500,6 +537,7 @@ CONFIG_RTE_LIBRTE_PMD_QAT_SYM=n
 #
 CONFIG_RTE_PMD_QAT_MAX_PCI_DEVICES=48
 CONFIG_RTE_PMD_QAT_COMP_SGL_MAX_SEGMENTS=16
+CONFIG_RTE_PMD_QAT_COMP_IM_BUFFER_SIZE=65536
 
 #
 # Compile PMD for virtio crypto devices
@@ -559,7 +597,6 @@ CONFIG_RTE_LIBRTE_PMD_CCP=n
 # Compile PMD for Marvell Crypto device
 #
 CONFIG_RTE_LIBRTE_PMD_MVSAM_CRYPTO=n
-CONFIG_RTE_LIBRTE_PMD_MVSAM_CRYPTO_DEBUG=n
 
 #
 # Compile generic security library
@@ -602,6 +639,7 @@ CONFIG_RTE_EVENT_MAX_QUEUES_PER_DEV=64
 CONFIG_RTE_EVENT_TIMER_ADAPTER_NUM_MAX=32
 CONFIG_RTE_EVENT_ETH_INTR_RING_SIZE=1024
 CONFIG_RTE_EVENT_CRYPTO_ADAPTER_MAX_INSTANCE=32
+CONFIG_RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE=32
 
 #
 # Compile PMD for skeleton event device
@@ -614,6 +652,11 @@ CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV_DEBUG=n
 #
 CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV=y
 
+#
+# Compile PMD for distributed software event device
+#
+CONFIG_RTE_LIBRTE_PMD_DSW_EVENTDEV=y
+
 #
 # Compile PMD for octeontx sso event device
 #
@@ -661,7 +704,6 @@ CONFIG_RTE_LIBRTE_PMD_IFPGA_RAWDEV=y
 # Compile librte_ring
 #
 CONFIG_RTE_LIBRTE_RING=y
-CONFIG_RTE_RING_USE_C11_MEM_MODEL=n
 
 #
 # Compile librte_mempool
@@ -745,6 +787,11 @@ CONFIG_RTE_LIBRTE_BITRATE=y
 #
 CONFIG_RTE_LIBRTE_LATENCY_STATS=y
 
+#
+# Compile librte_telemetry
+#
+CONFIG_RTE_LIBRTE_TELEMETRY=n
+
 #
 # Compile librte_lpm
 #
index 9c5ea9d..6c1c8d0 100644 (file)
@@ -14,6 +14,7 @@ CONFIG_RTE_LIBRTE_KNI=y
 CONFIG_RTE_LIBRTE_PMD_KNI=y
 CONFIG_RTE_LIBRTE_VHOST=y
 CONFIG_RTE_LIBRTE_VHOST_NUMA=y
+CONFIG_RTE_LIBRTE_VHOST_POSTCOPY=n
 CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_IFC_PMD=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
@@ -29,12 +30,18 @@ CONFIG_RTE_PROC_INFO=y
 CONFIG_RTE_LIBRTE_VMBUS=y
 CONFIG_RTE_LIBRTE_NETVSC_PMD=y
 
+#
+# Common libraries, before Bus/PMDs
+#
+CONFIG_RTE_LIBRTE_COMMON_DPAAX=y
+
 # NXP DPAA BUS and drivers
 CONFIG_RTE_LIBRTE_DPAA_BUS=y
 CONFIG_RTE_LIBRTE_DPAA_MEMPOOL=y
 CONFIG_RTE_LIBRTE_DPAA_PMD=y
 CONFIG_RTE_LIBRTE_PMD_DPAA_EVENTDEV=y
 CONFIG_RTE_LIBRTE_PMD_DPAA_SEC=y
+CONFIG_RTE_LIBRTE_PMD_CAAM_JR=y
 
 # NXP FSLMC BUS and DPAA2 drivers
 CONFIG_RTE_LIBRTE_FSLMC_BUS=y
@@ -44,3 +51,8 @@ CONFIG_RTE_LIBRTE_PMD_DPAA2_EVENTDEV=y
 CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC=y
 CONFIG_RTE_LIBRTE_PMD_DPAA2_CMDIF_RAWDEV=y
 CONFIG_RTE_LIBRTE_PMD_DPAA2_QDMA_RAWDEV=y
+
+#
+# NXP ENETC PMD Driver
+#
+CONFIG_RTE_LIBRTE_ENETC_PMD=y
index c47aec0..544b677 100644 (file)
@@ -21,3 +21,6 @@ CONFIG_RTE_PKTMBUF_HEADROOM=128
 # NXP DPAA Bus
 CONFIG_RTE_LIBRTE_DPAA_DEBUG_DRIVER=n
 CONFIG_RTE_LIBRTE_DPAA_HWDEBUG=n
+
+# NXP CAAM_JR driver
+CONFIG_RTE_LIBRTE_PMD_CAAM_JR_BE=y
index 2bed66c..fd160aa 100644 (file)
@@ -7,10 +7,10 @@
 CONFIG_RTE_MACHINE="thunderx"
 
 CONFIG_RTE_CACHE_LINE_SIZE=128
+CONFIG_RTE_USE_C11_MEM_MODEL=n
 CONFIG_RTE_MAX_NUMA_NODES=2
 CONFIG_RTE_MAX_LCORE=96
 CONFIG_RTE_MAX_VFIO_GROUPS=128
-CONFIG_RTE_RING_USE_C11_MEM_MODEL=n
 
 #
 # Compile PMD for octeontx sso event device
index a52e22e..8cbf7ed 100644 (file)
@@ -48,6 +48,7 @@ CONFIG_RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT=n
 
 # Note: Initially, all of the PMD drivers compilation are turned off on Power
 # Will turn on them only after the successful testing on Power
+CONFIG_RTE_LIBRTE_ATLANTIC_PMD=n
 CONFIG_RTE_LIBRTE_IXGBE_PMD=n
 CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
 CONFIG_RTE_LIBRTE_VMXNET3_PMD=n
index 4d75532..0b710b7 100644 (file)
@@ -9,12 +9,21 @@ else
 endif
 dpdk_conf.set('RTE_MACHINE', machine)
 machine_args = []
-machine_args += '-march=' + machine
+# ppc64 does not support -march=native
+if host_machine.cpu_family().startswith('ppc') and machine == 'native'
+       machine_args += '-mcpu=' + machine
+       machine_args += '-mtune=' + machine
+else
+       machine_args += '-march=' + machine
+endif
 
 toolchain = cc.get_id()
 dpdk_conf.set_quoted('RTE_TOOLCHAIN', toolchain)
 dpdk_conf.set('RTE_TOOLCHAIN_' + toolchain.to_upper(), 1)
 
+add_project_link_arguments('-Wl,--no-as-needed', language: 'c')
+dpdk_extra_ldflags += '-Wl,--no-as-needed'
+
 # use pthreads
 add_project_link_arguments('-pthread', language: 'c')
 dpdk_extra_ldflags += '-pthread'
@@ -84,6 +93,8 @@ if host_machine.cpu_family().startswith('x86')
        arch_subdir = 'x86'
 elif host_machine.cpu_family().startswith('arm') or host_machine.cpu_family().startswith('aarch')
        arch_subdir = 'arm'
+elif host_machine.cpu_family().startswith('ppc')
+       arch_subdir = 'ppc_64'
 endif
 subdir(arch_subdir)
 dpdk_conf.set('RTE_COMPILE_TIME_CPUFLAGS', ','.join(compile_time_cpuflags))
@@ -92,3 +103,6 @@ dpdk_conf.set('RTE_COMPILE_TIME_CPUFLAGS', ','.join(compile_time_cpuflags))
 dpdk_conf.set_quoted('RTE_EAL_PMD_PATH', eal_pmd_path)
 
 install_headers('rte_config.h', subdir: get_option('include_subdir_arch'))
+
+# enable VFIO only when building for Linux
+dpdk_conf.set('RTE_EAL_VFIO', host_machine.system() == 'linux')
diff --git a/config/ppc_64/meson.build b/config/ppc_64/meson.build
new file mode 100644 (file)
index 0000000..e207c43
--- /dev/null
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+dpdk_conf.set('RTE_ARCH', 'ppc_64')
+dpdk_conf.set('RTE_ARCH_PPC_64', 1)
+dpdk_conf.set('RTE_ARCH_64', 1)
+
+# overrides specific to ppc64
+dpdk_conf.set('RTE_MAX_LCORE', 256)
+dpdk_conf.set('RTE_MAX_NUMA_NODES', 32)
+dpdk_conf.set('RTE_CACHE_LINE_SIZE', 128)
index a8e4797..d3732e9 100644 (file)
 
 /****** library defines ********/
 
+/* compat defines */
+#define RTE_BUILD_SHARED_LIB
+
 /* EAL defines */
+#define RTE_MAX_HEAPS 32
 #define RTE_MAX_MEMSEG_LISTS 128
 #define RTE_MAX_MEMSEG_PER_LIST 8192
 #define RTE_MAX_MEM_MB_PER_LIST 32768
@@ -31,7 +35,6 @@
 #define RTE_MAX_TAILQ 32
 #define RTE_LOG_DP_LEVEL RTE_LOG_INFO
 #define RTE_BACKTRACE 1
-#define RTE_EAL_VFIO 1
 #define RTE_MAX_VFIO_CONTAINERS 64
 
 /* bsd module defines */
@@ -66,6 +69,7 @@
 #define RTE_EVENT_TIMER_ADAPTER_NUM_MAX 32
 #define RTE_EVENT_ETH_INTR_RING_SIZE 1024
 #define RTE_EVENT_CRYPTO_ADAPTER_MAX_INSTANCE 32
+#define RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE 32
 
 /* rawdev defines */
 #define RTE_RAWDEV_MAX_DEVS 10
@@ -90,6 +94,7 @@
 /* Max. number of QuickAssist devices which can be attached */
 #define RTE_PMD_QAT_MAX_PCI_DEVICES 48
 #define RTE_PMD_QAT_COMP_SGL_MAX_SEGMENTS 16
+#define RTE_PMD_QAT_COMP_IM_BUFFER_SIZE 65536
 
 /* virtio crypto defines */
 #define RTE_MAX_VIRTIO_CRYPTO 32
 #define RTE_LIBRTE_I40E_QUEUE_NUM_PER_PF 64
 #define RTE_LIBRTE_I40E_QUEUE_NUM_PER_VF 4
 #define RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM 4
-/* interval up to 8160 us, aligned to 2 (or default value) */
-#define RTE_LIBRTE_I40E_ITR_INTERVAL -1
 
 /* Ring net PMD settings */
 #define RTE_PMD_RING_MAX_RX_RINGS 16
 #define RTE_PMD_RING_MAX_TX_RINGS 16
 
+/* QEDE PMD defines */
+#define RTE_LIBRTE_QEDE_FW ""
+
 #endif /* _RTE_CONFIG_H_ */
diff --git a/devtools/check-forbidden-tokens.awk b/devtools/check-forbidden-tokens.awk
new file mode 100755 (executable)
index 0000000..fd77cdd
--- /dev/null
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 Arnon Warshavsky <arnon@qwilt.com>
+
+# This awk script receives a list of expressions to monitor
+# and a list of folders to search these expressions in
+# - No search is done inside comments
+# - Both additions and removals of the expressions are checked
+#   A positive balance of additions fails the check
+
+BEGIN {
+       split(FOLDERS,deny_folders," ");
+       split(EXPRESSIONS,deny_expr," ");
+       in_file=0;
+       in_comment=0;
+       count=0;
+       comment_start="/*"
+       comment_end="*/"
+}
+# search for add/remove instances in current file
+# state machine assumes the comment structure is enforced by
+# checkpatch.pl
+(in_file) {
+       # comment start
+       if (index($0,comment_start) > 0) {
+               in_comment = 1
+       }
+       # non comment code
+       if (in_comment == 0) {
+               for (i in deny_expr) {
+                       forbidden_added = "^\\+.*" deny_expr[i];
+                       forbidden_removed = "^-.*" deny_expr[i];
+                       current = expressions[deny_expr[i]]
+                       if ($0 ~ forbidden_added) {
+                               count = count + 1;
+                               expressions[deny_expr[i]] = current + 1
+                       }
+                       if ($0 ~ forbidden_removed) {
+                               count = count - 1;
+                               expressions[deny_expr[i]] = current - 1
+                       }
+               }
+       }
+       # comment end
+       if (index($0,comment_end) > 0) {
+               in_comment = 0
+       }
+}
+# switch to the next file, check if the balance of add/remove
+# of the previous file had new additions
+($0 ~ "^\\+\\+\\+ b/") {
+       in_file = 0;
+       if (count > 0) {
+               exit;
+       }
+       for (i in deny_folders) {
+               re = "^\\+\\+\\+ b/" deny_folders[i];
+               if ($0 ~ re) {
+                       in_file = 1
+                       last_file = $0
+               }
+       }
+}
+END {
+       if (count > 0) {
+               print "Warning in " substr(last_file,6) ":"
+               print "are you sure you want to add the following:"
+               for (key in expressions) {
+                       if (expressions[key] > 0) {
+                               print key
+                       }
+               }
+               exit RET_ON_FAIL
+       }
+}
index 97dae4b..85d67fb 100755 (executable)
@@ -108,6 +108,7 @@ bad=$(echo "$headlines" | grep -E --color=always \
        -e ':.*\<nvm\>' \
        -e ':.*\<numa\>' \
        -e ':.*\<pci\>' \
+       -e ':.*\<phy\>' \
        -e ':.*\<pmd\>' \
        -e ':.*\<rss\>' \
        -e ':.*\<sctp\>' \
@@ -116,6 +117,8 @@ bad=$(echo "$headlines" | grep -E --color=always \
        -e ':.*\<[Vv]lan\>' \
        -e ':.*\<vdpa\>' \
        -e ':.*\<vsi\>' \
+       | grep \
+       -v ':.*\<OCTEON\ TX\>' \
        | sed 's,^,\t,')
 [ -z "$bad" ] || printf "Wrong headline lowercase:\n$bad\n"
 
index 9057633..ba9d00b 100755 (executable)
@@ -90,11 +90,11 @@ include_dir=${1:-build/include}
        'rte_eth_vhost.h' \
 }
 
-temp_cc=/tmp/${0##*/}.$$.c
+temp_cc=$(mktemp -t dpdk.${0##*/}.XXX.c)
 pass_cc=
 failures_cc=0
 
-temp_cxx=/tmp/${0##*/}.$$.cc
+temp_cxx=$(mktemp -t dpdk.${0##*/}.XXX.cc)
 pass_cxx=
 failures_cxx=0
 
index daaf45e..c0d2a6d 100755 (executable)
@@ -25,14 +25,14 @@ build_map_changes()
                # supresses the subordonate rules below
                /[-+] a\/.*\.^(map)/ {in_map=0}
 
-               # Triggering this rule, which starts a line with a + and ends it
+               # Triggering this rule, which starts a line and ends it
                # with a { identifies a versioned section.  The section name is
                # the rest of the line with the + and { symbols remvoed.
                # Triggering this rule sets in_sec to 1, which actives the
                # symbol rule below
-               /+.*{/ {gsub("+","");
+               /^.*{/ {
                        if (in_map == 1) {
-                               sec=$1; in_sec=1;
+                               sec=$(NF-1); in_sec=1;
                        }
                }
 
@@ -140,7 +140,7 @@ check_for_rule_violations()
 
 trap clean_and_exit_on_sig EXIT
 
-mapfile=`mktemp mapdb.XXXXXX`
+mapfile=`mktemp -t dpdk.mapdb.XXXXXX`
 patch=$1
 exit_code=1
 
index ba795ad..bf3114f 100755 (executable)
@@ -44,85 +44,12 @@ print_usage () {
 }
 
 check_forbidden_additions() {
-    # This awk script receives a list of expressions to monitor
-    # and a list of folders to search these expressions in
-    # - No search is done inside comments
-    # - Both additions and removals of the expressions are checked
-    #   A positive balance of additions fails the check
-       read -d '' awk_script << 'EOF'
-       BEGIN {
-               split(FOLDERS,deny_folders," ");
-               split(EXPRESSIONS,deny_expr," ");
-               in_file=0;
-               in_comment=0;
-               count=0;
-               comment_start="/*"
-               comment_end="*/"
-       }
-       # search for add/remove instances in current file
-       # state machine assumes the comments structure is enforced by
-       # checkpatches.pl
-       (in_file) {
-               # comment start
-               if (index($0,comment_start) > 0) {
-                       in_comment = 1
-               }
-               # non comment code
-               if (in_comment == 0) {
-                       for (i in deny_expr) {
-                               forbidden_added = "^\+.*" deny_expr[i];
-                               forbidden_removed="^-.*" deny_expr[i];
-                               current = expressions[deny_expr[i]]
-                               if ($0 ~ forbidden_added) {
-                                       count = count + 1;
-                                       expressions[deny_expr[i]] = current + 1
-                               }
-                               if ($0 ~ forbidden_removed) {
-                                       count = count - 1;
-                                       expressions[deny_expr[i]] = current - 1
-                               }
-                       }
-               }
-               # comment end
-               if (index($0,comment_end) > 0) {
-                       in_comment = 0
-               }
-       }
-       # switch to next file , check if the balance of add/remove
-       # of previous filehad new additions
-       ($0 ~ "^\+\+\+ b/") {
-               in_file = 0;
-               if (count > 0) {
-                       exit;
-               }
-               for (i in deny_folders) {
-                       re = "^\+\+\+ b/" deny_folders[i];
-                       if ($0 ~ deny_folders[i]) {
-                               in_file = 1
-                               last_file = $0
-                       }
-               }
-       }
-       END {
-               if (count > 0) {
-                       print "Warning in " substr(last_file,6) ":"
-                       print "are you sure you want to add the following:"
-                       for (key in expressions) {
-                               if (expressions[key] > 0) {
-                                       print key
-                               }
-                       }
-                       exit RET_ON_FAIL
-               }
-       }
-EOF
-       # ---------------------------------
        # refrain from new additions of rte_panic() and rte_exit()
        # multiple folders and expressions are separated by spaces
        awk -v FOLDERS="lib drivers" \
                -v EXPRESSIONS="rte_panic\\\( rte_exit\\\(" \
                -v RET_ON_FAIL=1 \
-               "$awk_script" -
+               -f $(dirname $(readlink -e $0))/check-forbidden-tokens.awk -
 }
 
 number=0
@@ -146,28 +73,35 @@ if [ ! -f "$DPDK_CHECKPATCH_PATH" ] || [ ! -x "$DPDK_CHECKPATCH_PATH" ] ; then
        exit 1
 fi
 
+print_headline() { # <title>
+       printf '\n### %s\n\n' "$1"
+       headline_printed=true
+}
+
 total=0
 status=0
 
 check () { # <patch> <commit> <title>
        local ret=0
+       headline_printed=false
 
        total=$(($total + 1))
-       ! $verbose || printf '\n### %s\n\n' "$3"
+       ! $verbose || print_headline "$3"
        if [ -n "$1" ] ; then
                tmpinput=$1
        elif [ -n "$2" ] ; then
-               tmpinput=$(mktemp checkpatches.XXXXXX)
+               tmpinput=$(mktemp -t dpdk.checkpatches.XXXXXX)
                git format-patch --find-renames \
                --no-stat --stdout -1 $commit > "$tmpinput"
        else
-               tmpinput=$(mktemp checkpatches.XXXXXX)
+               tmpinput=$(mktemp -t dpdk.checkpatches.XXXXXX)
                cat > "$tmpinput"
        fi
 
+       ! $verbose || printf 'Running checkpatch.pl:\n'
        report=$($DPDK_CHECKPATCH_PATH $options "$tmpinput" 2>/dev/null)
        if [ $? -ne 0 ] ; then
-               $verbose || printf '\n### %s\n\n' "$3"
+               $headline_printed || print_headline "$3"
                printf '%s\n' "$report" | sed -n '1,/^total:.*lines checked$/p'
                ret=1
        fi
@@ -175,6 +109,7 @@ check () { # <patch> <commit> <title>
        ! $verbose || printf '\nChecking API additions/removals:\n'
        report=$($VALIDATE_NEW_API "$tmpinput")
        if [ $? -ne 0 ] ; then
+               $headline_printed || print_headline "$3"
                printf '%s\n' "$report"
                ret=1
        fi
@@ -182,6 +117,7 @@ check () { # <patch> <commit> <title>
        ! $verbose || printf '\nChecking forbidden tokens additions:\n'
        report=$(check_forbidden_additions <"$tmpinput")
        if [ $? -ne 0 ] ; then
+               $headline_printed || print_headline "$3"
                printf '%s\n' "$report"
                ret=1
        fi
index 4ca5025..8b17a8c 100755 (executable)
@@ -44,7 +44,7 @@ PATCH_LIST="$@"
        exit 1
 )
 
-tmp=$(mktemp)
+tmp=$(mktemp -t dpdk.cocci.XXX)
 
 for c in $PATCH_LIST; do
        while true; do
index 1eee241..42f4ad0 100755 (executable)
@@ -10,6 +10,7 @@ default_path=$PATH
 # - DPDK_DEP_ARCHIVE
 # - DPDK_DEP_CFLAGS
 # - DPDK_DEP_ISAL (y/[n])
+# - DPDK_DEP_JSON (y/[n])
 # - DPDK_DEP_LDFLAGS
 # - DPDK_DEP_MLX (y/[n])
 # - DPDK_DEP_NUMA ([y]/n)
@@ -96,6 +97,7 @@ reset_env ()
        unset DPDK_DEP_ARCHIVE
        unset DPDK_DEP_CFLAGS
        unset DPDK_DEP_ISAL
+       unset DPDK_DEP_JSON
        unset DPDK_DEP_LDFLAGS
        unset DPDK_DEP_MLX
        unset DPDK_DEP_NUMA
@@ -179,9 +181,13 @@ config () # <directory> <target> <options>
                sed -ri     's,(BBDEV_TURBO_SW=)n,\1y,' $1/.config
                sed -ri           's,(SCHED_.*=)n,\1y,' $1/.config
                test -z "$LIBMUSDK_PATH" || \
-               sed -ri    's,(PMD_MVSAM_CRYPTO=)n,\1y,' $1/.config
+               sed -ri   's,(PMD_MVSAM_CRYPTO=)n,\1y,' $1/.config
                test -z "$LIBMUSDK_PATH" || \
                sed -ri          's,(MVPP2_PMD=)n,\1y,' $1/.config
+               test -z "$LIBMUSDK_PATH" || \
+               sed -ri         's,(MVNETA_PMD=)n,\1y,' $1/.config
+               test -z "$DPDK_DEP_JSON" || \
+               sed -ri          's,(TELEMETRY=)n,\1y,' $1/.config
                build_config_hook $1 $2 $3
 
                # Explicit enabler/disabler (uppercase)
index 951c906..79109b7 100755 (executable)
@@ -9,6 +9,7 @@
 
 srcdir=$(dirname $(readlink -m $0))/..
 MESON=${MESON:-meson}
+use_shared="--default-library=shared"
 
 if command -v ninja >/dev/null 2>&1 ; then
        ninja_cmd=ninja
@@ -42,19 +43,19 @@ for c in gcc clang ; do
 done
 
 # test compilation with minimal x86 instruction set
-build build-x86-default -Dmachine=nehalem
+build build-x86-default -Dmachine=nehalem $use_shared
 
 # enable cross compilation if gcc cross-compiler is found
 c=aarch64-linux-gnu-gcc
 if command -v $c >/dev/null 2>&1 ; then
        # compile the general v8a also for clang to increase coverage
        export CC="ccache clang"
-       build build-arm64-host-clang --cross-file \
-               config/arm/arm64_armv8_linuxapp_gcc
+       build build-arm64-host-clang $use_shared \
+               --cross-file config/arm/arm64_armv8_linuxapp_gcc
 
        for f in config/arm/arm*gcc ; do
                export CC="ccache gcc"
                build build-$(basename $f | tr '_' '-' | cut -d'-' -f-2) \
-                       --cross-file $f
+                       $use_shared --cross-file $f
        done
 fi
index 9265907..e27874c 100644 (file)
@@ -22,6 +22,7 @@ The public API headers are grouped by topics:
   [compress]           (@ref rte_comp.h),
   [eventdev]           (@ref rte_eventdev.h),
   [event_eth_rx_adapter]   (@ref rte_event_eth_rx_adapter.h),
+  [event_eth_tx_adapter]   (@ref rte_event_eth_tx_adapter.h),
   [event_timer_adapter]    (@ref rte_event_timer_adapter.h),
   [event_crypto_adapter]   (@ref rte_event_crypto_adapter.h),
   [rawdev]             (@ref rte_rawdev.h),
@@ -36,6 +37,7 @@ The public API headers are grouped by topics:
   [softnic]            (@ref rte_eth_softnic.h),
   [bond]               (@ref rte_eth_bond.h),
   [vhost]              (@ref rte_vhost.h),
+  [vdpa]               (@ref rte_vdpa.h),
   [KNI]                (@ref rte_kni.h),
   [ixgbe]              (@ref rte_pmd_ixgbe.h),
   [i40e]               (@ref rte_pmd_i40e.h),
@@ -153,6 +155,7 @@ The public API headers are grouped by topics:
 
 - **debug**:
   [jobstats]           (@ref rte_jobstats.h),
+  [telemetry]          (@ref rte_telemetry.h),
   [pdump]              (@ref rte_pdump.h),
   [hexdump]            (@ref rte_hexdump.h),
   [debug]              (@ref rte_debug.h),
diff --git a/doc/api/doxy-api.conf b/doc/api/doxy-api.conf
deleted file mode 100644 (file)
index 66693c3..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-# SPDX-License-Identifier: BSD-3-Clause
-# Copyright 2013-2017 6WIND S.A.
-
-PROJECT_NAME            = DPDK
-INPUT                   = doc/api/doxy-api-index.md \
-                          drivers/crypto/scheduler \
-                          drivers/mempool/dpaa2 \
-                          drivers/net/bnxt \
-                          drivers/net/bonding \
-                          drivers/net/dpaa \
-                          drivers/net/i40e \
-                          drivers/net/ixgbe \
-                          drivers/net/softnic \
-                          drivers/raw/dpaa2_cmdif \
-                          drivers/raw/dpaa2_qdma \
-                          lib/librte_eal/common/include \
-                          lib/librte_eal/common/include/generic \
-                          lib/librte_acl \
-                          lib/librte_bbdev \
-                          lib/librte_bitratestats \
-                          lib/librte_bpf \
-                          lib/librte_cfgfile \
-                          lib/librte_cmdline \
-                          lib/librte_compat \
-                          lib/librte_compressdev \
-                          lib/librte_cryptodev \
-                          lib/librte_distributor \
-                          lib/librte_efd \
-                          lib/librte_ethdev \
-                          lib/librte_eventdev \
-                          lib/librte_flow_classify \
-                          lib/librte_gro \
-                          lib/librte_gso \
-                          lib/librte_hash \
-                          lib/librte_ip_frag \
-                          lib/librte_jobstats \
-                          lib/librte_kni \
-                          lib/librte_kvargs \
-                          lib/librte_latencystats \
-                          lib/librte_lpm \
-                          lib/librte_mbuf \
-                          lib/librte_member \
-                          lib/librte_mempool \
-                          lib/librte_meter \
-                          lib/librte_metrics \
-                          lib/librte_net \
-                          lib/librte_pci \
-                          lib/librte_pdump \
-                          lib/librte_pipeline \
-                          lib/librte_port \
-                          lib/librte_power \
-                          lib/librte_rawdev \
-                          lib/librte_reorder \
-                          lib/librte_ring \
-                          lib/librte_sched \
-                          lib/librte_security \
-                          lib/librte_table \
-                          lib/librte_timer \
-                          lib/librte_vhost
-FILE_PATTERNS           = rte_*.h \
-                          cmdline.h
-PREDEFINED              = __DOXYGEN__ \
-                          VFIO_PRESENT \
-                          __attribute__(x)=
-
-OPTIMIZE_OUTPUT_FOR_C   = YES
-ENABLE_PREPROCESSING    = YES
-MACRO_EXPANSION         = YES
-EXPAND_ONLY_PREDEF      = YES
-EXTRACT_STATIC          = YES
-DISTRIBUTE_GROUP_DOC    = YES
-HIDE_UNDOC_MEMBERS      = YES
-HIDE_UNDOC_CLASSES      = YES
-HIDE_SCOPE_NAMES        = YES
-GENERATE_DEPRECATEDLIST = NO
-VERBATIM_HEADERS        = NO
-ALPHABETICAL_INDEX      = NO
-
-HTML_TIMESTAMP          = NO
-HTML_DYNAMIC_SECTIONS   = YES
-SEARCHENGINE            = NO
-SORT_MEMBER_DOCS        = NO
-SOURCE_BROWSER          = YES
-
-EXAMPLE_PATH            = examples
-EXAMPLE_PATTERNS        = *.c
-EXAMPLE_RECURSIVE       = YES
diff --git a/doc/api/doxy-api.conf.in b/doc/api/doxy-api.conf.in
new file mode 100644 (file)
index 0000000..77ba327
--- /dev/null
@@ -0,0 +1,97 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2013-2017 6WIND S.A.
+
+PROJECT_NAME            = DPDK
+PROJECT_NUMBER          = @VERSION@
+INPUT                   = @TOPDIR@/doc/api/doxy-api-index.md \
+                          @TOPDIR@/drivers/crypto/scheduler \
+                          @TOPDIR@/drivers/mempool/dpaa2 \
+                          @TOPDIR@/drivers/net/bnxt \
+                          @TOPDIR@/drivers/net/bonding \
+                          @TOPDIR@/drivers/net/dpaa \
+                          @TOPDIR@/drivers/net/i40e \
+                          @TOPDIR@/drivers/net/ixgbe \
+                          @TOPDIR@/drivers/net/softnic \
+                          @TOPDIR@/drivers/raw/dpaa2_cmdif \
+                          @TOPDIR@/drivers/raw/dpaa2_qdma \
+                          @TOPDIR@/lib/librte_eal/common/include \
+                          @TOPDIR@/lib/librte_eal/common/include/generic \
+                          @TOPDIR@/lib/librte_acl \
+                          @TOPDIR@/lib/librte_bbdev \
+                          @TOPDIR@/lib/librte_bitratestats \
+                          @TOPDIR@/lib/librte_bpf \
+                          @TOPDIR@/lib/librte_cfgfile \
+                          @TOPDIR@/lib/librte_cmdline \
+                          @TOPDIR@/lib/librte_compat \
+                          @TOPDIR@/lib/librte_compressdev \
+                          @TOPDIR@/lib/librte_cryptodev \
+                          @TOPDIR@/lib/librte_distributor \
+                          @TOPDIR@/lib/librte_efd \
+                          @TOPDIR@/lib/librte_ethdev \
+                          @TOPDIR@/lib/librte_eventdev \
+                          @TOPDIR@/lib/librte_flow_classify \
+                          @TOPDIR@/lib/librte_gro \
+                          @TOPDIR@/lib/librte_gso \
+                          @TOPDIR@/lib/librte_hash \
+                          @TOPDIR@/lib/librte_ip_frag \
+                          @TOPDIR@/lib/librte_jobstats \
+                          @TOPDIR@/lib/librte_kni \
+                          @TOPDIR@/lib/librte_kvargs \
+                          @TOPDIR@/lib/librte_latencystats \
+                          @TOPDIR@/lib/librte_lpm \
+                          @TOPDIR@/lib/librte_mbuf \
+                          @TOPDIR@/lib/librte_member \
+                          @TOPDIR@/lib/librte_mempool \
+                          @TOPDIR@/lib/librte_meter \
+                          @TOPDIR@/lib/librte_metrics \
+                          @TOPDIR@/lib/librte_net \
+                          @TOPDIR@/lib/librte_pci \
+                          @TOPDIR@/lib/librte_pdump \
+                          @TOPDIR@/lib/librte_pipeline \
+                          @TOPDIR@/lib/librte_port \
+                          @TOPDIR@/lib/librte_power \
+                          @TOPDIR@/lib/librte_rawdev \
+                          @TOPDIR@/lib/librte_reorder \
+                          @TOPDIR@/lib/librte_ring \
+                          @TOPDIR@/lib/librte_sched \
+                          @TOPDIR@/lib/librte_security \
+                          @TOPDIR@/lib/librte_table \
+                          @TOPDIR@/lib/librte_telemetry \
+                          @TOPDIR@/lib/librte_timer \
+                          @TOPDIR@/lib/librte_vhost
+INPUT                   += @API_EXAMPLES@
+FILE_PATTERNS           = rte_*.h \
+                          cmdline.h
+PREDEFINED              = __DOXYGEN__ \
+                          VFIO_PRESENT \
+                          __attribute__(x)=
+
+OPTIMIZE_OUTPUT_FOR_C   = YES
+ENABLE_PREPROCESSING    = YES
+MACRO_EXPANSION         = YES
+EXPAND_ONLY_PREDEF      = YES
+EXTRACT_STATIC          = YES
+DISTRIBUTE_GROUP_DOC    = YES
+HIDE_UNDOC_MEMBERS      = YES
+HIDE_UNDOC_CLASSES      = YES
+HIDE_SCOPE_NAMES        = YES
+GENERATE_DEPRECATEDLIST = YES
+VERBATIM_HEADERS        = NO
+ALPHABETICAL_INDEX      = NO
+
+HTML_TIMESTAMP          = NO
+HTML_DYNAMIC_SECTIONS   = YES
+SEARCHENGINE            = NO
+SORT_MEMBER_DOCS        = NO
+SOURCE_BROWSER          = YES
+
+EXAMPLE_PATH            = @TOPDIR@/examples
+EXAMPLE_PATTERNS        = *.c
+EXAMPLE_RECURSIVE       = YES
+
+OUTPUT_DIRECTORY        = @OUTPUT@
+STRIP_FROM_PATH         = @STRIP_FROM_PATH@
+GENERATE_HTML           = YES
+HTML_OUTPUT             = @HTML_OUTPUT@
+GENERATE_LATEX          = NO
+GENERATE_MAN            = NO
diff --git a/doc/api/generate_doxygen.sh b/doc/api/generate_doxygen.sh
new file mode 100755 (executable)
index 0000000..ab57660
--- /dev/null
@@ -0,0 +1,10 @@
+#! /bin/sh -e
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 Luca Boccassi <bluca@debian.org>
+
+DOXYCONF=$1
+OUTDIR=$2
+SCRIPTCSS=$3
+
+doxygen "${DOXYCONF}"
+"${SCRIPTCSS}" "${OUTDIR}"/doxygen.css
diff --git a/doc/api/generate_examples.sh b/doc/api/generate_examples.sh
new file mode 100755 (executable)
index 0000000..6fcfe51
--- /dev/null
@@ -0,0 +1,12 @@
+#! /bin/sh -e
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 Luca Boccassi <bluca@debian.org>
+
+EXAMPLES_DIR=$1
+API_EXAMPLES=$2
+
+exec > "${API_EXAMPLES}"
+printf '/**\n'
+printf '@page examples DPDK Example Programs\n\n'
+find "${EXAMPLES_DIR}" -type f -name '*.c' -printf '@example examples/%P\n' | LC_ALL=C sort
+printf '*/\n'
diff --git a/doc/api/meson.build b/doc/api/meson.build
new file mode 100644 (file)
index 0000000..30bdc57
--- /dev/null
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+doxygen = find_program('doxygen', required: get_option('enable_docs'))
+
+if doxygen.found()
+       # The CSS customisation script must run on a file in a subdirectory
+       # that is only created at build time, so it cannot be a separate
+       # custom_target; instead the doxygen call is wrapped in a script that
+       # applies the CSS modification afterwards.
+       generate_doxygen = find_program('generate_doxygen.sh')
+       generate_examples = find_program('generate_examples.sh')
+       generate_css = find_program('doxy-html-custom.sh')
+
+       inputdir = join_paths(meson.source_root(), 'examples')
+       htmldir = join_paths('share', 'doc', 'dpdk')
+
+       # due to the following bug: https://github.com/mesonbuild/meson/issues/4107
+       # if install is set to true it will override build_by_default and it will
+       # cause the target to always be built. If install were to be always set to
+       # false it would be impossible to install the docs.
+       # So use a configure option for now.
+       example = custom_target('examples.dox',
+               input: inputdir,
+               output: 'examples.dox',
+               command: [generate_examples, '@INPUT@', '@OUTPUT@'],
+               install: get_option('enable_docs'),
+               install_dir: htmldir,
+               build_by_default: false)
+
+       cdata = configuration_data()
+       cdata.set('VERSION', meson.project_version())
+       cdata.set('API_EXAMPLES', join_paths(meson.build_root(), 'doc', 'api', 'examples.dox'))
+       cdata.set('OUTPUT', join_paths(meson.build_root(), 'doc', 'api'))
+       cdata.set('HTML_OUTPUT', 'api')
+       cdata.set('TOPDIR', meson.source_root())
+       cdata.set('STRIP_FROM_PATH', meson.source_root())
+
+       doxy_conf = configure_file(input: 'doxy-api.conf.in',
+               output: 'doxy-api.conf',
+               configuration: cdata,
+               install: false)
+
+       doxy_build = custom_target('doxygen',
+               depends: example,
+               input: doxy_conf,
+               output: 'api',
+               command: [generate_doxygen, '@INPUT@', '@OUTPUT@', generate_css],
+               install: get_option('enable_docs'),
+               install_dir: htmldir,
+               build_by_default: false)
+
+       doc_targets += doxy_build
+       doc_target_names += 'Doxygen_API'
+endif
index 9618e75..508e2cb 100644 (file)
@@ -85,6 +85,8 @@ Project-specific options are passed used -Doption=value::
 
        meson -Dmax_lcores=8 smallbuild  # scale build for smaller systems
 
+       meson -Denable_docs=true fullbuild  # build and install docs
+
 Examples of setting the same options using meson configure::
 
        meson configure -Dwerror=true
index 884a8b0..cc8b025 100644 (file)
@@ -1,7 +1,7 @@
 ;
 ; Refer to default.ini for the full list of available PMD features.
 ;
-; Supported features of 'OCTEONTX ZIP' compression driver.
+; Supported features of 'OCTEON TX ZIP' compression driver.
 ;
 [Features]
 HW Accelerated = Y
index 5cd4524..6b1e7f9 100644 (file)
@@ -13,3 +13,4 @@ Adler32             = Y
 Crc32               = Y
 Adler32&Crc32       = Y
 Fixed               = Y
+Dynamic             = Y
index 5a32d5d..05dbd68 100644 (file)
@@ -1,12 +1,12 @@
 ..  SPDX-License-Identifier: BSD-3-Clause
     Copyright(c) 2018 Cavium Networks.
 
-Octeontx ZIP Compression Poll Mode Driver
-=========================================
+OCTEON TX ZIP Compression Poll Mode Driver
+==========================================
 
-The Octeontx ZIP PMD (**librte_pmd_octeontx_zip**) provides poll mode
+The OCTEON TX ZIP PMD (**librte_pmd_octeontx_zip**) provides poll mode
 compression & decompression driver for ZIP HW offload device, found in
-**Cavium OCTEONTX** SoC family.
+**Cavium OCTEON TX** SoC family.
 
 More information can be found at `Cavium, Inc Official Website
 <http://www.cavium.com/OCTEON-TX_ARM_Processors.html>`_.
@@ -14,7 +14,7 @@ More information can be found at `Cavium, Inc Official Website
 Features
 --------
 
-Octeontx ZIP PMD has support for:
+OCTEON TX ZIP PMD has support for:
 
 Compression/Decompression algorithm:
 
@@ -34,24 +34,24 @@ Limitations
 
 * Chained mbufs are not supported.
 
-Supported OCTEONTX SoCs
------------------------
+Supported OCTEON TX SoCs
+------------------------
 
 - CN83xx
 
 Steps To Setup Platform
 -----------------------
 
-   Octeontx SDK includes kernel image which provides Octeontx ZIP PF
+   The OCTEON TX SDK includes a kernel image which provides the OCTEON TX ZIP PF
    driver to manage configuration of ZIPVF device
    Required version of SDK is "OCTEONTX-SDK-6.2.0-build35" or above.
 
    The SDK can be installed using the command below.
-   #rpm -ivh CTEONTX-SDK-6.2.0-build35.x86_64.rpm --force --nodeps
+   #rpm -ivh OCTEONTX-SDK-6.2.0-build35.x86_64.rpm --force --nodeps
    It will install the OCTEONTX-SDK at the following default location
    /usr/local/Cavium_Networks/OCTEONTX-SDK/
 
-   For more information on building and booting linux kernel on OCTEONTX
+   For more information on building and booting the Linux kernel on OCTEON TX,
    please refer to /usr/local/Cavium_Networks/OCTEONTX-SDK/docs/OcteonTX-SDK-UG_6.2.0.pdf.
 
    SDK and related information can be obtained from: `Cavium support site <https://support.cavium.com/>`_.
@@ -62,7 +62,7 @@ Installation
 Driver Compilation
 ~~~~~~~~~~~~~~~~~~
 
-To compile the OCTEONTX ZIP PMD for Linux arm64 gcc target, run the
+To compile the OCTEON TX ZIP PMD for Linux arm64 gcc target, run the
 following ``make`` command:
 
    .. code-block:: console
@@ -74,7 +74,7 @@ following ``make`` command:
 Initialization
 --------------
 
-The octeontx zip is exposed as pci device which consists of a set of
+The OCTEON TX ZIP device is exposed as a PCI device which consists of a set of
 PCIe VF devices. On EAL initialization, ZIP PCIe VF devices will be
 probed. To use the PMD in an application, user must:
 
index 8b1270b..aee3b99 100644 (file)
@@ -18,11 +18,7 @@ QAT compression PMD has support for:
 
 Compression/Decompression algorithm:
 
-    * DEFLATE
-
-Huffman code type:
-
-    * FIXED
+    * DEFLATE - using Fixed and Dynamic Huffman encoding
 
 Window size support:
 
@@ -36,12 +32,13 @@ Limitations
 -----------
 
 * Compressdev level 0, no compression, is not supported.
+* Queue pairs are not thread-safe (that is, within a single queue pair, RX and TX from different lcores are not supported).
+* No BSD support, as the BSD QAT kernel driver is not available.
 
-* Dynamic Huffman encoding is not yet supported.
 
 Installation
 ------------
 
 The QAT compression PMD is built by default with a standard DPDK build.
 
-It depends on a QAT kernel driver, see :ref:`qat_kernel_installation`.
+It depends on a QAT kernel driver, see :ref:`building_qat`.
index b1bf0d1..19445c1 100644 (file)
@@ -741,8 +741,8 @@ A specialization looks like this:
  * PF/VF mailbox output: ``type.section.name.mbox``
 
 A real world example is the i40e poll mode driver which exposes two
-specializations, one for initialization ``pmd.i40e.init`` and the other for
-the remaining driver logs ``pmd.i40e.driver``.
+specializations, one for initialization ``pmd.net.i40e.init`` and the other for
+the remaining driver logs ``pmd.net.i40e.driver``.
 
 Note that specializations have no formatting rules, but please follow
 a precedent if one exists. In order to see all current log topics and
index 6a07555..0165990 100644 (file)
@@ -297,8 +297,8 @@ Line Length
 
      testpmd -l 2-3 -n 4 \
              --vdev=virtio_user0,path=/dev/vhost-net,queues=2,queue_size=1024 \
-             -- -i --txqflags=0x0 --enable-hw-vlan --enable-lro \
-             --enable-rx-cksum --txq=2 --rxq=2 --rxd=1024  --txd=1024
+             -- -i --tx-offloads=0x0000002c --enable-lro --txq=2 --rxq=2 \
+             --txd=1024 --rxd=1024
 
 
 Whitespace
@@ -615,19 +615,14 @@ The following are some guidelines for use of Doxygen in the DPDK API documentati
   .. code-block:: c
 
      /**
-      * Attach a new Ethernet device specified by arguments.
-      *
-      * @param devargs
-      *  A pointer to a strings array describing the new device
-      *  to be attached. The strings should be a pci address like
-      *  `0000:01:00.0` or **virtual** device name like `net_pcap0`.
-      * @param port_id
-      *  A pointer to a port identifier actually attached.
+      * Try to take the lock.
       *
+      * @param sl
+      *   A pointer to the spinlock.
       * @return
-      *  0 on success and port_id is filled, negative on error.
+      *   1 if the lock is successfully taken; 0 otherwise.
       */
-     int rte_eth_dev_attach(const char *devargs, uint8_t *port_id);
+     int rte_spinlock_trylock(rte_spinlock_t *sl);
 
 * Doxygen supports Markdown style syntax such as bold, italics, fixed width text and lists.
   For example the second line in the ``devargs`` parameter in the previous example will be rendered as:
index c292950..63e060d 100644 (file)
@@ -44,6 +44,7 @@ Hash algorithms:
 AEAD algorithms:
 
 * RTE_CRYPTO_AEAD_AES_CCM
+* RTE_CRYPTO_AEAD_AES_GCM
 
 Limitations
 -----------
diff --git a/doc/guides/cryptodevs/caam_jr.rst b/doc/guides/cryptodevs/caam_jr.rst
new file mode 100644 (file)
index 0000000..e87ff09
--- /dev/null
@@ -0,0 +1,150 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright 2018 NXP
+
+
+NXP CAAM JOB RING (caam_jr)
+===========================
+
+The caam_jr PMD provides poll mode crypto driver support for NXP SEC 4.x+ (CAAM)
+hardware accelerator. More information is available at:
+
+`NXP Cryptographic Acceleration Technology  <https://www.nxp.com/applications/solutions/internet-of-things/secure-things/network-security-technology/cryptographic-acceleration-technology:NETWORK_SECURITY_CRYPTOG>`_.
+
+Architecture
+------------
+
+SEC is the SOC's security engine, which serves as NXP's latest cryptographic
+acceleration and offloading hardware. It combines functions previously
+implemented in separate modules to create a modular and scalable acceleration
+and assurance engine. It also implements block encryption algorithms, stream
+cipher algorithms, hashing algorithms, public key algorithms, run-time
+integrity checking, and a hardware random number generator. SEC performs
+higher-level cryptographic operations than previous NXP cryptographic
+accelerators. This provides significant improvement to system level performance.
+
+SEC HW accelerators of version 4.x and above are also known as CAAM.
+
+The caam_jr PMD is one of the DPAA drivers; it uses the uio interface to
+interact with the Linux kernel to configure and destroy the device instance (ring).
+
+
+Implementation
+--------------
+
+SEC provides platform assurance by working with SecMon, which is a companion
+logic block that tracks the security state of the SOC. SEC is programmed by
+means of descriptors (not to be confused with frame descriptors (FDs)) that
+indicate the operations to be performed and link to the message and
+associated data. SEC incorporates two DMA engines to fetch the descriptors,
+read the message data, and write the results of the operations. The DMA
+engine provides a scatter/gather capability so that SEC can read and write
+data scattered in memory. SEC may be configured by means of software for
+dynamic changes in byte ordering. The default configuration for this version
+of SEC is little-endian mode.
+
+Note that one physical Job Ring represents one caam_jr device.
+
+Features
+--------
+
+The CAAM_JR PMD has support for:
+
+Cipher algorithms:
+
+* ``RTE_CRYPTO_CIPHER_3DES_CBC``
+* ``RTE_CRYPTO_CIPHER_AES128_CBC``
+* ``RTE_CRYPTO_CIPHER_AES192_CBC``
+* ``RTE_CRYPTO_CIPHER_AES256_CBC``
+* ``RTE_CRYPTO_CIPHER_AES128_CTR``
+* ``RTE_CRYPTO_CIPHER_AES192_CTR``
+* ``RTE_CRYPTO_CIPHER_AES256_CTR``
+
+Hash algorithms:
+
+* ``RTE_CRYPTO_AUTH_SHA1_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA224_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA256_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA384_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA512_HMAC``
+* ``RTE_CRYPTO_AUTH_MD5_HMAC``
+
+AEAD algorithms:
+
+* ``RTE_CRYPTO_AEAD_AES_GCM``
+
+Supported DPAA SoCs
+--------------------
+
+* LS1046A/LS1026A
+* LS1043A/LS1023A
+* LS1028A
+* LS1012A
+
+Limitations
+-----------
+
+* Hash followed by Cipher mode is not supported
+* Only supports the session-oriented API implementation (session-less APIs are not supported).
+
+Prerequisites
+-------------
+
+The caam_jr driver has the following dependencies which are not part of DPDK and must be installed separately:
+
+* **NXP Linux SDK**
+
+  NXP Linux software development kit (SDK) includes support for the family
+  of QorIQ® ARM-Architecture-based system on chip (SoC) processors
+  and corresponding boards.
+
+  It includes the Linux board support packages (BSPs) for NXP SoCs,
+  a fully operational tool chain, kernel and board specific modules.
+
+  SDK and related information can be obtained from:  `NXP QorIQ SDK  <http://www.nxp.com/products/software-and-tools/run-time-software/linux-sdk/linux-sdk-for-qoriq-processors:SDKLINUX>`_.
+
+Currently supported by DPDK:
+
+* NXP SDK **18.09+**.
+* Supported architectures:  **arm64 LE**.
+
+* Follow the DPDK :ref:`Getting Started Guide for Linux <linux_gsg>` to setup the basic DPDK environment.
+
+Pre-Installation Configuration
+------------------------------
+
+Config File Options
+~~~~~~~~~~~~~~~~~~~
+
+The following options can be modified in the ``config`` file
+to enable caam_jr PMD.
+
+Please note that enabling debugging options may affect system performance.
+
+* ``CONFIG_RTE_LIBRTE_PMD_CAAM_JR`` (default ``n``)
+  Toggle compilation of the ``librte_pmd_caam_jr`` driver.
+  By default it is enabled only in the common_linuxapp config.
+
+* ``CONFIG_RTE_LIBRTE_PMD_CAAM_JR_BE`` (default ``n``)
+  Disabled by default. It can be enabled when the underlying hardware supports
+  the CAAM in big-endian (BE) mode, e.g. on LS1043A and LS1046A.
+  BE mode is enabled by default in defconfig-arm64-dpaa-linuxapp-gcc.
+
+Installation
+------------
+
+To compile the caam_jr PMD for Linux arm64 gcc target, run the
+following ``make`` command:
+
+.. code-block:: console
+
+   cd <DPDK-source-directory>
+   make config T=arm64-armv8a-linuxapp-gcc install
+
+Enabling logs
+-------------
+
+For enabling logs, use the following EAL parameter:
+
+.. code-block:: console
+
+   ./your_crypto_application <EAL args> --log-level=pmd.crypto.caam,<level>
diff --git a/doc/guides/cryptodevs/features/caam_jr.ini b/doc/guides/cryptodevs/features/caam_jr.ini
new file mode 100644 (file)
index 0000000..68f8d81
--- /dev/null
@@ -0,0 +1,46 @@
+;
+; Supported features of the 'caam_jr' crypto driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Symmetric crypto       = Y
+Sym operation chaining = Y
+HW Accelerated         = Y
+Protocol offload       = Y
+In Place SGL           = Y
+OOP SGL In SGL Out     = Y
+OOP SGL In LB  Out     = Y
+OOP LB  In SGL Out     = Y
+OOP LB  In LB  Out     = Y
+
+;
+; Supported crypto algorithms of the 'caam_jr' crypto driver.
+;
+[Cipher]
+AES CBC (128) = Y
+AES CBC (192) = Y
+AES CBC (256) = Y
+AES CTR (128) = Y
+AES CTR (192) = Y
+AES CTR (256) = Y
+3DES CBC      = Y
+
+;
+; Supported authentication algorithms of the 'caam_jr' crypto driver.
+;
+[Auth]
+MD5 HMAC     = Y
+SHA1 HMAC    = Y
+SHA224 HMAC  = Y
+SHA256 HMAC  = Y
+SHA384 HMAC  = Y
+SHA512 HMAC  = Y
+
+;
+; Supported AEAD algorithms of the 'caam_jr' crypto driver.
+;
+[AEAD]
+AES GCM (128) = Y
+AES GCM (192) = Y
+AES GCM (256) = Y
index 92a7ccf..810da0d 100644 (file)
@@ -38,9 +38,13 @@ AES ECB (256)  =
 AES CTR (128)  =
 AES CTR (192)  =
 AES CTR (256)  =
+AES XTS (128)  =
+AES XTS (192)  =
+AES XTS (256)  =
 AES DOCSIS BPI =
 3DES CBC       =
 3DES CTR       =
+3DES ECB       =
 DES CBC        =
 DES DOCSIS BPI =
 SNOW3G UEA2    =
index b7c105a..0cc90a5 100644 (file)
@@ -5,17 +5,24 @@
 [Features]
 Symmetric crypto       = Y
 Sym operation chaining = Y
+HW Accelerated         = Y
+OOP SGL In LB  Out     = Y
+OOP LB  In LB  Out     = Y
 
 ;
 ; Supported crypto algorithms of a default crypto driver.
 ;
 [Cipher]
+NULL           = Y
 AES CBC (128)  = Y
 AES CBC (192)  = Y
 AES CBC (256)  = Y
 AES CTR (128)  = Y
 AES CTR (192)  = Y
 AES CTR (256)  = Y
+AES ECB (128)  = Y
+AES ECB (192)  = Y
+AES ECB (256)  = Y
 3DES CBC       = Y
 3DES CTR       = Y
 
@@ -23,10 +30,13 @@ AES CTR (256)  = Y
 ; Supported authentication algorithms of a default crypto driver.
 ;
 [Auth]
+NULL         = Y
 MD5          = Y
 MD5 HMAC     = Y
 SHA1         = Y
 SHA1 HMAC    = Y
+SHA224       = Y
+SHA224 HMAC  = Y
 SHA256       = Y
 SHA256 HMAC  = Y
 SHA384       = Y
@@ -40,3 +50,5 @@ AES GMAC     = Y
 ;
 [AEAD]
 AES GCM (128) = Y
+AES GCM (192) = Y
+AES GCM (256) = Y
diff --git a/doc/guides/cryptodevs/features/octeontx.ini b/doc/guides/cryptodevs/features/octeontx.ini
new file mode 100644 (file)
index 0000000..307ab88
--- /dev/null
@@ -0,0 +1,62 @@
+;
+; Supported features of the 'octeontx' crypto driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Symmetric crypto       = Y
+Sym operation chaining = Y
+HW Accelerated         = Y
+In Place SGL           = Y
+OOP SGL In LB  Out     = Y
+OOP SGL In SGL Out     = Y
+
+;
+; Supported crypto algorithms of 'octeontx' crypto driver.
+;
+[Cipher]
+NULL           = Y
+3DES CBC       = Y
+3DES ECB       = Y
+AES CBC (128)  = Y
+AES CBC (192)  = Y
+AES CBC (256)  = Y
+AES CTR (128)  = Y
+AES CTR (192)  = Y
+AES CTR (256)  = Y
+AES XTS (128)  = Y
+AES XTS (256)  = Y
+DES CBC        = Y
+KASUMI F8      = Y
+SNOW3G UEA2    = Y
+ZUC EEA3       = Y
+
+;
+; Supported authentication algorithms of 'octeontx' crypto driver.
+;
+[Auth]
+NULL         = Y
+AES GMAC     = Y
+KASUMI F9    = Y
+MD5          = Y
+MD5 HMAC     = Y
+SHA1         = Y
+SHA1 HMAC    = Y
+SHA224       = Y
+SHA224 HMAC  = Y
+SHA256       = Y
+SHA256 HMAC  = Y
+SHA384       = Y
+SHA384 HMAC  = Y
+SHA512       = Y
+SHA512 HMAC  = Y
+SNOW3G UIA2  = Y
+ZUC EIA3     = Y
+
+;
+; Supported AEAD algorithms of 'octeontx' crypto driver.
+;
+[AEAD]
+AES GCM (128) = Y
+AES GCM (192) = Y
+AES GCM (256) = Y
index 29d865e..4f15ee0 100644 (file)
@@ -48,6 +48,7 @@ SNOW3G UIA2  = Y
 KASUMI F9    = Y
 AES XCBC MAC = Y
 ZUC EIA3     = Y
+AES CMAC (128) = Y
 
 ;
 ; Supported AEAD algorithms of the 'qat' crypto driver.
@@ -56,3 +57,6 @@ ZUC EIA3     = Y
 AES GCM (128) = Y
 AES GCM (192) = Y
 AES GCM (256) = Y
+AES CCM (128) = Y
+AES CCM (192) = Y
+AES CCM (256) = Y
index e9928a4..83610e6 100644 (file)
@@ -13,10 +13,12 @@ Crypto Device Drivers
     aesni_mb
     aesni_gcm
     armv8
+    caam_jr
     ccp
     dpaa2_sec
     dpaa_sec
     kasumi
+    octeontx
     openssl
     mvsam
     null
index fd418c2..7acae19 100644 (file)
@@ -37,32 +37,50 @@ support by utilizing MUSDK library, which provides cryptographic operations
 acceleration by using Security Acceleration Engine (EIP197) directly from
 user-space with minimum overhead and high performance.
 
+Detailed information about the SoCs that use the MVSAM crypto driver can be found here:
+
+* https://www.marvell.com/embedded-processors/armada-70xx/
+* https://www.marvell.com/embedded-processors/armada-80xx/
+* https://www.marvell.com/embedded-processors/armada-3700/
+
+
 Features
 --------
 
 MVSAM CRYPTO PMD has support for:
 
-* Symmetric crypto
-* Sym operation chaining
-* AES CBC (128)
-* AES CBC (192)
-* AES CBC (256)
-* AES CTR (128)
-* AES CTR (192)
-* AES CTR (256)
-* 3DES CBC
-* 3DES CTR
-* MD5
-* MD5 HMAC
-* SHA1
-* SHA1 HMAC
-* SHA256
-* SHA256 HMAC
-* SHA384
-* SHA384 HMAC
-* SHA512
-* SHA512 HMAC
-* AES GCM (128)
+Cipher algorithms:
+
+* ``RTE_CRYPTO_CIPHER_NULL``
+* ``RTE_CRYPTO_CIPHER_AES_CBC``
+* ``RTE_CRYPTO_CIPHER_AES_CTR``
+* ``RTE_CRYPTO_CIPHER_AES_ECB``
+* ``RTE_CRYPTO_CIPHER_3DES_CBC``
+* ``RTE_CRYPTO_CIPHER_3DES_CTR``
+* ``RTE_CRYPTO_CIPHER_3DES_ECB``
+
+Hash algorithms:
+
+* ``RTE_CRYPTO_AUTH_NULL``
+* ``RTE_CRYPTO_AUTH_MD5``
+* ``RTE_CRYPTO_AUTH_MD5_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA1``
+* ``RTE_CRYPTO_AUTH_SHA1_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA224``
+* ``RTE_CRYPTO_AUTH_SHA224_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA256``
+* ``RTE_CRYPTO_AUTH_SHA256_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA384``
+* ``RTE_CRYPTO_AUTH_SHA384_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA512``
+* ``RTE_CRYPTO_AUTH_SHA512_HMAC``
+* ``RTE_CRYPTO_AUTH_AES_GMAC``
+
+AEAD algorithms:
+
+* ``RTE_CRYPTO_AEAD_AES_GCM``
+
+For supported feature flags please consult :doc:`overview`.
 
 Limitations
 -----------
@@ -77,25 +95,18 @@ MVSAM CRYPTO PMD driver compilation is disabled by default due to external depen
 Currently there are two driver specific compilation options in
 ``config/common_base`` available:
 
-- ``CONFIG_RTE_LIBRTE_MVSAM_CRYPTO`` (default ``n``)
+- ``CONFIG_RTE_LIBRTE_PMD_MVSAM_CRYPTO`` (default: ``n``)
 
     Toggle compilation of the librte_pmd_mvsam driver.
 
-- ``CONFIG_RTE_LIBRTE_MVSAM_CRYPTO_DEBUG`` (default ``n``)
-
-    Toggle display of debugging messages.
-
-For a list of prerequisites please refer to `Prerequisites` section in
-:ref:`MVPP2 Poll Mode Driver <mvpp2_poll_mode_driver>` guide.
-
 MVSAM CRYPTO PMD requires MUSDK built with EIP197 support thus following
 extra option must be passed to the library configuration script:
 
 .. code-block:: console
 
-   --enable-sam
+   --enable-sam [--enable-sam-statistics] [--enable-sam-debug]
 
-For `crypto_safexcel.ko` module build instructions please refer
+For instructions on how to build the required kernel modules please refer
 to `doc/musdk_get_started.txt`.
 
 Initialization
@@ -106,17 +117,15 @@ loaded:
 
 .. code-block:: console
 
-   insmod musdk_uio.ko
-   insmod mvpp2x_sysfs.ko
-   insmod mv_pp_uio.ko
+   insmod musdk_cma.ko
+   insmod crypto_safexcel.ko rings=0,0
    insmod mv_sam_uio.ko
-   insmod crypto_safexcel.ko
 
 The following parameters (all optional) are exported by the driver:
 
-* max_nb_queue_pairs: maximum number of queue pairs in the device (8 by default).
-* max_nb_sessions: maximum number of sessions that can be created (2048 by default).
-* socket_id: socket on which to allocate the device resources on.
+- ``max_nb_queue_pairs``: maximum number of queue pairs in the device (default: 8 - A8K, 4 - A7K/A3K).
+- ``max_nb_sessions``: maximum number of sessions that can be created (default: 2048).
+- ``socket_id``: socket on which to allocate the device resources.
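+
+As a rough illustration, the device could also be created programmatically with
+these parameters. This is a sketch only: the ``crypto_mvsam`` vdev name and the
+devargs string below are assumptions based on the parameter names above, not a
+verified invocation.
+
+.. code-block:: c
+
+   #include <rte_bus_vdev.h>
+   #include <rte_debug.h>
+
+   /* Create the MVSAM crypto vdev with explicit (assumed) devargs;
+    * equivalent to passing the same string via --vdev on the EAL
+    * command line. */
+   if (rte_vdev_init("crypto_mvsam",
+                     "max_nb_queue_pairs=4,max_nb_sessions=2048,socket_id=0") < 0)
+           rte_exit(EXIT_FAILURE, "Cannot create crypto_mvsam device\n");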
 
 l2fwd-crypto example application can be used to verify MVSAM CRYPTO PMD
 operation:
@@ -129,65 +138,3 @@ operation:
      --auth_op GENERATE --auth_algo sha1-hmac \
      --auth_key 10:11:12:13:14:15:16:17:18:19:1a:1b:1c:1d:1e:1f
 
-Example output:
-
-.. code-block:: console
-
-   [...]
-   AAD: at [0x7f253ceb80], len=
-   P ID 0 configuration ----
-   Port mode               : KR
-   MAC status              : disabled
-   Link status             : link up
-   Port speed              : 10G
-   Port duplex             : full
-   Port: Egress enable tx_port_num=16 qmap=0x1
-   PORT: Port0 - link
-   P ID 0 configuration ----
-   Port mode               : KR
-   MAC status              : disabled
-   Link status             : link down
-   Port speed              : 10G
-   Port duplex             : full
-   Port: Egress enable tx_port_num=16 qmap=0x1
-   Port 0, MAC address: 00:50:43:02:21:20
-
-
-   Checking link statusdone
-   Port 0 Link Up - speed 0 Mbps - full-duplex
-   Lcore 0: RX port 0
-   Allocated session pool on socket 0
-   eip197: 0:0 registers: paddr: 0xf2880000, vaddr: 0x0x7f56a80000
-   DMA buffer (131136 bytes) for CDR #0 allocated: paddr = 0xb0585e00, vaddr = 0x7f09384e00
-   DMA buffer (131136 bytes) for RDR #0 allocated: paddr = 0xb05a5f00, vaddr = 0x7f093a4f00
-   DMA buffers allocated for 2049 operations. Tokens - 256 bytes
-   Lcore 0: cryptodev 0
-   L2FWD: lcore 1 has nothing to do
-   L2FWD: lcore 2 has nothing to do
-   L2FWD: lcore 3 has nothing to do
-   L2FWD: entering main loop on lcore 0
-   L2FWD:  -- lcoreid=0 portid=0
-   L2FWD:  -- lcoreid=0 cryptoid=0
-   Options:-
-   nportmask: ffffffff
-   ports per lcore: 1
-   refresh period : 10000
-   single lcore mode: disabled
-   stats_printing: enabled
-   sessionless crypto: disabled
-
-   Crypto chain: Input --> Encrypt --> Auth generate --> Output
-
-   ---- Cipher information ---
-   Algorithm: aes-cbc
-   Cipher key: at [0x7f56db4e80], len=16
-   00000000: 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F | ................
-   IV: at [0x7f56db4b80], len=16
-   00000000: 20 F0 63 0E 45 EB 2D 84 72 D4 13 6E 36 B5 AF FE |  .c.E.-.r..n6...
-
-   ---- Authentication information ---
-   Algorithm: sha1-hmac
-   Auth key: at [0x7f56db4d80], len=16
-   00000000: 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F | ................
-   IV: at [0x7f56db4a80], len=0
-   AAD: at [0x7f253ceb80], len=
diff --git a/doc/guides/cryptodevs/octeontx.rst b/doc/guides/cryptodevs/octeontx.rst
new file mode 100644 (file)
index 0000000..660e980
--- /dev/null
@@ -0,0 +1,127 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright(c) 2018 Cavium, Inc
+
+Cavium OCTEON TX Crypto Poll Mode Driver
+========================================
+
+The OCTEON TX crypto poll mode driver provides support for offloading
+cryptographic operations to cryptographic accelerator units on
+**OCTEON TX** :sup:`®` family of processors (CN8XXX). The OCTEON TX crypto
+poll mode driver enqueues the crypto request to this accelerator and dequeues
+the response once the operation is completed.
+
+Supported Algorithms
+--------------------
+
+Cipher Algorithms
+~~~~~~~~~~~~~~~~~
+
+* ``RTE_CRYPTO_CIPHER_NULL``
+* ``RTE_CRYPTO_CIPHER_3DES_CBC``
+* ``RTE_CRYPTO_CIPHER_3DES_ECB``
+* ``RTE_CRYPTO_CIPHER_AES_CBC``
+* ``RTE_CRYPTO_CIPHER_AES_CTR``
+* ``RTE_CRYPTO_CIPHER_AES_XTS``
+* ``RTE_CRYPTO_CIPHER_DES_CBC``
+* ``RTE_CRYPTO_CIPHER_KASUMI_F8``
+* ``RTE_CRYPTO_CIPHER_SNOW3G_UEA2``
+* ``RTE_CRYPTO_CIPHER_ZUC_EEA3``
+
+Hash Algorithms
+~~~~~~~~~~~~~~~
+
+* ``RTE_CRYPTO_AUTH_NULL``
+* ``RTE_CRYPTO_AUTH_AES_GMAC``
+* ``RTE_CRYPTO_AUTH_KASUMI_F9``
+* ``RTE_CRYPTO_AUTH_MD5``
+* ``RTE_CRYPTO_AUTH_MD5_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA1``
+* ``RTE_CRYPTO_AUTH_SHA1_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA224``
+* ``RTE_CRYPTO_AUTH_SHA224_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA256``
+* ``RTE_CRYPTO_AUTH_SHA256_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA384``
+* ``RTE_CRYPTO_AUTH_SHA384_HMAC``
+* ``RTE_CRYPTO_AUTH_SHA512``
+* ``RTE_CRYPTO_AUTH_SHA512_HMAC``
+* ``RTE_CRYPTO_AUTH_SNOW3G_UIA2``
+* ``RTE_CRYPTO_AUTH_ZUC_EIA3``
+
+AEAD Algorithms
+~~~~~~~~~~~~~~~
+
+* ``RTE_CRYPTO_AEAD_AES_GCM``
+
+Compilation
+-----------
+
+The **OCTEON TX** :sup:`®` board must be running a Linux kernel based on
+sdk-6.2.0 patch 3, in which the OCTEON TX crypto PF driver is already built in.
+
+For compiling the OCTEON TX crypto poll mode driver, please check that the
+CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO setting is set to ``y`` in the
+config/common_base file.
+
+* ``CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO=y``
+
+The following are the steps to compile the OCTEON TX crypto poll mode driver:
+
+.. code-block:: console
+
+        cd <dpdk directory>
+        make config T=arm64-thunderx-linuxapp-gcc
+        make
+
+The example applications can be compiled using the following:
+
+.. code-block:: console
+
+        cd <dpdk directory>
+        export RTE_SDK=$PWD
+        export RTE_TARGET=build
+        cd examples/<application>
+        make
+
+Execution
+---------
+
+The number of crypto VFs to be enabled can be controlled by setting the sysfs
+entry ``sriov_numvfs`` for the corresponding PF driver.
+
+.. code-block:: console
+
+        echo <num_vfs> > /sys/bus/pci/devices/<dev_bus_id>/sriov_numvfs
+
+The device bus ID, ``dev_bus_id``, to be used in the above step can be found
+using the dpdk-devbind.py script. The OCTEON TX crypto PF device needs to be
+identified, and the corresponding device number can be used to tune various PF
+properties.
+
+
+Once the required VFs are enabled, the dpdk-devbind.py script can be used to
+identify the VFs. To be accessible from DPDK, the VFs need to be bound to the
+vfio-pci driver:
+
+.. code-block:: console
+
+        cd <dpdk directory>
+        ./usertools/dpdk-devbind.py -u <vf device no>
+        ./usertools/dpdk-devbind.py -b vfio-pci <vf device no>
+
+Appropriate huge pages need to be set up in order to run the DPDK example
+applications.
+
+.. code-block:: console
+
+        echo 8 > /sys/kernel/mm/hugepages/hugepages-524288kB/nr_hugepages
+        mkdir /mnt/huge
+        mount -t hugetlbfs nodev /mnt/huge
+
+Example applications can now be executed with crypto operations offloaded to
+OCTEON TX crypto PMD.
+
+.. code-block:: console
+
+        ./build/ipsec-secgw --log-level=8 -c 0xff -- -P -p 0x3 -u 0x2 --config \
+        "(1,0,0),(0,0,0)" -f ep1.cfg
index 3f776f0..607e758 100644 (file)
@@ -33,7 +33,7 @@ Supported Feature Flags
      scatter-gathered styled buffers.
 
    - "OOP LB In LB Out" feature flag stands for
-     "Out-of-place Linear Buffers Input, Scatter-gather list Output",
+     "Out-of-place Linear Buffers Input, Linear Buffers Output",
      which means that Out-of-place operation is supported,
      with linear input and output buffers.
 
index bdc58eb..b2dfeb0 100644 (file)
@@ -4,17 +4,30 @@
 Intel(R) QuickAssist (QAT) Crypto Poll Mode Driver
 ==================================================
 
-The QAT PMD provides poll mode crypto driver support for the following
+QAT documentation consists of three parts:
+
+* Details of the symmetric crypto service below.
+* Details of the `compression service <http://dpdk.org/doc/guides/compressdevs/qat_comp.html>`_
+  in the compressdev drivers section.
+* Details of building the common QAT infrastructure and the PMDs to support the
+  above services. See :ref:`building_qat` below.
+
+
+Symmetric Crypto Service on QAT
+-------------------------------
+
+The QAT crypto PMD provides poll mode crypto driver support for the following
 hardware accelerator devices:
 
 * ``Intel QuickAssist Technology DH895xCC``
 * ``Intel QuickAssist Technology C62x``
 * ``Intel QuickAssist Technology C3xxx``
 * ``Intel QuickAssist Technology D15xx``
+* ``Intel QuickAssist Technology C4xxx``
 
 
 Features
---------
+~~~~~~~~
 
 The QAT PMD has support for:
 
@@ -50,14 +63,16 @@ Hash algorithms:
 * ``RTE_CRYPTO_AUTH_KASUMI_F9``
 * ``RTE_CRYPTO_AUTH_AES_GMAC``
 * ``RTE_CRYPTO_AUTH_ZUC_EIA3``
+* ``RTE_CRYPTO_AUTH_AES_CMAC``
 
 Supported AEAD algorithms:
 
 * ``RTE_CRYPTO_AEAD_AES_GCM``
+* ``RTE_CRYPTO_AEAD_AES_CCM``
 
 
 Limitations
------------
+~~~~~~~~~~~
 
 * Only supports the session-oriented API implementation (session-less APIs are not supported).
 * SNOW 3G (UEA2), KASUMI (F8) and ZUC (EEA3) supported only if cipher length and offset fields are byte-multiple.
@@ -69,104 +84,155 @@ Limitations
 
 
 Extra notes on KASUMI F9
-------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~
 
 When using KASUMI F9 authentication algorithm, the input buffer must be
-constructed according to the 3GPP KASUMI specifications (section 4.4, page 13):
-`<http://cryptome.org/3gpp/35201-900.pdf>`_.
-Input buffer has to have COUNT (4 bytes), FRESH (4 bytes), MESSAGE and DIRECTION (1 bit)
-concatenated. After the DIRECTION bit, a single '1' bit is appended, followed by
-between 0 and 7 '0' bits, so that the total length of the buffer is multiple of 8 bits.
-Note that the actual message can be any length, specified in bits.
+constructed according to the
+`3GPP KASUMI specification <http://cryptome.org/3gpp/35201-900.pdf>`_
+(section 4.4, page 13). The input buffer has to have COUNT (4 bytes),
+FRESH (4 bytes), MESSAGE and DIRECTION (1 bit) concatenated. After the DIRECTION
+bit, a single '1' bit is appended, followed by between 0 and 7 '0' bits, so that
+the total length of the buffer is multiple of 8 bits. Note that the actual
+message can be any length, specified in bits.
 
 Once this buffer is passed this way, when creating the crypto operation,
-length of data to authenticate (op.sym.auth.data.length) must be the length
+length of data to authenticate "op.sym.auth.data.length" must be the length
 of all the items described above, including the padding at the end.
-Also, offset of data to authenticate (op.sym.auth.data.offset)
+Also, the offset of data to authenticate "op.sym.auth.data.offset"
 must point at the start of the COUNT bytes.
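+
+A minimal sketch of the layout described above is shown below. It is
+illustrative only: the helper name is hypothetical and, for brevity, the
+message is assumed to be byte-aligned; real applications must handle arbitrary
+bit lengths per the specification.
+
+.. code-block:: c
+
+   #include <stdint.h>
+   #include <string.h>
+
+   /* Assemble COUNT | FRESH | MESSAGE | DIRECTION bit | '1' bit | '0' bits.
+    * For a byte-aligned message the trailer is exactly one extra byte.
+    * The returned byte count (expressed in bits where the API requires it)
+    * is what op.sym.auth.data.length must cover, with
+    * op.sym.auth.data.offset pointing at the first COUNT byte. */
+   static size_t
+   kasumi_f9_build_input(uint8_t *buf, const uint8_t count[4],
+                   const uint8_t fresh[4], const uint8_t *msg,
+                   size_t msg_len, int direction)
+   {
+           size_t off = 0;
+
+           memcpy(buf + off, count, 4); off += 4;           /* COUNT  */
+           memcpy(buf + off, fresh, 4); off += 4;           /* FRESH  */
+           memcpy(buf + off, msg, msg_len); off += msg_len; /* MESSAGE */
+           /* DIRECTION bit, then the single '1' stop bit, then six '0'
+            * padding bits to reach the next byte boundary. */
+           buf[off++] = (uint8_t)((direction ? 0x80 : 0x00) | 0x40);
+           return off;
+   }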
 
 
-Building the DPDK QAT cryptodev PMD
------------------------------------
 
+.. _building_qat:
+
+Building PMDs on QAT
+--------------------
+
+A QAT device can host multiple acceleration services:
+
+* symmetric cryptography
+* data compression
 
-To enable QAT crypto in DPDK, follow the instructions for modifying the compile-time
-configuration file as described `here <http://dpdk.org/doc/guides/linux_gsg/build_dpdk.html>`_.
+These services are provided to DPDK applications via PMDs which register to
+implement the corresponding cryptodev and compressdev APIs. The PMDs use
+common QAT driver code which manages the QAT PCI device. They also depend on a
+QAT kernel driver being installed on the platform, see :ref:`qat_kernel` below.
 
 
-Quick instructions are as follows:
+Configuring and Building the DPDK QAT PMDs
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+Further information on configuring, building and installing DPDK is described
+`here <http://dpdk.org/doc/guides/linux_gsg/build_dpdk.html>`_.
+
+
+Quick instructions for QAT cryptodev PMD are as follows:
 
 .. code-block:: console
 
        cd to the top-level DPDK directory
-       make config T=x86_64-native-linuxapp-gcc
-       sed -i 's,\(CONFIG_RTE_LIBRTE_PMD_QAT\)=n,\1=y,' build/.config
+       make defconfig
        sed -i 's,\(CONFIG_RTE_LIBRTE_PMD_QAT_SYM\)=n,\1=y,' build/.config
        make
 
+Quick instructions for QAT compressdev PMD are as follows:
 
-.. _qat_kernel_installation:
+.. code-block:: console
 
-Dependency on the QAT kernel driver
------------------------------------
+       cd to the top-level DPDK directory
+       make defconfig
+       make
 
-To use the QAT PMD an SRIOV-enabled QAT kernel driver is required. The VF
-devices created and initialised by this driver will be used by the QAT PMD.
 
-Instructions for installation are below, but first an explanation of the
-relationships between the PF/VF devices and the PMDs visible to
-DPDK applications.
+Build Configuration
+~~~~~~~~~~~~~~~~~~~
 
+These are the build configuration options affecting QAT, and their default values:
 
-Acceleration services - cryptography and compression - are provided to DPDK
-applications via PMDs which register to implement the corresponding
-cryptodev and compressdev APIs.
+.. code-block:: console
 
-Each QuickAssist VF device can expose one cryptodev PMD and/or one compressdev PMD.
-These QAT PMDs share the same underlying device and pci-mgmt code, but are
-enumerated independently on their respective APIs and appear as independent
-devices to applications.
+       CONFIG_RTE_LIBRTE_PMD_QAT=y
+       CONFIG_RTE_LIBRTE_PMD_QAT_SYM=n
+       CONFIG_RTE_PMD_QAT_MAX_PCI_DEVICES=48
+       CONFIG_RTE_PMD_QAT_COMP_SGL_MAX_SEGMENTS=16
 
-.. Note::
+CONFIG_RTE_LIBRTE_PMD_QAT must be enabled for any QAT PMD to be built.
 
-   Each VF can only be used by one DPDK process. It is not possible to share
-   the same VF across multiple processes, even if these processes are using
-   different acceleration services.
+The QAT cryptodev PMD has an external dependency on libcrypto, so is not
+built by default. CONFIG_RTE_LIBRTE_PMD_QAT_SYM should be enabled to build it.
 
-   Conversely one DPDK process can use one or more QAT VFs and can expose both
-   cryptodev and compressdev instances on each of those VFs.
+The QAT compressdev PMD has no external dependencies, so needs no configuration
+options and is built by default.
 
+The number of VFs per PF varies - see the table below. If multiple QAT packages
+are installed on a platform then CONFIG_RTE_PMD_QAT_MAX_PCI_DEVICES should be
+adjusted to the number of VFs which the QAT common code will need to handle.
+Note that there is a separate config item, CONFIG_RTE_CRYPTO_MAX_DEVS, for the
+maximum number of cryptodevs; if necessary this should be adjusted to cover the
+total number of QAT and other devices which the process will use.
+
+QAT allocates internal structures to handle SGLs. For the compression service,
+CONFIG_RTE_PMD_QAT_COMP_SGL_MAX_SEGMENTS can be changed if more segments are needed.
+An extra (max_inflight_ops x 16) bytes per queue_pair will be used for every
+increment; for example, with 128 inflight operations each additional segment
+costs an extra 2048 bytes per queue pair.
 
 
 Device and driver naming
-------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~
 
 * The qat cryptodev driver name is "crypto_qat".
-  The rte_cryptodev_devices_get() returns the devices exposed by this driver.
+  The "rte_cryptodev_devices_get()" returns the devices exposed by this driver.
 
 * Each qat crypto device has a unique name, in format
-  <pci bdf>_<service>, e.g. "0000:41:01.0_qat_sym".
-  This name can be passed to rte_cryptodev_get_dev_id() to get the device_id.
+  "<pci bdf>_<service>", e.g. "0000:41:01.0_qat_sym".
+  This name can be passed to "rte_cryptodev_get_dev_id()" to get the device_id.
 
 .. Note::
 
-       The qat crypto driver name is passed to the dpdk-test-crypto-perf tool in the -devtype parameter.
+       The qat crypto driver name is passed to the dpdk-test-crypto-perf tool in the "-devtype" parameter.
 
        The qat crypto device name is in the format of the slave parameter passed to the crypto scheduler.
 
-* The qat compressdev driver name is "comp_qat".
+* The qat compressdev driver name is "compress_qat".
   The rte_compressdev_devices_get() returns the devices exposed by this driver.
 
 * Each qat compression device has a unique name, in format
   <pci bdf>_<service>, e.g. "0000:41:01.0_qat_comp".
   This name can be passed to rte_compressdev_get_dev_id() to get the device_id.
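+
+As a short illustrative sketch, both lookups could be combined as below; the
+BDF string is a placeholder for one of your own VFs:
+
+.. code-block:: c
+
+   #include <rte_cryptodev.h>
+   #include <rte_compressdev.h>
+
+   /* Resolve the device ids of the two PMDs exposed by one QAT VF.
+    * A negative return means the device was not probed, or the
+    * corresponding PMD is not built in. */
+   static int
+   lookup_qat_devs(void)
+   {
+           int cdev_id = rte_cryptodev_get_dev_id("0000:41:01.0_qat_sym");
+           int comp_id = rte_compressdev_get_dev_id("0000:41:01.0_qat_comp");
+
+           return (cdev_id < 0 || comp_id < 0) ? -1 : 0;
+   }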
 
+.. _qat_kernel:
+
+Dependency on the QAT kernel driver
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To use QAT an SRIOV-enabled QAT kernel driver is required. The VF
+devices created and initialised by this driver will be used by the QAT PMDs.
+
+Instructions for installation are below, but first an explanation of the
+relationships between the PF/VF devices and the PMDs visible to
+DPDK applications.
+
+Each QuickAssist PF device exposes a number of VF devices. Each VF device can
+enable one cryptodev PMD and/or one compressdev PMD.
+These QAT PMDs share the same underlying device and pci-mgmt code, but are
+enumerated independently on their respective APIs and appear as independent
+devices to applications.
+
+.. Note::
+
+   Each VF can only be used by one DPDK process. It is not possible to share
+   the same VF across multiple processes, even if these processes are using
+   different acceleration services.
+
+   Conversely one DPDK process can use one or more QAT VFs and can expose both
+   cryptodev and compressdev instances on each of those VFs.
+
 
 Available kernel drivers
-------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~
 
 Kernel drivers for each device are listed in the following table. Scroll right
-to check that the driver and device supports the servic you require.
+to check that the driver and device support the service you require.
 
 
 .. _table_qat_pmds_drivers:
@@ -190,6 +256,8 @@ to check that the driver and device supports the servic you require.
    +-----+----------+---------------+---------------+------------+--------+------+--------+--------+-----------+-------------+
    | 2   | D15xx    | p             | qat_d15xx     | d15xx      | 6f54   | 1    | 6f55   | 16     | Yes       | No          |
    +-----+----------+---------------+---------------+------------+--------+------+--------+--------+-----------+-------------+
+   | 3   | C4xxx    | p             | qat_c4xxx     | c4xxx      | 18a0   | 1    | 18a1   | 128    | Yes       | No          |
+   +-----+----------+---------------+---------------+------------+--------+------+--------+--------+-----------+-------------+
 
 
 The ``Driver`` column indicates either the Linux kernel version in which
@@ -203,7 +271,7 @@ If you are running on a kernel which includes a driver for your device, see
 
 
 Installation using kernel.org driver
-------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 The examples below are based on the C62x device, if you have a different device
 use the corresponding values in the above table.
@@ -274,7 +342,7 @@ To complete the installation follow the instructions in
 
 
 Installation using 01.org QAT driver
-------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Download the latest QuickAssist Technology Driver from `01.org
 <https://01.org/packet-processing/intel%C2%AE-quickassist-technology-drivers-and-patches>`_.
@@ -368,12 +436,12 @@ To complete the installation - follow instructions in `Binding the available VFs
 
 
 Binding the available VFs to the DPDK UIO driver
-------------------------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Unbind the VFs from the stock driver so they can be bound to the uio driver.
 
 For an Intel(R) QuickAssist Technology DH895xCC device
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The unbind command below assumes ``BDFs`` of ``03:01.00-03:04.07``, if your
 VFs are different adjust the unbind command below::
@@ -386,7 +454,7 @@ VFs are different adjust the unbind command below::
     done
 
 For an Intel(R) QuickAssist Technology C62x device
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The unbind command below assumes ``BDFs`` of ``1a:01.00-1a:02.07``,
 ``3d:01.00-3d:02.07`` and ``3f:01.00-3f:02.07``, if your VFs are different
@@ -406,7 +474,7 @@ adjust the unbind command below::
     done
 
 For Intel(R) QuickAssist Technology C3xxx or D15xx device
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The unbind command below assumes ``BDFs`` of ``01:01.00-01:02.07``, if your
 VFs are different adjust the unbind command below::
@@ -419,7 +487,7 @@ VFs are different adjust the unbind command below::
     done
 
 Bind to the DPDK uio driver
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Install the DPDK igb_uio driver, bind the VF PCI Device id to it and use lspci
 to confirm the VF devices are now in use by igb_uio kernel driver,
@@ -438,9 +506,29 @@ Another way to bind the VFs to the DPDK UIO driver is by using the
     cd to the top-level DPDK directory
     ./usertools/dpdk-devbind.py -b igb_uio 0000:03:01.1
 
+Testing
+~~~~~~~
+
+QAT crypto PMD can be tested by running the test application::
+
+    make defconfig
+    make test-build -j
+    cd ./build/app
+    ./test -l1 -n1 -w <your qat bdf>
+    RTE>>cryptodev_qat_autotest
+
+QAT compression PMD can be tested by running the test application::
+
+    make defconfig
+    sed -i 's,\(CONFIG_RTE_COMPRESSDEV_TEST\)=n,\1=y,' build/.config
+    make test-build -j
+    cd ./build/app
+    ./test -l1 -n1 -w <your qat bdf>
+    RTE>>compressdev_autotest
+
 
 Debugging
-----------------------------------------
+~~~~~~~~~
 
 There are 2 sets of trace available via the dynamic logging feature:
 
index 7383295..2f356d3 100644 (file)
@@ -122,6 +122,8 @@ Example:
 
     ./your_eventdev_application --vdev="event_dpaa1"
 
+* Use the devarg option ``disable_intr=1`` to disable interrupt mode, as shown in the sketch below.
+
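+As a rough sketch, the same devarg can also be passed when creating the device
+from application code:
+
+.. code-block:: c
+
+   #include <rte_bus_vdev.h>
+   #include <rte_debug.h>
+
+   /* Equivalent to --vdev="event_dpaa1,disable_intr=1" on the EAL
+    * command line. */
+   if (rte_vdev_init("event_dpaa1", "disable_intr=1") < 0)
+           rte_panic("Cannot create event_dpaa1 device\n");
+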
 Limitations
 -----------
 
diff --git a/doc/guides/eventdevs/dsw.rst b/doc/guides/eventdevs/dsw.rst
new file mode 100644 (file)
index 0000000..6653f50
--- /dev/null
@@ -0,0 +1,96 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2018 Ericsson AB
+
+Distributed Software Eventdev Poll Mode Driver
+==============================================
+
+The distributed software event device is an eventdev driver which
+distributes the task of scheduling events among all the eventdev ports
+and the lcore threads using them.
+
+Features
+--------
+
+Queues
+ * Atomic
+ * Parallel
+ * Single-Link
+
+Ports
+ * Load balanced (for Atomic, Ordered, Parallel queues)
+ * Single Link (for single-link queues)
+
+Configuration and Options
+-------------------------
+
+The distributed software eventdev is a vdev device, and as such can be
+created from the application code, or from the EAL command line:
+
+* Call ``rte_vdev_init("event_dsw0")`` from the application
+
+* Use ``--vdev="event_dsw0"`` in the EAL options, which will call
+  rte_vdev_init() internally
+
+Example:
+
+.. code-block:: console
+
+    ./your_eventdev_application --vdev="event_dsw0"
+
+Limitations
+-----------
+
+Unattended Ports
+~~~~~~~~~~~~~~~~
+
+The distributed software eventdev uses an internal signaling scheme
+between the ports to achieve load balancing. In order for this to
+work, the application must perform enqueue and/or dequeue operations
+on all ports.
+
+Producer-only ports which currently have no events to enqueue should
+periodically call rte_event_enqueue_burst() with a zero-sized burst.
+
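+A producer-only poll loop could look roughly like the sketch below. It is a
+sketch only: ``fetch_events()`` is a hypothetical app-specific event source,
+and a real application must also retry events the enqueue did not accept. The
+same zero-sized enqueue doubles as the explicit flush described under Output
+Buffering.
+
+.. code-block:: c
+
+   #include <rte_eventdev.h>
+
+   #define MAX_BURST 32
+
+   /* Hypothetical application-specific source of new events. */
+   extern uint16_t fetch_events(struct rte_event *evs, uint16_t max);
+
+   static void
+   producer_loop(uint8_t dev_id, uint8_t port_id)
+   {
+           struct rte_event events[MAX_BURST];
+
+           for (;;) {
+                   uint16_t n = fetch_events(events, MAX_BURST);
+
+                   /* n may be zero: an empty enqueue still lets the
+                    * driver run its internal signaling (and flush any
+                    * buffered output), keeping the port "attended". */
+                   rte_event_enqueue_burst(dev_id, port_id, events, n);
+           }
+   }
+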
+Ports left unattended for longer periods of time will prevent load
+balancing, and also cause traffic interruptions on the flows which
+are in the process of being migrated.
+
+Output Buffering
+~~~~~~~~~~~~~~~~
+
+For efficiency reasons, the distributed software eventdev might not
+send enqueued events immediately to the destination port, but instead
+store them in an internal buffer in the source port.
+
+In case no more events are enqueued on a port with buffered events,
+these events will be sent after the application has performed a number
+of enqueue and/or dequeue operations.
+
+For explicit flushing, an application may call
+rte_event_enqueue_burst() with a zero-sized burst.
+
+
+Priorities
+~~~~~~~~~~
+
+The distributed software eventdev does not support event priorities.
+
+Ordered Queues
+~~~~~~~~~~~~~~
+
+The distributed software eventdev does not support the ordered queue type.
+
+
+"All Types" Queues
+~~~~~~~~~~~~~~~~~~
+
+The distributed software eventdev does not support queues of type
+RTE_EVENT_QUEUE_CFG_ALL_TYPES, which allow atomic, ordered, and
+parallel events on the same queue.
+
+Dynamic Link/Unlink
+~~~~~~~~~~~~~~~~~~~
+
+The distributed software eventdev does not support calls to
+rte_event_port_link() or rte_event_port_unlink() after
+rte_event_dev_start() has been called.
index 18ec8e4..f7382dc 100644 (file)
@@ -13,6 +13,7 @@ application through the eventdev API.
 
     dpaa
     dpaa2
+    dsw
     sw
     octeontx
     opdl
index 18cfc7a..e276fd4 100644 (file)
@@ -1,11 +1,11 @@
 ..  SPDX-License-Identifier: BSD-3-Clause
     Copyright(c) 2017 Cavium, Inc
 
-OCTEONTX SSOVF Eventdev Driver
-==============================
+OCTEON TX SSOVF Eventdev Driver
+===============================
 
-The OCTEONTX SSOVF PMD (**librte_pmd_octeontx_ssovf**) provides poll mode
-eventdev driver support for the inbuilt event device found in the **Cavium OCTEONTX**
+The OCTEON TX SSOVF PMD (**librte_pmd_octeontx_ssovf**) provides poll mode
+eventdev driver support for the inbuilt event device found in the **Cavium OCTEON TX**
 SoC family as well as their virtual functions (VF) in SR-IOV context.
 
 More information can be found at `Cavium, Inc Official Website
@@ -14,7 +14,7 @@ More information can be found at `Cavium, Inc Official Website
 Features
 --------
 
-Features of the OCTEONTX SSOVF PMD are:
+Features of the OCTEON TX SSOVF PMD are:
 
 - 64 Event queues
 - 32 Event ports
@@ -32,8 +32,8 @@ Features of the OCTEONTX SSOVF PMD are:
   time granularity of 1us.
 - Up to 64 event timer adapters.
 
-Supported OCTEONTX SoCs
------------------------
+Supported OCTEON TX SoCs
+------------------------
 - CN83xx
 
 Prerequisites
@@ -57,7 +57,7 @@ Please note that enabling debugging options may affect system performance.
 Driver Compilation
 ~~~~~~~~~~~~~~~~~~
 
-To compile the OCTEONTX SSOVF PMD for Linux arm64 gcc target, run the
+To compile the OCTEON TX SSOVF PMD for Linux arm64 gcc target, run the
 following ``make`` command:
 
 .. code-block:: console
@@ -69,7 +69,7 @@ following ``make`` command:
 Initialization
 --------------
 
-The octeontx eventdev is exposed as a vdev device which consists of a set
+The OCTEON TX eventdev is exposed as a vdev device which consists of a set
 of SSO group and work-slot PCIe VF devices. On EAL initialization,
 SSO PCIe VF devices will be probed and then the vdev device can be created
 from the application code, or from the EAL command line based on
@@ -90,7 +90,7 @@ Example:
 Selftest
 --------
 
-The functionality of octeontx eventdev can be verified using this option,
+The functionality of the OCTEON TX eventdev can be verified using this option;
 various unit and functional tests are run to verify the sanity.
 The tests are run once the vdev creation is successfully complete.
 
index 2105223..0262a33 100644 (file)
@@ -62,7 +62,7 @@ Queue Dependencies
 As stated the order in which packets travel through queues is static in
 nature. They go through the queues in the order the queues are setup at
 initialisation ``rte_event_queue_setup()``. For example if an application
-sets up 3 queues, Q0, Q1, Q2 and has 3 assoicated ports P0, P1, P2 and
+sets up 3 queues, Q0, Q1, Q2 and has 3 associated ports P0, P1, P2 and
 P3 then packets must be
 
  * Enqueued onto Q0 (typically through P0), then
index e13a090..a642a2b 100644 (file)
@@ -17,3 +17,4 @@ HowTo Guides
     virtio_user_for_container_networking
     virtio_user_as_exceptional_path
     packet_capture_framework
+    telemetry
index caa4e1a..6a8534d 100644 (file)
@@ -32,10 +32,10 @@ Code
 .. code-block:: c
 
   /* create the attribute structure */
-  struct rte_flow_attr attr = {.ingress = 1};
+  struct rte_flow_attr attr = { .ingress = 1 };
   struct rte_flow_item pattern[MAX_PATTERN_IN_FLOW];
   struct rte_flow_action actions[MAX_ACTIONS_IN_FLOW];
-  struct rte_flow_item_etc eth;
+  struct rte_flow_item_eth eth;
   struct rte_flow_item_vlan vlan;
   struct rte_flow_item_ipv4 ipv4;
   struct rte_flow *flow;
@@ -55,15 +55,15 @@ Code
   pattern[2].spec = &ipv4;
 
   /* end the pattern array */
-  pattern[3].type = RTE_FLOW_ITEM)TYPE_END;
+  pattern[3].type = RTE_FLOW_ITEM_TYPE_END;
 
   /* create the drop action */
   actions[0].type = RTE_FLOW_ACTION_TYPE_DROP;
   actions[1].type = RTE_FLOW_ACTION_TYPE_END;
 
   /* validate and create the flow rule */
-  if (!rte_flow_validate(port_id, &attr, pattern, actions, &error)
-      flow = rte_flow_create(port_id, &attr, pattern, actions, &error)
+  if (!rte_flow_validate(port_id, &attr, pattern, actions, &error))
+      flow = rte_flow_create(port_id, &attr, pattern, actions, &error);
 
 Output
 ~~~~~~
@@ -120,7 +120,7 @@ clarity)::
 
   tpmd> flow create 0 ingress pattern eth / vlan /
                     ipv4 dst spec 192.168.3.0 dst mask 255.255.255.0 /
-                   end actions drop / end
+                    end actions drop / end
 
 Code
 ~~~~
@@ -130,7 +130,7 @@ Code
   struct rte_flow_attr attr = {.ingress = 1};
   struct rte_flow_item pattern[MAX_PATTERN_IN_FLOW];
   struct rte_flow_action actions[MAX_ACTIONS_IN_FLOW];
-  struct rte_flow_item_etc eth;
+  struct rte_flow_item_eth eth;
   struct rte_flow_item_vlan vlan;
   struct rte_flow_item_ipv4 ipv4;
   struct rte_flow_item_ipv4 ipv4_mask;
@@ -153,15 +153,15 @@ Code
   pattern[2].mask = &ipv4_mask;
 
   /* end the pattern array */
-  pattern[3].type = RTE_FLOW_ITEM)TYPE_END;
+  pattern[3].type = RTE_FLOW_ITEM_TYPE_END;
 
   /* create the drop action */
   actions[0].type = RTE_FLOW_ACTION_TYPE_DROP;
   actions[1].type = RTE_FLOW_ACTION_TYPE_END;
 
   /* validate and create the flow rule */
-  if (!rte_flow_validate(port_id, &attr, pattern, actions, &error)
-      flow = rte_flow_create(port_id, &attr, pattern, actions, &error)
+  if (!rte_flow_validate(port_id, &attr, pattern, actions, &error))
+      flow = rte_flow_create(port_id, &attr, pattern, actions, &error);
 
 Output
 ~~~~~~
@@ -227,10 +227,10 @@ Code
 
 .. code-block:: c
 
-  struct rte_flow_attr attr = {.ingress = 1};
+  struct rte_flow_attr attr = { .ingress = 1 };
   struct rte_flow_item pattern[MAX_PATTERN_IN_FLOW];
   struct rte_flow_action actions[MAX_ACTIONS_IN_FLOW];
-  struct rte_flow_item_etc eth;
+  struct rte_flow_item_eth eth;
   struct rte_flow_item_vlan vlan;
   struct rte_flow_action_queue queue = { .index = 3 };
   struct rte_flow *flow;
@@ -246,16 +246,16 @@ Code
   pattern[1].spec = &vlan;
 
   /* end the pattern array */
-  pattern[2].type = RTE_FLOW_ITEM)TYPE_END;
+  pattern[2].type = RTE_FLOW_ITEM_TYPE_END;
 
   /* create the queue action */
   actions[0].type = RTE_FLOW_ACTION_TYPE_QUEUE;
-  actions[0].conf = &queue
+  actions[0].conf = &queue;
   actions[1].type = RTE_FLOW_ACTION_TYPE_END;
 
   /* validate and create the flow rule */
-  if (!rte_flow_validate(port_id, &attr, pattern, actions, &error)
-      flow = rte_flow_create(port_id, &attr, pattern, actions, &error)
+  if (!rte_flow_validate(port_id, &attr, pattern, actions, &error))
+      flow = rte_flow_create(port_id, &attr, pattern, actions, &error);
 
 Output
 ~~~~~~
diff --git a/doc/guides/howto/telemetry.rst b/doc/guides/howto/telemetry.rst
new file mode 100644 (file)
index 0000000..3fcb061
--- /dev/null
@@ -0,0 +1,85 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2018 Intel Corporation.
+
+DPDK Telemetry API User Guide
+==============================
+
+This document describes how the Data Plane Development Kit (DPDK) Telemetry API
+is used for querying port statistics from incoming traffic.
+
+Introduction
+------------
+
+The ``librte_telemetry`` library provides the functionality for users to query
+metrics from incoming port traffic. The application which initializes packet
+forwarding acts as the server, sending metrics to the requesting application,
+which acts as the client.
+
+In DPDK, applications are used to initialize ``telemetry``. To view incoming
+traffic on featured ports, the application should be run first (i.e. after ports
+are configured). Once the application is running, the service assurance agent
+(for example the collectd plugin) should be run to begin querying the API.
+
+A client connects its Service Assurance application to the DPDK application
+via a UNIX socket. Once a connection is established, the client can send JSON
+messages to the DPDK application requesting metrics via another UNIX client.
+If the request is valid, it is handled and parsed, and the response is
+formatted in JSON and sent back to the requesting client.
+
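+A minimal sketch of such a client in C is shown below. The socket path, the
+``SOCK_SEQPACKET`` socket type and the JSON request contents are assumptions
+made for illustration; consult the shipped ``telemetry_client.py`` script for
+the exact protocol.
+
+.. code-block:: c
+
+   #include <stdio.h>
+   #include <string.h>
+   #include <sys/socket.h>
+   #include <sys/un.h>
+   #include <unistd.h>
+
+   /* Connect to the telemetry UNIX socket, send one JSON request and
+    * print the JSON response. Returns 0 on success, -1 on any error. */
+   static int
+   query_telemetry(const char *sock_path, const char *json_req)
+   {
+           struct sockaddr_un addr = { .sun_family = AF_UNIX };
+           char resp[4096];
+           ssize_t n;
+           int fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+
+           if (fd < 0)
+                   return -1;
+           strncpy(addr.sun_path, sock_path, sizeof(addr.sun_path) - 1);
+           if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
+                           send(fd, json_req, strlen(json_req), 0) < 0 ||
+                           (n = recv(fd, resp, sizeof(resp) - 1, 0)) < 0) {
+                   close(fd);
+                   return -1;
+           }
+           resp[n] = '\0';
+           printf("%s\n", resp);
+           close(fd);
+           return 0;
+   }
+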
+Pre-requisites
+~~~~~~~~~~~~~~
+
+* Python ≥ 2.5
+
+* Jansson library for JSON serialization
+
+Test Environment
+----------------
+
+``telemetry`` offers a range of selftests that a client can run within
+the DPDK application.
+
+Selftests are disabled by default. They can be enabled by setting the 'selftest'
+variable to 1 in rte_telemetry_initial_accept().
+
+Note: this 'hardcoded' value is temporary.
+
+Configuration
+-------------
+
+Enable the telemetry API by modifying the following config option before
+building DPDK::
+
+        CONFIG_RTE_LIBRTE_TELEMETRY=y
+
+Note: Meson will pick this up automatically if ``libjansson`` is available.
+
+Running the Application
+-----------------------
+
+The following steps demonstrate how to run the ``telemetry`` API to query all
+statistics on all active ports, using the ``telemetry_client`` python script.
+Note: This guide assumes a packet generator is available and that the user is
+testing with testpmd as the DPDK primary application to forward packets, although
+any DPDK application is applicable.
+
+#. Launch testpmd as the primary application with ``telemetry`` enabled::
+
+        ./app/testpmd --telemetry
+
+#. Launch the ``telemetry`` python script with a client filepath::
+
+        python usertools/telemetry_client.py /var/run/some_client
+
+   The client filepath is used to set up the UNIX connection with the
+   DPDK primary application, in this case ``testpmd``.
+   This will initialize a menu where a client can proceed to recursively query
+   statistics, request statistics once, or unregister the filepath, thus exiting
+   the menu.
+
+#. Send traffic to any or all available ports from a traffic generator.
+   Select a query option (recursive or singular polling).
+   The metrics will then be displayed on the client terminal in JSON format.
+
+#. Once finished, unregister the client using the menu command.
index 6a13c76..4910c12 100644 (file)
@@ -57,8 +57,8 @@ compiling the kernel and those kernel modules should be inserted.
 
         $(testpmd) -l 2-3 -n 4 \
                --vdev=virtio_user0,path=/dev/vhost-net,queue_size=1024 \
-               -- -i --txqflags=0x0 --enable-lro \
-               --enable-rx-cksum --rxd=1024 --txd=1024
+               -- -i --tx-offloads=0x0000002c --enable-lro \
+               --txd=1024 --rxd=1024
 
     This command runs testpmd with two ports, one physical NIC to communicate
     with outside, and one virtio-user to communicate with kernel.
@@ -69,11 +69,6 @@ compiling the kernel and those kernel modules should be inserted.
     VIRTIO_NET_F_GUEST_TSO6 feature so that large packets from kernel can be
     transmitted to DPDK application and further TSOed by physical NIC.
 
-* ``--enable-rx-cksum``
-
-    This is used to negotiate VIRTIO_NET_F_GUEST_CSUM so that packets from
-    kernel can be deemed as valid Rx checksumed.
-
 * ``queue_size``
 
     256 by default. To avoid shortage of descriptors, we can increase it to 1024.
@@ -86,9 +81,17 @@ compiling the kernel and those kernel modules should be inserted.
 
         $(testpmd) -l 2-3 -n 4 \
                --vdev=virtio_user0,path=/dev/vhost-net,queues=2,queue_size=1024 \
-               -- -i --txqflags=0x0 --enable-lro \
-               --enable-rx-cksum --txq=2 --rxq=2 --rxd=1024 \
-               --txd=1024
+               -- -i --tx-offloads=0x0000002c --enable-lro \
+               --txq=2 --rxq=2 --txd=1024 --rxd=1024
+
+#. Enable Rx checksum offloads in testpmd:
+
+    .. code-block:: console
+
+        (testpmd) port stop 0
+        (testpmd) port config 0 rx_offload tcp_cksum on
+        (testpmd) port config 0 rx_offload udp_cksum on
+        (testpmd) port start 0
 
 #. Start testpmd:
 
index 476ce3a..2313dc7 100644 (file)
@@ -96,7 +96,7 @@ some minor changes.
             dpdk-app-testpmd testpmd -l 6-7 -n 4 -m 1024 --no-pci \
             --vdev=virtio_user0,path=/var/run/usvhost \
             --file-prefix=container \
-            -- -i --txqflags=0xf00 --disable-hw-vlan
+            -- -i
 
 Note: If we run all above setup on the host, it's a shm-based IPC.
 
index b06bb61..e05aeb9 100644 (file)
@@ -1,11 +1,11 @@
 ..  SPDX-License-Identifier: BSD-3-Clause
     Copyright(c) 2017 Cavium, Inc
 
-OCTEONTX FPAVF Mempool Driver
-=============================
+OCTEON TX FPAVF Mempool Driver
+==============================
 
-The OCTEONTX FPAVF PMD (**librte_mempool_octeontx**) is a mempool
-driver for offload mempool device found in **Cavium OCTEONTX** SoC
+The OCTEON TX FPAVF PMD (**librte_mempool_octeontx**) is a mempool
+driver for the offload mempool device found in the **Cavium OCTEON TX** SoC
 family.
 
 More information can be found at `Cavium, Inc Official Website
@@ -14,14 +14,14 @@ More information can be found at `Cavium, Inc Official Website
 Features
 --------
 
-Features of the OCTEONTX FPAVF PMD are:
+Features of the OCTEON TX FPAVF PMD are:
 
 - 32 SR-IOV Virtual functions
 - 32 Pools
 - HW mempool manager
 
-Supported OCTEONTX SoCs
------------------------
+Supported OCTEON TX SoCs
+------------------------
 
 - CN83xx
 
@@ -50,7 +50,7 @@ Please note that enabling debugging options may affect system performance.
 Driver Compilation
 ~~~~~~~~~~~~~~~~~~
 
-To compile the OCTEONTX FPAVF MEMPOOL PMD for Linux arm64 gcc target, run the
+To compile the OCTEON TX FPAVF MEMPOOL PMD for Linux arm64 gcc target, run the
 following ``make`` command:
 
 .. code-block:: console
@@ -62,7 +62,7 @@ following ``make`` command:
 Initialization
 --------------
 
-The octeontx fpavf mempool initialization similar to other mempool
+The OCTEON TX fpavf mempool initialization is similar to that of other mempool
 drivers like ring. However, the user needs to pass --base-virtaddr as a
 command line input to the application, for example the test_mempool application.
 
diff --git a/doc/guides/meson.build b/doc/guides/meson.build
new file mode 100644 (file)
index 0000000..06f1488
--- /dev/null
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+sphinx = find_program('sphinx-build', required: get_option('enable_docs'))
+
+if sphinx.found()
+       htmldir = join_paths('share', 'doc', 'dpdk')
+       html_guides_build = custom_target('html_guides_build',
+               input: meson.current_source_dir(),
+               output: 'guides',
+               command: [sphinx, '-b', 'html',
+                       '-d', meson.current_build_dir() + '/.doctrees',
+                       '@INPUT@', meson.current_build_dir() + '/guides'],
+               build_by_default: false,
+               install: get_option('enable_docs'),
+               install_dir: htmldir)
+
+       doc_targets += html_guides_build
+       doc_target_names += 'HTML_Guides'
+
+       # sphinx leaves a .buildinfo in the target directory, which we don't
+       # want to install. Note that sh -c has to be used, otherwise the
+       # env var does not get expanded if calling rm/install directly.
+       meson.add_install_script('sh', '-c',
+               'rm -f $MESON_INSTALL_DESTDIR_PREFIX/share/doc/dpdk/guides/.buildinfo')
+       meson.add_install_script('sh', '-c',
+               'install -D -m0644 $MESON_SOURCE_ROOT/doc/guides/custom.css $MESON_INSTALL_DESTDIR_PREFIX/share/doc/dpdk/guides/_static/css/custom.css')
+endif
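+
+# Usage sketch (assumption: the standard DPDK meson options and the top-level
+# 'doc' run target): configure with "meson -Denable_docs=true build" and
+# build the guides with "ninja -C build doc".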
diff --git a/doc/guides/nics/atlantic.rst b/doc/guides/nics/atlantic.rst
new file mode 100644 (file)
index 0000000..80591b1
--- /dev/null
@@ -0,0 +1,47 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2018 Aquantia Corporation.
+
+Aquantia Atlantic DPDK Driver
+=============================
+
+The Atlantic DPDK driver provides support for Aquantia's AQtion family of chipsets: AQC107/AQC108/AQC109.
+
+More information can be found at `Aquantia Official Website
+<https://www.aquantia.com/products/client-connectivity/>`_.
+
+Supported features
+^^^^^^^^^^^^^^^^^^
+
+- Base L2 features
+- Promiscuous mode
+- Multicast mode
+- Port statistics
+- RSS (Receive Side Scaling)
+- Checksum offload
+- Jumbo Frame up to 16K
+
+Configuration Information
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+- ``CONFIG_RTE_LIBRTE_ATLANTIC_PMD`` (default ``y``)
+
+Application Programming Interface
+---------------------------------
+
+Limitations or Known issues
+---------------------------
+
+Statistics
+~~~~~~~~~~
+
+MTU setting
+~~~~~~~~~~~
+
+The Atlantic NIC supports jumbo frames of up to 16K in size.
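+
+For example, a larger MTU can be set from testpmd while the port is stopped
+(the port id and MTU value below are illustrative)::
+
+    testpmd> port stop 0
+    testpmd> port config mtu 0 9000
+    testpmd> port start 0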
+
+Supported Chipsets and NICs
+---------------------------
+
+- Aquantia AQtion AQC107 10 Gigabit Ethernet Controller
+- Aquantia AQtion AQC108 5 Gigabit Ethernet Controller
+- Aquantia AQtion AQC109 2.5 Gigabit Ethernet Controller
index e30f494..9b270a4 100644 (file)
@@ -24,7 +24,7 @@ AXGBE PMD has support for:
 - Multicast mode
 - RSS (Receive Side Scaling)
 - Checksum offload
-- Jumbo Frame upto 9K
+- Jumbo Frame up to 9K
 
 
 Configuration Information
index 66c03e1..e2f385d 100644 (file)
@@ -558,7 +558,7 @@ which are lower than logging ``level``.
 
     <dpdk app> <EAL args> --log-level=pmd.net.dpaa2:<level> -- ...
 
-Using ``pmd.dpaa2`` as log matching criteria, all PMD logs can be enabled
+Using ``pmd.net.dpaa2`` as log matching criteria, all PMD logs can be enabled
 which are lower than logging ``level``.
 
 Whitelisting & Blacklisting
index d19912e..34c4857 100644 (file)
@@ -113,10 +113,6 @@ Configuration information
    * **CONFIG_RTE_LIBRTE_ENA_PMD** (default y): Enables or disables inclusion
      of the ENA PMD driver in the DPDK compilation.
 
-
-   * **CONFIG_RTE_LIBRTE_ENA_DEBUG_INIT** (default y): Enables or disables debug
-     logging of device initialization within the ENA PMD driver.
-
    * **CONFIG_RTE_LIBRTE_ENA_DEBUG_RX** (default n): Enables or disables debug
      logging of RX logic within the ENA PMD driver.
 
@@ -187,11 +183,20 @@ Prerequisites
 -------------
 
 #. Prepare the system as recommended by DPDK suite.  This includes environment
-   variables, hugepages configuration, tool-chains and configuration
+   variables, hugepages configuration, tool-chains and configuration.
+
+#. The ENA PMD can operate with the ``vfio-pci`` or ``igb_uio`` driver.
+
+#. Insert the ``vfio-pci`` or ``igb_uio`` kernel module using the command
+   ``modprobe vfio-pci`` or ``modprobe igb_uio`` respectively.
+
+#. For ``vfio-pci`` users only:
+   Please make sure that ``IOMMU`` is enabled in your system,
+   or use ``vfio`` driver in ``noiommu`` mode::
 
-#. Insert igb_uio kernel module using the command 'modprobe igb_uio'
+     echo 1 > /sys/module/vfio/parameters/enable_unsafe_noiommu_mode
 
-#. Bind the intended ENA device to igb_uio module
+#. Bind the intended ENA device to the ``vfio-pci`` or ``igb_uio`` module.
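+
+   For example, using the ``usertools/dpdk-devbind.py`` script (the PCI
+   address below is illustrative)::
+
+     ./usertools/dpdk-devbind.py --bind=vfio-pci 0000:00:06.0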
 
 
 At this point the system should be ready to run DPDK applications. Once the
diff --git a/doc/guides/nics/enetc.rst b/doc/guides/nics/enetc.rst
new file mode 100644 (file)
index 0000000..8038bf2
--- /dev/null
@@ -0,0 +1,110 @@
+.. SPDX-License-Identifier: BSD-3-Clause
+   Copyright 2018 NXP
+
+ENETC Poll Mode Driver
+======================
+
+The ENETC NIC PMD (**librte_pmd_enetc**) provides poll mode driver
+support for the inbuilt NIC found in the **NXP LS1028** SoC.
+
+More information can be found at `NXP Official Website
+<https://www.nxp.com/products/processors-and-microcontrollers/arm-based-processors-and-mcus/qoriq-layerscape-arm-processors/qoriq-layerscape-1028a-industrial-applications-processor:LS1028A>`_.
+
+ENETC
+-----
+
+This section provides an overview of the NXP ENETC
+and how it is integrated into the DPDK.
+
+Contents summary
+
+- ENETC overview
+- ENETC features
+- PCI bus driver
+- NIC driver
+- Supported ENETC SoCs
+- Prerequisites
+- Driver compilation and testing
+
+ENETC Overview
+~~~~~~~~~~~~~~
+
+ENETC is a PCI Integrated End Point (IEP). An IEP implements
+peripheral devices in an SoC such that software sees them as PCIe devices.
+ENETC is an evolution of BDR (Buffer Descriptor Ring) based networking
+IPs.
+
+This infrastructure simplifies adding support for IEPs and facilitates the following:
+
+- Device discovery and location
+- Resource requirement discovery and allocation (e.g. interrupt assignment,
+  device register address)
+- Event reporting
+
+ENETC Features
+~~~~~~~~~~~~~~
+
+- Link Status
+- Packet type information
+
+NIC Driver (PMD)
+~~~~~~~~~~~~~~~~
+
+The ENETC PMD is a traditional DPDK PMD which provides the necessary interface
+between the RTE framework and the ENETC internal drivers.
+
+- The driver registers the device vendor table in the PCI subsystem.
+- The RTE framework scans the PCI bus for connected devices.
+- This scanning invokes the probe function of the ENETC driver.
+- The probe function sets the basic device registers and also sets up the BD rings.
+- On packet Rx the respective BD ring status bit is set, which is then used for
+  packet processing.
+- Tx is then done first, followed by Rx.
+
+Supported ENETC SoCs
+~~~~~~~~~~~~~~~~~~~~
+
+- LS1028
+
+Prerequisites
+~~~~~~~~~~~~~
+
+There are three main prerequisites for executing the ENETC PMD on an ENETC
+compatible board:
+
+1. **ARM 64 Tool Chain**
+
+   For example, the `*aarch64* Linaro Toolchain <https://releases.linaro.org/components/toolchain/binaries/7.3-2018.05/aarch64-linux-gnu/gcc-linaro-7.3.1-2018.05-i686_aarch64-linux-gnu.tar.xz>`_.
+
+2. **Linux Kernel**
+
+   It can be obtained from `NXP's Github hosting <https://source.codeaurora.org/external/qoriq/qoriq-components/linux>`_.
+
+3. **Root file system**
+
+   Any *aarch64* supporting filesystem can be used. For example,
+   Ubuntu 16.04 LTS (Xenial) or 18.04 (Bionic) userland which can be obtained
+   from `here <http://cdimage.ubuntu.com/ubuntu-base/releases/18.04/release/ubuntu-base-18.04.1-base-arm64.tar.gz>`_.
+
+The following dependencies are not part of DPDK and must be installed
+separately:
+
+- **NXP Linux LSDK**
+
+  NXP Layerscape software development kit (LSDK) includes support for family
+  of QorIQ® ARM-Architecture-based system on chip (SoC) processors
+  and corresponding boards.
+
+  It includes the Linux board support packages (BSPs) for NXP SoCs,
+  a fully operational tool chain, kernel and board specific modules.
+
+  LSDK and related information can be obtained from:  `LSDK <https://www.nxp.com/support/developer-resources/run-time-software/linux-software-and-development-tools/layerscape-software-development-kit:LAYERSCAPE-SDK>`_
+
+Driver compilation and testing
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Follow the instructions available in the document
+:ref:`compiling and testing a PMD for a NIC <pmd_build_and_test>`
+to launch **testpmd**.
+
+To compile in performance mode, please set ``CONFIG_RTE_CACHE_LINE_SIZE=64``.
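+
+A minimal sketch of such a build (assuming the target's default cache line
+size is 128 bytes):
+
+.. code-block:: console
+
+   make config T=arm64-armv8a-linuxapp-gcc
+   # assumption: the default for this target is CONFIG_RTE_CACHE_LINE_SIZE=128
+   sed -i 's/CONFIG_RTE_CACHE_LINE_SIZE=128/CONFIG_RTE_CACHE_LINE_SIZE=64/' build/.config
+   make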
index 438a83d..746d891 100644 (file)
@@ -260,6 +260,12 @@ Generic Flow API is supported. The baseline support is:
   - Selectors: 'is', 'spec' and 'mask'. 'last' is not supported
   - In total, up to 64 bytes of mask is allowed across all headers
 
+- **1400 and later series VICs with advanced filters enabled**
+
+  All the above plus:
+
+  - Action: count
+
 More features may be added in future firmware and new versions of the VIC.
 Please refer to the release notes.
 
@@ -345,6 +351,41 @@ suitable for others. Such applications may change the mode by setting
   applications such as OVS-DPDK performance benchmarks that utilize
   only the default VLAN and want to see only untagged packets.
 
+
+Vectorized Rx Handler
+---------------------
+
+The ENIC PMD includes a version of the receive handler that is vectorized using
+AVX2 SIMD instructions. It is meant for bulk, throughput-oriented workloads
+where reducing cycles/packet in the PMD is a priority. In order to use the
+vectorized handler, take the following steps.
+
+- Use a recent version of gcc, icc, or clang and build 64-bit DPDK. If
+  the compiler is known to support AVX2, the DPDK build system
+  automatically compiles the vectorized handler. Otherwise, the
+  handler is not available.
+
+- Set the ``devargs`` parameter ``enable-avx2-rx=1`` to explicitly request that
+  the PMD consider the vectorized handler when selecting the receive handler.
+  For example::
+
+    -w 12:00.0,enable-avx2-rx=1
+
+  As the current implementation is intended for field trials, by default, the
+  vectorized handler is not considered (``enable-avx2-rx=0``).
+
+- Run on a UCS M4 or later server with CPUs that support AVX2.
+
+The PMD selects the vectorized handler when the handler is compiled into
+the driver, the user requests its use via ``enable-avx2-rx=1``, the CPU
+supports AVX2, and scatter Rx is not used. To verify that the
+vectorized handler is selected, enable debug logging
+(``--log-level=pmd,debug``) and check for the following message.
+
+.. code-block:: console
+
+    enic_use_vector_rx_handler use the non-scatter avx2 Rx handler
+
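+For example, a complete testpmd invocation combining these options could look
+like this (the core list, channel count and PCI address are illustrative)::
+
+    ./testpmd -l 1-2 -n 4 -w 12:00.0,enable-avx2-rx=1 --log-level=pmd,debug -- -i
+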
 .. _enic_limitations:
 
 Limitations
index cddc877..3fa5cb7 100644 (file)
@@ -513,8 +513,9 @@ CRC offload
 -----------
 
 Supports CRC stripping by hardware.
+A PMD is assumed to support CRC stripping by default. A PMD should advertise whether it supports keeping the CRC.
 
-* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_CRC_STRIP,DEV_RX_OFFLOAD_KEEP_CRC``.
+* **[uses] rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_KEEP_CRC``.
 
 
 .. _nic_features_vlan_offload:
@@ -526,8 +527,9 @@ Supports VLAN offload to hardware.
 
 * **[uses]       rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_VLAN_STRIP,DEV_RX_OFFLOAD_VLAN_FILTER,DEV_RX_OFFLOAD_VLAN_EXTEND``.
 * **[uses]       rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_VLAN_INSERT``.
+* **[uses]       mbuf**: ``mbuf.ol_flags:PKT_TX_VLAN``, ``mbuf.vlan_tci``.
 * **[implements] eth_dev_ops**: ``vlan_offload_set``.
-* **[provides]   mbuf**: ``mbuf.ol_flags:PKT_RX_VLAN_STRIPPED``, ``mbuf.vlan_tci``.
+* **[provides]   mbuf**: ``mbuf.ol_flags:PKT_RX_VLAN_STRIPPED``, ``mbuf.ol_flags:PKT_RX_VLAN``, ``mbuf.vlan_tci``.
 * **[provides]   rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_VLAN_STRIP``,
   ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_VLAN_INSERT``.
 * **[related]    API**: ``rte_eth_dev_set_vlan_offload()``,
@@ -543,9 +545,10 @@ Supports QinQ (queue in queue) offload.
 
 * **[uses]     rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_QINQ_STRIP``.
 * **[uses]     rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_QINQ_INSERT``.
-* **[uses]     mbuf**: ``mbuf.ol_flags:PKT_TX_QINQ_PKT``.
-* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_QINQ_STRIPPED``, ``mbuf.vlan_tci``,
-   ``mbuf.vlan_tci_outer``.
+* **[uses]     mbuf**: ``mbuf.ol_flags:PKT_TX_QINQ``, ``mbuf.vlan_tci_outer``.
+* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_QINQ_STRIPPED``, ``mbuf.ol_flags:PKT_RX_QINQ``,
+  ``mbuf.ol_flags:PKT_RX_VLAN_STRIPPED``, ``mbuf.ol_flags:PKT_RX_VLAN``,
+  ``mbuf.vlan_tci``, ``mbuf.vlan_tci_outer``.
 * **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_QINQ_STRIP``,
   ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_QINQ_INSERT``.
 
@@ -561,6 +564,7 @@ Supports L3 checksum offload.
 * **[uses]     rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_IPV4_CKSUM``.
 * **[uses]     mbuf**: ``mbuf.ol_flags:PKT_TX_IP_CKSUM``,
   ``mbuf.ol_flags:PKT_TX_IPV4`` | ``PKT_TX_IPV6``.
+* **[uses]     mbuf**: ``mbuf.l2_len``, ``mbuf.l3_len``.
 * **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_IP_CKSUM_UNKNOWN`` |
   ``PKT_RX_IP_CKSUM_BAD`` | ``PKT_RX_IP_CKSUM_GOOD`` |
   ``PKT_RX_IP_CKSUM_NONE``.
@@ -575,15 +579,16 @@ L4 checksum offload
 
 Supports L4 checksum offload.
 
-* **[uses]     rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_UDP_CKSUM,DEV_RX_OFFLOAD_TCP_CKSUM``.
+* **[uses]     rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_UDP_CKSUM,DEV_RX_OFFLOAD_TCP_CKSUM,DEV_RX_OFFLOAD_SCTP_CKSUM``.
 * **[uses]     rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_UDP_CKSUM,DEV_TX_OFFLOAD_TCP_CKSUM,DEV_TX_OFFLOAD_SCTP_CKSUM``.
 * **[uses]     mbuf**: ``mbuf.ol_flags:PKT_TX_IPV4`` | ``PKT_TX_IPV6``,
   ``mbuf.ol_flags:PKT_TX_L4_NO_CKSUM`` | ``PKT_TX_TCP_CKSUM`` |
   ``PKT_TX_SCTP_CKSUM`` | ``PKT_TX_UDP_CKSUM``.
+* **[uses]     mbuf**: ``mbuf.l2_len``, ``mbuf.l3_len``.
 * **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_L4_CKSUM_UNKNOWN`` |
   ``PKT_RX_L4_CKSUM_BAD`` | ``PKT_RX_L4_CKSUM_GOOD`` |
   ``PKT_RX_L4_CKSUM_NONE``.
-* **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_UDP_CKSUM,DEV_RX_OFFLOAD_TCP_CKSUM``,
+* **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_UDP_CKSUM,DEV_RX_OFFLOAD_TCP_CKSUM,DEV_RX_OFFLOAD_SCTP_CKSUM``,
   ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_UDP_CKSUM,DEV_TX_OFFLOAD_TCP_CKSUM,DEV_TX_OFFLOAD_SCTP_CKSUM``.
 
 .. _nic_features_hw_timestamp:
@@ -638,6 +643,16 @@ Inner L4 checksum
 
 Supports inner packet L4 checksum.
 
+* **[uses]     rte_eth_rxconf,rte_eth_rxmode**: ``offloads:DEV_RX_OFFLOAD_OUTER_UDP_CKSUM``.
+* **[provides] mbuf**: ``mbuf.ol_flags:PKT_RX_OUTER_L4_CKSUM_UNKNOWN`` |
+  ``PKT_RX_OUTER_L4_CKSUM_BAD`` | ``PKT_RX_OUTER_L4_CKSUM_GOOD`` | ``PKT_RX_OUTER_L4_CKSUM_INVALID``.
+* **[uses]     rte_eth_txconf,rte_eth_txmode**: ``offloads:DEV_TX_OFFLOAD_OUTER_UDP_CKSUM``.
+* **[uses]     mbuf**: ``mbuf.ol_flags:PKT_TX_OUTER_IPV4`` | ``PKT_TX_OUTER_IPV6``,
+  ``mbuf.ol_flags:PKT_TX_OUTER_UDP_CKSUM``.
+* **[uses]     mbuf**: ``mbuf.outer_l2_len``, ``mbuf.outer_l3_len``.
+* **[provides] rte_eth_dev_info**: ``rx_offload_capa,rx_queue_offload_capa:DEV_RX_OFFLOAD_OUTER_UDP_CKSUM``,
+  ``tx_offload_capa,tx_queue_offload_capa:DEV_TX_OFFLOAD_OUTER_UDP_CKSUM``.
+
 
 .. _nic_features_packet_type_parsing:
 
diff --git a/doc/guides/nics/features/atlantic.ini b/doc/guides/nics/features/atlantic.ini
new file mode 100644 (file)
index 0000000..5ed095b
--- /dev/null
@@ -0,0 +1,37 @@
+;
+; Supported features of the 'atlantic' network poll mode driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Speed capabilities   = Y
+Link status          = Y
+Link status event    = Y
+Queue start/stop     = Y
+MTU update           = Y
+Jumbo frame          = Y
+Promiscuous mode     = Y
+Allmulticast mode    = Y
+Unicast MAC filter   = Y
+RSS hash             = Y
+RSS key update       = Y
+RSS reta update      = Y
+VLAN filter          = Y
+Flow control         = Y
+CRC offload          = Y
+VLAN offload         = Y
+L3 checksum offload  = Y
+L4 checksum offload  = Y
+Packet type parsing  = Y
+Rx descriptor status = Y
+Tx descriptor status = Y
+Basic stats          = Y
+Extended stats       = Y
+Stats per queue      = Y
+FW version           = Y
+EEPROM dump          = Y
+Registers dump       = Y
+Linux UIO            = Y
+ARMv8                = Y
+x86-32               = Y
+x86-64               = Y
index 691c1e3..aa6f05a 100644 (file)
@@ -23,5 +23,6 @@ Inner L4 checksum    = Y
 Basic stats          = Y
 Extended stats       = Y
 Linux UIO            = Y
+Linux VFIO           = Y
 x86-32               = Y
 x86-64               = Y
diff --git a/doc/guides/nics/features/enetc.ini b/doc/guides/nics/features/enetc.ini
new file mode 100644 (file)
index 0000000..69476a2
--- /dev/null
@@ -0,0 +1,11 @@
+;
+; Supported features of the 'enetc' network poll mode driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Packet type parsing  = Y
+Link status          = Y
+Linux VFIO           = Y
+ARMv8                = Y
+Usage doc            = Y
index 39ee579..e3c4c08 100644 (file)
@@ -7,6 +7,9 @@
 Link status          = Y
 Link status event    = Y
 Rx interrupt         = Y
+Queue start/stop     = Y
+Runtime Rx queue setup = Y
+Runtime Tx queue setup = Y
 MTU update           = Y
 Jumbo frame          = Y
 Promiscuous mode     = Y
diff --git a/doc/guides/nics/features/mvneta.ini b/doc/guides/nics/features/mvneta.ini
new file mode 100644 (file)
index 0000000..701eb03
--- /dev/null
@@ -0,0 +1,19 @@
+;
+; Supported features of the 'mvneta' network poll mode driver.
+;
+; Refer to default.ini for the full list of available PMD features.
+;
+[Features]
+Speed capabilities   = Y
+Link status          = Y
+MTU update           = Y
+Jumbo frame          = Y
+Promiscuous mode     = Y
+Unicast MAC filter   = Y
+CRC offload          = Y
+L3 checksum offload  = Y
+L4 checksum offload  = Y
+Packet type parsing  = Y
+Basic stats          = Y
+ARMv8                = Y
+Usage doc            = Y
index 2ff6042..f5dc1e7 100644 (file)
@@ -6,6 +6,7 @@
 [Features]
 Speed capabilities   = P
 Link status          = Y
+Free Tx mbuf on demand = Y
 Queue start/stop     = Y
 Scattered Rx         = Y
 Promiscuous mode     = Y
index 8a497ee..d1aa833 100644 (file)
@@ -9,6 +9,8 @@ Link status          = Y
 Link status event    = Y
 Fast mbuf free       = Y
 Queue start/stop     = Y
+Runtime Rx queue setup = Y
+Runtime Tx queue setup = Y
 MTU update           = Y
 Jumbo frame          = Y
 Scattered Rx         = Y
index d1391e9..764e089 100644 (file)
@@ -139,8 +139,7 @@ CRC striping
 ~~~~~~~~~~~~
 
 The FM10000 family of NICs strips the CRC for every packet coming into the
-host interface.  So, CRC will be stripped even when ``DEV_RX_OFFLOAD_CRC_STRIP``
-in ``rxmode.offloads`` is NOT set in ``struct rte_eth_conf``.
+host interface. So, keeping CRC is not supported.
 
 Maximum packet length
 ~~~~~~~~~~~~~~~~~~~~~
index 65d87f8..ab3928a 100644 (file)
@@ -163,6 +163,15 @@ Runtime Config Options
   Currently hot-plugging of representor ports is not supported so all required
   representors must be specified on the creation of the PF.
 
+- ``Use latest supported vector`` (default ``disable``)
+
+  The latest supported vector path may not always give the best performance,
+  so it is recommended only on later platforms. However, users may want the
+  latest vector path, since it can give better performance in some real
+  workloads. The ``devargs`` parameter ``use-latest-supported-vec`` is
+  therefore introduced, for example::
+
+    -w 84:00.0,use-latest-supported-vec=1
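+
+  A full testpmd invocation using this parameter might look like the sketch
+  below (the core list and channel count are illustrative)::
+
+    ./testpmd -l 1-2 -n 4 -w 84:00.0,use-latest-supported-vec=1 -- -i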
+
 Driver compilation and testing
 ------------------------------
 
@@ -421,6 +430,12 @@ functionality requires a NIC firmware version of 6.0 or greater.
 Current implementation supports GTP-C/GTP-U/PPPoE/PPPoL2TP,
 steering can be used with rte_flow API.
 
+GTPv1 package is released, and it can be downloaded from
+https://downloadcenter.intel.com/download/27587.
+
+PPPoE package is released, and it can be downloaded from
+https://downloadcenter.intel.com/download/28040.
+
 Load a profile which supports GTP and store backup profile:
 
 .. code-block:: console
diff --git a/doc/guides/nics/img/mvpp2_tm.svg b/doc/guides/nics/img/mvpp2_tm.svg
new file mode 100644 (file)
index 0000000..4aa9272
--- /dev/null
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/PR-SVG-20010719/DTD/svg10.dtd">
+<svg width="16cm" height="4cm" viewBox="-1 -1 309 75" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+  <g>
+    <polyline style="fill: none; fill-opacity:0; stroke-width: 1; stroke: #000000" points="159.661,12.6759 141.655,12.6759 141.655,35.5606 88.1561,35.5606 88.1561,44.9245 "/>
+    <polygon style="fill: #000000" points="88.1561,49.4245 85.1561,43.4245 88.1561,44.9245 91.1561,43.4245 "/>
+    <polygon style="fill: none; fill-opacity:0; stroke-width: 1; stroke: #000000" points="88.1561,49.4245 85.1561,43.4245 88.1561,44.9245 91.1561,43.4245 "/>
+  </g>
+  <g>
+    <polyline style="fill: none; fill-opacity:0; stroke-width: 1; stroke: #000000" points="159.661,12.6759 176.28,12.6759 176.28,35.5606 281.681,35.5606 281.681,44.9245 "/>
+    <polygon style="fill: #000000" points="281.681,49.4245 278.681,43.4245 281.681,44.9245 284.681,43.4245 "/>
+    <polygon style="fill: none; fill-opacity:0; stroke-width: 1; stroke: #000000" points="281.681,49.4245 278.681,43.4245 281.681,44.9245 284.681,43.4245 "/>
+  </g>
+  <g>
+    <rect style="fill: #ffffff" x="126.066" y="0.98102" width="67.1901" height="23.3899"/>
+    <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="126.066" y="0.98102" width="67.1901" height="23.3899"/>
+  </g>
+  <text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="159.661" y="17.1259">
+    <tspan x="159.661" y="17.1259">Port N</tspan>
+  </text>
+  <text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:sans-serif;font-style:normal;font-weight:normal" x="304.581" y="68.168">
+    <tspan x="304.581" y="68.168"></tspan>
+  </text>
+  <g>
+    <rect style="fill: #ffffff" x="62.5504" y="51.5478" width="51.2114" height="22.0925"/>
+    <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="62.5504" y="51.5478" width="51.2114" height="22.0925"/>
+  </g>
+  <text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="88.1561" y="67.044">
+    <tspan x="88.1561" y="67.044">Txq 0</tspan>
+  </text>
+  <g>
+    <rect style="fill: #ffffff" x="134.1" y="51.355" width="51.1213" height="22.478"/>
+    <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="134.1" y="51.355" width="51.1213" height="22.478"/>
+  </g>
+  <text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="159.661" y="67.044">
+    <tspan x="159.661" y="67.044">Txq 1</tspan>
+  </text>
+  <g>
+    <rect style="fill: #ffffff" x="256.416" y="51.5478" width="50.5306" height="22.0925"/>
+    <rect style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" x="256.416" y="51.5478" width="50.5306" height="22.0925"/>
+  </g>
+  <text font-size="12.7998" style="fill: #000000;text-anchor:middle;font-family:sans-serif;font-style:normal;font-weight:normal" x="281.681" y="67.044">
+    <tspan x="281.681" y="67.044">Txq M</tspan>
+  </text>
+  <text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:sans-serif;font-style:normal;font-weight:normal" x="101.822" y="67.044">
+    <tspan x="101.822" y="67.044"></tspan>
+  </text>
+  <text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:sans-serif;font-style:normal;font-weight:normal" x="-0.537645" y="17.1259">
+    <tspan x="-0.537645" y="17.1259">Level 0:</tspan>
+  </text>
+  <text font-size="12.7998" style="fill: #000000;text-anchor:start;font-family:sans-serif;font-style:normal;font-weight:normal" x="-0.746688" y="67.044">
+    <tspan x="-0.746688" y="67.044">Level 1:</tspan>
+  </text>
+  <g>
+    <ellipse style="fill: #000000" cx="207.645" cy="62.594" rx="0.425344" ry="0.425344"/>
+    <ellipse style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" cx="207.645" cy="62.594" rx="0.425344" ry="0.425344"/>
+  </g>
+  <g>
+    <ellipse style="fill: #000000" cx="219.525" cy="62.594" rx="0.425344" ry="0.425344"/>
+    <ellipse style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" cx="219.525" cy="62.594" rx="0.425344" ry="0.425344"/>
+  </g>
+  <g>
+    <ellipse style="fill: #000000" cx="231.405" cy="62.594" rx="0.425345" ry="0.425345"/>
+    <ellipse style="fill: none; fill-opacity:0; stroke-width: 2; stroke: #000000" cx="231.405" cy="62.594" rx="0.425345" ry="0.425345"/>
+  </g>
+  <g>
+    <line style="fill: none; fill-opacity:0; stroke-width: 1; stroke: #000000" x1="159.661" y1="24.3709" x2="159.661" y2="45.737"/>
+    <polygon style="fill: #000000" points="159.661,50.237 156.661,44.237 159.661,45.737 162.661,44.237 "/>
+    <polygon style="fill: none; fill-opacity:0; stroke-width: 1; stroke: #000000" points="159.661,50.237 156.661,44.237 159.661,45.737 162.661,44.237 "/>
+  </g>
+</svg>
index 59f6063..bb107ae 100644 (file)
@@ -12,6 +12,7 @@ Network Interface Controller Drivers
     features
     build_and_test
     ark
+    atlantic
     avp
     axgbe
     bnx2x
@@ -21,6 +22,7 @@ Network Interface Controller Drivers
     dpaa2
     e1000em
     ena
+    enetc
     enic
     fm10k
     i40e
@@ -32,6 +34,7 @@ Network Interface Controller Drivers
     liquidio
     mlx4
     mlx5
+    mvneta
     mvpp2
     netvsc
     nfp
index 16d6390..1c294b0 100644 (file)
@@ -200,6 +200,33 @@ There is no RTE API to add a VF's MAC address from the PF. On ixgbe, the
 ``rte_eth_dev_mac_addr_add()`` function can be used to add a VF's MAC address,
 as a workaround.
 
+X550 does not support legacy interrupt mode
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Description
+^^^^^^^^^^^^
+X550 cannot get interrupts when using the ``uio_pci_generic`` module or the
+legacy interrupt mode of ``igb_uio`` or ``vfio``, because the X550 errata
+states that the Interrupt Status bit is not implemented. The erratum is item #22
+from `X550 spec update <https://www.intel.com/content/dam/www/public/us/en/
+documents/specification-updates/ethernet-x550-spec-update.pdf>`_
+
+Implication
+^^^^^^^^^^^
+When using the ``uio_pci_generic`` module or the legacy interrupt mode of
+``igb_uio`` or ``vfio``, the Interrupt Status bit is checked to see whether an
+interrupt has arrived. Since the bit is not implemented in X550, the IRQ cannot
+be handled correctly and the event fd cannot be reported to DPDK applications.
+Applications therefore cannot get interrupts, and ``dmesg`` will show messages
+like ``irq #No.: nobody cared``.
+
+Workaround
+^^^^^^^^^^
+Do not bind X550 NICs to the ``uio_pci_generic`` module.
+Do not bind X550 NICs to ``igb_uio`` in legacy mode.
+Before binding ``vfio`` in legacy mode on X550 NICs, use
+``modprobe vfio nointxmask=1`` to load the ``vfio`` module, provided the INTx
+line is not shared with other devices.
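+
+For example, a sketch of the ``vfio`` workaround sequence (the PCI address is
+illustrative)::
+
+    modprobe vfio nointxmask=1
+    modprobe vfio-pci
+    ./usertools/dpdk-devbind.py --bind=vfio-pci 0000:03:00.0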
 
 Inline crypto processing support
 --------------------------------
index 87b42cd..e2a3800 100644 (file)
@@ -30,14 +30,6 @@ Please note that enabling debugging options may affect system performance.
 
   Toggle compilation of LiquidIO PMD.
 
-- ``CONFIG_RTE_LIBRTE_LIO_DEBUG_DRIVER`` (default ``n``)
-
-  Toggle display of generic debugging messages.
-
-- ``CONFIG_RTE_LIBRTE_LIO_DEBUG_INIT`` (default ``n``)
-
-  Toggle display of initialization related messages.
-
 - ``CONFIG_RTE_LIBRTE_LIO_DEBUG_RX`` (default ``n``)
 
   Toggle display of receive fast path run-time messages.
index 52e1213..6769628 100644 (file)
@@ -339,7 +339,12 @@ Run-time configuration
   When those offloads are requested the MPS send function will not be used.
 
   It is currently only supported on the ConnectX-4 Lx, ConnectX-5 and Bluefield
-  families of adapters. Enabled by default.
+  families of adapters.
+  On ConnectX-4 Lx, MPW is considered unsecure and is hence disabled by default.
+  Users who enable MPW should be aware that an application providing incorrect
+  mbuf descriptors in the Tx burst can cause serious errors on the host,
+  including, in some cases, the NIC getting stuck.
+  On ConnectX-5 and Bluefield the MPW is secure and enabled by default.
 
 - ``txq_mpw_hdr_dseg_en`` parameter [int]
 
@@ -392,6 +397,13 @@ Run-time configuration
 
   Disabled by default.
 
+- ``dv_flow_en`` parameter [int]
+
+  A nonzero value enables the DV flow steering assuming it is supported
+  by the driver.
+
+  Disabled by default.
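+
+  For example, it can be requested per device via ``devargs`` (the PCI address
+  is illustrative)::
+
+    -w 05:00.0,dv_flow_en=1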
+
 - ``representor`` parameter [list]
 
   This parameter can be used to instantiate DPDK Ethernet devices from
diff --git a/doc/guides/nics/mvneta.rst b/doc/guides/nics/mvneta.rst
new file mode 100644 (file)
index 0000000..2132a81
--- /dev/null
@@ -0,0 +1,171 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2018 Marvell International Ltd.
+    Copyright(c) 2018 Semihalf.
+    All rights reserved.
+
+MVNETA Poll Mode Driver
+=======================
+
+The MVNETA PMD (librte_pmd_mvneta) provides poll mode driver support
+for the Marvell NETA 1/2.5 Gbps adapter.
+
+Detailed information about SoCs that use NETA can be obtained here:
+
+* https://www.marvell.com/embedded-processors/armada-3700/
+
+.. Note::
+
+   Due to external dependencies, this driver is disabled by default. It must
+   be enabled manually by setting the relevant configuration option.
+   Please refer to the `Config File Options`_ section for further details.
+
+
+Features
+--------
+
+Features of the MVNETA PMD are:
+
+- Start/stop
+- tx/rx_queue_setup
+- tx/rx_burst
+- Speed capabilities
+- Jumbo frame
+- MTU update
+- Promiscuous mode
+- Unicast MAC filter
+- Link status
+- CRC offload
+- L3 checksum offload
+- L4 checksum offload
+- Packet type parsing
+- Basic stats
+
+
+Limitations
+-----------
+
+- Flushing VLANs added for filtering is not possible due to missing MUSDK
+  functionality. The current workaround is to reset the board so that NETA has
+  a chance to start in a sane state.
+
+Prerequisites
+-------------
+
+- Custom Linux Kernel sources
+
+  .. code-block:: console
+
+     git clone https://github.com/MarvellEmbeddedProcessors/linux-marvell.git -b linux-4.4.120-armada-18.09
+
+
+- MUSDK (Marvell User-Space SDK) sources
+
+  .. code-block:: console
+
+     git clone https://github.com/MarvellEmbeddedProcessors/musdk-marvell.git -b musdk-armada-18.09
+
+  MUSDK is a light-weight library that provides direct access to Marvell's
+  NETA. Alternatively, a prebuilt MUSDK library can be
+  requested from `Marvell Extranet <https://extranet.marvell.com>`_. Once
+  approval has been granted, the library can be found by typing ``musdk`` in
+  the search box.
+
+  MUSDK must be configured with the following features:
+
+  .. code-block:: console
+
+     --enable-pp2=no --enable-neta
+
+- DPDK environment
+
+  Follow the DPDK :ref:`Getting Started Guide for Linux <linux_gsg>` to setup
+  DPDK environment.
+
+Pre-Installation Configuration
+------------------------------
+
+Config File Options
+~~~~~~~~~~~~~~~~~~~
+
+The following options can be modified in the ``config`` file.
+
+- ``CONFIG_RTE_LIBRTE_MVNETA_PMD`` (default ``n``)
+
+    Toggle compilation of the librte_pmd_mvneta driver.
+
+Runtime options
+~~~~~~~~~~~~~~~
+
+The following ``devargs`` options can be enabled at runtime. They must
+be passed as part of EAL arguments.
+
+- ``iface`` (mandatory, with no default value)
+
+  The name of the port (owned by MUSDK) that should be enabled in DPDK.
+  This option can be repeated, resulting in a list of ports to be
+  enabled. For instance, the following will enable the ``eth0`` and ``eth1``
+  ports.
+
+.. code-block:: console
+
+   ./testpmd --vdev=net_mvneta,iface=eth0,iface=eth1 \
+    -c 3 -- -i --p 3 -a
+
+
+Building DPDK
+-------------
+
+The driver needs a precompiled MUSDK library during compilation.
+
+.. code-block:: console
+
+   export CROSS_COMPILE=<toolchain>/bin/aarch64-linux-gnu-
+   ./bootstrap
+   ./configure --host=aarch64-linux-gnu --enable-pp2=no --enable-neta
+   make install
+
+MUSDK will be installed to `usr/local` under the current directory.
+For the detailed build instructions please consult ``doc/musdk_get_started.txt``.
+
+Before the DPDK build process, the environment variable ``LIBMUSDK_PATH`` with
+the path to the MUSDK installation directory needs to be exported.
+
+.. code-block:: console
+
+   export LIBMUSDK_PATH=<musdk>/usr/local
+   export CROSS=aarch64-linux-gnu-
+   make config T=arm64-armv8a-linuxapp-gcc
+   sed -ri 's,(MVNETA_PMD=)n,\1y,' build/.config
+   make
+
+Usage Example
+-------------
+
+The MVNETA PMD requires extra out-of-tree kernel modules to function properly.
+`musdk_uio` and `mv_neta_uio` sources are part of the MUSDK. Please consult
+``doc/musdk_get_started.txt`` for the detailed build instructions.
+
+.. code-block:: console
+
+   insmod musdk_uio.ko
+   insmod mv_neta_uio.ko
+
+Additionally, the interfaces used by the DPDK application need to be brought up:
+
+.. code-block:: console
+
+   ip link set eth0 up
+   ip link set eth1 up
+
+In order to run the testpmd example application, the following command can be used:
+
+.. code-block:: console
+
+   ./testpmd --vdev=net_mvneta,iface=eth0,iface=eth1 -c 3 -- \
+     -i --p 3 -a --txd 256 --rxd 128 --rxq=1 --txq=1  --nb-cores=1
+
+
+In order to run the l2fwd example application, the following command can be used:
+
+.. code-block:: console
+
+   ./l2fwd --vdev=net_mvneta,iface=eth0,iface=eth1 -c 3 -- -T 1 -p 3
index 0408752..82b9383 100644 (file)
@@ -56,7 +56,7 @@ Features of the MVPP2 PMD are:
 
 - Speed capabilities
 - Link status
-- Queue start/stop
+- Tx Queue start/stop
 - MTU update
 - Jumbo frame
 - Promiscuous mode
@@ -70,11 +70,13 @@ Features of the MVPP2 PMD are:
 - L4 checksum offload
 - Packet type parsing
 - Basic stats
-- Extended stats
-- QoS
+- :ref:`Extended stats <extstats>`
 - RX flow control
-- TX queue start/stop
-
+- Scattered TX frames
+- :ref:`QoS <qossupport>`
+- :ref:`Flow API <flowapi>`
+- :ref:`Traffic metering and policing <mtrapi>`
+- :ref:`Traffic Management API <tmapi>`
 
 Limitations
 -----------
@@ -88,6 +90,20 @@ Limitations
   functionality. Current workaround is to reset board so that PPv2 has a
   chance to start in a sane state.
 
+- The MUSDK architecture does not support changing the configuration at run time.
+  All necessary configuration should be done before the first dev_start().
+
+- RX queue start/stop is not supported.
+
+- The current implementation does not support replacement of buffers in the HW buffer pool
+  at run time, so it is the responsibility of the application to ensure that the MTU does not exceed the configured buffer size.
+
+- Configuring TX flow control is currently not supported.
+
+- In the current implementation, the mechanism for acknowledging transmitted packets (``tx_done_cleanup``) is not supported.
+
+- Running more than one DPDK-MUSDK application simultaneously is not supported.
+
 
 Prerequisites
 -------------
@@ -96,19 +112,19 @@ Prerequisites
 
   .. code-block:: console
 
-     git clone https://github.com/MarvellEmbeddedProcessors/linux-marvell.git -b linux-4.4.52-armada-17.10
+     git clone https://github.com/MarvellEmbeddedProcessors/linux-marvell.git -b linux-4.4.120-armada-18.09
 
 - Out of tree `mvpp2x_sysfs` kernel module sources
 
   .. code-block:: console
 
-     git clone https://github.com/MarvellEmbeddedProcessors/mvpp2x-marvell.git -b mvpp2x-armada-17.10
+     git clone https://github.com/MarvellEmbeddedProcessors/mvpp2x-marvell.git -b mvpp2x-armada-18.09
 
 - MUSDK (Marvell User-Space SDK) sources
 
   .. code-block:: console
 
-     git clone https://github.com/MarvellEmbeddedProcessors/musdk-marvell.git -b musdk-armada-17.10
+     git clone https://github.com/MarvellEmbeddedProcessors/musdk-marvell.git -b musdk-armada-18.09
 
   MUSDK is a light-weight library that provides direct access to Marvell's
   PPv2 (Packet Processor v2). Alternatively prebuilt MUSDK library can be
@@ -119,12 +135,6 @@ Prerequisites
   To get better understanding of the library one can consult documentation
   available in the ``doc`` top level directory of the MUSDK sources.
 
-  MUSDK must be configured with the following features:
-
-  .. code-block:: console
-
-     --enable-bpool-dma=64
-
 - DPDK environment
 
   Follow the DPDK :ref:`Getting Started Guide for Linux <linux_gsg>` to setup
@@ -140,6 +150,95 @@ The following options can be modified in the ``config`` file.
 
     Toggle compilation of the librte mvpp2 driver.
 
+    .. Note::
+
+       When the MVPP2 PMD is enabled, ``CONFIG_RTE_LIBRTE_MVNETA_PMD`` must be disabled.
+
+
+Building DPDK
+-------------
+
+The driver needs a precompiled MUSDK library during compilation.
+
+.. code-block:: console
+
+   export CROSS_COMPILE=<toolchain>/bin/aarch64-linux-gnu-
+   ./bootstrap
+   ./configure --host=aarch64-linux-gnu
+   make install
+
+MUSDK will be installed to `usr/local` under the current directory.
+For the detailed build instructions please consult ``doc/musdk_get_started.txt``.
+
+Before the DPDK build process, the environment variable ``LIBMUSDK_PATH`` with
+the path to the MUSDK installation directory needs to be exported.
+
+For additional instructions regarding DPDK cross compilation please refer to :doc:`Cross compile DPDK for ARM64 <../linux_gsg/cross_build_dpdk_for_arm64>`.
+
+.. code-block:: console
+
+   export LIBMUSDK_PATH=<musdk>/usr/local
+   export CROSS=<toolchain>/bin/aarch64-linux-gnu-
+   export RTE_KERNELDIR=<kernel-dir>
+   export RTE_TARGET=arm64-armv8a-linuxapp-gcc
+
+   make config T=arm64-armv8a-linuxapp-gcc
+   sed -i "s/MVNETA_PMD=y/MVNETA_PMD=n/" build/.config
+   sed -i "s/MVPP2_PMD=n/MVPP2_PMD=y/" build/.config
+   make
+
+Usage Example
+-------------
+
+The MVPP2 PMD requires extra out-of-tree kernel modules to function properly.
+`musdk_cma` sources are part of the MUSDK. Please consult
+``doc/musdk_get_started.txt`` for the detailed build instructions.
+For `mvpp2x_sysfs` please consult ``Documentation/pp22_sysfs.txt`` for the
+detailed build instructions.
+
+.. code-block:: console
+
+   insmod musdk_cma.ko
+   insmod mvpp2x_sysfs.ko
+
+Additionally, the interfaces used by the DPDK application need to be brought up:
+
+.. code-block:: console
+
+   ip link set eth0 up
+   ip link set eth2 up
+
+In order to run the testpmd example application, the following command can be used:
+
+.. code-block:: console
+
+   ./testpmd --vdev=eth_mvpp2,iface=eth0,iface=eth2 -c 7 -- \
+     --burst=128 --txd=2048 --rxd=1024 --rxq=2 --txq=2  --nb-cores=2 \
+     -i -a --rss-udp
+
+.. _extstats:
+
+Extended stats
+--------------
+
+MVPP2 PMD supports the following extended statistics:
+
+       - ``rx_bytes``: number of RX bytes
+       - ``rx_packets``: number of RX packets
+       - ``rx_unicast_packets``: number of RX unicast packets
+       - ``rx_errors``: number of RX MAC errors
+       - ``rx_fullq_dropped``: number of RX packets dropped due to full RX queue
+       - ``rx_bm_dropped``: number of RX packets dropped due to no available buffers in the HW pool
+       - ``rx_early_dropped``: number of RX packets that were early dropped
+       - ``rx_fifo_dropped``: number of RX packets dropped due to RX fifo overrun
+       - ``rx_cls_dropped``: number of RX packets dropped by classifier
+       - ``tx_bytes``: number of TX bytes
+       - ``tx_packets``: number of TX packets
+       - ``tx_unicast_packets``: number of TX unicast packets
+       - ``tx_errors``: number of TX MAC errors
+
+
+.. _qossupport:
 
 QoS Configuration
 -----------------
@@ -152,20 +251,23 @@ Configuration syntax
 
 .. code-block:: console
 
-   [port <portnum> default]
-   default_tc = <default_tc>
-   mapping_priority = <mapping_priority>
-   policer_enable = <policer_enable>
+   [policer <policer_id>]
    token_unit = <token_unit>
    color = <color_mode>
    cir = <cir>
    ebs = <ebs>
    cbs = <cbs>
 
+   [port <portnum> default]
+   default_tc = <default_tc>
+   mapping_priority = <mapping_priority>
+
    rate_limit_enable = <rate_limit_enable>
    rate_limit = <rate_limit>
    burst_size = <burst_size>
 
+   default_policer = <policer_id>
+
    [port <portnum> tc <traffic_class>]
    rxq = <rx_queue_list>
    pcp = <pcp_list>
@@ -201,7 +303,9 @@ Where:
 
 - ``<dscp_list>``: List of DSCP values to handle in particular TC (e.g. 0-12 32-48 63).
 
-- ``<policer_enable>``: Enable ingress policer.
+- ``<default_policer>``: Id of the policer configuration section to be used as default.
+
+- ``<policer_id>``: Id of the policer configuration section (0..31).
 
 - ``<token_unit>``: Policer token unit (`bytes` or `packets`).
 
@@ -215,7 +319,7 @@ Where:
 
 - ``<default_color>``: Default color for specific tc.
 
-- ``<rate_limit_enable>``: Enables per port or per txq rate limiting.
+- ``<rate_limit_enable>``: Enables per port or per txq rate limiting (`0`/`1` to disable/enable).
 
 - ``<rate_limit>``: Committed information rate, in kilo bits per second.
 
@@ -234,6 +338,13 @@ Configuration file example
 
 .. code-block:: console
 
+   [policer 0]
+   token_unit = bytes
+   color = blind
+   cir = 100000
+   ebs = 64
+   cbs = 64
+
    [port 0 default]
    default_tc = 0
    mapping_priority = ip
@@ -265,12 +376,7 @@ Configuration file example
    default_tc = 0
    mapping_priority = vlan/ip
 
-   policer_enable = 1
-   token_unit = bytes
-   color = blind
-   cir = 100000
-   ebs = 64
-   cbs = 64
+   default_policer = 0
 
    [port 1 tc 0]
    rxq = 0
@@ -297,38 +403,14 @@ Usage example
    ./testpmd --vdev=eth_mvpp2,iface=eth0,iface=eth2,cfg=/home/user/mrvl.conf \
      -c 7 -- -i -a --disable-hw-vlan-strip --rxq=3 --txq=3
 
-
-Building DPDK
--------------
-
-Driver needs precompiled MUSDK library during compilation.
-
-.. code-block:: console
-
-   export CROSS_COMPILE=<toolchain>/bin/aarch64-linux-gnu-
-   ./bootstrap
-   ./configure --host=aarch64-linux-gnu --enable-bpool-dma=64
-   make install
-
-MUSDK will be installed to `usr/local` under current directory.
-For the detailed build instructions please consult ``doc/musdk_get_started.txt``.
-
-Before the DPDK build process the environmental variable ``LIBMUSDK_PATH`` with
-the path to the MUSDK installation directory needs to be exported.
-
-.. code-block:: console
-
-   export LIBMUSDK_PATH=<musdk>/usr/local
-   export CROSS=aarch64-linux-gnu-
-   make config T=arm64-armv8a-linuxapp-gcc
-   sed -ri 's,(MVPP2_PMD=)n,\1y,' build/.config
-   make
+.. _flowapi:
 
 Flow API
 --------
 
 PPv2 offers packet classification capabilities via classifier engine which
 can be configured via generic flow API offered by DPDK.
+For an additional description please refer to DPDK :ref:`Generic flow API <Generic_flow_API>`.
 
 Supported flow actions
 ~~~~~~~~~~~~~~~~~~~~~~
@@ -489,32 +571,239 @@ Following limitations need to be taken into account while creating flow rules:
 For additional information about classifier please consult
 ``doc/musdk_cls_user_guide.txt``.
 
-Usage Example
--------------
+.. _mtrapi:
 
-MVPP2 PMD requires extra out of tree kernel modules to function properly.
-`musdk_uio` and `mv_pp_uio` sources are part of the MUSDK. Please consult
-``doc/musdk_get_started.txt`` for the detailed build instructions.
-For `mvpp2x_sysfs` please consult ``Documentation/pp22_sysfs.txt`` for the
-detailed build instructions.
+Traffic metering and policing
+-----------------------------
 
-.. code-block:: console
+The MVPP2 PMD supports DPDK traffic metering and policing, which allows the following:
 
-   insmod musdk_uio.ko
-   insmod mv_pp_uio.ko
-   insmod mvpp2x_sysfs.ko
+1. Meter ingress traffic.
+2. Do policing.
+3. Gather statistics.
 
-Additionally interfaces used by DPDK application need to be put up:
+For an additional description please refer to DPDK :doc:`Traffic Metering and Policing API <../prog_guide/traffic_metering_and_policing>`.
 
-.. code-block:: console
+The policer objects defined by this feature can work with the default policer defined via the config file, as described in :ref:`QoS Support <qossupport>`.
 
-   ip link set eth0 up
-   ip link set eth2 up
+Limitations
+~~~~~~~~~~~
 
-In order to run testpmd example application following command can be used:
+The following capabilities are not supported:
 
-.. code-block:: console
+- MTR object meter DSCP table update
+- MTR object policer action update
+- MTR object enabled statistics
 
-   ./testpmd --vdev=eth_mvpp2,iface=eth0,iface=eth2 -c 7 -- \
-     --burst=128 --txd=2048 --rxd=1024 --rxq=2 --txq=2  --nb-cores=2 \
-     -i -a --rss-udp
+Usage example
+~~~~~~~~~~~~~
+
+1. Run testpmd user app:
+
+   .. code-block:: console
+
+               ./testpmd --vdev=eth_mvpp2,iface=eth0,iface=eth2 -c 6 -- -i -p 3 -a --txd 1024 --rxd 1024
+
+2. Create meter profile:
+
+   .. code-block:: console
+
+               testpmd> add port meter profile 0 0 srtcm_rfc2697 2000 256 256
+
+3. Create meter:
+
+   .. code-block:: console
+
+               testpmd> create port meter 0 0 0 yes d d d 0 1 0
+
+4. Create a flow rule with the meter attached:
+
+   .. code-block:: console
+
+               testpmd> flow create 0 ingress pattern ipv4 src is 10.10.10.1 / end actions meter mtr_id 0 / end
+
+For a detailed usage description please refer to "Traffic Metering and Policing" section in DPDK :doc:`Testpmd Runtime Functions <../testpmd_app_ug/testpmd_funcs>`.
+
+
+
+.. _tmapi:
+
+Traffic Management API
+----------------------
+
+The MVPP2 PMD supports the generic DPDK Traffic Management API, which allows
+configuring the following features:
+
+1. Hierarchical scheduling
+2. Traffic shaping
+3. Congestion management
+4. Packet marking
+
+Internally, TM is represented by a hierarchy (tree) of nodes.
+A node which has a parent is called a leaf, whereas a node without a
+parent is called a non-leaf (root).
+The MVPP2 PMD supports a two-level hierarchy, where level 0 represents ports and level 1 represents the tx queues of a given port.
+
+.. figure:: img/mvpp2_tm.svg
+
+Nodes hold the following types of settings:
+
+- for egress scheduler configuration: weight
+- for egress rate limiter: private shaper
+- bitmask indicating which statistics counters will be read
+
+The hierarchy is always constructed from the top, i.e. first a root node is
+added, then some number of leaf nodes. The number of leaf nodes cannot exceed
+the number of configured tx queues.
+
+After the hierarchy is complete, it can be committed.
+
+
+For an additional description please refer to DPDK :doc:`Traffic Management API <../prog_guide/traffic_management>`.
+
+Limitations
+~~~~~~~~~~~
+
+The following capabilities are not supported:
+
+- Traffic manager WRED profile and WRED context
+- Traffic manager shared shaper update
+- Traffic manager packet marking
+- Maximum number of levels in hierarchy is 2
+- Currently dynamic change of a hierarchy is not supported
+
+Usage example
+~~~~~~~~~~~~~
+
+For a detailed usage description please refer to "Traffic Management" section in DPDK :doc:`Testpmd Runtime Functions <../testpmd_app_ug/testpmd_funcs>`.
+
+1. Run testpmd as follows:
+
+   .. code-block:: console
+
+               ./testpmd --vdev=net_mrvl,iface=eth0,iface=eth2,cfg=./qos_config -c 7 -- \
+               -i -p 3 --disable-hw-vlan-strip --rxq 3 --txq 3 --txd 1024 --rxd 1024
+
+2. Stop all ports:
+
+   .. code-block:: console
+
+               testpmd> port stop all
+
+3. Add shaper profile:
+
+   .. code-block:: console
+
+               testpmd> add port tm node shaper profile 0 0 900000 70000 0
+
+   Parameters have the following meaning::
+
+               0       - Id of a port.
+               0       - Id of a new shaper profile.
+               900000  - Shaper rate in bytes/s.
+               70000   - Bucket size in bytes.
+               0       - Packet length adjustment - ignored.
+
+4. Add non-leaf node for port 0:
+
+   .. code-block:: console
+
+               testpmd> add port tm nonleaf node 0 3 -1 0 0 0 0 0 1 3 0
+
+   Parameters have the following meaning::
+
+                0  - Id of a port
+                3  - Id of a new node.
+               -1  - Indicate that root does not have a parent.
+                0  - Priority of the node.
+                0  - Weight of the node.
+                0  - Id of a level. Since this is a root 0 is passed.
+                0  - Id of the shaper profile.
+                0  - Number of SP priorities.
+                3  - Enable statistics for both number of transmitted packets and bytes.
+                0  - Number of shared shapers.
+
+5. Add leaf node for tx queue 0:
+
+   .. code-block:: console
+
+               testpmd> add port tm leaf node 0 0 3 0 30 1 -1 0 0 1 0
+
+   Parameters have the following meaning::
+
+                0  - Id of a port.
+                0  - Id of a new node.
+                3  - Id of the parent node.
+                0  - Priority of a node.
+                30 - WRR weight.
+                1  - Id of a level. Since this is a leaf node 1 is passed.
+               -1  - Id of a shaper. -1 indicates that shaper is not attached.
+                0  - Congestion management is not supported.
+                0  - Congestion management is not supported.
+                1  - Enable statistics counter for number of transmitted packets.
+                0  - Number of shared shapers.
+
+6. Add leaf node for tx queue 1:
+
+   .. code-block:: console
+
+               testpmd> add port tm leaf node 0 1 3 0 60 1 -1 0 0 1 0
+
+   Parameters have the following meaning::
+
+                0  - Id of a port.
+                1  - Id of a new node.
+                3  - Id of the parent node.
+                0  - Priority of a node.
+                60 - WRR weight.
+                1  - Id of a level. Since this is a leaf node 1 is passed.
+               -1  - Id of a shaper. -1 indicates that shaper is not attached.
+                0  - Congestion management is not supported.
+                0  - Congestion management is not supported.
+                1  - Enable statistics counter for number of transmitted packets.
+                0  - Number of shared shapers.
+
+7. Add leaf node for tx queue 2:
+
+   .. code-block:: console
+
+               testpmd> add port tm leaf node 0 2 3 0 99 1 -1 0 0 1 0
+
+   Parameters have the following meaning::
+
+                0  - Id of a port.
+                2  - Id of a new node.
+                3  - Id of the parent node.
+                0  - Priority of a node.
+                99 - WRR weight.
+                1  - Id of a level. Since this is a leaf node 1 is passed.
+               -1  - Id of a shaper. -1 indicates that shaper is not attached.
+                0  - Congestion management is not supported.
+                0  - Congestion management is not supported.
+                1  - Enable statistics counter for number of transmitted packets.
+                0  - Number of shared shapers.
+
+8. Commit hierarchy:
+
+   .. code-block:: console
+
+               testpmd> port tm hierarchy commit 0 no
+
+   Parameters have the following meaning::
+
+               0  - Id of a port.
+               no - Do not flush TM hierarchy if commit fails.
+
+9. Start all ports:
+
+   .. code-block:: console
+
+               testpmd> port start all
+
+10. Enable forwarding:
+
+    .. code-block:: console
+
+               testpmd> start
index 345f393..87fabf5 100644 (file)
@@ -28,19 +28,16 @@ In this release, the hyper PMD driver provides the basic functionality of packet
 
 *   VLAN tags are always stripped and presented in mbuf tci field.
 
-*   The Hyper-V driver does not use or support Link State or Rx interrupt.
+*   The Hyper-V driver does not use or support interrupts. Link state change
+    callback is done via change events in the packet ring.
 
 *   The maximum number of queues is limited by the host (currently 64).
     When used with 4.16 kernel only a single queue is available.
 
-.. note::
-   This driver is intended for use with **Hyper-V only** and is
-   not recommended for use on Azure because accelerated Networking
-   (SR-IOV) is not supported.
-
-   On Azure, use the :doc:`vdev_netvsc` which
-   automatically configures the necessary TAP and failsave drivers.
-
+*   This driver supports SR-IOV network acceleration.
+    If SR-IOV is enabled then the driver will transparently manage the interface,
+    and send and receive packets using the VF path.
+    The VDEV_NETVSC and FAILSAFE drivers are *not* used when using the netvsc PMD.
 
 Installation
 ------------
@@ -103,3 +100,19 @@ The following prerequisites apply:
 *   Linux kernel support for UIO on vmbus is done with the uio_hv_generic driver.
     Full support of multiple queues requires the 4.17 kernel. It is possible
     to use the netvsc PMD with 4.16 kernel but it is limited to a single queue.
+
+
+Netvsc PMD arguments
+--------------------
+
+The user can specify the below argument in devargs.
+
+#.  ``latency``:
+
+    A netvsc device uses a mailbox page to indicate to the host that there
+    is something in the transmit queue. The host scans this page at a
+    periodic interval. This parameter allows adjusting the scan interval
+    used by the host. Smaller values improve transmit latency, and larger
+    values save CPU cycles. This parameter is in microseconds.
+    If the value is too large or too small it will be
+    ignored by the host. (Default: 50)
index f8eaaa6..f8111d3 100644 (file)
@@ -1,11 +1,11 @@
 ..  SPDX-License-Identifier: BSD-3-Clause
     Copyright(c) 2017 Cavium, Inc
 
-OCTEONTX Poll Mode driver
-=========================
+OCTEON TX Poll Mode driver
+==========================
 
-The OCTEONTX ETHDEV PMD (**librte_pmd_octeontx**) provides poll mode ethdev
-driver support for the inbuilt network device found in the **Cavium OCTEONTX**
+The OCTEON TX ETHDEV PMD (**librte_pmd_octeontx**) provides poll mode ethdev
+driver support for the inbuilt network device found in the **Cavium OCTEON TX**
 SoC family as well as their virtual functions (VF) in SR-IOV context.
 
 More information can be found at `Cavium, Inc Official Website
@@ -14,7 +14,7 @@ More information can be found at `Cavium, Inc Official Website
 Features
 --------
 
-Features of the OCTEONTX Ethdev PMD are:
+Features of the OCTEON TX Ethdev PMD are:
 
 - Packet type information
 - Promiscuous mode
@@ -26,8 +26,8 @@ Features of the OCTEONTX Ethdev PMD are:
 - Lock-free Tx queue
 - HW offloaded `ethdev Rx queue` to `eventdev event queue` packet injection
 
-Supported OCTEONTX SoCs
------------------------
+Supported OCTEON TX SoCs
+------------------------
 
 - CN83xx
 
@@ -65,7 +65,7 @@ Driver compilation and testing
 Refer to the document :ref:`compiling and testing a PMD for a NIC <pmd_build_and_test>`
 for details.
 
-To compile the OCTEONTX PMD for Linux arm64 gcc target, run the
+To compile the OCTEON TX PMD for Linux arm64 gcc target, run the
 following ``make`` command:
 
 .. code-block:: console
@@ -122,7 +122,7 @@ following ``make`` command:
 Initialization
 --------------
 
-The octeontx ethdev pmd is exposed as a vdev device which consists of a set
+The OCTEON TX ethdev pmd is exposed as a vdev device which consists of a set
 of PKI and PKO PCIe VF devices. On EAL initialization,
 PKI/PKO PCIe VF devices will be probed and then the vdev device can be created
 from the application code, or from the EAL command line based on
@@ -156,21 +156,21 @@ Limitations
 
 ``octeontx_fpavf`` external mempool handler dependency
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The OCTEONTX SoC family NIC has inbuilt HW assisted external mempool manager.
+The OCTEON TX SoC family NIC has inbuilt HW assisted external mempool manager.
 This driver will only work with ``octeontx_fpavf`` external mempool handler
 as it is the most performance effective way for packet allocation and Tx buffer
-recycling on OCTEONTX SoC platform.
+recycling on OCTEON TX SoC platform.
 
 CRC striping
 ~~~~~~~~~~~~
 
-The OCTEONTX SoC family NICs strip the CRC for every packets coming into the
+The OCTEON TX SoC family NICs strip the CRC for every packets coming into the
 host interface irrespective of the offload configuration.
 
 Maximum packet length
 ~~~~~~~~~~~~~~~~~~~~~
 
-The OCTEONTX SoC family NICs support a maximum of a 32K jumbo frame. The value
+The OCTEON TX SoC family NICs support a maximum of a 32K jumbo frame. The value
 is fixed and cannot be changed. So, even when the ``rxmode.max_rx_pkt_len``
 member of ``struct rte_eth_conf`` is set to a value lower than 32k, frames
 up to 32k bytes can still reach the host interface.
index 879e543..c1ef919 100644 (file)
@@ -96,6 +96,16 @@ The different stream types are:
 
         iface=eth0
 
+Runtime Config Options
+^^^^^^^^^^^^^^^^^^^^^^
+
+- Use PCAP interface physical MAC
+
+ In case the ``iface=`` configuration is set, the user may want to use the selected interface's physical MAC
+ address. This can be done with the ``devarg`` ``phy_mac``, for example::
+
+   --vdev 'net_pcap0,iface=eth0,phy_mac=1'
+
 Examples of Usage
 ^^^^^^^^^^^^^^^^^
 
index 63939ec..4006528 100644 (file)
@@ -240,6 +240,10 @@ Supported NICs
 
    - Solarflare X2522 Dual Port SFP28 10/25GbE Adapter
 
+   - Solarflare X2541 Single Port QSFP28 10/25G/100G Adapter
+
+   - Solarflare X2542 Dual Port QSFP28 10/25G/100G Adapter
+
 - Solarflare Flareon [Ultra] Server Adapters:
 
    - Solarflare SFN8522 Dual Port SFP+ Server Adapter
@@ -318,7 +322,7 @@ boolean parameters value.
   **efx** chooses libefx-based datapath which supports Rx scatter.
   **ef10** chooses EF10 (SFN7xxx, SFN8xxx, X2xxx) native datapath which is
   more efficient than libefx-based and provides richer packet type
-  classification, but lacks Rx scatter support.
+  classification.
   **ef10_esps** chooses SFNX2xxx equal stride packed stream datapath
   which may be used on DPDK firmware variant only
   (see notes about its limitations above).
@@ -333,8 +337,7 @@ boolean parameters value.
   Mbuf segments may come from different mempools, and mbuf reference
   counters are treated responsibly.
   **ef10** chooses EF10 (SFN7xxx, SFN8xxx, X2xxx) native datapath which is
-  more efficient than libefx-based but has no VLAN insertion and TSO
-  support yet.
+  more efficient than libefx-based but has no VLAN insertion support yet.
   Mbuf segments may come from different mempools, and mbuf reference
   counters are treated responsibly.
   **ef10_simple** chooses EF10 (SFN7xxx, SFN8xxx, X2xxx) native datapath which
index 6c2287a..32a9cf2 100644 (file)
@@ -248,3 +248,123 @@ command description provided in `softnic/rte_eth_softnic_cli.c`.
 
         thread 1 pipeline RX enable        (Soft NIC rx pipeline enable on cpu thread id 1)
         thread 1 pipeline TX enable        (Soft NIC tx pipeline enable on cpu thread id 1)
+
+QoS API Support
+---------------
+
+SoftNIC PMD implements the ethdev traffic management APIs ``rte_tm.h`` that
+allow building and committing the traffic manager hierarchy and configuring
+hierarchy nodes of the Quality of Service (QoS) scheduler supported by the
+DPDK librte_sched library. Run-time updates to the traffic manager hierarchy
+are also supported by the PMD.
+
+SoftNIC PMD also implements the ethdev traffic metering and policing APIs
+``rte_mtr.h`` that enable metering and marking of the packets with the
+appropriate color (green, yellow or red), according to the traffic metering
+algorithm. For the meter output color, policer actions like
+`keep the packet color the same`, `change the packet color` or `drop the packet`
+can be configured.
+
+.. Note::
+
+    The SoftNIC does not support meter objects shared by several flows;
+    it only supports creating a meter object private to the flow. Once the
+    meter object is successfully created, it can be linked to a specific flow
+    by specifying the ``meter`` flow action in the flow rule.
+
+Flow API Support
+----------------
+
+The SoftNIC PMD implements the ethdev flow APIs ``rte_flow.h`` that allow validating
+flow rules, adding flow rules to the SoftNIC pipeline as table rules, and deleting
+and querying the flow rules. The PMD provides a new CLI command for creating the
+flow group and its mapping to the SoftNIC pipeline and table. This CLI command
+should be configured as part of the firmware file.
+
+    .. code-block:: console
+
+        flowapi map group <group_id> ingress | egress pipeline <pipeline_name> \
+            table <table_id>
+
+From the flow attributes, the PMD uses the group id to get the mapped
+pipeline and table. The PMD supports a number of flow actions such as
+``JMP, QUEUE, RSS, DROP, COUNT, METER, VXLAN`` etc.
+
+.. Note::
+
+    The flow must have one terminating action, i.e.
+    ``JMP``, ``RSS``, ``QUEUE`` or ``DROP``. The underlying PMD does not yet
+    support the functionality for the count and drop actions, so they are
+    not recommended for use.
+
+The flow API can be tested with the help of the testpmd application. The SoftNIC
+firmware specifies CLI commands for port configuration, pipeline creation,
+action profile creation and table creation. Once the application is initialized,
+the flow rules can be added through the testpmd CLI.
+The PMD will translate the flow rules into SoftNIC pipeline table rules.
+
+Example
+~~~~~~~
+This example demonstrates the flow queue action using the SoftNIC firmware and
+testpmd commands.
+
+* Prepare SoftNIC firmware
+
+    .. code-block:: console
+
+        link LINK0 dev 0000:83:00.0
+        link LINK1 dev 0000:81:00.0
+        pipeline RX period 10 offset_port_id 0
+        pipeline RX port in bsz 32 link LINK0 rxq 0
+        pipeline RX port in bsz 32 link LINK1 rxq 0
+        pipeline RX port out bsz 32 swq RXQ0
+        pipeline RX port out bsz 32 swq RXQ1
+        table action profile AP0 ipv4 offset 278 fwd
+        pipeline RX table match hash ext key 16 mask
+            00FF0000FFFFFFFFFFFFFFFFFFFFFFFF \
+            offset 278 buckets 16K size 65K action AP0
+        pipeline RX port in 0 table 0
+        pipeline RX port in 1 table 0
+        flowapi map group 0 ingress pipeline RX table 0
+        pipeline TX period 10 offset_port_id 0
+        pipeline TX port in bsz 32 swq TXQ0
+        pipeline TX port in bsz 32 swq TXQ1
+        pipeline TX port out bsz 32 link LINK0 txq 0
+        pipeline TX port out bsz 32 link LINK1 txq 0
+        pipeline TX table match hash ext key 16 mask
+            00FF0000FFFFFFFFFFFFFFFFFFFFFFFF \
+            offset 278 buckets 16K size 65K action AP0
+        pipeline TX port in 0 table 0
+        pipeline TX port in 1 table 0
+        pipeline TX table 0 rule add match hash ipv4_5tuple
+            1.10.11.12 2.20.21.22 100 200 6 action fwd port 0
+        pipeline TX table 0 rule add match hash ipv4_5tuple
+            1.10.11.13 2.20.21.23 100 200 6 action fwd port 1
+        thread 25 pipeline RX enable
+        thread 25 pipeline TX enable
+
+* Run testpmd:
+
+    .. code-block:: console
+
+        ./x86_64-native-linuxapp-gcc/app/testpmd -l 23-25 -n 4 \
+                                    --vdev 'net_softnic0, \
+                                    firmware=./drivers/net/softnic/ \
+                                        firmware.cli, \
+                                    cpu_id=1,conn_port=8086' -- \
+                                    -i --forward-mode=softnic --rxq=2 \
+                                    --txq=2 --disable-rss --portmask=0x4
+
+* Configure flow rules on softnic:
+
+    .. code-block:: console
+
+        flow create 2 group 0 ingress pattern eth / ipv4 proto mask 255 src \
+            mask 255.255.255.255 dst mask  255.255.255.255 src spec
+            1.10.11.12 dst spec 2.20.21.22 proto spec 6 / tcp src mask 65535 \
+            dst mask 65535 src spec 100 dst spec 200 / end actions queue \
+            index 0 / end
+        flow create 2 group 0 ingress pattern eth / ipv4 proto mask 255 src \
+            mask 255.255.255.255 dst mask  255.255.255.255 src spec 1.10.11.13 \
+            dst spec 2.20.21.23 proto spec 6 / tcp src mask 65535 dst mask \
+            65535 src spec 100 dst spec 200 / end actions queue index 1 / end
index 2714868..9a3d7b3 100644 (file)
@@ -152,6 +152,22 @@ Distribute IPv4 TCP packets using RSS to a given MAC address over queues 0-3::
    testpmd> flow create 0 priority 4 ingress pattern eth dst is 0a:0b:0c:0d:0e:0f \
             / ipv4 / tcp / end actions rss queues 0 1 2 3 end / end
 
+Multi-process sharing
+---------------------
+
+It is possible to attach an existing TAP device in a secondary process,
+by declaring it as a vdev with the same name as in the primary process,
+and without any parameter.
+
+The port attached in a secondary process will give access to the
+statistics and the queues.
+Therefore it can be used for monitoring or Rx/Tx processing.
+
+The IPC synchronization of Rx/Tx queues is currently limited:
+
+  - Maximum 8 queues shared
+  - Synchronized on probing, but not on later port update
+
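+For illustration only (the application paths and interface name below are
+placeholders), a TAP port created by a primary testpmd process can be
+attached from a secondary one as follows:
+
+.. code-block:: console
+
+   # primary process creates the TAP device
+   ./testpmd --vdev=net_tap0,iface=dtap0 -- -i
+
+   # secondary process attaches to the same device by name, no parameters
+   ./testpmd --proc-type=secondary --vdev=net_tap0 -- -i
+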
 Example
 -------
 
index 4f7ae89..23f2e87 100644 (file)
@@ -71,6 +71,11 @@ The user can specify below arguments in `--vdev` option.
     It is used to enable iommu support in vhost library.
     (Default: 0 (disabled))
 
+#.  ``postcopy-support``:
+
+    It is used to enable postcopy live-migration support in vhost library.
+    (Default: 0 (disabled))
+
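+For example, a vhost port with postcopy support enabled could be created as
+follows (the socket path is illustrative)::
+
+    --vdev 'net_vhost0,iface=/tmp/sock0,postcopy-support=1'
+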
 Vhost PMD event handling
 ------------------------
 
index 7c099fb..2ae875c 100644 (file)
@@ -47,7 +47,7 @@ In this release, the virtio PMD driver provides the basic functionality of packe
 *   The descriptor number for the Rx/Tx queue is hard-coded to be 256 by qemu 2.7 and below.
     If given a different descriptor number by the upper application,
     the virtio PMD generates a warning and fall back to the hard-coded value.
-    Rx queue size can be configureable and up to 1024 since qemu 2.8 and above. Rx queue size is 256
+    Rx queue size is configurable up to 1024 with qemu 2.8 and above. Rx queue size is 256
     by default. Tx queue size is still hard-coded to be 256.
 
 *   Features of mac/vlan filter are supported; negotiation with the vhost/backend is needed to support them.
index b0a99c3..9f75d2a 100644 (file)
@@ -1,12 +1,12 @@
 ..  SPDX-License-Identifier: BSD-3-Clause
     Copyright(c) 2017 Cavium, Inc
 
-OCTEONTX Board Support Package
-==============================
+OCTEON TX Board Support Package
+===============================
 
-This doc has information about steps to setup octeontx platform
+This doc has information about the steps to set up the OCTEON TX platform
 and about the common offload HW block drivers of the
-**Cavium OCTEONTX** SoC family.
+**Cavium OCTEON TX** SoC family.
 
 
 More information about SoC can be found at `Cavium, Inc Official Website
@@ -27,11 +27,11 @@ Steps To Setup Platform
 -----------------------
 
 There are three main prerequisites for setting up Platform drivers on
-OCTEONTX compatible board:
+OCTEON TX compatible board:
 
-1. **OCTEONTX Linux kernel PF driver for Network acceleration HW blocks**
+1. **OCTEON TX Linux kernel PF driver for Network acceleration HW blocks**
 
-   The OCTEONTX Linux kernel drivers (includes the required PF driver for the
+   The OCTEON TX Linux kernel drivers (includes the required PF driver for the
    Platform drivers) are available on Github at `octeontx-kmod <https://github.com/caviumnetworks/octeontx-kmod>`_
    along with build, install and dpdk usage instructions.
 
@@ -48,7 +48,7 @@ OCTEONTX compatible board:
 
    As an alternative method, Platform drivers can also be executed using images provided
    as part of SDK from Cavium. The SDK includes all the above prerequisites necessary
-   to bring up a OCTEONTX board.
+   to bring up an OCTEON TX board.
 
    SDK and related information can be obtained from: `Cavium support site <https://support.cavium.com/>`_.
 
index d362c92..4f8612a 100644 (file)
@@ -213,6 +213,43 @@ Normally, these options do not need to be changed.
     can later be mapped into that preallocated VA space (if dynamic memory mode
     is enabled), and can optionally be mapped into it at startup.
 
+Support for Externally Allocated Memory
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is possible to use externally allocated memory in DPDK, using a set of malloc
+heap APIs. Support for externally allocated memory is implemented through
+overloading the socket ID - externally allocated heaps will have socket IDs
+that would be considered invalid under normal circumstances. Requesting an
+allocation to take place from a specified externally allocated memory area is a
+matter of supplying the correct socket ID to the DPDK allocator, either directly
+(e.g. through a call to ``rte_malloc``) or indirectly (through data
+structure-specific allocation APIs such as ``rte_ring_create``).
+
+Since there is no way DPDK can verify whether the memory area is available or
+valid, this responsibility falls on the shoulders of the user. All multiprocess
+synchronization is also the user's responsibility, as well as ensuring that all
+calls to add/attach/detach/remove memory are done in the correct order. It is
+not required to attach to a memory area in all processes - only attach to memory
+areas as needed.
+
+The expected workflow is as follows:
+
+* Get a pointer to memory area
+* Create a named heap
+* Add memory area(s) to the heap
+    - If IOVA table is not specified, IOVA addresses will be assumed to be
+      unavailable, and DMA mappings will not be performed
+    - Other processes must attach to the memory area before they can use it
+* Get socket ID used for the heap
+* Use normal DPDK allocation procedures, using supplied socket ID
+* If memory area is no longer needed, it can be removed from the heap
+    - Other processes must detach from this memory area before it can be removed
+* If heap is no longer needed, remove it
+    - Socket ID will become invalid and will not be reused
+
+For more information, please refer to ``rte_malloc`` API documentation,
+specifically the ``rte_malloc_heap_*`` family of function calls.
+
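+The following minimal sketch illustrates this workflow in a single process;
+``get_ext_mem()``, ``EXT_MEM_SZ`` and ``EXT_PAGE_SZ`` are hypothetical
+stand-ins for the application's externally allocated buffer and its geometry:
+
+.. code-block:: c
+
+    void *ext_addr = get_ext_mem(); /* hypothetical helper */
+    size_t len = EXT_MEM_SZ;        /* assumed multiple of EXT_PAGE_SZ */
+
+    rte_malloc_heap_create("ext_heap");
+    /* NULL IOVA table: addresses treated as unavailable, no DMA mapping */
+    rte_malloc_heap_memory_add("ext_heap", ext_addr, len, NULL, 0,
+                               EXT_PAGE_SZ);
+
+    int sock = rte_malloc_heap_get_socket("ext_heap");
+    void *obj = rte_malloc_socket("app_obj", 4096, 0, sock);
+
+    rte_free(obj);
+    rte_malloc_heap_memory_remove("ext_heap", ext_addr, len);
+    rte_malloc_heap_destroy("ext_heap");
+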
 PCI Access
 ~~~~~~~~~~
 
@@ -321,6 +358,14 @@ Misc Functions
 
 Locks and atomic operations are per-architecture (i686 and x86_64).
 
+IOVA Mode Configuration
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Auto detection of the IOVA mode, based on probing the bus and IOMMU configuration, may not report
+the desired addressing mode when virtual devices that are not directly attached to the bus are present.
+To facilitate forcing the IOVA mode to a specific value, the EAL command line option ``--iova-mode`` can
+be used to select either physical addressing ('pa') or virtual addressing ('va').
+
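+For example, to force virtual addressing (the application name is
+illustrative):
+
+.. code-block:: console
+
+    ./dpdk-app --iova-mode=va
+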
 Memory Segments and Memory Zones (memzone)
 ------------------------------------------
 
diff --git a/doc/guides/prog_guide/event_ethernet_tx_adapter.rst b/doc/guides/prog_guide/event_ethernet_tx_adapter.rst
new file mode 100644 (file)
index 0000000..192f9e1
--- /dev/null
@@ -0,0 +1,165 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2017 Intel Corporation.
+
+Event Ethernet Tx Adapter Library
+=================================
+
+The DPDK Eventdev API allows the application to use an event driven programming
+model for packet processing in which the event device distributes events
+referencing packets to the application cores in a dynamic load balanced fashion
+while handling atomicity and packet ordering. Event adapters provide the interface
+between the ethernet, crypto and timer devices and the event device. Event adapter
+APIs enable common application code by abstracting PMD specific capabilities.
+The Event ethernet Tx adapter provides configuration and data path APIs for the
+transmit stage of the application, allowing the same application code to use eventdev
+PMD support or, in its absence, a common implementation.
+
+In the common implementation, the application enqueues mbufs to the adapter
+which runs as a rte_service function. The service function dequeues events
+from its event port and transmits the mbufs referenced by these events.
+
+
+API Walk-through
+----------------
+
+This section will introduce the reader to the adapter API. The
+application has to first instantiate an adapter, which is associated with
+a single eventdev; next, the adapter instance is configured with Tx queues;
+finally, the adapter is started and the application can start enqueuing mbufs
+to it.
+
+Creating an Adapter Instance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+An adapter instance is created using ``rte_event_eth_tx_adapter_create()``. This
+function is passed the event device to be associated with the adapter and port
+configuration for the adapter to setup an event port if the adapter needs to use
+a service function.
+
+If the application desires to have finer control of eventdev port configuration,
+it can use the ``rte_event_eth_tx_adapter_create_ext()`` function. The
+``rte_event_eth_tx_adapter_create_ext()`` function is passed a callback function.
+The callback function is invoked if the adapter needs to use a service function
+and needs to create an event port for it. The callback is expected to fill the
+``struct rte_event_eth_tx_adapter_conf`` structure passed to it.
+
+.. code-block:: c
+
+        struct rte_event_dev_info dev_info;
+        struct rte_event_port_conf tx_p_conf = {0};
+
+        err = rte_event_dev_info_get(id, &dev_info);
+
+        tx_p_conf.new_event_threshold = dev_info.max_num_events;
+        tx_p_conf.dequeue_depth = dev_info.max_event_port_dequeue_depth;
+        tx_p_conf.enqueue_depth = dev_info.max_event_port_enqueue_depth;
+
+        err = rte_event_eth_tx_adapter_create(id, dev_id, &tx_p_conf);
+
+Adding Tx Queues to the Adapter Instance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Ethdev Tx queues are added to the instance using the
+``rte_event_eth_tx_adapter_queue_add()`` function. A queue value
+of -1 is used to indicate all queues within a device.
+
+.. code-block:: c
+
+        int err = rte_event_eth_tx_adapter_queue_add(id,
+                                                    eth_dev_id,
+                                                    q);
+
+Querying Adapter Capabilities
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``rte_event_eth_tx_adapter_caps_get()`` function allows
+the application to query the adapter capabilities for an eventdev and ethdev
+combination. Currently, the only capability flag defined is
+``RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT``, the application can
+query this flag to determine if a service function is associated with the
+adapter and retrieve its service identifier using the
+``rte_event_eth_tx_adapter_service_id_get()`` API.
+
+
+.. code-block:: c
+
+        int err = rte_event_eth_tx_adapter_caps_get(dev_id, eth_dev_id, &cap);
+
+        if (!(cap & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT))
+                err = rte_event_eth_tx_adapter_service_id_get(id, &service_id);
+
+Linking a Queue to the Adapter's Event Port
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If the adapter uses a service function as described in the previous section, the
+application is required to link a queue to the adapter's event port. The adapter's
+event port can be obtained using the ``rte_event_eth_tx_adapter_event_port_get()``
+function. The queue can be configured with the ``RTE_EVENT_QUEUE_CFG_SINGLE_LINK``
+attribute since it is linked to a single event port, as shown in the sketch below.
+
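+A minimal sketch, where ``qid`` is the id of the queue configured with the
+single link attribute and the other variables are as in the previous
+snippets:
+
+.. code-block:: c
+
+        uint8_t tx_port_id;
+        uint8_t queues[] = {qid};
+        int links;
+
+        err = rte_event_eth_tx_adapter_event_port_get(id, &tx_port_id);
+        if (err == 0)
+                links = rte_event_port_link(dev_id, tx_port_id,
+                                            queues, NULL, 1);
+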
+Configuring the Service Function
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If the adapter uses a service function, the application can assign
+a service core to the service function as shown below.
+
+.. code-block:: c
+
+        if (rte_event_eth_tx_adapter_service_id_get(id, &service_id) == 0)
+                rte_service_map_lcore_set(service_id, TX_CORE_ID);
+
+Starting the Adapter Instance
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The application calls ``rte_event_eth_tx_adapter_start()`` to start the adapter.
+This function calls the start callback of the eventdev PMD if supported,
+and the ``rte_service_run_state_set()`` to enable the service function if one exists.
+
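+For example, using the adapter ``id`` created earlier:
+
+.. code-block:: c
+
+        err = rte_event_eth_tx_adapter_start(id);
+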
+Enqueuing Packets to the Adapter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The application needs to notify the adapter about the transmit port and queue used
+to send the packet. The transmit port is set in the ``struct rte_mbuf::port`` field
+and the transmit queue is set using the ``rte_event_eth_tx_adapter_txq_set()``
+function.
+
+If the eventdev PMD supports the ``RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT``
+capability for a given ethernet device, the application should use the
+``rte_event_eth_tx_adapter_enqueue()`` function to enqueue packets to the adapter.
+
+If the adapter uses a service function for the ethernet device then the application
+should use the ``rte_event_enqueue_burst()`` function.
+
+.. code-block:: c
+
+       struct rte_event event;
+
+       if (cap & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT) {
+
+               event.mbuf = m;
+
+               m->port = tx_port;
+               rte_event_eth_tx_adapter_txq_set(m, tx_queue_id);
+
+               rte_event_eth_tx_adapter_enqueue(dev_id, ev_port, &event, 1);
+       } else {
+
+               event.queue_id = qid; /* event queue linked to adapter port */
+               event.op = RTE_EVENT_OP_NEW;
+               event.event_type = RTE_EVENT_TYPE_CPU;
+               event.sched_type = RTE_SCHED_TYPE_ATOMIC;
+               event.mbuf = m;
+
+               m->port = tx_port;
+               rte_event_eth_tx_adapter_txq_set(m, tx_queue_id);
+
+               rte_event_enqueue_burst(dev_id, ev_port, &event, 1);
+       }
+
+Getting Adapter Statistics
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``rte_event_eth_tx_adapter_stats_get()`` function reports counters defined
+in struct ``rte_event_eth_tx_adapter_stats``. The counter values are the sum of
+the counts from the eventdev PMD callback if the callback is supported, and
+the counts maintained by the service function, if one exists.
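+
+A minimal usage sketch:
+
+.. code-block:: c
+
+        struct rte_event_eth_tx_adapter_stats stats;
+
+        err = rte_event_eth_tx_adapter_stats_get(id, &stats);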
index 76a1f32..f5beec1 100644 (file)
@@ -1,5 +1,6 @@
 ..  SPDX-License-Identifier: BSD-3-Clause
     Copyright(c) 2010-2015 Intel Corporation.
+    Copyright(c) 2018 Arm Limited.
 
 .. _Hash_Library:
 
@@ -38,7 +39,7 @@ The main methods exported by the hash are:
 *   Lookup for entry with key: The key is provided as input. If an entry with the specified key is found in the hash (lookup hit),
     then the position of the entry is returned, otherwise (lookup miss) a negative value is returned.
 
-Apart from these method explained above, the API allows the user three more options:
+Apart from these methods explained above, the API provides the user with a few more options:
 
 *   Add / lookup / delete with key and precomputed hash: Both the key and its precomputed hash are provided as input. This allows
     the user to perform these operations faster, as hash is already computed.
@@ -48,6 +49,9 @@ Apart from these method explained above, the API allows the user three more opti
 
 *   Combination of the two options above: User can provide key, precomputed hash and data.
 
+*   Ability to not free the position of the entry in the hash upon calling delete. This is useful for multi-threaded scenarios where
+    readers continue to use the position even after the entry is deleted.
+
 Also, the API contains a method to allow the user to look up entries in bursts, achieving higher performance
 than looking up individual entries, as the function prefetches next entries at the time it is operating
 with the first ones, which reduces significantly the impact of the necessary memory accesses.
@@ -83,13 +87,20 @@ For concurrent writes, and concurrent reads and writes the following flag values
    Key add, delete, and table reset are protected from other writer threads. With only this flag set, readers are not protected from ongoing writes.
 
 *  If the read/write concurrency (RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) is set, multithread read/write operation is safe
-   (i.e., no need to stop the readers from accessing the hash table until writers finish their updates. Reads and writes can operate table concurrently).
+   (i.e., application does not need to stop the readers from accessing the hash table until writers finish their updates. Readers and writers can operate on the table concurrently).
+   The library uses a reader-writer lock to provide the concurrency.
 
 *  In addition to these two flag values, if the transactional memory flag (RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT) is also set,
-   hardware transactional memory will be used to guarantee the thread safety as long as it is supported by the hardware (for example the Intel® TSX support).
+   the reader-writer lock will use hardware transactional memory to guarantee thread safety as long as it is supported by the hardware (for example the Intel® TSX support).
+   If the platform supports Intel® TSX, it is advised to set the transactional memory flag, as this will speed up concurrent table operations.
+   Otherwise concurrent operations will be slower because of the overhead associated with the software locking mechanisms.
+
+*  If lock free read/write concurrency (RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) is set, read/write concurrency is provided without using a reader-writer lock.
+   For platforms that do not support transactional memory (e.g. current Arm based platforms), it is advised to set this flag to achieve greater scalability in performance.
 
-If the platform supports Intel® TSX, it is advised to set the transactional memory flag, as this will speed up concurrent table operations.
-Otherwise concurrent operations will be slower because of the overhead associated with the software locking mechanisms.
+*  If the 'do not free on delete' (RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL) flag is set, the position of the entry in the hash is not freed upon calling delete. This flag is enabled
+   by default when lock free read/write concurrency is set. The application should free the position after all the readers have stopped referencing the position.
+   Where required, the application can make use of RCU mechanisms to determine when the readers have stopped referencing the position (see the sketch below).
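+
+Below is a minimal sketch (table size, key type and key value are
+illustrative) of creating a hash table with lock free read/write concurrency
+and freeing a deleted position explicitly:
+
+.. code-block:: c
+
+    struct rte_hash_parameters params = {
+        .name = "flow_table",
+        .entries = 1024,
+        .key_len = sizeof(uint32_t),
+        .socket_id = rte_socket_id(),
+        /* 'no free on delete' is enabled by default with this flag */
+        .extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF,
+    };
+    struct rte_hash *h = rte_hash_create(&params);
+    uint32_t key = 5;
+    int32_t pos;
+
+    rte_hash_add_key(h, &key);
+    pos = rte_hash_del_key(h, &key);
+    /* wait until no reader references 'pos' (e.g. using RCU) ... */
+    if (pos >= 0)
+        rte_hash_free_key_with_position(h, pos);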
 
 Implementation Details
 ----------------------
@@ -148,6 +159,14 @@ key is considered not able to be stored.
 With random keys, this method allows the user to get around 90% of the table utilization, without
 having to drop any stored entry (LRU) or allocate more memory (extended buckets).
 
+
+Example of deletion:
+
+Similar to lookup, the key is searched in its primary and secondary buckets. If the key is found, the bucket
+entry is marked as an empty slot. If the hash table was configured with 'no free on delete' or 'lock free read/write concurrency',
+the position of the key is not freed. It is the responsibility of the user to free the position while making sure that
+readers are not referencing the position anymore.
+
 Entry distribution in hash table
 --------------------------------
 
@@ -240,6 +259,10 @@ The flow table operations on the application side are described below:
 *   Delete flow: Delete the flow key from the hash. If the returned position is valid,
     use it to access the flow entry in the flow table to invalidate the information associated with the flow.
 
+*   Free flow: Free the flow key position. If the 'no free on delete' or 'lock-free read/write concurrency' flags are set,
+    wait until readers are no longer referencing the position returned during add/delete flow, and then free the position.
+    RCU mechanisms can be used to find out when readers are no longer referencing the position.
+
 *   Lookup flow: Lookup for the flow key in the hash.
     If the returned position is valid (flow lookup hit), use the returned position to access the flow entry in the flow table.
     Otherwise (flow lookup miss) there is no flow registered for the current packet.
index 3b920e5..2086e24 100644 (file)
@@ -44,6 +44,7 @@ Programmer's Guide
     thread_safety_dpdk_functions
     eventdev
     event_ethernet_rx_adapter
+    event_ethernet_tx_adapter
     event_timer_adapter
     event_crypto_adapter
     qos_framework
@@ -52,7 +53,6 @@ Programmer's Guide
     packet_framework
     vhost_lib
     metrics_lib
-    port_hotplug_framework
     bpf_lib
     source_org
     dev_kit_build_system
index 8fa13fa..33ea980 100644 (file)
@@ -29,58 +29,222 @@ The components of an application using the DPDK Kernel NIC Interface are shown i
 The DPDK KNI Kernel Module
 --------------------------
 
-The KNI kernel loadable module provides support for two types of devices:
+The KNI kernel loadable module ``rte_kni`` provides the kernel interface
+for DPDK applications.
 
-*   A Miscellaneous device (/dev/kni) that:
+When the ``rte_kni`` module is loaded, it will create a device ``/dev/kni``
+that is used by the DPDK KNI API functions to control and communicate with
+the kernel module.
 
-    *   Creates net devices (via ioctl  calls).
+The ``rte_kni`` kernel module contains several optional parameters which
+can be specified when the module is loaded to control its behavior:
 
-    *   Maintains a kernel thread context shared by all KNI instances
-        (simulating the RX side of the net driver).
+.. code-block:: console
 
-    *   For single kernel thread mode, maintains a kernel thread context shared by all KNI instances
-        (simulating the RX side of the net driver).
+    # modinfo rte_kni.ko
+    <snip>
+    parm:           lo_mode: KNI loopback mode (default=lo_mode_none):
+                    lo_mode_none        Kernel loopback disabled
+                    lo_mode_fifo        Enable kernel loopback with fifo
+                    lo_mode_fifo_skb    Enable kernel loopback with fifo and skb buffer
+                     (charp)
+    parm:           kthread_mode: Kernel thread mode (default=single):
+                    single    Single kernel thread mode enabled.
+                    multiple  Multiple kernel thread mode enabled.
+                     (charp)
+    parm:           carrier: Default carrier state for KNI interface (default=off):
+                    off   Interfaces will be created with carrier state set to off.
+                    on    Interfaces will be created with carrier state set to on.
+                     (charp)
 
-    *   For multiple kernel thread mode, maintains a kernel thread context for each KNI instance
-        (simulating the RX side of the net driver).
+Loading the ``rte_kni`` kernel module without any optional parameters is
+the typical way a DPDK application gets packets into and out of the kernel
+network stack.  Without any parameters, only one kernel thread is created
+for all KNI devices for packet receiving on the kernel side, loopback mode is
+disabled, and the default carrier state of KNI interfaces is set to *off*.
 
-*   Net device:
+.. code-block:: console
 
-    *   Net functionality provided by implementing several operations such as netdev_ops,
-        header_ops, ethtool_ops that are defined by struct net_device,
-        including support for DPDK mbufs and FIFOs.
+    # insmod kmod/rte_kni.ko
 
-    *   The interface name is provided from userspace.
+.. _kni_loopback_mode:
 
-    *   The MAC address can be the real NIC MAC address or random.
+Loopback Mode
+~~~~~~~~~~~~~
+
+For testing, the ``rte_kni`` kernel module can be loaded in loopback mode
+by specifying the ``lo_mode`` parameter:
+
+.. code-block:: console
+
+    # insmod kmod/rte_kni.ko lo_mode=lo_mode_fifo
+
+The ``lo_mode_fifo`` loopback option will loop back ring enqueue/dequeue
+operations in kernel space.
+
+.. code-block:: console
+
+    # insmod kmod/rte_kni.ko lo_mode=lo_mode_fifo_skb
+
+The ``lo_mode_fifo_skb`` loopback option will loop back ring enqueue/dequeue
+operations and sk buffer copies in kernel space.
+
+If the ``lo_mode`` parameter is not specified, loopback mode is disabled.
+
+.. _kni_kernel_thread_mode:
+
+Kernel Thread Mode
+~~~~~~~~~~~~~~~~~~
+
+To provide flexibility of performance, the ``rte_kni`` KNI kernel module
+can be loaded with the ``kthread_mode`` parameter.  The ``rte_kni`` kernel
+module supports two options: "single kernel thread" mode and "multiple
+kernel thread" mode.
+
+Single kernel thread mode is enabled as follows:
+
+.. code-block:: console
+
+    # insmod kmod/rte_kni.ko kthread_mode=single
+
+This mode will create only one kernel thread for all KNI interfaces to
+receive data on the kernel side.  By default, this kernel thread is not
+bound to any particular core, but the user can set the core affinity for
+this kernel thread by setting the ``core_id`` and ``force_bind`` parameters
+in ``struct rte_kni_conf`` when the first KNI interface is created.
+
+For optimum performance, the kernel thread should be bound to a core
+on the same socket as the DPDK lcores used in the application.
+
+The KNI kernel module can also be configured to start a separate kernel
+thread for each KNI interface created by the DPDK application.  Multiple
+kernel thread mode is enabled as follows:
+
+.. code-block:: console
+
+    # insmod kmod/rte_kni.ko kthread_mode=multiple
+
+This mode will create a separate kernel thread for each KNI interface to
+receive data on the kernel side.  The core affinity of each ``kni_thread``
+kernel thread can be specified by setting the ``core_id`` and ``force_bind``
+parameters in ``struct rte_kni_conf`` when each KNI interface is created.
+
+Multiple kernel thread mode can provide scalable higher performance if
+sufficient unused cores are available on the host system.
+
+If the ``kthread_mode`` parameter is not specified, the "single kernel
+thread" mode is used.
+
+.. _kni_default_carrier_state:
+
+Default Carrier State
+~~~~~~~~~~~~~~~~~~~~~
+
+The default carrier state of KNI interfaces created by the ``rte_kni``
+kernel module is controlled via the ``carrier`` option when the module
+is loaded.
+
+If ``carrier=off`` is specified, the kernel module will leave the carrier
+state of the interface *down* when the interface is management enabled.
+The DPDK application can set the carrier state of the KNI interface using the
+``rte_kni_update_link()`` function.  This is useful for DPDK applications
+which require that the carrier state of the KNI interface reflect the
+actual link state of the corresponding physical NIC port.
+
+If ``carrier=on`` is specified, the kernel module will automatically set
+the carrier state of the interface to *up* when the interface is management
+enabled.  This is useful for DPDK applications which use the KNI interface as
+a purely virtual interface that does not correspond to any physical hardware
+and do not wish to explicitly set the carrier state of the interface with
+``rte_kni_update_link()``.  It is also useful for testing in loopback mode
+where the NIC port may not be physically connected to anything.
+
+To set the default carrier state to *on*:
+
+.. code-block:: console
+
+    # insmod kmod/rte_kni.ko carrier=on
+
+To set the default carrier state to *off*:
+
+.. code-block:: console
+
+    # insmod kmod/rte_kni.ko carrier=off
+
+If the ``carrier`` parameter is not specified, the default carrier state
+of KNI interfaces will be set to *off*.
 
 KNI Creation and Deletion
 -------------------------
 
-The KNI interfaces are created by a DPDK application dynamically.
-The interface name and FIFO details are provided by the application through an ioctl call
-using the rte_kni_device_info struct which contains:
+Before any KNI interfaces can be created, the ``rte_kni`` kernel module must
+be loaded into the kernel and configured with the ``rte_kni_init()`` function.
+
+The KNI interfaces are created by a DPDK application dynamically via the
+``rte_kni_alloc()`` function.
+
+The ``struct rte_kni_conf`` structure contains fields which allow the
+user to specify the interface name, set the MTU size, set an explicit or
+random MAC address and control the affinity of the kernel Rx thread(s)
+(both single and multi-threaded modes).
+
+The ``struct rte_kni_ops`` structure contains pointers to functions to
+handle requests from the ``rte_kni`` kernel module.  These functions
+allow DPDK applications to perform actions when the KNI interfaces are
+manipulated by control commands or functions external to the application.
+
+For example, the DPDK application may wish to enable/disable a physical
+NIC port when a user enables/disables a KNI interface with ``ip link set
+[up|down] dev <ifaceX>``.  The DPDK application can register a callback for
+``config_network_if`` which will be called when the interface management
+state changes.
+
+There are currently four callbacks for which the user can register
+application functions:
 
-*   The interface name.
+``config_network_if``:
 
-*   Physical addresses of the corresponding memzones for the relevant FIFOs.
+    Called when the management state of the KNI interface changes.
+    For example, when the user runs ``ip link set [up|down] dev <ifaceX>``.
 
-*   Mbuf mempool details, both physical and virtual (to calculate the offset for mbuf pointers).
+``change_mtu``:
 
-*   PCI information.
+    Called when the user changes the MTU size of the KNI
+    interface.  For example, when the user runs ``ip link set mtu <size>
+    dev <ifaceX>``.
 
-*   Core affinity.
+``config_mac_address``:
 
-Refer to rte_kni_common.h in the DPDK source code for more details.
+    Called when the user changes the MAC address of the KNI interface.
+    For example, when the user runs ``ip link set address <MAC>
+    dev <ifaceX>``.  If the user sets this callback function to NULL,
+    but sets the ``port_id`` field to a value other than -1, a default
+    callback handler in the rte_kni library ``kni_config_mac_address()``
+    will be called which calls ``rte_eth_dev_default_mac_addr_set()``
+    on the specified ``port_id``.
 
-The physical addresses will be re-mapped into the kernel address space and stored in separate KNI contexts.
+``config_promiscusity``:
 
-The affinity of kernel RX thread (both single and multi-threaded modes) is controlled by force_bind and
-core_id config parameters.
+    Called when the user changes the promiscuity state of the KNI
+    interface.  For example, when the user runs ``ip link set promisc
+    [on|off] dev <ifaceX>``. If the user sets this callback function to
+    NULL, but sets the ``port_id`` field to a value other than -1, a default
+    callback handler in the rte_kni library ``kni_config_promiscusity()``
+    will be called which calls ``rte_eth_promiscuous_<enable|disable>()``
+    on the specified ``port_id``.
 
-The KNI interfaces can be deleted by a DPDK application dynamically after being created.
-Furthermore, all those KNI interfaces not deleted will be deleted on the release operation
-of the miscellaneous device (when the DPDK application is closed).
+In order to run these callbacks, the application must periodically call
+the ``rte_kni_handle_request()`` function.  Any user callback function
+registered will be called directly from ``rte_kni_handle_request()`` so
+care must be taken to prevent deadlock and to not block any DPDK fastpath
+tasks.  Typically DPDK applications which use these callbacks will need
+to create a separate thread or secondary process to periodically call
+``rte_kni_handle_request()``.
+
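+The following abbreviated sketch shows the overall shape; ``port_id``,
+``pktmbuf_pool`` and the ``running`` flag are assumed to be set up by the
+application, and error handling is omitted:
+
+.. code-block:: c
+
+    static int
+    kni_config_network_if(uint16_t port_id, uint8_t if_up)
+    {
+        /* start or stop the backing ethdev port here */
+        return 0;
+    }
+
+    struct rte_kni_conf conf;
+    struct rte_kni_ops ops;
+    struct rte_kni *kni;
+
+    memset(&conf, 0, sizeof(conf));
+    snprintf(conf.name, RTE_KNI_NAMESIZE, "vEth0");
+    conf.mbuf_size = 2048;
+
+    memset(&ops, 0, sizeof(ops));
+    ops.port_id = port_id;
+    ops.config_network_if = kni_config_network_if;
+
+    kni = rte_kni_alloc(pktmbuf_pool, &conf, &ops);
+
+    /* in a separate control thread, periodically service requests */
+    while (running)
+        rte_kni_handle_request(kni);
+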
+The KNI interfaces can be deleted by a DPDK application with
+``rte_kni_release()``.  All KNI interfaces not explicitly deleted will be
+deleted when the ``/dev/kni`` device is closed, either explicitly with
+``rte_kni_close()`` or when the DPDK application is closed.
 
 DPDK mbuf Flow
 --------------
@@ -118,7 +282,7 @@ The packet is received from the Linux net stack, by calling the kni_net_tx() cal
 The mbuf is dequeued (without waiting, due to the cache) and filled with data from the sk_buff.
 The sk_buff is then freed and the mbuf sent in the tx_q FIFO.
 
-The DPDK TX thread dequeues the mbuf and sends it to the PMD (via rte_eth_tx_burst()).
+The DPDK TX thread dequeues the mbuf and sends it to the PMD via ``rte_eth_tx_burst()``.
 It then puts the mbuf back in the cache.
 
 Ethtool
@@ -128,16 +292,3 @@ Ethtool is a Linux-specific tool with corresponding support in the kernel
 where each net device must register its own callbacks for the supported operations.
 The current implementation uses the igb/ixgbe modified Linux drivers for ethtool support.
 Ethtool is not supported in i40e and VMs (VF or EM devices).
-
-Link state and MTU change
--------------------------
-
-Link state and MTU change are network interface specific operations usually done via ifconfig.
-The request is initiated from the kernel side (in the context of the ifconfig process)
-and handled by the user space DPDK application.
-The application polls the request, calls the application handler and returns the response back into the kernel space.
-
-The application handlers can be registered upon interface creation or explicitly registered/unregistered in runtime.
-This provides flexibility in multiprocess scenarios
-(where the KNI is created in the primary process but the callbacks are handled in the secondary one).
-The constraint is that a single process can register and handle the requests.
index f0b4856..48d2575 100644 (file)
@@ -98,6 +98,10 @@ Port Types
    |   |                  | character device.                                                                     |
    |   |                  |                                                                                       |
    +---+------------------+---------------------------------------------------------------------------------------+
+   | 9 | Sym_crypto       | Output port used to extract DPDK Cryptodev operations from a fixed offset of the      |
+   |   |                  | packet and then enqueue to the Cryptodev PMD. Input port used to dequeue the          |
+   |   |                  | Cryptodev operations from the Cryptodev PMD and then retrieve the packets from them.  |
+   +---+------------------+---------------------------------------------------------------------------------------+
 
 Port Interface
 ~~~~~~~~~~~~~~
@@ -1078,6 +1082,11 @@ with each table entry having its own set of enabled user actions and its own cop
    |   |                                   | checksum.                                                           |
    |   |                                   |                                                                     |
    +---+-----------------------------------+---------------------------------------------------------------------+
+   | 7 | Sym Crypto                        | Generate Cryptodev session based on the user-specified algorithm    |
+   |   |                                   | and key(s), and assemble the cryptodev operation based on the       |
+   |   |                                   | predefined offsets.                                                 |
+   |   |                                   |                                                                     |
+   +---+-----------------------------------+---------------------------------------------------------------------+
 
 Multicore Scaling
 -----------------
@@ -1133,7 +1142,7 @@ Typical devices with acceleration capabilities are:
 
 *   Inline accelerators: NICs, switches, FPGAs, etc;
 
-*   Look-aside accelerators: chipsets, FPGAs, etc.
+*   Look-aside accelerators: chipsets, FPGAs, Intel QuickAssist, etc.
 
 Usually, to support a specific functional block, specific implementation of Packet Framework tables and/or ports and/or actions has to be provided for each accelerator,
 with all the implementations sharing the same API: pure SW implementation (no acceleration), implementation using accelerator A, implementation using accelerator B, etc.
diff --git a/doc/guides/prog_guide/port_hotplug_framework.rst b/doc/guides/prog_guide/port_hotplug_framework.rst
deleted file mode 100644 (file)
index fb0efc1..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-..  BSD LICENSE
-    Copyright(c) 2015 IGEL Co.,Ltd. All rights reserved.
-    All rights reserved.
-
-    Redistribution and use in source and binary forms, with or without
-    modification, are permitted provided that the following conditions
-    are met:
-
-    * Redistributions of source code must retain the above copyright
-    notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in
-    the documentation and/or other materials provided with the
-    distribution.
-    * Neither the name of IGEL Co.,Ltd. nor the names of its
-    contributors may be used to endorse or promote products derived
-    from this software without specific prior written permission.
-
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-Port Hotplug Framework
-======================
-
-The Port Hotplug Framework provides DPDK applications with the ability to
-attach and detach ports at runtime. Because the framework depends on PMD
-implementation, the ports that PMDs cannot handle are out of scope of this
-framework. Furthermore, after detaching a port from a DPDK application, the
-framework doesn't provide a way for removing the devices from the system.
-For the ports backed by a physical NIC, the kernel will need to support PCI
-Hotplug feature.
-
-Overview
---------
-
-The basic requirements of the Port Hotplug Framework are:
-
-*       DPDK applications that use the Port Hotplug Framework must manage their
-        own ports.
-
-        The Port Hotplug Framework is implemented to allow DPDK applications to
-        manage ports. For example, when DPDK applications call the port attach
-        function, the attached port number is returned. DPDK applications can
-        also detach the port by port number.
-
-*       Kernel support is needed for attaching or detaching physical device
-        ports.
-
-        To attach new physical device ports, the device will be recognized by
-        userspace driver I/O framework in kernel at first. Then DPDK
-        applications can call the Port Hotplug functions to attach the ports.
-        For detaching, steps are vice versa.
-
-*       Before detaching, they must be stopped and closed.
-
-        DPDK applications must call "rte_eth_dev_stop()" and
-        "rte_eth_dev_close()" APIs before detaching ports. These functions will
-        start finalization sequence of the PMDs.
-
-*       The framework doesn't affect legacy DPDK applications behavior.
-
-        If the Port Hotplug functions aren't called, all legacy DPDK apps can
-        still work without modifications.
-
-Port Hotplug API overview
--------------------------
-
-*       Attaching a port
-
-        "rte_eth_dev_attach()" API attaches a port to DPDK application, and
-        returns the attached port number. Before calling the API, the device
-        should be recognized by an userspace driver I/O framework. The API
-        receives a pci address like "0000:01:00.0" or a virtual device name
-        like "net_pcap0,iface=eth0". In the case of virtual device name, the
-        format is the same as the general "--vdev" option of DPDK.
-
-*       Detaching a port
-
-        "rte_eth_dev_detach()" API detaches a port from DPDK application, and
-        returns a pci address of the detached device or a virtual device name
-        of the device.
-
-Reference
----------
-
-        "testpmd" supports the Port Hotplug Framework.
-
-Limitations
------------
-
-*       The Port Hotplug APIs are not thread safe.
-
-*       The framework can only be enabled with Linux. BSD is not supported.
-
-*       Not all PMDs support detaching feature.
-        The underlying bus must support hot-unplug. If not supported,
-        the function ``rte_eth_dev_detach()`` will return negative ENOTSUP.
index eba1cc6..68b7e8b 100644 (file)
@@ -106,6 +106,92 @@ User Cases
 
 The power management mechanism is used to save power when performing L3 forwarding.
 
+
+Empty Poll API
+--------------
+
+Abstract
+~~~~~~~~
+
+For packet processing workloads such as DPDK, polling is continuous.
+This means CPU cores always show 100% busy, independent of how much work
+those cores are doing. It is hugely important to accurately determine
+how busy a core is, for the following reasons:
+
+        * No indication of overload conditions
+        * User does not know how much real load is on a system, resulting
+          in wasted energy as no power management is utilized
+
+Compared to the original l3fwd-power design, instead of going to sleep
+after detecting an empty poll, the new mechanism just lowers the core frequency.
+As a result, the application does not stop polling the device, which leads
+to improved handling of bursts of traffic.
+
+When the system becomes busy, the empty poll mechanism can also increase the core
+frequency (including turbo) to make a best effort for intensive traffic. This gives
+us more flexible and balanced traffic awareness than the standard l3fwd-power
+application.
+
+
+Proposed Solution
+~~~~~~~~~~~~~~~~~
+The proposed solution focuses on how many times empty polls are executed.
+The fewer the empty polls, the busier the current core is with its processing
+workload, and therefore the higher the frequency needed. A high empty poll
+number indicates the current core is not doing any real work and therefore
+we can lower the frequency to save power.
+
+In the current implementation, each core has 1 empty-poll counter, which assumes
+1 core is dedicated to 1 queue. This will need to be expanded in the future to
+support multiple queues per core.
+
+Power state definition
+^^^^^^^^^^^^^^^^^^^^^^
+
+* LOW:  Not currently used, reserved for future use.
+
+* MED:  the frequency is used to process modest traffic workload.
+
+* HIGH: the frequency is used to process busy traffic workload.
+
+There are two phases to establish the power management system:
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+* Training phase. This phase is used to measure the optimal frequency
+  change thresholds for a given system. The thresholds will differ from
+  system to system due to differences in processor micro-architecture,
+  cache and device configurations.
+  In this phase, the user must ensure that no traffic can enter the
+  system so that counts can be measured for empty polls at low, medium
+  and high frequencies. Each frequency is measured for two seconds.
+  Once the training phase is complete, the threshold numbers are
+  displayed, and normal mode resumes, and traffic can be allowed into
+  the system. These threshold number can be used on the command line
+  when starting the application in normal mode to avoid re-training
+  every time.
+
+* Normal phase. Every 10ms the run-time counters are compared
+  to the supplied threshold values, and the decision will be made
+  whether to move to a different power state (by adjusting the
+  frequency).
+
+API Overview for Empty Poll Power Management
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+* **State Init**: initialize the power management system.
+
+* **State Free**: free the resource hold by power management system.
+
+* **Update Empty Poll Counter**: update the empty poll counter.
+
+* **Update Valid Poll Counter**: update the valid poll counter.
+
+* **Set the Frequency Index**: update the power state/frequency mapping.
+
+* **Detect empty poll state change**: run the empty poll state change detection algorithm and then take action.
+
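+As an illustrative sketch only (one Rx queue per core; ``port_id``,
+``queue_id`` and ``MAX_PKT_BURST`` are placeholders), the datapath loop
+updates the counters on every iteration:
+
+.. code-block:: c
+
+    struct rte_mbuf *pkts[MAX_PKT_BURST];
+    unsigned int lcore_id = rte_lcore_id();
+    uint16_t nb_rx;
+
+    nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, MAX_PKT_BURST);
+    if (nb_rx == 0)
+        rte_power_empty_poll_stat_update(lcore_id);
+    else
+        rte_power_poll_stat_update(lcore_id, nb_rx);
+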
+User Cases
+----------
+The mechanism can be applied to any device which is based on polling, e.g. a NIC or FPGA.
+
 References
 ----------
 
index 1106216..02f0561 100644 (file)
@@ -33,38 +33,12 @@ Refer to the
 for details about application profiling.
 
 
-Empty cycles tracing
+Profiling with VTune
 ~~~~~~~~~~~~~~~~~~~~
 
-Iterations that yielded no RX packets (empty cycles, wasted iterations) can
-be analyzed using VTune Amplifier. This profiling employs the
-`Instrumentation and Tracing Technology (ITT) API
-<https://software.intel.com/en-us/node/544195>`_
-feature of VTune Amplifier and requires only reconfiguring the DPDK library,
-no changes in a DPDK application are needed.
-
-To trace wasted iterations on RX queues, first reconfigure DPDK with
-``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and
-``CONFIG_RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS`` enabled.
-
-Then rebuild DPDK, specifying paths to the ITT header and library, which can
-be found in any VTune Amplifier distribution in the *include* and *lib*
-directories respectively:
-
-.. code-block:: console
-
-    make EXTRA_CFLAGS=-I<path to ittnotify.h> \
-         EXTRA_LDLIBS="-L<path to libittnotify.a> -littnotify"
-
-Finally, to see wasted iterations in your performance analysis results,
-select the *"Analyze user tasks, events, and counters"* checkbox in the
-*"Analysis Type"* tab when configuring analysis via VTune Amplifier GUI.
-Alternatively, when running VTune Amplifier via command line, specify
-``-knob enable-user-tasks=true`` option.
-
-Collected regions of wasted iterations will be marked on VTune Amplifier's
-timeline as ITT tasks. These ITT tasks have predefined names, containing
-Ethernet device and RX queue identifiers.
+To allow VTune to attach to the DPDK application, reconfigure and recompile
+DPDK with ``CONFIG_RTE_ETHDEV_RXTX_CALLBACKS`` and
+``CONFIG_RTE_ETHDEV_PROFILE_WITH_VTUNE`` enabled.
 
 
 Profiling on ARM64
index b305a72..c186375 100644 (file)
@@ -1191,6 +1191,27 @@ Normally preceded by any of:
 - `Item: ICMP6_ND_NS`_
 - `Item: ICMP6_ND_OPT`_
 
+Item: ``META``
+^^^^^^^^^^^^^^
+
+Matches an application specific 32 bit metadata item.
+
+- Default ``mask`` matches the specified metadata value.
+
+.. _table_rte_flow_item_meta:
+
+.. table:: META
+
+   +----------+----------+---------------------------------------+
+   | Field    | Subfield | Value                                 |
+   +==========+==========+=======================================+
+   | ``spec`` | ``data`` | 32 bit metadata value                 |
+   +----------+----------+---------------------------------------+
+   | ``last`` | ``data`` | upper range value                     |
+   +----------+----------+---------------------------------------+
+   | ``mask`` | ``data`` | bit-mask applies to "spec" and "last" |
+   +----------+----------+---------------------------------------+
+
 Actions
 ~~~~~~~
 
@@ -2076,6 +2097,250 @@ RTE_FLOW_ERROR_TYPE_ACTION error should be returned.
 
 This action modifies the payload of matched flows.
 
+Action: ``RAW_ENCAP``
+^^^^^^^^^^^^^^^^^^^^^
+
+Adds an outer header whose template is provided in its data buffer,
+as defined in the ``rte_flow_action_raw_encap`` definition.
+
+This action modifies the payload of matched flows. The data supplied must
+be a valid header, either holding layer 2 data in the case of adding layer 2
+after decapsulating a layer 3 tunnel (for example MPLSoGRE) or a complete
+tunnel definition starting from layer 2 and moving to the tunnel item itself.
+When applied to the original packet, the resulting packet must be a valid packet.
+
+.. _table_rte_flow_action_raw_encap:
+
+.. table:: RAW_ENCAP
+
+   +----------------+----------------------------------------+
+   | Field          | Value                                  |
+   +================+========================================+
+   | ``data``       | Encapsulation data                     |
+   +----------------+----------------------------------------+
+   | ``preserve``   | Bit-mask of data to preserve on output |
+   +----------------+----------------------------------------+
+   | ``size``       | Size of data and preserve              |
+   +----------------+----------------------------------------+
+
+Action: ``RAW_DECAP``
+^^^^^^^^^^^^^^^^^^^^^
+
+Removes an outer header whose template is provided in its data buffer,
+as defined in the ``rte_flow_action_raw_decap`` definition.
+
+This action modifies the payload of matched flows. The data supplied must
+be a valid header, either holding layer 2 data in the case of removing layer 2
+before encapsulation of a layer 3 tunnel (for example MPLSoGRE) or a complete
+tunnel definition starting from layer 2 and moving to the tunnel item itself.
+When applied to the original packet, the resulting packet must be a
+valid packet.
+
+.. _table_rte_flow_action_raw_decap:
+
+.. table:: RAW_DECAP
+
+   +----------------+----------------------------------------+
+   | Field          | Value                                  |
+   +================+========================================+
+   | ``data``       | Decapsulation data                     |
+   +----------------+----------------------------------------+
+   | ``size``       | Size of data                           |
+   +----------------+----------------------------------------+
+
+Action: ``SET_IPV4_SRC``
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Set a new IPv4 source address in the outermost IPv4 header.
+
+It must be used with a valid RTE_FLOW_ITEM_TYPE_IPV4 flow pattern item.
+Otherwise, RTE_FLOW_ERROR_TYPE_ACTION error will be returned.
+
+.. _table_rte_flow_action_set_ipv4_src:
+
+.. table:: SET_IPV4_SRC
+
+   +-----------------------------------------+
+   | Field         | Value                   |
+   +===============+=========================+
+   | ``ipv4_addr`` | new IPv4 source address |
+   +---------------+-------------------------+
+
+Action: ``SET_IPV4_DST``
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Set a new IPv4 destination address in the outermost IPv4 header.
+
+It must be used with a valid RTE_FLOW_ITEM_TYPE_IPV4 flow pattern item.
+Otherwise, RTE_FLOW_ERROR_TYPE_ACTION error will be returned.
+
+.. _table_rte_flow_action_set_ipv4_dst:
+
+.. table:: SET_IPV4_DST
+
+   +---------------+------------------------------+
+   | Field         | Value                        |
+   +===============+==============================+
+   | ``ipv4_addr`` | new IPv4 destination address |
+   +---------------+------------------------------+
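+
+As a sketch, rewriting both outermost IPv4 addresses could be expressed as
+follows; the addresses are arbitrary examples and the surrounding pattern
+must contain an IPv4 item as noted above:
+
+.. code-block:: c
+
+   struct rte_flow_action_set_ipv4 set_src = {
+       .ipv4_addr = rte_cpu_to_be_32(0xc0a80001), /* 192.168.0.1 */
+   };
+   struct rte_flow_action_set_ipv4 set_dst = {
+       .ipv4_addr = rte_cpu_to_be_32(0x0a000001), /* 10.0.0.1 */
+   };
+   struct rte_flow_action actions[] = {
+       { .type = RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC, .conf = &set_src },
+       { .type = RTE_FLOW_ACTION_TYPE_SET_IPV4_DST, .conf = &set_dst },
+       { .type = RTE_FLOW_ACTION_TYPE_END },
+   };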
+
+Action: ``SET_IPV6_SRC``
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Set a new IPv6 source address in the outermost IPv6 header.
+
+It must be used with a valid RTE_FLOW_ITEM_TYPE_IPV6 flow pattern item.
+Otherwise, RTE_FLOW_ERROR_TYPE_ACTION error will be returned.
+
+.. _table_rte_flow_action_set_ipv6_src:
+
+.. table:: SET_IPV6_SRC
+
+   +---------------+-------------------------+
+   | Field         | Value                   |
+   +===============+=========================+
+   | ``ipv6_addr`` | new IPv6 source address |
+   +---------------+-------------------------+
+
+Action: ``SET_IPV6_DST``
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Set a new IPv6 destination address in the outermost IPv6 header.
+
+It must be used with a valid RTE_FLOW_ITEM_TYPE_IPV6 flow pattern item.
+Otherwise, RTE_FLOW_ERROR_TYPE_ACTION error will be returned.
+
+.. _table_rte_flow_action_set_ipv6_dst:
+
+.. table:: SET_IPV6_DST
+
+   +---------------+------------------------------+
+   | Field         | Value                        |
+   +===============+==============================+
+   | ``ipv6_addr`` | new IPv6 destination address |
+   +---------------+------------------------------+
+
+Action: ``SET_TP_SRC``
+^^^^^^^^^^^^^^^^^^^^^^
+
+Set a new source port number in the outermost TCP/UDP header.
+
+It must be used with a valid RTE_FLOW_ITEM_TYPE_TCP or RTE_FLOW_ITEM_TYPE_UDP
+flow pattern item. Otherwise, RTE_FLOW_ERROR_TYPE_ACTION error will be returned.
+
+.. _table_rte_flow_action_set_tp_src:
+
+.. table:: SET_TP_SRC
+
+   +----------+-------------------------+
+   | Field    | Value                   |
+   +==========+=========================+
+   | ``port`` | new TCP/UDP source port |
+   +----------+-------------------------+
+
+Action: ``SET_TP_DST``
+^^^^^^^^^^^^^^^^^^^^^^
+
+Set a new destination port number in the outermost TCP/UDP header.
+
+It must be used with a valid RTE_FLOW_ITEM_TYPE_TCP or RTE_FLOW_ITEM_TYPE_UDP
+flow pattern item. Otherwise, RTE_FLOW_ERROR_TYPE_ACTION error will be returned.
+
+.. _table_rte_flow_action_set_tp_dst:
+
+.. table:: SET_TP_DST
+
+   +----------+------------------------------+
+   | Field    | Value                        |
+   +==========+==============================+
+   | ``port`` | new TCP/UDP destination port |
+   +----------+------------------------------+
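+
+A minimal sketch redirecting matched flows to an example destination port;
+4790 here is an arbitrary value, passed in network byte order:
+
+.. code-block:: c
+
+   struct rte_flow_action_set_tp set_tp_dst = {
+       .port = rte_cpu_to_be_16(4790), /* example new destination port */
+   };
+   struct rte_flow_action actions[] = {
+       { .type = RTE_FLOW_ACTION_TYPE_SET_TP_DST, .conf = &set_tp_dst },
+       { .type = RTE_FLOW_ACTION_TYPE_END },
+   };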
+
+Action: ``MAC_SWAP``
+^^^^^^^^^^^^^^^^^^^^
+
+Swap the source and destination MAC addresses in the outermost Ethernet
+header.
+
+It must be used with a valid RTE_FLOW_ITEM_TYPE_ETH flow pattern item.
+Otherwise, RTE_FLOW_ERROR_TYPE_ACTION error will be returned.
+
+.. _table_rte_flow_action_mac_swap:
+
+.. table:: MAC_SWAP
+
+   +---------------+
+   | Field         |
+   +===============+
+   | no properties |
+   +---------------+
+
+Action: ``DEC_TTL``
+^^^^^^^^^^^^^^^^^^^
+
+Decrease TTL value.
+
+If there is no valid RTE_FLOW_ITEM_TYPE_IPV4 or RTE_FLOW_ITEM_TYPE_IPV6
+in the pattern, some PMDs will reject the rule because the behaviour would
+be undefined.
+
+.. _table_rte_flow_action_dec_ttl:
+
+.. table:: DEC_TTL
+
+   +---------------+
+   | Field         |
+   +===============+
+   | no properties |
+   +---------------+
+
+Action: ``SET_TTL``
+^^^^^^^^^^^^^^^^^^^
+
+Assigns a new TTL value.
+
+If there is no valid RTE_FLOW_ITEM_TYPE_IPV4 or RTE_FLOW_ITEM_TYPE_IPV6
+in the pattern, some PMDs will reject the rule because the behaviour would
+be undefined.
+
+.. _table_rte_flow_action_set_ttl:
+
+.. table:: SET_TTL
+
+   +---------------+--------------------+
+   | Field         | Value              |
+   +===============+====================+
+   | ``ttl_value`` | new TTL value      |
+   +---------------+--------------------+
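+
+A one-line sketch assigning a fixed TTL of 64 to matched packets:
+
+.. code-block:: c
+
+   struct rte_flow_action_set_ttl set_ttl = { .ttl_value = 64 };
+   struct rte_flow_action action = {
+       .type = RTE_FLOW_ACTION_TYPE_SET_TTL,
+       .conf = &set_ttl,
+   };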
+
+Action: ``SET_MAC_SRC``
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Set a new source MAC address.
+
+.. _table_rte_flow_action_set_mac_src:
+
+.. table:: SET_MAC_SRC
+
+   +--------------+---------------+
+   | Field        | Value         |
+   +==============+===============+
+   | ``mac_addr`` | MAC address   |
+   +--------------+---------------+
+
+Action: ``SET_MAC_DST``
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Set a new destination MAC address.
+
+.. _table_rte_flow_action_set_mac_dst:
+
+.. table:: SET_MAC_DST
+
+   +--------------+---------------+
+   | Field        | Value         |
+   +==============+===============+
+   | ``mac_addr`` | MAC address   |
+   +--------------+---------------+
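+
+A sketch combining both MAC rewrite actions; the locally administered
+addresses below are arbitrary examples:
+
+.. code-block:: c
+
+   struct rte_flow_action_set_mac set_mac_src = {
+       .mac_addr = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
+   };
+   struct rte_flow_action_set_mac set_mac_dst = {
+       .mac_addr = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 },
+   };
+   struct rte_flow_action actions[] = {
+       { .type = RTE_FLOW_ACTION_TYPE_SET_MAC_SRC, .conf = &set_mac_src },
+       { .type = RTE_FLOW_ACTION_TYPE_SET_MAC_DST, .conf = &set_mac_dst },
+       { .type = RTE_FLOW_ACTION_TYPE_END },
+   };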
+
 Negative types
 ~~~~~~~~~~~~~~
 
@@ -2419,6 +2684,26 @@ This function initializes ``error`` (if non-NULL) with the provided
 parameters and sets ``rte_errno`` to ``code``. A negative error ``code`` is
 then returned.
 
+Object conversion
+~~~~~~~~~~~~~~~~~
+
+.. code-block:: c
+
+   int
+   rte_flow_conv(enum rte_flow_conv_op op,
+                 void *dst,
+                 size_t size,
+                 const void *src,
+                 struct rte_flow_error *error);
+
+Convert ``src`` to ``dst`` according to operation ``op``. Possible
+operations include:
+
+- Attributes, pattern item or action duplication.
+- Duplication of an entire pattern or list of actions.
+- Duplication of a complete flow rule description.
+- Pattern item or action name retrieval.
+
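+The example below is a hedged sketch of duplicating a complete flow rule
+description with ``RTE_FLOW_CONV_OP_RULE`` using the usual two-pass idiom
+(first query the required size, then perform the copy); the ``attr``,
+``pattern`` and ``actions`` objects are assumed to exist:
+
+.. code-block:: c
+
+   struct rte_flow_error error;
+   struct rte_flow_conv_rule rule = {
+       .attr_ro = attr,
+       .pattern_ro = pattern,
+       .actions_ro = actions,
+   };
+   /* First pass: a NULL destination only computes the required size. */
+   int ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, NULL, 0, &rule, &error);
+
+   if (ret >= 0) {
+       struct rte_flow_conv_rule *dup = malloc(ret);
+
+       /* Second pass: actually duplicate the rule description. */
+       if (dup != NULL &&
+           rte_flow_conv(RTE_FLOW_CONV_OP_RULE, dup, ret, &rule,
+                         &error) >= 0) {
+           /* dup is now a self-contained copy of the flow rule. */
+       }
+   }
+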
 Caveats
 -------
 
index 0812abe..cb70caa 100644 (file)
@@ -10,8 +10,8 @@ The security library provides a framework for management and provisioning
 of security protocol operations offloaded to hardware based devices. The
 library defines generic APIs to create and free security sessions which can
 support full protocol offload as well as inline crypto operation with
-NIC or crypto devices. The framework currently only supports the IPSec protocol
-and associated operations, other protocols will be added in future.
+NIC or crypto devices. The framework currently only supports the IPsec and PDCP
+protocols and associated operations; other protocols will be added in the future.
 
 Design Principles
 -----------------
@@ -253,6 +253,49 @@ for any protocol header addition.
         +--------|--------+
                  V
 
+PDCP Flow Diagram
+~~~~~~~~~~~~~~~~~
+
+Based on 3GPP TS 36.323 Evolved Universal Terrestrial Radio Access (E-UTRA);
+Packet Data Convergence Protocol (PDCP) specification.
+
+.. code-block:: c
+
+        Transmitting PDCP Entity          Receiving PDCP Entity
+                  |                                   ^
+                  |                       +-----------|-----------+
+                  V                       | In order delivery and |
+        +---------|----------+            | Duplicate detection   |
+        | Sequence Numbering |            |  (Data Plane only)    |
+        +---------|----------+            +-----------|-----------+
+                  |                                   |
+        +---------|----------+            +-----------|----------+
+        | Header Compression*|            | Header Decompression*|
+        | (Data-Plane only)  |            |   (Data Plane only)  |
+        +---------|----------+            +-----------|----------+
+                  |                                   |
+        +---------|-----------+           +-----------|----------+
+        | Integrity Protection|           |Integrity Verification|
+        | (Control Plane only)|           | (Control Plane only) |
+        +---------|-----------+           +-----------|----------+
+        +---------|-----------+            +----------|----------+
+        |     Ciphering       |            |     Deciphering     |
+        +---------|-----------+            +----------|----------+
+        +---------|-----------+            +----------|----------+
+        |   Add PDCP header   |            | Remove PDCP Header  |
+        +---------|-----------+            +----------|----------+
+                  |                                   |
+                  +----------------->>----------------+
+
+
+.. note::
+
+    * Header compression and decompression are currently not supported.
+
+As with IPsec, PDCP header addition/deletion, ciphering/deciphering, and
+integrity protection/verification are done based on the chosen action
+type.
+
 Device Features and Capabilities
 ---------------------------------
 
@@ -271,7 +314,7 @@ structure in the *DPDK API Reference*.
 
 Each driver (crypto or ethernet) defines its own private array of capabilities
 for the operations it supports. Below is an example of the capabilities for a
-PMD which supports the IPSec protocol.
+PMD which supports the IPsec and PDCP protocols.
 
 .. code-block:: c
 
@@ -298,6 +341,24 @@ PMD which supports the IPSec protocol.
                 },
                 .crypto_capabilities = pmd_capabilities
         },
+        { /* PDCP Lookaside Protocol offload Data Plane */
+                .action = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL,
+                .protocol = RTE_SECURITY_PROTOCOL_PDCP,
+                .pdcp = {
+                        .domain = RTE_SECURITY_PDCP_MODE_DATA,
+                        .capa_flags = 0
+                },
+                .crypto_capabilities = pmd_capabilities
+        },
+        { /* PDCP Lookaside Protocol offload Control */
+                .action = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL,
+                .protocol = RTE_SECURITY_PROTOCOL_PDCP,
+                .pdcp = {
+                        .domain = RTE_SECURITY_PDCP_MODE_CONTROL,
+                        .capa_flags = 0
+                },
+                .crypto_capabilities = pmd_capabilities
+        },
         {
                 .action = RTE_SECURITY_ACTION_TYPE_NONE
         }
@@ -429,6 +490,7 @@ Security Session configuration structure is defined as ``rte_security_session_co
         union {
                 struct rte_security_ipsec_xform ipsec;
                 struct rte_security_macsec_xform macsec;
+                struct rte_security_pdcp_xform pdcp;
         };
         /**< Configuration parameters for security session */
         struct rte_crypto_sym_xform *crypto_xform;
@@ -463,15 +525,17 @@ The ``rte_security_session_protocol`` is defined as
 .. code-block:: c
 
     enum rte_security_session_protocol {
-        RTE_SECURITY_PROTOCOL_IPSEC,
+        RTE_SECURITY_PROTOCOL_IPSEC = 1,
         /**< IPsec Protocol */
         RTE_SECURITY_PROTOCOL_MACSEC,
         /**< MACSec Protocol */
+        RTE_SECURITY_PROTOCOL_PDCP,
+        /**< PDCP Protocol */
     };
 
-Currently the library defines configuration parameters for IPSec only. For other
-protocols like MACSec, structures and enums are defined as place holders which
-will be updated in the future.
+Currently the library defines configuration parameters for IPsec and PDCP only.
+For other protocols like MACSec, structures and enums are defined as place holders
+which will be updated in the future.
 
 IPsec related configuration parameters are defined in ``rte_security_ipsec_xform``
 
@@ -494,6 +558,35 @@ IPsec related configuration parameters are defined in ``rte_security_ipsec_xform
         /**< Tunnel parameters, NULL for transport mode */
     };
 
+PDCP related configuration parameters are defined in ``rte_security_pdcp_xform``
+
+.. code-block:: c
+
+    struct rte_security_pdcp_xform {
+        int8_t bearer; /**< PDCP bearer ID */
+        /** Enable in order delivery, this field shall be set only if
+         * driver/HW is capable. See RTE_SECURITY_PDCP_ORDERING_CAP.
+         */
+        uint8_t en_ordering;
+        /** Notify driver/HW to detect and remove duplicate packets.
+         * This field should be set only when driver/HW is capable.
+         * See RTE_SECURITY_PDCP_DUP_DETECT_CAP.
+         */
+        uint8_t remove_duplicates;
+        /** PDCP mode of operation: Control or data */
+        enum rte_security_pdcp_domain domain;
+        /** PDCP Frame Direction 0:UL 1:DL */
+        enum rte_security_pdcp_direction pkt_dir;
+        /** Sequence number size, 5/7/12/15/18 */
+        enum rte_security_pdcp_sn_size sn_size;
+        /** Starting Hyper Frame Number to be used together with the SN
+         * from the PDCP frames
+         */
+        uint32_t hfn;
+        /** HFN Threshold for key renegotiation */
+        uint32_t hfn_threshold;
+    };
+
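+A hedged sketch of a PDCP data-plane session configuration built from these
+parameters follows; the symmetric crypto transform ``cipher_xform`` is assumed
+to be defined elsewhere, and the example enum values (such as
+``RTE_SECURITY_PDCP_UPLINK`` and ``RTE_SECURITY_PDCP_SN_SIZE_12``) must match
+what the target PMD advertises in its capabilities:
+
+.. code-block:: c
+
+    struct rte_security_session_conf sess_conf = {
+        .action_type = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL,
+        .protocol = RTE_SECURITY_PROTOCOL_PDCP,
+        .pdcp = {
+            .bearer = 0x3,                          /* example bearer ID */
+            .domain = RTE_SECURITY_PDCP_MODE_DATA,  /* data plane */
+            .pkt_dir = RTE_SECURITY_PDCP_UPLINK,
+            .sn_size = RTE_SECURITY_PDCP_SN_SIZE_12,
+            .hfn = 0,
+            .hfn_threshold = 0xfffff,
+        },
+        .crypto_xform = &cipher_xform, /* cipher-only transform, assumed */
+    };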
 
 Security API
 ~~~~~~~~~~~~
index 77af4d7..c77df33 100644 (file)
@@ -106,6 +106,14 @@ The following is an overview of some key Vhost API functions:
     Enabling this flag with these Qemu version results in Qemu being blocked
     when multiple queue pairs are declared.
 
+  - ``RTE_VHOST_USER_POSTCOPY_SUPPORT``
+
+    Postcopy live-migration support will be enabled when this flag is set.
+    It is disabled by default.
+
+    Enabling this flag should only be done when the calling application does
+    not pre-fault the guest shared memory; otherwise, migration would fail.
+
 * ``rte_vhost_driver_set_features(path, features)``
 
   This function sets the feature bits the vhost-user driver supports. The
index e2dbee3..34b2823 100644 (file)
@@ -11,21 +11,6 @@ API and ABI deprecation notices are to be posted here.
 Deprecation Notices
 -------------------
 
-* eal: certain structures will change in EAL on account of upcoming external
-  memory support. Aside from internal changes leading to an ABI break, the
-  following externally visible changes will also be implemented:
-
-  - ``rte_memseg_list`` will change to include a boolean flag indicating
-    whether a particular memseg list is externally allocated. This will have
-    implications for any users of memseg-walk-related functions, as they will
-    now have to skip externally allocated segments in most cases if the intent
-    is to only iterate over internal DPDK memory.
-  - ``socket_id`` parameter across the entire DPDK will gain additional meaning,
-    as some socket ID's will now be representing externally allocated memory. No
-    changes will be required for existing code as backwards compatibility will
-    be kept, and those who do not use this feature will not see these extra
-    socket ID's.
-
 * eal: both declaring and identifying devices will be streamlined in v18.11.
   New functions will appear to query a specific port from buses, classes of
   device and device drivers. Device declaration will be made coherent with the
@@ -56,12 +41,6 @@ Deprecation Notices
   experimental API ``rte_pktmbuf_attach_extbuf()`` is used. Removal of the macro
   is to fix this semantic inconsistency.
 
-* ethdev: In v18.11 ``DEV_RX_OFFLOAD_CRC_STRIP`` offload flag will be removed, default
-  behavior without any flag will be changed to CRC strip.
-  To keep CRC ``DEV_RX_OFFLOAD_KEEP_CRC`` flag is required.
-  ``KEEP_CRC``: Keep CRC in packet
-  No flag: Strip CRC from packet
-
 * ethdev: the legacy filter API, including
   ``rte_eth_dev_filter_supported()``, ``rte_eth_dev_filter_ctrl()`` as well
   as filter types MACVLAN, ETHERTYPE, FLEXIBLE, SYN, NTUPLE, TUNNEL, FDIR,
@@ -70,28 +49,9 @@ Deprecation Notices
   Target release for removal of the legacy API will be defined once most
   PMDs have switched to rte_flow.
 
-* ethdev: In v18.11 ``rte_eth_dev_attach()`` and ``rte_eth_dev_detach()``
-  will be removed.
-  Hotplug functions ``rte_eal_hotplug_add()`` and ``rte_eal_hotplug_remove()``
-  should be used instread.
-  Function ``rte_eth_dev_get_port_by_name()`` may be used to find
-  identifier of the added port.
-
-* eal: In v18.11 ``rte_eal_dev_attach()`` and ``rte_eal_dev_detach()``
-  will be removed.
-  Hotplug functions ``rte_eal_hotplug_add()`` and ``rte_eal_hotplug_remove()``
-  should be used directly.
-
 * pdump: As we changed to use generic IPC, some changes in APIs and structure
   are expected in subsequent release.
 
   - ``rte_pdump_set_socket_dir`` will be removed;
   - The parameter, ``path``, of ``rte_pdump_init`` will be removed;
   - The enum ``rte_pdump_socktype`` will be removed.
-
-* ethdev: flow API function ``rte_flow_copy()`` will be deprecated in v18.11
-  in favor of ``rte_flow_conv()`` (which will appear in that version) and
-  subsequently removed for v19.02.
-
-  This is due to a lack of flexibility and reliance on a type unusable with
-  C++ programs (struct rte_flow_desc).
index d125342..1243e98 100644 (file)
@@ -8,7 +8,7 @@ Release Notes
     :maxdepth: 1
     :numbered:
 
-    rel_description
+    release_18_11
     release_18_08
     release_18_05
     release_18_02
diff --git a/doc/guides/rel_notes/rel_description.rst b/doc/guides/rel_notes/rel_description.rst
deleted file mode 100644 (file)
index 8f28556..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-..  SPDX-License-Identifier: BSD-3-Clause
-    Copyright(c) 2010-2015 Intel Corporation.
-
-Description of Release
-======================
-
-This document contains the release notes for Data Plane Development Kit (DPDK)
-release version |release| and previous releases.
-
-It lists new features, fixed bugs, API and ABI changes and known issues.
-
-For instructions on compiling and running the release, see the :ref:`DPDK Getting Started Guide <linux_gsg>`.
index 321fa84..8a09dee 100644 (file)
@@ -252,7 +252,7 @@ The libraries prepended with a plus sign were incremented in this version.
      librte_distributor.so.1
    + librte_eal.so.8
    + librte_ethdev.so.10
-     librte_eventdev.so.4
+   + librte_eventdev.so.5
      librte_flow_classify.so.1
      librte_gro.so.1
      librte_gso.so.1
diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
new file mode 100644 (file)
index 0000000..376128f
--- /dev/null
@@ -0,0 +1,529 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright 2018 The DPDK contributors
+
+DPDK Release 18.11
+==================
+
+.. **Read this first.**
+
+   The text in the sections below explains how to update the release notes.
+
+   Use proper spelling, capitalization and punctuation in all sections.
+
+   Variable and config names should be quoted as fixed width text:
+   ``LIKE_THIS``.
+
+   Build the docs and view the output file to ensure the changes are correct::
+
+      make doc-guides-html
+
+      xdg-open build/doc/html/guides/rel_notes/release_18_11.html
+
+
+New Features
+------------
+
+.. This section should contain new features added in this release.
+   Sample format:
+
+   * **Add a title in the past tense with a full stop.**
+
+     Add a short 1-2 sentence description in the past tense.
+     The description should be enough to allow someone scanning
+     the release notes to understand the new feature.
+
+     If the feature adds a lot of sub-features you can use a bullet list
+     like this:
+
+     * Added feature foo to do something.
+     * Enhanced feature bar to do something else.
+
+     Refer to the previous release notes for examples.
+
+     Suggested order in release notes items:
+     * Core libs (EAL, mempool, ring, mbuf, buses)
+     * Device abstraction libs and PMDs
+       - ethdev (lib, PMDs)
+       - cryptodev (lib, PMDs)
+       - eventdev (lib, PMDs)
+       - etc
+     * Other libs
+     * Apps, Examples, Tools (if significant)
+
+     This section is a comment. Do not overwrite or remove it.
+     Also, make sure to start the actual text at the margin.
+     =========================================================
+
+* **Added support for using externally allocated memory in DPDK.**
+
+  DPDK has gained support for creating new ``rte_malloc`` heaps referencing
+  memory that was created outside of DPDK's own page allocator, and using that
+  memory natively with any other DPDK library or data structure.
+
+* **Added a check for ensuring allocated memory is addressable by devices.**
+
+  Some devices can have addressing limitations, so a new function,
+  ``rte_eal_check_dma_mask``, has been added to check whether allocated memory
+  is within the device's addressable range. Because memory can now be allocated
+  dynamically after initialization, a DMA mask is kept, and any newly allocated
+  memory is checked against it and rejected if out of range. If more than one
+  device has addressing limitations, the DMA mask is the most restrictive one.
+
+* **Added hot-unplug handle mechanism.**
+
+  ``rte_dev_hotplug_handle_enable`` and ``rte_dev_hotplug_handle_disable`` can
+  be used to enable or disable the hotplug handling mechanism.
+
+* **Added support for device multi-process hotplug.**
+
+  Hotplug and hot-unplug for devices are now supported in a multi-process
+  scenario. Any ethdev devices created in the primary process will be regarded
+  as shared and will be available for all DPDK processes. Synchronization
+  between processes will be done using DPDK IPC.
+
+* **Added new Flow API actions to rewrite fields in packet headers.**
+
+  Added new Flow API actions to:
+
+  * Modify source and destination IP addresses in the outermost IPv4/IPv6
+    headers.
+  * Modify source and destination port numbers in the outermost TCP/UDP
+    headers.
+
+* **Added new Flow API action to swap MAC addresses in Ethernet header.**
+
+  Added new Flow API action to swap the source and destination MAC
+  addresses in the outermost Ethernet header.
+
+* **Added support to offload more flow matches and actions in the CXGBE PMD.**
+
+  Flow API support has been enhanced for the CXGBE Poll Mode Driver to offload:
+
+  * Match items: destination MAC address.
+  * Action items: push/pop/rewrite VLAN header,
+    rewrite IP addresses in outermost IPv4/IPv6 header,
+    rewrite port numbers in outermost TCP/UDP header,
+    swap MAC addresses in outermost Ethernet header.
+
+* **Added a devarg to use the latest supported vector path in i40e.**
+
+  A new devarg ``use-latest-supported-vec`` was introduced to allow users to
+  choose the latest vector path that the platform supports. For example, users
+  can use the AVX2 vector path on BDW/HSW to get better performance.
+
+* **Added support for SR-IOV in netvsc PMD.**
+
+  The ``netvsc`` poll mode driver now supports the Accelerated Networking
+  SR-IOV option in Hyper-V and Azure. This is an alternative to the previous
+  vdev_netvsc, tap, and failsafe drivers combination.
+
+* **Added a new net driver for Marvell Armada 3k device.**
+
+  Added the new ``mvneta`` net driver for Marvell Armada 3k device. See the
+  :doc:`../nics/mvneta` NIC guide for more details on this new driver.
+
+* **Added NXP ENETC PMD.**
+
+  Added the new enetc driver for NXP enetc platform. See the
+  "ENETC Poll Mode Driver" document for more details on this new driver.
+
+* **Added Ethernet poll mode driver for Aquantia aQtion family of 10G devices.**
+
+  Added the new ``atlantic`` ethernet poll mode driver for Aquantia XGBE devices.
+  See the :doc:`../nics/atlantic` NIC driver guide for more details on this
+  driver.
+
+* **Updated Solarflare network PMD.**
+
+  Updated the sfc_efx driver including the following changes:
+
+  * Added support for Rx scatter in EF10 datapath implementation.
+  * Added support for Rx descriptor status API in EF10 datapath implementation.
+  * Added support for TSO in EF10 datapath implementation.
+  * Added support for Tx descriptor status API in EF10 (ef10 and ef10_simple)
+    datapaths implementation.
+
+* **Updated the enic driver.**
+
+  * Added AVX2-based vectorized Rx handler.
+  * Added VLAN and checksum offloads to the simple Tx handler.
+  * Added the count flow action.
+  * Enabled the virtual address IOVA mode.
+
+* **Updated failsafe driver.**
+
+  Updated the failsafe driver including the following changes:
+
+  * Support for Rx and Tx queues start and stop.
+  * Support for Rx and Tx queues deferred start.
+  * Support for runtime Rx and Tx queues setup.
+  * Support for multicast MAC address set.
+
+* **Added a devarg to use the PCAP interface physical MAC address.**
+
+  A new devarg ``phy_mac`` was introduced to allow users to use the physical
+  MAC address of the selected PCAP interface.
+
+* **Added TAP Rx/Tx queues sharing with a secondary process.**
+
+  A secondary process can attach a TAP device created in the primary process,
+  probe the queues, and process Rx/Tx in a secondary process.
+
+* **Added classification and metering support to SoftNIC PMD.**
+
+  Added support for flow classification (rte_flow API), and metering and
+  policing (rte_mtr API) to the SoftNIC PMD.
+
+* **Added Crypto support to Softnic PMD.**
+
+  The Softnic PMD is now capable of processing symmetric crypto workloads such
+  as cipher, cipher-authentication chaining, and AEAD encryption and
+  decryption. This is achieved by calling DPDK Cryptodev APIs.
+
+* **Added cryptodev port to port library.**
+
+  Cryptodev port is a shim layer in the port library that interacts with DPDK
+  Cryptodev PMDs, including burst enqueuing and dequeuing of crypto operations.
+
+* **Added symmetric cryptographic actions to the pipeline library.**
+
+  Added symmetric crypto action parsing and an action handler to the pipeline
+  library. The action automatically prepares the crypto operation with the
+  specified rules, such as algorithm, key, and IV, for the cryptodev port to
+  process.
+
+* **Added support for GEN3 devices to the Intel QAT driver.**
+
+  Added support for the third generation of Intel QuickAssist devices.
+
+* **Updated the QAT PMD.**
+
+  The QAT PMD was updated with additional support for:
+
+  * AES-CMAC algorithm.
+
+* **Updated the AESNI MB PMD.**
+
+  The AESNI MB PMD has been updated with additional support for the AES-GCM
+  algorithm.
+
+* **Added NXP CAAM JR PMD.**
+
+  Added the new caam job ring driver for NXP platforms. See the
+  "NXP CAAM JOB RING (caam_jr)" document for more details on this new driver.
+
+* **Added support for Dynamic Huffman Encoding to Intel QAT comp PMD.**
+
+  The Intel QuickAssist (QAT) compression PMD has been updated with support
+  for Dynamic Huffman Encoding for the Deflate algorithm.
+
+* **Added Event Ethernet Tx Adapter.**
+
+  Added the event ethernet Tx adapter library that provides configuration and
+  data path APIs for the ethernet transmit stage of an event-driven packet
+  processing application. These APIs abstract the implementation of the
+  transmit stage and allow the application to use eventdev PMD support or
+  a common implementation.
+
+* **Added Distributed Software Eventdev PMD.**
+
+  Added the new Distributed Software Event Device (DSW), which is a
+  pure-software eventdev driver distributing the work of scheduling
+  among all eventdev ports and the lcores using them. DSW, compared to
+  the SW eventdev PMD, sacrifices load balancing performance to
+  gain better event scheduling throughput and scalability.
+
+* **Added extendable bucket feature to hash library (rte_hash).**
+
+  This new "extendable bucket" feature provides 100% insertion guarantee to
+  the capacity specified by the user by extending hash table with extra
+  buckets when needed to accommodate the unlikely event of intensive hash
+  collisions.  In addition, the internal hashing algorithm was changed to use
+  partial-key hashing to improve memory efficiency and lookup performance.
+
+* **Added lock free reader/writer concurrency to hash library (rte_hash).**
+
+  Lock free reader/writer concurrency prevents the readers from getting
+  blocked due to a pre-empted writer thread. This allows the hash library
+  to be used in scenarios where the writer thread runs on the control plane.
+
+* **Added Traffic Pattern Aware Power Control Library.**
+
+  Added an experimental library that extends the Power Library and provides
+  empty-poll APIs. This feature measures how many empty polls are executed
+  per core and uses the number of empty polls as a hint for system power
+  management.
+
+  See the :doc:`../prog_guide/power_man` section of the DPDK Programmer's
+  Guide for more information.
+
+* **Added JSON power policy interface for containers.**
+
+  Extended the Power Library and vm_power_manager sample app to allow power
+  policies to be submitted via a FIFO using JSON formatted strings. Previously
+  limited to Virtual Machines, this feature extends power policy functionality
+  to containers and host applications that need to have their cores frequency
+  controlled based on the rules contained in the policy.
+
+* **Added Telemetry API.**
+
+  Added the telemetry API which allows applications to transparently expose
+  their telemetry via a UNIX socket in JSON. The JSON can be consumed by any
+  Service Assurance agent, such as CollectD.
+
+* **Added ability to switch queue deferred start flag on testpmd app.**
+
+  Added a console command to the testpmd app, giving the ability to switch the
+  ``rx_deferred_start`` or ``tx_deferred_start`` flag of the specified queue of
+  the specified port. The port must be stopped before the command call in order
+  to reconfigure queues.
+
+* **Added a new sample application for vDPA.**
+
+  The vdpa sample application creates vhost-user sockets by using the
+  vDPA backend. vDPA stands for vhost Data Path Acceleration, which utilizes
+  virtio ring compatible devices to serve the virtio driver directly and
+  enable datapath acceleration. As the vDPA driver can help to set up the
+  vhost datapath, this application does not need to launch dedicated worker
+  threads for vhost enqueue/dequeue operations.
+
+
+API Changes
+-----------
+
+.. This section should contain API changes. Sample format:
+
+   * Add a short 1-2 sentence description of the API change.
+     Use fixed width quotes for ``function_names`` or ``struct_names``.
+     Use the past tense.
+
+   This section is a comment. Do not overwrite or remove it.
+   Also, make sure to start the actual text at the margin.
+   =========================================================
+
+* eal: ``rte_memseg_list`` structure now has an additional flag indicating
+  whether the memseg list is externally allocated. This will have implications
+  for any users of memseg-walk-related functions, as they will now have to skip
+  externally allocated segments in most cases if the intent is to only iterate
+  over internal DPDK memory.
+  The ``socket_id`` parameter across the entire DPDK has gained additional
+  meaning, as some socket IDs will now represent externally allocated memory.
+  No changes will be required for existing code, as backwards compatibility
+  will be kept, and those who do not use this feature will not see these extra
+  socket IDs. Any new APIs must not check socket ID parameters themselves, and
+  must instead leave it to the memory subsystem to decide whether the socket
+  ID is a valid one.
+
+* eal: The following devargs functions, which were deprecated in 18.05,
+  were removed in 18.11:
+  ``rte_eal_parse_devargs_str()``, ``rte_eal_devargs_add()``,
+  ``rte_eal_devargs_type_count()``, and ``rte_eal_devargs_dump()``.
+
+* eal: The parameters of the function ``rte_devargs_remove()`` have changed
+  from bus and device names to ``struct rte_devargs``.
+
+* eal: The deprecated functions attach/detach were removed in 18.11.
+  ``rte_eal_dev_attach`` can be replaced by
+  ``rte_dev_probe`` or ``rte_eal_hotplug_add``.
+  ``rte_eal_dev_detach`` can be replaced by
+  ``rte_dev_remove`` or ``rte_eal_hotplug_remove``.
+
+* eal: The scope of ``rte_eal_hotplug_add()``/``rte_dev_probe()``
+  and ``rte_eal_hotplug_remove()``/``rte_dev_remove()`` is extended.
+  In multi-process model, they will guarantee that the device is
+  attached or detached on all processes.
+
+* mbuf: The ``__rte_mbuf_raw_free()`` and ``__rte_pktmbuf_prefree_seg()``
+  functions were deprecated since 17.05 and are replaced by
+  ``rte_mbuf_raw_free()`` and ``rte_pktmbuf_prefree_seg()``.
+
+* ethdev: The deprecated functions attach/detach were removed in 18.11.
+  ``rte_eth_dev_attach`` can be replaced by ``RTE_ETH_FOREACH_MATCHING_DEV``
+  and ``rte_dev_probe`` or ``rte_eal_hotplug_add``.
+  ``rte_eth_dev_detach`` can be replaced by
+  ``rte_dev_remove`` or ``rte_eal_hotplug_remove``.
+
+* ethdev: A call to ``rte_eth_dev_release_port()`` has been added in
+  ``rte_eth_dev_close()``. As a consequence, a closed port is freed
+  and seen as invalid because of its state ``RTE_ETH_DEV_UNUSED``.
+  This new behaviour is enabled per driver for a migration period.
+
+* ethdev: A new device flag, ``RTE_ETH_DEV_NOLIVE_MAC_ADDR``, changes the
+  order of actions inside ``rte_eth_dev_start()`` regarding MAC set. Some NICs
+  do not support MAC changes once the port has started and with this new
+  device flag the MAC can be properly configured in any case. This is
+  particularly important for bonding.
+
+* ethdev: The default behaviour of the CRC strip offload has changed. Without
+  any specific Rx offload flag, the default behavior of a PMD is now to strip
+  the CRC. The ``DEV_RX_OFFLOAD_CRC_STRIP`` offload flag has been removed.
+  To request keeping the CRC, the application should set the
+  ``DEV_RX_OFFLOAD_KEEP_CRC`` Rx offload.
+
+* eventdev: The type of the 2nd parameter to ``rte_event_eth_rx_adapter_caps_get()``
+  has been changed from ``uint8_t`` to ``uint16_t``.
+
+
+ABI Changes
+-----------
+
+.. This section should contain ABI changes. Sample format:
+
+   * Add a short 1-2 sentence description of the ABI change
+     that was announced in the previous releases and made in this release.
+     Use fixed width quotes for ``function_names`` or ``struct_names``.
+     Use the past tense.
+
+   This section is a comment. Do not overwrite or remove it.
+   Also, make sure to start the actual text at the margin.
+   =========================================================
+
+* eal: added ``legacy_mem`` and ``single_file_segments`` values to
+       ``rte_config`` structure on account of improving DPDK usability when
+       using either ``--legacy-mem`` or ``--single-file-segments`` flags.
+
+* eal: EAL library ABI version was changed due to previously announced work on
+       supporting external memory in DPDK:
+         - structure ``rte_memseg_list`` now has a new field indicating length
+           of memory addressed by the segment list
+         - structure ``rte_memseg_list`` now has a new flag indicating whether
+           the memseg list refers to external memory
+         - structure ``rte_malloc_heap`` now has a new field indicating socket
+           ID the malloc heap belongs to
+         - structure ``rte_mem_config`` has had its ``malloc_heaps`` array
+           resized from ``RTE_MAX_NUMA_NODES`` to ``RTE_MAX_HEAPS`` value
+         - structure ``rte_malloc_heap`` now has a ``heap_name`` member
+         - structure ``rte_eal_memconfig`` has been extended to contain next
+           socket ID for externally allocated segments
+
+* eal: Added ``dma_maskbits`` to ``rte_mem_config`` for keeping more restricted
+       dma mask based on devices addressing limitations.
+
+* eal: The structure ``rte_device`` got a new field to reference a ``rte_bus``.
+  It is changing the size of the ``struct rte_device`` and the inherited
+  device structures of all buses.
+
+
+Removed Items
+-------------
+
+.. This section should contain removed items in this release. Sample format:
+
+   * Add a short 1-2 sentence description of the removed item
+     in the past tense.
+
+   This section is a comment. Do not overwrite or remove it.
+   Also, make sure to start the actual text at the margin.
+   =========================================================
+
+
+Shared Library Versions
+-----------------------
+
+.. Update any library version updated in this release
+   and prepend with a ``+`` sign, like this:
+
+     librte_acl.so.2
+   + librte_cfgfile.so.2
+     librte_cmdline.so.2
+
+   This section is a comment. Do not overwrite or remove it.
+   =========================================================
+
+The libraries prepended with a plus sign were incremented in this version.
+
+.. code-block:: diff
+
+     librte_acl.so.2
+     librte_bbdev.so.1
+     librte_bitratestats.so.2
+     librte_bpf.so.1
+   + librte_bus_dpaa.so.2
+   + librte_bus_fslmc.so.2
+   + librte_bus_ifpga.so.2
+   + librte_bus_pci.so.2
+   + librte_bus_vdev.so.2
+   + librte_bus_vmbus.so.2
+     librte_cfgfile.so.2
+     librte_cmdline.so.2
+     librte_compressdev.so.1
+     librte_cryptodev.so.5
+     librte_distributor.so.1
+   + librte_eal.so.9
+     librte_efd.so.1
+   + librte_ethdev.so.11
+   + librte_eventdev.so.6
+     librte_flow_classify.so.1
+     librte_gro.so.1
+     librte_gso.so.1
+     librte_hash.so.2
+     librte_ip_frag.so.1
+     librte_jobstats.so.1
+     librte_kni.so.2
+     librte_kvargs.so.1
+     librte_latencystats.so.1
+     librte_lpm.so.2
+     librte_mbuf.so.4
+     librte_member.so.1
+     librte_mempool.so.5
+     librte_meter.so.2
+     librte_metrics.so.1
+     librte_net.so.1
+     librte_pci.so.1
+     librte_pdump.so.2
+     librte_pipeline.so.3
+     librte_pmd_bnxt.so.2
+     librte_pmd_bond.so.2
+     librte_pmd_i40e.so.2
+     librte_pmd_ixgbe.so.2
+     librte_pmd_dpaa2_qdma.so.1
+     librte_pmd_ring.so.2
+     librte_pmd_softnic.so.1
+     librte_pmd_vhost.so.2
+     librte_port.so.3
+     librte_power.so.1
+     librte_rawdev.so.1
+     librte_reorder.so.1
+     librte_ring.so.2
+     librte_sched.so.1
+     librte_security.so.1
+     librte_table.so.3
+     librte_timer.so.1
+     librte_vhost.so.4
+
+
+Known Issues
+------------
+
+.. This section should contain new known issues in this release. Sample format:
+
+   * **Add title in present tense with full stop.**
+
+     Add a short 1-2 sentence description of the known issue
+     in the present tense. Add information on any known workarounds.
+
+   This section is a comment. Do not overwrite or remove it.
+   Also, make sure to start the actual text at the margin.
+   =========================================================
+
+* When using SR-IOV (VF) support with the netvsc PMD and the Mellanox mlx5
+  bifurcated driver, the Linux netvsc device must be brought up before the
+  netvsc device is unbound and passed to the DPDK.
+
+
+Tested Platforms
+----------------
+
+.. This section should contain a list of platforms that were tested
+   with this release.
+
+   The format is:
+
+   * <vendor> platform with <vendor> <type of devices> combinations
+
+     * List of CPU
+     * List of OS
+     * List of devices
+     * Other relevant details...
+
+   This section is a comment. Do not overwrite or remove it.
+   Also, make sure to start the actual text at the margin.
+   =========================================================
index bd0ae1e..0d6fe2b 100644 (file)
@@ -139,7 +139,6 @@ application is shown below:
            struct rte_eth_conf port_conf = {
                    .rxmode = {
                            .split_hdr_size = 0,
-                           .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
                            },
                    .txmode = {
                            .offloads =
@@ -215,7 +214,6 @@ The Ethernet port is configured with default settings using the
    struct rte_eth_conf port_conf = {
            .rxmode = {
                    .split_hdr_size = 0,
-                   .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
                    },
            .txmode = {
                    .offloads =
index 5bedf4f..74b12af 100644 (file)
@@ -45,6 +45,7 @@ Sample Applications User Guides
     vhost
     vhost_scsi
     vhost_crypto
+    vdpa
     netmap_compatibility
     ip_pipeline
     test_pipeline
index b75509a..447a544 100644 (file)
@@ -304,6 +304,15 @@ Kni
     [thread <thread_id>]
 
 
+Cryptodev
+~~~~~~~~~
+
+  Create cryptodev port ::
+
+   cryptodev <cryptodev_name>
+    dev <DPDK Cryptodev PMD name>
+    queue <n_queues> <queue_size>
+
 Action profile
 ~~~~~~~~~~~~~~
 
@@ -330,6 +339,8 @@ Action profile
    [ttl drop | fwd
        stats none | pkts]
    [stats pkts | bytes | both]
+   [sym_crypto cryptodev <cryptodev_name>
+       mempool_create <mempool_name> mempool_init <mempool_name>]
    [time]
 
 
@@ -471,6 +482,18 @@ Add rule to table for specific pipeline instance ::
      [ttl dec | keep]
      [stats]
      [time]
+     [sym_crypto
+        encrypt | decrypt
+        type
+        | cipher
+           cipher_algo <algo> cipher_key <key> cipher_iv <iv>
+        | cipher_auth
+           cipher_algo <algo> cipher_key <key> cipher_iv <iv>
+           auth_algo <algo> auth_key <key> digest_size <size>
+        | aead
+           aead_algo <algo> aead_key <key> aead_iv <iv> aead_aad <aad>
+           digest_size <size>
+        data_offset <data_offset>]
 
   where:
      <pa> ::= g | y | r | drop
index 46696f2..4869a01 100644 (file)
@@ -67,7 +67,7 @@ Constraints
 
 *  No IPv6 options headers.
 *  No AH mode.
-*  Supported algorithms: AES-CBC, AES-CTR, AES-GCM, HMAC-SHA1 and NULL.
+*  Supported algorithms: AES-CBC, AES-CTR, AES-GCM, 3DES-CBC, HMAC-SHA1 and NULL.
 *  Each SA must be handle by a unique lcore (*1 RX queue per port*).
 *  No chained mbufs.
 
@@ -397,6 +397,7 @@ where each options means:
    * *aes-128-cbc*: AES-CBC 128-bit algorithm
    * *aes-256-cbc*: AES-CBC 256-bit algorithm
    * *aes-128-ctr*: AES-CTR 128-bit algorithm
+   * *3des-cbc*: 3DES-CBC 192-bit algorithm
 
  * Syntax: *cipher_algo <your algorithm>*
 
index 1b3ee9a..6acdf0f 100644 (file)
@@ -31,18 +31,27 @@ This is done by creating one or more kernel net devices for each of the DPDK por
 The application allows the use of standard Linux tools (ethtool, ifconfig, tcpdump) with the DPDK ports and
 also the exchange of packets between the DPDK application and the Linux* kernel.
 
+The Kernel NIC Interface sample application requires that the
+KNI kernel module ``rte_kni`` be loaded into the kernel.  See
+:doc:`../prog_guide/kernel_nic_interface` for more information on loading
+the ``rte_kni`` kernel module.
+
 Overview
 --------
 
-The Kernel NIC Interface sample application uses two threads in user space for each physical NIC port being used,
-and allocates one or more KNI device for each physical NIC port with kernel module's support.
-For a physical NIC port, one thread reads from the port and writes to KNI devices,
-and another thread reads from KNI devices and writes the data unmodified to the physical NIC port.
-It is recommended to configure one KNI device for each physical NIC port.
-If configured with more than one KNI devices for a physical NIC port,
-it is just for performance testing, or it can work together with VMDq support in future.
+The Kernel NIC Interface sample application ``kni`` allocates one or more
+KNI interfaces for each physical NIC port.  For each physical NIC port,
+``kni`` uses two DPDK threads in user space; one thread reads from the port and
+writes to the corresponding KNI interfaces and the other thread reads from
+the KNI interfaces and writes the data unmodified to the physical NIC port.
+
+It is recommended to configure one KNI interface for each physical NIC port.
+The application can be configured with more than one KNI interface for
+each physical NIC port for performance testing, or it can work together with
+VMDq support in the future.
 
-The packet flow through the Kernel NIC Interface application is as shown in the following figure.
+The packet flow through the Kernel NIC Interface application is as shown
+in the following figure.
 
 .. _figure_kernel_nic:
 
@@ -50,145 +59,221 @@ The packet flow through the Kernel NIC Interface application is as shown in the
 
    Kernel NIC Application Packet Flow
 
+If link monitoring is enabled with the ``-m`` command line flag, one
+additional pthread is launched which will check the link status of each
+physical NIC port and will update the carrier status of the corresponding
+KNI interface(s) to match the physical NIC port's state.  This means that
+the KNI interface(s) will be disabled automatically when the Ethernet link
+goes down and enabled when the Ethernet link goes up.
+
+If link monitoring is enabled, the ``rte_kni`` kernel module should be loaded
+such that the :ref:`default carrier state <kni_default_carrier_state>` is
+set to *off*.  This ensures that the KNI interface is only enabled *after*
+the Ethernet link of the corresponding NIC port has reached the linkup state.
+
+If link monitoring is not enabled, the ``rte_kni`` kernel module should be
+loaded with the :ref:`default carrier state <kni_default_carrier_state>`
+set to *on*.  This sets the carrier state of the KNI interfaces to *on*
+when the KNI interfaces are enabled without regard to the actual link state
+of the corresponding NIC port.  This is useful for testing in loopback
+mode where the NIC port may not be physically connected to anything.
+
 Compiling the Application
 -------------------------
 
 To compile the sample application see :doc:`compiling`.
 
-The application is located in the ``kni`` sub-directory.
+The application is located in the ``examples/kni`` sub-directory.
 
 .. note::
 
         This application is intended as a linuxapp only.
 
-Loading the Kernel Module
--------------------------
+Running the kni Example Application
+-----------------------------------
 
-Loading the KNI kernel module without any parameter is the typical way a DPDK application
-gets packets into and out of the kernel net stack.
-This way, only one kernel thread is created for all KNI devices for packet receiving in kernel side:
+The ``kni`` example application requires a number of command line options:
 
 .. code-block:: console
 
-    #insmod rte_kni.ko
+    kni [EAL options] -- -p PORTMASK --config="(port,lcore_rx,lcore_tx[,lcore_kthread,...])[,(port,lcore_rx,lcore_tx[,lcore_kthread,...])]" [-P] [-m]
 
-Pinning the kernel thread to a specific core can be done using a taskset command such as following:
+Where:
 
-.. code-block:: console
+*   ``-p PORTMASK``:
 
-    #taskset -p 100000 `pgrep --fl kni_thread | awk '{print $1}'`
+    Hexadecimal bitmask of ports to configure.
 
-This command line tries to pin the specific kni_thread on the 20th lcore (lcore numbering starts at 0),
-which means it needs to check if that lcore is available on the board.
-This command must be sent after the application has been launched, as insmod does not start the kni thread.
+*   ``--config="(port,lcore_rx,lcore_tx[,lcore_kthread,...])[,(port,lcore_rx,lcore_tx[,lcore_kthread,...])]"``:
 
-For optimum performance,
-the lcore in the mask must be selected to be on the same socket as the lcores used in the KNI application.
+    Determines which lcores the Rx and Tx DPDK tasks, and (optionally)
+    the KNI kernel thread(s) are bound to for each physical port.
 
-To provide flexibility of performance, the kernel module of the KNI,
-located in the kmod sub-directory of the DPDK target directory,
-can be loaded with parameter of kthread_mode as follows:
+*   ``-P``:
 
-*   #insmod rte_kni.ko kthread_mode=single
+    Optional flag to set all ports to promiscuous mode so that packets are
+    accepted regardless of the packet's Ethernet MAC destination address.
+    Without this option, only packets with the Ethernet MAC destination
+    address set to the Ethernet address of the port are accepted.
 
-    This mode will create only one kernel thread for all KNI devices for packet receiving in kernel side.
-    By default, it is in this single kernel thread mode.
-    It can set core affinity for this kernel thread by using Linux command taskset.
+*   ``-m``:
 
-*   #insmod rte_kni.ko kthread_mode =multiple
+    Optional flag to enable monitoring and updating of the Ethernet
+    carrier state.  With this option set, a thread will be started which
+    will periodically check the Ethernet link status of the physical
+    Ethernet ports and set the carrier state of the corresponding KNI
+    network interface to match it.  This means that the KNI interface will
+    be disabled automatically when the Ethernet link goes down and enabled
+    when the Ethernet link goes up.
 
-    This mode will create a kernel thread for each KNI device for packet receiving in kernel side.
-    The core affinity of each kernel thread is set when creating the KNI device.
-    The lcore ID for each kernel thread is provided in the command line of launching the application.
-    Multiple kernel thread mode can provide scalable higher performance.
+Refer to *DPDK Getting Started Guide* for general information on running
+applications and the Environment Abstraction Layer (EAL) options.
 
-To measure the throughput in a loopback mode, the kernel module of the KNI,
-located in the kmod sub-directory of the DPDK target directory,
-can be loaded with parameters as follows:
+The ``-c coremask`` or ``-l corelist`` parameter of the EAL options must
+include the lcores specified by ``lcore_rx`` and ``lcore_tx`` for each port,
+but does not need to include lcores specified by ``lcore_kthread`` as those
+cores are used to pin the kernel threads in the ``rte_kni`` kernel module.
 
-*   #insmod rte_kni.ko lo_mode=lo_mode_fifo
+The ``--config`` parameter must include a set of
+``(port,lcore_rx,lcore_tx,[lcore_kthread,...])`` values for each physical
+port specified in the ``-p PORTMASK`` parameter.
 
-    This loopback mode will involve ring enqueue/dequeue operations in kernel space.
+The optional ``lcore_kthread`` lcore ID parameter in ``--config`` can be
+specified zero, one or more times for each physical port.
 
-*   #insmod rte_kni.ko lo_mode=lo_mode_fifo_skb
+If no lcore ID is specified for ``lcore_kthread``, one KNI interface will
+be created for the physical port ``port`` and the KNI kernel thread(s)
+will have no specific core affinity.
 
-    This loopback mode will involve ring enqueue/dequeue operations and sk buffer copies in kernel space.
+If one or more lcore IDs are specified for ``lcore_kthread``, a KNI interface
+will be created for each lcore ID specified, bound to the physical port
+``port``.  If the ``rte_kni`` kernel module is loaded in :ref:`multiple
+kernel thread <kni_kernel_thread_mode>` mode, a kernel thread will be created
+for each KNI interface and bound to the specified core.  If the ``rte_kni``
+kernel module is loaded in :ref:`single kernel thread <kni_kernel_thread_mode>`
+mode, only one kernel thread is started for all KNI interfaces.  The kernel
+thread will be bound to the first ``lcore_kthread`` lcore ID specified.
 
-Running the Application
------------------------
+Example Configurations
+~~~~~~~~~~~~~~~~~~~~~~~
 
-The application requires a number of command line options:
+The following commands will first load the ``rte_kni`` kernel module in
+:ref:`multiple kernel thread <kni_kernel_thread_mode>` mode.  The ``kni``
+application is then started using two ports;  Port 0 uses lcore 4 for the
+Rx task, lcore 6 for the Tx task, and will create a single KNI interface
+``vEth0_0`` with the kernel thread bound to lcore 8.  Port 1 uses lcore
+5 for the Rx task, lcore 7 for the Tx task, and will create a single KNI
+interface ``vEth1_0`` with the kernel thread bound to lcore 9.
 
 .. code-block:: console
 
-    kni [EAL options] -- -P -p PORTMASK --config="(port,lcore_rx,lcore_tx[,lcore_kthread,...])[,port,lcore_rx,lcore_tx[,lcore_kthread,...]]"
-
-Where:
+    # rmmod rte_kni
+    # insmod kmod/rte_kni.ko kthread_mode=multiple
+    # ./build/kni -l 4-7 -n 4 -- -P -p 0x3 -m --config="(0,4,6,8),(1,5,7,9)"
 
-*   -P: Set all ports to promiscuous mode so that packets are accepted regardless of the packet's Ethernet MAC destination address.
-    Without this option, only packets with the Ethernet MAC destination address set to the Ethernet address of the port are accepted.
+The following example is identical, except an additional ``lcore_kthread``
+core is specified per physical port.  In this case, ``kni`` will create
+four KNI interfaces: ``vEth0_0``/``vEth0_1`` bound to physical port 0 and
+``vEth1_0``/``vEth1_1`` bound to physical port 1.
 
-*   -p PORTMASK: Hexadecimal bitmask of ports to configure.
+The kernel thread for each interface will be bound as follows:
 
-*   --config="(port,lcore_rx, lcore_tx[,lcore_kthread, ...]) [, port,lcore_rx, lcore_tx[,lcore_kthread, ...]]":
-    Determines which lcores of RX, TX, kernel thread are mapped to which ports.
+    * ``vEth0_0`` - bound to lcore 8.
+    * ``vEth0_1`` - bound to lcore 10.
+    * ``vEth1_0`` - bound to lcore 9.
+    * ``vEth1_1`` - bound to lcore 11.
 
-Refer to *DPDK Getting Started Guide* for general information on running applications and the Environment Abstraction Layer (EAL) options.
+.. code-block:: console
 
-The -c coremask or -l corelist parameter of the EAL options should include the lcores indicated by the lcore_rx and lcore_tx,
-but does not need to include lcores indicated by lcore_kthread as they are used to pin the kernel thread on.
-The -p PORTMASK parameter should include the ports indicated by the port in --config, neither more nor less.
+    # rmmod rte_kni
+    # insmod kmod/rte_kni.ko kthread_mode=multiple
+    # ./build/kni -l 4-7 -n 4 -- -P -p 0x3 -m --config="(0,4,6,8,10),(1,5,7,9,11)"
 
-The lcore_kthread in --config can be configured none, one or more lcore IDs.
-In multiple kernel thread mode, if configured none, a KNI device will be allocated for each port,
-while no specific lcore affinity will be set for its kernel thread.
-If configured one or more lcore IDs, one or more KNI devices will be allocated for each port,
-while specific lcore affinity will be set for its kernel thread.
-In single kernel thread mode, if configured none, a KNI device will be allocated for each port.
-If configured one or more lcore IDs,
-one or more KNI devices will be allocated for each port while
-no lcore affinity will be set as there is only one kernel thread for all KNI devices.
+The following example can be used to test the interface between the ``kni``
+test application and the ``rte_kni`` kernel module.  In this example,
+the ``rte_kni`` kernel module is loaded in :ref:`single kernel thread
+mode <kni_kernel_thread_mode>`, :ref:`loopback mode <kni_loopback_mode>`
+enabled, and the :ref:`default carrier state <kni_default_carrier_state>`
+is set to *on* so that the corresponding physical NIC port does not have
+to be connected in order to use the KNI interface.  One KNI interface
+``vEth0_0`` is created for port 0 and one KNI interface ``vEth1_0`` is
+created for port 1.  Since ``rte_kni`` is loaded in "single kernel thread"
+mode, the one kernel thread is bound to lcore 8.
 
-For example, to run the application with two ports served by six lcores, one lcore of RX, one lcore of TX,
-and one lcore of kernel thread for each port:
+Since the physical NIC ports are not being used, link monitoring can be
+disabled by **not** specifying the ``-m`` flag to ``kni``:
 
 .. code-block:: console
 
-    ./build/kni -l 4-7 -n 4 -- -P -p 0x3 --config="(0,4,6,8),(1,5,7,9)"
+    # rmmod rte_kni
+    # insmod kmod/rte_kni.ko lo_mode=lo_mode_fifo carrier=on
+    # ./build/kni -l 4-7 -n 4 -- -P -p 0x3 --config="(0,4,6,8),(1,5,7,9)"
 
 KNI Operations
 --------------
 
-Once the KNI application is started, one can use different Linux* commands to manage the net interfaces.
-If more than one KNI devices configured for a physical port,
-only the first KNI device will be paired to the physical device.
-Operations on other KNI devices will not affect the physical port handled in user space application.
+Once the ``kni`` application is started, the user can use the normal
+Linux commands to manage the KNI interfaces as if they were any other
+Linux network interface.
 
-Assigning an IP address:
+Enable KNI interface and assign an IP address:
 
 .. code-block:: console
 
-    #ifconfig vEth0_0 192.168.0.1
+    # ifconfig vEth0_0 192.168.0.1
 
-Displaying the NIC registers:
+Show KNI interface configuration and statistics:
 
 .. code-block:: console
 
-    #ethtool -d vEth0_0
+    # ifconfig vEth0_0
+
+The user can also check and reset the packet statistics inside the ``kni``
+application by sending the app the USR1 and USR2 signals:
+
+.. code-block:: console
+
+    # Print statistics
+    # kill -SIGUSR1 `pidof kni`
+
+    # Zero statistics
+    # kill -SIGUSR2 `pidof kni`
 
-Dumping the network traffic:
+Dump network traffic:
 
 .. code-block:: console
 
-    #tcpdump -i vEth0_0
+    # tcpdump -i vEth0_0
+
+The normal Linux commands can also be used to change the MAC address and
+MTU size used by the physical NIC which corresponds to the KNI interface.
+However, if more than one KNI interface is configured for a physical port,
+these commands will only work on the first KNI interface for that port.
 
 Change the MAC address:
 
 .. code-block:: console
 
-    #ifconfig vEth0_0 hw ether 0C:01:02:03:04:08
+    # ifconfig vEth0_0 hw ether 0C:01:02:03:04:08
+
+Change the MTU size:
+
+.. code-block:: console
+
+    # ifconfig vEth0_0 mtu 1450
+
+If DPDK is compiled with ``CONFIG_RTE_KNI_KMOD_ETHTOOL=y`` and an Intel
+NIC is used, the user can use ``ethtool`` on the KNI interface as if it
+were a normal Linux kernel interface.
+
+Displaying the NIC registers:
+
+.. code-block:: console
+
+    # ethtool -d vEth0_0
 
-When the DPDK userspace application is closed, all the KNI devices are deleted from Linux*.
+When the ``kni`` application is closed, all the KNI interfaces are deleted
+from the Linux kernel.
 
 Explanation
 -----------
@@ -227,7 +312,7 @@ to see if this lcore is reading from or writing to kernel NIC interfaces.
 For the case that reads from a NIC port and writes to the kernel NIC interfaces (``kni_ingress``),
 the packet reception is the same as in L2 Forwarding sample application
 (see :ref:`l2_fwd_app_rx_tx_packets`).
-The packet transmission is done by sending mbufs into the kernel NIC interfaces by rte_kni_tx_burst().
+The packet transmission is done by sending mbufs into the kernel NIC interfaces by ``rte_kni_tx_burst()``.
 The KNI library automatically frees the mbufs after the kernel successfully copied the mbufs.
 
 For the other case that reads from kernel NIC interfaces
@@ -235,16 +320,3 @@ and writes to a physical NIC port (``kni_egress``),
 packets are retrieved by reading mbufs from kernel NIC interfaces by ``rte_kni_rx_burst()``.
 The packet transmission is the same as in the L2 Forwarding sample application
 (see :ref:`l2_fwd_app_rx_tx_packets`).
-
-Callbacks for Kernel Requests
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-To execute specific PMD operations in user space requested by some Linux* commands,
-callbacks must be implemented and filled in the struct rte_kni_ops structure.
-Currently, setting a new MTU, change in MAC address, configuring promiscusous mode and
-configuring the network interface(up/down) re supported.
-Default implementation for following is available in rte_kni library.
-Application may choose to not implement following callbacks:
-
-- ``config_mac_address``
-- ``config_promiscusity``
index 795a570..e44a11b 100644 (file)
@@ -105,6 +105,8 @@ where,
 
 *   --no-numa: optional, disables numa awareness
 
+*   --empty-poll: optional, enables traffic-aware power management. See below for details
+
 See :doc:`l3_forward` for details.
 The L3fwd-power example reuses the L3fwd command line options.
 
@@ -362,3 +364,70 @@ The algorithm has the following sleeping behavior depending on the idle counter:
 If a thread polls multiple Rx queues and different queue returns different sleep duration values,
 the algorithm controls the sleep time in a conservative manner by sleeping for the least possible time
 in order to avoid a potential performance impact.
+
+Empty Poll Mode
+---------------
+
+Additionally, there is a traffic-aware mode of operation called "Empty
+Poll" in which the number of empty polls can be monitored to keep track
+of how busy the application is. Empty poll mode can be enabled with the
+command line option ``--empty-poll``.
+
+See the :doc:`Power Management<../prog_guide/power_man>` chapter in the DPDK Programmer's Guide for empty poll mode details.
+
+.. code-block:: console
+
+    ./l3fwd-power -l xxx -n 4 -w 0000:xx:00.0 -w 0000:xx:00.1 -- -p 0x3 -P --config="(0,0,xx),(1,0,xx)" --empty-poll="0,0,0" -l 14 -m 9 -h 1
+
+Where,
+
+--empty-poll: Enable the empty poll mode instead of the original algorithm.
+
+--empty-poll="training_flag, med_threshold, high_threshold"
+
+* ``training_flag`` : optional, enable/disable training mode. Default value is 0. If the training_flag is set as 1 (true), then the application will start in training mode and print out the trained threshold values. If the training_flag is set as 0 (false), the application will start in normal mode, and will use either the default thresholds or those supplied on the command line. The trained threshold values are specific to the user's system and may give a better power profile than the default threshold values.
+
+* ``med_threshold`` : optional, sets the empty poll threshold of a modestly busy system state. If this is not supplied, the application will apply the default value of 350000.
+
+* ``high_threshold`` : optional, sets the empty poll threshold of a busy system state. If this is not supplied, the application will apply the default value of 580000.
+
+* -l : optional, set up the LOW power state frequency index
+
+* -m : optional, set up the MED power state frequency index
+
+* -h : optional, set up the HIGH power state frequency index
+
+Empty Poll Mode Example Usage
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+To initially obtain the ideal thresholds for the system, the training
+mode should be run first. This is achieved by running the l3fwd-power
+app with the training flag set to “1”, and the other parameters set to
+0.
+
+.. code-block:: console
+
+        ./examples/l3fwd-power/build/l3fwd-power -l 1-3 -- -p 0x0f --config="(0,0,2),(0,1,3)" --empty-poll "1,0,0" -P
+
+This will run the training algorithm for x seconds on each core (cores 2
+and 3), and then print out the recommended threshold values for those
+cores. The thresholds should be very similar for each core.
+
+.. code-block:: console
+
+        POWER: Bring up the Timer
+        POWER: set the power freq to MED
+        POWER: Low threshold is 230277
+        POWER: MED threshold is 335071
+        POWER: HIGH threshold is 523769
+        POWER: Training is Complete for 2
+        POWER: set the power freq to MED
+        POWER: Low threshold is 236814
+        POWER: MED threshold is 344567
+        POWER: HIGH threshold is 538580
+        POWER: Training is Complete for 3
+
+Once the values have been measured for a particular system, the app can
+then be started without the training mode so traffic can start immediately.
+
+.. code-block:: console
+
+        ./examples/l3fwd-power/build/l3fwd-power -l 1-3 -- -p 0x0f --config="(0,0,2),(0,1,3)" --empty-poll "0,340000,540000" -P
index c7665fe..695c088 100644 (file)
@@ -137,7 +137,6 @@ The global configuration is stored in a static structure:
     static const struct rte_eth_conf port_conf = {
         .rxmode = {
             .split_hdr_size = 0,
-            .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
         },
         .txmode = {},
         .intr_conf = {
diff --git a/doc/guides/sample_app_ug/vdpa.rst b/doc/guides/sample_app_ug/vdpa.rst
new file mode 100644 (file)
index 0000000..745f196
--- /dev/null
@@ -0,0 +1,120 @@
+..  SPDX-License-Identifier: BSD-3-Clause
+    Copyright(c) 2018 Intel Corporation.
+
+vDPA Sample Application
+=======================
+
+The vdpa sample application creates vhost-user sockets by using the
+vDPA backend. vDPA stands for vhost Data Path Acceleration, which
+utilizes virtio-ring-compatible devices to serve the virtio driver
+directly and thus enable datapath acceleration. As the vDPA driver sets
+up the vhost datapath, this application does not need to launch dedicated
+worker threads for vhost enqueue/dequeue operations.
+
+Testing steps
+-------------
+
+This section shows the steps to start VMs with the vDPA vhost-user
+backend and to verify network connectivity and live migration.
+
+Build
+~~~~~
+
+To compile the sample application see :doc:`compiling`.
+
+The application is located in the ``vdpa`` sub-directory.
+
+Start the vdpa example
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. code-block:: console
+
+        ./vdpa [EAL options] -- [--client] [--interactive|-i] [--iface SOCKET_PATH]
+
+where
+
+* --client means running the vdpa app in client mode; in client mode, QEMU needs
+  to run in server mode and take charge of socket file creation.
+* --iface specifies the path prefix of the UNIX domain socket files, e.g.
+  /tmp/vhost-user-; the socket files will then be named /tmp/vhost-user-<n>
+  (n starting from 0).
+* --interactive means running the vdpa sample in interactive mode; currently 4
+  internal commands are supported:
+
+  1. help: show help message
+  2. list: list all available vdpa devices
+  3. create: create a new vdpa port with socket file and vdpa device address
+  4. quit: unregister vhost driver and exit the application
+
+Take the IFCVF driver as an example:
+
+.. code-block:: console
+
+        ./vdpa -c 0x2 -n 4 --socket-mem 1024,1024 \
+                -w 0000:06:00.3,vdpa=1 -w 0000:06:00.4,vdpa=1 \
+                -- --interactive
+
+.. note::
+    Here 0000:06:00.3 and 0000:06:00.4 refer to virtio ring compatible devices,
+    and vfio-pci needs to be bound to them before running the vdpa sample:
+
+    * modprobe vfio-pci
+    * ./usertools/dpdk-devbind.py -b vfio-pci 06:00.3 06:00.4
+
+Then two vdpa ports can be created from the interactive command line.
+
+.. code-block:: console
+
+        vdpa> list
+        device id       device address  queue num       supported features
+        0               0000:06:00.3    1               0x14c238020
+        1               0000:06:00.4    1               0x14c238020
+        2               0000:06:00.5    1               0x14c238020
+
+        vdpa> create /tmp/vdpa-socket0 0000:06:00.3
+        vdpa> create /tmp/vdpa-socket1 0000:06:00.4
+
+.. _vdpa_app_run_vm:
+
+Start the VMs
+~~~~~~~~~~~~~
+
+.. code-block:: console
+
+       qemu-system-x86_64 -cpu host -enable-kvm \
+       <snip>
+       -mem-prealloc \
+       -chardev socket,id=char0,path=<socket_file created in above steps> \
+       -netdev type=vhost-user,id=vdpa,chardev=char0 \
+       -device virtio-net-pci,netdev=vdpa,mac=00:aa:bb:cc:dd:ee,page-per-vq=on \
+
+After the VMs launch, we can log in to the VMs, configure the IP addresses,
+and verify the network connection via ping or netperf.
+
+.. note::
+    QEMU 3.0.0 or above is suggested, as it extends vhost-user for vDPA.
+
+Live Migration
+~~~~~~~~~~~~~~
+
+vDPA supports cross-backend live migration: a VM using a SW vhost backend
+can be migrated to a vDPA backend and vice versa. The detailed steps follow.
+Assume A is the source host with the SW vhost VM and B is the destination
+host with vDPA.
+
+1. Start vdpa sample and launch a VM with exact same parameters as the VM on A,
+   in migration-listen mode:
+
+.. code-block:: console
+
+        B: <qemu-command-line> -incoming tcp:0:4444 (or another PORT)
+
+2. Start the migration (on source host):
+
+.. code-block:: console
+
+        A: (qemu) migrate -d tcp:<B ip>:4444 (or other PORT)
+
+3. Check the status (on source host):
+
+.. code-block:: console
+
+        A: (qemu) info migrate
index fd42cb3..df4d6f9 100644 (file)
@@ -78,7 +78,7 @@ could be done by:
 .. code-block:: console
 
    modprobe uio_pci_generic
-   $RTE_SDK/usertools/dpdk-devbind.py -b=uio_pci_generic 0000:00:04.0
+   $RTE_SDK/usertools/dpdk-devbind.py -b uio_pci_generic 0000:00:04.0
 
 Then start testpmd for packet forwarding testing.
 
index 65c86a5..3db57ea 100644 (file)
@@ -28,24 +28,22 @@ Start the vhost_crypto example
 
 .. code-block:: console
 
-    ./vhost_crypto [EAL options] -- [--socket-file PATH]
-        [--cdev-id ID] [--cdev-queue-id ID] [--zero-copy] [--guest-polling]
+    ./vhost_crypto [EAL options] --
+               --config (lcore,cdev-id,queue-id)[,(lcore,cdev-id,queue-id)]
+               --socketfile lcore,PATH
+               [--zero-copy]
+               [--guest-polling]
 
 where,
 
-* socket-file PATH: the path of UNIX socket file to be created, multiple
-  instances of this config item is supported. Upon absence of this item,
-  the default socket-file `/tmp/vhost_crypto1.socket` is used.
+* config (lcore,cdev-id,queue-id): build the connection between an lcore and
+  a cryptodev queue. Once specified, the lcore will only work with the
+  specified cryptodev queue.
 
-* cdev-id ID: the target DPDK Cryptodev's ID to process the actual crypto
-  workload. Upon absence of this item the default value of `0` will be used.
-  For details of DPDK Cryptodev, please refer to DPDK Cryptodev Library
-  Programmers' Guide.
-
-* cdev-queue-id ID: the target DPDK Cryptodev's queue ID to process the
-  actual crypto workload. Upon absence of this item the default value of `0`
-  will be used. For details of DPDK Cryptodev, please refer to DPDK Cryptodev
-  Library Programmers' Guide.
+* socket-file lcore,PATH: the path of the UNIX socket file to be created and
+  the lcore id that will deal with all the workloads of the socket. Multiple
+  instances of this config item are supported and one lcore can process
+  multiple sockets.
 
 * zero-copy: the presence of this item means the ZERO-COPY feature will be
   enabled. Otherwise it is disabled. PLEASE NOTE the ZERO-COPY feature is still
index 855570d..1ad4f14 100644 (file)
@@ -199,7 +199,7 @@ see :doc:`compiling`.
 
 The application is located in the ``vm_power_manager`` sub-directory.
 
-To build just the ``vm_power_manager`` application:
+To build just the ``vm_power_manager`` application using ``make``:
 
 .. code-block:: console
 
@@ -208,6 +208,22 @@ To build just the ``vm_power_manager`` application:
   cd ${RTE_SDK}/examples/vm_power_manager/
   make
 
+The resulting binary will be ``${RTE_SDK}/build/examples/vm_power_manager``.
+
+To build just the ``vm_power_manager`` application using ``meson/ninja``:
+
+.. code-block:: console
+
+  export RTE_SDK=/path/to/rte_sdk
+  cd ${RTE_SDK}
+  meson build
+  cd build
+  ninja
+  meson configure -Dexamples=vm_power_manager
+  ninja
+
+The resulting binary will be ``${RTE_SDK}/build/examples/dpdk-vm_power_manager``.
+
 Running
 ~~~~~~~
 
@@ -337,6 +353,270 @@ monitoring of branch ratio on cores doing busy polling via PMDs.
   and will need to be adjusted for different workloads.
 
 
+
+JSON API
+~~~~~~~~
+
+In addition to the command line interface for host commands and a virtio-serial
+interface for VM power policies, there is also a JSON interface through which
+power commands and policies can be sent. This functionality adds a dependency
+on the Jansson library, and the Jansson development package must be installed
+on the system before the JSON parsing functionality is included in the app.
+This is achieved by:
+
+  .. code-block:: console
+
+    apt-get install libjansson-dev
+
+The command and package name may be different depending on your operating
+system. It's worth noting that the app will successfully build without this
+package present, but a warning is shown during compilation, and the JSON
+parsing functionality will not be present in the app.
+
+Sending a command or policy to the power manager application is achieved by
+simply opening a fifo file, writing a JSON string to that fifo, and closing
+the file.
+
+The fifo is located at ``/tmp/powermonitor/fifo``.
+
+The JSON string can be a policy or an instruction, and takes the following
+format:
+
+  .. code-block:: javascript
+
+    {"packet_type": {
+      "pair_1": value,
+      "pair_2": value
+    }}
+
+The 'packet_type' header can contain one of two values, depending on
+whether a policy or power command is being sent. The two possible values are
+"policy" and "instruction", and the expected name-value pairs is different
+depending on which type is being sent.
+
+The pairs follow the format of standard JSON name-value pairs. The value type
+varies between the different name/value pairs, and may be integers, strings,
+arrays, etc. Examples of policies follow later in this document. The allowed
+names and value types are as follows:
+
+
+:Pair Name: "name"
+:Description: Name of the VM or Host. Allows the parser to associate the
+  policy with the relevant VM or Host OS.
+:Type: string
+:Values: any valid string
+:Required: yes
+:Example:
+
+    .. code-block:: javascript
+
+      "name", "ubuntu2"
+
+
+:Pair Name: "command"
+:Description: The type of packet we're sending to the power manager. We can be
+  creating or destroying a policy, or sending a direct command to adjust
+  the frequency of a core, similar to the command line interface.
+:Type: string
+:Values:
+
+  :CREATE: used when creating a new policy,
+  :DESTROY: used when removing a policy,
+  :POWER: used when sending an immediate command, max, min, etc.
+:Required: yes
+:Example:
+
+    .. code-block:: javascript
+
+      "command", "CREATE"
+
+
+:Pair Name: "policy_type"
+:Description: Type of policy to apply. Please see vm_power_manager documentation
+  for more information on the types of policies that may be used.
+:Type: string
+:Values:
+
+  :TIME: Time-of-day policy. Frequencies of the relevant cores are
+    scaled up/down depending on busy and quiet hours.
+  :TRAFFIC: This policy takes statistics from the NIC and scales up
+    and down accordingly.
+  :WORKLOAD: This policy looks at how heavily loaded the cores are,
+    and scales up and down accordingly.
+  :BRANCH_RATIO: This out-of-band policy can look at the ratio between
+    branch hits and misses on a core, and is useful for detecting
+    how much packet processing a core is doing.
+:Required: only for CREATE/DESTROY command
+:Example:
+
+  .. code-block:: javascript
+
+    "policy_type", "TIME"
+
+:Pair Name: "busy_hours"
+:Description: The hours of the day in which we scale up the cores for busy
+  times.
+:Type: array of integers
+:Values: array with list of hour numbers, (0-23)
+:Required: only for TIME policy
+:Example:
+
+  .. code-block:: javascript
+
+    "busy_hours":[ 17, 18, 19, 20, 21, 22, 23 ]
+
+:Pair Name: "quiet_hours"
+:Description: The hours of the day in which we scale down the cores for quiet
+  times.
+:Type: array of integers
+:Values: array with list of hour numbers, (0-23)
+:Required: only for TIME policy
+:Example:
+
+  .. code-block:: javascript
+
+    "quiet_hours":[ 2, 3, 4, 5, 6 ]
+
+:Pair Name: "avg_packet_thresh"
+:Description: Threshold below which the frequency will be set to min for
+  the TRAFFIC policy. If the traffic rate is above this and below max, the
+  frequency will be set to medium.
+:Type: integer
+:Values: The number of packets below which the TRAFFIC policy applies the
+  minimum frequency, or medium frequency if between avg and max thresholds.
+:Required: only for TRAFFIC policy
+:Example:
+
+  .. code-block:: javascript
+
+    "avg_packet_thresh": 100000
+
+:Pair Name: "max_packet_thresh"
+:Description: Threshold above which the frequency will be set to max for
+  the TRAFFIC policy
+:Type: integer
+:Values: The number of packets per interval above which the TRAFFIC policy
+  applies the maximum frequency
+:Required: only for TRAFFIC policy
+:Example:
+
+  .. code-block:: javascript
+
+    "max_packet_thresh": 500000
+
+:Pair Name: "core_list"
+:Description: The cores to which to apply the policy.
+:Type: array of integers
+:Values: array with list of virtual CPUs.
+:Required: only policy CREATE/DESTROY
+:Example:
+
+  .. code-block:: javascript
+
+    "core_list":[ 10, 11 ]
+
+:Pair Name: "workload"
+:Description: When our policy is of type WORKLOAD, we need to specify how
+  heavy our workload is.
+:Type: string
+:Values:
+
+  :HIGH: For cores running workloads that require high frequencies
+  :MEDIUM: For cores running workloads that require medium frequencies
+  :LOW: For cores running workloads that require low frequencies
+:Required: only for WORKLOAD policy types
+:Example:
+
+  .. code-block:: javascript
+
+    "workload", "MEDIUM"
+
+:Pair Name: "mac_list"
+:Description: When our policy is of type TRAFFIC, we need to specify the
+  MAC addresses that the host needs to monitor
+:Type: string
+:Values: array with a list of mac address strings.
+:Required: only for TRAFFIC policy types
+:Example:
+
+  .. code-block:: javascript
+
+    "mac_list":[ "de:ad:be:ef:01:01", "de:ad:be:ef:01:02" ]
+
+:Pair Name: "unit"
+:Description: the type of power operation to apply in the command
+:Type: string
+:Values:
+
+  :SCALE_MAX: Scale frequency of this core to maximum
+  :SCALE_MIN: Scale frequency of this core to minimum
+  :SCALE_UP: Scale up frequency of this core
+  :SCALE_DOWN: Scale down frequency of this core
+  :ENABLE_TURBO: Enable Turbo Boost for this core
+  :DISABLE_TURBO: Disable Turbo Boost for this core
+:Required: only for POWER instruction
+:Example:
+
+  .. code-block:: javascript
+
+    "unit", "SCALE_MAX"
+
+:Pair Name: "resource_id"
+:Description: The core to which to apply the power command.
+:Type: integer
+:Values: valid core id for VM or host OS.
+:Required: only POWER instruction
+:Example:
+
+  .. code-block:: javascript
+
+    "resource_id": 10
+
+JSON API Examples
+~~~~~~~~~~~~~~~~~
+
+Policy create example:
+
+  .. code-block:: javascript
+
+    {"policy": {
+      "name": "ubuntu",
+      "command": "create",
+      "policy_type": "TIME",
+      "busy_hours":[ 17, 18, 19, 20, 21, 22, 23 ],
+      "quiet_hours":[ 2, 3, 4, 5, 6 ],
+      "core_list":[ 11 ]
+    }}
+
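+A policy of type TRAFFIC can be created in the same way. The following is
+a sketch assembled from the TRAFFIC-specific pairs described above; the
+threshold values are illustrative only:
+
+  .. code-block:: javascript
+
+    {"policy": {
+      "name": "ubuntu",
+      "command": "create",
+      "policy_type": "TRAFFIC",
+      "avg_packet_thresh": 100000,
+      "max_packet_thresh": 500000,
+      "mac_list":[ "de:ad:be:ef:01:01" ],
+      "core_list":[ 10, 11 ]
+    }}
+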
+Policy destroy example:
+
+  .. code-block:: javascript
+
+    {"profile": {
+      "name": "ubuntu",
+      "command": "destroy",
+    }}
+
+Power command example:
+
+  .. code-block:: javascript
+
+    {"command": {
+      "name": "ubuntu",
+      "unit": "SCALE_MAX",
+      "resource_id": 10
+    }}
+
+To send a JSON string to the Power Manager application, simply paste the
+example JSON string into a text file and cat it into the fifo:
+
+  .. code-block:: console
+
+    cat file.json >/tmp/powermonitor/fifo
+
+The console of the Power Manager application should indicate the command that
+was just received via the fifo.
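+
+The same can also be done programmatically by opening the fifo, writing the
+JSON string and closing the file again. The following is a minimal C sketch
+(not part of the sample application; it assumes the default fifo path shown
+above):
+
+  .. code-block:: c
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        /* Open the vm_power_manager fifo for writing. */
+        FILE *f = fopen("/tmp/powermonitor/fifo", "w");
+
+        if (f == NULL)
+            return 1;
+        /* Write one JSON instruction and close the fifo again. */
+        fputs("{\"instruction\": {\"name\": \"ubuntu\","
+              " \"unit\": \"SCALE_MAX\", \"resource_id\": 10}}", f);
+        fclose(f);
+        return 0;
+    }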
+
 Compiling and Running the Guest Applications
 --------------------------------------------
 
@@ -366,7 +646,7 @@ For compiling and running l3fwd-power, see :doc:`l3_forward_power_man`.
 
 The application is located in the ``guest_cli`` sub-directory under ``vm_power_manager``.
 
-To build just the ``guest_vm_power_manager`` application:
+To build just the ``guest_vm_power_manager`` application using ``make``:
 
 .. code-block:: console
 
@@ -375,6 +655,22 @@ To build just the ``guest_vm_power_manager`` application:
   cd ${RTE_SDK}/examples/vm_power_manager/guest_cli/
   make
 
+The resulting binary will be ``${RTE_SDK}/build/examples/guest_cli``.
+
+To build just the ``guest_vm_power_manager`` application using ``meson/ninja``:
+
+.. code-block:: console
+
+  export RTE_SDK=/path/to/rte_sdk
+  cd ${RTE_SDK}
+  meson build
+  cd build
+  ninja
+  meson configure -Dexamples=vm_power_manager/guest_cli
+  ninja
+
+The resulting binary will be ``${RTE_SDK}/build/examples/guest_cli``.
+
 Running
 ~~~~~~~
 
index f301c2b..c79fd0d 100644 (file)
@@ -133,6 +133,10 @@ See the DPDK Getting Started Guides for more information on these options.
 
     Use malloc instead of hugetlbfs.
 
+*   ``--iova-mode <pa|va>``
+
+    Force IOVA mode to a specific value.
+
 
 Testpmd Command-line Options
 ----------------------------
@@ -332,7 +336,7 @@ The commandline options are:
 
        io (the default)
        mac
-       mac_swap
+       macswap
        flowgen
        rxonly
        txonly
@@ -340,6 +344,7 @@ The commandline options are:
        icmpecho
        ieee1588
        tm
+       noisy
 
 *   ``--rss-ip``
 
@@ -498,3 +503,47 @@ The commandline options are:
 *   ``--no-mlockall``
 
     Disable locking all memory.
+
+*   ``--mp-alloc <native|anon|xmem|xmemhuge>``
+
+    Select mempool allocation mode:
+
+    * native: create and populate mempool using native DPDK memory
+    * anon: create mempool using native DPDK memory, but populate using
+      anonymous memory
+    * xmem: create and populate mempool using externally and anonymously
+      allocated area
+    * xmemhuge: create and populate mempool using externally and anonymously
+      allocated hugepage area
+
+*   ``--noisy-tx-sw-buffer-size``
+
+    Set the maximum number of elements of the FIFO queue to be created
+    for buffering packets. Only available with the noisy forwarding mode.
+    The default value is 0.
+
+*   ``--noisy-tx-sw-buffer-flushtime=N``
+
+    Set the time before packets in the FIFO queue are flushed.
+    Only available with the noisy forwarding mode. The default value is 0.
+
+*   ``--noisy-lkup-memory=N``
+
+    Set the size of the noisy neighbour simulation memory buffer in MB to N.
+    Only available with the noisy forwarding mode. The default value is 0.
+
+
+*   ``--noisy-lkup-num-reads=N``
+
+    Set the number of reads to be done in noisy neighbour simulation memory buffer to N.
+    Only available with the noisy forwarding mode. The default value is 0.
+
+*   ``--noisy-lkup-num-writes=N``
+
+    Set the number of writes to be done in noisy neighbour simulation memory buffer to N.
+    Only available with the noisy forwarding mode. The default value is 0.
+
+*   ``--noisy-lkup-num-reads-writes=N``
+
+    Set the number of r/w accesses to be done in noisy neighbour simulation memory buffer to N.
+    Only available with the noisy forwarding mode. The default value is 0.
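+
+A plausible invocation combining the noisy forwarding mode with these
+parameters might look as follows (all values are illustrative only)::
+
+    ./testpmd -l 0-3 -n 4 -- -i --forward-mode=noisy \
+        --noisy-lkup-memory=128 --noisy-lkup-num-reads-writes=100 \
+        --noisy-tx-sw-buffer-size=512 --noisy-tx-sw-buffer-flushtime=10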
index dde205a..e23079b 100644 (file)
@@ -159,12 +159,14 @@ show port
 
 Display information for a given port or all ports::
 
-   testpmd> show port (info|stats|xstats|fdir|stat_qmap|dcb_tc|cap) (port_id|all)
+   testpmd> show port (info|summary|stats|xstats|fdir|stat_qmap|dcb_tc|cap) (port_id|all)
 
 The available information categories are:
 
 * ``info``: General port information such as MAC address.
 
+* ``summary``: Brief port summary such as the device name, driver name, etc.
+
 * ``stats``: RX/TX statistics.
 
 * ``xstats``: RX/TX extended NIC statistics.
@@ -231,7 +233,7 @@ show port rss-hash
 
 Display the RSS hash functions and RSS hash key of a port::
 
-   testpmd> show port (port_id) rss-hash ipv4|ipv4-frag|ipv4-tcp|ipv4-udp|ipv4-sctp|ipv4-other|ipv6|ipv6-frag|ipv6-tcp|ipv6-udp|ipv6-sctp|ipv6-other|l2-payload|ipv6-ex|ipv6-tcp-ex|ipv6-udp-ex [key]
+   testpmd> show port (port_id) rss-hash [key]
 
 clear port
 ~~~~~~~~~~
@@ -289,7 +291,7 @@ set fwd
 Set the packet forwarding mode::
 
    testpmd> set fwd (io|mac|macswap|flowgen| \
-                     rxonly|txonly|csum|icmpecho) (""|retry)
+                     rxonly|txonly|csum|icmpecho|noisy) (""|retry)
 
 ``retry`` can be specified for forwarding engines except ``rx_only``.
 
@@ -323,6 +325,10 @@ The available information categories are:
 * ``softnic``: Demonstrates the softnic forwarding operation. In this mode, packet forwarding is
   similar to I/O mode except for the fact that packets are loopback to the softnic ports only. Therefore, portmask parameter should be set to softnic port only. The various software based custom NIC pipelines specified through the softnic firmware (DPDK packet framework script) can be tested in this mode. Furthermore, it allows to build 5-level hierarchical QoS scheduler as a default option that can be enabled through CLI once testpmd application is initialised. The user can modify the default scheduler hierarchy or can specify the new QoS Scheduler hierarchy through CLI. Requires ``CONFIG_RTE_LIBRTE_PMD_SOFTNIC=y``.
 
+* ``noisy``: Noisy neighbour simulation.
+  Simulates the more realistic behavior of a guest machine engaged in receiving
+  and sending packets while performing a Virtual Network Function (VNF).
+
 Example::
 
    testpmd> set fwd rxonly
@@ -417,6 +423,12 @@ List port level and all queue level Tx offloading configuration::
 
    testpmd> show port (port_id) tx_offload configuration
 
+show tx metadata setting
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Show Tx metadata value set for a specific port::
+
+   testpmd> show port (port_id) tx_metadata
 
 Configuration Functions
 -----------------------
@@ -443,7 +455,12 @@ Set the debug verbosity level::
 
    testpmd> set verbose (level)
 
-Currently the only available levels are 0 (silent except for error) and 1 (fully verbose).
+Available levels are as follows:
+
+* ``0``: silent except for errors.
+* ``1``: fully verbose except for Tx packets.
+* ``2``: fully verbose except for Rx packets.
+* ``> 2``: fully verbose.
 
 set log
 ~~~~~~~
@@ -592,6 +609,17 @@ For example, to change the port forwarding:
    RX P=1/Q=0 (socket 0) -> TX P=3/Q=0 (socket 0) peer=02:00:00:00:00:03
    RX P=3/Q=0 (socket 0) -> TX P=1/Q=0 (socket 0) peer=02:00:00:00:00:02
 
+set port setup on
+~~~~~~~~~~~~~~~~~
+
+Select how to retrieve new ports created after the ``port attach`` command::
+
+   testpmd> set port setup on (iterator|event)
+
+For each new port, a setup is done.
+In iterator mode, the probed ports are found via an
+``RTE_ETH_FOREACH_MATCHING_DEV`` loop; in event mode, via the
+``RTE_ETH_EVENT_NEW`` event.
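+
+For example, to have new ports set up from the ``RTE_ETH_EVENT_NEW`` event
+callback::
+
+   testpmd> set port setup on event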
+
 set tx loopback
 ~~~~~~~~~~~~~~~
 
@@ -857,7 +885,7 @@ csum set
 Select hardware or software calculation of the checksum when
 transmitting a packet using the ``csum`` forwarding engine::
 
-   testpmd> csum set (ip|udp|tcp|sctp|outer-ip) (hw|sw) (port_id)
+   testpmd> csum set (ip|udp|tcp|sctp|outer-ip|outer-udp) (hw|sw) (port_id)
 
 Where:
 
@@ -867,6 +895,10 @@ Where:
   as a tunnel packet by the forwarding engine (vxlan, gre and ipip are
   supported). See also the ``csum parse-tunnel`` command.
 
+* ``outer-udp`` relates to the outer UDP layer in the case where the packet is recognized
+  as a tunnel packet by the forwarding engine (vxlan, vxlan-gpe are
+  supported). See also the ``csum parse-tunnel`` command.
+
 .. note::
 
    Check the NIC Datasheet for hardware limits.
@@ -940,7 +972,7 @@ Consider a packet in packet like the following::
 
 * If parse-tunnel is enabled, the ``ip|udp|tcp|sctp`` parameters of ``csum set``
   command relate to the inner headers (here ``ipv4_in`` and ``tcp_in``), and the
-  ``outer-ip parameter`` relates to the outer headers (here ``ipv4_out``).
+  ``outer-ip|outer-udp`` parameter relates to the outer headers (here ``ipv4_out`` and ``udp_out``).
 
 * If parse-tunnel is disabled, the ``ip|udp|tcp|sctp`` parameters of ``csum  set``
    command relate to the outer headers, here ``ipv4_out`` and ``udp_out``.
@@ -1517,7 +1549,8 @@ Enable or disable a per port Tx offloading on all Tx queues of a port::
                   sctp_cksum, tcp_tso, udp_tso, outer_ipv4_cksum,
                   qinq_insert, vxlan_tnl_tso, gre_tnl_tso,
                   ipip_tnl_tso, geneve_tnl_tso, macsec_insert,
-                  mt_lockfree, multi_segs, mbuf_fast_free, security
+                  mt_lockfree, multi_segs, mbuf_fast_free, security,
+                  match_metadata
 
 This command should be run when the port is stopped, or else it will fail.
 
@@ -1570,6 +1603,92 @@ flow rule using the action nvgre_encap will use the last configuration set.
 To have a different encapsulation header, one of those commands must be called
 before the flow rule creation.
 
+Config L2 Encap
+~~~~~~~~~~~~~~~
+
+Configure the L2 header to be used when encapsulating a packet with L2::
+
+ set l2_encap ip-version (ipv4|ipv6) eth-src (eth-src) eth-dst (eth-dst)
+ set l2_encap-with-vlan ip-version (ipv4|ipv6) vlan-tci (vlan-tci) \
+        eth-src (eth-src) eth-dst (eth-dst)
+
+Those commands will set an internal configuration inside testpmd; any following
+flow rule using the action l2_encap will use the last configuration set.
+To have a different encapsulation header, one of those commands must be called
+before the flow rule creation.
+
+Config L2 Decap
+~~~~~~~~~~~~~~~
+
+Configure the L2 header to be removed when decapsulating a packet with L2::
+
+ set l2_decap ip-version (ipv4|ipv6)
+ set l2_decap-with-vlan ip-version (ipv4|ipv6)
+
+Those commands will set an internal configuration inside testpmd; any following
+flow rule using the action l2_decap will use the last configuration set.
+To have a different encapsulation header, one of those commands must be called
+before the flow rule creation.
+
+Config MPLSoGRE Encap outer layers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Configure the outer layer to encapsulate a packet inside an MPLSoGRE tunnel::
+
+ set mplsogre_encap ip-version (ipv4|ipv6) label (label) \
+        ip-src (ip-src) ip-dst (ip-dst) eth-src (eth-src) eth-dst (eth-dst)
+ set mplsogre_encap-with-vlan ip-version (ipv4|ipv6) label (label) \
+        ip-src (ip-src) ip-dst (ip-dst) vlan-tci (vlan-tci) \
+        eth-src (eth-src) eth-dst (eth-dst)
+
+Those commands will set an internal configuration inside testpmd; any following
+flow rule using the action mplsogre_encap will use the last configuration set.
+To have a different encapsulation header, one of those commands must be called
+before the flow rule creation.
+
+Config MPLSoGRE Decap outer layers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Configure the outer layer to decapsulate an MPLSoGRE packet::
+
+ set mplsogre_decap ip-version (ipv4|ipv6)
+ set mplsogre_decap-with-vlan ip-version (ipv4|ipv6)
+
+Those commands will set an internal configuration inside testpmd; any following
+flow rule using the action mplsogre_decap will use the last configuration set.
+To have a different decapsulation header, one of those commands must be called
+before the flow rule creation.
+
+Config MPLSoUDP Encap outer layers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Configure the outer layer to encapsulate a packet inside an MPLSoUDP tunnel::
+
+ set mplsoudp_encap ip-version (ipv4|ipv6) label (label) udp-src (udp-src) \
+        udp-dst (udp-dst) ip-src (ip-src) ip-dst (ip-dst) \
+        eth-src (eth-src) eth-dst (eth-dst)
+ set mplsoudp_encap-with-vlan ip-version (ipv4|ipv6) label (label) \
+        udp-src (udp-src) udp-dst (udp-dst) ip-src (ip-src) ip-dst (ip-dst) \
+        vlan-tci (vlan-tci) eth-src (eth-src) eth-dst (eth-dst)
+
+Those commands will set an internal configuration inside testpmd; any following
+flow rule using the action mplsoudp_encap will use the last configuration set.
+To have a different encapsulation header, one of those commands must be called
+before the flow rule creation.
+
+Config MPLSoUDP Decap outer layers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Configure the outer layer to decapsulate an MPLSoUDP packet::
+
+ set mplsoudp_decap ip-version (ipv4|ipv6)
+ set mplsoudp_decap-with-vlan ip-version (ipv4|ipv6)
+
+Those commands will set an internal configuration inside testpmd; any following
+flow rule using the action mplsoudp_decap will use the last configuration set.
+To have a different decapsulation header, one of those commands must be called
+before the flow rule creation.
+
 Port Functions
 --------------
 
@@ -1765,6 +1884,13 @@ Start/stop a rx/tx queue on a specific port::
 
    testpmd> port (port_id) (rxq|txq) (queue_id) (start|stop)
 
+port config - queue deferred start
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Switch on/off deferred start of a specific port queue::
+
+   testpmd> port (port_id) (rxq|txq) (queue_id) deferred_start (on|off)
+
 port setup queue
 ~~~~~~~~~~~~~~~~~~~~~
 
@@ -2006,6 +2132,14 @@ port config udp_tunnel_port
 Add/remove UDP tunnel port for VXLAN/GENEVE tunneling protocols::
     testpmd> port config (port_id) udp_tunnel_port add|rm vxlan|geneve (udp_port)
 
+port config tx_metadata
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Set Tx metadata value per port.
+testpmd will add this value to any Tx packet sent from this port::
+
+   testpmd> port config (port_id) tx_metadata (value)
+
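+For example, to attach the metadata value 1 to all packets sent from port 0
+(an illustrative value following the syntax shown above)::
+
+   testpmd> port config 0 tx_metadata 1
+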
 Link Bonding Functions
 ----------------------
 
@@ -2656,6 +2790,63 @@ where:
   call failure. On the other hand, hierarchy is preserved when this parameter
   is equal to zero.
 
+Set port traffic management mark VLAN dei
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Enables/Disables the traffic management marking on the port for VLAN packets::
+
+   testpmd> set port tm mark vlan_dei <port_id> <green> <yellow> <red>
+
+where:
+
+* ``port_id``: The port on which VLAN packets marked as ``green``,
+  ``yellow`` or ``red`` will have the DEI bit set
+
+* ``green``: 1 to enable, 0 to disable DEI bit marking for VLAN packets marked as green
+
+* ``yellow``: 1 to enable, 0 to disable DEI bit marking for VLAN packets marked as yellow
+
+* ``red``: 1 to enable, 0 to disable DEI bit marking for VLAN packets marked as red
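+
+For example, to enable DEI marking on port 0 for all three packet colors
+(an illustrative invocation following the syntax above)::
+
+   testpmd> set port tm mark vlan_dei 0 1 1 1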
+
+Set port traffic management mark IP dscp
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Enables/Disables the traffic management marking on the port for IP dscp packets::
+
+   testpmd> set port tm mark ip_dscp <port_id> <green> <yellow> <red>
+
+where:
+
+* ``port_id``: The port on which IP packets marked as ``green``,
+  ``yellow`` or ``red`` will have the IP DSCP bits updated
+
+* ``green``: 1 to enable, 0 to disable marking the IP DSCP to low drop precedence for green packets
+
+* ``yellow``: 1 to enable, 0 to disable marking the IP DSCP to medium drop precedence for yellow packets
+
+* ``red``: 1 to enable, 0 to disable marking the IP DSCP to high drop precedence for red packets
+
+Set port traffic management mark IP ecn
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Enables/Disables the traffic management marking on the port for IP ecn packets::
+
+   testpmd> set port tm mark ip_ecn <port_id> <green> <yellow> <red>
+
+where:
+
+* ``port_id``: The port on which IP packets marked as ``green``,
+  ``yellow`` or ``red`` will have the IP ECN bits updated
+
+* ``green``: 1 to enable, 0 to disable changing the ECN of green marked packets from 2'b01 or 2'b10
+  to 2'b11 when the IP packet is carrying TCP or SCTP
+
+* ``yellow``: 1 to enable, 0 to disable changing the ECN of yellow marked packets from 2'b01 or 2'b10
+  to 2'b11 when the IP packet is carrying TCP or SCTP
+
+* ``red``: 1 to enable, 0 to disable changing the ECN of red marked packets from 2'b01 or 2'b10
+  to 2'b11 when the IP packet is carrying TCP or SCTP
+
 Set port traffic management default hierarchy (softnic forwarding mode)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -3516,6 +3707,10 @@ This section lists supported pattern items and their attributes, if any.
 
   - ``tla {MAC-48}``: target Ethernet LLA.
 
+- ``meta``: match application-specific metadata.
+
+  - ``data {unsigned}``: metadata value.
+
 Actions list
 ^^^^^^^^^^^^
 
@@ -3697,6 +3892,68 @@ This section lists supported actions and their attributes, if any.
 - ``nvgre_decap``: Performs a decapsulation action by stripping all headers of
   the NVGRE tunnel network overlay from the matched flow.
 
+- ``l2_encap``: Performs an L2 encapsulation; the L2 configuration
+  is done through `Config L2 Encap`_.
+
+- ``l2_decap``: Performs an L2 decapsulation; the L2 configuration
+  is done through `Config L2 Decap`_.
+
+- ``mplsogre_encap``: Performs an MPLSoGRE encapsulation; the outer layer
+  configuration is done through `Config MPLSoGRE Encap outer layers`_.
+
+- ``mplsogre_decap``: Performs an MPLSoGRE decapsulation; the outer layer
+  configuration is done through `Config MPLSoGRE Decap outer layers`_.
+
+- ``mplsoudp_encap``: Performs an MPLSoUDP encapsulation; the outer layer
+  configuration is done through `Config MPLSoUDP Encap outer layers`_.
+
+- ``mplsoudp_decap``: Performs an MPLSoUDP decapsulation; the outer layer
+  configuration is done through `Config MPLSoUDP Decap outer layers`_.
+
+- ``set_ipv4_src``: Set a new IPv4 source address in the outermost IPv4 header.
+
+  - ``ipv4_addr``: New IPv4 source address.
+
+- ``set_ipv4_dst``: Set a new IPv4 destination address in the outermost IPv4
+  header.
+
+  - ``ipv4_addr``: New IPv4 destination address.
+
+- ``set_ipv6_src``: Set a new IPv6 source address in the outermost IPv6 header.
+
+  - ``ipv6_addr``: New IPv6 source address.
+
+- ``set_ipv6_dst``: Set a new IPv6 destination address in the outermost IPv6
+  header.
+
+  - ``ipv6_addr``: New IPv6 destination address.
+
+- ``of_set_tp_src``: Set a new source port number in the outermost TCP/UDP
+  header.
+
+  - ``port``: New TCP/UDP source port number.
+
+- ``of_set_tp_dst``: Set a new destination port number in the outermost TCP/UDP
+  header.
+
+  - ``port``: New TCP/UDP destination port number.
+
+- ``mac_swap``: Swap the source and destination MAC addresses in the outermost
+  Ethernet header.
+
+- ``dec_ttl``: Performs an action that decreases the TTL value.
+
+- ``set_ttl``: Set the TTL value to the specified value.
+
+  - ``ttl_value {unsigned}``: The new TTL value to be set.
+
+- ``set_mac_src``: set source MAC address
+
+  - ``mac_addr {MAC-48}``: new source MAC address
+
+- ``set_mac_dst``: set destination MAC address
+
+  - ``mac_addr {MAC-48}``: new destination MAC address
+
 Destroying flow rules
 ~~~~~~~~~~~~~~~~~~~~~
 
@@ -4025,6 +4282,180 @@ IPv6 NVGRE outer header::
  testpmd> flow create 0 ingress pattern end actions nvgre_encap /
         queue index 0 / end
 
+Sample L2 encapsulation rule
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+L2 encapsulation has default values pre-configured in the testpmd
+source code; these can be changed by using the following commands
+
+L2 header::
+
+ testpmd> set l2_encap ip-version ipv4
+        eth-src 11:11:11:11:11:11 eth-dst 22:22:22:22:22:22
+ testpmd> flow create 0 ingress pattern eth / ipv4 / udp / mpls / end actions
+        mplsoudp_decap / l2_encap / end
+
+L2 with VLAN header::
+
+ testpmd> set l2_encap-with-vlan ip-version ipv4 vlan-tci 34
+         eth-src 11:11:11:11:11:11 eth-dst 22:22:22:22:22:22
+ testpmd> flow create 0 ingress pattern eth / ipv4 / udp / mpls / end actions
+        mplsoudp_decap / l2_encap / end
+
+Sample L2 decapsulation rule
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+L2 decapsulation has default values pre-configured in the testpmd
+source code; these can be changed by using the following commands
+
+L2 header::
+
+ testpmd> set l2_decap
+ testpmd> flow create 0 egress pattern eth / end actions l2_decap / mplsoudp_encap /
+        queue index 0 / end
+
+L2 with VLAN header::
+
+ testpmd> set l2_decap-with-vlan
+ testpmd> flow create 0 egress pattern eth / end actions l2_decap / mplsoudp_encap /
+         queue index 0 / end
+
+Sample MPLSoGRE encapsulation rule
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+MPLSoGRE encapsulation outer layers have default values pre-configured in the
+testpmd source code; these can be changed by using the following commands
+
+IPv4 MPLSoGRE outer header::
+
+ testpmd> set mplsogre_encap ip-version ipv4 label 4
+        ip-src 127.0.0.1 ip-dst 128.0.0.1 eth-src 11:11:11:11:11:11
+        eth-dst 22:22:22:22:22:22
+ testpmd> flow create 0 egress pattern eth / end actions l2_decap /
+        mplsogre_encap / end
+
+IPv4 MPLSoGRE with VLAN outer header::
+
+ testpmd> set mplsogre_encap-with-vlan ip-version ipv4 label 4
+        ip-src 127.0.0.1 ip-dst 128.0.0.1 vlan-tci 34
+        eth-src 11:11:11:11:11:11 eth-dst 22:22:22:22:22:22
+ testpmd> flow create 0 egress pattern eth / end actions l2_decap /
+        mplsogre_encap / end
+
+IPv6 MPLSoGRE outer header::
+
+ testpmd> set mplsogre_encap ip-version ipv6 label 4
+        ip-src ::1 ip-dst ::2222 eth-src 11:11:11:11:11:11
+        eth-dst 22:22:22:22:22:22
+ testpmd> flow create 0 egress pattern eth / end actions l2_decap /
+        mplsogre_encap / end
+
+IPv6 MPLSoGRE with VLAN outer header::
+
+ testpmd> set mplsogre_encap-with-vlan ip-version ipv6 label 4
+        ip-src ::1 ip-dst ::2222 vlan-tci 34
+        eth-src 11:11:11:11:11:11 eth-dst 22:22:22:22:22:22
+ testpmd> flow create 0 egress pattern eth / end actions l2_decap /
+        mplsogre_encap / end
+
+Sample MPLSoGRE decapsulation rule
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+MPLSoGRE decapsulation outer layers have default values pre-configured in the
+testpmd source code; these can be changed by using the following commands
+
+IPv4 MPLSoGRE outer header::
+
+ testpmd> set mplsogre_decap ip-version ipv4
+ testpmd> flow create 0 ingress pattern eth / ipv4 / gre / mpls / end actions
+        mplsogre_decap / l2_encap / end
+
+IPv4 MPLSoGRE with VLAN outer header::
+
+ testpmd> set mplsogre_decap-with-vlan ip-version ipv4
+ testpmd> flow create 0 ingress pattern eth / vlan / ipv4 / gre / mpls / end
+        actions mplsogre_decap / l2_encap / end
+
+IPv6 MPLSoGRE outer header::
+
+ testpmd> set mplsogre_decap ip-version ipv6
+ testpmd> flow create 0 ingress pattern eth / ipv6 / gre / mpls / end
+        actions mplsogre_decap / l2_encap / end
+
+IPv6 MPLSoGRE with VLAN outer header::
+
+ testpmd> set mplsogre_decap-with-vlan ip-version ipv6
+ testpmd> flow create 0 ingress pattern eth / vlan / ipv6 / gre / mpls / end
+        actions mplsogre_decap / l2_encap / end
+
+Sample MPLSoUDP encapsulation rule
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+MPLSoUDP encapsulation outer layers have default values pre-configured in the
+testpmd source code; these can be changed by using the following commands
+
+IPv4 MPLSoUDP outer header::
+
+ testpmd> set mplsoudp_encap ip-version ipv4 label 4 udp-src 5 udp-dst 10
+        ip-src 127.0.0.1 ip-dst 128.0.0.1 eth-src 11:11:11:11:11:11
+        eth-dst 22:22:22:22:22:22
+ testpmd> flow create 0 egress pattern eth / end actions l2_decap /
+        mplsoudp_encap / end
+
+IPv4 MPLSoUDP with VLAN outer header::
+
+ testpmd> set mplsoudp_encap-with-vlan ip-version ipv4 label 4 udp-src 5
+        udp-dst 10 ip-src 127.0.0.1 ip-dst 128.0.0.1 vlan-tci 34
+        eth-src 11:11:11:11:11:11 eth-dst 22:22:22:22:22:22
+ testpmd> flow create 0 egress pattern eth / end actions l2_decap /
+        mplsoudp_encap / end
+
+IPv6 MPLSoUDP outer header::
+
+ testpmd> set mplsoudp_encap ip-version ipv6 label 4 udp-src 5 udp-dst 10
+        ip-src ::1 ip-dst ::2222 eth-src 11:11:11:11:11:11
+        eth-dst 22:22:22:22:22:22
+ testpmd> flow create 0 egress pattern eth / end actions l2_decap /
+        mplsoudp_encap / end
+
+IPv6 MPLSoUDP with VLAN outer header::
+
+ testpmd> set mplsoudp_encap-with-vlan ip-version ipv6 label 4 udp-src 5
+        udp-dst 10 ip-src ::1 ip-dst ::2222 vlan-tci 34
+        eth-src 11:11:11:11:11:11 eth-dst 22:22:22:22:22:22
+ testpmd> flow create 0 egress pattern eth / end actions l2_decap /
+        mplsoudp_encap / end
+
+Sample MPLSoUDP decapsulation rule
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+MPLSoUDP decapsulation outer layers have default values pre-configured in the
+testpmd source code; these can be changed by using the following commands
+
+IPv4 MPLSoUDP outer header::
+
+ testpmd> set mplsoudp_decap ip-version ipv4
+ testpmd> flow create 0 ingress pattern eth / ipv4 / udp / mpls / end actions
+        mplsoudp_decap / l2_encap / end
+
+IPv4 MPLSoUDP with VLAN outer header::
+
+ testpmd> set mplsoudp_decap-with-vlan ip-version ipv4
+ testpmd> flow create 0 ingress pattern eth / vlan / ipv4 / udp / mpls / end
+        actions mplsoudp_decap / l2_encap / end
+
+IPv6 MPLSoUDP outer header::
+
+ testpmd> set mplsoudp_decap ip-version ipv6
+ testpmd> flow create 0 ingress pattern eth / ipv6 / udp / mpls / end
+        actions mplsoudp_decap / l2_encap / end
+
+IPv6 MPLSoUDP with VLAN outer header::
+
+ testpmd> set mplsoudp_decap-with-vlan ip-version ipv6
+ testpmd> flow create 0 ingress pattern eth / vlan / ipv6 / udp / mpls / end
+        actions mplsoudp_decap / l2_encap / end
+
 BPF Functions
 --------------
 
index e333679..707b9b5 100644 (file)
@@ -20,7 +20,7 @@
    height="288.34286"
    id="svg3868"
    version="1.1"
-   inkscape:version="0.92.2 (5c3e80d, 2017-08-06)"
+   inkscape:version="0.92.2 2405546, 2018-03-11"
    sodipodi:docname="eventdev_pipeline_atq_test_generic.svg"
    sodipodi:version="0.32"
    inkscape:output_extension="org.inkscape.output.svg.inkscape"
          d="M 5.77,0 -2.88,5 V -5 Z"
          id="path39725" />
     </marker>
-    <marker
-       inkscape:stockid="TriangleOutM"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="marker35935"
-       style="overflow:visible"
-       inkscape:isstock="true"
-       inkscape:collect="always">
-      <path
-         id="path35933"
-         d="M 5.77,0 -2.88,5 V -5 Z"
-         style="fill:#ac14ff;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
-         transform="scale(0.4)"
-         inkscape:connector-curvature="0" />
-    </marker>
     <marker
        inkscape:isstock="true"
        style="overflow:visible"
        x2="677.85718"
        y2="244.50504"
        gradientUnits="userSpaceOnUse"
-       gradientTransform="matrix(0.78263355,0,0,0.98605918,90.06838,5.0013749)" />
+       gradientTransform="matrix(0.84881476,0,0,0.98593266,86.966576,5.0323108)" />
     <linearGradient
-       gradientTransform="matrix(0.78674479,0,0,1.0000825,87.83543,1.2279738)"
+       gradientTransform="matrix(0.85327366,0,0,0.99995418,84.544803,1.2593939)"
        inkscape:collect="always"
        xlink:href="#linearGradient6391"
        id="linearGradient2965"
        effect="spiro"
        id="path-effect14461-7-5-6"
        is_visible="true" />
-    <linearGradient
-       inkscape:collect="always"
-       xlink:href="#linearGradient3993"
-       id="linearGradient3995-5"
-       x1="155.21329"
-       y1="231.61366"
-       x2="207.95523"
-       y2="231.61366"
-       gradientUnits="userSpaceOnUse"
-       gradientTransform="translate(454.68566,-41.755492)" />
-    <linearGradient
-       inkscape:collect="always"
-       xlink:href="#linearGradient3993"
-       id="linearGradient3995-5-6"
-       x1="155.21329"
-       y1="231.61366"
-       x2="207.95523"
-       y2="231.61366"
-       gradientUnits="userSpaceOnUse"
-       gradientTransform="translate(373.71198,205.50594)" />
-    <linearGradient
-       inkscape:collect="always"
-       xlink:href="#linearGradient3993"
-       id="linearGradient3995-5-6-4"
-       x1="155.21329"
-       y1="231.61366"
-       x2="207.95523"
-       y2="231.61366"
-       gradientUnits="userSpaceOnUse"
-       gradientTransform="translate(454.58517,69.679557)" />
     <inkscape:path-effect
        effect="bspline"
        id="path-effect2658-8"
        apply_no_weight="true"
        apply_with_weight="true"
        only_selected="false" />
-    <linearGradient
-       inkscape:collect="always"
-       xlink:href="#linearGradient3438"
-       id="linearGradient16362"
-       gradientUnits="userSpaceOnUse"
-       gradientTransform="translate(2.283166,-2.283166)"
-       x1="534.06958"
-       y1="163.49922"
-       x2="580.73291"
-       y2="163.49922" />
     <marker
        inkscape:stockid="Arrow1Mend"
        orient="auto"
        apply_no_weight="true"
        apply_with_weight="true"
        only_selected="false" />
+    <inkscape:path-effect
+       effect="bspline"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2"
+       is_visible="true"
+       weight="33.333333"
+       steps="2"
+       helper_size="0"
+       apply_no_weight="true"
+       apply_with_weight="true"
+       only_selected="false" />
+    <inkscape:path-effect
+       effect="bspline"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9"
+       is_visible="true"
+       weight="33.333333"
+       steps="2"
+       helper_size="0"
+       apply_no_weight="true"
+       apply_with_weight="true"
+       only_selected="false" />
+    <inkscape:path-effect
+       effect="bspline"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-4"
+       is_visible="true"
+       weight="33.333333"
+       steps="2"
+       helper_size="0"
+       apply_no_weight="true"
+       apply_with_weight="true"
+       only_selected="false" />
+    <marker
+       inkscape:isstock="true"
+       style="overflow:visible"
+       id="marker32613-8-5"
+       refX="0"
+       refY="0"
+       orient="auto"
+       inkscape:stockid="TriangleOutM">
+      <path
+         inkscape:connector-curvature="0"
+         transform="scale(0.4)"
+         style="fill:#ac14ff;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
+         d="M 5.77,0 -2.88,5 V -5 Z"
+         id="path32611-8-0" />
+    </marker>
+    <inkscape:path-effect
+       effect="bspline"
+       id="path-effect5228-5-1-6-2-9-4-6-4-4"
+       is_visible="true"
+       weight="33.333333"
+       steps="2"
+       helper_size="0"
+       apply_no_weight="true"
+       apply_with_weight="true"
+       only_selected="false" />
     <marker
        inkscape:stockid="TriangleOutM"
        orient="auto"
        refY="0"
        refX="0"
-       id="marker35935-1"
+       id="TriangleOutM-5-2-3"
        style="overflow:visible"
        inkscape:isstock="true"
        inkscape:collect="always">
       <path
-         id="path35933-49"
+         inkscape:connector-curvature="0"
+         id="path2123-3-9-20"
          d="M 5.77,0 -2.88,5 V -5 Z"
-         style="fill:#ac14ff;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
-         transform="scale(0.4)"
-         inkscape:connector-curvature="0" />
+         style="fill:#f78202;fill-opacity:1;fill-rule:evenodd;stroke:#f78202;stroke-width:1.00000003pt;stroke-opacity:1"
+         transform="scale(0.4)" />
     </marker>
     <inkscape:path-effect
        effect="bspline"
-       id="path-effect5228-5-1-6-2-9-4-6-1-2"
+       id="path-effect5228-5-1-61"
        is_visible="true"
        weight="33.333333"
        steps="2"
        orient="auto"
        refY="0"
        refX="0"
-       id="marker35935-1-6"
+       id="TriangleOutM-5-2-3-0"
        style="overflow:visible"
        inkscape:isstock="true"
        inkscape:collect="always">
       <path
-         id="path35933-49-8"
+         inkscape:connector-curvature="0"
+         id="path2123-3-9-20-6"
          d="M 5.77,0 -2.88,5 V -5 Z"
-         style="fill:#ac14ff;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
-         transform="scale(0.4)"
-         inkscape:connector-curvature="0" />
+         style="fill:#f78202;fill-opacity:1;fill-rule:evenodd;stroke:#f78202;stroke-width:1.00000003pt;stroke-opacity:1"
+         transform="scale(0.4)" />
     </marker>
     <inkscape:path-effect
        effect="bspline"
-       id="path-effect5228-5-1-6-2-9-4-6-1-2-9"
+       id="path-effect5228-5-1-61-1"
        is_visible="true"
        weight="33.333333"
        steps="2"
        orient="auto"
        refY="0"
        refX="0"
-       id="marker35935-1-6-6"
+       id="TriangleOutM-5-2-3-9"
        style="overflow:visible"
        inkscape:isstock="true"
        inkscape:collect="always">
       <path
-         id="path35933-49-8-6"
+         inkscape:connector-curvature="0"
+         id="path2123-3-9-20-4"
          d="M 5.77,0 -2.88,5 V -5 Z"
-         style="fill:#ac14ff;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
-         transform="scale(0.4)"
-         inkscape:connector-curvature="0" />
+         style="fill:#f78202;fill-opacity:1;fill-rule:evenodd;stroke:#f78202;stroke-width:1.00000003pt;stroke-opacity:1"
+         transform="scale(0.4)" />
     </marker>
     <inkscape:path-effect
        effect="bspline"
-       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-4"
+       id="path-effect5228-5-1-61-9"
        is_visible="true"
        weight="33.333333"
        steps="2"
        apply_no_weight="true"
        apply_with_weight="true"
        only_selected="false" />
+    <linearGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient3993"
+       id="linearGradient3995-5"
+       x1="155.21329"
+       y1="231.61366"
+       x2="207.95523"
+       y2="231.61366"
+       gradientUnits="userSpaceOnUse"
+       gradientTransform="translate(454.68566,-41.755492)" />
+    <linearGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient3993"
+       id="linearGradient3995-8"
+       x1="155.21329"
+       y1="231.61366"
+       x2="207.95523"
+       y2="231.61366"
+       gradientUnits="userSpaceOnUse"
+       gradientTransform="matrix(1.3938205,0,0,0.9944124,282.08991,-43.80364)" />
     <marker
-       inkscape:isstock="true"
-       style="overflow:visible"
-       id="marker32613-8-5"
-       refX="0"
-       refY="0"
+       inkscape:stockid="TriangleOutM"
        orient="auto"
-       inkscape:stockid="TriangleOutM">
+       refY="0"
+       refX="0"
+       id="marker35935-1-6-5-1-0"
+       style="overflow:visible"
+       inkscape:isstock="true"
+       inkscape:collect="always">
       <path
-         inkscape:connector-curvature="0"
-         transform="scale(0.4)"
-         style="fill:#ac14ff;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
+         id="path35933-49-8-6-2-3"
          d="M 5.77,0 -2.88,5 V -5 Z"
-         id="path32611-8-0" />
+         style="fill:#ac14db;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
+         transform="scale(0.4)"
+         inkscape:connector-curvature="0" />
     </marker>
     <inkscape:path-effect
        effect="bspline"
-       id="path-effect5228-5-1-6-2-9-4-6-4-4"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7-6"
        is_visible="true"
        weight="33.333333"
        steps="2"
        apply_no_weight="true"
        apply_with_weight="true"
        only_selected="false" />
-    <linearGradient
-       inkscape:collect="always"
-       xlink:href="#linearGradient3993"
-       id="linearGradient1920-1"
-       x1="475.00314"
-       y1="156.97769"
-       x2="515.13684"
-       y2="156.97769"
-       gradientUnits="userSpaceOnUse"
-       gradientTransform="matrix(0.6515192,0,0,1.0041442,189.20967,67.917365)" />
     <marker
        inkscape:stockid="TriangleOutM"
        orient="auto"
        refY="0"
        refX="0"
-       id="TriangleOutM-5-2-3"
+       id="marker35935-1-6-5-1-0-0"
        style="overflow:visible"
        inkscape:isstock="true"
        inkscape:collect="always">
       <path
-         inkscape:connector-curvature="0"
-         id="path2123-3-9-20"
+         id="path35933-49-8-6-2-3-6"
          d="M 5.77,0 -2.88,5 V -5 Z"
-         style="fill:#f78202;fill-opacity:1;fill-rule:evenodd;stroke:#f78202;stroke-width:1.00000003pt;stroke-opacity:1"
-         transform="scale(0.4)" />
+         style="fill:#ac14db;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
+         transform="scale(0.4)"
+         inkscape:connector-curvature="0" />
     </marker>
     <inkscape:path-effect
        effect="bspline"
-       id="path-effect5228-5-1-61"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7-6-3"
        is_visible="true"
        weight="33.333333"
        steps="2"
        apply_no_weight="true"
        apply_with_weight="true"
        only_selected="false" />
+    <linearGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient3993"
+       id="linearGradient3995-5-6"
+       x1="155.21329"
+       y1="231.61366"
+       x2="207.95523"
+       y2="231.61366"
+       gradientUnits="userSpaceOnUse"
+       gradientTransform="translate(373.71198,205.50594)" />
+    <linearGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient3993"
+       id="linearGradient3995-5-6-4"
+       x1="155.21329"
+       y1="231.61366"
+       x2="207.95523"
+       y2="231.61366"
+       gradientUnits="userSpaceOnUse"
+       gradientTransform="translate(454.58517,69.679557)" />
     <marker
        inkscape:stockid="TriangleOutM"
        orient="auto"
        refY="0"
        refX="0"
-       id="TriangleOutM-5-2-3-0"
+       id="marker35935-1-6-5-1-0-06"
        style="overflow:visible"
        inkscape:isstock="true"
        inkscape:collect="always">
       <path
-         inkscape:connector-curvature="0"
-         id="path2123-3-9-20-6"
+         id="path35933-49-8-6-2-3-1"
          d="M 5.77,0 -2.88,5 V -5 Z"
-         style="fill:#f78202;fill-opacity:1;fill-rule:evenodd;stroke:#f78202;stroke-width:1.00000003pt;stroke-opacity:1"
-         transform="scale(0.4)" />
+         style="fill:#ac14db;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
+         transform="scale(0.4)"
+         inkscape:connector-curvature="0" />
     </marker>
     <inkscape:path-effect
        effect="bspline"
-       id="path-effect5228-5-1-61-1"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7-6-5"
        is_visible="true"
        weight="33.333333"
        steps="2"
        apply_no_weight="true"
        apply_with_weight="true"
        only_selected="false" />
+    <linearGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient3993"
+       id="linearGradient3995-8-9-9"
+       x1="155.21329"
+       y1="231.61366"
+       x2="207.95523"
+       y2="231.61366"
+       gradientUnits="userSpaceOnUse"
+       gradientTransform="matrix(1.3938205,0,0,0.9944124,282.25651,68.385308)" />
+    <linearGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient3993"
+       id="linearGradient3995-8-9"
+       x1="155.21329"
+       y1="231.61366"
+       x2="207.95523"
+       y2="231.61366"
+       gradientUnits="userSpaceOnUse"
+       gradientTransform="matrix(1.3938205,0,0,0.9944124,282.88878,12.631328)" />
+    <linearGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient3993"
+       id="linearGradient1760-3"
+       x1="405.34961"
+       y1="243.36557"
+       x2="651.55652"
+       y2="243.36557"
+       gradientUnits="userSpaceOnUse"
+       gradientTransform="matrix(0.65213006,0,0,0.72134316,230.98899,64.590305)" />
     <marker
        inkscape:stockid="TriangleOutM"
        orient="auto"
        refY="0"
        refX="0"
-       id="TriangleOutM-5-2-3-9"
+       id="marker35935-1-6-5"
        style="overflow:visible"
        inkscape:isstock="true"
        inkscape:collect="always">
       <path
-         inkscape:connector-curvature="0"
-         id="path2123-3-9-20-4"
+         id="path35933-49-8-6"
          d="M 5.77,0 -2.88,5 V -5 Z"
-         style="fill:#f78202;fill-opacity:1;fill-rule:evenodd;stroke:#f78202;stroke-width:1.00000003pt;stroke-opacity:1"
-         transform="scale(0.4)" />
+         style="fill:#ac14db;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
+         transform="scale(0.4)"
+         inkscape:connector-curvature="0" />
     </marker>
     <inkscape:path-effect
        effect="bspline"
-       id="path-effect5228-5-1-61-9"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-2"
        is_visible="true"
        weight="33.333333"
        steps="2"
        apply_no_weight="true"
        apply_with_weight="true"
        only_selected="false" />
-  </defs>
-  <sodipodi:namedview
-     id="base"
-     pagecolor="#ffffff"
-     bordercolor="#666666"
+    <marker
+       inkscape:stockid="TriangleOutM"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="marker35935-1-6-5-1"
+       style="overflow:visible"
+       inkscape:isstock="true"
+       inkscape:collect="always">
+      <path
+         id="path35933-49-8-6-2"
+         d="M 5.77,0 -2.88,5 V -5 Z"
+         style="fill:#ac14db;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
+         transform="scale(0.4)"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <inkscape:path-effect
+       effect="bspline"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7"
+       is_visible="true"
+       weight="33.333333"
+       steps="2"
+       helper_size="0"
+       apply_no_weight="true"
+       apply_with_weight="true"
+       only_selected="false" />
+    <marker
+       inkscape:stockid="TriangleOutM"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="marker35935-1-6-5-9"
+       style="overflow:visible"
+       inkscape:isstock="true"
+       inkscape:collect="always">
+      <path
+         id="path35933-49-8-6-3"
+         d="M 5.77,0 -2.88,5 V -5 Z"
+         style="fill:#ac14db;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
+         transform="scale(0.4)"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <inkscape:path-effect
+       effect="bspline"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-2-6"
+       is_visible="true"
+       weight="33.333333"
+       steps="2"
+       helper_size="0"
+       apply_no_weight="true"
+       apply_with_weight="true"
+       only_selected="false" />
+  </defs>
+  <sodipodi:namedview
+     id="base"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
      borderopacity="1.0"
      inkscape:pageopacity="0.0"
      inkscape:pageshadow="2"
-     inkscape:zoom="1.53467"
-     inkscape:cx="477.6217"
-     inkscape:cy="141.14731"
+     inkscape:zoom="2.200307"
+     inkscape:cx="336.61535"
+     inkscape:cy="145.77389"
      inkscape:document-units="px"
      inkscape:current-layer="layer1"
      showgrid="false"
-     inkscape:window-width="1920"
-     inkscape:window-height="1046"
-     inkscape:window-x="1920"
-     inkscape:window-y="34"
-     inkscape:window-maximized="1"
+     inkscape:window-width="1912"
+     inkscape:window-height="1033"
+     inkscape:window-x="4"
+     inkscape:window-y="22"
+     inkscape:window-maximized="0"
      fit-margin-top="0.1"
      fit-margin-left="0.1"
      fit-margin-right="0.1"
      transform="translate(-46.542857,-100.33361)"
      style="display:inline;opacity:1">
     <rect
-       style="fill:url(#linearGradient4519);fill-opacity:1;stroke:url(#linearGradient2965);stroke-width:0.87847757;stroke-opacity:1"
+       style="fill:url(#linearGradient4519);fill-opacity:1;stroke:url(#linearGradient2965);stroke-width:0.91480815;stroke-opacity:1"
        id="rect3697"
-       width="493.61813"
-       height="283.13986"
-       x="126.96397"
-       y="104.52792"
+       width="535.35956"
+       height="283.10355"
+       x="126.98213"
+       y="104.54609"
        rx="0"
        ry="0" />
     <text
          x="199.44385"
          y="188.49918"
          id="tspan5223-0-9"
-         style="font-size:10px;line-height:1.25">port n+2</tspan></text>
+         style="font-size:10px;line-height:1.25">port n+1</tspan></text>
     <rect
        style="display:inline;opacity:1;fill:none;fill-opacity:1;stroke:url(#linearGradient1920);stroke-width:1.06814909;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
        id="rect3736-8-4"
          x="199.35846"
          y="244.55573"
          id="tspan5223-0-9-0"
-         style="font-size:10px;line-height:1.25">port n+3</tspan></text>
+         style="font-size:10px;line-height:1.25">port n+2</tspan></text>
     <rect
        style="display:inline;opacity:1;fill:none;fill-opacity:1;stroke:url(#linearGradient1920-2);stroke-width:1.06814909;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
        id="rect3736-8-4-6"
          x="242.32845"
          y="123.36828"
          id="tspan5223-10"
-         style="font-size:10px;line-height:1.25">total queues = number of ethernet dev + 1</tspan></text>
+         style="font-size:10px;line-height:1.25">total queues = 2 * number of ethernet dev</tspan></text>
     <text
        xml:space="preserve"
        style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
          x="285.26294"
          y="240.01315"
          style="stroke-width:0.68894428" /></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:11.59418297px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.96618187"
+       x="259.86884"
+       y="164.78368"
+       id="text5219-2-3-7-2-1"
+       transform="scale(0.97663117,1.023928)"><tspan
+         sodipodi:role="line"
+         x="259.86884"
+         y="164.78368"
+         id="tspan5223-0-6-5-9-5"
+         style="font-size:8px;line-height:1.25;stroke-width:0.96618187">Event eth</tspan><tspan
+         sodipodi:role="line"
+         x="259.86884"
+         y="174.78368"
+         style="font-size:8px;line-height:1.25;stroke-width:0.96618187"
+         id="tspan883-1-9">Rx adptr 0</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:11.59418297px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.96618187"
+       x="260.25055"
+       y="217.84813"
+       id="text5219-2-3-7-2-1-4"
+       transform="scale(0.97663117,1.023928)"><tspan
+         sodipodi:role="line"
+         x="260.25055"
+         y="217.84813"
+         id="tspan5223-0-6-5-9-5-4"
+         style="font-size:8px;line-height:1.25;stroke-width:0.96618187">Event eth</tspan><tspan
+         sodipodi:role="line"
+         x="260.25055"
+         y="227.84813"
+         style="font-size:8px;line-height:1.25;stroke-width:0.96618187"
+         id="tspan883-1-9-4">Rx adptr 1</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:11.59418297px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.96618187"
+       x="260.25055"
+       y="271.71359"
+       id="text5219-2-3-7-2-1-47"
+       transform="scale(0.97663117,1.023928)"><tspan
+         sodipodi:role="line"
+         x="260.25055"
+         y="271.71359"
+         id="tspan5223-0-6-5-9-5-6"
+         style="font-size:8px;line-height:1.25;stroke-width:0.96618187">Event eth</tspan><tspan
+         sodipodi:role="line"
+         x="260.25055"
+         y="281.71359"
+         style="font-size:8px;line-height:1.25;stroke-width:0.96618187"
+         id="tspan883-1-9-3">Rx adptr q</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:9.9315424px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.82762849"
+       x="595.27808"
+       y="136.64076"
+       id="text5219-2-4-3-3-4-54-8-8"
+       transform="scale(0.91487885,1.0930409)"><tspan
+         sodipodi:role="line"
+         x="595.27808"
+         y="139.22064"
+         style="font-size:7.17278051px;line-height:1.25;stroke-width:0.82762849"
+         id="tspan1265-4-6-7" /></text>
+    <path
+       style="display:inline;opacity:1;fill:none;stroke:#f78202;stroke-width:0.89999998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.6, 0.9;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#TriangleOutM-5-2-3)"
+       d="m 356.74765,186.83153 c 15.88009,-0.11696 31.75919,-0.23391 47.6373,-0.35085"
+       id="path5226-6-2-5"
+       inkscape:connector-curvature="0"
+       inkscape:path-effect="#path-effect5228-5-1-61"
+       inkscape:original-d="m 356.74765,186.83153 c 15.88008,-0.11795 31.75918,-0.2349 47.6373,-0.35085"
+       sodipodi:nodetypes="cc" />
+    <path
+       style="display:inline;opacity:1;fill:none;stroke:#f78202;stroke-width:0.89999998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.6, 0.9;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#TriangleOutM-5-2-3-0)"
+       d="m 357.05625,242.97941 c 15.74231,0.0176 31.48469,0.0352 47.22712,0.0528"
+       id="path5226-6-2-5-5"
+       inkscape:connector-curvature="0"
+       inkscape:path-effect="#path-effect5228-5-1-61-1"
+       inkscape:original-d="m 357.05625,242.97941 c 15.74231,0.0166 31.48469,0.0342 47.22712,0.0528"
+       sodipodi:nodetypes="cc" />
+    <path
+       style="display:inline;opacity:1;fill:none;stroke:#f78202;stroke-width:0.89337438;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.57349763, 0.89337441;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#TriangleOutM-5-2-3-9)"
+       d="m 356.67155,297.12049 c 15.97521,0.0733 31.94945,0.14663 47.92273,0.21994"
+       id="path5226-6-2-5-0"
+       inkscape:connector-curvature="0"
+       inkscape:path-effect="#path-effect5228-5-1-61-9"
+       inkscape:original-d="m 356.67155,297.12049 c 15.97521,0.0723 31.94945,0.14563 47.92273,0.21994"
+       sodipodi:nodetypes="cc" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:9.9315424px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.82762849"
+       x="606.06958"
+       y="346.46628"
+       id="text5219-2-4-3-3-4-54-8-7"
+       transform="scale(0.91487885,1.0930409)"><tspan
+         sodipodi:role="line"
+         x="606.06958"
+         y="346.46628"
+         style="font-size:7.17278051px;line-height:1.25;stroke-width:0.82762849"
+         id="tspan1265-4-6-2">(Tx Generic)</tspan></text>
     <g
+       style="display:inline;opacity:1"
        id="g20550"
-       transform="translate(25.709043,-190.70754)">
+       transform="translate(69.258261,-194.86398)">
       <rect
          ry="16.293755"
          rx="11.6051"
            sodipodi:role="line">    Txq 0</tspan></text>
     </g>
     <g
+       style="display:inline;opacity:1"
        id="g13899"
-       transform="translate(-54.904385,-3.0966742)">
+       transform="translate(-12.211349,-3.253112)">
       <rect
          ry="16.293755"
          rx="11.6051"
            sodipodi:role="line">    Txq 0</tspan></text>
     </g>
     <g
+       style="display:inline;opacity:1"
        id="g13911"
-       transform="translate(-54.904385,-1.0966741)">
+       transform="translate(-10.498979,-2.682322)">
       <rect
          ry="16.293755"
          rx="11.6051"
            x="621.71729"
            sodipodi:role="line">    Txq 0</tspan></text>
     </g>
-    <text
-       xml:space="preserve"
-       style="font-style:normal;font-weight:normal;font-size:11.59418297px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.96618187"
-       x="259.86884"
-       y="164.78368"
-       id="text5219-2-3-7-2-1"
-       transform="scale(0.97663117,1.023928)"><tspan
-         sodipodi:role="line"
-         x="259.86884"
-         y="164.78368"
-         id="tspan5223-0-6-5-9-5"
-         style="font-size:8px;line-height:1.25;stroke-width:0.96618187">Event eth</tspan><tspan
-         sodipodi:role="line"
-         x="259.86884"
-         y="174.78368"
-         style="font-size:8px;line-height:1.25;stroke-width:0.96618187"
-         id="tspan883-1-9">Rx adptr 0</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-style:normal;font-weight:normal;font-size:11.59418297px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.96618187"
-       x="260.25055"
-       y="217.84813"
-       id="text5219-2-3-7-2-1-4"
-       transform="scale(0.97663117,1.023928)"><tspan
-         sodipodi:role="line"
-         x="260.25055"
-         y="217.84813"
-         id="tspan5223-0-6-5-9-5-4"
-         style="font-size:8px;line-height:1.25;stroke-width:0.96618187">Event eth</tspan><tspan
-         sodipodi:role="line"
-         x="260.25055"
-         y="227.84813"
-         style="font-size:8px;line-height:1.25;stroke-width:0.96618187"
-         id="tspan883-1-9-4">Rx adptr 1</tspan></text>
-    <text
-       xml:space="preserve"
-       style="font-style:normal;font-weight:normal;font-size:11.59418297px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.96618187"
-       x="260.25055"
-       y="271.71359"
-       id="text5219-2-3-7-2-1-47"
-       transform="scale(0.97663117,1.023928)"><tspan
-         sodipodi:role="line"
-         x="260.25055"
-         y="271.71359"
-         id="tspan5223-0-6-5-9-5-6"
-         style="font-size:8px;line-height:1.25;stroke-width:0.96618187">Event eth</tspan><tspan
-         sodipodi:role="line"
-         x="260.25055"
-         y="281.71359"
-         style="font-size:8px;line-height:1.25;stroke-width:0.96618187"
-         id="tspan883-1-9-3">Rx adptr q</tspan></text>
-    <g
-       id="g16360"
-       transform="matrix(1.0874414,0,0,0.99912695,-98.49816,-6.4077434)">
-      <ellipse
-         ry="24.258638"
-         rx="22.831659"
-         cy="161.21605"
-         cx="559.68445"
-         id="path8843"
-         style="fill:#ffffff;fill-opacity:0.98039216;stroke:url(#linearGradient16362);stroke-opacity:1" />
-      <text
-         transform="scale(0.92048084,1.0863887)"
-         id="text5219-2-4-3-3-4-5"
-         y="146.21904"
-         x="588.44147"
-         style="font-style:normal;font-weight:normal;font-size:11.04576969px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.92048085"
-         xml:space="preserve"><tspan
-           id="tspan1265-5"
-           style="font-size:7.97750044px;line-height:1.25;stroke-width:0.92048085"
-           y="146.21904"
-           x="588.44147"
-           sodipodi:role="line">Tx Service</tspan><tspan
-           style="font-size:7.97750044px;line-height:1.25;stroke-width:0.92048085"
-           y="152.00201"
-           x="588.44147"
-           sodipodi:role="line"
-           id="tspan39139" /><tspan
-           style="font-size:7.97750044px;line-height:1.25;stroke-width:0.92048085"
-           y="156.19092"
-           x="588.44147"
-           sodipodi:role="line"
-           id="tspan39141">port n + 1</tspan></text>
-    </g>
-    <path
-       style="display:inline;opacity:1;fill:#ac14ff;fill-opacity:1;stroke:#ac14ff;stroke-width:0.80699998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.22799993, 1.61399996;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker32613)"
-       d="m 511.70299,212.50867 c -0.1614,-10.49392 -0.32276,-20.98539 -0.48409,-31.47439"
-       id="path5226-6-2-1-2-4-5-1"
-       inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6"
-       inkscape:original-d="m 511.70299,212.50867 c -0.16039,-10.49394 -0.32175,-20.98541 -0.48409,-31.47439"
-       sodipodi:nodetypes="cc" />
-    <path
-       style="display:inline;opacity:1;fill:#ac14ff;fill-opacity:1;stroke:#ac14ff;stroke-width:0.80699998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.228, 0.807;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935)"
-       d="m 523.50111,175.62989 c 10.13298,2.21215 20.26379,4.42384 30.39241,6.63504"
-       id="path5226-6-2-1-2-4-5-1-5"
-       inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1"
-       inkscape:original-d="m 523.50111,175.62989 c 10.13323,2.21099 20.26404,4.42267 30.39241,6.63504"
-       sodipodi:nodetypes="cc" />
-    <path
-       style="display:inline;opacity:1;fill:#ac14ff;fill-opacity:1;stroke:#ac14ff;stroke-width:0.80699998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.228, 0.807;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-5)"
-       d="m 523.50111,175.62989 c 9.91161,22.53065 19.82206,45.05865 29.73129,67.58389"
-       id="path5226-6-2-1-2-4-5-1-5-6"
-       inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-7"
-       inkscape:original-d="m 523.50111,175.62989 c 9.91282,22.53012 19.82327,45.05812 29.73129,67.58389"
-       sodipodi:nodetypes="cc" />
-    <path
-       style="display:inline;opacity:1;fill:#ac14ff;fill-opacity:1;stroke:#ac14ff;stroke-width:0.80699998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.228, 0.807;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-5-5)"
-       d="m 523.50111,175.62989 c 10.16587,40.76181 20.3305,81.51868 30.49385,122.27042"
-       id="path5226-6-2-1-2-4-5-1-5-6-3"
-       inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-7-9"
-       inkscape:original-d="m 523.50111,175.62989 c 10.16704,40.76152 20.33167,81.51839 30.49385,122.27042"
-       sodipodi:nodetypes="cc" />
     <path
-       style="display:inline;opacity:1;fill:#ac14ff;fill-opacity:1;stroke:#ac14ff;stroke-width:0.88;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.52, 0.88;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1)"
-       d="m 457.99431,185.46823 c 13.07561,8.94945 26.1492,17.89751 39.22072,26.84415"
-       id="path5226-6-2-1-2-4-5-1-5-0"
+       style="display:inline;opacity:1;fill:#ac14db;fill-opacity:1;stroke:#ac14ff;stroke-width:0.75503534;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.02014133, 0.75503534;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6-5)"
+       d="m 459.25963,298.68538 c 12.4298,0.0326 24.85706,0.0653 37.28169,0.0979"
+       id="path5226-6-2-1-2-4-5-1-5-0-2-9"
        inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2"
-       inkscape:original-d="m 457.99431,185.46823 c 13.0764,8.9483 26.14999,17.89636 39.22072,26.84415"
+       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9-2"
+       inkscape:original-d="m 459.25963,298.68538 c 12.4298,0.0316 24.85706,0.0643 37.28169,0.0979"
        sodipodi:nodetypes="cc" />
     <path
-       style="display:inline;opacity:1;fill:#ac14ff;fill-opacity:1;stroke:#ac14ff;stroke-width:0.80699998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.228, 0.807;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6)"
-       d="m 459.47717,245.71809 c 12.28232,-4.96638 24.56173,-9.93159 36.83817,-14.89559"
-       id="path5226-6-2-1-2-4-5-1-5-0-2"
+       style="display:inline;opacity:1;fill:#ac14db;fill-opacity:1;stroke:#ac14ff;stroke-width:0.77332252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.09329006, 0.77332252;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6-5-1)"
+       d="m 458.61908,243.27181 c 12.91755,-0.0156 25.83246,-0.0312 38.74462,-0.0468"
+       id="path5226-6-2-1-2-4-5-1-5-0-2-9-0"
        inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9"
-       inkscape:original-d="m 459.47717,245.71809 c 12.28211,-4.96689 24.56152,-9.9321 36.83817,-14.89559"
+       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7"
+       inkscape:original-d="m 458.61908,243.27181 c 12.91755,-0.0166 25.83246,-0.0322 38.74462,-0.0468"
        sodipodi:nodetypes="cc" />
     <path
-       style="display:inline;opacity:1;fill:#ac14ff;fill-opacity:1;stroke:#ac14ff;stroke-width:0.80699998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.228, 0.807;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6-6)"
-       d="m 459.54824,301.10401 c 12.64219,-20.37548 25.28189,-40.74696 37.91905,-61.11434"
-       id="path5226-6-2-1-2-4-5-1-5-0-2-9"
+       style="display:inline;opacity:1;fill:#ac14db;fill-opacity:1;stroke:#ac14ff;stroke-width:0.77624762;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.10499055, 0.77624764;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6-5-9)"
+       d="m 457.5506,186.45733 c 12.95011,-0.0208 25.89755,-0.0415 38.84226,-0.0623"
+       id="path5226-6-2-1-2-4-5-1-5-0-2-9-06"
        inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9-4"
-       inkscape:original-d="M 459.54824,301.10401 C 472.1907,280.7287 484.8304,260.35722 497.46729,239.98967"
+       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9-2-6"
+       inkscape:original-d="m 457.5506,186.45733 c 12.95011,-0.0218 25.89755,-0.0426 38.84226,-0.0623"
        sodipodi:nodetypes="cc" />
+    <rect
+       style="display:inline;opacity:1;fill:#ffffff;fill-opacity:1;stroke:url(#linearGradient3995-8);stroke-width:1.2090857;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect87-6-5-3-79"
+       width="72.081367"
+       height="32.405426"
+       x="499.14511"
+       y="170.31314"
+       rx="16.175425"
+       ry="16.202713" />
     <text
        xml:space="preserve"
-       style="font-style:normal;font-weight:normal;font-size:9.9315424px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.82762849"
-       x="595.27808"
-       y="136.64076"
-       id="text5219-2-4-3-3-4-54-8-8"
-       transform="scale(0.91487885,1.0930409)"><tspan
+       style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
+       x="502.77109"
+       y="189.40137"
+       id="text5219-2-6-2"><tspan
          sodipodi:role="line"
-         x="595.27808"
-         y="139.22064"
-         style="font-size:7.17278051px;line-height:1.25;stroke-width:0.82762849"
-         id="tspan1265-4-6-7" /></text>
+         x="502.77109"
+         y="189.40137"
+         id="tspan5223-0-9-02"
+         style="font-size:10px;line-height:1.25">port n+m+1</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:10.5946722px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.88288933"
+       x="514.66077"
+       y="225.14934"
+       id="text5219-2-3-7-2-1-8-3"
+       transform="scale(0.89243778,1.1205263)"><tspan
+         sodipodi:role="line"
+         x="514.66077"
+         y="225.14934"
+         style="font-size:7.31033659px;line-height:1.25;stroke-width:0.88288933"
+         id="tspan883-1-9-7-6">Single link</tspan></text>
     <rect
-       style="display:inline;opacity:1;fill:none;fill-opacity:1;stroke:url(#linearGradient1920-1);stroke-width:0.86395979;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
-       id="rect3736-8-4-9"
-       width="25.451954"
-       height="24.448395"
-       x="499.03128"
-       y="213.32141" />
+       style="display:inline;opacity:1;fill:#ffffff;fill-opacity:1;stroke:url(#linearGradient3995-8-9);stroke-width:1.2090857;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect87-6-5-3-79-1"
+       width="72.081367"
+       height="32.405426"
+       x="499.944"
+       y="226.74811"
+       rx="16.175425"
+       ry="16.202713" />
     <text
        xml:space="preserve"
-       style="font-style:normal;font-weight:normal;font-size:9.9315424px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.82762849"
-       x="548.03668"
-       y="204.31348"
-       id="text5219-2-4-3-3-4-54-8"
-       transform="scale(0.91487885,1.0930409)"><tspan
+       style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
+       x="504.46329"
+       y="246.05832"
+       id="text5219-2-6-1-7"><tspan
          sodipodi:role="line"
-         x="548.03668"
-         y="204.31348"
-         style="font-size:7.17278051px;line-height:1.25;stroke-width:0.82762849"
-         id="tspan1265-4-6">Single </tspan><tspan
+         x="504.46329"
+         y="246.05832"
+         id="tspan5223-0-9-0-5"
+         style="font-size:10px;line-height:1.25">port n+m+2</tspan></text>
+    <rect
+       style="display:inline;opacity:1;fill:#ffffff;fill-opacity:1;stroke:url(#linearGradient3995-8-9-9);stroke-width:1.2090857;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect87-6-5-3-79-1-7"
+       width="72.081367"
+       height="32.405426"
+       x="499.31168"
+       y="282.50211"
+       rx="16.175425"
+       ry="16.202713" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
+       x="512.51819"
+       y="301.5791"
+       id="text5219-2-6-1-6-2"><tspan
          sodipodi:role="line"
-         x="548.03668"
-         y="213.27945"
-         style="font-size:7.17278051px;line-height:1.25;stroke-width:0.82762849"
-         id="tspan57836">Link Q</tspan></text>
+         x="512.51819"
+         y="301.5791"
+         id="tspan5223-0-9-0-4-2"
+         style="font-size:10px;line-height:1.25">port n+o</tspan></text>
     <path
-       style="display:inline;opacity:1;fill:none;stroke:#f78202;stroke-width:0.89999998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.6, 0.9;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#TriangleOutM-5-2-3)"
-       d="m 356.74765,186.83153 c 15.88009,-0.11696 31.75919,-0.23391 47.6373,-0.35085"
-       id="path5226-6-2-5"
+       style="display:inline;opacity:1;fill:#ac14db;fill-opacity:1;stroke:#ac14ff;stroke-width:0.70236319;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:2.80945275, 0.70236319;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6-5-1-0)"
+       d="m 571.86582,186.42744 c 7.95108,0.0405 15.90052,0.0811 23.84823,0.12159"
+       id="path5226-6-2-1-2-4-5-1-5-0-2-9-0-1"
        inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-61"
-       inkscape:original-d="m 356.74765,186.83153 c 15.88008,-0.11795 31.75918,-0.2349 47.6373,-0.35085"
+       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7-6"
+       inkscape:original-d="m 571.86582,186.42744 c 7.95109,0.0395 15.90052,0.0801 23.84823,0.12159"
        sodipodi:nodetypes="cc" />
     <path
-       style="display:inline;opacity:1;fill:none;stroke:#f78202;stroke-width:0.89999998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.6, 0.9;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#TriangleOutM-5-2-3-0)"
-       d="m 357.05625,242.97941 c 15.74231,0.0176 31.48469,0.0352 47.22712,0.0528"
-       id="path5226-6-2-5-5"
+       style="display:inline;opacity:1;fill:#ac14db;fill-opacity:1;stroke:#ac14ff;stroke-width:0.70236319;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:2.80945275, 0.70236319;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6-5-1-0-0)"
+       d="m 572.74002,242.8173 c 7.86699,0.091 15.73233,0.18199 23.59597,0.27295"
+       id="path5226-6-2-1-2-4-5-1-5-0-2-9-0-1-2"
        inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-61-1"
-       inkscape:original-d="m 357.05625,242.97941 c 15.74231,0.0166 31.48469,0.0342 47.22712,0.0528"
+       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7-6-3"
+       inkscape:original-d="m 572.74002,242.8173 c 7.867,0.09 15.73234,0.18097 23.59597,0.27295"
        sodipodi:nodetypes="cc" />
     <path
-       style="display:inline;opacity:1;fill:none;stroke:#f78202;stroke-width:0.89337438;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.57349763, 0.89337441;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#TriangleOutM-5-2-3-9)"
-       d="m 356.67155,297.12049 c 15.97521,0.0733 31.94945,0.14663 47.92273,0.21994"
-       id="path5226-6-2-5-0"
+       style="display:inline;opacity:1;fill:#ac14db;fill-opacity:1;stroke:#ac14ff;stroke-width:0.70236319;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:2.80945275, 0.70236319;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6-5-1-0-06)"
+       d="m 571.86429,299.00558 c 8.49934,0.0508 16.99697,0.10162 25.49284,0.15242"
+       id="path5226-6-2-1-2-4-5-1-5-0-2-9-0-1-5"
        inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-61-9"
-       inkscape:original-d="m 356.67155,297.12049 c 15.97521,0.0723 31.94945,0.14563 47.92273,0.21994"
+       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7-6-5"
+       inkscape:original-d="m 571.86429,299.00558 c 8.49935,0.0498 16.99698,0.10062 25.49284,0.15242"
        sodipodi:nodetypes="cc" />
+    <rect
+       style="display:inline;opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient1760-3);stroke-width:0.67135191;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect2896-6-7"
+       width="159.92059"
+       height="161.38417"
+       x="495.64883"
+       y="159.4483"
+       ry="4.080533"
+       rx="5.9213624"
+       inkscape:export-filename="/home/matz/barracuda/rapports/mbuf-api-v2-images/octeon_multi.png"
+       inkscape:export-xdpi="112"
+       inkscape:export-ydpi="112" />
     <text
        xml:space="preserve"
-       style="font-style:normal;font-weight:normal;font-size:9.9315424px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.82762849"
-       x="606.06958"
-       y="346.46628"
-       id="text5219-2-4-3-3-4-54-8-7"
-       transform="scale(0.91487885,1.0930409)"><tspan
+       style="font-style:normal;font-weight:normal;font-size:10.5946722px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.88288933"
+       x="515.76257"
+       y="175.4832"
+       id="text5219-2-3-7-2-1-8-3-5"
+       transform="scale(0.89243778,1.1205263)"><tspan
          sodipodi:role="line"
-         x="606.06958"
-         y="346.46628"
-         style="font-size:7.17278051px;line-height:1.25;stroke-width:0.82762849"
-         id="tspan1265-4-6-2">(Tx Generic)</tspan></text>
+         x="515.76257"
+         y="175.4832"
+         style="font-size:7.31033659px;line-height:1.25;stroke-width:0.88288933"
+         id="tspan883-1-9-7-6-3">Single link</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:10.5946722px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.88288933"
+       x="515.76501"
+       y="274.05133"
+       id="text5219-2-3-7-2-1-8-3-56"
+       transform="scale(0.89243778,1.1205263)"><tspan
+         sodipodi:role="line"
+         x="515.76501"
+         y="274.05133"
+         style="font-size:7.31033659px;line-height:1.25;stroke-width:0.88288933"
+         id="tspan883-1-9-7-6-2">Single link</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
+       x="546.92126"
+       y="155.57758"
+       id="text5219-2-4-2"><tspan
+         sodipodi:role="line"
+         x="546.92126"
+         y="155.57758"
+         id="tspan5223-0-7-70"
+         style="font-size:10px;line-height:1.25">Tx adapter</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
+       x="468.36612"
+       y="180.9222"
+       id="text5219-1-9-4-9-3"><tspan
+         sodipodi:role="line"
+         x="468.36612"
+         y="180.9222"
+         id="tspan5223-2-3-5-0-6"
+         style="font-size:10px;line-height:1.25">q3</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
+       x="467.61584"
+       y="239.3683"
+       id="text5219-1-9-4-9-3-0"><tspan
+         sodipodi:role="line"
+         x="467.61584"
+         y="239.3683"
+         id="tspan5223-2-3-5-0-6-6"
+         style="font-size:10px;line-height:1.25">q4</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
+       x="468.70688"
+       y="294.45236"
+       id="text5219-1-9-4-9-3-2"><tspan
+         sodipodi:role="line"
+         x="468.70688"
+         y="294.45236"
+         id="tspan5223-2-3-5-0-6-61"
+         style="font-size:10px;line-height:1.25">q5</tspan></text>
   </g>
 </svg>
@@ -20,8 +20,8 @@
    height="288.34286"
    id="svg3868"
    version="1.1"
-   inkscape:version="0.92.2 (5c3e80d, 2017-08-06)"
-   sodipodi:docname="eventdev_pipeline_atq_test_lockfree.svg"
+   inkscape:version="0.92.2 2405546, 2018-03-11"
+   sodipodi:docname="eventdev_pipeline_atq_test_internal_port.svg"
    sodipodi:version="0.32"
    inkscape:output_extension="org.inkscape.output.svg.inkscape"
    enable-background="new">
      borderopacity="1.0"
      inkscape:pageopacity="0.0"
      inkscape:pageshadow="2"
-     inkscape:zoom="1.7519532"
-     inkscape:cx="479.73438"
-     inkscape:cy="163.58755"
+     inkscape:zoom="2.0977641"
+     inkscape:cx="432.03729"
+     inkscape:cy="135.16016"
      inkscape:document-units="px"
      inkscape:current-layer="layer1"
      showgrid="false"
-     inkscape:window-width="1920"
-     inkscape:window-height="1046"
-     inkscape:window-x="0"
-     inkscape:window-y="34"
-     inkscape:window-maximized="1"
+     inkscape:window-width="1912"
+     inkscape:window-height="1033"
+     inkscape:window-x="4"
+     inkscape:window-y="22"
+     inkscape:window-maximized="0"
      fit-margin-top="0.1"
      fit-margin-left="0.1"
      fit-margin-right="0.1"
     <text
        xml:space="preserve"
        style="font-style:normal;font-weight:normal;font-size:9.9315424px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.82762849"
-       x="602.09888"
+       x="604.28497"
        y="347.66293"
        id="text5219-2-4-3-3-4-54"
        transform="scale(0.91487885,1.0930409)"><tspan
          sodipodi:role="line"
-         x="602.09888"
+         x="604.28497"
          y="347.66293"
          style="font-size:7.17278051px;line-height:1.25;stroke-width:0.82762849"
-         id="tspan1265-4">(Tx Lock free)</tspan></text>
+         id="tspan1265-4">(Internal port)</tspan></text>
   </g>
 </svg>
index 732d488..9fe743f 100644
    height="288.34286"
    id="svg3868"
    version="1.1"
-   inkscape:version="0.92.2 (5c3e80d, 2017-08-06)"
+   inkscape:version="0.92.2 2405546, 2018-03-11"
    sodipodi:docname="eventdev_pipeline_queue_test_generic.svg"
    sodipodi:version="0.32"
    inkscape:output_extension="org.inkscape.output.svg.inkscape"
    enable-background="new">
   <defs
      id="defs3870">
+    <linearGradient
+       id="linearGradient6820"
+       osb:paint="solid">
+      <stop
+         style="stop-color:#008080;stop-opacity:1;"
+         offset="0"
+         id="stop6818" />
+    </linearGradient>
     <linearGradient
        id="linearGradient6916"
        osb:paint="solid">
        x2="651.55652"
        y2="243.36557"
        gradientUnits="userSpaceOnUse"
-       gradientTransform="matrix(0.76448972,0,0,0.86504892,-92.637138,19.716473)" />
+       gradientTransform="matrix(0.76448972,0,0,0.78486608,-92.637138,48.19976)" />
     <linearGradient
        inkscape:collect="always"
        xlink:href="#linearGradient3808"
        y2="232.36095"
        gradientUnits="userSpaceOnUse"
        gradientTransform="translate(17.692568,-46.20799)" />
-    <marker
-       inkscape:stockid="TriangleOutM"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="marker35935-1"
-       style="overflow:visible"
-       inkscape:isstock="true"
-       inkscape:collect="always">
-      <path
-         id="path35933-49"
-         d="M 5.77,0 -2.88,5 V -5 Z"
-         style="fill:#ac14ff;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
-         transform="scale(0.4)"
-         inkscape:connector-curvature="0" />
-    </marker>
     <inkscape:path-effect
        effect="bspline"
        id="path-effect5228-5-1-6-2-9-4-6-1-2"
        apply_no_weight="true"
        apply_with_weight="true"
        only_selected="false" />
-    <marker
-       inkscape:stockid="TriangleOutM"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="marker35935-1-6"
-       style="overflow:visible"
-       inkscape:isstock="true"
-       inkscape:collect="always">
-      <path
-         id="path35933-49-8"
-         d="M 5.77,0 -2.88,5 V -5 Z"
-         style="fill:#ac14db;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
-         transform="scale(0.4)"
-         inkscape:connector-curvature="0" />
-    </marker>
     <inkscape:path-effect
        effect="bspline"
        id="path-effect5228-5-1-6-2-9-4-6-1-2-9"
        apply_no_weight="true"
        apply_with_weight="true"
        only_selected="false" />
-    <marker
-       inkscape:stockid="TriangleOutM"
-       orient="auto"
-       refY="0"
-       refX="0"
-       id="marker35935-1-6-6"
-       style="overflow:visible"
-       inkscape:isstock="true"
-       inkscape:collect="always">
-      <path
-         id="path35933-49-8-6"
-         d="M 5.77,0 -2.88,5 V -5 Z"
-         style="fill:#ac14e4;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
-         transform="scale(0.4)"
-         inkscape:connector-curvature="0" />
-    </marker>
     <inkscape:path-effect
        effect="bspline"
        id="path-effect5228-5-1-6-2-9-4-6-1-2-9-4"
        apply_no_weight="true"
        apply_with_weight="true"
        only_selected="false" />
-    <linearGradient
-       inkscape:collect="always"
-       xlink:href="#linearGradient3438"
-       id="linearGradient16362"
-       gradientUnits="userSpaceOnUse"
-       gradientTransform="translate(2.283166,-2.283166)"
-       x1="534.06958"
-       y1="163.49922"
-       x2="580.73291"
-       y2="163.49922" />
     <marker
        inkscape:isstock="true"
        style="overflow:visible"
        effect="spiro"
        id="path-effect14461-7-5-1"
        is_visible="true" />
-    <linearGradient
-       inkscape:collect="always"
-       xlink:href="#linearGradient3993"
-       id="linearGradient1924-3"
-       x1="597.00317"
-       y1="156.97769"
-       x2="637.13684"
-       y2="156.97769"
-       gradientUnits="userSpaceOnUse"
-       gradientTransform="matrix(0.78531244,0,0,1,50.143534,82.69878)" />
     <marker
        inkscape:stockid="TriangleOutM"
        orient="auto"
        apply_no_weight="true"
        apply_with_weight="true"
        only_selected="false" />
+    <inkscape:path-effect
+       effect="bspline"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-6"
+       is_visible="true"
+       weight="33.333333"
+       steps="2"
+       helper_size="0"
+       apply_no_weight="true"
+       apply_with_weight="true"
+       only_selected="false" />
+    <inkscape:path-effect
+       effect="bspline"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-0"
+       is_visible="true"
+       weight="33.333333"
+       steps="2"
+       helper_size="0"
+       apply_no_weight="true"
+       apply_with_weight="true"
+       only_selected="false" />
+    <inkscape:path-effect
+       effect="bspline"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-0-7"
+       is_visible="true"
+       weight="33.333333"
+       steps="2"
+       helper_size="0"
+       apply_no_weight="true"
+       apply_with_weight="true"
+       only_selected="false" />
+    <inkscape:path-effect
+       effect="bspline"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-0-6"
+       is_visible="true"
+       weight="33.333333"
+       steps="2"
+       helper_size="0"
+       apply_no_weight="true"
+       apply_with_weight="true"
+       only_selected="false" />
+    <inkscape:path-effect
+       effect="bspline"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-5"
+       is_visible="true"
+       weight="33.333333"
+       steps="2"
+       helper_size="0"
+       apply_no_weight="true"
+       apply_with_weight="true"
+       only_selected="false" />
     <marker
        inkscape:stockid="TriangleOutM"
        orient="auto"
        refY="0"
        refX="0"
-       id="marker35935-1-2"
+       id="marker35935-1-6-5"
        style="overflow:visible"
        inkscape:isstock="true"
        inkscape:collect="always">
       <path
-         id="path35933-49-6"
+         id="path35933-49-8-6"
          d="M 5.77,0 -2.88,5 V -5 Z"
-         style="fill:#ac14ff;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
+         style="fill:#ac14db;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
          transform="scale(0.4)"
          inkscape:connector-curvature="0" />
     </marker>
     <inkscape:path-effect
        effect="bspline"
-       id="path-effect5228-5-1-6-2-9-4-6-1-2-6"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-2"
        is_visible="true"
        weight="33.333333"
        steps="2"
        orient="auto"
        refY="0"
        refX="0"
-       id="marker35935-1-9"
+       id="marker35935-1-6-5-1"
        style="overflow:visible"
        inkscape:isstock="true"
        inkscape:collect="always">
       <path
-         id="path35933-49-5"
+         id="path35933-49-8-6-2"
          d="M 5.77,0 -2.88,5 V -5 Z"
-         style="fill:#ac14ff;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
+         style="fill:#ac14db;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
          transform="scale(0.4)"
          inkscape:connector-curvature="0" />
     </marker>
     <inkscape:path-effect
        effect="bspline"
-       id="path-effect5228-5-1-6-2-9-4-6-1-2-0"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7"
        is_visible="true"
        weight="33.333333"
        steps="2"
        orient="auto"
        refY="0"
        refX="0"
-       id="marker35935-1-9-7"
+       id="marker35935-1-6-5-9"
        style="overflow:visible"
        inkscape:isstock="true"
        inkscape:collect="always">
       <path
-         id="path35933-49-5-1"
+         id="path35933-49-8-6-3"
          d="M 5.77,0 -2.88,5 V -5 Z"
-         style="fill:#ac14ff;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
+         style="fill:#ac14db;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
          transform="scale(0.4)"
          inkscape:connector-curvature="0" />
     </marker>
     <inkscape:path-effect
        effect="bspline"
-       id="path-effect5228-5-1-6-2-9-4-6-1-2-0-7"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-2-6"
        is_visible="true"
        weight="33.333333"
        steps="2"
        apply_no_weight="true"
        apply_with_weight="true"
        only_selected="false" />
+    <linearGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient3993"
+       id="linearGradient3995-8"
+       x1="155.21329"
+       y1="231.61366"
+       x2="207.95523"
+       y2="231.61366"
+       gradientUnits="userSpaceOnUse"
+       gradientTransform="matrix(1.3938205,0,0,0.9944124,300.23326,-43.855196)" />
+    <linearGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient3993"
+       id="linearGradient3995-8-9"
+       x1="155.21329"
+       y1="231.61366"
+       x2="207.95523"
+       y2="231.61366"
+       gradientUnits="userSpaceOnUse"
+       gradientTransform="matrix(1.3938205,0,0,0.9944124,301.03213,12.579775)" />
+    <linearGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient3993"
+       id="linearGradient3995-8-9-9"
+       x1="155.21329"
+       y1="231.61366"
+       x2="207.95523"
+       y2="231.61366"
+       gradientUnits="userSpaceOnUse"
+       gradientTransform="matrix(1.3938205,0,0,0.9944124,300.39986,68.333755)" />
     <marker
        inkscape:stockid="TriangleOutM"
        orient="auto"
        refY="0"
        refX="0"
-       id="marker35935-1-9-72"
+       id="marker35935-1-6-5-1-0"
        style="overflow:visible"
        inkscape:isstock="true"
        inkscape:collect="always">
       <path
-         id="path35933-49-5-2"
+         id="path35933-49-8-6-2-3"
          d="M 5.77,0 -2.88,5 V -5 Z"
-         style="fill:#ac14ff;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
+         style="fill:#ac14db;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
          transform="scale(0.4)"
          inkscape:connector-curvature="0" />
     </marker>
     <inkscape:path-effect
        effect="bspline"
-       id="path-effect5228-5-1-6-2-9-4-6-1-2-0-6"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7-6"
+       is_visible="true"
+       weight="33.333333"
+       steps="2"
+       helper_size="0"
+       apply_no_weight="true"
+       apply_with_weight="true"
+       only_selected="false" />
+    <marker
+       inkscape:stockid="TriangleOutM"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="marker35935-1-6-5-1-0-0"
+       style="overflow:visible"
+       inkscape:isstock="true"
+       inkscape:collect="always">
+      <path
+         id="path35933-49-8-6-2-3-6"
+         d="M 5.77,0 -2.88,5 V -5 Z"
+         style="fill:#ac14db;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
+         transform="scale(0.4)"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <inkscape:path-effect
+       effect="bspline"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7-6-3"
+       is_visible="true"
+       weight="33.333333"
+       steps="2"
+       helper_size="0"
+       apply_no_weight="true"
+       apply_with_weight="true"
+       only_selected="false" />
+    <marker
+       inkscape:stockid="TriangleOutM"
+       orient="auto"
+       refY="0"
+       refX="0"
+       id="marker35935-1-6-5-1-0-06"
+       style="overflow:visible"
+       inkscape:isstock="true"
+       inkscape:collect="always">
+      <path
+         id="path35933-49-8-6-2-3-1"
+         d="M 5.77,0 -2.88,5 V -5 Z"
+         style="fill:#ac14db;fill-opacity:1;fill-rule:evenodd;stroke:#ac14ff;stroke-width:1.00000003pt;stroke-opacity:1"
+         transform="scale(0.4)"
+         inkscape:connector-curvature="0" />
+    </marker>
+    <inkscape:path-effect
+       effect="bspline"
+       id="path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7-6-5"
        is_visible="true"
        weight="33.333333"
        steps="2"
        apply_no_weight="true"
        apply_with_weight="true"
        only_selected="false" />
+    <linearGradient
+       inkscape:collect="always"
+       xlink:href="#linearGradient3993"
+       id="linearGradient1760-3"
+       x1="405.34961"
+       y1="243.36557"
+       x2="651.55652"
+       y2="243.36557"
+       gradientUnits="userSpaceOnUse"
+       gradientTransform="matrix(0.65213006,0,0,0.72134316,249.13234,64.538752)" />
   </defs>
   <sodipodi:namedview
      id="base"
      inkscape:pageopacity="0.0"
      inkscape:pageshadow="2"
      inkscape:zoom="1.7519532"
-     inkscape:cx="423.24137"
-     inkscape:cy="157.27924"
+     inkscape:cx="265.48225"
+     inkscape:cy="64.618341"
      inkscape:document-units="px"
      inkscape:current-layer="layer1"
      showgrid="false"
-     inkscape:window-width="1920"
-     inkscape:window-height="1046"
-     inkscape:window-x="1920"
-     inkscape:window-y="34"
-     inkscape:window-maximized="1"
+     inkscape:window-width="1912"
+     inkscape:window-height="1033"
+     inkscape:window-x="4"
+     inkscape:window-y="22"
+     inkscape:window-maximized="0"
      fit-margin-top="0.1"
      fit-margin-left="0.1"
      fit-margin-right="0.1"
          id="tspan5223-0"
          style="font-size:10px;line-height:1.25">producer 0</tspan></text>
     <rect
-       style="display:inline;opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient1760);stroke-width:0.7960096;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       style="display:inline;opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient1760);stroke-width:0.75822091;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
        id="rect2896-6"
        width="187.47435"
-       height="193.53508"
+       height="175.59599"
        x="217.62262"
-       y="133.47206"
-       ry="4.8934555"
+       y="151.41115"
+       ry="4.4398727"
        rx="6.9415913"
        inkscape:export-filename="/home/matz/barracuda/rapports/mbuf-api-v2-images/octeon_multi.png"
        inkscape:export-xdpi="112"
          x="115.44385"
          y="186.49918"
          id="tspan5223-0-9"
-         style="font-size:10px;line-height:1.25">port n+2</tspan></text>
+         style="font-size:10px;line-height:1.25">port n+1</tspan></text>
     <rect
        style="display:inline;opacity:1;fill:none;fill-opacity:1;stroke:url(#linearGradient1920);stroke-width:1.06814909;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
        id="rect3736-8-4"
          x="115.35846"
          y="242.55573"
          id="tspan5223-0-9-0"
-         style="font-size:10px;line-height:1.25">port n+3</tspan></text>
+         style="font-size:10px;line-height:1.25">port n+2</tspan></text>
     <rect
        style="display:inline;opacity:1;fill:none;fill-opacity:1;stroke:url(#linearGradient1920-2);stroke-width:1.06814909;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
        id="rect3736-8-4-6"
          x="199.11482"
          y="111.36845"
          id="tspan5223-10"
-         style="font-size:9.02731705px;line-height:1.25;stroke-width:0.90273178">total queues = (number of stages * number of ethernet dev) + 1</tspan></text>
+         style="font-size:9.02731705px;line-height:1.25;stroke-width:0.90273178">total queues = (number of stages * number of ethernet dev) + number of ethernet dev</tspan></text>
     <text
        xml:space="preserve"
        style="font-style:normal;font-weight:normal;font-size:11.11939621px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.92661637"
     <text
        xml:space="preserve"
        style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
-       x="426.57141"
+       x="428.57141"
        y="167.14041"
        id="text5219-2-4"><tspan
          sodipodi:role="line"
-         x="426.57141"
+         x="428.57141"
          y="167.14041"
          id="tspan5223-0-7"
          style="font-size:10px;line-height:1.25">worker 0</tspan></text>
     <text
        xml:space="preserve"
        style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
-       x="428.30768"
+       x="430.30768"
        y="223.46143"
        id="text5219-2-4-3"><tspan
          sodipodi:role="line"
-         x="428.30768"
+         x="430.30768"
          y="223.46143"
          id="tspan5223-0-7-7"
          style="font-size:10px;line-height:1.25">worker 1</tspan></text>
     <text
        xml:space="preserve"
        style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
-       x="426.30768"
+       x="428.30768"
        y="279.46143"
        id="text5219-2-4-3-4-2"><tspan
          sodipodi:role="line"
-         x="426.30768"
+         x="428.30768"
          y="279.46143"
          id="tspan5223-0-7-7-5-5"
          style="font-size:10px;line-height:1.25">worker n</tspan></text>
     <g
        style="display:inline;opacity:1"
        id="g20550"
-       transform="translate(65.401608,-190.91553)">
+       transform="translate(87.401608,-194.91553)">
       <rect
          ry="16.293755"
          rx="11.6051"
     <g
        style="display:inline;opacity:1"
        id="g13899"
-       transform="translate(-17.21182,-3.304662)">
+       transform="translate(5.9319927,-3.304662)">
       <rect
          ry="16.293755"
          rx="11.6051"
     <g
        style="display:inline;opacity:1"
        id="g13911"
-       transform="translate(-15.21182,-1.304662)">
+       transform="translate(7.6443673,-2.7338705)">
       <rect
          ry="16.293755"
          rx="11.6051"
            x="621.71729"
            sodipodi:role="line">    Txq 0</tspan></text>
     </g>
-    <g
-       style="display:inline;opacity:1"
-       id="g16360"
-       transform="matrix(1.0983058,0,0,1.0572541,-82.192809,-6.5664741)">
-      <ellipse
-         ry="24.258638"
-         rx="22.831659"
-         cy="161.21605"
-         cx="559.68445"
-         id="path8843"
-         style="fill:#ffffff;fill-opacity:0.98039216;stroke:url(#linearGradient16362);stroke-opacity:1" />
-      <text
-         transform="scale(0.94727182,1.0556632)"
-         id="text5219-2-4-3-3-4-5"
-         y="151.93637"
-         x="571.61011"
-         style="font-style:normal;font-weight:normal;font-size:10.76524448px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.89710373"
-         xml:space="preserve"><tspan
-           id="tspan1265-5"
-           style="font-size:7.77489901px;line-height:1.25;stroke-width:0.89710373"
-           y="151.93637"
-           x="571.61011"
-           sodipodi:role="line">Tx Service</tspan><tspan
-           style="font-size:7.77489901px;line-height:1.25;stroke-width:0.89710373"
-           y="161.655"
-           x="571.61011"
-           sodipodi:role="line"
-           id="tspan40484">port n + 1</tspan></text>
-    </g>
-    <path
-       style="display:inline;opacity:1;fill:#ac14ff;fill-opacity:1;stroke:#ac14ff;stroke-width:0.80699998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.228, 0.807;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1)"
-       d="m 475.41709,184.68945 c 14.66204,14.27312 29.32201,28.54422 43.97988,42.81328"
-       id="path5226-6-2-1-2-4-5-1-5-0"
-       inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2"
-       inkscape:original-d="m 475.41709,184.68945 c 14.66303,14.2721 29.323,28.54321 43.97988,42.81328"
-       sodipodi:nodetypes="cc" />
-    <path
-       style="display:inline;opacity:1;fill:#ac14db;fill-opacity:1;stroke:#ac14ff;stroke-width:0.80699998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.228, 0.807;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6)"
-       d="m 476.32916,241.51456 c 13.86102,-0.34 27.7191,-0.67992 41.57417,-1.01977"
-       id="path5226-6-2-1-2-4-5-1-5-0-2"
-       inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9"
-       inkscape:original-d="m 476.32916,241.51456 c 13.861,-0.34097 27.71908,-0.6809 41.57417,-1.01977"
-       sodipodi:nodetypes="cc" />
-    <path
-       style="display:inline;opacity:1;fill:#ac14e4;fill-opacity:1;stroke:#ac14ff;stroke-width:0.80699998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.228, 0.807;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6-6)"
-       d="m 474.31327,298.61285 c 15.031,-15.59075 30.05891,-31.17831 45.0837,-46.76263"
-       id="path5226-6-2-1-2-4-5-1-5-0-2-9"
-       inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9-4"
-       inkscape:original-d="m 474.31327,298.61285 c 15.03102,-15.59073 30.05893,-31.17829 45.0837,-46.76263"
-       sodipodi:nodetypes="cc" />
-    <rect
-       style="display:inline;opacity:1;fill:none;fill-opacity:1;stroke:url(#linearGradient1924-3);stroke-width:0.94657081;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
-       id="rect3736-8-0-1-7-7"
-       width="30.678661"
-       height="24.347494"
-       x="519.39697"
-       y="227.50273" />
-    <text
-       xml:space="preserve"
-       style="font-style:normal;font-weight:normal;font-size:9.9315424px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.82762849"
-       x="571.69696"
-       y="217.79964"
-       id="text5219-2-4-3-3-4-54-8-7-5"
-       transform="scale(0.91487885,1.0930409)"><tspan
-         sodipodi:role="line"
-         x="571.69696"
-         y="217.79964"
-         style="font-size:7.17278051px;line-height:1.25;stroke-width:0.82762849"
-         id="tspan1265-4-6-2-3">Single</tspan><tspan
-         sodipodi:role="line"
-         x="571.69696"
-         y="226.76561"
-         style="font-size:7.17278051px;line-height:1.25;stroke-width:0.82762849"
-         id="tspan6344">Link Q</tspan></text>
     <path
        style="display:inline;opacity:1;fill:none;stroke:#f78202;stroke-width:1.01153409;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1;marker-end:url(#TriangleOutM-5-2-6-6)"
        d="m 391.11413,240.54267 c 10.00574,0.0714 20.0096,0.14275 30.01154,0.21411"
        inkscape:path-effect="#path-effect5228-5-1-6-84-8"
        inkscape:original-d="m 389.52644,184.04076 c 10.2068,0.0715 20.41172,0.14408 30.61473,0.21761"
        sodipodi:nodetypes="cc" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:9.9315424px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.82762849"
+       x="665.00641"
+       y="346.51425"
+       id="text5219-2-4-3-3-4-54-8-7"
+       transform="scale(0.91487885,1.0930409)"><tspan
+         sodipodi:role="line"
+         x="665.00641"
+         y="346.51425"
+         style="font-size:7.17278051px;line-height:1.25;stroke-width:0.82762849"
+         id="tspan1265-4-6-2">(Tx Generic)</tspan></text>
     <path
-       style="display:inline;opacity:1;fill:#ac14ff;fill-opacity:1;stroke:#ac14ff;stroke-width:0.80699998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.228, 0.807;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-2)"
-       d="m 533.61005,227.17178 c -0.11895,-11.90475 -0.23788,-23.80683 -0.35678,-35.70623"
-       id="path5226-6-2-1-2-4-5-1-5-0-4"
+       style="display:inline;opacity:1;fill:#ac14db;fill-opacity:1;stroke:#ac14ff;stroke-width:0.77748054;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.10992218, 0.77748055;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6-5)"
+       d="m 475.15346,298.63383 c 13.1798,0.0326 26.3569,0.0653 39.53121,0.0979"
+       id="path5226-6-2-1-2-4-5-1-5-0-2-9"
        inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-6"
-       inkscape:original-d="m 533.61005,227.17178 c -0.11794,-11.90476 -0.23687,-23.80684 -0.35678,-35.70623"
+       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9-2"
+       inkscape:original-d="m 475.15346,298.63383 c 13.1798,0.0316 26.3569,0.0643 39.53121,0.0979"
        sodipodi:nodetypes="cc" />
     <path
-       style="display:inline;opacity:1;fill:#ac14ff;fill-opacity:1;stroke:#ac14ff;stroke-width:0.80699998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.228, 0.807;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-9)"
-       d="m 554.18303,173.89676 c 12.12572,3.64515 24.2491,7.2896 36.37012,10.93334"
-       id="path5226-6-2-1-2-4-5-1-5-0-48"
+       style="display:inline;opacity:1;fill:#ac14db;fill-opacity:1;stroke:#ac14ff;stroke-width:0.77332252;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.09329006, 0.77332252;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6-5-1)"
+       d="m 476.76243,243.22025 c 12.91755,-0.0156 25.83246,-0.0312 38.74462,-0.0468"
+       id="path5226-6-2-1-2-4-5-1-5-0-2-9-0"
        inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-0"
-       inkscape:original-d="m 554.18303,173.89676 c 12.12608,3.64396 24.24946,7.28841 36.37012,10.93334"
+       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7"
+       inkscape:original-d="m 476.76243,243.22025 c 12.91755,-0.0166 25.83246,-0.0322 38.74462,-0.0468"
        sodipodi:nodetypes="cc" />
     <path
-       style="display:inline;opacity:1;fill:#ac14ff;fill-opacity:1;stroke:#ac14ff;stroke-width:0.80699998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.228, 0.807;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-9-7)"
-       d="m 554.18303,173.89676 c 12.8469,22.86455 25.6922,45.72625 38.53585,68.585"
-       id="path5226-6-2-1-2-4-5-1-5-0-48-2"
+       style="display:inline;opacity:1;fill:#ac14db;fill-opacity:1;stroke:#ac14ff;stroke-width:0.77624762;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.10499055, 0.77624764;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6-5-9)"
+       d="m 475.69395,186.40577 c 12.95011,-0.0208 25.89755,-0.0415 38.84226,-0.0623"
+       id="path5226-6-2-1-2-4-5-1-5-0-2-9-06"
        inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-0-7"
-       inkscape:original-d="m 554.18303,173.89676 c 12.84809,22.86388 25.69339,45.72558 38.53585,68.585"
+       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9-2-6"
+       inkscape:original-d="m 475.69395,186.40577 c 12.95011,-0.0218 25.89755,-0.0426 38.84226,-0.0623"
        sodipodi:nodetypes="cc" />
+    <rect
+       style="display:inline;opacity:1;fill:#ffffff;fill-opacity:1;stroke:url(#linearGradient3995-8);stroke-width:1.2090857;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect87-6-5-3-79"
+       width="72.081367"
+       height="32.405426"
+       x="517.28845"
+       y="170.26158"
+       rx="16.175425"
+       ry="16.202713" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
+       x="520.91443"
+       y="189.34982"
+       id="text5219-2-6-2"><tspan
+         sodipodi:role="line"
+         x="520.91443"
+         y="189.34982"
+         id="tspan5223-0-9-02"
+         style="font-size:10px;line-height:1.25">port n+m+1</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:10.5946722px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.88288933"
+       x="534.99072"
+       y="225.10315"
+       id="text5219-2-3-7-2-1-8-3"
+       transform="scale(0.89243779,1.1205263)"><tspan
+         sodipodi:role="line"
+         x="534.99072"
+         y="225.10315"
+         style="font-size:7.31033659px;line-height:1.25;stroke-width:0.88288933"
+         id="tspan883-1-9-7-6">Single link</tspan></text>
+    <rect
+       style="display:inline;opacity:1;fill:#ffffff;fill-opacity:1;stroke:url(#linearGradient3995-8-9);stroke-width:1.2090857;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect87-6-5-3-79-1"
+       width="72.081367"
+       height="32.405426"
+       x="518.08734"
+       y="226.69656"
+       rx="16.175425"
+       ry="16.202713" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
+       x="522.60663"
+       y="246.00677"
+       id="text5219-2-6-1-7"><tspan
+         sodipodi:role="line"
+         x="522.60663"
+         y="246.00677"
+         id="tspan5223-0-9-0-5"
+         style="font-size:10px;line-height:1.25">port n+m+2</tspan></text>
+    <rect
+       style="display:inline;opacity:1;fill:#ffffff;fill-opacity:1;stroke:url(#linearGradient3995-8-9-9);stroke-width:1.2090857;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect87-6-5-3-79-1-7"
+       width="72.081367"
+       height="32.405426"
+       x="517.45502"
+       y="282.45056"
+       rx="16.175425"
+       ry="16.202713" />
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
+       x="530.6615"
+       y="301.52756"
+       id="text5219-2-6-1-6-2"><tspan
+         sodipodi:role="line"
+         x="530.6615"
+         y="301.52756"
+         id="tspan5223-0-9-0-4-2"
+         style="font-size:10px;line-height:1.25">port n+o</tspan></text>
     <path
-       style="display:inline;opacity:1;fill:#ac14ff;fill-opacity:1;stroke:#ac14ff;stroke-width:0.80699998;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:3.228, 0.807;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-9-72)"
-       d="m 554.18303,173.89676 c 12.65661,41.60787 25.31164,83.21054 37.96507,124.80795"
-       id="path5226-6-2-1-2-4-5-1-5-0-48-1"
+       style="display:inline;opacity:1;fill:#ac14db;fill-opacity:1;stroke:#ac14ff;stroke-width:0.70236319;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:2.80945275, 0.70236319;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6-5-1-0)"
+       d="m 590.00917,186.37588 c 7.95108,0.0405 15.90052,0.0811 23.84823,0.12159"
+       id="path5226-6-2-1-2-4-5-1-5-0-2-9-0-1"
        inkscape:connector-curvature="0"
-       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-0-6"
-       inkscape:original-d="m 554.18303,173.89676 c 12.65781,41.6075 25.31284,83.21018 37.96507,124.80795"
+       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7-6"
+       inkscape:original-d="m 590.00917,186.37588 c 7.95109,0.0395 15.90052,0.0801 23.84823,0.12159"
        sodipodi:nodetypes="cc" />
+    <path
+       style="display:inline;opacity:1;fill:#ac14db;fill-opacity:1;stroke:#ac14ff;stroke-width:0.70236319;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:2.80945275, 0.70236319;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6-5-1-0-0)"
+       d="m 590.88337,242.76574 c 7.86699,0.091 15.73233,0.18199 23.59597,0.27295"
+       id="path5226-6-2-1-2-4-5-1-5-0-2-9-0-1-2"
+       inkscape:connector-curvature="0"
+       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7-6-3"
+       inkscape:original-d="m 590.88337,242.76574 c 7.867,0.09 15.73234,0.18097 23.59597,0.27295"
+       sodipodi:nodetypes="cc" />
+    <path
+       style="display:inline;opacity:1;fill:#ac14db;fill-opacity:1;stroke:#ac14ff;stroke-width:0.70236319;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:2.80945275, 0.70236319;stroke-dashoffset:0;stroke-opacity:1;marker-end:url(#marker35935-1-6-5-1-0-06)"
+       d="m 590.00764,298.95403 c 8.49934,0.0508 16.99697,0.10162 25.49284,0.15242"
+       id="path5226-6-2-1-2-4-5-1-5-0-2-9-0-1-5"
+       inkscape:connector-curvature="0"
+       inkscape:path-effect="#path-effect5228-5-1-6-2-9-4-6-1-2-9-2-7-6-5"
+       inkscape:original-d="m 590.00764,298.95403 c 8.49935,0.0498 16.99698,0.10062 25.49284,0.15242"
+       sodipodi:nodetypes="cc" />
+    <rect
+       style="display:inline;opacity:1;fill:none;fill-opacity:1;fill-rule:evenodd;stroke:url(#linearGradient1760-3);stroke-width:0.67135191;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1"
+       id="rect2896-6-7"
+       width="159.92059"
+       height="161.38417"
+       x="513.79218"
+       y="159.39674"
+       ry="4.080533"
+       rx="5.9213624"
+       inkscape:export-filename="/home/matz/barracuda/rapports/mbuf-api-v2-images/octeon_multi.png"
+       inkscape:export-xdpi="112"
+       inkscape:export-ydpi="112" />
     <text
        xml:space="preserve"
-       style="font-style:normal;font-weight:normal;font-size:9.9315424px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.82762849"
-       x="665.00641"
-       y="346.51425"
-       id="text5219-2-4-3-3-4-54-8-7"
-       transform="scale(0.91487885,1.0930409)"><tspan
+       style="font-style:normal;font-weight:normal;font-size:10.5946722px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.88288933"
+       x="536.09253"
+       y="175.43703"
+       id="text5219-2-3-7-2-1-8-3-5"
+       transform="scale(0.89243778,1.1205263)"><tspan
          sodipodi:role="line"
-         x="665.00641"
-         y="346.51425"
-         style="font-size:7.17278051px;line-height:1.25;stroke-width:0.82762849"
-         id="tspan1265-4-6-2">(Tx Generic)</tspan></text>
+         x="536.09253"
+         y="175.43703"
+         style="font-size:7.31033659px;line-height:1.25;stroke-width:0.88288933"
+         id="tspan883-1-9-7-6-3">Single link</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;font-size:10.5946722px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.88288933"
+       x="533.85394"
+       y="274.00516"
+       id="text5219-2-3-7-2-1-8-3-56"
+       transform="scale(0.89243778,1.1205263)"><tspan
+         sodipodi:role="line"
+         x="533.85394"
+         y="274.00516"
+         style="font-size:7.31033659px;line-height:1.25;stroke-width:0.88288933"
+         id="tspan883-1-9-7-6-2">Single link</tspan></text>
+    <text
+       xml:space="preserve"
+       style="font-style:normal;font-weight:normal;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none"
+       x="575.06464"
+       y="155.52603"
+       id="text5219-2-4-2"><tspan
+         sodipodi:role="line"
+         x="575.06464"
+         y="155.52603"
+         id="tspan5223-0-7-70"
+         style="font-size:10px;line-height:1.25">Tx adapter</tspan></text>
   </g>
 </svg>
@@ -20,8 +20,8 @@
    height="288.34286"
    id="svg3868"
    version="1.1"
-   inkscape:version="0.92.2 (5c3e80d, 2017-08-06)"
-   sodipodi:docname="eventdev_pipeline_queue_test_lockfree.svg"
+   inkscape:version="0.92.2 2405546, 2018-03-11"
+   sodipodi:docname="eventdev_pipeline_queue_test_internal_port.svg"
    sodipodi:version="0.32"
    inkscape:output_extension="org.inkscape.output.svg.inkscape"
    enable-background="new">
      borderopacity="1.0"
      inkscape:pageopacity="0.0"
      inkscape:pageshadow="2"
-     inkscape:zoom="2.0000001"
-     inkscape:cx="394.32532"
-     inkscape:cy="122.70585"
+     inkscape:zoom="1.6933595"
+     inkscape:cx="466.69113"
+     inkscape:cy="93.384431"
      inkscape:document-units="px"
      inkscape:current-layer="layer1"
      showgrid="false"
-     inkscape:window-width="1920"
-     inkscape:window-height="1046"
-     inkscape:window-x="1920"
-     inkscape:window-y="34"
-     inkscape:window-maximized="1"
+     inkscape:window-width="1912"
+     inkscape:window-height="1033"
+     inkscape:window-x="4"
+     inkscape:window-y="22"
+     inkscape:window-maximized="0"
      fit-margin-top="0.1"
      fit-margin-left="0.1"
      fit-margin-right="0.1"
          x="670.83521"
          y="349.11719"
          style="font-size:7.17278051px;line-height:1.25;stroke-width:0.82762849"
-         id="tspan1265-4">(Tx Lock free)</tspan></text>
+         id="tspan1265-4">(Internal port)</tspan></text>
     <text
        xml:space="preserve"
        style="font-style:normal;font-weight:normal;font-size:11.11939621px;line-height:0%;font-family:'Bitstream Vera Sans';display:inline;opacity:1;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.92661637"
index 46effd8..cddba3b 100644 (file)
@@ -70,6 +70,8 @@ The following are the application command-line options:
          order_atq
          perf_queue
          perf_atq
+         pipeline_atq
+         pipeline_queue
 
 * ``--socket_id <n>``
 
@@ -521,8 +523,9 @@ This is a pipeline test case that aims at testing the following:
    +===+==============+================+=========================================+
    | 1 | nb_queues    | (nb_producers  | Queues will be configured based on the  |
    |   |              | * nb_stages) + | user requested sched type list(--stlist)|
-   |   |              | x              | Here value of x is 1 in generic pipeline|
-   |   |              |                | and nb_producers in lockfree pipeline   |
+   |   |              | nb_producers   | At the last stage of the schedule list  |
+   |   |              |                | the event is enqueued onto a per-port   |
+   |   |              |                | unique queue which is then transmitted. |
    +---+--------------+----------------+-----------------------------------------+
    | 2 | nb_producers | >= 1           | Producers will be configured based on   |
    |   |              |                | the number of detected ethernet devices.|
@@ -533,17 +536,19 @@ This is a pipeline test case that aims at testing the following:
    |   |              |                | argument                                |
    +---+--------------+----------------+-----------------------------------------+
    | 4 | nb_ports     | nb_workers +   | Workers use port 0 to port n.           |
-   |   |              | nb_producers   | Producers use port n+1 to port n+m,     |
-   |   |              |                | depending on the Rx adapter capability. |
+   |   |              | (nb_producers *| Producers use port n+1 to port n+m,     |
+   |   |              | 2)             | depending on the Rx adapter capability. |
+   |   |              |                | Consumers use port n+m+1 to port n+o    |
+   |   |              |                | depending on the Tx adapter capability. |
    +---+--------------+----------------+-----------------------------------------+
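
   For example, with 2 detected ethernet devices and a 3-stage ``--stlist``,
   this gives nb_queues = (2 * 3) + 2 = 8 and nb_ports = nb_workers + (2 * 2).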
 
 .. _figure_eventdev_pipeline_queue_test_generic:
 
 .. figure:: img/eventdev_pipeline_queue_test_generic.*
 
-.. _figure_eventdev_pipeline_queue_test_lockfree:
+.. _figure_eventdev_pipeline_queue_test_internal_port:
 
-.. figure:: img/eventdev_pipeline_queue_test_lockfree.*
+.. figure:: img/eventdev_pipeline_queue_test_internal_port.*
 
    pipeline queue test operation.
 
@@ -568,10 +573,11 @@ the last stage in the pipeline if the event type is ``atomic`` it is enqueued
 onto ethdev Tx queue else to maintain ordering the event type is set to
 ``atomic`` and enqueued onto the last stage queue.
 
-If the ethernet has ``DEV_TX_OFFLOAD_MT_LOCKFREE`` capability then the worker
-cores transmit the packets directly. Else the worker cores enqueue the packet
-onto the ``SINGLE_LINK_QUEUE`` that is managed by a Tx service. The Tx service
-dequeues the packet and transmits it.
+If the ethdev and eventdev pair have the ``RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT``
+capability, the worker cores enqueue packets to the eventdev directly using
+``rte_event_eth_tx_adapter_enqueue``; otherwise, the worker cores enqueue the
+packet onto the ``SINGLE_LINK_QUEUE`` managed by the Tx adapter, which then
+dequeues the packet and transmits it.
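
A minimal sketch of this worker-side decision is shown below; names such as
``worker_tx`` and ``eth_port`` are illustrative assumptions rather than part
of the test source, and the generic path assumes ``ev->queue_id`` already
targets the single link queue::

    #include <rte_eventdev.h>
    #include <rte_event_eth_tx_adapter.h>

    static void
    worker_tx(uint8_t dev_id, uint8_t port, uint16_t eth_port,
              struct rte_event *ev)
    {
        uint32_t caps = 0;

        /* Real code would cache the capability at setup time. */
        rte_event_eth_tx_adapter_caps_get(dev_id, eth_port, &caps);
        if (caps & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT) {
            /* Internal port: hand the event straight to the ethdev. */
            while (rte_event_eth_tx_adapter_enqueue(dev_id, port, ev, 1) != 1)
                ;
        } else {
            /* Generic: forward onto the SINGLE_LINK_QUEUE; the Tx
             * adapter service dequeues and transmits the packet.
             */
            ev->op = RTE_EVENT_OP_FORWARD;
            while (rte_event_enqueue_burst(dev_id, port, ev, 1) != 1)
                ;
        }
    }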
 
 On packet Tx, the application increments the number of events processed and
 prints it every second to get the number of events processed in one
@@ -628,8 +634,9 @@ This is a pipeline test case that aims at testing the following with
    +===+==============+================+=========================================+
    | 1 | nb_queues    | nb_producers + | Queues will be configured based on the  |
    |   |              | x              | user requested sched type list(--stlist)|
-   |   |              |                | where x = 1 in generic pipeline and 0   |
-   |   |              |                | in lockfree pipeline                    |
+   |   |              |                | where x = nb_producers in the generic   |
+   |   |              |                | pipeline and 0 if all the ethdevs in    |
+   |   |              |                | use have internal port capability.      |
    +---+--------------+----------------+-----------------------------------------+
    | 2 | nb_producers | >= 1           | Producers will be configured based on   |
    |   |              |                | the number of detected ethernet devices.|
@@ -640,17 +647,22 @@ This is a pipeline test case that aims at testing the following with
    |   |              |                | argument                                |
    +---+--------------+----------------+-----------------------------------------+
    | 4 | nb_ports     | nb_workers +   | Workers use port 0 to port n.           |
-   |   |              | nb_producers   | Producers use port n+1 to port n+m,     |
-   |   |              |                | depending on the Rx adapter capability. |
+   |   |              | nb_producers + | Producers use port n+1 to port n+m,     |
+   |   |              | x              | depending on the Rx adapter capability. |
+   |   |              | x              | x = nb_producers in the generic pipeline|
+   |   |              |                | and 0 if all the ethdevs in use have    |
+   |   |              |                | internal port capability.               |
+   |   |              |                | Consumers may use port n+m+1 to port n+o|
+   |   |              |                | depending on the Tx adapter capability. |
    +---+--------------+----------------+-----------------------------------------+
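
   For example, with 2 detected ethernet devices, the generic pipeline uses
   nb_queues = 2 + 2 = 4 and nb_ports = nb_workers + 4; if every ethdev has
   internal port capability, nb_queues = 2 and nb_ports = nb_workers + 2.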
 
 .. _figure_eventdev_pipeline_atq_test_generic:
 
 .. figure:: img/eventdev_pipeline_atq_test_generic.*
 
-.. _figure_eventdev_pipeline_atq_test_lockfree:
+.. _figure_eventdev_pipeline_atq_test_internal_port:
 
-.. figure:: img/eventdev_pipeline_atq_test_lockfree.*
+.. figure:: img/eventdev_pipeline_atq_test_internal_port.*
 
    pipeline atq test operation.
 
diff --git a/doc/meson.build b/doc/meson.build
new file mode 100644 (file)
index 0000000..c5410d8
--- /dev/null
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+doc_targets = []
+doc_target_names = []
+subdir('api')
+subdir('guides')
+
+if doc_targets.length() == 0
+       message = 'No docs targets found'
+else
+       message = 'Building docs:'
+endif
+run_target('doc', command: ['echo', message, doc_target_names],
+       depends: doc_targets)
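
With this file in place, running ``ninja doc`` in a configured build directory
should echo the collected target names and build whatever documentation
targets the ``api`` and ``guides`` subdirectories registered.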
index 7566076..7d5da5d 100644 (file)
@@ -5,6 +5,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 
 DIRS-y += common
 DIRS-y += bus
+DEPDIRS-bus := common
 DIRS-y += mempool
 DEPDIRS-mempool := common bus
 DIRS-y += net
diff --git a/drivers/baseband/meson.build b/drivers/baseband/meson.build
new file mode 100644 (file)
index 0000000..52489df
--- /dev/null
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+drivers = ['null']
+
+config_flag_fmt = 'RTE_LIBRTE_@0@_PMD'
+driver_name_fmt = 'rte_pmd_@0@'
diff --git a/drivers/baseband/null/meson.build b/drivers/baseband/null/meson.build
new file mode 100644 (file)
index 0000000..64c29d8
--- /dev/null
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+deps += ['bbdev', 'bus_vdev', 'ring']
+name = 'bbdev_null'
+allow_experimental_apis = true
+sources = files('bbdev_null.c')
index bffaa9d..800e5cd 100644 (file)
@@ -14,7 +14,6 @@ CFLAGS := -I$(SRCDIR) $(CFLAGS)
 CFLAGS += -O3 $(WERROR_FLAGS)
 CFLAGS += -Wno-pointer-arith
 CFLAGS += -Wno-cast-qual
-CFLAGS += -D _GNU_SOURCE
 CFLAGS += -I$(RTE_BUS_DPAA)/
 CFLAGS += -I$(RTE_BUS_DPAA)/include
 CFLAGS += -I$(RTE_BUS_DPAA)/base/qbman
@@ -24,7 +23,7 @@ CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
 # versioning export map
 EXPORT_MAP := rte_bus_dpaa_version.map
 
-LIBABIVER := 1
+LIBABIVER := 2
 
 # all source are stored in SRCS-y
 #
@@ -48,5 +47,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_DPAA_BUS) += \
 LDLIBS += -lpthread
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev
+LDLIBS += -lrte_common_dpaax
 
 include $(RTE_SDK)/mk/rte.lib.mk
index 031c6f1..6b52242 100644 (file)
@@ -21,7 +21,7 @@
 /* This data structure contains all configuration information
  * related to usage of DPAA devices.
  */
-struct netcfg_info *netcfg;
+static struct netcfg_info *netcfg;
 /* fd to open a socket for making ioctl request to disable/enable shared
  *  interfaces.
  */
index b14b590..750b756 100644 (file)
@@ -23,7 +23,7 @@ static void *bman_ccsr_map;
 /* Portal driver */
 /*****************/
 
-static __thread int fd = -1;
+static __thread int bmfd = -1;
 static __thread struct bm_portal_config pcfg;
 static __thread struct dpaa_ioctl_portal_map map = {
        .type = dpaa_portal_bman
@@ -70,14 +70,14 @@ static int fsl_bman_portal_init(uint32_t idx, int is_shared)
        pcfg.index = map.index;
        bman_depletion_fill(&pcfg.mask);
 
-       fd = open(BMAN_PORTAL_IRQ_PATH, O_RDONLY);
-       if (fd == -1) {
+       bmfd = open(BMAN_PORTAL_IRQ_PATH, O_RDONLY);
+       if (bmfd == -1) {
                pr_err("BMan irq init failed");
                process_portal_unmap(&map.addr);
                return -EBUSY;
        }
        /* Use the IRQ FD as a unique IRQ number */
-       pcfg.irq = fd;
+       pcfg.irq = bmfd;
 
        portal = bman_create_affine_portal(&pcfg);
        if (!portal) {
@@ -90,7 +90,7 @@ static int fsl_bman_portal_init(uint32_t idx, int is_shared)
        /* Set the IRQ number */
        irq_map.type = dpaa_portal_bman;
        irq_map.portal_cinh = map.addr.cinh;
-       process_portal_irq_map(fd, &irq_map);
+       process_portal_irq_map(bmfd, &irq_map);
        return 0;
 }
 
@@ -99,7 +99,7 @@ static int fsl_bman_portal_finish(void)
        __maybe_unused const struct bm_portal_config *cfg;
        int ret;
 
-       process_portal_irq_unmap(fd);
+       process_portal_irq_unmap(bmfd);
 
        cfg = bman_destroy_affine_portal();
        DPAA_BUG_ON(cfg != &pcfg);
@@ -109,6 +109,11 @@ static int fsl_bman_portal_finish(void)
        return ret;
 }
 
+int bman_thread_fd(void)
+{
+       return bmfd;
+}
+
 int bman_thread_init(void)
 {
        /* Convert from contiguous/virtual cpu numbering to real cpu when
index 7c17027..dc64d08 100644 (file)
@@ -852,11 +852,9 @@ mr_loop:
                        case QM_MR_VERB_FQPN:
                                /* Parked */
 #ifdef CONFIG_FSL_QMAN_FQ_LOOKUP
-                               fq = get_fq_table_entry(
-                                       be32_to_cpu(msg->fq.contextB));
+                               fq = get_fq_table_entry(msg->fq.contextB);
 #else
-                               fq = (void *)(uintptr_t)
-                                       be32_to_cpu(msg->fq.contextB);
+                               fq = (void *)(uintptr_t)msg->fq.contextB;
 #endif
                                fq_state_change(p, fq, msg, verb);
                                if (fq->cb.fqs)
@@ -967,7 +965,6 @@ static inline unsigned int __poll_portal_fast(struct qman_portal *p,
                *shadow = *dq;
                dq = shadow;
                shadow->fqid = be32_to_cpu(shadow->fqid);
-               shadow->contextB = be32_to_cpu(shadow->contextB);
                shadow->seqnum = be16_to_cpu(shadow->seqnum);
                hw_fd_to_cpu(&shadow->fd);
 #endif
@@ -1040,6 +1037,50 @@ static inline unsigned int __poll_portal_fast(struct qman_portal *p,
        return limit;
 }
 
+int qman_irqsource_add(u32 bits)
+{
+       struct qman_portal *p = get_affine_portal();
+
+       bits = bits & QM_PIRQ_VISIBLE;
+
+       /* Clear any previously remaining interrupt conditions in
+        * QCSP_ISR. This prevents raising a false interrupt when
+        * interrupt conditions are enabled in QCSP_IER.
+        */
+       qm_isr_status_clear(&p->p, bits);
+       dpaa_set_bits(bits, &p->irq_sources);
+       qm_isr_enable_write(&p->p, p->irq_sources);
+
+
+       return 0;
+}
+
+int qman_irqsource_remove(u32 bits)
+{
+       struct qman_portal *p = get_affine_portal();
+       u32 ier;
+
+       /* Our interrupt handler only processes+clears status register bits that
+        * are in p->irq_sources. As we're trimming that mask, if one of them
+        * were to assert in the status register just before we remove it from
+        * the enable register, there would be an interrupt-storm when we
+        * release the IRQ lock. So we wait for the enable register update to
+        * take effect in h/w (by reading it back) and then clear all other bits
+        * in the status register. I.e. we clear them from ISR once it's certain
+        * IER won't allow them to reassert.
+        */
+
+       bits &= QM_PIRQ_VISIBLE;
+       dpaa_clear_bits(bits, &p->irq_sources);
+       qm_isr_enable_write(&p->p, p->irq_sources);
+       ier = qm_isr_enable_read(&p->p);
+       /* Using "~ier" (rather than "bits" or "~p->irq_sources") creates a
+        * data-dependency, ie. to protect against re-ordering.
+        */
+       qm_isr_status_clear(&p->p, ~ier);
+       return 0;
+}
+
 u16 qman_affine_channel(int cpu)
 {
        if (cpu < 0) {
@@ -1092,9 +1133,9 @@ unsigned int qman_portal_poll_rx(unsigned int poll_limit,
 
                /* SDQCR: context_b points to the FQ */
 #ifdef CONFIG_FSL_QMAN_FQ_LOOKUP
-               fq = qman_fq_lookup_table[be32_to_cpu(dq[rx_number]->contextB)];
+               fq = qman_fq_lookup_table[dq[rx_number]->contextB];
 #else
-               fq = (void *)be32_to_cpu(dq[rx_number]->contextB);
+               fq = (void *)dq[rx_number]->contextB;
 #endif
                if (fq->cb.dqrr_prepare)
                        fq->cb.dqrr_prepare(shadow[rx_number],
@@ -1114,6 +1155,14 @@ unsigned int qman_portal_poll_rx(unsigned int poll_limit,
        return rx_number;
 }
 
+void qman_clear_irq(void)
+{
+       struct qman_portal *p = get_affine_portal();
+       u32 clear = QM_DQAVAIL_MASK | (p->irq_sources &
+               ~(QM_PIRQ_CSCI | QM_PIRQ_CCSCI));
+       qm_isr_status_clear(&p->p, clear);
+}
+
 u32 qman_portal_dequeue(struct rte_event ev[], unsigned int poll_limit,
                        void **bufs)
 {
@@ -1143,7 +1192,6 @@ u32 qman_portal_dequeue(struct rte_event ev[], unsigned int poll_limit,
                *shadow = *dq;
                dq = shadow;
                shadow->fqid = be32_to_cpu(shadow->fqid);
-               shadow->contextB = be32_to_cpu(shadow->contextB);
                shadow->seqnum = be16_to_cpu(shadow->seqnum);
                hw_fd_to_cpu(&shadow->fd);
 #endif
@@ -1208,7 +1256,6 @@ struct qm_dqrr_entry *qman_dequeue(struct qman_fq *fq)
        *shadow = *dq;
        dq = shadow;
        shadow->fqid = be32_to_cpu(shadow->fqid);
-       shadow->contextB = be32_to_cpu(shadow->contextB);
        shadow->seqnum = be16_to_cpu(shadow->seqnum);
        hw_fd_to_cpu(&shadow->fd);
 #endif
@@ -1504,7 +1551,7 @@ int qman_init_fq(struct qman_fq *fq, u32 flags, struct qm_mcc_initfq *opts)
 
                mcc->initfq.we_mask |= QM_INITFQ_WE_CONTEXTB;
 #ifdef CONFIG_FSL_QMAN_FQ_LOOKUP
-               mcc->initfq.fqd.context_b = fq->key;
+               mcc->initfq.fqd.context_b = cpu_to_be32(fq->key);
 #else
                mcc->initfq.fqd.context_b = (u32)(uintptr_t)fq;
 #endif
@@ -2186,11 +2233,6 @@ int qman_enqueue_multi(struct qman_fq *fq,
        /* try to send as many frames as possible */
        while (eqcr->available && frames_to_send--) {
                eq->fqid = fq->fqid_le;
-#ifdef CONFIG_FSL_QMAN_FQ_LOOKUP
-               eq->tag = cpu_to_be32(fq->key);
-#else
-               eq->tag = cpu_to_be32((u32)(uintptr_t)fq);
-#endif
                eq->fd.opaque_addr = fd->opaque_addr;
                eq->fd.addr = cpu_to_be40(fd->addr);
                eq->fd.status = cpu_to_be32(fd->status);
index f6ecd6b..ba15339 100644 (file)
@@ -113,6 +113,11 @@ static int fsl_qman_portal_finish(void)
        return ret;
 }
 
+int qman_thread_fd(void)
+{
+       return qmfd;
+}
+
 int qman_thread_init(void)
 {
        /* Convert from contiguous/virtual cpu numbering to real cpu when
@@ -135,7 +140,7 @@ void qman_thread_irq(void)
         * rather than breaking that encapsulation I am simply hard-coding the
         * offset to the inhibit register here.
         */
-       out_be32(qpcfg.addr_virt[DPAA_PORTAL_CI] + 0xe0c, 0);
+       out_be32(qpcfg.addr_virt[DPAA_PORTAL_CI] + 0x36C0, 0);
 }
 
 struct qman_portal *fsl_qman_portal_create(void)
index 16fabd1..203f60d 100644 (file)
@@ -34,6 +34,7 @@
 
 #include <rte_dpaa_bus.h>
 #include <rte_dpaa_logs.h>
+#include <dpaax_iova_table.h>
 
 #include <fsl_usd.h>
 #include <fsl_qman.h>
@@ -46,7 +47,7 @@ int dpaa_logtype_mempool;
 int dpaa_logtype_pmd;
 int dpaa_logtype_eventdev;
 
-struct rte_dpaa_bus rte_dpaa_bus;
+static struct rte_dpaa_bus rte_dpaa_bus;
 struct netcfg_info *dpaa_netcfg;
 
 /* define a variable to hold the portal_key, once created.*/
@@ -165,6 +166,8 @@ dpaa_create_device_list(void)
                        goto cleanup;
                }
 
+               dev->device.bus = &rte_dpaa_bus.bus;
+
                cfg = &dpaa_netcfg->port_cfg[i];
                fman_intf = cfg->fman_if;
 
@@ -546,6 +549,9 @@ rte_dpaa_bus_probe(void)
                fclose(svr_file);
        }
 
+       /* And initialize the PA->VA translation table */
+       dpaax_iova_table_populate();
+
        /* For each registered driver, and device, call the driver->probe */
        TAILQ_FOREACH(dev, &rte_dpaa_bus.device_list, next) {
                TAILQ_FOREACH(drv, &rte_dpaa_bus.driver_list, next) {
@@ -553,6 +559,9 @@ rte_dpaa_bus_probe(void)
                        if (ret)
                                continue;
 
+                       if (rte_dev_is_probed(&dev->device))
+                               continue;
+
                        if (!drv->probe ||
                            (dev->device.devargs &&
                            dev->device.devargs->policy == RTE_DEV_BLACKLISTED))
@@ -563,8 +572,12 @@ rte_dpaa_bus_probe(void)
                            dev->device.devargs->policy ==
                            RTE_DEV_WHITELISTED)) {
                                ret = drv->probe(drv, dev);
-                               if (ret)
+                               if (ret) {
                                        DPAA_BUS_ERR("Unable to probe.\n");
+                               } else {
+                                       dev->driver = drv;
+                                       dev->device.driver = &drv->driver;
+                               }
                        }
                        break;
                }
@@ -611,7 +624,7 @@ rte_dpaa_get_iommu_class(void)
        return RTE_IOVA_PA;
 }
 
-struct rte_dpaa_bus rte_dpaa_bus = {
+static struct rte_dpaa_bus rte_dpaa_bus = {
        .bus = {
                .scan = rte_dpaa_bus_scan,
                .probe = rte_dpaa_bus_probe,
index 92241d2..4122657 100644 (file)
@@ -57,8 +57,9 @@
 #ifndef __packed
 #define __packed       __rte_packed
 #endif
+#ifndef noinline
 #define noinline       __attribute__((noinline))
-
+#endif
 #define L1_CACHE_BYTES 64
 #define ____cacheline_aligned __attribute__((aligned(L1_CACHE_BYTES)))
 #define __stringify_1(x) #x
                printf(fmt, ##args); \
                fflush(stdout); \
        } while (0)
-
+#ifndef pr_crit
 #define pr_crit(fmt, args...)   prflush("CRIT:" fmt, ##args)
+#endif
+#ifndef pr_err
 #define pr_err(fmt, args...)    prflush("ERR:" fmt, ##args)
+#endif
+#ifndef pr_warn
 #define pr_warn(fmt, args...)   prflush("WARN:" fmt, ##args)
+#endif
+#ifndef pr_info
 #define pr_info(fmt, args...)   prflush(fmt, ##args)
-
-#ifdef RTE_LIBRTE_DPAA_DEBUG_BUS
-#ifdef pr_debug
-#undef pr_debug
 #endif
+#ifndef pr_debug
+#ifdef RTE_LIBRTE_DPAA_DEBUG_BUS
 #define pr_debug(fmt, args...) printf(fmt, ##args)
 #else
 #define pr_debug(fmt, args...) {}
 #endif
+#endif
 
 #define DPAA_BUG_ON(x) RTE_ASSERT(x)
 
@@ -256,7 +262,9 @@ __bswap_24(uint32_t x)
 #define be16_to_cpu(x) rte_be_to_cpu_16(x)
 
 #define cpu_to_be64(x) rte_cpu_to_be_64(x)
+#if !defined(cpu_to_be32)
 #define cpu_to_be32(x) rte_cpu_to_be_32(x)
+#endif
 #define cpu_to_be16(x) rte_cpu_to_be_16(x)
 
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
index bf162f3..08ad630 100644 (file)
@@ -42,9 +42,9 @@ struct fman_crc64_t {
        uint64_t initial;
        uint64_t table[1 << 8];
 };
-extern struct fman_crc64_t FMAN_CRC64_ECMA_182;
+extern struct fman_crc64_t fman_crc64_ecma_182;
 #define DECLARE_FMAN_CRC64_TABLE() \
-struct fman_crc64_t FMAN_CRC64_ECMA_182 = { \
+struct fman_crc64_t fman_crc64_ecma_182 = { \
        0xFFFFFFFFFFFFFFFFULL, \
        { \
                0x0000000000000000ULL, 0xb32e4cbe03a75f6fULL, \
@@ -183,7 +183,7 @@ struct fman_crc64_t FMAN_CRC64_ECMA_182 = { \
  */
 static inline uint64_t fman_crc64_init(void)
 {
-       return FMAN_CRC64_ECMA_182.initial;
+       return fman_crc64_ecma_182.initial;
 }
 
 /* Updates the CRC with arbitrary data */
@@ -192,7 +192,7 @@ static inline uint64_t fman_crc64_update(uint64_t crc,
 {
        uint8_t *p = data;
        while (len--)
-               crc = FMAN_CRC64_ECMA_182.table[(crc ^ *(p++)) & 0xff] ^
+               crc = fman_crc64_ecma_182.table[(crc ^ *(p++)) & 0xff] ^
                                (crc >> 8);
        return crc;
 }
index b18cf03..e438414 100644 (file)
@@ -1315,6 +1315,26 @@ int qman_get_portal_index(void);
 u32 qman_portal_dequeue(struct rte_event ev[], unsigned int poll_limit,
                        void **bufs);
 
+/**
+ * qman_irqsource_add - add processing sources to be interrupt-driven
+ * @bits: bitmask of QM_PIRQ_**I processing sources
+ *
+ * Adds processing sources that should be interrupt-driven (rather than
+ * processed via qman_poll_***() functions). Returns zero for success, or
+ * -EINVAL if the current CPU is sharing a portal hosted on another CPU.
+ */
+int qman_irqsource_add(u32 bits);
+
+/**
+ * qman_irqsource_remove - remove processing sources from being interrupt-driven
+ * @bits: bitmask of QM_PIRQ_**I processing sources
+ *
+ * Removes processing sources from being interrupt-driven, so that they will
+ * instead be processed via qman_poll_***() functions. Returns zero for success,
+ * or -EINVAL if the current CPU is sharing a portal hosted on another CPU.
+ */
+int qman_irqsource_remove(u32 bits);
+
 /**
  * qman_affine_channel - return the channel ID of a portal
  * @cpu: the cpu whose affine portal is the subject of the query
index e183617..ec1ab7c 100644 (file)
@@ -55,6 +55,10 @@ int qman_free_raw_portal(struct dpaa_raw_portal *portal);
 int bman_allocate_raw_portal(struct dpaa_raw_portal *portal);
 int bman_free_raw_portal(struct dpaa_raw_portal *portal);
 
+/* Obtain thread-local UIO file-descriptors */
+int qman_thread_fd(void);
+int bman_thread_fd(void);
+
 /* Post-process interrupts. NB, the kernel IRQ handler disables the interrupt
  * line before notifying us, and this post-processing re-enables it once
  * processing is complete. As such, it is essential to call this before going
@@ -63,6 +67,8 @@ int bman_free_raw_portal(struct dpaa_raw_portal *portal);
 void qman_thread_irq(void);
 void bman_thread_irq(void);
 
+void qman_clear_irq(void);
+
 /* Global setup */
 int qman_global_init(void);
 int bman_global_init(void);
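
Taken together, these exports allow an interrupt-driven wait on the affine
portal. A minimal sketch, assuming ``QM_PIRQ_DQRI`` as the processing source
and plain ``poll()`` plumbing (illustrative, not part of this patch):

    #include <poll.h>

    /* Sleep until the affine QMan portal raises a dequeue interrupt. */
    static void wait_for_portal_work(void)
    {
            struct pollfd pfd;

            qman_irqsource_add(QM_PIRQ_DQRI);    /* DQRR becomes interrupt-driven */

            pfd.fd = qman_thread_fd();           /* thread-local UIO fd */
            pfd.events = POLLIN;
            poll(&pfd, 1, -1);                   /* block until the IRQ fires */

            qman_thread_irq();                   /* re-enable the inhibited line */
            qman_irqsource_remove(QM_PIRQ_DQRI); /* return to polled dequeues */
    }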
index d10b62c..1fcb4e9 100644 (file)
@@ -1,11 +1,13 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright 2018 NXP
 
+version = 2
+
 if host_machine.system() != 'linux'
         build = false
 endif
 
-deps += ['eventdev']
+deps += ['common_dpaax', 'eventdev']
 sources = files('base/fman/fman.c',
                'base/fman/fman_hw.c',
                'base/fman/netcfg_layer.c',
@@ -26,4 +28,3 @@ if cc.has_argument('-Wno-cast-qual')
 endif
 
 includes += include_directories('include', 'base/qbman')
-cflags += ['-D_GNU_SOURCE']
index 7d6d624..70076c7 100644 (file)
@@ -95,10 +95,24 @@ DPDK_18.02 {
 
 DPDK_18.08 {
        global:
-
        fman_if_get_sg_enable;
        fman_if_set_sg;
        of_get_mac_address;
 
        local: *;
 } DPDK_18.02;
+
+DPDK_18.11 {
+       global:
+       bman_thread_irq;
+       fman_if_get_sg_enable;
+       fman_if_set_sg;
+       qman_clear_irq;
+
+       qman_irqsource_add;
+       qman_irqsource_remove;
+       qman_thread_fd;
+       qman_thread_irq;
+
+       local: *;
+} DPDK_18.08;
index 15dc6a4..1d580a0 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <rte_bus.h>
 #include <rte_mempool.h>
+#include <dpaax_iova_table.h>
 
 #include <fsl_usd.h>
 #include <fsl_qman.h>
@@ -110,6 +111,11 @@ extern struct dpaa_memseg_list rte_dpaa_memsegs;
 static inline void *rte_dpaa_mem_ptov(phys_addr_t paddr)
 {
        struct dpaa_memseg *ms;
+       void *va;
+
+       va = dpaax_iova_table_get_va(paddr);
+       if (likely(va != NULL))
+               return va;
 
        /* Check if the address is already part of the memseg list internally
         * maintained by the dpaa driver.
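
The dpaax table lookup is attempted first because it is a simple in-memory
array populated once at bus probe time; the existing memseg-list walk is kept
only as a fallback for addresses missing from that table.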
index 515d0f5..218d9bd 100644 (file)
@@ -19,12 +19,13 @@ CFLAGS += -I$(RTE_SDK)/drivers/bus/fslmc/qbman/include
 CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev
+LDLIBS += -lrte_common_dpaax
 
 # versioning export map
 EXPORT_MAP := rte_bus_fslmc_version.map
 
 # library version
-LIBABIVER := 1
+LIBABIVER := 2
 
 SRCS-$(CONFIG_RTE_LIBRTE_FSLMC_BUS) += \
         qbman/qbman_portal.c \
index d2900ed..89af938 100644 (file)
@@ -20,6 +20,8 @@
 #include <fslmc_vfio.h>
 #include "fslmc_logs.h"
 
+#include <dpaax_iova_table.h>
+
 int dpaa2_logtype_bus;
 
 #define VFIO_IOMMU_GROUP_PATH "/sys/kernel/iommu_groups"
@@ -161,6 +163,8 @@ scan_one_fslmc_device(char *dev_name)
                return -ENOMEM;
        }
 
+       dev->device.bus = &rte_fslmc_bus.bus;
+
        /* Parse the device name and ID */
        t_ptr = strtok(dup_dev_name, ".");
        if (!t_ptr) {
@@ -375,6 +379,19 @@ rte_fslmc_probe(void)
 
        probe_all = rte_fslmc_bus.bus.conf.scan_mode != RTE_BUS_SCAN_WHITELIST;
 
+       /* In case of PA, the FD addresses returned by qbman APIs are physical
+        * addresses, which need conversion into equivalent VA addresses for
+        * rte_mbuf. For that, a table (a serial array in memory) is used to
+        * increase translation efficiency.
+        * This has to be done before probe, as some device initialization
+        * during probe allocates memory (dpaa2_sec) which needs to be pinned
+        * to this table.
+        *
+        * Errors are ignored, as relevant logs are handled within dpaax and
+        * handling of an unavailable dpaax table is transparent to the caller.
+        */
+       dpaax_iova_table_populate();
+
        TAILQ_FOREACH(dev, &rte_fslmc_bus.device_list, next) {
                TAILQ_FOREACH(drv, &rte_fslmc_bus.driver_list, next) {
                        ret = rte_fslmc_match(drv, dev);
@@ -384,6 +401,9 @@ rte_fslmc_probe(void)
                        if (!drv->probe)
                                continue;
 
+                       if (rte_dev_is_probed(&dev->device))
+                               continue;
+
                        if (dev->device.devargs &&
                          dev->device.devargs->policy == RTE_DEV_BLACKLISTED) {
                                DPAA2_BUS_LOG(DEBUG, "%s Blacklisted, skipping",
@@ -396,8 +416,12 @@ rte_fslmc_probe(void)
                           dev->device.devargs->policy ==
                           RTE_DEV_WHITELISTED)) {
                                ret = drv->probe(drv, dev);
-                               if (ret)
+                               if (ret) {
                                        DPAA2_BUS_ERR("Unable to probe");
+                               } else {
+                                       dev->driver = drv;
+                                       dev->device.driver = &drv->driver;
+                               }
                        }
                        break;
                }
@@ -450,6 +474,11 @@ rte_fslmc_driver_unregister(struct rte_dpaa2_driver *driver)
 
        fslmc_bus = driver->fslmc_bus;
 
+       /* Clean up the PA->VA translation table, from wherever this
+        * function is called.
+        */
+       dpaax_iova_table_depopulate();
+
        TAILQ_REMOVE(&fslmc_bus->driver_list, driver, next);
        /* Update Bus references */
        driver->fslmc_bus = NULL;
@@ -490,6 +519,10 @@ rte_dpaa2_get_iommu_class(void)
        if (TAILQ_EMPTY(&rte_fslmc_bus.device_list))
                return RTE_IOVA_DC;
 
+#ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
+       return RTE_IOVA_PA;
+#endif
+
        /* check if all devices on the bus support Virtual addressing or not */
        has_iova_va = fslmc_all_device_support_iova();
 
index 4c2cd2a..493b6e5 100644 (file)
@@ -221,6 +221,13 @@ fslmc_memevent_cb(enum rte_mem_event type, const void *addr, size_t len,
                                        "alloc" : "dealloc",
                                va, virt_addr, iova_addr, map_len);
 
+               /* iova_addr may be set to RTE_BAD_IOVA */
+               if (iova_addr == RTE_BAD_IOVA) {
+                       DPAA2_BUS_DEBUG("Segment has invalid iova, skipping\n");
+                       cur_len += map_len;
+                       continue;
+               }
+
                if (type == RTE_MEM_EVENT_ALLOC)
                        ret = fslmc_map_dma(virt_addr, iova_addr, map_len);
                else
@@ -318,11 +325,15 @@ fslmc_unmap_dma(uint64_t vaddr, uint64_t iovaddr __rte_unused, size_t len)
 
 static int
 fslmc_dmamap_seg(const struct rte_memseg_list *msl __rte_unused,
-                const struct rte_memseg *ms, void *arg)
+               const struct rte_memseg *ms, void *arg)
 {
        int *n_segs = arg;
        int ret;
 
+       /* if IOVA address is invalid, skip */
+       if (ms->iova == RTE_BAD_IOVA)
+               return 0;
+
        ret = fslmc_map_dma(ms->addr_64, ms->iova, ms->len);
        if (ret)
                DPAA2_BUS_ERR("Unable to VFIO map (addr=%p, len=%zu)",
index 0215d22..d910340 100644 (file)
@@ -248,6 +248,16 @@ int dpbp_reset(struct fsl_mc_io *mc_io,
        /* send command to mc*/
        return mc_send_command(mc_io, &cmd);
 }
+/**
+ * dpbp_get_attributes() - Retrieve DPBP attributes.
+ *
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPBP object
+ * @attr:      Returned object's attributes
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ */
 int dpbp_get_attributes(struct fsl_mc_io *mc_io,
                        uint32_t cmd_flags,
                        uint16_t token,
index ff366bf..95edae9 100644 (file)
@@ -265,6 +265,15 @@ int dpci_reset(struct fsl_mc_io *mc_io,
        return mc_send_command(mc_io, &cmd);
 }
 
+/**
+ * dpci_get_attributes() - Retrieve DPCI attributes.
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPCI object
+ * @attr:      Returned object's attributes
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ */
 int dpci_get_attributes(struct fsl_mc_io *mc_io,
                        uint32_t cmd_flags,
                        uint16_t token,
@@ -292,6 +301,94 @@ int dpci_get_attributes(struct fsl_mc_io *mc_io,
        return 0;
 }
 
+/**
+ * dpci_get_peer_attributes() - Retrieve peer DPCI attributes.
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPCI object
+ * @attr:      Returned peer attributes
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ */
+int dpci_get_peer_attributes(struct fsl_mc_io *mc_io,
+                            uint32_t cmd_flags,
+                            uint16_t token,
+                            struct dpci_peer_attr *attr)
+{
+       struct dpci_rsp_get_peer_attr *rsp_params;
+       struct mc_command cmd = { 0 };
+       int err;
+
+       /* prepare command */
+       cmd.header = mc_encode_cmd_header(DPCI_CMDID_GET_PEER_ATTR,
+                                         cmd_flags,
+                                         token);
+
+       /* send command to mc*/
+       err = mc_send_command(mc_io, &cmd);
+       if (err)
+               return err;
+
+       /* retrieve response parameters */
+       rsp_params = (struct dpci_rsp_get_peer_attr *)cmd.params;
+       attr->peer_id = le32_to_cpu(rsp_params->id);
+       attr->num_of_priorities = rsp_params->num_of_priorities;
+
+       return 0;
+}
+
+/**
+ * dpci_get_link_state() - Retrieve the DPCI link state.
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPCI object
+ * @up:                Returned link state; returns '1' if link is up, '0' otherwise
+ *
+ * DPCI can be connected to another DPCI, together they
+ * create a 'link'. In order to use the DPCI Tx and Rx queues,
+ * both objects must be enabled.
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ */
+int dpci_get_link_state(struct fsl_mc_io *mc_io,
+                       uint32_t cmd_flags,
+                       uint16_t token,
+                       int *up)
+{
+       struct dpci_rsp_get_link_state *rsp_params;
+       struct mc_command cmd = { 0 };
+       int err;
+
+       /* prepare command */
+       cmd.header = mc_encode_cmd_header(DPCI_CMDID_GET_LINK_STATE,
+                                         cmd_flags,
+                                         token);
+
+       /* send command to mc*/
+       err = mc_send_command(mc_io, &cmd);
+       if (err)
+               return err;
+
+       /* retrieve response parameters */
+       rsp_params = (struct dpci_rsp_get_link_state *)cmd.params;
+       *up = dpci_get_field(rsp_params->up, UP);
+
+       return 0;
+}
+
+/**
+ * dpci_set_rx_queue() - Set Rx queue configuration
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPCI object
+ * @priority:  Select the queue relative to number of
+ *                     priorities configured at DPCI creation; use
+ *                     DPCI_ALL_QUEUES to configure all Rx queues
+ *                     identically.
+ * @cfg:       Rx queue configuration
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ */
 int dpci_set_rx_queue(struct fsl_mc_io *mc_io,
                      uint32_t cmd_flags,
                      uint16_t token,
@@ -314,6 +411,9 @@ int dpci_set_rx_queue(struct fsl_mc_io *mc_io,
        dpci_set_field(cmd_params->dest_type,
                       DEST_TYPE,
                       cfg->dest_cfg.dest_type);
+       dpci_set_field(cmd_params->dest_type,
+                      ORDER_PRESERVATION,
+                      cfg->order_preservation_en);
 
        /* send command to mc*/
        return mc_send_command(mc_io, &cmd);
@@ -438,3 +538,100 @@ int dpci_get_api_version(struct fsl_mc_io *mc_io,
 
        return 0;
 }
+
+/**
+ * dpci_set_opr() - Set Order Restoration configuration.
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPCI object
+ * @index:     The queue index
+ * @options:   Configuration mode options;
+ *             one of OPR_OPT_CREATE or OPR_OPT_RETIRE
+ * @cfg:       Configuration options for the OPR
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ */
+int dpci_set_opr(struct fsl_mc_io *mc_io,
+                uint32_t cmd_flags,
+                uint16_t token,
+                uint8_t index,
+                uint8_t options,
+                struct opr_cfg *cfg)
+{
+       struct dpci_cmd_set_opr *cmd_params;
+       struct mc_command cmd = { 0 };
+
+       /* prepare command */
+       cmd.header = mc_encode_cmd_header(DPCI_CMDID_SET_OPR,
+                                         cmd_flags,
+                                         token);
+       cmd_params = (struct dpci_cmd_set_opr *)cmd.params;
+       cmd_params->index = index;
+       cmd_params->options = options;
+       cmd_params->oloe = cfg->oloe;
+       cmd_params->oeane = cfg->oeane;
+       cmd_params->olws = cfg->olws;
+       cmd_params->oa = cfg->oa;
+       cmd_params->oprrws = cfg->oprrws;
+
+       /* send command to mc */
+       return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpci_get_opr() - Retrieve Order Restoration config and query.
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPCI object
+ * @index:     The queue index
+ * @cfg:       Returned OPR configuration
+ * @qry:       Returned OPR query
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ */
+int dpci_get_opr(struct fsl_mc_io *mc_io,
+                uint32_t cmd_flags,
+                uint16_t token,
+                uint8_t index,
+                struct opr_cfg *cfg,
+                struct opr_qry *qry)
+{
+       struct dpci_rsp_get_opr *rsp_params;
+       struct dpci_cmd_get_opr *cmd_params;
+       struct mc_command cmd = { 0 };
+       int err;
+
+       /* prepare command */
+       cmd.header = mc_encode_cmd_header(DPCI_CMDID_GET_OPR,
+                                         cmd_flags,
+                                         token);
+       cmd_params = (struct dpci_cmd_get_opr *)cmd.params;
+       cmd_params->index = index;
+
+       /* send command to mc */
+       err = mc_send_command(mc_io, &cmd);
+       if (err)
+               return err;
+
+       /* retrieve response parameters */
+       rsp_params = (struct dpci_rsp_get_opr *)cmd.params;
+       cfg->oloe = rsp_params->oloe;
+       cfg->oeane = rsp_params->oeane;
+       cfg->olws = rsp_params->olws;
+       cfg->oa = rsp_params->oa;
+       cfg->oprrws = rsp_params->oprrws;
+       qry->rip = dpci_get_field(rsp_params->flags, RIP);
+       qry->enable = dpci_get_field(rsp_params->flags, OPR_ENABLE);
+       qry->nesn = le16_to_cpu(rsp_params->nesn);
+       qry->ndsn = le16_to_cpu(rsp_params->ndsn);
+       qry->ea_tseq = le16_to_cpu(rsp_params->ea_tseq);
+       qry->tseq_nlis = dpci_get_field(rsp_params->tseq_nlis, TSEQ_NLIS);
+       qry->ea_hseq = le16_to_cpu(rsp_params->ea_hseq);
+       qry->hseq_nlis = dpci_get_field(rsp_params->hseq_nlis, HSEQ_NLIS);
+       qry->ea_hptr = le16_to_cpu(rsp_params->ea_hptr);
+       qry->ea_tptr = le16_to_cpu(rsp_params->ea_tptr);
+       qry->opr_vid = le16_to_cpu(rsp_params->opr_vid);
+       qry->opr_id = le16_to_cpu(rsp_params->opr_id);
+
+       return 0;
+}
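[Editor's note] A hedged usage sketch of the OPR pair above. Field values
follow the opr_cfg documentation added in fsl_dpopr.h later in this patch;
the helper name and error policy are illustrative only.

    /* Illustrative only: create an order point record on queue index 0 with
     * a 256-frame restoration window, then read back its state.
     */
    static int dpci_opr_example(struct fsl_mc_io *mc_io, uint16_t token)
    {
            struct opr_cfg cfg = {
                    .oprrws = 3,    /* 3 => 256-frame restoration window */
                    .oa     = 0,    /* no auto-advance of NESN */
                    .olws   = 0,    /* late arrivals always rejected */
                    .oeane  = 0,    /* no NESN advance on ORL exhaustion */
                    .oloe   = 0,    /* strict (non-loose) ordering */
            };
            struct opr_qry qry = { 0 };
            int err;

            err = dpci_set_opr(mc_io, CMD_PRI_LOW, token, 0,
                               OPR_OPT_CREATE, &cfg);
            if (err)
                    return err;

            err = dpci_get_opr(mc_io, CMD_PRI_LOW, token, 0, &cfg, &qry);
            if (err)
                    return err;

            /* qry.nesn/qry.ndsn expose the record's sequence-number state */
            return qry.enable ? 0 : -EIO;
    }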
index 3f6e04b..92bd265 100644 (file)
@@ -295,6 +295,36 @@ int dpcon_get_attributes(struct fsl_mc_io *mc_io,
        return 0;
 }
 
+/**
+ * dpcon_set_notification() - Set DPCON notification destination
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPCON object
+ * @cfg:       Notification parameters
+ *
+ * Return:     '0' on Success; Error code otherwise
+ */
+int dpcon_set_notification(struct fsl_mc_io *mc_io,
+                          uint32_t cmd_flags,
+                          uint16_t token,
+                          struct dpcon_notification_cfg *cfg)
+{
+       struct dpcon_cmd_set_notification *dpcon_cmd;
+       struct mc_command cmd = { 0 };
+
+       /* prepare command */
+       cmd.header = mc_encode_cmd_header(DPCON_CMDID_SET_NOTIFICATION,
+                                         cmd_flags,
+                                         token);
+       dpcon_cmd = (struct dpcon_cmd_set_notification *)cmd.params;
+       dpcon_cmd->dpio_id = cpu_to_le32(cfg->dpio_id);
+       dpcon_cmd->priority = cfg->priority;
+       dpcon_cmd->user_ctx = cpu_to_le64(cfg->user_ctx);
+
+       /* send command to mc */
+       return mc_send_command(mc_io, &cmd);
+}
+
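[Editor's note] A short sketch of the new call, under the assumption that
'dpio_id' names a DPIO created with a notification channel; priority 0 is
the highest of the documented 0-7 range. The helper name is illustrative.

    /* Sketch: steer a DPCON's CDAN notifications to a DPIO channel. */
    static int dpcon_notify_example(struct fsl_mc_io *mc_io, uint16_t token,
                                    int dpio_id, uint64_t ctx)
    {
            struct dpcon_notification_cfg cfg = {
                    .dpio_id  = dpio_id,  /* DPCON_INVALID_DPIO_ID disables */
                    .priority = 0,
                    .user_ctx = ctx,      /* echoed in each CDAN message */
            };

            return dpcon_set_notification(mc_io, CMD_PRI_LOW, token, &cfg);
    }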
 /**
  * dpcon_get_api_version - Get Data Path Concentrator API version
  * @mc_io:     Pointer to MC portal's DPCON object
index 528889d..dcb9d51 100644 (file)
@@ -113,6 +113,7 @@ int dpdmai_create(struct fsl_mc_io *mc_io,
                                          cmd_flags,
                                          dprc_token);
        cmd_params = (struct dpdmai_cmd_create *)cmd.params;
+       cmd_params->num_queues = cfg->num_queues;
        cmd_params->priorities[0] = cfg->priorities[0];
        cmd_params->priorities[1] = cfg->priorities[1];
 
@@ -297,6 +298,7 @@ int dpdmai_get_attributes(struct fsl_mc_io *mc_io,
        rsp_params = (struct dpdmai_rsp_get_attr *)cmd.params;
        attr->id = le32_to_cpu(rsp_params->id);
        attr->num_of_priorities = rsp_params->num_of_priorities;
+       attr->num_of_queues = rsp_params->num_of_queues;
 
        return 0;
 }
@@ -306,6 +308,8 @@ int dpdmai_get_attributes(struct fsl_mc_io *mc_io,
  * @mc_io:     Pointer to MC portal's I/O object
  * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
  * @token:     Token of DPDMAI object
+ * @queue_idx: Rx queue index; accepted values are from 0 to num_queues - 1,
+ *             where num_queues is the parameter provided in dpdmai_create()
  * @priority:  Select the queue relative to number of
  *             priorities configured at DPDMAI creation; use
  *             DPDMAI_ALL_QUEUES to configure all Rx queues
@@ -317,6 +321,7 @@ int dpdmai_get_attributes(struct fsl_mc_io *mc_io,
 int dpdmai_set_rx_queue(struct fsl_mc_io *mc_io,
                        uint32_t cmd_flags,
                        uint16_t token,
+                       uint8_t queue_idx,
                        uint8_t priority,
                        const struct dpdmai_rx_queue_cfg *cfg)
 {
@@ -331,6 +336,7 @@ int dpdmai_set_rx_queue(struct fsl_mc_io *mc_io,
        cmd_params->dest_id = cpu_to_le32(cfg->dest_cfg.dest_id);
        cmd_params->dest_priority = cfg->dest_cfg.priority;
        cmd_params->priority = priority;
+       cmd_params->queue_idx = queue_idx;
        cmd_params->user_ctx = cpu_to_le64(cfg->user_ctx);
        cmd_params->options = cpu_to_le32(cfg->options);
        dpdmai_set_field(cmd_params->dest_type,
@@ -346,6 +352,8 @@ int dpdmai_set_rx_queue(struct fsl_mc_io *mc_io,
  * @mc_io:     Pointer to MC portal's I/O object
  * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
  * @token:     Token of DPDMAI object
+ * @queue_idx: Rx queue index; accepted values are from 0 to num_queues - 1,
+ *             where num_queues is the parameter provided in dpdmai_create()
  * @priority:  Select the queue relative to number of
  *             priorities configured at DPDMAI creation
  * @attr:      Returned Rx queue attributes
@@ -355,6 +363,7 @@ int dpdmai_set_rx_queue(struct fsl_mc_io *mc_io,
 int dpdmai_get_rx_queue(struct fsl_mc_io *mc_io,
                        uint32_t cmd_flags,
                        uint16_t token,
+                       uint8_t queue_idx,
                        uint8_t priority,
                        struct dpdmai_rx_queue_attr *attr)
 {
@@ -369,6 +378,7 @@ int dpdmai_get_rx_queue(struct fsl_mc_io *mc_io,
                                          token);
        cmd_params = (struct dpdmai_cmd_get_queue *)cmd.params;
        cmd_params->priority = priority;
+       cmd_params->queue_idx = queue_idx;
 
        /* send command to mc*/
        err = mc_send_command(mc_io, &cmd);
@@ -392,6 +402,8 @@ int dpdmai_get_rx_queue(struct fsl_mc_io *mc_io,
  * @mc_io:     Pointer to MC portal's I/O object
  * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
  * @token:     Token of DPDMAI object
+ * @queue_idx: Tx queue index; accepted values are from 0 to num_queues - 1,
+ *             where num_queues is the parameter provided in dpdmai_create()
  * @priority:  Select the queue relative to number of
  *             priorities configured at DPDMAI creation
  * @attr:      Returned Tx queue attributes
@@ -401,6 +413,7 @@ int dpdmai_get_rx_queue(struct fsl_mc_io *mc_io,
 int dpdmai_get_tx_queue(struct fsl_mc_io *mc_io,
                        uint32_t cmd_flags,
                        uint16_t token,
+                       uint8_t queue_idx,
                        uint8_t priority,
                        struct dpdmai_tx_queue_attr *attr)
 {
@@ -415,6 +428,7 @@ int dpdmai_get_tx_queue(struct fsl_mc_io *mc_io,
                                          token);
        cmd_params = (struct dpdmai_cmd_get_queue *)cmd.params;
        cmd_params->priority = priority;
+       cmd_params->queue_idx = queue_idx;
 
        /* send command to mc*/
        err = mc_send_command(mc_io, &cmd);
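[Editor's note] The hunks above thread a new queue_idx through the DPDMAI
queue calls; the sketch below shows the resulting addressing scheme. It
assumes num_queues matches the dpdmai_create() configuration; the helper
name is illustrative, and the DPDMAI_ALL_QUEUES semantics follow the
@priority documentation above.

    /* Sketch: apply one Rx configuration to every queue. Queues are now
     * selected by index 0..num_queues-1; per the docs above, passing
     * DPDMAI_ALL_QUEUES in the priority argument requests identical
     * configuration across that queue's priorities.
     */
    static int dpdmai_cfg_all_rx(struct fsl_mc_io *mc_io, uint16_t token,
                                 uint8_t num_queues,
                                 const struct dpdmai_rx_queue_cfg *cfg)
    {
            uint8_t q;
            int err;

            for (q = 0; q < num_queues; q++) {
                    err = dpdmai_set_rx_queue(mc_io, CMD_PRI_LOW, token,
                                              q, DPDMAI_ALL_QUEUES, cfg);
                    if (err)
                            return err;
            }
            return 0;
    }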
index 966277c..a3382ed 100644 (file)
@@ -268,6 +268,15 @@ int dpio_reset(struct fsl_mc_io *mc_io,
        return mc_send_command(mc_io, &cmd);
 }
 
+/**
+ * dpio_get_attributes() - Retrieve DPIO attributes
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPIO object
+ * @attr:      Returned object's attributes
+ *
+ * Return:     '0' on Success; Error code otherwise
+ */
 int dpio_get_attributes(struct fsl_mc_io *mc_io,
                        uint32_t cmd_flags,
                        uint16_t token,
index 1118362..9d405b4 100644 (file)
@@ -82,6 +82,7 @@ int dpbp_get_attributes(struct fsl_mc_io *mc_io,
 /**
  * BPSCN write will attempt to allocate into a cache (coherent write)
  */
+#define DPBP_NOTIF_OPT_COHERENT_WRITE  0x00000001
 int dpbp_get_api_version(struct fsl_mc_io *mc_io,
                         uint32_t cmd_flags,
                         uint16_t *major_ver,
index 18402ce..55c9fc9 100644 (file)
@@ -9,13 +9,15 @@
 
 /* DPBP Version */
 #define DPBP_VER_MAJOR                         3
-#define DPBP_VER_MINOR                         3
+#define DPBP_VER_MINOR                         4
 
 /* Command versioning */
 #define DPBP_CMD_BASE_VERSION                  1
+#define DPBP_CMD_VERSION_2                     2
 #define DPBP_CMD_ID_OFFSET                     4
 
 #define DPBP_CMD(id)   ((id << DPBP_CMD_ID_OFFSET) | DPBP_CMD_BASE_VERSION)
+#define DPBP_CMD_V2(id)        ((id << DPBP_CMD_ID_OFFSET) | DPBP_CMD_VERSION_2)
 
 /* Command IDs */
 #define DPBP_CMDID_CLOSE               DPBP_CMD(0x800)
@@ -37,8 +39,8 @@
 #define DPBP_CMDID_GET_IRQ_STATUS      DPBP_CMD(0x016)
 #define DPBP_CMDID_CLEAR_IRQ_STATUS    DPBP_CMD(0x017)
 
-#define DPBP_CMDID_SET_NOTIFICATIONS   DPBP_CMD(0x1b0)
-#define DPBP_CMDID_GET_NOTIFICATIONS   DPBP_CMD(0x1b1)
+#define DPBP_CMDID_SET_NOTIFICATIONS   DPBP_CMD_V2(0x1b0)
+#define DPBP_CMDID_GET_NOTIFICATIONS   DPBP_CMD_V2(0x1b1)
 
 #define DPBP_CMDID_GET_FREE_BUFFERS_NUM        DPBP_CMD(0x1b2)
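[Editor's note] For readers new to the MC command encoding: the command word
packs the id above a 4-bit version nibble, so the V2 switch above changes only
that nibble. A quick check with the values defined in this header:

    /* DPBP_CMD(0x1b0)    == (0x1b0 << 4) | 1 == 0x1b01
     * DPBP_CMD_V2(0x1b0) == (0x1b0 << 4) | 2 == 0x1b02
     *
     * The MC firmware uses the low nibble to pick the command layout, here
     * the variant whose 'options' field is 32 bits wide (see the struct
     * changes below).
     */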
 
@@ -68,8 +70,8 @@ struct dpbp_cmd_set_notifications {
        uint32_t depletion_exit;
        uint32_t surplus_entry;
        uint32_t surplus_exit;
-       uint16_t options;
-       uint16_t pad[3];
+       uint32_t options;
+       uint16_t pad[2];
        uint64_t message_ctx;
        uint64_t message_iova;
 };
@@ -79,8 +81,8 @@ struct dpbp_rsp_get_notifications {
        uint32_t depletion_exit;
        uint32_t surplus_entry;
        uint32_t surplus_exit;
-       uint16_t options;
-       uint16_t pad[3];
+       uint32_t options;
+       uint16_t pad[2];
        uint64_t message_ctx;
        uint64_t message_iova;
 };
index f69ed3f..9af9097 100644 (file)
@@ -6,6 +6,8 @@
 #ifndef __FSL_DPCI_H
 #define __FSL_DPCI_H
 
+#include <fsl_dpopr.h>
+
 /* Data Path Communication Interface API
  * Contains initialization APIs and runtime control APIs for DPCI
  */
@@ -17,7 +19,7 @@ struct fsl_mc_io;
 /**
  * Maximum number of Tx/Rx priorities per DPCI object
  */
-#define DPCI_PRIO_NUM          2
+#define DPCI_PRIO_NUM          4
 
 /**
  * Indicates an invalid frame queue
@@ -106,6 +108,27 @@ int dpci_get_attributes(struct fsl_mc_io *mc_io,
                        uint16_t token,
                        struct dpci_attr *attr);
 
+/**
+ * struct dpci_peer_attr - Structure representing the peer DPCI attributes
+ * @peer_id:           DPCI peer id; if no peer is connected, (-1) is returned
+ * @num_of_priorities: The peer's number of receive priorities; determines the
+ *                     number of transmit priorities for the local DPCI object
+ */
+struct dpci_peer_attr {
+       int peer_id;
+       uint8_t num_of_priorities;
+};
+
+int dpci_get_peer_attributes(struct fsl_mc_io *mc_io,
+                            uint32_t cmd_flags,
+                            uint16_t token,
+                            struct dpci_peer_attr *attr);
+
+int dpci_get_link_state(struct fsl_mc_io *mc_io,
+                       uint32_t cmd_flags,
+                       uint16_t token,
+                       int *up);
+
 /**
  * enum dpci_dest - DPCI destination types
  * @DPCI_DEST_NONE:    Unassigned destination; The queue is set in parked mode
@@ -153,6 +176,11 @@ struct dpci_dest_cfg {
  */
 #define DPCI_QUEUE_OPT_DEST            0x00000002
 
+/**
+ * Set the queue to hold active mode.
+ */
+#define DPCI_QUEUE_OPT_HOLD_ACTIVE     0x00000004
+
 /**
  * struct dpci_rx_queue_cfg - Structure representing RX queue configuration
  * @options:   Flags representing the suggested modifications to the queue;
@@ -163,11 +191,14 @@ struct dpci_dest_cfg {
  *             'options'
  * @dest_cfg:  Queue destination parameters;
  *             valid only if 'DPCI_QUEUE_OPT_DEST' is contained in 'options'
+ * @order_preservation_en: Order preservation configuration for the Rx queue;
+ *             valid only if 'DPCI_QUEUE_OPT_HOLD_ACTIVE' is contained in 'options'
  */
 struct dpci_rx_queue_cfg {
        uint32_t options;
        uint64_t user_ctx;
        struct dpci_dest_cfg dest_cfg;
+       int order_preservation_en;
 };
 
 int dpci_set_rx_queue(struct fsl_mc_io *mc_io,
@@ -217,4 +248,18 @@ int dpci_get_api_version(struct fsl_mc_io *mc_io,
                         uint16_t *major_ver,
                         uint16_t *minor_ver);
 
+int dpci_set_opr(struct fsl_mc_io *mc_io,
+                uint32_t cmd_flags,
+                uint16_t token,
+                uint8_t index,
+                uint8_t options,
+                struct opr_cfg *cfg);
+
+int dpci_get_opr(struct fsl_mc_io *mc_io,
+                uint32_t cmd_flags,
+                uint16_t token,
+                uint8_t index,
+                struct opr_cfg *cfg,
+                struct opr_qry *qry);
+
 #endif /* __FSL_DPCI_H */
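[Editor's note] A minimal sketch tying the new Rx-queue knobs together. It
assumes DPCI_QUEUE_OPT_USER_CTX keeps its usual meaning in this header; the
helper name is illustrative, not part of the API.

    /* Sketch: enable order preservation on one Rx priority. Per the struct
     * documentation above, order_preservation_en is honoured only when
     * DPCI_QUEUE_OPT_HOLD_ACTIVE is present in 'options'.
     */
    static int dpci_rx_with_opr(struct fsl_mc_io *mc_io, uint16_t token,
                                uint8_t priority, uint64_t user_ctx)
    {
            struct dpci_rx_queue_cfg cfg = {
                    .options  = DPCI_QUEUE_OPT_USER_CTX |
                                DPCI_QUEUE_OPT_HOLD_ACTIVE,
                    .user_ctx = user_ctx,
                    .order_preservation_en = 1,
            };

            return dpci_set_rx_queue(mc_io, CMD_PRI_LOW, token,
                                     priority, &cfg);
    }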
index 634248a..92b85a8 100644 (file)
@@ -8,7 +8,7 @@
 
 /* DPCI Version */
 #define DPCI_VER_MAJOR                 3
-#define DPCI_VER_MINOR                 3
+#define DPCI_VER_MINOR                 4
 
 #define DPCI_CMD_BASE_VERSION          1
 #define DPCI_CMD_BASE_VERSION_V2       2
@@ -35,6 +35,8 @@
 #define DPCI_CMDID_GET_PEER_ATTR       DPCI_CMD_V1(0x0e2)
 #define DPCI_CMDID_GET_RX_QUEUE                DPCI_CMD_V1(0x0e3)
 #define DPCI_CMDID_GET_TX_QUEUE                DPCI_CMD_V1(0x0e4)
+#define DPCI_CMDID_SET_OPR             DPCI_CMD_V1(0x0e5)
+#define DPCI_CMDID_GET_OPR             DPCI_CMD_V1(0x0e6)
 
 /* Macros for accessing command fields smaller than 1byte */
 #define DPCI_MASK(field)        \
@@ -90,6 +92,8 @@ struct dpci_rsp_get_link_state {
 
 #define DPCI_DEST_TYPE_SHIFT   0
 #define DPCI_DEST_TYPE_SIZE    4
+#define DPCI_ORDER_PRESERVATION_SHIFT  4
+#define DPCI_ORDER_PRESERVATION_SIZE   1
 
 struct dpci_cmd_set_rx_queue {
        uint32_t dest_id;
@@ -128,5 +132,61 @@ struct dpci_rsp_get_api_version {
        uint16_t minor;
 };
 
+struct dpci_cmd_set_opr {
+       uint16_t pad0;
+       uint8_t index;
+       uint8_t options;
+       uint8_t pad1[7];
+       uint8_t oloe;
+       uint8_t oeane;
+       uint8_t olws;
+       uint8_t oa;
+       uint8_t oprrws;
+};
+
+struct dpci_cmd_get_opr {
+       uint16_t pad;
+       uint8_t index;
+};
+
+#define DPCI_RIP_SHIFT         0
+#define DPCI_RIP_SIZE          1
+#define DPCI_OPR_ENABLE_SHIFT  1
+#define DPCI_OPR_ENABLE_SIZE   1
+#define DPCI_TSEQ_NLIS_SHIFT   0
+#define DPCI_TSEQ_NLIS_SIZE    1
+#define DPCI_HSEQ_NLIS_SHIFT   0
+#define DPCI_HSEQ_NLIS_SIZE    1
+
+struct dpci_rsp_get_opr {
+       uint64_t pad0;
+       /* from LSB: rip:1 enable:1 */
+       uint8_t flags;
+       uint16_t pad1;
+       uint8_t oloe;
+       uint8_t oeane;
+       uint8_t olws;
+       uint8_t oa;
+       uint8_t oprrws;
+       uint16_t nesn;
+       uint16_t pad8;
+       uint16_t ndsn;
+       uint16_t pad2;
+       uint16_t ea_tseq;
+       /* only the LSB */
+       uint8_t tseq_nlis;
+       uint8_t pad3;
+       uint16_t ea_hseq;
+       /* only the LSB */
+       uint8_t hseq_nlis;
+       uint8_t pad4;
+       uint16_t ea_hptr;
+       uint16_t pad5;
+       uint16_t ea_tptr;
+       uint16_t pad6;
+       uint16_t opr_vid;
+       uint16_t pad7;
+       uint16_t opr_id;
+};
 #pragma pack(pop)
 #endif /* _FSL_DPCI_CMD_H */
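[Editor's note] The SHIFT/SIZE pairs above drive the dpci_set_field() and
dpci_get_field() helpers; the expansion is roughly as sketched below (a
paraphrase, not the literal macro text):

    /* byte |= (val & ((1 << DPCI_<FIELD>_SIZE) - 1)) << DPCI_<FIELD>_SHIFT;
     *
     * so in dpci_set_rx_queue() the dest_type byte now carries two fields:
     *   bits 0-3: DEST_TYPE
     *   bit  4  : ORDER_PRESERVATION
     */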
index 36dd5f3..fc0430d 100644 (file)
@@ -81,6 +81,25 @@ int dpcon_get_attributes(struct fsl_mc_io *mc_io,
                         uint16_t token,
                         struct dpcon_attr *attr);
 
+/**
+ * struct dpcon_notification_cfg - Structure representing notification params
+ * @dpio_id:   DPIO object ID; must be configured with a notification channel;
+ *             to disable notifications set it to 'DPCON_INVALID_DPIO_ID';
+ * @priority:  Priority selection within the DPIO channel; valid values
+ *             are 0-7, depending on the number of priorities in that channel
+ * @user_ctx:  User context value provided with each CDAN message
+ */
+struct dpcon_notification_cfg {
+       int dpio_id;
+       uint8_t priority;
+       uint64_t user_ctx;
+};
+
+int dpcon_set_notification(struct fsl_mc_io *mc_io,
+                          uint32_t cmd_flags,
+                          uint16_t token,
+                          struct dpcon_notification_cfg *cfg);
+
 int dpcon_get_api_version(struct fsl_mc_io *mc_io,
                          uint32_t cmd_flags,
                          uint16_t *major_ver,
index 03e46ec..40469cc 100644 (file)
@@ -39,6 +39,7 @@ int dpdmai_close(struct fsl_mc_io *mc_io,
  *     should be configured with 0
  */
 struct dpdmai_cfg {
+       uint8_t num_queues;
        uint8_t priorities[DPDMAI_PRIO_NUM];
 };
 
@@ -78,6 +79,7 @@ int dpdmai_reset(struct fsl_mc_io *mc_io,
 struct dpdmai_attr {
        int id;
        uint8_t num_of_priorities;
+       uint8_t num_of_queues;
 };
 
 int dpdmai_get_attributes(struct fsl_mc_io *mc_io,
@@ -149,6 +151,7 @@ struct dpdmai_rx_queue_cfg {
 int dpdmai_set_rx_queue(struct fsl_mc_io *mc_io,
                        uint32_t cmd_flags,
                        uint16_t token,
+                       uint8_t queue_idx,
                        uint8_t priority,
                        const struct dpdmai_rx_queue_cfg *cfg);
 
@@ -168,6 +171,7 @@ struct dpdmai_rx_queue_attr {
 int dpdmai_get_rx_queue(struct fsl_mc_io *mc_io,
                        uint32_t cmd_flags,
                        uint16_t token,
+                       uint8_t queue_idx,
                        uint8_t priority,
                        struct dpdmai_rx_queue_attr *attr);
 
@@ -183,6 +187,7 @@ struct dpdmai_tx_queue_attr {
 int dpdmai_get_tx_queue(struct fsl_mc_io *mc_io,
                        uint32_t cmd_flags,
                        uint16_t token,
+                       uint8_t queue_idx,
                        uint8_t priority,
                        struct dpdmai_tx_queue_attr *attr);
 
index 618e19e..7e122de 100644 (file)
@@ -7,30 +7,32 @@
 
 /* DPDMAI Version */
 #define DPDMAI_VER_MAJOR               3
-#define DPDMAI_VER_MINOR               2
+#define DPDMAI_VER_MINOR               3
 
 /* Command versioning */
 #define DPDMAI_CMD_BASE_VERSION                1
+#define DPDMAI_CMD_VERSION_2           2
 #define DPDMAI_CMD_ID_OFFSET           4
 
 #define DPDMAI_CMD(id) ((id << DPDMAI_CMD_ID_OFFSET) | DPDMAI_CMD_BASE_VERSION)
+#define DPDMAI_CMD_V2(id) ((id << DPDMAI_CMD_ID_OFFSET) | DPDMAI_CMD_VERSION_2)
 
 /* Command IDs */
 #define DPDMAI_CMDID_CLOSE             DPDMAI_CMD(0x800)
 #define DPDMAI_CMDID_OPEN              DPDMAI_CMD(0x80E)
-#define DPDMAI_CMDID_CREATE            DPDMAI_CMD(0x90E)
+#define DPDMAI_CMDID_CREATE            DPDMAI_CMD_V2(0x90E)
 #define DPDMAI_CMDID_DESTROY           DPDMAI_CMD(0x98E)
 #define DPDMAI_CMDID_GET_API_VERSION   DPDMAI_CMD(0xa0E)
 
 #define DPDMAI_CMDID_ENABLE            DPDMAI_CMD(0x002)
 #define DPDMAI_CMDID_DISABLE           DPDMAI_CMD(0x003)
-#define DPDMAI_CMDID_GET_ATTR          DPDMAI_CMD(0x004)
+#define DPDMAI_CMDID_GET_ATTR          DPDMAI_CMD_V2(0x004)
 #define DPDMAI_CMDID_RESET             DPDMAI_CMD(0x005)
 #define DPDMAI_CMDID_IS_ENABLED                DPDMAI_CMD(0x006)
 
-#define DPDMAI_CMDID_SET_RX_QUEUE      DPDMAI_CMD(0x1A0)
-#define DPDMAI_CMDID_GET_RX_QUEUE      DPDMAI_CMD(0x1A1)
-#define DPDMAI_CMDID_GET_TX_QUEUE      DPDMAI_CMD(0x1A2)
+#define DPDMAI_CMDID_SET_RX_QUEUE      DPDMAI_CMD_V2(0x1A0)
+#define DPDMAI_CMDID_GET_RX_QUEUE      DPDMAI_CMD_V2(0x1A1)
+#define DPDMAI_CMDID_GET_TX_QUEUE      DPDMAI_CMD_V2(0x1A2)
 
 /* Macros for accessing command fields smaller than 1byte */
 #define DPDMAI_MASK(field)        \
@@ -47,7 +49,7 @@ struct dpdmai_cmd_open {
 };
 
 struct dpdmai_cmd_create {
-       uint8_t pad;
+       uint8_t num_queues;
        uint8_t priorities[2];
 };
 
@@ -66,6 +68,7 @@ struct dpdmai_rsp_is_enabled {
 struct dpdmai_rsp_get_attr {
        uint32_t id;
        uint8_t num_of_priorities;
+       uint8_t num_of_queues;
 };
 
 #define DPDMAI_DEST_TYPE_SHIFT 0
@@ -77,7 +80,7 @@ struct dpdmai_cmd_set_rx_queue {
        uint8_t priority;
        /* from LSB: dest_type:4 */
        uint8_t dest_type;
-       uint8_t pad;
+       uint8_t queue_idx;
        uint64_t user_ctx;
        uint32_t options;
 };
@@ -85,6 +88,7 @@ struct dpdmai_cmd_set_rx_queue {
 struct dpdmai_cmd_get_queue {
        uint8_t pad[5];
        uint8_t priority;
+       uint8_t queue_idx;
 };
 
 struct dpdmai_rsp_get_rx_queue {
index afaf9b7..8559bef 100644 (file)
@@ -18,7 +18,7 @@ struct fsl_mc_io;
  * Management Complex firmware version information
  */
 #define MC_VER_MAJOR 10
-#define MC_VER_MINOR 3
+#define MC_VER_MINOR 10
 
 /**
  * struct mc_version
diff --git a/drivers/bus/fslmc/mc/fsl_dpopr.h b/drivers/bus/fslmc/mc/fsl_dpopr.h
new file mode 100644 (file)
index 0000000..fd727e0
--- /dev/null
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+ *
+ * Copyright 2013-2015 Freescale Semiconductor Inc.
+ * Copyright 2018 NXP
+ *
+ */
+#ifndef __FSL_DPOPR_H_
+#define __FSL_DPOPR_H_
+
+/** @addtogroup dpopr Data Path Order Restoration API
+ * Contains initialization and runtime APIs for Order Restoration
+ * @{
+ */
+
+/** Order Restoration properties */
+
+/**
+ * Create a new Order Point Record option
+ */
+#define OPR_OPT_CREATE 0x1
+/**
+ * Retire an existing Order Point Record option
+ */
+#define OPR_OPT_RETIRE 0x2
+
+/**
+ * struct opr_cfg - Structure representing OPR configuration
+ * @oprrws: Order point record (OPR) restoration window size (0 to 5)
+ *                     0 - Window size is 32 frames.
+ *                     1 - Window size is 64 frames.
+ *                     2 - Window size is 128 frames.
+ *                     3 - Window size is 256 frames.
+ *                     4 - Window size is 512 frames.
+ *                     5 - Window size is 1024 frames.
+ * @oa: OPR auto advance NESN window size (0 disabled, 1 enabled)
+ * @olws: OPR acceptable late arrival window size (0 to 3)
+ *                     0 - Disabled. Late arrivals are always rejected.
+ *                     1 - Window size is 32 frames.
+ *                     2 - Window size is the same as the OPR restoration
+ *                     window size configured in the OPRRWS field.
+ *                     3 - Window size is 8192 frames.
+ *                     Late arrivals are always accepted.
+ * @oeane: Order restoration list (ORL) resource exhaustion
+ *                     advance NESN enable (0 disabled, 1 enabled)
+ * @oloe: OPR loose ordering enable (0 disabled, 1 enabled)
+ */
+struct opr_cfg {
+       uint8_t oprrws;
+       uint8_t oa;
+       uint8_t olws;
+       uint8_t oeane;
+       uint8_t oloe;
+};
+
+/**
+ * struct opr_qry - Structure representing OPR query results
+ * @enable: Enabled state
+ * @rip: Retirement In Progress
+ * @ndsn: Next dispensed sequence number
+ * @nesn: Next expected sequence number
+ * @ea_hseq: Early arrival head sequence number
+ * @hseq_nlis: HSEQ not last in sequence
+ * @ea_tseq: Early arrival tail sequence number
+ * @tseq_nlis: TSEQ not last in sequence
+ * @ea_tptr: Early arrival tail pointer
+ * @ea_hptr: Early arrival head pointer
+ * @opr_id: Order Point Record ID
+ * @opr_vid: Order Point Record Virtual ID
+ */
+struct opr_qry {
+       char enable;
+       char rip;
+       uint16_t ndsn;
+       uint16_t nesn;
+       uint16_t ea_hseq;
+       char hseq_nlis;
+       uint16_t ea_tseq;
+       char tseq_nlis;
+       uint16_t ea_tptr;
+       uint16_t ea_hptr;
+       uint16_t opr_id;
+       uint16_t opr_vid;
+};
+
+#endif /* __FSL_DPOPR_H_ */
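[Editor's note] A small helper one might layer on this header (not part of
the API) to make the oprrws encoding documented above self-checking:

    #include <stdint.h>

    /* Translate the documented oprrws encoding (0..5) into frames:
     * 0 -> 32, 1 -> 64, ... 5 -> 1024; anything else is invalid.
     */
    static inline unsigned int opr_window_frames(uint8_t oprrws)
    {
            return oprrws <= 5 ? 32u << oprrws : 0;
    }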
index 22a56a6..4b05215 100644 (file)
@@ -1,11 +1,13 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright 2018 NXP
 
+version = 2
+
 if host_machine.system() != 'linux'
         build = false
 endif
 
-deps += ['eventdev', 'kvargs']
+deps += ['common_dpaax', 'eventdev', 'kvargs']
 sources = files('fslmc_bus.c',
                'fslmc_vfio.c',
                'mc/dpbp.c',
@@ -24,4 +26,3 @@ sources = files('fslmc_bus.c',
 allow_experimental_apis = true
 
 includes += include_directories('mc', 'qbman/include', 'portal')
-cflags += ['-D_GNU_SOURCE']
index 39c5adf..db49d63 100644 (file)
 #include "portal/dpaa2_hw_pvt.h"
 #include "portal/dpaa2_hw_dpio.h"
 
+/* List of all the memseg information locally maintained in the dpaa2 driver.
+ * This optimizes PA-to-VA searches until a better mechanism (algorithm) is
+ * available.
+ */
+struct dpaa2_memseg_list rte_dpaa2_memsegs
+       = TAILQ_HEAD_INITIALIZER(rte_dpaa2_memsegs);
+
 TAILQ_HEAD(dpbp_dev_list, dpaa2_dpbp_dev);
 static struct dpbp_dev_list dpbp_dev_list
        = TAILQ_HEAD_INITIALIZER(dpbp_dev_list); /*!< DPBP device list */
index 99f70be..ce06998 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  *   Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved.
- *   Copyright 2016 NXP
+ *   Copyright 2016-2018 NXP
  *
  */
 #include <unistd.h>
@@ -53,6 +53,11 @@ static uint32_t io_space_count;
 /* Variable to store DPAA2 platform type */
 uint32_t dpaa2_svr_family;
 
+/* Variable to store DPAA2 DQRR size */
+uint8_t dpaa2_dqrr_size;
+/* Variable to store DPAA2 EQCR size */
+uint8_t dpaa2_eqcr_size;
+
 /*Stashing Macros default for LS208x*/
 static int dpaa2_core_cluster_base = 0x04;
 static int dpaa2_cluster_sz = 2;
@@ -125,7 +130,7 @@ static void dpaa2_affine_dpio_intr_to_respective_core(int32_t dpio_id)
                 cpu_mask, token);
        ret = system(command);
        if (ret < 0)
-               DPAA2_BUS_WARN(
+               DPAA2_BUS_DEBUG(
                        "Failed to affine interrupts on respective core");
        else
                DPAA2_BUS_DEBUG(" %s command is executed", command);
@@ -177,68 +182,6 @@ static int dpaa2_dpio_intr_init(struct dpaa2_dpio_dev *dpio_dev)
 }
 #endif
 
-static int
-configure_dpio_qbman_swp(struct dpaa2_dpio_dev *dpio_dev)
-{
-       struct qbman_swp_desc p_des;
-       struct dpio_attr attr;
-
-       dpio_dev->dpio = malloc(sizeof(struct fsl_mc_io));
-       if (!dpio_dev->dpio) {
-               DPAA2_BUS_ERR("Memory allocation failure");
-               return -1;
-       }
-
-       dpio_dev->dpio->regs = dpio_dev->mc_portal;
-       if (dpio_open(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->hw_id,
-                     &dpio_dev->token)) {
-               DPAA2_BUS_ERR("Failed to allocate IO space");
-               free(dpio_dev->dpio);
-               return -1;
-       }
-
-       if (dpio_reset(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token)) {
-               DPAA2_BUS_ERR("Failed to reset dpio");
-               dpio_close(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
-               free(dpio_dev->dpio);
-               return -1;
-       }
-
-       if (dpio_enable(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token)) {
-               DPAA2_BUS_ERR("Failed to Enable dpio");
-               dpio_close(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
-               free(dpio_dev->dpio);
-               return -1;
-       }
-
-       if (dpio_get_attributes(dpio_dev->dpio, CMD_PRI_LOW,
-                               dpio_dev->token, &attr)) {
-               DPAA2_BUS_ERR("DPIO Get attribute failed");
-               dpio_disable(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
-               dpio_close(dpio_dev->dpio, CMD_PRI_LOW,  dpio_dev->token);
-               free(dpio_dev->dpio);
-               return -1;
-       }
-
-       /* Configure & setup SW portal */
-       p_des.block = NULL;
-       p_des.idx = attr.qbman_portal_id;
-       p_des.cena_bar = (void *)(dpio_dev->qbman_portal_ce_paddr);
-       p_des.cinh_bar = (void *)(dpio_dev->qbman_portal_ci_paddr);
-       p_des.irq = -1;
-       p_des.qman_version = attr.qbman_version;
-
-       dpio_dev->sw_portal = qbman_swp_init(&p_des);
-       if (dpio_dev->sw_portal == NULL) {
-               DPAA2_BUS_ERR("QBMan SW Portal Init failed");
-               dpio_close(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
-               free(dpio_dev->dpio);
-               return -1;
-       }
-
-       return 0;
-}
-
 static int
 dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int cpu_id)
 {
@@ -402,15 +345,17 @@ dpaa2_create_dpio_device(int vdev_fd,
                         struct vfio_device_info *obj_info,
                         int object_id)
 {
-       struct dpaa2_dpio_dev *dpio_dev;
+       struct dpaa2_dpio_dev *dpio_dev = NULL;
        struct vfio_region_info reg_info = { .argsz = sizeof(reg_info)};
+       struct qbman_swp_desc p_des;
+       struct dpio_attr attr;
 
        if (obj_info->num_regions < NUM_DPIO_REGIONS) {
                DPAA2_BUS_ERR("Not sufficient number of DPIO regions");
                return -1;
        }
 
-       dpio_dev = rte_malloc(NULL, sizeof(struct dpaa2_dpio_dev),
+       dpio_dev = rte_zmalloc(NULL, sizeof(struct dpaa2_dpio_dev),
                              RTE_CACHE_LINE_SIZE);
        if (!dpio_dev) {
                DPAA2_BUS_ERR("Memory allocation failed for DPIO Device");
@@ -423,45 +368,33 @@ dpaa2_create_dpio_device(int vdev_fd,
        /* Using single portal  for all devices */
        dpio_dev->mc_portal = rte_mcp_ptr_list[MC_PORTAL_INDEX];
 
-       reg_info.index = 0;
-       if (ioctl(vdev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
-               DPAA2_BUS_ERR("vfio: error getting region info");
-               rte_free(dpio_dev);
-               return -1;
+       dpio_dev->dpio = malloc(sizeof(struct fsl_mc_io));
+       if (!dpio_dev->dpio) {
+               DPAA2_BUS_ERR("Memory allocation failure");
+               goto err;
        }
 
-       dpio_dev->ce_size = reg_info.size;
-       dpio_dev->qbman_portal_ce_paddr = (size_t)mmap(NULL, reg_info.size,
-                               PROT_WRITE | PROT_READ, MAP_SHARED,
-                               vdev_fd, reg_info.offset);
-
-       reg_info.index = 1;
-       if (ioctl(vdev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
-               DPAA2_BUS_ERR("vfio: error getting region info");
-               rte_free(dpio_dev);
-               return -1;
+       dpio_dev->dpio->regs = dpio_dev->mc_portal;
+       if (dpio_open(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->hw_id,
+                     &dpio_dev->token)) {
+               DPAA2_BUS_ERR("Failed to allocate IO space");
+               goto err;
        }
 
-       dpio_dev->ci_size = reg_info.size;
-       dpio_dev->qbman_portal_ci_paddr = (size_t)mmap(NULL, reg_info.size,
-                               PROT_WRITE | PROT_READ, MAP_SHARED,
-                               vdev_fd, reg_info.offset);
-
-       if (configure_dpio_qbman_swp(dpio_dev)) {
-               DPAA2_BUS_ERR(
-                            "Fail to configure the dpio qbman portal for %d",
-                            dpio_dev->hw_id);
-               rte_free(dpio_dev);
-               return -1;
+       if (dpio_reset(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token)) {
+               DPAA2_BUS_ERR("Failed to reset dpio");
+               goto err;
        }
 
-       io_space_count++;
-       dpio_dev->index = io_space_count;
+       if (dpio_enable(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token)) {
+               DPAA2_BUS_ERR("Failed to Enable dpio");
+               goto err;
+       }
 
-       if (rte_dpaa2_vfio_setup_intr(&dpio_dev->intr_handle, vdev_fd, 1)) {
-               DPAA2_BUS_ERR("Fail to setup interrupt for %d",
-                             dpio_dev->hw_id);
-               rte_free(dpio_dev);
+       if (dpio_get_attributes(dpio_dev->dpio, CMD_PRI_LOW,
+                               dpio_dev->token, &attr)) {
+               DPAA2_BUS_ERR("DPIO Get attribute failed");
+               goto err;
        }
 
        /* find the SoC type for the first time */
@@ -481,11 +414,77 @@ dpaa2_create_dpio_device(int vdev_fd,
                        DPAA2_BUS_DEBUG("LX2160 Platform Detected");
                }
                dpaa2_svr_family = (mc_plat_info.svr & 0xffff0000);
+
+               if (dpaa2_svr_family == SVR_LX2160A) {
+                       dpaa2_dqrr_size = DPAA2_LX2_DQRR_RING_SIZE;
+                       dpaa2_eqcr_size = DPAA2_LX2_EQCR_RING_SIZE;
+               } else {
+                       dpaa2_dqrr_size = DPAA2_DQRR_RING_SIZE;
+                       dpaa2_eqcr_size = DPAA2_EQCR_RING_SIZE;
+               }
+       }
+
+       if (dpaa2_svr_family == SVR_LX2160A)
+               reg_info.index = DPAA2_SWP_CENA_MEM_REGION;
+       else
+               reg_info.index = DPAA2_SWP_CENA_REGION;
+
+       if (ioctl(vdev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
+               DPAA2_BUS_ERR("vfio: error getting region info");
+               goto err;
+       }
+
+       dpio_dev->ce_size = reg_info.size;
+       dpio_dev->qbman_portal_ce_paddr = (size_t)mmap(NULL, reg_info.size,
+                               PROT_WRITE | PROT_READ, MAP_SHARED,
+                               vdev_fd, reg_info.offset);
+
+       reg_info.index = DPAA2_SWP_CINH_REGION;
+       if (ioctl(vdev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info)) {
+               DPAA2_BUS_ERR("vfio: error getting region info");
+               goto err;
+       }
+
+       dpio_dev->ci_size = reg_info.size;
+       dpio_dev->qbman_portal_ci_paddr = (size_t)mmap(NULL, reg_info.size,
+                               PROT_WRITE | PROT_READ, MAP_SHARED,
+                               vdev_fd, reg_info.offset);
+
+       /* Configure & setup SW portal */
+       p_des.block = NULL;
+       p_des.idx = attr.qbman_portal_id;
+       p_des.cena_bar = (void *)(dpio_dev->qbman_portal_ce_paddr);
+       p_des.cinh_bar = (void *)(dpio_dev->qbman_portal_ci_paddr);
+       p_des.irq = -1;
+       p_des.qman_version = attr.qbman_version;
+
+       dpio_dev->sw_portal = qbman_swp_init(&p_des);
+       if (dpio_dev->sw_portal == NULL) {
+               DPAA2_BUS_ERR("QBMan SW Portal Init failed");
+               goto err;
+       }
+
+       io_space_count++;
+       dpio_dev->index = io_space_count;
+
+       if (rte_dpaa2_vfio_setup_intr(&dpio_dev->intr_handle, vdev_fd, 1)) {
+               DPAA2_BUS_ERR("Fail to setup interrupt for %d",
+                             dpio_dev->hw_id);
+               goto err;
        }
 
        TAILQ_INSERT_TAIL(&dpio_dev_list, dpio_dev, next);
 
        return 0;
+
+err:
+       if (dpio_dev->dpio) {
+               dpio_disable(dpio_dev->dpio, CMD_PRI_LOW, dpio_dev->token);
+               dpio_close(dpio_dev->dpio, CMD_PRI_LOW,  dpio_dev->token);
+               free(dpio_dev->dpio);
+       }
+       rte_free(dpio_dev);
+       return -1;
 }
 
 void
@@ -506,7 +505,7 @@ dpaa2_alloc_dq_storage(struct queue_storage_info_t *q_storage)
 
        for (i = 0; i < NUM_DQS_PER_QUEUE; i++) {
                q_storage->dq_storage[i] = rte_malloc(NULL,
-                       DPAA2_DQRR_RING_SIZE * sizeof(struct qbman_result),
+                       dpaa2_dqrr_size * sizeof(struct qbman_result),
                        RTE_CACHE_LINE_SIZE);
                if (!q_storage->dq_storage[i])
                        goto fail;
index d593eea..462501a 100644 (file)
@@ -30,6 +30,10 @@ RTE_DECLARE_PER_LCORE(struct dpaa2_io_portal_t, _dpaa2_io);
 
 /* Variable to store DPAA2 platform type */
 extern uint32_t dpaa2_svr_family;
+/* Variable to store DPAA2 DQRR size */
+extern uint8_t dpaa2_dqrr_size;
+/* Variable to store DPAA2 EQCR size */
+extern uint8_t dpaa2_eqcr_size;
 
 extern struct dpaa2_io_portal_t dpaa2_io_portal[RTE_MAX_LCORE];
 
index 8207593..efbeebe 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  *   Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved.
- *   Copyright 2016 NXP
+ *   Copyright 2016-2018 NXP
  *
  */
 
@@ -9,6 +9,7 @@
 #define _DPAA2_HW_PVT_H_
 
 #include <rte_eventdev.h>
+#include <dpaax_iova_table.h>
 
 #include <mc/fsl_mc_sys.h>
 #include <fsl_qbman_portal.h>
 #define VLAN_TAG_SIZE   4 /** < Vlan Header Length */
 #endif
 
-#define MAX_TX_RING_SLOTS      8
-       /** <Maximum number of slots available in TX ring*/
+/* Maximum number of slots available in TX ring */
+#define MAX_TX_RING_SLOTS                      32
 
-#define DPAA2_DQRR_RING_SIZE   16
-       /** <Maximum number of slots available in RX ring*/
+/* Maximum number of slots available in EQCR (enqueue) ring */
+#define DPAA2_EQCR_RING_SIZE           8
+/* Maximum number of slots available in EQCR ring on LX2 */
+#define DPAA2_LX2_EQCR_RING_SIZE       32
+
+/* Maximum number of slots available in DQRR (dequeue response) ring */
+#define DPAA2_DQRR_RING_SIZE           16
+/* Maximum number of slots available in DQRR ring on LX2 */
+#define DPAA2_LX2_DQRR_RING_SIZE       32
+
+/* EQCR shift used to derive the EQCR size: (1 << 3) = 8 on LS1/LS2 */
+#define DPAA2_EQCR_SHIFT               3
+/* EQCR shift used to derive the EQCR size on LX2: (1 << 5) = 32 */
+#define DPAA2_LX2_EQCR_SHIFT           5
+
+#define DPAA2_SWP_CENA_REGION          0
+#define DPAA2_SWP_CINH_REGION          1
+#define DPAA2_SWP_CENA_MEM_REGION      2
 
 #define MC_PORTAL_INDEX                0
 #define NUM_DPIO_REGIONS       2
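[Editor's note] A consistency check on the defines above: each ring size is a
power of two derived from its shift, and the DPIO probe code selects the LX2
variants when dpaa2_svr_family reports SVR_LX2160A.

    /* (1 << DPAA2_EQCR_SHIFT)     == 8  == DPAA2_EQCR_RING_SIZE
     * (1 << DPAA2_LX2_EQCR_SHIFT) == 32 == DPAA2_LX2_EQCR_RING_SIZE
     */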
@@ -193,6 +210,12 @@ enum qbman_fd_format {
 #define DPAA2_RESET_FD_CTRL(fd)         ((fd)->simple.ctrl = 0)
 
 #define        DPAA2_SET_FD_ASAL(fd, asal)     ((fd)->simple.ctrl |= (asal << 16))
+
+#define DPAA2_RESET_FD_FLC(fd) do {    \
+       (fd)->simple.flc_lo = 0;        \
+       (fd)->simple.flc_hi = 0;        \
+} while (0)
+
 #define DPAA2_SET_FD_FLC(fd, addr)     do { \
        (fd)->simple.flc_lo = lower_32_bits((size_t)(addr));    \
        (fd)->simple.flc_hi = upper_32_bits((uint64_t)(addr));  \
@@ -275,28 +298,26 @@ extern struct dpaa2_memseg_list rte_dpaa2_memsegs;
 #ifdef RTE_LIBRTE_DPAA2_USE_PHYS_IOVA
 extern uint8_t dpaa2_virt_mode;
 static void *dpaa2_mem_ptov(phys_addr_t paddr) __attribute__((unused));
-/* todo - this is costly, need to write a fast coversion routine */
+
 static void *dpaa2_mem_ptov(phys_addr_t paddr)
 {
-       struct dpaa2_memseg *ms;
+       void *va;
 
        if (dpaa2_virt_mode)
                return (void *)(size_t)paddr;
 
-       /* Check if the address is already part of the memseg list internally
-        * maintained by the dpaa2 driver.
-        */
-       TAILQ_FOREACH(ms, &rte_dpaa2_memsegs, next) {
-               if (paddr >= ms->iova && paddr <
-                       ms->iova + ms->len)
-                       return RTE_PTR_ADD(ms->vaddr, (uintptr_t)(paddr - ms->iova));
-       }
+       va = (void *)dpaax_iova_table_get_va(paddr);
+       if (likely(va != NULL))
+               return va;
 
        /* If not, Fallback to full memseg list searching */
-       return rte_mem_iova2virt(paddr);
+       va = rte_mem_iova2virt(paddr);
+
+       return va;
 }
 
 static phys_addr_t dpaa2_mem_vtop(uint64_t vaddr) __attribute__((unused));
+
 static phys_addr_t dpaa2_mem_vtop(uint64_t vaddr)
 {
        const struct rte_memseg *memseg;
index 7be8f54..655bff4 100644 (file)
@@ -78,13 +78,14 @@ do { \
 #define lower_32_bits(x) ((uint32_t)(x))
 #define upper_32_bits(x) ((uint32_t)(((x) >> 16) >> 16))
 
-
 #define __iomem
 
 #define __raw_readb(p) (*(const volatile unsigned char *)(p))
 #define __raw_readl(p) (*(const volatile unsigned int *)(p))
 #define __raw_writel(v, p) {*(volatile unsigned int *)(p) = (v); }
 
+#define dma_wmb()              rte_smp_mb()
+
 #define atomic_t                rte_atomic32_t
 #define atomic_read(v)          rte_atomic32_read(v)
 #define atomic_set(v, i)        rte_atomic32_set(v, i)
index 3e63db3..10c72e0 100644 (file)
@@ -42,6 +42,15 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d);
  */
 void qbman_swp_finish(struct qbman_swp *p);
 
+/**
+ * qbman_swp_invalidate() - Invalidate the cache enabled area of the QBMan
+ * portal. This is required to be called if a portal moved to another core
+ * because the QBMan portal area is non coherent
+ * @p: the qbman_swp object to be invalidated
+ *
+ */
+void qbman_swp_invalidate(struct qbman_swp *p);
+
 /**
  * qbman_swp_get_desc() - Get the descriptor of the given portal object.
  * @p: the given portal object.
@@ -172,7 +181,7 @@ void qbman_swp_interrupt_set_inhibit(struct qbman_swp *p, int inhibit);
 /**
  * struct qbman_result - structure for qbman dequeue response and/or
  * notification.
- * @donot_manipulate_directly: the 16 32bit data to represent the whole
+ * @dont_manipulate_directly: the 16 32-bit words representing the whole
  * possible qbman dequeue result.
  */
 struct qbman_result {
@@ -262,7 +271,7 @@ void qbman_swp_push_set(struct qbman_swp *s, uint8_t channel_idx, int enable);
  */
 struct qbman_pull_desc {
        union {
-               uint32_t donot_manipulate_directly[16];
+               uint32_t dont_manipulate_directly[16];
                struct pull {
                        uint8_t verb;
                        uint8_t numf;
@@ -355,6 +364,14 @@ void qbman_pull_desc_set_wq(struct qbman_pull_desc *d, uint32_t wqid,
 void qbman_pull_desc_set_channel(struct qbman_pull_desc *d, uint32_t chid,
                                 enum qbman_pull_type_e dct);
 
+/**
+ * qbman_pull_desc_set_rad() - Decide whether to reschedule the FQ after dequeue
+ *
+ * @rad: 1 = Reschedule the FQ after dequeue.
+ *      0 = Allow the FQ to remain active after dequeue.
+ */
+void qbman_pull_desc_set_rad(struct qbman_pull_desc *d, int rad);
+
 /**
  * qbman_swp_pull() - Issue the pull dequeue command
  * @s: the software portal object.
@@ -775,7 +792,7 @@ uint64_t qbman_result_cgcu_icnt(const struct qbman_result *scn);
 /* struct qbman_eq_desc - structure of enqueue descriptor */
 struct qbman_eq_desc {
        union {
-               uint32_t donot_manipulate_directly[8];
+               uint32_t dont_manipulate_directly[8];
                struct eq {
                        uint8_t verb;
                        uint8_t dca;
@@ -796,11 +813,11 @@ struct qbman_eq_desc {
 
 /**
  * struct qbman_eq_response - structure of enqueue response
- * @donot_manipulate_directly: the 16 32bit data to represent the whole
+ * @dont_manipulate_directly: the 16 32-bit words representing the whole
  * enqueue response.
  */
 struct qbman_eq_response {
-       uint32_t donot_manipulate_directly[16];
+       uint32_t dont_manipulate_directly[16];
 };
 
 /**
@@ -958,6 +975,7 @@ int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d,
  * @s: the software portal used for enqueue.
  * @d: the enqueue descriptor.
  * @fd: the frame descriptor to be enqueued.
+ * @flags: bit-mask of QBMAN_ENQUEUE_FLAG_*** options
  * @num_frames: the number of the frames to be enqueued.
  *
  * Return the number of enqueued frames, -EBUSY if the EQCR is not ready.
@@ -973,7 +991,6 @@ int qbman_swp_enqueue_multiple(struct qbman_swp *s,
  * @s: the software portal used for enqueue.
  * @d: the enqueue descriptor.
  * @fd: the frame descriptor to be enqueued.
- * @flags: bit-mask of QBMAN_ENQUEUE_FLAG_*** options
  * @num_frames: the number of the frames to be enqueued.
  *
  * Return the number of enqueued frames, -EBUSY if the EQCR is not ready.
@@ -998,12 +1015,12 @@ int qbman_swp_enqueue_thresh(struct qbman_swp *s, unsigned int thresh);
        /*******************/
 /**
  * struct qbman_release_desc - The structure for buffer release descriptor
- * @donot_manipulate_directly: the 32bit data to represent the whole
+ * @dont_manipulate_directly: the 32-bit words representing the whole
  * possible settings of qbman release descriptor.
  */
 struct qbman_release_desc {
        union {
-               uint32_t donot_manipulate_directly[16];
+               uint32_t dont_manipulate_directly[16];
                struct br {
                        uint8_t verb;
                        uint8_t reserved;
index 0714500..3380e54 100644 (file)
@@ -1,39 +1,17 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
+ * Copyright 2018 NXP
  *
  */
 
+#include "qbman_sys.h"
 #include "qbman_portal.h"
 
 /* QBMan portal management command codes */
 #define QBMAN_MC_ACQUIRE       0x30
 #define QBMAN_WQCHAN_CONFIGURE 0x46
 
-/* CINH register offsets */
-#define QBMAN_CINH_SWP_EQCR_PI 0x800
-#define QBMAN_CINH_SWP_EQCR_CI 0x840
-#define QBMAN_CINH_SWP_EQAR    0x8c0
-#define QBMAN_CINH_SWP_DQPI    0xa00
-#define QBMAN_CINH_SWP_DCAP    0xac0
-#define QBMAN_CINH_SWP_SDQCR   0xb00
-#define QBMAN_CINH_SWP_RAR     0xcc0
-#define QBMAN_CINH_SWP_ISR     0xe00
-#define QBMAN_CINH_SWP_IER     0xe40
-#define QBMAN_CINH_SWP_ISDR    0xe80
-#define QBMAN_CINH_SWP_IIR     0xec0
-#define QBMAN_CINH_SWP_DQRR_ITR    0xa80
-#define QBMAN_CINH_SWP_ITPR    0xf40
-
-/* CENA register offsets */
-#define QBMAN_CENA_SWP_EQCR(n) (0x000 + ((uint32_t)(n) << 6))
-#define QBMAN_CENA_SWP_DQRR(n) (0x200 + ((uint32_t)(n) << 6))
-#define QBMAN_CENA_SWP_RCR(n)  (0x400 + ((uint32_t)(n) << 6))
-#define QBMAN_CENA_SWP_CR      0x600
-#define QBMAN_CENA_SWP_RR(vb)  (0x700 + ((uint32_t)(vb) >> 1))
-#define QBMAN_CENA_SWP_VDQCR   0x780
-#define QBMAN_CENA_SWP_EQCR_CI 0x840
-
 /* Reverse mapping of QBMAN_CENA_SWP_DQRR() */
 #define QBMAN_IDX_FROM_DQRR(p) (((unsigned long)p & 0x1ff) >> 6)
 
@@ -83,6 +61,102 @@ enum qbman_sdqcr_fc {
 #define MAX_QBMAN_PORTALS  64
 static struct qbman_swp *portal_idx_map[MAX_QBMAN_PORTALS];
 
+/* Internal Function declaration */
+static int
+qbman_swp_enqueue_array_mode_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd);
+static int
+qbman_swp_enqueue_array_mode_mem_back(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd);
+
+static int
+qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd);
+static int
+qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd);
+
+static int
+qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               uint32_t *flags,
+               int num_frames);
+static int
+qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               uint32_t *flags,
+               int num_frames);
+
+static int
+qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               int num_frames);
+static int
+qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               int num_frames);
+
+static int
+qbman_swp_pull_direct(struct qbman_swp *s, struct qbman_pull_desc *d);
+static int
+qbman_swp_pull_mem_back(struct qbman_swp *s, struct qbman_pull_desc *d);
+
+const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s);
+const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s);
+
+static int
+qbman_swp_release_direct(struct qbman_swp *s,
+               const struct qbman_release_desc *d,
+               const uint64_t *buffers, unsigned int num_buffers);
+static int
+qbman_swp_release_mem_back(struct qbman_swp *s,
+               const struct qbman_release_desc *d,
+               const uint64_t *buffers, unsigned int num_buffers);
+
+/* Function pointers */
+static int (*qbman_swp_enqueue_array_mode_ptr)(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd)
+       = qbman_swp_enqueue_array_mode_direct;
+
+static int (*qbman_swp_enqueue_ring_mode_ptr)(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd)
+       = qbman_swp_enqueue_ring_mode_direct;
+
+static int (*qbman_swp_enqueue_multiple_ptr)(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               uint32_t *flags,
+               int num_frames)
+       = qbman_swp_enqueue_multiple_direct;
+
+static int (*qbman_swp_enqueue_multiple_desc_ptr)(struct qbman_swp *s,
+               const struct qbman_eq_desc *d,
+               const struct qbman_fd *fd,
+               int num_frames)
+       = qbman_swp_enqueue_multiple_desc_direct;
+
+static int (*qbman_swp_pull_ptr)(struct qbman_swp *s,
+               struct qbman_pull_desc *d)
+       = qbman_swp_pull_direct;
+
+const struct qbman_result *(*qbman_swp_dqrr_next_ptr)(struct qbman_swp *s)
+               = qbman_swp_dqrr_next_direct;
+
+static int (*qbman_swp_release_ptr)(struct qbman_swp *s,
+                       const struct qbman_release_desc *d,
+                       const uint64_t *buffers, unsigned int num_buffers)
+                       = qbman_swp_release_direct;
+
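[Editor's note] The exported entry points then become thin trampolines through
these pointers; a representative shape (a sketch, the actual wrappers appear
later in this file) is below. qbman_swp_init() flips every pointer to its
*_mem_back variant once it detects a rev-5000 (LX2) portal, so the hot path
pays one indirect call instead of a per-packet version test.

    int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
    {
            return qbman_swp_pull_ptr(s, d);
    }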
 /*********************************/
 /* Portal constructor/destructor */
 /*********************************/
@@ -104,25 +178,30 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
 {
        int ret;
        uint32_t eqcr_pi;
+       uint32_t mask_size;
        struct qbman_swp *p = malloc(sizeof(*p));
 
        if (!p)
                return NULL;
+
+       memset(p, 0, sizeof(struct qbman_swp));
+
        p->desc = *d;
 #ifdef QBMAN_CHECKING
        p->mc.check = swp_mc_can_start;
 #endif
        p->mc.valid_bit = QB_VALID_BIT;
-       p->sdq = 0;
        p->sdq |= qbman_sdqcr_dct_prio_ics << QB_SDQCR_DCT_SHIFT;
        p->sdq |= qbman_sdqcr_fc_up_to_3 << QB_SDQCR_FC_SHIFT;
        p->sdq |= QMAN_SDQCR_TOKEN << QB_SDQCR_TOK_SHIFT;
+       if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000)
+               p->mr.valid_bit = QB_VALID_BIT;
 
        atomic_set(&p->vdq.busy, 1);
        p->vdq.valid_bit = QB_VALID_BIT;
-       p->dqrr.next_idx = 0;
        p->dqrr.valid_bit = QB_VALID_BIT;
-       if ((p->desc.qman_version & 0xFFFF0000) < QMAN_REV_4100) {
+       qman_version = p->desc.qman_version;
+       if ((qman_version & 0xFFFF0000) < QMAN_REV_4100) {
                p->dqrr.dqrr_size = 4;
                p->dqrr.reset_bug = 1;
        } else {
@@ -136,18 +215,54 @@ struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
                pr_err("qbman_swp_sys_init() failed %d\n", ret);
                return NULL;
        }
+
+       /* Verify that the DQRR PI is 0; if it is not, the portal is not
+        * in its default state, which is an error
+        */
+       if (qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_DQPI) & 0xF) {
+               pr_err("qbman DQRR PI is not zero, portal is not clean\n");
+               free(p);
+               return NULL;
+       }
+
        /* SDQCR needs to be initialized to 0 when no channels are
         * being dequeued from or else the QMan HW will indicate an
         * error.  The values that were calculated above will be
         * applied when dequeues from a specific channel are enabled.
         */
        qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_SDQCR, 0);
+
+       p->eqcr.pi_ring_size = 8;
+       if ((qman_version & 0xFFFF0000) >= QMAN_REV_5000) {
+               p->eqcr.pi_ring_size = 32;
+               qbman_swp_enqueue_array_mode_ptr =
+                               qbman_swp_enqueue_array_mode_mem_back;
+               qbman_swp_enqueue_ring_mode_ptr =
+                               qbman_swp_enqueue_ring_mode_mem_back;
+               qbman_swp_enqueue_multiple_ptr =
+                               qbman_swp_enqueue_multiple_mem_back;
+               qbman_swp_enqueue_multiple_desc_ptr =
+                               qbman_swp_enqueue_multiple_desc_mem_back;
+               qbman_swp_pull_ptr = qbman_swp_pull_mem_back;
+               qbman_swp_dqrr_next_ptr = qbman_swp_dqrr_next_mem_back;
+               qbman_swp_release_ptr = qbman_swp_release_mem_back;
+       }
+
+       for (mask_size = p->eqcr.pi_ring_size; mask_size > 0; mask_size >>= 1)
+               p->eqcr.pi_mask = (p->eqcr.pi_mask<<1) + 1;
        eqcr_pi = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_PI);
-       p->eqcr.pi = eqcr_pi & 0xF;
+       p->eqcr.pi = eqcr_pi & p->eqcr.pi_mask;
        p->eqcr.pi_vb = eqcr_pi & QB_VALID_BIT;
-       p->eqcr.ci = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_CI) & 0xF;
-       p->eqcr.available = QBMAN_EQCR_SIZE - qm_cyc_diff(QBMAN_EQCR_SIZE,
-                                               p->eqcr.ci, p->eqcr.pi);
+       if ((p->desc.qman_version & QMAN_REV_MASK) < QMAN_REV_5000)
+               p->eqcr.ci = qbman_cinh_read(&p->sys,
+                               QBMAN_CINH_SWP_EQCR_CI) & p->eqcr.pi_mask;
+       else
+               p->eqcr.ci = qbman_cinh_read(&p->sys,
+                               QBMAN_CINH_SWP_EQCR_PI) & p->eqcr.pi_mask;
+       p->eqcr.available = p->eqcr.pi_ring_size -
+                               qm_cyc_diff(p->eqcr.pi_ring_size,
+                               p->eqcr.ci & (p->eqcr.pi_mask<<1),
+                               p->eqcr.pi & (p->eqcr.pi_mask<<1));
 
        portal_idx_map[p->desc.idx] = p;
        return p;
@@ -229,7 +344,8 @@ int qbman_swp_interrupt_get_inhibit(struct qbman_swp *p)
 
 void qbman_swp_interrupt_set_inhibit(struct qbman_swp *p, int inhibit)
 {
-       qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_IIR, inhibit ? 0xffffffff : 0);
+       qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_IIR,
+                        inhibit ? 0xffffffff : 0);
 }
 
 /***********************/
@@ -246,7 +362,10 @@ void *qbman_swp_mc_start(struct qbman_swp *p)
 #ifdef QBMAN_CHECKING
        QBMAN_BUG_ON(p->mc.check != swp_mc_can_start);
 #endif
-       ret = qbman_cena_write_start(&p->sys, QBMAN_CENA_SWP_CR);
+       if ((p->desc.qman_version & QMAN_REV_MASK) < QMAN_REV_5000)
+               ret = qbman_cena_write_start(&p->sys, QBMAN_CENA_SWP_CR);
+       else
+               ret = qbman_cena_write_start(&p->sys, QBMAN_CENA_SWP_CR_MEM);
 #ifdef QBMAN_CHECKING
        if (!ret)
                p->mc.check = swp_mc_can_submit;
@@ -266,8 +385,17 @@ void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, uint8_t cmd_verb)
         * caller wants to OR but has forgotten to do so.
         */
        QBMAN_BUG_ON((*v & cmd_verb) != *v);
-       *v = cmd_verb | p->mc.valid_bit;
-       qbman_cena_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd);
+       if ((p->desc.qman_version & QMAN_REV_MASK) < QMAN_REV_5000) {
+               dma_wmb();
+               *v = cmd_verb | p->mc.valid_bit;
+               qbman_cena_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd);
+               clean(cmd);
+       } else {
+               *v = cmd_verb | p->mr.valid_bit;
+               qbman_cena_write_complete(&p->sys, QBMAN_CENA_SWP_CR_MEM, cmd);
+               dma_wmb();
+               qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_CR_RT, QMAN_RT_MODE);
+       }
 #ifdef QBMAN_CHECKING
        p->mc.check = swp_mc_can_poll;
 #endif
@@ -279,17 +407,34 @@ void *qbman_swp_mc_result(struct qbman_swp *p)
 #ifdef QBMAN_CHECKING
        QBMAN_BUG_ON(p->mc.check != swp_mc_can_poll);
 #endif
-       qbman_cena_invalidate_prefetch(&p->sys,
-                                      QBMAN_CENA_SWP_RR(p->mc.valid_bit));
-       ret = qbman_cena_read(&p->sys, QBMAN_CENA_SWP_RR(p->mc.valid_bit));
-       /* Remove the valid-bit - command completed if the rest is non-zero */
-       verb = ret[0] & ~QB_VALID_BIT;
-       if (!verb)
-               return NULL;
+       if ((p->desc.qman_version & QMAN_REV_MASK) < QMAN_REV_5000) {
+               qbman_cena_invalidate_prefetch(&p->sys,
+                               QBMAN_CENA_SWP_RR(p->mc.valid_bit));
+               ret = qbman_cena_read(&p->sys,
+                               QBMAN_CENA_SWP_RR(p->mc.valid_bit));
+               /* Remove the valid-bit -
+                * command completed iff the rest is non-zero
+                */
+               verb = ret[0] & ~QB_VALID_BIT;
+               if (!verb)
+                       return NULL;
+               p->mc.valid_bit ^= QB_VALID_BIT;
+       } else {
+               ret = qbman_cena_read(&p->sys, QBMAN_CENA_SWP_RR_MEM);
+               /* Command completed once the valid bit matches */
+               if (p->mr.valid_bit != (ret[0] & QB_VALID_BIT))
+                       return NULL;
+               /* Remove the valid-bit -
+                * command completed iff the rest is non-zero
+                */
+               verb = ret[0] & ~QB_VALID_BIT;
+               if (!verb)
+                       return NULL;
+               p->mr.valid_bit ^= QB_VALID_BIT;
+       }
 #ifdef QBMAN_CHECKING
        p->mc.check = swp_mc_can_start;
 #endif
-       p->mc.valid_bit ^= QB_VALID_BIT;
        return ret;
 }
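
Both branches above implement the same producer/consumer handshake: submit
publishes the command under a valid bit, and qbman_swp_mc_result() returns
NULL until the hardware's response carries the expected polarity. A hedged
usage sketch of how the trio is driven (qbman_swp_mc_complete() in this
driver wraps essentially this loop; 'verb' identifies the command):

    static void *mc_send(struct qbman_swp *p, uint8_t verb)
    {
            void *cmd = qbman_swp_mc_start(p);
            void *ret;

            if (!cmd)
                    return NULL;
            /* ... fill in the command payload here ... */
            qbman_swp_mc_submit(p, cmd, verb);
            do
                    ret = qbman_swp_mc_result(p); /* NULL until complete */
            while (!ret);
            return ret;
    }
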
 
@@ -417,13 +562,26 @@ void qbman_eq_desc_set_dca(struct qbman_eq_desc *d, int enable,
        }
 }
 
-#define EQAR_IDX(eqar)     ((eqar) & 0x7)
+#define EQAR_IDX(eqar)     ((eqar) & 0x1f)
 #define EQAR_VB(eqar)      ((eqar) & 0x80)
 #define EQAR_SUCCESS(eqar) ((eqar) & 0x100)
 
-static int qbman_swp_enqueue_array_mode(struct qbman_swp *s,
-                                       const struct qbman_eq_desc *d,
-                                       const struct qbman_fd *fd)
+static inline void qbman_write_eqcr_am_rt_register(struct qbman_swp *p,
+                                                  uint8_t idx)
+{
+       if (idx < 16)
+               qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_EQCR_AM_RT + idx * 4,
+                                    QMAN_RT_MODE);
+       else
+               qbman_cinh_write(&p->sys, QBMAN_CINH_SWP_EQCR_AM_RT2 +
+                                    (idx - 16) * 4,
+                                    QMAN_RT_MODE);
+}
+
+static int qbman_swp_enqueue_array_mode_direct(struct qbman_swp *s,
+                                              const struct qbman_eq_desc *d,
+                                              const struct qbman_fd *fd)
 {
        uint32_t *p;
        const uint32_t *cl = qb_cl(d);
@@ -433,39 +591,69 @@ static int qbman_swp_enqueue_array_mode(struct qbman_swp *s,
        if (!EQAR_SUCCESS(eqar))
                return -EBUSY;
        p = qbman_cena_write_start_wo_shadow(&s->sys,
-                                       QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
+                       QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
        memcpy(&p[1], &cl[1], 28);
        memcpy(&p[8], fd, sizeof(*fd));
+
        /* Set the verb byte, have to substitute in the valid-bit */
-       lwsync();
+       dma_wmb();
        p[0] = cl[0] | EQAR_VB(eqar);
        qbman_cena_write_complete_wo_shadow(&s->sys,
-                                       QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
+                               QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
        return 0;
 }
+static int qbman_swp_enqueue_array_mode_mem_back(struct qbman_swp *s,
+                                                const struct qbman_eq_desc *d,
+                                                const struct qbman_fd *fd)
+{
+       uint32_t *p;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqar = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_EQAR);
 
-static int qbman_swp_enqueue_ring_mode(struct qbman_swp *s,
-                                      const struct qbman_eq_desc *d,
-                                      const struct qbman_fd *fd)
+       pr_debug("EQAR=%08x\n", eqar);
+       if (!EQAR_SUCCESS(eqar))
+               return -EBUSY;
+       p = qbman_cena_write_start_wo_shadow(&s->sys,
+                       QBMAN_CENA_SWP_EQCR(EQAR_IDX(eqar)));
+       memcpy(&p[1], &cl[1], 28);
+       memcpy(&p[8], fd, sizeof(*fd));
+
+       /* Set the verb byte, have to substitute in the valid-bit */
+       p[0] = cl[0] | EQAR_VB(eqar);
+       dma_wmb();
+       qbman_write_eqcr_am_rt_register(s, EQAR_IDX(eqar));
+       return 0;
+}
+
+static inline int qbman_swp_enqueue_array_mode(struct qbman_swp *s,
+                                              const struct qbman_eq_desc *d,
+                                              const struct qbman_fd *fd)
+{
+       return qbman_swp_enqueue_array_mode_ptr(s, d, fd);
+}
+
+static int qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s,
+                                             const struct qbman_eq_desc *d,
+                                             const struct qbman_fd *fd)
 {
        uint32_t *p;
        const uint32_t *cl = qb_cl(d);
-       uint32_t eqcr_ci;
-       uint8_t diff;
+       uint32_t eqcr_ci, full_mask, half_mask;
 
+       half_mask = (s->eqcr.pi_mask>>1);
+       full_mask = s->eqcr.pi_mask;
        if (!s->eqcr.available) {
                eqcr_ci = s->eqcr.ci;
                s->eqcr.ci = qbman_cena_read_reg(&s->sys,
-                               QBMAN_CENA_SWP_EQCR_CI) & 0xF;
-               diff = qm_cyc_diff(QBMAN_EQCR_SIZE,
-                                  eqcr_ci, s->eqcr.ci);
-               s->eqcr.available += diff;
-               if (!diff)
+                               QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                               eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
                        return -EBUSY;
        }
 
        p = qbman_cena_write_start_wo_shadow(&s->sys,
-                                       QBMAN_CENA_SWP_EQCR(s->eqcr.pi & 7));
+                       QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
        memcpy(&p[1], &cl[1], 28);
        memcpy(&p[8], fd, sizeof(*fd));
        lwsync();
@@ -473,16 +661,61 @@ static int qbman_swp_enqueue_ring_mode(struct qbman_swp *s,
        /* Set the verb byte, have to substitute in the valid-bit */
        p[0] = cl[0] | s->eqcr.pi_vb;
        qbman_cena_write_complete_wo_shadow(&s->sys,
-                                       QBMAN_CENA_SWP_EQCR(s->eqcr.pi & 7));
+                       QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
        s->eqcr.pi++;
-       s->eqcr.pi &= 0xF;
+       s->eqcr.pi &= full_mask;
        s->eqcr.available--;
-       if (!(s->eqcr.pi & 7))
+       if (!(s->eqcr.pi & half_mask))
                s->eqcr.pi_vb ^= QB_VALID_BIT;
 
        return 0;
 }
 
+static int qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
+                                               const struct qbman_eq_desc *d,
+                                               const struct qbman_fd *fd)
+{
+       uint32_t *p;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqcr_ci, full_mask, half_mask;
+
+       half_mask = (s->eqcr.pi_mask>>1);
+       full_mask = s->eqcr.pi_mask;
+       if (!s->eqcr.available) {
+               eqcr_ci = s->eqcr.ci;
+               s->eqcr.ci = qbman_cinh_read(&s->sys,
+                               QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                               eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
+                       return -EBUSY;
+       }
+
+       p = qbman_cena_write_start_wo_shadow(&s->sys,
+                       QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
+       memcpy(&p[1], &cl[1], 28);
+       memcpy(&p[8], fd, sizeof(*fd));
+
+       /* Set the verb byte, have to substitute in the valid-bit */
+       p[0] = cl[0] | s->eqcr.pi_vb;
+       s->eqcr.pi++;
+       s->eqcr.pi &= full_mask;
+       s->eqcr.available--;
+       if (!(s->eqcr.pi & half_mask))
+               s->eqcr.pi_vb ^= QB_VALID_BIT;
+       dma_wmb();
+       qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_EQCR_PI,
+                               (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
+       return 0;
+}
+
+static int qbman_swp_enqueue_ring_mode(struct qbman_swp *s,
+                                      const struct qbman_eq_desc *d,
+                                      const struct qbman_fd *fd)
+{
+       return qbman_swp_enqueue_ring_mode_ptr(s, d, fd);
+}
+
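
This is the dispatch pattern the patch repeats for every hot-path operation:
a _direct and a _mem_back implementation, a file-scope function pointer
selected once at portal init from the QMan revision, and a thin public
wrapper. A simplified sketch of the idea in isolation (names assumed):

    /* One pointer per operation; chosen at init time, not per call. */
    typedef int (*qbman_enqueue_fn)(struct qbman_swp *s,
                                    const struct qbman_eq_desc *d,
                                    const struct qbman_fd *fd);

    static qbman_enqueue_fn enqueue_ring =
            qbman_swp_enqueue_ring_mode_direct; /* pre-5000 default */

    static void select_portal_ops(uint32_t qman_version)
    {
            if ((qman_version & QMAN_REV_MASK) >= QMAN_REV_5000)
                    enqueue_ring = qbman_swp_enqueue_ring_mode_mem_back;
    }

The revision check thus costs one indirect call per operation instead of a
compare-and-branch inside every enqueue and dequeue.
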
 int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d,
                      const struct qbman_fd *fd)
 {
@@ -492,27 +725,27 @@ int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d,
                return qbman_swp_enqueue_ring_mode(s, d, fd);
 }
 
-int qbman_swp_enqueue_multiple(struct qbman_swp *s,
-                              const struct qbman_eq_desc *d,
-                              const struct qbman_fd *fd,
-                              uint32_t *flags,
-                              int num_frames)
+static int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
+                                            const struct qbman_eq_desc *d,
+                                            const struct qbman_fd *fd,
+                                            uint32_t *flags,
+                                            int num_frames)
 {
-       uint32_t *p;
+       uint32_t *p = NULL;
        const uint32_t *cl = qb_cl(d);
-       uint32_t eqcr_ci, eqcr_pi;
-       uint8_t diff;
+       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
        int i, num_enqueued = 0;
        uint64_t addr_cena;
 
+       half_mask = (s->eqcr.pi_mask>>1);
+       full_mask = s->eqcr.pi_mask;
        if (!s->eqcr.available) {
                eqcr_ci = s->eqcr.ci;
                s->eqcr.ci = qbman_cena_read_reg(&s->sys,
-                               QBMAN_CENA_SWP_EQCR_CI) & 0xF;
-               diff = qm_cyc_diff(QBMAN_EQCR_SIZE,
-                                  eqcr_ci, s->eqcr.ci);
-               s->eqcr.available += diff;
-               if (!diff)
+                               QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                               eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
                        return 0;
        }
 
@@ -523,11 +756,10 @@ int qbman_swp_enqueue_multiple(struct qbman_swp *s,
        /* Fill in the EQCR ring */
        for (i = 0; i < num_enqueued; i++) {
                p = qbman_cena_write_start_wo_shadow(&s->sys,
-                                       QBMAN_CENA_SWP_EQCR(eqcr_pi & 7));
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
                memcpy(&p[1], &cl[1], 28);
                memcpy(&p[8], &fd[i], sizeof(*fd));
                eqcr_pi++;
-               eqcr_pi &= 0xF;
        }
 
        lwsync();
@@ -536,7 +768,7 @@ int qbman_swp_enqueue_multiple(struct qbman_swp *s,
        eqcr_pi = s->eqcr.pi;
        for (i = 0; i < num_enqueued; i++) {
                p = qbman_cena_write_start_wo_shadow(&s->sys,
-                                       QBMAN_CENA_SWP_EQCR(eqcr_pi & 7));
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
                p[0] = cl[0] | s->eqcr.pi_vb;
                if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
                        struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
@@ -545,8 +777,7 @@ int qbman_swp_enqueue_multiple(struct qbman_swp *s,
                                ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
                }
                eqcr_pi++;
-               eqcr_pi &= 0xF;
-               if (!(eqcr_pi & 7))
+               if (!(eqcr_pi & half_mask))
                        s->eqcr.pi_vb ^= QB_VALID_BIT;
        }
 
@@ -554,35 +785,104 @@ int qbman_swp_enqueue_multiple(struct qbman_swp *s,
        eqcr_pi = s->eqcr.pi;
        addr_cena = (size_t)s->sys.addr_cena;
        for (i = 0; i < num_enqueued; i++) {
-               dcbf((addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & 7)));
+               dcbf((uintptr_t)(addr_cena +
+                       QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)));
                eqcr_pi++;
-               eqcr_pi &= 0xF;
        }
-       s->eqcr.pi = eqcr_pi;
+       s->eqcr.pi = eqcr_pi & full_mask;
 
        return num_enqueued;
 }
 
-int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
-                                   const struct qbman_eq_desc *d,
-                                   const struct qbman_fd *fd,
-                                   int num_frames)
+static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
+                                              const struct qbman_eq_desc *d,
+                                              const struct qbman_fd *fd,
+                                              uint32_t *flags,
+                                              int num_frames)
+{
+       uint32_t *p = NULL;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+       int i, num_enqueued = 0;
+
+       half_mask = (s->eqcr.pi_mask>>1);
+       full_mask = s->eqcr.pi_mask;
+       if (!s->eqcr.available) {
+               eqcr_ci = s->eqcr.ci;
+               s->eqcr.ci = qbman_cinh_read(&s->sys,
+                               QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                                       eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
+                       return 0;
+       }
+
+       eqcr_pi = s->eqcr.pi;
+       num_enqueued = (s->eqcr.available < num_frames) ?
+                       s->eqcr.available : num_frames;
+       s->eqcr.available -= num_enqueued;
+       /* Fill in the EQCR ring */
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               memcpy(&p[1], &cl[1], 28);
+               memcpy(&p[8], &fd[i], sizeof(*fd));
+               eqcr_pi++;
+       }
+
+       /* Set the verb byte, have to substitute in the valid-bit */
+       eqcr_pi = s->eqcr.pi;
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               p[0] = cl[0] | s->eqcr.pi_vb;
+               if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+                       struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
+
+                       d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+                               ((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+               }
+               eqcr_pi++;
+               if (!(eqcr_pi & half_mask))
+                       s->eqcr.pi_vb ^= QB_VALID_BIT;
+       }
+       s->eqcr.pi = eqcr_pi & full_mask;
+
+       dma_wmb();
+       qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_EQCR_PI,
+                               (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
+       return num_enqueued;
+}
+
+inline int qbman_swp_enqueue_multiple(struct qbman_swp *s,
+                                     const struct qbman_eq_desc *d,
+                                     const struct qbman_fd *fd,
+                                     uint32_t *flags,
+                                     int num_frames)
+{
+       return qbman_swp_enqueue_multiple_ptr(s, d, fd, flags, num_frames);
+}
+
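
Note that qbman_swp_enqueue_multiple() is best effort: it writes as many
frames as the EQCR has room for and returns that count, so callers loop over
the remainder. A hedged usage sketch (the local names are assumptions):

    int sent = 0;

    while (sent < nb_frames) {
            int n = qbman_swp_enqueue_multiple(s, &eqdesc, &fds[sent],
                                               NULL /* no DCA flags */,
                                               nb_frames - sent);
            if (n == 0)
                    continue; /* EQCR full: retry, or back off */
            sent += n;
    }
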
+static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
+                                       const struct qbman_eq_desc *d,
+                                       const struct qbman_fd *fd,
+                                       int num_frames)
 {
        uint32_t *p;
        const uint32_t *cl;
-       uint32_t eqcr_ci, eqcr_pi;
-       uint8_t diff;
+       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
        int i, num_enqueued = 0;
        uint64_t addr_cena;
 
+       half_mask = (s->eqcr.pi_mask>>1);
+       full_mask = s->eqcr.pi_mask;
        if (!s->eqcr.available) {
                eqcr_ci = s->eqcr.ci;
                s->eqcr.ci = qbman_cena_read_reg(&s->sys,
-                               QBMAN_CENA_SWP_EQCR_CI) & 0xF;
-               diff = qm_cyc_diff(QBMAN_EQCR_SIZE,
-                                  eqcr_ci, s->eqcr.ci);
-               s->eqcr.available += diff;
-               if (!diff)
+                               QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                                       eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
                        return 0;
        }
 
@@ -593,12 +893,11 @@ int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
        /* Fill in the EQCR ring */
        for (i = 0; i < num_enqueued; i++) {
                p = qbman_cena_write_start_wo_shadow(&s->sys,
-                                       QBMAN_CENA_SWP_EQCR(eqcr_pi & 7));
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
                cl = qb_cl(&d[i]);
                memcpy(&p[1], &cl[1], 28);
                memcpy(&p[8], &fd[i], sizeof(*fd));
                eqcr_pi++;
-               eqcr_pi &= 0xF;
        }
 
        lwsync();
@@ -607,12 +906,11 @@ int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
        eqcr_pi = s->eqcr.pi;
        for (i = 0; i < num_enqueued; i++) {
                p = qbman_cena_write_start_wo_shadow(&s->sys,
-                                       QBMAN_CENA_SWP_EQCR(eqcr_pi & 7));
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
                cl = qb_cl(&d[i]);
                p[0] = cl[0] | s->eqcr.pi_vb;
                eqcr_pi++;
-               eqcr_pi &= 0xF;
-               if (!(eqcr_pi & 7))
+               if (!(eqcr_pi & half_mask))
                        s->eqcr.pi_vb ^= QB_VALID_BIT;
        }
 
@@ -620,14 +918,78 @@ int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
        eqcr_pi = s->eqcr.pi;
        addr_cena = (size_t)s->sys.addr_cena;
        for (i = 0; i < num_enqueued; i++) {
-               dcbf((addr_cena + QBMAN_CENA_SWP_EQCR(eqcr_pi & 7)));
+               dcbf((uintptr_t)(addr_cena +
+                       QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask)));
+               eqcr_pi++;
+       }
+       s->eqcr.pi = eqcr_pi & full_mask;
+
+       return num_enqueued;
+}
+
+static int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
+                                       const struct qbman_eq_desc *d,
+                                       const struct qbman_fd *fd,
+                                       int num_frames)
+{
+       uint32_t *p;
+       const uint32_t *cl;
+       uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+       int i, num_enqueued = 0;
+
+       half_mask = (s->eqcr.pi_mask>>1);
+       full_mask = s->eqcr.pi_mask;
+       if (!s->eqcr.available) {
+               eqcr_ci = s->eqcr.ci;
+               s->eqcr.ci = qbman_cinh_read(&s->sys,
+                               QBMAN_CENA_SWP_EQCR_CI) & full_mask;
+               s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+                                       eqcr_ci, s->eqcr.ci);
+               if (!s->eqcr.available)
+                       return 0;
+       }
+
+       eqcr_pi = s->eqcr.pi;
+       num_enqueued = (s->eqcr.available < num_frames) ?
+                       s->eqcr.available : num_frames;
+       s->eqcr.available -= num_enqueued;
+       /* Fill in the EQCR ring */
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               cl = qb_cl(&d[i]);
+               memcpy(&p[1], &cl[1], 28);
+               memcpy(&p[8], &fd[i], sizeof(*fd));
+               eqcr_pi++;
+       }
+
+       /* Set the verb byte, have to substitute in the valid-bit */
+       eqcr_pi = s->eqcr.pi;
+       for (i = 0; i < num_enqueued; i++) {
+               p = qbman_cena_write_start_wo_shadow(&s->sys,
+                               QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+               cl = qb_cl(&d[i]);
+               p[0] = cl[0] | s->eqcr.pi_vb;
                eqcr_pi++;
-               eqcr_pi &= 0xF;
+               if (!(eqcr_pi & half_mask))
+                       s->eqcr.pi_vb ^= QB_VALID_BIT;
        }
-       s->eqcr.pi = eqcr_pi;
+
+       s->eqcr.pi = eqcr_pi & full_mask;
+
+       dma_wmb();
+       qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_EQCR_PI,
+                               (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb);
 
        return num_enqueued;
 }
+
+inline int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
+                                          const struct qbman_eq_desc *d,
+                                          const struct qbman_fd *fd,
+                                          int num_frames)
+{
+       return qbman_swp_enqueue_multiple_desc_ptr(s, d, fd, num_frames);
+}
 
 /*************************/
 /* Static (push) dequeue */
@@ -670,6 +1032,7 @@ void qbman_swp_push_set(struct qbman_swp *s, uint8_t channel_idx, int enable)
 #define QB_VDQCR_VERB_DT_SHIFT     2
 #define QB_VDQCR_VERB_RLS_SHIFT    4
 #define QB_VDQCR_VERB_WAE_SHIFT    5
+#define QB_VDQCR_VERB_RAD_SHIFT    6
 
 enum qb_pull_dt_e {
        qb_pull_dt_channel,
@@ -702,7 +1065,8 @@ void qbman_pull_desc_set_storage(struct qbman_pull_desc *d,
        d->pull.rsp_addr = storage_phys;
 }
 
-void qbman_pull_desc_set_numframes(struct qbman_pull_desc *d, uint8_t numframes)
+void qbman_pull_desc_set_numframes(struct qbman_pull_desc *d,
+                                  uint8_t numframes)
 {
        d->pull.numf = numframes - 1;
 }
@@ -735,7 +1099,20 @@ void qbman_pull_desc_set_channel(struct qbman_pull_desc *d, uint32_t chid,
        d->pull.dq_src = chid;
 }
 
-int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
+void qbman_pull_desc_set_rad(struct qbman_pull_desc *d, int rad)
+{
+       if (d->pull.verb & (1 << QB_VDQCR_VERB_RLS_SHIFT)) {
+               if (rad)
+                       d->pull.verb |= 1 << QB_VDQCR_VERB_RAD_SHIFT;
+               else
+                       d->pull.verb &= ~(1 << QB_VDQCR_VERB_RAD_SHIFT);
+       } else {
+               printf("The RAD feature is not valid when RLS = 0\n");
+       }
+}
+
+static int qbman_swp_pull_direct(struct qbman_swp *s,
+                                struct qbman_pull_desc *d)
 {
        uint32_t *p;
        uint32_t *cl = qb_cl(d);
@@ -759,6 +1136,36 @@ int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
        return 0;
 }
 
+static int qbman_swp_pull_mem_back(struct qbman_swp *s,
+                                  struct qbman_pull_desc *d)
+{
+       uint32_t *p;
+       uint32_t *cl = qb_cl(d);
+
+       if (!atomic_dec_and_test(&s->vdq.busy)) {
+               atomic_inc(&s->vdq.busy);
+               return -EBUSY;
+       }
+
+       d->pull.tok = s->sys.idx + 1;
+       s->vdq.storage = (void *)(size_t)d->pull.rsp_addr_virt;
+       p = qbman_cena_write_start_wo_shadow(&s->sys, QBMAN_CENA_SWP_VDQCR_MEM);
+       memcpy(&p[1], &cl[1], 12);
+
+       /* Set the verb byte, have to substitute in the valid-bit */
+       p[0] = cl[0] | s->vdq.valid_bit;
+       s->vdq.valid_bit ^= QB_VALID_BIT;
+       dma_wmb();
+       qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_VDQCR_RT, QMAN_RT_MODE);
+
+       return 0;
+}
+
+inline int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
+{
+       return qbman_swp_pull_ptr(s, d);
+}
+
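
qbman_pull_desc_set_rad() above only takes effect when the RLS bit is
already set on the descriptor, which is what attaching dequeue storage is
assumed to do; setting RAD first is rejected with a warning. A hedged
ordering sketch (qbman_pull_desc_clear() and the set_storage() signature
are assumed from the public fsl_qbman_portal.h API):

    struct qbman_pull_desc pd;

    qbman_pull_desc_clear(&pd);
    /* Attaching storage sets RLS, so RAD becomes legal afterwards. */
    qbman_pull_desc_set_storage(&pd, storage, storage_phys, 1);
    qbman_pull_desc_set_numframes(&pd, 8);
    qbman_pull_desc_set_rad(&pd, 1);
    while (qbman_swp_pull(s, &pd) == -EBUSY)
            ; /* previous volatile dequeue still in flight */
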
 /****************/
 /* Polling DQRR */
 /****************/
@@ -791,7 +1198,12 @@ void qbman_swp_prefetch_dqrr_next(struct qbman_swp *s)
  * only once, so repeated calls can return a sequence of DQRR entries, without
  * requiring they be consumed immediately or in any particular order.
  */
-const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
+inline const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
+{
+       return qbman_swp_dqrr_next_ptr(s);
+}
+
+const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s)
 {
        uint32_t verb;
        uint32_t response_verb;
@@ -801,7 +1213,7 @@ const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
        /* Before using valid-bit to detect if something is there, we have to
         * handle the case of the DQRR reset bug...
         */
-       if (unlikely(s->dqrr.reset_bug)) {
+       if (s->dqrr.reset_bug) {
                /* We pick up new entries by cache-inhibited producer index,
                 * which means that a non-coherent mapping would require us to
                 * invalidate and read *only* once that PI has indicated that
@@ -833,7 +1245,8 @@ const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
                                        QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
        }
        p = qbman_cena_read_wo_shadow(&s->sys,
-                                     QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
+                       QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
+
        verb = p->dq.verb;
 
        /* If the valid-bit isn't of the expected polarity, nothing there. Note,
@@ -867,11 +1280,54 @@ const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
        return p;
 }
 
+const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s)
+{
+       uint32_t verb;
+       uint32_t response_verb;
+       uint32_t flags;
+       const struct qbman_result *p;
+
+       p = qbman_cena_read_wo_shadow(&s->sys,
+                       QBMAN_CENA_SWP_DQRR_MEM(s->dqrr.next_idx));
+
+       verb = p->dq.verb;
+
+       /* If the valid-bit isn't of the expected polarity, nothing there. Note
+        * that in the DQRR reset bug workaround we shouldn't need to skip this
+        * check, because we've already determined that a new entry is available
+        * and we've invalidated the cacheline before reading it, so the
+        * valid-bit behaviour is repaired and should tell us what we already
+        * knew from reading PI.
+        */
+       if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit)
+               return NULL;
+
+       /* There's something there. Move "next_idx" attention to the next ring
+        * entry (and prefetch it) before returning what we found.
+        */
+       s->dqrr.next_idx++;
+       if (s->dqrr.next_idx == s->dqrr.dqrr_size) {
+               s->dqrr.next_idx = 0;
+               s->dqrr.valid_bit ^= QB_VALID_BIT;
+       }
+       /* If this is the final response to a volatile dequeue command,
+        * indicate that the vdq is no longer busy.
+        */
+       flags = p->dq.stat;
+       response_verb = verb & QBMAN_RESPONSE_VERB_MASK;
+       if ((response_verb == QBMAN_RESULT_DQ) &&
+           (flags & QBMAN_DQ_STAT_VOLATILE) &&
+           (flags & QBMAN_DQ_STAT_EXPIRED))
+               atomic_inc(&s->vdq.busy);
+       return p;
+}
+
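
Both variants above return one entry per call and NULL when nothing is
pending, leaving consumption to the caller. A sketch of the canonical
polling loop:

    const struct qbman_result *dq;

    while ((dq = qbman_swp_dqrr_next(s)) != NULL) {
            /* ... process the dequeue result / frame descriptor ... */
            qbman_swp_dqrr_consume(s, dq); /* hand the slot back to HW */
    }
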
 /* Consume DQRR entries previously returned from qbman_swp_dqrr_next(). */
 void qbman_swp_dqrr_consume(struct qbman_swp *s,
                            const struct qbman_result *dq)
 {
-       qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_DCAP, QBMAN_IDX_FROM_DQRR(dq));
+       qbman_cinh_write(&s->sys,
+                       QBMAN_CINH_SWP_DCAP, QBMAN_IDX_FROM_DQRR(dq));
 }
 
 /* Consume DQRR entries previously returned from qbman_swp_dqrr_next(). */
@@ -884,6 +1340,7 @@ void qbman_swp_dqrr_idx_consume(struct qbman_swp *s,
 /*********************************/
 /* Polling user-provided storage */
 /*********************************/
+
 int qbman_result_has_new_result(struct qbman_swp *s,
                                struct qbman_result *dq)
 {
@@ -898,11 +1355,11 @@ int qbman_result_has_new_result(struct qbman_swp *s,
        ((struct qbman_result *)dq)->dq.tok = 0;
 
        /*
-        * VDQCR "no longer busy" hook - not quite the same as DQRR, because the
-        * fact "VDQCR" shows busy doesn't mean that we hold the result that
-        * makes it available. Eg. we may be looking at our 10th dequeue result,
-        * having released VDQCR after the 1st result and it is now busy due to
-        * some other command!
+        * VDQCR "no longer busy" hook - not quite the same as DQRR, because
+        * the fact "VDQCR" shows busy doesn't mean that we hold the result
+        * that makes it available. Eg. we may be looking at our 10th dequeue
+        * result, having released VDQCR after the 1st result and it is now
+        * busy due to some other command!
         */
        if (s->vdq.storage == dq) {
                s->vdq.storage = NULL;
@@ -936,11 +1393,11 @@ int qbman_check_command_complete(struct qbman_result *dq)
 
        s = portal_idx_map[dq->dq.tok - 1];
        /*
-        * VDQCR "no longer busy" hook - not quite the same as DQRR, because the
-        * fact "VDQCR" shows busy doesn't mean that we hold the result that
-        * makes it available. Eg. we may be looking at our 10th dequeue result,
-        * having released VDQCR after the 1st result and it is now busy due to
-        * some other command!
+        * VDQCR "no longer busy" hook - not quite the same as DQRR, because
+        * the fact "VDQCR" shows busy doesn't mean that we hold the result
+        * that makes it available. Eg. we may be looking at our 10th dequeue
+        * result, having released VDQCR after the 1st result and it is now
+        * busy due to some other command!
         */
        if (s->vdq.storage == dq) {
                s->vdq.storage = NULL;
@@ -1142,8 +1599,10 @@ void qbman_release_desc_set_rcdi(struct qbman_release_desc *d, int enable)
 #define RAR_VB(rar)      ((rar) & 0x80)
 #define RAR_SUCCESS(rar) ((rar) & 0x100)
 
-int qbman_swp_release(struct qbman_swp *s, const struct qbman_release_desc *d,
-                     const uint64_t *buffers, unsigned int num_buffers)
+static int qbman_swp_release_direct(struct qbman_swp *s,
+                                   const struct qbman_release_desc *d,
+                                   const uint64_t *buffers,
+                                   unsigned int num_buffers)
 {
        uint32_t *p;
        const uint32_t *cl = qb_cl(d);
@@ -1157,22 +1616,63 @@ int qbman_swp_release(struct qbman_swp *s, const struct qbman_release_desc *d,
 
        /* Start the release command */
        p = qbman_cena_write_start_wo_shadow(&s->sys,
-                                            QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
+                                    QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
 
        /* Copy the caller's buffer pointers to the command */
        u64_to_le32_copy(&p[2], buffers, num_buffers);
 
-       /* Set the verb byte, have to substitute in the valid-bit and the number
-        * of buffers.
+       /* Set the verb byte, have to substitute in the valid-bit and the
+        * number of buffers.
         */
        lwsync();
        p[0] = cl[0] | RAR_VB(rar) | num_buffers;
        qbman_cena_write_complete_wo_shadow(&s->sys,
-                                           QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
+                                   QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
 
        return 0;
 }
 
+static int qbman_swp_release_mem_back(struct qbman_swp *s,
+                                     const struct qbman_release_desc *d,
+                                     const uint64_t *buffers,
+                                     unsigned int num_buffers)
+{
+       uint32_t *p;
+       const uint32_t *cl = qb_cl(d);
+       uint32_t rar = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_RAR);
+
+       pr_debug("RAR=%08x\n", rar);
+       if (!RAR_SUCCESS(rar))
+               return -EBUSY;
+
+       QBMAN_BUG_ON(!num_buffers || (num_buffers > 7));
+
+       /* Start the release command */
+       p = qbman_cena_write_start_wo_shadow(&s->sys,
+               QBMAN_CENA_SWP_RCR_MEM(RAR_IDX(rar)));
+
+       /* Copy the caller's buffer pointers to the command */
+       u64_to_le32_copy(&p[2], buffers, num_buffers);
+
+       /* Set the verb byte, have to substitute in the valid-bit and the
+        * number of buffers.
+        */
+       p[0] = cl[0] | RAR_VB(rar) | num_buffers;
+       lwsync();
+       qbman_cinh_write(&s->sys, QBMAN_CINH_SWP_RCR_AM_RT +
+               RAR_IDX(rar) * 4, QMAN_RT_MODE);
+
+       return 0;
+}
+
+inline int qbman_swp_release(struct qbman_swp *s,
+                            const struct qbman_release_desc *d,
+                            const uint64_t *buffers,
+                            unsigned int num_buffers)
+{
+       return qbman_swp_release_ptr(s, d, buffers, num_buffers);
+}
+
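
Both release paths assert num_buffers <= 7 (one RCR entry carries at most
seven buffer pointers), so bulk releases must be chunked. A hedged helper
sketch:

    static void release_bulk(struct qbman_swp *s,
                             const struct qbman_release_desc *d,
                             const uint64_t *bufs, unsigned int n)
    {
            while (n) {
                    unsigned int chunk = n > 7 ? 7 : n;

                    /* -EBUSY means the RCR slot is still in use: spin. */
                    while (qbman_swp_release(s, d, bufs, chunk) == -EBUSY)
                            ;
                    bufs += chunk;
                    n -= chunk;
            }
    }
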
 /*******************/
 /* Buffer acquires */
 /*******************/
@@ -1214,7 +1714,7 @@ int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
 
        /* Complete the management command */
        r = qbman_swp_mc_complete(s, p, QBMAN_MC_ACQUIRE);
-       if (unlikely(!r)) {
+       if (!r) {
                pr_err("qbman: acquire from BPID %d failed, no response\n",
                       bpid);
                return -EIO;
@@ -1224,7 +1724,7 @@ int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
        QBMAN_BUG_ON((r->verb & QBMAN_RESPONSE_VERB_MASK) != QBMAN_MC_ACQUIRE);
 
        /* Determine success or failure */
-       if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
+       if (r->rslt != QBMAN_MC_RSLT_OK) {
                pr_err("Acquire buffers from BPID 0x%x failed, code=0x%02x\n",
                       bpid, r->rslt);
                return -EIO;
@@ -1271,7 +1771,7 @@ static int qbman_swp_alt_fq_state(struct qbman_swp *s, uint32_t fqid,
 
        /* Complete the management command */
        r = qbman_swp_mc_complete(s, p, alt_fq_verb);
-       if (unlikely(!r)) {
+       if (!r) {
                pr_err("qbman: mgmt cmd failed, no response (verb=0x%x)\n",
                       alt_fq_verb);
                return -EIO;
@@ -1281,7 +1781,7 @@ static int qbman_swp_alt_fq_state(struct qbman_swp *s, uint32_t fqid,
        QBMAN_BUG_ON((r->verb & QBMAN_RESPONSE_VERB_MASK) != alt_fq_verb);
 
        /* Determine success or failure */
-       if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
+       if (r->rslt != QBMAN_MC_RSLT_OK) {
                pr_err("ALT FQID %d failed: verb = 0x%08x, code = 0x%02x\n",
                       fqid, alt_fq_verb, r->rslt);
                return -EIO;
@@ -1362,7 +1862,7 @@ static int qbman_swp_CDAN_set(struct qbman_swp *s, uint16_t channelid,
 
        /* Complete the management command */
        r = qbman_swp_mc_complete(s, p, QBMAN_WQCHAN_CONFIGURE);
-       if (unlikely(!r)) {
+       if (!r) {
                pr_err("qbman: wqchan config failed, no response\n");
                return -EIO;
        }
@@ -1372,7 +1872,7 @@ static int qbman_swp_CDAN_set(struct qbman_swp *s, uint16_t channelid,
                     != QBMAN_WQCHAN_CONFIGURE);
 
        /* Determine success or failure */
-       if (unlikely(r->rslt != QBMAN_MC_RSLT_OK)) {
+       if (r->rslt != QBMAN_MC_RSLT_OK) {
                pr_err("CDAN cQID %d failed: code = 0x%02x\n",
                       channelid, r->rslt);
                return -EIO;
index dbea22a..3b0fc54 100644
@@ -1,12 +1,17 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
+ * Copyright 2018 NXP
  *
  */
 
+#ifndef _QBMAN_PORTAL_H_
+#define _QBMAN_PORTAL_H_
+
 #include "qbman_sys.h"
 #include <fsl_qbman_portal.h>
 
+uint32_t qman_version;
 #define QMAN_REV_4000   0x04000000
 #define QMAN_REV_4100   0x04010000
 #define QMAN_REV_4101   0x04010001
 /* All QBMan command and result structures use this "valid bit" encoding */
 #define QB_VALID_BIT ((uint32_t)0x80)
 
+/* All QBMan commands use this "read trigger bit" encoding */
+#define QB_RT_BIT ((uint32_t)0x100)
+
 /* Management command result codes */
 #define QBMAN_MC_RSLT_OK      0xf0
 
 /* QBMan DQRR size is set at runtime in qbman_portal.c */
 
-#define QBMAN_EQCR_SIZE 8
-
 static inline uint8_t qm_cyc_diff(uint8_t ringsize, uint8_t first,
                                  uint8_t last)
 {
@@ -51,6 +57,10 @@ struct qbman_swp {
 #endif
                uint32_t valid_bit; /* 0x00 or 0x80 */
        } mc;
+       /* Management response */
+       struct {
+               uint32_t valid_bit; /* 0x00 or 0x80 */
+       } mr;
        /* Push dequeues */
        uint32_t sdq;
        /* Volatile dequeues */
@@ -87,6 +97,8 @@ struct qbman_swp {
        struct {
                uint32_t pi;
                uint32_t pi_vb;
+               uint32_t pi_ring_size;
+               uint32_t pi_mask;
                uint32_t ci;
                int available;
        } eqcr;
@@ -141,4 +153,16 @@ static inline void *qbman_swp_mc_complete(struct qbman_swp *swp, void *cmd,
  * an inline) is necessary to work with different descriptor types and to work
  * correctly with const and non-const inputs (and similarly-qualified outputs).
  */
-#define qb_cl(d) (&(d)->donot_manipulate_directly[0])
+#define qb_cl(d) (&(d)->dont_manipulate_directly[0])
+
+#ifdef RTE_ARCH_ARM64
+       #define clean(p) \
+                       { asm volatile("dc cvac, %0;" : : "r" (p) : "memory"); }
+       #define invalidate(p) \
+                       { asm volatile("dc ivac, %0" : : "r"(p) : "memory"); }
+#else
+       #define clean(p)
+       #define invalidate(p)
+#endif
+
+#endif
index 2bd33ea..d41af83 100644
  * *not* to provide linux compatibility.
  */
 
+#ifndef _QBMAN_SYS_H_
+#define _QBMAN_SYS_H_
+
 #include "qbman_sys_decl.h"
 
 #define CENA_WRITE_ENABLE 0
 #define CINH_WRITE_ENABLE 1
 
+/* CINH register offsets */
+#define QBMAN_CINH_SWP_EQCR_PI      0x800
+#define QBMAN_CINH_SWP_EQCR_CI      0x840
+#define QBMAN_CINH_SWP_EQAR         0x8c0
+#define QBMAN_CINH_SWP_CR_RT        0x900
+#define QBMAN_CINH_SWP_VDQCR_RT     0x940
+#define QBMAN_CINH_SWP_EQCR_AM_RT   0x980
+#define QBMAN_CINH_SWP_RCR_AM_RT    0x9c0
+#define QBMAN_CINH_SWP_DQPI         0xa00
+#define QBMAN_CINH_SWP_DQRR_ITR     0xa80
+#define QBMAN_CINH_SWP_DCAP         0xac0
+#define QBMAN_CINH_SWP_SDQCR        0xb00
+#define QBMAN_CINH_SWP_EQCR_AM_RT2  0xb40
+#define QBMAN_CINH_SWP_RCR_PI       0xc00
+#define QBMAN_CINH_SWP_RAR          0xcc0
+#define QBMAN_CINH_SWP_ISR          0xe00
+#define QBMAN_CINH_SWP_IER          0xe40
+#define QBMAN_CINH_SWP_ISDR         0xe80
+#define QBMAN_CINH_SWP_IIR          0xec0
+#define QBMAN_CINH_SWP_ITPR         0xf40
+
+/* CENA register offsets */
+#define QBMAN_CENA_SWP_EQCR(n) (0x000 + ((uint32_t)(n) << 6))
+#define QBMAN_CENA_SWP_DQRR(n) (0x200 + ((uint32_t)(n) << 6))
+#define QBMAN_CENA_SWP_RCR(n)  (0x400 + ((uint32_t)(n) << 6))
+#define QBMAN_CENA_SWP_CR      0x600
+#define QBMAN_CENA_SWP_RR(vb)  (0x700 + ((uint32_t)(vb) >> 1))
+#define QBMAN_CENA_SWP_VDQCR   0x780
+#define QBMAN_CENA_SWP_EQCR_CI 0x840
+
+/* CENA register offsets in memory-backed mode */
+#define QBMAN_CENA_SWP_DQRR_MEM(n)  (0x800 + ((uint32_t)(n) << 6))
+#define QBMAN_CENA_SWP_RCR_MEM(n)   (0x1400 + ((uint32_t)(n) << 6))
+#define QBMAN_CENA_SWP_CR_MEM       0x1600
+#define QBMAN_CENA_SWP_RR_MEM       0x1680
+#define QBMAN_CENA_SWP_VDQCR_MEM    0x1780
+
 /* Debugging assists */
 static inline void __hexdump(unsigned long start, unsigned long end,
                             unsigned long p, size_t sz, const unsigned char *c)
@@ -125,8 +165,8 @@ struct qbman_swp_sys {
         * place-holder.
         */
        uint8_t *cena;
-       uint8_t __iomem *addr_cena;
-       uint8_t __iomem *addr_cinh;
+       uint8_t *addr_cena;
+       uint8_t *addr_cinh;
        uint32_t idx;
        enum qbman_eqcr_mode eqcr_mode;
 };
@@ -292,13 +332,16 @@ static inline void qbman_cena_prefetch(struct qbman_swp_sys *s,
  * qbman_portal.c. So use of it is declared locally here.
  */
 #define QBMAN_CINH_SWP_CFG   0xd00
-#define QBMAN_CINH_SWP_CFG   0xd00
+
 #define SWP_CFG_DQRR_MF_SHIFT 20
 #define SWP_CFG_EST_SHIFT     16
+#define SWP_CFG_CPBS_SHIFT    15
 #define SWP_CFG_WN_SHIFT      14
 #define SWP_CFG_RPM_SHIFT     12
 #define SWP_CFG_DCM_SHIFT     10
 #define SWP_CFG_EPM_SHIFT     8
+#define SWP_CFG_VPM_SHIFT     7
+#define SWP_CFG_CPM_SHIFT     6
 #define SWP_CFG_SD_SHIFT      5
 #define SWP_CFG_SP_SHIFT      4
 #define SWP_CFG_SE_SHIFT      3
@@ -329,11 +372,20 @@ static inline uint32_t qbman_set_swp_cfg(uint8_t max_fill, uint8_t wn,
        return reg;
 }
 
+#define QMAN_RT_MODE   0x00000100
+
+#define QMAN_REV_4000  0x04000000
+#define QMAN_REV_4100  0x04010000
+#define QMAN_REV_4101  0x04010001
+#define QMAN_REV_5000  0x05000000
+#define QMAN_REV_MASK  0xffff0000
+
 static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
                                     const struct qbman_swp_desc *d,
                                     uint8_t dqrr_size)
 {
        uint32_t reg;
+       int i;
 #ifdef RTE_ARCH_64
        uint8_t wn = CENA_WRITE_ENABLE;
 #else
@@ -343,7 +395,7 @@ static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
        s->addr_cena = d->cena_bar;
        s->addr_cinh = d->cinh_bar;
        s->idx = (uint32_t)d->idx;
-       s->cena = malloc(4096);
+       s->cena = malloc(64*1024);
        if (!s->cena) {
                pr_err("Could not allocate page for cena shadow\n");
                return -1;
@@ -358,12 +410,34 @@ static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
        reg = qbman_cinh_read(s, QBMAN_CINH_SWP_CFG);
        QBMAN_BUG_ON(reg);
 #endif
+       if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000)
+               memset(s->addr_cena, 0, 64*1024);
+       else {
+               /* Invalidate the portal memory.
+                * This ensures no stale cache lines remain.
+                */
+               for (i = 0; i < 0x1000; i += 64)
+                       dccivac(s->addr_cena + i);
+       }
+
        if (s->eqcr_mode == qman_eqcr_vb_array)
-               reg = qbman_set_swp_cfg(dqrr_size, wn, 0, 3, 2, 3, 1, 1, 1, 1,
-                                       1, 1);
-       else
-               reg = qbman_set_swp_cfg(dqrr_size, wn, 1, 3, 2, 2, 1, 1, 1, 1,
-                                       1, 1);
+               reg = qbman_set_swp_cfg(dqrr_size, wn,
+                                       0, 3, 2, 3, 1, 1, 1, 1, 1, 1);
+       else {
+               if ((d->qman_version & QMAN_REV_MASK) < QMAN_REV_5000)
+                       reg = qbman_set_swp_cfg(dqrr_size, wn,
+                                               1, 3, 2, 2, 1, 1, 1, 1, 1, 1);
+               else
+                       reg = qbman_set_swp_cfg(dqrr_size, wn,
+                                               1, 3, 2, 0, 1, 1, 1, 1, 1, 1);
+       }
+
+       if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000) {
+               reg |= 1 << SWP_CFG_CPBS_SHIFT | /* memory-backed mode */
+                      1 << SWP_CFG_VPM_SHIFT |  /* VDQCR read triggered mode */
+                      1 << SWP_CFG_CPM_SHIFT;   /* CR read triggered mode */
+       }
+
        qbman_cinh_write(s, QBMAN_CINH_SWP_CFG, reg);
        reg = qbman_cinh_read(s, QBMAN_CINH_SWP_CFG);
        if (!reg) {
@@ -371,6 +445,12 @@ static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
                free(s->cena);
                return -1;
        }
+
+       if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000) {
+               qbman_cinh_write(s, QBMAN_CINH_SWP_EQCR_PI, QMAN_RT_MODE);
+               qbman_cinh_write(s, QBMAN_CINH_SWP_RCR_PI, QMAN_RT_MODE);
+       }
+
        return 0;
 }
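
For QMan >= 5000 the init path above ORs three extra bits into the SWP_CFG
word before writing it back. Spelled out with the shift macros added in
this patch (a sketch; the value follows from the defines):

    static inline uint32_t swp_cfg_mem_backed_bits(void)
    {
            return (1u << SWP_CFG_CPBS_SHIFT) | /* memory-backed portal */
                   (1u << SWP_CFG_VPM_SHIFT)  | /* VDQCR read-triggered */
                   (1u << SWP_CFG_CPM_SHIFT);   /* CR read-triggered */
    }

With CPBS at bit 15, VPM at bit 7 and CPM at bit 6 this evaluates to 0x80c0.
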
 
@@ -378,3 +458,5 @@ static inline void qbman_swp_sys_finish(struct qbman_swp_sys *s)
 {
        free(s->cena);
 }
+
+#endif /* _QBMAN_SYS_H_ */
index fa6977f..a29f5b4 100644
@@ -3,6 +3,9 @@
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
  *
  */
+#ifndef _QBMAN_SYS_DECL_H_
+#define _QBMAN_SYS_DECL_H_
+
 #include <compat.h>
 #include <fsl_qbman_base.h>
 
@@ -51,3 +54,4 @@ static inline void prefetch_for_store(void *p)
        RTE_SET_USED(p);
 }
 #endif
+#endif /* _QBMAN_SYS_DECL_H_ */
index fe45a11..dcc4e08 100644
@@ -114,5 +114,18 @@ DPDK_18.05 {
        dpdmai_open;
        dpdmai_set_rx_queue;
        rte_dpaa2_free_dpci_dev;
+       rte_dpaa2_memsegs;
 
 } DPDK_18.02;
+
+DPDK_18.11 {
+       global:
+
+       dpaa2_dqrr_size;
+       dpaa2_eqcr_size;
+       dpci_get_link_state;
+       dpci_get_opr;
+       dpci_get_peer_attributes;
+       dpci_set_opr;
+
+} DPDK_18.05;
index 3ff3bdb..514452b 100644
@@ -19,7 +19,7 @@ LDLIBS += -lrte_kvargs
 EXPORT_MAP := rte_bus_ifpga_version.map
 
 # library version
-LIBABIVER := 1
+LIBABIVER := 2
 
 SRCS-$(CONFIG_RTE_LIBRTE_IFPGA_BUS) += ifpga_bus.c
 SRCS-$(CONFIG_RTE_LIBRTE_IFPGA_BUS) += ifpga_common.c
index b324872..5f23ed8 100644
@@ -142,6 +142,7 @@ ifpga_scan_one(struct rte_rawdev *rawdev,
        if (!afu_dev)
                goto end;
 
+       afu_dev->device.bus = &rte_ifpga_bus;
        afu_dev->device.devargs = devargs;
        afu_dev->device.numa_node = SOCKET_ID_ANY;
        afu_dev->device.name = devargs->name;
@@ -279,14 +280,13 @@ ifpga_probe_one_driver(struct rte_afu_driver *drv,
 
        /* reference driver structure */
        afu_dev->driver = drv;
-       afu_dev->device.driver = &drv->driver;
 
        /* call the driver probe() function */
        ret = drv->probe(afu_dev);
-       if (ret) {
+       if (ret)
                afu_dev->driver = NULL;
-               afu_dev->device.driver = NULL;
-       }
+       else
+               afu_dev->device.driver = &drv->driver;
 
        return ret;
 }
@@ -301,8 +301,11 @@ ifpga_probe_all_drivers(struct rte_afu_device *afu_dev)
                return -1;
 
        /* Check if a driver is already loaded */
-       if (afu_dev->driver != NULL)
-               return 0;
+       if (rte_dev_is_probed(&afu_dev->device)) {
+               IFPGA_BUS_DEBUG("Device %s is already probed\n",
+                               rte_ifpga_device_name(afu_dev));
+               return -EEXIST;
+       }
 
        TAILQ_FOREACH(drv, &ifpga_afu_drv_list, next) {
                if (ifpga_probe_one_driver(drv, afu_dev)) {
@@ -325,14 +328,13 @@ ifpga_probe(void)
        int ret = 0;
 
        TAILQ_FOREACH(afu_dev, &ifpga_afu_dev_list, next) {
-               if (afu_dev->device.driver)
-                       continue;
-
                ret = ifpga_probe_all_drivers(afu_dev);
+               if (ret == -EEXIST)
+                       continue;
                if (ret < 0)
                        IFPGA_BUS_ERR("failed to initialize %s device\n",
                                rte_ifpga_device_name(afu_dev));
-               }
+       }
 
        return ret;
 }
@@ -347,23 +349,20 @@ static int
 ifpga_remove_driver(struct rte_afu_device *afu_dev)
 {
        const char *name;
-       const struct rte_afu_driver *driver;
 
        name = rte_ifpga_device_name(afu_dev);
-       if (!afu_dev->device.driver) {
+       if (afu_dev->driver == NULL) {
                IFPGA_BUS_DEBUG("no driver attach to device %s\n", name);
                return 1;
        }
 
-       driver = RTE_DRV_TO_AFU_CONST(afu_dev->device.driver);
-       return driver->remove(afu_dev);
+       return afu_dev->driver->remove(afu_dev);
 }
 
 static int
 ifpga_unplug(struct rte_device *dev)
 {
        struct rte_afu_device *afu_dev = NULL;
-       struct rte_devargs *devargs = NULL;
        int ret;
 
        if (dev == NULL)
@@ -373,15 +372,13 @@ ifpga_unplug(struct rte_device *dev)
        if (!afu_dev)
                return -ENOENT;
 
-       devargs = dev->devargs;
-
        ret = ifpga_remove_driver(afu_dev);
        if (ret)
                return ret;
 
        TAILQ_REMOVE(&ifpga_afu_dev_list, afu_dev, next);
 
-       rte_devargs_remove(devargs->bus->name, devargs->name);
+       rte_devargs_remove(dev->devargs);
        free(afu_dev);
        return 0;
 
index c9b08c8..0b5c38d 100644
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2010-2018 Intel Corporation
 
+version = 2
+
 deps += ['pci', 'kvargs', 'rawdev']
 install_headers('rte_bus_ifpga.h')
 sources = files('ifpga_common.c', 'ifpga_bus.c')
index 51d5ae0..d53c0f4 100644
@@ -83,9 +83,6 @@ struct rte_afu_device {
 #define RTE_DEV_TO_AFU(ptr) \
        container_of(ptr, struct rte_afu_device, device)
 
-#define RTE_DRV_TO_AFU_CONST(ptr) \
-       container_of(ptr, const struct rte_afu_driver, driver)
-
 /**
  * Initialization function for the driver called during FPGA BUS probing.
  */
index cf37306..f33e012 100644
@@ -4,7 +4,7 @@
 include $(RTE_SDK)/mk/rte.vars.mk
 
 LIB = librte_bus_pci.a
-LIBABIVER := 1
+LIBABIVER := 2
 EXPORT_MAP := rte_bus_pci_version.map
 
 CFLAGS := -I$(SRCDIR) $(CFLAGS)
@@ -26,10 +26,11 @@ CFLAGS += -I$(RTE_SDK)/lib/librte_eal/$(SYSTEM)app/eal
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
-LDLIBS += -lrte_ethdev -lrte_pci
+LDLIBS += -lrte_ethdev -lrte_pci -lrte_kvargs
 
 include $(RTE_SDK)/drivers/bus/pci/$(SYSTEM)/Makefile
 SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) := $(addprefix $(SYSTEM)/,$(SRCS))
+SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci_params.c
 SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci_common.c
 SRCS-$(CONFIG_RTE_LIBRTE_PCI_BUS) += pci_common_uio.c
 
index 655b34b..d09f8ee 100644
@@ -223,6 +223,8 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
        }
 
        memset(dev, 0, sizeof(*dev));
+       dev->device.bus = &rte_pci_bus.bus;
+
        dev->addr.domain = conf->pc_sel.pc_domain;
        dev->addr.bus = conf->pc_sel.pc_bus;
        dev->addr.devid = conf->pc_sel.pc_dev;
@@ -439,6 +441,8 @@ int rte_pci_read_config(const struct rte_pci_device *dev,
 {
        int fd = -1;
        int size;
+       /* Copy Linux implementation's behaviour */
+       const int return_len = len;
        struct pci_io pi = {
                .pi_sel = {
                        .pc_domain = dev->addr.domain,
@@ -469,7 +473,7 @@ int rte_pci_read_config(const struct rte_pci_device *dev,
        }
        close(fd);
 
-       return 0;
+       return return_len;
 
  error:
        if (fd >= 0)
index 96ea1d5..9040446 100644
@@ -4,5 +4,3 @@
 SRCS += pci.c
 SRCS += pci_uio.c
 SRCS += pci_vfio.c
-
-CFLAGS += -D_GNU_SOURCE
index 04648ac..45c24ef 100644
@@ -119,7 +119,7 @@ rte_pci_unmap_device(struct rte_pci_device *dev)
 static int
 find_max_end_va(const struct rte_memseg_list *msl, void *arg)
 {
-       size_t sz = msl->memseg_arr.len * msl->page_sz;
+       size_t sz = msl->len;
        void *end_va = RTE_PTR_ADD(msl->base_va, sz);
        void **max_va = arg;
 
@@ -228,6 +228,7 @@ pci_scan_one(const char *dirname, const struct rte_pci_addr *addr)
                return -1;
 
        memset(dev, 0, sizeof(*dev));
+       dev->device.bus = &rte_pci_bus.bus;
        dev->addr = *addr;
 
        /* get vendor id */
@@ -588,10 +589,8 @@ pci_one_device_iommu_support_va(struct rte_pci_device *dev)
        fclose(fp);
 
        mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1;
-       if (mgaw < X86_VA_WIDTH)
-               return false;
 
-       return true;
+       return rte_eal_check_dma_mask(mgaw) == 0 ? true : false;
 }
 #elif defined(RTE_ARCH_PPC_64)
 static bool
@@ -620,8 +619,11 @@ pci_devices_iommu_support_va(void)
                FOREACH_DEVICE_ON_PCIBUS(dev) {
                        if (!rte_pci_match(drv, dev))
                                continue;
-                       if (!pci_one_device_iommu_support_va(dev))
-                               return false;
+                       /*
+                        * Just one PCI device needs to be checked, because
+                        * the IOMMU hardware is the same for all of them.
+                        */
+                       return pci_one_device_iommu_support_va(dev);
                }
        }
        return true;
@@ -672,23 +674,21 @@ rte_pci_get_iommu_class(void)
 int rte_pci_read_config(const struct rte_pci_device *device,
                void *buf, size_t len, off_t offset)
 {
+       char devname[RTE_DEV_NAME_MAX_LEN] = "";
        const struct rte_intr_handle *intr_handle = &device->intr_handle;
 
-       switch (intr_handle->type) {
-       case RTE_INTR_HANDLE_UIO:
-       case RTE_INTR_HANDLE_UIO_INTX:
+       switch (device->kdrv) {
+       case RTE_KDRV_IGB_UIO:
                return pci_uio_read_config(intr_handle, buf, len, offset);
-
 #ifdef VFIO_PRESENT
-       case RTE_INTR_HANDLE_VFIO_MSIX:
-       case RTE_INTR_HANDLE_VFIO_MSI:
-       case RTE_INTR_HANDLE_VFIO_LEGACY:
+       case RTE_KDRV_VFIO:
                return pci_vfio_read_config(intr_handle, buf, len, offset);
 #endif
        default:
+               rte_pci_device_name(&device->addr, devname,
+                                   RTE_DEV_NAME_MAX_LEN);
                RTE_LOG(ERR, EAL,
-                       "Unknown handle type of fd %d\n",
-                                       intr_handle->fd);
+                       "Unknown driver type for %s\n", devname);
                return -1;
        }
 }
@@ -697,23 +697,21 @@ int rte_pci_read_config(const struct rte_pci_device *device,
 int rte_pci_write_config(const struct rte_pci_device *device,
                const void *buf, size_t len, off_t offset)
 {
+       char devname[RTE_DEV_NAME_MAX_LEN] = "";
        const struct rte_intr_handle *intr_handle = &device->intr_handle;
 
-       switch (intr_handle->type) {
-       case RTE_INTR_HANDLE_UIO:
-       case RTE_INTR_HANDLE_UIO_INTX:
+       switch (device->kdrv) {
+       case RTE_KDRV_IGB_UIO:
                return pci_uio_write_config(intr_handle, buf, len, offset);
-
 #ifdef VFIO_PRESENT
-       case RTE_INTR_HANDLE_VFIO_MSIX:
-       case RTE_INTR_HANDLE_VFIO_MSI:
-       case RTE_INTR_HANDLE_VFIO_LEGACY:
+       case RTE_KDRV_VFIO:
                return pci_vfio_write_config(intr_handle, buf, len, offset);
 #endif
        default:
+               rte_pci_device_name(&device->addr, devname,
+                                   RTE_DEV_NAME_MAX_LEN);
                RTE_LOG(ERR, EAL,
-                       "Unknown handle type of fd %d\n",
-                                       intr_handle->fd);
+                       "Unknown driver type for %s\n", devname);
                return -1;
        }
 }
index 686386d..305cc06 100644
@@ -17,6 +17,8 @@
 #include <rte_eal_memconfig.h>
 #include <rte_malloc.h>
 #include <rte_vfio.h>
+#include <rte_eal.h>
+#include <rte_bus.h>
 
 #include "eal_filesystem.h"
 
@@ -35,7 +37,9 @@
 
 #ifdef VFIO_PRESENT
 
+#ifndef PAGE_SIZE
 #define PAGE_SIZE   (sysconf(_SC_PAGESIZE))
+#endif
 #define PAGE_MASK   (~(PAGE_SIZE - 1))
 
 static struct rte_tailq_elem rte_vfio_tailq = {
@@ -277,6 +281,114 @@ pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd)
        return -1;
 }
 
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+static void
+pci_vfio_req_handler(void *param)
+{
+       struct rte_bus *bus;
+       int ret;
+       struct rte_device *device = (struct rte_device *)param;
+
+       bus = rte_bus_find_by_device(device);
+       if (bus == NULL) {
+               RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n",
+                       device->name);
+               return;
+       }
+
+       /*
+        * The VFIO kernel module requests that user space release its
+        * allocated resources before the device is deleted from the kernel,
+        * so we can directly call the VFIO bus hot-unplug handler here.
+        */
+       ret = bus->hot_unplug_handler(device);
+       if (ret)
+               RTE_LOG(ERR, EAL,
+                       "Can not handle hot-unplug for device (%s)\n",
+                       device->name);
+}
+
+/* enable notifier (only enable req now) */
+static int
+pci_vfio_enable_notifier(struct rte_pci_device *dev, int vfio_dev_fd)
+{
+       int ret;
+       int fd = -1;
+
+       /* set up an eventfd for req notifier */
+       fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+       if (fd < 0) {
+               RTE_LOG(ERR, EAL, "Cannot set up eventfd, error %i (%s)\n",
+                       errno, strerror(errno));
+               return -1;
+       }
+
+       dev->vfio_req_intr_handle.fd = fd;
+       dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_VFIO_REQ;
+       dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd;
+
+       ret = rte_intr_callback_register(&dev->vfio_req_intr_handle,
+                                        pci_vfio_req_handler,
+                                        (void *)&dev->device);
+       if (ret) {
+               RTE_LOG(ERR, EAL, "Failed to register req notifier handler.\n");
+               goto error;
+       }
+
+       ret = rte_intr_enable(&dev->vfio_req_intr_handle);
+       if (ret) {
+               RTE_LOG(ERR, EAL, "Failed to enable req notifier.\n");
+               ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle,
+                                                pci_vfio_req_handler,
+                                                (void *)&dev->device);
+               if (ret < 0)
+                       RTE_LOG(ERR, EAL,
+                               "Failed to unregister req notifier handler.\n");
+               goto error;
+       }
+
+       return 0;
+error:
+       close(fd);
+
+       dev->vfio_req_intr_handle.fd = -1;
+       dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+       dev->vfio_req_intr_handle.vfio_dev_fd = -1;
+
+       return -1;
+}
+
+/* disable notifier (only disable req now) */
+static int
+pci_vfio_disable_notifier(struct rte_pci_device *dev)
+{
+       int ret;
+
+       ret = rte_intr_disable(&dev->vfio_req_intr_handle);
+       if (ret) {
+               RTE_LOG(ERR, EAL, "Failed to disable req notifier.\n");
+               return -1;
+       }
+
+       ret = rte_intr_callback_unregister(&dev->vfio_req_intr_handle,
+                                          pci_vfio_req_handler,
+                                          (void *)&dev->device);
+       if (ret < 0) {
+               RTE_LOG(ERR, EAL,
+                        "Failed to unregister req notifier handler.\n");
+               return -1;
+       }
+
+       close(dev->vfio_req_intr_handle.fd);
+
+       dev->vfio_req_intr_handle.fd = -1;
+       dev->vfio_req_intr_handle.type = RTE_INTR_HANDLE_UNKNOWN;
+       dev->vfio_req_intr_handle.vfio_dev_fd = -1;
+
+       return 0;
+}
+#endif
+
 static int
 pci_vfio_is_ioport_bar(int vfio_dev_fd, int bar_index)
 {
@@ -415,6 +527,93 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,
        return 0;
 }
 
+/*
+ * region info may contain capability headers, so we need to keep reallocating
+ * the memory until we match allocated memory size with argsz.
+ */
+static int
+pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info,
+               int region)
+{
+       struct vfio_region_info *ri;
+       size_t argsz = sizeof(*ri);
+       int ret;
+
+       ri = malloc(sizeof(*ri));
+       if (ri == NULL) {
+               RTE_LOG(ERR, EAL, "Cannot allocate memory for region info\n");
+               return -1;
+       }
+again:
+       memset(ri, 0, argsz);
+       ri->argsz = argsz;
+       ri->index = region;
+
+       ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri);
+       if (ret < 0) {
+               free(ri);
+               return ret;
+       }
+       if (ri->argsz != argsz) {
+               struct vfio_region_info *tmp;
+
+               argsz = ri->argsz;
+               tmp = realloc(ri, argsz);
+
+               if (tmp == NULL) {
+                       /* realloc failed but the ri is still there */
+                       free(ri);
+                       RTE_LOG(ERR, EAL, "Cannot reallocate memory for region info\n");
+                       return -1;
+               }
+               ri = tmp;
+               goto again;
+       }
+       *info = ri;
+
+       return 0;
+}
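The argsz handshake above follows the usual VFIO pattern: the kernel reports the size it actually needs and the caller retries with a larger buffer. A minimal caller sketch (mirroring the use further down in this patch; the region index and consumer are illustrative):

	struct vfio_region_info *info = NULL;

	if (pci_vfio_get_region_info(vfio_dev_fd, &info, 0) == 0) {
		/* size/offset/flags and any capability chain are valid here */
		use_region(info);	/* hypothetical consumer */
		free(info);		/* the caller owns the buffer */
	}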
+
+static struct vfio_info_cap_header *
+pci_vfio_info_cap(struct vfio_region_info *info, int cap)
+{
+       struct vfio_info_cap_header *h;
+       size_t offset;
+
+       if ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) {
+               /* VFIO info does not advertise capabilities */
+               return NULL;
+       }
+
+       offset = VFIO_CAP_OFFSET(info);
+       while (offset != 0) {
+               h = RTE_PTR_ADD(info, offset);
+               if (h->id == cap)
+                       return h;
+               offset = h->next;
+       }
+       return NULL;
+}
+
+static int
+pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)
+{
+       struct vfio_region_info *info;
+       int ret;
+
+       ret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region);
+       if (ret < 0)
+               return -1;
+
+       ret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL;
+
+       /* cleanup */
+       free(info);
+
+       return ret;
+}
+
 static int
 pci_vfio_map_resource_primary(struct rte_pci_device *dev)
 {
@@ -430,6 +629,9 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
        struct pci_map *maps;
 
        dev->intr_handle.fd = -1;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+       dev->vfio_req_intr_handle.fd = -1;
+#endif
 
        /* store PCI address string */
        snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
@@ -464,56 +666,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
        if (ret < 0) {
                RTE_LOG(ERR, EAL, "  %s cannot get MSI-X BAR number!\n",
                                pci_addr);
-               goto err_vfio_dev_fd;
+               goto err_vfio_res;
+       }
+       /* if we found our MSI-X BAR region, check if we can mmap it */
+       if (vfio_res->msix_table.bar_index != -1) {
+               int ret = pci_vfio_msix_is_mappable(vfio_dev_fd,
+                               vfio_res->msix_table.bar_index);
+               if (ret < 0) {
+                       RTE_LOG(ERR, EAL, "Couldn't check if MSI-X BAR is mappable\n");
+                       goto err_vfio_res;
+               } else if (ret != 0) {
+                       /* we can map it, so we don't care where it is */
+                       RTE_LOG(DEBUG, EAL, "VFIO reports MSI-X BAR as mappable\n");
+                       vfio_res->msix_table.bar_index = -1;
+               }
        }
 
        for (i = 0; i < (int) vfio_res->nb_maps; i++) {
-               struct vfio_region_info reg = { .argsz = sizeof(reg) };
+               struct vfio_region_info *reg = NULL;
                void *bar_addr;
 
-               reg.index = i;
-
-               ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);
-               if (ret) {
+               ret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);
+               if (ret < 0) {
                        RTE_LOG(ERR, EAL, "  %s cannot get device region info "
-                                       "error %i (%s)\n", pci_addr, errno, strerror(errno));
+                               "error %i (%s)\n", pci_addr, errno,
+                               strerror(errno));
                        goto err_vfio_res;
                }
 
                /* chk for io port region */
                ret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);
-               if (ret < 0)
+               if (ret < 0) {
+                       free(reg);
                        goto err_vfio_res;
-               else if (ret) {
+               } else if (ret) {
                        RTE_LOG(INFO, EAL, "Ignore mapping IO port bar(%d)\n",
                                        i);
+                       free(reg);
                        continue;
                }
 
                /* skip non-mmapable BARs */
-               if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)
+               if ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {
+                       free(reg);
                        continue;
+               }
 
                /* try mapping somewhere close to the end of hugepages */
                if (pci_map_addr == NULL)
                        pci_map_addr = pci_find_max_end_va();
 
                bar_addr = pci_map_addr;
-               pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);
+               pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size);
 
                maps[i].addr = bar_addr;
-               maps[i].offset = reg.offset;
-               maps[i].size = reg.size;
+               maps[i].offset = reg->offset;
+               maps[i].size = reg->size;
                maps[i].path = NULL; /* vfio doesn't have per-resource paths */
 
                ret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);
                if (ret < 0) {
                        RTE_LOG(ERR, EAL, "  %s mapping BAR%i failed: %s\n",
                                        pci_addr, i, strerror(errno));
+                       free(reg);
                        goto err_vfio_res;
                }
 
                dev->mem_resource[i].addr = maps[i].addr;
+
+               free(reg);
        }
 
        if (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {
@@ -521,6 +742,13 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)
                goto err_vfio_res;
        }
 
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+       if (pci_vfio_enable_notifier(dev, vfio_dev_fd) != 0) {
+               RTE_LOG(ERR, EAL, "Error setting up notifier!\n");
+               goto err_vfio_res;
+       }
+
+#endif
        TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next);
 
        return 0;
@@ -546,6 +774,9 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
        struct pci_map *maps;
 
        dev->intr_handle.fd = -1;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+       dev->vfio_req_intr_handle.fd = -1;
+#endif
 
        /* store PCI address string */
        snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
@@ -586,6 +817,9 @@ pci_vfio_map_resource_secondary(struct rte_pci_device *dev)
 
 /* we need to save vfio_dev_fd, so it can be used during release */
        dev->intr_handle.vfio_dev_fd = vfio_dev_fd;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+       dev->vfio_req_intr_handle.vfio_dev_fd = vfio_dev_fd;
+#endif
 
        return 0;
 err_vfio_dev_fd:
@@ -658,6 +892,14 @@ pci_vfio_unmap_resource_primary(struct rte_pci_device *dev)
        snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT,
                        loc->domain, loc->bus, loc->devid, loc->function);
 
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+       ret = pci_vfio_disable_notifier(dev);
+       if (ret) {
+               RTE_LOG(ERR, EAL, "Failed to disable req notifier.\n");
+               return -1;
+       }
+
+#endif
        if (close(dev->intr_handle.fd) < 0) {
                RTE_LOG(INFO, EAL, "Error when closing eventfd file descriptor for %s\n",
                        pci_addr);
index 72939e5..a3140ff 100644 (file)
@@ -1,15 +1,18 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
+version = 2
+
 deps += ['pci']
 install_headers('rte_bus_pci.h')
-sources = files('pci_common.c', 'pci_common_uio.c')
+sources = files('pci_common.c',
+       'pci_common_uio.c',
+       'pci_params.c')
 if host_machine.system() == 'linux'
        sources += files('linux/pci.c',
                        'linux/pci_uio.c',
                        'linux/pci_vfio.c')
        includes += include_directories('linux')
-       cflags += ['-D_GNU_SOURCE']
 else
        sources += files('bsd/pci.c')
        includes += include_directories('bsd')
@@ -17,3 +20,5 @@ endif
 
 # memseg walk is not part of stable API yet
 allow_experimental_apis = true
+
+deps += ['kvargs']
index 7736b3f..6276e5d 100644 (file)
@@ -6,6 +6,7 @@
 #include <string.h>
 #include <inttypes.h>
 #include <stdint.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <sys/queue.h>
 #include <rte_string_fns.h>
 #include <rte_common.h>
 #include <rte_devargs.h>
+#include <rte_vfio.h>
 
 #include "private.h"
 
 
-extern struct rte_pci_bus rte_pci_bus;
-
 #define SYSFS_PCI_DEVICES "/sys/bus/pci/devices"
 
 const char *rte_pci_get_sysfs_path(void)
@@ -123,6 +123,7 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
                         struct rte_pci_device *dev)
 {
        int ret;
+       bool already_probed;
        struct rte_pci_addr *loc;
 
        if ((dr == NULL) || (dev == NULL))
@@ -153,6 +154,13 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
                dev->device.numa_node = 0;
        }
 
+       already_probed = rte_dev_is_probed(&dev->device);
+       if (already_probed && !(dr->drv_flags & RTE_PCI_DRV_PROBE_AGAIN)) {
+               RTE_LOG(DEBUG, EAL, "Device %s is already probed\n",
+                               dev->device.name);
+               return -EEXIST;
+       }
+
        RTE_LOG(INFO, EAL, "  probe driver: %x:%x %s\n", dev->id.vendor_id,
                dev->id.device_id, dr->driver.name);
 
@@ -161,24 +169,24 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
         * This needs to be before rte_pci_map_device(), as it enables to use
         * driver flags for adjusting configuration.
         */
-       dev->driver = dr;
-       dev->device.driver = &dr->driver;
+       if (!already_probed)
+               dev->driver = dr;
 
-       if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) {
+       if (!already_probed && (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING)) {
                /* map resources for devices that use igb_uio */
                ret = rte_pci_map_device(dev);
                if (ret != 0) {
                        dev->driver = NULL;
-                       dev->device.driver = NULL;
                        return ret;
                }
        }
 
        /* call the driver probe() function */
        ret = dr->probe(dr, dev);
+       if (already_probed)
+               return ret; /* no rollback if already succeeded earlier */
        if (ret) {
                dev->driver = NULL;
-               dev->device.driver = NULL;
                if ((dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) &&
                        /* Don't unmap if device is unsupported and
                         * driver needs mapped resources.
@@ -186,6 +194,8 @@ rte_pci_probe_one_driver(struct rte_pci_driver *dr,
                        !(ret > 0 &&
                                (dr->drv_flags & RTE_PCI_DRV_KEEP_MAPPED_RES)))
                        rte_pci_unmap_device(dev);
+       } else {
+               dev->device.driver = &dr->driver;
        }
 
        return ret;
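The already_probed path above is only reachable for drivers that advertise support for re-probing. A hedged sketch of how a PMD would opt in; the driver name, ID table and callbacks are illustrative, not part of this patch:

	#include <rte_bus_pci.h>

	static const struct rte_pci_id my_pmd_ids[] = {
		{ RTE_PCI_DEVICE(0x1234, 0x5678) },	/* hypothetical IDs */
		{ .vendor_id = 0 },			/* sentinel */
	};

	static int
	my_pmd_probe(struct rte_pci_driver *dr __rte_unused,
			struct rte_pci_device *dev __rte_unused)
	{
		/* with PROBE_AGAIN set, may run again on a probed device */
		return 0;
	}

	static int
	my_pmd_remove(struct rte_pci_device *dev __rte_unused)
	{
		return 0;
	}

	static struct rte_pci_driver my_pmd = {
		.id_table = my_pmd_ids,
		.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_PROBE_AGAIN,
		.probe = my_pmd_probe,
		.remove = my_pmd_remove,
	};
	RTE_PMD_REGISTER_PCI(net_my_pmd, my_pmd);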
@@ -233,7 +243,7 @@ rte_pci_detach_dev(struct rte_pci_device *dev)
 
 /*
  * If vendor/device ID match, call the probe() function of all
- * registered driver for the given device. Return -1 if initialization
+ * registered drivers for the given device. Return < 0 if initialization
  * failed, return 1 if no driver is found for this device.
  */
 static int
@@ -243,17 +253,13 @@ pci_probe_all_drivers(struct rte_pci_device *dev)
        int rc = 0;
 
        if (dev == NULL)
-               return -1;
-
-       /* Check if a driver is already loaded */
-       if (dev->driver != NULL)
-               return 0;
+               return -EINVAL;
 
        FOREACH_DRIVER_ON_PCIBUS(dr) {
                rc = rte_pci_probe_one_driver(dr, dev);
                if (rc < 0)
                        /* negative value is an error */
-                       return -1;
+                       return rc;
                if (rc > 0)
                        /* positive value means driver doesn't support it */
                        continue;
@@ -290,11 +296,14 @@ rte_pci_probe(void)
                        devargs->policy == RTE_DEV_WHITELISTED)
                        ret = pci_probe_all_drivers(dev);
                if (ret < 0) {
-                       RTE_LOG(ERR, EAL, "Requested device " PCI_PRI_FMT
-                                " cannot be used\n", dev->addr.domain, dev->addr.bus,
-                                dev->addr.devid, dev->addr.function);
-                       rte_errno = errno;
-                       failed++;
+                       if (ret != -EEXIST) {
+                               RTE_LOG(ERR, EAL, "Requested device "
+                                       PCI_PRI_FMT " cannot be used\n",
+                                       dev->addr.domain, dev->addr.bus,
+                                       dev->addr.devid, dev->addr.function);
+                               rte_errno = errno;
+                               failed++;
+                       }
                        ret = 0;
                }
        }
@@ -405,6 +414,98 @@ pci_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
        return NULL;
 }
 
+/*
+ * Find the device which encountered the failure by iterating over all
+ * devices on the PCI bus, checking whether the memory failure address
+ * falls within the range of one of the device's BARs.
+ */
+static struct rte_pci_device *
+pci_find_device_by_addr(const void *failure_addr)
+{
+       struct rte_pci_device *pdev = NULL;
+       uint64_t check_point, start, end, len;
+       int i;
+
+       check_point = (uint64_t)(uintptr_t)failure_addr;
+
+       FOREACH_DEVICE_ON_PCIBUS(pdev) {
+               for (i = 0; i != RTE_DIM(pdev->mem_resource); i++) {
+                       start = (uint64_t)(uintptr_t)pdev->mem_resource[i].addr;
+                       len = pdev->mem_resource[i].len;
+                       end = start + len;
+                       if (check_point >= start && check_point < end) {
+                               RTE_LOG(DEBUG, EAL, "Failure address %16.16"
+                                       PRIx64" belongs to device %s!\n",
+                                       check_point, pdev->device.name);
+                               return pdev;
+                       }
+               }
+       }
+       return NULL;
+}
+
+static int
+pci_hot_unplug_handler(struct rte_device *dev)
+{
+       struct rte_pci_device *pdev = NULL;
+       int ret = 0;
+
+       pdev = RTE_DEV_TO_PCI(dev);
+       if (!pdev)
+               return -1;
+
+       switch (pdev->kdrv) {
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+       case RTE_KDRV_VFIO:
+               /*
+                * The vfio kernel module guarantees that the PCI device
+                * will not be deleted until user space releases its
+                * resources, so there is no need to remap the BAR
+                * resources here; just forward the req event to user
+                * space to handle it.
+                */
+               rte_dev_event_callback_process(dev->name,
+                                              RTE_DEV_EVENT_REMOVE);
+               break;
+#endif
+       case RTE_KDRV_IGB_UIO:
+       case RTE_KDRV_UIO_GENERIC:
+       case RTE_KDRV_NIC_UIO:
+               /* The BAR resources are invalid; remap them to be safe. */
+               ret = pci_uio_remap_resource(pdev);
+               break;
+       default:
+               RTE_LOG(DEBUG, EAL,
+                       "Not managed by a supported kernel driver, skipped\n");
+               ret = -1;
+               break;
+       }
+
+       return ret;
+}
+
+static int
+pci_sigbus_handler(const void *failure_addr)
+{
+       struct rte_pci_device *pdev = NULL;
+       int ret = 0;
+
+       pdev = pci_find_device_by_addr(failure_addr);
+       if (!pdev) {
+               /* It is a generic sigbus error; no bus will handle it. */
+               ret = 1;
+       } else {
+               /* The sigbus error is caused by hot-unplug. */
+               ret = pci_hot_unplug_handler(&pdev->device);
+               if (ret) {
+                       RTE_LOG(ERR, EAL,
+                               "Failed to handle hot-unplug for device %s\n",
+                               pdev->name);
+                       ret = -1;
+               }
+       }
+       return ret;
+}
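On the application side, the REMOVE event forwarded by pci_hot_unplug_handler() is consumed through the device event API (experimental in this release). A minimal sketch of the consumer side, assuming hotplug monitoring is available on the platform; the callback body is illustrative:

	#include <stdio.h>
	#include <rte_dev.h>

	static void
	on_dev_event(const char *dev_name, enum rte_dev_event_type event,
			void *cb_arg __rte_unused)
	{
		if (event == RTE_DEV_EVENT_REMOVE)
			printf("device %s removed, clean up in the app\n",
				dev_name);
	}

	static int
	setup_hotplug_monitor(void)
	{
		/* a NULL device name registers for events from all devices */
		if (rte_dev_event_callback_register(NULL, on_dev_event, NULL))
			return -1;
		return rte_dev_event_monitor_start();
	}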
+
 static int
 pci_plug(struct rte_device *dev)
 {
@@ -421,6 +522,7 @@ pci_unplug(struct rte_device *dev)
        ret = rte_pci_detach_dev(pdev);
        if (ret == 0) {
                rte_pci_remove_device(pdev);
+               rte_devargs_remove(dev->devargs);
                free(pdev);
        }
        return ret;
@@ -435,6 +537,9 @@ struct rte_pci_bus rte_pci_bus = {
                .unplug = pci_unplug,
                .parse = pci_parse,
                .get_iommu_class = rte_pci_get_iommu_class,
+               .dev_iterate = rte_pci_dev_iterate,
+               .hot_unplug_handler = pci_hot_unplug_handler,
+               .sigbus_handler = pci_sigbus_handler,
        },
        .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list),
        .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list),
index 54bc20b..7ea73db 100644 (file)
@@ -146,6 +146,39 @@ pci_uio_unmap(struct mapped_pci_resource *uio_res)
        }
 }
 
+/* remap the PCI resource of a PCI device in anonymous virtual memory */
+int
+pci_uio_remap_resource(struct rte_pci_device *dev)
+{
+       int i;
+       void *map_address;
+
+       if (dev == NULL)
+               return -1;
+
+       /* Remap all BARs */
+       for (i = 0; i != PCI_MAX_RESOURCE; i++) {
+               /* skip empty BAR */
+               if (dev->mem_resource[i].phys_addr == 0)
+                       continue;
+               map_address = mmap(dev->mem_resource[i].addr,
+                               (size_t)dev->mem_resource[i].len,
+                               PROT_READ | PROT_WRITE,
+                               MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+               if (map_address == MAP_FAILED) {
+                       RTE_LOG(ERR, EAL,
+                               "Cannot remap resource for device %s\n",
+                               dev->name);
+                       return -1;
+               }
+               RTE_LOG(INFO, EAL,
+                       "Successfully remapped resource for device %s\n",
+                       dev->name);
+       }
+
+       return 0;
+}
+
 static struct mapped_pci_resource *
 pci_uio_find_resource(struct rte_pci_device *dev)
 {
diff --git a/drivers/bus/pci/pci_params.c b/drivers/bus/pci/pci_params.c
new file mode 100644 (file)
index 0000000..3192e9c
--- /dev/null
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaëtan Rivet
+ */
+
+#include <rte_bus.h>
+#include <rte_bus_pci.h>
+#include <rte_dev.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
+#include <rte_pci.h>
+
+#include "private.h"
+
+enum pci_params {
+       RTE_PCI_PARAM_ADDR,
+       RTE_PCI_PARAM_MAX,
+};
+
+static const char * const pci_params_keys[] = {
+       [RTE_PCI_PARAM_ADDR] = "addr",
+       [RTE_PCI_PARAM_MAX] = NULL,
+};
+
+static int
+pci_addr_kv_cmp(const char *key __rte_unused,
+               const char *value,
+               void *_addr2)
+{
+       struct rte_pci_addr _addr1;
+       struct rte_pci_addr *addr1 = &_addr1;
+       struct rte_pci_addr *addr2 = _addr2;
+
+       if (rte_pci_addr_parse(value, addr1))
+               return -1;
+       return -abs(rte_pci_addr_cmp(addr1, addr2));
+}
+
+static int
+pci_dev_match(const struct rte_device *dev,
+             const void *_kvlist)
+{
+       const struct rte_kvargs *kvlist = _kvlist;
+       const struct rte_pci_device *pdev;
+
+       if (kvlist == NULL)
+               /* Empty string matches everything. */
+               return 0;
+       pdev = RTE_DEV_TO_PCI_CONST(dev);
+       /* if any field does not match. */
+       if (rte_kvargs_process(kvlist, pci_params_keys[RTE_PCI_PARAM_ADDR],
+                              &pci_addr_kv_cmp,
+                              (void *)(intptr_t)&pdev->addr))
+               return 1;
+       return 0;
+}
+
+void *
+rte_pci_dev_iterate(const void *start,
+                   const char *str,
+                   const struct rte_dev_iterator *it __rte_unused)
+{
+       rte_bus_find_device_t find_device;
+       struct rte_kvargs *kvargs = NULL;
+       struct rte_device *dev;
+
+       if (str != NULL) {
+               kvargs = rte_kvargs_parse(str, pci_params_keys);
+               if (kvargs == NULL) {
+                       RTE_LOG(ERR, EAL, "cannot parse argument list\n");
+                       rte_errno = EINVAL;
+                       return NULL;
+               }
+       }
+       find_device = rte_pci_bus.bus.find_device;
+       dev = find_device(start, pci_dev_match, kvargs);
+       rte_kvargs_free(kvargs);
+       return dev;
+}
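rte_pci_dev_iterate() is normally reached through the generic device iteration API rather than called directly. A hedged usage sketch with the RTE_DEV_FOREACH macro from rte_dev.h (experimental in this release; the PCI address is illustrative):

	#include <stdio.h>
	#include <rte_dev.h>

	static void
	list_matching_pci(void)
	{
		struct rte_dev_iterator it;
		struct rte_device *dev;

		/* kvargs after the comma go through pci_dev_match() above */
		RTE_DEV_FOREACH(dev, "bus=pci,addr=0000:00:02.0", &it)
			printf("matched: %s\n", dev->name);
	}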
index 8ddd03e..13c3324 100644 (file)
 #include <rte_pci.h>
 #include <rte_bus_pci.h>
 
+extern struct rte_pci_bus rte_pci_bus;
+
 struct rte_pci_driver;
 struct rte_pci_device;
 
 /**
  * Probe the PCI bus
  *
@@ -122,6 +126,18 @@ int pci_uio_alloc_resource(struct rte_pci_device *dev,
 void pci_uio_free_resource(struct rte_pci_device *dev,
                struct mapped_pci_resource *uio_res);
 
+/**
+ * Remap the PCI resource of a PCI device in anonymous virtual memory.
+ *
+ * @param dev
+ *   Pointer to the struct rte_pci_device.
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+pci_uio_remap_resource(struct rte_pci_device *dev);
+
 /**
  * Map device memory to uio resource
  *
@@ -166,4 +182,27 @@ rte_pci_match(const struct rte_pci_driver *pci_drv,
 enum rte_iova_mode
 rte_pci_get_iommu_class(void);
 
+/*
+ * Iterate over internal devices,
+ * matching any device against the provided
+ * string.
+ *
+ * @param start
+ *   Iteration starting point.
+ *
+ * @param str
+ *   Device string to match against.
+ *
+ * @param it
+ *   (unused) iterator structure.
+ *
+ * @return
+ *   A pointer to the next matching device if any.
+ *   NULL otherwise.
+ */
+void *
+rte_pci_dev_iterate(const void *start,
+                   const char *str,
+                   const struct rte_dev_iterator *it);
+
 #endif /* _PCI_PRIVATE_H_ */
index 0d1955f..f0d6d81 100644 (file)
@@ -62,10 +62,12 @@ struct rte_pci_device {
        struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE];
                                            /**< PCI Memory Resource */
        struct rte_intr_handle intr_handle; /**< Interrupt handle */
-       struct rte_pci_driver *driver;      /**< Associated driver */
+       struct rte_pci_driver *driver;      /**< PCI driver used in probing */
        uint16_t max_vfs;                   /**< sriov enable if not zero */
        enum rte_kernel_driver kdrv;        /**< Kernel driver passthrough */
        char name[PCI_PRI_STR_SIZE+1];      /**< PCI location (ASCII) */
+       struct rte_intr_handle vfio_req_intr_handle;
+                               /**< Handler of VFIO request interrupt */
 };
 
 /**
@@ -121,7 +123,7 @@ struct rte_pci_driver {
        pci_probe_t *probe;                /**< Device Probe function. */
        pci_remove_t *remove;              /**< Device Remove function. */
        const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */
-       uint32_t drv_flags;                /**< Flags contolling handling of device. */
+       uint32_t drv_flags;                /**< Flags RTE_PCI_DRV_*. */
 };
 
 /**
@@ -137,6 +139,8 @@ struct rte_pci_bus {
 #define RTE_PCI_DRV_NEED_MAPPING 0x0001
 /** Device needs PCI BAR mapping with enabled write combining (wc) */
 #define RTE_PCI_DRV_WC_ACTIVATE 0x0002
+/** Device already probed can be probed again to check for new ports. */
+#define RTE_PCI_DRV_PROBE_AGAIN 0x0004
 /** Device driver supports link state interrupt */
 #define RTE_PCI_DRV_INTR_LSC   0x0008
 /** Device driver supports device removal interrupt */
@@ -219,6 +223,8 @@ void rte_pci_unregister(struct rte_pci_driver *driver);
  *   The length of the data buffer.
  * @param offset
  *   The offset into PCI config space
+ * @return
+ *  Number of bytes read on success, negative on error.
  */
 int rte_pci_read_config(const struct rte_pci_device *device,
                void *buf, size_t len, off_t offset);
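With the return value now documented, callers can check for short reads. A minimal sketch reading the 16-bit vendor ID at config space offset 0; the helper name and error handling are illustrative:

	static int
	read_vendor_id(const struct rte_pci_device *dev, uint16_t *vendor)
	{
		/* a successful read returns the number of bytes requested */
		if (rte_pci_read_config(dev, vendor, sizeof(*vendor), 0) !=
				sizeof(*vendor))
			return -1;
		return 0;
	}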
index bd0bb89..803b8ea 100644 (file)
@@ -16,11 +16,12 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API
 EXPORT_MAP := rte_bus_vdev_version.map
 
 # library version
-LIBABIVER := 1
+LIBABIVER := 2
 
 SRCS-y += vdev.c
+SRCS-y += vdev_params.c
 
-LDLIBS += -lrte_eal
+LDLIBS += -lrte_eal -lrte_kvargs
 
 #
 # Export include files
index 2ee648b..803785f 100644 (file)
@@ -1,7 +1,12 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('vdev.c')
+version = 2
+
+sources = files('vdev.c',
+       'vdev_params.c')
 install_headers('rte_bus_vdev.h')
 
 allow_experimental_apis = true
+
+deps += ['kvargs']
index 6139dd5..9c66bdc 100644 (file)
@@ -23,6 +23,7 @@
 
 #include "rte_bus_vdev.h"
 #include "vdev_logs.h"
+#include "vdev_private.h"
 
 #define VDEV_MP_KEY    "bus_vdev_mp"
 
@@ -40,7 +41,7 @@ static struct vdev_device_list vdev_device_list =
 static rte_spinlock_recursive_t vdev_device_list_lock =
        RTE_SPINLOCK_RECURSIVE_INITIALIZER;
 
-struct vdev_driver_list vdev_driver_list =
+static struct vdev_driver_list vdev_driver_list =
        TAILQ_HEAD_INITIALIZER(vdev_driver_list);
 
 struct vdev_custom_scan {
@@ -149,10 +150,9 @@ vdev_probe_all_drivers(struct rte_vdev_device *dev)
 
        if (vdev_parse(name, &driver))
                return -1;
-       dev->device.driver = &driver->driver;
        ret = driver->probe(dev);
-       if (ret)
-               dev->device.driver = NULL;
+       if (ret == 0)
+               dev->device.driver = &driver->driver;
        return ret;
 }
 
@@ -202,7 +202,9 @@ alloc_devargs(const char *name, const char *args)
 }
 
 static int
-insert_vdev(const char *name, const char *args, struct rte_vdev_device **p_dev)
+insert_vdev(const char *name, const char *args,
+               struct rte_vdev_device **p_dev,
+               bool init)
 {
        struct rte_vdev_device *dev;
        struct rte_devargs *devargs;
@@ -221,17 +223,24 @@ insert_vdev(const char *name, const char *args, struct rte_vdev_device **p_dev)
                goto fail;
        }
 
+       dev->device.bus = &rte_vdev_bus;
        dev->device.devargs = devargs;
        dev->device.numa_node = SOCKET_ID_ANY;
        dev->device.name = devargs->name;
 
        if (find_vdev(name)) {
+               /*
+                * A vdev is expected to have only one port.
+                * So there is no reason to try probing again,
+                * even with new arguments.
+                */
                ret = -EEXIST;
                goto fail;
        }
 
        TAILQ_INSERT_TAIL(&vdev_device_list, dev, next);
-       rte_devargs_insert(devargs);
+       if (init)
+               rte_devargs_insert(devargs);
 
        if (p_dev)
                *p_dev = dev;
@@ -248,20 +257,18 @@ int
 rte_vdev_init(const char *name, const char *args)
 {
        struct rte_vdev_device *dev;
-       struct rte_devargs *devargs;
        int ret;
 
        rte_spinlock_recursive_lock(&vdev_device_list_lock);
-       ret = insert_vdev(name, args, &dev);
+       ret = insert_vdev(name, args, &dev, true);
        if (ret == 0) {
                ret = vdev_probe_all_drivers(dev);
                if (ret) {
                        if (ret > 0)
                                VDEV_LOG(ERR, "no driver found for %s", name);
                        /* If fails, remove it from vdev list */
-                       devargs = dev->device.devargs;
                        TAILQ_REMOVE(&vdev_device_list, dev, next);
-                       rte_devargs_remove(devargs->bus->name, devargs->name);
+                       rte_devargs_remove(dev->device.devargs);
                        free(dev);
                }
        }
@@ -289,7 +296,6 @@ int
 rte_vdev_uninit(const char *name)
 {
        struct rte_vdev_device *dev;
-       struct rte_devargs *devargs;
        int ret;
 
        if (name == NULL)
@@ -308,8 +314,7 @@ rte_vdev_uninit(const char *name)
                goto unlock;
 
        TAILQ_REMOVE(&vdev_device_list, dev, next);
-       devargs = dev->device.devargs;
-       rte_devargs_remove(devargs->bus->name, devargs->name);
+       rte_devargs_remove(dev->device.devargs);
        free(dev);
 
 unlock:
@@ -346,6 +351,7 @@ vdev_action(const struct rte_mp_msg *mp_msg, const void *peer)
        const struct vdev_param *in = (const struct vdev_param *)mp_msg->param;
        const char *devname;
        int num;
+       int ret;
 
        strlcpy(mp_resp.name, VDEV_MP_KEY, sizeof(mp_resp.name));
        mp_resp.len_param = sizeof(*ou);
@@ -380,7 +386,10 @@ vdev_action(const struct rte_mp_msg *mp_msg, const void *peer)
                break;
        case VDEV_SCAN_ONE:
                VDEV_LOG(INFO, "receive vdev, %s", in->name);
-               if (insert_vdev(in->name, NULL, NULL) < 0)
+               ret = insert_vdev(in->name, NULL, NULL, false);
+               if (ret == -EEXIST)
+                       VDEV_LOG(DEBUG, "device already exists, %s", in->name);
+               else if (ret < 0)
                        VDEV_LOG(ERR, "failed to add vdev, %s", in->name);
                break;
        default:
@@ -419,6 +428,7 @@ vdev_scan(void)
                        mp_rep = &mp_reply.msgs[0];
                        resp = (struct vdev_param *)mp_rep->param;
                        VDEV_LOG(INFO, "Received %d vdevs", resp->num);
+                       free(mp_reply.msgs);
                } else
                        VDEV_LOG(ERR, "Failed to request vdev from primary");
 
@@ -455,6 +465,7 @@ vdev_scan(void)
                        continue;
                }
 
+               dev->device.bus = &rte_vdev_bus;
                dev->device.devargs = devargs;
                dev->device.numa_node = SOCKET_ID_ANY;
                dev->device.name = devargs->name;
@@ -480,7 +491,7 @@ vdev_probe(void)
                 * we call each driver probe.
                 */
 
-               if (dev->device.driver)
+               if (rte_dev_is_probed(&dev->device))
                        continue;
 
                if (vdev_probe_all_drivers(dev)) {
@@ -493,9 +504,9 @@ vdev_probe(void)
        return ret;
 }
 
-static struct rte_device *
-vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
-                const void *data)
+struct rte_device *
+rte_vdev_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
+                    const void *data)
 {
        const struct rte_vdev_device *vstart;
        struct rte_vdev_device *dev;
@@ -532,10 +543,11 @@ vdev_unplug(struct rte_device *dev)
 static struct rte_bus rte_vdev_bus = {
        .scan = vdev_scan,
        .probe = vdev_probe,
-       .find_device = vdev_find_device,
+       .find_device = rte_vdev_find_device,
        .plug = vdev_plug,
        .unplug = vdev_unplug,
        .parse = vdev_parse,
+       .dev_iterate = rte_vdev_dev_iterate,
 };
 
 RTE_REGISTER_BUS(vdev, rte_vdev_bus);
diff --git a/drivers/bus/vdev/vdev_params.c b/drivers/bus/vdev/vdev_params.c
new file mode 100644 (file)
index 0000000..6f74704
--- /dev/null
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaëtan Rivet
+ */
+
+#include <string.h>
+
+#include <rte_dev.h>
+#include <rte_bus.h>
+#include <rte_kvargs.h>
+#include <rte_errno.h>
+
+#include "vdev_logs.h"
+#include "vdev_private.h"
+
+enum vdev_params {
+       RTE_VDEV_PARAM_NAME,
+       RTE_VDEV_PARAM_MAX,
+};
+
+static const char * const vdev_params_keys[] = {
+       [RTE_VDEV_PARAM_NAME] = "name",
+       [RTE_VDEV_PARAM_MAX] = NULL,
+};
+
+static int
+vdev_dev_match(const struct rte_device *dev,
+              const void *_kvlist)
+{
+       int ret;
+       const struct rte_kvargs *kvlist = _kvlist;
+       char *name;
+
+       /* cannot pass const dev->name to rte_kvargs_process() */
+       name = strdup(dev->name);
+       if (name == NULL)
+               return -1;
+       ret = rte_kvargs_process(kvlist,
+               vdev_params_keys[RTE_VDEV_PARAM_NAME],
+               rte_kvargs_strcmp, name);
+       free(name);
+       if (ret != 0)
+               return -1;
+
+       return 0;
+}
+
+void *
+rte_vdev_dev_iterate(const void *start,
+                    const char *str,
+                    const struct rte_dev_iterator *it __rte_unused)
+{
+       struct rte_kvargs *kvargs = NULL;
+       struct rte_device *dev;
+
+       if (str != NULL) {
+               kvargs = rte_kvargs_parse(str, vdev_params_keys);
+               if (kvargs == NULL) {
+                       VDEV_LOG(ERR, "cannot parse argument list");
+                       rte_errno = EINVAL;
+                       return NULL;
+               }
+       }
+       dev = rte_vdev_find_device(start, vdev_dev_match, kvargs);
+       rte_kvargs_free(kvargs);
+       return dev;
+}
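As on the PCI bus, this iterator is driven by the generic RTE_DEV_FOREACH machinery; only the key set differs. A short sketch (the vdev name is illustrative):

	struct rte_dev_iterator it;
	struct rte_device *dev;

	/* "name" is matched by vdev_dev_match() above */
	RTE_DEV_FOREACH(dev, "bus=vdev,name=net_null0", &it)
		VDEV_LOG(INFO, "found %s", dev->name);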
diff --git a/drivers/bus/vdev/vdev_private.h b/drivers/bus/vdev/vdev_private.h
new file mode 100644 (file)
index 0000000..ba6dc48
--- /dev/null
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Gaëtan Rivet
+ */
+
+#ifndef _VDEV_PRIVATE_H_
+#define _VDEV_PRIVATE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct rte_device *
+rte_vdev_find_device(const struct rte_device *start,
+                    rte_dev_cmp_t cmp,
+                    const void *data);
+
+void *
+rte_vdev_dev_iterate(const void *start,
+                    const char *str,
+                    const struct rte_dev_iterator *it);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VDEV_PRIVATE_H_ */
index deee9dd..e54c557 100644 (file)
@@ -3,7 +3,7 @@
 include $(RTE_SDK)/mk/rte.vars.mk
 
 LIB = librte_bus_vmbus.a
-LIBABIVER := 1
+LIBABIVER := 2
 EXPORT_MAP := rte_bus_vmbus_version.map
 
 CFLAGS += -I$(SRCDIR)
index 52d6a3c..a4755a3 100644 (file)
@@ -229,6 +229,7 @@ vmbus_scan_one(const char *name)
        if (dev == NULL)
                return -1;
 
+       dev->device.bus = &rte_vmbus_bus.bus;
        dev->device.name = strdup(name);
        if (!dev->device.name)
                goto error;
@@ -276,6 +277,8 @@ vmbus_scan_one(const char *name)
                dev->device.numa_node = SOCKET_ID_ANY;
        }
 
+       dev->device.devargs = vmbus_devargs_lookup(dev);
+
        /* device is valid, add in list (sorted) */
        VMBUS_LOG(DEBUG, "Adding vmbus device %s", name);
 
index 18daabe..0e4d058 100644 (file)
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 
+version = 2
+
 allow_experimental_apis = true
 
 install_headers('rte_bus_vmbus.h','rte_vmbus_reg.h')
index 9964fc4..211127d 100644 (file)
 #include <sys/uio.h>
 #include <rte_log.h>
 #include <rte_vmbus_reg.h>
+#include <rte_bus_vmbus.h>
 
 #ifndef PAGE_SIZE
 #define PAGE_SIZE      4096
 #endif
 
+extern struct rte_vmbus_bus rte_vmbus_bus;
+
 extern int vmbus_logtype_bus;
 #define VMBUS_LOG(level, fmt, args...) \
        rte_log(RTE_LOG_ ## level, vmbus_logtype_bus, "%s(): " fmt "\n", \
@@ -66,6 +69,9 @@ struct vmbus_channel {
 
 #define VMBUS_MAX_CHANNELS     64
 
+struct rte_devargs *
+vmbus_devargs_lookup(struct rte_vmbus_device *dev);
+
 int vmbus_chan_create(const struct rte_vmbus_device *device,
                      uint16_t relid, uint16_t subid, uint8_t monitor_id,
                      struct vmbus_channel **new_chan);
index 4a2c1f6..2839fef 100644 (file)
@@ -364,6 +364,21 @@ void rte_vmbus_chan_signal_read(struct vmbus_channel *chan, uint32_t bytes_read)
  */
 uint16_t rte_vmbus_sub_channel_index(const struct vmbus_channel *chan);
 
+/**
+ * Set the host monitor latency hint
+ *
+ * @param dev
+ *    VMBUS device
+ * @param chan
+ *     Pointer to vmbus_channel structure.
+ * @param latency
+ *     Approximate wait period between hypervisor examinations of
+ *     the trigger page (in nanoseconds).
+ */
+void rte_vmbus_set_latency(const struct rte_vmbus_device *dev,
+                          const struct vmbus_channel *chan,
+                          uint32_t latency);
+
 /**
  * Register a VMBUS driver.
  *
index dabb920..ae231ad 100644 (file)
@@ -27,3 +27,10 @@ DPDK_18.08 {
 
        local: *;
 };
+
+DPDK_18.11 {
+       global:
+
+       rte_vmbus_set_latency;
+
+} DPDK_18.08;
index cc5f3e8..bd14c06 100644 (file)
@@ -59,6 +59,32 @@ vmbus_set_event(const struct rte_vmbus_device *dev,
        vmbus_set_monitor(dev, chan->monitor_id);
 }
 
+/*
+ * Set the wait between when hypervisor examines the trigger.
+ */
+void
+rte_vmbus_set_latency(const struct rte_vmbus_device *dev,
+                     const struct vmbus_channel *chan,
+                     uint32_t latency)
+{
+       uint32_t trig_idx = chan->monitor_id / VMBUS_MONTRIG_LEN;
+       uint32_t trig_offs = chan->monitor_id % VMBUS_MONTRIG_LEN;
+
+       if (latency >= UINT16_MAX * 100) {
+               VMBUS_LOG(ERR, "invalid latency value %u", latency);
+               return;
+       }
+
+       if (trig_idx >= VMBUS_MONTRIGS_MAX) {
+               VMBUS_LOG(ERR, "invalid monitor trigger %u",
+                         trig_idx);
+               return;
+       }
+
+       /* Host value is expressed in 100 nanosecond units */
+       dev->monitor_page->lat[trig_idx][trig_offs] = latency / 100;
+}
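A hedged usage sketch: a consumer (e.g. a netvsc-style PMD, once the channel is up) asking the host to examine the trigger page roughly every 50 microseconds. The value is passed in nanoseconds and rounded down to 100 ns units internally:

	/* 50 us expressed in nanoseconds; must stay below UINT16_MAX * 100 */
	rte_vmbus_set_latency(dev, chan, 50 * 1000);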
+
 /*
  * Notify host that there are data pending on our TX bufring.
  *
index c7165ad..48a219f 100644 (file)
@@ -85,7 +85,6 @@ vmbus_match(const struct rte_vmbus_driver *dr,
 
        return false;
 }
-
 /*
  * If device ID match, call the devinit() function of the driver.
  */
@@ -112,7 +111,6 @@ vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
 
        /* reference driver structure */
        dev->driver = dr;
-       dev->device.driver = &dr->driver;
 
        if (dev->device.numa_node < 0) {
                VMBUS_LOG(WARNING, "  Invalid NUMA socket, default to 0");
@@ -125,6 +123,8 @@ vmbus_probe_one_driver(struct rte_vmbus_driver *dr,
        if (ret) {
                dev->driver = NULL;
                rte_vmbus_unmap_device(dev);
+       } else {
+               dev->device.driver = &dr->driver;
        }
 
        return ret;
@@ -143,7 +143,7 @@ vmbus_probe_all_drivers(struct rte_vmbus_device *dev)
        int rc;
 
        /* Check if a driver is already loaded */
-       if (dev->driver != NULL) {
+       if (rte_dev_is_probed(&dev->device)) {
                VMBUS_LOG(DEBUG, "VMBUS driver already loaded");
                return 0;
        }
@@ -204,6 +204,27 @@ vmbus_parse(const char *name, void *addr)
        return ret;
 }
 
+/*
+ * scan for matching device args on command line
+ * example:
+ *     -w 'vmbus:635a7ae3-091e-4410-ad59-667c4f8c04c3,latency=20'
+ */
+struct rte_devargs *
+vmbus_devargs_lookup(struct rte_vmbus_device *dev)
+{
+       struct rte_devargs *devargs;
+       rte_uuid_t addr;
+
+       RTE_EAL_DEVARGS_FOREACH("vmbus", devargs) {
+               vmbus_parse(devargs->name, &addr);
+
+               if (rte_uuid_compare(dev->device_id, addr) == 0)
+                       return devargs;
+       }
+       return NULL;
+}
+
 /* register vmbus driver */
 void
 rte_vmbus_register(struct rte_vmbus_driver *driver)
index 0fd2237..87b8a59 100644 (file)
@@ -4,8 +4,23 @@
 
 include $(RTE_SDK)/mk/rte.vars.mk
 
+ifeq ($(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO),y)
+DIRS-y += cpt
+endif
+
 ifeq ($(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF)$(CONFIG_RTE_LIBRTE_OCTEONTX_MEMPOOL),yy)
 DIRS-y += octeontx
 endif
 
+MVEP-y := $(CONFIG_RTE_LIBRTE_MVPP2_PMD)
+MVEP-y += $(CONFIG_RTE_LIBRTE_MVNETA_PMD)
+MVEP-y += $(CONFIG_RTE_LIBRTE_PMD_MVSAM_CRYPTO)
+ifneq (,$(findstring y,$(MVEP-y)))
+DIRS-y += mvep
+endif
+
+ifeq ($(CONFIG_RTE_LIBRTE_COMMON_DPAAX),y)
+DIRS-y += dpaax
+endif
+
 include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/drivers/common/cpt/Makefile b/drivers/common/cpt/Makefile
new file mode 100644 (file)
index 0000000..2340aa9
--- /dev/null
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Cavium, Inc
+#
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_common_cpt.a
+
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -I$(RTE_SDK)/drivers/bus/pci
+EXPORT_MAP := rte_common_cpt_version.map
+
+LIBABIVER := 1
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y += cpt_pmd_ops_helper.c
+
+LDLIBS += -lrte_eal
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/common/cpt/cpt_common.h b/drivers/common/cpt/cpt_common.h
new file mode 100644 (file)
index 0000000..8461cd6
--- /dev/null
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#ifndef _CPT_COMMON_H_
+#define _CPT_COMMON_H_
+
+/*
+ * This file defines common macros and structs
+ */
+
+/*
+ * Macros to determine CPT model. Driver makefile will define CPT_MODEL
+ * accordingly
+ */
+#define CRYPTO_OCTEONTX                0x1
+
+#define TIME_IN_RESET_COUNT    5
+
+/* Default command timeout in seconds */
+#define DEFAULT_COMMAND_TIMEOUT        4
+
+#define CPT_COUNT_THOLD                32
+#define CPT_TIMER_THOLD                0x3F
+
+#define AE_TYPE 1
+#define SE_TYPE 2
+
+#ifndef ROUNDUP4
+#define ROUNDUP4(val)  (((val) + 3) & 0xfffffffc)
+#endif
+
+#ifndef ROUNDUP8
+#define ROUNDUP8(val)  (((val) + 7) & 0xfffffff8)
+#endif
+
+#ifndef ROUNDUP16
+#define ROUNDUP16(val) (((val) + 15) & 0xfffffff0)
+#endif
+
+#ifndef __hot
+#define __hot __attribute__((hot))
+#endif
+
+#define MOD_INC(i, l)   ((i) == (l - 1) ? (i) = 0 : (i)++)
+
+struct cptvf_meta_info {
+       void *cptvf_meta_pool;
+       int cptvf_op_mlen;
+       int cptvf_op_sb_mlen;
+};
+
+struct rid {
+       /** Request id of a crypto operation */
+       uintptr_t rid;
+};
+
+/*
+ * Pending queue structure
+ */
+struct pending_queue {
+       /** Tail of queue to be used for enqueue */
+       uint16_t enq_tail;
+       /** Head of queue to be used for dequeue */
+       uint16_t deq_head;
+       /** Array of pending requests */
+       struct rid *rid_queue;
+       /** Pending requests count */
+       uint64_t pending_count;
+};
+
+struct cpt_request_info {
+       /** Data path fields */
+       uint64_t comp_baddr;
+       volatile uint64_t *completion_addr;
+       volatile uint64_t *alternate_caddr;
+       void *op;
+       struct {
+               uint64_t ei0;
+               uint64_t ei1;
+               uint64_t ei2;
+               uint64_t ei3;
+       } ist;
+
+       /** Control path fields */
+       uint64_t time_out;
+       uint8_t extra_time;
+};
+
+#endif /* _CPT_COMMON_H_ */
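The ring bookkeeping implied by MOD_INC and struct pending_queue can be shown with a minimal enqueue/dequeue sketch. The helpers are hypothetical (not part of this header) and assume the caller has already checked for queue space and occupancy:

	/* push a request id into a queue of qlen slots */
	static inline void
	pq_push(struct pending_queue *pq, uintptr_t req, uint16_t qlen)
	{
		pq->rid_queue[pq->enq_tail].rid = req;
		MOD_INC(pq->enq_tail, qlen);	/* wraps to 0 at qlen - 1 */
		pq->pending_count++;
	}

	/* pop the oldest pending request id */
	static inline uintptr_t
	pq_pop(struct pending_queue *pq, uint16_t qlen)
	{
		uintptr_t req = pq->rid_queue[pq->deq_head].rid;

		MOD_INC(pq->deq_head, qlen);
		pq->pending_count--;
		return req;
	}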
diff --git a/drivers/common/cpt/cpt_hw_types.h b/drivers/common/cpt/cpt_hw_types.h
new file mode 100644 (file)
index 0000000..cff59c7
--- /dev/null
@@ -0,0 +1,522 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#ifndef _CPT_HW_TYPES_H_
+#define _CPT_HW_TYPES_H_
+
+#include <rte_byteorder.h>
+
+/*
+ * This file defines HRM-specific structs.
+ */
+
+#define CPT_VF_INTR_MBOX_MASK   (1<<0)
+#define CPT_VF_INTR_DOVF_MASK   (1<<1)
+#define CPT_VF_INTR_IRDE_MASK   (1<<2)
+#define CPT_VF_INTR_NWRP_MASK   (1<<3)
+#define CPT_VF_INTR_SWERR_MASK  (1<<4)
+#define CPT_VF_INTR_HWERR_MASK  (1<<5)
+#define CPT_VF_INTR_FAULT_MASK  (1<<6)
+
+#define CPT_INST_SIZE           (64)
+#define CPT_NEXT_CHUNK_PTR_SIZE (8)
+
+/*
+ * CPT_INST_S software command definitions
+ * Words EI (0-3)
+ */
+typedef union {
+       uint64_t u64;
+       struct {
+               uint16_t opcode;
+               uint16_t param1;
+               uint16_t param2;
+               uint16_t dlen;
+       } s;
+} vq_cmd_word0_t;
+
+typedef union {
+       uint64_t u64;
+       struct {
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+               uint64_t grp    : 3;
+               uint64_t cptr   : 61;
+#else
+               uint64_t cptr   : 61;
+               uint64_t grp    : 3;
+#endif
+       } s;
+} vq_cmd_word3_t;
+
+typedef struct cpt_vq_command {
+       vq_cmd_word0_t cmd;
+       uint64_t dptr;
+       uint64_t rptr;
+       vq_cmd_word3_t cptr;
+} cpt_vq_cmd_t;
+
+/**
+ * Structure cpt_inst_s
+ *
+ * CPT Instruction Structure
+ * This structure specifies the instruction layout.
+ * Instructions are stored in memory as little-endian unless
+ * CPT()_PF_Q()_CTL[INST_BE] is set.
+ */
+typedef union cpt_inst_s {
+       uint64_t u[8];
+       struct cpt_inst_s_8s {
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 0 - Big Endian */
+               uint64_t reserved_17_63        : 47;
+               /* [ 16: 16] Done interrupt.
+                * 0 = No interrupts related to this instruction.
+                * 1 = When the instruction completes, CPT()_VQ()_DONE[DONE]
+                * will be incremented, and based on the rules described
+                * there an interrupt may occur.
+                */
+               uint64_t doneint               : 1;
+               uint64_t reserved_0_15         : 16;
+#else /* Word 0 - Little Endian */
+               uint64_t reserved_0_15         : 16;
+               uint64_t doneint               : 1;
+               uint64_t reserved_17_63        : 47;
+#endif /* Word 0 - End */
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 1 - Big Endian */
+               /* [127: 64] Result IOVA.
+                * If nonzero, specifies where to write CPT_RES_S.
+                * If zero, no result structure will be written.
+                * Address must be 16-byte aligned.
+                *
+                * Bits <63:49> are ignored by hardware; software should
+                * use a sign-extended bit <48> for forward compatibility.
+                */
+               uint64_t res_addr              : 64;
+#else /* Word 1 - Little Endian */
+               uint64_t res_addr              : 64;
+#endif /* Word 1 - End */
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 2 - Big Endian */
+               uint64_t reserved_172_191      : 20;
+               /* [171:162] If [WQ_PTR] is nonzero, the SSO guest-group to
+                * use when CPT submits work to SSO.
+                * For the SSO to not discard the add-work request, FPA_PF_MAP()
+                * must map [GRP] and CPT()_PF_Q()_GMCTL[GMID] as valid.
+                */
+               uint64_t grp                   : 10;
+               /* [161:160] If [WQ_PTR] is nonzero, the SSO tag type to use
+                * when CPT submits work to SSO.
+                */
+               uint64_t tt                    : 2;
+               /* [159:128] If [WQ_PTR] is nonzero, the SSO tag to use when
+                * CPT submits work to SSO.
+                */
+               uint64_t tag                   : 32;
+#else /* Word 2 - Little Endian */
+               uint64_t tag                   : 32;
+               uint64_t tt                    : 2;
+               uint64_t grp                   : 10;
+               uint64_t reserved_172_191      : 20;
+#endif /* Word 2 - End */
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 3 - Big Endian */
+               /** [255:192] If [WQ_PTR] is nonzero, it is a pointer to a
+                * work-queue entry that CPT submits work to SSO after all
+                * context, output data, and result write operations are
+                * visible to other CNXXXX units and the cores.
+                * Bits <2:0> must be zero.
+                * Bits <63:49> are ignored by hardware; software should use a
+                * sign-extended bit <48> for forward compatibility.
+                * Internal: Bits <63:49>, <2:0> are ignored by hardware,
+                * treated as always 0x0.
+                **/
+               uint64_t wq_ptr                : 64;
+#else /* Word 3 - Little Endian */
+               uint64_t wq_ptr                : 64;
+#endif /* Word 3 - End */
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 4 - Big Endian */
+               union {
+                       /** [319:256] Engine instruction word 0. Passed to the
+                        * AE/SE.
+                        **/
+                       uint64_t ei0                   : 64;
+                       vq_cmd_word0_t vq_cmd_w0;
+               };
+#else /* Word 4 - Little Endian */
+               union {
+                       uint64_t ei0                   : 64;
+                       vq_cmd_word0_t vq_cmd_w0;
+               };
+#endif /* Word 4 - End */
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 5 - Big Endian */
+               union {
+                       /** [383:320] Engine instruction word 1. Passed to the
+                        * AE/SE.
+                        **/
+                       uint64_t ei1                   : 64;
+                       uint64_t dptr;
+               };
+#else /* Word 5 - Little Endian */
+               union {
+                       uint64_t ei1                   : 64;
+                       uint64_t dptr;
+               };
+#endif /* Word 5 - End */
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 6 - Big Endian */
+               union {
+                       /** [447:384] Engine instruction word 2. Passed to the
+                        * AE/SE.
+                        **/
+                       uint64_t ei2                   : 64;
+                       uint64_t rptr;
+               };
+#else /* Word 6 - Little Endian */
+               union {
+                       uint64_t ei2                   : 64;
+                       uint64_t rptr;
+               };
+#endif /* Word 6 - End */
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 7 - Big Endian */
+               union {
+                       /** [511:448] Engine instruction word 3. Passed to the
+                        * AE/SE.
+                        **/
+                       uint64_t ei3                   : 64;
+                       vq_cmd_word3_t vq_cmd_w3;
+               };
+#else /* Word 7 - Little Endian */
+               union {
+                       uint64_t ei3                   : 64;
+                       vq_cmd_word3_t vq_cmd_w3;
+               };
+#endif /* Word 7 - End */
+       } s8x;
+} cpt_inst_s_t;
+
+/**
+ * Structure cpt_res_s
+ *
+ * CPT Result Structure
+ * The CPT coprocessor writes the result structure after it completes a
+ * CPT_INST_S instruction. The result structure is exactly 16 bytes, and each
+ * instruction completion produces exactly one result structure.
+ *
+ * This structure is stored in memory as little-endian unless
+ * CPT()_PF_Q()_CTL[INST_BE] is set.
+ */
+typedef union cpt_res_s {
+       uint64_t u[2];
+       struct cpt_res_s_8s {
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 0 - Big Endian */
+               uint64_t reserved_17_63        : 47;
+               /** [ 16: 16] Done interrupt. This bit is copied from the
+                * corresponding instruction's CPT_INST_S[DONEINT].
+                **/
+               uint64_t doneint               : 1;
+               uint64_t reserved_8_15         : 8;
+               /** [  7:  0] Indicates completion/error status of the CPT
+                * coprocessor for the associated instruction, as enumerated by
+                * CPT_COMP_E. Core software may write the memory location
+                * containing [COMPCODE] to 0x0 before ringing the doorbell, and
+                * then poll for completion by checking for a nonzero value.
+                *
+                * Once the core observes a nonzero [COMPCODE] value in this
+                * case, the CPT coprocessor will have also completed L2/DRAM
+                * write operations.
+                **/
+               uint64_t compcode              : 8;
+#else /* Word 0 - Little Endian */
+               uint64_t compcode              : 8;
+               uint64_t reserved_8_15         : 8;
+               uint64_t doneint               : 1;
+               uint64_t reserved_17_63        : 47;
+#endif /* Word 0 - End */
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 1 - Big Endian */
+               uint64_t reserved_64_127       : 64;
+#else /* Word 1 - Little Endian */
+               uint64_t reserved_64_127       : 64;
+#endif /* Word 1 - End */
+       } s8x;
+} cpt_res_s_t;
+
+/**
+ * Register (NCB) cpt#_vq#_ctl
+ *
+ * CPT VF Queue Control Registers
+ * This register configures queues. This register should be changed (other than
+ * clearing [ENA]) only when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]).
+ */
+typedef union {
+       uint64_t u;
+       struct cptx_vqx_ctl_s {
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 0 - Big Endian */
+               uint64_t reserved_1_63         : 63;
+               /** [  0:  0](R/W/H) Enables the logical instruction queue.
+                * See also CPT()_PF_Q()_CTL[CONT_ERR] and
+                * CPT()_VQ()_INPROG[INFLIGHT].
+                * 1 = Queue is enabled.
+                * 0 = Queue is disabled.
+                **/
+               uint64_t ena                   : 1;
+#else /* Word 0 - Little Endian */
+               uint64_t ena                   : 1;
+               uint64_t reserved_1_63         : 63;
+#endif /* Word 0 - End */
+       } s;
+} cptx_vqx_ctl_t;
+
+/**
+ * Register (NCB) cpt#_vq#_done
+ *
+ * CPT Queue Done Count Registers
+ * These registers contain the per-queue instruction done count.
+ */
+typedef union {
+       uint64_t u;
+       struct cptx_vqx_done_s {
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 0 - Big Endian */
+               uint64_t reserved_20_63        : 44;
+               /** [ 19:  0](R/W/H) Done count. When CPT_INST_S[DONEINT] is
+                * set and that instruction completes, CPT()_VQ()_DONE[DONE]
+                * is incremented. Writes to this field are for diagnostic
+                * use only; instead, software writes CPT()_VQ()_DONE_ACK
+                * with the number of decrements for this field.
+                *
+                * Interrupts are sent as follows:
+                *
+                * When CPT()_VQ()_DONE[DONE] = 0, then no results are pending,
+                * the interrupt coalescing timer is held to zero, and an
+                * interrupt is not sent.
+                *
+                * When CPT()_VQ()_DONE[DONE] != 0, then the interrupt
+                * coalescing timer counts. If the counter is >= CPT()_VQ()_DONE
+                * _WAIT[TIME_WAIT]*1024, or CPT()_VQ()_DONE[DONE] >= CPT()_VQ()
+                * _DONE_WAIT[NUM_WAIT], i.e. enough time has passed or enough
+                * results have arrived, then the interrupt is sent.  Otherwise,
+                * it is not sent due to coalescing.
+                *
+                * When CPT()_VQ()_DONE_ACK is written (or CPT()_VQ()_DONE is
+                * written, though this is not typical), the interrupt
+                * coalescing timer restarts. Note that after decrementing,
+                * this interrupt equation is recomputed; for example, if
+                * CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT]
+                * and the timer is zero, the interrupt will be resent
+                * immediately. (This covers the race case between software
+                * acknowledging an interrupt and a result returning.)
+                *
+                * When CPT()_VQ()_DONE_ENA_W1S[DONE] = 0, interrupts are not
+                * sent, but the counting described above still occurs.
+                *
+                * Since CPT instructions complete out-of-order, if software is
+                * using completion interrupts the suggested scheme is to
+                * request a DONEINT on each request, and when an interrupt
+                * arrives perform a "greedy" scan for completions; even if a
+                * later command is acknowledged first this will not result in
+                * missing a completion.
+                *
+                * Software is responsible for making sure [DONE] does not
+                * overflow; for example, by ensuring there are not more than
+                * 2^20-1 instructions in flight that may request interrupts.
+                **/
+               uint64_t done                  : 20;
+#else /* Word 0 - Little Endian */
+               uint64_t done                  : 20;
+               uint64_t reserved_20_63        : 44;
+#endif /* Word 0 - End */
+       } s;
+} cptx_vqx_done_t;
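+
+/*
+ * Illustrative sketch of the interrupt coalescing condition described
+ * above. The helper name and its parameters are assumptions for
+ * illustration only; the driver does not define such a function.
+ */
+static inline int
+cpt_done_irq_due_sketch(uint32_t done, uint16_t time_wait, uint32_t num_wait,
+                       uint32_t timer_ticks)
+{
+       if (done == 0)
+               return 0; /* no results pending; timer held at zero */
+
+       /* enough time has passed, or enough results have arrived */
+       return (timer_ticks >= (uint32_t)time_wait * 1024) ||
+              (done >= num_wait);
+}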
+
+/**
+ * Register (NCB) cpt#_vq#_done_ack
+ *
+ * CPT Queue Done Count Ack Registers
+ * This register is written by software to acknowledge interrupts.
+ */
+typedef union {
+       uint64_t u;
+       struct cptx_vqx_done_ack_s {
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 0 - Big Endian */
+               uint64_t reserved_20_63        : 44;
+               /** [ 19:  0](R/W/H) Number of decrements to CPT()_VQ()_DONE
+                * [DONE]. Reads CPT()_VQ()_DONE[DONE].
+                *
+                * Written by software to acknowledge interrupts. If CPT()_VQ()_
+                * DONE[DONE] is still nonzero the interrupt will be re-sent if
+                * the conditions described in CPT()_VQ()_DONE[DONE] are
+                * satisfied.
+                **/
+               uint64_t done_ack              : 20;
+#else /* Word 0 - Little Endian */
+               uint64_t done_ack              : 20;
+               uint64_t reserved_20_63        : 44;
+#endif /* Word 0 - End */
+       } s;
+} cptx_vqx_done_ack_t;
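+
+/*
+ * Illustrative sketch: acknowledging nb_done processed completions. The
+ * plain volatile store stands in for the driver's MMIO write helper and
+ * the function name is hypothetical.
+ */
+static inline void
+cpt_ack_done_sketch(volatile uint64_t *done_ack_reg, uint32_t nb_done)
+{
+       cptx_vqx_done_ack_t ack = { .u = 0 };
+
+       ack.s.done_ack = nb_done; /* decrements CPT()_VQ()_DONE[DONE] */
+       *done_ack_reg = ack.u;
+}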
+
+/**
+ * Register (NCB) cpt#_vq#_done_wait
+ *
+ * CPT Queue Done Interrupt Coalescing Wait Registers
+ * Specifies the per-queue interrupt coalescing settings.
+ */
+typedef union {
+       uint64_t u;
+       struct cptx_vqx_done_wait_s {
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 0 - Big Endian */
+               uint64_t reserved_48_63        : 16;
+               /** [ 47: 32](R/W) Time hold-off. When CPT()_VQ()_DONE[DONE] =
+                * 0, or when CPT()_VQ()_DONE_ACK is written, a timer is
+                * cleared. When the timer reaches [TIME_WAIT]*1024, interrupt
+                * coalescing ends; see CPT()_VQ()_DONE[DONE]. If 0x0, time
+                * coalescing is disabled.
+                **/
+               uint64_t time_wait             : 16;
+               uint64_t reserved_20_31        : 12;
+               /** [ 19:  0](R/W) Number of messages hold-off. When
+                * CPT()_VQ()_DONE[DONE] >= [NUM_WAIT] then interrupt coalescing
+                * ends; see CPT()_VQ()_DONE[DONE]. If 0x0, same behavior as
+                * 0x1.
+                **/
+               uint64_t num_wait              : 20;
+#else /* Word 0 - Little Endian */
+               uint64_t num_wait              : 20;
+               uint64_t reserved_20_31        : 12;
+               uint64_t time_wait             : 16;
+               uint64_t reserved_48_63        : 16;
+#endif /* Word 0 - End */
+       } s;
+} cptx_vqx_done_wait_t;
+
+/**
+ * Register (NCB) cpt#_vq#_doorbell
+ *
+ * CPT Queue Doorbell Registers
+ * Doorbells for the CPT instruction queues.
+ */
+typedef union {
+       uint64_t u;
+       struct cptx_vqx_doorbell_s {
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 0 - Big Endian */
+               uint64_t reserved_20_63        : 44;
+               /** [ 19:  0](R/W/H) Number of instruction-queue 64-bit words
+                * to add to the CPT instruction doorbell count. Readback
+                * value is the current number of pending doorbell requests.
+                *
+                * If the counter overflows, CPT()_VQ()_MISC_INT[DBELL_DOVF]
+                * is set.
+                *
+                * To reset the count back to zero, write one to clear
+                * CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF], then write a value
+                * of 2^20 minus the read [DBELL_CNT], then write one to
+                * CPT()_VQ()_MISC_INT_W1C[DBELL_DOVF] and
+                * CPT()_VQ()_MISC_INT_ENA_W1S[DBELL_DOVF].
+                *
+                * Must be a multiple of 8. All CPT instructions are 8 words
+                * and require a doorbell count that is a multiple of 8.
+                **/
+               uint64_t dbell_cnt             : 20;
+#else /* Word 0 - Little Endian */
+               uint64_t dbell_cnt             : 20;
+               uint64_t reserved_20_63        : 44;
+#endif /* Word 0 - End */
+       } s;
+} cptx_vqx_doorbell_t;
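+
+/*
+ * Illustrative sketch: ringing the doorbell for a single CPT instruction.
+ * Every instruction is 8 64-bit words, so the count written is 8. The
+ * volatile store stands in for the driver's MMIO accessor; the function
+ * name is hypothetical.
+ */
+static inline void
+cpt_ring_doorbell_sketch(volatile uint64_t *doorbell_reg)
+{
+       cptx_vqx_doorbell_t db = { .u = 0 };
+
+       db.s.dbell_cnt = 8; /* one instruction == 8 words, a multiple of 8 */
+       *doorbell_reg = db.u;
+}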
+
+/**
+ * Register (NCB) cpt#_vq#_inprog
+ *
+ * CPT Queue In Progress Count Registers
+ * These registers contain the per-queue in-flight instruction count.
+ */
+typedef union {
+       uint64_t u;
+       struct cptx_vqx_inprog_s {
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 0 - Big Endian */
+               uint64_t reserved_8_63         : 56;
+               /** [  7:  0](RO/H) Inflight count. Counts the number of
+                * instructions for the VF that CPT is fetching, executing,
+                * or responding to. This does not include any interrupts
+                * that are awaiting software handling
+                * (CPT()_VQ()_DONE[DONE] != 0x0).
+                *
+                * A queue may not be reconfigured until:
+                *  1. CPT()_VQ()_CTL[ENA] is cleared by software.
+                *  2. [INFLIGHT] is polled until it equals zero.
+                **/
+               uint64_t inflight              : 8;
+#else /* Word 0 - Little Endian */
+               uint64_t inflight              : 8;
+               uint64_t reserved_8_63         : 56;
+#endif /* Word 0 - End */
+       } s;
+} cptx_vqx_inprog_t;
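+
+/*
+ * Illustrative sketch of the reconfiguration sequence described above:
+ * clear ENA, then poll INFLIGHT until it reaches zero. Plain volatile
+ * accesses stand in for the driver's register helpers; the function name
+ * is hypothetical.
+ */
+static inline void
+cpt_quiesce_queue_sketch(volatile uint64_t *ctl_reg,
+                        volatile uint64_t *inprog_reg)
+{
+       cptx_vqx_ctl_t ctl;
+       cptx_vqx_inprog_t inprog;
+
+       ctl.u = *ctl_reg;
+       ctl.s.ena = 0;                  /* 1. disable the queue */
+       *ctl_reg = ctl.u;
+
+       do {                            /* 2. wait for in-flight work */
+               inprog.u = *inprog_reg;
+       } while (inprog.s.inflight != 0);
+}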
+
+/**
+ * Register (NCB) cpt#_vq#_misc_int
+ *
+ * CPT Queue Misc Interrupt Register
+ * These registers contain the per-queue miscellaneous interrupts.
+ */
+typedef union {
+       uint64_t u;
+       struct cptx_vqx_misc_int_s {
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 0 - Big Endian */
+               uint64_t reserved_7_63         : 57;
+               /** [  6:  6](R/W1C/H) Translation fault detected. */
+               uint64_t fault                 : 1;
+               /** [  5:  5](R/W1C/H) Hardware error from engines. */
+               uint64_t hwerr                 : 1;
+               /** [  4:  4](R/W1C/H) Software error from engines. */
+               uint64_t swerr                 : 1;
+               /** [  3:  3](R/W1C/H) NCB result write response error. */
+               uint64_t nwrp                  : 1;
+               /** [  2:  2](R/W1C/H) Instruction NCB read response error. */
+               uint64_t irde                  : 1;
+               /** [  1:  1](R/W1C/H) Doorbell overflow. */
+               uint64_t dovf                  : 1;
+               /** [  0:  0](R/W1C/H) PF to VF mailbox interrupt. Set when
+                * CPT()_VF()_PF_MBOX(0) is written.
+                **/
+               uint64_t mbox                  : 1;
+#else /* Word 0 - Little Endian */
+               uint64_t mbox                  : 1;
+               uint64_t dovf                  : 1;
+               uint64_t irde                  : 1;
+               uint64_t nwrp                  : 1;
+               uint64_t swerr                 : 1;
+               uint64_t hwerr                 : 1;
+               uint64_t fault                 : 1;
+               uint64_t reserved_7_63         : 57;
+#endif /* Word 0 - End */
+       } s;
+} cptx_vqx_misc_int_t;
+
+/**
+ * Register (NCB) cpt#_vq#_saddr
+ *
+ * CPT Queue Starting Buffer Address Registers
+ * These registers set the instruction buffer starting address.
+ */
+typedef union {
+       uint64_t u;
+       struct cptx_vqx_saddr_s {
+#if (RTE_BYTE_ORDER == RTE_BIG_ENDIAN) /* Word 0 - Big Endian */
+               uint64_t reserved_49_63        : 15;
+               /** [ 48:  6](R/W/H) Instruction buffer IOVA <48:6>
+                * (64-byte aligned). When written, it is the initial buffer
+                * starting address; when read, it is the next read pointer to
+                * be requested from L2C. The PTR field is overwritten with the
+                * next pointer each time that the command buffer segment is
+                * exhausted. New commands will then be read from the newly
+                * specified command buffer pointer.
+                **/
+               uint64_t ptr                   : 43;
+               uint64_t reserved_0_5          : 6;
+#else /* Word 0 - Little Endian */
+               uint64_t reserved_0_5          : 6;
+               uint64_t ptr                   : 43;
+               uint64_t reserved_49_63        : 15;
+#endif /* Word 0 - End */
+       } s;
+} cptx_vqx_saddr_t;
+
+#endif /*_CPT_HW_TYPES_H_ */
diff --git a/drivers/common/cpt/cpt_mcode_defines.h b/drivers/common/cpt/cpt_mcode_defines.h
new file mode 100644 (file)
index 0000000..becc14f
--- /dev/null
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#ifndef _CPT_MCODE_DEFINES_H_
+#define _CPT_MCODE_DEFINES_H_
+
+#include <rte_byteorder.h>
+#include <rte_memory.h>
+
+/*
+ * This file defines macros and structures according to the microcode spec.
+ */
+/* SE opcodes */
+#define CPT_MAJOR_OP_FC                0x33
+#define CPT_MAJOR_OP_HASH      0x34
+#define CPT_MAJOR_OP_HMAC      0x35
+#define CPT_MAJOR_OP_ZUC_SNOW3G        0x37
+#define CPT_MAJOR_OP_KASUMI    0x38
+#define CPT_MAJOR_OP_MISC      0x01
+
+#define CPT_BYTE_16            16
+#define CPT_BYTE_24            24
+#define CPT_BYTE_32            32
+#define CPT_MAX_SG_IN_OUT_CNT  32
+#define CPT_MAX_SG_CNT         (CPT_MAX_SG_IN_OUT_CNT/2)
+
+#define COMPLETION_CODE_SIZE   8
+#define COMPLETION_CODE_INIT   0
+
+#define SG_LIST_HDR_SIZE       (8u)
+#define SG_ENTRY_SIZE          sizeof(sg_comp_t)
+
+#define CPT_DMA_MODE           (1 << 7)
+
+#define CPT_FROM_CTX           0
+#define CPT_FROM_DPTR          1
+
+#define FC_GEN                 0x1
+#define ZUC_SNOW3G             0x2
+#define KASUMI                 0x3
+#define HASH_HMAC              0x4
+
+#define ZS_EA                  0x1
+#define ZS_IA                  0x2
+#define K_F8                   0x4
+#define K_F9                   0x8
+
+#define CPT_OP_CIPHER_ENCRYPT  0x1
+#define CPT_OP_CIPHER_DECRYPT  0x2
+#define CPT_OP_CIPHER_MASK     0x3
+
+#define CPT_OP_AUTH_VERIFY     0x4
+#define CPT_OP_AUTH_GENERATE   0x8
+#define CPT_OP_AUTH_MASK       0xC
+
+#define CPT_OP_ENCODE  (CPT_OP_CIPHER_ENCRYPT | CPT_OP_AUTH_GENERATE)
+#define CPT_OP_DECODE  (CPT_OP_CIPHER_DECRYPT | CPT_OP_AUTH_VERIFY)
+
+/* #define CPT_ALWAYS_USE_SG_MODE */
+#define CPT_ALWAYS_USE_SEPARATE_BUF
+
+/*
+ * Parameters for Flexi Crypto requests
+ */
+#define VALID_AAD_BUF 0x01
+#define VALID_MAC_BUF 0x02
+#define VALID_IV_BUF 0x04
+#define SINGLE_BUF_INPLACE 0x08
+#define SINGLE_BUF_HEADTAILROOM 0x10
+
+#define ENCR_IV_OFFSET(__d_offs) ((__d_offs >> 32) & 0xffff)
+#define ENCR_OFFSET(__d_offs) ((__d_offs >> 16) & 0xffff)
+#define AUTH_OFFSET(__d_offs) (__d_offs & 0xffff)
+#define ENCR_DLEN(__d_lens) (__d_lens >> 32)
+#define AUTH_DLEN(__d_lens) (__d_lens & 0xffffffff)
+
+/* FC offset_control at start of DPTR in bytes */
+#define OFF_CTRL_LEN  8 /**< bytes */
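+
+/*
+ * Illustrative sketch: how d_offs/d_lens are packed for the accessor
+ * macros above. The offsets and lengths here are arbitrary example
+ * values, and the function name is hypothetical.
+ */
+static inline void
+cpt_pack_offs_lens_sketch(void)
+{
+       uint16_t iv_off = 0, encr_off = 16, auth_off = 0;
+       uint64_t encr_len = 64, auth_len = 80;
+       uint64_t d_offs, d_lens;
+
+       d_offs = ((uint64_t)iv_off << 32) |
+                ((uint64_t)encr_off << 16) | auth_off;
+       d_lens = (encr_len << 32) | auth_len;
+
+       /* ENCR_OFFSET(d_offs) == 16, ENCR_DLEN(d_lens) == 64, etc. */
+       (void)d_offs;
+       (void)d_lens;
+}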
+
+typedef enum {
+       MD5_TYPE        = 1,
+       SHA1_TYPE       = 2,
+       SHA2_SHA224     = 3,
+       SHA2_SHA256     = 4,
+       SHA2_SHA384     = 5,
+       SHA2_SHA512     = 6,
+       GMAC_TYPE       = 7,
+       XCBC_TYPE       = 8,
+       SHA3_SHA224     = 10,
+       SHA3_SHA256     = 11,
+       SHA3_SHA384     = 12,
+       SHA3_SHA512     = 13,
+       SHA3_SHAKE256   = 14,
+       SHA3_SHAKE512   = 15,
+
+       /* These are only for software use */
+       ZUC_EIA3        = 0x90,
+       SNOW3G_UIA2     = 0x91,
+       KASUMI_F9_CBC   = 0x92,
+       KASUMI_F9_ECB   = 0x93,
+} mc_hash_type_t;
+
+typedef enum {
+       /* To support passthrough */
+       PASSTHROUGH  = 0x0,
+       /*
+        * These are defined by MC for Flexi crypto
+        * for field of 4 bits
+        */
+       DES3_CBC    = 0x1,
+       DES3_ECB    = 0x2,
+       AES_CBC     = 0x3,
+       AES_ECB     = 0x4,
+       AES_CFB     = 0x5,
+       AES_CTR     = 0x6,
+       AES_GCM     = 0x7,
+       AES_XTS     = 0x8,
+
+       /* These are only for software use */
+       ZUC_EEA3        = 0x90,
+       SNOW3G_UEA2     = 0x91,
+       KASUMI_F8_CBC   = 0x92,
+       KASUMI_F8_ECB   = 0x93,
+} mc_cipher_type_t;
+
+typedef enum {
+       AES_128_BIT = 0x1,
+       AES_192_BIT = 0x2,
+       AES_256_BIT = 0x3
+} mc_aes_type_t;
+
+typedef enum {
+       /* Microcode errors */
+       NO_ERR = 0x00,
+       ERR_OPCODE_UNSUPPORTED = 0x01,
+
+       /* SCATTER GATHER */
+       ERR_SCATTER_GATHER_WRITE_LENGTH = 0x02,
+       ERR_SCATTER_GATHER_LIST = 0x03,
+       ERR_SCATTER_GATHER_NOT_SUPPORTED = 0x04,
+
+       /* SE GC */
+       ERR_GC_LENGTH_INVALID = 0x41,
+       ERR_GC_RANDOM_LEN_INVALID = 0x42,
+       ERR_GC_DATA_LEN_INVALID = 0x43,
+       ERR_GC_DRBG_TYPE_INVALID = 0x44,
+       ERR_GC_CTX_LEN_INVALID = 0x45,
+       ERR_GC_CIPHER_UNSUPPORTED = 0x46,
+       ERR_GC_AUTH_UNSUPPORTED = 0x47,
+       ERR_GC_OFFSET_INVALID = 0x48,
+       ERR_GC_HASH_MODE_UNSUPPORTED = 0x49,
+       ERR_GC_DRBG_ENTROPY_LEN_INVALID = 0x4a,
+       ERR_GC_DRBG_ADDNL_LEN_INVALID = 0x4b,
+       ERR_GC_ICV_MISCOMPARE = 0x4c,
+       ERR_GC_DATA_UNALIGNED = 0x4d,
+
+       /* API Layer */
+       ERR_BAD_ALT_CCODE = 0xfd,
+       ERR_REQ_PENDING = 0xfe,
+       ERR_REQ_TIMEOUT = 0xff,
+
+       ERR_BAD_INPUT_LENGTH = (0x40000000 | 384),    /* 0x40000180 */
+       ERR_BAD_KEY_LENGTH,
+       ERR_BAD_KEY_HANDLE,
+       ERR_BAD_CONTEXT_HANDLE,
+       ERR_BAD_SCALAR_LENGTH,
+       ERR_BAD_DIGEST_LENGTH,
+       ERR_BAD_INPUT_ARG,
+       ERR_BAD_RECORD_PADDING,
+       ERR_NB_REQUEST_PENDING,
+       ERR_EIO,
+       ERR_ENODEV,
+} mc_error_code_t;
+
+/**
+ * Enumeration cpt_comp_e
+ *
+ * CPT Completion Enumeration
+ * Enumerates the values of CPT_RES_S[COMPCODE].
+ */
+typedef enum {
+       CPT_8X_COMP_E_NOTDONE    = (0x00),
+       CPT_8X_COMP_E_GOOD       = (0x01),
+       CPT_8X_COMP_E_FAULT      = (0x02),
+       CPT_8X_COMP_E_SWERR      = (0x03),
+       CPT_8X_COMP_E_HWERR      = (0x04),
+       CPT_8X_COMP_E_LAST_ENTRY = (0xFF)
+} cpt_comp_e_t;
+
+typedef struct sglist_comp {
+       union {
+               uint64_t len;
+               struct {
+                       uint16_t len[4];
+               } s;
+       } u;
+       uint64_t ptr[4];
+} sg_comp_t;
+
+struct cpt_sess_misc {
+       /** CPT opcode */
+       uint16_t cpt_op:4;
+       /** ZUC, SNOW3G &  KASUMI flags */
+       uint16_t zsk_flag:4;
+       /** Flag for AES GCM */
+       uint16_t aes_gcm:1;
+       /** Flag for AES CTR */
+       uint16_t aes_ctr:1;
+       /** Flag for NULL cipher/auth */
+       uint16_t is_null:1;
+       /** Flag for GMAC */
+       uint16_t is_gmac:1;
+       /** AAD length */
+       uint16_t aad_length;
+       /** MAC len in bytes */
+       uint8_t mac_len;
+       /** IV length in bytes */
+       uint8_t iv_length;
+       /** Auth IV length in bytes */
+       uint8_t auth_iv_length;
+       /** Reserved field */
+       uint8_t rsvd1;
+       /** IV offset in bytes */
+       uint16_t iv_offset;
+       /** Auth IV offset in bytes */
+       uint16_t auth_iv_offset;
+       /** Salt */
+       uint32_t salt;
+       /** Context DMA address */
+       phys_addr_t ctx_dma_addr;
+};
+
+typedef union {
+       uint64_t flags;
+       struct {
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+               uint64_t enc_cipher   : 4;
+               uint64_t reserved1    : 1;
+               uint64_t aes_key      : 2;
+               uint64_t iv_source    : 1;
+               uint64_t hash_type    : 4;
+               uint64_t reserved2    : 3;
+               uint64_t auth_input_type : 1;
+               uint64_t mac_len      : 8;
+               uint64_t reserved3    : 8;
+               uint64_t encr_offset  : 16;
+               uint64_t iv_offset    : 8;
+               uint64_t auth_offset  : 8;
+#else
+               uint64_t auth_offset  : 8;
+               uint64_t iv_offset    : 8;
+               uint64_t encr_offset  : 16;
+               uint64_t reserved3    : 8;
+               uint64_t mac_len      : 8;
+               uint64_t auth_input_type : 1;
+               uint64_t reserved2    : 3;
+               uint64_t hash_type    : 4;
+               uint64_t iv_source    : 1;
+               uint64_t aes_key      : 2;
+               uint64_t reserved1    : 1;
+               uint64_t enc_cipher   : 4;
+#endif
+       } e;
+} encr_ctrl_t;
+
+typedef struct {
+       encr_ctrl_t enc_ctrl;
+       uint8_t  encr_key[32];
+       uint8_t  encr_iv[16];
+} mc_enc_context_t;
+
+typedef struct {
+       uint8_t  ipad[64];
+       uint8_t  opad[64];
+} mc_fc_hmac_context_t;
+
+typedef struct {
+       mc_enc_context_t     enc;
+       mc_fc_hmac_context_t hmac;
+} mc_fc_context_t;
+
+typedef struct {
+       uint8_t encr_auth_iv[16];
+       uint8_t ci_key[16];
+       uint8_t zuc_const[32];
+} mc_zuc_snow3g_ctx_t;
+
+typedef struct {
+       uint8_t reg_A[8];
+       uint8_t ci_key[16];
+} mc_kasumi_ctx_t;
+
+struct cpt_ctx {
+       /* Below fields are accessed by sw */
+       uint64_t enc_cipher     :8;
+       uint64_t hash_type      :8;
+       uint64_t mac_len        :8;
+       uint64_t auth_key_len   :8;
+       uint64_t fc_type        :4;
+       uint64_t hmac           :1;
+       uint64_t zsk_flags      :3;
+       uint64_t k_ecb          :1;
+       uint64_t snow3g         :1;
+       uint64_t rsvd           :22;
+       /* Below fields are accessed by hardware */
+       union {
+               mc_fc_context_t fctx;
+               mc_zuc_snow3g_ctx_t zs_ctx;
+               mc_kasumi_ctx_t k_ctx;
+       };
+       uint8_t  auth_key[64];
+};
+
+/* Buffer pointer */
+typedef struct buf_ptr {
+       void *vaddr;
+       phys_addr_t dma_addr;
+       uint32_t size;
+       uint32_t resv;
+} buf_ptr_t;
+
+/* IOV Pointer */
+typedef struct {
+       int buf_cnt;
+       buf_ptr_t bufs[0];
+} iov_ptr_t;
+
+typedef union opcode_info {
+       uint16_t flags;
+       struct {
+               uint8_t major;
+               uint8_t minor;
+       } s;
+} opcode_info_t;
+
+typedef struct fc_params {
+       /* 0th cache line */
+       union {
+               buf_ptr_t bufs[1];
+               struct {
+                       iov_ptr_t *src_iov;
+                       iov_ptr_t *dst_iov;
+               };
+       };
+       void *iv_buf;
+       void *auth_iv_buf;
+       buf_ptr_t meta_buf;
+       buf_ptr_t ctx_buf;
+       uint64_t rsvd2;
+
+       /* 1st cache line */
+       buf_ptr_t aad_buf;
+       buf_ptr_t mac_buf;
+
+} fc_params_t;
+
+/*
+ * Parameters for digest generate requests.
+ * Only src_iov, op, ctx_buf, mac_buf, prep_req,
+ * meta_buf and auth_data_len are used for digest gen.
+ */
+typedef struct fc_params digest_params_t;
+
+/* Cipher Algorithms */
+typedef mc_cipher_type_t cipher_type_t;
+
+/* Auth Algorithms */
+typedef mc_hash_type_t auth_type_t;
+
+/* Helper macros */
+
+#define CPT_P_ENC_CTRL(fctx)  fctx->enc.enc_ctrl.e
+
+#define SRC_IOV_SIZE \
+       (sizeof(iov_ptr_t) + (sizeof(buf_ptr_t) * CPT_MAX_SG_CNT))
+#define DST_IOV_SIZE \
+       (sizeof(iov_ptr_t) + (sizeof(buf_ptr_t) * CPT_MAX_SG_CNT))
+
+#define SESS_PRIV(__sess) \
+       (void *)((uint8_t *)__sess + sizeof(struct cpt_sess_misc))
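+
+/*
+ * Illustrative sketch: SESS_PRIV() yields the cpt_ctx area laid out right
+ * after the cpt_sess_misc header within a single session allocation. The
+ * function name is hypothetical.
+ */
+static inline struct cpt_ctx *
+cpt_sess_to_ctx_sketch(struct cpt_sess_misc *sess)
+{
+       return (struct cpt_ctx *)SESS_PRIV(sess);
+}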
+
+#endif /* _CPT_MCODE_DEFINES_H_ */
diff --git a/drivers/common/cpt/cpt_pmd_logs.h b/drivers/common/cpt/cpt_pmd_logs.h
new file mode 100644 (file)
index 0000000..4cbec4e
--- /dev/null
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#ifndef _CPT_PMD_LOGS_H_
+#define _CPT_PMD_LOGS_H_
+
+#include <rte_log.h>
+
+/*
+ * This file defines log macros
+ */
+
+#define CPT_PMD_DRV_LOG_RAW(level, fmt, args...) \
+               rte_log(RTE_LOG_ ## level, cpt_logtype, \
+                       "cpt: %s(): " fmt "\n", __func__, ##args)
+
+#define CPT_PMD_INIT_FUNC_TRACE() CPT_PMD_DRV_LOG_RAW(DEBUG, " >>")
+
+#define CPT_LOG_INFO(fmt, args...) \
+       CPT_PMD_DRV_LOG_RAW(INFO, fmt, ## args)
+#define CPT_LOG_WARN(fmt, args...) \
+       CPT_PMD_DRV_LOG_RAW(WARNING, fmt, ## args)
+#define CPT_LOG_ERR(fmt, args...) \
+       CPT_PMD_DRV_LOG_RAW(ERR, fmt, ## args)
+
+/*
+ * DP logs are compiled out if their level is lower than the compile-time
+ * level. DP logs are logged under the 'PMD' logtype, so for dynamic logging
+ * the level of 'pmd' has to be used.
+ */
+#define CPT_LOG_DP(level, fmt, args...) \
+       RTE_LOG_DP(level, PMD, fmt "\n", ## args)
+
+#define CPT_LOG_DP_DEBUG(fmt, args...) \
+       CPT_LOG_DP(DEBUG, fmt, ## args)
+#define CPT_LOG_DP_INFO(fmt, args...) \
+       CPT_LOG_DP(INFO, fmt, ## args)
+#define CPT_LOG_DP_WARN(fmt, args...) \
+       CPT_LOG_DP(WARNING, fmt, ## args)
+#define CPT_LOG_DP_ERR(fmt, args...) \
+       CPT_LOG_DP(ERR, fmt, ## args)
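+
+/*
+ * Illustrative usage: DP logs below the compile-time level are compiled
+ * out; the rest are controlled at runtime through the 'pmd' log level.
+ * The function name is hypothetical.
+ */
+static inline void
+cpt_log_usage_sketch(int qid, int rc)
+{
+       CPT_LOG_DP_DEBUG("queue %d enqueue returned %d", qid, rc);
+}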
+
+/*
+ * cpt_logtype will be used for common logging. This variable is initialized
+ * by the otx_* driver routines during PCI probe.
+ */
+int cpt_logtype;
+
+#endif /* _CPT_PMD_LOGS_H_ */
diff --git a/drivers/common/cpt/cpt_pmd_ops_helper.c b/drivers/common/cpt/cpt_pmd_ops_helper.c
new file mode 100644 (file)
index 0000000..1c18180
--- /dev/null
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#include <rte_common.h>
+
+#include "cpt_common.h"
+#include "cpt_hw_types.h"
+#include "cpt_mcode_defines.h"
+#include "cpt_pmd_ops_helper.h"
+
+#define CPT_MAX_IV_LEN 16
+#define CPT_OFFSET_CONTROL_BYTES 8
+
+int32_t
+cpt_pmd_ops_helper_get_mlen_direct_mode(void)
+{
+       uint32_t len = 0;
+
+       /* Request structure */
+       len = sizeof(struct cpt_request_info);
+
+       /* CPT HW result structure plus extra space, as it is aligned */
+       len += 2 * sizeof(cpt_res_s_t);
+
+       return len;
+}
+
+int
+cpt_pmd_ops_helper_get_mlen_sg_mode(void)
+{
+       uint32_t len = 0;
+
+       len += sizeof(struct cpt_request_info);
+       len += CPT_OFFSET_CONTROL_BYTES + CPT_MAX_IV_LEN;
+       len += ROUNDUP8(SG_LIST_HDR_SIZE +
+                       (ROUNDUP4(CPT_MAX_SG_IN_OUT_CNT) >> 2) * SG_ENTRY_SIZE);
+       len += 2 * COMPLETION_CODE_SIZE;
+       len += 2 * sizeof(cpt_res_s_t);
+       return len;
+}
diff --git a/drivers/common/cpt/cpt_pmd_ops_helper.h b/drivers/common/cpt/cpt_pmd_ops_helper.h
new file mode 100644 (file)
index 0000000..dd32f9a
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#ifndef _CPT_PMD_OPS_HELPER_H_
+#define _CPT_PMD_OPS_HELPER_H_
+
+/*
+ * This file defines the agreement between the common layer and the individual
+ * crypto drivers for the OCTEON TX series. Control-path code in the otx*
+ * directories can directly call the functions declared here.
+ */
+
+/*
+ * Get meta length required when operating in direct mode (single buffer
+ * in-place)
+ *
+ * @return
+ *   - length
+ */
+int32_t
+cpt_pmd_ops_helper_get_mlen_direct_mode(void);
+
+/*
+ * Get size of contiguous meta buffer to be allocated when working in scatter
+ * gather mode.
+ *
+ * @return
+ *   - length
+ */
+int
+cpt_pmd_ops_helper_get_mlen_sg_mode(void);
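+
+/*
+ * Illustrative usage (not part of this header's contract): sizing a meta
+ * buffer for SG mode with rte_zmalloc(). The <rte_malloc.h> include and
+ * the 16-byte alignment are assumptions of this sketch.
+ */
+static inline void *
+cpt_alloc_meta_sketch(void)
+{
+       int len = cpt_pmd_ops_helper_get_mlen_sg_mode();
+
+       return rte_zmalloc("cpt_meta", len, 16);
+}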
+#endif /* _CPT_PMD_OPS_HELPER_H_ */
diff --git a/drivers/common/cpt/cpt_request_mgr.h b/drivers/common/cpt/cpt_request_mgr.h
new file mode 100644 (file)
index 0000000..4463cfb
--- /dev/null
@@ -0,0 +1,185 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#ifndef _CPT_REQUEST_MGR_H_
+#define _CPT_REQUEST_MGR_H_
+
+#include <rte_branch_prediction.h>
+#include <rte_cycles.h>
+
+#include "cpt_common.h"
+#include "cpt_mcode_defines.h"
+
+#if CPT_MODEL == CRYPTO_OCTEONTX
+#include "../../crypto/octeontx/otx_cryptodev_hw_access.h"
+#endif
+
+/*
+ * This file defines the agreement between the common layer and the individual
+ * crypto drivers for the OCTEON TX series. Datapath code in the otx*
+ * directories includes this file; all these functions are static inline for
+ * better performance.
+ */
+
+/*
+ * Get the session size
+ *
+ * This function is used in the data path.
+ *
+ * @return
+ *   - session size
+ */
+static __rte_always_inline unsigned int
+cpt_get_session_size(void)
+{
+       unsigned int ctx_len = sizeof(struct cpt_ctx);
+       return (sizeof(struct cpt_sess_misc) + RTE_ALIGN_CEIL(ctx_len, 8));
+}
+
+static __rte_always_inline int32_t __hot
+cpt_enqueue_req(struct cpt_instance *instance, struct pending_queue *pqueue,
+               void *req)
+{
+       struct cpt_request_info *user_req = (struct cpt_request_info *)req;
+       int32_t ret = 0;
+
+       if (unlikely(!req))
+               return 0;
+
+       if (unlikely(pqueue->pending_count >= DEFAULT_CMD_QLEN))
+               return -EAGAIN;
+
+       fill_cpt_inst(instance, req);
+
+       CPT_LOG_DP_DEBUG("req: %p op: %p ", req, user_req->op);
+
+       /* Fill time_out cycles */
+       user_req->time_out = rte_get_timer_cycles() +
+                       DEFAULT_COMMAND_TIMEOUT * rte_get_timer_hz();
+       user_req->extra_time = 0;
+
+       /* Default mode of software queue */
+       mark_cpt_inst(instance);
+
+       pqueue->rid_queue[pqueue->enq_tail].rid =
+               (uintptr_t)user_req;
+       /* Use the soft queue length here to limit
+        * the number of requests.
+        */
+       MOD_INC(pqueue->enq_tail, DEFAULT_CMD_QLEN);
+       pqueue->pending_count += 1;
+
+       CPT_LOG_DP_DEBUG("Submitted NB cmd with request: %p "
+                        "op: %p", user_req, user_req->op);
+
+       return ret;
+}
+
+static __rte_always_inline int __hot
+cpt_pmd_crypto_operation(struct cpt_instance *instance,
+               struct rte_crypto_op *op, struct pending_queue *pqueue,
+               uint8_t cpt_driver_id)
+{
+       struct cpt_sess_misc *sess = NULL;
+       struct rte_crypto_sym_op *sym_op = op->sym;
+       void *prep_req = NULL, *mdata = NULL;
+       int ret = 0;
+       uint64_t cpt_op;
+       struct cpt_vf *cptvf = (struct cpt_vf *)instance;
+
+       if (unlikely(op->sess_type == RTE_CRYPTO_OP_SESSIONLESS)) {
+               int sess_len;
+
+               sess_len = cpt_get_session_size();
+
+               sess = rte_calloc(__func__, 1, sess_len, 8);
+               if (!sess)
+                       return -ENOMEM;
+
+               sess->ctx_dma_addr =  rte_malloc_virt2iova(sess) +
+                       sizeof(struct cpt_sess_misc);
+
+               ret = instance_session_cfg(sym_op->xform, (void *)sess);
+               if (unlikely(ret)) {
+                       rte_free(sess);
+                       return -EINVAL;
+               }
+       } else {
+               sess = (struct cpt_sess_misc *)
+                       get_sym_session_private_data(sym_op->session,
+                                                    cpt_driver_id);
+       }
+
+       cpt_op = sess->cpt_op;
+
+       mdata = &(cptvf->meta_info);
+
+       if (likely(cpt_op & CPT_OP_CIPHER_MASK))
+               prep_req = fill_fc_params(op, sess, &mdata, &ret);
+       else
+               prep_req = fill_digest_params(op, sess, &mdata, &ret);
+
+       if (unlikely(!prep_req)) {
+               CPT_LOG_DP_ERR("prep cryto req : op %p, cpt_op 0x%x "
+                              "ret 0x%x", op, (unsigned int)cpt_op, ret);
+               goto req_fail;
+       }
+
+       /* Enqueue prepared instruction to HW */
+       ret = cpt_enqueue_req(instance, pqueue, prep_req);
+
+       if (unlikely(ret)) {
+               if (unlikely(ret == -EAGAIN))
+                       goto req_fail;
+               CPT_LOG_DP_ERR("Error enqueing crypto request : error "
+                              "code %d", ret);
+               goto req_fail;
+       }
+
+       return 0;
+
+req_fail:
+       if (mdata)
+               free_op_meta(mdata, cptvf->meta_info.cptvf_meta_pool);
+       return ret;
+}
+
+static __rte_always_inline int32_t __hot
+cpt_dequeue_burst(struct cpt_instance *instance, uint16_t cnt,
+                 void *resp[], uint8_t cc[], struct pending_queue *pqueue)
+{
+       struct cpt_request_info *user_req;
+       struct rid *rid_e;
+       int i, count, pcount;
+       uint8_t ret;
+
+       pcount = pqueue->pending_count;
+       count = (cnt > pcount) ? pcount : cnt;
+
+       for (i = 0; i < count; i++) {
+               rid_e = &pqueue->rid_queue[pqueue->deq_head];
+               user_req = (struct cpt_request_info *)(rid_e->rid);
+
+               if (likely((i+1) < count))
+                       rte_prefetch_non_temporal((void *)rid_e[1].rid);
+
+               ret = check_nb_command_id(user_req, instance);
+
+               if (unlikely(ret == ERR_REQ_PENDING)) {
+                       /* Stop checking for completions */
+                       break;
+               }
+
+               /* Return completion code and op handle */
+               cc[i] = (uint8_t)ret;
+               resp[i] = user_req->op;
+               CPT_LOG_DP_DEBUG("Request %p Op %p completed with code %d",
+                          user_req, user_req->op, ret);
+
+               MOD_INC(pqueue->deq_head, DEFAULT_CMD_QLEN);
+               pqueue->pending_count -= 1;
+       }
+
+       return i;
+}
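+
+/*
+ * Illustrative sketch: how a PMD dequeue routine might consume
+ * cpt_dequeue_burst() results. The function name and the fixed batch of
+ * 32 are assumptions for illustration.
+ */
+static __rte_always_inline uint16_t
+cpt_dequeue_usage_sketch(struct cpt_instance *instance,
+                        struct pending_queue *pqueue,
+                        struct rte_crypto_op **ops, uint16_t nb_ops)
+{
+       void *resp[32];
+       uint8_t cc[32];
+       int nb_done, i;
+
+       nb_ops = RTE_MIN(nb_ops, (uint16_t)32);
+       nb_done = cpt_dequeue_burst(instance, nb_ops, resp, cc, pqueue);
+
+       for (i = 0; i < nb_done; i++)
+               ops[i] = resp[i]; /* cc[i] holds the completion code */
+
+       return nb_done;
+}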
+
+#endif /* _CPT_REQUEST_MGR_H_ */
diff --git a/drivers/common/cpt/cpt_ucode.h b/drivers/common/cpt/cpt_ucode.h
new file mode 100644 (file)
index 0000000..c5a9f34
--- /dev/null
@@ -0,0 +1,3648 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#ifndef _CPT_UCODE_H_
+#define _CPT_UCODE_H_
+#include <stdbool.h>
+
+#include "cpt_common.h"
+#include "cpt_hw_types.h"
+#include "cpt_mcode_defines.h"
+
+/*
+ * This file defines functions that are interfaces to the microcode spec.
+ */
+
+static uint8_t zuc_d[32] = {
+       0x44, 0xD7, 0x26, 0xBC, 0x62, 0x6B, 0x13, 0x5E,
+       0x57, 0x89, 0x35, 0xE2, 0x71, 0x35, 0x09, 0xAF,
+       0x4D, 0x78, 0x2F, 0x13, 0x6B, 0xC4, 0x1A, 0xF1,
+       0x5E, 0x26, 0x3C, 0x4D, 0x78, 0x9A, 0x47, 0xAC
+};
+
+static __rte_always_inline int
+cpt_is_algo_supported(struct rte_crypto_sym_xform *xform)
+{
+       /*
+        * Microcode only supports the following combinations:
+        * encryption followed by authentication,
+        * authentication followed by decryption.
+        */
+       if (xform->next) {
+               if ((xform->type == RTE_CRYPTO_SYM_XFORM_AUTH) &&
+                   (xform->next->type == RTE_CRYPTO_SYM_XFORM_CIPHER) &&
+                   (xform->next->cipher.op == RTE_CRYPTO_CIPHER_OP_ENCRYPT)) {
+                       /* Unsupported as of now by microcode */
+                       CPT_LOG_DP_ERR("Unsupported combination");
+                       return -1;
+               }
+               if ((xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER) &&
+                   (xform->next->type == RTE_CRYPTO_SYM_XFORM_AUTH) &&
+                   (xform->cipher.op == RTE_CRYPTO_CIPHER_OP_DECRYPT)) {
+                       /* For GMAC auth there is no cipher operation */
+                       if (xform->aead.algo != RTE_CRYPTO_AEAD_AES_GCM ||
+                           xform->next->auth.algo !=
+                           RTE_CRYPTO_AUTH_AES_GMAC) {
+                               /* Unsupported as of now by microcode */
+                               CPT_LOG_DP_ERR("Unsupported combination");
+                               return -1;
+                       }
+               }
+       }
+       return 0;
+}
+
+static __rte_always_inline void
+gen_key_snow3g(uint8_t *ck, uint32_t *keyx)
+{
+       int i, base;
+
+       for (i = 0; i < 4; i++) {
+               base = 4 * i;
+               keyx[3 - i] = (ck[base] << 24) | (ck[base + 1] << 16) |
+                       (ck[base + 2] << 8) | (ck[base + 3]);
+               keyx[3 - i] = rte_cpu_to_be_32(keyx[3 - i]);
+       }
+}
+
+static __rte_always_inline void
+cpt_fc_salt_update(void *ctx,
+                  uint8_t *salt)
+{
+       struct cpt_ctx *cpt_ctx = ctx;
+       memcpy(&cpt_ctx->fctx.enc.encr_iv, salt, 4);
+}
+
+static __rte_always_inline int
+cpt_fc_ciph_validate_key_aes(uint16_t key_len)
+{
+       switch (key_len) {
+       case CPT_BYTE_16:
+       case CPT_BYTE_24:
+       case CPT_BYTE_32:
+               return 0;
+       default:
+               return -1;
+       }
+}
+
+static __rte_always_inline int
+cpt_fc_ciph_validate_key(cipher_type_t type, struct cpt_ctx *cpt_ctx,
+               uint16_t key_len)
+{
+       int fc_type = 0;
+       switch (type) {
+       case PASSTHROUGH:
+               fc_type = FC_GEN;
+               break;
+       case DES3_CBC:
+       case DES3_ECB:
+               fc_type = FC_GEN;
+               break;
+       case AES_CBC:
+       case AES_ECB:
+       case AES_CFB:
+       case AES_CTR:
+       case AES_GCM:
+               if (unlikely(cpt_fc_ciph_validate_key_aes(key_len) != 0))
+                       return -1;
+               fc_type = FC_GEN;
+               break;
+       case AES_XTS:
+               key_len = key_len / 2;
+               if (unlikely(key_len == CPT_BYTE_24)) {
+                       CPT_LOG_DP_ERR("Invalid AES key len for XTS");
+                       return -1;
+               }
+               if (unlikely(cpt_fc_ciph_validate_key_aes(key_len) != 0))
+                       return -1;
+               fc_type = FC_GEN;
+               break;
+       case ZUC_EEA3:
+       case SNOW3G_UEA2:
+               if (unlikely(key_len != 16))
+                       return -1;
+               /* No support for AEAD yet */
+               if (unlikely(cpt_ctx->hash_type))
+                       return -1;
+               fc_type = ZUC_SNOW3G;
+               break;
+       case KASUMI_F8_CBC:
+       case KASUMI_F8_ECB:
+               if (unlikely(key_len != 16))
+                       return -1;
+               /* No support for AEAD yet */
+               if (unlikely(cpt_ctx->hash_type))
+                       return -1;
+               fc_type = KASUMI;
+               break;
+       default:
+               return -1;
+       }
+       return fc_type;
+}
+
+static __rte_always_inline void
+cpt_fc_ciph_set_key_passthrough(struct cpt_ctx *cpt_ctx, mc_fc_context_t *fctx)
+{
+       cpt_ctx->enc_cipher = 0;
+       CPT_P_ENC_CTRL(fctx).enc_cipher = 0;
+}
+
+static __rte_always_inline void
+cpt_fc_ciph_set_key_set_aes_key_type(mc_fc_context_t *fctx, uint16_t key_len)
+{
+       mc_aes_type_t aes_key_type = 0;
+       switch (key_len) {
+       case CPT_BYTE_16:
+               aes_key_type = AES_128_BIT;
+               break;
+       case CPT_BYTE_24:
+               aes_key_type = AES_192_BIT;
+               break;
+       case CPT_BYTE_32:
+               aes_key_type = AES_256_BIT;
+               break;
+       default:
+               /* This should not happen */
+               CPT_LOG_DP_ERR("Invalid AES key len");
+               return;
+       }
+       CPT_P_ENC_CTRL(fctx).aes_key = aes_key_type;
+}
+
+static __rte_always_inline void
+cpt_fc_ciph_set_key_snow3g_uea2(struct cpt_ctx *cpt_ctx, uint8_t *key,
+               uint16_t key_len)
+{
+       uint32_t keyx[4];
+       cpt_ctx->snow3g = 1;
+       gen_key_snow3g(key, keyx);
+       memcpy(cpt_ctx->zs_ctx.ci_key, keyx, key_len);
+       cpt_ctx->fc_type = ZUC_SNOW3G;
+       cpt_ctx->zsk_flags = 0;
+}
+
+static __rte_always_inline void
+cpt_fc_ciph_set_key_zuc_eea3(struct cpt_ctx *cpt_ctx, uint8_t *key,
+               uint16_t key_len)
+{
+       cpt_ctx->snow3g = 0;
+       memcpy(cpt_ctx->zs_ctx.ci_key, key, key_len);
+       memcpy(cpt_ctx->zs_ctx.zuc_const, zuc_d, 32);
+       cpt_ctx->fc_type = ZUC_SNOW3G;
+       cpt_ctx->zsk_flags = 0;
+}
+
+static __rte_always_inline void
+cpt_fc_ciph_set_key_kasumi_f8_ecb(struct cpt_ctx *cpt_ctx, uint8_t *key,
+               uint16_t key_len)
+{
+       cpt_ctx->k_ecb = 1;
+       memcpy(cpt_ctx->k_ctx.ci_key, key, key_len);
+       cpt_ctx->zsk_flags = 0;
+       cpt_ctx->fc_type = KASUMI;
+}
+
+static __rte_always_inline void
+cpt_fc_ciph_set_key_kasumi_f8_cbc(struct cpt_ctx *cpt_ctx, uint8_t *key,
+               uint16_t key_len)
+{
+       memcpy(cpt_ctx->k_ctx.ci_key, key, key_len);
+       cpt_ctx->zsk_flags = 0;
+       cpt_ctx->fc_type = KASUMI;
+}
+
+static __rte_always_inline int
+cpt_fc_ciph_set_key(void *ctx, cipher_type_t type, uint8_t *key,
+                   uint16_t key_len, uint8_t *salt)
+{
+       struct cpt_ctx *cpt_ctx = ctx;
+       mc_fc_context_t *fctx = &cpt_ctx->fctx;
+       uint64_t *ctrl_flags = NULL;
+       int fc_type;
+
+       /* Validate key before proceeding */
+       fc_type = cpt_fc_ciph_validate_key(type, cpt_ctx, key_len);
+       if (unlikely(fc_type == -1))
+               return -1;
+
+       if (fc_type == FC_GEN) {
+               cpt_ctx->fc_type = FC_GEN;
+               ctrl_flags = (uint64_t *)&(fctx->enc.enc_ctrl.flags);
+               *ctrl_flags = rte_be_to_cpu_64(*ctrl_flags);
+               /*
+                * We need to always say IV is from DPTR as the user can
+                * sometimes override the IV per operation.
+                */
+               CPT_P_ENC_CTRL(fctx).iv_source = CPT_FROM_DPTR;
+       }
+
+       switch (type) {
+       case PASSTHROUGH:
+               cpt_fc_ciph_set_key_passthrough(cpt_ctx, fctx);
+               goto fc_success;
+       case DES3_CBC:
+               /* CPT performs DES using 3DES with the 8B DES-key
+                * replicated 2 more times to match the 24B 3DES-key.
+                * E.g. if the original key is "0x0a 0x0b", the new key is
+                * "0x0a 0x0b 0x0a 0x0b 0x0a 0x0b".
+                */
+               if (key_len == 8) {
+                       /* Skipping the first 8B as it will be copied
+                        * in the regular code flow
+                        */
+                       memcpy(fctx->enc.encr_key+key_len, key, key_len);
+                       memcpy(fctx->enc.encr_key+2*key_len, key, key_len);
+               }
+               break;
+       case DES3_ECB:
+               /* For DES3_ECB IV need to be from CTX. */
+               CPT_P_ENC_CTRL(fctx).iv_source = CPT_FROM_CTX;
+               break;
+       case AES_CBC:
+       case AES_ECB:
+       case AES_CFB:
+       case AES_CTR:
+               cpt_fc_ciph_set_key_set_aes_key_type(fctx, key_len);
+               break;
+       case AES_GCM:
+               /* Even though iv source is from dptr,
+                * aes_gcm salt is taken from ctx
+                */
+               if (salt) {
+                       memcpy(fctx->enc.encr_iv, salt, 4);
+                       /* Assuming it was just salt update
+                        * and nothing else
+                        */
+                       if (!key)
+                               goto fc_success;
+               }
+               cpt_fc_ciph_set_key_set_aes_key_type(fctx, key_len);
+               break;
+       case AES_XTS:
+               key_len = key_len / 2;
+               cpt_fc_ciph_set_key_set_aes_key_type(fctx, key_len);
+
+               /* Copy key2 for XTS into ipad */
+               memset(fctx->hmac.ipad, 0, sizeof(fctx->hmac.ipad));
+               memcpy(fctx->hmac.ipad, &key[key_len], key_len);
+               break;
+       case SNOW3G_UEA2:
+               cpt_fc_ciph_set_key_snow3g_uea2(cpt_ctx, key, key_len);
+               goto success;
+       case ZUC_EEA3:
+               cpt_fc_ciph_set_key_zuc_eea3(cpt_ctx, key, key_len);
+               goto success;
+       case KASUMI_F8_ECB:
+               cpt_fc_ciph_set_key_kasumi_f8_ecb(cpt_ctx, key, key_len);
+               goto success;
+       case KASUMI_F8_CBC:
+               cpt_fc_ciph_set_key_kasumi_f8_cbc(cpt_ctx, key, key_len);
+               goto success;
+       default:
+               break;
+       }
+
+       /* Only for FC_GEN case */
+
+       /* For GMAC auth, cipher must be NULL */
+       if (cpt_ctx->hash_type != GMAC_TYPE)
+               CPT_P_ENC_CTRL(fctx).enc_cipher = type;
+
+       memcpy(fctx->enc.encr_key, key, key_len);
+
+fc_success:
+       *ctrl_flags = rte_cpu_to_be_64(*ctrl_flags);
+
+success:
+       cpt_ctx->enc_cipher = type;
+
+       return 0;
+}
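+
+/*
+ * Illustrative sketch: programming an AES-128-CBC key through
+ * cpt_fc_ciph_set_key(). The wrapper name is hypothetical; the ctx would
+ * normally live inside a session created by the PMD.
+ */
+static __rte_always_inline int
+cpt_set_aes_cbc_128_sketch(struct cpt_ctx *ctx, uint8_t *key)
+{
+       /* 16-byte key, no salt */
+       return cpt_fc_ciph_set_key(ctx, AES_CBC, key, CPT_BYTE_16, NULL);
+}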
+
+static __rte_always_inline uint32_t
+fill_sg_comp(sg_comp_t *list,
+            uint32_t i,
+            phys_addr_t dma_addr,
+            uint32_t size)
+{
+       sg_comp_t *to = &list[i>>2];
+
+       to->u.s.len[i%4] = rte_cpu_to_be_16(size);
+       to->ptr[i%4] = rte_cpu_to_be_64(dma_addr);
+       i++;
+       return i;
+}
+
+static __rte_always_inline uint32_t
+fill_sg_comp_from_buf(sg_comp_t *list,
+                     uint32_t i,
+                     buf_ptr_t *from)
+{
+       sg_comp_t *to = &list[i>>2];
+
+       to->u.s.len[i%4] = rte_cpu_to_be_16(from->size);
+       to->ptr[i%4] = rte_cpu_to_be_64(from->dma_addr);
+       i++;
+       return i;
+}
+
+static __rte_always_inline uint32_t
+fill_sg_comp_from_buf_min(sg_comp_t *list,
+                         uint32_t i,
+                         buf_ptr_t *from,
+                         uint32_t *psize)
+{
+       sg_comp_t *to = &list[i >> 2];
+       uint32_t size = *psize;
+       uint32_t e_len;
+
+       e_len = (size > from->size) ? from->size : size;
+       to->u.s.len[i % 4] = rte_cpu_to_be_16(e_len);
+       to->ptr[i % 4] = rte_cpu_to_be_64(from->dma_addr);
+       *psize -= e_len;
+       i++;
+       return i;
+}
+
+/*
+ * This fills the MC-expected SGIO list
+ * from the IOV given by the user.
+ */
+static __rte_always_inline uint32_t
+fill_sg_comp_from_iov(sg_comp_t *list,
+                     uint32_t i,
+                     iov_ptr_t *from, uint32_t from_offset,
+                     uint32_t *psize, buf_ptr_t *extra_buf,
+                     uint32_t extra_offset)
+{
+       int32_t j;
+       uint32_t extra_len = extra_buf ? extra_buf->size : 0;
+       uint32_t size = *psize - extra_len;
+       buf_ptr_t *bufs;
+
+       bufs = from->bufs;
+       for (j = 0; (j < from->buf_cnt) && size; j++) {
+               phys_addr_t e_dma_addr;
+               uint32_t e_len;
+               sg_comp_t *to = &list[i >> 2];
+
+               if (!bufs[j].size)
+                       continue;
+
+               if (unlikely(from_offset)) {
+                       if (from_offset >= bufs[j].size) {
+                               from_offset -= bufs[j].size;
+                               continue;
+                       }
+                       e_dma_addr = bufs[j].dma_addr + from_offset;
+                       e_len = (size > (bufs[j].size - from_offset)) ?
+                               (bufs[j].size - from_offset) : size;
+                       from_offset = 0;
+               } else {
+                       e_dma_addr = bufs[j].dma_addr;
+                       e_len = (size > bufs[j].size) ?
+                               bufs[j].size : size;
+               }
+
+               to->u.s.len[i % 4] = rte_cpu_to_be_16(e_len);
+               to->ptr[i % 4] = rte_cpu_to_be_64(e_dma_addr);
+
+               if (extra_len && (e_len >= extra_offset)) {
+                       /* Break the data at given offset */
+                       uint32_t next_len = e_len - extra_offset;
+                       phys_addr_t next_dma = e_dma_addr + extra_offset;
+
+                       if (!extra_offset) {
+                               i--;
+                       } else {
+                               e_len = extra_offset;
+                               size -= e_len;
+                               to->u.s.len[i % 4] = rte_cpu_to_be_16(e_len);
+                       }
+
+                       /* Insert extra data ptr */
+                       if (extra_len) {
+                               i++;
+                               to = &list[i >> 2];
+                               to->u.s.len[i % 4] =
+                                       rte_cpu_to_be_16(extra_buf->size);
+                               to->ptr[i % 4] =
+                                       rte_cpu_to_be_64(extra_buf->dma_addr);
+
+                               /* size already decremented by extra len */
+                       }
+
+                       /* insert the rest of the data */
+                       if (next_len) {
+                               i++;
+                               to = &list[i >> 2];
+                               to->u.s.len[i % 4] = rte_cpu_to_be_16(next_len);
+                               to->ptr[i % 4] = rte_cpu_to_be_64(next_dma);
+                               size -= next_len;
+                       }
+                       extra_len = 0;
+
+               } else {
+                       size -= e_len;
+               }
+               if (extra_offset)
+                       extra_offset -= size;
+               i++;
+       }
+
+       *psize = size;
+       return (uint32_t)i;
+}
+
+static __rte_always_inline int
+cpt_digest_gen_prep(uint32_t flags,
+                   uint64_t d_lens,
+                   digest_params_t *params,
+                   void *op,
+                   void **prep_req)
+{
+       struct cpt_request_info *req;
+       uint32_t size, i;
+       int32_t m_size;
+       uint16_t data_len, mac_len, key_len;
+       auth_type_t hash_type;
+       buf_ptr_t *meta_p;
+       struct cpt_ctx *ctx;
+       sg_comp_t *gather_comp;
+       sg_comp_t *scatter_comp;
+       uint8_t *in_buffer;
+       uint32_t g_size_bytes, s_size_bytes;
+       uint64_t dptr_dma, rptr_dma;
+       vq_cmd_word0_t vq_cmd_w0;
+       vq_cmd_word3_t vq_cmd_w3;
+       void *c_vaddr, *m_vaddr;
+       uint64_t c_dma, m_dma;
+       opcode_info_t opcode;
+
+       if (!params || !params->ctx_buf.vaddr)
+               return ERR_BAD_INPUT_ARG;
+
+       ctx = params->ctx_buf.vaddr;
+       meta_p = &params->meta_buf;
+
+       if (!meta_p->vaddr || !meta_p->dma_addr)
+               return ERR_BAD_INPUT_ARG;
+
+       if (meta_p->size < sizeof(struct cpt_request_info))
+               return ERR_BAD_INPUT_ARG;
+
+       m_vaddr = meta_p->vaddr;
+       m_dma = meta_p->dma_addr;
+       m_size = meta_p->size;
+
+       /*
+        * Save the initial space that follows the app data for the completion
+        * code & alternate completion code, so they fall in the same cache
+        * line as the app data.
+        */
+       m_vaddr = (uint8_t *)m_vaddr + COMPLETION_CODE_SIZE;
+       m_dma += COMPLETION_CODE_SIZE;
+       size = (uint8_t *)RTE_PTR_ALIGN((uint8_t *)m_vaddr, 16) -
+               (uint8_t *)m_vaddr;
+       c_vaddr = (uint8_t *)m_vaddr + size;
+       c_dma = m_dma + size;
+       size += sizeof(cpt_res_s_t);
+
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       req = m_vaddr;
+
+       size = sizeof(struct cpt_request_info);
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       hash_type = ctx->hash_type;
+       mac_len = ctx->mac_len;
+       key_len = ctx->auth_key_len;
+       data_len = AUTH_DLEN(d_lens);
+
+       /* GP op header */
+       vq_cmd_w0.u64 = 0;
+       vq_cmd_w0.s.param2 = rte_cpu_to_be_16(((uint16_t)hash_type << 8));
+       if (ctx->hmac) {
+               opcode.s.major = CPT_MAJOR_OP_HMAC | CPT_DMA_MODE;
+               vq_cmd_w0.s.param1 = rte_cpu_to_be_16(key_len);
+               vq_cmd_w0.s.dlen =
+                       rte_cpu_to_be_16((data_len + ROUNDUP8(key_len)));
+       } else {
+               opcode.s.major = CPT_MAJOR_OP_HASH | CPT_DMA_MODE;
+               vq_cmd_w0.s.param1 = 0;
+               vq_cmd_w0.s.dlen = rte_cpu_to_be_16(data_len);
+       }
+
+       opcode.s.minor = 0;
+
+       /* Only the null-auth (and null-cipher) case enters this branch */
+       if (unlikely(!hash_type && !ctx->enc_cipher)) {
+               opcode.s.major = CPT_MAJOR_OP_MISC;
+               /* Minor op is passthrough */
+               opcode.s.minor = 0x03;
+               /* Send out completion code only */
+               vq_cmd_w0.s.param2 = 0x1;
+       }
+
+       vq_cmd_w0.s.opcode = rte_cpu_to_be_16(opcode.flags);
+
+       /* DPTR has SG list */
+       in_buffer = m_vaddr;
+       dptr_dma = m_dma;
+
+       ((uint16_t *)in_buffer)[0] = 0;
+       ((uint16_t *)in_buffer)[1] = 0;
+
+       /* TODO Add error check if space will be sufficient */
+       gather_comp = (sg_comp_t *)((uint8_t *)m_vaddr + 8);
+
+       /*
+        * Input gather list
+        */
+
+       i = 0;
+
+       if (ctx->hmac) {
+               uint64_t k_dma = params->ctx_buf.dma_addr +
+                       offsetof(struct cpt_ctx, auth_key);
+               /* Key */
+               i = fill_sg_comp(gather_comp, i, k_dma, ROUNDUP8(key_len));
+       }
+
+       /* input data */
+       size = data_len;
+       if (size) {
+               i = fill_sg_comp_from_iov(gather_comp, i, params->src_iov,
+                                         0, &size, NULL, 0);
+               if (size) {
+                       CPT_LOG_DP_DEBUG("Insufficient dst IOV size, short"
+                                        " by %dB", size);
+                       return ERR_BAD_INPUT_ARG;
+               }
+       } else {
+               /*
+                * Zero-length data: hash & HMAC still need a
+                * zero-data gather pointer entry.
+                */
+               i++;
+       }
+       ((uint16_t *)in_buffer)[2] = rte_cpu_to_be_16(i);
+       g_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t);
+
+       /*
+        * Output scatter list
+        */
+
+       i = 0;
+       scatter_comp = (sg_comp_t *)((uint8_t *)gather_comp + g_size_bytes);
+
+       if (flags & VALID_MAC_BUF) {
+               if (params->mac_buf.size < mac_len)
+                       return ERR_BAD_INPUT_ARG;
+
+               size = mac_len;
+               i = fill_sg_comp_from_buf_min(scatter_comp, i,
+                                             &params->mac_buf, &size);
+       } else {
+               size = mac_len;
+               i = fill_sg_comp_from_iov(scatter_comp, i,
+                                         params->src_iov, data_len,
+                                         &size, NULL, 0);
+               if (size) {
+                       CPT_LOG_DP_DEBUG("Insufficient dst IOV size, short by"
+                                        " %dB", size);
+                       return ERR_BAD_INPUT_ARG;
+               }
+       }
+
+       ((uint16_t *)in_buffer)[3] = rte_cpu_to_be_16(i);
+       s_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t);
+
+       size = g_size_bytes + s_size_bytes + SG_LIST_HDR_SIZE;
+
+       /* This is the DPTR len in case of SG mode */
+       vq_cmd_w0.s.dlen = rte_cpu_to_be_16(size);
+
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       /* cpt alternate completion address saved earlier */
+       req->alternate_caddr = (uint64_t *)((uint8_t *)c_vaddr - 8);
+       *req->alternate_caddr = ~((uint64_t)COMPLETION_CODE_INIT);
+       rptr_dma = c_dma - 8;
+
+       req->ist.ei1 = dptr_dma;
+       req->ist.ei2 = rptr_dma;
+       /* First 16-bit swap then 64-bit swap */
+       /* TODO: HACK: Reverse the vq_cmd and cpt_req bit field definitions
+        * to eliminate all the swapping
+        */
+       vq_cmd_w0.u64 = rte_cpu_to_be_64(vq_cmd_w0.u64);
+
+       /* vq command w3 */
+       vq_cmd_w3.u64 = 0;
+
+       /* 16 byte aligned cpt res address */
+       req->completion_addr = (uint64_t *)((uint8_t *)c_vaddr);
+       *req->completion_addr = COMPLETION_CODE_INIT;
+       req->comp_baddr  = c_dma;
+
+       /* Fill microcode part of instruction */
+       req->ist.ei0 = vq_cmd_w0.u64;
+       req->ist.ei3 = vq_cmd_w3.u64;
+
+       req->op = op;
+
+       *prep_req = req;
+       return 0;
+}
+
+static __rte_always_inline int
+cpt_enc_hmac_prep(uint32_t flags,
+                 uint64_t d_offs,
+                 uint64_t d_lens,
+                 fc_params_t *fc_params,
+                 void *op,
+                 void **prep_req)
+{
+       uint32_t iv_offset = 0;
+       int32_t inputlen, outputlen, enc_dlen, auth_dlen;
+       struct cpt_ctx *cpt_ctx;
+       uint32_t cipher_type, hash_type;
+       uint32_t mac_len, size;
+       uint8_t iv_len = 16;
+       struct cpt_request_info *req;
+       buf_ptr_t *meta_p, *aad_buf = NULL;
+       uint32_t encr_offset, auth_offset;
+       uint32_t encr_data_len, auth_data_len, aad_len = 0;
+       uint32_t passthrough_len = 0;
+       void *m_vaddr, *offset_vaddr;
+       uint64_t m_dma, offset_dma, ctx_dma;
+       vq_cmd_word0_t vq_cmd_w0;
+       vq_cmd_word3_t vq_cmd_w3;
+       void *c_vaddr;
+       uint64_t c_dma;
+       int32_t m_size;
+       opcode_info_t opcode;
+
+       meta_p = &fc_params->meta_buf;
+       m_vaddr = meta_p->vaddr;
+       m_dma = meta_p->dma_addr;
+       m_size = meta_p->size;
+
+       encr_offset = ENCR_OFFSET(d_offs);
+       auth_offset = AUTH_OFFSET(d_offs);
+       encr_data_len = ENCR_DLEN(d_lens);
+       auth_data_len = AUTH_DLEN(d_lens);
+       if (unlikely(flags & VALID_AAD_BUF)) {
+               /*
+                * We don't support both AAD
+                * and auth data separately.
+                */
+               auth_data_len = 0;
+               auth_offset = 0;
+               aad_len = fc_params->aad_buf.size;
+               aad_buf = &fc_params->aad_buf;
+       }
+       cpt_ctx = fc_params->ctx_buf.vaddr;
+       cipher_type = cpt_ctx->enc_cipher;
+       hash_type = cpt_ctx->hash_type;
+       mac_len = cpt_ctx->mac_len;
+
+       /*
+        * Save the initial space that follows the app data for the completion
+        * code & alternate completion code, so they fall in the same cache
+        * line as the app data.
+        */
+       m_vaddr = (uint8_t *)m_vaddr + COMPLETION_CODE_SIZE;
+       m_dma += COMPLETION_CODE_SIZE;
+       size = (uint8_t *)RTE_PTR_ALIGN((uint8_t *)m_vaddr, 16) -
+               (uint8_t *)m_vaddr;
+
+       c_vaddr = (uint8_t *)m_vaddr + size;
+       c_dma = m_dma + size;
+       size += sizeof(cpt_res_s_t);
+
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       /* start cpt request info struct at 8 byte boundary */
+       size = (uint8_t *)RTE_PTR_ALIGN(m_vaddr, 8) -
+               (uint8_t *)m_vaddr;
+
+       req = (struct cpt_request_info *)((uint8_t *)m_vaddr + size);
+
+       size += sizeof(struct cpt_request_info);
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       if (hash_type == GMAC_TYPE)
+               encr_data_len = 0;
+
+       if (unlikely(!(flags & VALID_IV_BUF))) {
+               iv_len = 0;
+               iv_offset = ENCR_IV_OFFSET(d_offs);
+       }
+
+       if (unlikely(flags & VALID_AAD_BUF)) {
+               /*
+                * When AAD is given, data before encr_offset is passed
+                * through. Since AAD is supplied as a separate pointer
+                * rather than as an offset, this is a special case: the
+                * input data is fragmented into passthrough + encr_data
+                * and the AAD is inserted in between.
+                */
+               if (hash_type != GMAC_TYPE) {
+                       passthrough_len = encr_offset;
+                       auth_offset = passthrough_len + iv_len;
+                       encr_offset = passthrough_len + aad_len + iv_len;
+                       auth_data_len = aad_len + encr_data_len;
+               } else {
+                       passthrough_len = 16 + aad_len;
+                       auth_offset = passthrough_len + iv_len;
+                       auth_data_len = aad_len;
+               }
+       } else {
+               encr_offset += iv_len;
+               auth_offset += iv_len;
+       }
+
+       /* Encryption */
+       opcode.s.major = CPT_MAJOR_OP_FC;
+       opcode.s.minor = 0;
+
+       auth_dlen = auth_offset + auth_data_len;
+       enc_dlen = encr_data_len + encr_offset;
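+       /*
+        * If the cipher data length is not 16-byte aligned, round the DMA
+        * length up to the cipher block size: 8 bytes for 3DES, 16 bytes
+        * for AES, so the engine always sees whole blocks.
+        */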
+       if (unlikely(encr_data_len & 0xf)) {
+               if ((cipher_type == DES3_CBC) || (cipher_type == DES3_ECB))
+                       enc_dlen = ROUNDUP8(encr_data_len) + encr_offset;
+               else if (likely((cipher_type == AES_CBC) ||
+                               (cipher_type == AES_ECB)))
+                       enc_dlen = ROUNDUP16(encr_data_len) + encr_offset;
+       }
+
+       if (unlikely(hash_type == GMAC_TYPE)) {
+               encr_offset = auth_dlen;
+               enc_dlen = 0;
+       }
+
+       if (unlikely(auth_dlen > enc_dlen)) {
+               inputlen = auth_dlen;
+               outputlen = auth_dlen + mac_len;
+       } else {
+               inputlen = enc_dlen;
+               outputlen = enc_dlen + mac_len;
+       }
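+       /* The longer of the auth and cipher regions sets the DMA lengths;
+        * the generated MAC is appended after the output data.
+        */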
+
+       /* GP op header */
+       vq_cmd_w0.u64 = 0;
+       vq_cmd_w0.s.param1 = rte_cpu_to_be_16(encr_data_len);
+       vq_cmd_w0.s.param2 = rte_cpu_to_be_16(auth_data_len);
+       /*
+        * On 83XX the IV and offset control word cannot be part of the
+        * instruction and must instead sit in the data buffer, so direct
+        * mode processing is done only when enough headroom is available.
+        */
+       if (likely((flags & SINGLE_BUF_INPLACE) &&
+                  (flags & SINGLE_BUF_HEADTAILROOM))) {
+               void *dm_vaddr = fc_params->bufs[0].vaddr;
+               uint64_t dm_dma_addr = fc_params->bufs[0].dma_addr;
+               /*
+                * These flags indicate that 24 bytes of headroom and 8
+                * bytes of tailroom are available, so the request can use
+                * DIRECT MODE within that limitation.
+                */
+
+               offset_vaddr = (uint8_t *)dm_vaddr - OFF_CTRL_LEN - iv_len;
+               offset_dma = dm_dma_addr - OFF_CTRL_LEN - iv_len;
+
+               /* DPTR */
+               req->ist.ei1 = offset_dma;
+               /* RPTR should just exclude offset control word */
+               req->ist.ei2 = dm_dma_addr - iv_len;
+               req->alternate_caddr = (uint64_t *)((uint8_t *)dm_vaddr
+                                                   + outputlen - iv_len);
+
+               vq_cmd_w0.s.dlen = rte_cpu_to_be_16(inputlen + OFF_CTRL_LEN);
+
+               vq_cmd_w0.s.opcode = rte_cpu_to_be_16(opcode.flags);
+
+               if (likely(iv_len)) {
+                       uint64_t *dest = (uint64_t *)((uint8_t *)offset_vaddr
+                                                     + OFF_CTRL_LEN);
+                       uint64_t *src = fc_params->iv_buf;
+                       dest[0] = src[0];
+                       dest[1] = src[1];
+               }
+
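+               /*
+                * Offset control word: encr_offset goes at bit 16,
+                * iv_offset at bit 8 and auth_offset at bit 0; the word
+                * is stored big-endian.
+                */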
+               *(uint64_t *)offset_vaddr =
+                       rte_cpu_to_be_64(((uint64_t)encr_offset << 16) |
+                               ((uint64_t)iv_offset << 8) |
+                               ((uint64_t)auth_offset));
+
+       } else {
+               uint32_t i, g_size_bytes, s_size_bytes;
+               uint64_t dptr_dma, rptr_dma;
+               sg_comp_t *gather_comp;
+               sg_comp_t *scatter_comp;
+               uint8_t *in_buffer;
+
+               /* This falls under strict SG mode */
+               offset_vaddr = m_vaddr;
+               offset_dma = m_dma;
+               size = OFF_CTRL_LEN + iv_len;
+
+               m_vaddr = (uint8_t *)m_vaddr + size;
+               m_dma += size;
+               m_size -= size;
+
+               opcode.s.major |= CPT_DMA_MODE;
+
+               vq_cmd_w0.s.opcode = rte_cpu_to_be_16(opcode.flags);
+
+               if (likely(iv_len)) {
+                       uint64_t *dest = (uint64_t *)((uint8_t *)offset_vaddr
+                                                     + OFF_CTRL_LEN);
+                       uint64_t *src = fc_params->iv_buf;
+                       dest[0] = src[0];
+                       dest[1] = src[1];
+               }
+
+               *(uint64_t *)offset_vaddr =
+                       rte_cpu_to_be_64(((uint64_t)encr_offset << 16) |
+                               ((uint64_t)iv_offset << 8) |
+                               ((uint64_t)auth_offset));
+
+               /* DPTR has SG list */
+               in_buffer = m_vaddr;
+               dptr_dma = m_dma;
+
+               ((uint16_t *)in_buffer)[0] = 0;
+               ((uint16_t *)in_buffer)[1] = 0;
+
+               /* TODO: add an error check that the space is sufficient */
+               gather_comp = (sg_comp_t *)((uint8_t *)m_vaddr + 8);
+
+               /*
+                * Input Gather List
+                */
+
+               i = 0;
+
+               /* Offset control word that includes iv */
+               i = fill_sg_comp(gather_comp, i, offset_dma,
+                                OFF_CTRL_LEN + iv_len);
+
+               /* Add input data */
+               size = inputlen - iv_len;
+               if (likely(size)) {
+                       uint32_t aad_offset = aad_len ? passthrough_len : 0;
+
+                       if (unlikely(flags & SINGLE_BUF_INPLACE)) {
+                               i = fill_sg_comp_from_buf_min(gather_comp, i,
+                                                             fc_params->bufs,
+                                                             &size);
+                       } else {
+                               i = fill_sg_comp_from_iov(gather_comp, i,
+                                                         fc_params->src_iov,
+                                                         0, &size,
+                                                         aad_buf, aad_offset);
+                       }
+
+                       if (unlikely(size)) {
+                               CPT_LOG_DP_ERR("Insufficient buffer space,"
+                                              " size %d needed", size);
+                               return ERR_BAD_INPUT_ARG;
+                       }
+               }
+               ((uint16_t *)in_buffer)[2] = rte_cpu_to_be_16(i);
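+               /* Each sg_comp_t packs four SG entries, so round the entry
+                * count up to whole components.
+                */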
+               g_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t);
+
+               /*
+                * Output Scatter list
+                */
+               i = 0;
+               scatter_comp =
+                       (sg_comp_t *)((uint8_t *)gather_comp + g_size_bytes);
+
+               /* Add IV */
+               if (likely(iv_len)) {
+                       i = fill_sg_comp(scatter_comp, i,
+                                        offset_dma + OFF_CTRL_LEN,
+                                        iv_len);
+               }
+
+               /* Output data, or output data + digest */
+               if (unlikely(flags & VALID_MAC_BUF)) {
+                       size = outputlen - iv_len - mac_len;
+                       if (size) {
+                               uint32_t aad_offset =
+                                       aad_len ? passthrough_len : 0;
+
+                               if (unlikely(flags & SINGLE_BUF_INPLACE)) {
+                                       i = fill_sg_comp_from_buf_min(
+                                                       scatter_comp,
+                                                       i,
+                                                       fc_params->bufs,
+                                                       &size);
+                               } else {
+                                       i = fill_sg_comp_from_iov(scatter_comp,
+                                                       i,
+                                                       fc_params->dst_iov,
+                                                       0,
+                                                       &size,
+                                                       aad_buf,
+                                                       aad_offset);
+                               }
+                               if (size)
+                                       return ERR_BAD_INPUT_ARG;
+                       }
+                       /* mac_data */
+                       if (mac_len) {
+                               i = fill_sg_comp_from_buf(scatter_comp, i,
+                                                         &fc_params->mac_buf);
+                       }
+               } else {
+                       /* Output including mac */
+                       size = outputlen - iv_len;
+                       if (likely(size)) {
+                               uint32_t aad_offset =
+                                       aad_len ? passthrough_len : 0;
+
+                               if (unlikely(flags & SINGLE_BUF_INPLACE)) {
+                                       i = fill_sg_comp_from_buf_min(
+                                                       scatter_comp,
+                                                       i,
+                                                       fc_params->bufs,
+                                                       &size);
+                               } else {
+                                       i = fill_sg_comp_from_iov(scatter_comp,
+                                                       i,
+                                                       fc_params->dst_iov,
+                                                       0,
+                                                       &size,
+                                                       aad_buf,
+                                                       aad_offset);
+                               }
+                               if (unlikely(size)) {
+                                       CPT_LOG_DP_ERR("Insufficient buffer"
+                                                      " space, size %d needed",
+                                                      size);
+                                       return ERR_BAD_INPUT_ARG;
+                               }
+                       }
+               }
+               ((uint16_t *)in_buffer)[3] = rte_cpu_to_be_16(i);
+               s_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t);
+
+               size = g_size_bytes + s_size_bytes + SG_LIST_HDR_SIZE;
+
+               /* This is the DPTR length in case of SG mode */
+               vq_cmd_w0.s.dlen = rte_cpu_to_be_16(size);
+
+               m_vaddr = (uint8_t *)m_vaddr + size;
+               m_dma += size;
+               m_size -= size;
+
+               /* cpt alternate completion address saved earlier */
+               req->alternate_caddr = (uint64_t *)((uint8_t *)c_vaddr - 8);
+               *req->alternate_caddr = ~((uint64_t)COMPLETION_CODE_INIT);
+               rptr_dma = c_dma - 8;
+
+               req->ist.ei1 = dptr_dma;
+               req->ist.ei2 = rptr_dma;
+       }
+
+       /* First 16-bit swap then 64-bit swap */
+       /* TODO: HACK: Reverse the vq_cmd and cpt_req bit field definitions
+        * to eliminate all the swapping
+        */
+       vq_cmd_w0.u64 = rte_cpu_to_be_64(vq_cmd_w0.u64);
+
+       ctx_dma = fc_params->ctx_buf.dma_addr +
+               offsetof(struct cpt_ctx, fctx);
+       /* vq command w3 */
+       vq_cmd_w3.u64 = 0;
+       vq_cmd_w3.s.grp = 0;
+       vq_cmd_w3.s.cptr = ctx_dma;
+
+       /* 16 byte aligned cpt res address */
+       req->completion_addr = (uint64_t *)((uint8_t *)c_vaddr);
+       *req->completion_addr = COMPLETION_CODE_INIT;
+       req->comp_baddr  = c_dma;
+
+       /* Fill microcode part of instruction */
+       req->ist.ei0 = vq_cmd_w0.u64;
+       req->ist.ei3 = vq_cmd_w3.u64;
+
+       req->op  = op;
+
+       *prep_req = req;
+       return 0;
+}
+
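+/*
+ * Prepare a CPT instruction for a flexi-crypto decrypt (+ auth verify)
+ * operation. Mirrors cpt_enc_hmac_prep(), except that the MAC is part of
+ * the input and, in direct mode, the alternate completion-code space is
+ * left untouched because it overlaps the HMAC.
+ */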
+static __rte_always_inline int
+cpt_dec_hmac_prep(uint32_t flags,
+                 uint64_t d_offs,
+                 uint64_t d_lens,
+                 fc_params_t *fc_params,
+                 void *op,
+                 void **prep_req)
+{
+       uint32_t iv_offset = 0, size;
+       int32_t inputlen, outputlen, enc_dlen, auth_dlen;
+       struct cpt_ctx *cpt_ctx;
+       int32_t hash_type, mac_len, m_size;
+       uint8_t iv_len = 16;
+       struct cpt_request_info *req;
+       buf_ptr_t *meta_p, *aad_buf = NULL;
+       uint32_t encr_offset, auth_offset;
+       uint32_t encr_data_len, auth_data_len, aad_len = 0;
+       uint32_t passthrough_len = 0;
+       void *m_vaddr, *offset_vaddr;
+       uint64_t m_dma, offset_dma, ctx_dma;
+       opcode_info_t opcode;
+       vq_cmd_word0_t vq_cmd_w0;
+       vq_cmd_word3_t vq_cmd_w3;
+       void *c_vaddr;
+       uint64_t c_dma;
+
+       meta_p = &fc_params->meta_buf;
+       m_vaddr = meta_p->vaddr;
+       m_dma = meta_p->dma_addr;
+       m_size = meta_p->size;
+
+       encr_offset = ENCR_OFFSET(d_offs);
+       auth_offset = AUTH_OFFSET(d_offs);
+       encr_data_len = ENCR_DLEN(d_lens);
+       auth_data_len = AUTH_DLEN(d_lens);
+
+       if (unlikely(flags & VALID_AAD_BUF)) {
+               /*
+                * We don't support AAD and auth data given separately;
+                * when an AAD buffer is present, the auth offset and
+                * length are derived from it.
+                */
+               auth_data_len = 0;
+               auth_offset = 0;
+               aad_len = fc_params->aad_buf.size;
+               aad_buf = &fc_params->aad_buf;
+       }
+
+       cpt_ctx = fc_params->ctx_buf.vaddr;
+       hash_type = cpt_ctx->hash_type;
+       mac_len = cpt_ctx->mac_len;
+
+       if (hash_type == GMAC_TYPE)
+               encr_data_len = 0;
+
+       if (unlikely(!(flags & VALID_IV_BUF))) {
+               iv_len = 0;
+               iv_offset = ENCR_IV_OFFSET(d_offs);
+       }
+
+       if (unlikely(flags & VALID_AAD_BUF)) {
+               /*
+                * When AAD is given, data before encr_offset is passed
+                * through. Since AAD is supplied as a separate pointer
+                * rather than as an offset, this is a special case: the
+                * input data is fragmented into passthrough + encr_data
+                * and the AAD is inserted in between.
+                */
+               if (hash_type != GMAC_TYPE) {
+                       passthrough_len = encr_offset;
+                       auth_offset = passthrough_len + iv_len;
+                       encr_offset = passthrough_len + aad_len + iv_len;
+                       auth_data_len = aad_len + encr_data_len;
+               } else {
+                       passthrough_len = 16 + aad_len;
+                       auth_offset = passthrough_len + iv_len;
+                       auth_data_len = aad_len;
+               }
+       } else {
+               encr_offset += iv_len;
+               auth_offset += iv_len;
+       }
+
+       /*
+        * Reserve the initial space, which follows the app data, for the
+        * completion code and the alternate completion code, so that both
+        * fall in the same cache line as the app data.
+        */
+       m_vaddr = (uint8_t *)m_vaddr + COMPLETION_CODE_SIZE;
+       m_dma += COMPLETION_CODE_SIZE;
+       size = (uint8_t *)RTE_PTR_ALIGN((uint8_t *)m_vaddr, 16) -
+              (uint8_t *)m_vaddr;
+       c_vaddr = (uint8_t *)m_vaddr + size;
+       c_dma = m_dma + size;
+       size += sizeof(cpt_res_s_t);
+
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       /* start cpt request info structure at 8 byte alignment */
+       size = (uint8_t *)RTE_PTR_ALIGN(m_vaddr, 8) -
+               (uint8_t *)m_vaddr;
+
+       req = (struct cpt_request_info *)((uint8_t *)m_vaddr + size);
+
+       size += sizeof(struct cpt_request_info);
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       /* Decryption */
+       opcode.s.major = CPT_MAJOR_OP_FC;
+       opcode.s.minor = 1;
+
+       enc_dlen = encr_offset + encr_data_len;
+       auth_dlen = auth_offset + auth_data_len;
+
+       if (auth_dlen > enc_dlen) {
+               inputlen = auth_dlen + mac_len;
+               outputlen = auth_dlen;
+       } else {
+               inputlen = enc_dlen + mac_len;
+               outputlen = enc_dlen;
+       }
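+       /* For decryption the MAC is consumed from the input, so inputlen
+        * includes mac_len while outputlen does not.
+        */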
+
+       if (hash_type == GMAC_TYPE)
+               encr_offset = inputlen;
+
+       vq_cmd_w0.u64 = 0;
+       vq_cmd_w0.s.param1 = rte_cpu_to_be_16(encr_data_len);
+       vq_cmd_w0.s.param2 = rte_cpu_to_be_16(auth_data_len);
+
+       /*
+        * On 83XX the IV and offset control word cannot be part of the
+        * instruction and must instead sit in the data buffer, so direct
+        * mode processing is done only when enough headroom is available.
+        */
+       if (likely((flags & SINGLE_BUF_INPLACE) &&
+                  (flags & SINGLE_BUF_HEADTAILROOM))) {
+               void *dm_vaddr = fc_params->bufs[0].vaddr;
+               uint64_t dm_dma_addr = fc_params->bufs[0].dma_addr;
+               /*
+                * These flags indicate that 24 bytes of headroom and 8
+                * bytes of tailroom are available, so the request can use
+                * DIRECT MODE within that limitation.
+                */
+
+               offset_vaddr = (uint8_t *)dm_vaddr - OFF_CTRL_LEN - iv_len;
+               offset_dma = dm_dma_addr - OFF_CTRL_LEN - iv_len;
+               req->ist.ei1 = offset_dma;
+
+               /* RPTR should just exclude offset control word */
+               req->ist.ei2 = dm_dma_addr - iv_len;
+
+               req->alternate_caddr = (uint64_t *)((uint8_t *)dm_vaddr +
+                                       outputlen - iv_len);
+               /*
+                * Since this is decryption, do not touch the contents of
+                * the alternate completion-code space: it holds the HMAC.
+                */
+
+               vq_cmd_w0.s.dlen = rte_cpu_to_be_16(inputlen + OFF_CTRL_LEN);
+
+               vq_cmd_w0.s.opcode = rte_cpu_to_be_16(opcode.flags);
+
+               if (likely(iv_len)) {
+                       uint64_t *dest = (uint64_t *)((uint8_t *)offset_vaddr +
+                                                     OFF_CTRL_LEN);
+                       uint64_t *src = fc_params->iv_buf;
+                       dest[0] = src[0];
+                       dest[1] = src[1];
+               }
+
+               *(uint64_t *)offset_vaddr =
+                       rte_cpu_to_be_64(((uint64_t)encr_offset << 16) |
+                               ((uint64_t)iv_offset << 8) |
+                               ((uint64_t)auth_offset));
+
+       } else {
+               uint64_t dptr_dma, rptr_dma;
+               uint32_t g_size_bytes, s_size_bytes;
+               sg_comp_t *gather_comp;
+               sg_comp_t *scatter_comp;
+               uint8_t *in_buffer;
+               uint8_t i = 0;
+
+               /* This falls under strict SG mode */
+               offset_vaddr = m_vaddr;
+               offset_dma = m_dma;
+               size = OFF_CTRL_LEN + iv_len;
+
+               m_vaddr = (uint8_t *)m_vaddr + size;
+               m_dma += size;
+               m_size -= size;
+
+               opcode.s.major |= CPT_DMA_MODE;
+
+               vq_cmd_w0.s.opcode = rte_cpu_to_be_16(opcode.flags);
+
+               if (likely(iv_len)) {
+                       uint64_t *dest = (uint64_t *)((uint8_t *)offset_vaddr +
+                                                     OFF_CTRL_LEN);
+                       uint64_t *src = fc_params->iv_buf;
+                       dest[0] = src[0];
+                       dest[1] = src[1];
+               }
+
+               *(uint64_t *)offset_vaddr =
+                       rte_cpu_to_be_64(((uint64_t)encr_offset << 16) |
+                               ((uint64_t)iv_offset << 8) |
+                               ((uint64_t)auth_offset));
+
+               /* DPTR has SG list */
+               in_buffer = m_vaddr;
+               dptr_dma = m_dma;
+
+               ((uint16_t *)in_buffer)[0] = 0;
+               ((uint16_t *)in_buffer)[1] = 0;
+
+               /* TODO: add an error check that the space is sufficient */
+               gather_comp = (sg_comp_t *)((uint8_t *)m_vaddr + 8);
+
+               /*
+                * Input Gather List
+                */
+               i = 0;
+
+               /* Offset control word that includes iv */
+               i = fill_sg_comp(gather_comp, i, offset_dma,
+                                OFF_CTRL_LEN + iv_len);
+
+               /* Add input data */
+               if (flags & VALID_MAC_BUF) {
+                       size = inputlen - iv_len - mac_len;
+                       if (size) {
+                               /* input data only */
+                               if (unlikely(flags & SINGLE_BUF_INPLACE)) {
+                                       i = fill_sg_comp_from_buf_min(
+                                                       gather_comp, i,
+                                                       fc_params->bufs,
+                                                       &size);
+                               } else {
+                                       uint32_t aad_offset = aad_len ?
+                                               passthrough_len : 0;
+
+                                       i = fill_sg_comp_from_iov(gather_comp,
+                                                       i,
+                                                       fc_params->src_iov,
+                                                       0, &size,
+                                                       aad_buf,
+                                                       aad_offset);
+                               }
+                               if (size)
+                                       return ERR_BAD_INPUT_ARG;
+                       }
+
+                       /* mac data */
+                       if (mac_len) {
+                               i = fill_sg_comp_from_buf(gather_comp, i,
+                                                         &fc_params->mac_buf);
+                       }
+               } else {
+                       /* input data + mac */
+                       size = inputlen - iv_len;
+                       if (size) {
+                               if (unlikely(flags & SINGLE_BUF_INPLACE)) {
+                                       i = fill_sg_comp_from_buf_min(
+                                                       gather_comp, i,
+                                                       fc_params->bufs,
+                                                       &size);
+                               } else {
+                                       uint32_t aad_offset = aad_len ?
+                                               passthrough_len : 0;
+
+                                       if (!fc_params->src_iov)
+                                               return ERR_BAD_INPUT_ARG;
+
+                                       i = fill_sg_comp_from_iov(
+                                                       gather_comp, i,
+                                                       fc_params->src_iov,
+                                                       0, &size,
+                                                       aad_buf,
+                                                       aad_offset);
+                               }
+
+                               if (size)
+                                       return ERR_BAD_INPUT_ARG;
+                       }
+               }
+               ((uint16_t *)in_buffer)[2] = rte_cpu_to_be_16(i);
+               g_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t);
+
+               /*
+                * Output Scatter List
+                */
+
+               i = 0;
+               scatter_comp =
+                       (sg_comp_t *)((uint8_t *)gather_comp + g_size_bytes);
+
+               /* Add iv */
+               if (iv_len) {
+                       i = fill_sg_comp(scatter_comp, i,
+                                        offset_dma + OFF_CTRL_LEN,
+                                        iv_len);
+               }
+
+               /* Add output data */
+               size = outputlen - iv_len;
+               if (size) {
+                       if (unlikely(flags & SINGLE_BUF_INPLACE)) {
+                               /* handle single buffer here */
+                               i = fill_sg_comp_from_buf_min(scatter_comp, i,
+                                                             fc_params->bufs,
+                                                             &size);
+                       } else {
+                               uint32_t aad_offset = aad_len ?
+                                       passthrough_len : 0;
+
+                               if (!fc_params->dst_iov)
+                                       return ERR_BAD_INPUT_ARG;
+
+                               i = fill_sg_comp_from_iov(scatter_comp, i,
+                                                         fc_params->dst_iov, 0,
+                                                         &size, aad_buf,
+                                                         aad_offset);
+                       }
+
+                       if (unlikely(size))
+                               return ERR_BAD_INPUT_ARG;
+               }
+
+               ((uint16_t *)in_buffer)[3] = rte_cpu_to_be_16(i);
+               s_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t);
+
+               size = g_size_bytes + s_size_bytes + SG_LIST_HDR_SIZE;
+
+               /* This is the DPTR length in case of SG mode */
+               vq_cmd_w0.s.dlen = rte_cpu_to_be_16(size);
+
+               m_vaddr = (uint8_t *)m_vaddr + size;
+               m_dma += size;
+               m_size -= size;
+
+               /* cpt alternate completion address saved earlier */
+               req->alternate_caddr = (uint64_t *)((uint8_t *)c_vaddr - 8);
+               *req->alternate_caddr = ~((uint64_t)COMPLETION_CODE_INIT);
+               rptr_dma = c_dma - 8;
+               size += COMPLETION_CODE_SIZE;
+
+               req->ist.ei1 = dptr_dma;
+               req->ist.ei2 = rptr_dma;
+       }
+
+       /* First 16-bit swap then 64-bit swap */
+       /* TODO: HACK: Reverse the vq_cmd and cpt_req bit field definitions
+        * to eliminate all the swapping
+        */
+       vq_cmd_w0.u64 = rte_cpu_to_be_64(vq_cmd_w0.u64);
+
+       ctx_dma = fc_params->ctx_buf.dma_addr +
+               offsetof(struct cpt_ctx, fctx);
+       /* vq command w3 */
+       vq_cmd_w3.u64 = 0;
+       vq_cmd_w3.s.grp = 0;
+       vq_cmd_w3.s.cptr = ctx_dma;
+
+       /* 16 byte aligned cpt res address */
+       req->completion_addr = (uint64_t *)((uint8_t *)c_vaddr);
+       *req->completion_addr = COMPLETION_CODE_INIT;
+       req->comp_baddr  = c_dma;
+
+       /* Fill microcode part of instruction */
+       req->ist.ei0 = vq_cmd_w0.u64;
+       req->ist.ei3 = vq_cmd_w3.u64;
+
+       req->op = op;
+
+       *prep_req = req;
+       return 0;
+}
+
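+/*
+ * Prepare a CPT instruction for a ZUC/SNOW 3G encrypt or auth-generate
+ * operation. Lengths in the GP op header are expressed in bits, and the
+ * offsets in d_offs arrive in bits as well.
+ */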
+static __rte_always_inline int
+cpt_zuc_snow3g_enc_prep(uint32_t req_flags,
+                       uint64_t d_offs,
+                       uint64_t d_lens,
+                       fc_params_t *params,
+                       void *op,
+                       void **prep_req)
+{
+       uint32_t size;
+       int32_t inputlen, outputlen;
+       struct cpt_ctx *cpt_ctx;
+       uint32_t mac_len = 0;
+       uint8_t snow3g, j;
+       struct cpt_request_info *req;
+       buf_ptr_t *buf_p;
+       uint32_t encr_offset = 0, auth_offset = 0;
+       uint32_t encr_data_len = 0, auth_data_len = 0;
+       int flags, iv_len = 16, m_size;
+       void *m_vaddr, *c_vaddr;
+       uint64_t m_dma, c_dma, offset_ctrl;
+       uint64_t *offset_vaddr, offset_dma;
+       uint32_t *iv_s, iv[4];
+       vq_cmd_word0_t vq_cmd_w0;
+       vq_cmd_word3_t vq_cmd_w3;
+       opcode_info_t opcode;
+
+       buf_p = &params->meta_buf;
+       m_vaddr = buf_p->vaddr;
+       m_dma = buf_p->dma_addr;
+       m_size = buf_p->size;
+
+       cpt_ctx = params->ctx_buf.vaddr;
+       flags = cpt_ctx->zsk_flags;
+       mac_len = cpt_ctx->mac_len;
+       snow3g = cpt_ctx->snow3g;
+
+       /*
+        * Reserve the initial space, which follows the app data, for the
+        * completion code and the alternate completion code, so that both
+        * fall in the same cache line as the app data.
+        */
+       m_vaddr = (uint8_t *)m_vaddr + COMPLETION_CODE_SIZE;
+       m_dma += COMPLETION_CODE_SIZE;
+       size = (uint8_t *)RTE_PTR_ALIGN((uint8_t *)m_vaddr, 16) -
+               (uint8_t *)m_vaddr;
+
+       c_vaddr = (uint8_t *)m_vaddr + size;
+       c_dma = m_dma + size;
+       size += sizeof(cpt_res_s_t);
+
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       /* Reserve memory for cpt request info */
+       req = m_vaddr;
+
+       size = sizeof(struct cpt_request_info);
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       opcode.s.major = CPT_MAJOR_OP_ZUC_SNOW3G;
+
+       /* indicates CPTR ctx, operation type, KEY & IV mode from DPTR */
+       opcode.s.minor = ((1 << 6) | (snow3g << 5) | (0 << 4) |
+                         (0 << 3) | (flags & 0x7));
+
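+       /* zsk_flags: 0x1 selects EIA3/UIA2 (auth); otherwise EEA3/UEA2
+        * (cipher).
+        */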
+       if (flags == 0x1) {
+               /*
+                * Microcode expects offsets in bytes
+                * TODO: Rounding off
+                */
+               auth_data_len = AUTH_DLEN(d_lens);
+
+               /* EIA3 or UIA2 */
+               auth_offset = AUTH_OFFSET(d_offs);
+               auth_offset = auth_offset / 8;
+
+               /* consider iv len */
+               auth_offset += iv_len;
+
+               inputlen = auth_offset + (RTE_ALIGN(auth_data_len, 8) / 8);
+               outputlen = mac_len;
+
+               offset_ctrl = rte_cpu_to_be_64((uint64_t)auth_offset);
+
+       } else {
+               /* EEA3 or UEA2 */
+               /*
+                * Microcode expects offsets in bytes
+                * TODO: Rounding off
+                */
+               encr_data_len = ENCR_DLEN(d_lens);
+
+               encr_offset = ENCR_OFFSET(d_offs);
+               encr_offset = encr_offset / 8;
+               /* consider iv len */
+               encr_offset += iv_len;
+
+               inputlen = encr_offset + (RTE_ALIGN(encr_data_len, 8) / 8);
+               outputlen = inputlen;
+
+               /* iv offset is 0 */
+               offset_ctrl = rte_cpu_to_be_64((uint64_t)encr_offset << 16);
+       }
+
+       /* IV */
+       iv_s = (flags == 0x1) ? params->auth_iv_buf :
+               params->iv_buf;
+
+       if (snow3g) {
+               /*
+                * DPDK provides the IV as IV3 IV2 IV1 IV0 in big-endian
+                * form; the microcode needs it as IV0 IV1 IV2 IV3, so
+                * reverse the word order.
+                */
+
+               for (j = 0; j < 4; j++)
+                       iv[j] = iv_s[3 - j];
+       } else {
+               /* ZUC doesn't need a swap */
+               for (j = 0; j < 4; j++)
+                       iv[j] = iv_s[j];
+       }
+
+       /*
+        * GP op header, lengths are expected in bits.
+        */
+       vq_cmd_w0.u64 = 0;
+       vq_cmd_w0.s.param1 = rte_cpu_to_be_16(encr_data_len);
+       vq_cmd_w0.s.param2 = rte_cpu_to_be_16(auth_data_len);
+
+       /*
+        * On 83XX the IV and offset control word cannot be part of the
+        * instruction and must instead sit in the data buffer, so direct
+        * mode processing is done only when enough headroom is available.
+        */
+       if (likely((req_flags & SINGLE_BUF_INPLACE) &&
+                  (req_flags & SINGLE_BUF_HEADTAILROOM))) {
+               void *dm_vaddr = params->bufs[0].vaddr;
+               uint64_t dm_dma_addr = params->bufs[0].dma_addr;
+               /*
+                * These flags indicate that 24 bytes of headroom and 8
+                * bytes of tailroom are available, so the request can use
+                * DIRECT MODE within that limitation.
+                */
+
+               offset_vaddr = (uint64_t *)((uint8_t *)dm_vaddr -
+                                           OFF_CTRL_LEN - iv_len);
+               offset_dma = dm_dma_addr - OFF_CTRL_LEN - iv_len;
+
+               /* DPTR */
+               req->ist.ei1 = offset_dma;
+               /* RPTR should just exclude offset control word */
+               req->ist.ei2 = dm_dma_addr - iv_len;
+               req->alternate_caddr = (uint64_t *)((uint8_t *)dm_vaddr
+                                                   + outputlen - iv_len);
+
+               vq_cmd_w0.s.dlen = rte_cpu_to_be_16(inputlen + OFF_CTRL_LEN);
+
+               vq_cmd_w0.s.opcode = rte_cpu_to_be_16(opcode.flags);
+
+               if (likely(iv_len)) {
+                       uint32_t *iv_d = (uint32_t *)((uint8_t *)offset_vaddr
+                                                     + OFF_CTRL_LEN);
+                       memcpy(iv_d, iv, 16);
+               }
+
+               *offset_vaddr = offset_ctrl;
+       } else {
+               uint32_t i, g_size_bytes, s_size_bytes;
+               uint64_t dptr_dma, rptr_dma;
+               sg_comp_t *gather_comp;
+               sg_comp_t *scatter_comp;
+               uint8_t *in_buffer;
+               uint32_t *iv_d;
+
+               /* Save space for the offset control word and IV */
+               offset_vaddr = m_vaddr;
+               offset_dma = m_dma;
+
+               m_vaddr = (uint8_t *)m_vaddr + OFF_CTRL_LEN + iv_len;
+               m_dma += OFF_CTRL_LEN + iv_len;
+               m_size -= OFF_CTRL_LEN + iv_len;
+
+               opcode.s.major |= CPT_DMA_MODE;
+
+               vq_cmd_w0.s.opcode = rte_cpu_to_be_16(opcode.flags);
+
+               /* DPTR has SG list */
+               in_buffer = m_vaddr;
+               dptr_dma = m_dma;
+
+               ((uint16_t *)in_buffer)[0] = 0;
+               ((uint16_t *)in_buffer)[1] = 0;
+
+               /* TODO: add an error check that the space is sufficient */
+               gather_comp = (sg_comp_t *)((uint8_t *)m_vaddr + 8);
+
+               /*
+                * Input Gather List
+                */
+               i = 0;
+
+               /* Offset control word followed by iv */
+
+               i = fill_sg_comp(gather_comp, i, offset_dma,
+                                OFF_CTRL_LEN + iv_len);
+
+               /* iv offset is 0 */
+               *offset_vaddr = offset_ctrl;
+
+               iv_d = (uint32_t *)((uint8_t *)offset_vaddr + OFF_CTRL_LEN);
+               memcpy(iv_d, iv, 16);
+
+               /* input data */
+               size = inputlen - iv_len;
+               if (size) {
+                       i = fill_sg_comp_from_iov(gather_comp, i,
+                                                 params->src_iov,
+                                                 0, &size, NULL, 0);
+                       if (size)
+                               return ERR_BAD_INPUT_ARG;
+               }
+               ((uint16_t *)in_buffer)[2] = rte_cpu_to_be_16(i);
+               g_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t);
+
+               /*
+                * Output Scatter List
+                */
+
+               i = 0;
+               scatter_comp =
+                       (sg_comp_t *)((uint8_t *)gather_comp + g_size_bytes);
+
+               if (flags == 0x1) {
+                       /* IV is placed in the SLIST only for EEA3 & UEA2 */
+                       iv_len = 0;
+               }
+
+               if (iv_len) {
+                       i = fill_sg_comp(scatter_comp, i,
+                                        offset_dma + OFF_CTRL_LEN, iv_len);
+               }
+
+               /* Add output data */
+               if (req_flags & VALID_MAC_BUF) {
+                       size = outputlen - iv_len - mac_len;
+                       if (size) {
+                               i = fill_sg_comp_from_iov(scatter_comp, i,
+                                                         params->dst_iov, 0,
+                                                         &size, NULL, 0);
+
+                               if (size)
+                                       return ERR_BAD_INPUT_ARG;
+                       }
+
+                       /* mac data */
+                       if (mac_len) {
+                               i = fill_sg_comp_from_buf(scatter_comp, i,
+                                                         &params->mac_buf);
+                       }
+               } else {
+                       /* Output including mac */
+                       size = outputlen - iv_len;
+                       if (size) {
+                               i = fill_sg_comp_from_iov(scatter_comp, i,
+                                                         params->dst_iov, 0,
+                                                         &size, NULL, 0);
+
+                               if (size)
+                                       return ERR_BAD_INPUT_ARG;
+                       }
+               }
+               ((uint16_t *)in_buffer)[3] = rte_cpu_to_be_16(i);
+               s_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t);
+
+               size = g_size_bytes + s_size_bytes + SG_LIST_HDR_SIZE;
+
+               /* This is the DPTR length in case of SG mode */
+               vq_cmd_w0.s.dlen = rte_cpu_to_be_16(size);
+
+               m_vaddr = (uint8_t *)m_vaddr + size;
+               m_dma += size;
+               m_size -= size;
+
+               /* cpt alternate completion address saved earlier */
+               req->alternate_caddr = (uint64_t *)((uint8_t *)c_vaddr - 8);
+               *req->alternate_caddr = ~((uint64_t)COMPLETION_CODE_INIT);
+               rptr_dma = c_dma - 8;
+
+               req->ist.ei1 = dptr_dma;
+               req->ist.ei2 = rptr_dma;
+       }
+
+       /* First 16-bit swap then 64-bit swap */
+       /* TODO: HACK: Reverse the vq_cmd and cpt_req bit field definitions
+        * to eliminate all the swapping
+        */
+       vq_cmd_w0.u64 = rte_cpu_to_be_64(vq_cmd_w0.u64);
+
+       /* vq command w3 */
+       vq_cmd_w3.u64 = 0;
+       vq_cmd_w3.s.grp = 0;
+       vq_cmd_w3.s.cptr = params->ctx_buf.dma_addr +
+               offsetof(struct cpt_ctx, zs_ctx);
+
+       /* 16 byte aligned cpt res address */
+       req->completion_addr = (uint64_t *)((uint8_t *)c_vaddr);
+       *req->completion_addr = COMPLETION_CODE_INIT;
+       req->comp_baddr  = c_dma;
+
+       /* Fill microcode part of instruction */
+       req->ist.ei0 = vq_cmd_w0.u64;
+       req->ist.ei3 = vq_cmd_w3.u64;
+
+       req->op = op;
+
+       *prep_req = req;
+       return 0;
+}
+
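+/*
+ * Prepare a CPT instruction for ZUC/SNOW 3G decryption. Only the cipher
+ * path (EEA3/UEA2) is handled here; there is no auth processing in this
+ * routine.
+ */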
+static __rte_always_inline int
+cpt_zuc_snow3g_dec_prep(uint32_t req_flags,
+                       uint64_t d_offs,
+                       uint64_t d_lens,
+                       fc_params_t *params,
+                       void *op,
+                       void **prep_req)
+{
+       uint32_t size;
+       int32_t inputlen = 0, outputlen;
+       struct cpt_ctx *cpt_ctx;
+       uint8_t snow3g, iv_len = 16;
+       struct cpt_request_info *req;
+       buf_ptr_t *buf_p;
+       uint32_t encr_offset;
+       uint32_t encr_data_len;
+       int flags, m_size;
+       void *m_vaddr, *c_vaddr;
+       uint64_t m_dma, c_dma;
+       uint64_t *offset_vaddr, offset_dma;
+       uint32_t *iv_s, iv[4], j;
+       vq_cmd_word0_t vq_cmd_w0;
+       vq_cmd_word3_t vq_cmd_w3;
+       opcode_info_t opcode;
+
+       buf_p = &params->meta_buf;
+       m_vaddr = buf_p->vaddr;
+       m_dma = buf_p->dma_addr;
+       m_size = buf_p->size;
+
+       /*
+        * Microcode expects offsets in bytes
+        * TODO: Rounding off
+        */
+       encr_offset = ENCR_OFFSET(d_offs) / 8;
+       encr_data_len = ENCR_DLEN(d_lens);
+
+       cpt_ctx = params->ctx_buf.vaddr;
+       flags = cpt_ctx->zsk_flags;
+       snow3g = cpt_ctx->snow3g;
+       /*
+        * Reserve the initial space, which follows the app data, for the
+        * completion code and the alternate completion code, so that both
+        * fall in the same cache line as the app data.
+        */
+       m_vaddr = (uint8_t *)m_vaddr + COMPLETION_CODE_SIZE;
+       m_dma += COMPLETION_CODE_SIZE;
+       size = (uint8_t *)RTE_PTR_ALIGN((uint8_t *)m_vaddr, 16) -
+               (uint8_t *)m_vaddr;
+
+       c_vaddr = (uint8_t *)m_vaddr + size;
+       c_dma = m_dma + size;
+       size += sizeof(cpt_res_s_t);
+
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       /* Reserve memory for cpt request info */
+       req = m_vaddr;
+
+       size = sizeof(struct cpt_request_info);
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       opcode.s.major = CPT_MAJOR_OP_ZUC_SNOW3G;
+
+       /* indicates CPTR ctx, operation type, KEY & IV mode from DPTR */
+       opcode.s.minor = ((1 << 6) | (snow3g << 5) | (0 << 4) |
+                         (0 << 3) | (flags & 0x7));
+
+       /* consider iv len */
+       encr_offset += iv_len;
+
+       inputlen = encr_offset +
+               (RTE_ALIGN(encr_data_len, 8) / 8);
+       outputlen = inputlen;
+
+       /* IV */
+       iv_s = params->iv_buf;
+       if (snow3g) {
+               /*
+                * DPDK provides the IV as IV3 IV2 IV1 IV0 in big-endian
+                * form; the microcode needs it as IV0 IV1 IV2 IV3, so
+                * reverse the word order.
+                */
+
+               for (j = 0; j < 4; j++)
+                       iv[j] = iv_s[3 - j];
+       } else {
+               /* ZUC doesn't need a swap */
+               for (j = 0; j < 4; j++)
+                       iv[j] = iv_s[j];
+       }
+
+       /*
+        * GP op header, lengths are expected in bits.
+        */
+       vq_cmd_w0.u64 = 0;
+       vq_cmd_w0.s.param1 = rte_cpu_to_be_16(encr_data_len);
+
+       /*
+        * On 83XX the IV and offset control word cannot be part of the
+        * instruction and must instead sit in the data buffer, so direct
+        * mode processing is done only when enough headroom is available.
+        */
+       if (likely((req_flags & SINGLE_BUF_INPLACE) &&
+                  (req_flags & SINGLE_BUF_HEADTAILROOM))) {
+               void *dm_vaddr = params->bufs[0].vaddr;
+               uint64_t dm_dma_addr = params->bufs[0].dma_addr;
+               /*
+                * These flags indicate that 24 bytes of headroom and 8
+                * bytes of tailroom are available, so the request can use
+                * DIRECT MODE within that limitation.
+                */
+
+               offset_vaddr = (uint64_t *)((uint8_t *)dm_vaddr -
+                                           OFF_CTRL_LEN - iv_len);
+               offset_dma = dm_dma_addr - OFF_CTRL_LEN - iv_len;
+
+               /* DPTR */
+               req->ist.ei1 = offset_dma;
+               /* RPTR should just exclude offset control word */
+               req->ist.ei2 = dm_dma_addr - iv_len;
+               req->alternate_caddr = (uint64_t *)((uint8_t *)dm_vaddr
+                                                   + outputlen - iv_len);
+
+               vq_cmd_w0.s.dlen = rte_cpu_to_be_16(inputlen + OFF_CTRL_LEN);
+
+               vq_cmd_w0.s.opcode = rte_cpu_to_be_16(opcode.flags);
+
+               if (likely(iv_len)) {
+                       uint32_t *iv_d = (uint32_t *)((uint8_t *)offset_vaddr
+                                                     + OFF_CTRL_LEN);
+                       memcpy(iv_d, iv, 16);
+               }
+
+               /* iv offset is 0 */
+               *offset_vaddr = rte_cpu_to_be_64((uint64_t)encr_offset << 16);
+       } else {
+               uint32_t i, g_size_bytes, s_size_bytes;
+               uint64_t dptr_dma, rptr_dma;
+               sg_comp_t *gather_comp;
+               sg_comp_t *scatter_comp;
+               uint8_t *in_buffer;
+               uint32_t *iv_d;
+
+               /* Save space for the offset control word and IV */
+               offset_vaddr = m_vaddr;
+               offset_dma = m_dma;
+
+               m_vaddr = (uint8_t *)m_vaddr + OFF_CTRL_LEN + iv_len;
+               m_dma += OFF_CTRL_LEN + iv_len;
+               m_size -= OFF_CTRL_LEN + iv_len;
+
+               opcode.s.major |= CPT_DMA_MODE;
+
+               vq_cmd_w0.s.opcode = rte_cpu_to_be_16(opcode.flags);
+
+               /* DPTR has SG list */
+               in_buffer = m_vaddr;
+               dptr_dma = m_dma;
+
+               ((uint16_t *)in_buffer)[0] = 0;
+               ((uint16_t *)in_buffer)[1] = 0;
+
+               /* TODO: add an error check that the space is sufficient */
+               gather_comp = (sg_comp_t *)((uint8_t *)m_vaddr + 8);
+
+               /*
+                * Input Gather List
+                */
+               i = 0;
+
+               /* Offset control word */
+
+               /* iv offset is 0 */
+               *offset_vaddr = rte_cpu_to_be_64((uint64_t)encr_offset << 16);
+
+               i = fill_sg_comp(gather_comp, i, offset_dma,
+                                OFF_CTRL_LEN + iv_len);
+
+               iv_d = (uint32_t *)((uint8_t *)offset_vaddr + OFF_CTRL_LEN);
+               memcpy(iv_d, iv, 16);
+
+               /* Add input data */
+               size = inputlen - iv_len;
+               if (size) {
+                       i = fill_sg_comp_from_iov(gather_comp, i,
+                                                 params->src_iov,
+                                                 0, &size, NULL, 0);
+                       if (size)
+                               return ERR_BAD_INPUT_ARG;
+               }
+               ((uint16_t *)in_buffer)[2] = rte_cpu_to_be_16(i);
+               g_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t);
+
+               /*
+                * Output Scatter List
+                */
+
+               i = 0;
+               scatter_comp =
+                       (sg_comp_t *)((uint8_t *)gather_comp + g_size_bytes);
+
+               /* IV */
+               i = fill_sg_comp(scatter_comp, i,
+                                offset_dma + OFF_CTRL_LEN,
+                                iv_len);
+
+               /* Add output data */
+               size = outputlen - iv_len;
+               if (size) {
+                       i = fill_sg_comp_from_iov(scatter_comp, i,
+                                                 params->dst_iov, 0,
+                                                 &size, NULL, 0);
+
+                       if (size)
+                               return ERR_BAD_INPUT_ARG;
+               }
+               ((uint16_t *)in_buffer)[3] = rte_cpu_to_be_16(i);
+               s_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t);
+
+               size = g_size_bytes + s_size_bytes + SG_LIST_HDR_SIZE;
+
+               /* This is the DPTR length in case of SG mode */
+               vq_cmd_w0.s.dlen = rte_cpu_to_be_16(size);
+
+               m_vaddr = (uint8_t *)m_vaddr + size;
+               m_dma += size;
+               m_size -= size;
+
+               /* cpt alternate completion address saved earlier */
+               req->alternate_caddr = (uint64_t *)((uint8_t *)c_vaddr - 8);
+               *req->alternate_caddr = ~((uint64_t)COMPLETION_CODE_INIT);
+               rptr_dma = c_dma - 8;
+
+               req->ist.ei1 = dptr_dma;
+               req->ist.ei2 = rptr_dma;
+       }
+
+       /* First 16-bit swap then 64-bit swap */
+       /* TODO: HACK: Reverse the vq_cmd and cpt_req bit field definitions
+        * to eliminate all the swapping
+        */
+       vq_cmd_w0.u64 = rte_cpu_to_be_64(vq_cmd_w0.u64);
+
+       /* vq command w3 */
+       vq_cmd_w3.u64 = 0;
+       vq_cmd_w3.s.grp = 0;
+       vq_cmd_w3.s.cptr = params->ctx_buf.dma_addr +
+               offsetof(struct cpt_ctx, zs_ctx);
+
+       /* 16 byte aligned cpt res address */
+       req->completion_addr = (uint64_t *)((uint8_t *)c_vaddr);
+       *req->completion_addr = COMPLETION_CODE_INIT;
+       req->comp_baddr  = c_dma;
+
+       /* Fill microcode part of instruction */
+       req->ist.ei0 = vq_cmd_w0.u64;
+       req->ist.ei3 = vq_cmd_w3.u64;
+
+       req->op = op;
+
+       *prep_req = req;
+       return 0;
+}
+
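+/*
+ * Prepare a CPT instruction for a KASUMI F8 (cipher) or F9 (auth)
+ * operation. KASUMI always takes the SG path: CPT_DMA_MODE is set in the
+ * major opcode and the IV travels in the gather list.
+ */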
+static __rte_always_inline int
+cpt_kasumi_enc_prep(uint32_t req_flags,
+                   uint64_t d_offs,
+                   uint64_t d_lens,
+                   fc_params_t *params,
+                   void *op,
+                   void **prep_req)
+{
+       uint32_t size;
+       int32_t inputlen = 0, outputlen = 0;
+       struct cpt_ctx *cpt_ctx;
+       uint32_t mac_len = 0;
+       uint8_t i = 0;
+       struct cpt_request_info *req;
+       buf_ptr_t *buf_p;
+       uint32_t encr_offset, auth_offset;
+       uint32_t encr_data_len, auth_data_len;
+       int flags, m_size;
+       uint8_t *iv_s, *iv_d, iv_len = 8;
+       uint8_t dir = 0;
+       void *m_vaddr, *c_vaddr;
+       uint64_t m_dma, c_dma;
+       uint64_t *offset_vaddr, offset_dma;
+       vq_cmd_word0_t vq_cmd_w0;
+       vq_cmd_word3_t vq_cmd_w3;
+       opcode_info_t opcode;
+       uint8_t *in_buffer;
+       uint32_t g_size_bytes, s_size_bytes;
+       uint64_t dptr_dma, rptr_dma;
+       sg_comp_t *gather_comp;
+       sg_comp_t *scatter_comp;
+
+       buf_p = &params->meta_buf;
+       m_vaddr = buf_p->vaddr;
+       m_dma = buf_p->dma_addr;
+       m_size = buf_p->size;
+
+       encr_offset = ENCR_OFFSET(d_offs) / 8;
+       auth_offset = AUTH_OFFSET(d_offs) / 8;
+       encr_data_len = ENCR_DLEN(d_lens);
+       auth_data_len = AUTH_DLEN(d_lens);
+
+       cpt_ctx = params->ctx_buf.vaddr;
+       flags = cpt_ctx->zsk_flags;
+       mac_len = cpt_ctx->mac_len;
+
+       if (flags == 0x0)
+               iv_s = params->iv_buf;
+       else
+               iv_s = params->auth_iv_buf;
+
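+       /* The direction bit is the LSB of the ninth IV byte. */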
+       dir = iv_s[8] & 0x1;
+
+       /*
+        * Reserve the initial space, which follows the app data, for the
+        * completion code and the alternate completion code, so that both
+        * fall in the same cache line as the app data.
+        */
+       m_vaddr = (uint8_t *)m_vaddr + COMPLETION_CODE_SIZE;
+       m_dma += COMPLETION_CODE_SIZE;
+       size = (uint8_t *)RTE_PTR_ALIGN((uint8_t *)m_vaddr, 16) -
+               (uint8_t *)m_vaddr;
+
+       c_vaddr = (uint8_t *)m_vaddr + size;
+       c_dma = m_dma + size;
+       size += sizeof(cpt_res_s_t);
+
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       /* Reserve memory for cpt request info */
+       req = m_vaddr;
+
+       size = sizeof(struct cpt_request_info);
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       opcode.s.major = CPT_MAJOR_OP_KASUMI | CPT_DMA_MODE;
+
+       /* indicates ECB/CBC, direction, ctx from cptr, iv from dptr */
+       opcode.s.minor = ((1 << 6) | (cpt_ctx->k_ecb << 5) |
+                         (dir << 4) | (0 << 3) | (flags & 0x7));
+
+       /*
+        * GP op header, lengths are expected in bits.
+        */
+       vq_cmd_w0.u64 = 0;
+       vq_cmd_w0.s.param1 = rte_cpu_to_be_16(encr_data_len);
+       vq_cmd_w0.s.param2 = rte_cpu_to_be_16(auth_data_len);
+       vq_cmd_w0.s.opcode = rte_cpu_to_be_16(opcode.flags);
+
+       /* consider iv len */
+       if (flags == 0x0) {
+               encr_offset += iv_len;
+               auth_offset += iv_len;
+       }
+
+       /* save space for offset ctrl and iv */
+       offset_vaddr = m_vaddr;
+       offset_dma = m_dma;
+
+       m_vaddr = (uint8_t *)m_vaddr + OFF_CTRL_LEN + iv_len;
+       m_dma += OFF_CTRL_LEN + iv_len;
+       m_size -= OFF_CTRL_LEN + iv_len;
+
+       /* DPTR has SG list */
+       in_buffer = m_vaddr;
+       dptr_dma = m_dma;
+
+       ((uint16_t *)in_buffer)[0] = 0;
+       ((uint16_t *)in_buffer)[1] = 0;
+
+       /* TODO: add an error check that the space is sufficient */
+       gather_comp = (sg_comp_t *)((uint8_t *)m_vaddr + 8);
+
+       /*
+        * Input Gather List
+        */
+       i = 0;
+
+       /* Offset control word followed by iv */
+
+       if (flags == 0x0) {
+               inputlen = encr_offset + (RTE_ALIGN(encr_data_len, 8) / 8);
+               outputlen = inputlen;
+               /* iv offset is 0 */
+               *offset_vaddr = rte_cpu_to_be_64((uint64_t)encr_offset << 16);
+       } else {
+               inputlen = auth_offset + (RTE_ALIGN(auth_data_len, 8) / 8);
+               outputlen = mac_len;
+               /* iv offset is 0 */
+               *offset_vaddr = rte_cpu_to_be_64((uint64_t)auth_offset);
+       }
+
+       i = fill_sg_comp(gather_comp, i, offset_dma, OFF_CTRL_LEN + iv_len);
+
+       /* IV */
+       iv_d = (uint8_t *)offset_vaddr + OFF_CTRL_LEN;
+       memcpy(iv_d, iv_s, iv_len);
+
+       /* input data */
+       size = inputlen - iv_len;
+       if (size) {
+               i = fill_sg_comp_from_iov(gather_comp, i,
+                                         params->src_iov, 0,
+                                         &size, NULL, 0);
+
+               if (size)
+                       return ERR_BAD_INPUT_ARG;
+       }
+       ((uint16_t *)in_buffer)[2] = rte_cpu_to_be_16(i);
+       g_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t);
+
+       /*
+        * Output Scatter List
+        */
+
+       i = 0;
+       scatter_comp = (sg_comp_t *)((uint8_t *)gather_comp + g_size_bytes);
+
+       if (flags == 0x1) {
+               /* IV is placed in the SLIST only for F8 */
+               iv_len = 0;
+       }
+
+       /* IV */
+       if (iv_len) {
+               i = fill_sg_comp(scatter_comp, i,
+                                offset_dma + OFF_CTRL_LEN,
+                                iv_len);
+       }
+
+       /* Add output data */
+       if (req_flags & VALID_MAC_BUF) {
+               size = outputlen - iv_len - mac_len;
+               if (size) {
+                       i = fill_sg_comp_from_iov(scatter_comp, i,
+                                                 params->dst_iov, 0,
+                                                 &size, NULL, 0);
+
+                       if (size)
+                               return ERR_BAD_INPUT_ARG;
+               }
+
+               /* mac data */
+               if (mac_len) {
+                       i = fill_sg_comp_from_buf(scatter_comp, i,
+                                                 &params->mac_buf);
+               }
+       } else {
+               /* Output including mac */
+               size = outputlen - iv_len;
+               if (size) {
+                       i = fill_sg_comp_from_iov(scatter_comp, i,
+                                                 params->dst_iov, 0,
+                                                 &size, NULL, 0);
+
+                       if (size)
+                               return ERR_BAD_INPUT_ARG;
+               }
+       }
+       ((uint16_t *)in_buffer)[3] = rte_cpu_to_be_16(i);
+       s_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t);
+
+       size = g_size_bytes + s_size_bytes + SG_LIST_HDR_SIZE;
+
+       /* This is the DPTR length in case of SG mode */
+       vq_cmd_w0.s.dlen = rte_cpu_to_be_16(size);
+
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       /* cpt alternate completion address saved earlier */
+       req->alternate_caddr = (uint64_t *)((uint8_t *)c_vaddr - 8);
+       *req->alternate_caddr = ~((uint64_t)COMPLETION_CODE_INIT);
+       rptr_dma = c_dma - 8;
+
+       req->ist.ei1 = dptr_dma;
+       req->ist.ei2 = rptr_dma;
+
+       /* First 16-bit swap then 64-bit swap */
+       /* TODO: HACK: Reverse the vq_cmd and cpt_req bit field definitions
+        * to eliminate all the swapping
+        */
+       vq_cmd_w0.u64 = rte_cpu_to_be_64(vq_cmd_w0.u64);
+
+       /* vq command w3 */
+       vq_cmd_w3.u64 = 0;
+       vq_cmd_w3.s.grp = 0;
+       vq_cmd_w3.s.cptr = params->ctx_buf.dma_addr +
+               offsetof(struct cpt_ctx, k_ctx);
+
+       /* 16 byte aligned cpt res address */
+       req->completion_addr = (uint64_t *)((uint8_t *)c_vaddr);
+       *req->completion_addr = COMPLETION_CODE_INIT;
+       req->comp_baddr  = c_dma;
+
+       /* Fill microcode part of instruction */
+       req->ist.ei0 = vq_cmd_w0.u64;
+       req->ist.ei3 = vq_cmd_w3.u64;
+
+       req->op = op;
+
+       *prep_req = req;
+       return 0;
+}
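+
+#ifdef CPT_UCODE_SG_MATH_EXAMPLE
+/*
+ * Editor's sketch, not driver code: each sg_comp_t bundles up to four
+ * buffer descriptors, so the ((i + 3) / 4) above is a ceiling division
+ * giving the number of components needed for 'i' entries. A minimal
+ * helper, assuming the four-slot sg_comp_t layout used by this driver:
+ */
+static inline uint32_t
+example_sg_list_bytes(uint32_t n_entries)
+{
+       /* e.g. 5 entries -> 2 components -> 2 * sizeof(sg_comp_t) bytes */
+       return ((n_entries + 3) / 4) * sizeof(sg_comp_t);
+}
+#endif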
+
+static __rte_always_inline int
+cpt_kasumi_dec_prep(uint64_t d_offs,
+                   uint64_t d_lens,
+                   fc_params_t *params,
+                   void *op,
+                   void **prep_req)
+{
+       uint32_t size;
+       int32_t inputlen = 0, outputlen;
+       struct cpt_ctx *cpt_ctx;
+       uint8_t i = 0, iv_len = 8;
+       struct cpt_request_info *req;
+       buf_ptr_t *buf_p;
+       uint32_t encr_offset;
+       uint32_t encr_data_len;
+       int flags, m_size;
+       uint8_t dir = 0;
+       void *m_vaddr, *c_vaddr;
+       uint64_t m_dma, c_dma;
+       uint64_t *offset_vaddr, offset_dma;
+       vq_cmd_word0_t vq_cmd_w0;
+       vq_cmd_word3_t vq_cmd_w3;
+       opcode_info_t opcode;
+       uint8_t *in_buffer;
+       uint32_t g_size_bytes, s_size_bytes;
+       uint64_t dptr_dma, rptr_dma;
+       sg_comp_t *gather_comp;
+       sg_comp_t *scatter_comp;
+
+       buf_p = &params->meta_buf;
+       m_vaddr = buf_p->vaddr;
+       m_dma = buf_p->dma_addr;
+       m_size = buf_p->size;
+
+       encr_offset = ENCR_OFFSET(d_offs) / 8;
+       encr_data_len = ENCR_DLEN(d_lens);
+
+       cpt_ctx = params->ctx_buf.vaddr;
+       flags = cpt_ctx->zsk_flags;
+       /*
+        * Save initial space following the app data so that the completion
+        * code and alternate completion code fall in the same cache line
+        * as the app data
+        */
+       m_vaddr = (uint8_t *)m_vaddr + COMPLETION_CODE_SIZE;
+       m_dma += COMPLETION_CODE_SIZE;
+       size = (uint8_t *)RTE_PTR_ALIGN((uint8_t *)m_vaddr, 16) -
+               (uint8_t *)m_vaddr;
+
+       c_vaddr = (uint8_t *)m_vaddr + size;
+       c_dma = m_dma + size;
+       size += sizeof(cpt_res_s_t);
+
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       /* Reserve memory for cpt request info */
+       req = m_vaddr;
+
+       size = sizeof(struct cpt_request_info);
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       opcode.s.major = CPT_MAJOR_OP_KASUMI | CPT_DMA_MODE;
+
+       /* indicates ECB/CBC, direction, ctx from cptr, iv from dptr */
+       opcode.s.minor = ((1 << 6) | (cpt_ctx->k_ecb << 5) |
+                         (dir << 4) | (0 << 3) | (flags & 0x7));
+
+       /*
+        * GP op header, lengths are expected in bits.
+        */
+       vq_cmd_w0.u64 = 0;
+       vq_cmd_w0.s.param1 = rte_cpu_to_be_16(encr_data_len);
+       vq_cmd_w0.s.opcode = rte_cpu_to_be_16(opcode.flags);
+
+       /* consider iv len */
+       encr_offset += iv_len;
+
+       inputlen = iv_len + (RTE_ALIGN(encr_data_len, 8) / 8);
+       outputlen = inputlen;
+
+       /* save space for offset ctrl & iv */
+       offset_vaddr = m_vaddr;
+       offset_dma = m_dma;
+
+       m_vaddr = (uint8_t *)m_vaddr + OFF_CTRL_LEN + iv_len;
+       m_dma += OFF_CTRL_LEN + iv_len;
+       m_size -= OFF_CTRL_LEN + iv_len;
+
+       /* DPTR has SG list */
+       in_buffer = m_vaddr;
+       dptr_dma = m_dma;
+
+       ((uint16_t *)in_buffer)[0] = 0;
+       ((uint16_t *)in_buffer)[1] = 0;
+
+       /* TODO Add error check if space will be sufficient */
+       gather_comp = (sg_comp_t *)((uint8_t *)m_vaddr + 8);
+
+       /*
+        * Input Gather List
+        */
+       i = 0;
+
+       /* Offset control word followed by iv */
+       *offset_vaddr = rte_cpu_to_be_64((uint64_t)encr_offset << 16);
+
+       i = fill_sg_comp(gather_comp, i, offset_dma, OFF_CTRL_LEN + iv_len);
+
+       /* IV */
+       memcpy((uint8_t *)offset_vaddr + OFF_CTRL_LEN,
+              params->iv_buf, iv_len);
+
+       /* Add input data */
+       size = inputlen - iv_len;
+       if (size) {
+               i = fill_sg_comp_from_iov(gather_comp, i,
+                                         params->src_iov,
+                                         0, &size, NULL, 0);
+               if (size)
+                       return ERR_BAD_INPUT_ARG;
+       }
+       ((uint16_t *)in_buffer)[2] = rte_cpu_to_be_16(i);
+       g_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t);
+
+       /*
+        * Output Scatter List
+        */
+
+       i = 0;
+       scatter_comp = (sg_comp_t *)((uint8_t *)gather_comp + g_size_bytes);
+
+       /* IV */
+       i = fill_sg_comp(scatter_comp, i,
+                        offset_dma + OFF_CTRL_LEN,
+                        iv_len);
+
+       /* Add output data */
+       size = outputlen - iv_len;
+       if (size) {
+               i = fill_sg_comp_from_iov(scatter_comp, i,
+                                         params->dst_iov, 0,
+                                         &size, NULL, 0);
+               if (size)
+                       return ERR_BAD_INPUT_ARG;
+       }
+       ((uint16_t *)in_buffer)[3] = rte_cpu_to_be_16(i);
+       s_size_bytes = ((i + 3) / 4) * sizeof(sg_comp_t);
+
+       size = g_size_bytes + s_size_bytes + SG_LIST_HDR_SIZE;
+
+       /* This is DPTR len in case of SG mode */
+       vq_cmd_w0.s.dlen = rte_cpu_to_be_16(size);
+
+       m_vaddr = (uint8_t *)m_vaddr + size;
+       m_dma += size;
+       m_size -= size;
+
+       /* cpt alternate completion address saved earlier */
+       req->alternate_caddr = (uint64_t *)((uint8_t *)c_vaddr - 8);
+       *req->alternate_caddr = ~((uint64_t)COMPLETION_CODE_INIT);
+       rptr_dma = c_dma - 8;
+
+       req->ist.ei1 = dptr_dma;
+       req->ist.ei2 = rptr_dma;
+
+       /* First 16-bit swap then 64-bit swap */
+       /* TODO: HACK: Reverse the vq_cmd and cpt_req bit field definitions
+        * to eliminate all the swapping
+        */
+       vq_cmd_w0.u64 = rte_cpu_to_be_64(vq_cmd_w0.u64);
+
+       /* vq command w3 */
+       vq_cmd_w3.u64 = 0;
+       vq_cmd_w3.s.grp = 0;
+       vq_cmd_w3.s.cptr = params->ctx_buf.dma_addr +
+               offsetof(struct cpt_ctx, k_ctx);
+
+       /* 16 byte aligned cpt res address */
+       req->completion_addr = (uint64_t *)((uint8_t *)c_vaddr);
+       *req->completion_addr = COMPLETION_CODE_INIT;
+       req->comp_baddr  = c_dma;
+
+       /* Fill microcode part of instruction */
+       req->ist.ei0 = vq_cmd_w0.u64;
+       req->ist.ei3 = vq_cmd_w3.u64;
+
+       req->op = op;
+
+       *prep_req = req;
+       return 0;
+}
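+
+#ifdef CPT_UCODE_OFFSET_CTRL_EXAMPLE
+/*
+ * Editor's sketch, an assumption drawn from the stores above: the
+ * 8-byte offset control word carries the encryption offset shifted to
+ * bits 16..31 before the big-endian conversion.
+ */
+static inline uint64_t
+example_offset_ctrl_word(uint16_t encr_offset)
+{
+       return rte_cpu_to_be_64((uint64_t)encr_offset << 16);
+}
+#endif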
+
+static __rte_always_inline void *
+cpt_fc_dec_hmac_prep(uint32_t flags,
+                    uint64_t d_offs,
+                    uint64_t d_lens,
+                    fc_params_t *fc_params,
+                    void *op, int *ret_val)
+{
+       struct cpt_ctx *ctx = fc_params->ctx_buf.vaddr;
+       uint8_t fc_type;
+       void *prep_req = NULL;
+       int ret;
+
+       fc_type = ctx->fc_type;
+
+       if (likely(fc_type == FC_GEN)) {
+               ret = cpt_dec_hmac_prep(flags, d_offs, d_lens,
+                                       fc_params, op, &prep_req);
+       } else if (fc_type == ZUC_SNOW3G) {
+               ret = cpt_zuc_snow3g_dec_prep(flags, d_offs, d_lens,
+                                             fc_params, op, &prep_req);
+       } else if (fc_type == KASUMI) {
+               ret = cpt_kasumi_dec_prep(d_offs, d_lens, fc_params, op,
+                                         &prep_req);
+       } else {
+               /*
+                * For AUTH_ONLY case,
+                * MC only supports digest generation and verification
+                * should be done in software by memcmp()
+                */
+
+               ret = ERR_EIO;
+       }
+
+       if (unlikely(!prep_req))
+               *ret_val = ret;
+       return prep_req;
+}
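+
+#ifdef CPT_UCODE_DISPATCH_EXAMPLE
+/*
+ * Editor's usage sketch (hypothetical caller): the dispatcher returns
+ * the prepared request on success and NULL on failure; *ret_val is
+ * written only on the NULL path, so callers should pre-initialise it.
+ */
+static inline void *
+example_dec_prep(uint32_t flags, uint64_t d_offs, uint64_t d_lens,
+                fc_params_t *fcp, void *op)
+{
+       int ret = 0;
+
+       return cpt_fc_dec_hmac_prep(flags, d_offs, d_lens, fcp, op, &ret);
+}
+#endif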
+
+static __rte_always_inline void *__hot
+cpt_fc_enc_hmac_prep(uint32_t flags, uint64_t d_offs, uint64_t d_lens,
+                    fc_params_t *fc_params, void *op, int *ret_val)
+{
+       struct cpt_ctx *ctx = fc_params->ctx_buf.vaddr;
+       uint8_t fc_type;
+       void *prep_req = NULL;
+       int ret;
+
+       fc_type = ctx->fc_type;
+
+       /* Common api for rest of the ops */
+       if (likely(fc_type == FC_GEN)) {
+               ret = cpt_enc_hmac_prep(flags, d_offs, d_lens,
+                                       fc_params, op, &prep_req);
+       } else if (fc_type == ZUC_SNOW3G) {
+               ret = cpt_zuc_snow3g_enc_prep(flags, d_offs, d_lens,
+                                             fc_params, op, &prep_req);
+       } else if (fc_type == KASUMI) {
+               ret = cpt_kasumi_enc_prep(flags, d_offs, d_lens,
+                                         fc_params, op, &prep_req);
+       } else if (fc_type == HASH_HMAC) {
+               ret = cpt_digest_gen_prep(flags, d_lens, fc_params, op,
+                                         &prep_req);
+       } else {
+               ret = ERR_EIO;
+       }
+
+       if (unlikely(!prep_req))
+               *ret_val = ret;
+       return prep_req;
+}
+
+static __rte_always_inline int
+cpt_fc_auth_set_key(void *ctx, auth_type_t type, uint8_t *key,
+                   uint16_t key_len, uint16_t mac_len)
+{
+       struct cpt_ctx *cpt_ctx = ctx;
+       mc_fc_context_t *fctx = &cpt_ctx->fctx;
+       uint64_t *ctrl_flags = NULL;
+
+       if ((type >= ZUC_EIA3) && (type <= KASUMI_F9_ECB)) {
+               uint32_t keyx[4];
+
+               if (key_len != 16)
+                       return -1;
+               /* No support for AEAD yet */
+               if (cpt_ctx->enc_cipher)
+                       return -1;
+               /* For ZUC/SNOW3G/Kasumi */
+               switch (type) {
+               case SNOW3G_UIA2:
+                       cpt_ctx->snow3g = 1;
+                       gen_key_snow3g(key, keyx);
+                       memcpy(cpt_ctx->zs_ctx.ci_key, keyx, key_len);
+                       cpt_ctx->fc_type = ZUC_SNOW3G;
+                       cpt_ctx->zsk_flags = 0x1;
+                       break;
+               case ZUC_EIA3:
+                       cpt_ctx->snow3g = 0;
+                       memcpy(cpt_ctx->zs_ctx.ci_key, key, key_len);
+                       memcpy(cpt_ctx->zs_ctx.zuc_const, zuc_d, 32);
+                       cpt_ctx->fc_type = ZUC_SNOW3G;
+                       cpt_ctx->zsk_flags = 0x1;
+                       break;
+               case KASUMI_F9_ECB:
+                       /* Kasumi ECB mode */
+                       cpt_ctx->k_ecb = 1;
+                       memcpy(cpt_ctx->k_ctx.ci_key, key, key_len);
+                       cpt_ctx->fc_type = KASUMI;
+                       cpt_ctx->zsk_flags = 0x1;
+                       break;
+               case KASUMI_F9_CBC:
+                       memcpy(cpt_ctx->k_ctx.ci_key, key, key_len);
+                       cpt_ctx->fc_type = KASUMI;
+                       cpt_ctx->zsk_flags = 0x1;
+                       break;
+               default:
+                       return -1;
+               }
+               cpt_ctx->mac_len = 4;
+               cpt_ctx->hash_type = type;
+               return 0;
+       }
+
+       if (!(cpt_ctx->fc_type == FC_GEN && !type)) {
+               if (!cpt_ctx->fc_type || !cpt_ctx->enc_cipher)
+                       cpt_ctx->fc_type = HASH_HMAC;
+       }
+
+       ctrl_flags = (uint64_t *)&fctx->enc.enc_ctrl.flags;
+       *ctrl_flags = rte_be_to_cpu_64(*ctrl_flags);
+
+       /* For GMAC auth, cipher must be NULL */
+       if (type == GMAC_TYPE)
+               CPT_P_ENC_CTRL(fctx).enc_cipher = 0;
+
+       CPT_P_ENC_CTRL(fctx).hash_type = cpt_ctx->hash_type = type;
+       CPT_P_ENC_CTRL(fctx).mac_len = cpt_ctx->mac_len = mac_len;
+
+       if (key_len) {
+               cpt_ctx->hmac = 1;
+               memset(cpt_ctx->auth_key, 0, sizeof(cpt_ctx->auth_key));
+               memcpy(cpt_ctx->auth_key, key, key_len);
+               cpt_ctx->auth_key_len = key_len;
+               memset(fctx->hmac.ipad, 0, sizeof(fctx->hmac.ipad));
+               memset(fctx->hmac.opad, 0, sizeof(fctx->hmac.opad));
+               memcpy(fctx->hmac.opad, key, key_len);
+               CPT_P_ENC_CTRL(fctx).auth_input_type = 1;
+       }
+       *ctrl_flags = rte_cpu_to_be_64(*ctrl_flags);
+       return 0;
+}
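+
+#ifdef CPT_UCODE_AUTH_KEY_EXAMPLE
+/*
+ * Editor's usage sketch with hypothetical values: HMAC-SHA1 with a
+ * 20-byte key and a 12-byte truncated MAC. For the wireless algos
+ * (ZUC_EIA3..KASUMI_F9_ECB) key_len must be exactly 16 and the MAC
+ * length is fixed to 4 by the function itself.
+ */
+static inline int
+example_set_sha1_hmac(struct cpt_ctx *ctx, uint8_t *key)
+{
+       return cpt_fc_auth_set_key(ctx, SHA1_TYPE, key, 20, 12);
+}
+#endif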
+
+static __rte_always_inline int
+fill_sess_aead(struct rte_crypto_sym_xform *xform,
+                struct cpt_sess_misc *sess)
+{
+       struct rte_crypto_aead_xform *aead_form;
+       cipher_type_t enc_type = 0; /* NULL Cipher type */
+       auth_type_t auth_type = 0; /* NULL Auth type */
+       uint32_t cipher_key_len = 0;
+       uint8_t zsk_flag = 0, aes_gcm = 0;
+       aead_form = &xform->aead;
+       void *ctx;
+
+       if (aead_form->op == RTE_CRYPTO_AEAD_OP_ENCRYPT &&
+          aead_form->algo == RTE_CRYPTO_AEAD_AES_GCM) {
+               sess->cpt_op |= CPT_OP_CIPHER_ENCRYPT;
+               sess->cpt_op |= CPT_OP_AUTH_GENERATE;
+       } else if (aead_form->op == RTE_CRYPTO_AEAD_OP_DECRYPT &&
+               aead_form->algo == RTE_CRYPTO_AEAD_AES_GCM) {
+               sess->cpt_op |= CPT_OP_CIPHER_DECRYPT;
+               sess->cpt_op |= CPT_OP_AUTH_VERIFY;
+       } else {
+               CPT_LOG_DP_ERR("Unknown cipher operation\n");
+               return -1;
+       }
+       switch (aead_form->algo) {
+       case RTE_CRYPTO_AEAD_AES_GCM:
+               enc_type = AES_GCM;
+               cipher_key_len = 16;
+               aes_gcm = 1;
+               break;
+       case RTE_CRYPTO_AEAD_AES_CCM:
+               CPT_LOG_DP_ERR("Crypto: Unsupported cipher algo %u",
+                              aead_form->algo);
+               return -1;
+       default:
+               CPT_LOG_DP_ERR("Crypto: Undefined cipher algo %u specified",
+                              aead_form->algo);
+               return -1;
+       }
+       if (aead_form->key.length < cipher_key_len) {
+               CPT_LOG_DP_ERR("Invalid cipher params keylen %lu",
+                              (unsigned long)aead_form->key.length);
+               return -1;
+       }
+       sess->zsk_flag = zsk_flag;
+       sess->aes_gcm = aes_gcm;
+       sess->mac_len = aead_form->digest_length;
+       sess->iv_offset = aead_form->iv.offset;
+       sess->iv_length = aead_form->iv.length;
+       sess->aad_length = aead_form->aad_length;
+       ctx = (void *)((uint8_t *)sess + sizeof(struct cpt_sess_misc));
+
+       cpt_fc_ciph_set_key(ctx, enc_type, aead_form->key.data,
+                       aead_form->key.length, NULL);
+
+       cpt_fc_auth_set_key(ctx, auth_type, NULL, 0, aead_form->digest_length);
+
+       return 0;
+}
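+
+/*
+ * Editor's note: the ctx computation above, sess plus
+ * sizeof(struct cpt_sess_misc), yields the same address as the
+ * SESS_PRIV() macro used by the other fill_sess_* helpers, e.g.:
+ *
+ *   void *ctx = SESS_PRIV(sess);
+ */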
+
+static __rte_always_inline int
+fill_sess_cipher(struct rte_crypto_sym_xform *xform,
+                struct cpt_sess_misc *sess)
+{
+       struct rte_crypto_cipher_xform *c_form;
+       cipher_type_t enc_type = 0; /* NULL Cipher type */
+       uint32_t cipher_key_len = 0;
+       uint8_t zsk_flag = 0, aes_gcm = 0, aes_ctr = 0, is_null = 0;
+
+       if (xform->type != RTE_CRYPTO_SYM_XFORM_CIPHER)
+               return -1;
+
+       c_form = &xform->cipher;
+
+       if (c_form->op == RTE_CRYPTO_CIPHER_OP_ENCRYPT)
+               sess->cpt_op |= CPT_OP_CIPHER_ENCRYPT;
+       else if (c_form->op == RTE_CRYPTO_CIPHER_OP_DECRYPT)
+               sess->cpt_op |= CPT_OP_CIPHER_DECRYPT;
+       else {
+               CPT_LOG_DP_ERR("Unknown cipher operation\n");
+               return -1;
+       }
+
+       switch (c_form->algo) {
+       case RTE_CRYPTO_CIPHER_AES_CBC:
+               enc_type = AES_CBC;
+               cipher_key_len = 16;
+               break;
+       case RTE_CRYPTO_CIPHER_3DES_CBC:
+               enc_type = DES3_CBC;
+               cipher_key_len = 24;
+               break;
+       case RTE_CRYPTO_CIPHER_DES_CBC:
+               /* DES is implemented using 3DES in hardware */
+               enc_type = DES3_CBC;
+               cipher_key_len = 8;
+               break;
+       case RTE_CRYPTO_CIPHER_AES_CTR:
+               enc_type = AES_CTR;
+               cipher_key_len = 16;
+               aes_ctr = 1;
+               break;
+       case RTE_CRYPTO_CIPHER_NULL:
+               enc_type = 0;
+               is_null = 1;
+               break;
+       case RTE_CRYPTO_CIPHER_KASUMI_F8:
+               enc_type = KASUMI_F8_ECB;
+               cipher_key_len = 16;
+               zsk_flag = K_F8;
+               break;
+       case RTE_CRYPTO_CIPHER_SNOW3G_UEA2:
+               enc_type = SNOW3G_UEA2;
+               cipher_key_len = 16;
+               zsk_flag = ZS_EA;
+               break;
+       case RTE_CRYPTO_CIPHER_ZUC_EEA3:
+               enc_type = ZUC_EEA3;
+               cipher_key_len = 16;
+               zsk_flag = ZS_EA;
+               break;
+       case RTE_CRYPTO_CIPHER_AES_XTS:
+               enc_type = AES_XTS;
+               cipher_key_len = 16;
+               break;
+       case RTE_CRYPTO_CIPHER_3DES_ECB:
+               enc_type = DES3_ECB;
+               cipher_key_len = 24;
+               break;
+       case RTE_CRYPTO_CIPHER_AES_ECB:
+               enc_type = AES_ECB;
+               cipher_key_len = 16;
+               break;
+       case RTE_CRYPTO_CIPHER_3DES_CTR:
+       case RTE_CRYPTO_CIPHER_AES_F8:
+       case RTE_CRYPTO_CIPHER_ARC4:
+               CPT_LOG_DP_ERR("Crypto: Unsupported cipher algo %u",
+                              c_form->algo);
+               return -1;
+       default:
+               CPT_LOG_DP_ERR("Crypto: Undefined cipher algo %u specified",
+                              c_form->algo);
+               return -1;
+       }
+
+       if (c_form->key.length < cipher_key_len) {
+               CPT_LOG_DP_ERR("Invalid cipher params keylen %lu",
+                              (unsigned long) c_form->key.length);
+               return -1;
+       }
+
+       sess->zsk_flag = zsk_flag;
+       sess->aes_gcm = aes_gcm;
+       sess->aes_ctr = aes_ctr;
+       sess->iv_offset = c_form->iv.offset;
+       sess->iv_length = c_form->iv.length;
+       sess->is_null = is_null;
+
+       cpt_fc_ciph_set_key(SESS_PRIV(sess), enc_type, c_form->key.data,
+                           c_form->key.length, NULL);
+
+       return 0;
+}
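+
+#ifdef CPT_UCODE_SESS_EXAMPLE
+/*
+ * Editor's usage sketch (hypothetical): populate a session from an
+ * AES-CBC-128 encrypt xform; key and IV fields are assumed to be set
+ * by the application.
+ */
+static inline int
+example_fill_aes_cbc(struct rte_crypto_sym_xform *xf,
+                    struct cpt_sess_misc *sess)
+{
+       xf->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+       xf->cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC;
+       xf->cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
+       return fill_sess_cipher(xf, sess);
+}
+#endif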
+
+static __rte_always_inline int
+fill_sess_auth(struct rte_crypto_sym_xform *xform,
+              struct cpt_sess_misc *sess)
+{
+       struct rte_crypto_auth_xform *a_form;
+       auth_type_t auth_type = 0; /* NULL Auth type */
+       uint8_t zsk_flag = 0, aes_gcm = 0, is_null = 0;
+
+       if (xform->type != RTE_CRYPTO_SYM_XFORM_AUTH)
+               goto error_out;
+
+       a_form = &xform->auth;
+
+       if (a_form->op == RTE_CRYPTO_AUTH_OP_VERIFY)
+               sess->cpt_op |= CPT_OP_AUTH_VERIFY;
+       else if (a_form->op == RTE_CRYPTO_AUTH_OP_GENERATE)
+               sess->cpt_op |= CPT_OP_AUTH_GENERATE;
+       else {
+               CPT_LOG_DP_ERR("Unknown auth operation");
+               return -1;
+       }
+
+       if (a_form->key.length > 64) {
+               CPT_LOG_DP_ERR("Auth key length is big");
+               return -1;
+       }
+
+       switch (a_form->algo) {
+       case RTE_CRYPTO_AUTH_SHA1_HMAC:
+               /* Fall through */
+       case RTE_CRYPTO_AUTH_SHA1:
+               auth_type = SHA1_TYPE;
+               break;
+       case RTE_CRYPTO_AUTH_SHA256_HMAC:
+       case RTE_CRYPTO_AUTH_SHA256:
+               auth_type = SHA2_SHA256;
+               break;
+       case RTE_CRYPTO_AUTH_SHA512_HMAC:
+       case RTE_CRYPTO_AUTH_SHA512:
+               auth_type = SHA2_SHA512;
+               break;
+       case RTE_CRYPTO_AUTH_AES_GMAC:
+               auth_type = GMAC_TYPE;
+               aes_gcm = 1;
+               break;
+       case RTE_CRYPTO_AUTH_SHA224_HMAC:
+       case RTE_CRYPTO_AUTH_SHA224:
+               auth_type = SHA2_SHA224;
+               break;
+       case RTE_CRYPTO_AUTH_SHA384_HMAC:
+       case RTE_CRYPTO_AUTH_SHA384:
+               auth_type = SHA2_SHA384;
+               break;
+       case RTE_CRYPTO_AUTH_MD5_HMAC:
+       case RTE_CRYPTO_AUTH_MD5:
+               auth_type = MD5_TYPE;
+               break;
+       case RTE_CRYPTO_AUTH_KASUMI_F9:
+               auth_type = KASUMI_F9_ECB;
+               /*
+                * Indicate that direction needs to be taken out
+                * from end of src
+                */
+               zsk_flag = K_F9;
+               break;
+       case RTE_CRYPTO_AUTH_SNOW3G_UIA2:
+               auth_type = SNOW3G_UIA2;
+               zsk_flag = ZS_IA;
+               break;
+       case RTE_CRYPTO_AUTH_ZUC_EIA3:
+               auth_type = ZUC_EIA3;
+               zsk_flag = ZS_IA;
+               break;
+       case RTE_CRYPTO_AUTH_NULL:
+               auth_type = 0;
+               is_null = 1;
+               break;
+       case RTE_CRYPTO_AUTH_AES_XCBC_MAC:
+       case RTE_CRYPTO_AUTH_AES_CMAC:
+       case RTE_CRYPTO_AUTH_AES_CBC_MAC:
+               CPT_LOG_DP_ERR("Crypto: Unsupported hash algo %u",
+                              a_form->algo);
+               goto error_out;
+       default:
+               CPT_LOG_DP_ERR("Crypto: Undefined Hash algo %u specified",
+                              a_form->algo);
+               goto error_out;
+       }
+
+       sess->zsk_flag = zsk_flag;
+       sess->aes_gcm = aes_gcm;
+       sess->mac_len = a_form->digest_length;
+       sess->is_null = is_null;
+       if (zsk_flag) {
+               sess->auth_iv_offset = a_form->iv.offset;
+               sess->auth_iv_length = a_form->iv.length;
+       }
+       cpt_fc_auth_set_key(SESS_PRIV(sess), auth_type, a_form->key.data,
+                           a_form->key.length, a_form->digest_length);
+
+       return 0;
+
+error_out:
+       return -1;
+}
+
+static __rte_always_inline int
+fill_sess_gmac(struct rte_crypto_sym_xform *xform,
+                struct cpt_sess_misc *sess)
+{
+       struct rte_crypto_auth_xform *a_form;
+       cipher_type_t enc_type = 0; /* NULL Cipher type */
+       auth_type_t auth_type = 0; /* NULL Auth type */
+       uint8_t zsk_flag = 0, aes_gcm = 0;
+       void *ctx;
+
+       if (xform->type != RTE_CRYPTO_SYM_XFORM_AUTH)
+               return -1;
+
+       a_form = &xform->auth;
+
+       if (a_form->op == RTE_CRYPTO_AUTH_OP_GENERATE)
+               sess->cpt_op |= CPT_OP_ENCODE;
+       else if (a_form->op == RTE_CRYPTO_AUTH_OP_VERIFY)
+               sess->cpt_op |= CPT_OP_DECODE;
+       else {
+               CPT_LOG_DP_ERR("Unknown auth operation");
+               return -1;
+       }
+
+       switch (a_form->algo) {
+       case RTE_CRYPTO_AUTH_AES_GMAC:
+               enc_type = AES_GCM;
+               auth_type = GMAC_TYPE;
+               break;
+       default:
+               CPT_LOG_DP_ERR("Crypto: Undefined cipher algo %u specified",
+                              a_form->algo);
+               return -1;
+       }
+
+       sess->zsk_flag = zsk_flag;
+       sess->aes_gcm = aes_gcm;
+       sess->is_gmac = 1;
+       sess->iv_offset = a_form->iv.offset;
+       sess->iv_length = a_form->iv.length;
+       sess->mac_len = a_form->digest_length;
+       ctx = (void *)((uint8_t *)sess + sizeof(struct cpt_sess_misc));
+
+       cpt_fc_ciph_set_key(ctx, enc_type, a_form->key.data,
+                       a_form->key.length, NULL);
+       cpt_fc_auth_set_key(ctx, auth_type, NULL, 0, a_form->digest_length);
+
+       return 0;
+}
+
+static __rte_always_inline void *
+alloc_op_meta(struct rte_mbuf *m_src,
+             buf_ptr_t *buf,
+             int32_t len,
+             struct rte_mempool *cpt_meta_pool)
+{
+       uint8_t *mdata;
+
+#ifndef CPT_ALWAYS_USE_SEPARATE_BUF
+       if (likely(m_src && (m_src->nb_segs == 1))) {
+               int32_t tailroom;
+               phys_addr_t mphys;
+
+               /* Check if tailroom is sufficient to hold meta data */
+               tailroom = rte_pktmbuf_tailroom(m_src);
+               if (likely(tailroom > len + 8)) {
+                       mdata = (uint8_t *)m_src->buf_addr + m_src->buf_len;
+                       mphys = m_src->buf_physaddr + m_src->buf_len;
+                       mdata -= len;
+                       mphys -= len;
+                       buf->vaddr = mdata;
+                       buf->dma_addr = mphys;
+                       buf->size = len;
+                       /* Indicate that this is a mbuf allocated mdata */
+                       mdata = (uint8_t *)((uint64_t)mdata | 1ull);
+                       return mdata;
+               }
+       }
+#else
+       RTE_SET_USED(m_src);
+#endif
+
+       if (unlikely(rte_mempool_get(cpt_meta_pool, (void **)&mdata) < 0))
+               return NULL;
+
+       buf->vaddr = mdata;
+       buf->dma_addr = rte_mempool_virt2iova(mdata);
+       buf->size = len;
+
+       return mdata;
+}
+
+/**
+ * free_op_meta - return a meta buffer to its mempool, unless it was
+ * carved from mbuf tailroom (indicated by bit 0 of the pointer).
+ * @param mdata: pointer to the meta buffer.
+ * @param cpt_meta_pool: mempool the meta buffer came from.
+ */
+static __rte_always_inline void
+free_op_meta(void *mdata, struct rte_mempool *cpt_meta_pool)
+{
+       bool nofree = ((uintptr_t)mdata & 1ull);
+
+       if (likely(nofree))
+               return;
+       rte_mempool_put(cpt_meta_pool, mdata);
+}
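+
+#ifdef CPT_UCODE_META_TAG_EXAMPLE
+/*
+ * Editor's sketch: alloc_op_meta() tags tailroom-based allocations by
+ * setting bit 0 of the returned pointer (real meta addresses are
+ * assumed never to have that bit set), and free_op_meta() keys off it.
+ * The usable address is recovered by masking the bit, exactly as
+ * fill_fc_params() does below.
+ */
+static inline void *
+example_meta_clear_tag(void *mdata)
+{
+       return (void *)((uintptr_t)mdata & ~(uintptr_t)1ull);
+}
+#endif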
+
+static __rte_always_inline uint32_t
+prepare_iov_from_pkt(struct rte_mbuf *pkt,
+                    iov_ptr_t *iovec, uint32_t start_offset)
+{
+       uint16_t index = 0;
+       void *seg_data = NULL;
+       phys_addr_t seg_phys;
+       int32_t seg_size = 0;
+
+       if (!pkt) {
+               iovec->buf_cnt = 0;
+               return 0;
+       }
+
+       if (!start_offset) {
+               seg_data = rte_pktmbuf_mtod(pkt, void *);
+               seg_phys = rte_pktmbuf_mtophys(pkt);
+               seg_size = pkt->data_len;
+       } else {
+               while (start_offset >= pkt->data_len) {
+                       start_offset -= pkt->data_len;
+                       pkt = pkt->next;
+               }
+
+               seg_data = rte_pktmbuf_mtod_offset(pkt, void *, start_offset);
+               seg_phys = rte_pktmbuf_mtophys_offset(pkt, start_offset);
+               seg_size = pkt->data_len - start_offset;
+               if (!seg_size)
+                       return 1;
+       }
+
+       /* first seg */
+       iovec->bufs[index].vaddr = seg_data;
+       iovec->bufs[index].dma_addr = seg_phys;
+       iovec->bufs[index].size = seg_size;
+       index++;
+       pkt = pkt->next;
+
+       while (unlikely(pkt != NULL)) {
+               seg_data = rte_pktmbuf_mtod(pkt, void *);
+               seg_phys = rte_pktmbuf_mtophys(pkt);
+               seg_size = pkt->data_len;
+               if (!seg_size)
+                       break;
+
+               iovec->bufs[index].vaddr = seg_data;
+               iovec->bufs[index].dma_addr = seg_phys;
+               iovec->bufs[index].size = seg_size;
+
+               index++;
+
+               pkt = pkt->next;
+       }
+
+       iovec->buf_cnt = index;
+       return 0;
+}
+
+static __rte_always_inline uint32_t
+prepare_iov_from_pkt_inplace(struct rte_mbuf *pkt,
+                            fc_params_t *param,
+                            uint32_t *flags)
+{
+       uint16_t index = 0;
+       void *seg_data = NULL;
+       phys_addr_t seg_phys;
+       uint32_t seg_size = 0;
+       iov_ptr_t *iovec;
+
+       seg_data = rte_pktmbuf_mtod(pkt, void *);
+       seg_phys = rte_pktmbuf_mtophys(pkt);
+       seg_size = pkt->data_len;
+
+       /* first seg */
+       if (likely(!pkt->next)) {
+               uint32_t headroom, tailroom;
+
+               *flags |= SINGLE_BUF_INPLACE;
+               headroom = rte_pktmbuf_headroom(pkt);
+               tailroom = rte_pktmbuf_tailroom(pkt);
+               if (likely((headroom >= 24) &&
+                   (tailroom >= 8))) {
+                       /* On 83xx this is a prerequisite for direct mode */
+                       *flags |= SINGLE_BUF_HEADTAILROOM;
+               }
+               param->bufs[0].vaddr = seg_data;
+               param->bufs[0].dma_addr = seg_phys;
+               param->bufs[0].size = seg_size;
+               return 0;
+       }
+       iovec = param->src_iov;
+       iovec->bufs[index].vaddr = seg_data;
+       iovec->bufs[index].dma_addr = seg_phys;
+       iovec->bufs[index].size = seg_size;
+       index++;
+       pkt = pkt->next;
+
+       while (unlikely(pkt != NULL)) {
+               seg_data = rte_pktmbuf_mtod(pkt, void *);
+               seg_phys = rte_pktmbuf_mtophys(pkt);
+               seg_size = pkt->data_len;
+
+               if (!seg_size)
+                       break;
+
+               iovec->bufs[index].vaddr = seg_data;
+               iovec->bufs[index].dma_addr = seg_phys;
+               iovec->bufs[index].size = seg_size;
+
+               index++;
+
+               pkt = pkt->next;
+       }
+
+       iovec->buf_cnt = index;
+       return 0;
+}
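+
+#ifdef CPT_UCODE_INPLACE_EXAMPLE
+/*
+ * Editor's sketch: a predicate equivalent to the flag logic above. A
+ * single-segment mbuf is always marked SINGLE_BUF_INPLACE, and it also
+ * qualifies for SINGLE_BUF_HEADTAILROOM (83xx direct mode) only when
+ * it has at least 24 bytes of headroom and 8 bytes of tailroom.
+ */
+static inline int
+example_can_use_direct_mode(struct rte_mbuf *pkt)
+{
+       return pkt->next == NULL &&
+              rte_pktmbuf_headroom(pkt) >= 24 &&
+              rte_pktmbuf_tailroom(pkt) >= 8;
+}
+#endif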
+
+static __rte_always_inline void *
+fill_fc_params(struct rte_crypto_op *cop,
+              struct cpt_sess_misc *sess_misc,
+              void **mdata_ptr,
+              int *op_ret)
+{
+       uint32_t space = 0;
+       struct rte_crypto_sym_op *sym_op = cop->sym;
+       void *mdata;
+       uintptr_t *op;
+       uint32_t mc_hash_off;
+       uint32_t flags = 0;
+       uint64_t d_offs, d_lens;
+       void *prep_req = NULL;
+       struct rte_mbuf *m_src, *m_dst;
+       uint8_t cpt_op = sess_misc->cpt_op;
+       uint8_t zsk_flag = sess_misc->zsk_flag;
+       uint8_t aes_gcm = sess_misc->aes_gcm;
+       uint16_t mac_len = sess_misc->mac_len;
+#ifdef CPT_ALWAYS_USE_SG_MODE
+       uint8_t inplace = 0;
+#else
+       uint8_t inplace = 1;
+#endif
+       fc_params_t fc_params;
+       char src[SRC_IOV_SIZE];
+       char dst[SRC_IOV_SIZE];
+       uint32_t iv_buf[4];
+       struct cptvf_meta_info *cpt_m_info =
+                               (struct cptvf_meta_info *)(*mdata_ptr);
+
+       if (likely(sess_misc->iv_length)) {
+               flags |= VALID_IV_BUF;
+               fc_params.iv_buf = rte_crypto_op_ctod_offset(cop,
+                                  uint8_t *, sess_misc->iv_offset);
+               if (sess_misc->aes_ctr &&
+                   unlikely(sess_misc->iv_length != 16)) {
+                       memcpy((uint8_t *)iv_buf,
+                               rte_crypto_op_ctod_offset(cop,
+                               uint8_t *, sess_misc->iv_offset), 12);
+                       iv_buf[3] = rte_cpu_to_be_32(0x1);
+                       fc_params.iv_buf = iv_buf;
+               }
+       }
+
+       if (zsk_flag) {
+               fc_params.auth_iv_buf = rte_crypto_op_ctod_offset(cop,
+                                       uint8_t *,
+                                       sess_misc->auth_iv_offset);
+               if (zsk_flag == K_F9) {
+                       CPT_LOG_DP_ERR("Should not reach here for "
+                       "kasumi F9\n");
+               }
+               if (zsk_flag != ZS_EA)
+                       inplace = 0;
+       }
+       m_src = sym_op->m_src;
+       m_dst = sym_op->m_dst;
+
+       if (aes_gcm) {
+               uint8_t *salt;
+               uint8_t *aad_data;
+               uint16_t aad_len;
+
+               d_offs = sym_op->aead.data.offset;
+               d_lens = sym_op->aead.data.length;
+               mc_hash_off = sym_op->aead.data.offset +
+                             sym_op->aead.data.length;
+
+               aad_data = sym_op->aead.aad.data;
+               aad_len = sess_misc->aad_length;
+               if (likely((aad_data + aad_len) ==
+                          rte_pktmbuf_mtod_offset(m_src,
+                               uint8_t *,
+                               sym_op->aead.data.offset))) {
+                       d_offs = (d_offs - aad_len) | (d_offs << 16);
+                       d_lens = (d_lens + aad_len) | (d_lens << 32);
+               } else {
+                       fc_params.aad_buf.vaddr = sym_op->aead.aad.data;
+                       fc_params.aad_buf.dma_addr = sym_op->aead.aad.phys_addr;
+                       fc_params.aad_buf.size = aad_len;
+                       flags |= VALID_AAD_BUF;
+                       inplace = 0;
+                       d_offs = d_offs << 16;
+                       d_lens = d_lens << 32;
+               }
+
+               salt = fc_params.iv_buf;
+               if (unlikely(*(uint32_t *)salt != sess_misc->salt)) {
+                       cpt_fc_salt_update(SESS_PRIV(sess_misc), salt);
+                       sess_misc->salt = *(uint32_t *)salt;
+               }
+               fc_params.iv_buf = salt + 4;
+               if (likely(mac_len)) {
+                       struct rte_mbuf *m = (cpt_op & CPT_OP_ENCODE) ? m_dst :
+                                            m_src;
+
+                       if (!m)
+                               m = m_src;
+
+                       /* hmac immediately following data is best case */
+                       if (unlikely(rte_pktmbuf_mtod(m, uint8_t *) +
+                           mc_hash_off !=
+                           (uint8_t *)sym_op->aead.digest.data)) {
+                               flags |= VALID_MAC_BUF;
+                               fc_params.mac_buf.size = sess_misc->mac_len;
+                               fc_params.mac_buf.vaddr =
+                                 sym_op->aead.digest.data;
+                               fc_params.mac_buf.dma_addr =
+                                sym_op->aead.digest.phys_addr;
+                               inplace = 0;
+                       }
+               }
+       } else {
+               d_offs = sym_op->cipher.data.offset;
+               d_lens = sym_op->cipher.data.length;
+               mc_hash_off = sym_op->cipher.data.offset +
+                             sym_op->cipher.data.length;
+               d_offs = (d_offs << 16) | sym_op->auth.data.offset;
+               d_lens = (d_lens << 32) | sym_op->auth.data.length;
+
+               if (mc_hash_off < (sym_op->auth.data.offset +
+                                  sym_op->auth.data.length)){
+                       mc_hash_off = (sym_op->auth.data.offset +
+                                      sym_op->auth.data.length);
+               }
+               /* for gmac, salt should be updated like in gcm */
+               if (unlikely(sess_misc->is_gmac)) {
+                       uint8_t *salt;
+                       salt = fc_params.iv_buf;
+                       if (unlikely(*(uint32_t *)salt != sess_misc->salt)) {
+                               cpt_fc_salt_update(SESS_PRIV(sess_misc), salt);
+                               sess_misc->salt = *(uint32_t *)salt;
+                       }
+                       fc_params.iv_buf = salt + 4;
+               }
+               if (likely(mac_len)) {
+                       struct rte_mbuf *m;
+
+                       m = (cpt_op & CPT_OP_ENCODE) ? m_dst : m_src;
+                       if (!m)
+                               m = m_src;
+
+                       /* hmac immediately following data is best case */
+                       if (unlikely(rte_pktmbuf_mtod(m, uint8_t *) +
+                           mc_hash_off !=
+                            (uint8_t *)sym_op->auth.digest.data)) {
+                               flags |= VALID_MAC_BUF;
+                               fc_params.mac_buf.size =
+                                       sess_misc->mac_len;
+                               fc_params.mac_buf.vaddr =
+                                       sym_op->auth.digest.data;
+                               fc_params.mac_buf.dma_addr =
+                               sym_op->auth.digest.phys_addr;
+                               inplace = 0;
+                       }
+               }
+       }
+       fc_params.ctx_buf.vaddr = SESS_PRIV(sess_misc);
+       fc_params.ctx_buf.dma_addr = sess_misc->ctx_dma_addr;
+
+       if (unlikely(sess_misc->is_null || sess_misc->cpt_op == CPT_OP_DECODE))
+               inplace = 0;
+
+       if (likely(!m_dst && inplace)) {
+               /* In-place case: a single buffer with no separate AAD or
+                * MAC buffer, and not a wireless (air-interface) algorithm
+                */
+               fc_params.dst_iov = fc_params.src_iov = (void *)src;
+
+               if (unlikely(prepare_iov_from_pkt_inplace(m_src,
+                                                         &fc_params,
+                                                         &flags))) {
+                       CPT_LOG_DP_ERR("Prepare inplace src iov failed");
+                       *op_ret = -1;
+                       return NULL;
+               }
+
+       } else {
+               /* Out of place processing */
+               fc_params.src_iov = (void *)src;
+               fc_params.dst_iov = (void *)dst;
+
+               /* Store SG I/O in the API for reuse */
+               if (prepare_iov_from_pkt(m_src, fc_params.src_iov, 0)) {
+                       CPT_LOG_DP_ERR("Prepare src iov failed");
+                       *op_ret = -1;
+                       return NULL;
+               }
+
+               if (unlikely(m_dst != NULL)) {
+                       uint32_t pkt_len;
+
+                       /* Make as much room in m_dst as the source occupies */
+                       m_dst = sym_op->m_dst;
+                       pkt_len = rte_pktmbuf_pkt_len(m_dst);
+
+                       if (unlikely(pkt_len < rte_pktmbuf_pkt_len(m_src))) {
+                               pkt_len = rte_pktmbuf_pkt_len(m_src) - pkt_len;
+                               if (!rte_pktmbuf_append(m_dst, pkt_len)) {
+                                       CPT_LOG_DP_ERR("Not enough space in "
+                                                      "m_dst %p, need %u"
+                                                      " more",
+                                                      m_dst, pkt_len);
+                                       return NULL;
+                               }
+                       }
+
+                       if (prepare_iov_from_pkt(m_dst, fc_params.dst_iov, 0)) {
+                               CPT_LOG_DP_ERR("Prepare dst iov failed for "
+                                              "m_dst %p", m_dst);
+                               return NULL;
+                       }
+               } else {
+                       fc_params.dst_iov = (void *)src;
+               }
+       }
+
+       if (likely(flags & SINGLE_BUF_HEADTAILROOM))
+               mdata = alloc_op_meta(m_src,
+                                     &fc_params.meta_buf,
+                                     cpt_m_info->cptvf_op_sb_mlen,
+                                     cpt_m_info->cptvf_meta_pool);
+       else
+               mdata = alloc_op_meta(NULL,
+                                     &fc_params.meta_buf,
+                                     cpt_m_info->cptvf_op_mlen,
+                                     cpt_m_info->cptvf_meta_pool);
+
+       if (unlikely(mdata == NULL)) {
+               CPT_LOG_DP_ERR("Error allocating meta buffer for request");
+               return NULL;
+       }
+
+       op = (uintptr_t *)((uintptr_t)mdata & (uintptr_t)~1ull);
+       op[0] = (uintptr_t)mdata;
+       op[1] = (uintptr_t)cop;
+       op[2] = op[3] = 0; /* Used to indicate auth verify */
+       space += 4 * sizeof(uint64_t);
+
+       fc_params.meta_buf.vaddr = (uint8_t *)op + space;
+       fc_params.meta_buf.dma_addr += space;
+       fc_params.meta_buf.size -= space;
+
+       /* Finally prepare the instruction */
+       if (cpt_op & CPT_OP_ENCODE)
+               prep_req = cpt_fc_enc_hmac_prep(flags, d_offs, d_lens,
+                                               &fc_params, op, op_ret);
+       else
+               prep_req = cpt_fc_dec_hmac_prep(flags, d_offs, d_lens,
+                                               &fc_params, op, op_ret);
+
+       if (unlikely(!prep_req))
+               free_op_meta(mdata, cpt_m_info->cptvf_meta_pool);
+       *mdata_ptr = mdata;
+       return prep_req;
+}
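+
+#ifdef CPT_UCODE_OP_LAYOUT_EXAMPLE
+/*
+ * Editor's sketch of the four-word scratch area carved from the meta
+ * buffer above; an assumption drawn from the stores into op[0..3] here
+ * and in fill_digest_params() below.
+ */
+struct example_op_scratch {
+       uint64_t mdata;   /* op[0]: meta buffer, freed on completion */
+       uint64_t cop;     /* op[1]: the rte_crypto_op in flight */
+       uint64_t mac;     /* op[2]: generated MAC addr, auth verify only */
+       uint64_t mac_len; /* op[3]: its length, 0 when not verifying */
+};
+#endif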
+
+static __rte_always_inline void
+compl_auth_verify(struct rte_crypto_op *op,
+                     uint8_t *gen_mac,
+                     uint64_t mac_len)
+{
+       uint8_t *mac;
+       struct rte_crypto_sym_op *sym_op = op->sym;
+
+       if (sym_op->auth.digest.data)
+               mac = sym_op->auth.digest.data;
+       else
+               mac = rte_pktmbuf_mtod_offset(sym_op->m_src,
+                                             uint8_t *,
+                                             sym_op->auth.data.length +
+                                             sym_op->auth.data.offset);
+       if (!mac) {
+               op->status = RTE_CRYPTO_OP_STATUS_ERROR;
+               return;
+       }
+
+       if (memcmp(mac, gen_mac, mac_len))
+               op->status = RTE_CRYPTO_OP_STATUS_AUTH_FAILED;
+       else
+               op->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
+}
+
+static __rte_always_inline int
+instance_session_cfg(struct rte_crypto_sym_xform *xform, void *sess)
+{
+       struct rte_crypto_sym_xform *chain;
+
+       CPT_PMD_INIT_FUNC_TRACE();
+
+       if (cpt_is_algo_supported(xform))
+               goto err;
+
+       chain = xform;
+       while (chain) {
+               switch (chain->type) {
+               case RTE_CRYPTO_SYM_XFORM_AEAD:
+                       if (fill_sess_aead(chain, sess))
+                               goto err;
+                       break;
+               case RTE_CRYPTO_SYM_XFORM_CIPHER:
+                       if (fill_sess_cipher(chain, sess))
+                               goto err;
+                       break;
+               case RTE_CRYPTO_SYM_XFORM_AUTH:
+                       if (chain->auth.algo == RTE_CRYPTO_AUTH_AES_GMAC) {
+                               if (fill_sess_gmac(chain, sess))
+                                       goto err;
+                       } else {
+                               if (fill_sess_auth(chain, sess))
+                                       goto err;
+                       }
+                       break;
+               default:
+                       CPT_LOG_DP_ERR("Invalid crypto xform type");
+                       break;
+               }
+               chain = chain->next;
+       }
+
+       return 0;
+
+err:
+       return -1;
+}
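+
+#ifdef CPT_UCODE_CHAIN_EXAMPLE
+/*
+ * Editor's usage sketch (hypothetical): xforms arrive as a linked
+ * chain, e.g. cipher followed by auth; the walker above configures the
+ * session from each link in turn.
+ */
+static inline int
+example_cfg_cipher_auth(struct rte_crypto_sym_xform *cipher_xf,
+                       struct rte_crypto_sym_xform *auth_xf, void *sess)
+{
+       cipher_xf->next = auth_xf;
+       auth_xf->next = NULL;
+       return instance_session_cfg(cipher_xf, sess);
+}
+#endif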
+
+static __rte_always_inline void
+find_kasumif9_direction_and_length(uint8_t *src,
+                                  uint32_t counter_num_bytes,
+                                  uint32_t *addr_length_in_bits,
+                                  uint8_t *addr_direction)
+{
+       uint8_t found = 0;
+       while (!found && counter_num_bytes > 0) {
+               counter_num_bytes--;
+               if (src[counter_num_bytes] == 0x00)
+                       continue;
+               if (src[counter_num_bytes] == 0x80) {
+                       *addr_direction  =  src[counter_num_bytes - 1] & 0x1;
+                       *addr_length_in_bits = counter_num_bytes * 8  - 1;
+                       found = 1;
+               } else {
+                       int i = 0;
+                       uint8_t last_byte = src[counter_num_bytes];
+                       for (i = 0; i < 8 && found == 0; i++) {
+                               if (last_byte & (1 << i)) {
+                                       *addr_direction = (last_byte >> (i+1))
+                                                         & 0x1;
+                                       if (i != 6)
+                                               *addr_length_in_bits =
+                                                       counter_num_bytes * 8
+                                                       + (8 - (i + 2));
+                                       else
+                                               *addr_length_in_bits =
+                                                       counter_num_bytes * 8;
+                                       found = 1;
+                                       }
+                               }
+                       }
+       }
+}
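+
+#ifdef CPT_UCODE_KF9_EXAMPLE
+/*
+ * Editor's worked example on a hypothetical buffer: KASUMI f9 input
+ * ends with <direction bit><'1' padding bit><zero padding>. When the
+ * last non-zero byte is 0x80 at index k, the padding '1' is its MSB,
+ * the direction is the LSB of byte k - 1, and the payload length is
+ * k * 8 - 1 bits, matching the first branch above.
+ */
+static inline void
+example_kf9_scan(void)
+{
+       uint8_t buf[4] = { 0xAA, 0xBB, 0x01, 0x80 };
+       uint32_t len_bits = 0;
+       uint8_t dir = 0;
+
+       find_kasumif9_direction_and_length(buf, 4, &len_bits, &dir);
+       /* len_bits == 23 (3 * 8 - 1), dir == (0x01 & 0x1) == 1 */
+}
+#endif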
+
+/*
+ * This handles all auth only except AES_GMAC
+ */
+static __rte_always_inline void *
+fill_digest_params(struct rte_crypto_op *cop,
+                  struct cpt_sess_misc *sess,
+                  void **mdata_ptr,
+                  int *op_ret)
+{
+       uint32_t space = 0;
+       struct rte_crypto_sym_op *sym_op = cop->sym;
+       void *mdata;
+       phys_addr_t mphys;
+       uint64_t *op;
+       uint32_t auth_range_off;
+       uint32_t flags = 0;
+       uint64_t d_offs = 0, d_lens;
+       void *prep_req = NULL;
+       struct rte_mbuf *m_src, *m_dst;
+       uint16_t auth_op = sess->cpt_op & CPT_OP_AUTH_MASK;
+       uint8_t zsk_flag = sess->zsk_flag;
+       uint16_t mac_len = sess->mac_len;
+       fc_params_t params;
+       char src[SRC_IOV_SIZE];
+       uint8_t iv_buf[16];
+       memset(&params, 0, sizeof(fc_params_t));
+       struct cptvf_meta_info *cpt_m_info =
+                               (struct cptvf_meta_info *)(*mdata_ptr);
+
+       m_src = sym_op->m_src;
+
+       /* For digest-only ops, force a mempool allocation */
+       mdata = alloc_op_meta(NULL, &params.meta_buf, cpt_m_info->cptvf_op_mlen,
+                             cpt_m_info->cptvf_meta_pool);
+       if (mdata == NULL) {
+               CPT_LOG_DP_ERR("Error allocating meta buffer for request");
+               *op_ret = -ENOMEM;
+               return NULL;
+       }
+
+       mphys = params.meta_buf.dma_addr;
+
+       op = mdata;
+       op[0] = (uintptr_t)mdata;
+       op[1] = (uintptr_t)cop;
+       op[2] = op[3] = 0; /* Used to indicate auth verify */
+       space += 4 * sizeof(uint64_t);
+
+       auth_range_off = sym_op->auth.data.offset;
+
+       flags = VALID_MAC_BUF;
+       params.src_iov = (void *)src;
+       if (unlikely(zsk_flag)) {
+               /*
+                * For ZUC, Kasumi and SNOW3G the offsets are in bits, so
+                * pass them through even in the auth-only case and let the
+                * microcode handle them.
+                */
+               d_offs = auth_range_off;
+               auth_range_off = 0;
+               params.auth_iv_buf = rte_crypto_op_ctod_offset(cop,
+                                       uint8_t *, sess->auth_iv_offset);
+               if (zsk_flag == K_F9) {
+                       uint32_t length_in_bits, num_bytes;
+                       uint8_t *src, direction = 0;
+                       uint32_t counter_num_bytes;
+
+                       memcpy(iv_buf, rte_pktmbuf_mtod(cop->sym->m_src,
+                                                       uint8_t *), 8);
+                       /*
+                        * This is kasumi f9, take direction from
+                        * source buffer
+                        */
+                       length_in_bits = cop->sym->auth.data.length;
+                       num_bytes = (length_in_bits >> 3);
+                       counter_num_bytes = num_bytes;
+                       src = rte_pktmbuf_mtod(cop->sym->m_src, uint8_t *);
+                       find_kasumif9_direction_and_length(src,
+                                               counter_num_bytes,
+                                               &length_in_bits,
+                                               &direction);
+                       length_in_bits -= 64;
+                       cop->sym->auth.data.offset += 64;
+                       d_offs = cop->sym->auth.data.offset;
+                       auth_range_off = d_offs / 8;
+                       cop->sym->auth.data.length = length_in_bits;
+
+                       /* Store it at end of auth iv */
+                       iv_buf[8] = direction;
+                       params.auth_iv_buf = iv_buf;
+               }
+       }
+
+       d_lens = sym_op->auth.data.length;
+
+       params.ctx_buf.vaddr = SESS_PRIV(sess);
+       params.ctx_buf.dma_addr = sess->ctx_dma_addr;
+
+       if (auth_op == CPT_OP_AUTH_GENERATE) {
+               if (sym_op->auth.digest.data) {
+                       /*
+                        * Digest to be generated
+                        * in separate buffer
+                        */
+                       params.mac_buf.size =
+                               sess->mac_len;
+                       params.mac_buf.vaddr =
+                               sym_op->auth.digest.data;
+                       params.mac_buf.dma_addr =
+                               sym_op->auth.digest.phys_addr;
+               } else {
+                       uint32_t off = sym_op->auth.data.offset +
+                               sym_op->auth.data.length;
+                       int32_t dlen, grow;
+
+                       m_dst = sym_op->m_dst ?
+                               sym_op->m_dst : sym_op->m_src;
+                       dlen = rte_pktmbuf_pkt_len(m_dst);
+
+                       /* Grow m_dst if the digest would overrun it */
+                       grow = off + mac_len - dlen;
+                       if (grow > 0 &&
+                           !rte_pktmbuf_append(m_dst, grow)) {
+                               CPT_LOG_DP_ERR("Failed to extend "
+                                              "mbuf by %uB", grow);
+                               goto err;
+                       }
+
+                       params.mac_buf.vaddr =
+                               rte_pktmbuf_mtod_offset(m_dst, void *, off);
+                       params.mac_buf.dma_addr =
+                               rte_pktmbuf_mtophys_offset(m_dst, off);
+                       params.mac_buf.size = mac_len;
+               }
+       } else {
+               /* Need space for storing generated mac */
+               params.mac_buf.vaddr = (uint8_t *)mdata + space;
+               params.mac_buf.dma_addr = mphys + space;
+               params.mac_buf.size = mac_len;
+               space += RTE_ALIGN_CEIL(mac_len, 8);
+               op[2] = (uintptr_t)params.mac_buf.vaddr;
+               op[3] = mac_len;
+       }
+
+       params.meta_buf.vaddr = (uint8_t *)mdata + space;
+       params.meta_buf.dma_addr = mphys + space;
+       params.meta_buf.size -= space;
+
+       /* Out of place processing */
+       params.src_iov = (void *)src;
+
+       /* Store SG I/O in the API for reuse */
+       if (prepare_iov_from_pkt(m_src, params.src_iov, auth_range_off)) {
+               CPT_LOG_DP_ERR("Prepare src iov failed");
+               *op_ret = -1;
+               goto err;
+       }
+
+       prep_req = cpt_fc_enc_hmac_prep(flags, d_offs, d_lens,
+                                       &params, op, op_ret);
+       *mdata_ptr = mdata;
+       return prep_req;
+err:
+       if (unlikely(!prep_req))
+               free_op_meta(mdata, cpt_m_info->cptvf_meta_pool);
+       return NULL;
+}
+
+#endif /*_CPT_UCODE_H_ */
diff --git a/drivers/common/cpt/meson.build b/drivers/common/cpt/meson.build
new file mode 100644 (file)
index 0000000..0a905aa
--- /dev/null
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Cavium, Inc
+
+sources = files('cpt_pmd_ops_helper.c')
+
+deps = ['kvargs', 'pci', 'cryptodev']
+includes += include_directories('../../crypto/octeontx')
+allow_experimental_apis = true
diff --git a/drivers/common/cpt/rte_common_cpt_version.map b/drivers/common/cpt/rte_common_cpt_version.map
new file mode 100644 (file)
index 0000000..dec614f
--- /dev/null
@@ -0,0 +1,6 @@
+DPDK_18.11 {
+       global:
+
+       cpt_pmd_ops_helper_get_mlen_direct_mode;
+       cpt_pmd_ops_helper_get_mlen_sg_mode;
+};
diff --git a/drivers/common/dpaax/Makefile b/drivers/common/dpaax/Makefile
new file mode 100644 (file)
index 0000000..94d2cf0
--- /dev/null
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 NXP
+#
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_common_dpaax.a
+
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+# versioning export map
+EXPORT_MAP := rte_common_dpaax_version.map
+
+# library version
+LIBABIVER := 1
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y += dpaax_iova_table.c
+
+LDLIBS += -lrte_eal
+
+SYMLINK-y-include += dpaax_iova_table.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
\ No newline at end of file
diff --git a/drivers/common/dpaax/dpaax_iova_table.c b/drivers/common/dpaax/dpaax_iova_table.c
new file mode 100644 (file)
index 0000000..2dd38a9
--- /dev/null
@@ -0,0 +1,465 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 NXP
+ */
+
+#include <rte_memory.h>
+
+#include "dpaax_iova_table.h"
+#include "dpaax_logs.h"
+
+/* Global dpaax logger identifier */
+int dpaax_logger;
+
+/* Global table reference */
+struct dpaax_iova_table *dpaax_iova_table_p;
+
+static int dpaax_handle_memevents(void);
+
+/* A structure representing the device-tree node available in /proc/device-tree.
+ */
+struct reg_node {
+       phys_addr_t addr;
+       size_t len;
+};
+
+/* An ntohll-equivalent routine.
+ * XXX: Only applicable in a 64-bit environment.
+ */
+static void
+rotate_8(unsigned char *arr)
+{
+       uint32_t temp;
+       uint32_t *first_half;
+       uint32_t *second_half;
+
+       first_half = (uint32_t *)(arr);
+       second_half = (uint32_t *)(arr + 4);
+
+       temp = *first_half;
+       *first_half = *second_half;
+       *second_half = temp;
+
+       *first_half = ntohl(*first_half);
+       *second_half = ntohl(*second_half);
+}
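+
+#ifdef DPAAX_ROTATE8_EXAMPLE
+/*
+ * Editor's sketch: on a little-endian host, rotate_8() amounts to a
+ * full 64-bit byte swap (be64-to-cpu), e.g. the big-endian bytes
+ * 00 00 00 00 80 00 00 00 decode to 0x80000000.
+ */
+static inline void
+example_rotate_8(void)
+{
+       unsigned char raw[8] = { 0, 0, 0, 0, 0x80, 0, 0, 0 };
+       uint64_t v;
+
+       rotate_8(raw);
+       memcpy(&v, raw, sizeof(v));
+       /* v == 0x80000000 on a little-endian machine */
+}
+#endif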
+
+/* read_memory_node
+ * The memory layout for DPAAx platforms (LS1043, LS1046, LS1088, LS2088,
+ * LX2160) is populated by U-Boot and available in the device tree:
+ * /proc/device-tree/memory@<address>/reg <= register.
+ * Entries are of the form:
+ *  (<8 byte start addr><8 byte length>)(..more similar blocks of start,len>)..
+ *
+ * @param count
+ *    OUT populate number of entries found in memory node
+ * @return
+ *    Pointer to array of reg_node elements, count size
+ */
+static struct reg_node *
+read_memory_node(unsigned int *count)
+{
+       int fd, ret, i;
+       unsigned int j;
+       glob_t result = {0};
+       struct stat statbuf = {0};
+       char file_data[MEM_NODE_FILE_LEN];
+       struct reg_node *nodes = NULL;
+
+       *count = 0;
+
+       ret = glob(MEM_NODE_PATH_GLOB, 0, NULL, &result);
+       if (ret != 0) {
+               DPAAX_DEBUG("Unable to glob device-tree memory node: (%s)(%d)",
+                           MEM_NODE_PATH_GLOB, ret);
+               goto out;
+       }
+
+       if (result.gl_pathc != 1) {
+               /* Either more than one memory@<addr> node found, or none.
+                * In either case, cannot work ahead.
+                */
+               DPAAX_DEBUG("Found (%zu) entries in device-tree. Not supported!",
+                           result.gl_pathc);
+               goto out;
+       }
+
+       DPAAX_DEBUG("Opening and parsing device-tree node: (%s)",
+                   result.gl_pathv[0]);
+       fd = open(result.gl_pathv[0], O_RDONLY);
+       if (fd < 0) {
+               DPAAX_DEBUG("Unable to open the device-tree node: (%s)(fd=%d)",
+                           MEM_NODE_PATH_GLOB, fd);
+               goto cleanup;
+       }
+
+       /* Stat to get the file size */
+       ret = fstat(fd, &statbuf);
+       if (ret != 0) {
+               DPAAX_DEBUG("Unable to get device-tree memory node size.");
+               goto cleanup;
+       }
+
+       DPAAX_DEBUG("Size of device-tree mem node: %lu", statbuf.st_size);
+       if (statbuf.st_size > MEM_NODE_FILE_LEN) {
+               DPAAX_DEBUG("More memory nodes available than assumed.");
+               DPAAX_DEBUG("System may not work properly!");
+       }
+
+       ret = read(fd, file_data, statbuf.st_size > MEM_NODE_FILE_LEN ?
+                                 MEM_NODE_FILE_LEN : statbuf.st_size);
+       if (ret <= 0) {
+               DPAAX_DEBUG("Unable to read device-tree memory node: (%d)",
+                           ret);
+               goto cleanup;
+       }
+
+       /* The reg node should be multiple of 16 bytes, 8 bytes each for addr
+        * and len.
+        */
+       *count = (statbuf.st_size / 16);
+       if ((*count) <= 0 || (statbuf.st_size % 16 != 0)) {
+               DPAAX_DEBUG("Invalid memory node values or count. (size=%lu)",
+                           statbuf.st_size);
+               goto cleanup;
+       }
+
+       /* each entry is of 16 bytes, and size/16 is total count of entries */
+       nodes = malloc(sizeof(struct reg_node) * (*count));
+       if (!nodes) {
+               DPAAX_DEBUG("Failure in allocating working memory.");
+               goto cleanup;
+       }
+       memset(nodes, 0, sizeof(struct reg_node) * (*count));
+
+       for (i = 0, j = 0; i < ret && j < *count; i += 16, j++) {
+               memcpy(&nodes[j], file_data + i, 16);
+               /* Rotate (ntohl) each 8 byte entry */
+               rotate_8((unsigned char *)(&(nodes[j].addr)));
+               rotate_8((unsigned char *)(&(nodes[j].len)));
+       }
+
+       DPAAX_DEBUG("Device-tree memory node data:");
+       for (j = 0; j < *count; j++)
+               DPAAX_DEBUG("\n    %08" PRIx64 " %08zu",
+                           nodes[j].addr, nodes[j].len);
+
+cleanup:
+       if (fd >= 0)
+               close(fd);
+       globfree(&result);
+out:
+       return nodes;
+}
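
To illustrate the parsing above, here is a hypothetical 32-byte 'reg' blob and what read_memory_node() would make of it; the addresses are invented for the sketch and do not describe real hardware:

```c
/* Two big-endian (addr, len) pairs, as U-Boot would populate them. */
static const unsigned char reg_blob[32] = {
	/* entry 0: addr = 0x80000000, len = 0x80000000 (2 GiB) */
	0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
	/* entry 1: addr = 0x8080000000, len = 0x80000000 (2 GiB) */
	0x00, 0x00, 0x00, 0x80, 0x80, 0x00, 0x00, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00,
};

/* A file size of 32 gives *count == 2; after rotate_8() the reg_node
 * entries read { .addr = 0x80000000,   .len = 0x80000000 } and
 *              { .addr = 0x8080000000, .len = 0x80000000 }.
 */
```
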
+
+int
+dpaax_iova_table_populate(void)
+{
+       int ret;
+       unsigned int i, node_count;
+       size_t tot_memory_size, total_table_size;
+       struct reg_node *nodes;
+       struct dpaax_iovat_element *entry;
+
+       /* dpaax_iova_table_p is a singleton - only one instance should be
+        * created.
+        */
+       if (dpaax_iova_table_p) {
+               DPAAX_DEBUG("Multiple allocation attempt for IOVA Table (%p)",
+                           dpaax_iova_table_p);
+               /* This can be an error case as well - some path not cleaning
+                * up table - but, for now, it is assumed that if IOVA Table
+                * pointer is valid, table is allocated.
+                */
+               return 0;
+       }
+
+       nodes = read_memory_node(&node_count);
+       if (nodes == NULL) {
+               DPAAX_WARN("PA->VA translation not available;");
+               DPAAX_WARN("Expect performance impact.");
+               return -1;
+       }
+
+       tot_memory_size = 0;
+       for (i = 0; i < node_count; i++)
+               tot_memory_size += nodes[i].len;
+
+       DPAAX_DEBUG("Total available PA memory size: %zu", tot_memory_size);
+
+       /* Total table size = meta data + tot_memory_size/8 */
+       total_table_size = sizeof(struct dpaax_iova_table) +
+                          (sizeof(struct dpaax_iovat_element) * node_count) +
+                          ((tot_memory_size / DPAAX_MEM_SPLIT) * sizeof(uint64_t));
+
+       /* TODO: This memory doesn't need to be shared but must always stay
+        * pinned in RAM (no swap out) - use hugepages rather than malloc.
+        */
+       dpaax_iova_table_p = rte_zmalloc(NULL, total_table_size, 0);
+       if (dpaax_iova_table_p == NULL) {
+               DPAAX_WARN("Unable to allocate memory for PA->VA Table;");
+               DPAAX_WARN("PA->VA translation not available;");
+               DPAAX_WARN("Expect performance impact.");
+               free(nodes);
+               return -1;
+       }
+
+       /* Initialize table */
+       dpaax_iova_table_p->count = node_count;
+       entry = dpaax_iova_table_p->entries;
+
+       DPAAX_DEBUG("IOVA Table entries: (entry start = %p)", (void *)entry);
+       DPAAX_DEBUG("\t(entry),(start),(len),(next)");
+
+       for (i = 0; i < node_count; i++) {
+               /* dpaax_iova_table_p
+                * |   dpaax_iova_table_p->entries
+                * |      |
+                * |      |
+                * V      V
+                * +------+------+-------+---+----------+---------+---
+                * |iova_ |entry | entry |   | pages    | pages   |
+                * |table | 1    |  2    |...| entry 1  | entry2  |
+                * +-----'+.-----+-------+---+;---------+;--------+---
+                *         \      \          /          /
+                *          `~~~~~~|~~~~~>pages        /
+                *                  \                 /
+                *                   `~~~~~~~~~~~>pages
+                */
+               entry[i].start = nodes[i].addr;
+               entry[i].len = nodes[i].len;
+               if (i > 0)
+                       entry[i].pages = entry[i-1].pages +
+                               ((entry[i-1].len/DPAAX_MEM_SPLIT));
+               else
+                       entry[i].pages = (uint64_t *)((unsigned char *)entry +
+                                        (sizeof(struct dpaax_iovat_element) *
+                                        node_count));
+
+               DPAAX_DEBUG("\t(%u),(%8"PRIx64"),(%8zu),(%8p)",
+                           i, entry[i].start, entry[i].len, entry[i].pages);
+       }
+
+       /* Release memory associated with nodes array - not required now */
+       free(nodes);
+
+       DPAAX_DEBUG("Adding mem-event handler");
+       ret = dpaax_handle_memevents();
+       if (ret) {
+               DPAAX_ERR("Unable to add mem-event handler");
+               DPAAX_WARN("Cases with non-buffer pool mem won't work!");
+       }
+
+       return 0;
+}
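
To make the sizing arithmetic above concrete, here is a back-of-envelope computation under assumed values (two nodes totalling 4 GiB; the struct sizes are LP64 guesses, not taken from the patch):

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define DPAAX_MEM_SPLIT (1 << 21)	/* 2 MiB, as defined in the header */

int
main(void)
{
	uint64_t tot_memory_size = 4ULL << 30;	/* 4 GiB across all nodes */
	unsigned int node_count = 2;
	/* assumed: 8 B table header, 24 B per dpaax_iovat_element */
	uint64_t total = 8 + 24ULL * node_count +
			 (tot_memory_size / DPAAX_MEM_SPLIT) * sizeof(uint64_t);

	/* 2048 page slots * 8 B = 16 KiB, plus ~56 B of metadata */
	printf("total_table_size ~= %" PRIu64 " bytes\n", total);
	return 0;
}
```
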
+
+void
+dpaax_iova_table_depopulate(void)
+{
+       if (dpaax_iova_table_p == NULL)
+               return;
+
+       rte_free(dpaax_iova_table_p);
+       dpaax_iova_table_p = NULL;
+
+       DPAAX_DEBUG("IOVA Table cleaned up");
+}
+
+int
+dpaax_iova_table_update(phys_addr_t paddr, void *vaddr, size_t length)
+{
+       int found = 0;
+       unsigned int i;
+       size_t req_length = length, e_offset;
+       struct dpaax_iovat_element *entry;
+       uintptr_t align_vaddr;
+       phys_addr_t align_paddr;
+
+       if (unlikely(dpaax_iova_table_p == NULL))
+               return -1;
+
+       align_paddr = paddr & DPAAX_MEM_SPLIT_MASK;
+       align_vaddr = ((uintptr_t)vaddr & DPAAX_MEM_SPLIT_MASK);
+
+       /* Check if paddr is available in table */
+       entry = dpaax_iova_table_p->entries;
+       for (i = 0; i < dpaax_iova_table_p->count; i++) {
+               if (align_paddr < entry[i].start) {
+                       /* An address below this entry's start that was not
+                        * matched in a previous iteration cannot exist in
+                        * the table.
+                        */
+                       DPAAX_ERR("Add: Incorrect entry for PA->VA Table"
+                                 " (%"PRIu64")", paddr);
+                       DPAAX_ERR("Add: Lowest address: %"PRIu64"",
+                                 entry[i].start);
+                       return -1;
+               }
+
+               if (align_paddr >= (entry[i].start + entry[i].len))
+                       continue;
+
+               /* align_paddr >= start && align_paddr < (start + len) */
+               found = 1;
+
+               do {
+                       e_offset = ((align_paddr - entry[i].start) / DPAAX_MEM_SPLIT);
+                       /* TODO: Whatif something already exists at this
+                        * location - is that an error? For now, ignoring the
+                        * case.
+                        */
+                       entry[i].pages[e_offset] = align_vaddr;
+                       DPAAX_DEBUG("Added: vaddr=0x%"PRIxPTR" for Phy:%"PRIu64
+                                   " at %zu remaining len %zu", align_vaddr,
+                                   align_paddr, e_offset, req_length);
+
+                       /* Incoming request can be larger than the
+                        * DPAAX_MEM_SPLIT size - in which case, multiple
+                        * entries in entry->pages[] are filled up.
+                        */
+                       if (req_length <= DPAAX_MEM_SPLIT)
+                               break;
+                       align_paddr += DPAAX_MEM_SPLIT;
+                       align_vaddr += DPAAX_MEM_SPLIT;
+                       req_length -= DPAAX_MEM_SPLIT;
+               } while (1);
+
+               break;
+       }
+
+       if (!found) {
+               /* The incoming physical address may lie beyond the regions
+                * discovered in the device-tree memory node - especially if
+                * some malloc'd area is used by EAL and the memevent handler
+                * passes it across. This is not necessarily an error.
+                */
+                */
+               DPAAX_DEBUG("Add: Unable to find slot for vaddr:(%p),"
+                           " phy(%"PRIu64")",
+                           vaddr, paddr);
+               return -1;
+       }
+
+       DPAAX_DEBUG("Add: Found slot at (%"PRIu64")[(%zu)] for vaddr:(%p),"
+                   " phy(%"PRIu64"), len(%zu)", entry[i].start, e_offset,
+                   vaddr, paddr, length);
+       return 0;
+}
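
A sketch of a typical caller of the update API; the register_region() helper is hypothetical, and it assumes the region is resolvable through rte_mem_virt2phy() and that dpaax_iova_table.h is included:

```c
#include <rte_memory.h>

#include "dpaax_iova_table.h"

/* Register a freshly mapped region so that later
 * dpaax_iova_table_get_va() lookups can resolve addresses inside it.
 */
static int
register_region(void *va, size_t len)
{
	phys_addr_t pa = rte_mem_virt2phy(va);

	if (pa == RTE_BAD_PHYS_ADDR)
		return -1;

	/* Fills len/DPAAX_MEM_SPLIT consecutive page slots */
	return dpaax_iova_table_update(pa, va, len);
}
```
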
+
+/* dpaax_iova_table_dump
+ * Dump the table, with its entries, on screen. Only works in Debug Mode.
+ * Not for the faint-hearted - the tables can get quite large.
+ */
+void
+dpaax_iova_table_dump(void)
+{
+       unsigned int i, j;
+       struct dpaax_iovat_element *entry;
+
+       /* In case DEBUG is not enabled, some 'if' conditions might misbehave
+        * as they would contain nothing but a DPAAX_DEBUG() which, if
+        * compiled out, would leave the 'if' body empty.
+        */
+       if (rte_log_get_global_level() < RTE_LOG_DEBUG) {
+               DPAAX_ERR("Set log level to Debug for PA->Table dump!");
+               return;
+       }
+
+       DPAAX_DEBUG(" === Start of PA->VA Translation Table ===");
+       if (dpaax_iova_table_p == NULL) {
+               DPAAX_DEBUG("\tNULL");
+               return;
+       }
+
+       entry = dpaax_iova_table_p->entries;
+       for (i = 0; i < dpaax_iova_table_p->count; i++) {
+               DPAAX_DEBUG("\t(%16i),(%16"PRIu64"),(%16zu),(%16p)",
+                           i, entry[i].start, entry[i].len, entry[i].pages);
+               DPAAX_DEBUG("\t\t          (PA),          (VA)");
+               for (j = 0; j < (entry[i].len/DPAAX_MEM_SPLIT); j++) {
+                       if (entry[i].pages[j] == 0)
+                               continue;
+                       DPAAX_DEBUG("\t\t(%16"PRIx64"),(%16"PRIx64")",
+                                   (entry[i].start +
+                                    ((uint64_t)j * DPAAX_MEM_SPLIT)),
+                                   entry[i].pages[j]);
+               }
+       }
+       DPAAX_DEBUG(" === End of PA->VA Translation Table ===");
+}
+
+static void
+dpaax_memevent_cb(enum rte_mem_event type, const void *addr, size_t len,
+                 void *arg __rte_unused)
+{
+       struct rte_memseg_list *msl;
+       struct rte_memseg *ms;
+       size_t cur_len = 0, map_len = 0;
+       phys_addr_t phys_addr;
+       void *virt_addr;
+       int ret;
+
+       DPAAX_DEBUG("Called with addr=%p, len=%zu", addr, len);
+
+       msl = rte_mem_virt2memseg_list(addr);
+
+       while (cur_len < len) {
+               const void *va = RTE_PTR_ADD(addr, cur_len);
+
+               ms = rte_mem_virt2memseg(va, msl);
+               phys_addr = rte_mem_virt2phy(ms->addr);
+               virt_addr = ms->addr;
+               map_len = ms->len;
+
+               DPAAX_DEBUG("Request for %s, va=%p, virt_addr=%p, "
+                           "iova=%"PRIu64", map_len=%zu",
+                           type == RTE_MEM_EVENT_ALLOC ?
+                           "alloc" : "dealloc",
+                           va, virt_addr, phys_addr, map_len);
+
+               if (type == RTE_MEM_EVENT_ALLOC)
+                       ret = dpaax_iova_table_update(phys_addr, virt_addr,
+                                                     map_len);
+               else
+                       /* For RTE_MEM_EVENT_FREE, the complete hugepage is
+                        * released and its PA entry is reset to 0.
+                        */
+                       ret = dpaax_iova_table_update(phys_addr, NULL,
+                                                     map_len);
+
+               if (ret != 0) {
+                       DPAAX_DEBUG("PA-Table entry update failed. "
+                                   "Map=%d, addr=%p, len=%zu, err:(%d)",
+                                   type, va, map_len, ret);
+                       return;
+               }
+
+               cur_len += map_len;
+       }
+}
+
+static int
+dpaax_memevent_walk_memsegs(const struct rte_memseg_list *msl __rte_unused,
+                           const struct rte_memseg *ms, size_t len,
+                           void *arg __rte_unused)
+{
+       DPAAX_DEBUG("Walking for %p (pa=%"PRIu64") and len %zu",
+                   ms->addr, ms->phys_addr, len);
+       dpaax_iova_table_update(rte_mem_virt2phy(ms->addr), ms->addr, len);
+       return 0;
+}
+
+static int
+dpaax_handle_memevents(void)
+{
+       /* First, walk through all memsegs and pin them, before installing
+        * the handler. This ensures that all memsegs already identified or
+        * allocated by EAL are part of the PA->VA Table. This matters
+        * especially when an application allocates memory before EAL does,
+        * or when externally allocated memory is passed to EAL.
+        */
+       rte_memseg_contig_walk_thread_unsafe(dpaax_memevent_walk_memsegs, NULL);
+
+       return rte_mem_event_callback_register("dpaax_memevents_cb",
+                                              dpaax_memevent_cb, NULL);
+}
+
+RTE_INIT(dpaax_log)
+{
+       dpaax_logger = rte_log_register("pmd.common.dpaax");
+       if (dpaax_logger >= 0)
+               rte_log_set_level(dpaax_logger, RTE_LOG_ERR);
+}
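
Since the logger defaults to ERR, a consumer that wants the dpaax_iova_table_dump() output or the verbose traces has to raise the level first. A sketch, equivalent to passing --log-level=pmd.common.dpaax:debug to EAL; the helper name is hypothetical:

```c
#include <rte_log.h>

static void
enable_dpaax_debug(void)
{
	/* rte_log_register() returns the existing id if the name is known */
	int id = rte_log_register("pmd.common.dpaax");

	if (id >= 0)
		rte_log_set_level(id, RTE_LOG_DEBUG);
}
```
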
diff --git a/drivers/common/dpaax/dpaax_iova_table.h b/drivers/common/dpaax/dpaax_iova_table.h
new file mode 100644 (file)
index 0000000..138827e
--- /dev/null
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 NXP
+ */
+
+#ifndef _DPAAX_IOVA_TABLE_H_
+#define _DPAAX_IOVA_TABLE_H_
+
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <glob.h>
+#include <errno.h>
+#include <arpa/inet.h>
+
+#include <rte_eal.h>
+#include <rte_branch_prediction.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+
+struct dpaax_iovat_element {
+       phys_addr_t start; /**< Start address of block of physical pages */
+       size_t len; /**< Difference of end-start for quick access */
+       uint64_t *pages; /**< VA for each physical page in this block */
+};
+
+struct dpaax_iova_table {
+       unsigned int count; /**< No. of blocks of contiguous physical pages */
+       struct dpaax_iovat_element entries[0];
+};
+
+/* Pointer to the table, which is common for DPAA/DPAA2 and only a single
+ * instance is required across net/crypto/event drivers. This table is
+ * populated iff devices are found on the bus.
+ */
+extern struct dpaax_iova_table *dpaax_iova_table_p;
+
+/* Device tree file for memory layout is named 'memory@<addr>' where the 'addr'
+ * is SoC dependent, or even Uboot fixup dependent.
+ */
+#define MEM_NODE_PATH_GLOB "/proc/device-tree/memory[@0-9]*/reg"
+/* The device-tree file is a multiple of 16 bytes: each entry holds an 8-byte
+ * address and an 8-byte length. Assuming a maximum of 5 entries.
+ */
+#define MEM_NODE_FILE_LEN ((16 * 5) + 1)
+
+/* Table is made up of DPAAX_MEM_SPLIT elements for each contiguous zone. This
+ * helps avoid separate handling for cases where more than one size of hugepage
+ * is supported.
+ */
+#define DPAAX_MEM_SPLIT (1<<21)
+#define DPAAX_MEM_SPLIT_MASK ~(DPAAX_MEM_SPLIT - 1) /**< Floor aligned */
+#define DPAAX_MEM_SPLIT_MASK_OFF (DPAAX_MEM_SPLIT - 1) /**< Offset */
+
+/* APIs exposed */
+int dpaax_iova_table_populate(void);
+void dpaax_iova_table_depopulate(void);
+int dpaax_iova_table_update(phys_addr_t paddr, void *vaddr, size_t length);
+void dpaax_iova_table_dump(void);
+
+static inline void *dpaax_iova_table_get_va(phys_addr_t paddr) __attribute__((hot));
+
+static inline void *
+dpaax_iova_table_get_va(phys_addr_t paddr)
+{
+       unsigned int i = 0, index;
+       void *vaddr = 0;
+       phys_addr_t paddr_align = paddr & DPAAX_MEM_SPLIT_MASK;
+       size_t offset = paddr & DPAAX_MEM_SPLIT_MASK_OFF;
+       struct dpaax_iovat_element *entry;
+
+       if (unlikely(dpaax_iova_table_p == NULL))
+               return NULL;
+
+       entry = dpaax_iova_table_p->entries;
+
+       do {
+               if (unlikely(i >= dpaax_iova_table_p->count))
+                       break;
+
+               if (paddr_align < entry[i].start) {
+                       /* Incorrect paddr; Not in memory range */
+                       return NULL;
+               }
+
+               if (paddr_align >= (entry[i].start + entry[i].len)) {
+                       i++;
+                       continue;
+               }
+
+               /* align_paddr >= entry[i].start && align_paddr < (start + len) */
+               index = (paddr_align - entry[i].start)/DPAAX_MEM_SPLIT;
+               vaddr = (void *)((uintptr_t)entry[i].pages[index] + offset);
+               break;
+       } while (1);
+
+       return vaddr;
+}
+
+#endif /* _DPAAX_IOVA_TABLE_H_ */
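
A worked example of the lookup arithmetic in dpaax_iova_table_get_va(); the addresses are invented for illustration:

```c
/* Suppose entry[0] covers PA 0x80000000 with len 0x80000000 and
 * paddr == 0x80301234:
 *   paddr_align = 0x80301234 & ~0x1FFFFF = 0x80200000  (2 MiB floor)
 *   offset      = 0x80301234 &  0x1FFFFF = 0x00101234
 *   index       = (0x80200000 - 0x80000000) / DPAAX_MEM_SPLIT = 1
 * so the returned VA is entry[0].pages[1] + 0x101234.
 */
```
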
diff --git a/drivers/common/dpaax/dpaax_logs.h b/drivers/common/dpaax/dpaax_logs.h
new file mode 100644 (file)
index 0000000..bf1b27c
--- /dev/null
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 NXP
+ */
+
+#ifndef _DPAAX_LOGS_H_
+#define _DPAAX_LOGS_H_
+
+#include <rte_log.h>
+
+extern int dpaax_logger;
+
+#define DPAAX_LOG(level, fmt, args...) \
+       rte_log(RTE_LOG_ ## level, dpaax_logger, "dpaax: " fmt "\n", \
+               ##args)
+
+/* Debug logs are with Function names */
+#define DPAAX_DEBUG(fmt, args...) \
+       rte_log(RTE_LOG_DEBUG, dpaax_logger, "dpaax: %s():       " fmt "\n", \
+               __func__, ##args)
+
+#define DPAAX_INFO(fmt, args...) \
+       DPAAX_LOG(INFO, fmt, ## args)
+#define DPAAX_ERR(fmt, args...) \
+       DPAAX_LOG(ERR, fmt, ## args)
+#define DPAAX_WARN(fmt, args...) \
+       DPAAX_LOG(WARNING, fmt, ## args)
+
+/* Datapath logs, compiled out when the level is below RTE_LOG_DP_LEVEL */
+#define DPAAX_DP_LOG(level, fmt, args...) \
+       RTE_LOG_DP(level, PMD, fmt, ## args)
+
+#define DPAAX_DP_DEBUG(fmt, args...) \
+       DPAAX_DP_LOG(DEBUG, fmt, ## args)
+#define DPAAX_DP_INFO(fmt, args...) \
+       DPAAX_DP_LOG(INFO, fmt, ## args)
+#define DPAAX_DP_WARN(fmt, args...) \
+       DPAAX_DP_LOG(WARNING, fmt, ## args)
+
+#endif /* _DPAAX_LOGS_H_ */
diff --git a/drivers/common/dpaax/meson.build b/drivers/common/dpaax/meson.build
new file mode 100644 (file)
index 0000000..98a1bdd
--- /dev/null
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 NXP
+
+allow_experimental_apis = true
+
+if host_machine.system() != 'linux'
+        build = false
+endif
+
+sources = files('dpaax_iova_table.c')
+
+cflags += ['-D_GNU_SOURCE']
diff --git a/drivers/common/dpaax/rte_common_dpaax_version.map b/drivers/common/dpaax/rte_common_dpaax_version.map
new file mode 100644 (file)
index 0000000..8131c9e
--- /dev/null
@@ -0,0 +1,11 @@
+DPDK_18.11 {
+       global:
+
+       dpaax_iova_table_update;
+       dpaax_iova_table_depopulate;
+       dpaax_iova_table_dump;
+       dpaax_iova_table_p;
+       dpaax_iova_table_populate;
+
+       local: *;
+};
index d7b7d8c..a509341 100644 (file)
@@ -2,6 +2,6 @@
 # Copyright(c) 2018 Cavium, Inc
 
 std_deps = ['eal']
-drivers = ['octeontx', 'qat']
+drivers = ['cpt', 'dpaax', 'mvep', 'octeontx', 'qat']
 config_flag_fmt = 'RTE_LIBRTE_@0@_COMMON'
 driver_name_fmt = 'rte_common_@0@'
diff --git a/drivers/common/mvep/Makefile b/drivers/common/mvep/Makefile
new file mode 100644 (file)
index 0000000..1f5f005
--- /dev/null
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Marvell International Ltd.
+#
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(MAKECMDGOALS),clean)
+ifneq ($(MAKECMDGOALS),config)
+ifeq ($(LIBMUSDK_PATH),)
+$(error "Please define LIBMUSDK_PATH environment variable")
+endif
+endif
+endif
+
+# library name
+LIB = librte_common_mvep.a
+
+# library version
+LIBABIVER := 1
+
+# versioning export map
+EXPORT_MAP := rte_common_mvep_version.map
+
+# external library dependencies
+CFLAGS += -I$(RTE_SDK)/drivers/common/mvep
+CFLAGS += -I$(LIBMUSDK_PATH)/include
+CFLAGS += -DMVCONF_TYPES_PUBLIC
+CFLAGS += -DMVCONF_DMA_PHYS_ADDR_T_PUBLIC
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -O3
+LDLIBS += -L$(LIBMUSDK_PATH)/lib
+LDLIBS += -lmusdk
+LDLIBS += -lrte_eal -lrte_kvargs
+
+# library source files
+SRCS-y += mvep_common.c
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/common/mvep/meson.build b/drivers/common/mvep/meson.build
new file mode 100644 (file)
index 0000000..8ccfacb
--- /dev/null
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Marvell International Ltd.
+# Copyright(c) 2018 Semihalf.
+# All rights reserved.
+#
+path = get_option('lib_musdk_dir')
+lib_dir = path + '/lib'
+inc_dir = path + '/include'
+
+lib = cc.find_library('libmusdk', dirs: [lib_dir], required: false)
+if not lib.found()
+       build = false
+else
+       ext_deps += lib
+       includes += include_directories(inc_dir)
+       cflags += ['-DMVCONF_TYPES_PUBLIC', '-DMVCONF_DMA_PHYS_ADDR_T_PUBLIC']
+endif
+
+sources = files('mvep_common.c')
diff --git a/drivers/common/mvep/mvep_common.c b/drivers/common/mvep/mvep_common.c
new file mode 100644 (file)
index 0000000..67fa65b
--- /dev/null
@@ -0,0 +1,45 @@
+/*  SPDX-License-Identifier: BSD-3-Clause
+ *  Copyright(c) 2018 Marvell International Ltd.
+ */
+
+#include <rte_common.h>
+
+#include <env/mv_autogen_comp_flags.h>
+#include <env/mv_sys_dma.h>
+
+#include "rte_mvep_common.h"
+
+/* Memory size (in bytes) for MUSDK dma buffers */
+#define MRVL_MUSDK_DMA_MEMSIZE (40 * 1024 * 1024)
+
+struct mvep {
+       uint32_t ref_count;
+};
+
+static struct mvep mvep;
+
+int rte_mvep_init(enum mvep_module_type module __rte_unused,
+                 struct rte_kvargs *kvlist __rte_unused)
+{
+       int ret;
+
+       if (!mvep.ref_count) {
+               ret = mv_sys_dma_mem_init(MRVL_MUSDK_DMA_MEMSIZE);
+               if (ret)
+                       return ret;
+       }
+
+       mvep.ref_count++;
+
+       return 0;
+}
+
+int rte_mvep_deinit(enum mvep_module_type module __rte_unused)
+{
+       if (!mvep.ref_count)
+               return -1;
+
+       mvep.ref_count--;
+
+       if (!mvep.ref_count)
+               mv_sys_dma_mem_destroy();
+
+       return 0;
+}
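
How a Marvell PMD would be expected to pair these calls; the probe/remove functions below are hypothetical, only the rte_mvep_* API comes from the patch:

```c
#include <rte_kvargs.h>

#include "rte_mvep_common.h"

/* The first init maps the MUSDK DMA region; later callers only take a
 * reference. The last deinit tears the region down again.
 */
static int
mrvl_pmd_probe(struct rte_kvargs *kvlist)
{
	return rte_mvep_init(MVEP_MOD_T_PP2, kvlist);
}

static void
mrvl_pmd_remove(void)
{
	rte_mvep_deinit(MVEP_MOD_T_PP2);
}
```
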
diff --git a/drivers/common/mvep/rte_common_mvep_version.map b/drivers/common/mvep/rte_common_mvep_version.map
new file mode 100644 (file)
index 0000000..c71722d
--- /dev/null
@@ -0,0 +1,6 @@
+DPDK_18.11 {
+       global:
+
+       rte_mvep_init;
+       rte_mvep_deinit;
+};
diff --git a/drivers/common/mvep/rte_mvep_common.h b/drivers/common/mvep/rte_mvep_common.h
new file mode 100644 (file)
index 0000000..0593cef
--- /dev/null
@@ -0,0 +1,21 @@
+/*  SPDX-License-Identifier: BSD-3-Clause
+ *  Copyright(c) 2018 Marvell International Ltd.
+ */
+
+#ifndef __RTE_MVEP_COMMON_H__
+#define __RTE_MVEP_COMMON_H__
+
+#include <rte_kvargs.h>
+
+enum mvep_module_type {
+       MVEP_MOD_T_NONE = 0,
+       MVEP_MOD_T_PP2,
+       MVEP_MOD_T_SAM,
+       MVEP_MOD_T_NETA,
+       MVEP_MOD_T_LAST
+};
+
+int rte_mvep_init(enum mvep_module_type module, struct rte_kvargs *kvlist);
+int rte_mvep_deinit(enum mvep_module_type module);
+
+#endif /* __RTE_MVEP_COMMON_H__ */
index d4bef53..de9a3ba 100644 (file)
@@ -17,7 +17,8 @@
  */
 enum qat_device_gen {
        QAT_GEN1 = 1,
-       QAT_GEN2
+       QAT_GEN2,
+       QAT_GEN3
 };
 
 enum qat_service_type {
index f32d723..2a1cf3e 100644 (file)
@@ -7,6 +7,7 @@
 #include "qat_device.h"
 #include "adf_transport_access_macros.h"
 #include "qat_sym_pmd.h"
+#include "qat_comp_pmd.h"
 
 /* Hardware device information per generation */
 __extension__
@@ -14,11 +15,18 @@ struct qat_gen_hw_data qat_gen_config[] =  {
        [QAT_GEN1] = {
                .dev_gen = QAT_GEN1,
                .qp_hw_data = qat_gen1_qps,
+               .comp_num_im_bufs_required = QAT_NUM_INTERM_BUFS_GEN1
        },
        [QAT_GEN2] = {
                .dev_gen = QAT_GEN2,
                .qp_hw_data = qat_gen1_qps,
                /* gen2 has same ring layout as gen1 */
+               .comp_num_im_bufs_required = QAT_NUM_INTERM_BUFS_GEN2
+       },
+       [QAT_GEN3] = {
+               .dev_gen = QAT_GEN3,
+               .qp_hw_data = qat_gen3_qps,
+               .comp_num_im_bufs_required = QAT_NUM_INTERM_BUFS_GEN3
        },
 };
 
@@ -43,10 +51,12 @@ static const struct rte_pci_id pci_id_qat_map[] = {
                {
                        RTE_PCI_DEVICE(0x8086, 0x6f55),
                },
+               {
+                       RTE_PCI_DEVICE(0x8086, 0x18a1),
+               },
                {.device_id = 0},
 };
 
-
 static struct qat_pci_device *
 qat_pci_get_dev(uint8_t dev_id)
 {
@@ -130,6 +140,9 @@ qat_pci_device_allocate(struct rte_pci_device *pci_dev)
        case 0x6f55:
                qat_dev->qat_dev_gen = QAT_GEN2;
                break;
+       case 0x18a1:
+               qat_dev->qat_dev_gen = QAT_GEN3;
+               break;
        default:
                QAT_LOG(ERR, "Invalid dev_id, can't determine generation");
                return NULL;
@@ -187,6 +200,7 @@ static int qat_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                struct rte_pci_device *pci_dev)
 {
        int ret = 0;
+       int num_pmds_created = 0;
        struct qat_pci_device *qat_pci_dev;
 
        QAT_LOG(DEBUG, "Found QAT device at %02x:%02x.%x",
@@ -199,23 +213,33 @@ static int qat_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                return -ENODEV;
 
        ret = qat_sym_dev_create(qat_pci_dev);
-       if (ret != 0)
-               goto error_out;
+       if (ret == 0)
+               num_pmds_created++;
+       else
+               QAT_LOG(WARNING,
+                               "Failed to create QAT SYM PMD on device %s",
+                               qat_pci_dev->name);
 
        ret = qat_comp_dev_create(qat_pci_dev);
-       if (ret != 0)
-               goto error_out;
+       if (ret == 0)
+               num_pmds_created++;
+       else
+               QAT_LOG(WARNING,
+                               "Failed to create QAT COMP PMD on device %s",
+                               qat_pci_dev->name);
 
        ret = qat_asym_dev_create(qat_pci_dev);
-       if (ret != 0)
-               goto error_out;
-
-       return 0;
+       if (ret == 0)
+               num_pmds_created++;
+       else
+               QAT_LOG(WARNING,
+                               "Failed to create QAT ASYM PMD on device %s",
+                               qat_pci_dev->name);
 
-error_out:
-       qat_pci_dev_destroy(qat_pci_dev, pci_dev);
-       return ret;
+       if (num_pmds_created == 0)
+               qat_pci_dev_destroy(qat_pci_dev, pci_dev);
 
+       return 0;
 }
 
 static int qat_pci_remove(struct rte_pci_device *pci_dev)
@@ -239,37 +263,37 @@ static struct rte_pci_driver rte_qat_pmd = {
        .remove = qat_pci_remove
 };
 
-__attribute__((weak)) int
+__rte_weak int
 qat_sym_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused)
 {
        return 0;
 }
 
-__attribute__((weak)) int
+__rte_weak int
 qat_asym_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused)
 {
        return 0;
 }
 
-__attribute__((weak)) int
+__rte_weak int
 qat_sym_dev_destroy(struct qat_pci_device *qat_pci_dev __rte_unused)
 {
        return 0;
 }
 
-__attribute__((weak)) int
+__rte_weak int
 qat_asym_dev_destroy(struct qat_pci_device *qat_pci_dev __rte_unused)
 {
        return 0;
 }
 
-__attribute__((weak)) int
+__rte_weak int
 qat_comp_dev_create(struct qat_pci_device *qat_pci_dev __rte_unused)
 {
        return 0;
 }
 
-__attribute__((weak)) int
+__rte_weak int
 qat_comp_dev_destroy(struct qat_pci_device *qat_pci_dev __rte_unused)
 {
        return 0;
index 9599fc5..eb81c78 100644 (file)
 
 #define QAT_DEV_NAME_MAX_LEN   64
 
+enum qat_comp_num_im_buffers {
+       QAT_NUM_INTERM_BUFS_GEN1 = 12,
+       QAT_NUM_INTERM_BUFS_GEN2 = 20,
+       QAT_NUM_INTERM_BUFS_GEN3 = 20
+};
+
 /*
  * This struct holds all the data about a QAT pci device
  * including data about all services it supports.
@@ -59,6 +65,11 @@ struct qat_pci_device {
        /* Data relating to compression service */
        struct qat_comp_dev_private *comp_dev;
        /**< link back to compressdev private data */
+       struct rte_device comp_rte_dev;
+       /**< This represents the compression subset of this pci device.
+        * Register with this rather than with the one in
+        * pci_dev so that its driver can have a compression-specific name
+        */
 
        /* Data relating to asymmetric crypto service */
 
@@ -67,6 +78,7 @@ struct qat_pci_device {
 struct qat_gen_hw_data {
        enum qat_device_gen dev_gen;
        const struct qat_qp_hw_data (*qp_hw_data)[ADF_MAX_QPS_ON_ANY_SERVICE];
+       enum qat_comp_num_im_buffers comp_num_im_bufs_required;
 };
 
 extern struct qat_gen_hw_data qat_gen_config[];
index 7ca7a45..79f6a01 100644 (file)
@@ -90,6 +90,44 @@ const struct qat_qp_hw_data qat_gen1_qps[QAT_MAX_SERVICES]
        }
 };
 
+__extension__
+const struct qat_qp_hw_data qat_gen3_qps[QAT_MAX_SERVICES]
+                                        [ADF_MAX_QPS_ON_ANY_SERVICE] = {
+       /* queue pairs which provide an asymmetric crypto service */
+       [QAT_SERVICE_ASYMMETRIC] = {
+               {
+                       .service_type = QAT_SERVICE_ASYMMETRIC,
+                       .hw_bundle_num = 0,
+                       .tx_ring_num = 0,
+                       .rx_ring_num = 4,
+                       .tx_msg_size = 64,
+                       .rx_msg_size = 32,
+               }
+       },
+       /* queue pairs which provide a symmetric crypto service */
+       [QAT_SERVICE_SYMMETRIC] = {
+               {
+                       .service_type = QAT_SERVICE_SYMMETRIC,
+                       .hw_bundle_num = 0,
+                       .tx_ring_num = 1,
+                       .rx_ring_num = 5,
+                       .tx_msg_size = 128,
+                       .rx_msg_size = 32,
+               }
+       },
+       /* queue pairs which provide a compression service */
+       [QAT_SERVICE_COMPRESSION] = {
+               {
+                       .service_type = QAT_SERVICE_COMPRESSION,
+                       .hw_bundle_num = 0,
+                       .tx_ring_num = 3,
+                       .rx_ring_num = 7,
+                       .tx_msg_size = 128,
+                       .rx_msg_size = 32,
+               }
+       }
+};
+
 static int qat_qp_check_queue_alignment(uint64_t phys_addr,
        uint32_t queue_size_bytes);
 static void qat_queue_delete(struct qat_queue *queue);
@@ -596,15 +634,23 @@ qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops)
        uint32_t head;
        uint32_t resp_counter = 0;
        uint8_t *resp_msg;
+       uint8_t hdr_flags;
 
        rx_queue = &(tmp_qp->rx_q);
        tx_queue = &(tmp_qp->tx_q);
        head = rx_queue->head;
        resp_msg = (uint8_t *)rx_queue->base_addr + rx_queue->head;
 
        while (*(uint32_t *)resp_msg != ADF_RING_EMPTY_SIG &&
                        resp_counter != nb_ops) {
 
+               hdr_flags = ((struct icp_qat_fw_comn_resp_hdr *)
+                               resp_msg)->hdr_flags;
+               if (unlikely(!ICP_QAT_FW_COMN_VALID_FLAG_GET(hdr_flags))) {
+                       /* Fatal firmware error */
+                       QAT_LOG(ERR, "QAT Firmware returned invalid response");
+                       return 0;
+               }
+
                if (tmp_qp->service_type == QAT_SERVICE_SYMMETRIC)
                        qat_sym_process_response(ops, resp_msg);
                else if (tmp_qp->service_type == QAT_SERVICE_COMPRESSION)
@@ -635,7 +681,7 @@ qat_dequeue_op_burst(void *qp, void **ops, uint16_t nb_ops)
        return resp_counter;
 }
 
-__attribute__((weak)) int
+__rte_weak int
 qat_comp_process_response(void **op __rte_unused, uint8_t *resp __rte_unused)
 {
        return  0;
index 69f8a61..6f1525e 100644 (file)
@@ -85,6 +85,7 @@ struct qat_qp {
 } __rte_cache_aligned;
 
 extern const struct qat_qp_hw_data qat_gen1_qps[][ADF_MAX_QPS_ON_ANY_SERVICE];
+extern const struct qat_qp_hw_data qat_gen3_qps[][ADF_MAX_QPS_ON_ANY_SERVICE];
 
 uint16_t
 qat_enqueue_op_burst(void *qp, void **ops, uint16_t nb_ops);
index 1e74db4..04c3d75 100644 (file)
  * ZIP compression coding Enumeration
  * Enumerates ZIP_INST_S[CC].
  */
-enum {
+enum zip_cc {
        ZIP_CC_DEFAULT = 0,
        ZIP_CC_DYN_HUFF,
        ZIP_CC_FIXED_HUFF,
        ZIP_CC_LZS
-} zip_cc;
+};
 
 /**
  * Register (NCB) zip_vq#_ena
index 99a38d0..3abefd1 100644 (file)
@@ -79,7 +79,7 @@ int octtx_zip_logtype_driver;
        ZIP_PMD_LOG(ERR, fmt, ## args)
 
 /* resources required to process stream */
-enum {
+enum NUM_BUFS_PER_STREAM {
        RES_BUF = 0,
        CMD_BUF,
        HASH_CTX_BUF,
@@ -88,7 +88,7 @@ enum {
        OUT_DATA_BUF,
        HISTORY_DATA_BUF,
        MAX_BUFS_PER_STREAM
-} NUM_BUFS_PER_STREAM;
+};
 
 struct zip_stream;
 struct zipvf_qp;
@@ -106,7 +106,7 @@ struct zip_stream {
        comp_func_t func;
        /* function to process comp operation */
        void *bufs[MAX_BUFS_PER_STREAM];
-} _rte_cache_aligned;
+} __rte_cache_aligned;
 
 
 /**
index 9d13f93..67ff506 100644 (file)
@@ -533,7 +533,7 @@ zip_pmd_dequeue_burst_sync(void *queue_pair,
        return nb_dequeued;
 }
 
-struct rte_compressdev_ops octtx_zip_pmd_ops = {
+static struct rte_compressdev_ops octtx_zip_pmd_ops = {
                .dev_configure          = zip_pmd_config,
                .dev_start              = zip_pmd_start,
                .dev_stop               = zip_pmd_stop,
index 38c8a5b..d70c594 100644 (file)
@@ -145,7 +145,6 @@ qat_comp_process_response(void **op, uint8_t *resp)
                rx_op->debug_status =
                        *((uint16_t *)(&resp_msg->comn_resp.comn_error));
        } else {
-               struct qat_comp_xform *qat_xform = rx_op->private_xform;
                struct icp_qat_fw_resp_comp_pars *comp_resp =
                  (struct icp_qat_fw_resp_comp_pars *)&resp_msg->comp_resp_pars;
 
@@ -193,7 +192,7 @@ static void qat_comp_create_req_hdr(struct icp_qat_fw_comn_req_hdr *header,
 }
 
 static int qat_comp_create_templates(struct qat_comp_xform *qat_xform,
-                       const struct rte_memzone *interm_buff_mz __rte_unused,
+                       const struct rte_memzone *interm_buff_mz,
                        const struct rte_comp_xform *xform)
 {
        struct icp_qat_fw_comp_req *comp_req;
@@ -281,10 +280,20 @@ static int qat_comp_create_templates(struct qat_comp_xform *qat_xform,
                ICP_QAT_FW_COMN_CURR_ID_SET(&comp_req->comp_cd_ctrl,
                                            ICP_QAT_FW_SLICE_COMP);
        } else if (qat_xform->qat_comp_request_type ==
-                  QAT_COMP_REQUEST_DYNAMIC_COMP_STATELESS) {
+                       QAT_COMP_REQUEST_DYNAMIC_COMP_STATELESS) {
 
-               QAT_LOG(ERR, "Dynamic huffman encoding not supported");
-               return -EINVAL;
+               ICP_QAT_FW_COMN_NEXT_ID_SET(&comp_req->comp_cd_ctrl,
+                               ICP_QAT_FW_SLICE_XLAT);
+               ICP_QAT_FW_COMN_CURR_ID_SET(&comp_req->comp_cd_ctrl,
+                               ICP_QAT_FW_SLICE_COMP);
+
+               ICP_QAT_FW_COMN_NEXT_ID_SET(&comp_req->u2.xlt_cd_ctrl,
+                               ICP_QAT_FW_SLICE_DRAM_WR);
+               ICP_QAT_FW_COMN_CURR_ID_SET(&comp_req->u2.xlt_cd_ctrl,
+                               ICP_QAT_FW_SLICE_XLAT);
+
+               comp_req->u1.xlt_pars.inter_buff_ptr =
+                               interm_buff_mz->phys_addr;
        }
 
 #if RTE_LOG_DP_LEVEL >= RTE_LOG_DEBUG
@@ -335,27 +344,35 @@ qat_comp_private_xform_create(struct rte_compressdev *dev,
                        (struct qat_comp_xform *)*private_xform;
 
        if (xform->type == RTE_COMP_COMPRESS) {
-               if (xform->compress.deflate.huffman ==
-                               RTE_COMP_HUFFMAN_DYNAMIC) {
-                       QAT_LOG(ERR,
-                       "QAT device doesn't support dynamic compression");
-                       return -ENOTSUP;
-               }
 
                if (xform->compress.deflate.huffman == RTE_COMP_HUFFMAN_FIXED ||
                  ((xform->compress.deflate.huffman == RTE_COMP_HUFFMAN_DEFAULT)
                                   && qat->interm_buff_mz == NULL))
-
                        qat_xform->qat_comp_request_type =
                                        QAT_COMP_REQUEST_FIXED_COMP_STATELESS;
 
+               else if ((xform->compress.deflate.huffman ==
+                               RTE_COMP_HUFFMAN_DYNAMIC ||
+                               xform->compress.deflate.huffman ==
+                                               RTE_COMP_HUFFMAN_DEFAULT) &&
+                               qat->interm_buff_mz != NULL)
+
+                       qat_xform->qat_comp_request_type =
+                                       QAT_COMP_REQUEST_DYNAMIC_COMP_STATELESS;
+
+               else {
+                       QAT_LOG(ERR,
+                                       "IM buffers needed for dynamic deflate. Set size in config file");
+                       return -EINVAL;
+               }
+
+               qat_xform->checksum_type = xform->compress.chksum;
 
        } else {
                qat_xform->qat_comp_request_type = QAT_COMP_REQUEST_DECOMPRESS;
+               qat_xform->checksum_type = xform->decompress.chksum;
        }
 
-       qat_xform->checksum_type = xform->compress.chksum;
-
        if (qat_comp_create_templates(qat_xform, qat->interm_buff_mz, xform)) {
                QAT_LOG(ERR, "QAT: Problem with setting compression");
                return -EINVAL;
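
The branch above reduces to a small decision table; this summary is derived from the code itself, not from QAT documentation:

```c
/* huffman setting   interm_buff_mz   resulting request type
 * FIXED             (any)            QAT_COMP_REQUEST_FIXED_COMP_STATELESS
 * DEFAULT           NULL             QAT_COMP_REQUEST_FIXED_COMP_STATELESS
 * DEFAULT           set              QAT_COMP_REQUEST_DYNAMIC_COMP_STATELESS
 * DYNAMIC           set              QAT_COMP_REQUEST_DYNAMIC_COMP_STATELESS
 * DYNAMIC           NULL             -EINVAL (IM buffers required)
 */
```
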
index 8d315ef..99a4462 100644 (file)
 #include "icp_qat_fw_comp.h"
 #include "icp_qat_fw_la.h"
 
+#define QAT_64_BYTE_ALIGN_MASK (~0x3f)
+#define QAT_64_BYTE_ALIGN (64)
+#define QAT_NUM_BUFS_IN_IM_SGL 1
+
 #define ERR_CODE_QAT_COMP_WRONG_FW -99
 
 enum qat_comp_request_type {
@@ -24,6 +28,15 @@ enum qat_comp_request_type {
        REQ_COMP_END
 };
 
+struct array_of_ptrs {
+       phys_addr_t pointer[0];
+};
+
+struct qat_inter_sgl {
+       qat_sgl_hdr;
+       struct qat_flat_buf buffers[QAT_NUM_BUFS_IN_IM_SGL];
+} __rte_packed __rte_cache_aligned;
+
 struct qat_comp_sgl {
        qat_sgl_hdr;
        struct qat_flat_buf buffers[RTE_PMD_QAT_COMP_SGL_MAX_SEGMENTS];
index b89975f..01dd736 100644 (file)
@@ -14,6 +14,7 @@ static const struct rte_compressdev_capabilities qat_comp_gen_capabilities[] = {
                                RTE_COMP_FF_CRC32_ADLER32_CHECKSUM |
                                RTE_COMP_FF_SHAREABLE_PRIV_XFORM |
                                RTE_COMP_FF_HUFFMAN_FIXED |
+                               RTE_COMP_FF_HUFFMAN_DYNAMIC |
                                RTE_COMP_FF_OOP_SGL_IN_SGL_OUT |
                                RTE_COMP_FF_OOP_SGL_IN_LB_OUT |
                                RTE_COMP_FF_OOP_LB_IN_SGL_OUT,
@@ -112,7 +113,7 @@ qat_comp_qp_setup(struct rte_compressdev *dev, uint16_t qp_id,
 
        /* store a link to the qp in the qat_pci_device */
        qat_private->qat_dev->qps_in_use[QAT_SERVICE_COMPRESSION][qp_id]
-                                                       = *qp_addr;
+                                                               = *qp_addr;
 
        qp = (struct qat_qp *)*qp_addr;
 
@@ -135,6 +136,103 @@ qat_comp_qp_setup(struct rte_compressdev *dev, uint16_t qp_id,
        return ret;
 }
 
+
+#define QAT_IM_BUFFER_DEBUG 0
+static const struct rte_memzone *
+qat_comp_setup_inter_buffers(struct qat_comp_dev_private *comp_dev,
+                             uint32_t buff_size)
+{
+       char inter_buff_mz_name[RTE_MEMZONE_NAMESIZE];
+       const struct rte_memzone *memzone;
+       uint8_t *mz_start = NULL;
+       rte_iova_t mz_start_phys = 0;
+       struct array_of_ptrs *array_of_pointers;
+       int size_of_ptr_array;
+       uint32_t full_size;
+       uint32_t offset_of_sgls, offset_of_flat_buffs = 0;
+       int i;
+       int num_im_sgls = qat_gen_config[
+               comp_dev->qat_dev->qat_dev_gen].comp_num_im_bufs_required;
+
+       QAT_LOG(DEBUG, "QAT COMP device %s needs %d sgls",
+                               comp_dev->qat_dev->name, num_im_sgls);
+       snprintf(inter_buff_mz_name, RTE_MEMZONE_NAMESIZE,
+                               "%s_inter_buff", comp_dev->qat_dev->name);
+       memzone = rte_memzone_lookup(inter_buff_mz_name);
+       if (memzone != NULL) {
+               QAT_LOG(DEBUG, "QAT COMP im buffer memzone created already");
+               return memzone;
+       }
+
+       /* Create a memzone to hold intermediate buffers and associated
+        * meta-data needed by the firmware. The memzone contains:
+        *  - a list of num_im_sgls physical pointers to sgls
+        *  - the num_im_sgl sgl structures, each pointing to 2 flat buffers
+        *  - the flat buffers: num_im_sgl * 2
+        * where num_im_sgls depends on the hardware generation of the device
+        */
+
+       size_of_ptr_array = num_im_sgls * sizeof(phys_addr_t);
+       offset_of_sgls = (size_of_ptr_array + (~QAT_64_BYTE_ALIGN_MASK))
+                       & QAT_64_BYTE_ALIGN_MASK;
+       offset_of_flat_buffs =
+           offset_of_sgls + num_im_sgls * sizeof(struct qat_inter_sgl);
+       full_size = offset_of_flat_buffs +
+                       num_im_sgls * buff_size * QAT_NUM_BUFS_IN_IM_SGL;
+
+       memzone = rte_memzone_reserve_aligned(inter_buff_mz_name, full_size,
+                       comp_dev->compressdev->data->socket_id,
+                       RTE_MEMZONE_2MB, QAT_64_BYTE_ALIGN);
+       if (memzone == NULL) {
+               QAT_LOG(ERR, "Can't allocate intermediate buffers"
+                               " for device %s", comp_dev->qat_dev->name);
+               return NULL;
+       }
+
+       mz_start = (uint8_t *)memzone->addr;
+       mz_start_phys = memzone->phys_addr;
+       QAT_LOG(DEBUG, "Memzone %s: addr = %p, phys = 0x%"PRIx64
+                       ", size required %u, size created %zu",
+                       inter_buff_mz_name, mz_start, mz_start_phys,
+                       full_size, memzone->len);
+
+       array_of_pointers = (struct array_of_ptrs *)mz_start;
+       for (i = 0; i < num_im_sgls; i++) {
+               uint32_t curr_sgl_offset =
+                   offset_of_sgls + i * sizeof(struct qat_inter_sgl);
+               struct qat_inter_sgl *sgl =
+                   (struct qat_inter_sgl *)(mz_start + curr_sgl_offset);
+               array_of_pointers->pointer[i] = mz_start_phys + curr_sgl_offset;
+
+               sgl->num_bufs = QAT_NUM_BUFS_IN_IM_SGL;
+               sgl->num_mapped_bufs = 0;
+               sgl->resrvd = 0;
+               sgl->buffers[0].addr = mz_start_phys + offset_of_flat_buffs +
+                       ((i * QAT_NUM_BUFS_IN_IM_SGL) * buff_size);
+               sgl->buffers[0].len = buff_size;
+               sgl->buffers[0].resrvd = 0;
+               sgl->buffers[1].addr = mz_start_phys + offset_of_flat_buffs +
+                       (((i * QAT_NUM_BUFS_IN_IM_SGL) + 1) * buff_size);
+               sgl->buffers[1].len = buff_size;
+               sgl->buffers[1].resrvd = 0;
+
+#if QAT_IM_BUFFER_DEBUG
+               QAT_LOG(DEBUG, "  : phys addr of sgl[%i] in array_of_pointers"
+                           "= 0x%"PRIx64, i, array_of_pointers->pointer[i]);
+               QAT_LOG(DEBUG, "  : virt address of sgl[%i] = %p", i, sgl);
+               QAT_LOG(DEBUG, "  : sgl->buffers[0].addr = 0x%"PRIx64", len=%d",
+                       sgl->buffers[0].addr, sgl->buffers[0].len);
+               QAT_LOG(DEBUG, "  : sgl->buffers[1].addr = 0x%"PRIx64", len=%d",
+                       sgl->buffers[1].addr, sgl->buffers[1].len);
+#endif
+       }
+#if QAT_IM_BUFFER_DEBUG
+       QAT_DP_HEXDUMP_LOG(DEBUG,  "IM buffer memzone start:",
+                       mz_start, offset_of_flat_buffs + 32);
+#endif
+       return memzone;
+}
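
The offset rounding above is the usual round-up-to-a-power-of-two idiom; a standalone sketch of the same computation (the helper name is hypothetical, values illustrative):

```c
#include <stdint.h>

#define QAT_64_BYTE_ALIGN_MASK (~0x3f)

/* Round x up to the next 64-byte boundary, as done for offset_of_sgls:
 * e.g. 12 sgl pointers * 8 B = 96 B rounds up to 128 B.
 */
static inline uint32_t
align64_up(uint32_t x)
{
	return (x + (~QAT_64_BYTE_ALIGN_MASK)) & QAT_64_BYTE_ALIGN_MASK;
}
```
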
+
 static struct rte_mempool *
 qat_comp_create_xform_pool(struct qat_comp_dev_private *comp_dev,
                              uint32_t num_elements)
@@ -176,6 +274,12 @@ qat_comp_create_xform_pool(struct qat_comp_dev_private *comp_dev,
 static void
 _qat_comp_dev_config_clear(struct qat_comp_dev_private *comp_dev)
 {
+       /* Free intermediate buffers */
+       if (comp_dev->interm_buff_mz) {
+               rte_memzone_free(comp_dev->interm_buff_mz);
+               comp_dev->interm_buff_mz = NULL;
+       }
+
        /* Free private_xform pool */
        if (comp_dev->xformpool) {
                /* Free internal mempool for private xforms */
@@ -197,6 +301,21 @@ qat_comp_dev_config(struct rte_compressdev *dev,
                return -EINVAL;
        }
 
+       if (RTE_PMD_QAT_COMP_IM_BUFFER_SIZE == 0) {
+               QAT_LOG(WARNING,
+                       "RTE_PMD_QAT_COMP_IM_BUFFER_SIZE = 0 in config file, so"
+                       " QAT device can't be used for Dynamic Deflate. "
+                       "Did you really intend to do this?");
+       } else {
+               comp_dev->interm_buff_mz =
+                               qat_comp_setup_inter_buffers(comp_dev,
+                                       RTE_PMD_QAT_COMP_IM_BUFFER_SIZE);
+               if (comp_dev->interm_buff_mz == NULL) {
+                       ret = -ENOMEM;
+                       goto error_out;
+               }
+       }
+
        comp_dev->xformpool = qat_comp_create_xform_pool(comp_dev,
                                        config->max_nb_priv_xforms);
        if (comp_dev->xformpool == NULL) {
@@ -348,6 +467,16 @@ static struct rte_compressdev_ops compress_qat_ops = {
        .private_xform_free     = qat_comp_private_xform_free
 };
 
+/* An rte_driver is needed in the registration of the device with compressdev.
+ * The QAT PCI device's own rte_driver can't be used, as its name represents
+ * the whole PCI device with all services. Think of this as a holder for a
+ * name for the compression part of the PCI device.
+ */
+static const char qat_comp_drv_name[] = RTE_STR(COMPRESSDEV_NAME_QAT_PMD);
+static const struct rte_driver compdev_qat_driver = {
+       .name = qat_comp_drv_name,
+       .alias = qat_comp_drv_name
+};
 int
 qat_comp_dev_create(struct qat_pci_device *qat_pci_dev)
 {
@@ -355,6 +484,10 @@ qat_comp_dev_create(struct qat_pci_device *qat_pci_dev)
                QAT_LOG(ERR, "Compression PMD not supported on QAT dh895xcc");
                return 0;
        }
+       if (qat_pci_dev->qat_dev_gen == QAT_GEN3) {
+               QAT_LOG(ERR, "Compression PMD not supported on QAT c4xxx");
+               return 0;
+       }
 
        struct rte_compressdev_pmd_init_params init_params = {
                .name = "",
@@ -368,8 +501,14 @@ qat_comp_dev_create(struct qat_pci_device *qat_pci_dev)
                        qat_pci_dev->name, "comp");
        QAT_LOG(DEBUG, "Creating QAT COMP device %s", name);
 
+       /* Populate subset device to use in compressdev device creation */
+       qat_pci_dev->comp_rte_dev.driver = &compdev_qat_driver;
+       qat_pci_dev->comp_rte_dev.numa_node =
+                                       qat_pci_dev->pci_dev->device.numa_node;
+       qat_pci_dev->comp_rte_dev.devargs = NULL;
+
        compressdev = rte_compressdev_pmd_create(name,
-                       &qat_pci_dev->pci_dev->device,
+                       &(qat_pci_dev->comp_rte_dev),
                        sizeof(struct qat_comp_dev_private),
                        &init_params);
 
@@ -391,6 +530,7 @@ qat_comp_dev_create(struct qat_pci_device *qat_pci_dev)
        switch (qat_pci_dev->qat_dev_gen) {
        case QAT_GEN1:
        case QAT_GEN2:
+       case QAT_GEN3:
                comp_dev->qat_dev_capabilities = qat_comp_gen_capabilities;
                break;
        default:
index 9ad2a28..b8299d4 100644 (file)
@@ -12,6 +12,9 @@
 
 #include "qat_device.h"
 
+/**< Intel(R) QAT Compression PMD driver name */
+#define COMPRESSDEV_NAME_QAT_PMD       compress_qat
+
 /** private data structure for a QAT compression device.
  * This QAT device is a device offering only a compression service,
  * there can be one of these on each qat_pci_device (VF).
index c480cbd..009f844 100644 (file)
@@ -7,6 +7,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_GCM) += aesni_gcm
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_AESNI_MB) += aesni_mb
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO) += armv8
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_CCP) += ccp
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO) += octeontx
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_OPENSSL) += openssl
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_CRYPTO_SCHEDULER) += scheduler
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_SNOW3G) += snow3g
@@ -14,12 +15,15 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_KASUMI) += kasumi
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_ZUC) += zuc
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_MVSAM_CRYPTO) += mvsam
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_NULL_CRYPTO) += null
+ifeq ($(CONFIG_RTE_LIBRTE_SECURITY),y)
 ifeq ($(CONFIG_RTE_EAL_VFIO)$(CONFIG_RTE_LIBRTE_FSLMC_BUS),yy)
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC) += dpaa2_sec
-endif
+endif # CONFIG_RTE_LIBRTE_FSLMC_BUS
 ifeq ($(CONFIG_RTE_LIBRTE_DPAA_BUS),y)
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_DPAA_SEC) += dpaa_sec
-endif
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_CAAM_JR) += caam_jr
+endif # CONFIG_RTE_LIBRTE_DPAA_BUS
+endif # CONFIG_RTE_LIBRTE_SECURITY
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_VIRTIO_CRYPTO) += virtio
 
 include $(RTE_SDK)/mk/rte.subdir.mk
index 752e0cd..ebdf7c3 100644 (file)
@@ -23,7 +23,6 @@ aesni_gcm_set_session_parameters(const struct aesni_gcm_ops *gcm_ops,
 {
        const struct rte_crypto_sym_xform *auth_xform;
        const struct rte_crypto_sym_xform *aead_xform;
-       uint16_t digest_length;
        uint8_t key_length;
        uint8_t *key;
 
@@ -47,7 +46,7 @@ aesni_gcm_set_session_parameters(const struct aesni_gcm_ops *gcm_ops,
 
                key_length = auth_xform->auth.key.length;
                key = auth_xform->auth.key.data;
-               digest_length = auth_xform->auth.digest_length;
+               sess->req_digest_length = auth_xform->auth.digest_length;
 
        /* AES-GCM */
        } else if (xform->type == RTE_CRYPTO_SYM_XFORM_AEAD) {
@@ -73,7 +72,7 @@ aesni_gcm_set_session_parameters(const struct aesni_gcm_ops *gcm_ops,
                key = aead_xform->aead.key.data;
 
                sess->aad_length = aead_xform->aead.aad_length;
-               digest_length = aead_xform->aead.digest_length;
+               sess->req_digest_length = aead_xform->aead.digest_length;
        } else {
                AESNI_GCM_LOG(ERR, "Wrong xform type, has to be AEAD or authentication");
                return -ENOTSUP;
@@ -106,13 +105,28 @@ aesni_gcm_set_session_parameters(const struct aesni_gcm_ops *gcm_ops,
        gcm_ops[sess->key].precomp(key, &sess->gdata_key);
 
        /* Digest check */
-       if (digest_length != 16 &&
-                       digest_length != 12 &&
-                       digest_length != 8) {
+       if (sess->req_digest_length > 16) {
                AESNI_GCM_LOG(ERR, "Invalid digest length");
                return -EINVAL;
        }
-       sess->digest_length = digest_length;
+       /*
+        * The multi-buffer lib supports digest sizes from 4 to 16 bytes
+        * in version 0.50, and sizes of 8, 12 and 16 bytes
+        * in version 0.49.
+        * If the requested size differs, generate the full digest
+        * (16 bytes) in a temporary location and then memcpy
+        * the requested number of bytes.
+        */
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+       if (sess->req_digest_length < 4)
+#else
+       if (sess->req_digest_length != 16 &&
+                       sess->req_digest_length != 12 &&
+                       sess->req_digest_length != 8)
+#endif
+               sess->gen_digest_length = 16;
+       else
+               sess->gen_digest_length = sess->req_digest_length;
 
        return 0;
 }
@@ -180,6 +194,7 @@ process_gcm_crypto_op(struct aesni_gcm_qp *qp, struct rte_crypto_op *op,
        struct rte_mbuf *m_src = sym_op->m_src;
        uint32_t offset, data_offset, data_length;
        uint32_t part_len, total_len, data_len;
+       uint8_t *tag;
 
        if (session->op == AESNI_GCM_OP_AUTHENTICATED_ENCRYPTION ||
                        session->op == AESNI_GCM_OP_AUTHENTICATED_DECRYPTION) {
@@ -225,17 +240,8 @@ process_gcm_crypto_op(struct aesni_gcm_qp *qp, struct rte_crypto_op *op,
 
        iv_ptr = rte_crypto_op_ctod_offset(op, uint8_t *,
                                session->iv.offset);
-       /*
-        * GCM working in 12B IV mode => 16B pre-counter block we need
-        * to set BE LSB to 1, driver expects that 16B is allocated
-        */
-       if (session->iv.length == 12) {
-               uint32_t *iv_padd = (uint32_t *)&(iv_ptr[12]);
-               *iv_padd = rte_bswap32(1);
-       }
 
        if (session->op == AESNI_GCM_OP_AUTHENTICATED_ENCRYPTION) {
-
                qp->ops[session->key].init(&session->gdata_key,
                                &qp->gdata_ctx,
                                iv_ptr,
@@ -263,13 +269,16 @@ process_gcm_crypto_op(struct aesni_gcm_qp *qp, struct rte_crypto_op *op,
                        total_len -= part_len;
                }
 
+               if (session->req_digest_length != session->gen_digest_length)
+                       tag = qp->temp_digest;
+               else
+                       tag = sym_op->aead.digest.data;
+
                qp->ops[session->key].finalize(&session->gdata_key,
                                &qp->gdata_ctx,
-                               sym_op->aead.digest.data,
-                               (uint64_t)session->digest_length);
+                               tag,
+                               session->gen_digest_length);
        } else if (session->op == AESNI_GCM_OP_AUTHENTICATED_DECRYPTION) {
-               uint8_t *auth_tag = qp->temp_digest;
-
                qp->ops[session->key].init(&session->gdata_key,
                                &qp->gdata_ctx,
                                iv_ptr,
@@ -298,33 +307,41 @@ process_gcm_crypto_op(struct aesni_gcm_qp *qp, struct rte_crypto_op *op,
                        total_len -= part_len;
                }
 
+               tag = qp->temp_digest;
                qp->ops[session->key].finalize(&session->gdata_key,
                                &qp->gdata_ctx,
-                               auth_tag,
-                               (uint64_t)session->digest_length);
+                               tag,
+                               session->gen_digest_length);
        } else if (session->op == AESNI_GMAC_OP_GENERATE) {
                qp->ops[session->key].init(&session->gdata_key,
                                &qp->gdata_ctx,
                                iv_ptr,
                                src,
                                (uint64_t)data_length);
+               if (session->req_digest_length != session->gen_digest_length)
+                       tag = qp->temp_digest;
+               else
+                       tag = sym_op->auth.digest.data;
                qp->ops[session->key].finalize(&session->gdata_key,
                                &qp->gdata_ctx,
-                               sym_op->auth.digest.data,
-                               (uint64_t)session->digest_length);
+                               tag,
+                               session->gen_digest_length);
        } else { /* AESNI_GMAC_OP_VERIFY */
-               uint8_t *auth_tag = qp->temp_digest;
-
                qp->ops[session->key].init(&session->gdata_key,
                                &qp->gdata_ctx,
                                iv_ptr,
                                src,
                                (uint64_t)data_length);
 
+               /*
+                * Generate always 16 bytes and later compare only
+                * the bytes passed.
+                */
+               tag = qp->temp_digest;
                qp->ops[session->key].finalize(&session->gdata_key,
                                &qp->gdata_ctx,
-                               auth_tag,
-                               (uint64_t)session->digest_length);
+                               tag,
+                               session->gen_digest_length);
        }
 
        return 0;
@@ -361,13 +378,22 @@ post_process_gcm_crypto_op(struct aesni_gcm_qp *qp,
 
 #ifdef RTE_LIBRTE_PMD_AESNI_GCM_DEBUG
                rte_hexdump(stdout, "auth tag (orig):",
-                               digest, session->digest_length);
+                               digest, session->req_digest_length);
                rte_hexdump(stdout, "auth tag (calc):",
-                               tag, session->digest_length);
+                               tag, session->req_digest_length);
 #endif
 
-               if (memcmp(tag, digest, session->digest_length) != 0)
+               if (memcmp(tag, digest, session->req_digest_length) != 0)
                        op->status = RTE_CRYPTO_OP_STATUS_AUTH_FAILED;
+       } else {
+               if (session->req_digest_length != session->gen_digest_length) {
+                       if (session->op == AESNI_GCM_OP_AUTHENTICATED_ENCRYPTION)
+                               memcpy(op->sym->aead.digest.data, qp->temp_digest,
+                                               session->req_digest_length);
+                       else
+                               memcpy(op->sym->auth.digest.data, qp->temp_digest,
+                                               session->req_digest_length);
+               }
        }
 }
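
The pattern introduced above, generating the tag at the length the library can produce and copying back only the requested bytes, can be sketched in isolation as follows. This is a hedged sketch of the idea, not the PMD's code; gen_tag() and the variable names are illustrative placeholders:

    /*
     * Hedged sketch of the truncated-digest handling; gen_tag(),
     * req_len, gen_len, temp and user_digest are illustrative only.
     */
    uint8_t *out = (req_len == gen_len) ? user_digest : temp;
    gen_tag(ctx, out, gen_len);               /* always gen_len bytes */
    if (out == temp)
        memcpy(user_digest, temp, req_len);   /* trim to what was asked */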
 
index b6b4dd0..c343a39 100644 (file)
@@ -24,9 +24,9 @@ static const struct rte_cryptodev_capabilities aesni_gcm_pmd_capabilities[] = {
                                        .increment = 8
                                },
                                .digest_size = {
-                                       .min = 8,
+                                       .min = 1,
                                        .max = 16,
-                                       .increment = 4
+                                       .increment = 1
                                },
                                .iv_size = {
                                        .min = 12,
@@ -49,9 +49,9 @@ static const struct rte_cryptodev_capabilities aesni_gcm_pmd_capabilities[] = {
                                        .increment = 8
                                },
                                .digest_size = {
-                                       .min = 8,
+                                       .min = 1,
                                        .max = 16,
-                                       .increment = 4
+                                       .increment = 1
                                },
                                .aad_size = {
                                        .min = 0,
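
With digest_size now ranging from 1 to 16 in 1-byte increments, an application can probe the relaxed range through the standard capability API before creating a session. A minimal sketch, where the key, AAD and IV sizes passed are example values rather than requirements of this change:

    #include <rte_cryptodev.h>

    /*
     * Hedged sketch: check whether a device accepts a given AES-GCM
     * digest length. Key 16B, AAD 0B and IV 12B are example values.
     */
    static int
    gcm_digest_ok(uint8_t dev_id, uint16_t digest_len)
    {
        struct rte_cryptodev_sym_capability_idx idx = {
            .type = RTE_CRYPTO_SYM_XFORM_AEAD,
            .algo.aead = RTE_CRYPTO_AEAD_AES_GCM,
        };
        const struct rte_cryptodev_symmetric_capability *cap =
            rte_cryptodev_sym_capability_get(dev_id, &idx);

        if (cap == NULL)
            return 0;
        return rte_cryptodev_sym_capability_check_aead(cap,
                16, digest_len, 0, 12) == 0;
    }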
index c13a12a..92b0413 100644 (file)
@@ -76,8 +76,10 @@ struct aesni_gcm_session {
        /**< IV parameters */
        uint16_t aad_length;
        /**< AAD length */
-       uint16_t digest_length;
-       /**< Digest length */
+       uint16_t req_digest_length;
+       /**< Requested digest length */
+       uint16_t gen_digest_length;
+       /**< Generated digest length */
        enum aesni_gcm_operation op;
        /**< GCM operation type */
        enum aesni_gcm_key key;
diff --git a/drivers/crypto/aesni_gcm/meson.build b/drivers/crypto/aesni_gcm/meson.build
new file mode 100644 (file)
index 0000000..a02da1e
--- /dev/null
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+lib = cc.find_library('IPSec_MB', required: false)
+if not lib.found()
+       build = false
+else
+       ext_deps += lib
+endif
+
+sources = files('aesni_gcm_pmd.c', 'aesni_gcm_pmd_ops.c')
+deps += ['bus_vdev']
index 5a1cba6..575d6a5 100644 (file)
 
 #include <intel-ipsec-mb.h>
 
+/*
+ * The IMB_VERSION_NUM macro was introduced in Multi-buffer version 0.50,
+ * so if the macro is not defined, the library version is 0.49.
+ */
+#if !defined(IMB_VERSION_NUM)
+#define IMB_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))
+#define IMB_VERSION_NUM IMB_VERSION(0, 49, 0)
+#endif
+
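
For reference, the fallback packs versions the same way the library macro does: IMB_VERSION(0, 49, 0) evaluates to (0 << 16) + (49 << 8) + 0 = 12544, which compares below IMB_VERSION(0, 50, 0) = 12800, so every `#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)` guard in this patch correctly drops the 0.50-only paths when building against 0.49 headers.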
 enum aesni_mb_vector_mode {
        RTE_AESNI_MB_NOT_SUPPORTED = 0,
        RTE_AESNI_MB_SSE,
@@ -39,6 +48,8 @@ typedef void (*aes_cmac_sub_key_gen_t)
                (const void *exp_key, void *k2, void *k3);
 typedef void (*aes_cmac_keyexp_t)
                (const void *key, void *keyexp);
+typedef void (*aes_gcm_keyexp_t)
+               (const void *key, struct gcm_key_data *keyexp);
 
 /** Multi-buffer library function pointer table */
 struct aesni_mb_op_fns {
@@ -86,8 +97,24 @@ struct aesni_mb_op_fns {
                        /**< AES CMAC subkey expansions */
                        aes_cmac_keyexp_t aes_cmac_expkey;
                        /**< AES CMAC key expansions */
+                       aes_gcm_keyexp_t aes_gcm_128;
+                       /**< AES GCM 128 key expansions */
+                       aes_gcm_keyexp_t aes_gcm_192;
+                       /**< AES GCM 192 key expansions */
+                       aes_gcm_keyexp_t aes_gcm_256;
+                       /**< AES GCM 256 key expansions */
                } keyexp;
                /**< Key expansion functions */
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+               struct {
+                       hash_fn_t sha1;
+                       hash_fn_t sha224;
+                       hash_fn_t sha256;
+                       hash_fn_t sha384;
+                       hash_fn_t sha512;
+               } multi_block;
+               /**< Multi-block hash functions */
+#endif
        } aux;
        /**< Auxiliary functions */
 };
@@ -104,7 +131,13 @@ static const struct aesni_mb_op_fns job_ops[] = {
                                },
                                .keyexp = {
                                        NULL
+                               },
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                               .multi_block = {
+                                       NULL
                                }
+#endif
+
                        }
                },
                [RTE_AESNI_MB_SSE] = {
@@ -130,8 +163,20 @@ static const struct aesni_mb_op_fns job_ops[] = {
                                        aes_keyexp_256_sse,
                                        aes_xcbc_expand_key_sse,
                                        aes_cmac_subkey_gen_sse,
-                                       aes_keyexp_128_enc_sse
+                                       aes_keyexp_128_enc_sse,
+                                       aes_gcm_pre_128_sse,
+                                       aes_gcm_pre_192_sse,
+                                       aes_gcm_pre_256_sse
+                               },
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                               .multi_block = {
+                                       sha1_sse,
+                                       sha224_sse,
+                                       sha256_sse,
+                                       sha384_sse,
+                                       sha512_sse
                                }
+#endif
                        }
                },
                [RTE_AESNI_MB_AVX] = {
@@ -157,8 +202,20 @@ static const struct aesni_mb_op_fns job_ops[] = {
                                        aes_keyexp_256_avx,
                                        aes_xcbc_expand_key_avx,
                                        aes_cmac_subkey_gen_avx,
-                                       aes_keyexp_128_enc_avx
+                                       aes_keyexp_128_enc_avx,
+                                       aes_gcm_pre_128_avx_gen2,
+                                       aes_gcm_pre_192_avx_gen2,
+                                       aes_gcm_pre_256_avx_gen2
+                               },
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                               .multi_block = {
+                                       sha1_avx,
+                                       sha224_avx,
+                                       sha256_avx,
+                                       sha384_avx,
+                                       sha512_avx
                                }
+#endif
                        }
                },
                [RTE_AESNI_MB_AVX2] = {
@@ -184,8 +241,20 @@ static const struct aesni_mb_op_fns job_ops[] = {
                                        aes_keyexp_256_avx2,
                                        aes_xcbc_expand_key_avx2,
                                        aes_cmac_subkey_gen_avx2,
-                                       aes_keyexp_128_enc_avx2
+                                       aes_keyexp_128_enc_avx2,
+                                       aes_gcm_pre_128_avx_gen4,
+                                       aes_gcm_pre_192_avx_gen4,
+                                       aes_gcm_pre_256_avx_gen4
+                               },
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                               .multi_block = {
+                                       sha1_avx2,
+                                       sha224_avx2,
+                                       sha256_avx2,
+                                       sha384_avx2,
+                                       sha512_avx2
                                }
+#endif
                        }
                },
                [RTE_AESNI_MB_AVX512] = {
@@ -211,8 +280,20 @@ static const struct aesni_mb_op_fns job_ops[] = {
                                        aes_keyexp_256_avx512,
                                        aes_xcbc_expand_key_avx512,
                                        aes_cmac_subkey_gen_avx512,
-                                       aes_keyexp_128_enc_avx512
+                                       aes_keyexp_128_enc_avx512,
+                                       aes_gcm_pre_128_avx_gen4,
+                                       aes_gcm_pre_192_avx_gen4,
+                                       aes_gcm_pre_256_avx_gen4
+                               },
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                               .multi_block = {
+                                       sha1_avx512,
+                                       sha224_avx512,
+                                       sha256_avx512,
+                                       sha384_avx512,
+                                       sha512_avx512
                                }
+#endif
                        }
                }
 };
diff --git a/drivers/crypto/aesni_mb/meson.build b/drivers/crypto/aesni_mb/meson.build
new file mode 100644 (file)
index 0000000..aae0995
--- /dev/null
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+lib = cc.find_library('IPSec_MB', required: false)
+if not lib.found()
+       build = false
+else
+       ext_deps += lib
+endif
+
+sources = files('rte_aesni_mb_pmd.c', 'rte_aesni_mb_pmd_ops.c')
+deps += ['bus_vdev']
index 93dc7a4..83250e3 100644 (file)
@@ -14,6 +14,9 @@
 
 #include "rte_aesni_mb_pmd_private.h"
 
+#define AES_CCM_DIGEST_MIN_LEN 4
+#define AES_CCM_DIGEST_MAX_LEN 16
+#define HMAC_MAX_BLOCK_SIZE 128
 static uint8_t cryptodev_driver_id;
 
 typedef void (*hash_one_block_t)(const void *data, void *digest);
@@ -83,7 +86,8 @@ aesni_mb_get_chain_order(const struct rte_crypto_sym_xform *xform)
        }
 
        if (xform->type == RTE_CRYPTO_SYM_XFORM_AEAD) {
-               if (xform->aead.algo == RTE_CRYPTO_AEAD_AES_CCM) {
+               if (xform->aead.algo == RTE_CRYPTO_AEAD_AES_CCM ||
+                               xform->aead.algo == RTE_CRYPTO_AEAD_AES_GCM) {
                        if (xform->aead.op == RTE_CRYPTO_AEAD_OP_ENCRYPT)
                                return AESNI_MB_OP_AEAD_CIPHER_HASH;
                        else
@@ -101,6 +105,8 @@ aesni_mb_set_session_auth_parameters(const struct aesni_mb_op_fns *mb_ops,
                const struct rte_crypto_sym_xform *xform)
 {
        hash_one_block_t hash_oneblock_fn;
+       unsigned int key_larger_block_size = 0;
+       uint8_t hashed_key[HMAC_MAX_BLOCK_SIZE] = { 0 };
 
        if (xform == NULL) {
                sess->auth.algo = NULL_HASH;
@@ -112,12 +118,23 @@ aesni_mb_set_session_auth_parameters(const struct aesni_mb_op_fns *mb_ops,
                return -1;
        }
 
+       /* Set the request digest size */
+       sess->auth.req_digest_len = xform->auth.digest_length;
+
        /* Select auth generate/verify */
        sess->auth.operation = xform->auth.op;
 
        /* Set Authentication Parameters */
        if (xform->auth.algo == RTE_CRYPTO_AUTH_AES_XCBC_MAC) {
                sess->auth.algo = AES_XCBC;
+
+               uint16_t xcbc_mac_digest_len =
+                       get_truncated_digest_byte_length(AES_XCBC);
+               if (sess->auth.req_digest_len != xcbc_mac_digest_len) {
+                       AESNI_MB_LOG(ERR, "Invalid digest size\n");
+                       return -EINVAL;
+               }
+               sess->auth.gen_digest_len = sess->auth.req_digest_len;
                (*mb_ops->aux.keyexp.aes_xcbc)(xform->auth.key.data,
                                sess->auth.xcbc.k1_expanded,
                                sess->auth.xcbc.k2, sess->auth.xcbc.k3);
@@ -126,6 +143,32 @@ aesni_mb_set_session_auth_parameters(const struct aesni_mb_op_fns *mb_ops,
 
        if (xform->auth.algo == RTE_CRYPTO_AUTH_AES_CMAC) {
                sess->auth.algo = AES_CMAC;
+
+               uint16_t cmac_digest_len = get_digest_byte_length(AES_CMAC);
+
+               if (sess->auth.req_digest_len > cmac_digest_len) {
+                       AESNI_MB_LOG(ERR, "Invalid digest size\n");
+                       return -EINVAL;
+               }
+               /*
+                * The Multi-buffer library supports digest sizes from
+                * 4 to 16 bytes in version 0.50, and only 12 and 16 bytes
+                * in version 0.49.
+                * If the requested size differs, generate the full digest
+                * (16 bytes) in a temporary location and then memcpy
+                * the requested number of bytes.
+                */
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+               if (sess->auth.req_digest_len < 4)
+#else
+               uint16_t cmac_trunc_digest_len =
+                               get_truncated_digest_byte_length(AES_CMAC);
+               if (sess->auth.req_digest_len != cmac_digest_len &&
+                               sess->auth.req_digest_len != cmac_trunc_digest_len)
+#endif
+                       sess->auth.gen_digest_len = cmac_digest_len;
+               else
+                       sess->auth.gen_digest_len = sess->auth.req_digest_len;
                (*mb_ops->aux.keyexp.aes_cmac_expkey)(xform->auth.key.data,
                                sess->auth.cmac.expkey);
 
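Worked example of the logic above: with a 0.50 library, a requested CMAC digest of 3 bytes is below the supported 4-byte minimum, so gen_digest_len is forced to the full 16 bytes and post-processing later copies back only the first 3; a 12-byte request is supported natively and passes through unchanged.
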
@@ -134,7 +177,6 @@ aesni_mb_set_session_auth_parameters(const struct aesni_mb_op_fns *mb_ops,
                return 0;
        }
 
-
        switch (xform->auth.algo) {
        case RTE_CRYPTO_AUTH_MD5_HMAC:
                sess->auth.algo = MD5;
@@ -143,34 +185,107 @@ aesni_mb_set_session_auth_parameters(const struct aesni_mb_op_fns *mb_ops,
        case RTE_CRYPTO_AUTH_SHA1_HMAC:
                sess->auth.algo = SHA1;
                hash_oneblock_fn = mb_ops->aux.one_block.sha1;
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+               if (xform->auth.key.length > get_auth_algo_blocksize(SHA1)) {
+                       mb_ops->aux.multi_block.sha1(
+                               xform->auth.key.data,
+                               xform->auth.key.length,
+                               hashed_key);
+                       key_larger_block_size = 1;
+               }
+#endif
                break;
        case RTE_CRYPTO_AUTH_SHA224_HMAC:
                sess->auth.algo = SHA_224;
                hash_oneblock_fn = mb_ops->aux.one_block.sha224;
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+               if (xform->auth.key.length > get_auth_algo_blocksize(SHA_224)) {
+                       mb_ops->aux.multi_block.sha224(
+                               xform->auth.key.data,
+                               xform->auth.key.length,
+                               hashed_key);
+                       key_larger_block_size = 1;
+               }
+#endif
                break;
        case RTE_CRYPTO_AUTH_SHA256_HMAC:
                sess->auth.algo = SHA_256;
                hash_oneblock_fn = mb_ops->aux.one_block.sha256;
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+               if (xform->auth.key.length > get_auth_algo_blocksize(SHA_256)) {
+                       mb_ops->aux.multi_block.sha256(
+                               xform->auth.key.data,
+                               xform->auth.key.length,
+                               hashed_key);
+                       key_larger_block_size = 1;
+               }
+#endif
                break;
        case RTE_CRYPTO_AUTH_SHA384_HMAC:
                sess->auth.algo = SHA_384;
                hash_oneblock_fn = mb_ops->aux.one_block.sha384;
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+               if (xform->auth.key.length > get_auth_algo_blocksize(SHA_384)) {
+                       mb_ops->aux.multi_block.sha384(
+                               xform->auth.key.data,
+                               xform->auth.key.length,
+                               hashed_key);
+                       key_larger_block_size = 1;
+               }
+#endif
                break;
        case RTE_CRYPTO_AUTH_SHA512_HMAC:
                sess->auth.algo = SHA_512;
                hash_oneblock_fn = mb_ops->aux.one_block.sha512;
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+               if (xform->auth.key.length > get_auth_algo_blocksize(SHA_512)) {
+                       mb_ops->aux.multi_block.sha512(
+                               xform->auth.key.data,
+                               xform->auth.key.length,
+                               hashed_key);
+                       key_larger_block_size = 1;
+               }
+#endif
                break;
        default:
                AESNI_MB_LOG(ERR, "Unsupported authentication algorithm selection");
                return -ENOTSUP;
        }
+       uint16_t trunc_digest_size =
+                       get_truncated_digest_byte_length(sess->auth.algo);
+       uint16_t full_digest_size =
+                       get_digest_byte_length(sess->auth.algo);
+
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+       if (sess->auth.req_digest_len > full_digest_size ||
+                       sess->auth.req_digest_len == 0) {
+#else
+       if (sess->auth.req_digest_len != trunc_digest_size) {
+#endif
+               AESNI_MB_LOG(ERR, "Invalid digest size\n");
+               return -EINVAL;
+       }
+
+       if (sess->auth.req_digest_len != trunc_digest_size &&
+                       sess->auth.req_digest_len != full_digest_size)
+               sess->auth.gen_digest_len = full_digest_size;
+       else
+               sess->auth.gen_digest_len = sess->auth.req_digest_len;
 
        /* Calculate Authentication precomputes */
-       calculate_auth_precomputes(hash_oneblock_fn,
+       if (key_larger_block_size) {
+               calculate_auth_precomputes(hash_oneblock_fn,
+                       sess->auth.pads.inner, sess->auth.pads.outer,
+                       hashed_key,
+                       xform->auth.key.length,
+                       get_auth_algo_blocksize(sess->auth.algo));
+       } else {
+               calculate_auth_precomputes(hash_oneblock_fn,
                        sess->auth.pads.inner, sess->auth.pads.outer,
                        xform->auth.key.data,
                        xform->auth.key.length,
                        get_auth_algo_blocksize(sess->auth.algo));
+       }
 
        return 0;
 }
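
The key_larger_block_size branches implement the standard RFC 2104 rule that an HMAC key longer than the hash block size is first hashed down before the ipad/opad precomputes are derived. A conceptual sketch, with sha_oneshot() and BLOCK_SIZE as placeholders for the multi_block hash functions and per-algorithm block sizes used above (the 0x36/0x5C pad values appear later in this patch as HMAC_IPAD_VALUE/HMAC_OPAD_VALUE):

    /*
     * Conceptual RFC 2104 key handling, not the PMD's code.
     * sha_oneshot() and BLOCK_SIZE are illustrative placeholders.
     */
    uint8_t k0[BLOCK_SIZE] = { 0 };
    if (key_len > BLOCK_SIZE)
        sha_oneshot(key, key_len, k0);   /* K0 = H(K), zero-padded */
    else
        memcpy(k0, key, key_len);        /* K0 = K, zero-padded */
    /* inner/outer pads are then k0 XOR 0x36 and k0 XOR 0x5C */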
@@ -330,7 +445,10 @@ aesni_mb_set_session_aead_parameters(const struct aesni_mb_op_fns *mb_ops,
                struct aesni_mb_session *sess,
                const struct rte_crypto_sym_xform *xform)
 {
-       aes_keyexp_t aes_keyexp_fn;
+       union {
+               aes_keyexp_t aes_keyexp_fn;
+               aes_gcm_keyexp_t aes_gcm_keyexp_fn;
+       } keyexp;
 
        switch (xform->aead.op) {
        case RTE_CRYPTO_AEAD_OP_ENCRYPT:
@@ -350,7 +468,53 @@ aesni_mb_set_session_aead_parameters(const struct aesni_mb_op_fns *mb_ops,
        case RTE_CRYPTO_AEAD_AES_CCM:
                sess->cipher.mode = CCM;
                sess->auth.algo = AES_CCM;
+
+               /* Check key length and choose key expansion function for AES */
+               switch (xform->aead.key.length) {
+               case AES_128_BYTES:
+                       sess->cipher.key_length_in_bytes = AES_128_BYTES;
+                       keyexp.aes_keyexp_fn = mb_ops->aux.keyexp.aes128;
+                       break;
+               default:
+                       AESNI_MB_LOG(ERR, "Invalid cipher key length");
+                       return -EINVAL;
+               }
+
+               /* Expanded cipher keys */
+               (*keyexp.aes_keyexp_fn)(xform->aead.key.data,
+                               sess->cipher.expanded_aes_keys.encode,
+                               sess->cipher.expanded_aes_keys.decode);
+               break;
+
+       case RTE_CRYPTO_AEAD_AES_GCM:
+               sess->cipher.mode = GCM;
+               sess->auth.algo = AES_GMAC;
+
+               switch (xform->aead.key.length) {
+               case AES_128_BYTES:
+                       sess->cipher.key_length_in_bytes = AES_128_BYTES;
+                       keyexp.aes_gcm_keyexp_fn =
+                                       mb_ops->aux.keyexp.aes_gcm_128;
+                       break;
+               case AES_192_BYTES:
+                       sess->cipher.key_length_in_bytes = AES_192_BYTES;
+                       keyexp.aes_gcm_keyexp_fn =
+                                       mb_ops->aux.keyexp.aes_gcm_192;
+                       break;
+               case AES_256_BYTES:
+                       sess->cipher.key_length_in_bytes = AES_256_BYTES;
+                       keyexp.aes_gcm_keyexp_fn =
+                                       mb_ops->aux.keyexp.aes_gcm_256;
+                       break;
+               default:
+                       AESNI_MB_LOG(ERR, "Invalid cipher key length");
+                       return -EINVAL;
+               }
+
+               (keyexp.aes_gcm_keyexp_fn)(xform->aead.key.data,
+                               &sess->cipher.gcm_key);
                break;
+
        default:
                AESNI_MB_LOG(ERR, "Unsupported aead mode parameter");
                return -ENOTSUP;
@@ -360,22 +524,15 @@ aesni_mb_set_session_aead_parameters(const struct aesni_mb_op_fns *mb_ops,
        sess->iv.offset = xform->aead.iv.offset;
        sess->iv.length = xform->aead.iv.length;
 
-       /* Check key length and choose key expansion function for AES */
-
-       switch (xform->aead.key.length) {
-       case AES_128_BYTES:
-               sess->cipher.key_length_in_bytes = AES_128_BYTES;
-               aes_keyexp_fn = mb_ops->aux.keyexp.aes128;
-               break;
-       default:
-               AESNI_MB_LOG(ERR, "Invalid cipher key length");
+       sess->auth.req_digest_len = xform->aead.digest_length;
+       /* CCM digests must be between 4 and 16 bytes and an even number */
+       if (sess->auth.req_digest_len < AES_CCM_DIGEST_MIN_LEN ||
+                       sess->auth.req_digest_len > AES_CCM_DIGEST_MAX_LEN ||
+                       (sess->auth.req_digest_len & 1) == 1) {
+               AESNI_MB_LOG(ERR, "Invalid digest size\n");
                return -EINVAL;
        }
-
-       /* Expanded cipher keys */
-       (*aes_keyexp_fn)(xform->aead.key.data,
-                       sess->cipher.expanded_aes_keys.encode,
-                       sess->cipher.expanded_aes_keys.decode);
+       sess->auth.gen_digest_len = sess->auth.req_digest_len;
 
        return 0;
 }
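
From the application side, the new GCM path is reached through an ordinary AEAD transform. A hedged example, assuming key and iv_offset are defined by the caller; the key, IV, digest and AAD sizes are illustrative values:

    /* Hedged example of an application-side AES-128-GCM transform */
    struct rte_crypto_sym_xform xform = {
        .type = RTE_CRYPTO_SYM_XFORM_AEAD,
        .aead = {
            .op = RTE_CRYPTO_AEAD_OP_ENCRYPT,
            .algo = RTE_CRYPTO_AEAD_AES_GCM,
            .key = { .data = key, .length = 16 },        /* AES-128 */
            .iv = { .offset = iv_offset, .length = 12 },
            .digest_length = 16,
            .aad_length = 8,
        },
    };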
@@ -397,19 +554,16 @@ aesni_mb_set_session_parameters(const struct aesni_mb_op_fns *mb_ops,
                sess->chain_order = HASH_CIPHER;
                auth_xform = xform;
                cipher_xform = xform->next;
-               sess->auth.digest_len = xform->auth.digest_length;
                break;
        case AESNI_MB_OP_CIPHER_HASH:
                sess->chain_order = CIPHER_HASH;
                auth_xform = xform->next;
                cipher_xform = xform;
-               sess->auth.digest_len = xform->auth.digest_length;
                break;
        case AESNI_MB_OP_HASH_ONLY:
                sess->chain_order = HASH_CIPHER;
                auth_xform = xform;
                cipher_xform = NULL;
-               sess->auth.digest_len = xform->auth.digest_length;
                break;
        case AESNI_MB_OP_CIPHER_ONLY:
                /*
@@ -428,13 +582,11 @@ aesni_mb_set_session_parameters(const struct aesni_mb_op_fns *mb_ops,
        case AESNI_MB_OP_AEAD_CIPHER_HASH:
                sess->chain_order = CIPHER_HASH;
                sess->aead.aad_len = xform->aead.aad_length;
-               sess->auth.digest_len = xform->aead.digest_length;
                aead_xform = xform;
                break;
        case AESNI_MB_OP_AEAD_HASH_CIPHER:
                sess->chain_order = HASH_CIPHER;
                sess->aead.aad_len = xform->aead.aad_length;
-               sess->auth.digest_len = xform->aead.digest_length;
                aead_xform = xform;
                break;
        case AESNI_MB_OP_NOT_SUPPORTED:
@@ -573,38 +725,62 @@ set_mb_job_params(JOB_AES_HMAC *job, struct aesni_mb_qp *qp,
 
        job->aes_key_len_in_bytes = session->cipher.key_length_in_bytes;
 
-       if (job->cipher_mode == DES3) {
-               job->aes_enc_key_expanded =
-                       session->cipher.exp_3des_keys.ks_ptr;
-               job->aes_dec_key_expanded =
-                       session->cipher.exp_3des_keys.ks_ptr;
-       } else {
-               job->aes_enc_key_expanded =
-                       session->cipher.expanded_aes_keys.encode;
-               job->aes_dec_key_expanded =
-                       session->cipher.expanded_aes_keys.decode;
-       }
-
-
-
-
        /* Set authentication parameters */
        job->hash_alg = session->auth.algo;
-       if (job->hash_alg == AES_XCBC) {
+
+       switch (job->hash_alg) {
+       case AES_XCBC:
                job->u.XCBC._k1_expanded = session->auth.xcbc.k1_expanded;
                job->u.XCBC._k2 = session->auth.xcbc.k2;
                job->u.XCBC._k3 = session->auth.xcbc.k3;
-       } else if (job->hash_alg == AES_CCM) {
+
+               job->aes_enc_key_expanded =
+                               session->cipher.expanded_aes_keys.encode;
+               job->aes_dec_key_expanded =
+                               session->cipher.expanded_aes_keys.decode;
+               break;
+
+       case AES_CCM:
                job->u.CCM.aad = op->sym->aead.aad.data + 18;
                job->u.CCM.aad_len_in_bytes = session->aead.aad_len;
-       } else if (job->hash_alg == AES_CMAC) {
+               job->aes_enc_key_expanded =
+                               session->cipher.expanded_aes_keys.encode;
+               job->aes_dec_key_expanded =
+                               session->cipher.expanded_aes_keys.decode;
+               break;
+
+       case AES_CMAC:
                job->u.CMAC._key_expanded = session->auth.cmac.expkey;
                job->u.CMAC._skey1 = session->auth.cmac.skey1;
                job->u.CMAC._skey2 = session->auth.cmac.skey2;
+               job->aes_enc_key_expanded =
+                               session->cipher.expanded_aes_keys.encode;
+               job->aes_dec_key_expanded =
+                               session->cipher.expanded_aes_keys.decode;
+               break;
 
-       } else {
+       case AES_GMAC:
+               job->u.GCM.aad = op->sym->aead.aad.data;
+               job->u.GCM.aad_len_in_bytes = session->aead.aad_len;
+               job->aes_enc_key_expanded = &session->cipher.gcm_key;
+               job->aes_dec_key_expanded = &session->cipher.gcm_key;
+               break;
+
+       default:
                job->u.HMAC._hashed_auth_key_xor_ipad = session->auth.pads.inner;
                job->u.HMAC._hashed_auth_key_xor_opad = session->auth.pads.outer;
+
+               if (job->cipher_mode == DES3) {
+                       job->aes_enc_key_expanded =
+                               session->cipher.exp_3des_keys.ks_ptr;
+                       job->aes_dec_key_expanded =
+                               session->cipher.exp_3des_keys.ks_ptr;
+               } else {
+                       job->aes_enc_key_expanded =
+                               session->cipher.expanded_aes_keys.encode;
+                       job->aes_dec_key_expanded =
+                               session->cipher.expanded_aes_keys.decode;
+               }
        }
 
        /* Mutable crypto operation parameters */
@@ -625,7 +801,7 @@ set_mb_job_params(JOB_AES_HMAC *job, struct aesni_mb_qp *qp,
                                rte_pktmbuf_data_len(op->sym->m_src));
        } else {
                m_dst = m_src;
-               if (job->hash_alg == AES_CCM)
+               if (job->hash_alg == AES_CCM || job->hash_alg == AES_GMAC)
                        m_offset = op->sym->aead.data.offset;
                else
                        m_offset = op->sym->cipher.data.offset;
@@ -637,32 +813,33 @@ set_mb_job_params(JOB_AES_HMAC *job, struct aesni_mb_qp *qp,
                job->auth_tag_output = qp->temp_digests[*digest_idx];
                *digest_idx = (*digest_idx + 1) % MAX_JOBS;
        } else {
-               if (job->hash_alg == AES_CCM)
+               if (job->hash_alg == AES_CCM || job->hash_alg == AES_GMAC)
                        job->auth_tag_output = op->sym->aead.digest.data;
                else
                        job->auth_tag_output = op->sym->auth.digest.data;
-       }
 
+               if (session->auth.req_digest_len != session->auth.gen_digest_len) {
+                       job->auth_tag_output = qp->temp_digests[*digest_idx];
+                       *digest_idx = (*digest_idx + 1) % MAX_JOBS;
+               }
+       }
        /*
         * The Multi-buffer library currently only supports returning a
         * truncated digest length, as specified in the relevant IPsec RFCs.
         */
-       if (job->hash_alg != AES_CCM && job->hash_alg != AES_CMAC)
-               job->auth_tag_output_len_in_bytes =
-                               get_truncated_digest_byte_length(job->hash_alg);
-       else
-               job->auth_tag_output_len_in_bytes = session->auth.digest_len;
 
+       /* Set digest length */
+       job->auth_tag_output_len_in_bytes = session->auth.gen_digest_len;
 
        /* Set IV parameters */
-
        job->iv_len_in_bytes = session->iv.length;
 
        /* Data  Parameter */
        job->src = rte_pktmbuf_mtod(m_src, uint8_t *);
        job->dst = rte_pktmbuf_mtod_offset(m_dst, uint8_t *, m_offset);
 
-       if (job->hash_alg == AES_CCM) {
+       switch (job->hash_alg) {
+       case AES_CCM:
                job->cipher_start_src_offset_in_bytes =
                                op->sym->aead.data.offset;
                job->msg_len_to_cipher_in_bytes = op->sym->aead.data.length;
@@ -671,7 +848,19 @@ set_mb_job_params(JOB_AES_HMAC *job, struct aesni_mb_qp *qp,
 
                job->iv = rte_crypto_op_ctod_offset(op, uint8_t *,
                        session->iv.offset + 1);
-       } else {
+               break;
+
+       case AES_GMAC:
+               job->cipher_start_src_offset_in_bytes =
+                               op->sym->aead.data.offset;
+               job->hash_start_src_offset_in_bytes = op->sym->aead.data.offset;
+               job->msg_len_to_cipher_in_bytes = op->sym->aead.data.length;
+               job->msg_len_to_hash_in_bytes = job->msg_len_to_cipher_in_bytes;
+               job->iv = rte_crypto_op_ctod_offset(op, uint8_t *,
+                               session->iv.offset);
+               break;
+
+       default:
                job->cipher_start_src_offset_in_bytes =
                                op->sym->cipher.data.offset;
                job->msg_len_to_cipher_in_bytes = op->sym->cipher.data.length;
@@ -690,20 +879,37 @@ set_mb_job_params(JOB_AES_HMAC *job, struct aesni_mb_qp *qp,
 }
 
 static inline void
-verify_digest(struct aesni_mb_qp *qp __rte_unused, JOB_AES_HMAC *job,
-               struct rte_crypto_op *op) {
+verify_digest(JOB_AES_HMAC *job, struct rte_crypto_op *op,
+               struct aesni_mb_session *sess)
+{
        /* Verify digest if required */
-       if (job->hash_alg == AES_CCM) {
+       if (job->hash_alg == AES_CCM || job->hash_alg == AES_GMAC) {
                if (memcmp(job->auth_tag_output, op->sym->aead.digest.data,
-                               job->auth_tag_output_len_in_bytes) != 0)
+                               sess->auth.req_digest_len) != 0)
                        op->status = RTE_CRYPTO_OP_STATUS_AUTH_FAILED;
        } else {
                if (memcmp(job->auth_tag_output, op->sym->auth.digest.data,
-                               job->auth_tag_output_len_in_bytes) != 0)
+                               sess->auth.req_digest_len) != 0)
                        op->status = RTE_CRYPTO_OP_STATUS_AUTH_FAILED;
        }
 }
 
+static inline void
+generate_digest(JOB_AES_HMAC *job, struct rte_crypto_op *op,
+               struct aesni_mb_session *sess)
+{
+       /* No extra copy needed */
+       if (likely(sess->auth.req_digest_len == sess->auth.gen_digest_len))
+               return;
+
+       /*
+        * A length mismatch can only happen for HMAC, so only the
+        * auth digest (not the AEAD one) needs to be copied back.
+        */
+       memcpy(op->sym->auth.digest.data, job->auth_tag_output,
+                       sess->auth.req_digest_len);
+}
+
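
Worked example of generate_digest(): a SHA1-HMAC session requesting a 10-byte digest gets gen_digest_len = 20 (the full SHA-1 size, since 10 matches neither the 12-byte truncated nor the 20-byte full length), the job writes those 20 bytes into qp->temp_digests[], and generate_digest() then copies the first 10 bytes into op->sym->auth.digest.data.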
 /**
  * Process a completed job and return rte_mbuf which job processed
  *
@@ -730,7 +936,9 @@ post_process_mb_job(struct aesni_mb_qp *qp, JOB_AES_HMAC *job)
                        if (job->hash_alg != NULL_HASH) {
                                if (sess->auth.operation ==
                                                RTE_CRYPTO_AUTH_OP_VERIFY)
-                                       verify_digest(qp, job, op);
+                                       verify_digest(job, op, sess);
+                               else
+                                       generate_digest(job, op, sess);
                        }
                        break;
                default:
@@ -833,22 +1041,30 @@ aesni_mb_pmd_dequeue_burst(void *queue_pair, struct rte_crypto_op **ops,
 
        uint8_t digest_idx = qp->digest_idx;
        do {
-               /* Get next operation to process from ingress queue */
-               retval = rte_ring_dequeue(qp->ingress_queue, (void **)&op);
-               if (retval < 0)
-                       break;
-
                /* Get next free mb job struct from mb manager */
                job = (*qp->op_fns->job.get_next)(qp->mb_mgr);
                if (unlikely(job == NULL)) {
                        /* if no free mb job structs we need to flush mb_mgr */
                        processed_jobs += flush_mb_mgr(qp,
                                        &ops[processed_jobs],
-                                       (nb_ops - processed_jobs) - 1);
+                                       nb_ops - processed_jobs);
+
+                       if (nb_ops == processed_jobs)
+                               break;
 
                        job = (*qp->op_fns->job.get_next)(qp->mb_mgr);
                }
 
+               /*
+                * Get the next operation to process from the ingress queue.
+                * There is no need to return the job to the MB_MGR
+                * if there are no more operations to process, since
+                * the MB_MGR can hand out that pointer again on
+                * subsequent get_next calls.
+                */
+               retval = rte_ring_dequeue(qp->ingress_queue, (void **)&op);
+               if (retval < 0)
+                       break;
+
                retval = set_mb_job_params(job, qp, op, &digest_idx);
                if (unlikely(retval != 0)) {
                        qp->stats.dequeue_err_count++;
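
The reordering above obtains a free job slot before popping an operation, so an op is never stranded when the manager is full: completed jobs are flushed first, and an unused slot is simply handed out again on a later call. The shape of the pattern, with get_job()/flush() as illustrative stand-ins for the intel-ipsec-mb manager entry points:

    /* Hedged sketch of the get-next/flush pattern; names illustrative */
    job = get_job(mgr);
    if (job == NULL) {
        /* all job slots are in flight: drain completed jobs first */
        done += flush(mgr, &out[done], max - done);
        if (done == max)
            break;          /* caller's output array is full too */
        job = get_job(mgr); /* expected to succeed after a flush */
    }
    /* only now dequeue an op; an unused job can be reused later */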
index ab26e5a..43f6c26 100644 (file)
@@ -25,9 +25,15 @@ static const struct rte_cryptodev_capabilities aesni_mb_pmd_capabilities[] = {
                                        .increment = 1
                                },
                                .digest_size = {
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                                       .min = 1,
+                                       .max = 16,
+                                       .increment = 1
+#else
                                        .min = 12,
                                        .max = 12,
                                        .increment = 0
+#endif
                                },
                                .iv_size = { 0 }
                        }, }
@@ -42,13 +48,23 @@ static const struct rte_cryptodev_capabilities aesni_mb_pmd_capabilities[] = {
                                .block_size = 64,
                                .key_size = {
                                        .min = 1,
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                                       .max = 65535,
+#else
                                        .max = 64,
+#endif
                                        .increment = 1
                                },
                                .digest_size = {
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                                       .min = 1,
+                                       .max = 20,
+                                       .increment = 1
+#else
                                        .min = 12,
                                        .max = 12,
                                        .increment = 0
+#endif
                                },
                                .iv_size = { 0 }
                        }, }
@@ -63,13 +79,23 @@ static const struct rte_cryptodev_capabilities aesni_mb_pmd_capabilities[] = {
                                .block_size = 64,
                                .key_size = {
                                        .min = 1,
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                                       .max = 65535,
+#else
                                        .max = 64,
+#endif
                                        .increment = 1
                                },
                                .digest_size = {
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                                       .min = 1,
+                                       .max = 28,
+                                       .increment = 1
+#else
                                        .min = 14,
                                        .max = 14,
                                        .increment = 0
+#endif
                                },
                                .iv_size = { 0 }
                        }, }
@@ -84,13 +110,23 @@ static const struct rte_cryptodev_capabilities aesni_mb_pmd_capabilities[] = {
                                .block_size = 64,
                                .key_size = {
                                        .min = 1,
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                                       .max = 65535,
+#else
                                        .max = 64,
+#endif
                                        .increment = 1
                                },
                                .digest_size = {
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                                       .min = 1,
+                                       .max = 32,
+                                       .increment = 1
+#else
                                        .min = 16,
                                        .max = 16,
                                        .increment = 0
+#endif
                                },
                                .iv_size = { 0 }
                        }, }
@@ -105,13 +141,23 @@ static const struct rte_cryptodev_capabilities aesni_mb_pmd_capabilities[] = {
                                .block_size = 128,
                                .key_size = {
                                        .min = 1,
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                                       .max = 65535,
+#else
                                        .max = 128,
+#endif
                                        .increment = 1
                                },
                                .digest_size = {
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                                       .min = 1,
+                                       .max = 48,
+                                       .increment = 1
+#else
                                        .min = 24,
                                        .max = 24,
                                        .increment = 0
+#endif
                                },
                                .iv_size = { 0 }
                        }, }
@@ -126,13 +172,23 @@ static const struct rte_cryptodev_capabilities aesni_mb_pmd_capabilities[] = {
                                .block_size = 128,
                                .key_size = {
                                        .min = 1,
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                                       .max = 65535,
+#else
                                        .max = 128,
+#endif
                                        .increment = 1
                                },
                                .digest_size = {
+#if IMB_VERSION_NUM >= IMB_VERSION(0, 50, 0)
+                                       .min = 1,
+                                       .max = 64,
+                                       .increment = 1
+#else
                                        .min = 32,
                                        .max = 32,
                                        .increment = 0
+#endif
                                },
                                .iv_size = { 0 }
                        }, }
@@ -322,14 +378,44 @@ static const struct rte_cryptodev_capabilities aesni_mb_pmd_capabilities[] = {
                                        .increment = 0
                                },
                                .digest_size = {
-                                       .min = 12,
+                                       .min = 1,
                                        .max = 16,
-                                       .increment = 4
+                                       .increment = 1
                                },
                                .iv_size = { 0 }
                        }, }
                }, }
        },
+       {       /* AES GCM */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AEAD,
+                       {.aead = {
+                               .algo = RTE_CRYPTO_AEAD_AES_GCM,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 32,
+                                       .increment = 8
+                               },
+                               .digest_size = {
+                                       .min = 8,
+                                       .max = 16,
+                                       .increment = 4
+                               },
+                               .aad_size = {
+                                       .min = 0,
+                                       .max = 65535,
+                                       .increment = 1
+                               },
+                               .iv_size = {
+                                       .min = 12,
+                                       .max = 12,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
        RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST()
 };
 
index 70e9d18..d8021cd 100644 (file)
@@ -7,15 +7,6 @@
 
 #include "aesni_mb_ops.h"
 
-/*
- * IMB_VERSION_NUM macro was introduced in version Multi-buffer 0.50,
- * so if macro is not defined, it means that the version is 0.49.
- */
-#if !defined(IMB_VERSION_NUM)
-#define IMB_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + (c))
-#define IMB_VERSION_NUM IMB_VERSION(0, 49, 0)
-#endif
-
 #define CRYPTODEV_NAME_AESNI_MB_PMD    crypto_aesni_mb
 /**< AES-NI Multi buffer PMD device name */
 
@@ -31,8 +22,8 @@ int aesni_mb_logtype_driver;
 #define HMAC_IPAD_VALUE                        (0x36)
 #define HMAC_OPAD_VALUE                        (0x5C)
 
-/* Maximum length for digest (SHA-512 truncated needs 32 bytes) */
-#define DIGEST_LENGTH_MAX 32
+/* Maximum length for digest (the full SHA-512 digest is 64 bytes) */
+#define DIGEST_LENGTH_MAX 64
 static const unsigned auth_blocksize[] = {
                [MD5]           = 64,
                [SHA1]          = 64,
@@ -64,7 +55,7 @@ static const unsigned auth_truncated_digest_byte_lengths[] = {
                [SHA_384]       = 24,
                [SHA_512]       = 32,
                [AES_XCBC]      = 12,
-               [AES_CMAC]      = 16,
+               [AES_CMAC]      = 12,
                [AES_CCM]       = 8,
                [NULL_HASH]     = 0
 };
@@ -91,11 +82,13 @@ static const unsigned auth_digest_byte_lengths[] = {
                [SHA_512]       = 64,
                [AES_XCBC]      = 16,
                [AES_CMAC]      = 16,
+               [AES_GMAC]      = 12,
                [NULL_HASH]             = 0
 };
 
 /**
- * Get the output digest size in bytes for a specified authentication algorithm
+ * Get the full digest size in bytes for a specified authentication algorithm
+ * (if available in the Multi-buffer library)
  *
  * @Note: this function will not return a valid value for a non-valid
  * authentication algorithm
@@ -180,6 +173,8 @@ struct aesni_mb_session {
                                const void *ks_ptr[3];
                                uint64_t key[3][16];
                        } exp_3des_keys;
+
+                       struct gcm_key_data gcm_key;
                };
                /**< Expanded AES keys - Allocating space to
                 * contain the maximum expanded key size which
@@ -226,8 +221,10 @@ struct aesni_mb_session {
                        } cmac;
                        /**< Expanded XCBC authentication keys */
                };
-       /** digest size */
-       uint16_t digest_len;
+       /** Digest size generated by the Multi-buffer library */
+       uint16_t gen_digest_len;
+       /** Digest size requested via the Cryptodev API */
+       uint16_t req_digest_len;
 
        } auth;
        struct {
diff --git a/drivers/crypto/caam_jr/Makefile b/drivers/crypto/caam_jr/Makefile
new file mode 100644 (file)
index 0000000..88cdf74
--- /dev/null
@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2017 NXP
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_caam_jr.a
+
+# build flags
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
+CFLAGS += -D _GNU_SOURCE
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+CFLAGS += -I$(RTE_SDK)/drivers/bus/dpaa/include
+CFLAGS += -I$(RTE_SDK)/drivers/crypto/caam_jr
+#sharing the hw flib headers from dpaa2_sec pmd
+CFLAGS += -I$(RTE_SDK)/drivers/crypto/dpaa2_sec/
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include
+CFLAGS += -I$(RTE_SDK)/lib/librte_eal/linuxapp/eal
+
+# versioning export map
+EXPORT_MAP := rte_pmd_caam_jr_version.map
+
+# library version
+LIBABIVER := 1
+
+# library source files
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_CAAM_JR) += caam_jr.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_CAAM_JR) += caam_jr_capabilities.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_CAAM_JR) += caam_jr_hw.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_CAAM_JR) += caam_jr_uio.c
+# library dependencies
+
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_cryptodev
+LDLIBS += -lrte_bus_dpaa
+LDLIBS += -lrte_bus_vdev
+
+include $(RTE_SDK)/mk/rte.lib.mk
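
The SRCS-$(CONFIG_RTE_LIBRTE_PMD_CAAM_JR) lines compile only when that option is enabled; assuming the usual DPDK convention, building the PMD amounts to setting

    CONFIG_RTE_LIBRTE_PMD_CAAM_JR=y

in the build configuration (the exact config file and default value are not shown in this hunk).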
diff --git a/drivers/crypto/caam_jr/caam_jr.c b/drivers/crypto/caam_jr/caam_jr.c
new file mode 100644 (file)
index 0000000..f505adf
--- /dev/null
@@ -0,0 +1,2508 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017-2018 NXP
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sched.h>
+#include <net/if.h>
+
+#include <rte_byteorder.h>
+#include <rte_common.h>
+#include <rte_cryptodev_pmd.h>
+#include <rte_crypto.h>
+#include <rte_cryptodev.h>
+#include <rte_bus_vdev.h>
+#include <rte_malloc.h>
+#include <rte_security_driver.h>
+#include <rte_hexdump.h>
+
+#include <caam_jr_capabilities.h>
+#include <caam_jr_config.h>
+#include <caam_jr_hw_specific.h>
+#include <caam_jr_pvt.h>
+#include <caam_jr_desc.h>
+#include <caam_jr_log.h>
+
+/* RTA header files */
+#include <hw/desc/common.h>
+#include <hw/desc/algo.h>
+#include <of.h>
+
+#define CAAM_JR_DBG    0
+#define CRYPTODEV_NAME_CAAM_JR_PMD     crypto_caam_jr
+static uint8_t cryptodev_driver_id;
+int caam_jr_logtype;
+
+enum rta_sec_era rta_sec_era;
+
+/* Lists the states possible for the SEC user space driver. */
+enum sec_driver_state_e {
+       SEC_DRIVER_STATE_IDLE,          /* Driver not initialized */
+       SEC_DRIVER_STATE_STARTED,       /* Driver initialized and can be used*/
+       SEC_DRIVER_STATE_RELEASE,       /* Driver release is in progress */
+};
+
+/* Job rings used for communication with SEC HW */
+static struct sec_job_ring_t g_job_rings[MAX_SEC_JOB_RINGS];
+
+/* The current state of SEC user space driver */
+static enum sec_driver_state_e g_driver_state = SEC_DRIVER_STATE_IDLE;
+
+/* The number of job rings used by SEC user space driver */
+static int g_job_rings_no;
+static int g_job_rings_max;
+
+struct sec_outring_entry {
+       phys_addr_t desc;       /* Pointer to completed descriptor */
+       uint32_t status;        /* Status for completed descriptor */
+} __rte_packed;
+
+/* virtual address conversion when mempool support is available for ctx */
+static inline phys_addr_t
+caam_jr_vtop_ctx(struct caam_jr_op_ctx *ctx, void *vaddr)
+{
+       PMD_INIT_FUNC_TRACE();
+       return (size_t)vaddr - ctx->vtop_offset;
+}
+
+static inline void
+caam_jr_op_ending(struct caam_jr_op_ctx *ctx)
+{
+       PMD_INIT_FUNC_TRACE();
+       /* report op status to sym->op and then free the ctx memory */
+       rte_mempool_put(ctx->ctx_pool, (void *)ctx);
+}
+
+static inline struct caam_jr_op_ctx *
+caam_jr_alloc_ctx(struct caam_jr_session *ses)
+{
+       struct caam_jr_op_ctx *ctx;
+       int ret;
+
+       PMD_INIT_FUNC_TRACE();
+       ret = rte_mempool_get(ses->ctx_pool, (void **)(&ctx));
+       if (!ctx || ret) {
+               CAAM_JR_DP_WARN("Alloc sec descriptor failed!");
+               return NULL;
+       }
+       /*
+        * Clear SG memory. There are 16 SG entries of 16 bytes each,
+        * i.e. 256 bytes in total. One call to dcbz_64() clears 64 bytes,
+        * hence it is called 4 times to clear all the SG entries.
+        * caam_jr_alloc_ctx() is called for each packet, and memset()
+        * is costlier than dcbz_64().
+        */
+       dcbz_64(&ctx->sg[SG_CACHELINE_0]);
+       dcbz_64(&ctx->sg[SG_CACHELINE_1]);
+       dcbz_64(&ctx->sg[SG_CACHELINE_2]);
+       dcbz_64(&ctx->sg[SG_CACHELINE_3]);
+
+       ctx->ctx_pool = ses->ctx_pool;
+       ctx->vtop_offset = (size_t) ctx - rte_mempool_virt2iova(ctx);
+
+       return ctx;
+}
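
The vtop_offset stored above is what caam_jr_vtop_ctx() earlier in this file relies on: offset = virt(ctx) - phys(ctx), so for any virtual address inside the same mempool element, phys = vaddr - offset. This assumes the element is physically contiguous, which the context objects used here are expected to be.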
+
+static
+void caam_jr_stats_get(struct rte_cryptodev *dev,
+                       struct rte_cryptodev_stats *stats)
+{
+       struct caam_jr_qp **qp = (struct caam_jr_qp **)
+                                       dev->data->queue_pairs;
+       int i;
+
+       PMD_INIT_FUNC_TRACE();
+       if (stats == NULL) {
+               CAAM_JR_ERR("Invalid stats ptr NULL");
+               return;
+       }
+       for (i = 0; i < dev->data->nb_queue_pairs; i++) {
+               if (qp[i] == NULL) {
+                       CAAM_JR_WARN("Uninitialised queue pair");
+                       continue;
+               }
+
+               stats->enqueued_count += qp[i]->tx_pkts;
+               stats->dequeued_count += qp[i]->rx_pkts;
+               stats->enqueue_err_count += qp[i]->tx_errs;
+               stats->dequeue_err_count += qp[i]->rx_errs;
+               CAAM_JR_INFO("extra stats:\n\tRX Poll ERR = %" PRIu64
+                            "\n\tTX Ring Full = %" PRIu64,
+                            qp[i]->rx_poll_err,
+                            qp[i]->tx_ring_full);
+       }
+}
+
+static
+void caam_jr_stats_reset(struct rte_cryptodev *dev)
+{
+       int i;
+       struct caam_jr_qp **qp = (struct caam_jr_qp **)
+                                  (dev->data->queue_pairs);
+
+       PMD_INIT_FUNC_TRACE();
+       for (i = 0; i < dev->data->nb_queue_pairs; i++) {
+               if (qp[i] == NULL) {
+                       CAAM_JR_WARN("Uninitialised queue pair");
+                       continue;
+               }
+               qp[i]->rx_pkts = 0;
+               qp[i]->rx_errs = 0;
+               qp[i]->rx_poll_err = 0;
+               qp[i]->tx_pkts = 0;
+               qp[i]->tx_errs = 0;
+               qp[i]->tx_ring_full = 0;
+       }
+}
+
+static inline int
+is_cipher_only(struct caam_jr_session *ses)
+{
+       PMD_INIT_FUNC_TRACE();
+       return ((ses->cipher_alg != RTE_CRYPTO_CIPHER_NULL) &&
+               (ses->auth_alg == RTE_CRYPTO_AUTH_NULL));
+}
+
+static inline int
+is_auth_only(struct caam_jr_session *ses)
+{
+       PMD_INIT_FUNC_TRACE();
+       return ((ses->cipher_alg == RTE_CRYPTO_CIPHER_NULL) &&
+               (ses->auth_alg != RTE_CRYPTO_AUTH_NULL));
+}
+
+static inline int
+is_aead(struct caam_jr_session *ses)
+{
+       PMD_INIT_FUNC_TRACE();
+       return ((ses->cipher_alg == 0) &&
+               (ses->auth_alg == 0) &&
+               (ses->aead_alg != 0));
+}
+
+static inline int
+is_auth_cipher(struct caam_jr_session *ses)
+{
+       PMD_INIT_FUNC_TRACE();
+       return ((ses->cipher_alg != RTE_CRYPTO_CIPHER_NULL) &&
+               (ses->auth_alg != RTE_CRYPTO_AUTH_NULL) &&
+               (ses->proto_alg != RTE_SECURITY_PROTOCOL_IPSEC));
+}
+
+static inline int
+is_proto_ipsec(struct caam_jr_session *ses)
+{
+       PMD_INIT_FUNC_TRACE();
+       return (ses->proto_alg == RTE_SECURITY_PROTOCOL_IPSEC);
+}
+
+static inline int
+is_encode(struct caam_jr_session *ses)
+{
+       PMD_INIT_FUNC_TRACE();
+       return ses->dir == DIR_ENC;
+}
+
+static inline int
+is_decode(struct caam_jr_session *ses)
+{
+       PMD_INIT_FUNC_TRACE();
+       return ses->dir == DIR_DEC;
+}
+
+static inline void
+caam_auth_alg(struct caam_jr_session *ses, struct alginfo *alginfo_a)
+{
+       PMD_INIT_FUNC_TRACE();
+       switch (ses->auth_alg) {
+       case RTE_CRYPTO_AUTH_NULL:
+               ses->digest_length = 0;
+               break;
+       case RTE_CRYPTO_AUTH_MD5_HMAC:
+               alginfo_a->algtype =
+                       (ses->proto_alg == RTE_SECURITY_PROTOCOL_IPSEC) ?
+                       OP_PCL_IPSEC_HMAC_MD5_96 : OP_ALG_ALGSEL_MD5;
+               alginfo_a->algmode = OP_ALG_AAI_HMAC;
+               break;
+       case RTE_CRYPTO_AUTH_SHA1_HMAC:
+               alginfo_a->algtype =
+                       (ses->proto_alg == RTE_SECURITY_PROTOCOL_IPSEC) ?
+                       OP_PCL_IPSEC_HMAC_SHA1_96 : OP_ALG_ALGSEL_SHA1;
+               alginfo_a->algmode = OP_ALG_AAI_HMAC;
+               break;
+       case RTE_CRYPTO_AUTH_SHA224_HMAC:
+               alginfo_a->algtype =
+                       (ses->proto_alg == RTE_SECURITY_PROTOCOL_IPSEC) ?
+                       OP_PCL_IPSEC_HMAC_SHA1_160 : OP_ALG_ALGSEL_SHA224;
+               alginfo_a->algmode = OP_ALG_AAI_HMAC;
+               break;
+       case RTE_CRYPTO_AUTH_SHA256_HMAC:
+               alginfo_a->algtype =
+                       (ses->proto_alg == RTE_SECURITY_PROTOCOL_IPSEC) ?
+                       OP_PCL_IPSEC_HMAC_SHA2_256_128 : OP_ALG_ALGSEL_SHA256;
+               alginfo_a->algmode = OP_ALG_AAI_HMAC;
+               break;
+       case RTE_CRYPTO_AUTH_SHA384_HMAC:
+               alginfo_a->algtype =
+                       (ses->proto_alg == RTE_SECURITY_PROTOCOL_IPSEC) ?
+                       OP_PCL_IPSEC_HMAC_SHA2_384_192 : OP_ALG_ALGSEL_SHA384;
+               alginfo_a->algmode = OP_ALG_AAI_HMAC;
+               break;
+       case RTE_CRYPTO_AUTH_SHA512_HMAC:
+               alginfo_a->algtype =
+                       (ses->proto_alg == RTE_SECURITY_PROTOCOL_IPSEC) ?
+                       OP_PCL_IPSEC_HMAC_SHA2_512_256 : OP_ALG_ALGSEL_SHA512;
+               alginfo_a->algmode = OP_ALG_AAI_HMAC;
+               break;
+       default:
+               CAAM_JR_DEBUG("unsupported auth alg %u", ses->auth_alg);
+       }
+}
+
+static inline void
+caam_cipher_alg(struct caam_jr_session *ses, struct alginfo *alginfo_c)
+{
+       PMD_INIT_FUNC_TRACE();
+       switch (ses->cipher_alg) {
+       case RTE_CRYPTO_CIPHER_NULL:
+               break;
+       case RTE_CRYPTO_CIPHER_AES_CBC:
+               alginfo_c->algtype =
+                       (ses->proto_alg == RTE_SECURITY_PROTOCOL_IPSEC) ?
+                       OP_PCL_IPSEC_AES_CBC : OP_ALG_ALGSEL_AES;
+               alginfo_c->algmode = OP_ALG_AAI_CBC;
+               break;
+       case RTE_CRYPTO_CIPHER_3DES_CBC:
+               alginfo_c->algtype =
+                       (ses->proto_alg == RTE_SECURITY_PROTOCOL_IPSEC) ?
+                       OP_PCL_IPSEC_3DES : OP_ALG_ALGSEL_3DES;
+               alginfo_c->algmode = OP_ALG_AAI_CBC;
+               break;
+       case RTE_CRYPTO_CIPHER_AES_CTR:
+               alginfo_c->algtype =
+                       (ses->proto_alg == RTE_SECURITY_PROTOCOL_IPSEC) ?
+                       OP_PCL_IPSEC_AES_CTR : OP_ALG_ALGSEL_AES;
+               alginfo_c->algmode = OP_ALG_AAI_CTR;
+               break;
+       default:
+               CAAM_JR_DEBUG("unsupported cipher alg %d", ses->cipher_alg);
+       }
+}
+
+static inline void
+caam_aead_alg(struct caam_jr_session *ses, struct alginfo *alginfo)
+{
+       PMD_INIT_FUNC_TRACE();
+       switch (ses->aead_alg) {
+       case RTE_CRYPTO_AEAD_AES_GCM:
+               alginfo->algtype = OP_ALG_ALGSEL_AES;
+               alginfo->algmode = OP_ALG_AAI_GCM;
+               break;
+       default:
+               CAAM_JR_DEBUG("unsupported AEAD alg %d", ses->aead_alg);
+       }
+}
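+
+/* Note on the mappings above: for sessions using the IPsec security
+ * protocol, the OP_PCL_IPSEC_* protocol-level algorithm identifiers are
+ * selected so the protocol descriptor performs the full processing; for
+ * plain crypto sessions the raw class selectors (OP_ALG_ALGSEL_*) are
+ * combined with an AAI mode. For example, RTE_CRYPTO_AUTH_SHA1_HMAC maps
+ * to OP_PCL_IPSEC_HMAC_SHA1_96 for IPsec and to
+ * OP_ALG_ALGSEL_SHA1 + OP_ALG_AAI_HMAC otherwise.
+ */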
+
+/* Prepare the command block (CDB) of the session */
+static int
+caam_jr_prep_cdb(struct caam_jr_session *ses)
+{
+       struct alginfo alginfo_c = {0}, alginfo_a = {0}, alginfo = {0};
+       int32_t shared_desc_len = 0;
+       struct sec_cdb *cdb;
+       int err;
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+       int swap = false;
+#else
+       int swap = true;
+#endif
+
+       PMD_INIT_FUNC_TRACE();
+       if (ses->cdb)
+               caam_jr_dma_free(ses->cdb);
+
+       cdb = caam_jr_dma_mem_alloc(L1_CACHE_BYTES, sizeof(struct sec_cdb));
+       if (!cdb) {
+               CAAM_JR_ERR("failed to allocate memory for cdb\n");
+               return -1;
+       }
+
+       ses->cdb = cdb;
+
+       memset(cdb, 0, sizeof(struct sec_cdb));
+
+       if (is_cipher_only(ses)) {
+               caam_cipher_alg(ses, &alginfo_c);
+               if (alginfo_c.algtype == (unsigned int)CAAM_JR_ALG_UNSUPPORT) {
+                       CAAM_JR_ERR("not supported cipher alg");
+                       rte_free(cdb);
+                       return -ENOTSUP;
+               }
+
+               alginfo_c.key = (size_t)ses->cipher_key.data;
+               alginfo_c.keylen = ses->cipher_key.length;
+               alginfo_c.key_enc_flags = 0;
+               alginfo_c.key_type = RTA_DATA_IMM;
+
+               shared_desc_len = cnstr_shdsc_blkcipher(
+                                               cdb->sh_desc, true,
+                                               swap, &alginfo_c,
+                                               NULL,
+                                               ses->iv.length,
+                                               ses->dir);
+       } else if (is_auth_only(ses)) {
+               caam_auth_alg(ses, &alginfo_a);
+               if (alginfo_a.algtype == (unsigned int)CAAM_JR_ALG_UNSUPPORT) {
+                       CAAM_JR_ERR("not supported auth alg");
+                       rte_free(cdb);
+                       return -ENOTSUP;
+               }
+
+               alginfo_a.key = (size_t)ses->auth_key.data;
+               alginfo_a.keylen = ses->auth_key.length;
+               alginfo_a.key_enc_flags = 0;
+               alginfo_a.key_type = RTA_DATA_IMM;
+
+               shared_desc_len = cnstr_shdsc_hmac(cdb->sh_desc, true,
+                                                  swap, &alginfo_a,
+                                                  !ses->dir,
+                                                  ses->digest_length);
+       } else if (is_aead(ses)) {
+               caam_aead_alg(ses, &alginfo);
+               if (alginfo.algtype == (unsigned int)CAAM_JR_ALG_UNSUPPORT) {
+                       CAAM_JR_ERR("not supported aead alg");
+                       rte_free(cdb);
+                       return -ENOTSUP;
+               }
+               alginfo.key = (size_t)ses->aead_key.data;
+               alginfo.keylen = ses->aead_key.length;
+               alginfo.key_enc_flags = 0;
+               alginfo.key_type = RTA_DATA_IMM;
+
+               if (ses->dir == DIR_ENC)
+                       shared_desc_len = cnstr_shdsc_gcm_encap(
+                                       cdb->sh_desc, true, swap,
+                                       &alginfo,
+                                       ses->iv.length,
+                                       ses->digest_length);
+               else
+                       shared_desc_len = cnstr_shdsc_gcm_decap(
+                                       cdb->sh_desc, true, swap,
+                                       &alginfo,
+                                       ses->iv.length,
+                                       ses->digest_length);
+       } else {
+               caam_cipher_alg(ses, &alginfo_c);
+               if (alginfo_c.algtype == (unsigned int)CAAM_JR_ALG_UNSUPPORT) {
+                       CAAM_JR_ERR("not supported cipher alg");
+                       rte_free(cdb);
+                       return -ENOTSUP;
+               }
+
+               alginfo_c.key = (size_t)ses->cipher_key.data;
+               alginfo_c.keylen = ses->cipher_key.length;
+               alginfo_c.key_enc_flags = 0;
+               alginfo_c.key_type = RTA_DATA_IMM;
+
+               caam_auth_alg(ses, &alginfo_a);
+               if (alginfo_a.algtype == (unsigned int)CAAM_JR_ALG_UNSUPPORT) {
+                       CAAM_JR_ERR("not supported auth alg");
+                       rte_free(cdb);
+                       return -ENOTSUP;
+               }
+
+               alginfo_a.key = (size_t)ses->auth_key.data;
+               alginfo_a.keylen = ses->auth_key.length;
+               alginfo_a.key_enc_flags = 0;
+               alginfo_a.key_type = RTA_DATA_IMM;
+
+               cdb->sh_desc[0] = alginfo_c.keylen;
+               cdb->sh_desc[1] = alginfo_a.keylen;
+               err = rta_inline_query(IPSEC_AUTH_VAR_AES_DEC_BASE_DESC_LEN,
+                                      MIN_JOB_DESC_SIZE,
+                                      (unsigned int *)cdb->sh_desc,
+                                      &cdb->sh_desc[2], 2);
+
+               if (err < 0) {
+                       CAAM_JR_ERR("Crypto: Incorrect key lengths");
+                       rte_free(cdb);
+                       return err;
+               }
+               if (cdb->sh_desc[2] & 1)
+                       alginfo_c.key_type = RTA_DATA_IMM;
+               else {
+                       alginfo_c.key = (size_t)caam_jr_mem_vtop(
+                                               (void *)(size_t)alginfo_c.key);
+                       alginfo_c.key_type = RTA_DATA_PTR;
+               }
+               if (cdb->sh_desc[2] & (1<<1))
+                       alginfo_a.key_type = RTA_DATA_IMM;
+               else {
+                       alginfo_a.key = (size_t)caam_jr_mem_vtop(
+                                               (void *)(size_t)alginfo_a.key);
+                       alginfo_a.key_type = RTA_DATA_PTR;
+               }
+               cdb->sh_desc[0] = 0;
+               cdb->sh_desc[1] = 0;
+               cdb->sh_desc[2] = 0;
+               if (is_proto_ipsec(ses)) {
+                       if (ses->dir == DIR_ENC) {
+                               shared_desc_len = cnstr_shdsc_ipsec_new_encap(
+                                               cdb->sh_desc,
+                                               true, swap, SHR_SERIAL,
+                                               &ses->encap_pdb,
+                                               (uint8_t *)&ses->ip4_hdr,
+                                               &alginfo_c, &alginfo_a);
+                       } else if (ses->dir == DIR_DEC) {
+                               shared_desc_len = cnstr_shdsc_ipsec_new_decap(
+                                               cdb->sh_desc,
+                                               true, swap, SHR_SERIAL,
+                                               &ses->decap_pdb,
+                                               &alginfo_c, &alginfo_a);
+                       }
+               } else {
+                       /* Auth-only length is set to 0 here; it is
+                        * overwritten per packet in the job descriptor
+                        * (DPOVRD register).
+                        */
+                       shared_desc_len = cnstr_shdsc_authenc(cdb->sh_desc,
+                                       true, swap, &alginfo_c, &alginfo_a,
+                                       ses->iv.length, 0,
+                                       ses->digest_length, ses->dir);
+               }
+       }
+
+       if (shared_desc_len < 0) {
+               CAAM_JR_ERR("error in preparing command block");
+               return shared_desc_len;
+       }
+
+#if CAAM_JR_DBG
+       SEC_DUMP_DESC(cdb->sh_desc);
+#endif
+
+       cdb->sh_hdr.hi.field.idlen = shared_desc_len;
+
+       return 0;
+}
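+
+/* A sketch of the inline-key decision made above for the cipher+auth
+ * case, based only on the code in caam_jr_prep_cdb():
+ *
+ *     cdb->sh_desc[0] = cipher key length;
+ *     cdb->sh_desc[1] = auth key length;
+ *     rta_inline_query() then reports, via bits 0 and 1 of sh_desc[2],
+ *     whether each key still fits inline in the shared descriptor
+ *     (RTA_DATA_IMM) or must instead be referenced by physical address
+ *     (RTA_DATA_PTR) after conversion with caam_jr_mem_vtop().
+ *
+ * The three scratch words are cleared again before the descriptor is
+ * constructed.
+ */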
+
+/* @brief Poll the HW for already processed jobs in the JR
+ * and either silently discard the available jobs or notify
+ * them to the UA.
+ *
+ * @param [in,out] job_ring       The job ring to poll.
+ * @param [in]     do_notify      Can be #TRUE or #FALSE. Indicates whether
+ *                                descriptors are to be discarded or
+ *                                notified to the UA.
+ * @param [out]    notified_descs Number of notified descriptors. Can be
+ *                                NULL if do_notify is #FALSE.
+ */
+static void
+hw_flush_job_ring(struct sec_job_ring_t *job_ring,
+                 uint32_t do_notify,
+                 uint32_t *notified_descs)
+{
+       int32_t jobs_no_to_discard = 0;
+       int32_t discarded_descs_no = 0;
+
+       PMD_INIT_FUNC_TRACE();
+       CAAM_JR_DEBUG("Jr[%p] pi[%d] ci[%d].Flushing jr notify desc=[%d]",
+               job_ring, job_ring->pidx, job_ring->cidx, do_notify);
+
+       jobs_no_to_discard = hw_get_no_finished_jobs(job_ring);
+
+       /* Discard all jobs */
+       CAAM_JR_DEBUG("Jr[%p] pi[%d] ci[%d].Discarding %d descs",
+                 job_ring, job_ring->pidx, job_ring->cidx,
+                 jobs_no_to_discard);
+
+       while (jobs_no_to_discard > discarded_descs_no) {
+               discarded_descs_no++;
+               /* Increment the consumer index for the current job ring */
+               job_ring->cidx = SEC_CIRCULAR_COUNTER(job_ring->cidx,
+                                        SEC_JOB_RING_SIZE);
+
+               hw_remove_entries(job_ring, 1);
+       }
+
+       if (do_notify == true) {
+               ASSERT(notified_descs != NULL);
+               *notified_descs = discarded_descs_no;
+       }
+}
+
+/* @brief Poll the HW for already processed jobs in the JR
+ * and notify the available jobs to the UA.
+ *
+ * @param [in]  job_ring  The job ring to poll.
+ * @param [out] ops       Array in which the completed crypto ops are
+ *                        returned.
+ * @param [in]  limit     The maximum number of jobs to notify. If set to
+ *                        a negative value, all available jobs are
+ *                        notified.
+ * @param [in]  jr_qp     The queue pair, used for error accounting.
+ *
+ * @retval >=0 the number of jobs notified to the UA
+ * @retval -1  on error
+ */
+static int
+hw_poll_job_ring(struct sec_job_ring_t *job_ring,
+                struct rte_crypto_op **ops, int32_t limit,
+                struct caam_jr_qp *jr_qp)
+{
+       int32_t jobs_no_to_notify = 0; /* the number of done jobs to notify*/
+       int32_t number_of_jobs_available = 0;
+       int32_t notified_descs_no = 0;
+       uint32_t sec_error_code = 0;
+       struct job_descriptor *current_desc;
+       phys_addr_t current_desc_addr;
+       phys_addr_t *temp_addr;
+       struct caam_jr_op_ctx *ctx;
+
+       PMD_INIT_FUNC_TRACE();
+       /* TODO: check that the ops array has enough room */
+       /* check here if any JR error that cannot be written
+        * in the output status word has occurred
+        */
+       if (JR_REG_JRINT_JRE_EXTRACT(GET_JR_REG(JRINT, job_ring))) {
+               CAAM_JR_INFO("err received");
+               sec_error_code = JR_REG_JRINT_ERR_TYPE_EXTRACT(
+                                       GET_JR_REG(JRINT, job_ring));
+               if (unlikely(sec_error_code)) {
+                       hw_job_ring_error_print(job_ring, sec_error_code);
+                       return -1;
+               }
+       }
+       /* compute the number of jobs available in the job ring based on the
+        * producer and consumer index values.
+        */
+       number_of_jobs_available = hw_get_no_finished_jobs(job_ring);
+       /* Compute the number of notifications that need to be raised to UA.
+        * If limit > total number of done jobs -> notify all done jobs.
+        * Otherwise -> notify a number of done jobs equal to the limit
+        * (a limit of 0 simply notifies nothing).
+        */
+       jobs_no_to_notify = (limit > number_of_jobs_available) ?
+                               number_of_jobs_available : limit;
+       CAAM_JR_DP_DEBUG(
+               "Jr[%p] pi[%d] ci[%d].limit =%d Available=%d.Jobs to notify=%d",
+               job_ring, job_ring->pidx, job_ring->cidx,
+               limit, number_of_jobs_available, jobs_no_to_notify);
+
+       rte_smp_rmb();
+
+       while (jobs_no_to_notify > notified_descs_no) {
+               static uint64_t false_alarm;
+               static uint64_t real_poll;
+
+               /* Get job status here */
+               sec_error_code = job_ring->output_ring[job_ring->cidx].status;
+               /* Get completed descriptor */
+               temp_addr = &(job_ring->output_ring[job_ring->cidx].desc);
+               current_desc_addr = (phys_addr_t)sec_read_addr(temp_addr);
+
+               real_poll++;
+               /* TODO: check whether this is a false alarm
+                * (no descriptor present)
+                */
+               if (!current_desc_addr) {
+                       false_alarm++;
+                       printf("false alarm %" PRIu64 "real %" PRIu64
+                               " sec_err =0x%x cidx Index =0%d\n",
+                               false_alarm, real_poll,
+                               sec_error_code, job_ring->cidx);
+                       rte_panic("CAAM JR descriptor NULL");
+                       return notified_descs_no;
+               }
+               current_desc = (struct job_descriptor *)
+                               caam_jr_dma_ptov(current_desc_addr);
+               /* now increment the consumer index for the current job ring,
+                * AFTER saving job in temporary location!
+                */
+               job_ring->cidx = SEC_CIRCULAR_COUNTER(job_ring->cidx,
+                                SEC_JOB_RING_SIZE);
+               /* Signal that the job has been processed and the slot is free*/
+               hw_remove_entries(job_ring, 1);
+               /* TODO: support multiple ops/packets per descriptor */
+               ctx = container_of(current_desc, struct caam_jr_op_ctx, jobdes);
+               if (unlikely(sec_error_code)) {
+                       CAAM_JR_ERR("desc at cidx %d generated error 0x%x\n",
+                               job_ring->cidx, sec_error_code);
+                       hw_handle_job_ring_error(job_ring, sec_error_code);
+                       /* TODO: map to exact error codes */
+                       ctx->op->status = RTE_CRYPTO_OP_STATUS_ERROR;
+                       jr_qp->rx_errs++;
+               } else {
+                       ctx->op->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
+#if CAAM_JR_DBG
+                       if (ctx->op->sym->m_dst) {
+                               rte_hexdump(stdout, "PROCESSED",
+                               rte_pktmbuf_mtod(ctx->op->sym->m_dst, void *),
+                               rte_pktmbuf_data_len(ctx->op->sym->m_dst));
+                       } else {
+                               rte_hexdump(stdout, "PROCESSED",
+                               rte_pktmbuf_mtod(ctx->op->sym->m_src, void *),
+                               rte_pktmbuf_data_len(ctx->op->sym->m_src));
+                       }
+#endif
+               }
+               if (ctx->op->sess_type == RTE_CRYPTO_OP_SECURITY_SESSION) {
+                       struct ip *ip4_hdr;
+
+                       if (ctx->op->sym->m_dst) {
+                               /* TODO: check for IP header or other types */
+                               ip4_hdr = (struct ip *)
+                               rte_pktmbuf_mtod(ctx->op->sym->m_dst, char*);
+                               ctx->op->sym->m_dst->pkt_len =
+                                       rte_be_to_cpu_16(ip4_hdr->ip_len);
+                               ctx->op->sym->m_dst->data_len =
+                                       rte_be_to_cpu_16(ip4_hdr->ip_len);
+                       } else {
+                               ip4_hdr = (struct ip *)
+                               rte_pktmbuf_mtod(ctx->op->sym->m_src, char*);
+                               ctx->op->sym->m_src->pkt_len =
+                                       rte_be_to_cpu_16(ip4_hdr->ip_len);
+                               ctx->op->sym->m_src->data_len =
+                                       rte_be_to_cpu_16(ip4_hdr->ip_len);
+                       }
+               }
+               *ops = ctx->op;
+               caam_jr_op_ending(ctx);
+               ops++;
+               notified_descs_no++;
+       }
+       return notified_descs_no;
+}
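+
+/* The bookkeeping above follows the usual producer/consumer ring
+ * pattern: the enqueue path writes a descriptor pointer into
+ * input_ring[pidx] and advances pidx, while this poll path reads
+ * output_ring[cidx] and advances cidx via SEC_CIRCULAR_COUNTER(), a
+ * wrap-around increment over SEC_JOB_RING_SIZE entries. The
+ * rte_smp_rmb() above pairs with the rte_smp_wmb() issued in
+ * caam_jr_enqueue_op() before the producer index is advanced.
+ */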
+
+static uint16_t
+caam_jr_dequeue_burst(void *qp, struct rte_crypto_op **ops,
+                      uint16_t nb_ops)
+{
+       struct caam_jr_qp *jr_qp = (struct caam_jr_qp *)qp;
+       struct sec_job_ring_t *ring = jr_qp->ring;
+       int num_rx;
+       int ret;
+
+       PMD_INIT_FUNC_TRACE();
+       CAAM_JR_DP_DEBUG("Jr[%p]Polling. limit[%d]", ring, nb_ops);
+
+       /* Poll the job ring until either nb_ops completed jobs have been
+        * notified or no more notifications are available (nb_ops is
+        * unsigned, so it cannot request an unbounded poll).
+        */
+
+       /* Run hw poll job ring */
+       num_rx = hw_poll_job_ring(ring, ops, nb_ops, jr_qp);
+       if (num_rx < 0) {
+               CAAM_JR_ERR("Error polling SEC engine (%d)", num_rx);
+               return 0;
+       }
+
+       CAAM_JR_DP_DEBUG("Jr[%p].Jobs notified[%d]. ", ring, num_rx);
+
+       if (ring->jr_mode == SEC_NOTIFICATION_TYPE_NAPI) {
+               if (num_rx < nb_ops) {
+                       ret = caam_jr_enable_irqs(ring->irq_fd);
+                       SEC_ASSERT(ret == 0, ret,
+                       "Failed to enable irqs for job ring %p", ring);
+               }
+       } else if (ring->jr_mode == SEC_NOTIFICATION_TYPE_IRQ) {
+
+               /* Always enable IRQ generation when in pure IRQ mode */
+               ret = caam_jr_enable_irqs(ring->irq_fd);
+               SEC_ASSERT(ret == 0, ret,
+                       "Failed to enable irqs for job ring %p", ring);
+       }
+
+       jr_qp->rx_pkts += num_rx;
+
+       return num_rx;
+}
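+
+/* Interrupt re-arming in the dequeue path above: in NAPI mode the job
+ * ring IRQs are re-enabled only when fewer jobs than requested were
+ * drained (num_rx < nb_ops), i.e. the ring ran dry; in pure IRQ mode
+ * they are re-enabled unconditionally after every poll.
+ */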
+
+/**
+ * packet looks like:
+ *              |<----data_len------->|
+ *    |ip_header|ah_header|icv|payload|
+ *              ^
+ *              |
+ *              mbuf->pkt.data
+ */
+static inline struct caam_jr_op_ctx *
+build_auth_only_sg(struct rte_crypto_op *op, struct caam_jr_session *ses)
+{
+       struct rte_crypto_sym_op *sym = op->sym;
+       struct rte_mbuf *mbuf = sym->m_src;
+       struct caam_jr_op_ctx *ctx;
+       struct sec4_sg_entry *sg;
+       int     length;
+       struct sec_cdb *cdb;
+       uint64_t sdesc_offset;
+       struct sec_job_descriptor_t *jobdescr;
+       uint8_t extra_segs;
+
+       PMD_INIT_FUNC_TRACE();
+       if (is_decode(ses))
+               extra_segs = 2;
+       else
+               extra_segs = 1;
+
+       if ((mbuf->nb_segs + extra_segs) > MAX_SG_ENTRIES) {
+               CAAM_JR_DP_ERR("Auth: Max sec segs supported is %d",
+                               MAX_SG_ENTRIES);
+               return NULL;
+       }
+
+       ctx = caam_jr_alloc_ctx(ses);
+       if (!ctx)
+               return NULL;
+
+       ctx->op = op;
+
+       cdb = ses->cdb;
+       sdesc_offset = (size_t) ((char *)&cdb->sh_desc - (char *)cdb);
+
+       jobdescr = (struct sec_job_descriptor_t *) ctx->jobdes.desc;
+
+       SEC_JD_INIT(jobdescr);
+       SEC_JD_SET_SD(jobdescr,
+               (phys_addr_t)(caam_jr_dma_vtop(cdb)) + sdesc_offset,
+               cdb->sh_hdr.hi.field.idlen);
+
+       /* output */
+       SEC_JD_SET_OUT_PTR(jobdescr, (uint64_t)sym->auth.digest.phys_addr,
+                       0, ses->digest_length);
+
+       /*input */
+       sg = &ctx->sg[0];
+       length = sym->auth.data.length;
+       sg->ptr = cpu_to_caam64(rte_pktmbuf_iova(mbuf) + sym->auth.data.offset);
+       sg->len = cpu_to_caam32(mbuf->data_len - sym->auth.data.offset);
+
+       /* Successive segs */
+       mbuf = mbuf->next;
+       while (mbuf) {
+               sg++;
+               sg->ptr = cpu_to_caam64(rte_pktmbuf_iova(mbuf));
+               sg->len = cpu_to_caam32(mbuf->data_len);
+               mbuf = mbuf->next;
+       }
+
+       if (is_decode(ses)) {
+               /* digest verification case */
+               sg++;
+               /* hash result or digest, save digest first */
+               rte_memcpy(ctx->digest, sym->auth.digest.data,
+                          ses->digest_length);
+#if CAAM_JR_DBG
+               rte_hexdump(stdout, "ICV", ctx->digest, ses->digest_length);
+#endif
+               sg->ptr = cpu_to_caam64(caam_jr_vtop_ctx(ctx, ctx->digest));
+               sg->len = cpu_to_caam32(ses->digest_length);
+               length += ses->digest_length;
+       } else {
+               length -= ses->digest_length;
+       }
+
+       /* last element*/
+       sg->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+
+       SEC_JD_SET_IN_PTR(jobdescr,
+               (uint64_t)caam_jr_vtop_ctx(ctx, &ctx->sg[0]), 0, length);
+       /* enabling sg list */
+       (jobdescr)->seq_in.command.word  |= 0x01000000;
+
+       return ctx;
+}
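+
+/* Scatter-gather conventions used by the builders below as well: the
+ * final sec4_sg_entry of a table is terminated by OR-ing
+ * SEC4_SG_LEN_FIN into its length, and the SEQ IN/OUT PTR command word
+ * is OR-ed with 0x01000000 to mark the pointer as referencing an SG
+ * table rather than flat data.
+ */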
+
+static inline struct caam_jr_op_ctx *
+build_auth_only(struct rte_crypto_op *op, struct caam_jr_session *ses)
+{
+       struct rte_crypto_sym_op *sym = op->sym;
+       struct caam_jr_op_ctx *ctx;
+       struct sec4_sg_entry *sg;
+       rte_iova_t start_addr;
+       struct sec_cdb *cdb;
+       uint64_t sdesc_offset;
+       struct sec_job_descriptor_t *jobdescr;
+
+       PMD_INIT_FUNC_TRACE();
+       ctx = caam_jr_alloc_ctx(ses);
+       if (!ctx)
+               return NULL;
+
+       ctx->op = op;
+
+       cdb = ses->cdb;
+       sdesc_offset = (size_t) ((char *)&cdb->sh_desc - (char *)cdb);
+
+       start_addr = rte_pktmbuf_iova(sym->m_src);
+
+       jobdescr = (struct sec_job_descriptor_t *) ctx->jobdes.desc;
+
+       SEC_JD_INIT(jobdescr);
+       SEC_JD_SET_SD(jobdescr,
+               (phys_addr_t)(caam_jr_dma_vtop(cdb)) + sdesc_offset,
+               cdb->sh_hdr.hi.field.idlen);
+
+       /* output */
+       SEC_JD_SET_OUT_PTR(jobdescr, (uint64_t)sym->auth.digest.phys_addr,
+                       0, ses->digest_length);
+
+       /*input */
+       if (is_decode(ses)) {
+               sg = &ctx->sg[0];
+               SEC_JD_SET_IN_PTR(jobdescr,
+                       (uint64_t)caam_jr_vtop_ctx(ctx, sg), 0,
+                       (sym->auth.data.length + ses->digest_length));
+               /* enabling sg list */
+               (jobdescr)->seq_in.command.word  |= 0x01000000;
+
+               /* hash result or digest, save digest first */
+               rte_memcpy(ctx->digest, sym->auth.digest.data,
+                          ses->digest_length);
+               sg->ptr = cpu_to_caam64(start_addr + sym->auth.data.offset);
+               sg->len = cpu_to_caam32(sym->auth.data.length);
+
+#if CAAM_JR_DBG
+               rte_hexdump(stdout, "ICV", ctx->digest, ses->digest_length);
+#endif
+               /* let's check digest by hw */
+               sg++;
+               sg->ptr = cpu_to_caam64(caam_jr_vtop_ctx(ctx, ctx->digest));
+               sg->len = cpu_to_caam32(ses->digest_length);
+               /* last element*/
+               sg->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+       } else {
+               SEC_JD_SET_IN_PTR(jobdescr, (uint64_t)start_addr,
+                       sym->auth.data.offset, sym->auth.data.length);
+       }
+       return ctx;
+}
+
+static inline struct caam_jr_op_ctx *
+build_cipher_only_sg(struct rte_crypto_op *op, struct caam_jr_session *ses)
+{
+       struct rte_crypto_sym_op *sym = op->sym;
+       struct rte_mbuf *mbuf = sym->m_src;
+       struct caam_jr_op_ctx *ctx;
+       struct sec4_sg_entry *sg, *in_sg;
+       int length;
+       struct sec_cdb *cdb;
+       uint64_t sdesc_offset;
+       uint8_t *IV_ptr = rte_crypto_op_ctod_offset(op, uint8_t *,
+                       ses->iv.offset);
+       struct sec_job_descriptor_t *jobdescr;
+       uint8_t req_segs;
+
+       PMD_INIT_FUNC_TRACE();
+       if (sym->m_dst) {
+               mbuf = sym->m_dst;
+               req_segs = mbuf->nb_segs + sym->m_src->nb_segs + 2;
+       } else {
+               mbuf = sym->m_src;
+               req_segs = mbuf->nb_segs * 2 + 2;
+       }
+
+       if (req_segs > MAX_SG_ENTRIES) {
+               CAAM_JR_DP_ERR("Cipher: Max sec segs supported is %d",
+                               MAX_SG_ENTRIES);
+               return NULL;
+       }
+
+       ctx = caam_jr_alloc_ctx(ses);
+       if (!ctx)
+               return NULL;
+
+       ctx->op = op;
+       cdb = ses->cdb;
+       sdesc_offset = (size_t) ((char *)&cdb->sh_desc - (char *)cdb);
+
+       jobdescr = (struct sec_job_descriptor_t *) ctx->jobdes.desc;
+
+       SEC_JD_INIT(jobdescr);
+       SEC_JD_SET_SD(jobdescr,
+               (phys_addr_t)(caam_jr_dma_vtop(cdb)) + sdesc_offset,
+               cdb->sh_hdr.hi.field.idlen);
+
+#if CAAM_JR_DBG
+       CAAM_JR_INFO("mbuf offset =%d, cipher offset = %d, length =%d+%d",
+                       sym->m_src->data_off, sym->cipher.data.offset,
+                       sym->cipher.data.length, ses->iv.length);
+#endif
+       /* output */
+       if (sym->m_dst)
+               mbuf = sym->m_dst;
+       else
+               mbuf = sym->m_src;
+
+       sg = &ctx->sg[0];
+       length = sym->cipher.data.length;
+
+       sg->ptr = cpu_to_caam64(rte_pktmbuf_iova(mbuf)
+               + sym->cipher.data.offset);
+       sg->len = cpu_to_caam32(mbuf->data_len - sym->cipher.data.offset);
+
+       /* Successive segs */
+       mbuf = mbuf->next;
+       while (mbuf) {
+               sg++;
+               sg->ptr = cpu_to_caam64(rte_pktmbuf_iova(mbuf));
+               sg->len = cpu_to_caam32(mbuf->data_len);
+               mbuf = mbuf->next;
+       }
+       /* last element*/
+       sg->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+
+       SEC_JD_SET_OUT_PTR(jobdescr,
+                       (uint64_t)caam_jr_vtop_ctx(ctx, &ctx->sg[0]), 0,
+                       length);
+       /*enabling sg bit */
+       (jobdescr)->seq_out.command.word  |= 0x01000000;
+
+       /*input */
+       sg++;
+       mbuf = sym->m_src;
+       in_sg = sg;
+
+       length = sym->cipher.data.length + ses->iv.length;
+
+       /* IV */
+       sg->ptr = cpu_to_caam64(caam_jr_dma_vtop(IV_ptr));
+       sg->len = cpu_to_caam32(ses->iv.length);
+
+       /* 1st seg */
+       sg++;
+       sg->ptr = cpu_to_caam64(rte_pktmbuf_iova(mbuf)
+                               + sym->cipher.data.offset);
+       sg->len = cpu_to_caam32(mbuf->data_len - sym->cipher.data.offset);
+
+       /* Successive segs */
+       mbuf = mbuf->next;
+       while (mbuf) {
+               sg++;
+               sg->ptr = cpu_to_caam64(rte_pktmbuf_iova(mbuf));
+               sg->len = cpu_to_caam32(mbuf->data_len);
+               mbuf = mbuf->next;
+       }
+       /* last element*/
+       sg->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+
+
+       SEC_JD_SET_IN_PTR(jobdescr, (uint64_t)caam_jr_vtop_ctx(ctx, in_sg), 0,
+                               length);
+       /*enabling sg bit */
+       (jobdescr)->seq_in.command.word  |= 0x01000000;
+
+       return ctx;
+}
+
+static inline struct caam_jr_op_ctx *
+build_cipher_only(struct rte_crypto_op *op, struct caam_jr_session *ses)
+{
+       struct rte_crypto_sym_op *sym = op->sym;
+       struct caam_jr_op_ctx *ctx;
+       struct sec4_sg_entry *sg;
+       rte_iova_t src_start_addr, dst_start_addr;
+       struct sec_cdb *cdb;
+       uint64_t sdesc_offset;
+       uint8_t *IV_ptr = rte_crypto_op_ctod_offset(op, uint8_t *,
+                       ses->iv.offset);
+       struct sec_job_descriptor_t *jobdescr;
+
+       PMD_INIT_FUNC_TRACE();
+       ctx = caam_jr_alloc_ctx(ses);
+       if (!ctx)
+               return NULL;
+
+       ctx->op = op;
+       cdb = ses->cdb;
+       sdesc_offset = (size_t) ((char *)&cdb->sh_desc - (char *)cdb);
+
+       src_start_addr = rte_pktmbuf_iova(sym->m_src);
+       if (sym->m_dst)
+               dst_start_addr = rte_pktmbuf_iova(sym->m_dst);
+       else
+               dst_start_addr = src_start_addr;
+
+       jobdescr = (struct sec_job_descriptor_t *) ctx->jobdes.desc;
+
+       SEC_JD_INIT(jobdescr);
+       SEC_JD_SET_SD(jobdescr,
+               (phys_addr_t)(caam_jr_dma_vtop(cdb)) + sdesc_offset,
+               cdb->sh_hdr.hi.field.idlen);
+
+#if CAAM_JR_DBG
+       CAAM_JR_INFO("mbuf offset =%d, cipher offset = %d, length =%d+%d",
+                       sym->m_src->data_off, sym->cipher.data.offset,
+                       sym->cipher.data.length, ses->iv.length);
+#endif
+       /* output */
+       SEC_JD_SET_OUT_PTR(jobdescr, (uint64_t)dst_start_addr,
+                       sym->cipher.data.offset,
+                       sym->cipher.data.length + ses->iv.length);
+
+       /*input */
+       sg = &ctx->sg[0];
+       SEC_JD_SET_IN_PTR(jobdescr, (uint64_t)caam_jr_vtop_ctx(ctx, sg), 0,
+                               sym->cipher.data.length + ses->iv.length);
+       /*enabling sg bit */
+       (jobdescr)->seq_in.command.word  |= 0x01000000;
+
+       sg->ptr = cpu_to_caam64(caam_jr_dma_vtop(IV_ptr));
+       sg->len = cpu_to_caam32(ses->iv.length);
+
+       sg = &ctx->sg[1];
+       sg->ptr = cpu_to_caam64(src_start_addr + sym->cipher.data.offset);
+       sg->len = cpu_to_caam32(sym->cipher.data.length);
+       /* last element*/
+       sg->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+
+       return ctx;
+}
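+
+/* In both cipher-only builders the input is handed to SEC as a two-part
+ * SG list, [IV | payload], so a single job reads the IV from the crypto
+ * op and the data from the mbuf; accordingly, the input and output
+ * lengths programmed above include ses->iv.length.
+ */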
+
+/* For decapsulation:
+ *     Input:
+ * +----+----------------+--------------------------------+-----+
+ * | IV | Auth-only data | Authenticated & Encrypted data | ICV |
+ * +----+----------------+--------------------------------+-----+
+ *     Output:
+ * +--------------------------------+
+ * | Decrypted & authenticated data |
+ * +--------------------------------+
+ */
+
+static inline struct caam_jr_op_ctx *
+build_cipher_auth_sg(struct rte_crypto_op *op, struct caam_jr_session *ses)
+{
+       struct rte_crypto_sym_op *sym = op->sym;
+       struct caam_jr_op_ctx *ctx;
+       struct sec4_sg_entry *sg, *out_sg, *in_sg;
+       struct rte_mbuf *mbuf;
+       uint32_t length = 0;
+       struct sec_cdb *cdb;
+       uint64_t sdesc_offset;
+       uint8_t req_segs;
+       uint8_t *IV_ptr = rte_crypto_op_ctod_offset(op, uint8_t *,
+                       ses->iv.offset);
+       struct sec_job_descriptor_t *jobdescr;
+       uint32_t auth_only_len;
+
+       PMD_INIT_FUNC_TRACE();
+       auth_only_len = op->sym->auth.data.length -
+                               op->sym->cipher.data.length;
+
+       if (sym->m_dst) {
+               mbuf = sym->m_dst;
+               req_segs = mbuf->nb_segs + sym->m_src->nb_segs + 3;
+       } else {
+               mbuf = sym->m_src;
+               req_segs = mbuf->nb_segs * 2 + 3;
+       }
+
+       if (req_segs > MAX_SG_ENTRIES) {
+               CAAM_JR_DP_ERR("Cipher-Auth: Max sec segs supported is %d",
+                               MAX_SG_ENTRIES);
+               return NULL;
+       }
+
+       ctx = caam_jr_alloc_ctx(ses);
+       if (!ctx)
+               return NULL;
+
+       ctx->op = op;
+       cdb = ses->cdb;
+       sdesc_offset = (size_t) ((char *)&cdb->sh_desc - (char *)cdb);
+
+       jobdescr = (struct sec_job_descriptor_t *) ctx->jobdes.desc;
+
+       SEC_JD_INIT(jobdescr);
+       SEC_JD_SET_SD(jobdescr,
+               (phys_addr_t)(caam_jr_dma_vtop(cdb)) + sdesc_offset,
+               cdb->sh_hdr.hi.field.idlen);
+
+       /* output */
+       if (sym->m_dst)
+               mbuf = sym->m_dst;
+       else
+               mbuf = sym->m_src;
+
+       out_sg = &ctx->sg[0];
+       if (is_encode(ses))
+               length = sym->auth.data.length + ses->digest_length;
+       else
+               length = sym->auth.data.length;
+
+       sg = &ctx->sg[0];
+
+       /* 1st seg */
+       sg->ptr = cpu_to_caam64(rte_pktmbuf_iova(mbuf)
+               + sym->auth.data.offset);
+       sg->len = cpu_to_caam32(mbuf->data_len - sym->auth.data.offset);
+
+       /* Successive segs */
+       mbuf = mbuf->next;
+       while (mbuf) {
+               sg++;
+               sg->ptr = cpu_to_caam64(rte_pktmbuf_iova(mbuf));
+               sg->len = cpu_to_caam32(mbuf->data_len);
+               mbuf = mbuf->next;
+       }
+
+       if (is_encode(ses)) {
+               /* set auth output */
+               sg++;
+               sg->ptr = cpu_to_caam64(sym->auth.digest.phys_addr);
+               sg->len = cpu_to_caam32(ses->digest_length);
+       }
+       /* last element*/
+       sg->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+
+       SEC_JD_SET_OUT_PTR(jobdescr,
+                          (uint64_t)caam_jr_dma_vtop(out_sg), 0, length);
+       /* set sg bit */
+       (jobdescr)->seq_out.command.word  |= 0x01000000;
+
+       /* input */
+       sg++;
+       mbuf = sym->m_src;
+       in_sg = sg;
+       if (is_encode(ses))
+               length = ses->iv.length + sym->auth.data.length;
+       else
+               length = ses->iv.length + sym->auth.data.length
+                                               + ses->digest_length;
+
+       sg->ptr = cpu_to_caam64(caam_jr_dma_vtop(IV_ptr));
+       sg->len = cpu_to_caam32(ses->iv.length);
+
+       sg++;
+       /* 1st seg */
+       sg->ptr = cpu_to_caam64(rte_pktmbuf_iova(mbuf)
+               + sym->auth.data.offset);
+       sg->len = cpu_to_caam32(mbuf->data_len - sym->auth.data.offset);
+
+       /* Successive segs */
+       mbuf = mbuf->next;
+       while (mbuf) {
+               sg++;
+               sg->ptr = cpu_to_caam64(rte_pktmbuf_iova(mbuf));
+               sg->len = cpu_to_caam32(mbuf->data_len);
+               mbuf = mbuf->next;
+       }
+
+       if (is_decode(ses)) {
+               sg++;
+               rte_memcpy(ctx->digest, sym->auth.digest.data,
+                      ses->digest_length);
+               sg->ptr = cpu_to_caam64(caam_jr_dma_vtop(ctx->digest));
+               sg->len = cpu_to_caam32(ses->digest_length);
+       }
+       /* last element*/
+       sg->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+
+       SEC_JD_SET_IN_PTR(jobdescr, (uint64_t)caam_jr_dma_vtop(in_sg), 0,
+                               length);
+       /* set sg bit */
+       (jobdescr)->seq_in.command.word  |= 0x01000000;
+       /* Auth_only_len is set as 0 in descriptor and it is
+        * overwritten here in the jd which will update
+        * the DPOVRD reg.
+        */
+       if (auth_only_len)
+               /* set DPOVRD with the auth-only length */
+               (jobdescr)->dpovrd = 0x80000000 | auth_only_len;
+
+       return ctx;
+}
+
+static inline struct caam_jr_op_ctx *
+build_cipher_auth(struct rte_crypto_op *op, struct caam_jr_session *ses)
+{
+       struct rte_crypto_sym_op *sym = op->sym;
+       struct caam_jr_op_ctx *ctx;
+       struct sec4_sg_entry *sg;
+       rte_iova_t src_start_addr, dst_start_addr;
+       uint32_t length = 0;
+       struct sec_cdb *cdb;
+       uint64_t sdesc_offset;
+       uint8_t *IV_ptr = rte_crypto_op_ctod_offset(op, uint8_t *,
+                       ses->iv.offset);
+       struct sec_job_descriptor_t *jobdescr;
+       uint32_t auth_only_len;
+
+       PMD_INIT_FUNC_TRACE();
+       auth_only_len = op->sym->auth.data.length -
+                               op->sym->cipher.data.length;
+
+       src_start_addr = rte_pktmbuf_iova(sym->m_src);
+       if (sym->m_dst)
+               dst_start_addr = rte_pktmbuf_iova(sym->m_dst);
+       else
+               dst_start_addr = src_start_addr;
+
+       ctx = caam_jr_alloc_ctx(ses);
+       if (!ctx)
+               return NULL;
+
+       ctx->op = op;
+       cdb = ses->cdb;
+       sdesc_offset = (size_t) ((char *)&cdb->sh_desc - (char *)cdb);
+
+       jobdescr = (struct sec_job_descriptor_t *) ctx->jobdes.desc;
+
+       SEC_JD_INIT(jobdescr);
+       SEC_JD_SET_SD(jobdescr,
+               (phys_addr_t)(caam_jr_dma_vtop(cdb)) + sdesc_offset,
+               cdb->sh_hdr.hi.field.idlen);
+
+       /* input */
+       sg = &ctx->sg[0];
+       if (is_encode(ses)) {
+               sg->ptr = cpu_to_caam64(caam_jr_dma_vtop(IV_ptr));
+               sg->len = cpu_to_caam32(ses->iv.length);
+               length += ses->iv.length;
+
+               sg++;
+               sg->ptr = cpu_to_caam64(src_start_addr + sym->auth.data.offset);
+               sg->len = cpu_to_caam32(sym->auth.data.length);
+               length += sym->auth.data.length;
+               /* last element*/
+               sg->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+       } else {
+               sg->ptr = cpu_to_caam64(caam_jr_dma_vtop(IV_ptr));
+               sg->len = cpu_to_caam32(ses->iv.length);
+               length += ses->iv.length;
+
+               sg++;
+               sg->ptr = cpu_to_caam64(src_start_addr + sym->auth.data.offset);
+               sg->len = cpu_to_caam32(sym->auth.data.length);
+               length += sym->auth.data.length;
+
+               rte_memcpy(ctx->digest, sym->auth.digest.data,
+                      ses->digest_length);
+               sg++;
+               sg->ptr = cpu_to_caam64(caam_jr_dma_vtop(ctx->digest));
+               sg->len = cpu_to_caam32(ses->digest_length);
+               length += ses->digest_length;
+               /* last element*/
+               sg->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+       }
+
+       SEC_JD_SET_IN_PTR(jobdescr, (uint64_t)caam_jr_dma_vtop(&ctx->sg[0]), 0,
+                               length);
+       /* set sg bit */
+       (jobdescr)->seq_in.command.word  |= 0x01000000;
+
+       /* output */
+       sg = &ctx->sg[6];
+
+       sg->ptr = cpu_to_caam64(dst_start_addr + sym->cipher.data.offset);
+       sg->len = cpu_to_caam32(sym->cipher.data.length);
+       length = sym->cipher.data.length;
+
+       if (is_encode(ses)) {
+               /* set auth output */
+               sg++;
+               sg->ptr = cpu_to_caam64(sym->auth.digest.phys_addr);
+               sg->len = cpu_to_caam32(ses->digest_length);
+               length += ses->digest_length;
+       }
+       /* last element*/
+       sg->len |= cpu_to_caam32(SEC4_SG_LEN_FIN);
+
+       SEC_JD_SET_OUT_PTR(jobdescr,
+                          (uint64_t)caam_jr_dma_vtop(&ctx->sg[6]), 0, length);
+       /* set sg bit */
+       (jobdescr)->seq_out.command.word  |= 0x01000000;
+
+       /* Auth_only_len is set as 0 in descriptor and it is
+        * overwritten here in the jd which will update
+        * the DPOVRD reg.
+        */
+       if (auth_only_len)
+               /* set DPOVRD with the auth-only length */
+               (jobdescr)->dpovrd = 0x80000000 | auth_only_len;
+
+       return ctx;
+}
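+
+/* build_cipher_auth() above uses a fixed split of ctx->sg[]: entries
+ * 0..2 carry the input list (IV, payload and, on decode, the saved
+ * digest) while the output list starts at the fixed index 6, which
+ * presumably reserves headroom for the input side.
+ */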
+
+static inline struct caam_jr_op_ctx *
+build_proto(struct rte_crypto_op *op, struct caam_jr_session *ses)
+{
+       struct rte_crypto_sym_op *sym = op->sym;
+       struct caam_jr_op_ctx *ctx = NULL;
+       phys_addr_t src_start_addr, dst_start_addr;
+       struct sec_cdb *cdb;
+       uint64_t sdesc_offset;
+       struct sec_job_descriptor_t *jobdescr;
+
+       PMD_INIT_FUNC_TRACE();
+       ctx = caam_jr_alloc_ctx(ses);
+       if (!ctx)
+               return NULL;
+       ctx->op = op;
+
+       src_start_addr = rte_pktmbuf_iova(sym->m_src);
+       if (sym->m_dst)
+               dst_start_addr = rte_pktmbuf_iova(sym->m_dst);
+       else
+               dst_start_addr = src_start_addr;
+
+       cdb = ses->cdb;
+       sdesc_offset = (size_t) ((char *)&cdb->sh_desc - (char *)cdb);
+
+       jobdescr = (struct sec_job_descriptor_t *) ctx->jobdes.desc;
+
+       SEC_JD_INIT(jobdescr);
+       SEC_JD_SET_SD(jobdescr,
+               (phys_addr_t)(caam_jr_dma_vtop(cdb)) + sdesc_offset,
+                       cdb->sh_hdr.hi.field.idlen);
+
+       /* output */
+       SEC_JD_SET_OUT_PTR(jobdescr, (uint64_t)dst_start_addr, 0,
+                       sym->m_src->buf_len - sym->m_src->data_off);
+       /* input */
+       SEC_JD_SET_IN_PTR(jobdescr, (uint64_t)src_start_addr, 0,
+                       sym->m_src->pkt_len);
+       sym->m_src->packet_type &= ~RTE_PTYPE_L4_MASK;
+
+       return ctx;
+}
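+
+/* For IPsec protocol offload (build_proto) the whole packet is handed
+ * to the protocol descriptor: the input covers pkt_len bytes and the
+ * output window is the remaining buffer (buf_len - data_off), since
+ * encap/decap changes the packet size. The final length is fixed up on
+ * dequeue from the decrypted IP header (see hw_poll_job_ring()).
+ */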
+
+static int
+caam_jr_enqueue_op(struct rte_crypto_op *op, struct caam_jr_qp *qp)
+{
+       struct sec_job_ring_t *ring = qp->ring;
+       struct caam_jr_session *ses;
+       struct caam_jr_op_ctx *ctx = NULL;
+       struct sec_job_descriptor_t *jobdescr __rte_unused;
+
+       PMD_INIT_FUNC_TRACE();
+       switch (op->sess_type) {
+       case RTE_CRYPTO_OP_WITH_SESSION:
+               ses = (struct caam_jr_session *)
+               get_sym_session_private_data(op->sym->session,
+                                       cryptodev_driver_id);
+               break;
+       case RTE_CRYPTO_OP_SECURITY_SESSION:
+               ses = (struct caam_jr_session *)
+                       get_sec_session_private_data(
+                                       op->sym->sec_session);
+               break;
+       default:
+               CAAM_JR_DP_ERR("sessionless crypto op not supported");
+               qp->tx_errs++;
+               return -1;
+       }
+
+       if (unlikely(!ses->qp || ses->qp != qp)) {
+               CAAM_JR_DP_DEBUG("Old:sess->qp=%p New qp = %p\n", ses->qp, qp);
+               ses->qp = qp;
+               caam_jr_prep_cdb(ses);
+       }
+
+       if (rte_pktmbuf_is_contiguous(op->sym->m_src)) {
+               if (is_auth_cipher(ses))
+                       ctx = build_cipher_auth(op, ses);
+               else if (is_aead(ses))
+                       goto err1;
+               else if (is_auth_only(ses))
+                       ctx = build_auth_only(op, ses);
+               else if (is_cipher_only(ses))
+                       ctx = build_cipher_only(op, ses);
+               else if (is_proto_ipsec(ses))
+                       ctx = build_proto(op, ses);
+       } else {
+               if (is_auth_cipher(ses))
+                       ctx = build_cipher_auth_sg(op, ses);
+               else if (is_aead(ses))
+                       goto err1;
+               else if (is_auth_only(ses))
+                       ctx = build_auth_only_sg(op, ses);
+               else if (is_cipher_only(ses))
+                       ctx = build_cipher_only_sg(op, ses);
+       }
+err1:
+       if (unlikely(!ctx)) {
+               qp->tx_errs++;
+               CAAM_JR_ERR("not supported sec op");
+               return -1;
+       }
+#if CAAM_JR_DBG
+       if (is_decode(ses))
+               rte_hexdump(stdout, "DECODE",
+                       rte_pktmbuf_mtod(op->sym->m_src, void *),
+                       rte_pktmbuf_data_len(op->sym->m_src));
+       else
+               rte_hexdump(stdout, "ENCODE",
+                       rte_pktmbuf_mtod(op->sym->m_src, void *),
+                       rte_pktmbuf_data_len(op->sym->m_src));
+
+       printf("\n JD before conversion\n");
+       for (int i = 0; i < 12; i++)
+               printf("\n 0x%08x", ctx->jobdes.desc[i]);
+#endif
+
+       CAAM_JR_DP_DEBUG("Jr[%p] pi[%d] ci[%d].Before sending desc",
+                     ring, ring->pidx, ring->cidx);
+
+       /* TODO: do we want to retry? */
+       if (SEC_JOB_RING_IS_FULL(ring->pidx, ring->cidx,
+                        SEC_JOB_RING_SIZE, SEC_JOB_RING_SIZE)) {
+               CAAM_JR_DP_DEBUG("Ring FULL Jr[%p] pi[%d] ci[%d].Size = %d",
+                             ring, ring->pidx, ring->cidx, SEC_JOB_RING_SIZE);
+               caam_jr_op_ending(ctx);
+               qp->tx_ring_full++;
+               return -EBUSY;
+       }
+
+#if CORE_BYTE_ORDER != CAAM_BYTE_ORDER
+       jobdescr = (struct sec_job_descriptor_t *) ctx->jobdes.desc;
+
+       jobdescr->deschdr.command.word =
+               cpu_to_caam32(jobdescr->deschdr.command.word);
+       jobdescr->sd_ptr = cpu_to_caam64(jobdescr->sd_ptr);
+       jobdescr->seq_out.command.word =
+               cpu_to_caam32(jobdescr->seq_out.command.word);
+       jobdescr->seq_out_ptr = cpu_to_caam64(jobdescr->seq_out_ptr);
+       jobdescr->out_ext_length = cpu_to_caam32(jobdescr->out_ext_length);
+       jobdescr->seq_in.command.word =
+               cpu_to_caam32(jobdescr->seq_in.command.word);
+       jobdescr->seq_in_ptr = cpu_to_caam64(jobdescr->seq_in_ptr);
+       jobdescr->in_ext_length = cpu_to_caam32(jobdescr->in_ext_length);
+       jobdescr->load_dpovrd.command.word =
+               cpu_to_caam32(jobdescr->load_dpovrd.command.word);
+       jobdescr->dpovrd = cpu_to_caam32(jobdescr->dpovrd);
+#endif
+
+       /* Set ptr in input ring to current descriptor  */
+       sec_write_addr(&ring->input_ring[ring->pidx],
+                       (phys_addr_t)caam_jr_vtop_ctx(ctx, ctx->jobdes.desc));
+       rte_smp_wmb();
+
+       /* Notify HW that a new job is enqueued */
+       hw_enqueue_desc_on_job_ring(ring);
+
+       /* increment the producer index for the current job ring */
+       ring->pidx = SEC_CIRCULAR_COUNTER(ring->pidx, SEC_JOB_RING_SIZE);
+
+       return 0;
+}
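+
+/* The CORE_BYTE_ORDER != CAAM_BYTE_ORDER block above byte-swaps the
+ * header, pointer and length words of the job descriptor once, at
+ * enqueue time, so the builders can work in CPU byte order while the
+ * descriptor is always presented to the SEC block in its native
+ * endianness.
+ */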
+
+static uint16_t
+caam_jr_enqueue_burst(void *qp, struct rte_crypto_op **ops,
+                      uint16_t nb_ops)
+{
+       /* Transmit the crypto ops to the given device and queue pair */
+       uint32_t loop;
+       int32_t ret;
+       struct caam_jr_qp *jr_qp = (struct caam_jr_qp *)qp;
+       uint16_t num_tx = 0;
+
+       PMD_INIT_FUNC_TRACE();
+       /* Prepare each op which is to be sent */
+       for (loop = 0; loop < nb_ops; loop++) {
+               ret = caam_jr_enqueue_op(ops[loop], jr_qp);
+               if (!ret)
+                       num_tx++;
+       }
+
+       jr_qp->tx_pkts += num_tx;
+
+       return num_tx;
+}
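+
+/* Illustrative usage from the application side (a sketch, not part of
+ * this driver; dev_id, qp, ops and deq_ops are hypothetical caller
+ * state, and the burst calls are the standard rte_cryptodev APIs that
+ * end up in caam_jr_enqueue_burst()/caam_jr_dequeue_burst() via the
+ * PMD's registered function pointers):
+ *
+ *     uint16_t sent, done = 0;
+ *
+ *     sent = rte_cryptodev_enqueue_burst(dev_id, qp, ops, nb_ops);
+ *     while (done < sent)
+ *             done += rte_cryptodev_dequeue_burst(dev_id, qp,
+ *                                                 &deq_ops[done],
+ *                                                 sent - done);
+ */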
+
+/* Release queue pair */
+static int
+caam_jr_queue_pair_release(struct rte_cryptodev *dev,
+                          uint16_t qp_id)
+{
+       struct sec_job_ring_t *internals;
+       struct caam_jr_qp *qp = NULL;
+
+       PMD_INIT_FUNC_TRACE();
+       CAAM_JR_DEBUG("dev =%p, queue =%d", dev, qp_id);
+
+       internals = dev->data->dev_private;
+       if (qp_id >= internals->max_nb_queue_pairs) {
+               CAAM_JR_ERR("Max supported qpid %d",
+                            internals->max_nb_queue_pairs);
+               return -EINVAL;
+       }
+
+       qp = &internals->qps[qp_id];
+       qp->ring = NULL;
+       dev->data->queue_pairs[qp_id] = NULL;
+
+       return 0;
+}
+
+/* Setup a queue pair */
+static int
+caam_jr_queue_pair_setup(
+               struct rte_cryptodev *dev, uint16_t qp_id,
+               __rte_unused const struct rte_cryptodev_qp_conf *qp_conf,
+               __rte_unused int socket_id,
+               __rte_unused struct rte_mempool *session_pool)
+{
+       struct sec_job_ring_t *internals;
+       struct caam_jr_qp *qp = NULL;
+
+       PMD_INIT_FUNC_TRACE();
+       CAAM_JR_DEBUG("dev =%p, queue =%d, conf =%p", dev, qp_id, qp_conf);
+
+       internals = dev->data->dev_private;
+       if (qp_id >= internals->max_nb_queue_pairs) {
+               CAAM_JR_ERR("Max supported qpid %d",
+                            internals->max_nb_queue_pairs);
+               return -EINVAL;
+       }
+
+       qp = &internals->qps[qp_id];
+       qp->ring = internals;
+       dev->data->queue_pairs[qp_id] = qp;
+
+       return 0;
+}
+
+/* Return the number of allocated queue pairs */
+static uint32_t
+caam_jr_queue_pair_count(struct rte_cryptodev *dev)
+{
+       PMD_INIT_FUNC_TRACE();
+
+       return dev->data->nb_queue_pairs;
+}
+
+/* Returns the size of the caam_jr session structure */
+static unsigned int
+caam_jr_sym_session_get_size(struct rte_cryptodev *dev __rte_unused)
+{
+       PMD_INIT_FUNC_TRACE();
+
+       return sizeof(struct caam_jr_session);
+}
+
+static int
+caam_jr_cipher_init(struct rte_cryptodev *dev __rte_unused,
+                   struct rte_crypto_sym_xform *xform,
+                   struct caam_jr_session *session)
+{
+       PMD_INIT_FUNC_TRACE();
+       session->cipher_alg = xform->cipher.algo;
+       session->iv.length = xform->cipher.iv.length;
+       session->iv.offset = xform->cipher.iv.offset;
+       session->cipher_key.data = rte_zmalloc(NULL, xform->cipher.key.length,
+                                              RTE_CACHE_LINE_SIZE);
+       if (session->cipher_key.data == NULL && xform->cipher.key.length > 0) {
+               CAAM_JR_ERR("No Memory for cipher key\n");
+               return -ENOMEM;
+       }
+       session->cipher_key.length = xform->cipher.key.length;
+
+       memcpy(session->cipher_key.data, xform->cipher.key.data,
+              xform->cipher.key.length);
+       session->dir = (xform->cipher.op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) ?
+                       DIR_ENC : DIR_DEC;
+
+       return 0;
+}
+
+static int
+caam_jr_auth_init(struct rte_cryptodev *dev __rte_unused,
+                 struct rte_crypto_sym_xform *xform,
+                 struct caam_jr_session *session)
+{
+       PMD_INIT_FUNC_TRACE();
+       session->auth_alg = xform->auth.algo;
+       session->auth_key.data = rte_zmalloc(NULL, xform->auth.key.length,
+                                            RTE_CACHE_LINE_SIZE);
+       if (session->auth_key.data == NULL && xform->auth.key.length > 0) {
+               CAAM_JR_ERR("No Memory for auth key\n");
+               return -ENOMEM;
+       }
+       session->auth_key.length = xform->auth.key.length;
+       session->digest_length = xform->auth.digest_length;
+
+       memcpy(session->auth_key.data, xform->auth.key.data,
+              xform->auth.key.length);
+       session->dir = (xform->auth.op == RTE_CRYPTO_AUTH_OP_GENERATE) ?
+                       DIR_ENC : DIR_DEC;
+
+       return 0;
+}
+
+static int
+caam_jr_aead_init(struct rte_cryptodev *dev __rte_unused,
+                 struct rte_crypto_sym_xform *xform,
+                 struct caam_jr_session *session)
+{
+       PMD_INIT_FUNC_TRACE();
+       session->aead_alg = xform->aead.algo;
+       session->iv.length = xform->aead.iv.length;
+       session->iv.offset = xform->aead.iv.offset;
+       session->auth_only_len = xform->aead.aad_length;
+       session->aead_key.data = rte_zmalloc(NULL, xform->aead.key.length,
+                                            RTE_CACHE_LINE_SIZE);
+       if (session->aead_key.data == NULL && xform->aead.key.length > 0) {
+               CAAM_JR_ERR("No Memory for aead key\n");
+               return -ENOMEM;
+       }
+       session->aead_key.length = xform->aead.key.length;
+       session->digest_length = xform->aead.digest_length;
+
+       memcpy(session->aead_key.data, xform->aead.key.data,
+              xform->aead.key.length);
+       session->dir = (xform->aead.op == RTE_CRYPTO_AEAD_OP_ENCRYPT) ?
+                       DIR_ENC : DIR_DEC;
+
+       return 0;
+}
+
+static int
+caam_jr_set_session_parameters(struct rte_cryptodev *dev,
+                              struct rte_crypto_sym_xform *xform, void *sess)
+{
+       struct sec_job_ring_t *internals = dev->data->dev_private;
+       struct caam_jr_session *session = sess;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (unlikely(sess == NULL)) {
+               CAAM_JR_ERR("invalid session struct");
+               return -EINVAL;
+       }
+
+       /* Default IV length = 0 */
+       session->iv.length = 0;
+
+       /* Cipher Only */
+       if (xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER && xform->next == NULL) {
+               session->auth_alg = RTE_CRYPTO_AUTH_NULL;
+               caam_jr_cipher_init(dev, xform, session);
+
+       /* Authentication Only */
+       } else if (xform->type == RTE_CRYPTO_SYM_XFORM_AUTH &&
+                  xform->next == NULL) {
+               session->cipher_alg = RTE_CRYPTO_CIPHER_NULL;
+               caam_jr_auth_init(dev, xform, session);
+
+       /* Cipher then Authenticate */
+       } else if (xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER &&
+                  xform->next->type == RTE_CRYPTO_SYM_XFORM_AUTH) {
+               if (xform->cipher.op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
+                       caam_jr_cipher_init(dev, xform, session);
+                       caam_jr_auth_init(dev, xform->next, session);
+               } else {
+                       CAAM_JR_ERR("Not supported: Auth then Cipher");
+                       goto err1;
+               }
+
+       /* Authenticate then Cipher */
+       } else if (xform->type == RTE_CRYPTO_SYM_XFORM_AUTH &&
+                  xform->next->type == RTE_CRYPTO_SYM_XFORM_CIPHER) {
+               if (xform->next->cipher.op == RTE_CRYPTO_CIPHER_OP_DECRYPT) {
+                       caam_jr_auth_init(dev, xform, session);
+                       caam_jr_cipher_init(dev, xform->next, session);
+               } else {
+                       CAAM_JR_ERR("Not supported: Auth then Cipher");
+                       goto err1;
+               }
+
+       /* AEAD operation for AES-GCM kind of Algorithms */
+       } else if (xform->type == RTE_CRYPTO_SYM_XFORM_AEAD &&
+                  xform->next == NULL) {
+               caam_jr_aead_init(dev, xform, session);
+
+       } else {
+               CAAM_JR_ERR("Invalid crypto type");
+               return -EINVAL;
+       }
+       session->ctx_pool = internals->ctx_pool;
+
+       return 0;
+
+err1:
+       rte_free(session->cipher_key.data);
+       rte_free(session->auth_key.data);
+       memset(session, 0, sizeof(struct caam_jr_session));
+
+       return -EINVAL;
+}
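+
+/* A sketch of the xform chain an application would build to hit the
+ * "Cipher then Authenticate" branch above (illustrative only; the key
+ * fields are omitted and IV_OFFSET is a placeholder for where the
+ * caller stores the IV inside the crypto op):
+ *
+ *     struct rte_crypto_sym_xform auth_xf = {
+ *             .type = RTE_CRYPTO_SYM_XFORM_AUTH,
+ *             .next = NULL,
+ *             .auth = {
+ *                     .op = RTE_CRYPTO_AUTH_OP_GENERATE,
+ *                     .algo = RTE_CRYPTO_AUTH_SHA1_HMAC,
+ *                     .digest_length = 20,
+ *             },
+ *     };
+ *     struct rte_crypto_sym_xform cipher_xf = {
+ *             .type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+ *             .next = &auth_xf,
+ *             .cipher = {
+ *                     .op = RTE_CRYPTO_CIPHER_OP_ENCRYPT,
+ *                     .algo = RTE_CRYPTO_CIPHER_AES_CBC,
+ *                     .iv = { .offset = IV_OFFSET, .length = 16 },
+ *             },
+ *     };
+ */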
+
+static int
+caam_jr_sym_session_configure(struct rte_cryptodev *dev,
+                             struct rte_crypto_sym_xform *xform,
+                             struct rte_cryptodev_sym_session *sess,
+                             struct rte_mempool *mempool)
+{
+       void *sess_private_data;
+       int ret;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (rte_mempool_get(mempool, &sess_private_data)) {
+               CAAM_JR_ERR("Couldn't get object from session mempool");
+               return -ENOMEM;
+       }
+
+       memset(sess_private_data, 0, sizeof(struct caam_jr_session));
+       ret = caam_jr_set_session_parameters(dev, xform, sess_private_data);
+       if (ret != 0) {
+               CAAM_JR_ERR("failed to configure session parameters");
+               /* Return session to mempool */
+               rte_mempool_put(mempool, sess_private_data);
+               return ret;
+       }
+
+       set_sym_session_private_data(sess, dev->driver_id, sess_private_data);
+
+       return 0;
+}
+
+/* Clear the memory of session so it doesn't leave key material behind */
+static void
+caam_jr_sym_session_clear(struct rte_cryptodev *dev,
+               struct rte_cryptodev_sym_session *sess)
+{
+       uint8_t index = dev->driver_id;
+       void *sess_priv = get_sym_session_private_data(sess, index);
+       struct caam_jr_session *s = (struct caam_jr_session *)sess_priv;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (sess_priv) {
+               struct rte_mempool *sess_mp = rte_mempool_from_obj(sess_priv);
+
+               rte_free(s->cipher_key.data);
+               rte_free(s->auth_key.data);
+               memset(s, 0, sizeof(struct caam_jr_session));
+               set_sym_session_private_data(sess, index, NULL);
+               rte_mempool_put(sess_mp, sess_priv);
+       }
+}
+
+static int
+caam_jr_set_ipsec_session(struct rte_cryptodev *dev,
+                         struct rte_security_session_conf *conf,
+                         void *sess)
+{
+       struct sec_job_ring_t *internals = dev->data->dev_private;
+       struct rte_security_ipsec_xform *ipsec_xform = &conf->ipsec;
+       struct rte_crypto_auth_xform *auth_xform;
+       struct rte_crypto_cipher_xform *cipher_xform;
+       struct caam_jr_session *session = (struct caam_jr_session *)sess;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (ipsec_xform->direction == RTE_SECURITY_IPSEC_SA_DIR_EGRESS) {
+               cipher_xform = &conf->crypto_xform->cipher;
+               auth_xform = &conf->crypto_xform->next->auth;
+       } else {
+               auth_xform = &conf->crypto_xform->auth;
+               cipher_xform = &conf->crypto_xform->next->cipher;
+       }
+       session->proto_alg = conf->protocol;
+       session->cipher_key.data = rte_zmalloc(NULL,
+                                              cipher_xform->key.length,
+                                              RTE_CACHE_LINE_SIZE);
+       if (session->cipher_key.data == NULL &&
+                       cipher_xform->key.length > 0) {
+               CAAM_JR_ERR("No Memory for cipher key\n");
+               return -ENOMEM;
+       }
+
+       session->cipher_key.length = cipher_xform->key.length;
+       session->auth_key.data = rte_zmalloc(NULL,
+                                       auth_xform->key.length,
+                                       RTE_CACHE_LINE_SIZE);
+       if (session->auth_key.data == NULL &&
+                       auth_xform->key.length > 0) {
+               CAAM_JR_ERR("No memory for auth key\n");
+               rte_free(session->cipher_key.data);
+               return -ENOMEM;
+       }
+       session->auth_key.length = auth_xform->key.length;
+       memcpy(session->cipher_key.data, cipher_xform->key.data,
+                       cipher_xform->key.length);
+       memcpy(session->auth_key.data, auth_xform->key.data,
+                       auth_xform->key.length);
+
+       switch (auth_xform->algo) {
+       case RTE_CRYPTO_AUTH_SHA1_HMAC:
+               session->auth_alg = RTE_CRYPTO_AUTH_SHA1_HMAC;
+               break;
+       case RTE_CRYPTO_AUTH_MD5_HMAC:
+               session->auth_alg = RTE_CRYPTO_AUTH_MD5_HMAC;
+               break;
+       case RTE_CRYPTO_AUTH_SHA256_HMAC:
+               session->auth_alg = RTE_CRYPTO_AUTH_SHA256_HMAC;
+               break;
+       case RTE_CRYPTO_AUTH_SHA384_HMAC:
+               session->auth_alg = RTE_CRYPTO_AUTH_SHA384_HMAC;
+               break;
+       case RTE_CRYPTO_AUTH_SHA512_HMAC:
+               session->auth_alg = RTE_CRYPTO_AUTH_SHA512_HMAC;
+               break;
+       case RTE_CRYPTO_AUTH_AES_CMAC:
+               session->auth_alg = RTE_CRYPTO_AUTH_AES_CMAC;
+               break;
+       case RTE_CRYPTO_AUTH_NULL:
+               session->auth_alg = RTE_CRYPTO_AUTH_NULL;
+               break;
+       case RTE_CRYPTO_AUTH_SHA224_HMAC:
+       case RTE_CRYPTO_AUTH_AES_XCBC_MAC:
+       case RTE_CRYPTO_AUTH_SNOW3G_UIA2:
+       case RTE_CRYPTO_AUTH_SHA1:
+       case RTE_CRYPTO_AUTH_SHA256:
+       case RTE_CRYPTO_AUTH_SHA512:
+       case RTE_CRYPTO_AUTH_SHA224:
+       case RTE_CRYPTO_AUTH_SHA384:
+       case RTE_CRYPTO_AUTH_MD5:
+       case RTE_CRYPTO_AUTH_AES_GMAC:
+       case RTE_CRYPTO_AUTH_KASUMI_F9:
+       case RTE_CRYPTO_AUTH_AES_CBC_MAC:
+       case RTE_CRYPTO_AUTH_ZUC_EIA3:
+               CAAM_JR_ERR("Crypto: Unsupported auth alg %u\n",
+                       auth_xform->algo);
+               goto out;
+       default:
+               CAAM_JR_ERR("Crypto: Undefined Auth specified %u\n",
+                       auth_xform->algo);
+               goto out;
+       }
+
+       switch (cipher_xform->algo) {
+       case RTE_CRYPTO_CIPHER_AES_CBC:
+               session->cipher_alg = RTE_CRYPTO_CIPHER_AES_CBC;
+               break;
+       case RTE_CRYPTO_CIPHER_3DES_CBC:
+               session->cipher_alg = RTE_CRYPTO_CIPHER_3DES_CBC;
+               break;
+       case RTE_CRYPTO_CIPHER_AES_CTR:
+               session->cipher_alg = RTE_CRYPTO_CIPHER_AES_CTR;
+               break;
+       case RTE_CRYPTO_CIPHER_NULL:
+       case RTE_CRYPTO_CIPHER_SNOW3G_UEA2:
+       case RTE_CRYPTO_CIPHER_3DES_ECB:
+       case RTE_CRYPTO_CIPHER_AES_ECB:
+       case RTE_CRYPTO_CIPHER_KASUMI_F8:
+               CAAM_JR_ERR("Crypto: Unsupported Cipher alg %u\n",
+                       cipher_xform->algo);
+               goto out;
+       default:
+               CAAM_JR_ERR("Crypto: Undefined Cipher specified %u\n",
+                       cipher_xform->algo);
+               goto out;
+       }
+
+       if (ipsec_xform->direction == RTE_SECURITY_IPSEC_SA_DIR_EGRESS) {
+               memset(&session->encap_pdb, 0, sizeof(struct ipsec_encap_pdb) +
+                               sizeof(session->ip4_hdr));
+               session->ip4_hdr.ip_v = IPVERSION;
+               session->ip4_hdr.ip_hl = 5;
+               session->ip4_hdr.ip_len = rte_cpu_to_be_16(
+                                               sizeof(session->ip4_hdr));
+               session->ip4_hdr.ip_tos = ipsec_xform->tunnel.ipv4.dscp;
+               session->ip4_hdr.ip_id = 0;
+               session->ip4_hdr.ip_off = 0;
+               session->ip4_hdr.ip_ttl = ipsec_xform->tunnel.ipv4.ttl;
+               session->ip4_hdr.ip_p = (ipsec_xform->proto ==
+                               RTE_SECURITY_IPSEC_SA_PROTO_ESP) ? IPPROTO_ESP
+                               : IPPROTO_AH;
+               session->ip4_hdr.ip_sum = 0;
+               session->ip4_hdr.ip_src = ipsec_xform->tunnel.ipv4.src_ip;
+               session->ip4_hdr.ip_dst = ipsec_xform->tunnel.ipv4.dst_ip;
+               session->ip4_hdr.ip_sum = calc_chksum((uint16_t *)
+                                               (void *)&session->ip4_hdr,
+                                               sizeof(struct ip));
+
+               session->encap_pdb.options =
+                       (IPVERSION << PDBNH_ESP_ENCAP_SHIFT) |
+                       PDBOPTS_ESP_OIHI_PDB_INL |
+                       PDBOPTS_ESP_IVSRC |
+                       PDBHMO_ESP_ENCAP_DTTL;
+               session->encap_pdb.spi = ipsec_xform->spi;
+               session->encap_pdb.ip_hdr_len = sizeof(struct ip);
+
+               session->dir = DIR_ENC;
+       } else if (ipsec_xform->direction ==
+                       RTE_SECURITY_IPSEC_SA_DIR_INGRESS) {
+               memset(&session->decap_pdb, 0, sizeof(struct ipsec_decap_pdb));
+               session->decap_pdb.options = sizeof(struct ip) << 16;
+               session->dir = DIR_DEC;
+       } else
+               goto out;
+       session->ctx_pool = internals->ctx_pool;
+
+       return 0;
+out:
+       rte_free(session->auth_key.data);
+       rte_free(session->cipher_key.data);
+       memset(session, 0, sizeof(struct caam_jr_session));
+       return -1;
+}
+
+static int
+caam_jr_security_session_create(void *dev,
+                               struct rte_security_session_conf *conf,
+                               struct rte_security_session *sess,
+                               struct rte_mempool *mempool)
+{
+       void *sess_private_data;
+       struct rte_cryptodev *cdev = (struct rte_cryptodev *)dev;
+       int ret;
+
+       PMD_INIT_FUNC_TRACE();
+       if (rte_mempool_get(mempool, &sess_private_data)) {
+               CAAM_JR_ERR("Couldn't get object from session mempool");
+               return -ENOMEM;
+       }
+
+       switch (conf->protocol) {
+       case RTE_SECURITY_PROTOCOL_IPSEC:
+               ret = caam_jr_set_ipsec_session(cdev, conf,
+                               sess_private_data);
+               break;
+       case RTE_SECURITY_PROTOCOL_MACSEC:
+               /* Return the object taken above to the mempool */
+               rte_mempool_put(mempool, sess_private_data);
+               return -ENOTSUP;
+       default:
+               rte_mempool_put(mempool, sess_private_data);
+               return -EINVAL;
+       }
+       if (ret != 0) {
+               CAAM_JR_ERR("failed to configure session parameters");
+               /* Return session to mempool */
+               rte_mempool_put(mempool, sess_private_data);
+               return ret;
+       }
+
+       set_sec_session_private_data(sess, sess_private_data);
+
+       return ret;
+}
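+
+/*
+ * Application-side sketch (illustrative only): lookaside-protocol
+ * sessions are created through the rte_security API; ctx below is the
+ * rte_security_ctx registered by this PMD and conf carries the IPsec
+ * and crypto transforms (all names assumed set up by the caller):
+ *
+ *     struct rte_security_session *ss =
+ *             rte_security_session_create(ctx, &conf, sess_mp);
+ */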
+
+/* Clear the memory of session so it doesn't leave key material behind */
+static int
+caam_jr_security_session_destroy(void *dev __rte_unused,
+                                struct rte_security_session *sess)
+{
+       PMD_INIT_FUNC_TRACE();
+       void *sess_priv = get_sec_session_private_data(sess);
+
+       struct caam_jr_session *s = (struct caam_jr_session *)sess_priv;
+
+       if (sess_priv) {
+               struct rte_mempool *sess_mp = rte_mempool_from_obj(sess_priv);
+
+               rte_free(s->cipher_key.data);
+               rte_free(s->auth_key.data);
+               memset(s, 0, sizeof(struct caam_jr_session));
+               set_sec_session_private_data(sess, NULL);
+               rte_mempool_put(sess_mp, sess_priv);
+       }
+       return 0;
+}
+
+
+static int
+caam_jr_dev_configure(struct rte_cryptodev *dev,
+                      struct rte_cryptodev_config *config __rte_unused)
+{
+       char str[20];
+       struct sec_job_ring_t *internals;
+
+       PMD_INIT_FUNC_TRACE();
+
+       internals = dev->data->dev_private;
+       snprintf(str, sizeof(str), "ctx_pool_%d", dev->data->dev_id);
+       if (!internals->ctx_pool) {
+               internals->ctx_pool = rte_mempool_create((const char *)str,
+                                               CTX_POOL_NUM_BUFS,
+                                               sizeof(struct caam_jr_op_ctx),
+                                               CTX_POOL_CACHE_SIZE, 0,
+                                               NULL, NULL, NULL, NULL,
+                                               SOCKET_ID_ANY, 0);
+               if (!internals->ctx_pool) {
+                       CAAM_JR_ERR("%s create failed\n", str);
+                       return -ENOMEM;
+               }
+       } else
+               CAAM_JR_INFO("mempool already created for dev_id : %d",
+                               dev->data->dev_id);
+
+       return 0;
+}
+
+static int
+caam_jr_dev_start(struct rte_cryptodev *dev __rte_unused)
+{
+       PMD_INIT_FUNC_TRACE();
+       return 0;
+}
+
+static void
+caam_jr_dev_stop(struct rte_cryptodev *dev __rte_unused)
+{
+       PMD_INIT_FUNC_TRACE();
+}
+
+static int
+caam_jr_dev_close(struct rte_cryptodev *dev)
+{
+       struct sec_job_ring_t *internals;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (dev == NULL)
+               return -ENODEV;
+
+       internals = dev->data->dev_private;
+       rte_mempool_free(internals->ctx_pool);
+       internals->ctx_pool = NULL;
+
+       return 0;
+}
+
+static void
+caam_jr_dev_infos_get(struct rte_cryptodev *dev,
+                      struct rte_cryptodev_info *info)
+{
+       struct sec_job_ring_t *internals = dev->data->dev_private;
+
+       PMD_INIT_FUNC_TRACE();
+       if (info != NULL) {
+               info->max_nb_queue_pairs = internals->max_nb_queue_pairs;
+               info->feature_flags = dev->feature_flags;
+               info->capabilities = caam_jr_get_cryptodev_capabilities();
+               info->sym.max_nb_sessions = internals->max_nb_sessions;
+               info->driver_id = cryptodev_driver_id;
+       }
+}
+
+static struct rte_cryptodev_ops caam_jr_ops = {
+       .dev_configure        = caam_jr_dev_configure,
+       .dev_start            = caam_jr_dev_start,
+       .dev_stop             = caam_jr_dev_stop,
+       .dev_close            = caam_jr_dev_close,
+       .dev_infos_get        = caam_jr_dev_infos_get,
+       .stats_get            = caam_jr_stats_get,
+       .stats_reset          = caam_jr_stats_reset,
+       .queue_pair_setup     = caam_jr_queue_pair_setup,
+       .queue_pair_release   = caam_jr_queue_pair_release,
+       .queue_pair_count     = caam_jr_queue_pair_count,
+       .sym_session_get_size = caam_jr_sym_session_get_size,
+       .sym_session_configure = caam_jr_sym_session_configure,
+       .sym_session_clear    = caam_jr_sym_session_clear
+};
+
+static struct rte_security_ops caam_jr_security_ops = {
+       .session_create = caam_jr_security_session_create,
+       .session_update = NULL,
+       .session_stats_get = NULL,
+       .session_destroy = caam_jr_security_session_destroy,
+       .set_pkt_metadata = NULL,
+       .capabilities_get = caam_jr_get_security_capabilities
+};
+
+/* @brief Flush the job ring of any processed descriptors.
+ * The processed descriptors are silently dropped,
+ * without notifying the user application (UA).
+ */
+static void
+close_job_ring(struct sec_job_ring_t *job_ring)
+{
+       PMD_INIT_FUNC_TRACE();
+       if (job_ring->irq_fd) {
+               /* Producer index is frozen. If consumer index is not equal
+                * with producer index, then we have descs to flush.
+                */
+               while (job_ring->pidx != job_ring->cidx)
+                       hw_flush_job_ring(job_ring, false, NULL);
+
+               /* free the uio job ring */
+               free_job_ring(job_ring->irq_fd);
+               job_ring->irq_fd = 0;
+               caam_jr_dma_free(job_ring->input_ring);
+               caam_jr_dma_free(job_ring->output_ring);
+               g_job_rings_no--;
+       }
+}
+
+/** @brief Release the software and hardware resources tied to a job ring.
+ * @param [in] job_ring The job ring
+ *
+ * @retval  0 for success
+ * @retval  -1 for error
+ */
+static int
+shutdown_job_ring(struct sec_job_ring_t *job_ring)
+{
+       int ret = 0;
+
+       PMD_INIT_FUNC_TRACE();
+       ASSERT(job_ring != NULL);
+       ret = hw_shutdown_job_ring(job_ring);
+       SEC_ASSERT(ret == 0, ret,
+               "Failed to shutdown hardware job ring %p",
+               job_ring);
+
+       if (job_ring->coalescing_en)
+               hw_job_ring_disable_coalescing(job_ring);
+
+       if (job_ring->jr_mode != SEC_NOTIFICATION_TYPE_POLL) {
+               ret = caam_jr_disable_irqs(job_ring->irq_fd);
+               SEC_ASSERT(ret == 0, ret,
+               "Failed to disable irqs for job ring %p",
+               job_ring);
+       }
+
+       return ret;
+}
+
+/*
+ * @brief Release the resources used by the SEC user space driver.
+ *
+ * Reset and release SEC's job rings indicated by the User Application at
+ * init_job_ring() and free any memory allocated internally.
+ * Call once during application tear down.
+ *
+ * @note In case there are any descriptors in-flight (descriptors received by
+ * SEC driver for processing and for which no response was yet provided to UA),
+ * the descriptors are discarded without any notifications to User Application.
+ *
+ * @retval ::0                 is returned for a successful execution
+ * @retval ::-ENODEV           is returned if no device is provided
+ */
+static int
+caam_jr_dev_uninit(struct rte_cryptodev *dev)
+{
+       struct sec_job_ring_t *internals;
+
+       PMD_INIT_FUNC_TRACE();
+       if (dev == NULL)
+               return -ENODEV;
+
+       internals = dev->data->dev_private;
+       rte_free(dev->security_ctx);
+
+       /* If any descriptors are in flight, poll and wait
+        * until all descriptors are received and silently discarded.
+        */
+       if (internals) {
+               shutdown_job_ring(internals);
+               close_job_ring(internals);
+               rte_mempool_free(internals->ctx_pool);
+       }
+
+       CAAM_JR_INFO("Closing crypto device %s", dev->data->name);
+
+       /* last caam jr instance */
+       if (g_job_rings_no == 0)
+               g_driver_state = SEC_DRIVER_STATE_IDLE;
+
+       return SEC_SUCCESS;
+}
+
+/* @brief Initialize the software and hardware resources tied to a job ring.
+ *
+ * The ring is configured for the notification style selected at build time
+ * via #SEC_NOTIFICATION_TYPE (#SEC_NOTIFICATION_TYPE_POLL by default);
+ * interrupt coalescing is enabled only when a non-zero timer or descriptor
+ * count threshold is configured.
+ *
+ * @param [in] reg_base_addr   The job ring base address register
+ * @param [in] irq_id          The job ring interrupt identification number
+ * @retval  job_ring_handle for successful job ring configuration
+ * @retval  NULL on error
+ */
+static void *
+init_job_ring(void *reg_base_addr, uint32_t irq_id)
+{
+       struct sec_job_ring_t *job_ring = NULL;
+       int i, ret = 0;
+       int jr_mode = SEC_NOTIFICATION_TYPE_POLL;
+       int napi_mode = 0;
+       int irq_coalescing_timer = 0;
+       int irq_coalescing_count = 0;
+
+       for (i = 0; i < MAX_SEC_JOB_RINGS; i++) {
+               if (g_job_rings[i].irq_fd == 0) {
+                       job_ring = &g_job_rings[i];
+                       g_job_rings_no++;
+                       break;
+               }
+       }
+       if (job_ring == NULL) {
+               CAAM_JR_ERR("No free job ring\n");
+               return NULL;
+       }
+
+       job_ring->register_base_addr = reg_base_addr;
+       job_ring->jr_mode = jr_mode;
+       job_ring->napi_mode = 0;
+       job_ring->irq_fd = irq_id;
+
+       /* Allocate memory for input ring */
+       job_ring->input_ring = caam_jr_dma_mem_alloc(L1_CACHE_BYTES,
+                               SEC_DMA_MEM_INPUT_RING_SIZE);
+       if (job_ring->input_ring == NULL) {
+               CAAM_JR_ERR("Failed to allocate input ring memory");
+               return NULL;
+       }
+       memset(job_ring->input_ring, 0, SEC_DMA_MEM_INPUT_RING_SIZE);
+
+       /* Allocate memory for output ring */
+       job_ring->output_ring = caam_jr_dma_mem_alloc(L1_CACHE_BYTES,
+                               SEC_DMA_MEM_OUTPUT_RING_SIZE);
+       if (job_ring->output_ring == NULL) {
+               CAAM_JR_ERR("Failed to allocate output ring memory");
+               caam_jr_dma_free(job_ring->input_ring);
+               return NULL;
+       }
+       memset(job_ring->output_ring, 0, SEC_DMA_MEM_OUTPUT_RING_SIZE);
+
+       /* Reset job ring in SEC hw and configure job ring registers */
+       ret = hw_reset_job_ring(job_ring);
+       if (ret != 0) {
+               CAAM_JR_ERR("Failed to reset hardware job ring");
+               goto cleanup;
+       }
+
+       if (jr_mode == SEC_NOTIFICATION_TYPE_NAPI) {
+       /* When SEC US driver works in NAPI mode, the UA can select
+        * if the driver starts with IRQs on or off.
+        */
+               if (napi_mode == SEC_STARTUP_INTERRUPT_MODE) {
+                       CAAM_JR_INFO("Enabling DONE IRQ generation on job ring - %p",
+                               job_ring);
+                       ret = caam_jr_enable_irqs(job_ring->irq_fd);
+                       if (ret != 0) {
+                               CAAM_JR_ERR("Failed to enable irqs for job ring");
+                               goto cleanup;
+                       }
+               }
+       } else if (jr_mode == SEC_NOTIFICATION_TYPE_IRQ) {
+       /* When SEC US driver works in pure interrupt mode,
+        * IRQ's are always enabled.
+        */
+               CAAM_JR_INFO("Enabling DONE IRQ generation on job ring - %p",
+                        job_ring);
+               ret = caam_jr_enable_irqs(job_ring->irq_fd);
+               if (ret != 0) {
+                       CAAM_JR_ERR("Failed to enable irqs for job ring");
+                       goto cleanup;
+               }
+       }
+       if (irq_coalescing_timer || irq_coalescing_count) {
+               hw_job_ring_set_coalescing_param(job_ring,
+                        irq_coalescing_timer,
+                        irq_coalescing_count);
+
+               hw_job_ring_enable_coalescing(job_ring);
+               job_ring->coalescing_en = 1;
+       }
+
+       job_ring->jr_state = SEC_JOB_RING_STATE_STARTED;
+       job_ring->max_nb_queue_pairs = RTE_CAAM_MAX_NB_SEC_QPS;
+       job_ring->max_nb_sessions = RTE_CAAM_JR_PMD_MAX_NB_SESSIONS;
+
+       return job_ring;
+cleanup:
+       caam_jr_dma_free(job_ring->output_ring);
+       caam_jr_dma_free(job_ring->input_ring);
+       return NULL;
+}
+
+
+static int
+caam_jr_dev_init(const char *name,
+                struct rte_vdev_device *vdev,
+                struct rte_cryptodev_pmd_init_params *init_params)
+{
+       struct rte_cryptodev *dev;
+       struct rte_security_ctx *security_instance;
+       struct uio_job_ring *job_ring;
+       char str[RTE_CRYPTODEV_NAME_MAX_LEN];
+
+       PMD_INIT_FUNC_TRACE();
+
+       /* Validate driver state */
+       if (g_driver_state == SEC_DRIVER_STATE_IDLE) {
+               g_job_rings_max = sec_configure();
+               if (!g_job_rings_max) {
+                       CAAM_JR_ERR("No job ring detected on UIO");
+                       return -1;
+               }
+               /* Update driver state */
+               g_driver_state = SEC_DRIVER_STATE_STARTED;
+       }
+
+       if (g_job_rings_no >= g_job_rings_max) {
+               CAAM_JR_ERR("No more job rings available (max=%d)",
+                               g_job_rings_max);
+               return -1;
+       }
+
+       job_ring = config_job_ring();
+       if (job_ring == NULL) {
+               CAAM_JR_ERR("failed to create job ring");
+               goto init_error;
+       }
+
+       snprintf(str, sizeof(str), "caam_jr%d", job_ring->jr_id);
+
+       dev = rte_cryptodev_pmd_create(name, &vdev->device, init_params);
+       if (dev == NULL) {
+               CAAM_JR_ERR("failed to create cryptodev vdev");
+               goto cleanup;
+       }
+       /* TODO: free it during teardown */
+       dev->data->dev_private = init_job_ring(job_ring->register_base_addr,
+                                               job_ring->uio_fd);
+
+       if (!dev->data->dev_private) {
+               CAAM_JR_ERR("Ring memory allocation failed\n");
+               goto cleanup2;
+       }
+
+       dev->driver_id = cryptodev_driver_id;
+       dev->dev_ops = &caam_jr_ops;
+
+       /* register rx/tx burst functions for data path */
+       dev->dequeue_burst = caam_jr_dequeue_burst;
+       dev->enqueue_burst = caam_jr_enqueue_burst;
+       dev->feature_flags = RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO |
+                       RTE_CRYPTODEV_FF_HW_ACCELERATED |
+                       RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING |
+                       RTE_CRYPTODEV_FF_SECURITY |
+                       RTE_CRYPTODEV_FF_IN_PLACE_SGL |
+                       RTE_CRYPTODEV_FF_OOP_SGL_IN_SGL_OUT |
+                       RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT |
+                       RTE_CRYPTODEV_FF_OOP_LB_IN_SGL_OUT |
+                       RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT;
+
+       /* For secondary processes, we don't initialise any further as primary
+        * has already done this work. Only check we don't need a different
+        * RX function
+        */
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+               CAAM_JR_WARN("Device already init by primary process");
+               return 0;
+       }
+
+       /* Freed in caam_jr_dev_uninit() via dev->security_ctx */
+       security_instance = rte_malloc("caam_jr",
+                               sizeof(struct rte_security_ctx), 0);
+       if (security_instance == NULL) {
+               CAAM_JR_ERR("memory allocation failed\n");
+               goto cleanup2;
+       }
+
+       security_instance->device = (void *)dev;
+       security_instance->ops = &caam_jr_security_ops;
+       security_instance->sess_cnt = 0;
+       dev->security_ctx = security_instance;
+
+       RTE_LOG(INFO, PMD, "%s cryptodev init\n", dev->data->name);
+
+       return 0;
+
+cleanup2:
+       caam_jr_dev_uninit(dev);
+       rte_cryptodev_pmd_release_device(dev);
+cleanup:
+       free_job_ring(job_ring->uio_fd);
+init_error:
+       CAAM_JR_ERR("driver %s: cryptodev_caam_jr_create failed",
+                       init_params->name);
+
+       return -ENXIO;
+}
+
+/** Initialise CAAM JR crypto device */
+static int
+cryptodev_caam_jr_probe(struct rte_vdev_device *vdev)
+{
+       struct rte_cryptodev_pmd_init_params init_params = {
+               "",
+               sizeof(struct sec_job_ring_t),
+               rte_socket_id(),
+               RTE_CRYPTODEV_PMD_DEFAULT_MAX_NB_QUEUE_PAIRS
+       };
+       const char *name;
+       const char *input_args;
+
+       name = rte_vdev_device_name(vdev);
+       if (name == NULL)
+               return -EINVAL;
+
+       input_args = rte_vdev_device_args(vdev);
+       rte_cryptodev_pmd_parse_input_args(&init_params, input_args);
+
+       /* if sec device version is not configured */
+       if (!rta_get_sec_era()) {
+               const struct device_node *caam_node;
+
+               for_each_compatible_node(caam_node, NULL, "fsl,sec-v4.0") {
+                       const uint32_t *prop = of_get_property(caam_node,
+                                       "fsl,sec-era",
+                                       NULL);
+                       if (prop) {
+                               rta_set_sec_era(
+                                       INTL_SEC_ERA(cpu_to_caam32(*prop)));
+                               break;
+                       }
+               }
+       }
+#ifdef RTE_LIBRTE_PMD_CAAM_JR_BE
+       if (rta_get_sec_era() > RTA_SEC_ERA_8) {
+               RTE_LOG(ERR, PMD,
+               "CAAM is compiled in BE mode for a device with SEC era > 8\n");
+               return -EINVAL;
+       }
+#endif
+
+       return caam_jr_dev_init(name, vdev, &init_params);
+}
+
+/** Uninitialise CAAM JR crypto device */
+static int
+cryptodev_caam_jr_remove(struct rte_vdev_device *vdev)
+{
+       struct rte_cryptodev *cryptodev;
+       const char *name;
+
+       name = rte_vdev_device_name(vdev);
+       if (name == NULL)
+               return -EINVAL;
+
+       cryptodev = rte_cryptodev_pmd_get_named_dev(name);
+       if (cryptodev == NULL)
+               return -ENODEV;
+
+       caam_jr_dev_uninit(cryptodev);
+
+       return rte_cryptodev_pmd_destroy(cryptodev);
+}
+
+static struct rte_vdev_driver cryptodev_caam_jr_drv = {
+       .probe = cryptodev_caam_jr_probe,
+       .remove = cryptodev_caam_jr_remove
+};
+
+static struct cryptodev_driver caam_jr_crypto_drv;
+
+RTE_PMD_REGISTER_VDEV(CRYPTODEV_NAME_CAAM_JR_PMD, cryptodev_caam_jr_drv);
+RTE_PMD_REGISTER_PARAM_STRING(CRYPTODEV_NAME_CAAM_JR_PMD,
+       "max_nb_queue_pairs=<int>"
+       "socket_id=<int>");
+RTE_PMD_REGISTER_CRYPTO_DRIVER(caam_jr_crypto_drv, cryptodev_caam_jr_drv.driver,
+               cryptodev_driver_id);
+
+RTE_INIT(caam_jr_init_log)
+{
+       caam_jr_logtype = rte_log_register("pmd.crypto.caam");
+       if (caam_jr_logtype >= 0)
+               rte_log_set_level(caam_jr_logtype, RTE_LOG_NOTICE);
+}
diff --git a/drivers/crypto/caam_jr/caam_jr_capabilities.c b/drivers/crypto/caam_jr/caam_jr_capabilities.c
new file mode 100644 (file)
index 0000000..c51593c
--- /dev/null
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017-2018 NXP
+ */
+
+#include <caam_jr_capabilities.h>
+
+static const struct rte_cryptodev_capabilities caam_jr_capabilities[] = {
+       {       /* MD5 HMAC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_MD5_HMAC,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 1,
+                                       .max = 64,
+                                       .increment = 1
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 16,
+                                       .increment = 1
+                               },
+                               .iv_size = { 0 }
+                       }, }
+               }, }
+       },
+       {       /* SHA1 HMAC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA1_HMAC,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 1,
+                                       .max = 64,
+                                       .increment = 1
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 20,
+                                       .increment = 1
+                               },
+                               .iv_size = { 0 }
+                       }, }
+               }, }
+       },
+       {       /* SHA224 HMAC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA224_HMAC,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 1,
+                                       .max = 64,
+                                       .increment = 1
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 28,
+                                       .increment = 1
+                               },
+                               .iv_size = { 0 }
+                       }, }
+               }, }
+       },
+       {       /* SHA256 HMAC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA256_HMAC,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 1,
+                                       .max = 64,
+                                       .increment = 1
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 32,
+                                       .increment = 1
+                               },
+                               .iv_size = { 0 }
+                       }, }
+               }, }
+       },
+       {       /* SHA384 HMAC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA384_HMAC,
+                               .block_size = 128,
+                               .key_size = {
+                                       .min = 1,
+                                       .max = 128,
+                                       .increment = 1
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 48,
+                                       .increment = 1
+                               },
+                               .iv_size = { 0 }
+                       }, }
+               }, }
+       },
+       {       /* SHA512 HMAC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA512_HMAC,
+                               .block_size = 128,
+                               .key_size = {
+                                       .min = 1,
+                                       .max = 128,
+                                       .increment = 1
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 64,
+                                       .increment = 1
+                               },
+                               .iv_size = { 0 }
+                       }, }
+               }, }
+       },
+       {       /* AES GCM */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AEAD,
+                       {.aead = {
+                               .algo = RTE_CRYPTO_AEAD_AES_GCM,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 32,
+                                       .increment = 8
+                               },
+                               .digest_size = {
+                                       .min = 8,
+                                       .max = 16,
+                                       .increment = 4
+                               },
+                               .aad_size = {
+                                       .min = 0,
+                                       .max = 240,
+                                       .increment = 1
+                               },
+                               .iv_size = {
+                                       .min = 12,
+                                       .max = 12,
+                                       .increment = 0
+                               },
+                       }, }
+               }, }
+       },
+       {       /* AES CBC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_AES_CBC,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 32,
+                                       .increment = 8
+                               },
+                               .iv_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* AES CTR */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_AES_CTR,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 32,
+                                       .increment = 8
+                               },
+                               .iv_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* 3DES CBC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_3DES_CBC,
+                               .block_size = 8,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 24,
+                                       .increment = 8
+                               },
+                               .iv_size = {
+                                       .min = 8,
+                                       .max = 8,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+
+       RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST()
+};
+
+static const struct rte_security_capability caam_jr_security_cap[] = {
+       { /* IPsec Lookaside Protocol offload ESP Transport Egress */
+               .action = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL,
+               .protocol = RTE_SECURITY_PROTOCOL_IPSEC,
+               .ipsec = {
+                       .proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
+                       .mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
+                       .direction = RTE_SECURITY_IPSEC_SA_DIR_EGRESS,
+                       .options = { 0 }
+               },
+               .crypto_capabilities = caam_jr_capabilities
+       },
+       { /* IPsec Lookaside Protocol offload ESP Tunnel Ingress */
+               .action = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL,
+               .protocol = RTE_SECURITY_PROTOCOL_IPSEC,
+               .ipsec = {
+                       .proto = RTE_SECURITY_IPSEC_SA_PROTO_ESP,
+                       .mode = RTE_SECURITY_IPSEC_SA_MODE_TUNNEL,
+                       .direction = RTE_SECURITY_IPSEC_SA_DIR_INGRESS,
+                       .options = { 0 }
+               },
+               .crypto_capabilities = caam_jr_capabilities
+       },
+       {
+               .action = RTE_SECURITY_ACTION_TYPE_NONE
+       }
+};
+
+const struct rte_cryptodev_capabilities *
+caam_jr_get_cryptodev_capabilities(void)
+{
+       return caam_jr_capabilities;
+}
+
+const struct rte_security_capability *
+caam_jr_get_security_capabilities(void *device __rte_unused)
+{
+       return caam_jr_security_cap;
+}
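+
+/*
+ * Lookup sketch (illustrative only): applications normally query these
+ * tables through the generic capability API rather than reading them
+ * directly, e.g. for AES-CBC (dev_id assumed valid):
+ *
+ *     struct rte_cryptodev_sym_capability_idx idx = {
+ *             .type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+ *             .algo.cipher = RTE_CRYPTO_CIPHER_AES_CBC,
+ *     };
+ *     const struct rte_cryptodev_symmetric_capability *cap =
+ *             rte_cryptodev_sym_capability_get(dev_id, &idx);
+ */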
diff --git a/drivers/crypto/caam_jr/caam_jr_capabilities.h b/drivers/crypto/caam_jr/caam_jr_capabilities.h
new file mode 100644 (file)
index 0000000..c1e3f30
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017-2018 NXP
+ */
+
+#ifndef CAAM_JR_CAPABILITIES_H
+#define CAAM_JR_CAPABILITIES_H
+
+#include <rte_cryptodev.h>
+#include <rte_security.h>
+
+/* Get cryptodev capabilities */
+const struct rte_cryptodev_capabilities *
+caam_jr_get_cryptodev_capabilities(void);
+/* Get security capabilities */
+const struct rte_security_capability *
+caam_jr_get_security_capabilities(void *device);
+
+#endif
diff --git a/drivers/crypto/caam_jr/caam_jr_config.h b/drivers/crypto/caam_jr/caam_jr_config.h
new file mode 100644 (file)
index 0000000..041187a
--- /dev/null
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017-2018 NXP
+ */
+
+#ifndef CAAM_JR_CONFIG_H
+#define CAAM_JR_CONFIG_H
+
+#include <rte_byteorder.h>
+
+#include <compat.h>
+
+#ifdef RTE_LIBRTE_PMD_CAAM_JR_BE
+#define CAAM_BYTE_ORDER __BIG_ENDIAN
+#else
+#define CAAM_BYTE_ORDER __LITTLE_ENDIAN
+#endif
+
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+#define CORE_BYTE_ORDER __BIG_ENDIAN
+#else
+#define CORE_BYTE_ORDER __LITTLE_ENDIAN
+#endif
+
+#if CORE_BYTE_ORDER != CAAM_BYTE_ORDER
+
+#define cpu_to_caam64 rte_cpu_to_be_64
+#define cpu_to_caam32 rte_cpu_to_be_32
+#else
+#define cpu_to_caam64
+#define cpu_to_caam32
+
+#endif
+
+/*
+ * SEC is configured to start work in polling mode,
+ * when configured for NAPI notification style.
+ */
+#define SEC_STARTUP_POLLING_MODE     0
+/*
+ * SEC is configured to start work in interrupt mode,
+ * when configured for NAPI notification style.
+ */
+#define SEC_STARTUP_INTERRUPT_MODE   1
+
+/*
+ * SEC driver will use NAPI model to receive notifications
+ * for processed packets from SEC engine hardware:
+ * - IRQ for low traffic
+ * - polling for high traffic.
+ */
+#define SEC_NOTIFICATION_TYPE_NAPI  0
+/*
+ * SEC driver will use ONLY interrupts to receive notifications
+ * for processed packets from SEC engine hardware.
+ */
+#define SEC_NOTIFICATION_TYPE_IRQ   1
+/*
+ * SEC driver will use ONLY polling to receive notifications
+ * for processed packets from SEC engine hardware.
+ */
+#define SEC_NOTIFICATION_TYPE_POLL  2
+
+/*
+ * SEC USER SPACE DRIVER related configuration.
+ */
+
+/*
+ * Determines how SEC user space driver will receive notifications
+ * for processed packets from SEC engine.
+ * Valid values are: #SEC_NOTIFICATION_TYPE_POLL, #SEC_NOTIFICATION_TYPE_IRQ
+ * and #SEC_NOTIFICATION_TYPE_NAPI.
+ */
+#define SEC_NOTIFICATION_TYPE   SEC_NOTIFICATION_TYPE_POLL
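+
+/*
+ * Usage sketch (illustrative only): build-time code that depends on the
+ * notification style can branch on this macro, e.g.
+ *
+ *     #if SEC_NOTIFICATION_TYPE == SEC_NOTIFICATION_TYPE_POLL
+ *             ... poll-only build: IRQs stay disabled ...
+ *     #endif
+ *
+ * as done for the interrupt coalescing block at the end of this file.
+ */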
+
+/* Maximum number of job rings supported by SEC hardware */
+#define MAX_SEC_JOB_RINGS         4
+
+/* Maximum number of QP per job ring */
+#define RTE_CAAM_MAX_NB_SEC_QPS    1
+
+/*
+ * Size of the cryptographic context that is used directly in communicating
+ * with the SEC device. The SEC device works only with physical addresses.
+ * This is the maximum size for a SEC descriptor (64 words * 4 bytes = 256
+ * bytes).
+ */
+#define SEC_CRYPTO_DESCRIPTOR_SIZE  256
+
+/*
+ * Size of job descriptor submitted to SEC device for each packet to
+ * be processed.
+ * Job descriptor contains 3 DMA address pointers:
+ *     - to shared descriptor, to input buffer and to output buffer.
+ * The job descriptor contains other SEC specific commands as well:
+ *     - HEADER command, SEQ IN PTR command SEQ OUT PTR command and opaque data
+ *      each measuring 4 bytes.
+ * Job descriptor size, depending on physical address representation:
+ *     - 32 bit - size is 28 bytes - cacheline-aligned size is 64 bytes
+ *     - 36 bit - size is 40 bytes - cacheline-aligned size is 64 bytes
+ * @note: Job descriptor must be cacheline-aligned to ensure efficient
+ *     memory access.
+ * @note: If other format is used for job descriptor, then the size must be
+ *     revised.
+ */
+#define SEC_JOB_DESCRIPTOR_SIZE     64
+
+/*
+ * Size of one entry in the input ring of a job ring.
+ * Input ring contains pointers to job descriptors.
+ * The memory used for an input ring and output ring must be physically
+ * contiguous.
+ */
+#define SEC_JOB_INPUT_RING_ENTRY_SIZE  sizeof(dma_addr_t)
+
+/*
+ * Size of one entry in the output ring of a job ring.
+ * Output ring entry is a pointer to a job descriptor followed by a 4 byte
+ * status word.
+ * The memory used for an input ring and output ring must be physically
+ * contiguous.
+ * @note If desired to use also the optional SEQ OUT indication in output ring
+ * entries,
+ * then 4 more bytes must be added to the size.
+ */
+#define SEC_JOB_OUTPUT_RING_ENTRY_SIZE  (SEC_JOB_INPUT_RING_ENTRY_SIZE + 4)
+
+/*
+ * DMA memory required for an input ring of a job ring.
+ */
+#define SEC_DMA_MEM_INPUT_RING_SIZE     ((SEC_JOB_INPUT_RING_ENTRY_SIZE) * \
+                                       (SEC_JOB_RING_SIZE))
+
+/*
+ * DMA memory required for an output ring of a job ring.
+ *  Required extra 4 byte for status word per each entry.
+ */
+#define SEC_DMA_MEM_OUTPUT_RING_SIZE    ((SEC_JOB_OUTPUT_RING_ENTRY_SIZE) * \
+                                       (SEC_JOB_RING_SIZE))
+
+/* DMA memory required for a job ring, including both input and output rings. */
+#define SEC_DMA_MEM_JOB_RING_SIZE       ((SEC_DMA_MEM_INPUT_RING_SIZE) + \
+                                       (SEC_DMA_MEM_OUTPUT_RING_SIZE))
+
+/*
+ * When calling sec_init() UA will provide an area of virtual memory
+ *  of size #SEC_DMA_MEMORY_SIZE to be used internally by the driver
+ *  to allocate data (like SEC descriptors) that needs to be passed to
+ *  SEC device in physical addressing and later on retrieved from SEC device.
+ *  At initialization the UA provides specialized ptov/vtop functions/macros to
+ *  translate addresses allocated from this memory area.
+ */
+#define SEC_DMA_MEMORY_SIZE          ((SEC_DMA_MEM_JOB_RING_SIZE) * \
+                                       (MAX_SEC_JOB_RINGS))
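+
+/*
+ * Worked example (illustrative only, assuming an 8-byte dma_addr_t and
+ * SEC_JOB_RING_SIZE = 512): each input ring takes 8 * 512 = 4096 bytes,
+ * each output ring (8 + 4) * 512 = 6144 bytes, so one job ring needs
+ * 10240 bytes and MAX_SEC_JOB_RINGS = 4 rings need 40960 bytes in total.
+ */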
+
+#define L1_CACHE_BYTES 64
+
+/* SEC JOB RING related configuration. */
+
+/*
+ * Configure the size of the JOB RING.
+ * The maximum size of the ring in hardware is limited to 1024.
+ * However the number of packets in flight in a time interval of 1ms can
+ * be calculated from the traffic rate (Mbps) and packet size.
+ * Here it was considered a packet size of 64 bytes.
+ *
+ * @note Round up to nearest power of 2 for optimized update
+ * of producer/consumer indexes of each job ring
+ */
+#define SEC_JOB_RING_SIZE     512
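+
+/*
+ * Guard sketch (illustrative only): a statement-scope check such as
+ *
+ *     RTE_BUILD_BUG_ON(SEC_JOB_RING_SIZE & (SEC_JOB_RING_SIZE - 1));
+ *
+ * could be used to enforce the power-of-two requirement noted above.
+ */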
+
+/*
+ * Interrupt coalescing related configuration.
+ * NOTE: SEC hardware enabled interrupt
+ * coalescing is not supported on SEC version 3.1!
+ * SEC version 4.4 has support for interrupt
+ * coalescing.
+ */
+
+#if SEC_NOTIFICATION_TYPE != SEC_NOTIFICATION_TYPE_POLL
+
+#define SEC_INT_COALESCING_ENABLE   1
+/*
+ * Interrupt Coalescing Descriptor Count Threshold.
+ * While interrupt coalescing is enabled (ICEN=1), this value determines
+ * how many Descriptors are completed before raising an interrupt.
+ *
+ * Valid values for this field are from 0 to 255.
+ * Note that a value of 1 functionally defeats the advantages of interrupt
+ * coalescing since the threshold value is reached each time that a
+ * Job Descriptor is completed. A value of 0 is treated in the same
+ * manner as a value of 1.
+ */
+#define SEC_INTERRUPT_COALESCING_DESCRIPTOR_COUNT_THRESH  10
+
+/*
+ * Interrupt Coalescing Timer Threshold.
+ * While interrupt coalescing is enabled (ICEN=1), this value determines the
+ * maximum amount of time after processing a Descriptor before raising an
+ * interrupt.
+ * The threshold value is represented in units equal to 64 CAAM interface
+ * clocks. Valid values for this field are from 1 to 65535.
+ * A value of 0 results in behavior identical to that when interrupt
+ * coalescing is disabled.
+ */
+#define SEC_INTERRUPT_COALESCING_TIMER_THRESH  100
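+
+/*
+ * Usage sketch (illustrative only): in interrupt/NAPI builds these two
+ * thresholds are the values a job ring would pass to the coalescing
+ * helpers before enabling coalescing:
+ *
+ *     hw_job_ring_set_coalescing_param(job_ring,
+ *             SEC_INTERRUPT_COALESCING_TIMER_THRESH,
+ *             SEC_INTERRUPT_COALESCING_DESCRIPTOR_COUNT_THRESH);
+ *     hw_job_ring_enable_coalescing(job_ring);
+ */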
+#endif /* SEC_NOTIFICATION_TYPE != SEC_NOTIFICATION_TYPE_POLL */
+
+#endif /* CAAM_JR_CONFIG_H */
diff --git a/drivers/crypto/caam_jr/caam_jr_desc.h b/drivers/crypto/caam_jr/caam_jr_desc.h
new file mode 100644 (file)
index 0000000..6683ea8
--- /dev/null
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017-2018 NXP
+ */
+
+#ifndef CAAM_JR_DESC_H
+#define CAAM_JR_DESC_H
+
+#define CMD_HDR_CTYPE_SD               0x16
+#define CMD_HDR_CTYPE_JD               0x17
+
+/* The maximum size of a SEC descriptor, in WORDs (32 bits). */
+#define MAX_DESC_SIZE_WORDS                     64
+
+/*
+ * Macros manipulating descriptors
+ */
+/* Macro for setting the SD pointer in a JD. Common for all protocols
+ * supported by the SEC driver.
+ */
+#define SEC_JD_SET_SD(descriptor, ptr, len)       {      \
+       (descriptor)->sd_ptr = (ptr);                          \
+       (descriptor)->deschdr.command.jd.shr_desc_len = (len);      \
+}
+
+/* Macro for setting a pointer to the job which this descriptor processes.
+ * It eases the lookup procedure for identifying the descriptor that has
+ * completed.
+ */
+#define SEC_JD_SET_JOB_PTR(descriptor, ptr) \
+       ((descriptor)->job_ptr = (ptr))
+
+/* Macro for setting up a JD. The structure of the JD is common across all
+ * supported protocols, thus its structure is identical.
+ */
+#define SEC_JD_INIT(descriptor)              ({ \
+       /* CTYPE = job descriptor                              \
+        * RSMS, DNR = 0
+        * ONE = 1
+        * Start Index = 0
+        * ZRO,TD, MTD = 0
+        * SHR = 1 (there's a shared descriptor referenced
+        *        by this job descriptor,pointer in next word)
+        * REO = 1 (execute job descr. first, shared descriptor
+        *        after)
+        * SHARE = DEFER
+        * Descriptor Length = 0 ( to be completed @ runtime ) */ \
+       (descriptor)->deschdr.command.word = 0xB0801C0D;        \
+       /*
+        * CTYPE = SEQ OUT command
+        * Scatter-Gather Flag = 0 (can be updated @ runtime)
+        * PRE = 0
+        * EXT = 1 (data length is in next word, following the command)
+        * RTO = 0 */                                           \
+       (descriptor)->seq_out.command.word = 0xF8400000;        \
+       /*
+        * CTYPE = SEQ IN command
+        * Scatter-Gather Flag = 0 (can be updated @ runtime)
+        * PRE = 0
+        * EXT = 1 ( data length is in next word, following the
+        *         command)
+        * RTO = 0 */                                           \
+       (descriptor)->seq_in.command.word  = 0xF0400000;        \
+       /*
+        * In order to be compatible with QI scenarios, the DPOVRD value
+        * loaded must be formatted like this:
+        * DPOVRD_EN (1b) | Res| DPOVRD Value (right aligned). */ \
+       (descriptor)->load_dpovrd.command.word = 0x16870004;    \
+       /* By default, DPOVRD mechanism is disabled, thus the value to be
+        * LOAD-ed through the above descriptor command will be
+        * 0x0000_0000. */                                      \
+       (descriptor)->dpovrd = 0x00000000;                      \
+})
+
+/* Macro for setting the pointer to the input buffer in the JD, according to
+ * the parameters set by the user in the ::sec_packet_t structure.
+ */
+#define SEC_JD_SET_IN_PTR(descriptor, phys_addr, offset, length) {     \
+       (descriptor)->seq_in_ptr = (phys_addr) + (offset);            \
+       (descriptor)->in_ext_length = (length);                  \
+}
+
+/* Macro for setting the pointer to the output buffer in the JD, according to
+ * the parameters set by the user in the ::sec_packet_t structure.
+ */
+#define SEC_JD_SET_OUT_PTR(descriptor, phys_addr, offset, length) {    \
+       (descriptor)->seq_out_ptr = (phys_addr) + (offset);          \
+       (descriptor)->out_ext_length = (length);                        \
+}
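+
+/*
+ * Assembly sketch (illustrative only): building a job descriptor for one
+ * packet with the macros above; the physical addresses and lengths are
+ * assumed to be already known, and get_free_descriptor() is a
+ * hypothetical helper standing in for the caller's ring bookkeeping:
+ *
+ *     struct sec_job_descriptor_t *jd = get_free_descriptor();
+ *     SEC_JD_INIT(jd);
+ *     SEC_JD_SET_SD(jd, sd_phys, sd_len);
+ *     SEC_JD_SET_IN_PTR(jd, in_phys, 0, in_len);
+ *     SEC_JD_SET_OUT_PTR(jd, out_phys, 0, out_len);
+ */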
+
+/* Macro for setting the Scatter-Gather flag in the SEQ IN command. Used in
+ * case the input buffer is split in multiple buffers, according to the user
+ * specification.
+ */
+#define SEC_JD_SET_SG_IN(descriptor) \
+       ((descriptor)->seq_in.command.field.sgf =  1)
+
+/* Macro for setting the Scatter-Gather flag in the SEQ OUT command. Used in
+ * case the output buffer is split in multiple buffers, according to the user
+ * specification.
+ */
+#define SEC_JD_SET_SG_OUT(descriptor) \
+       ((descriptor)->seq_out.command.field.sgf = 1)
+
+#define SEC_JD_SET_DPOVRD(descriptor)
+
+/* Macro for retrieving a descriptor's length. Works for both SD and JD. */
+#define SEC_GET_DESC_LEN(descriptor)                                   \
+       (((struct descriptor_header_s *)(descriptor))->command.sd.ctype == \
+       CMD_HDR_CTYPE_SD ? ((struct descriptor_header_s *) \
+       (descriptor))->command.sd.desclen :     \
+       ((struct descriptor_header_s *)(descriptor))->command.jd.desclen)
+
+/* Helper macro for dumping the hex representation of a descriptor.
+ * Pointers are printed with %p to avoid truncation on 64-bit targets.
+ */
+#define SEC_DUMP_DESC(descriptor) {                                    \
+       int __i;                                                        \
+       CAAM_JR_INFO("Desc@ %p\n", (const void *)(descriptor));         \
+       for (__i = 0;                                           \
+               __i < SEC_GET_DESC_LEN(descriptor);                     \
+               __i++) {                                                \
+               printf("%p: 0x%08x\n",                          \
+                       (const void *)(((uint32_t *)(descriptor)) + __i), \
+                       *(((uint32_t *)(descriptor)) + __i));           \
+       }                                                               \
+}
+/* Union describing a descriptor header.
+ */
+struct descriptor_header_s {
+       union {
+               uint32_t word;
+               struct {
+                       /* 4  */ unsigned int ctype:5;
+                       /* 5  */ unsigned int res1:2;
+                       /* 7  */ unsigned int dnr:1;
+                       /* 8  */ unsigned int one:1;
+                       /* 9  */ unsigned int res2:1;
+                       /* 10 */ unsigned int start_idx:6;
+                       /* 16 */ unsigned int res3:2;
+                       /* 18 */ unsigned int cif:1;
+                       /* 19 */ unsigned int sc:1;
+                       /* 20 */ unsigned int pd:1;
+                       /* 21 */ unsigned int res4:1;
+                       /* 22 */ unsigned int share:2;
+                       /* 24 */ unsigned int res5:2;
+                       /* 26 */ unsigned int desclen:6;
+               } sd;
+               struct {
+                       /* TODO: only the members below have been
+                        * byte-order corrected; the remaining fields
+                        * still need to be reversed and verified.
+                        */
+                       /* 0 */ unsigned int desclen:7;
+                       /* 7 */ unsigned int res4:1;
+                       /* 8 */ unsigned int share:3;
+                       /* 11 */ unsigned int reo:1;
+                       /* 12 */ unsigned int shr:1;
+                       /* 13 */ unsigned int mtd:1;
+                       /* 14 */ unsigned int td:1;
+                       /* 15 */ unsigned int zero:1;
+                       /* 16 */ unsigned int shr_desc_len:6;
+                       /* 22  */ unsigned int res2:1;
+                       /* 23  */ unsigned int one:1;
+                       /* 24  */ unsigned int dnr:1;
+                       /* 25  */ unsigned int rsms:1;
+                       /* 26  */ unsigned int res1:1;
+                       /* 27  */ unsigned int ctype:5;
+               } jd;
+       } __rte_packed command;
+} __rte_packed;
+
+/* Union describing a KEY command in a descriptor.
+ */
+struct key_command_s {
+       union {
+               uint32_t word;
+               struct {
+                       unsigned int ctype:5;
+                       unsigned int cls:2;
+                       unsigned int sgf:1;
+                       unsigned int imm:1;
+                       unsigned int enc:1;
+                       unsigned int nwb:1;
+                       unsigned int ekt:1;
+                       unsigned int kdest:4;
+                       unsigned int tk:1;
+                       unsigned int rsvd1:5;
+                       unsigned int length:10;
+               } __rte_packed field;
+       } __rte_packed command;
+} __rte_packed;
+
+/* Union describing a PROTOCOL command
+ * in a descriptor.
+ */
+struct protocol_operation_command_s {
+       union {
+               uint32_t word;
+               struct {
+                       unsigned int ctype:5;
+                       unsigned int optype:3;
+                       unsigned char protid;
+                       unsigned short protinfo;
+               } __rte_packed field;
+       } __rte_packed command;
+} __rte_packed;
+
+/* Union describing a SEQIN command in a
+ * descriptor.
+ */
+struct seq_in_command_s {
+       union {
+               uint32_t word;
+               struct {
+                       unsigned int ctype:5;
+                       unsigned int res1:1;
+                       unsigned int inl:1;
+                       unsigned int sgf:1;
+                       unsigned int pre:1;
+                       unsigned int ext:1;
+                       unsigned int rto:1;
+                       unsigned int rjd:1;
+                       unsigned int res2:4;
+                       unsigned int length:16;
+               } field;
+       } __rte_packed command;
+} __rte_packed;
+
+/* Union describing a SEQOUT command in a
+ * descriptor.
+ */
+struct seq_out_command_s {
+       union {
+               uint32_t word;
+               struct {
+                       unsigned int ctype:5;
+                       unsigned int res1:2;
+                       unsigned int sgf:1;
+                       unsigned int pre:1;
+                       unsigned int ext:1;
+                       unsigned int rto:1;
+                       unsigned int res2:5;
+                       unsigned int length:16;
+               } field;
+       } __rte_packed command;
+} __rte_packed;
+
+struct load_command_s {
+       union {
+               uint32_t word;
+               struct {
+                       unsigned int ctype:5;
+                       unsigned int class:2;
+                       unsigned int sgf:1;
+                       unsigned int imm:1;
+                       unsigned int dst:7;
+                       unsigned char offset;
+                       unsigned char length;
+               } fields;
+       } __rte_packed command;
+} __rte_packed;
+
+/* Structure encompassing a general shared descriptor of maximum
+ * size (64 WORDs). Usually, other specific shared descriptor structures
+ * will be type-cast to this one.
+ */
+struct sec_sd_t {
+       uint32_t rsvd[MAX_DESC_SIZE_WORDS];
+} __attribute__((packed, aligned(64)));
+
+/* Structure encompassing a job descriptor which processes
+ * a single packet from a context. The job descriptor references
+ * a shared descriptor from a SEC context.
+ */
+struct sec_job_descriptor_t {
+       struct descriptor_header_s deschdr;
+       dma_addr_t sd_ptr;
+       struct seq_out_command_s seq_out;
+       dma_addr_t seq_out_ptr;
+       uint32_t out_ext_length;
+       struct seq_in_command_s seq_in;
+       dma_addr_t seq_in_ptr;
+       uint32_t in_ext_length;
+       struct load_command_s load_dpovrd;
+       uint32_t dpovrd;
+} __attribute__((packed, aligned(64)));
+
+#endif
diff --git a/drivers/crypto/caam_jr/caam_jr_hw.c b/drivers/crypto/caam_jr/caam_jr_hw.c
new file mode 100644 (file)
index 0000000..4a2b089
--- /dev/null
@@ -0,0 +1,367 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017-2018 NXP
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_crypto.h>
+#include <rte_security.h>
+
+#include <caam_jr_config.h>
+#include <caam_jr_hw_specific.h>
+#include <caam_jr_pvt.h>
+#include <caam_jr_log.h>
+
+/* RTA header files */
+#include <hw/desc/common.h>
+#include <hw/desc/algo.h>
+#include <hw/desc/ipsec.h>
+
+/* Used to retry resetting a job ring in SEC hardware. */
+#define SEC_TIMEOUT 100000
+
+/* @brief Process Jump Halt Condition related errors
+ *
+ * @param [in]  error_code        The error code in the descriptor status word
+ */
+static inline void
+hw_handle_jmp_halt_cond_err(union hw_error_code error_code)
+{
+       CAAM_JR_DEBUG("JMP: %d, Descriptor Index: 0x%x, Condition: 0x%x",
+                       error_code.error_desc.jmp_halt_cond_src.jmp,
+                       error_code.error_desc.jmp_halt_cond_src.desc_idx,
+                       error_code.error_desc.jmp_halt_cond_src.cond);
+       (void)error_code;
+}
+
+/* @brief Process DECO related errors
+ *
+ * @param [in]  error_code        The error code in the descriptor status word
+ */
+static inline void
+hw_handle_deco_err(union hw_error_code error_code)
+{
+       CAAM_JR_DEBUG("JMP: %d, Descriptor Index: 0x%x",
+                       error_code.error_desc.deco_src.jmp,
+                       error_code.error_desc.deco_src.desc_idx);
+
+       switch (error_code.error_desc.deco_src.desc_err) {
+       case SEC_HW_ERR_DECO_HFN_THRESHOLD:
+               CAAM_JR_DEBUG(" Warning: Descriptor completed normally, "
+                       "but 3GPP HFN matches or exceeds the Threshold ");
+               break;
+       default:
+               CAAM_JR_DEBUG("Error 0x%04x not implemented",
+                               error_code.error_desc.deco_src.desc_err);
+               break;
+       }
+}
+
+/* @brief Process Jump Halt User Status related errors
+ *
+ * @param [in]  error_code        The error code in the descriptor status word
+ */
+static inline void
+hw_handle_jmp_halt_user_err(union hw_error_code error_code __rte_unused)
+{
+       CAAM_JR_DEBUG(" Not implemented");
+}
+
+/* @brief Process CCB related errors
+ *
+ * @param [in]  error_code        The error code in the descriptor status word
+ */
+static inline void
+hw_handle_ccb_err(union hw_error_code hw_error_code __rte_unused)
+{
+       CAAM_JR_DEBUG(" Not implemented");
+}
+
+/* @brief Process Job Ring related errors
+ *
+ * @param [in]  error_code        The error code in the descriptor status word
+ */
+static inline void
+hw_handle_jr_err(union hw_error_code hw_error_code __rte_unused)
+{
+       CAAM_JR_DEBUG(" Not implemented");
+}
+
+int
+hw_reset_job_ring(struct sec_job_ring_t *job_ring)
+{
+       int ret = 0;
+
+       ASSERT(job_ring->register_base_addr != NULL);
+
+       /* First reset the job ring in hw */
+       ret = hw_shutdown_job_ring(job_ring);
+       SEC_ASSERT(ret == 0, ret, "Failed resetting job ring in hardware");
+
+       /* In order to have the HW JR in a workable state
+        * after a reset, the input queue size, input start
+        * address, output queue size and output start address
+        * must be re-written
+        */
+       /* Write the JR input queue size to the HW register */
+       hw_set_input_ring_size(job_ring, SEC_JOB_RING_SIZE);
+
+       /* Write the JR output queue size to the HW register */
+       hw_set_output_ring_size(job_ring, SEC_JOB_RING_SIZE);
+
+       /* Write the JR input queue start address */
+       hw_set_input_ring_start_addr(job_ring,
+                       caam_jr_dma_vtop(job_ring->input_ring));
+       CAAM_JR_DEBUG(" Set input ring base address to: Virtual: 0x%" PRIx64
+                     ", Physical: 0x%" PRIx64 ", Read from HW: 0x%" PRIx64,
+                     (uint64_t)(uintptr_t)job_ring->input_ring,
+                     caam_jr_dma_vtop(job_ring->input_ring),
+                     hw_get_inp_queue_base(job_ring));
+
+       /* Write the JR output queue start address */
+       hw_set_output_ring_start_addr(job_ring,
+                       caam_jr_dma_vtop(job_ring->output_ring));
+       CAAM_JR_DEBUG(" Set output ring base address to: Virtual: 0x%" PRIx64
+                     ", Physical: 0x%" PRIx64 ", Read from HW: 0x%" PRIx64,
+                     (uint64_t)(uintptr_t)job_ring->output_ring,
+                     caam_jr_dma_vtop(job_ring->output_ring),
+                     hw_get_out_queue_base(job_ring));
+       return ret;
+}
+
+int
+hw_shutdown_job_ring(struct sec_job_ring_t *job_ring)
+{
+       unsigned int timeout = SEC_TIMEOUT;
+       uint32_t tmp = 0;
+       int usleep_interval = 10;
+
+       if (job_ring->register_base_addr == NULL) {
+               CAAM_JR_ERR("Jr[%p] has NULL reg base addr, driver not init",
+                       job_ring);
+               return 0;
+       }
+
+       CAAM_JR_INFO("Resetting Job ring %p", job_ring);
+
+       /*
+        * Mask interrupts since we are going to poll
+        * for reset completion status.
+        * Also, at POR, interrupts are ENABLED on a JR, so
+        * this is the point where they can be disabled without
+        * changing the code logic too much.
+        */
+       caam_jr_disable_irqs(job_ring->irq_fd);
+
+       /* initiate flush (required prior to reset) */
+       SET_JR_REG(JRCR, job_ring, JR_REG_JRCR_VAL_RESET);
+
+       /* dummy read */
+       tmp = GET_JR_REG(JRCR, job_ring);
+
+       do {
+               tmp = GET_JR_REG(JRINT, job_ring);
+               usleep(usleep_interval);
+       } while (((tmp & JRINT_ERR_HALT_MASK) ==
+                       JRINT_ERR_HALT_INPROGRESS) && --timeout);
+
+       CAAM_JR_INFO("JRINT is %x", tmp);
+       if ((tmp & JRINT_ERR_HALT_MASK) != JRINT_ERR_HALT_COMPLETE ||
+               timeout == 0) {
+               CAAM_JR_ERR("Flush failed: JRINT 0x%x, timeout %d",
+                       tmp, timeout);
+               /* unmask interrupts */
+               if (job_ring->jr_mode != SEC_NOTIFICATION_TYPE_POLL)
+                       caam_jr_enable_irqs(job_ring->irq_fd);
+               return -1;
+       }
+
+       /* Initiate reset */
+       timeout = SEC_TIMEOUT;
+       SET_JR_REG(JRCR, job_ring, JR_REG_JRCR_VAL_RESET);
+
+       do {
+               tmp = GET_JR_REG(JRCR, job_ring);
+               usleep(usleep_interval);
+       } while ((tmp & JR_REG_JRCR_VAL_RESET) && --timeout);
+
+       CAAM_JR_DEBUG("JRCR is %x", tmp);
+       if (timeout == 0) {
+               CAAM_JR_ERR("Failed to reset hw job ring %p", job_ring);
+               /* unmask interrupts */
+               if (job_ring->jr_mode != SEC_NOTIFICATION_TYPE_POLL)
+                       caam_jr_enable_irqs(job_ring->irq_fd);
+               return -1;
+       }
+       /* unmask interrupts */
+       if (job_ring->jr_mode != SEC_NOTIFICATION_TYPE_POLL)
+               caam_jr_enable_irqs(job_ring->irq_fd);
+       return 0;
+}
+
+void
+hw_handle_job_ring_error(struct sec_job_ring_t *job_ring __rte_unused,
+                        uint32_t error_code)
+{
+       union hw_error_code hw_err_code;
+
+       hw_err_code.error = error_code;
+       switch (hw_err_code.error_desc.value.ssrc) {
+       case SEC_HW_ERR_SSRC_NO_SRC:
+               ASSERT(hw_err_code.error_desc.no_status_src.res == 0);
+               CAAM_JR_ERR("No Status Source ");
+               break;
+       case SEC_HW_ERR_SSRC_CCB_ERR:
+               CAAM_JR_ERR("CCB Status Source");
+               hw_handle_ccb_err(hw_err_code);
+               break;
+       case SEC_HW_ERR_SSRC_JMP_HALT_U:
+               CAAM_JR_ERR("Jump Halt User Status Source");
+               hw_handle_jmp_halt_user_err(hw_err_code);
+               break;
+       case SEC_HW_ERR_SSRC_DECO:
+               CAAM_JR_ERR("DECO Status Source");
+               hw_handle_deco_err(hw_err_code);
+               break;
+       case SEC_HW_ERR_SSRC_JR:
+               CAAM_JR_ERR("Job Ring Status Source");
+               hw_handle_jr_err(hw_err_code);
+               break;
+       case SEC_HW_ERR_SSRC_JMP_HALT_COND:
+               CAAM_JR_ERR("Jump Halt Condition Codes");
+               hw_handle_jmp_halt_cond_err(hw_err_code);
+               break;
+       default:
+               ASSERT(0);
+               CAAM_JR_ERR("Unknown SSRC");
+               break;
+       }
+}
+
+void
+hw_job_ring_error_print(struct sec_job_ring_t *job_ring, int code)
+{
+       switch (code) {
+       case JRINT_ERR_WRITE_STATUS:
+               CAAM_JR_ERR("Error writing status to Output Ring ");
+               break;
+       case JRINT_ERR_BAD_INPUT_BASE:
+               CAAM_JR_ERR(
+               "Bad Input Ring Base (%p) (not on a 4-byte boundary) ",
+               (void *)job_ring);
+               break;
+       case JRINT_ERR_BAD_OUTPUT_BASE:
+               CAAM_JR_ERR(
+               "Bad Output Ring Base (%p) (not on a 4-byte boundary) ",
+               (void *)job_ring);
+               break;
+       case JRINT_ERR_WRITE_2_IRBA:
+               CAAM_JR_ERR(
+               "Invalid write to Input Ring Base Address Register ");
+               break;
+       case JRINT_ERR_WRITE_2_ORBA:
+               CAAM_JR_ERR(
+               "Invalid write to Output Ring Base Address Register ");
+               break;
+       case JRINT_ERR_RES_B4_HALT:
+               CAAM_JR_ERR(
+               "Job Ring [%p] released before Job Ring is halted",
+               (void *)job_ring);
+               break;
+       case JRINT_ERR_REM_TOO_MANY:
+               CAAM_JR_ERR("Removed too many jobs from job ring [%p]",
+                       (void *)job_ring);
+               break;
+       case JRINT_ERR_ADD_TOO_MANY:
+               CAAM_JR_ERR("Added too many jobs on job ring [%p]", job_ring);
+               break;
+       default:
+               CAAM_JR_ERR("Unknown SEC JR Error: %d",
+                               code);
+               break;
+       }
+}
+
+int
+hw_job_ring_set_coalescing_param(struct sec_job_ring_t *job_ring,
+                                uint16_t irq_coalescing_timer,
+                                uint8_t irq_coalescing_count)
+{
+       uint32_t reg_val = 0;
+
+       ASSERT(job_ring != NULL);
+       if (job_ring->register_base_addr == NULL) {
+               CAAM_JR_ERR("Jr[%p] has NULL reg base addr, driver not init",
+                       job_ring);
+               return -1;
+       }
+       /* Set descriptor count coalescing */
+       reg_val |= (irq_coalescing_count << JR_REG_JRCFG_LO_ICDCT_SHIFT);
+
+       /* Set coalescing timer value */
+       reg_val |= (irq_coalescing_timer << JR_REG_JRCFG_LO_ICTT_SHIFT);
+
+       /* Update parameters in HW */
+       SET_JR_REG_LO(JRCFG, job_ring, reg_val);
+       CAAM_JR_DEBUG("Set coalescing params on jr %p timer:%d, desc count: %d",
+                       job_ring, irq_coalescing_timer, irq_coalescing_count);
+
+       return 0;
+}
+
+int
+hw_job_ring_enable_coalescing(struct sec_job_ring_t *job_ring)
+{
+       uint32_t reg_val = 0;
+
+       ASSERT(job_ring != NULL);
+       if (job_ring->register_base_addr == NULL) {
+               CAAM_JR_ERR("Jr[%p] has NULL reg base addr, driver not init",
+                       job_ring);
+               return -1;
+       }
+
+       /* Get the current value of the register */
+       reg_val = GET_JR_REG_LO(JRCFG, job_ring);
+
+       /* Enable coalescing */
+       reg_val |= JR_REG_JRCFG_LO_ICEN_EN;
+
+       /* Write in hw */
+       SET_JR_REG_LO(JRCFG, job_ring, reg_val);
+
+       CAAM_JR_DEBUG("Enabled coalescing on jr %p ",
+                       job_ring);
+
+       return 0;
+}
+
+int
+hw_job_ring_disable_coalescing(struct sec_job_ring_t *job_ring)
+{
+       uint32_t reg_val = 0;
+
+       ASSERT(job_ring != NULL);
+
+       if (job_ring->register_base_addr == NULL) {
+               CAAM_JR_ERR("Jr[%p] has NULL reg base addr, driver not init",
+                       job_ring);
+               return -1;
+       }
+
+       /* Get the current value of the register */
+       reg_val = GET_JR_REG_LO(JRCFG, job_ring);
+
+       /* Disable coalescing */
+       reg_val &= ~JR_REG_JRCFG_LO_ICEN_EN;
+
+       /* Write in hw */
+       SET_JR_REG_LO(JRCFG, job_ring, reg_val);
+       CAAM_JR_DEBUG("Disabled coalescing on jr %p ", job_ring);
+
+       return 0;
+}
diff --git a/drivers/crypto/caam_jr/caam_jr_hw_specific.h b/drivers/crypto/caam_jr/caam_jr_hw_specific.h
new file mode 100644 (file)
index 0000000..5f58a58
--- /dev/null
@@ -0,0 +1,503 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017 NXP
+ */
+
+#ifndef CAAM_JR_HW_SPECIFIC_H
+#define CAAM_JR_HW_SPECIFIC_H
+
+#include <caam_jr_config.h>
+
+/*
+ * Offset to the registers of a job ring.
+ * Is different for each job ring.
+ */
+#define CHAN_BASE(jr)   ((size_t)(jr)->register_base_addr)
+
+#define SEC_JOB_RING_IS_FULL(pi, ci, ring_max_size, ring_threshold) \
+               ((((pi) + 1 + ((ring_max_size) - (ring_threshold))) & \
+                 ((ring_max_size) - 1)) == (ci))
+
+#define SEC_CIRCULAR_COUNTER(x, max)   (((x) + 1) & ((max) - 1))
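+
+/* Illustrative expansion (example values assumed, not taken from hw):
+ * both macros rely on ring_max_size being a power of two. With a
+ * 512-entry ring the counter wraps via a bitwise AND:
+ *      SEC_CIRCULAR_COUNTER(511, 512) == ((511 + 1) & 511) == 0
+ * and with ring_threshold == ring_max_size == 512 the full check
+ * reduces to
+ *      SEC_JOB_RING_IS_FULL(pi, ci, 512, 512) == (((pi) + 1) & 511) == (ci)
+ * i.e. the ring is considered full when the producer index sits one
+ * slot behind the consumer index.
+ */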
+
+/*
+ * Assert that cond is true. If !cond is true, display str and the vararg list
+ * in a printf-like syntax. Also, if !cond is true, return altRet.
+ *
+ * \param cond          A boolean expression to be asserted true
+ * \param altRet        The value to be returned if cond doesn't hold true
+ * \param str           A quoted char string
+ *
+ * E.g.:
+ *      SEC_ASSERT(ret > 0, 0, "ERROR initializing app: code = %d\n", ret);
+ */
+#define SEC_ASSERT(cond, altRet, ...) do {\
+       if (unlikely(!(cond))) {\
+               CAAM_JR_ERR(__VA_ARGS__); \
+               return altRet; \
+       } \
+} while (0)
+
+#define SEC_DP_ASSERT(cond, altRet, ...) do {\
+       if (unlikely(!(cond))) {\
+               CAAM_JR_DP_ERR(__VA_ARGS__); \
+               return altRet; \
+       } \
+} while (0)
+
+#define ASSERT(x)
+
+/*
+ * Constants representing various job ring registers
+ */
+#if CAAM_BYTE_ORDER == __BIG_ENDIAN
+#define JR_REG_IRBA_OFFSET             0x0000
+#define JR_REG_IRBA_OFFSET_LO          0x0004
+#else
+#define JR_REG_IRBA_OFFSET             0x0004
+#define JR_REG_IRBA_OFFSET_LO          0x0000
+#endif
+
+#define JR_REG_IRSR_OFFSET             0x000C
+#define JR_REG_IRSA_OFFSET             0x0014
+#define JR_REG_IRJA_OFFSET             0x001C
+
+#if CAAM_BYTE_ORDER == __BIG_ENDIAN
+#define JR_REG_ORBA_OFFSET             0x0020
+#define JR_REG_ORBA_OFFSET_LO          0x0024
+#else
+#define JR_REG_ORBA_OFFSET             0x0024
+#define JR_REG_ORBA_OFFSET_LO          0x0020
+#endif
+
+#define JR_REG_ORSR_OFFSET             0x002C
+#define JR_REG_ORJR_OFFSET             0x0034
+#define JR_REG_ORSFR_OFFSET            0x003C
+#define JR_REG_JROSR_OFFSET            0x0044
+#define JR_REG_JRINT_OFFSET            0x004C
+
+#define JR_REG_JRCFG_OFFSET            0x0050
+#define JR_REG_JRCFG_OFFSET_LO         0x0054
+
+#define JR_REG_IRRI_OFFSET             0x005C
+#define JR_REG_ORWI_OFFSET             0x0064
+#define JR_REG_JRCR_OFFSET             0x006C
+
+/*
+ * Constants for error handling on job ring
+ */
+#define JR_REG_JRINT_ERR_TYPE_SHIFT    8
+#define JR_REG_JRINT_ERR_ORWI_SHIFT    16
+#define JR_REG_JRINIT_JRE_SHIFT                1
+
+#define JRINT_JRE                      (1 << JR_REG_JRINIT_JRE_SHIFT)
+#define JRINT_ERR_WRITE_STATUS         (1 << JR_REG_JRINT_ERR_TYPE_SHIFT)
+#define JRINT_ERR_BAD_INPUT_BASE       (3 << JR_REG_JRINT_ERR_TYPE_SHIFT)
+#define JRINT_ERR_BAD_OUTPUT_BASE      (4 << JR_REG_JRINT_ERR_TYPE_SHIFT)
+#define JRINT_ERR_WRITE_2_IRBA         (5 << JR_REG_JRINT_ERR_TYPE_SHIFT)
+#define JRINT_ERR_WRITE_2_ORBA         (6 << JR_REG_JRINT_ERR_TYPE_SHIFT)
+#define JRINT_ERR_RES_B4_HALT          (7 << JR_REG_JRINT_ERR_TYPE_SHIFT)
+#define JRINT_ERR_REM_TOO_MANY         (8 << JR_REG_JRINT_ERR_TYPE_SHIFT)
+#define JRINT_ERR_ADD_TOO_MANY         (9 << JR_REG_JRINT_ERR_TYPE_SHIFT)
+#define JRINT_ERR_HALT_MASK            0x0C
+#define JRINT_ERR_HALT_INPROGRESS      0x04
+#define JRINT_ERR_HALT_COMPLETE                0x08
+
+#define JR_REG_JRCR_VAL_RESET          0x00000001
+
+#define JR_REG_JRCFG_LO_ICTT_SHIFT     0x10
+#define JR_REG_JRCFG_LO_ICDCT_SHIFT    0x08
+#define JR_REG_JRCFG_LO_ICEN_EN                0x02
+
+/*
+ * Constants for Descriptor Processing errors
+ */
+#define SEC_HW_ERR_SSRC_NO_SRC         0x00
+#define SEC_HW_ERR_SSRC_CCB_ERR                0x02
+#define SEC_HW_ERR_SSRC_JMP_HALT_U     0x03
+#define SEC_HW_ERR_SSRC_DECO           0x04
+#define SEC_HW_ERR_SSRC_JR             0x06
+#define SEC_HW_ERR_SSRC_JMP_HALT_COND   0x07
+
+#define SEC_HW_ERR_DECO_HFN_THRESHOLD   0xF1
+#define SEC_HW_ERR_CCB_ICV_CHECK_FAIL   0x0A
+
+/*
+ * Constants for descriptors
+ */
+/* Return higher 32 bits of physical address */
+#define PHYS_ADDR_HI(phys_addr) \
+           (uint32_t)(((uint64_t)phys_addr) >> 32)
+
+/* Return lower 32 bits of physical address */
+#define PHYS_ADDR_LO(phys_addr) \
+           (uint32_t)(((uint64_t)phys_addr) & 0xFFFFFFFF)
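+
+/* Worked example (address value assumed for illustration): for
+ * phys_addr == 0x0000000123456789ULL
+ *      PHYS_ADDR_HI(phys_addr) == 0x00000001
+ *      PHYS_ADDR_LO(phys_addr) == 0x23456789
+ * The two halves are written into the HI/LO ring base address
+ * registers by the hw_set_*_ring_start_addr() macros below.
+ */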
+
+/*
+ * Macros for extracting error codes for the job ring
+ */
+#define JR_REG_JRINT_ERR_TYPE_EXTRACT(value)      ((value) & 0x00000F00)
+#define JR_REG_JRINT_ERR_ORWI_EXTRACT(value)     \
+       (((value) & 0x3FFF0000) >> JR_REG_JRINT_ERR_ORWI_SHIFT)
+#define JR_REG_JRINT_JRE_EXTRACT(value)           ((value) & JRINT_JRE)
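+
+/* Decoding sketch (JRINT value assumed for illustration): for a JRINT
+ * read of 0x302
+ *      JR_REG_JRINT_JRE_EXTRACT(0x302)      == JRINT_JRE (error flagged)
+ *      JR_REG_JRINT_ERR_TYPE_EXTRACT(0x302) == 0x300
+ *                                           == JRINT_ERR_BAD_INPUT_BASE
+ * which hw_job_ring_error_print() would report as a misaligned input
+ * ring base.
+ */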
+
+/*
+ * Macros for managing the job ring
+ */
+/* Read pointer to job ring input ring start address */
+#if defined(RTE_ARCH_ARM64)
+#define hw_get_inp_queue_base(jr) ((((dma_addr_t)GET_JR_REG(IRBA, \
+                                     (jr))) << 32) | \
+                                     (GET_JR_REG_LO(IRBA, (jr))))
+
+/* Read pointer to job ring output ring start address */
+#define hw_get_out_queue_base(jr) (((dma_addr_t)(GET_JR_REG(ORBA, \
+                                    (jr))) << 32) | \
+                                    (GET_JR_REG_LO(ORBA, (jr))))
+#else
+#define hw_get_inp_queue_base(jr)   ((dma_addr_t)(GET_JR_REG_LO(IRBA, (jr))))
+
+#define hw_get_out_queue_base(jr)   ((dma_addr_t)(GET_JR_REG_LO(ORBA, (jr))))
+#endif
+
+/*
+ * IRJA - Input Ring Jobs Added Register shows
+ * how many new jobs were added to the Input Ring.
+ */
+#define hw_enqueue_desc_on_job_ring(job_ring) SET_JR_REG(IRJA, (job_ring), 1)
+
+#define hw_set_input_ring_size(job_ring, size) SET_JR_REG(IRSR, job_ring, \
+                                                        (size))
+
+#define hw_set_output_ring_size(job_ring, size) SET_JR_REG(ORSR, job_ring, \
+                                                         (size))
+
+#if defined(RTE_ARCH_ARM64)
+#define hw_set_input_ring_start_addr(job_ring, start_addr)     \
+{                                                              \
+       SET_JR_REG(IRBA, job_ring, PHYS_ADDR_HI(start_addr));   \
+       SET_JR_REG_LO(IRBA, job_ring, PHYS_ADDR_LO(start_addr));\
+}
+
+#define hw_set_output_ring_start_addr(job_ring, start_addr) \
+{                                                              \
+       SET_JR_REG(ORBA, job_ring, PHYS_ADDR_HI(start_addr));   \
+       SET_JR_REG_LO(ORBA, job_ring, PHYS_ADDR_LO(start_addr));\
+}
+
+#else
+#define hw_set_input_ring_start_addr(job_ring, start_addr)     \
+{                                                              \
+       SET_JR_REG(IRBA, job_ring, 0);  \
+       SET_JR_REG_LO(IRBA, job_ring, PHYS_ADDR_LO(start_addr));\
+}
+
+#define hw_set_output_ring_start_addr(job_ring, start_addr) \
+{                                                              \
+       SET_JR_REG(ORBA, job_ring, 0);  \
+       SET_JR_REG_LO(ORBA, job_ring, PHYS_ADDR_LO(start_addr));\
+}
+#endif
+
+/* ORJR - Output Ring Jobs Removed Register shows how many jobs were
+ * removed from the Output Ring for processing by software. This is done after
+ * the software has processed the entries.
+ */
+#define hw_remove_entries(jr, no_entries) SET_JR_REG(ORJR, (jr), (no_entries))
+
+/* IRSA - Input Ring Slots Available register holds the number of entries in
+ * the Job Ring's input ring. Once a job is enqueued, the value returned is
+ * decremented by the hardware by the number of jobs enqueued.
+ */
+#define hw_get_available_slots(jr)             GET_JR_REG(IRSA, jr)
+
+/* ORSFR - Output Ring Slots Full register holds the number of jobs which were
+ * processed by the SEC and can be retrieved by the software. Once a job has
+ * been processed by software, the user calls hw_remove_entries() in order
+ * to notify the SEC that the entries were processed.
+ */
+#define hw_get_no_finished_jobs(jr)            GET_JR_REG(ORSFR, jr)
+
+/*
+ * Macros for manipulating JR registers
+ */
+#if CORE_BYTE_ORDER == CAAM_BYTE_ORDER
+#define sec_read_32(addr)      (*(volatile unsigned int *)(addr))
+#define sec_write_32(addr, val)        (*(volatile unsigned int *)(addr) = (val))
+
+#else
+#define sec_read_32(addr)      rte_bswap32((*(volatile unsigned int *)(addr)))
+#define sec_write_32(addr, val) \
+                       (*(volatile unsigned int *)(addr) = rte_bswap32(val))
+#endif
+
+#if CAAM_BYTE_ORDER == __LITTLE_ENDIAN
+#define sec_read_64(addr)      (((u64)sec_read_32((u32 *)(addr) + 1) << 32) | \
+                               (sec_read_32((u32 *)(addr))))
+
+#define sec_write_64(addr, val) {                              \
+       sec_write_32((u32 *)(addr) + 1, (u32)((val) >> 32));    \
+       sec_write_32((u32 *)(addr), (u32)(val));                \
+}
+#else /* CAAM_BYTE_ORDER == __BIG_ENDIAN */
+#define sec_read_64(addr)      (((u64)sec_read_32((u32 *)(addr)) << 32) | \
+                               (sec_read_32((u32 *)(addr) + 1)))
+
+#define sec_write_64(addr, val) {                              \
+       sec_write_32((u32 *)(addr), (u32)((val) >> 32));        \
+       sec_write_32((u32 *)(addr) + 1, (u32)(val));            \
+}
+#endif
+
+#if defined(RTE_ARCH_ARM64)
+#define sec_read_addr(a)       sec_read_64((a))
+#define sec_write_addr(a, v)   sec_write_64((a), (v))
+#else
+#define sec_read_addr(a)       sec_read_32((a))
+#define sec_write_addr(a, v)   sec_write_32((a), (v))
+#endif
+
+#define JR_REG(name, jr)       (CHAN_BASE(jr) + JR_REG_##name##_OFFSET)
+#define JR_REG_LO(name, jr)    (CHAN_BASE(jr) + JR_REG_##name##_OFFSET_LO)
+
+#define GET_JR_REG(name, jr)   (sec_read_32(JR_REG(name, (jr))))
+#define GET_JR_REG_LO(name, jr)        (sec_read_32(JR_REG_LO(name, (jr))))
+
+#define SET_JR_REG(name, jr, value) \
+                               (sec_write_32(JR_REG(name, (jr)), value))
+#define SET_JR_REG_LO(name, jr, value) \
+                               (sec_write_32(JR_REG_LO(name, (jr)), value))
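+
+/* Expansion sketch: the register name is token-pasted into the offset
+ * macro name, so for a job ring pointer jr
+ *      GET_JR_REG(JRINT, jr)
+ * expands to
+ *      sec_read_32(CHAN_BASE(jr) + JR_REG_JRINT_OFFSET)
+ * and SET_JR_REG_LO(JRCFG, jr, val) writes val to
+ * CHAN_BASE(jr) + JR_REG_JRCFG_OFFSET_LO, with byte swapping handled
+ * inside sec_read_32()/sec_write_32() when core and CAAM endianness
+ * differ.
+ */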
+
+/* Lists the possible states for a job ring. */
+typedef enum sec_job_ring_state_e {
+       SEC_JOB_RING_STATE_STARTED,     /* Job ring is initialized */
+       SEC_JOB_RING_STATE_RESET,       /* Job ring reset is in progress */
+} sec_job_ring_state_t;
+
+/* code or cmd block to caam */
+struct sec_cdb {
+       struct {
+               union {
+                       uint32_t word;
+                       struct {
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+                               uint16_t rsvd63_48;
+                               unsigned int rsvd47_39:9;
+                               unsigned int idlen:7;
+#else
+                               unsigned int idlen:7;
+                               unsigned int rsvd47_39:9;
+                               uint16_t rsvd63_48;
+#endif
+                       } field;
+               } __rte_packed hi;
+
+               union {
+                       uint32_t word;
+                       struct {
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+                               unsigned int rsvd31_30:2;
+                               unsigned int fsgt:1;
+                               unsigned int lng:1;
+                               unsigned int offset:2;
+                               unsigned int abs:1;
+                               unsigned int add_buf:1;
+                               uint8_t pool_id;
+                               uint16_t pool_buffer_size;
+#else
+                               uint16_t pool_buffer_size;
+                               uint8_t pool_id;
+                               unsigned int add_buf:1;
+                               unsigned int abs:1;
+                               unsigned int offset:2;
+                               unsigned int lng:1;
+                               unsigned int fsgt:1;
+                               unsigned int rsvd31_30:2;
+#endif
+                       } field;
+               } __rte_packed lo;
+       } __rte_packed sh_hdr;
+
+       uint32_t sh_desc[SEC_JOB_DESCRIPTOR_SIZE];
+};
+
+struct caam_jr_qp {
+       struct sec_job_ring_t *ring;
+       uint64_t rx_pkts;
+       uint64_t rx_errs;
+       uint64_t rx_poll_err;
+       uint64_t tx_pkts;
+       uint64_t tx_errs;
+       uint64_t tx_ring_full;
+};
+
+struct sec_job_ring_t {
+       /* TODO: Add wrapper macro to make it obvious this is the consumer index
+        * on the output ring
+        */
+       uint32_t cidx;          /* Consumer index for job ring (jobs array).
+                                * @note: cidx and pidx are accessed from
+                                * different threads. Place the cidx and pidx
+                                * inside the structure so that they lie on
+                                * different cachelines, to avoid false sharing
+                                * between threads when the threads run on
+                                * different cores!
+                                */
+       /* TODO: Add wrapper macro to make it obvious this is the producer index
+        * on the input ring
+        */
+       uint32_t pidx;          /* Producer index for job ring (jobs array) */
+
+       phys_addr_t *input_ring;/* Ring of input descriptors sent to SEC.
+                                * Size of array is power of 2 to allow fast
+                                * update of producer/consumer indexes with
+                                * bitwise operations.
+                                */
+
+       struct sec_outring_entry *output_ring;
+                               /* Ring of output descriptors received from SEC.
+                                * Size of array is power of 2 to allow fast
+                                * update of producer/consumer indexes with
+                                * bitwise operations.
+                                */
+
+       uint32_t irq_fd;        /* The file descriptor used for polling from
+                                * user space for interrupt notifications
+                                */
+       uint32_t jr_mode;       /* Model used by SEC Driver to receive
+                                * notifications from SEC.  Can be either
+                                * notifications from SEC. Can be one of
+                                * the three: #SEC_NOTIFICATION_TYPE_NAPI,
+                                * #SEC_NOTIFICATION_TYPE_IRQ or
+                                */
+       uint32_t napi_mode;     /* Job ring mode if NAPI mode is chosen
+                                * Used only when jr_mode is set to
+                                * #SEC_NOTIFICATION_TYPE_NAPI
+                                */
+       void *register_base_addr;       /* Base address for SEC's
+                                        * register memory for this job ring.
+                                        */
+       uint8_t coalescing_en;          /* notifies if coalescing is
+                                        * enabled for the job ring
+                                        */
+       sec_job_ring_state_t jr_state;  /* The state of this job ring */
+
+       struct rte_mempool *ctx_pool;   /* per dev mempool for caam_jr_op_ctx */
+       unsigned int max_nb_queue_pairs;
+       unsigned int max_nb_sessions;
+       struct caam_jr_qp qps[RTE_CAAM_MAX_NB_SEC_QPS]; /* i/o queue for sec */
+};
+
+/* Union describing the possible error codes that
+ * can be set in the descriptor status word
+ */
+union hw_error_code {
+       uint32_t error;
+       union {
+               struct {
+                       uint32_t ssrc:4;
+                       uint32_t ssed_val:28;
+               } __rte_packed value;
+               struct {
+                       uint32_t ssrc:4;
+                       uint32_t res:28;
+               } __rte_packed no_status_src;
+               struct {
+                       uint32_t ssrc:4;
+                       uint32_t jmp:1;
+                       uint32_t res:11;
+                       uint32_t desc_idx:8;
+                       uint32_t cha_id:4;
+                       uint32_t err_id:4;
+               } __rte_packed ccb_status_src;
+               struct {
+                       uint32_t ssrc:4;
+                       uint32_t jmp:1;
+                       uint32_t res:11;
+                       uint32_t desc_idx:8;
+                       uint32_t offset:8;
+               } __rte_packed jmp_halt_user_src;
+               struct {
+                       uint32_t ssrc:4;
+                       uint32_t jmp:1;
+                       uint32_t res:11;
+                       uint32_t desc_idx:8;
+                       uint32_t desc_err:8;
+               } __rte_packed deco_src;
+               struct {
+                       uint32_t ssrc:4;
+                       uint32_t res:17;
+                       uint32_t naddr:3;
+                       uint32_t desc_err:8;
+               } __rte_packed jr_src;
+               struct {
+                       uint32_t ssrc:4;
+                       uint32_t jmp:1;
+                       uint32_t res:11;
+                       uint32_t desc_idx:8;
+                       uint32_t cond:8;
+               } __rte_packed jmp_halt_cond_src;
+       } __rte_packed error_desc;
+} __rte_packed;
+
+/* @brief Initialize a job ring/channel in SEC device.
+ * Write configuration register(s) to properly initialize a job ring.
+ *
+ * @param [in] job_ring     The job ring
+ *
+ * @retval 0 for success
+ * @retval other for error
+ */
+int hw_reset_job_ring(struct sec_job_ring_t *job_ring);
+
+/* @brief Reset a job ring/channel in SEC device.
+ * Write configuration register(s) to reset a job ring.
+ *
+ * @param [in] job_ring     The job ring
+ *
+ * @retval 0 for success
+ * @retval -1 in case job ring reset failed
+ */
+int hw_shutdown_job_ring(struct sec_job_ring_t *job_ring);
+
+/* @brief Handle a job ring/channel error in SEC device.
+ * Identify the error type and clear error bits if required.
+ *
+ * @param [in]  job_ring       The job ring
+ * @param [in]  sec_error_code  The job ring's error code
+ */
+void hw_handle_job_ring_error(struct sec_job_ring_t *job_ring,
+                             uint32_t sec_error_code);
+
+/* @brief Handle a job ring error in the device.
+ * Identify the error type and print an explanatory
+ * message.
+ *
+ * @param [in]  job_ring       The job ring
+ *
+ */
+void hw_job_ring_error_print(struct sec_job_ring_t *job_ring, int code);
+
+/* @brief Set interrupt coalescing parameters on the Job Ring.
+ * @param [in]  job_ring               The job ring
+ * @param [in]  irq_coalescing_timer    Interrupt coalescing timer threshold.
+ *                                     This value determines the maximum
+ *                                     amount of time after processing a
+ *                                     descriptor before raising an interrupt.
+ * @param [in]  irq_coalescing_count    Interrupt coalescing descriptor count
+ *                                     threshold.
+ */
+int hw_job_ring_set_coalescing_param(struct sec_job_ring_t *job_ring,
+                                    uint16_t irq_coalescing_timer,
+                                    uint8_t irq_coalescing_count);
+
+/* @brief Enable interrupt coalescing on a job ring
+ * @param [in]  job_ring               The job ring
+ */
+int hw_job_ring_enable_coalescing(struct sec_job_ring_t *job_ring);
+
+/* @brief Disable interrupt coalescing on a job ring
+ * @param [in]  job_ring               The job ring
+ */
+int hw_job_ring_disable_coalescing(struct sec_job_ring_t *job_ring);
+
+#endif /* CAAM_JR_HW_SPECIFIC_H */
diff --git a/drivers/crypto/caam_jr/caam_jr_log.h b/drivers/crypto/caam_jr/caam_jr_log.h
new file mode 100644 (file)
index 0000000..106ff07
--- /dev/null
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017-2018 NXP
+ */
+
+#ifndef _CAAM_JR_LOG_H_
+#define _CAAM_JR_LOG_H_
+
+#include <rte_log.h>
+
+extern int caam_jr_logtype;
+
+#define CAAM_JR_LOG(level, fmt, args...) \
+       rte_log(RTE_LOG_ ## level, caam_jr_logtype, "caam_jr: " \
+               fmt "\n", ##args)
+
+#define CAAM_JR_DEBUG(fmt, args...) \
+       rte_log(RTE_LOG_DEBUG, caam_jr_logtype, "caam_jr: %s(): " \
+               fmt "\n", __func__, ##args)
+
+#define PMD_INIT_FUNC_TRACE() CAAM_JR_DEBUG(" >>")
+
+#define CAAM_JR_INFO(fmt, args...) \
+       CAAM_JR_LOG(INFO, fmt, ## args)
+#define CAAM_JR_ERR(fmt, args...) \
+       CAAM_JR_LOG(ERR, fmt, ## args)
+#define CAAM_JR_WARN(fmt, args...) \
+       CAAM_JR_LOG(WARNING, fmt, ## args)
+
+/* DP Logs, toggled out at compile time if level lower than current level */
+#define CAAM_JR_DP_LOG(level, fmt, args...) \
+       RTE_LOG_DP(level, PMD, fmt "\n", ## args)
+
+#define CAAM_JR_DP_DEBUG(fmt, args...) \
+       CAAM_JR_DP_LOG(DEBUG, fmt, ## args)
+#define CAAM_JR_DP_INFO(fmt, args...) \
+       CAAM_JR_DP_LOG(INFO, fmt, ## args)
+#define CAAM_JR_DP_WARN(fmt, args...) \
+       CAAM_JR_DP_LOG(WARNING, fmt, ## args)
+#define CAAM_JR_DP_ERR(fmt, args...) \
+       CAAM_JR_DP_LOG(ERR, fmt, ## args)
+
+#endif /* _CAAM_JR_LOG_H_ */
diff --git a/drivers/crypto/caam_jr/caam_jr_pvt.h b/drivers/crypto/caam_jr/caam_jr_pvt.h
new file mode 100644 (file)
index 0000000..9f1adab
--- /dev/null
@@ -0,0 +1,291 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017-2018 NXP
+ */
+
+#ifndef CAAM_JR_PVT_H
+#define CAAM_JR_PVT_H
+
+#include <hw/desc/ipsec.h>
+
+/* NXP CAAM JR PMD device name */
+
+#define CAAM_JR_ALG_UNSUPPORT  (-1)
+
+/* Minimum job descriptor consists of a one-word job descriptor HEADER and
+ * a pointer to the shared descriptor.
+ */
+#define MIN_JOB_DESC_SIZE      (CAAM_CMD_SZ + CAAM_PTR_SZ)
+#define CAAM_JOB_DESC_SIZE     13
+
+/* CTX_POOL_NUM_BUFS is set as per the ipsec-secgw application */
+#define CTX_POOL_NUM_BUFS      32000
+#define CTX_POOL_CACHE_SIZE    512
+
+#define DIR_ENC                 1
+#define DIR_DEC                 0
+
+#define JR_MAX_NB_MAX_DIGEST   32
+
+#define RTE_CAAM_JR_PMD_MAX_NB_SESSIONS 2048
+
+
+/* Return codes for SEC user space driver APIs */
+enum sec_return_code_e {
+       SEC_SUCCESS = 0,               /* Operation executed successfully.*/
+       SEC_INVALID_INPUT_PARAM,       /* API received an invalid input
+                                       * parameter
+                                       */
+       SEC_OUT_OF_MEMORY,             /* Memory allocation failed. */
+       SEC_DESCRIPTOR_IN_FLIGHT,      /* API function indicates there are
+                                       * descriptors in flight
+                                       * for SEC to process.
+                                       */
+       SEC_LAST_DESCRIPTOR_IN_FLIGHT, /* API function indicates there is one
+                                       * last descriptor in flight
+                                       * for SEC to process.
+                                       */
+       SEC_PROCESSING_ERROR,          /* Indicates a SEC processing error
+                                       * occurred on a Job Ring which requires
+                                       * a SEC user space driver shutdown. Can
+                                       * be returned from sec_poll_job_ring().
+                                       * Then the only other API that can be
+                                       * called after this error is
+                                       * sec_release().
+                                       */
+       SEC_DESC_PROCESSING_ERROR,     /* Indicates a SEC descriptor processing
+                                       * error occurred on a Job Ring. Can be
+                                       * returned from sec_poll_job_ring().
+                                       * The driver was able to reset job ring
+                                       * and job ring can be used like in a
+                                       * normal case.
+                                       */
+       SEC_JR_IS_FULL,                 /* Job Ring is full. There is no more
+                                        * room in the JR for new descriptors.
+                                        * This can happen if the descriptor RX
+                                        * rate is higher than SEC's capacity.
+                                        */
+       SEC_DRIVER_RELEASE_IN_PROGRESS, /* SEC driver shutdown is in progress,
+                                        * descriptor processing or polling is
+                                        * allowed.
+                                        */
+       SEC_DRIVER_ALREADY_INITIALIZED, /* SEC driver is already initialized.*/
+       SEC_DRIVER_NOT_INITIALIZED,     /* SEC driver is NOT initialized. */
+       SEC_JOB_RING_RESET_IN_PROGRESS, /* Job ring is resetting due to a
+                                        * per-descriptor SEC processing error
+                                        * ::SEC_DESC_PROCESSING_ERROR. Reset is
+                                        * finished when sec_poll_job_ring()
+                                        * returns. Then the job ring can be used
+                                        * again.
+                                        */
+       SEC_RESET_ENGINE_FAILED,        /* Resetting of SEC Engine by SEC Kernel
+                                        * Driver Failed
+                                        */
+       SEC_ENABLE_IRQS_FAILED,         /* Enabling of IRQs in SEC Kernel Driver
+                                        * Failed
+                                        */
+       SEC_DISABLE_IRQS_FAILED,        /* Disabling of IRQs in SEC Kernel
+                                        * Driver Failed
+                                        */
+       /* END OF VALID VALUES */
+
+       SEC_RETURN_CODE_MAX_VALUE,      /* Invalid value for return code. It is
+                                        * used to mark the end of the return
+                                        * code values. @note ALL new return
+                                        * code values MUST be added before
+                                        * ::SEC_RETURN_CODE_MAX_VALUE!
+                                        */
+};
+
+enum caam_jr_op_type {
+       CAAM_JR_NONE,  /* No Cipher operations*/
+       CAAM_JR_CIPHER,/* CIPHER operations */
+       CAAM_JR_AUTH,  /* Authentication Operations */
+       CAAM_JR_AEAD,  /* Authenticated Encryption with associated data */
+       CAAM_JR_IPSEC, /* IPSEC protocol operations*/
+       CAAM_JR_PDCP,  /* PDCP protocol operations*/
+       CAAM_JR_PKC,   /* Public Key Cryptographic Operations */
+       CAAM_JR_MAX
+};
+
+struct caam_jr_session {
+       uint8_t dir;         /* Operation Direction */
+       enum rte_crypto_cipher_algorithm cipher_alg; /* Cipher Algorithm*/
+       enum rte_crypto_auth_algorithm auth_alg; /* Authentication Algorithm*/
+       enum rte_crypto_aead_algorithm aead_alg; /* AEAD Algorithm*/
+       enum rte_security_session_protocol proto_alg; /* Security Algorithm*/
+       union {
+               struct {
+                       uint8_t *data;  /* pointer to key data */
+                       size_t length;  /* key length in bytes */
+               } aead_key;
+               struct {
+                       struct {
+                               uint8_t *data;  /* pointer to key data */
+                               size_t length;  /* key length in bytes */
+                       } cipher_key;
+                       struct {
+                               uint8_t *data;  /* pointer to key data */
+                               size_t length;  /* key length in bytes */
+                       } auth_key;
+               };
+       };
+       struct {
+               uint16_t length;
+               uint16_t offset;
+       } iv;   /* Initialisation vector parameters */
+       uint16_t auth_only_len; /* Length of data for Auth only */
+       uint32_t digest_length;
+       struct ipsec_encap_pdb encap_pdb;
+       struct ip ip4_hdr;
+       struct ipsec_decap_pdb decap_pdb;
+       struct caam_jr_qp *qp;
+       struct sec_cdb *cdb;    /* cmd block associated with qp */
+       struct rte_mempool *ctx_pool; /* session mempool for caam_jr_op_ctx */
+};
+
+/*
+ * 16-byte hardware scatter/gather table
+ */
+
+#define SEC4_SG_LEN_EXT                0x80000000      /* Entry points to table */
+#define SEC4_SG_LEN_FIN                0x40000000      /* Last entry in table */
+#define SEC4_SG_BPID_MASK      0x000000ff
+#define SEC4_SG_BPID_SHIFT     16
+#define SEC4_SG_LEN_MASK       0x3fffffff      /* Excludes EXT and FINAL */
+#define SEC4_SG_OFFSET_MASK    0x00001fff
+
+struct sec4_sg_entry {
+       uint64_t ptr;
+       uint32_t len;
+       uint32_t bpid_offset;
+};
+
+#define MAX_SG_ENTRIES         16
+#define SG_CACHELINE_0         0
+#define SG_CACHELINE_1         4
+#define SG_CACHELINE_2         8
+#define SG_CACHELINE_3         12
+
+/* Structure encompassing a job descriptor which is to be processed
+ * by SEC. The user should also initialise this structure with the callback
+ * function pointer, which will be called by the driver after receiving the
+ * processed descriptor from SEC. User data is also passed in this structure
+ * and will be sent as an argument to the user callback function.
+ */
+struct job_descriptor {
+       uint32_t desc[CAAM_JOB_DESC_SIZE];
+};
+
+struct caam_jr_op_ctx {
+       struct job_descriptor jobdes;
+       /* sg[0] output, sg[1] input, others are possible sub frames */
+       struct sec4_sg_entry sg[MAX_SG_ENTRIES];
+       struct rte_crypto_op *op;
+       struct rte_mempool *ctx_pool; /* mempool pointer for caam_jr_op_ctx */
+       int64_t vtop_offset;
+       uint8_t digest[JR_MAX_NB_MAX_DIGEST];
+};
+
+/**
+ * Checksum
+ *
+ * @param buffer calculate chksum for buffer
+ * @param len    buffer length
+ *
+ * @return checksum value in host cpu order
+ */
+static inline uint16_t
+calc_chksum(void *buffer, int len)
+{
+       uint16_t *buf = (uint16_t *)buffer;
+       uint32_t sum = 0;
+       uint16_t result;
+
+       for (sum = 0; len > 1; len -= 2)
+               sum += *buf++;
+
+       if (len == 1)
+               sum += *(unsigned char *)buf;
+
+       sum = (sum >> 16) + (sum & 0xFFFF);
+       sum += (sum >> 16);
+       result = ~sum;
+
+       return result;
+}
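+
+/* Usage sketch (caller code assumed, not part of this driver's API):
+ * the routine computes the standard ones-complement Internet checksum,
+ * so an IPv4 header checksum can be filled in as:
+ *      hdr.ip_sum = 0;
+ *      hdr.ip_sum = calc_chksum(&hdr, sizeof(struct ip));
+ * The result is already complemented and can be stored directly.
+ */
+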
+struct uio_job_ring {
+       uint32_t jr_id;
+       uint32_t uio_fd;
+       void *register_base_addr;
+       int map_size;
+       int uio_minor_number;
+};
+
+int sec_cleanup(void);
+int sec_configure(void);
+struct uio_job_ring *config_job_ring(void);
+void free_job_ring(uint32_t uio_fd);
+
+/* For DMA memory allocation of specified length and alignment */
+static inline void *
+caam_jr_dma_mem_alloc(size_t align, size_t len)
+{
+       return rte_malloc("mem_alloc", len, align);
+}
+
+/* For freeing DMA memory */
+static inline void
+caam_jr_dma_free(void *ptr)
+{
+       rte_free(ptr);
+}
+
+static inline rte_iova_t
+caam_jr_mem_vtop(void *vaddr)
+{
+       const struct rte_memseg *ms;
+
+       ms = rte_mem_virt2memseg(vaddr, NULL);
+       if (ms)
+               return ms->iova + RTE_PTR_DIFF(vaddr, ms->addr);
+       return 0;
+}
+
+static inline void *
+caam_jr_dma_ptov(rte_iova_t paddr)
+{
+       return rte_mem_iova2virt(paddr);
+}
+
+/* Virtual to physical address conversion */
+static inline rte_iova_t
+caam_jr_dma_vtop(void *ptr)
+{
+       return caam_jr_mem_vtop(ptr);
+}
+
+/** @brief Request to SEC kernel driver to enable interrupts for
+ *         descriptor finished processing
+ *  Use UIO to communicate with SEC kernel driver: write command
+ *  value that indicates an IRQ enable action into UIO file descriptor
+ *  of this job ring.
+ *
+ * @param [in]  uio_fd     Job Ring UIO File descriptor
+ * @retval 0 for success
+ * @retval -1 value for error
+ */
+uint32_t caam_jr_enable_irqs(uint32_t uio_fd);
+
+/** @brief Request to SEC kernel driver to disable interrupts for descriptor
+ *  finished processing
+ *  Use UIO to communicate with SEC kernel driver: write command
+ *  value that indicates an IRQ disable action into UIO file descriptor
+ *  of this job ring.
+ *
+ * @param [in]  uio_fd    UIO File descriptor
+ * @retval 0 for success
+ * @retval -1 value for error
+ *
+ */
+uint32_t caam_jr_disable_irqs(uint32_t uio_fd);
+
+#endif
diff --git a/drivers/crypto/caam_jr/caam_jr_uio.c b/drivers/crypto/caam_jr/caam_jr_uio.c
new file mode 100644 (file)
index 0000000..c07d9db
--- /dev/null
@@ -0,0 +1,501 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2017-2018 NXP
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_crypto.h>
+#include <rte_security.h>
+
+#include <caam_jr_config.h>
+#include <caam_jr_hw_specific.h>
+#include <caam_jr_pvt.h>
+#include <caam_jr_log.h>
+
+/* RTA header files */
+#include <hw/desc/common.h>
+#include <hw/desc/algo.h>
+#include <hw/desc/ipsec.h>
+
+/* Prefix path to sysfs directory where UIO device attributes are exported.
+ * Path for UIO device X is /sys/class/uio/uioX
+ */
+#define SEC_UIO_DEVICE_SYS_ATTR_PATH    "/sys/class/uio"
+
+/* Subfolder in sysfs where mapping attributes are exported
+ * for each UIO device. Path for mapping Y for device X is:
+ *      /sys/class/uio/uioX/maps/mapY
+ */
+#define SEC_UIO_DEVICE_SYS_MAP_ATTR     "maps/map"
+
+/* Name of UIO device file prefix. Each UIO device will have a device file
+ * /dev/uioX, where X is the minor device number.
+ */
+#define SEC_UIO_DEVICE_FILE_NAME    "/dev/uio"
+
+/*
+ * Name of UIO device. Each user space SEC job ring will have a corresponding
+ * UIO device with the name sec-channelX, where X is the job ring id.
+ * Maximum length is #SEC_UIO_MAX_DEVICE_NAME_LENGTH.
+ *
+ * @note  Must be kept in synch with SEC kernel driver
+ * define #SEC_UIO_DEVICE_NAME !
+ */
+#define SEC_UIO_DEVICE_NAME     "fsl-jr"
+
+/* Maximum length for the name of an UIO device file.
+ * Device file name format is: /dev/uioX.
+ */
+#define SEC_UIO_MAX_DEVICE_FILE_NAME_LENGTH 30
+
+/* Maximum length for the name of an attribute file for an UIO device.
+ * Attribute files are exported in sysfs and have the name formatted as:
+ *      /sys/class/uio/uioX/<attribute_file_name>
+ */
+#define SEC_UIO_MAX_ATTR_FILE_NAME  100
+
+/* Command that is used by SEC user space driver and SEC kernel driver
+ *  to signal a request from the former to the latter to disable job DONE
+ *  and error IRQs on a certain job ring.
+ *  The configuration is done at SEC Controller's level.
+ *  @note   Need to be kept in synch with #SEC_UIO_DISABLE_IRQ_CMD from
+ *          linux/drivers/crypto/talitos.c !
+ */
+#define SEC_UIO_DISABLE_IRQ_CMD     0
+
+/* Command that is used by SEC user space driver and SEC kernel driver
+ *  to signal a request from the former to the latter to enable job DONE
+ *  and error IRQs on a certain job ring.
+ *  The configuration is done at SEC Controller's level.
+ *  @note   Need to be kept in synch with #SEC_UIO_ENABLE_IRQ_CMD from
+ *          linux/drivers/crypto/talitos.c !
+ */
+#define SEC_UIO_ENABLE_IRQ_CMD      1
+
+/** Command that is used by SEC user space driver and SEC kernel driver
+ *  to signal a request from the former to the latter to do a SEC engine reset.
+ *  @note   Need to be kept in synch with #SEC_UIO_RESET_SEC_ENGINE_CMD from
+ *          linux/drivers/crypto/talitos.c !
+ */
+#define SEC_UIO_RESET_SEC_ENGINE_CMD    3
+
+/* The id for the mapping used to export SEC's registers to
+ * user space through UIO devices.
+ */
+#define SEC_UIO_MAP_ID              0
+
+static struct uio_job_ring g_uio_job_ring[MAX_SEC_JOB_RINGS];
+static int g_uio_jr_num;
+
+/** @brief Checks if a file name contains a certain substring.
+ * If so, it extracts the number following the substring.
+ * This function assumes a filename format of: [text][number].
+ * @param [in]  filename    File name
+ * @param [in]  match       String to match in file name
+ * @param [out] number      The number extracted from filename
+ *
+ * @retval true if file name matches the criteria
+ * @retval false if file name does not match the criteria
+ */
+static bool
+file_name_match_extract(const char filename[], const char match[], int *number)
+{
+       char *substr = NULL;
+
+       substr = strstr(filename, match);
+       if (substr == NULL)
+               return false;
+
+       /* substring <match> was found in <filename>
+        * read number following <match> substring in <filename>
+        */
+       if (sscanf(filename + strlen(match), "%d", number) <= 0)
+               return false;
+
+       return true;
+}
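+
+/* Example (hypothetical file names, for illustration): with the UIO
+ * naming scheme used in this file
+ *      file_name_match_extract("uio12", "uio", &n)    -> true,  n == 12
+ *      file_name_match_extract("uio12", "fsl-jr", &n) -> false
+ */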
+
+/** @brief Reads first line from a file.
+ * Composes file name as: root/subdir/filename
+ *
+ * @param [in]  root     Root path
+ * @param [in]  subdir   Subdirectory name
+ * @param [in]  filename File name
+ * @param [out] line     The first line read from file.
+ *
+ * @retval 0 for success
+ * @retval other value for error
+ */
+static int
+file_read_first_line(const char root[], const char subdir[],
+                    const char filename[], char *line)
+{
+       char absolute_file_name[SEC_UIO_MAX_ATTR_FILE_NAME];
+       int fd = 0, ret = 0;
+
+       /*compose the file name: root/subdir/filename */
+       memset(absolute_file_name, 0, sizeof(absolute_file_name));
+       snprintf(absolute_file_name, SEC_UIO_MAX_ATTR_FILE_NAME,
+                "%s/%s/%s", root, subdir, filename);
+
+       fd = open(absolute_file_name, O_RDONLY);
+       SEC_ASSERT(fd > 0, fd, "Error opening file %s",
+                       absolute_file_name);
+
+       /* read UIO device name from first line in file */
+       ret = read(fd, line, SEC_UIO_MAX_DEVICE_FILE_NAME_LENGTH);
+       close(fd);
+
+       /* NUL-terminate the string */
+       line[SEC_UIO_MAX_DEVICE_FILE_NAME_LENGTH - 1] = '\0';
+
+       if (ret <= 0) {
+               CAAM_JR_ERR("Error reading from file %s", absolute_file_name);
+               return ret;
+       }
+
+       return 0;
+}
+
+/** @brief Uses UIO control to send commands to SEC kernel driver.
+ * The mechanism is to write a command word into the file descriptor
+ * that the user-space driver obtained for each user-space SEC job ring.
+ * Both user-space driver and kernel driver must have the same understanding
+ * about the command codes.
+ *
+ * @param [in]  uio_fd              The UIO file descriptor
+ * @param [in]  uio_command         Command word
+ *
+ * @retval Result of write operation on the job ring's UIO file descriptor.
+ *         Should be sizeof(int) for successful operations.
+ *         Other values can be returned and used, if desired to add special
+ *         meaning to return values, but this has to be programmed in SEC
+ *         kernel driver as well. No special return values are used.
+ */
+static int
+sec_uio_send_command(uint32_t uio_fd, int32_t uio_command)
+{
+       int ret;
+
+       /* Use UIO file descriptor we have for this job ring.
+        * Writing a command code to this file descriptor will make the
+        * SEC kernel driver execute the desired command.
+        */
+       ret = write(uio_fd, &uio_command, sizeof(int));
+       return ret;
+}
+
+/** @brief Request to SEC kernel driver to enable interrupts for
+ *         descriptor finished processing
+ *  Use UIO to communicate with SEC kernel driver: write command
+ *  value that indicates an IRQ enable action into UIO file descriptor
+ *  of this job ring.
+ *
+ * @param [in]  uio_fd     Job Ring UIO File descriptor
+ * @retval 0 for success
+ * @retval -1 value for error
+ */
+uint32_t
+caam_jr_enable_irqs(uint32_t uio_fd)
+{
+       int ret;
+
+       /* Use UIO file descriptor we have for this job ring.
+        * Writing a command code to this file descriptor will make the
+        * SEC kernel driver enable DONE and Error IRQs for this job ring,
+        * at Controller level.
+        */
+       ret = sec_uio_send_command(uio_fd, SEC_UIO_ENABLE_IRQ_CMD);
+       SEC_ASSERT(ret == sizeof(int), -1,
+               "Failed to request SEC engine to enable job done and "
+               "error IRQs through UIO control. UIO FD %d. Reset SEC driver!",
+               uio_fd);
+       CAAM_JR_DEBUG("Enabled IRQs on jr with uio_fd %d", uio_fd);
+       return 0;
+}
+
+
+/** @brief Request to SEC kernel driver to disable interrupts for descriptor
+ *  finished processing
+ *  Use UIO to communicate with SEC kernel driver: write command
+ *  value that indicates an IRQ disable action into UIO file descriptor
+ *  of this job ring.
+ *
+ * @param [in]  uio_fd    UIO File descriptor
+ * @retval 0 for success
+ * @retval -1 value for error
+ *
+ */
+uint32_t
+caam_jr_disable_irqs(uint32_t uio_fd)
+{
+       int ret;
+
+       /* Use UIO file descriptor we have for this job ring.
+        * Writing a command code to this file descriptor will make the
+        * SEC kernel driver disable IRQs for this job ring,
+        * at Controller level.
+        */
+
+       ret = sec_uio_send_command(uio_fd, SEC_UIO_DISABLE_IRQ_CMD);
+       SEC_ASSERT(ret == sizeof(int), -1,
+               "Failed to request SEC engine to disable job done and "
+               "error IRQs through UIO control. UIO FD %d. Reset SEC driver!",
+               uio_fd);
+       CAAM_JR_DEBUG("Disabled IRQs on jr with uio_fd %d", uio_fd);
+       return 0;
+}
+
+/** @brief Maps register range assigned for a job ring.
+ *
+ * @param [in] uio_device_fd    UIO device file descriptor
+ * @param [in] uio_device_id    UIO device id
+ * @param [in] uio_map_id       UIO allows a maximum of 5 different mappings
+ *                              for each device. Maps start with id 0.
+ * @param [out] map_size        Map size.
+ * @retval  NULL if failed to map registers
+ * @retval  Virtual address for mapped register address range
+ */
+static void *
+uio_map_registers(int uio_device_fd, int uio_device_id,
+                 int uio_map_id, int *map_size)
+{
+       void *mapped_address = NULL;
+       unsigned int uio_map_size = 0;
+       char uio_sys_root[SEC_UIO_MAX_ATTR_FILE_NAME];
+       char uio_sys_map_subdir[SEC_UIO_MAX_ATTR_FILE_NAME];
+       char uio_map_size_str[32];
+       int ret = 0;
+
+       /* compose the file name: root/subdir/filename */
+       memset(uio_sys_root, 0, sizeof(uio_sys_root));
+       memset(uio_sys_map_subdir, 0, sizeof(uio_sys_map_subdir));
+       memset(uio_map_size_str, 0, sizeof(uio_map_size_str));
+
+       /* Compose string: /sys/class/uio/uioX */
+       sprintf(uio_sys_root, "%s/%s%d", SEC_UIO_DEVICE_SYS_ATTR_PATH,
+               "uio", uio_device_id);
+       /* Compose string: maps/mapY */
+       sprintf(uio_sys_map_subdir, "%s%d", SEC_UIO_DEVICE_SYS_MAP_ATTR,
+               uio_map_id);
+
+       /* Read first (and only) line from file
+        * /sys/class/uio/uioX/maps/mapY/size
+        */
+       ret = file_read_first_line(uio_sys_root, uio_sys_map_subdir,
+                                "size", uio_map_size_str);
+       SEC_ASSERT(ret == 0, NULL, "file_read_first_line() failed");
+
+       /* Read mapping size, expressed in hex (base 16) */
+       uio_map_size = strtol(uio_map_size_str, NULL, 16);
+
+       /* Map the region in user space */
+       mapped_address = mmap(0, /*dynamically choose virtual address */
+               uio_map_size, PROT_READ | PROT_WRITE,
+               MAP_SHARED, uio_device_fd, 0);
+       /* offset = 0 because UIO device has only one mapping
+        * for the entire SEC register memory
+        */
+       if (mapped_address == MAP_FAILED) {
+               CAAM_JR_ERR(
+                       "Failed to map registers! errno = %d job ring fd = %d, "
+                       "uio device id = %d, uio map id = %d", errno,
+                       uio_device_fd, uio_device_id, uio_map_id);
+               return NULL;
+       }
+
+       /*
+        * Save the map size to use it later on for munmap-ing.
+        */
+       *map_size = uio_map_size;
+
+       CAAM_JR_INFO("UIO dev[%d] mapped region [id =%d] size 0x%x at %p",
+               uio_device_id, uio_map_id, uio_map_size, mapped_address);
+
+       return mapped_address;
+}
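
file_read_first_line() used above is defined elsewhere in this file. A
minimal sketch of the expected behaviour, assuming it composes
root/subdir/filename and reads a single line; the helper name and the
explicit length parameter are additions for illustration, and
SEC_UIO_MAX_ATTR_FILE_NAME is the path-length constant already used above:

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical sketch: read the first line of root/subdir/filename
     * into 'line' (at most 'len' bytes), stripping the trailing newline.
     * Returns 0 on success, -1 on failure.
     */
    static int
    file_read_first_line_sketch(const char *root, const char *subdir,
                                const char *filename, char *line, size_t len)
    {
            char path[SEC_UIO_MAX_ATTR_FILE_NAME];
            FILE *f;

            snprintf(path, sizeof(path), "%s/%s/%s", root, subdir, filename);
            f = fopen(path, "r");
            if (f == NULL)
                    return -1;
            if (fgets(line, (int)len, f) == NULL) {
                    fclose(f);
                    return -1;
            }
            line[strcspn(line, "\n")] = '\0';
            fclose(f);
            return 0;
    }
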
+
+void
+free_job_ring(uint32_t uio_fd)
+{
+       struct uio_job_ring *job_ring = NULL;
+       int i;
+
+       if (!uio_fd)
+               return;
+
+       for (i = 0; i < MAX_SEC_JOB_RINGS; i++) {
+               if (g_uio_job_ring[i].uio_fd == uio_fd) {
+                       job_ring = &g_uio_job_ring[i];
+                       break;
+               }
+       }
+
+       if (job_ring == NULL) {
+               CAAM_JR_ERR("JR not available for fd = %x\n", uio_fd);
+               return;
+       }
+
+       /* Close the UIO device file */
+       CAAM_JR_INFO("Closed device file for job ring %d, fd = %d",
+                       job_ring->jr_id, job_ring->uio_fd);
+       close(job_ring->uio_fd);
+       g_uio_jr_num--;
+       job_ring->uio_fd = 0;
+       if (job_ring->register_base_addr == NULL)
+               return;
+
+       /* Unmap the register memory of the device */
+       if (munmap(job_ring->register_base_addr, job_ring->map_size)) {
+               CAAM_JR_INFO("cannot munmap(%p, 0x%lx): %s",
+                       job_ring->register_base_addr,
+                       (unsigned long)job_ring->map_size, strerror(errno));
+       } else
+               CAAM_JR_DEBUG("  JR UIO memory unmapped at %p",
+                               job_ring->register_base_addr);
+       job_ring->register_base_addr = NULL;
+}
+
+struct uio_job_ring *
+config_job_ring(void)
+{
+       char uio_device_file_name[32];
+       struct uio_job_ring *job_ring = NULL;
+       int i;
+
+       for (i = 0; i < MAX_SEC_JOB_RINGS; i++) {
+               if (g_uio_job_ring[i].uio_fd == 0) {
+                       job_ring = &g_uio_job_ring[i];
+                       g_uio_jr_num++;
+                       break;
+               }
+       }
+
+       if (job_ring == NULL) {
+               CAAM_JR_ERR("No free job ring\n");
+               return NULL;
+       }
+
+       /* Find UIO device created by SEC kernel driver for this job ring. */
+       memset(uio_device_file_name, 0, sizeof(uio_device_file_name));
+
+       sprintf(uio_device_file_name, "%s%d", SEC_UIO_DEVICE_FILE_NAME,
+               job_ring->uio_minor_number);
+
+       /* Open device file */
+       job_ring->uio_fd = open(uio_device_file_name, O_RDWR);
+       SEC_ASSERT(job_ring->uio_fd > 0, NULL,
+               "Failed to open UIO device file for job ring %d",
+               job_ring->jr_id);
+
+       CAAM_JR_INFO("Open device(%s) file for job ring=%d , uio_fd = %d",
+               uio_device_file_name, job_ring->jr_id, job_ring->uio_fd);
+
+       ASSERT(job_ring->register_base_addr == NULL);
+       job_ring->register_base_addr = uio_map_registers(
+                       job_ring->uio_fd, job_ring->uio_minor_number,
+                       SEC_UIO_MAP_ID, &job_ring->map_size);
+
+       SEC_ASSERT(job_ring->register_base_addr != NULL, NULL,
+               "Failed to map SEC registers");
+       return job_ring;
+}
+
+int
+sec_configure(void)
+{
+       char uio_name[32];
+       int config_jr_no = 0, jr_id = -1;
+       int uio_minor_number = -1;
+       int ret;
+       DIR *d = NULL;
+       struct dirent *dir;
+
+       d = opendir(SEC_UIO_DEVICE_SYS_ATTR_PATH);
+       if (d == NULL) {
+               printf("\nError opening directory '%s': %s\n",
+                       SEC_UIO_DEVICE_SYS_ATTR_PATH, strerror(errno));
+               return -1;
+       }
+
+       /* Iterate through all subdirs */
+       while ((dir = readdir(d)) != NULL) {
+               if (!strncmp(dir->d_name, ".", 1) ||
+                               !strncmp(dir->d_name, "..", 2))
+                       continue;
+
+               if (file_name_match_extract(dir->d_name, "uio",
+                                           &uio_minor_number)) {
+                       /*
+                        * Open file uioX/name and read the first line, which
+                        * contains the name of the device. Based on that name,
+                        * check whether this UIO device belongs to a SEC job
+                        * ring and, if so, extract the job ring id.
+                        */
+                       memset(uio_name, 0, sizeof(uio_name));
+                       ret = file_read_first_line(SEC_UIO_DEVICE_SYS_ATTR_PATH,
+                                       dir->d_name, "name", uio_name);
+                       CAAM_JR_INFO("sec device uio name: %s", uio_name);
+                       SEC_ASSERT(ret == 0, -1, "file_read_first_line failed");
+
+                       if (file_name_match_extract(uio_name,
+                                               SEC_UIO_DEVICE_NAME,
+                                               &jr_id)) {
+                               g_uio_job_ring[config_jr_no].jr_id = jr_id;
+                               g_uio_job_ring[config_jr_no].uio_minor_number =
+                                                       uio_minor_number;
+                               CAAM_JR_INFO("Detected logical JRID:%d", jr_id);
+                               config_jr_no++;
+
+                               /* TODO: find the actual ring id
+                                * OF_FULLNAME=/soc/crypto@1700000/jr@20000
+                                */
+                       }
+               }
+       }
+       closedir(d);
+
+       if (config_jr_no == 0) {
+               CAAM_JR_ERR("! No SEC Job Rings assigned for userspace usage!");
+               return 0;
+       }
+       CAAM_JR_INFO("Total JR detected =%d", config_jr_no);
+       return config_jr_no;
+}
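
sec_configure() relies on file_name_match_extract() both to match "uioX"
directory names and to pull the job ring id out of the UIO device name. A
minimal sketch under the assumption that the helper matches a prefix
followed by a decimal id (the helper name below is hypothetical):

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical sketch: return non-zero when 'name' starts with
     * 'pattern' followed by a decimal id, storing that id in *value.
     * E.g. ("uio3", "uio") yields *value == 3.
     */
    static int
    file_name_match_extract_sketch(const char *name, const char *pattern,
                                   int *value)
    {
            size_t plen = strlen(pattern);

            if (strncmp(name, pattern, plen) != 0)
                    return 0;
            return sscanf(name + plen, "%d", value) == 1;
    }
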
+
+int
+sec_cleanup(void)
+{
+       int i;
+       struct uio_job_ring *job_ring;
+
+       for (i = 0; i < g_uio_jr_num; i++) {
+               job_ring = &g_uio_job_ring[i];
+               /* munmap SEC's register memory */
+               if (job_ring->register_base_addr) {
+                       munmap(job_ring->register_base_addr,
+                               job_ring->map_size);
+                       job_ring->register_base_addr = NULL;
+               }
+               /* The fd must be closed only after shutdown, since the
+                * UIO commands still need to be sent through it.
+                */
+               if (job_ring->uio_fd != 0) {
+                       CAAM_JR_INFO(
+                       "Closed device file for job ring %d, fd = %d",
+                       job_ring->jr_id, job_ring->uio_fd);
+                       close(job_ring->uio_fd);
+               }
+       }
+       return 0;
+}
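
Taken together, the lifecycle of these helpers is: sec_configure() scans
/sys/class/uio once at startup, config_job_ring() opens and maps one free
ring, and free_job_ring()/sec_cleanup() undo that work. A minimal usage
sketch (a hypothetical test harness, not part of the driver; it assumes
caam_jr_enable_irqs() is the enable counterpart whose tail appears at the
top of this hunk):

    /* Discover the UIO-exported job rings, map one, toggle its IRQs,
     * then release everything.
     */
    static int
    caam_jr_uio_selftest(void)
    {
            struct uio_job_ring *jr;

            if (sec_configure() <= 0)
                    return -1;  /* no SEC job rings exported via UIO */

            jr = config_job_ring();
            if (jr == NULL)
                    return -1;

            caam_jr_enable_irqs(jr->uio_fd);
            caam_jr_disable_irqs(jr->uio_fd);

            sec_cleanup();
            return 0;
    }
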
diff --git a/drivers/crypto/caam_jr/meson.build b/drivers/crypto/caam_jr/meson.build
new file mode 100644 (file)
index 0000000..99b71ae
--- /dev/null
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 NXP
+
+if host_machine.system() != 'linux'
+        build = false
+endif
+
+deps += ['bus_vdev', 'bus_dpaa', 'security']
+sources = files('caam_jr_capabilities.c',
+               'caam_jr_hw.c',
+               'caam_jr_uio.c',
+               'caam_jr.c')
+
+allow_experimental_apis = true
+
+includes += include_directories('../dpaa2_sec/')
+includes += include_directories('../../bus/dpaa/include/')
diff --git a/drivers/crypto/caam_jr/rte_pmd_caam_jr_version.map b/drivers/crypto/caam_jr/rte_pmd_caam_jr_version.map
new file mode 100644 (file)
index 0000000..521e51f
--- /dev/null
@@ -0,0 +1,4 @@
+DPDK_18.11 {
+
+       local: *;
+};
index da3d8f8..f537f76 100644 (file)
@@ -4,13 +4,6 @@
 #
 
 include $(RTE_SDK)/mk/rte.vars.mk
-
-ifneq ($(MAKECMDGOALS),clean)
-ifneq ($(CONFIG_RTE_LIBRTE_SECURITY),y)
-$(error "RTE_LIBRTE_SECURITY is required to build RTE_LIBRTE_PMD_DPAA2_SEC")
-endif
-endif
-
 #
 # library name
 #
@@ -20,7 +13,6 @@ LIB = librte_pmd_dpaa2_sec.a
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
-CFLAGS += -D _GNU_SOURCE
 
 ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
 ifeq ($(shell test $(GCC_VERSION) -gt 70 && echo 1), 1)
@@ -41,7 +33,7 @@ CFLAGS += -I$(RTE_SDK)/lib/librte_eal/linuxapp/eal
 EXPORT_MAP := rte_pmd_dpaa2_sec_version.map
 
 # library version
-LIBABIVER := 1
+LIBABIVER := 2
 
 # library source files
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC) += dpaa2_sec_dpseci.c
@@ -51,5 +43,6 @@ LDLIBS += -lrte_bus_fslmc
 LDLIBS += -lrte_mempool_dpaa2
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_cryptodev
+LDLIBS += -lrte_common_dpaax
 
 include $(RTE_SDK)/mk/rte.lib.mk
index 2a3c61c..6095c60 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  *   Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved.
- *   Copyright 2016 NXP
+ *   Copyright 2016-2018 NXP
  *
  */
 
@@ -10,7 +10,6 @@
 
 #include <rte_mbuf.h>
 #include <rte_cryptodev.h>
-#include <rte_security_driver.h>
 #include <rte_malloc.h>
 #include <rte_memcpy.h>
 #include <rte_string_fns.h>
 #include <dpaa2_hw_pvt.h>
 #include <dpaa2_hw_dpio.h>
 #include <dpaa2_hw_mempool.h>
+#include <fsl_dpopr.h>
 #include <fsl_dpseci.h>
 #include <fsl_mc_sys.h>
 
 #include "dpaa2_sec_priv.h"
+#include "dpaa2_sec_event.h"
 #include "dpaa2_sec_logs.h"
 
 /* Required types */
@@ -35,6 +36,7 @@ typedef uint64_t      dma_addr_t;
 
 /* RTA header files */
 #include <hw/desc/ipsec.h>
+#include <hw/desc/pdcp.h>
 #include <hw/desc/algo.h>
 
 /* Minimum job descriptor consists of a oneword job descriptor HEADER and
@@ -61,12 +63,88 @@ static uint8_t cryptodev_driver_id;
 
 int dpaa2_logtype_sec;
 
+static inline int
+build_proto_compound_fd(dpaa2_sec_session *sess,
+              struct rte_crypto_op *op,
+              struct qbman_fd *fd, uint16_t bpid)
+{
+       struct rte_crypto_sym_op *sym_op = op->sym;
+       struct ctxt_priv *priv = sess->ctxt;
+       struct qbman_fle *fle, *ip_fle, *op_fle;
+       struct sec_flow_context *flc;
+       struct rte_mbuf *src_mbuf = sym_op->m_src;
+       struct rte_mbuf *dst_mbuf = sym_op->m_dst;
+       int retval;
+
+       if (!dst_mbuf)
+               dst_mbuf = src_mbuf;
+
+       /* Save the shared descriptor */
+       flc = &priv->flc_desc[0].flc;
+
+       /* we are using the first FLE entry to store Mbuf */
+       retval = rte_mempool_get(priv->fle_pool, (void **)(&fle));
+       if (retval) {
+               DPAA2_SEC_ERR("Memory alloc failed");
+               return -1;
+       }
+       memset(fle, 0, FLE_POOL_BUF_SIZE);
+       DPAA2_SET_FLE_ADDR(fle, (size_t)op);
+       DPAA2_FLE_SAVE_CTXT(fle, (ptrdiff_t)priv);
+
+       op_fle = fle + 1;
+       ip_fle = fle + 2;
+
+       if (likely(bpid < MAX_BPID)) {
+               DPAA2_SET_FD_BPID(fd, bpid);
+               DPAA2_SET_FLE_BPID(op_fle, bpid);
+               DPAA2_SET_FLE_BPID(ip_fle, bpid);
+       } else {
+               DPAA2_SET_FD_IVP(fd);
+               DPAA2_SET_FLE_IVP(op_fle);
+               DPAA2_SET_FLE_IVP(ip_fle);
+       }
+
+       /* Configure FD as a FRAME LIST */
+       DPAA2_SET_FD_ADDR(fd, DPAA2_VADDR_TO_IOVA(op_fle));
+       DPAA2_SET_FD_COMPOUND_FMT(fd);
+       DPAA2_SET_FD_FLC(fd, (ptrdiff_t)flc);
+
+       /* Configure Output FLE with dst mbuf data  */
+       DPAA2_SET_FLE_ADDR(op_fle, DPAA2_MBUF_VADDR_TO_IOVA(dst_mbuf));
+       DPAA2_SET_FLE_OFFSET(op_fle, dst_mbuf->data_off);
+       DPAA2_SET_FLE_LEN(op_fle, dst_mbuf->buf_len);
+
+       /* Configure Input FLE with src mbuf data */
+       DPAA2_SET_FLE_ADDR(ip_fle, DPAA2_MBUF_VADDR_TO_IOVA(src_mbuf));
+       DPAA2_SET_FLE_OFFSET(ip_fle, src_mbuf->data_off);
+       DPAA2_SET_FLE_LEN(ip_fle, src_mbuf->pkt_len);
+
+       DPAA2_SET_FD_LEN(fd, ip_fle->length);
+       DPAA2_SET_FLE_FIN(ip_fle);
+
+#ifdef ENABLE_HFN_OVERRIDE
+       if (sess->ctxt_type == DPAA2_SEC_PDCP && sess->pdcp.hfn_ovd) {
+               /* enable HFN override */
+               DPAA2_SET_FLE_INTERNAL_JD(ip_fle, sess->pdcp.hfn_ovd);
+               DPAA2_SET_FLE_INTERNAL_JD(op_fle, sess->pdcp.hfn_ovd);
+               DPAA2_SET_FD_INTERNAL_JD(fd, sess->pdcp.hfn_ovd);
+       }
+#endif
+
+       return 0;
+
+}
+
 static inline int
 build_proto_fd(dpaa2_sec_session *sess,
               struct rte_crypto_op *op,
               struct qbman_fd *fd, uint16_t bpid)
 {
        struct rte_crypto_sym_op *sym_op = op->sym;
+       if (sym_op->m_dst)
+               return build_proto_compound_fd(sess, op, fd, bpid);
+
        struct ctxt_priv *priv = sess->ctxt;
        struct sec_flow_context *flc;
        struct rte_mbuf *mbuf = sym_op->m_src;
@@ -1124,6 +1202,9 @@ build_sec_fd(struct rte_crypto_op *op,
                case DPAA2_SEC_IPSEC:
                        ret = build_proto_fd(sess, op, fd, bpid);
                        break;
+               case DPAA2_SEC_PDCP:
+                       ret = build_proto_compound_fd(sess, op, fd, bpid);
+                       break;
                case DPAA2_SEC_HASH_CIPHER:
                default:
                        DPAA2_SEC_ERR("error: Unsupported session");
@@ -1145,6 +1226,7 @@ dpaa2_sec_enqueue_burst(void *qp, struct rte_crypto_op **ops,
        struct dpaa2_sec_qp *dpaa2_qp = (struct dpaa2_sec_qp *)qp;
        struct qbman_swp *swp;
        uint16_t num_tx = 0;
+       uint32_t flags[MAX_TX_RING_SLOTS] = {0};
        /*todo - need to support multiple buffer pools */
        uint16_t bpid;
        struct rte_mempool *mb_pool;
@@ -1172,9 +1254,19 @@ dpaa2_sec_enqueue_burst(void *qp, struct rte_crypto_op **ops,
        swp = DPAA2_PER_LCORE_PORTAL;
 
        while (nb_ops) {
-               frames_to_send = (nb_ops >> 3) ? MAX_TX_RING_SLOTS : nb_ops;
+               frames_to_send = (nb_ops > dpaa2_eqcr_size) ?
+                       dpaa2_eqcr_size : nb_ops;
 
                for (loop = 0; loop < frames_to_send; loop++) {
+                       if ((*ops)->sym->m_src->seqn) {
+                               uint8_t dqrr_index =
+                                       (*ops)->sym->m_src->seqn - 1;
+
+                               flags[loop] =
+                                       QBMAN_ENQUEUE_FLAG_DCA | dqrr_index;
+                               DPAA2_PER_LCORE_DQRR_SIZE--;
+                               DPAA2_PER_LCORE_DQRR_HELD &=
+                                       ~(1 << dqrr_index);
+                               (*ops)->sym->m_src->seqn =
+                                       DPAA2_INVALID_MBUF_SEQN;
+                       }
+
                        /*Clear the unused FD fields before sending*/
                        memset(&fd_arr[loop], 0, sizeof(struct qbman_fd));
                        mb_pool = (*ops)->sym->m_src->pool;
@@ -1191,7 +1283,7 @@ dpaa2_sec_enqueue_burst(void *qp, struct rte_crypto_op **ops,
                while (loop < frames_to_send) {
                        loop += qbman_swp_enqueue_multiple(swp, &eqdesc,
                                                        &fd_arr[loop],
-                                                       NULL,
+                                                       &flags[loop],
                                                        frames_to_send - loop);
                }
 
@@ -1216,6 +1308,9 @@ sec_simple_fd_to_mbuf(const struct qbman_fd *fd, __rte_unused uint8_t id)
                DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd)),
                rte_dpaa2_bpid_info[DPAA2_GET_FD_BPID(fd)].meta_data_size);
 
+       diff = len - mbuf->pkt_len;
+       mbuf->pkt_len += diff;
+       mbuf->data_len += diff;
        op = (struct rte_crypto_op *)(size_t)mbuf->buf_iova;
        mbuf->buf_iova = op->sym->aead.digest.phys_addr;
        op->sym->aead.digest.phys_addr = 0L;
@@ -1226,9 +1321,6 @@ sec_simple_fd_to_mbuf(const struct qbman_fd *fd, __rte_unused uint8_t id)
                mbuf->data_off += SEC_FLC_DHR_OUTBOUND;
        else
                mbuf->data_off += SEC_FLC_DHR_INBOUND;
-       diff = len - mbuf->pkt_len;
-       mbuf->pkt_len += diff;
-       mbuf->data_len += diff;
 
        return op;
 }
@@ -1273,6 +1365,16 @@ sec_fd_to_mbuf(const struct qbman_fd *fd, uint8_t driver_id)
        } else
                dst = src;
 
+       if (op->sess_type == RTE_CRYPTO_OP_SECURITY_SESSION) {
+               dpaa2_sec_session *sess = (dpaa2_sec_session *)
+                       get_sec_session_private_data(op->sym->sec_session);
+               if (sess->ctxt_type == DPAA2_SEC_IPSEC) {
+                       uint16_t len = DPAA2_GET_FD_LEN(fd);
+                       dst->pkt_len = len;
+                       dst->data_len = len;
+               }
+       }
+
        DPAA2_SEC_DP_DEBUG("mbuf %p BMAN buf addr %p,"
                " fdaddr =%" PRIx64 " bpid =%d meta =%d off =%d, len =%d\n",
                (void *)dst,
@@ -1321,8 +1423,8 @@ dpaa2_sec_dequeue_burst(void *qp, struct rte_crypto_op **ops,
 
        qbman_pull_desc_clear(&pulldesc);
        qbman_pull_desc_set_numframes(&pulldesc,
-                                     (nb_ops > DPAA2_DQRR_RING_SIZE) ?
-                                     DPAA2_DQRR_RING_SIZE : nb_ops);
+                                     (nb_ops > dpaa2_dqrr_size) ?
+                                     dpaa2_dqrr_size : nb_ops);
        qbman_pull_desc_set_fq(&pulldesc, fqid);
        qbman_pull_desc_set_storage(&pulldesc, dq_storage,
                                    (dma_addr_t)DPAA2_VADDR_TO_IOVA(dq_storage),
@@ -2099,6 +2201,7 @@ dpaa2_sec_set_session_parameters(struct rte_cryptodev *dev,
                return -1;
        }
 
+       memset(session, 0, sizeof(dpaa2_sec_session));
        /* Default IV length = 0 */
        session->iv.length = 0;
 
@@ -2139,107 +2242,127 @@ dpaa2_sec_set_session_parameters(struct rte_cryptodev *dev,
 }
 
 static int
-dpaa2_sec_set_ipsec_session(struct rte_cryptodev *dev,
-                           struct rte_security_session_conf *conf,
-                           void *sess)
+dpaa2_sec_ipsec_aead_init(struct rte_crypto_aead_xform *aead_xform,
+                       dpaa2_sec_session *session,
+                       struct alginfo *aeaddata)
 {
-       struct rte_security_ipsec_xform *ipsec_xform = &conf->ipsec;
-       struct rte_crypto_auth_xform *auth_xform;
-       struct rte_crypto_cipher_xform *cipher_xform;
-       dpaa2_sec_session *session = (dpaa2_sec_session *)sess;
-       struct ctxt_priv *priv;
-       struct ipsec_encap_pdb encap_pdb;
-       struct ipsec_decap_pdb decap_pdb;
-       struct alginfo authdata, cipherdata;
-       int bufsize;
-       struct sec_flow_context *flc;
-
        PMD_INIT_FUNC_TRACE();
 
-       if (ipsec_xform->direction == RTE_SECURITY_IPSEC_SA_DIR_EGRESS) {
-               cipher_xform = &conf->crypto_xform->cipher;
-               auth_xform = &conf->crypto_xform->next->auth;
-       } else {
-               auth_xform = &conf->crypto_xform->auth;
-               cipher_xform = &conf->crypto_xform->next->cipher;
+       session->aead_key.data = rte_zmalloc(NULL, aead_xform->key.length,
+                                              RTE_CACHE_LINE_SIZE);
+       if (session->aead_key.data == NULL && aead_xform->key.length > 0) {
+               DPAA2_SEC_ERR("No Memory for aead key");
+               return -1;
        }
-       priv = (struct ctxt_priv *)rte_zmalloc(NULL,
-                               sizeof(struct ctxt_priv) +
-                               sizeof(struct sec_flc_desc),
-                               RTE_CACHE_LINE_SIZE);
+       memcpy(session->aead_key.data, aead_xform->key.data,
+              aead_xform->key.length);
 
-       if (priv == NULL) {
-               DPAA2_SEC_ERR("No memory for priv CTXT");
-               return -ENOMEM;
-       }
+       session->digest_length = aead_xform->digest_length;
+       session->aead_key.length = aead_xform->key.length;
 
-       flc = &priv->flc_desc[0].flc;
+       aeaddata->key = (size_t)session->aead_key.data;
+       aeaddata->keylen = session->aead_key.length;
+       aeaddata->key_enc_flags = 0;
+       aeaddata->key_type = RTA_DATA_IMM;
 
-       session->ctxt_type = DPAA2_SEC_IPSEC;
-       session->cipher_key.data = rte_zmalloc(NULL,
-                                              cipher_xform->key.length,
-                                              RTE_CACHE_LINE_SIZE);
-       if (session->cipher_key.data == NULL &&
-                       cipher_xform->key.length > 0) {
-               DPAA2_SEC_ERR("No Memory for cipher key");
-               rte_free(priv);
-               return -ENOMEM;
+       switch (aead_xform->algo) {
+       case RTE_CRYPTO_AEAD_AES_GCM:
+               aeaddata->algtype = OP_ALG_ALGSEL_AES;
+               aeaddata->algmode = OP_ALG_AAI_GCM;
+               session->aead_alg = RTE_CRYPTO_AEAD_AES_GCM;
+               break;
+       case RTE_CRYPTO_AEAD_AES_CCM:
+               aeaddata->algtype = OP_ALG_ALGSEL_AES;
+               aeaddata->algmode = OP_ALG_AAI_CCM;
+               session->aead_alg = RTE_CRYPTO_AEAD_AES_CCM;
+               break;
+       default:
+               DPAA2_SEC_ERR("Crypto: Undefined AEAD specified %u",
+                             aead_xform->algo);
+               return -1;
        }
+       session->dir = (aead_xform->op == RTE_CRYPTO_AEAD_OP_ENCRYPT) ?
+                               DIR_ENC : DIR_DEC;
 
-       session->cipher_key.length = cipher_xform->key.length;
-       session->auth_key.data = rte_zmalloc(NULL,
-                                       auth_xform->key.length,
-                                       RTE_CACHE_LINE_SIZE);
-       if (session->auth_key.data == NULL &&
-                       auth_xform->key.length > 0) {
-               DPAA2_SEC_ERR("No Memory for auth key");
-               rte_free(session->cipher_key.data);
-               rte_free(priv);
-               return -ENOMEM;
+       return 0;
+}
+
+static int
+dpaa2_sec_ipsec_proto_init(struct rte_crypto_cipher_xform *cipher_xform,
+       struct rte_crypto_auth_xform *auth_xform,
+       dpaa2_sec_session *session,
+       struct alginfo *cipherdata,
+       struct alginfo *authdata)
+{
+       if (cipher_xform) {
+               session->cipher_key.data = rte_zmalloc(NULL,
+                                                      cipher_xform->key.length,
+                                                      RTE_CACHE_LINE_SIZE);
+               if (session->cipher_key.data == NULL &&
+                               cipher_xform->key.length > 0) {
+                       DPAA2_SEC_ERR("No Memory for cipher key");
+                       return -ENOMEM;
+               }
+
+               session->cipher_key.length = cipher_xform->key.length;
+               memcpy(session->cipher_key.data, cipher_xform->key.data,
+                               cipher_xform->key.length);
+               session->cipher_alg = cipher_xform->algo;
+       } else {
+               session->cipher_key.data = NULL;
+               session->cipher_key.length = 0;
+               session->cipher_alg = RTE_CRYPTO_CIPHER_NULL;
+       }
+
+       if (auth_xform) {
+               session->auth_key.data = rte_zmalloc(NULL,
+                                               auth_xform->key.length,
+                                               RTE_CACHE_LINE_SIZE);
+               if (session->auth_key.data == NULL &&
+                               auth_xform->key.length > 0) {
+                       DPAA2_SEC_ERR("No Memory for auth key");
+                       return -ENOMEM;
+               }
+               session->auth_key.length = auth_xform->key.length;
+               memcpy(session->auth_key.data, auth_xform->key.data,
+                               auth_xform->key.length);
+               session->auth_alg = auth_xform->algo;
+       } else {
+               session->auth_key.data = NULL;
+               session->auth_key.length = 0;
+               session->auth_alg = RTE_CRYPTO_AUTH_NULL;
        }
-       session->auth_key.length = auth_xform->key.length;
-       memcpy(session->cipher_key.data, cipher_xform->key.data,
-                       cipher_xform->key.length);
-       memcpy(session->auth_key.data, auth_xform->key.data,
-                       auth_xform->key.length);
 
-       authdata.key = (size_t)session->auth_key.data;
-       authdata.keylen = session->auth_key.length;
-       authdata.key_enc_flags = 0;
-       authdata.key_type = RTA_DATA_IMM;
-       switch (auth_xform->algo) {
+       authdata->key = (size_t)session->auth_key.data;
+       authdata->keylen = session->auth_key.length;
+       authdata->key_enc_flags = 0;
+       authdata->key_type = RTA_DATA_IMM;
+       switch (session->auth_alg) {
        case RTE_CRYPTO_AUTH_SHA1_HMAC:
-               authdata.algtype = OP_PCL_IPSEC_HMAC_SHA1_96;
-               authdata.algmode = OP_ALG_AAI_HMAC;
-               session->auth_alg = RTE_CRYPTO_AUTH_SHA1_HMAC;
+               authdata->algtype = OP_PCL_IPSEC_HMAC_SHA1_96;
+               authdata->algmode = OP_ALG_AAI_HMAC;
                break;
        case RTE_CRYPTO_AUTH_MD5_HMAC:
-               authdata.algtype = OP_PCL_IPSEC_HMAC_MD5_96;
-               authdata.algmode = OP_ALG_AAI_HMAC;
-               session->auth_alg = RTE_CRYPTO_AUTH_MD5_HMAC;
+               authdata->algtype = OP_PCL_IPSEC_HMAC_MD5_96;
+               authdata->algmode = OP_ALG_AAI_HMAC;
                break;
        case RTE_CRYPTO_AUTH_SHA256_HMAC:
-               authdata.algtype = OP_PCL_IPSEC_HMAC_SHA2_256_128;
-               authdata.algmode = OP_ALG_AAI_HMAC;
-               session->auth_alg = RTE_CRYPTO_AUTH_SHA256_HMAC;
+               authdata->algtype = OP_PCL_IPSEC_HMAC_SHA2_256_128;
+               authdata->algmode = OP_ALG_AAI_HMAC;
                break;
        case RTE_CRYPTO_AUTH_SHA384_HMAC:
-               authdata.algtype = OP_PCL_IPSEC_HMAC_SHA2_384_192;
-               authdata.algmode = OP_ALG_AAI_HMAC;
-               session->auth_alg = RTE_CRYPTO_AUTH_SHA384_HMAC;
+               authdata->algtype = OP_PCL_IPSEC_HMAC_SHA2_384_192;
+               authdata->algmode = OP_ALG_AAI_HMAC;
                break;
        case RTE_CRYPTO_AUTH_SHA512_HMAC:
-               authdata.algtype = OP_PCL_IPSEC_HMAC_SHA2_512_256;
-               authdata.algmode = OP_ALG_AAI_HMAC;
-               session->auth_alg = RTE_CRYPTO_AUTH_SHA512_HMAC;
+               authdata->algtype = OP_PCL_IPSEC_HMAC_SHA2_512_256;
+               authdata->algmode = OP_ALG_AAI_HMAC;
                break;
        case RTE_CRYPTO_AUTH_AES_CMAC:
-               authdata.algtype = OP_PCL_IPSEC_AES_CMAC_96;
-               session->auth_alg = RTE_CRYPTO_AUTH_AES_CMAC;
+               authdata->algtype = OP_PCL_IPSEC_AES_CMAC_96;
                break;
        case RTE_CRYPTO_AUTH_NULL:
-               authdata.algtype = OP_PCL_IPSEC_HMAC_NULL;
-               session->auth_alg = RTE_CRYPTO_AUTH_NULL;
+               authdata->algtype = OP_PCL_IPSEC_HMAC_NULL;
                break;
        case RTE_CRYPTO_AUTH_SHA224_HMAC:
        case RTE_CRYPTO_AUTH_AES_XCBC_MAC:
@@ -2255,50 +2378,119 @@ dpaa2_sec_set_ipsec_session(struct rte_cryptodev *dev,
        case RTE_CRYPTO_AUTH_AES_CBC_MAC:
        case RTE_CRYPTO_AUTH_ZUC_EIA3:
                DPAA2_SEC_ERR("Crypto: Unsupported auth alg %u",
-                             auth_xform->algo);
-               goto out;
+                             session->auth_alg);
+               return -1;
        default:
                DPAA2_SEC_ERR("Crypto: Undefined Auth specified %u",
-                             auth_xform->algo);
-               goto out;
+                             session->auth_alg);
+               return -1;
        }
-       cipherdata.key = (size_t)session->cipher_key.data;
-       cipherdata.keylen = session->cipher_key.length;
-       cipherdata.key_enc_flags = 0;
-       cipherdata.key_type = RTA_DATA_IMM;
+       cipherdata->key = (size_t)session->cipher_key.data;
+       cipherdata->keylen = session->cipher_key.length;
+       cipherdata->key_enc_flags = 0;
+       cipherdata->key_type = RTA_DATA_IMM;
 
-       switch (cipher_xform->algo) {
+       switch (session->cipher_alg) {
        case RTE_CRYPTO_CIPHER_AES_CBC:
-               cipherdata.algtype = OP_PCL_IPSEC_AES_CBC;
-               cipherdata.algmode = OP_ALG_AAI_CBC;
-               session->cipher_alg = RTE_CRYPTO_CIPHER_AES_CBC;
+               cipherdata->algtype = OP_PCL_IPSEC_AES_CBC;
+               cipherdata->algmode = OP_ALG_AAI_CBC;
                break;
        case RTE_CRYPTO_CIPHER_3DES_CBC:
-               cipherdata.algtype = OP_PCL_IPSEC_3DES;
-               cipherdata.algmode = OP_ALG_AAI_CBC;
-               session->cipher_alg = RTE_CRYPTO_CIPHER_3DES_CBC;
+               cipherdata->algtype = OP_PCL_IPSEC_3DES;
+               cipherdata->algmode = OP_ALG_AAI_CBC;
                break;
        case RTE_CRYPTO_CIPHER_AES_CTR:
-               cipherdata.algtype = OP_PCL_IPSEC_AES_CTR;
-               cipherdata.algmode = OP_ALG_AAI_CTR;
-               session->cipher_alg = RTE_CRYPTO_CIPHER_AES_CTR;
+               cipherdata->algtype = OP_PCL_IPSEC_AES_CTR;
+               cipherdata->algmode = OP_ALG_AAI_CTR;
                break;
        case RTE_CRYPTO_CIPHER_NULL:
-               cipherdata.algtype = OP_PCL_IPSEC_NULL;
+               cipherdata->algtype = OP_PCL_IPSEC_NULL;
                break;
        case RTE_CRYPTO_CIPHER_SNOW3G_UEA2:
        case RTE_CRYPTO_CIPHER_3DES_ECB:
        case RTE_CRYPTO_CIPHER_AES_ECB:
        case RTE_CRYPTO_CIPHER_KASUMI_F8:
                DPAA2_SEC_ERR("Crypto: Unsupported Cipher alg %u",
-                             cipher_xform->algo);
-               goto out;
+                             session->cipher_alg);
+               return -1;
        default:
                DPAA2_SEC_ERR("Crypto: Undefined Cipher specified %u",
-                             cipher_xform->algo);
+                             session->cipher_alg);
+               return -1;
+       }
+
+       return 0;
+}
+
+#ifdef RTE_LIBRTE_SECURITY_TEST
+static uint8_t aes_cbc_iv[] = {
+       0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+       0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f };
+#endif
+
+static int
+dpaa2_sec_set_ipsec_session(struct rte_cryptodev *dev,
+                           struct rte_security_session_conf *conf,
+                           void *sess)
+{
+       struct rte_security_ipsec_xform *ipsec_xform = &conf->ipsec;
+       struct rte_crypto_cipher_xform *cipher_xform = NULL;
+       struct rte_crypto_auth_xform *auth_xform = NULL;
+       struct rte_crypto_aead_xform *aead_xform = NULL;
+       dpaa2_sec_session *session = (dpaa2_sec_session *)sess;
+       struct ctxt_priv *priv;
+       struct ipsec_encap_pdb encap_pdb;
+       struct ipsec_decap_pdb decap_pdb;
+       struct alginfo authdata, cipherdata;
+       int bufsize;
+       struct sec_flow_context *flc;
+       struct dpaa2_sec_dev_private *dev_priv = dev->data->dev_private;
+       int ret = -1;
+
+       PMD_INIT_FUNC_TRACE();
+
+       priv = (struct ctxt_priv *)rte_zmalloc(NULL,
+                               sizeof(struct ctxt_priv) +
+                               sizeof(struct sec_flc_desc),
+                               RTE_CACHE_LINE_SIZE);
+
+       if (priv == NULL) {
+               DPAA2_SEC_ERR("No memory for priv CTXT");
+               return -ENOMEM;
+       }
+
+       priv->fle_pool = dev_priv->fle_pool;
+       flc = &priv->flc_desc[0].flc;
+
+       memset(session, 0, sizeof(dpaa2_sec_session));
+
+       if (conf->crypto_xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER) {
+               cipher_xform = &conf->crypto_xform->cipher;
+               if (conf->crypto_xform->next)
+                       auth_xform = &conf->crypto_xform->next->auth;
+               ret = dpaa2_sec_ipsec_proto_init(cipher_xform, auth_xform,
+                                       session, &cipherdata, &authdata);
+       } else if (conf->crypto_xform->type == RTE_CRYPTO_SYM_XFORM_AUTH) {
+               auth_xform = &conf->crypto_xform->auth;
+               if (conf->crypto_xform->next)
+                       cipher_xform = &conf->crypto_xform->next->cipher;
+               ret = dpaa2_sec_ipsec_proto_init(cipher_xform, auth_xform,
+                                       session, &cipherdata, &authdata);
+       } else if (conf->crypto_xform->type == RTE_CRYPTO_SYM_XFORM_AEAD) {
+               aead_xform = &conf->crypto_xform->aead;
+               ret = dpaa2_sec_ipsec_aead_init(aead_xform,
+                                       session, &cipherdata);
+       } else {
+               DPAA2_SEC_ERR("XFORM not specified");
+               ret = -EINVAL;
+               goto out;
+       }
+       if (ret) {
+               DPAA2_SEC_ERR("Failed to process xform");
                goto out;
        }
 
+       session->ctxt_type = DPAA2_SEC_IPSEC;
        if (ipsec_xform->direction == RTE_SECURITY_IPSEC_SA_DIR_EGRESS) {
                struct ip ip4_hdr;
 
@@ -2310,7 +2502,7 @@ dpaa2_sec_set_ipsec_session(struct rte_cryptodev *dev,
                ip4_hdr.ip_id = 0;
                ip4_hdr.ip_off = 0;
                ip4_hdr.ip_ttl = ipsec_xform->tunnel.ipv4.ttl;
-               ip4_hdr.ip_p = 0x32;
+               ip4_hdr.ip_p = IPPROTO_ESP;
                ip4_hdr.ip_sum = 0;
                ip4_hdr.ip_src = ipsec_xform->tunnel.ipv4.src_ip;
                ip4_hdr.ip_dst = ipsec_xform->tunnel.ipv4.dst_ip;
@@ -2322,13 +2514,14 @@ dpaa2_sec_set_ipsec_session(struct rte_cryptodev *dev,
                encap_pdb.options = (IPVERSION << PDBNH_ESP_ENCAP_SHIFT) |
                        PDBOPTS_ESP_OIHI_PDB_INL |
                        PDBOPTS_ESP_IVSRC |
-                       PDBHMO_ESP_ENCAP_DTTL;
+                       PDBHMO_ESP_ENCAP_DTTL |
+                       PDBHMO_ESP_SNR;
                encap_pdb.spi = ipsec_xform->spi;
                encap_pdb.ip_hdr_len = sizeof(struct ip);
 
                session->dir = DIR_ENC;
                bufsize = cnstr_shdsc_ipsec_new_encap(priv->flc_desc[0].desc,
-                               1, 0, &encap_pdb,
+                               1, 0, SHR_SERIAL, &encap_pdb,
                                (uint8_t *)&ip4_hdr,
                                &cipherdata, &authdata);
        } else if (ipsec_xform->direction ==
@@ -2338,7 +2531,8 @@ dpaa2_sec_set_ipsec_session(struct rte_cryptodev *dev,
                decap_pdb.options = sizeof(struct ip) << 16;
                session->dir = DIR_DEC;
                bufsize = cnstr_shdsc_ipsec_new_decap(priv->flc_desc[0].desc,
-                               1, 0, &decap_pdb, &cipherdata, &authdata);
+                               1, 0, SHR_SERIAL,
+                               &decap_pdb, &cipherdata, &authdata);
        } else
                goto out;
 
@@ -2367,6 +2561,244 @@ dpaa2_sec_set_ipsec_session(struct rte_cryptodev *dev,
 
        session->ctxt = priv;
 
+       return 0;
+out:
+       rte_free(session->auth_key.data);
+       rte_free(session->cipher_key.data);
+       rte_free(priv);
+       return ret;
+}
+
+static int
+dpaa2_sec_set_pdcp_session(struct rte_cryptodev *dev,
+                          struct rte_security_session_conf *conf,
+                          void *sess)
+{
+       struct rte_security_pdcp_xform *pdcp_xform = &conf->pdcp;
+       struct rte_crypto_sym_xform *xform = conf->crypto_xform;
+       struct rte_crypto_auth_xform *auth_xform = NULL;
+       struct rte_crypto_cipher_xform *cipher_xform;
+       dpaa2_sec_session *session = (dpaa2_sec_session *)sess;
+       struct ctxt_priv *priv;
+       struct dpaa2_sec_dev_private *dev_priv = dev->data->dev_private;
+       struct alginfo authdata, cipherdata;
+       int bufsize = -1;
+       struct sec_flow_context *flc;
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+       int swap = true;
+#else
+       int swap = false;
+#endif
+
+       PMD_INIT_FUNC_TRACE();
+
+       memset(session, 0, sizeof(dpaa2_sec_session));
+
+       priv = (struct ctxt_priv *)rte_zmalloc(NULL,
+                               sizeof(struct ctxt_priv) +
+                               sizeof(struct sec_flc_desc),
+                               RTE_CACHE_LINE_SIZE);
+
+       if (priv == NULL) {
+               DPAA2_SEC_ERR("No memory for priv CTXT");
+               return -ENOMEM;
+       }
+
+       priv->fle_pool = dev_priv->fle_pool;
+       flc = &priv->flc_desc[0].flc;
+
+       /* find xform types */
+       if (xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER && xform->next == NULL) {
+               cipher_xform = &xform->cipher;
+       } else if (xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER &&
+                  xform->next->type == RTE_CRYPTO_SYM_XFORM_AUTH) {
+               session->ext_params.aead_ctxt.auth_cipher_text = true;
+               cipher_xform = &xform->cipher;
+               auth_xform = &xform->next->auth;
+       } else if (xform->type == RTE_CRYPTO_SYM_XFORM_AUTH &&
+                  xform->next->type == RTE_CRYPTO_SYM_XFORM_CIPHER) {
+               session->ext_params.aead_ctxt.auth_cipher_text = false;
+               cipher_xform = &xform->next->cipher;
+               auth_xform = &xform->auth;
+       } else {
+               DPAA2_SEC_ERR("Invalid crypto type");
+               return -EINVAL;
+       }
+
+       session->ctxt_type = DPAA2_SEC_PDCP;
+       if (cipher_xform) {
+               session->cipher_key.data = rte_zmalloc(NULL,
+                                              cipher_xform->key.length,
+                                              RTE_CACHE_LINE_SIZE);
+               if (session->cipher_key.data == NULL &&
+                               cipher_xform->key.length > 0) {
+                       DPAA2_SEC_ERR("No Memory for cipher key");
+                       rte_free(priv);
+                       return -ENOMEM;
+               }
+               session->cipher_key.length = cipher_xform->key.length;
+               memcpy(session->cipher_key.data, cipher_xform->key.data,
+                       cipher_xform->key.length);
+               session->dir =
+                       (cipher_xform->op == RTE_CRYPTO_CIPHER_OP_ENCRYPT) ?
+                                       DIR_ENC : DIR_DEC;
+               session->cipher_alg = cipher_xform->algo;
+       } else {
+               session->cipher_key.data = NULL;
+               session->cipher_key.length = 0;
+               session->cipher_alg = RTE_CRYPTO_CIPHER_NULL;
+               session->dir = DIR_ENC;
+       }
+
+       session->pdcp.domain = pdcp_xform->domain;
+       session->pdcp.bearer = pdcp_xform->bearer;
+       session->pdcp.pkt_dir = pdcp_xform->pkt_dir;
+       session->pdcp.sn_size = pdcp_xform->sn_size;
+#ifdef ENABLE_HFN_OVERRIDE
+       session->pdcp.hfn_ovd = pdcp_xform->hfn_ovd;
+#endif
+       session->pdcp.hfn = pdcp_xform->hfn;
+       session->pdcp.hfn_threshold = pdcp_xform->hfn_threshold;
+
+       cipherdata.key = (size_t)session->cipher_key.data;
+       cipherdata.keylen = session->cipher_key.length;
+       cipherdata.key_enc_flags = 0;
+       cipherdata.key_type = RTA_DATA_IMM;
+
+       switch (session->cipher_alg) {
+       case RTE_CRYPTO_CIPHER_SNOW3G_UEA2:
+               cipherdata.algtype = PDCP_CIPHER_TYPE_SNOW;
+               break;
+       case RTE_CRYPTO_CIPHER_ZUC_EEA3:
+               cipherdata.algtype = PDCP_CIPHER_TYPE_ZUC;
+               break;
+       case RTE_CRYPTO_CIPHER_AES_CTR:
+               cipherdata.algtype = PDCP_CIPHER_TYPE_AES;
+               break;
+       case RTE_CRYPTO_CIPHER_NULL:
+               cipherdata.algtype = PDCP_CIPHER_TYPE_NULL;
+               break;
+       default:
+               DPAA2_SEC_ERR("Crypto: Undefined Cipher specified %u",
+                             session->cipher_alg);
+               goto out;
+       }
+
+       /* Auth is only applicable for control mode operation. */
+       if (pdcp_xform->domain == RTE_SECURITY_PDCP_MODE_CONTROL) {
+               if (pdcp_xform->sn_size != RTE_SECURITY_PDCP_SN_SIZE_5) {
+                       DPAA2_SEC_ERR(
+                               "PDCP Seq Num size should be 5 bits for cmode");
+                       goto out;
+               }
+               if (auth_xform) {
+                       session->auth_key.data = rte_zmalloc(NULL,
+                                                       auth_xform->key.length,
+                                                       RTE_CACHE_LINE_SIZE);
+                       if (session->auth_key.data == NULL &&
+                                       auth_xform->key.length > 0) {
+                               DPAA2_SEC_ERR("No Memory for auth key");
+                               rte_free(session->cipher_key.data);
+                               rte_free(priv);
+                               return -ENOMEM;
+                       }
+                       session->auth_key.length = auth_xform->key.length;
+                       memcpy(session->auth_key.data, auth_xform->key.data,
+                                       auth_xform->key.length);
+                       session->auth_alg = auth_xform->algo;
+               } else {
+                       session->auth_key.data = NULL;
+                       session->auth_key.length = 0;
+                       session->auth_alg = RTE_CRYPTO_AUTH_NULL;
+               }
+               authdata.key = (size_t)session->auth_key.data;
+               authdata.keylen = session->auth_key.length;
+               authdata.key_enc_flags = 0;
+               authdata.key_type = RTA_DATA_IMM;
+
+               switch (session->auth_alg) {
+               case RTE_CRYPTO_AUTH_SNOW3G_UIA2:
+                       authdata.algtype = PDCP_AUTH_TYPE_SNOW;
+                       break;
+               case RTE_CRYPTO_AUTH_ZUC_EIA3:
+                       authdata.algtype = PDCP_AUTH_TYPE_ZUC;
+                       break;
+               case RTE_CRYPTO_AUTH_AES_CMAC:
+                       authdata.algtype = PDCP_AUTH_TYPE_AES;
+                       break;
+               case RTE_CRYPTO_AUTH_NULL:
+                       authdata.algtype = PDCP_AUTH_TYPE_NULL;
+                       break;
+               default:
+                       DPAA2_SEC_ERR("Crypto: Unsupported auth alg %u",
+                                     session->auth_alg);
+                       goto out;
+               }
+
+               if (session->dir == DIR_ENC)
+                       bufsize = cnstr_shdsc_pdcp_c_plane_encap(
+                                       priv->flc_desc[0].desc, 1, swap,
+                                       pdcp_xform->hfn,
+                                       pdcp_xform->bearer,
+                                       pdcp_xform->pkt_dir,
+                                       pdcp_xform->hfn_threshold,
+                                       &cipherdata, &authdata,
+                                       0);
+               else if (session->dir == DIR_DEC)
+                       bufsize = cnstr_shdsc_pdcp_c_plane_decap(
+                                       priv->flc_desc[0].desc, 1, swap,
+                                       pdcp_xform->hfn,
+                                       pdcp_xform->bearer,
+                                       pdcp_xform->pkt_dir,
+                                       pdcp_xform->hfn_threshold,
+                                       &cipherdata, &authdata,
+                                       0);
+       } else {
+               if (session->dir == DIR_ENC)
+                       bufsize = cnstr_shdsc_pdcp_u_plane_encap(
+                                       priv->flc_desc[0].desc, 1, swap,
+                                       (enum pdcp_sn_size)pdcp_xform->sn_size,
+                                       pdcp_xform->hfn,
+                                       pdcp_xform->bearer,
+                                       pdcp_xform->pkt_dir,
+                                       pdcp_xform->hfn_threshold,
+                                       &cipherdata, 0);
+               else if (session->dir == DIR_DEC)
+                       bufsize = cnstr_shdsc_pdcp_u_plane_decap(
+                                       priv->flc_desc[0].desc, 1, swap,
+                                       (enum pdcp_sn_size)pdcp_xform->sn_size,
+                                       pdcp_xform->hfn,
+                                       pdcp_xform->bearer,
+                                       pdcp_xform->pkt_dir,
+                                       pdcp_xform->hfn_threshold,
+                                       &cipherdata, 0);
+       }
+
+       if (bufsize < 0) {
+               DPAA2_SEC_ERR("Crypto: Invalid buffer length");
+               goto out;
+       }
+
+       /* Enable the stashing control bit */
+       DPAA2_SET_FLC_RSC(flc);
+       flc->word2_rflc_31_0 = lower_32_bits(
+                       (size_t)&(((struct dpaa2_sec_qp *)
+                       dev->data->queue_pairs[0])->rx_vq) | 0x14);
+       flc->word3_rflc_63_32 = upper_32_bits(
+                       (size_t)&(((struct dpaa2_sec_qp *)
+                       dev->data->queue_pairs[0])->rx_vq));
+
+       flc->word1_sdl = (uint8_t)bufsize;
+
+       /* Set EWS bit i.e. enable write-safe */
+       DPAA2_SET_FLC_EWS(flc);
+       /* Set BS = 1, i.e. reuse input buffers as output buffers */
+       DPAA2_SET_FLC_REUSE_BS(flc);
+       /* Set FF = 10; reuse input buffers if they provide sufficient space */
+       DPAA2_SET_FLC_REUSE_FF(flc);
+
+       session->ctxt = priv;
+
        return 0;
 out:
        rte_free(session->auth_key.data);
@@ -2397,6 +2829,10 @@ dpaa2_sec_security_session_create(void *dev,
                break;
        case RTE_SECURITY_PROTOCOL_MACSEC:
                return -ENOTSUP;
+       case RTE_SECURITY_PROTOCOL_PDCP:
+               ret = dpaa2_sec_set_pdcp_session(cdev, conf,
+                               sess_private_data);
+               break;
        default:
                return -EINVAL;
        }
@@ -2686,6 +3122,129 @@ void dpaa2_sec_stats_reset(struct rte_cryptodev *dev)
        }
 }
 
+static void __attribute__((hot))
+dpaa2_sec_process_parallel_event(struct qbman_swp *swp,
+                                const struct qbman_fd *fd,
+                                const struct qbman_result *dq,
+                                struct dpaa2_queue *rxq,
+                                struct rte_event *ev)
+{
+       /* Prefetching mbuf */
+       rte_prefetch0((void *)(size_t)(DPAA2_GET_FD_ADDR(fd)-
+               rte_dpaa2_bpid_info[DPAA2_GET_FD_BPID(fd)].meta_data_size));
+
+       /* Prefetching ipsec crypto_op stored in priv data of mbuf */
+       rte_prefetch0((void *)(size_t)(DPAA2_GET_FD_ADDR(fd)-64));
+
+       ev->flow_id = rxq->ev.flow_id;
+       ev->sub_event_type = rxq->ev.sub_event_type;
+       ev->event_type = RTE_EVENT_TYPE_CRYPTODEV;
+       ev->op = RTE_EVENT_OP_NEW;
+       ev->sched_type = rxq->ev.sched_type;
+       ev->queue_id = rxq->ev.queue_id;
+       ev->priority = rxq->ev.priority;
+       ev->event_ptr = sec_fd_to_mbuf(fd, ((struct rte_cryptodev *)
+                               (rxq->dev))->driver_id);
+
+       qbman_swp_dqrr_consume(swp, dq);
+}
+
+static void
+dpaa2_sec_process_atomic_event(struct qbman_swp *swp __attribute__((unused)),
+                                const struct qbman_fd *fd,
+                                const struct qbman_result *dq,
+                                struct dpaa2_queue *rxq,
+                                struct rte_event *ev)
+{
+       uint8_t dqrr_index;
+       struct rte_crypto_op *crypto_op = (struct rte_crypto_op *)ev->event_ptr;
+       /* Prefetching mbuf */
+       rte_prefetch0((void *)(size_t)(DPAA2_GET_FD_ADDR(fd)-
+               rte_dpaa2_bpid_info[DPAA2_GET_FD_BPID(fd)].meta_data_size));
+
+       /* Prefetching ipsec crypto_op stored in priv data of mbuf */
+       rte_prefetch0((void *)(size_t)(DPAA2_GET_FD_ADDR(fd)-64));
+
+       ev->flow_id = rxq->ev.flow_id;
+       ev->sub_event_type = rxq->ev.sub_event_type;
+       ev->event_type = RTE_EVENT_TYPE_CRYPTODEV;
+       ev->op = RTE_EVENT_OP_NEW;
+       ev->sched_type = rxq->ev.sched_type;
+       ev->queue_id = rxq->ev.queue_id;
+       ev->priority = rxq->ev.priority;
+
+       ev->event_ptr = sec_fd_to_mbuf(fd, ((struct rte_cryptodev *)
+                               (rxq->dev))->driver_id);
+       dqrr_index = qbman_get_dqrr_idx(dq);
+       crypto_op->sym->m_src->seqn = dqrr_index + 1;
+       DPAA2_PER_LCORE_DQRR_SIZE++;
+       DPAA2_PER_LCORE_DQRR_HELD |= 1 << dqrr_index;
+       DPAA2_PER_LCORE_DQRR_MBUF(dqrr_index) = crypto_op->sym->m_src;
+}
+
+int
+dpaa2_sec_eventq_attach(const struct rte_cryptodev *dev,
+               int qp_id,
+               uint16_t dpcon_id,
+               const struct rte_event *event)
+{
+       struct dpaa2_sec_dev_private *priv = dev->data->dev_private;
+       struct fsl_mc_io *dpseci = (struct fsl_mc_io *)priv->hw;
+       struct dpaa2_sec_qp *qp = dev->data->queue_pairs[qp_id];
+       struct dpseci_rx_queue_cfg cfg;
+       int ret;
+
+       if (event->sched_type == RTE_SCHED_TYPE_PARALLEL)
+               qp->rx_vq.cb = dpaa2_sec_process_parallel_event;
+       else if (event->sched_type == RTE_SCHED_TYPE_ATOMIC)
+               qp->rx_vq.cb = dpaa2_sec_process_atomic_event;
+       else
+               return -EINVAL;
+
+       memset(&cfg, 0, sizeof(struct dpseci_rx_queue_cfg));
+       cfg.options = DPSECI_QUEUE_OPT_DEST;
+       cfg.dest_cfg.dest_type = DPSECI_DEST_DPCON;
+       cfg.dest_cfg.dest_id = dpcon_id;
+       cfg.dest_cfg.priority = event->priority;
+
+       cfg.options |= DPSECI_QUEUE_OPT_USER_CTX;
+       cfg.user_ctx = (size_t)(qp);
+       if (event->sched_type == RTE_SCHED_TYPE_ATOMIC) {
+               cfg.options |= DPSECI_QUEUE_OPT_ORDER_PRESERVATION;
+               cfg.order_preservation_en = 1;
+       }
+       ret = dpseci_set_rx_queue(dpseci, CMD_PRI_LOW, priv->token,
+                                 qp_id, &cfg);
+       if (ret) {
+               RTE_LOG(ERR, PMD, "Error in dpseci_set_queue: ret: %d\n", ret);
+               return ret;
+       }
+
+       memcpy(&qp->rx_vq.ev, event, sizeof(struct rte_event));
+
+       return 0;
+}
+
+int
+dpaa2_sec_eventq_detach(const struct rte_cryptodev *dev,
+                       int qp_id)
+{
+       struct dpaa2_sec_dev_private *priv = dev->data->dev_private;
+       struct fsl_mc_io *dpseci = (struct fsl_mc_io *)priv->hw;
+       struct dpseci_rx_queue_cfg cfg;
+       int ret;
+
+       memset(&cfg, 0, sizeof(struct dpseci_rx_queue_cfg));
+       cfg.options = DPSECI_QUEUE_OPT_DEST;
+       cfg.dest_cfg.dest_type = DPSECI_DEST_NONE;
+
+       ret = dpseci_set_rx_queue(dpseci, CMD_PRI_LOW, priv->token,
+                                 qp_id, &cfg);
+       if (ret)
+               RTE_LOG(ERR, PMD, "Error in dpseci_set_queue: ret: %d\n", ret);
+
+       return ret;
+}
+
 static struct rte_cryptodev_ops crypto_ops = {
        .dev_configure        = dpaa2_sec_dev_configure,
        .dev_start            = dpaa2_sec_dev_start,
@@ -2708,7 +3267,7 @@ dpaa2_sec_capabilities_get(void *device __rte_unused)
        return dpaa2_sec_security_cap;
 }
 
-struct rte_security_ops dpaa2_sec_security_ops = {
+static const struct rte_security_ops dpaa2_sec_security_ops = {
        .session_create = dpaa2_sec_security_session_create,
        .session_update = NULL,
        .session_stats_get = NULL,
@@ -2843,7 +3402,7 @@ init_error:
 }
 
 static int
-cryptodev_dpaa2_sec_probe(struct rte_dpaa2_driver *dpaa2_drv,
+cryptodev_dpaa2_sec_probe(struct rte_dpaa2_driver *dpaa2_drv __rte_unused,
                          struct rte_dpaa2_device *dpaa2_dev)
 {
        struct rte_cryptodev *cryptodev;
@@ -2871,7 +3430,6 @@ cryptodev_dpaa2_sec_probe(struct rte_dpaa2_driver *dpaa2_drv,
 
        dpaa2_dev->cryptodev = cryptodev;
        cryptodev->device = &dpaa2_dev->device;
-       cryptodev->device->driver = &dpaa2_drv->driver;
 
        /* init user callbacks */
        TAILQ_INIT(&(cryptodev->link_intr_cbs));
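
For context on how the PDCP path added above is reached: an application
requests a PDCP session through the generic rte_security API, which lands
in dpaa2_sec_set_pdcp_session(). A minimal sketch with illustrative values
only (the wrapper name and the chosen bearer/HFN numbers are not part of
this patch):

    #include <rte_security.h>
    #include <rte_crypto.h>
    #include <rte_mempool.h>

    /* Request a PDCP data-plane session from a security-capable device.
     * cipher_xform carries the RTE_CRYPTO_SYM_XFORM_CIPHER transform.
     */
    static struct rte_security_session *
    create_pdcp_session_sketch(struct rte_security_ctx *sec_ctx,
                               struct rte_crypto_sym_xform *cipher_xform,
                               struct rte_mempool *sess_pool)
    {
            struct rte_security_session_conf conf = {
                    .action_type = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL,
                    .protocol = RTE_SECURITY_PROTOCOL_PDCP,
                    .pdcp = {
                            .domain = RTE_SECURITY_PDCP_MODE_DATA,
                            .bearer = 0x1,
                            .pkt_dir = RTE_SECURITY_PDCP_UPLINK,
                            .sn_size = RTE_SECURITY_PDCP_SN_SIZE_12,
                            .hfn = 0,
                            .hfn_threshold = 0x70C0A,
                    },
                    .crypto_xform = cipher_xform,
            };

            return rte_security_session_create(sec_ctx, &conf, sess_pool);
    }
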
diff --git a/drivers/crypto/dpaa2_sec/dpaa2_sec_event.h b/drivers/crypto/dpaa2_sec/dpaa2_sec_event.h
new file mode 100644 (file)
index 0000000..9770994
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 NXP
+ *
+ */
+
+#ifndef _DPAA2_SEC_EVENT_H_
+#define _DPAA2_SEC_EVENT_H_
+
+int
+dpaa2_sec_eventq_attach(const struct rte_cryptodev *dev,
+               int qp_id,
+               uint16_t dpcon_id,
+               const struct rte_event *event);
+
+int dpaa2_sec_eventq_detach(const struct rte_cryptodev *dev,
+               int qp_id);
+
+#endif /* _DPAA2_SEC_EVENT_H_ */
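
These two entry points let the DPAA2 event device driver steer SEC
completion notifications into an eventdev queue. A minimal sketch of an
attach call, with illustrative queue and priority values (the wrapper name
is hypothetical):

    #include <rte_cryptodev.h>
    #include <rte_eventdev.h>
    #include "dpaa2_sec_event.h"

    /* Bind SEC queue pair 0 to the DPCON identified by dpcon_id so that
     * completed crypto ops are delivered as atomic events.
     */
    static int
    attach_sec_qp0_sketch(const struct rte_cryptodev *dev, uint16_t dpcon_id)
    {
            struct rte_event ev = {
                    .queue_id = 0,
                    .priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
                    .sched_type = RTE_SCHED_TYPE_ATOMIC,
            };

            return dpaa2_sec_eventq_attach(dev, 0, dpcon_id, &ev);
    }
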
index d015be1..5175110 100644 (file)
@@ -8,6 +8,8 @@
 #ifndef _RTE_DPAA2_SEC_PMD_PRIVATE_H_
 #define _RTE_DPAA2_SEC_PMD_PRIVATE_H_
 
+#include <rte_security_driver.h>
+
 #define CRYPTODEV_NAME_DPAA2_SEC_PMD   crypto_dpaa2_sec
 /**< NXP DPAA2 - SEC PMD device name */
 
@@ -135,6 +137,19 @@ struct dpaa2_sec_aead_ctxt {
        uint8_t auth_cipher_text;       /**< Authenticate/cipher ordering */
 };
 
+/*
+ * This structure is to be filled by the user for the PDCP protocol.
+ */
+struct dpaa2_pdcp_ctxt {
+       enum rte_security_pdcp_domain domain; /*!< Data/Control mode */
+       int8_t bearer;  /*!< PDCP bearer ID */
+       int8_t pkt_dir; /*!< PDCP frame direction: 0 = UL, 1 = DL */
+       int8_t hfn_ovd; /*!< Overwrite HFN per packet */
+       uint32_t hfn;   /*!< Hyper Frame Number */
+       uint32_t hfn_threshold; /*!< HFN threshold for key renegotiation */
+       uint8_t sn_size;        /*!< Sequence number size, 7/12/15 */
+};
+
 typedef struct dpaa2_sec_session_entry {
        void *ctxt;
        uint8_t ctxt_type;
@@ -158,15 +173,20 @@ typedef struct dpaa2_sec_session_entry {
                        } auth_key;
                };
        };
-       struct {
-               uint16_t length; /**< IV length in bytes */
-               uint16_t offset; /**< IV offset in bytes */
-       } iv;
-       uint16_t digest_length;
-       uint8_t status;
        union {
-               struct dpaa2_sec_aead_ctxt aead_ctxt;
-       } ext_params;
+               struct {
+                       struct {
+                               uint16_t length; /**< IV length in bytes */
+                               uint16_t offset; /**< IV offset in bytes */
+                       } iv;
+                       uint16_t digest_length;
+                       uint8_t status;
+                       union {
+                               struct dpaa2_sec_aead_ctxt aead_ctxt;
+                       } ext_params;
+               };
+               struct dpaa2_pdcp_ctxt pdcp;
+       };
 } dpaa2_sec_session;
 
 static const struct rte_cryptodev_capabilities dpaa2_sec_capabilities[] = {
@@ -390,6 +410,162 @@ static const struct rte_cryptodev_capabilities dpaa2_sec_capabilities[] = {
        RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST()
 };
 
+static const struct rte_cryptodev_capabilities dpaa2_pdcp_capabilities[] = {
+       {       /* SNOW 3G (UIA2) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SNOW3G_UIA2,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 4,
+                                       .max = 4,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* SNOW 3G (UEA2) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_SNOW3G_UEA2,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* AES CTR */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_AES_CTR,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 32,
+                                       .increment = 8
+                               },
+                               .iv_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* NULL (AUTH) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_NULL,
+                               .block_size = 1,
+                               .key_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                               .iv_size = { 0 }
+                       }, },
+               }, },
+       },
+       {       /* NULL (CIPHER) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_NULL,
+                               .block_size = 1,
+                               .key_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               }
+                       }, },
+               }, }
+       },
+       {       /* ZUC (EEA3) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_ZUC_EEA3,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* ZUC (EIA3) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_ZUC_EIA3,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 4,
+                                       .max = 4,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+
+       RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST()
+};
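
Because the RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST() sentinel sets .op to RTE_CRYPTO_OP_TYPE_UNDEFINED, the table can be scanned directly. A sketch (the helper is hypothetical; it assumes only the standard rte_cryptodev capability layout used throughout this file) of checking whether a given cipher is advertised for PDCP:

	/* Hypothetical: walk dpaa2_pdcp_capabilities for a cipher algorithm. */
	static int
	pdcp_cipher_supported(enum rte_crypto_cipher_algorithm algo)
	{
		const struct rte_cryptodev_capabilities *cap;

		for (cap = dpaa2_pdcp_capabilities;
		     cap->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; cap++) {
			if (cap->op == RTE_CRYPTO_OP_TYPE_SYMMETRIC &&
			    cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_CIPHER &&
			    cap->sym.cipher.algo == algo)
				return 1;
		}
		return 0;
	}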
+
 static const struct rte_security_capability dpaa2_sec_security_cap[] = {
        { /* IPsec Lookaside Protocol offload ESP Transport Egress */
                .action = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL,
@@ -413,6 +589,24 @@ static const struct rte_security_capability dpaa2_sec_security_cap[] = {
                },
                .crypto_capabilities = dpaa2_sec_capabilities
        },
+       { /* PDCP Lookaside Protocol offload Data */
+               .action = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL,
+               .protocol = RTE_SECURITY_PROTOCOL_PDCP,
+               .pdcp = {
+                       .domain = RTE_SECURITY_PDCP_MODE_DATA,
+                       .capa_flags = 0
+               },
+               .crypto_capabilities = dpaa2_pdcp_capabilities
+       },
+       { /* PDCP Lookaside Protocol offload Control */
+               .action = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL,
+               .protocol = RTE_SECURITY_PROTOCOL_PDCP,
+               .pdcp = {
+                       .domain = RTE_SECURITY_PDCP_MODE_CONTROL,
+                       .capa_flags = 0
+               },
+               .crypto_capabilities = dpaa2_pdcp_capabilities
+       },
        {
                .action = RTE_SECURITY_ACTION_TYPE_NONE
        }
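
With the two PDCP entries registered, an application reaches this driver through the rte_security API rather than plain cryptodev sessions. A hedged sketch of creating a DATA-plane PDCP session follows; the exact rte_security_pdcp_xform field names and types vary by release and are assumptions here, and cipher_xform, dev_id and sess_mempool are placeholders:

	struct rte_security_ctx *sec_ctx =
		(struct rte_security_ctx *)rte_cryptodev_get_sec_ctx(dev_id);
	struct rte_security_session_conf conf = {
		.action_type = RTE_SECURITY_ACTION_TYPE_LOOKASIDE_PROTOCOL,
		.protocol = RTE_SECURITY_PROTOCOL_PDCP,
		.pdcp = {
			.domain = RTE_SECURITY_PDCP_MODE_DATA,
			.bearer = 0x1,
			.pkt_dir = RTE_SECURITY_PDCP_UPLINK,
			.sn_size = 12,           /* 7/12/15, per the session struct above */
			.hfn = 0,
			.hfn_threshold = 0xfffff,
		},
		.crypto_xform = &cipher_xform, /* e.g. SNOW 3G UEA2 from the table above */
	};
	struct rte_security_session *sess =
		rte_security_session_create(sec_ctx, &conf, sess_mempool);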
index e925583..5d99dd8 100644 (file)
 #define OP_PCLID_TLS10_PRF     (0x09 << OP_PCLID_SHIFT)
 #define OP_PCLID_TLS11_PRF     (0x0a << OP_PCLID_SHIFT)
 #define OP_PCLID_TLS12_PRF     (0x0b << OP_PCLID_SHIFT)
-#define OP_PCLID_DTLS10_PRF    (0x0c << OP_PCLID_SHIFT)
+#define OP_PCLID_DTLS_PRF      (0x0c << OP_PCLID_SHIFT)
 #define OP_PCLID_PUBLICKEYPAIR (0x14 << OP_PCLID_SHIFT)
 #define OP_PCLID_DSASIGN       (0x15 << OP_PCLID_SHIFT)
 #define OP_PCLID_DSAVERIFY     (0x16 << OP_PCLID_SHIFT)
 #define OP_PCLID_TLS10         (0x09 << OP_PCLID_SHIFT)
 #define OP_PCLID_TLS11         (0x0a << OP_PCLID_SHIFT)
 #define OP_PCLID_TLS12         (0x0b << OP_PCLID_SHIFT)
-#define OP_PCLID_DTLS10                (0x0c << OP_PCLID_SHIFT)
+#define OP_PCLID_DTLS          (0x0c << OP_PCLID_SHIFT)
 #define OP_PCLID_BLOB          (0x0d << OP_PCLID_SHIFT)
 #define OP_PCLID_IPSEC_NEW     (0x11 << OP_PCLID_SHIFT)
 #define OP_PCLID_3G_DCRC       (0x31 << OP_PCLID_SHIFT)
 
 #define OP_PCL_SRTP_HMAC_SHA1_160               0x0007
 
-/* For SSL 3.0 - OP_PCLID_SSL30 */
-#define OP_PCL_SSL30_AES_128_CBC_SHA            0x002f
-#define OP_PCL_SSL30_AES_128_CBC_SHA_2          0x0030
-#define OP_PCL_SSL30_AES_128_CBC_SHA_3          0x0031
-#define OP_PCL_SSL30_AES_128_CBC_SHA_4          0x0032
-#define OP_PCL_SSL30_AES_128_CBC_SHA_5          0x0033
-#define OP_PCL_SSL30_AES_128_CBC_SHA_6          0x0034
-#define OP_PCL_SSL30_AES_128_CBC_SHA_7          0x008c
-#define OP_PCL_SSL30_AES_128_CBC_SHA_8          0x0090
-#define OP_PCL_SSL30_AES_128_CBC_SHA_9          0x0094
-#define OP_PCL_SSL30_AES_128_CBC_SHA_10                 0xc004
-#define OP_PCL_SSL30_AES_128_CBC_SHA_11                 0xc009
-#define OP_PCL_SSL30_AES_128_CBC_SHA_12                 0xc00e
-#define OP_PCL_SSL30_AES_128_CBC_SHA_13                 0xc013
-#define OP_PCL_SSL30_AES_128_CBC_SHA_14                 0xc018
-#define OP_PCL_SSL30_AES_128_CBC_SHA_15                 0xc01d
-#define OP_PCL_SSL30_AES_128_CBC_SHA_16                 0xc01e
-#define OP_PCL_SSL30_AES_128_CBC_SHA_17                 0xc01f
-
-#define OP_PCL_SSL30_AES_256_CBC_SHA            0x0035
-#define OP_PCL_SSL30_AES_256_CBC_SHA_2          0x0036
-#define OP_PCL_SSL30_AES_256_CBC_SHA_3          0x0037
-#define OP_PCL_SSL30_AES_256_CBC_SHA_4          0x0038
-#define OP_PCL_SSL30_AES_256_CBC_SHA_5          0x0039
-#define OP_PCL_SSL30_AES_256_CBC_SHA_6          0x003a
-#define OP_PCL_SSL30_AES_256_CBC_SHA_7          0x008d
-#define OP_PCL_SSL30_AES_256_CBC_SHA_8          0x0091
-#define OP_PCL_SSL30_AES_256_CBC_SHA_9          0x0095
-#define OP_PCL_SSL30_AES_256_CBC_SHA_10                 0xc005
-#define OP_PCL_SSL30_AES_256_CBC_SHA_11                 0xc00a
-#define OP_PCL_SSL30_AES_256_CBC_SHA_12                 0xc00f
-#define OP_PCL_SSL30_AES_256_CBC_SHA_13                 0xc014
-#define OP_PCL_SSL30_AES_256_CBC_SHA_14                 0xc019
-#define OP_PCL_SSL30_AES_256_CBC_SHA_15                 0xc020
-#define OP_PCL_SSL30_AES_256_CBC_SHA_16                 0xc021
-#define OP_PCL_SSL30_AES_256_CBC_SHA_17                 0xc022
-
-#define OP_PCL_SSL30_AES_128_GCM_SHA256_1       0x009C
-#define OP_PCL_SSL30_AES_256_GCM_SHA384_1       0x009D
-#define OP_PCL_SSL30_AES_128_GCM_SHA256_2       0x009E
-#define OP_PCL_SSL30_AES_256_GCM_SHA384_2       0x009F
-#define OP_PCL_SSL30_AES_128_GCM_SHA256_3       0x00A0
-#define OP_PCL_SSL30_AES_256_GCM_SHA384_3       0x00A1
-#define OP_PCL_SSL30_AES_128_GCM_SHA256_4       0x00A2
-#define OP_PCL_SSL30_AES_256_GCM_SHA384_4       0x00A3
-#define OP_PCL_SSL30_AES_128_GCM_SHA256_5       0x00A4
-#define OP_PCL_SSL30_AES_256_GCM_SHA384_5       0x00A5
-#define OP_PCL_SSL30_AES_128_GCM_SHA256_6       0x00A6
-
-#define OP_PCL_TLS_DH_ANON_AES_256_GCM_SHA384   0x00A7
-#define OP_PCL_TLS_PSK_AES_128_GCM_SHA256       0x00A8
-#define OP_PCL_TLS_PSK_AES_256_GCM_SHA384       0x00A9
-#define OP_PCL_TLS_DHE_PSK_AES_128_GCM_SHA256   0x00AA
-#define OP_PCL_TLS_DHE_PSK_AES_256_GCM_SHA384   0x00AB
-#define OP_PCL_TLS_RSA_PSK_AES_128_GCM_SHA256   0x00AC
-#define OP_PCL_TLS_RSA_PSK_AES_256_GCM_SHA384   0x00AD
-#define OP_PCL_TLS_PSK_AES_128_CBC_SHA256       0x00AE
-#define OP_PCL_TLS_PSK_AES_256_CBC_SHA384       0x00AF
-#define OP_PCL_TLS_DHE_PSK_AES_128_CBC_SHA256   0x00B2
-#define OP_PCL_TLS_DHE_PSK_AES_256_CBC_SHA384   0x00B3
-#define OP_PCL_TLS_RSA_PSK_AES_128_CBC_SHA256   0x00B6
-#define OP_PCL_TLS_RSA_PSK_AES_256_CBC_SHA384   0x00B7
-
-#define OP_PCL_SSL30_3DES_EDE_CBC_MD5           0x0023
-
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA           0x001f
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_2                 0x008b
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_3                 0x008f
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_4                 0x0093
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_5                 0x000a
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_6                 0x000d
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_7                 0x0010
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_8                 0x0013
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_9                 0x0016
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_10        0x001b
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_11        0xc003
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_12        0xc008
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_13        0xc00d
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_14        0xc012
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_15        0xc017
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_16        0xc01a
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_17        0xc01b
-#define OP_PCL_SSL30_3DES_EDE_CBC_SHA_18        0xc01c
-
-#define OP_PCL_SSL30_DES40_CBC_MD5              0x0029
-
-#define OP_PCL_SSL30_DES_CBC_MD5                0x0022
-
-#define OP_PCL_SSL30_DES40_CBC_SHA              0x0008
-#define OP_PCL_SSL30_DES40_CBC_SHA_2            0x000b
-#define OP_PCL_SSL30_DES40_CBC_SHA_3            0x000e
-#define OP_PCL_SSL30_DES40_CBC_SHA_4            0x0011
-#define OP_PCL_SSL30_DES40_CBC_SHA_5            0x0014
-#define OP_PCL_SSL30_DES40_CBC_SHA_6            0x0019
-#define OP_PCL_SSL30_DES40_CBC_SHA_7            0x0026
-
-#define OP_PCL_SSL30_DES_CBC_SHA                0x001e
-#define OP_PCL_SSL30_DES_CBC_SHA_2              0x0009
-#define OP_PCL_SSL30_DES_CBC_SHA_3              0x000c
-#define OP_PCL_SSL30_DES_CBC_SHA_4              0x000f
-#define OP_PCL_SSL30_DES_CBC_SHA_5              0x0012
-#define OP_PCL_SSL30_DES_CBC_SHA_6              0x0015
-#define OP_PCL_SSL30_DES_CBC_SHA_7              0x001a
-
-#define OP_PCL_SSL30_RC4_128_MD5                0x0024
-#define OP_PCL_SSL30_RC4_128_MD5_2              0x0004
-#define OP_PCL_SSL30_RC4_128_MD5_3              0x0018
-
-#define OP_PCL_SSL30_RC4_40_MD5                         0x002b
-#define OP_PCL_SSL30_RC4_40_MD5_2               0x0003
-#define OP_PCL_SSL30_RC4_40_MD5_3               0x0017
-
-#define OP_PCL_SSL30_RC4_128_SHA                0x0020
-#define OP_PCL_SSL30_RC4_128_SHA_2              0x008a
-#define OP_PCL_SSL30_RC4_128_SHA_3              0x008e
-#define OP_PCL_SSL30_RC4_128_SHA_4              0x0092
-#define OP_PCL_SSL30_RC4_128_SHA_5              0x0005
-#define OP_PCL_SSL30_RC4_128_SHA_6              0xc002
-#define OP_PCL_SSL30_RC4_128_SHA_7              0xc007
-#define OP_PCL_SSL30_RC4_128_SHA_8              0xc00c
-#define OP_PCL_SSL30_RC4_128_SHA_9              0xc011
-#define OP_PCL_SSL30_RC4_128_SHA_10             0xc016
-
-#define OP_PCL_SSL30_RC4_40_SHA                         0x0028
-
-/* For TLS 1.0 - OP_PCLID_TLS10 */
-#define OP_PCL_TLS10_AES_128_CBC_SHA            0x002f
-#define OP_PCL_TLS10_AES_128_CBC_SHA_2          0x0030
-#define OP_PCL_TLS10_AES_128_CBC_SHA_3          0x0031
-#define OP_PCL_TLS10_AES_128_CBC_SHA_4          0x0032
-#define OP_PCL_TLS10_AES_128_CBC_SHA_5          0x0033
-#define OP_PCL_TLS10_AES_128_CBC_SHA_6          0x0034
-#define OP_PCL_TLS10_AES_128_CBC_SHA_7          0x008c
-#define OP_PCL_TLS10_AES_128_CBC_SHA_8          0x0090
-#define OP_PCL_TLS10_AES_128_CBC_SHA_9          0x0094
-#define OP_PCL_TLS10_AES_128_CBC_SHA_10                 0xc004
-#define OP_PCL_TLS10_AES_128_CBC_SHA_11                 0xc009
-#define OP_PCL_TLS10_AES_128_CBC_SHA_12                 0xc00e
-#define OP_PCL_TLS10_AES_128_CBC_SHA_13                 0xc013
-#define OP_PCL_TLS10_AES_128_CBC_SHA_14                 0xc018
-#define OP_PCL_TLS10_AES_128_CBC_SHA_15                 0xc01d
-#define OP_PCL_TLS10_AES_128_CBC_SHA_16                 0xc01e
-#define OP_PCL_TLS10_AES_128_CBC_SHA_17                 0xc01f
-
-#define OP_PCL_TLS10_AES_256_CBC_SHA            0x0035
-#define OP_PCL_TLS10_AES_256_CBC_SHA_2          0x0036
-#define OP_PCL_TLS10_AES_256_CBC_SHA_3          0x0037
-#define OP_PCL_TLS10_AES_256_CBC_SHA_4          0x0038
-#define OP_PCL_TLS10_AES_256_CBC_SHA_5          0x0039
-#define OP_PCL_TLS10_AES_256_CBC_SHA_6          0x003a
-#define OP_PCL_TLS10_AES_256_CBC_SHA_7          0x008d
-#define OP_PCL_TLS10_AES_256_CBC_SHA_8          0x0091
-#define OP_PCL_TLS10_AES_256_CBC_SHA_9          0x0095
-#define OP_PCL_TLS10_AES_256_CBC_SHA_10                 0xc005
-#define OP_PCL_TLS10_AES_256_CBC_SHA_11                 0xc00a
-#define OP_PCL_TLS10_AES_256_CBC_SHA_12                 0xc00f
-#define OP_PCL_TLS10_AES_256_CBC_SHA_13                 0xc014
-#define OP_PCL_TLS10_AES_256_CBC_SHA_14                 0xc019
-#define OP_PCL_TLS10_AES_256_CBC_SHA_15                 0xc020
-#define OP_PCL_TLS10_AES_256_CBC_SHA_16                 0xc021
-#define OP_PCL_TLS10_AES_256_CBC_SHA_17                 0xc022
-
-#define OP_PCL_TLS_ECDHE_ECDSA_AES_128_CBC_SHA256  0xC023
-#define OP_PCL_TLS_ECDHE_ECDSA_AES_256_CBC_SHA384  0xC024
-#define OP_PCL_TLS_ECDH_ECDSA_AES_128_CBC_SHA256   0xC025
-#define OP_PCL_TLS_ECDH_ECDSA_AES_256_CBC_SHA384   0xC026
-#define OP_PCL_TLS_ECDHE_RSA_AES_128_CBC_SHA256           0xC027
-#define OP_PCL_TLS_ECDHE_RSA_AES_256_CBC_SHA384           0xC028
-#define OP_PCL_TLS_ECDH_RSA_AES_128_CBC_SHA256    0xC029
-#define OP_PCL_TLS_ECDH_RSA_AES_256_CBC_SHA384    0xC02A
-#define OP_PCL_TLS_ECDHE_ECDSA_AES_128_GCM_SHA256  0xC02B
-#define OP_PCL_TLS_ECDHE_ECDSA_AES_256_GCM_SHA384  0xC02C
-#define OP_PCL_TLS_ECDH_ECDSA_AES_128_GCM_SHA256   0xC02D
-#define OP_PCL_TLS_ECDH_ECDSA_AES_256_GCM_SHA384   0xC02E
-#define OP_PCL_TLS_ECDHE_RSA_AES_128_GCM_SHA256           0xC02F
-#define OP_PCL_TLS_ECDHE_RSA_AES_256_GCM_SHA384           0xC030
-#define OP_PCL_TLS_ECDH_RSA_AES_128_GCM_SHA256    0xC031
-#define OP_PCL_TLS_ECDH_RSA_AES_256_GCM_SHA384    0xC032
-#define OP_PCL_TLS_ECDHE_PSK_RC4_128_SHA          0xC033
-#define OP_PCL_TLS_ECDHE_PSK_3DES_EDE_CBC_SHA     0xC034
-#define OP_PCL_TLS_ECDHE_PSK_AES_128_CBC_SHA      0xC035
-#define OP_PCL_TLS_ECDHE_PSK_AES_256_CBC_SHA      0xC036
-#define OP_PCL_TLS_ECDHE_PSK_AES_128_CBC_SHA256           0xC037
-#define OP_PCL_TLS_ECDHE_PSK_AES_256_CBC_SHA384           0xC038
-
-/* #define OP_PCL_TLS10_3DES_EDE_CBC_MD5       0x0023 */
-
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA           0x001f
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_2                 0x008b
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_3                 0x008f
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_4                 0x0093
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_5                 0x000a
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_6                 0x000d
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_7                 0x0010
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_8                 0x0013
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_9                 0x0016
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_10        0x001b
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_11        0xc003
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_12        0xc008
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_13        0xc00d
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_14        0xc012
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_15        0xc017
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_16        0xc01a
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_17        0xc01b
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA_18        0xc01c
-
-#define OP_PCL_TLS10_DES40_CBC_MD5              0x0029
-
-#define OP_PCL_TLS10_DES_CBC_MD5                0x0022
-
-#define OP_PCL_TLS10_DES40_CBC_SHA              0x0008
-#define OP_PCL_TLS10_DES40_CBC_SHA_2            0x000b
-#define OP_PCL_TLS10_DES40_CBC_SHA_3            0x000e
-#define OP_PCL_TLS10_DES40_CBC_SHA_4            0x0011
-#define OP_PCL_TLS10_DES40_CBC_SHA_5            0x0014
-#define OP_PCL_TLS10_DES40_CBC_SHA_6            0x0019
-#define OP_PCL_TLS10_DES40_CBC_SHA_7            0x0026
-
-#define OP_PCL_TLS10_DES_CBC_SHA                0x001e
-#define OP_PCL_TLS10_DES_CBC_SHA_2              0x0009
-#define OP_PCL_TLS10_DES_CBC_SHA_3              0x000c
-#define OP_PCL_TLS10_DES_CBC_SHA_4              0x000f
-#define OP_PCL_TLS10_DES_CBC_SHA_5              0x0012
-#define OP_PCL_TLS10_DES_CBC_SHA_6              0x0015
-#define OP_PCL_TLS10_DES_CBC_SHA_7              0x001a
-
-#define OP_PCL_TLS10_RC4_128_MD5                0x0024
-#define OP_PCL_TLS10_RC4_128_MD5_2              0x0004
-#define OP_PCL_TLS10_RC4_128_MD5_3              0x0018
-
-#define OP_PCL_TLS10_RC4_40_MD5                         0x002b
-#define OP_PCL_TLS10_RC4_40_MD5_2               0x0003
-#define OP_PCL_TLS10_RC4_40_MD5_3               0x0017
-
-#define OP_PCL_TLS10_RC4_128_SHA                0x0020
-#define OP_PCL_TLS10_RC4_128_SHA_2              0x008a
-#define OP_PCL_TLS10_RC4_128_SHA_3              0x008e
-#define OP_PCL_TLS10_RC4_128_SHA_4              0x0092
-#define OP_PCL_TLS10_RC4_128_SHA_5              0x0005
-#define OP_PCL_TLS10_RC4_128_SHA_6              0xc002
-#define OP_PCL_TLS10_RC4_128_SHA_7              0xc007
-#define OP_PCL_TLS10_RC4_128_SHA_8              0xc00c
-#define OP_PCL_TLS10_RC4_128_SHA_9              0xc011
-#define OP_PCL_TLS10_RC4_128_SHA_10             0xc016
-
-#define OP_PCL_TLS10_RC4_40_SHA                         0x0028
-
-#define OP_PCL_TLS10_3DES_EDE_CBC_MD5           0xff23
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA160        0xff30
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA224        0xff34
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA256        0xff36
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA384        0xff33
-#define OP_PCL_TLS10_3DES_EDE_CBC_SHA512        0xff35
-#define OP_PCL_TLS10_AES_128_CBC_SHA160                 0xff80
-#define OP_PCL_TLS10_AES_128_CBC_SHA224                 0xff84
-#define OP_PCL_TLS10_AES_128_CBC_SHA256                 0xff86
-#define OP_PCL_TLS10_AES_128_CBC_SHA384                 0xff83
-#define OP_PCL_TLS10_AES_128_CBC_SHA512                 0xff85
-#define OP_PCL_TLS10_AES_192_CBC_SHA160                 0xff20
-#define OP_PCL_TLS10_AES_192_CBC_SHA224                 0xff24
-#define OP_PCL_TLS10_AES_192_CBC_SHA256                 0xff26
-#define OP_PCL_TLS10_AES_192_CBC_SHA384                 0xff23
-#define OP_PCL_TLS10_AES_192_CBC_SHA512                 0xff25
-#define OP_PCL_TLS10_AES_256_CBC_SHA160                 0xff60
-#define OP_PCL_TLS10_AES_256_CBC_SHA224                 0xff64
-#define OP_PCL_TLS10_AES_256_CBC_SHA256                 0xff66
-#define OP_PCL_TLS10_AES_256_CBC_SHA384                 0xff63
-#define OP_PCL_TLS10_AES_256_CBC_SHA512                 0xff65
-
-#define OP_PCL_TLS_PVT_AES_192_CBC_SHA160       0xff90
-#define OP_PCL_TLS_PVT_AES_192_CBC_SHA384       0xff93
-#define OP_PCL_TLS_PVT_AES_192_CBC_SHA224       0xff94
-#define OP_PCL_TLS_PVT_AES_192_CBC_SHA512       0xff95
-#define OP_PCL_TLS_PVT_AES_192_CBC_SHA256       0xff96
-#define OP_PCL_TLS_PVT_MASTER_SECRET_PRF_FE     0xfffe
-#define OP_PCL_TLS_PVT_MASTER_SECRET_PRF_FF     0xffff
-
-/* For TLS 1.1 - OP_PCLID_TLS11 */
-#define OP_PCL_TLS11_AES_128_CBC_SHA            0x002f
-#define OP_PCL_TLS11_AES_128_CBC_SHA_2          0x0030
-#define OP_PCL_TLS11_AES_128_CBC_SHA_3          0x0031
-#define OP_PCL_TLS11_AES_128_CBC_SHA_4          0x0032
-#define OP_PCL_TLS11_AES_128_CBC_SHA_5          0x0033
-#define OP_PCL_TLS11_AES_128_CBC_SHA_6          0x0034
-#define OP_PCL_TLS11_AES_128_CBC_SHA_7          0x008c
-#define OP_PCL_TLS11_AES_128_CBC_SHA_8          0x0090
-#define OP_PCL_TLS11_AES_128_CBC_SHA_9          0x0094
-#define OP_PCL_TLS11_AES_128_CBC_SHA_10                 0xc004
-#define OP_PCL_TLS11_AES_128_CBC_SHA_11                 0xc009
-#define OP_PCL_TLS11_AES_128_CBC_SHA_12                 0xc00e
-#define OP_PCL_TLS11_AES_128_CBC_SHA_13                 0xc013
-#define OP_PCL_TLS11_AES_128_CBC_SHA_14                 0xc018
-#define OP_PCL_TLS11_AES_128_CBC_SHA_15                 0xc01d
-#define OP_PCL_TLS11_AES_128_CBC_SHA_16                 0xc01e
-#define OP_PCL_TLS11_AES_128_CBC_SHA_17                 0xc01f
-
-#define OP_PCL_TLS11_AES_256_CBC_SHA            0x0035
-#define OP_PCL_TLS11_AES_256_CBC_SHA_2          0x0036
-#define OP_PCL_TLS11_AES_256_CBC_SHA_3          0x0037
-#define OP_PCL_TLS11_AES_256_CBC_SHA_4          0x0038
-#define OP_PCL_TLS11_AES_256_CBC_SHA_5          0x0039
-#define OP_PCL_TLS11_AES_256_CBC_SHA_6          0x003a
-#define OP_PCL_TLS11_AES_256_CBC_SHA_7          0x008d
-#define OP_PCL_TLS11_AES_256_CBC_SHA_8          0x0091
-#define OP_PCL_TLS11_AES_256_CBC_SHA_9          0x0095
-#define OP_PCL_TLS11_AES_256_CBC_SHA_10                 0xc005
-#define OP_PCL_TLS11_AES_256_CBC_SHA_11                 0xc00a
-#define OP_PCL_TLS11_AES_256_CBC_SHA_12                 0xc00f
-#define OP_PCL_TLS11_AES_256_CBC_SHA_13                 0xc014
-#define OP_PCL_TLS11_AES_256_CBC_SHA_14                 0xc019
-#define OP_PCL_TLS11_AES_256_CBC_SHA_15                 0xc020
-#define OP_PCL_TLS11_AES_256_CBC_SHA_16                 0xc021
-#define OP_PCL_TLS11_AES_256_CBC_SHA_17                 0xc022
-
-/* #define OP_PCL_TLS11_3DES_EDE_CBC_MD5       0x0023 */
-
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA           0x001f
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_2                 0x008b
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_3                 0x008f
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_4                 0x0093
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_5                 0x000a
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_6                 0x000d
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_7                 0x0010
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_8                 0x0013
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_9                 0x0016
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_10        0x001b
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_11        0xc003
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_12        0xc008
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_13        0xc00d
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_14        0xc012
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_15        0xc017
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_16        0xc01a
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_17        0xc01b
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA_18        0xc01c
-
-#define OP_PCL_TLS11_DES40_CBC_MD5              0x0029
-
-#define OP_PCL_TLS11_DES_CBC_MD5                0x0022
-
-#define OP_PCL_TLS11_DES40_CBC_SHA              0x0008
-#define OP_PCL_TLS11_DES40_CBC_SHA_2            0x000b
-#define OP_PCL_TLS11_DES40_CBC_SHA_3            0x000e
-#define OP_PCL_TLS11_DES40_CBC_SHA_4            0x0011
-#define OP_PCL_TLS11_DES40_CBC_SHA_5            0x0014
-#define OP_PCL_TLS11_DES40_CBC_SHA_6            0x0019
-#define OP_PCL_TLS11_DES40_CBC_SHA_7            0x0026
-
-#define OP_PCL_TLS11_DES_CBC_SHA                0x001e
-#define OP_PCL_TLS11_DES_CBC_SHA_2              0x0009
-#define OP_PCL_TLS11_DES_CBC_SHA_3              0x000c
-#define OP_PCL_TLS11_DES_CBC_SHA_4              0x000f
-#define OP_PCL_TLS11_DES_CBC_SHA_5              0x0012
-#define OP_PCL_TLS11_DES_CBC_SHA_6              0x0015
-#define OP_PCL_TLS11_DES_CBC_SHA_7              0x001a
-
-#define OP_PCL_TLS11_RC4_128_MD5                0x0024
-#define OP_PCL_TLS11_RC4_128_MD5_2              0x0004
-#define OP_PCL_TLS11_RC4_128_MD5_3              0x0018
-
-#define OP_PCL_TLS11_RC4_40_MD5                         0x002b
-#define OP_PCL_TLS11_RC4_40_MD5_2               0x0003
-#define OP_PCL_TLS11_RC4_40_MD5_3               0x0017
-
-#define OP_PCL_TLS11_RC4_128_SHA                0x0020
-#define OP_PCL_TLS11_RC4_128_SHA_2              0x008a
-#define OP_PCL_TLS11_RC4_128_SHA_3              0x008e
-#define OP_PCL_TLS11_RC4_128_SHA_4              0x0092
-#define OP_PCL_TLS11_RC4_128_SHA_5              0x0005
-#define OP_PCL_TLS11_RC4_128_SHA_6              0xc002
-#define OP_PCL_TLS11_RC4_128_SHA_7              0xc007
-#define OP_PCL_TLS11_RC4_128_SHA_8              0xc00c
-#define OP_PCL_TLS11_RC4_128_SHA_9              0xc011
-#define OP_PCL_TLS11_RC4_128_SHA_10             0xc016
-
-#define OP_PCL_TLS11_RC4_40_SHA                         0x0028
-
-#define OP_PCL_TLS11_3DES_EDE_CBC_MD5           0xff23
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA160        0xff30
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA224        0xff34
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA256        0xff36
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA384        0xff33
-#define OP_PCL_TLS11_3DES_EDE_CBC_SHA512        0xff35
-#define OP_PCL_TLS11_AES_128_CBC_SHA160                 0xff80
-#define OP_PCL_TLS11_AES_128_CBC_SHA224                 0xff84
-#define OP_PCL_TLS11_AES_128_CBC_SHA256                 0xff86
-#define OP_PCL_TLS11_AES_128_CBC_SHA384                 0xff83
-#define OP_PCL_TLS11_AES_128_CBC_SHA512                 0xff85
-#define OP_PCL_TLS11_AES_192_CBC_SHA160                 0xff20
-#define OP_PCL_TLS11_AES_192_CBC_SHA224                 0xff24
-#define OP_PCL_TLS11_AES_192_CBC_SHA256                 0xff26
-#define OP_PCL_TLS11_AES_192_CBC_SHA384                 0xff23
-#define OP_PCL_TLS11_AES_192_CBC_SHA512                 0xff25
-#define OP_PCL_TLS11_AES_256_CBC_SHA160                 0xff60
-#define OP_PCL_TLS11_AES_256_CBC_SHA224                 0xff64
-#define OP_PCL_TLS11_AES_256_CBC_SHA256                 0xff66
-#define OP_PCL_TLS11_AES_256_CBC_SHA384                 0xff63
-#define OP_PCL_TLS11_AES_256_CBC_SHA512                 0xff65
-
-
-/* For TLS 1.2 - OP_PCLID_TLS12 */
-#define OP_PCL_TLS12_AES_128_CBC_SHA            0x002f
-#define OP_PCL_TLS12_AES_128_CBC_SHA_2          0x0030
-#define OP_PCL_TLS12_AES_128_CBC_SHA_3          0x0031
-#define OP_PCL_TLS12_AES_128_CBC_SHA_4          0x0032
-#define OP_PCL_TLS12_AES_128_CBC_SHA_5          0x0033
-#define OP_PCL_TLS12_AES_128_CBC_SHA_6          0x0034
-#define OP_PCL_TLS12_AES_128_CBC_SHA_7          0x008c
-#define OP_PCL_TLS12_AES_128_CBC_SHA_8          0x0090
-#define OP_PCL_TLS12_AES_128_CBC_SHA_9          0x0094
-#define OP_PCL_TLS12_AES_128_CBC_SHA_10                 0xc004
-#define OP_PCL_TLS12_AES_128_CBC_SHA_11                 0xc009
-#define OP_PCL_TLS12_AES_128_CBC_SHA_12                 0xc00e
-#define OP_PCL_TLS12_AES_128_CBC_SHA_13                 0xc013
-#define OP_PCL_TLS12_AES_128_CBC_SHA_14                 0xc018
-#define OP_PCL_TLS12_AES_128_CBC_SHA_15                 0xc01d
-#define OP_PCL_TLS12_AES_128_CBC_SHA_16                 0xc01e
-#define OP_PCL_TLS12_AES_128_CBC_SHA_17                 0xc01f
-
-#define OP_PCL_TLS12_AES_256_CBC_SHA            0x0035
-#define OP_PCL_TLS12_AES_256_CBC_SHA_2          0x0036
-#define OP_PCL_TLS12_AES_256_CBC_SHA_3          0x0037
-#define OP_PCL_TLS12_AES_256_CBC_SHA_4          0x0038
-#define OP_PCL_TLS12_AES_256_CBC_SHA_5          0x0039
-#define OP_PCL_TLS12_AES_256_CBC_SHA_6          0x003a
-#define OP_PCL_TLS12_AES_256_CBC_SHA_7          0x008d
-#define OP_PCL_TLS12_AES_256_CBC_SHA_8          0x0091
-#define OP_PCL_TLS12_AES_256_CBC_SHA_9          0x0095
-#define OP_PCL_TLS12_AES_256_CBC_SHA_10                 0xc005
-#define OP_PCL_TLS12_AES_256_CBC_SHA_11                 0xc00a
-#define OP_PCL_TLS12_AES_256_CBC_SHA_12                 0xc00f
-#define OP_PCL_TLS12_AES_256_CBC_SHA_13                 0xc014
-#define OP_PCL_TLS12_AES_256_CBC_SHA_14                 0xc019
-#define OP_PCL_TLS12_AES_256_CBC_SHA_15                 0xc020
-#define OP_PCL_TLS12_AES_256_CBC_SHA_16                 0xc021
-#define OP_PCL_TLS12_AES_256_CBC_SHA_17                 0xc022
-
-/* #define OP_PCL_TLS12_3DES_EDE_CBC_MD5       0x0023 */
-
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA           0x001f
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_2                 0x008b
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_3                 0x008f
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_4                 0x0093
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_5                 0x000a
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_6                 0x000d
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_7                 0x0010
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_8                 0x0013
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_9                 0x0016
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_10        0x001b
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_11        0xc003
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_12        0xc008
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_13        0xc00d
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_14        0xc012
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_15        0xc017
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_16        0xc01a
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_17        0xc01b
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA_18        0xc01c
-
-#define OP_PCL_TLS12_DES40_CBC_MD5              0x0029
-
-#define OP_PCL_TLS12_DES_CBC_MD5                0x0022
-
-#define OP_PCL_TLS12_DES40_CBC_SHA              0x0008
-#define OP_PCL_TLS12_DES40_CBC_SHA_2            0x000b
-#define OP_PCL_TLS12_DES40_CBC_SHA_3            0x000e
-#define OP_PCL_TLS12_DES40_CBC_SHA_4            0x0011
-#define OP_PCL_TLS12_DES40_CBC_SHA_5            0x0014
-#define OP_PCL_TLS12_DES40_CBC_SHA_6            0x0019
-#define OP_PCL_TLS12_DES40_CBC_SHA_7            0x0026
-
-#define OP_PCL_TLS12_DES_CBC_SHA                0x001e
-#define OP_PCL_TLS12_DES_CBC_SHA_2              0x0009
-#define OP_PCL_TLS12_DES_CBC_SHA_3              0x000c
-#define OP_PCL_TLS12_DES_CBC_SHA_4              0x000f
-#define OP_PCL_TLS12_DES_CBC_SHA_5              0x0012
-#define OP_PCL_TLS12_DES_CBC_SHA_6              0x0015
-#define OP_PCL_TLS12_DES_CBC_SHA_7              0x001a
-
-#define OP_PCL_TLS12_RC4_128_MD5                0x0024
-#define OP_PCL_TLS12_RC4_128_MD5_2              0x0004
-#define OP_PCL_TLS12_RC4_128_MD5_3              0x0018
-
-#define OP_PCL_TLS12_RC4_40_MD5                         0x002b
-#define OP_PCL_TLS12_RC4_40_MD5_2               0x0003
-#define OP_PCL_TLS12_RC4_40_MD5_3               0x0017
-
-#define OP_PCL_TLS12_RC4_128_SHA                0x0020
-#define OP_PCL_TLS12_RC4_128_SHA_2              0x008a
-#define OP_PCL_TLS12_RC4_128_SHA_3              0x008e
-#define OP_PCL_TLS12_RC4_128_SHA_4              0x0092
-#define OP_PCL_TLS12_RC4_128_SHA_5              0x0005
-#define OP_PCL_TLS12_RC4_128_SHA_6              0xc002
-#define OP_PCL_TLS12_RC4_128_SHA_7              0xc007
-#define OP_PCL_TLS12_RC4_128_SHA_8              0xc00c
-#define OP_PCL_TLS12_RC4_128_SHA_9              0xc011
-#define OP_PCL_TLS12_RC4_128_SHA_10             0xc016
-
-#define OP_PCL_TLS12_RC4_40_SHA                         0x0028
-
-/* #define OP_PCL_TLS12_AES_128_CBC_SHA256     0x003c */
-#define OP_PCL_TLS12_AES_128_CBC_SHA256_2       0x003e
-#define OP_PCL_TLS12_AES_128_CBC_SHA256_3       0x003f
-#define OP_PCL_TLS12_AES_128_CBC_SHA256_4       0x0040
-#define OP_PCL_TLS12_AES_128_CBC_SHA256_5       0x0067
-#define OP_PCL_TLS12_AES_128_CBC_SHA256_6       0x006c
-
-/* #define OP_PCL_TLS12_AES_256_CBC_SHA256     0x003d */
-#define OP_PCL_TLS12_AES_256_CBC_SHA256_2       0x0068
-#define OP_PCL_TLS12_AES_256_CBC_SHA256_3       0x0069
-#define OP_PCL_TLS12_AES_256_CBC_SHA256_4       0x006a
-#define OP_PCL_TLS12_AES_256_CBC_SHA256_5       0x006b
-#define OP_PCL_TLS12_AES_256_CBC_SHA256_6       0x006d
-
-/* AEAD_AES_xxx_CCM/GCM remain to be defined... */
-
-#define OP_PCL_TLS12_3DES_EDE_CBC_MD5           0xff23
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA160        0xff30
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA224        0xff34
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA256        0xff36
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA384        0xff33
-#define OP_PCL_TLS12_3DES_EDE_CBC_SHA512        0xff35
-#define OP_PCL_TLS12_AES_128_CBC_SHA160                 0xff80
-#define OP_PCL_TLS12_AES_128_CBC_SHA224                 0xff84
-#define OP_PCL_TLS12_AES_128_CBC_SHA256                 0xff86
-#define OP_PCL_TLS12_AES_128_CBC_SHA384                 0xff83
-#define OP_PCL_TLS12_AES_128_CBC_SHA512                 0xff85
-#define OP_PCL_TLS12_AES_192_CBC_SHA160                 0xff20
-#define OP_PCL_TLS12_AES_192_CBC_SHA224                 0xff24
-#define OP_PCL_TLS12_AES_192_CBC_SHA256                 0xff26
-#define OP_PCL_TLS12_AES_192_CBC_SHA384                 0xff23
-#define OP_PCL_TLS12_AES_192_CBC_SHA512                 0xff25
-#define OP_PCL_TLS12_AES_256_CBC_SHA160                 0xff60
-#define OP_PCL_TLS12_AES_256_CBC_SHA224                 0xff64
-#define OP_PCL_TLS12_AES_256_CBC_SHA256                 0xff66
-#define OP_PCL_TLS12_AES_256_CBC_SHA384                 0xff63
-#define OP_PCL_TLS12_AES_256_CBC_SHA512                 0xff65
-
-/* For DTLS - OP_PCLID_DTLS */
-
-#define OP_PCL_DTLS_AES_128_CBC_SHA             0x002f
-#define OP_PCL_DTLS_AES_128_CBC_SHA_2           0x0030
-#define OP_PCL_DTLS_AES_128_CBC_SHA_3           0x0031
-#define OP_PCL_DTLS_AES_128_CBC_SHA_4           0x0032
-#define OP_PCL_DTLS_AES_128_CBC_SHA_5           0x0033
-#define OP_PCL_DTLS_AES_128_CBC_SHA_6           0x0034
-#define OP_PCL_DTLS_AES_128_CBC_SHA_7           0x008c
-#define OP_PCL_DTLS_AES_128_CBC_SHA_8           0x0090
-#define OP_PCL_DTLS_AES_128_CBC_SHA_9           0x0094
-#define OP_PCL_DTLS_AES_128_CBC_SHA_10          0xc004
-#define OP_PCL_DTLS_AES_128_CBC_SHA_11          0xc009
-#define OP_PCL_DTLS_AES_128_CBC_SHA_12          0xc00e
-#define OP_PCL_DTLS_AES_128_CBC_SHA_13          0xc013
-#define OP_PCL_DTLS_AES_128_CBC_SHA_14          0xc018
-#define OP_PCL_DTLS_AES_128_CBC_SHA_15          0xc01d
-#define OP_PCL_DTLS_AES_128_CBC_SHA_16          0xc01e
-#define OP_PCL_DTLS_AES_128_CBC_SHA_17          0xc01f
-
-#define OP_PCL_DTLS_AES_256_CBC_SHA             0x0035
-#define OP_PCL_DTLS_AES_256_CBC_SHA_2           0x0036
-#define OP_PCL_DTLS_AES_256_CBC_SHA_3           0x0037
-#define OP_PCL_DTLS_AES_256_CBC_SHA_4           0x0038
-#define OP_PCL_DTLS_AES_256_CBC_SHA_5           0x0039
-#define OP_PCL_DTLS_AES_256_CBC_SHA_6           0x003a
-#define OP_PCL_DTLS_AES_256_CBC_SHA_7           0x008d
-#define OP_PCL_DTLS_AES_256_CBC_SHA_8           0x0091
-#define OP_PCL_DTLS_AES_256_CBC_SHA_9           0x0095
-#define OP_PCL_DTLS_AES_256_CBC_SHA_10          0xc005
-#define OP_PCL_DTLS_AES_256_CBC_SHA_11          0xc00a
-#define OP_PCL_DTLS_AES_256_CBC_SHA_12          0xc00f
-#define OP_PCL_DTLS_AES_256_CBC_SHA_13          0xc014
-#define OP_PCL_DTLS_AES_256_CBC_SHA_14          0xc019
-#define OP_PCL_DTLS_AES_256_CBC_SHA_15          0xc020
-#define OP_PCL_DTLS_AES_256_CBC_SHA_16          0xc021
-#define OP_PCL_DTLS_AES_256_CBC_SHA_17          0xc022
-
-/* #define OP_PCL_DTLS_3DES_EDE_CBC_MD5                0x0023 */
-
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA            0x001f
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_2          0x008b
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_3          0x008f
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_4          0x0093
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_5          0x000a
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_6          0x000d
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_7          0x0010
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_8          0x0013
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_9          0x0016
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_10                 0x001b
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_11                 0xc003
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_12                 0xc008
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_13                 0xc00d
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_14                 0xc012
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_15                 0xc017
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_16                 0xc01a
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_17                 0xc01b
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA_18                 0xc01c
-
-#define OP_PCL_DTLS_DES40_CBC_MD5               0x0029
-
-#define OP_PCL_DTLS_DES_CBC_MD5                         0x0022
-
-#define OP_PCL_DTLS_DES40_CBC_SHA               0x0008
-#define OP_PCL_DTLS_DES40_CBC_SHA_2             0x000b
-#define OP_PCL_DTLS_DES40_CBC_SHA_3             0x000e
-#define OP_PCL_DTLS_DES40_CBC_SHA_4             0x0011
-#define OP_PCL_DTLS_DES40_CBC_SHA_5             0x0014
-#define OP_PCL_DTLS_DES40_CBC_SHA_6             0x0019
-#define OP_PCL_DTLS_DES40_CBC_SHA_7             0x0026
-
-
-#define OP_PCL_DTLS_DES_CBC_SHA                         0x001e
-#define OP_PCL_DTLS_DES_CBC_SHA_2               0x0009
-#define OP_PCL_DTLS_DES_CBC_SHA_3               0x000c
-#define OP_PCL_DTLS_DES_CBC_SHA_4               0x000f
-#define OP_PCL_DTLS_DES_CBC_SHA_5               0x0012
-#define OP_PCL_DTLS_DES_CBC_SHA_6               0x0015
-#define OP_PCL_DTLS_DES_CBC_SHA_7               0x001a
-
-#define OP_PCL_DTLS_3DES_EDE_CBC_MD5            0xff23
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA160                 0xff30
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA224                 0xff34
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA256                 0xff36
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA384                 0xff33
-#define OP_PCL_DTLS_3DES_EDE_CBC_SHA512                 0xff35
-#define OP_PCL_DTLS_AES_128_CBC_SHA160          0xff80
-#define OP_PCL_DTLS_AES_128_CBC_SHA224          0xff84
-#define OP_PCL_DTLS_AES_128_CBC_SHA256          0xff86
-#define OP_PCL_DTLS_AES_128_CBC_SHA384          0xff83
-#define OP_PCL_DTLS_AES_128_CBC_SHA512          0xff85
-#define OP_PCL_DTLS_AES_192_CBC_SHA160          0xff20
-#define OP_PCL_DTLS_AES_192_CBC_SHA224          0xff24
-#define OP_PCL_DTLS_AES_192_CBC_SHA256          0xff26
-#define OP_PCL_DTLS_AES_192_CBC_SHA384          0xff23
-#define OP_PCL_DTLS_AES_192_CBC_SHA512          0xff25
-#define OP_PCL_DTLS_AES_256_CBC_SHA160          0xff60
-#define OP_PCL_DTLS_AES_256_CBC_SHA224          0xff64
-#define OP_PCL_DTLS_AES_256_CBC_SHA256          0xff66
-#define OP_PCL_DTLS_AES_256_CBC_SHA384          0xff63
-#define OP_PCL_DTLS_AES_256_CBC_SHA512          0xff65
+/*
+ * For SSL/TLS/DTLS - OP_PCL_TLS
+ * For more details see IANA TLS Cipher Suite registry:
+ * https://www.iana.org/assignments/tls-parameters/tls-parameters.xhtml
+ * Note: private/internal-use suites (reserved by IANA) use OP_PCL_PVT_TLS
+ */
+#define OP_PCL_TLS_RSA_EXPORT_WITH_RC4_40_MD5          0x0003
+#define OP_PCL_TLS_RSA_WITH_RC4_128_MD5                        0x0004
+#define OP_PCL_TLS_RSA_WITH_RC4_128_SHA                        0x0005
+#define OP_PCL_TLS_RSA_EXPORT_WITH_DES40_CBC_SHA       0x0008
+#define OP_PCL_TLS_RSA_WITH_DES_CBC_SHA                        0x0009
+#define OP_PCL_TLS_RSA_WITH_3DES_EDE_CBC_SHA           0x000a
+#define OP_PCL_TLS_DH_DSS_EXPORT_WITH_DES40_CBC_SHA    0x000b
+#define OP_PCL_TLS_DH_DSS_WITH_DES_CBC_SHA             0x000c
+#define OP_PCL_TLS_DH_DSS_WITH_3DES_EDE_CBC_SHA                0x000d
+#define OP_PCL_TLS_DH_RSA_EXPORT_WITH_DES40_CBC_SHA    0x000e
+#define OP_PCL_TLS_DH_RSA_WITH_DES_CBC_SHA             0x000f
+#define OP_PCL_TLS_DH_RSA_WITH_3DES_EDE_CBC_SHA                0x0010
+#define OP_PCL_TLS_DHE_DSS_EXPORT_WITH_DES40_CBC_SHA   0x0011
+#define OP_PCL_TLS_DHE_DSS_WITH_DES_CBC_SHA            0x0012
+#define OP_PCL_TLS_DHE_DSS_WITH_3DES_EDE_CBC_SHA       0x0013
+#define OP_PCL_TLS_DHE_RSA_EXPORT_WITH_DES40_CBC_SHA   0x0014
+#define OP_PCL_TLS_DHE_RSA_WITH_DES_CBC_SHA            0x0015
+#define OP_PCL_TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA       0x0016
+#define OP_PCL_TLS_DH_anon_EXPORT_WITH_RC4_40_MD5      0x0017
+#define OP_PCL_TLS_DH_anon_WITH_RC4_128_MD5            0x0018
+#define OP_PCL_TLS_DH_anon_EXPORT_WITH_DES40_CBC_SHA   0x0019
+#define OP_PCL_TLS_DH_anon_WITH_DES_CBC_SHA            0x001a
+#define OP_PCL_TLS_DH_anon_WITH_3DES_EDE_CBC_SHA       0x001b
+#define OP_PCL_TLS_KRB5_WITH_DES_CBC_SHA               0x001e
+#define OP_PCL_TLS_KRB5_WITH_3DES_EDE_CBC_SHA          0x001f
+#define OP_PCL_TLS_KRB5_WITH_RC4_128_SHA               0x0020
+#define OP_PCL_TLS_KRB5_WITH_3DES_EDE_CBC_MD5          0x0023
+#define OP_PCL_TLS_KRB5_WITH_DES_CBC_MD5               0x0022
+#define OP_PCL_TLS_KRB5_WITH_RC4_128_MD5               0x0024
+#define OP_PCL_TLS_KRB5_EXPORT_WITH_DES_CBC_40_SHA     0x0026
+#define OP_PCL_TLS_KRB5_EXPORT_WITH_RC4_40_SHA         0x0028
+#define OP_PCL_TLS_KRB5_EXPORT_WITH_DES_CBC_40_MD5     0x0029
+#define OP_PCL_TLS_KRB5_EXPORT_WITH_RC4_40_MD5         0x002b
+#define OP_PCL_TLS_RSA_WITH_AES_128_CBC_SHA            0x002f
+#define OP_PCL_TLS_DH_DSS_WITH_AES_128_CBC_SHA         0x0030
+#define OP_PCL_TLS_DH_RSA_WITH_AES_128_CBC_SHA         0x0031
+#define OP_PCL_TLS_DHE_DSS_WITH_AES_128_CBC_SHA                0x0032
+#define OP_PCL_TLS_DHE_RSA_WITH_AES_128_CBC_SHA                0x0033
+#define OP_PCL_TLS_DH_anon_WITH_AES_128_CBC_SHA                0x0034
+#define OP_PCL_TLS_RSA_WITH_AES_256_CBC_SHA            0x0035
+#define OP_PCL_TLS_DH_DSS_WITH_AES_256_CBC_SHA         0x0036
+#define OP_PCL_TLS_DH_RSA_WITH_AES_256_CBC_SHA         0x0037
+#define OP_PCL_TLS_DHE_DSS_WITH_AES_256_CBC_SHA                0x0038
+#define OP_PCL_TLS_DHE_RSA_WITH_AES_256_CBC_SHA                0x0039
+#define OP_PCL_TLS_DH_anon_WITH_AES_256_CBC_SHA                0x003a
+#define OP_PCL_TLS_RSA_WITH_AES_128_CBC_SHA256         0x003c
+#define OP_PCL_TLS_RSA_WITH_AES_256_CBC_SHA256         0x003d
+#define OP_PCL_TLS_DH_DSS_WITH_AES_128_CBC_SHA256      0x003e
+#define OP_PCL_TLS_DH_RSA_WITH_AES_128_CBC_SHA256      0x003f
+#define OP_PCL_TLS_DHE_DSS_WITH_AES_128_CBC_SHA256     0x0040
+#define OP_PCL_TLS_DHE_RSA_WITH_AES_128_CBC_SHA256     0x0067
+#define OP_PCL_TLS_DH_DSS_WITH_AES_256_CBC_SHA256      0x0068
+#define OP_PCL_TLS_DH_RSA_WITH_AES_256_CBC_SHA256      0x0069
+#define OP_PCL_TLS_DHE_DSS_WITH_AES_256_CBC_SHA256     0x006a
+#define OP_PCL_TLS_DHE_RSA_WITH_AES_256_CBC_SHA256     0x006b
+#define OP_PCL_TLS_DH_anon_WITH_AES_128_CBC_SHA256     0x006c
+#define OP_PCL_TLS_DH_anon_WITH_AES_256_CBC_SHA256     0x006d
+#define OP_PCL_TLS_PSK_WITH_RC4_128_SHA                        0x008a
+#define OP_PCL_TLS_PSK_WITH_3DES_EDE_CBC_SHA           0x008b
+#define OP_PCL_TLS_PSK_WITH_AES_128_CBC_SHA            0x008c
+#define OP_PCL_TLS_PSK_WITH_AES_256_CBC_SHA            0x008d
+#define OP_PCL_TLS_DHE_PSK_WITH_RC4_128_SHA            0x008e
+#define OP_PCL_TLS_DHE_PSK_WITH_3DES_EDE_CBC_SHA       0x008f
+#define OP_PCL_TLS_DHE_PSK_WITH_AES_128_CBC_SHA                0x0090
+#define OP_PCL_TLS_DHE_PSK_WITH_AES_256_CBC_SHA                0x0091
+#define OP_PCL_TLS_RSA_PSK_WITH_RC4_128_SHA            0x0092
+#define OP_PCL_TLS_RSA_PSK_WITH_3DES_EDE_CBC_SHA       0x0093
+#define OP_PCL_TLS_RSA_PSK_WITH_AES_128_CBC_SHA                0x0094
+#define OP_PCL_TLS_RSA_PSK_WITH_AES_256_CBC_SHA                0x0095
+#define OP_PCL_TLS_RSA_WITH_AES_128_GCM_SHA256         0x009c
+#define OP_PCL_TLS_RSA_WITH_AES_256_GCM_SHA384         0x009d
+#define OP_PCL_TLS_DHE_RSA_WITH_AES_128_GCM_SHA256     0x009e
+#define OP_PCL_TLS_DHE_RSA_WITH_AES_256_GCM_SHA384     0x009f
+#define OP_PCL_TLS_DH_RSA_WITH_AES_128_GCM_SHA256      0x00a0
+#define OP_PCL_TLS_DH_RSA_WITH_AES_256_GCM_SHA384      0x00a1
+#define OP_PCL_TLS_DHE_DSS_WITH_AES_128_GCM_SHA256     0x00a2
+#define OP_PCL_TLS_DHE_DSS_WITH_AES_256_GCM_SHA384     0x00a3
+#define OP_PCL_TLS_DH_DSS_WITH_AES_128_GCM_SHA256      0x00a4
+#define OP_PCL_TLS_DH_DSS_WITH_AES_256_GCM_SHA384      0x00a5
+#define OP_PCL_TLS_DH_anon_WITH_AES_128_GCM_SHA256     0x00a6
+#define OP_PCL_TLS_DH_anon_WITH_AES_256_GCM_SHA384     0x00a7
+#define OP_PCL_TLS_PSK_WITH_AES_128_GCM_SHA256         0x00a8
+#define OP_PCL_TLS_PSK_WITH_AES_256_GCM_SHA384         0x00a9
+#define OP_PCL_TLS_DHE_PSK_WITH_AES_128_GCM_SHA256     0x00aa
+#define OP_PCL_TLS_DHE_PSK_WITH_AES_256_GCM_SHA384     0x00ab
+#define OP_PCL_TLS_RSA_PSK_WITH_AES_128_GCM_SHA256     0x00ac
+#define OP_PCL_TLS_RSA_PSK_WITH_AES_256_GCM_SHA384     0x00ad
+#define OP_PCL_TLS_PSK_WITH_AES_128_CBC_SHA256         0x00ae
+#define OP_PCL_TLS_PSK_WITH_AES_256_CBC_SHA384         0x00af
+#define OP_PCL_TLS_DHE_PSK_WITH_AES_128_CBC_SHA256     0x00b2
+#define OP_PCL_TLS_DHE_PSK_WITH_AES_256_CBC_SHA384     0x00b3
+#define OP_PCL_TLS_RSA_PSK_WITH_AES_128_CBC_SHA256     0x00b6
+#define OP_PCL_TLS_RSA_PSK_WITH_AES_256_CBC_SHA384     0x00b7
+#define OP_PCL_TLS_ECDH_ECDSA_WITH_RC4_128_SHA         0xc002
+#define OP_PCL_TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA    0xc003
+#define OP_PCL_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA     0xc004
+#define OP_PCL_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA     0xc005
+#define OP_PCL_TLS_ECDHE_ECDSA_WITH_RC4_128_SHA                0xc007
+#define OP_PCL_TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA   0xc008
+#define OP_PCL_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA    0xc009
+#define OP_PCL_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA    0xc00a
+#define OP_PCL_TLS_ECDH_RSA_WITH_RC4_128_SHA           0xc00c
+#define OP_PCL_TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA      0xc00d
+#define OP_PCL_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA       0xc00e
+#define OP_PCL_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA       0xc00f
+#define OP_PCL_TLS_ECDHE_RSA_WITH_RC4_128_SHA          0xc011
+#define OP_PCL_TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA     0xc012
+#define OP_PCL_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA      0xc013
+#define OP_PCL_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA      0xc014
+#define OP_PCL_TLS_ECDH_anon_WITH_RC4_128_SHA          0xc016
+#define OP_PCL_TLS_ECDH_anon_WITH_3DES_EDE_CBC_SHA     0xc017
+#define OP_PCL_TLS_ECDH_anon_WITH_AES_128_CBC_SHA      0xc018
+#define OP_PCL_TLS_ECDH_anon_WITH_AES_256_CBC_SHA      0xc019
+#define OP_PCL_TLS_SRP_SHA_WITH_3DES_EDE_CBC_SHA       0xc01a
+#define OP_PCL_TLS_SRP_SHA_RSA_WITH_3DES_EDE_CBC_SHA   0xc01b
+#define OP_PCL_TLS_SRP_SHA_DSS_WITH_3DES_EDE_CBC_SHA   0xc01c
+#define OP_PCL_TLS_SRP_SHA_WITH_AES_128_CBC_SHA                0xc01d
+#define OP_PCL_TLS_SRP_SHA_RSA_WITH_AES_128_CBC_SHA    0xc01e
+#define OP_PCL_TLS_SRP_SHA_DSS_WITH_AES_128_CBC_SHA    0xc01f
+#define OP_PCL_TLS_SRP_SHA_WITH_AES_256_CBC_SHA                0xc020
+#define OP_PCL_TLS_SRP_SHA_RSA_WITH_AES_256_CBC_SHA    0xc021
+#define OP_PCL_TLS_SRP_SHA_DSS_WITH_AES_256_CBC_SHA    0xc022
+#define OP_PCL_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 0xc023
+#define OP_PCL_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384 0xc024
+#define OP_PCL_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256  0xc025
+#define OP_PCL_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384  0xc026
+#define OP_PCL_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256   0xc027
+#define OP_PCL_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384   0xc028
+#define OP_PCL_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256    0xc029
+#define OP_PCL_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384    0xc02a
+#define OP_PCL_TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 0xc02b
+#define OP_PCL_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 0xc02c
+#define OP_PCL_TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256  0xc02d
+#define OP_PCL_TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384  0xc02e
+#define OP_PCL_TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256   0xc02f
+#define OP_PCL_TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384   0xc030
+#define OP_PCL_TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256    0xc031
+#define OP_PCL_TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384    0xc032
+#define OP_PCL_TLS_ECDHE_PSK_WITH_RC4_128_SHA          0xc033
+#define OP_PCL_TLS_ECDHE_PSK_WITH_3DES_EDE_CBC_SHA     0xc034
+#define OP_PCL_TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA      0xc035
+#define OP_PCL_TLS_ECDHE_PSK_WITH_AES_256_CBC_SHA      0xc036
+#define OP_PCL_TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA256   0xc037
+#define OP_PCL_TLS_ECDHE_PSK_WITH_AES_256_CBC_SHA384   0xc038
+#define OP_PCL_PVT_TLS_3DES_EDE_CBC_MD5                        0xff23
+#define OP_PCL_PVT_TLS_3DES_EDE_CBC_SHA160             0xff30
+#define OP_PCL_PVT_TLS_3DES_EDE_CBC_SHA384             0xff33
+#define OP_PCL_PVT_TLS_3DES_EDE_CBC_SHA224             0xff34
+#define OP_PCL_PVT_TLS_3DES_EDE_CBC_SHA512             0xff35
+#define OP_PCL_PVT_TLS_3DES_EDE_CBC_SHA256             0xff36
+#define OP_PCL_PVT_TLS_AES_256_CBC_SHA160              0xff60
+#define OP_PCL_PVT_TLS_AES_256_CBC_SHA384              0xff63
+#define OP_PCL_PVT_TLS_AES_256_CBC_SHA224              0xff64
+#define OP_PCL_PVT_TLS_AES_256_CBC_SHA512              0xff65
+#define OP_PCL_PVT_TLS_AES_256_CBC_SHA256              0xff66
+#define OP_PCL_PVT_TLS_AES_128_CBC_SHA160              0xff80
+#define OP_PCL_PVT_TLS_AES_128_CBC_SHA384              0xff83
+#define OP_PCL_PVT_TLS_AES_128_CBC_SHA224              0xff84
+#define OP_PCL_PVT_TLS_AES_128_CBC_SHA512              0xff85
+#define OP_PCL_PVT_TLS_AES_128_CBC_SHA256              0xff86
+#define OP_PCL_PVT_TLS_AES_192_CBC_SHA160              0xff90
+#define OP_PCL_PVT_TLS_AES_192_CBC_SHA384              0xff93
+#define OP_PCL_PVT_TLS_AES_192_CBC_SHA224              0xff94
+#define OP_PCL_PVT_TLS_AES_192_CBC_SHA512              0xff95
+#define OP_PCL_PVT_TLS_AES_192_CBC_SHA256              0xff96
+#define OP_PCL_PVT_TLS_MASTER_SECRET_PRF_FE            0xfffe
+#define OP_PCL_PVT_TLS_MASTER_SECRET_PRF_FF            0xffff
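
These protinfo values occupy the low 16 bits of a CAAM protocol operation word, alongside an OP_PCLID_* protocol class defined earlier in this header. A minimal sketch of composing such a word (the surrounding operation-type and direction bits, and the descriptor machinery around them, are omitted and assumed):

	/* Sketch: TLS 1.2 record processing with an ECDHE-RSA AES-128-GCM suite.
	 * Only the PCLID and protinfo fields are shown. */
	uint32_t proto_op = OP_PCLID_TLS12 |
			    OP_PCL_TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256;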
 
 /* 802.16 WiMAX protinfos */
 #define OP_PCL_WIMAX_OFDM                       0x0201
 #define OP_PCL_LTE_MIXED_AUTH_SHIFT    0
 #define OP_PCL_LTE_MIXED_AUTH_MASK     (3 << OP_PCL_LTE_MIXED_AUTH_SHIFT)
 #define OP_PCL_LTE_MIXED_ENC_SHIFT     8
-#define OP_PCL_LTE_MIXED_ENC_MASK      (3 < OP_PCL_LTE_MIXED_ENC_SHIFT)
+#define OP_PCL_LTE_MIXED_ENC_MASK      (3 << OP_PCL_LTE_MIXED_ENC_SHIFT)
 #define OP_PCL_LTE_MIXED_AUTH_NULL     (OP_PCL_LTE_NULL << \
                                         OP_PCL_LTE_MIXED_AUTH_SHIFT)
 #define OP_PCL_LTE_MIXED_AUTH_SNOW     (OP_PCL_LTE_SNOW << \
index 91f3e06..febcb6d 100644 (file)
@@ -409,6 +409,35 @@ cnstr_shdsc_kasumi_f9(uint32_t *descbuf, bool ps, bool swap,
        return PROGRAM_FINALIZE(p);
 }
 
+/**
+ * cnstr_shdsc_crc - CRC32 Accelerator (IEEE 802 CRC32 protocol mode)
+ * @descbuf: pointer to descriptor-under-construction buffer
+ * @swap: must be true when core endianness doesn't match SEC endianness
+ *
+ * Return: size of descriptor written in words or negative number on error
+ */
+static inline int
+cnstr_shdsc_crc(uint32_t *descbuf, bool swap)
+{
+       struct program prg;
+       struct program *p = &prg;
+
+       PROGRAM_CNTXT_INIT(p, descbuf, 0);
+       if (swap)
+               PROGRAM_SET_BSWAP(p);
+
+       SHR_HDR(p, SHR_ALWAYS, 1, 0);
+
+       MATHB(p, SEQINSZ, SUB, MATH2, VSEQINSZ, 4, 0);
+       ALG_OPERATION(p, OP_ALG_ALGSEL_CRC,
+                     OP_ALG_AAI_802 | OP_ALG_AAI_DOC,
+                     OP_ALG_AS_FINALIZE, 0, DIR_ENC);
+       SEQFIFOLOAD(p, MSG2, 0, VLF | LAST2);
+       SEQSTORE(p, CONTEXT2, 0, 4, 0);
+
+       return PROGRAM_FINALIZE(p);
+}
+
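The CRC constructor is relocated here unchanged. A hedged usage sketch — the buffer size is an assumption, with 64 words bounding a CAAM shared descriptor:

	/* Sketch: build the CRC32 shared descriptor into a local buffer. */
	uint32_t descbuf[64];
	int desc_len = cnstr_shdsc_crc(descbuf, /* swap */ false);

	if (desc_len < 0)
		return desc_len; /* construction failed */
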
 /**
  * cnstr_shdsc_gcm_encap - AES-GCM encap as a shared descriptor
  * @descbuf: pointer to descriptor-under-construction buffer
@@ -614,33 +643,4 @@ cnstr_shdsc_gcm_decap(uint32_t *descbuf, bool ps, bool swap,
        return PROGRAM_FINALIZE(p);
 }
 
-/**
- * cnstr_shdsc_crc - CRC32 Accelerator (IEEE 802 CRC32 protocol mode)
- * @descbuf: pointer to descriptor-under-construction buffer
- * @swap: must be true when core endianness doesn't match SEC endianness
- *
- * Return: size of descriptor written in words or negative number on error
- */
-static inline int
-cnstr_shdsc_crc(uint32_t *descbuf, bool swap)
-{
-       struct program prg;
-       struct program *p = &prg;
-
-       PROGRAM_CNTXT_INIT(p, descbuf, 0);
-       if (swap)
-               PROGRAM_SET_BSWAP(p);
-
-       SHR_HDR(p, SHR_ALWAYS, 1, 0);
-
-       MATHB(p, SEQINSZ, SUB, MATH2, VSEQINSZ, 4, 0);
-       ALG_OPERATION(p, OP_ALG_ALGSEL_CRC,
-                     OP_ALG_AAI_802 | OP_ALG_AAI_DOC,
-                     OP_ALG_AS_FINALIZE, 0, DIR_ENC);
-       SEQFIFOLOAD(p, MSG2, 0, VLF | LAST2);
-       SEQSTORE(p, CONTEXT2, 0, 4, 0);
-
-       return PROGRAM_FINALIZE(p);
-}
-
 #endif /* __DESC_ALGO_H__ */
index 35cc02a..d256a39 100644 (file)
@@ -522,44 +522,133 @@ enum ipsec_icv_size {
 
 /*
  * IPSec ESP Datapath Protocol Override Register (DPOVRD)
+ * IPSEC_N_* defines are for IPsec new mode.
  */
 
-#define IPSEC_DECO_DPOVRD_USE          0x80
+/**
+ * IPSEC_DPOVRD_USE - DPOVRD will override values specified in the PDB
+ */
+#define IPSEC_DPOVRD_USE       BIT(31)
 
-struct ipsec_deco_dpovrd {
-       uint8_t ovrd_ecn;
-       uint8_t ip_hdr_len;
-       uint8_t nh_offset;
-       union {
-               uint8_t next_header;    /* next header if encap */
-               uint8_t rsvd;           /* reserved if decap */
-       };
-};
+/**
+ * IPSEC_DPOVRD_ECN_SHIFT - Explicit Congestion Notification
+ *
+ * If the MSB of this 4-bit field is set, its 2 LSBs replace the ECN bits
+ * in the IP header.
+ */
+#define IPSEC_DPOVRD_ECN_SHIFT         24
 
-struct ipsec_new_encap_deco_dpovrd {
-#define IPSEC_NEW_ENCAP_DECO_DPOVRD_USE        0x8000
-       uint16_t ovrd_ip_hdr_len;       /* OVRD + outer IP header material
-                                        * length
-                                        */
-#define IPSEC_NEW_ENCAP_OIMIF          0x80
-       uint8_t oimif_aoipho;           /* OIMIF + actual outer IP header
-                                        * offset
-                                        */
-       uint8_t rsvd;
-};
+/**
+ * IPSEC_DPOVRD_ECN_MASK - See IPSEC_DPOVRD_ECN_SHIFT
+ */
+#define IPSEC_DPOVRD_ECN_MASK          (0xf << IPSEC_DPOVRD_ECN_SHIFT)
 
-struct ipsec_new_decap_deco_dpovrd {
-       uint8_t ovrd;
-       uint8_t aoipho_hi;              /* upper nibble of actual outer IP
-                                        * header
-                                        */
-       uint16_t aoipho_lo_ip_hdr_len;  /* lower nibble of actual outer IP
-                                        * header + outer IP header material
-                                        */
-};
+/**
+ * IPSEC_DPOVRD_IP_HDR_LEN_SHIFT - The length (in bytes) of the portion of the
+ *                                 IP header that is not encrypted
+ */
+#define IPSEC_DPOVRD_IP_HDR_LEN_SHIFT  16
+
+/**
+ * IPSEC_DPOVRD_IP_HDR_LEN_MASK - See IPSEC_DPOVRD_IP_HDR_LEN_SHIFT
+ */
+#define IPSEC_DPOVRD_IP_HDR_LEN_MASK   (0xff << IPSEC_DPOVRD_IP_HDR_LEN_SHIFT)
+
+/**
+ * IPSEC_DPOVRD_NH_OFFSET_SHIFT - The location of the next header field within
+ *                                the IP header of the transport mode packet
+ *
+ * Encap:
+ *     ESP_Trailer_NH <-- IP_Hdr[DPOVRD[NH_OFFSET]]
+ *     IP_Hdr[DPOVRD[NH_OFFSET]] <-- DPOVRD[NH]
+ * Decap:
+ *     IP_Hdr[DPOVRD[NH_OFFSET]] <-- ESP_Trailer_NH
+ */
+#define IPSEC_DPOVRD_NH_OFFSET_SHIFT   8
+
+/**
+ * IPSEC_DPOVRD_NH_OFFSET_MASK - See IPSEC_DPOVRD_NH_OFFSET_SHIFT
+ */
+#define IPSEC_DPOVRD_NH_OFFSET_MASK    (0xff << IPSEC_DPOVRD_NH_OFFSET_SHIFT)
+
+/**
+ * IPSEC_DPOVRD_NH_MASK - See IPSEC_DPOVRD_NH_OFFSET_SHIFT
+ *                        Valid only for encapsulation.
+ */
+#define IPSEC_DPOVRD_NH_MASK           0xff
+
+/**
+ * IPSEC_N_ENCAP_DPOVRD_OIM_LEN_SHIFT - Outer IP header Material length (encap)
+ *                                      Valid only if L2_COPY is not set.
+ */
+#define IPSEC_N_ENCAP_DPOVRD_OIM_LEN_SHIFT     16
+
+/**
+ * IPSEC_N_ENCAP_DPOVRD_OIM_LEN_MASK - See IPSEC_N_ENCAP_DPOVRD_OIM_LEN_SHIFT
+ */
+#define IPSEC_N_ENCAP_DPOVRD_OIM_LEN_MASK \
+       (0xfff << IPSEC_N_ENCAP_DPOVRD_OIM_LEN_SHIFT)
+
+/**
+ * IPSEC_N_ENCAP_DPOVRD_L2_LEN_SHIFT - L2 header length
+ *                                     Valid only if L2_COPY is set.
+ */
+#define IPSEC_N_ENCAP_DPOVRD_L2_LEN_SHIFT      16
 
-static inline void
-__gen_auth_key(struct program *program, struct alginfo *authdata)
+/**
+ * IPSEC_N_ENCAP_DPOVRD_L2_LEN_MASK - See IPSEC_N_ENCAP_DPOVRD_L2_LEN_SHIFT
+ */
+#define IPSEC_N_ENCAP_DPOVRD_L2_LEN_MASK \
+       (0xff << IPSEC_N_ENCAP_DPOVRD_L2_LEN_SHIFT)
+
+/**
+ * IPSEC_N_ENCAP_DPOVRD_OIMIF - Outer IP header Material in Input Frame
+ */
+#define IPSEC_N_ENCAP_DPOVRD_OIMIF             BIT(15)
+
+/**
+ * IPSEC_N_ENCAP_DPOVRD_L2_COPY - L2 header present in input frame
+ *
+ * Note: For Era <= 8, this bit is reserved (not used) by HW.
+ */
+#define IPSEC_N_ENCAP_DPOVRD_L2_COPY           BIT(14)
+
+/**
+ * IPSEC_N_ENCAP_DPOVRD_AOIPHO_SHIFT - Actual Outer IP Header Offset (encap)
+ */
+#define IPSEC_N_ENCAP_DPOVRD_AOIPHO_SHIFT      8
+
+/**
+ * IPSEC_N_ENCAP_DPOVRD_AOIPHO_MASK - See IPSEC_N_ENCAP_DPOVRD_AOIPHO_SHIFT
+ */
+#define IPSEC_N_ENCAP_DPOVRD_AOIPHO_MASK \
+       (0x3c << IPSEC_N_ENCAP_DPOVRD_AOIPHO_SHIFT)
+
+/**
+ * IPSEC_N_ENCAP_DPOVRD_NH_MASK - Next Header
+ *
+ * Used in the Next Header field of the encapsulated payload.
+ */
+#define IPSEC_N_ENCAP_DPOVRD_NH_MASK           0xff
+
+/**
+ * IPSEC_N_DECAP_DPOVRD_AOIPHO_SHIFT - Actual Outer IP Header Offset (decap)
+ */
+#define IPSEC_N_DECAP_DPOVRD_AOIPHO_SHIFT      12
+
+/**
+ * IPSEC_N_DECAP_DPOVRD_AOIPHO_MASK - See IPSEC_N_DECAP_DPOVRD_AOIPHO_SHIFT
+ */
+#define IPSEC_N_DECAP_DPOVRD_AOIPHO_MASK \
+       (0xff << IPSEC_N_DECAP_DPOVRD_AOIPHO_SHIFT)
+
+/**
+ * IPSEC_N_DECAP_DPOVRD_OIM_LEN_MASK - Outer IP header Material length (decap)
+ */
+#define IPSEC_N_DECAP_DPOVRD_OIM_LEN_MASK      0xfff
+
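The struct-based DPOVRD encodings removed above are replaced by plain shift/mask defines. As a minimal sketch of how a caller might compose a new-mode encap DPOVRD word that requests L2 header copy (the helper below is illustrative, not part of this patch):

    #include <stdint.h>

    /* Hypothetical helper: build an encap DPOVRD word (new mode) that
     * enables L2 header copy for a frame starting with an l2_len-byte
     * L2 header.
     */
    static inline uint32_t make_encap_dpovrd_l2copy(uint8_t l2_len)
    {
            uint32_t dpovrd = IPSEC_DPOVRD_USE;        /* override PDB values */

            dpovrd |= IPSEC_N_ENCAP_DPOVRD_L2_COPY;    /* frame has an L2 header */
            dpovrd |= ((uint32_t)l2_len << IPSEC_N_ENCAP_DPOVRD_L2_LEN_SHIFT) &
                      IPSEC_N_ENCAP_DPOVRD_L2_LEN_MASK;
            return dpovrd;
    }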
+static inline void __gen_auth_key(struct program *program,
+                                 struct alginfo *authdata)
 {
        uint32_t dkp_protid;
 
@@ -603,6 +692,7 @@ __gen_auth_key(struct program *program, struct alginfo *authdata)
  * @descbuf: pointer to buffer used for descriptor construction
  * @ps: if 36/40bit addressing is desired, this parameter must be true
  * @swap: if true, perform descriptor byte swapping on a 4-byte boundary
+ * @share: sharing type of shared descriptor
  * @pdb: pointer to the PDB to be used with this descriptor
  *       This structure will be copied inline to the descriptor under
  *       construction. No error checking will be made. Refer to the
@@ -621,6 +711,7 @@ __gen_auth_key(struct program *program, struct alginfo *authdata)
  */
 static inline int
 cnstr_shdsc_ipsec_encap(uint32_t *descbuf, bool ps, bool swap,
+                       enum rta_share_type share,
                        struct ipsec_encap_pdb *pdb,
                        struct alginfo *cipherdata,
                        struct alginfo *authdata)
@@ -638,7 +729,7 @@ cnstr_shdsc_ipsec_encap(uint32_t *descbuf, bool ps, bool swap,
                PROGRAM_SET_BSWAP(p);
        if (ps)
                PROGRAM_SET_36BIT_ADDR(p);
-       phdr = SHR_HDR(p, SHR_SERIAL, hdr, 0);
+       phdr = SHR_HDR(p, share, hdr, 0);
        __rta_copy_ipsec_encap_pdb(p, pdb, cipherdata->algtype);
        COPY_DATA(p, pdb->ip_hdr, pdb->ip_hdr_len);
        SET_LABEL(p, hdr);
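The caller-visible change in this hunk is the new share argument; passing SHR_SERIAL reproduces the previously hard-coded behaviour. A hedged call-site sketch (the buffer size and the pre-filled PDB/alginfo structures are assumptions):

    #include <stdbool.h>
    #include <stdint.h>

    static int build_encap_shdesc(struct ipsec_encap_pdb *pdb,
                                  struct alginfo *cipher, struct alginfo *auth)
    {
            uint32_t shdesc[64];    /* illustrative size */
            int len;

            /* SHR_SERIAL keeps the pre-patch sharing semantics. */
            len = cnstr_shdsc_ipsec_encap(shdesc, true /* ps */, true /* swap */,
                                          SHR_SERIAL, pdb, cipher, auth);
            return len;     /* size in words, or negative on error */
    }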
@@ -669,6 +760,7 @@ cnstr_shdsc_ipsec_encap(uint32_t *descbuf, bool ps, bool swap,
  * @descbuf: pointer to buffer used for descriptor construction
  * @ps: if 36/40bit addressing is desired, this parameter must be true
  * @swap: if true, perform descriptor byte swapping on a 4-byte boundary
+ * @share: sharing type of shared descriptor
  * @pdb: pointer to the PDB to be used with this descriptor
  *       This structure will be copied inline to the descriptor under
  *       construction. No error checking will be made. Refer to the
@@ -687,6 +779,7 @@ cnstr_shdsc_ipsec_encap(uint32_t *descbuf, bool ps, bool swap,
  */
 static inline int
 cnstr_shdsc_ipsec_decap(uint32_t *descbuf, bool ps, bool swap,
+                       enum rta_share_type share,
                        struct ipsec_decap_pdb *pdb,
                        struct alginfo *cipherdata,
                        struct alginfo *authdata)
@@ -704,7 +797,7 @@ cnstr_shdsc_ipsec_decap(uint32_t *descbuf, bool ps, bool swap,
                PROGRAM_SET_BSWAP(p);
        if (ps)
                PROGRAM_SET_36BIT_ADDR(p);
-       phdr = SHR_HDR(p, SHR_SERIAL, hdr, 0);
+       phdr = SHR_HDR(p, share, hdr, 0);
        __rta_copy_ipsec_decap_pdb(p, pdb, cipherdata->algtype);
        SET_LABEL(p, hdr);
        pkeyjmp = JUMP(p, keyjmp, LOCAL_JUMP, ALL_TRUE, BOTH|SHRD);
@@ -1040,7 +1133,7 @@ cnstr_shdsc_ipsec_decap_des_aes_xcbc(uint32_t *descbuf,
  * layers to determine whether Outer IP Header and/or keys can be inlined or
  * not. To be used as first parameter of rta_inline_query().
  */
-#define IPSEC_NEW_ENC_BASE_DESC_LEN    (5 * CAAM_CMD_SZ + \
+#define IPSEC_NEW_ENC_BASE_DESC_LEN    (12 * CAAM_CMD_SZ + \
                                         sizeof(struct ipsec_encap_pdb))
 
 /**
@@ -1052,7 +1145,7 @@ cnstr_shdsc_ipsec_decap_des_aes_xcbc(uint32_t *descbuf,
  * layers to determine whether Outer IP Header and/or key can be inlined or
  * not. To be used as first parameter of rta_inline_query().
  */
-#define IPSEC_NEW_NULL_ENC_BASE_DESC_LEN       (4 * CAAM_CMD_SZ + \
+#define IPSEC_NEW_NULL_ENC_BASE_DESC_LEN       (11 * CAAM_CMD_SZ + \
                                                 sizeof(struct ipsec_encap_pdb))
 
 /**
@@ -1061,6 +1154,7 @@ cnstr_shdsc_ipsec_decap_des_aes_xcbc(uint32_t *descbuf,
  * @descbuf: pointer to buffer used for descriptor construction
  * @ps: if 36/40bit addressing is desired, this parameter must be true
  * @swap: must be true when core endianness doesn't match SEC endianness
+ * @share: sharing type of shared descriptor
  * @pdb: pointer to the PDB to be used with this descriptor
  *       This structure will be copied inline to the descriptor under
  *       construction. No error checking will be made. Refer to the
@@ -1080,11 +1174,21 @@ cnstr_shdsc_ipsec_decap_des_aes_xcbc(uint32_t *descbuf,
  *            compute MDHA on the fly in HW.
  *            Valid algorithm values - one of OP_PCL_IPSEC_*
  *
+ * Note: the L2 header copy functionality is implemented assuming that bits 14
+ * (currently reserved) and 16-23 (part of Outer IP Header Material Length)
+ * in the DPOVRD register are not used (which is usually the case when the L3
+ * header is provided in the PDB).
+ * When DPOVRD[14] is set, the frame starts with an L2 header; in this case,
+ * the L2 header length is found at DPOVRD[23:16]. SEC uses this length to
+ * copy the header and then clears DPOVRD[23:16] (so there is no side effect
+ * when the IPsec protocol runs later).
+ *
  * Return: size of descriptor written in words or negative number on error
  */
 static inline int
 cnstr_shdsc_ipsec_new_encap(uint32_t *descbuf, bool ps,
                            bool swap,
+                           enum rta_share_type share,
                            struct ipsec_encap_pdb *pdb,
                            uint8_t *opt_ip_hdr,
                            struct alginfo *cipherdata,
@@ -1097,6 +1201,8 @@ cnstr_shdsc_ipsec_new_encap(uint32_t *descbuf, bool ps,
        REFERENCE(pkeyjmp);
        LABEL(hdr);
        REFERENCE(phdr);
+       LABEL(l2copy);
+       REFERENCE(pl2copy);
 
        if (rta_sec_era < RTA_SEC_ERA_8) {
                pr_err("IPsec new mode encap: available only for Era %d or above\n",
@@ -1109,7 +1215,7 @@ cnstr_shdsc_ipsec_new_encap(uint32_t *descbuf, bool ps,
                PROGRAM_SET_BSWAP(p);
        if (ps)
                PROGRAM_SET_36BIT_ADDR(p);
-       phdr = SHR_HDR(p, SHR_SERIAL, hdr, 0);
+       phdr = SHR_HDR(p, share, hdr, 0);
 
        __rta_copy_ipsec_encap_pdb(p, pdb, cipherdata->algtype);
 
@@ -1128,6 +1234,16 @@ cnstr_shdsc_ipsec_new_encap(uint32_t *descbuf, bool ps,
        }
        SET_LABEL(p, hdr);
 
+       MATHB(p, DPOVRD, AND, IPSEC_N_ENCAP_DPOVRD_L2_COPY, NONE, 4, IMMED2);
+       pl2copy = JUMP(p, l2copy, LOCAL_JUMP, ALL_TRUE, MATH_Z);
+       MATHI(p, DPOVRD, RSHIFT, IPSEC_N_ENCAP_DPOVRD_L2_LEN_SHIFT, VSEQOUTSZ,
+             1, 0);
+       MATHB(p, DPOVRD, AND, ~IPSEC_N_ENCAP_DPOVRD_L2_LEN_MASK, DPOVRD, 4,
+             IMMED2);
+       /* TODO: CLASS2 corresponds to AUX=2'b10; add more intuitive defines */
+       SEQFIFOSTORE(p, METADATA, 0, 0, CLASS2 | VLF);
+       SET_LABEL(p, l2copy);
+
        pkeyjmp = JUMP(p, keyjmp, LOCAL_JUMP, ALL_TRUE, SHRD);
        if (authdata->keylen)
                __gen_auth_key(p, authdata);
@@ -1138,6 +1254,7 @@ cnstr_shdsc_ipsec_new_encap(uint32_t *descbuf, bool ps,
        PROTOCOL(p, OP_TYPE_ENCAP_PROTOCOL,
                 OP_PCLID_IPSEC_NEW,
                 (uint16_t)(cipherdata->algtype | authdata->algtype));
+       PATCH_JUMP(p, pl2copy, l2copy);
        PATCH_JUMP(p, pkeyjmp, keyjmp);
        PATCH_HDR(p, phdr, hdr);
        return PROGRAM_FINALIZE(p);
@@ -1171,6 +1288,7 @@ cnstr_shdsc_ipsec_new_encap(uint32_t *descbuf, bool ps,
  * @descbuf: pointer to buffer used for descriptor construction
  * @ps: if 36/40bit addressing is desired, this parameter must be true
  * @swap: must be true when core endianness doesn't match SEC endianness
+ * @share: sharing type of shared descriptor
  * @pdb: pointer to the PDB to be used with this descriptor
  *       This structure will be copied inline to the descriptor under
  *       construction. No error checking will be made. Refer to the
@@ -1188,6 +1306,7 @@ cnstr_shdsc_ipsec_new_encap(uint32_t *descbuf, bool ps,
 static inline int
 cnstr_shdsc_ipsec_new_decap(uint32_t *descbuf, bool ps,
                            bool swap,
+                           enum rta_share_type share,
                            struct ipsec_decap_pdb *pdb,
                            struct alginfo *cipherdata,
                            struct alginfo *authdata)
@@ -1211,7 +1330,7 @@ cnstr_shdsc_ipsec_new_decap(uint32_t *descbuf, bool ps,
                PROGRAM_SET_BSWAP(p);
        if (ps)
                PROGRAM_SET_36BIT_ADDR(p);
-       phdr = SHR_HDR(p, SHR_SERIAL, hdr, 0);
+       phdr = SHR_HDR(p, share, hdr, 0);
        __rta_copy_ipsec_decap_pdb(p, pdb, cipherdata->algtype);
        SET_LABEL(p, hdr);
        pkeyjmp = JUMP(p, keyjmp, LOCAL_JUMP, ALL_TRUE, SHRD);
diff --git a/drivers/crypto/dpaa2_sec/hw/desc/pdcp.h b/drivers/crypto/dpaa2_sec/hw/desc/pdcp.h
new file mode 100644 (file)
index 0000000..719ef60
--- /dev/null
@@ -0,0 +1,2796 @@
+/*
+ * Copyright 2008-2013 Freescale Semiconductor, Inc.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause or GPL-2.0+
+ */
+
+#ifndef __DESC_PDCP_H__
+#define __DESC_PDCP_H__
+
+#include "hw/rta.h"
+#include "common.h"
+
+/**
+ * DOC: PDCP Shared Descriptor Constructors
+ *
+ * Shared descriptors for PDCP protocol.
+ */
+
+/**
+ * PDCP_NULL_MAX_FRAME_LEN - The maximum frame length supported by the
+ *                           PDCP NULL protocol.
+ */
+#define PDCP_NULL_MAX_FRAME_LEN                0x00002FFF
+
+/**
+ * PDCP_MAC_I_LEN - The length of the MAC-I for PDCP protocol operation
+ */
+#define PDCP_MAC_I_LEN                 0x00000004
+
+/**
+ * PDCP_MAX_FRAME_LEN_STATUS - The status returned in the FD status/command
+ *                             field when the input frame is larger than
+ *                             PDCP_NULL_MAX_FRAME_LEN.
+ */
+#define PDCP_MAX_FRAME_LEN_STATUS      0xF1
+
+/**
+ * PDCP_C_PLANE_SN_MASK - This mask is used in the PDCP descriptors for
+ *                        extracting the sequence number (SN) from the PDCP
+ *                        Control Plane header. For PDCP Control Plane, the SN
+ *                        is constant (5 bits) as opposed to PDCP Data Plane
+ *                        (7/12/15 bits).
+ */
+#define PDCP_C_PLANE_SN_MASK           0x1F000000
+#define PDCP_C_PLANE_SN_MASK_BE                0x0000001F
+
+/**
+ * PDCP_U_PLANE_15BIT_SN_MASK - This mask is used in the PDCP descriptors for
+ *                              extracting the 15-bit sequence number (SN)
+ *                              from the PDCP User Plane header.
+ */
+#define PDCP_U_PLANE_15BIT_SN_MASK     0xFF7F0000
+#define PDCP_U_PLANE_15BIT_SN_MASK_BE  0x00007FFF
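For reference, a host-side sketch of what the BE variant of this mask extracts (hypothetical helper, not part of this patch):

    #include <stdint.h>

    /* Extract the 15-bit SN from the first two bytes of a PDCP User Plane
     * long-SN header, mirroring PDCP_U_PLANE_15BIT_SN_MASK_BE.
     */
    static inline uint16_t pdcp_u_plane_15bit_sn(const uint8_t *hdr)
    {
            uint16_t first2 = (uint16_t)((hdr[0] << 8) | hdr[1]);

            return first2 & PDCP_U_PLANE_15BIT_SN_MASK_BE;  /* keep bits 14:0 */
    }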
+
+/**
+ * PDCP_BEARER_MASK - This mask is used for masking out the bearer for PDCP
+ *                    processing with SNOW f9 in LTE.
+ *
+ * The value on which this mask is applied is formatted as below:
+ *     Count-C (32 bit) | Bearer (5 bit) | Direction (1 bit) | 0 (26 bits)
+ *
+ * Applying this mask is done for creating the upper 64 bits of the IV needed
+ * for SNOW f9.
+ *
+ * The lower 32 bits of the mask are used for masking the direction for AES
+ * CMAC IV.
+ */
+#define PDCP_BEARER_MASK               0x00000004FFFFFFFFull
+#define PDCP_BEARER_MASK_BE            0xFFFFFFFF04000000ull
+
+/**
+ * PDCP_DIR_MASK - This mask is used for masking out the direction for PDCP
+ *                 processing with SNOW f9 in LTE.
+ *
+ * The value on which this mask is applied is formatted as below:
+ *     Bearer (5 bit) | Direction (1 bit) | 0 (26 bits)
+ *
+ * Applying this mask is done for creating the lower 32 bits of the IV needed
+ * for SNOW f9.
+ *
+ * The upper 32 bits of the mask are used for masking the direction for AES
+ * CMAC IV.
+ */
+#define PDCP_DIR_MASK                  0x00000000000000F8ull
+#define PDCP_DIR_MASK_BE               0xF800000000000000ull
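To make the layout above concrete, an illustrative helper (not part of this patch) that builds the 64-bit value these masks are applied to:

    #include <stdint.h>

    /* Count-C (32b) | Bearer (5b) | Direction (1b) | 26 zero bits */
    static inline uint64_t pdcp_f9_iv_material(uint32_t count_c, uint8_t bearer,
                                               uint8_t direction)
    {
            return ((uint64_t)count_c << 32) |
                   ((uint64_t)(bearer & 0x1f) << 27) |
                   ((uint64_t)(direction & 0x1) << 26);
    }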
+
+/**
+ * PDCP_NULL_INT_MAC_I_VAL - The value of the PDCP PDU MAC-I in case NULL
+ *                           integrity is used.
+ */
+#define PDCP_NULL_INT_MAC_I_VAL                0x00000000
+
+/**
+ * PDCP_NULL_INT_ICV_CHECK_FAILED_STATUS - The status used to report an ICV
+ *                                         check failure in case of NULL
+ *                                         integrity Control Plane processing.
+ */
+#define PDCP_NULL_INT_ICV_CHECK_FAILED_STATUS  0x0A
+/**
+ * PDCP_DPOVRD_HFN_OV_EN - Value to be used in the FD status/cmd field to
+ *                         indicate the HFN override mechanism is active for the
+ *                         frame.
+ */
+#define PDCP_DPOVRD_HFN_OV_EN          0x80000000
+
+/**
+ * PDCP_P4080REV2_HFN_OV_BUFLEN - The length in bytes of the supplementary space
+ *                                that must be provided by the user at the
+ *                                beginning of the input frame buffer for
+ *                                P4080 REV 2.
+ *
+ * The format of the frame buffer is the following:
+ *
+ *  |<---PDCP_P4080REV2_HFN_OV_BUFLEN-->|
+ * //===================================||============||==============\\
+ * || PDCP_DPOVRD_HFN_OV_EN | HFN value || PDCP Header|| PDCP Payload ||
+ * \\===================================||============||==============//
+ *
+ * If the HFN override mechanism is not desired, then the MSB of the first
+ * 4 bytes must be set to 0.
+ */
+#define PDCP_P4080REV2_HFN_OV_BUFLEN   4
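A hypothetical host-side helper that fills this supplementary space (the big-endian byte order is an assumption about how SEC reads the word):

    #include <stdint.h>

    static inline void pdcp_set_hfn_override(uint8_t *frame_buf, uint32_t hfn)
    {
            uint32_t word = PDCP_DPOVRD_HFN_OV_EN | hfn;

            frame_buf[0] = (uint8_t)(word >> 24);   /* MSB first */
            frame_buf[1] = (uint8_t)(word >> 16);
            frame_buf[2] = (uint8_t)(word >> 8);
            frame_buf[3] = (uint8_t)word;
    }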
+
+/**
+ * enum cipher_type_pdcp - Type selectors for cipher types in PDCP protocol OP
+ *                         instructions.
+ * @PDCP_CIPHER_TYPE_NULL: NULL
+ * @PDCP_CIPHER_TYPE_SNOW: SNOW F8
+ * @PDCP_CIPHER_TYPE_AES: AES
+ * @PDCP_CIPHER_TYPE_ZUC: ZUCE
+ * @PDCP_CIPHER_TYPE_INVALID: invalid option
+ */
+enum cipher_type_pdcp {
+       PDCP_CIPHER_TYPE_NULL,
+       PDCP_CIPHER_TYPE_SNOW,
+       PDCP_CIPHER_TYPE_AES,
+       PDCP_CIPHER_TYPE_ZUC,
+       PDCP_CIPHER_TYPE_INVALID
+};
+
+/**
+ * enum auth_type_pdcp - Type selectors for integrity types in PDCP protocol OP
+ *                       instructions.
+ * @PDCP_AUTH_TYPE_NULL: NULL
+ * @PDCP_AUTH_TYPE_SNOW: SNOW F9
+ * @PDCP_AUTH_TYPE_AES: AES CMAC
+ * @PDCP_AUTH_TYPE_ZUC: ZUCA
+ * @PDCP_AUTH_TYPE_INVALID: invalid option
+ */
+enum auth_type_pdcp {
+       PDCP_AUTH_TYPE_NULL,
+       PDCP_AUTH_TYPE_SNOW,
+       PDCP_AUTH_TYPE_AES,
+       PDCP_AUTH_TYPE_ZUC,
+       PDCP_AUTH_TYPE_INVALID
+};
+
+/**
+ * enum pdcp_dir - Type selectors for direction for PDCP protocol
+ * @PDCP_DIR_UPLINK: uplink direction
+ * @PDCP_DIR_DOWNLINK: downlink direction
+ * @PDCP_DIR_INVALID: invalid option
+ */
+enum pdcp_dir {
+       PDCP_DIR_UPLINK = 0,
+       PDCP_DIR_DOWNLINK = 1,
+       PDCP_DIR_INVALID
+};
+
+/**
+ * enum pdcp_plane - PDCP domain selectors
+ * @PDCP_CONTROL_PLANE: Control Plane
+ * @PDCP_DATA_PLANE: Data Plane
+ * @PDCP_SHORT_MAC: Short MAC
+ */
+enum pdcp_plane {
+       PDCP_CONTROL_PLANE,
+       PDCP_DATA_PLANE,
+       PDCP_SHORT_MAC
+};
+
+/**
+ * enum pdcp_sn_size - Sequence Number Size selectors for PDCP protocol
+ * @PDCP_SN_SIZE_5: 5-bit sequence number
+ * @PDCP_SN_SIZE_7: 7-bit sequence number
+ * @PDCP_SN_SIZE_12: 12-bit sequence number
+ * @PDCP_SN_SIZE_15: 15-bit sequence number
+ */
+enum pdcp_sn_size {
+       PDCP_SN_SIZE_5 = 5,
+       PDCP_SN_SIZE_7 = 7,
+       PDCP_SN_SIZE_12 = 12,
+       PDCP_SN_SIZE_15 = 15
+};
+
+/*
+ * PDCP Control Plane Protocol Data Blocks
+ */
+#define PDCP_C_PLANE_PDB_HFN_SHIFT             5
+#define PDCP_C_PLANE_PDB_BEARER_SHIFT          27
+#define PDCP_C_PLANE_PDB_DIR_SHIFT             26
+#define PDCP_C_PLANE_PDB_HFN_THR_SHIFT         5
+
+#define PDCP_U_PLANE_PDB_OPT_SHORT_SN          0x2
+#define PDCP_U_PLANE_PDB_OPT_15B_SN            0x4
+#define PDCP_U_PLANE_PDB_SHORT_SN_HFN_SHIFT    7
+#define PDCP_U_PLANE_PDB_LONG_SN_HFN_SHIFT     12
+#define PDCP_U_PLANE_PDB_15BIT_SN_HFN_SHIFT    15
+#define PDCP_U_PLANE_PDB_BEARER_SHIFT          27
+#define PDCP_U_PLANE_PDB_DIR_SHIFT             26
+#define PDCP_U_PLANE_PDB_SHORT_SN_HFN_THR_SHIFT        7
+#define PDCP_U_PLANE_PDB_LONG_SN_HFN_THR_SHIFT 12
+#define PDCP_U_PLANE_PDB_15BIT_SN_HFN_THR_SHIFT        15
+
+struct pdcp_pdb {
+       union {
+               uint32_t opt;
+               uint32_t rsvd;
+       } opt_res;
+       uint32_t hfn_res;       /* HyperFrame number,(27, 25 or 21 bits),
+                                * left aligned & right-padded with zeros.
+                                */
+       uint32_t bearer_dir_res;/* Bearer(5 bits), packet direction (1 bit),
+                                * left aligned & right-padded with zeros.
+                                */
+       uint32_t hfn_thr_res;   /* HyperFrame number threshold (27, 25 or 21
+                                * bits), left aligned & right-padded with
+                                * zeros.
+                                */
+};
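The per-plane shifts above are what a caller would use when populating this PDB; a minimal Control Plane sketch (the helper name is illustrative; hfn and hfn_threshold are assumed to be 27-bit values):

    #include <stdint.h>

    static inline void pdcp_cplane_pdb_fill(struct pdcp_pdb *pdb, uint32_t hfn,
                                            uint8_t bearer, uint8_t dir,
                                            uint32_t hfn_threshold)
    {
            pdb->opt_res.rsvd = 0;
            pdb->hfn_res = hfn << PDCP_C_PLANE_PDB_HFN_SHIFT;
            pdb->bearer_dir_res =
                    ((uint32_t)bearer << PDCP_C_PLANE_PDB_BEARER_SHIFT) |
                    ((uint32_t)dir << PDCP_C_PLANE_PDB_DIR_SHIFT);
            pdb->hfn_thr_res = hfn_threshold << PDCP_C_PLANE_PDB_HFN_THR_SHIFT;
    }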
+
+/*
+ * PDCP internal PDB types
+ */
+enum pdb_type_e {
+       PDCP_PDB_TYPE_NO_PDB,
+       PDCP_PDB_TYPE_FULL_PDB,
+       PDCP_PDB_TYPE_REDUCED_PDB,
+       PDCP_PDB_TYPE_INVALID
+};
+
+/*
+ * Function for appending the portion of a PDCP Control Plane shared descriptor
+ * which performs NULL encryption and integrity, i.e. copies the input frame
+ * to the output frame and appends 32 bits of zeros at the end (the MAC-I
+ * for NULL integrity).
+ */
+static inline int
+pdcp_insert_cplane_null_op(struct program *p,
+                          bool swap __maybe_unused,
+                          struct alginfo *cipherdata __maybe_unused,
+                          struct alginfo *authdata __maybe_unused,
+                          unsigned int dir,
+                          unsigned char era_2_sw_hfn_ovrd __maybe_unused)
+{
+       LABEL(local_offset);
+       REFERENCE(move_cmd_read_descbuf);
+       REFERENCE(move_cmd_write_descbuf);
+
+       if (rta_sec_era > RTA_SEC_ERA_2) {
+               MATHB(p, SEQINSZ, ADD, ZERO, VSEQINSZ, 4, 0);
+               if (dir == OP_TYPE_ENCAP_PROTOCOL)
+                       MATHB(p, SEQINSZ, ADD, PDCP_MAC_I_LEN, VSEQOUTSZ, 4,
+                             IMMED2);
+               else
+                       MATHB(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, VSEQOUTSZ, 4,
+                             IMMED2);
+       } else {
+               MATHB(p, SEQINSZ, ADD, ONE, VSEQINSZ, 4, 0);
+               MATHB(p, VSEQINSZ, SUB, ONE, VSEQINSZ, 4, 0);
+
+               if (dir == OP_TYPE_ENCAP_PROTOCOL) {
+                       MATHB(p, SEQINSZ, ADD, PDCP_MAC_I_LEN, VSEQOUTSZ, 4,
+                             IMMED2);
+                       MATHB(p, VSEQINSZ, SUB, ONE, MATH0, 4, 0);
+               } else {
+                       MATHB(p, VSEQINSZ, SUB, PDCP_MAC_I_LEN, VSEQINSZ, 4,
+                             IMMED2);
+                       MATHB(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, VSEQOUTSZ, 4,
+                             IMMED2);
+                       MATHB(p, VSEQOUTSZ, SUB, ONE, MATH0, 4, 0);
+               }
+
+               MATHB(p, MATH0, ADD, ONE, MATH0, 4, 0);
+
+               /*
+                * Since MOVELEN is available only starting with
+                * SEC ERA 3, use poor man's MOVELEN: create a MOVE
+                * command dynamically by OR-ing the command bits over
+                * the length already held in the M1 register, and MOVE the
+                * result into the descriptor buffer. Care must be taken
+                * wrt. the location of the command because of SEC
+                * pipelining. The actual MOVEs are written at the end
+                * of the descriptor due to calculations needed on the
+                * offset in the descriptor for the MOVE command.
+                */
+               move_cmd_read_descbuf = MOVE(p, DESCBUF, 0, MATH0, 0, 6,
+                                            IMMED);
+               move_cmd_write_descbuf = MOVE(p, MATH0, 0, DESCBUF, 0, 8,
+                                             WAITCOMP | IMMED);
+       }
+       MATHB(p, VSEQINSZ, SUB, PDCP_NULL_MAX_FRAME_LEN, NONE, 4,
+             IMMED2);
+       JUMP(p, PDCP_MAX_FRAME_LEN_STATUS, HALT_STATUS, ALL_FALSE, MATH_N);
+
+       if (rta_sec_era > RTA_SEC_ERA_2) {
+               if (dir == OP_TYPE_ENCAP_PROTOCOL)
+                       MATHB(p, VSEQINSZ, ADD, ZERO, MATH0, 4, 0);
+               else
+                       MATHB(p, VSEQOUTSZ, ADD, ZERO, MATH0, 4, 0);
+       }
+       SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+       SEQFIFOLOAD(p, MSG1, 0, VLF | LAST1 | FLUSH1);
+
+       if (rta_sec_era > RTA_SEC_ERA_2) {
+               MOVE(p, AB1, 0, OFIFO, 0, MATH0, 0);
+       } else {
+               SET_LABEL(p, local_offset);
+
+               /* Shut off automatic Info FIFO entries */
+               LOAD(p, 0, DCTRL, LDOFF_DISABLE_AUTO_NFIFO, 0, IMMED);
+               /* Placeholder for MOVE command with length from M1 register */
+               MOVE(p, IFIFOAB1, 0, OFIFO, 0, 0, IMMED);
+               /* Enable automatic Info FIFO entries */
+               LOAD(p, 0, DCTRL, LDOFF_ENABLE_AUTO_NFIFO, 0, IMMED);
+       }
+
+       if (dir == OP_TYPE_ENCAP_PROTOCOL) {
+               MATHB(p, MATH1, XOR, MATH1, MATH0, 8, 0);
+               MOVE(p, MATH0, 0, OFIFO, 0, 4, IMMED);
+       }
+
+       if (rta_sec_era < RTA_SEC_ERA_3) {
+               PATCH_MOVE(p, move_cmd_read_descbuf, local_offset);
+               PATCH_MOVE(p, move_cmd_write_descbuf, local_offset);
+       }
+
+       return 0;
+}
+
+static inline int
+insert_copy_frame_op(struct program *p,
+                    struct alginfo *cipherdata __maybe_unused,
+                    unsigned int dir __maybe_unused)
+{
+       LABEL(local_offset);
+       REFERENCE(move_cmd_read_descbuf);
+       REFERENCE(move_cmd_write_descbuf);
+
+       if (rta_sec_era > RTA_SEC_ERA_2) {
+               MATHB(p, SEQINSZ, ADD, ZERO, VSEQINSZ,  4, 0);
+               MATHB(p, SEQINSZ, ADD, ZERO, VSEQOUTSZ,  4, 0);
+       } else {
+               MATHB(p, SEQINSZ, ADD, ONE, VSEQINSZ,  4, 0);
+               MATHB(p, VSEQINSZ, SUB, ONE, VSEQINSZ,  4, 0);
+               MATHB(p, SEQINSZ, ADD, ONE, VSEQOUTSZ,  4, 0);
+               MATHB(p, VSEQOUTSZ, SUB, ONE, VSEQOUTSZ,  4, 0);
+               MATHB(p, VSEQINSZ, SUB, ONE, MATH0,  4, 0);
+               MATHB(p, MATH0, ADD, ONE, MATH0,  4, 0);
+
+               /*
+                * Since MOVELEN is available only starting with
+                * SEC ERA 3, use poor man's MOVELEN: create a MOVE
+                * command dynamically by OR-ing the command bits over
+                * the length already held in the M1 register, and MOVE the
+                * result into the descriptor buffer. Care must be taken
+                * wrt. the location of the command because of SEC
+                * pipelining. The actual MOVEs are written at the end
+                * of the descriptor due to calculations needed on the
+                * offset in the descriptor for the MOVE command.
+                */
+               move_cmd_read_descbuf = MOVE(p, DESCBUF, 0, MATH0, 0, 6,
+                                            IMMED);
+               move_cmd_write_descbuf = MOVE(p, MATH0, 0, DESCBUF, 0, 8,
+                                             WAITCOMP | IMMED);
+       }
+       MATHB(p, SEQINSZ, SUB, PDCP_NULL_MAX_FRAME_LEN, NONE,  4,
+             IFB | IMMED2);
+       JUMP(p, PDCP_MAX_FRAME_LEN_STATUS, HALT_STATUS, ALL_FALSE, MATH_N);
+
+       if (rta_sec_era > RTA_SEC_ERA_2)
+               MATHB(p, VSEQINSZ, ADD, ZERO, MATH0,  4, 0);
+
+       SEQFIFOLOAD(p, MSG1, 0, VLF | LAST1 | FLUSH1);
+       if (rta_sec_era > RTA_SEC_ERA_2) {
+               MOVE(p, AB1, 0, OFIFO, 0, MATH0, 0);
+       } else {
+               SET_LABEL(p, local_offset);
+
+               /* Shut off automatic Info FIFO entries */
+               LOAD(p, 0, DCTRL, LDOFF_DISABLE_AUTO_NFIFO, 0, IMMED);
+
+               /* Placeholder for MOVE command with length from M0 register */
+               MOVE(p, IFIFOAB1, 0, OFIFO, 0, 0, IMMED);
+
+               /* Enable automatic Info FIFO entries */
+               LOAD(p, 0, DCTRL, LDOFF_ENABLE_AUTO_NFIFO, 0, IMMED);
+       }
+
+       SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+
+       if (rta_sec_era < RTA_SEC_ERA_3) {
+               PATCH_MOVE(p, move_cmd_read_descbuf, local_offset);
+               PATCH_MOVE(p, move_cmd_write_descbuf, local_offset);
+       }
+       return 0;
+}
+
+static inline int
+pdcp_insert_cplane_int_only_op(struct program *p,
+                              bool swap __maybe_unused,
+                              struct alginfo *cipherdata __maybe_unused,
+                              struct alginfo *authdata, unsigned int dir,
+                              unsigned char era_2_sw_hfn_ovrd)
+{
+       LABEL(local_offset);
+       REFERENCE(move_cmd_read_descbuf);
+       REFERENCE(move_cmd_write_descbuf);
+
+       switch (authdata->algtype) {
+       case PDCP_AUTH_TYPE_SNOW:
+               /* Insert Auth Key */
+               KEY(p, KEY2, authdata->key_enc_flags, authdata->key,
+                   authdata->keylen, INLINE_KEY(authdata));
+               SEQLOAD(p, MATH0, 7, 1, 0);
+               JUMP(p, 1, LOCAL_JUMP, ALL_TRUE, CALM);
+
+               if (rta_sec_era > RTA_SEC_ERA_2 ||
+                   (rta_sec_era == RTA_SEC_ERA_2 &&
+                                  era_2_sw_hfn_ovrd == 0)) {
+                       SEQINPTR(p, 0, 1, RTO);
+               } else {
+                       SEQINPTR(p, 0, 5, RTO);
+                       SEQFIFOLOAD(p, SKIP, 4, 0);
+               }
+
+               if (swap == false) {
+                       MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK, MATH1,  8,
+                             IFB | IMMED2);
+                       MATHB(p, MATH1, SHLD, MATH1, MATH1,  8, 0);
+
+                       MOVEB(p, DESCBUF, 8, MATH2, 0, 8, WAITCOMP | IMMED);
+
+                       MATHB(p, MATH2, AND, PDCP_BEARER_MASK, MATH2, 8,
+                             IMMED2);
+                       MOVEB(p, DESCBUF, 0x0C, MATH3, 0, 4, WAITCOMP | IMMED);
+                       MATHB(p, MATH3, AND, PDCP_DIR_MASK, MATH3, 8, IMMED2);
+                       MATHB(p, MATH1, OR, MATH2, MATH2, 8, 0);
+                       MOVEB(p, MATH2, 0, CONTEXT2, 0, 0x0C, WAITCOMP | IMMED);
+               } else {
+                       MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK_BE, MATH1,  8,
+                             IFB | IMMED2);
+                       MATHB(p, MATH1, SHLD, MATH1, MATH1,  8, 0);
+
+                       MOVE(p, DESCBUF, 8, MATH2, 0, 8, WAITCOMP | IMMED);
+                       MATHB(p, MATH2, AND, PDCP_BEARER_MASK_BE, MATH2, 8,
+                             IMMED2);
+
+                       MOVE(p, DESCBUF, 0x0C, MATH3, 0, 4, WAITCOMP | IMMED);
+                       MATHB(p, MATH3, AND, PDCP_DIR_MASK_BE, MATH3, 8,
+                             IMMED2);
+                       MATHB(p, MATH1, OR, MATH2, MATH2, 8, 0);
+                       MOVE(p, MATH2, 0, CONTEXT2, 0, 0x0C, WAITCOMP | IMMED);
+               }
+
+               if (dir == OP_TYPE_DECAP_PROTOCOL) {
+                       MATHB(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, MATH1, 4,
+                             IMMED2);
+               } else {
+                       if (rta_sec_era > RTA_SEC_ERA_2) {
+                               MATHB(p, SEQINSZ, SUB, ZERO, MATH1, 4,
+                                     0);
+                       } else {
+                               MATHB(p, SEQINSZ, ADD, ONE, MATH1, 4,
+                                     0);
+                               MATHB(p, MATH1, SUB, ONE, MATH1, 4,
+                                     0);
+                       }
+               }
+
+               if (rta_sec_era > RTA_SEC_ERA_2) {
+                       MATHB(p, MATH1, SUB, ZERO, VSEQINSZ, 4, 0);
+                       MATHB(p, MATH1, SUB, ZERO, VSEQOUTSZ, 4, 0);
+               } else {
+                       MATHB(p, ZERO, ADD, MATH1, VSEQINSZ, 4, 0);
+                       MATHB(p, ZERO, ADD, MATH1, VSEQOUTSZ, 4, 0);
+
+                       /*
+                        * Since MOVELEN is available only starting with
+                        * SEC ERA 3, use poor man's MOVELEN: create a MOVE
+                        * command dynamically by OR-ing the command bits over
+                        * the length already held in the M1 register, and MOVE the
+                        * result into the descriptor buffer. Care must be taken
+                        * wrt. the location of the command because of SEC
+                        * pipelining. The actual MOVEs are written at the end
+                        * of the descriptor due to calculations needed on the
+                        * offset in the descriptor for the MOVE command.
+                        */
+                       move_cmd_read_descbuf = MOVE(p, DESCBUF, 0, MATH1, 0, 6,
+                                                    IMMED);
+                       move_cmd_write_descbuf = MOVE(p, MATH1, 0, DESCBUF, 0,
+                                                     8, WAITCOMP | IMMED);
+               }
+
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+               ALG_OPERATION(p, OP_ALG_ALGSEL_SNOW_F9, OP_ALG_AAI_F9,
+                             OP_ALG_AS_INITFINAL,
+                             dir == OP_TYPE_ENCAP_PROTOCOL ?
+                                    ICV_CHECK_DISABLE : ICV_CHECK_ENABLE,
+                             DIR_ENC);
+
+               if (rta_sec_era > RTA_SEC_ERA_2) {
+                       SEQFIFOLOAD(p, MSGINSNOOP, 0,
+                                   VLF | LAST1 | LAST2 | FLUSH1);
+                       MOVE(p, AB1, 0, OFIFO, 0, MATH1, 0);
+               } else {
+                       SEQFIFOLOAD(p, MSGINSNOOP, 0,
+                                   VLF | LAST1 | LAST2 | FLUSH1);
+                       SET_LABEL(p, local_offset);
+
+                       /* Shut off automatic Info FIFO entries */
+                       LOAD(p, 0, DCTRL, LDOFF_DISABLE_AUTO_NFIFO, 0, IMMED);
+                       /*
+                        * Placeholder for MOVE command with length from M1
+                        * register
+                        */
+                       MOVE(p, IFIFOAB1, 0, OFIFO, 0, 0, IMMED);
+                       /* Enable automatic Info FIFO entries */
+                       LOAD(p, 0, DCTRL, LDOFF_ENABLE_AUTO_NFIFO, 0, IMMED);
+               }
+
+               if (dir == OP_TYPE_DECAP_PROTOCOL)
+                       SEQFIFOLOAD(p, ICV2, 4, LAST2);
+               else
+                       SEQSTORE(p, CONTEXT2, 0, 4, 0);
+
+               break;
+
+       case PDCP_AUTH_TYPE_AES:
+               /* Insert Auth Key */
+               KEY(p, KEY1, authdata->key_enc_flags, authdata->key,
+                   authdata->keylen, INLINE_KEY(authdata));
+               SEQLOAD(p, MATH0, 7, 1, 0);
+               JUMP(p, 1, LOCAL_JUMP, ALL_TRUE, CALM);
+               if (rta_sec_era > RTA_SEC_ERA_2 ||
+                   (rta_sec_era == RTA_SEC_ERA_2 &&
+                    era_2_sw_hfn_ovrd == 0)) {
+                       SEQINPTR(p, 0, 1, RTO);
+               } else {
+                       SEQINPTR(p, 0, 5, RTO);
+                       SEQFIFOLOAD(p, SKIP, 4, 0);
+               }
+
+               if (swap == false) {
+                       MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK, MATH1, 8,
+                             IFB | IMMED2);
+                       MATHB(p, MATH1, SHLD, MATH1, MATH1, 8, 0);
+
+                       MOVEB(p, DESCBUF, 8, MATH2, 0, 8, WAITCOMP | IMMED);
+                       MATHB(p, MATH1, OR, MATH2, MATH2, 8, 0);
+                       MOVEB(p, MATH2, 0, IFIFOAB1, 0, 8, IMMED);
+               } else {
+                       MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK_BE, MATH1, 8,
+                             IFB | IMMED2);
+                       MATHB(p, MATH1, SHLD, MATH1, MATH1, 8, 0);
+
+                       MOVE(p, DESCBUF, 8, MATH2, 0, 8, WAITCOMP | IMMED);
+                       MATHB(p, MATH1, OR, MATH2, MATH2, 8, 0);
+                       MOVE(p, MATH2, 0, IFIFOAB1, 0, 8, IMMED);
+               }
+
+               if (dir == OP_TYPE_DECAP_PROTOCOL) {
+                       MATHB(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, MATH1, 4,
+                             IMMED2);
+               } else {
+                       if (rta_sec_era > RTA_SEC_ERA_2) {
+                               MATHB(p, SEQINSZ, SUB, ZERO, MATH1, 4,
+                                     0);
+                       } else {
+                               MATHB(p, SEQINSZ, ADD, ONE, MATH1, 4,
+                                     0);
+                               MATHB(p, MATH1, SUB, ONE, MATH1, 4,
+                                     0);
+                       }
+               }
+
+               if (rta_sec_era > RTA_SEC_ERA_2) {
+                       MATHB(p, MATH1, SUB, ZERO, VSEQINSZ, 4, 0);
+                       MATHB(p, MATH1, SUB, ZERO, VSEQOUTSZ, 4, 0);
+               } else {
+                       MATHB(p, ZERO, ADD, MATH1, VSEQINSZ, 4, 0);
+                       MATHB(p, ZERO, ADD, MATH1, VSEQOUTSZ, 4, 0);
+
+                       /*
+                        * Since MOVELEN is available only starting with
+                        * SEC ERA 3, use poor man's MOVELEN: create a MOVE
+                        * command dynamically by OR-ing the command bits over
+                        * the length already held in the M1 register, and MOVE the
+                        * result into the descriptor buffer. Care must be taken
+                        * wrt. the location of the command because of SEC
+                        * pipelining. The actual MOVEs are written at the end
+                        * of the descriptor due to calculations needed on the
+                        * offset in the descriptor for the MOVE command.
+                        */
+                       move_cmd_read_descbuf = MOVE(p, DESCBUF, 0, MATH1, 0, 6,
+                                                    IMMED);
+                       move_cmd_write_descbuf = MOVE(p, MATH1, 0, DESCBUF, 0,
+                                                     8, WAITCOMP | IMMED);
+               }
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+               ALG_OPERATION(p, OP_ALG_ALGSEL_AES,
+                             OP_ALG_AAI_CMAC,
+                             OP_ALG_AS_INITFINAL,
+                             dir == OP_TYPE_ENCAP_PROTOCOL ?
+                                    ICV_CHECK_DISABLE : ICV_CHECK_ENABLE,
+                             DIR_ENC);
+
+               if (rta_sec_era > RTA_SEC_ERA_2) {
+                       MOVE(p, AB2, 0, OFIFO, 0, MATH1, 0);
+                       SEQFIFOLOAD(p, MSGINSNOOP, 0,
+                                   VLF | LAST1 | LAST2 | FLUSH1);
+               } else {
+                       SEQFIFOLOAD(p, MSGINSNOOP, 0,
+                                   VLF | LAST1 | LAST2 | FLUSH1);
+                       SET_LABEL(p, local_offset);
+
+                       /* Shut off automatic Info FIFO entries */
+                       LOAD(p, 0, DCTRL, LDOFF_DISABLE_AUTO_NFIFO, 0, IMMED);
+
+                       /*
+                        * Placeholder for MOVE command with length from
+                        * M1 register
+                        */
+                       MOVE(p, IFIFOAB2, 0, OFIFO, 0, 0, IMMED);
+
+                       /* Enable automatic Info FIFO entries */
+                       LOAD(p, 0, DCTRL, LDOFF_ENABLE_AUTO_NFIFO, 0, IMMED);
+               }
+
+               if (dir == OP_TYPE_DECAP_PROTOCOL)
+                       SEQFIFOLOAD(p, ICV1, 4, LAST1 | FLUSH1);
+               else
+                       SEQSTORE(p, CONTEXT1, 0, 4, 0);
+
+               break;
+
+       case PDCP_AUTH_TYPE_ZUC:
+               if (rta_sec_era < RTA_SEC_ERA_5) {
+                       pr_err("Invalid era for selected algorithm\n");
+                       return -ENOTSUP;
+               }
+               /* Insert Auth Key */
+               KEY(p, KEY2, authdata->key_enc_flags, authdata->key,
+                   authdata->keylen, INLINE_KEY(authdata));
+               SEQLOAD(p, MATH0, 7, 1, 0);
+               JUMP(p, 1, LOCAL_JUMP, ALL_TRUE, CALM);
+               SEQINPTR(p, 0, 1, RTO);
+               if (swap == false) {
+                       MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK, MATH1, 8,
+                             IFB | IMMED2);
+                       MATHB(p, MATH1, SHLD, MATH1, MATH1, 8, 0);
+
+                       MOVEB(p, DESCBUF, 8, MATH2, 0, 8, WAITCOMP | IMMED);
+                       MATHB(p, MATH1, OR, MATH2, MATH2, 8, 0);
+                       MOVEB(p, MATH2, 0, CONTEXT2, 0, 8, IMMED);
+
+               } else {
+                       MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK_BE, MATH1, 8,
+                             IFB | IMMED2);
+                       MATHB(p, MATH1, SHLD, MATH1, MATH1, 8, 0);
+
+                       MOVE(p, DESCBUF, 8, MATH2, 0, 8, WAITCOMP | IMMED);
+                       MATHB(p, MATH1, OR, MATH2, MATH2, 8, 0);
+                       MOVE(p, MATH2, 0, CONTEXT2, 0, 8, IMMED);
+               }
+               if (dir == OP_TYPE_DECAP_PROTOCOL)
+                       MATHB(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, MATH1, 4,
+                             IMMED2);
+               else
+                       MATHB(p, SEQINSZ, SUB, ZERO, MATH1, 4, 0);
+
+               MATHB(p, MATH1, SUB, ZERO, VSEQINSZ, 4, 0);
+               MATHB(p, MATH1, SUB, ZERO, VSEQOUTSZ, 4, 0);
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+               ALG_OPERATION(p, OP_ALG_ALGSEL_ZUCA,
+                             OP_ALG_AAI_F9,
+                             OP_ALG_AS_INITFINAL,
+                             dir == OP_TYPE_ENCAP_PROTOCOL ?
+                                    ICV_CHECK_DISABLE : ICV_CHECK_ENABLE,
+                             DIR_ENC);
+               SEQFIFOLOAD(p, MSGINSNOOP, 0, VLF | LAST1 | LAST2 | FLUSH1);
+               MOVE(p, AB1, 0, OFIFO, 0, MATH1, 0);
+
+               if (dir == OP_TYPE_DECAP_PROTOCOL)
+                       SEQFIFOLOAD(p, ICV2, 4, LAST2);
+               else
+                       SEQSTORE(p, CONTEXT2, 0, 4, 0);
+
+               break;
+
+       default:
+               pr_err("%s: Invalid integrity algorithm selected: %d\n",
+                      "pdcp_insert_cplane_int_only_op", authdata->algtype);
+               return -EINVAL;
+       }
+
+       if (rta_sec_era < RTA_SEC_ERA_3) {
+               PATCH_MOVE(p, move_cmd_read_descbuf, local_offset);
+               PATCH_MOVE(p, move_cmd_write_descbuf, local_offset);
+       }
+
+       return 0;
+}
+
+static inline int
+pdcp_insert_cplane_enc_only_op(struct program *p,
+                              bool swap __maybe_unused,
+                              struct alginfo *cipherdata,
+                              struct alginfo *authdata __maybe_unused,
+                              unsigned int dir,
+                              unsigned char era_2_sw_hfn_ovrd __maybe_unused)
+{
+       /* Insert Cipher Key */
+       KEY(p, KEY1, cipherdata->key_enc_flags, cipherdata->key,
+           cipherdata->keylen, INLINE_KEY(cipherdata));
+
+       if (rta_sec_era >= RTA_SEC_ERA_8) {
+               PROTOCOL(p, dir, OP_PCLID_LTE_PDCP_CTRL_MIXED,
+                               (uint16_t)cipherdata->algtype << 8);
+               return 0;
+       }
+
+       SEQLOAD(p, MATH0, 7, 1, 0);
+       JUMP(p, 1, LOCAL_JUMP, ALL_TRUE, CALM);
+       if (swap == false)
+               MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK, MATH1, 8,
+                       IFB | IMMED2);
+       else
+               MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK_BE, MATH1, 8,
+                       IFB | IMMED2);
+       SEQSTORE(p, MATH0, 7, 1, 0);
+       MATHB(p, MATH1, SHLD, MATH1, MATH1, 8, 0);
+       MOVE(p, DESCBUF, 8, MATH2, 0, 8, WAITCOMP | IMMED);
+       MATHB(p, MATH1, OR, MATH2, MATH2, 8, 0);
+
+       switch (cipherdata->algtype) {
+       case PDCP_CIPHER_TYPE_SNOW:
+               MOVE(p, MATH2, 0, CONTEXT1, 0, 8, WAITCOMP | IMMED);
+
+               if (rta_sec_era > RTA_SEC_ERA_2) {
+                       MATHB(p, SEQINSZ, SUB, ZERO, VSEQINSZ, 4, 0);
+               } else {
+                       MATHB(p, SEQINSZ, SUB, ONE, MATH1, 4, 0);
+                       MATHB(p, MATH1, ADD, ONE, VSEQINSZ, 4, 0);
+               }
+
+               if (dir == OP_TYPE_ENCAP_PROTOCOL)
+                       MATHB(p, SEQINSZ, ADD, PDCP_MAC_I_LEN, VSEQOUTSZ, 4,
+                             IMMED2);
+               else
+                       MATHB(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, VSEQOUTSZ, 4,
+                             IMMED2);
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF | CONT);
+               ALG_OPERATION(p, OP_ALG_ALGSEL_SNOW_F8,
+                             OP_ALG_AAI_F8,
+                             OP_ALG_AS_INITFINAL, ICV_CHECK_DISABLE,
+                             dir == OP_TYPE_ENCAP_PROTOCOL ?
+                                       DIR_ENC : DIR_DEC);
+               break;
+
+       case PDCP_CIPHER_TYPE_AES:
+               MOVE(p, MATH2, 0, CONTEXT1, 0x10, 0x10, WAITCOMP | IMMED);
+
+               if (rta_sec_era > RTA_SEC_ERA_2) {
+                       MATHB(p, SEQINSZ, SUB, ZERO, VSEQINSZ, 4, 0);
+               } else {
+                       MATHB(p, SEQINSZ, SUB, ONE, MATH1, 4, 0);
+                       MATHB(p, MATH1, ADD, ONE, VSEQINSZ, 4, 0);
+               }
+
+               if (dir == OP_TYPE_ENCAP_PROTOCOL)
+                       MATHB(p, SEQINSZ, ADD, PDCP_MAC_I_LEN, VSEQOUTSZ, 4,
+                             IMMED2);
+               else
+                       MATHB(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, VSEQOUTSZ, 4,
+                             IMMED2);
+
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF | CONT);
+               ALG_OPERATION(p, OP_ALG_ALGSEL_AES,
+                             OP_ALG_AAI_CTR,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_DISABLE,
+                             dir == OP_TYPE_ENCAP_PROTOCOL ?
+                                       DIR_ENC : DIR_DEC);
+               break;
+
+       case PDCP_CIPHER_TYPE_ZUC:
+               if (rta_sec_era < RTA_SEC_ERA_5) {
+                       pr_err("Invalid era for selected algorithm\n");
+                       return -ENOTSUP;
+               }
+
+               MOVE(p, MATH2, 0, CONTEXT1, 0, 0x08, IMMED);
+               MOVE(p, MATH2, 0, CONTEXT1, 0x08, 0x08, WAITCOMP | IMMED);
+               MATHB(p, SEQINSZ, SUB, ZERO, VSEQINSZ, 4, 0);
+               if (dir == OP_TYPE_ENCAP_PROTOCOL)
+                       MATHB(p, SEQINSZ, ADD, PDCP_MAC_I_LEN, VSEQOUTSZ, 4,
+                             IMMED2);
+               else
+                       MATHB(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, VSEQOUTSZ, 4,
+                             IMMED2);
+
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF | CONT);
+               ALG_OPERATION(p, OP_ALG_ALGSEL_ZUCE,
+                             OP_ALG_AAI_F8,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_DISABLE,
+                             dir == OP_TYPE_ENCAP_PROTOCOL ?
+                                       DIR_ENC : DIR_DEC);
+               break;
+
+       default:
+               pr_err("%s: Invalid encrypt algorithm selected: %d\n",
+                      "pdcp_insert_cplane_enc_only_op", cipherdata->algtype);
+               return -EINVAL;
+       }
+
+       if (dir == OP_TYPE_ENCAP_PROTOCOL) {
+               SEQFIFOLOAD(p, MSG1, 0, VLF);
+               FIFOLOAD(p, MSG1, PDCP_NULL_INT_MAC_I_VAL, 4,
+                        LAST1 | FLUSH1 | IMMED);
+       } else {
+               SEQFIFOLOAD(p, MSG1, 0, VLF | LAST1 | FLUSH1);
+               MOVE(p, OFIFO, 0, MATH1, 4, PDCP_MAC_I_LEN, WAITCOMP | IMMED);
+               MATHB(p, MATH1, XOR, PDCP_NULL_INT_MAC_I_VAL, NONE, 4, IMMED2);
+               JUMP(p, PDCP_NULL_INT_ICV_CHECK_FAILED_STATUS,
+                    HALT_STATUS, ALL_FALSE, MATH_Z);
+       }
+
+       return 0;
+}
+
+static inline int
+pdcp_insert_cplane_acc_op(struct program *p,
+                         bool swap __maybe_unused,
+                         struct alginfo *cipherdata,
+                         struct alginfo *authdata,
+                         unsigned int dir,
+                         unsigned char era_2_hfn_ovrd __maybe_unused)
+{
+       /* Insert Auth Key */
+       KEY(p, KEY2, authdata->key_enc_flags, authdata->key, authdata->keylen,
+           INLINE_KEY(authdata));
+
+       /* Insert Cipher Key */
+       KEY(p, KEY1, cipherdata->key_enc_flags, cipherdata->key,
+           cipherdata->keylen, INLINE_KEY(cipherdata));
+       PROTOCOL(p, dir, OP_PCLID_LTE_PDCP_CTRL, (uint16_t)cipherdata->algtype);
+
+       return 0;
+}
+
+static inline int
+pdcp_insert_cplane_snow_aes_op(struct program *p,
+                              bool swap __maybe_unused,
+                              struct alginfo *cipherdata,
+                              struct alginfo *authdata,
+                              unsigned int dir,
+                              unsigned char era_2_sw_hfn_ovrd)
+{
+       LABEL(back_to_sd_offset);
+       LABEL(end_desc);
+       LABEL(local_offset);
+       LABEL(jump_to_beginning);
+       LABEL(fifo_load_mac_i_offset);
+       REFERENCE(seqin_ptr_read);
+       REFERENCE(seqin_ptr_write);
+       REFERENCE(seq_out_read);
+       REFERENCE(jump_back_to_sd_cmd);
+       REFERENCE(move_mac_i_to_desc_buf);
+
+       if (rta_sec_era >= RTA_SEC_ERA_8) {
+               KEY(p, KEY1, cipherdata->key_enc_flags, cipherdata->key,
+                               cipherdata->keylen, INLINE_KEY(cipherdata));
+               KEY(p, KEY2, authdata->key_enc_flags, authdata->key,
+                               authdata->keylen, INLINE_KEY(authdata));
+
+               PROTOCOL(p, dir, OP_PCLID_LTE_PDCP_CTRL_MIXED,
+                        ((uint16_t)cipherdata->algtype << 8) |
+                        (uint16_t)authdata->algtype);
+
+               return 0;
+       }
+
+       SEQLOAD(p, MATH0, 7, 1, 0);
+       JUMP(p, 1, LOCAL_JUMP, ALL_TRUE, CALM);
+       if (swap == false)
+               MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK, MATH1, 8,
+                       IFB | IMMED2);
+       else
+               MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK_BE, MATH1, 8,
+                       IFB | IMMED2);
+       MATHB(p, MATH1, SHLD, MATH1, MATH1, 8, 0);
+       MOVE(p, DESCBUF, 4, MATH2, 0, 0x08, WAITCOMP | IMMED);
+       MATHB(p, MATH1, OR, MATH2, MATH2, 8, 0);
+       SEQSTORE(p, MATH0, 7, 1, 0);
+       if (dir == OP_TYPE_ENCAP_PROTOCOL) {
+               if (rta_sec_era > RTA_SEC_ERA_2 ||
+                   (rta_sec_era == RTA_SEC_ERA_2 &&
+                                  era_2_sw_hfn_ovrd == 0)) {
+                       SEQINPTR(p, 0, 1, RTO);
+               } else {
+                       SEQINPTR(p, 0, 5, RTO);
+                       SEQFIFOLOAD(p, SKIP, 4, 0);
+               }
+               KEY(p, KEY1, authdata->key_enc_flags, authdata->key,
+                   authdata->keylen, INLINE_KEY(authdata));
+               MOVE(p, MATH2, 0, IFIFOAB1, 0, 0x08, IMMED);
+
+               if (rta_sec_era > RTA_SEC_ERA_2) {
+                       MATHB(p, SEQINSZ, SUB, ZERO, VSEQINSZ, 4, 0);
+                       MATHB(p, SEQINSZ, SUB, ZERO, MATH1, 4, 0);
+                       MATHB(p, VSEQINSZ, ADD, PDCP_MAC_I_LEN - 1, VSEQOUTSZ,
+                             4, IMMED2);
+               } else {
+                       MATHB(p, SEQINSZ, SUB, MATH3, VSEQINSZ, 4, 0);
+                       MATHB(p, VSEQINSZ, ADD, PDCP_MAC_I_LEN - 1, VSEQOUTSZ,
+                             4, IMMED2);
+                       /*
+                        * Note: Although the calculations below might seem a
+                        * little off, the logic is the following:
+                        *
+                        * - SEQ IN PTR RTO below needs the full length of the
+                        *   frame; in case of P4080_REV_2_HFN_OV_WORKAROUND,
+                        *   this means the length of the frame to be processed
+                        *   + 4 bytes (the HFN override flag and value).
+                        *   The length of the frame to be processed minus 1
+                        *   byte is in the VSIL register (because
+                        *   VSIL = SIL + 3, due to 1 byte (the header) being
+                        *   already written by the SEQ STORE above). So for
+                        *   calculating the length to use in RTO, one is added
+                        *   to the VSIL value in order to obtain the total
+                        *   frame length. This helps in the case of P4080,
+                        *   which can have the value 0 as an operand in a
+                        *   MATH command only as SRC1. When the HFN override
+                        *   workaround is not enabled, the length of the
+                        *   frame is given by the SIL register; the
+                        *   calculation is similar to the one in the SEC 4.2
+                        *   and SEC 5.3 cases.
+                        */
+                       if (era_2_sw_hfn_ovrd)
+                               MATHB(p, VSEQOUTSZ, ADD, ONE, MATH1, 4,
+                                     0);
+                       else
+                               MATHB(p, SEQINSZ, ADD, MATH3, MATH1, 4,
+                                     0);
+               }
+               /*
+                * Placeholder for filling the length in
+                * SEQIN PTR RTO below
+                */
+               seqin_ptr_read = MOVE(p, DESCBUF, 0, MATH1, 0, 6, IMMED);
+               seqin_ptr_write = MOVE(p, MATH1, 0, DESCBUF, 0, 8,
+                                      WAITCOMP | IMMED);
+               ALG_OPERATION(p, OP_ALG_ALGSEL_AES,
+                             OP_ALG_AAI_CMAC,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_DISABLE,
+                             DIR_DEC);
+               SEQFIFOLOAD(p, MSG1, 0, VLF | LAST1 | FLUSH1);
+               MOVE(p, CONTEXT1, 0, MATH3, 0, 4, WAITCOMP | IMMED);
+               if (rta_sec_era <= RTA_SEC_ERA_3)
+                       LOAD(p, CLRW_CLR_C1KEY |
+                            CLRW_CLR_C1CTX |
+                            CLRW_CLR_C1ICV |
+                            CLRW_CLR_C1DATAS |
+                            CLRW_CLR_C1MODE,
+                            CLRW, 0, 4, IMMED);
+               else
+                       LOAD(p, CLRW_RESET_CLS1_CHA |
+                            CLRW_CLR_C1KEY |
+                            CLRW_CLR_C1CTX |
+                            CLRW_CLR_C1ICV |
+                            CLRW_CLR_C1DATAS |
+                            CLRW_CLR_C1MODE,
+                            CLRW, 0, 4, IMMED);
+
+               if (rta_sec_era <= RTA_SEC_ERA_3)
+                       LOAD(p, CCTRL_RESET_CHA_ALL, CCTRL, 0, 4, IMMED);
+
+               KEY(p, KEY1, cipherdata->key_enc_flags, cipherdata->key,
+                   cipherdata->keylen, INLINE_KEY(cipherdata));
+               SET_LABEL(p, local_offset);
+               MOVE(p, MATH2, 0, CONTEXT1, 0, 8, IMMED);
+               SEQINPTR(p, 0, 0, RTO);
+
+               if (rta_sec_era == RTA_SEC_ERA_2 && era_2_sw_hfn_ovrd) {
+                       SEQFIFOLOAD(p, SKIP, 5, 0);
+                       MATHB(p, SEQINSZ, ADD, ONE, SEQINSZ, 4, 0);
+               }
+
+               MATHB(p, SEQINSZ, SUB, ONE, VSEQINSZ, 4, 0);
+               ALG_OPERATION(p, OP_ALG_ALGSEL_SNOW_F8,
+                             OP_ALG_AAI_F8,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_DISABLE,
+                             DIR_ENC);
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+
+               if (rta_sec_era > RTA_SEC_ERA_2 ||
+                   (rta_sec_era == RTA_SEC_ERA_2 &&
+                                  era_2_sw_hfn_ovrd == 0))
+                       SEQFIFOLOAD(p, SKIP, 1, 0);
+
+               SEQFIFOLOAD(p, MSG1, 0, VLF);
+               MOVE(p, MATH3, 0, IFIFOAB1, 0, 4, LAST1 | FLUSH1 | IMMED);
+               PATCH_MOVE(p, seqin_ptr_read, local_offset);
+               PATCH_MOVE(p, seqin_ptr_write, local_offset);
+       } else {
+               MOVE(p, MATH2, 0, CONTEXT1, 0, 8, IMMED);
+
+               if (rta_sec_era >= RTA_SEC_ERA_5)
+                       MOVE(p, CONTEXT1, 0, CONTEXT2, 0, 8, IMMED);
+
+               if (rta_sec_era > RTA_SEC_ERA_2)
+                       MATHB(p, SEQINSZ, SUB, ZERO, VSEQINSZ, 4, 0);
+               else
+                       MATHB(p, SEQINSZ, SUB, MATH3, VSEQINSZ, 4, 0);
+
+               MATHB(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, VSEQOUTSZ, 4, IMMED2);
+/*
+ * TODO: To be changed when proper support is added in RTA (can't load a
+ * command that is also written by RTA, or patch it for that matter).
+ * Change when proper RTA support is added.
+ */
+               if (p->ps)
+                       WORD(p, 0x168B0004);
+               else
+                       WORD(p, 0x16880404);
+
+               jump_back_to_sd_cmd = JUMP(p, 0, LOCAL_JUMP, ALL_TRUE, 0);
+               /*
+                * Placeholder for the command reading the SEQ OUT command in
+                * the JD, done for rereading the decrypted data and performing
+                * the integrity check.
+                */
+/*
+ * TODO: RTA currently doesn't support patching the length of a MOVE command.
+ * Thus, it is inserted as a raw word, as per the PS setting.
+ */
+               if (p->ps)
+                       seq_out_read = MOVE(p, DESCBUF, 0, MATH1, 0, 20,
+                                           WAITCOMP | IMMED);
+               else
+                       seq_out_read = MOVE(p, DESCBUF, 0, MATH1, 0, 16,
+                                           WAITCOMP | IMMED);
+
+               MATHB(p, MATH1, XOR, CMD_SEQ_IN_PTR ^ CMD_SEQ_OUT_PTR, MATH1, 4,
+                     IMMED2);
+               /* Placeholder for overwriting the SEQ IN with SEQ OUT */
+/*
+ * TODO: RTA currently doesn't support patching the length of a MOVE command.
+ * Thus, it is inserted as a raw word, as per the PS setting.
+ */
+               if (p->ps)
+                       MOVE(p, MATH1, 0, DESCBUF, 0, 24, IMMED);
+               else
+                       MOVE(p, MATH1, 0, DESCBUF, 0, 20, IMMED);
+
+               KEY(p, KEY1, cipherdata->key_enc_flags, cipherdata->key,
+                   cipherdata->keylen, INLINE_KEY(cipherdata));
+
+               if (rta_sec_era >= RTA_SEC_ERA_4)
+                       MOVE(p, CONTEXT1, 0, CONTEXT2, 0, 8, IMMED);
+               else
+                       MOVE(p, CONTEXT1, 0, MATH3, 0, 8, IMMED);
+
+               ALG_OPERATION(p, OP_ALG_ALGSEL_SNOW_F8,
+                             OP_ALG_AAI_F8,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_DISABLE,
+                             DIR_DEC);
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF | CONT);
+               SEQFIFOLOAD(p, MSG1, 0, VLF | LAST1 | FLUSH1);
+
+               if (rta_sec_era <= RTA_SEC_ERA_3)
+                       move_mac_i_to_desc_buf = MOVE(p, OFIFO, 0, DESCBUF, 0,
+                                                     4, WAITCOMP | IMMED);
+               else
+                       MOVE(p, OFIFO, 0, MATH3, 0, 4, IMMED);
+
+               if (rta_sec_era <= RTA_SEC_ERA_3)
+                       LOAD(p, CCTRL_RESET_CHA_ALL, CCTRL, 0, 4, IMMED);
+               else
+                       LOAD(p, CLRW_RESET_CLS1_CHA |
+                            CLRW_CLR_C1KEY |
+                            CLRW_CLR_C1CTX |
+                            CLRW_CLR_C1ICV |
+                            CLRW_CLR_C1DATAS |
+                            CLRW_CLR_C1MODE,
+                            CLRW, 0, 4, IMMED);
+
+               KEY(p, KEY1, authdata->key_enc_flags, authdata->key,
+                   authdata->keylen, INLINE_KEY(authdata));
+               /*
+                * Placeholder for the jump in the SD executing the new SEQ IN
+                * PTR command (which is actually the old SEQ OUT PTR command
+                * copied over from the JD).
+                */
+               SET_LABEL(p, jump_to_beginning);
+               JUMP(p, 1 - jump_to_beginning, LOCAL_JUMP, ALL_TRUE, 0);
+               SET_LABEL(p, back_to_sd_offset);
+               ALG_OPERATION(p, OP_ALG_ALGSEL_AES,
+                             OP_ALG_AAI_CMAC,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_ENABLE,
+                             DIR_DEC);
+
+               /* Read the # of bytes written in the output buffer + 1 (HDR) */
+               MATHB(p, VSEQOUTSZ, ADD, ONE, VSEQINSZ, 4, 0);
+
+               if (rta_sec_era <= RTA_SEC_ERA_3)
+                       MOVE(p, MATH3, 0, IFIFOAB1, 0, 8, IMMED);
+               else
+                       MOVE(p, CONTEXT2, 0, IFIFOAB1, 0, 8, IMMED);
+
+               if (rta_sec_era == RTA_SEC_ERA_2 && era_2_sw_hfn_ovrd)
+                       SEQFIFOLOAD(p, SKIP, 4, 0);
+
+               SEQFIFOLOAD(p, MSG1, 0, VLF | LAST1 | FLUSH1);
+
+               if (rta_sec_era >= RTA_SEC_ERA_4) {
+                       LOAD(p, NFIFOENTRY_STYPE_ALTSOURCE |
+                            NFIFOENTRY_DEST_CLASS1 |
+                            NFIFOENTRY_DTYPE_ICV |
+                            NFIFOENTRY_LC1 |
+                            NFIFOENTRY_FC1 | 4, NFIFO_SZL, 0, 4, IMMED);
+                       MOVE(p, MATH3, 0, ALTSOURCE, 0, 4, IMMED);
+               } else {
+                       SET_LABEL(p, fifo_load_mac_i_offset);
+                       FIFOLOAD(p, ICV1, fifo_load_mac_i_offset, 4,
+                                LAST1 | FLUSH1 | IMMED);
+               }
+
+               SET_LABEL(p, end_desc);
+
+               if (!p->ps) {
+                       PATCH_MOVE(p, seq_out_read, end_desc + 1);
+                       PATCH_JUMP(p, jump_back_to_sd_cmd,
+                                  back_to_sd_offset + jump_back_to_sd_cmd - 5);
+
+                       if (rta_sec_era <= RTA_SEC_ERA_3)
+                               PATCH_MOVE(p, move_mac_i_to_desc_buf,
+                                          fifo_load_mac_i_offset + 1);
+               } else {
+                       PATCH_MOVE(p, seq_out_read, end_desc + 2);
+                       PATCH_JUMP(p, jump_back_to_sd_cmd,
+                                  back_to_sd_offset + jump_back_to_sd_cmd - 5);
+
+                       if (rta_sec_era <= RTA_SEC_ERA_3)
+                               PATCH_MOVE(p, move_mac_i_to_desc_buf,
+                                          fifo_load_mac_i_offset + 1);
+               }
+       }
+
+       return 0;
+}
+
+static inline int
+pdcp_insert_cplane_aes_snow_op(struct program *p,
+                              bool swap __maybe_unused,
+                              struct alginfo *cipherdata,
+                              struct alginfo *authdata,
+                              unsigned int dir,
+                              unsigned char era_2_sw_hfn_ovrd __maybe_unused)
+{
+       KEY(p, KEY1, cipherdata->key_enc_flags, cipherdata->key,
+           cipherdata->keylen, INLINE_KEY(cipherdata));
+       KEY(p, KEY2, authdata->key_enc_flags, authdata->key, authdata->keylen,
+           INLINE_KEY(authdata));
+
+       if (rta_sec_era >= RTA_SEC_ERA_8) {
+               PROTOCOL(p, dir, OP_PCLID_LTE_PDCP_CTRL_MIXED,
+                        ((uint16_t)cipherdata->algtype << 8) |
+                        (uint16_t)authdata->algtype);
+
+               return 0;
+       }
+
+       if (dir == OP_TYPE_ENCAP_PROTOCOL)
+               MATHB(p, SEQINSZ, SUB, ONE, VSEQINSZ, 4, 0);
+
+       SEQLOAD(p, MATH0, 7, 1, 0);
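+       /* Wait (CALM) for the SN byte loaded above to land in MATH0. */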
+       JUMP(p, 1, LOCAL_JUMP, ALL_TRUE, CALM);
+       MOVE(p, MATH0, 7, IFIFOAB2, 0, 1, IMMED);
+       if (swap == false)
+               MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK, MATH1, 8,
+                       IFB | IMMED2);
+       else
+               MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK_BE, MATH1, 8,
+                       IFB | IMMED2);
+
+       SEQSTORE(p, MATH0, 7, 1, 0);
+       MATHB(p, MATH1, SHLD, MATH1, MATH1, 8, 0);
+       MOVE(p, DESCBUF, 4, MATH2, 0, 8, WAITCOMP | IMMED);
+       MATHB(p, MATH1, OR, MATH2, MATH1, 8, 0);
+       MOVE(p, MATH1, 0, CONTEXT1, 16, 8, IMMED);
+       MOVE(p, MATH1, 0, CONTEXT2, 0, 4, IMMED);
+       if (swap == false) {
+               MATHB(p, MATH1, AND, lower_32_bits(PDCP_BEARER_MASK), MATH2, 4,
+                       IMMED2);
+               MATHB(p, MATH1, AND, upper_32_bits(PDCP_DIR_MASK), MATH3, 4,
+                       IMMED2);
+       } else {
+               MATHB(p, MATH1, AND, lower_32_bits(PDCP_BEARER_MASK_BE), MATH2,
+                       4, IMMED2);
+               MATHB(p, MATH1, AND, upper_32_bits(PDCP_DIR_MASK_BE), MATH3,
+                       4, IMMED2);
+       }
+       MATHB(p, MATH3, SHLD, MATH3, MATH3, 8, 0);
+       MOVE(p, MATH2, 4, OFIFO, 0, 12, IMMED);
+       MOVE(p, OFIFO, 0, CONTEXT2, 4, 12, IMMED);
+       if (dir == OP_TYPE_ENCAP_PROTOCOL) {
+               MATHB(p, SEQINSZ, ADD, PDCP_MAC_I_LEN, VSEQOUTSZ, 4, IMMED2);
+       } else {
+               MATHB(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, MATH1, 4, IMMED2);
+
+               MATHB(p, ZERO, ADD, MATH1, VSEQOUTSZ, 4, 0);
+               MATHB(p, ZERO, ADD, MATH1, VSEQINSZ, 4, 0);
+       }
+
+       if (dir == OP_TYPE_ENCAP_PROTOCOL)
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+       else
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF | CONT);
+
+       ALG_OPERATION(p, OP_ALG_ALGSEL_SNOW_F9,
+                     OP_ALG_AAI_F9,
+                     OP_ALG_AS_INITFINAL,
+                     dir == OP_TYPE_ENCAP_PROTOCOL ?
+                            ICV_CHECK_DISABLE : ICV_CHECK_ENABLE,
+                     DIR_DEC);
+       ALG_OPERATION(p, OP_ALG_ALGSEL_AES,
+                     OP_ALG_AAI_CTR,
+                     OP_ALG_AS_INITFINAL,
+                     ICV_CHECK_DISABLE,
+                     dir == OP_TYPE_ENCAP_PROTOCOL ? DIR_ENC : DIR_DEC);
+
+       if (dir == OP_TYPE_ENCAP_PROTOCOL) {
+               SEQFIFOLOAD(p, MSGINSNOOP, 0, VLF | LAST2);
+               MOVE(p, CONTEXT2, 0, IFIFOAB1, 0, 4, LAST1 | FLUSH1 | IMMED);
+       } else {
+               SEQFIFOLOAD(p, MSGOUTSNOOP, 0, VLF | LAST2);
+               SEQFIFOLOAD(p, MSG1, 4, LAST1 | FLUSH1);
+               JUMP(p, 1, LOCAL_JUMP, ALL_TRUE, CLASS1 | NOP | NIFP);
+
+               if (rta_sec_era >= RTA_SEC_ERA_6)
+                       LOAD(p, 0, DCTRL, 0, LDLEN_RST_CHA_OFIFO_PTR, IMMED);
+
+               MOVE(p, OFIFO, 0, MATH0, 0, 4, WAITCOMP | IMMED);
+
+               NFIFOADD(p, IFIFO, ICV2, 4, LAST2);
+
+               if (rta_sec_era <= RTA_SEC_ERA_2) {
+                       /* Shut off automatic Info FIFO entries */
+                       LOAD(p, 0, DCTRL, LDOFF_DISABLE_AUTO_NFIFO, 0, IMMED);
+                       MOVE(p, MATH0, 0, IFIFOAB2, 0, 4, WAITCOMP | IMMED);
+               } else {
+                       MOVE(p, MATH0, 0, IFIFO, 0, 4, WAITCOMP | IMMED);
+               }
+       }
+
+       return 0;
+}
+
+static inline int
+pdcp_insert_cplane_snow_zuc_op(struct program *p,
+                              bool swap __maybe_unused,
+                              struct alginfo *cipherdata,
+                              struct alginfo *authdata,
+                              unsigned int dir,
+                              unsigned char era_2_sw_hfn_ovrd __maybe_unused)
+{
+       LABEL(keyjump);
+       REFERENCE(pkeyjump);
+
+       if (rta_sec_era < RTA_SEC_ERA_5) {
+               pr_err("Invalid era for selected algorithm\n");
+               return -ENOTSUP;
+       }
+
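+       /*
+        * Skip the key loading commands below when the shared descriptor
+        * already has the keys loaded.
+        */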
+       pkeyjump = JUMP(p, keyjump, LOCAL_JUMP, ALL_TRUE, SHRD | SELF | BOTH);
+       KEY(p, KEY1, cipherdata->key_enc_flags, cipherdata->key,
+           cipherdata->keylen, INLINE_KEY(cipherdata));
+       KEY(p, KEY2, authdata->key_enc_flags, authdata->key, authdata->keylen,
+           INLINE_KEY(authdata));
+
+       SET_LABEL(p, keyjump);
+
+       if (rta_sec_era >= RTA_SEC_ERA_8) {
+               PROTOCOL(p, dir, OP_PCLID_LTE_PDCP_CTRL_MIXED,
+                        ((uint16_t)cipherdata->algtype << 8) |
+                        (uint16_t)authdata->algtype);
+               return 0;
+       }
+
+       SEQLOAD(p, MATH0, 7, 1, 0);
+       JUMP(p, 1, LOCAL_JUMP, ALL_TRUE, CALM);
+       MOVE(p, MATH0, 7, IFIFOAB2, 0, 1, IMMED);
+       if (swap == false)
+               MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK, MATH1, 8,
+                       IFB | IMMED2);
+       else
+               MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK_BE, MATH1, 8,
+                       IFB | IMMED2);
+
+       MATHB(p, MATH1, SHLD, MATH1, MATH1, 8, 0);
+       MOVE(p, DESCBUF, 4, MATH2, 0, 8, WAITCOMP | IMMED);
+       MATHB(p, MATH1, OR, MATH2, MATH2, 8, 0);
+       MOVE(p, MATH2, 0, CONTEXT1, 0, 8, IMMED);
+       MOVE(p, MATH2, 0, CONTEXT2, 0, 8, WAITCOMP | IMMED);
+
+       if (dir == OP_TYPE_ENCAP_PROTOCOL)
+               MATHB(p, SEQINSZ, ADD, PDCP_MAC_I_LEN, VSEQOUTSZ, 4, IMMED2);
+       else
+               MATHB(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, VSEQOUTSZ, 4, IMMED2);
+
+       MATHB(p, SEQINSZ, SUB, ZERO, VSEQINSZ, 4, 0);
+       SEQSTORE(p, MATH0, 7, 1, 0);
+
+       if (dir == OP_TYPE_ENCAP_PROTOCOL) {
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+               SEQFIFOLOAD(p, MSGINSNOOP, 0, VLF | LAST2);
+       } else {
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF | CONT);
+               SEQFIFOLOAD(p, MSGOUTSNOOP, 0, VLF | LAST1 | FLUSH1);
+       }
+
+       ALG_OPERATION(p, OP_ALG_ALGSEL_ZUCA,
+                     OP_ALG_AAI_F9,
+                     OP_ALG_AS_INITFINAL,
+                     dir == OP_TYPE_ENCAP_PROTOCOL ?
+                            ICV_CHECK_DISABLE : ICV_CHECK_ENABLE,
+                     DIR_ENC);
+
+       ALG_OPERATION(p, OP_ALG_ALGSEL_SNOW_F8,
+                     OP_ALG_AAI_F8,
+                     OP_ALG_AS_INITFINAL,
+                     ICV_CHECK_DISABLE,
+                     dir == OP_TYPE_ENCAP_PROTOCOL ? DIR_ENC : DIR_DEC);
+       if (dir == OP_TYPE_ENCAP_PROTOCOL) {
+               MOVE(p, CONTEXT2, 0, IFIFOAB1, 0, 4, LAST1 | FLUSH1 | IMMED);
+       } else {
+               /* Save ICV */
+               MOVE(p, OFIFO, 0, MATH0, 0, 4, IMMED);
+               LOAD(p, NFIFOENTRY_STYPE_ALTSOURCE |
+                    NFIFOENTRY_DEST_CLASS2 |
+                    NFIFOENTRY_DTYPE_ICV |
+                    NFIFOENTRY_LC2 | 4, NFIFO_SZL, 0, 4, IMMED);
+               MOVE(p, MATH0, 0, ALTSOURCE, 0, 4, WAITCOMP | IMMED);
+       }
+
+       /* Reset ZUCA mode and done interrupt */
+       LOAD(p, CLRW_CLR_C2MODE, CLRW, 0, 4, IMMED);
+       LOAD(p, CIRQ_ZADI, ICTRL, 0, 4, IMMED);
+
+       PATCH_JUMP(p, pkeyjump, keyjump);
+       return 0;
+}
+
+static inline int
+pdcp_insert_cplane_aes_zuc_op(struct program *p,
+                             bool swap __maybe_unused,
+                             struct alginfo *cipherdata,
+                             struct alginfo *authdata,
+                             unsigned int dir,
+                             unsigned char era_2_sw_hfn_ovrd __maybe_unused)
+{
+       LABEL(keyjump);
+       REFERENCE(pkeyjump);
+
+       if (rta_sec_era < RTA_SEC_ERA_5) {
+               pr_err("Invalid era for selected algorithm\n");
+               return -ENOTSUP;
+       }
+
+       pkeyjump = JUMP(p, keyjump, LOCAL_JUMP, ALL_TRUE, SHRD | SELF | BOTH);
+       KEY(p, KEY1, cipherdata->key_enc_flags, cipherdata->key,
+           cipherdata->keylen, INLINE_KEY(cipherdata));
+       KEY(p, KEY2, authdata->key_enc_flags, authdata->key, authdata->keylen,
+           INLINE_KEY(authdata));
+
+       if (rta_sec_era >= RTA_SEC_ERA_8) {
+               PROTOCOL(p, dir, OP_PCLID_LTE_PDCP_CTRL_MIXED,
+                        ((uint16_t)cipherdata->algtype << 8) |
+                        (uint16_t)authdata->algtype);
+
+               return 0;
+       }
+
+       SET_LABEL(p, keyjump);
+       SEQLOAD(p, MATH0, 7, 1, 0);
+       JUMP(p, 1, LOCAL_JUMP, ALL_TRUE, CALM);
+       MOVE(p, MATH0, 7, IFIFOAB2, 0, 1, IMMED);
+       if (swap == false)
+               MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK, MATH1, 8,
+                       IFB | IMMED2);
+       else
+               MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK_BE, MATH1, 8,
+                       IFB | IMMED2);
+
+       MATHB(p, MATH1, SHLD, MATH1, MATH1, 8, 0);
+       MOVE(p, DESCBUF, 4, MATH2, 0, 8, WAITCOMP | IMMED);
+       MATHB(p, MATH1, OR, MATH2, MATH2, 8, 0);
+       MOVE(p, MATH2, 0, CONTEXT1, 16, 8, IMMED);
+       MOVE(p, MATH2, 0, CONTEXT2, 0, 8, WAITCOMP | IMMED);
+
+       if (dir == OP_TYPE_ENCAP_PROTOCOL)
+               MATHB(p, SEQINSZ, ADD, PDCP_MAC_I_LEN, VSEQOUTSZ, 4, IMMED2);
+       else
+               MATHB(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, VSEQOUTSZ, 4, IMMED2);
+
+       MATHB(p, SEQINSZ, SUB, ZERO, VSEQINSZ, 4, 0);
+       SEQSTORE(p, MATH0, 7, 1, 0);
+
+       if (dir == OP_TYPE_ENCAP_PROTOCOL) {
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+               SEQFIFOLOAD(p, MSGINSNOOP, 0, VLF | LAST2);
+       } else {
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF | CONT);
+               SEQFIFOLOAD(p, MSGOUTSNOOP, 0, VLF | LAST1 | FLUSH1);
+       }
+
+       ALG_OPERATION(p, OP_ALG_ALGSEL_ZUCA,
+                     OP_ALG_AAI_F9,
+                     OP_ALG_AS_INITFINAL,
+                     dir == OP_TYPE_ENCAP_PROTOCOL ?
+                            ICV_CHECK_DISABLE : ICV_CHECK_ENABLE,
+                     DIR_ENC);
+
+       ALG_OPERATION(p, OP_ALG_ALGSEL_AES,
+                     OP_ALG_AAI_CTR,
+                     OP_ALG_AS_INITFINAL,
+                     ICV_CHECK_DISABLE,
+                     dir == OP_TYPE_ENCAP_PROTOCOL ? DIR_ENC : DIR_DEC);
+
+       if (dir == OP_TYPE_ENCAP_PROTOCOL) {
+               MOVE(p, CONTEXT2, 0, IFIFOAB1, 0, 4, LAST1 | FLUSH1 | IMMED);
+       } else {
+               /* Save ICV */
+               MOVE(p, OFIFO, 0, MATH0, 0, 4, IMMED);
+
+               LOAD(p, NFIFOENTRY_STYPE_ALTSOURCE |
+                    NFIFOENTRY_DEST_CLASS2 |
+                    NFIFOENTRY_DTYPE_ICV |
+                    NFIFOENTRY_LC2 | 4, NFIFO_SZL, 0, 4, IMMED);
+               MOVE(p, MATH0, 0, ALTSOURCE, 0, 4, WAITCOMP | IMMED);
+       }
+
+       /* Reset ZUCA mode and done interrupt */
+       LOAD(p, CLRW_CLR_C2MODE, CLRW, 0, 4, IMMED);
+       LOAD(p, CIRQ_ZADI, ICTRL, 0, 4, IMMED);
+
+       PATCH_JUMP(p, pkeyjump, keyjump);
+
+       return 0;
+}
+
+static inline int
+pdcp_insert_cplane_zuc_snow_op(struct program *p,
+                              bool swap __maybe_unused,
+                              struct alginfo *cipherdata,
+                              struct alginfo *authdata,
+                              unsigned int dir,
+                              unsigned char era_2_sw_hfn_ovrd __maybe_unused)
+{
+       LABEL(keyjump);
+       REFERENCE(pkeyjump);
+
+       if (rta_sec_era < RTA_SEC_ERA_5) {
+               pr_err("Invalid era for selected algorithm\n");
+               return -ENOTSUP;
+       }
+
+       pkeyjump = JUMP(p, keyjump, LOCAL_JUMP, ALL_TRUE, SHRD | SELF | BOTH);
+       KEY(p, KEY1, cipherdata->key_enc_flags, cipherdata->key,
+           cipherdata->keylen, INLINE_KEY(cipherdata));
+       KEY(p, KEY2, authdata->key_enc_flags, authdata->key, authdata->keylen,
+           INLINE_KEY(authdata));
+
+       if (rta_sec_era >= RTA_SEC_ERA_8) {
+               PROTOCOL(p, dir, OP_PCLID_LTE_PDCP_CTRL_MIXED,
+                        ((uint16_t)cipherdata->algtype << 8) |
+                        (uint16_t)authdata->algtype);
+
+               return 0;
+       }
+
+       SET_LABEL(p, keyjump);
+       SEQLOAD(p, MATH0, 7, 1, 0);
+       JUMP(p, 1, LOCAL_JUMP, ALL_TRUE, CALM);
+       MOVE(p, MATH0, 7, IFIFOAB2, 0, 1, IMMED);
+       if (swap == false)
+               MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK, MATH1, 8,
+                       IFB | IMMED2);
+       else
+               MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK_BE, MATH1, 8,
+                       IFB | IMMED2);
+
+       MATHB(p, MATH1, SHLD, MATH1, MATH1, 8, 0);
+       MOVE(p, DESCBUF, 4, MATH2, 0, 8, WAITCOMP | IMMED);
+       MATHB(p, MATH1, OR, MATH2, MATH1, 8, 0);
+       MOVE(p, MATH1, 0, CONTEXT1, 0, 8, IMMED);
+       MOVE(p, MATH1, 0, CONTEXT2, 0, 4, IMMED);
+       if (swap == false) {
+               MATHB(p, MATH1, AND, lower_32_bits(PDCP_BEARER_MASK), MATH2,
+                       4, IMMED2);
+               MATHB(p, MATH1, AND, upper_32_bits(PDCP_DIR_MASK), MATH3,
+                       4, IMMED2);
+       } else {
+               MATHB(p, MATH1, AND, lower_32_bits(PDCP_BEARER_MASK_BE), MATH2,
+                       4, IMMED2);
+               MATHB(p, MATH1, AND, upper_32_bits(PDCP_DIR_MASK_BE), MATH3,
+                       4, IMMED2);
+       }
+       MATHB(p, MATH3, SHLD, MATH3, MATH3, 8, 0);
+       MOVE(p, MATH2, 4, OFIFO, 0, 12, IMMED);
+       MOVE(p, OFIFO, 0, CONTEXT2, 4, 12, IMMED);
+
+       if (dir == OP_TYPE_ENCAP_PROTOCOL) {
+               MATHB(p, SEQINSZ, ADD, PDCP_MAC_I_LEN, VSEQOUTSZ, 4, IMMED2);
+               MATHB(p, SEQINSZ, SUB, ZERO, VSEQINSZ, 4, 0);
+       } else {
+               MATHB(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, VSEQOUTSZ, 4, IMMED2);
+               MATHB(p, VSEQOUTSZ, SUB, ZERO, VSEQINSZ, 4, 0);
+       }
+
+       SEQSTORE(p, MATH0, 7, 1, 0);
+
+       if (dir == OP_TYPE_ENCAP_PROTOCOL) {
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+               SEQFIFOLOAD(p, MSGINSNOOP, 0, VLF | LAST2);
+       } else {
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF | CONT);
+               SEQFIFOLOAD(p, MSGOUTSNOOP, 0, VLF | LAST2);
+       }
+
+       ALG_OPERATION(p, OP_ALG_ALGSEL_SNOW_F9,
+                     OP_ALG_AAI_F9,
+                     OP_ALG_AS_INITFINAL,
+                     dir == OP_TYPE_ENCAP_PROTOCOL ?
+                            ICV_CHECK_DISABLE : ICV_CHECK_ENABLE,
+                     DIR_DEC);
+
+       ALG_OPERATION(p, OP_ALG_ALGSEL_ZUCE,
+                     OP_ALG_AAI_F8,
+                     OP_ALG_AS_INITFINAL,
+                     ICV_CHECK_DISABLE,
+                     dir == OP_TYPE_ENCAP_PROTOCOL ? DIR_ENC : DIR_DEC);
+
+       if (dir == OP_TYPE_ENCAP_PROTOCOL) {
+               MOVE(p, CONTEXT2, 0, IFIFOAB1, 0, 4, LAST1 | FLUSH1 | IMMED);
+       } else {
+               SEQFIFOLOAD(p, MSG1, 4, LAST1 | FLUSH1);
+               JUMP(p, 1, LOCAL_JUMP, ALL_TRUE, CLASS1 | NOP | NIFP);
+
+               if (rta_sec_era >= RTA_SEC_ERA_6)
+                       /*
+                        * From SEC ERA 6 onwards there is a problem with the
+                        * OFIFO pointer, so it needs to be reset here before
+                        * moving data to M0.
+                        */
+                       LOAD(p, 0, DCTRL, 0, LDLEN_RST_CHA_OFIFO_PTR, IMMED);
+
+               /* Move the ICV to M0 before sending it to C2 for comparison. */
+               MOVE(p, OFIFO, 0, MATH0, 0, 4, WAITCOMP | IMMED);
+
+               LOAD(p, NFIFOENTRY_STYPE_ALTSOURCE |
+                    NFIFOENTRY_DEST_CLASS2 |
+                    NFIFOENTRY_DTYPE_ICV |
+                    NFIFOENTRY_LC2 | 4, NFIFO_SZL, 0, 4, IMMED);
+               MOVE(p, MATH0, 0, ALTSOURCE, 0, 4, IMMED);
+       }
+
+       PATCH_JUMP(p, pkeyjump, keyjump);
+       return 0;
+}
+
+static inline int
+pdcp_insert_cplane_zuc_aes_op(struct program *p,
+                             bool swap __maybe_unused,
+                             struct alginfo *cipherdata,
+                             struct alginfo *authdata,
+                             unsigned int dir,
+                             unsigned char era_2_sw_hfn_ovrd __maybe_unused)
+{
+       if (rta_sec_era < RTA_SEC_ERA_5) {
+               pr_err("Invalid era for selected algorithm\n");
+               return -ENOTSUP;
+       }
+
+       if (rta_sec_era >= RTA_SEC_ERA_8) {
+               KEY(p, KEY1, cipherdata->key_enc_flags, cipherdata->key,
+                               cipherdata->keylen, INLINE_KEY(cipherdata));
+               KEY(p, KEY2, authdata->key_enc_flags, authdata->key,
+                               authdata->keylen, INLINE_KEY(authdata));
+
+               PROTOCOL(p, dir, OP_PCLID_LTE_PDCP_CTRL_MIXED,
+                        ((uint16_t)cipherdata->algtype << 8) |
+                        (uint16_t)authdata->algtype);
+               return 0;
+       }
+
+       SEQLOAD(p, MATH0, 7, 1, 0);
+       JUMP(p, 1, LOCAL_JUMP, ALL_TRUE, CALM);
+       if (swap == false)
+               MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK, MATH1, 8,
+                       IFB | IMMED2);
+       else
+               MATHB(p, MATH0, AND, PDCP_C_PLANE_SN_MASK_BE, MATH1, 8,
+                       IFB | IMMED2);
+
+       MATHB(p, MATH1, SHLD, MATH1, MATH1, 8, 0);
+       MOVE(p, DESCBUF, 4, MATH2, 0, 0x08, WAITCOMP | IMMED);
+       MATHB(p, MATH1, OR, MATH2, MATH2, 8, 0);
+       SEQSTORE(p, MATH0, 7, 1, 0);
+       if (dir == OP_TYPE_ENCAP_PROTOCOL) {
+               KEY(p, KEY1, authdata->key_enc_flags, authdata->key,
+                   authdata->keylen, INLINE_KEY(authdata));
+               MOVE(p, MATH2, 0, IFIFOAB1, 0, 0x08, IMMED);
+               MOVE(p, MATH0, 7, IFIFOAB1, 0, 1, IMMED);
+
+               MATHB(p, SEQINSZ, SUB, ZERO, VSEQINSZ, 4, 0);
+               MATHB(p, VSEQINSZ, ADD, PDCP_MAC_I_LEN, VSEQOUTSZ, 4, IMMED2);
+
+               ALG_OPERATION(p, OP_ALG_ALGSEL_AES,
+                             OP_ALG_AAI_CMAC,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_DISABLE,
+                             DIR_DEC);
+               SEQFIFOLOAD(p, MSG1, 0, VLF | LAST1 | FLUSH1);
+               MOVE(p, CONTEXT1, 0, MATH3, 0, 4, WAITCOMP | IMMED);
+               LOAD(p, CLRW_RESET_CLS1_CHA |
+                    CLRW_CLR_C1KEY |
+                    CLRW_CLR_C1CTX |
+                    CLRW_CLR_C1ICV |
+                    CLRW_CLR_C1DATAS |
+                    CLRW_CLR_C1MODE,
+                    CLRW, 0, 4, IMMED);
+
+               KEY(p, KEY1, cipherdata->key_enc_flags, cipherdata->key,
+                   cipherdata->keylen, INLINE_KEY(cipherdata));
+
+               MOVE(p, MATH2, 0, CONTEXT1, 0, 8, IMMED);
+               SEQINPTR(p, 0, PDCP_NULL_MAX_FRAME_LEN, RTO);
+
+               ALG_OPERATION(p, OP_ALG_ALGSEL_ZUCE,
+                             OP_ALG_AAI_F8,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_DISABLE,
+                             DIR_ENC);
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+
+               SEQFIFOLOAD(p, SKIP, 1, 0);
+
+               SEQFIFOLOAD(p, MSG1, 0, VLF);
+               MOVE(p, MATH3, 0, IFIFOAB1, 0, 4, LAST1 | FLUSH1 | IMMED);
+       } else {
+               MOVE(p, MATH2, 0, CONTEXT1, 0, 8, IMMED);
+
+               MOVE(p, CONTEXT1, 0, CONTEXT2, 0, 8, IMMED);
+
+               MATHB(p, SEQINSZ, SUB, ZERO, VSEQINSZ, 4, 0);
+
+               MATHB(p, SEQINSZ, SUB, PDCP_MAC_I_LEN, VSEQOUTSZ, 4, IMMED2);
+
+               KEY(p, KEY1, cipherdata->key_enc_flags, cipherdata->key,
+                   cipherdata->keylen, INLINE_KEY(cipherdata));
+
+               MOVE(p, CONTEXT1, 0, CONTEXT2, 0, 8, IMMED);
+
+               ALG_OPERATION(p, OP_ALG_ALGSEL_ZUCE,
+                             OP_ALG_AAI_F8,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_DISABLE,
+                             DIR_DEC);
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF | CONT);
+               SEQFIFOLOAD(p, MSG1, 0, VLF | LAST1 | FLUSH1);
+
+               MOVE(p, OFIFO, 0, MATH3, 0, 4, IMMED);
+
+               LOAD(p, CLRW_RESET_CLS1_CHA |
+                    CLRW_CLR_C1KEY |
+                    CLRW_CLR_C1CTX |
+                    CLRW_CLR_C1ICV |
+                    CLRW_CLR_C1DATAS |
+                    CLRW_CLR_C1MODE,
+                    CLRW, 0, 4, IMMED);
+
+               KEY(p, KEY1, authdata->key_enc_flags, authdata->key,
+                   authdata->keylen, INLINE_KEY(authdata));
+
+               SEQINPTR(p, 0, 0, SOP);
+
+               ALG_OPERATION(p, OP_ALG_ALGSEL_AES,
+                             OP_ALG_AAI_CMAC,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_ENABLE,
+                             DIR_DEC);
+
+               MATHB(p, SEQINSZ, SUB, ZERO, VSEQINSZ, 4, 0);
+
+               MOVE(p, CONTEXT2, 0, IFIFOAB1, 0, 8, IMMED);
+
+               SEQFIFOLOAD(p, MSG1, 0, VLF | LAST1 | FLUSH1);
+
+               LOAD(p, NFIFOENTRY_STYPE_ALTSOURCE |
+                    NFIFOENTRY_DEST_CLASS1 |
+                    NFIFOENTRY_DTYPE_ICV |
+                    NFIFOENTRY_LC1 |
+                    NFIFOENTRY_FC1 | 4, NFIFO_SZL, 0, 4, IMMED);
+               MOVE(p, MATH3, 0, ALTSOURCE, 0, 4, IMMED);
+       }
+
+       return 0;
+}
+
+static inline int
+pdcp_insert_uplane_15bit_op(struct program *p,
+                           bool swap __maybe_unused,
+                           struct alginfo *cipherdata,
+                           unsigned int dir)
+{
+       int op;
+       /* Insert Cipher Key */
+       KEY(p, KEY1, cipherdata->key_enc_flags, cipherdata->key,
+           cipherdata->keylen, INLINE_KEY(cipherdata));
+
+       if (rta_sec_era >= RTA_SEC_ERA_8) {
+               PROTOCOL(p, dir, OP_PCLID_LTE_PDCP_USER,
+                        (uint16_t)cipherdata->algtype);
+               return 0;
+       }
+
+       SEQLOAD(p, MATH0, 6, 2, 0);
+       JUMP(p, 1, LOCAL_JUMP, ALL_TRUE, CALM);
+       if (swap == false)
+               MATHB(p, MATH0, AND, PDCP_U_PLANE_15BIT_SN_MASK, MATH1, 8,
+                     IFB | IMMED2);
+       else
+               MATHB(p, MATH0, AND, PDCP_U_PLANE_15BIT_SN_MASK_BE, MATH1, 8,
+                     IFB | IMMED2);
+       SEQSTORE(p, MATH0, 6, 2, 0);
+       MATHB(p, MATH1, SHLD, MATH1, MATH1, 8, 0);
+       MOVE(p, DESCBUF, 8, MATH2, 0, 8, WAITCOMP | IMMED);
+       MATHB(p, MATH1, OR, MATH2, MATH2, 8, 0);
+
+       MATHB(p, SEQINSZ, SUB, MATH3, VSEQINSZ, 4, 0);
+       MATHB(p, SEQINSZ, SUB, MATH3, VSEQOUTSZ, 4, 0);
+
+       SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+
+       op = dir == OP_TYPE_ENCAP_PROTOCOL ? DIR_ENC : DIR_DEC;
+       switch (cipherdata->algtype) {
+       case PDCP_CIPHER_TYPE_SNOW:
+               MOVE(p, MATH2, 0, CONTEXT1, 0, 8, WAITCOMP | IMMED);
+               ALG_OPERATION(p, OP_ALG_ALGSEL_SNOW_F8,
+                             OP_ALG_AAI_F8,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_DISABLE,
+                             op);
+               break;
+
+       case PDCP_CIPHER_TYPE_AES:
+               MOVE(p, MATH2, 0, CONTEXT1, 0x10, 0x10, WAITCOMP | IMMED);
+               ALG_OPERATION(p, OP_ALG_ALGSEL_AES,
+                             OP_ALG_AAI_CTR,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_DISABLE,
+                             op);
+               break;
+
+       case PDCP_CIPHER_TYPE_ZUC:
+               if (rta_sec_era < RTA_SEC_ERA_5) {
+                       pr_err("Invalid era for selected algorithm\n");
+                       return -ENOTSUP;
+               }
+               MOVE(p, MATH2, 0, CONTEXT1, 0, 0x08, IMMED);
+               MOVE(p, MATH2, 0, CONTEXT1, 0x08, 0x08, WAITCOMP | IMMED);
+
+               ALG_OPERATION(p, OP_ALG_ALGSEL_ZUCE,
+                             OP_ALG_AAI_F8,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_DISABLE,
+                             op);
+               break;
+
+       default:
+               pr_err("%s: Invalid encrypt algorithm selected: %d\n",
+                      "pdcp_insert_uplane_15bit_op", cipherdata->algtype);
+               return -EINVAL;
+       }
+
+       SEQFIFOLOAD(p, MSG1, 0, VLF | LAST1 | FLUSH1);
+
+       return 0;
+}
+
+/*
+ * Function for inserting the snippet of code responsible for handling the
+ * HFN override, requested either via DPOVRD or via the input frame.
+ */
+static inline int
+insert_hfn_ov_op(struct program *p,
+                uint32_t shift,
+                enum pdb_type_e pdb_type,
+                unsigned char era_2_sw_hfn_ovrd)
+{
+       uint32_t imm = PDCP_DPOVRD_HFN_OV_EN;
+       uint16_t hfn_pdb_offset;
+
+       if (rta_sec_era == RTA_SEC_ERA_2 && !era_2_sw_hfn_ovrd)
+               return 0;
+
+       switch (pdb_type) {
+       case PDCP_PDB_TYPE_NO_PDB:
+               /*
+                * If there is no PDB, the HFN override mechanism makes no
+                * sense, so there is nothing to insert and the function
+                * returns successfully.
+                */
+               return 0;
+
+       case PDCP_PDB_TYPE_REDUCED_PDB:
+               hfn_pdb_offset = 4;
+               break;
+
+       case PDCP_PDB_TYPE_FULL_PDB:
+               hfn_pdb_offset = 8;
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
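+       /*
+        * On SEC ERA > 2 the HFN override request arrives via DPOVRD; on
+        * ERA 2 it is read from the beginning of the input frame instead.
+        */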
+       if (rta_sec_era > RTA_SEC_ERA_2) {
+               MATHB(p, DPOVRD, AND, imm, NONE, 8, IFB | IMMED2);
+       } else {
+               SEQLOAD(p, MATH0, 4, 4, 0);
+               JUMP(p, 1, LOCAL_JUMP, ALL_TRUE, CALM);
+               MATHB(p, MATH0, AND, imm, NONE, 8, IFB | IMMED2);
+               SEQSTORE(p, MATH0, 4, 4, 0);
+       }
+
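+       /*
+        * If the override enable bit is not set (MATH result is zero), skip
+        * the commands below that rewrite the HFN in the PDB (the jump is
+        * one word longer on ERA >= 8 to also skip the DPOVRD clear).
+        */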
+       if (rta_sec_era >= RTA_SEC_ERA_8)
+               JUMP(p, 6, LOCAL_JUMP, ALL_TRUE, MATH_Z);
+       else
+               JUMP(p, 5, LOCAL_JUMP, ALL_TRUE, MATH_Z);
+
+       if (rta_sec_era > RTA_SEC_ERA_2)
+               MATHB(p, DPOVRD, LSHIFT, shift, MATH0, 4, IMMED2);
+       else
+               MATHB(p, MATH0, LSHIFT, shift, MATH0, 4, IMMED2);
+
+       MATHB(p, MATH0, SHLD, MATH0, MATH0, 8, 0);
+       MOVE(p, MATH0, 0, DESCBUF, hfn_pdb_offset, 4, IMMED);
+
+       if (rta_sec_era >= RTA_SEC_ERA_8)
+               /*
+                * For ERA8, DPOVRD could be handled by the PROTOCOL command
+                * itself. For now, this is not done. Thus, clear DPOVRD here
+                * to alleviate any side-effects.
+                */
+               MATHB(p, DPOVRD, AND, ZERO, DPOVRD, 4, STL);
+
+       return 0;
+}
+
+/*
+ * PDCP Control PDB creation function
+ */
+static inline enum pdb_type_e
+cnstr_pdcp_c_plane_pdb(struct program *p,
+                      uint32_t hfn,
+                      unsigned char bearer,
+                      unsigned char direction,
+                      uint32_t hfn_threshold,
+                      struct alginfo *cipherdata,
+                      struct alginfo *authdata)
+{
+       struct pdcp_pdb pdb;
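+       /* PDB type for each cipher (row) / auth (column) combination */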
+       enum pdb_type_e
+               pdb_mask[PDCP_CIPHER_TYPE_INVALID][PDCP_AUTH_TYPE_INVALID] = {
+                       {       /* NULL */
+                               PDCP_PDB_TYPE_NO_PDB,           /* NULL */
+                               PDCP_PDB_TYPE_FULL_PDB,         /* SNOW f9 */
+                               PDCP_PDB_TYPE_FULL_PDB,         /* AES CMAC */
+                               PDCP_PDB_TYPE_FULL_PDB          /* ZUC-I */
+                       },
+                       {       /* SNOW f8 */
+                               PDCP_PDB_TYPE_FULL_PDB,         /* NULL */
+                               PDCP_PDB_TYPE_FULL_PDB,         /* SNOW f9 */
+                               PDCP_PDB_TYPE_REDUCED_PDB,      /* AES CMAC */
+                               PDCP_PDB_TYPE_REDUCED_PDB       /* ZUC-I */
+                       },
+                       {       /* AES CTR */
+                               PDCP_PDB_TYPE_FULL_PDB,         /* NULL */
+                               PDCP_PDB_TYPE_REDUCED_PDB,      /* SNOW f9 */
+                               PDCP_PDB_TYPE_FULL_PDB,         /* AES CMAC */
+                               PDCP_PDB_TYPE_REDUCED_PDB       /* ZUC-I */
+                       },
+                       {       /* ZUC-E */
+                               PDCP_PDB_TYPE_FULL_PDB,         /* NULL */
+                               PDCP_PDB_TYPE_REDUCED_PDB,      /* SNOW f9 */
+                               PDCP_PDB_TYPE_REDUCED_PDB,      /* AES CMAC */
+                               PDCP_PDB_TYPE_FULL_PDB          /* ZUC-I */
+                       },
+       };
+
+       if (rta_sec_era >= RTA_SEC_ERA_8) {
+               memset(&pdb, 0x00, sizeof(struct pdcp_pdb));
+
+               /* This is a HW issue: bit 2 should be set to zero, but the
+                * HW does not behave correctly in that case, so it is
+                * overridden here.
+                */
+               pdb.opt_res.rsvd = 0x00000002;
+
+               /* Copy relevant information from user to PDB */
+               pdb.hfn_res = hfn << PDCP_C_PLANE_PDB_HFN_SHIFT;
+               pdb.bearer_dir_res = (uint32_t)
+                               ((bearer << PDCP_C_PLANE_PDB_BEARER_SHIFT) |
+                                (direction << PDCP_C_PLANE_PDB_DIR_SHIFT));
+               pdb.hfn_thr_res =
+                               hfn_threshold << PDCP_C_PLANE_PDB_HFN_THR_SHIFT;
+
+               /* Copy the PDB into the descriptor */
+               __rta_out32(p, pdb.opt_res.opt);
+               __rta_out32(p, pdb.hfn_res);
+               __rta_out32(p, pdb.bearer_dir_res);
+               __rta_out32(p, pdb.hfn_thr_res);
+
+               return PDCP_PDB_TYPE_FULL_PDB;
+       }
+
+       switch (pdb_mask[cipherdata->algtype][authdata->algtype]) {
+       case PDCP_PDB_TYPE_NO_PDB:
+               break;
+
+       case PDCP_PDB_TYPE_REDUCED_PDB:
+               __rta_out32(p, (hfn << PDCP_C_PLANE_PDB_HFN_SHIFT));
+               __rta_out32(p,
+                           (uint32_t)((bearer <<
+                                       PDCP_C_PLANE_PDB_BEARER_SHIFT) |
+                                       (direction <<
+                                        PDCP_C_PLANE_PDB_DIR_SHIFT)));
+               break;
+
+       case PDCP_PDB_TYPE_FULL_PDB:
+               memset(&pdb, 0x00, sizeof(struct pdcp_pdb));
+
+               /* This is a HW issue: bit 2 should be set to zero, but the
+                * HW does not behave correctly in that case, so it is
+                * overridden here.
+                */
+               pdb.opt_res.rsvd = 0x00000002;
+
+               /* Copy relevant information from user to PDB */
+               pdb.hfn_res = hfn << PDCP_C_PLANE_PDB_HFN_SHIFT;
+               pdb.bearer_dir_res = (uint32_t)
+                       ((bearer << PDCP_C_PLANE_PDB_BEARER_SHIFT) |
+                        (direction << PDCP_C_PLANE_PDB_DIR_SHIFT));
+               pdb.hfn_thr_res =
+                       hfn_threshold << PDCP_C_PLANE_PDB_HFN_THR_SHIFT;
+
+               /* Copy the PDB into the descriptor */
+               __rta_out32(p, pdb.opt_res.opt);
+               __rta_out32(p, pdb.hfn_res);
+               __rta_out32(p, pdb.bearer_dir_res);
+               __rta_out32(p, pdb.hfn_thr_res);
+
+               break;
+
+       default:
+               return PDCP_PDB_TYPE_INVALID;
+       }
+
+       return pdb_mask[cipherdata->algtype][authdata->algtype];
+}
+
+/*
+ * PDCP UPlane PDB creation function
+ */
+static inline int
+cnstr_pdcp_u_plane_pdb(struct program *p,
+                      enum pdcp_sn_size sn_size,
+                      uint32_t hfn, unsigned short bearer,
+                      unsigned short direction,
+                      uint32_t hfn_threshold)
+{
+       struct pdcp_pdb pdb;
+       /*
+        * Depending on the sequence number length, the HFN and the HFN
+        * threshold have different widths within the PDB.
+        */
+       memset(&pdb, 0x00, sizeof(struct pdcp_pdb));
+
+       switch (sn_size) {
+       case PDCP_SN_SIZE_7:
+               pdb.opt_res.opt |= PDCP_U_PLANE_PDB_OPT_SHORT_SN;
+               pdb.hfn_res = hfn << PDCP_U_PLANE_PDB_SHORT_SN_HFN_SHIFT;
+               pdb.hfn_thr_res =
+                       hfn_threshold<<PDCP_U_PLANE_PDB_SHORT_SN_HFN_THR_SHIFT;
+               break;
+
+       case PDCP_SN_SIZE_12:
+               pdb.opt_res.opt &= (uint32_t)(~PDCP_U_PLANE_PDB_OPT_SHORT_SN);
+               pdb.hfn_res = hfn << PDCP_U_PLANE_PDB_LONG_SN_HFN_SHIFT;
+               pdb.hfn_thr_res =
+                       hfn_threshold<<PDCP_U_PLANE_PDB_LONG_SN_HFN_THR_SHIFT;
+               break;
+
+       case PDCP_SN_SIZE_15:
+               pdb.opt_res.opt = (uint32_t)(PDCP_U_PLANE_PDB_OPT_15B_SN);
+               pdb.hfn_res = hfn << PDCP_U_PLANE_PDB_15BIT_SN_HFN_SHIFT;
+               pdb.hfn_thr_res =
+                       hfn_threshold<<PDCP_U_PLANE_PDB_15BIT_SN_HFN_THR_SHIFT;
+               break;
+
+       default:
+               pr_err("Invalid Sequence Number Size setting in PDB\n");
+               return -EINVAL;
+       }
+
+       pdb.bearer_dir_res = (uint32_t)
+                               ((bearer << PDCP_U_PLANE_PDB_BEARER_SHIFT) |
+                                (direction << PDCP_U_PLANE_PDB_DIR_SHIFT));
+
+       /* Copy the PDB into the descriptor */
+       __rta_out32(p, pdb.opt_res.opt);
+       __rta_out32(p, pdb.hfn_res);
+       __rta_out32(p, pdb.bearer_dir_res);
+       __rta_out32(p, pdb.hfn_thr_res);
+
+       return 0;
+}
+
+/**
+ * cnstr_shdsc_pdcp_c_plane_encap - Function for creating a PDCP Control Plane
+ *                                  encapsulation descriptor.
+ * @descbuf: pointer to buffer for descriptor construction
+ * @ps: if 36/40-bit addressing is desired, this parameter must be true
+ * @swap: must be true when core endianness doesn't match SEC endianness
+ * @hfn: starting Hyper Frame Number to be used together with the SN from the
+ *       PDCP frames.
+ * @bearer: radio bearer ID
+ * @direction: the direction of the PDCP frame (UL/DL)
+ * @hfn_threshold: HFN value that once reached triggers a warning from SEC that
+ *                 keys should be renegotiated at the earliest convenience.
+ * @cipherdata: pointer to block cipher transform definitions
+ *              Valid algorithm values are those from cipher_type_pdcp enum.
+ * @authdata: pointer to authentication transform definitions
+ *            Valid algorithm values are those from auth_type_pdcp enum.
+ * @era_2_sw_hfn_ovrd: if software HFN override mechanism is desired for
+ *                     this descriptor. Note: Can only be used for
+ *                     SEC ERA 2.
+ *
+ * Return: size of descriptor written in words or negative number on error.
+ *         Once the function returns, the return value can be used for
+ *         reclaiming the buffer space that wasn't used for the descriptor.
+ *
+ * Note: descbuf must be large enough to contain a full 256 byte long
+ * descriptor; after the function returns, by subtracting the actual number of
+ * bytes used, the user can reuse the remaining buffer space for other purposes.
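+ *
+ * A minimal usage sketch follows (a hypothetical SNOW f8 + AES CMAC setup;
+ * hfn, bearer, direction and hfn_threshold are caller-supplied, and the key
+ * pointer, length and flags of each alginfo must also be filled in):
+ *
+ *   uint32_t desc[64];
+ *   struct alginfo cipher = { .algtype = PDCP_CIPHER_TYPE_SNOW };
+ *   struct alginfo auth = { .algtype = PDCP_AUTH_TYPE_AES };
+ *   int len = cnstr_shdsc_pdcp_c_plane_encap(desc, false, false, hfn,
+ *                                            bearer, direction,
+ *                                            hfn_threshold, &cipher,
+ *                                            &auth, 0);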
+ */
+static inline int
+cnstr_shdsc_pdcp_c_plane_encap(uint32_t *descbuf,
+                              bool ps,
+                              bool swap,
+                              uint32_t hfn,
+                              unsigned char bearer,
+                              unsigned char direction,
+                              uint32_t hfn_threshold,
+                              struct alginfo *cipherdata,
+                              struct alginfo *authdata,
+                              unsigned char era_2_sw_hfn_ovrd)
+{
+       static int
+               (*pdcp_cp_fp[PDCP_CIPHER_TYPE_INVALID][PDCP_AUTH_TYPE_INVALID])
+                       (struct program*, bool swap, struct alginfo *,
+                        struct alginfo *, unsigned int, unsigned char) = {
+               {       /* NULL */
+                       pdcp_insert_cplane_null_op,     /* NULL */
+                       pdcp_insert_cplane_int_only_op, /* SNOW f9 */
+                       pdcp_insert_cplane_int_only_op, /* AES CMAC */
+                       pdcp_insert_cplane_int_only_op  /* ZUC-I */
+               },
+               {       /* SNOW f8 */
+                       pdcp_insert_cplane_enc_only_op, /* NULL */
+                       pdcp_insert_cplane_acc_op,      /* SNOW f9 */
+                       pdcp_insert_cplane_snow_aes_op, /* AES CMAC */
+                       pdcp_insert_cplane_snow_zuc_op  /* ZUC-I */
+               },
+               {       /* AES CTR */
+                       pdcp_insert_cplane_enc_only_op, /* NULL */
+                       pdcp_insert_cplane_aes_snow_op, /* SNOW f9 */
+                       pdcp_insert_cplane_acc_op,      /* AES CMAC */
+                       pdcp_insert_cplane_aes_zuc_op   /* ZUC-I */
+               },
+               {       /* ZUC-E */
+                       pdcp_insert_cplane_enc_only_op, /* NULL */
+                       pdcp_insert_cplane_zuc_snow_op, /* SNOW f9 */
+                       pdcp_insert_cplane_zuc_aes_op,  /* AES CMAC */
+                       pdcp_insert_cplane_acc_op       /* ZUC-I */
+               },
+       };
+       static enum rta_share_type
+               desc_share[PDCP_CIPHER_TYPE_INVALID][PDCP_AUTH_TYPE_INVALID] = {
+               {       /* NULL */
+                       SHR_WAIT,       /* NULL */
+                       SHR_ALWAYS,     /* SNOW f9 */
+                       SHR_ALWAYS,     /* AES CMAC */
+                       SHR_ALWAYS      /* ZUC-I */
+               },
+               {       /* SNOW f8 */
+                       SHR_ALWAYS,     /* NULL */
+                       SHR_ALWAYS,     /* SNOW f9 */
+                       SHR_WAIT,       /* AES CMAC */
+                       SHR_WAIT        /* ZUC-I */
+               },
+               {       /* AES CTR */
+                       SHR_ALWAYS,     /* NULL */
+                       SHR_ALWAYS,     /* SNOW f9 */
+                       SHR_ALWAYS,     /* AES CMAC */
+                       SHR_WAIT        /* ZUC-I */
+               },
+               {       /* ZUC-E */
+                       SHR_ALWAYS,     /* NULL */
+                       SHR_WAIT,       /* SNOW f9 */
+                       SHR_WAIT,       /* AES CMAC */
+                       SHR_ALWAYS      /* ZUC-I */
+               },
+       };
+       enum pdb_type_e pdb_type;
+       struct program prg;
+       struct program *p = &prg;
+       int err;
+       LABEL(pdb_end);
+
+       if (rta_sec_era != RTA_SEC_ERA_2 && era_2_sw_hfn_ovrd) {
+               pr_err("Cannot select SW HFN override for other era than 2");
+               return -EINVAL;
+       }
+
+       PROGRAM_CNTXT_INIT(p, descbuf, 0);
+       if (swap)
+               PROGRAM_SET_BSWAP(p);
+       if (ps)
+               PROGRAM_SET_36BIT_ADDR(p);
+
+       SHR_HDR(p, desc_share[cipherdata->algtype][authdata->algtype], 0, 0);
+
+       pdb_type = cnstr_pdcp_c_plane_pdb(p,
+                       hfn,
+                       bearer,
+                       direction,
+                       hfn_threshold,
+                       cipherdata,
+                       authdata);
+
+       SET_LABEL(p, pdb_end);
+
+       err = insert_hfn_ov_op(p, PDCP_SN_SIZE_5, pdb_type,
+                              era_2_sw_hfn_ovrd);
+       if (err)
+               return err;
+
+       err = pdcp_cp_fp[cipherdata->algtype][authdata->algtype](p,
+               swap,
+               cipherdata,
+               authdata,
+               OP_TYPE_ENCAP_PROTOCOL,
+               era_2_sw_hfn_ovrd);
+       if (err)
+               return err;
+
+       PATCH_HDR(p, 0, pdb_end);
+
+       return PROGRAM_FINALIZE(p);
+}
+
+/**
+ * cnstr_shdsc_pdcp_c_plane_decap - Function for creating a PDCP Control Plane
+ *                                  decapsulation descriptor.
+ * @descbuf: pointer to buffer for descriptor construction
+ * @ps: if 36/40-bit addressing is desired, this parameter must be true
+ * @swap: must be true when core endianness doesn't match SEC endianness
+ * @hfn: starting Hyper Frame Number to be used together with the SN from the
+ *       PDCP frames.
+ * @bearer: radio bearer ID
+ * @direction: the direction of the PDCP frame (UL/DL)
+ * @hfn_threshold: HFN value that once reached triggers a warning from SEC that
+ *                 keys should be renegotiated at the earliest convenience.
+ * @cipherdata: pointer to block cipher transform definitions
+ *              Valid algorithm values are those from cipher_type_pdcp enum.
+ * @authdata: pointer to authentication transform definitions
+ *            Valid algorithm values are those from auth_type_pdcp enum.
+ * @era_2_sw_hfn_ovrd: if software HFN override mechanism is desired for
+ *                     this descriptor. Note: Can only be used for
+ *                     SEC ERA 2.
+ *
+ * Return: size of descriptor written in words or negative number on error.
+ *         Once the function returns, the return value can be used for
+ *         reclaiming the buffer space that wasn't used for the descriptor.
+ *
+ * Note: descbuf must be large enough to contain a full 256 byte long
+ * descriptor; after the function returns, by subtracting the actual number of
+ * bytes used, the user can reuse the remaining buffer space for other purposes.
+ */
+static inline int
+cnstr_shdsc_pdcp_c_plane_decap(uint32_t *descbuf,
+                              bool ps,
+                              bool swap,
+                              uint32_t hfn,
+                              unsigned char bearer,
+                              unsigned char direction,
+                              uint32_t hfn_threshold,
+                              struct alginfo *cipherdata,
+                              struct alginfo *authdata,
+                              unsigned char era_2_sw_hfn_ovrd)
+{
+       static int
+               (*pdcp_cp_fp[PDCP_CIPHER_TYPE_INVALID][PDCP_AUTH_TYPE_INVALID])
+                       (struct program*, bool swap, struct alginfo *,
+                        struct alginfo *, unsigned int, unsigned char) = {
+               {       /* NULL */
+                       pdcp_insert_cplane_null_op,     /* NULL */
+                       pdcp_insert_cplane_int_only_op, /* SNOW f9 */
+                       pdcp_insert_cplane_int_only_op, /* AES CMAC */
+                       pdcp_insert_cplane_int_only_op  /* ZUC-I */
+               },
+               {       /* SNOW f8 */
+                       pdcp_insert_cplane_enc_only_op, /* NULL */
+                       pdcp_insert_cplane_acc_op,      /* SNOW f9 */
+                       pdcp_insert_cplane_snow_aes_op, /* AES CMAC */
+                       pdcp_insert_cplane_snow_zuc_op  /* ZUC-I */
+               },
+               {       /* AES CTR */
+                       pdcp_insert_cplane_enc_only_op, /* NULL */
+                       pdcp_insert_cplane_aes_snow_op, /* SNOW f9 */
+                       pdcp_insert_cplane_acc_op,      /* AES CMAC */
+                       pdcp_insert_cplane_aes_zuc_op   /* ZUC-I */
+               },
+               {       /* ZUC-E */
+                       pdcp_insert_cplane_enc_only_op, /* NULL */
+                       pdcp_insert_cplane_zuc_snow_op, /* SNOW f9 */
+                       pdcp_insert_cplane_zuc_aes_op,  /* AES CMAC */
+                       pdcp_insert_cplane_acc_op       /* ZUC-I */
+               },
+       };
+       static enum rta_share_type
+               desc_share[PDCP_CIPHER_TYPE_INVALID][PDCP_AUTH_TYPE_INVALID] = {
+               {       /* NULL */
+                       SHR_WAIT,       /* NULL */
+                       SHR_ALWAYS,     /* SNOW f9 */
+                       SHR_ALWAYS,     /* AES CMAC */
+                       SHR_ALWAYS      /* ZUC-I */
+               },
+               {       /* SNOW f8 */
+                       SHR_ALWAYS,     /* NULL */
+                       SHR_ALWAYS,     /* SNOW f9 */
+                       SHR_WAIT,       /* AES CMAC */
+                       SHR_WAIT        /* ZUC-I */
+               },
+               {       /* AES CTR */
+                       SHR_ALWAYS,     /* NULL */
+                       SHR_ALWAYS,     /* SNOW f9 */
+                       SHR_ALWAYS,     /* AES CMAC */
+                       SHR_WAIT        /* ZUC-I */
+               },
+               {       /* ZUC-E */
+                       SHR_ALWAYS,     /* NULL */
+                       SHR_WAIT,       /* SNOW f9 */
+                       SHR_WAIT,       /* AES CMAC */
+                       SHR_ALWAYS      /* ZUC-I */
+               },
+       };
+       enum pdb_type_e pdb_type;
+       struct program prg;
+       struct program *p = &prg;
+       int err;
+       LABEL(pdb_end);
+
+       if (rta_sec_era != RTA_SEC_ERA_2 && era_2_sw_hfn_ovrd) {
+               pr_err("Cannot select SW HFN override for other era than 2");
+               return -EINVAL;
+       }
+
+       PROGRAM_CNTXT_INIT(p, descbuf, 0);
+       if (swap)
+               PROGRAM_SET_BSWAP(p);
+       if (ps)
+               PROGRAM_SET_36BIT_ADDR(p);
+
+       SHR_HDR(p, desc_share[cipherdata->algtype][authdata->algtype], 0, 0);
+
+       pdb_type = cnstr_pdcp_c_plane_pdb(p,
+                       hfn,
+                       bearer,
+                       direction,
+                       hfn_threshold,
+                       cipherdata,
+                       authdata);
+
+       SET_LABEL(p, pdb_end);
+
+       err = insert_hfn_ov_op(p, PDCP_SN_SIZE_5, pdb_type,
+                              era_2_sw_hfn_ovrd);
+       if (err)
+               return err;
+
+       err = pdcp_cp_fp[cipherdata->algtype][authdata->algtype](p,
+               swap,
+               cipherdata,
+               authdata,
+               OP_TYPE_DECAP_PROTOCOL,
+               era_2_sw_hfn_ovrd);
+       if (err)
+               return err;
+
+       PATCH_HDR(p, 0, pdb_end);
+
+       return PROGRAM_FINALIZE(p);
+}
+
+/**
+ * cnstr_shdsc_pdcp_u_plane_encap - Function for creating a PDCP User Plane
+ *                                  encapsulation descriptor.
+ * @descbuf: pointer to buffer for descriptor construction
+ * @ps: if 36/40-bit addressing is desired, this parameter must be true
+ * @swap: must be true when core endianness doesn't match SEC endianness
+ * @sn_size: selects Sequence Number Size: 7/12/15 bits
+ * @hfn: starting Hyper Frame Number to be used together with the SN from the
+ *       PDCP frames.
+ * @bearer: radio bearer ID
+ * @direction: the direction of the PDCP frame (UL/DL)
+ * @hfn_threshold: HFN value that once reached triggers a warning from SEC that
+ *                 keys should be renegotiated at the earliest convenience.
+ * @cipherdata: pointer to block cipher transform definitions
+ *              Valid algorithm values are those from cipher_type_pdcp enum.
+ * @era_2_sw_hfn_ovrd: if software HFN override mechanism is desired for
+ *                     this descriptor. Note: Can only be used for
+ *                     SEC ERA 2.
+ *
+ * Return: size of descriptor written in words or negative number on error.
+ *         Once the function returns, the return value can be used for
+ *         reclaiming the buffer space that wasn't used for the descriptor.
+ *
+ * Note: descbuf must be large enough to contain a full 256 byte long
+ * descriptor; after the function returns, by subtracting the actual number of
+ * bytes used, the user can reuse the remaining buffer space for other purposes.
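+ *
+ * A minimal usage sketch follows (hypothetical values; hfn, bearer,
+ * direction and hfn_threshold are caller-supplied, and the key pointer,
+ * length and flags of the alginfo must also be filled in):
+ *
+ *   uint32_t desc[64];
+ *   struct alginfo cipher = { .algtype = PDCP_CIPHER_TYPE_AES };
+ *   int len = cnstr_shdsc_pdcp_u_plane_encap(desc, false, false,
+ *                                            PDCP_SN_SIZE_12, hfn, bearer,
+ *                                            direction, hfn_threshold,
+ *                                            &cipher, 0);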
+ */
+static inline int
+cnstr_shdsc_pdcp_u_plane_encap(uint32_t *descbuf,
+                              bool ps,
+                              bool swap,
+                              enum pdcp_sn_size sn_size,
+                              uint32_t hfn,
+                              unsigned short bearer,
+                              unsigned short direction,
+                              uint32_t hfn_threshold,
+                              struct alginfo *cipherdata,
+                              unsigned char era_2_sw_hfn_ovrd)
+{
+       struct program prg;
+       struct program *p = &prg;
+       int err;
+       LABEL(pdb_end);
+
+       if (rta_sec_era != RTA_SEC_ERA_2 && era_2_sw_hfn_ovrd) {
+               pr_err("Cannot select SW HFN ovrd for other era than 2");
+               return -EINVAL;
+       }
+
+       PROGRAM_CNTXT_INIT(p, descbuf, 0);
+       if (swap)
+               PROGRAM_SET_BSWAP(p);
+       if (ps)
+               PROGRAM_SET_36BIT_ADDR(p);
+
+       SHR_HDR(p, SHR_ALWAYS, 0, 0);
+       if (cnstr_pdcp_u_plane_pdb(p, sn_size, hfn, bearer, direction,
+                                  hfn_threshold)) {
+               pr_err("Error creating PDCP UPlane PDB\n");
+               return -EINVAL;
+       }
+       SET_LABEL(p, pdb_end);
+
+       err = insert_hfn_ov_op(p, sn_size, PDCP_PDB_TYPE_FULL_PDB,
+                              era_2_sw_hfn_ovrd);
+       if (err)
+               return err;
+
+       switch (sn_size) {
+       case PDCP_SN_SIZE_7:
+       case PDCP_SN_SIZE_12:
+               switch (cipherdata->algtype) {
+               case PDCP_CIPHER_TYPE_ZUC:
+                       if (rta_sec_era < RTA_SEC_ERA_5) {
+                               pr_err("Invalid era for selected algorithm\n");
+                               return -ENOTSUP;
+                       }
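+                       /* fall through */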
+               case PDCP_CIPHER_TYPE_AES:
+               case PDCP_CIPHER_TYPE_SNOW:
+                       /* Insert Cipher Key */
+                       KEY(p, KEY1, cipherdata->key_enc_flags,
+                           (uint64_t)cipherdata->key, cipherdata->keylen,
+                           INLINE_KEY(cipherdata));
+                       PROTOCOL(p, OP_TYPE_ENCAP_PROTOCOL,
+                                OP_PCLID_LTE_PDCP_USER,
+                                (uint16_t)cipherdata->algtype);
+                       break;
+               case PDCP_CIPHER_TYPE_NULL:
+                       insert_copy_frame_op(p,
+                                            cipherdata,
+                                            OP_TYPE_ENCAP_PROTOCOL);
+                       break;
+               default:
+                       pr_err("%s: Invalid encrypt algorithm selected: %d\n",
+                              "cnstr_pcl_shdsc_pdcp_u_plane_decap",
+                              cipherdata->algtype);
+                       return -EINVAL;
+               }
+               break;
+
+       case PDCP_SN_SIZE_15:
+               switch (cipherdata->algtype) {
+               case PDCP_CIPHER_TYPE_NULL:
+                       insert_copy_frame_op(p,
+                                            cipherdata,
+                                            OP_TYPE_ENCAP_PROTOCOL);
+                       break;
+
+               default:
+                       err = pdcp_insert_uplane_15bit_op(p, swap, cipherdata,
+                               OP_TYPE_ENCAP_PROTOCOL);
+                       if (err)
+                               return err;
+                       break;
+               }
+               break;
+
+       case PDCP_SN_SIZE_5:
+       default:
+               pr_err("Invalid SN size selected\n");
+               return -ENOTSUP;
+       }
+
+       PATCH_HDR(p, 0, pdb_end);
+       return PROGRAM_FINALIZE(p);
+}
+
+/**
+ * cnstr_shdsc_pdcp_u_plane_decap - Function for creating a PDCP User Plane
+ *                                  decapsulation descriptor.
+ * @descbuf: pointer to buffer for descriptor construction
+ * @ps: must be true if 36/40-bit addressing is desired
+ * @swap: must be true when core endianness doesn't match SEC endianness
+ * @sn_size: selects Sequence Number Size: 7/12/15 bits
+ * @hfn: starting Hyper Frame Number to be used together with the SN from the
+ *       PDCP frames.
+ * @bearer: radio bearer ID
+ * @direction: the direction of the PDCP frame (UL/DL)
+ * @hfn_threshold: HFN value that once reached triggers a warning from SEC that
+ *                 keys should be renegotiated at the earliest convenience.
+ * @cipherdata: pointer to block cipher transform definitions.
+ *              Valid algorithm values are those from the cipher_type_pdcp enum.
+ * @era_2_sw_hfn_ovrd: set to true if the software HFN override mechanism is
+ *                     desired for this descriptor. Note: this can only be
+ *                     used for SEC ERA 2.
+ *
+ * Return: size of descriptor written in words or negative number on error.
+ *         Once the function returns, this value can be used for reclaiming
+ *         the space that wasn't used for the descriptor.
+ *
+ * Note: descbuf must be large enough to contain a full 256 byte long
+ * descriptor; after the function returns, by subtracting the actual number of
+ * bytes used, the user can reuse the remaining buffer space for other purposes.
+ */
+static inline int
+cnstr_shdsc_pdcp_u_plane_decap(uint32_t *descbuf,
+                              bool ps,
+                              bool swap,
+                              enum pdcp_sn_size sn_size,
+                              uint32_t hfn,
+                              unsigned short bearer,
+                              unsigned short direction,
+                              uint32_t hfn_threshold,
+                              struct alginfo *cipherdata,
+                              unsigned char era_2_sw_hfn_ovrd)
+{
+       struct program prg;
+       struct program *p = &prg;
+       int err;
+       LABEL(pdb_end);
+
+       if (rta_sec_era != RTA_SEC_ERA_2 && era_2_sw_hfn_ovrd) {
+               pr_err("Cannot select SW HFN override for other era than 2");
+               return -EINVAL;
+       }
+
+       PROGRAM_CNTXT_INIT(p, descbuf, 0);
+       if (swap)
+               PROGRAM_SET_BSWAP(p);
+       if (ps)
+               PROGRAM_SET_36BIT_ADDR(p);
+
+       SHR_HDR(p, SHR_ALWAYS, 0, 0);
+       if (cnstr_pdcp_u_plane_pdb(p, sn_size, hfn, bearer, direction,
+                                  hfn_threshold)) {
+               pr_err("Error creating PDCP UPlane PDB\n");
+               return -EINVAL;
+       }
+       SET_LABEL(p, pdb_end);
+
+       err = insert_hfn_ov_op(p, sn_size, PDCP_PDB_TYPE_FULL_PDB,
+                              era_2_sw_hfn_ovrd);
+       if (err)
+               return err;
+
+       switch (sn_size) {
+       case PDCP_SN_SIZE_7:
+       case PDCP_SN_SIZE_12:
+               switch (cipherdata->algtype) {
+               case PDCP_CIPHER_TYPE_ZUC:
+                       if (rta_sec_era < RTA_SEC_ERA_5) {
+                               pr_err("Invalid era for selected algorithm\n");
+                               return -ENOTSUP;
+                       }
+                       /* fall through */
+               case PDCP_CIPHER_TYPE_AES:
+               case PDCP_CIPHER_TYPE_SNOW:
+                       /* Insert Cipher Key */
+                       KEY(p, KEY1, cipherdata->key_enc_flags,
+                           cipherdata->key, cipherdata->keylen,
+                           INLINE_KEY(cipherdata));
+                       PROTOCOL(p, OP_TYPE_DECAP_PROTOCOL,
+                                OP_PCLID_LTE_PDCP_USER,
+                                (uint16_t)cipherdata->algtype);
+                       break;
+               case PDCP_CIPHER_TYPE_NULL:
+                       insert_copy_frame_op(p,
+                                            cipherdata,
+                                            OP_TYPE_DECAP_PROTOCOL);
+                       break;
+               default:
+                       pr_err("%s: Invalid encrypt algorithm selected: %d\n",
+                              "cnstr_pcl_shdsc_pdcp_u_plane_decap",
+                              cipherdata->algtype);
+                       return -EINVAL;
+               }
+               break;
+
+       case PDCP_SN_SIZE_15:
+               switch (cipherdata->algtype) {
+               case PDCP_CIPHER_TYPE_NULL:
+                       insert_copy_frame_op(p,
+                                            cipherdata,
+                                            OP_TYPE_DECAP_PROTOCOL);
+                       break;
+
+               default:
+                       err = pdcp_insert_uplane_15bit_op(p, swap, cipherdata,
+                               OP_TYPE_DECAP_PROTOCOL);
+                       if (err)
+                               return err;
+                       break;
+               }
+               break;
+
+       case PDCP_SN_SIZE_5:
+       default:
+               pr_err("Invalid SN size selected\n");
+               return -ENOTSUP;
+       }
+
+       PATCH_HDR(p, 0, pdb_end);
+       return PROGRAM_FINALIZE(p);
+}
+
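+/*
+ * Example: building a u-plane decapsulation descriptor. This is a minimal
+ * sketch only; the swap/hfn/bearer/direction/hfn_threshold variables and
+ * the AES key below are assumptions, not part of this header:
+ *
+ *     uint32_t descbuf[64];   // large enough for a full 256-byte descriptor
+ *     struct alginfo cipherdata = {
+ *             .algtype = PDCP_CIPHER_TYPE_AES,
+ *             .key = (size_t)key_va,
+ *             .keylen = 16,
+ *             .key_enc_flags = 0,
+ *             .key_type = RTA_DATA_IMM,
+ *     };
+ *     int len = cnstr_shdsc_pdcp_u_plane_decap(descbuf, false, swap,
+ *                     PDCP_SN_SIZE_12, hfn, bearer, direction,
+ *                     hfn_threshold, &cipherdata, 0);
+ *     if (len < 0)
+ *             return len;     // len is the descriptor length in words
+ */
+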
+/**
+ * cnstr_shdsc_pdcp_short_mac - Function for creating a PDCP Short MAC
+ *                              descriptor.
+ * @descbuf: pointer to buffer for descriptor construction
+ * @ps: must be true if 36/40-bit addressing is desired
+ * @swap: must be true when core endianness doesn't match SEC endianness
+ * @authdata: pointer to authentication transform definitions.
+ *            Valid algorithm values are those from the auth_type_pdcp enum.
+ *
+ * Return: size of descriptor written in words or negative number on error.
+ *         Once the function returns, this value can be used for reclaiming
+ *         the space that wasn't used for the descriptor.
+ *
+ * Note: descbuf must be large enough to contain a full 256 byte long
+ * descriptor; after the function returns, by subtracting the actual number of
+ * bytes used, the user can reuse the remaining buffer space for other purposes.
+ */
+static inline int
+cnstr_shdsc_pdcp_short_mac(uint32_t *descbuf,
+                          bool ps,
+                          bool swap,
+                          struct alginfo *authdata)
+{
+       struct program prg;
+       struct program *p = &prg;
+       uint32_t iv[3] = {0, 0, 0};
+       LABEL(local_offset);
+       REFERENCE(move_cmd_read_descbuf);
+       REFERENCE(move_cmd_write_descbuf);
+
+       PROGRAM_CNTXT_INIT(p, descbuf, 0);
+       if (swap)
+               PROGRAM_SET_BSWAP(p);
+       if (ps)
+               PROGRAM_SET_36BIT_ADDR(p);
+
+       SHR_HDR(p, SHR_ALWAYS, 1, 0);
+
+       if (rta_sec_era > RTA_SEC_ERA_2) {
+               MATHB(p, SEQINSZ, SUB, ZERO, VSEQINSZ, 4, 0);
+               MATHB(p, SEQINSZ, SUB, ZERO, MATH1, 4, 0);
+       } else {
+               MATHB(p, SEQINSZ, ADD, ONE, MATH1, 4, 0);
+               MATHB(p, MATH1, SUB, ONE, MATH1, 4, 0);
+               MATHB(p, ZERO, ADD, MATH1, VSEQINSZ, 4, 0);
+               MOVE(p, MATH1, 0, MATH0, 0, 8, IMMED);
+
+               /*
+                * Since MOVELEN is available only starting with
+                * SEC ERA 3, use poor man's MOVELEN: create a MOVE
+                * command dynamically by writing the length from M1 by
+                * OR-ing the command in the M1 register and MOVE the
+                * result into the descriptor buffer. Care must be taken
+                * wrt. the location of the command because of SEC
+                * pipelining. The actual MOVEs are written at the end
+                * of the descriptor due to calculations needed on the
+                * offset in the descriptor for the MOVE command.
+                */
+               move_cmd_read_descbuf = MOVE(p, DESCBUF, 0, MATH0, 0, 6,
+                                            IMMED);
+               move_cmd_write_descbuf = MOVE(p, MATH0, 0, DESCBUF, 0, 8,
+                                             WAITCOMP | IMMED);
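+               /*
+                * Both MOVE commands above are placeholders; PATCH_MOVE at
+                * the end of this function rewrites them once local_offset
+                * is known.
+                */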
+       }
+       MATHB(p, ZERO, ADD, MATH1, VSEQOUTSZ, 4, 0);
+
+       switch (authdata->algtype) {
+       case PDCP_AUTH_TYPE_NULL:
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+               if (rta_sec_era > RTA_SEC_ERA_2) {
+                       MOVE(p, AB1, 0, OFIFO, 0, MATH1, 0);
+               } else {
+                       SET_LABEL(p, local_offset);
+
+                       /* Shut off automatic Info FIFO entries */
+                       LOAD(p, 0, DCTRL, LDOFF_DISABLE_AUTO_NFIFO, 0, IMMED);
+
+                       /* Placeholder for MOVE command with length from M1
+                        * register
+                        */
+                       MOVE(p, IFIFOAB1, 0, OFIFO, 0, 0, IMMED);
+
+                       /* Enable automatic Info FIFO entries */
+                       LOAD(p, 0, DCTRL, LDOFF_ENABLE_AUTO_NFIFO, 0, IMMED);
+               }
+
+               LOAD(p, (uintptr_t)iv, MATH0, 0, 8, IMMED | COPY);
+               SEQFIFOLOAD(p, MSG1, 0, VLF | LAST1 | LAST2 | FLUSH1);
+               SEQSTORE(p, MATH0, 0, 4, 0);
+
+               break;
+
+       case PDCP_AUTH_TYPE_SNOW:
+               iv[0] = 0xFFFFFFFF;
+               iv[1] = swap ? swab32(0x04000000) : 0x04000000;
+               iv[2] = swap ? swab32(0xF8000000) : 0xF8000000;
+
+               KEY(p, KEY2, authdata->key_enc_flags, authdata->key,
+                   authdata->keylen, INLINE_KEY(authdata));
+               LOAD(p, (uintptr_t)&iv, CONTEXT2, 0, 12, IMMED | COPY);
+               ALG_OPERATION(p, OP_ALG_ALGSEL_SNOW_F9,
+                             OP_ALG_AAI_F9,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_DISABLE,
+                             DIR_ENC);
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+
+               if (rta_sec_era > RTA_SEC_ERA_2) {
+                       MOVE(p, AB1, 0, OFIFO, 0, MATH1, 0);
+               } else {
+                       SET_LABEL(p, local_offset);
+
+                       /* Shut off automatic Info FIFO entries */
+                       LOAD(p, 0, DCTRL, LDOFF_DISABLE_AUTO_NFIFO, 0, IMMED);
+
+                       /* Placeholder for MOVE command with length from M1
+                        * register
+                        */
+                       MOVE(p, IFIFOAB1, 0, OFIFO, 0, 0, IMMED);
+
+                       /* Enable automatic Info FIFO entries */
+                       LOAD(p, 0, DCTRL, LDOFF_ENABLE_AUTO_NFIFO, 0, IMMED);
+               }
+               SEQFIFOLOAD(p, MSGINSNOOP, 0, VLF | LAST1 | LAST2 | FLUSH1);
+               SEQSTORE(p, CONTEXT2, 0, 4, 0);
+
+               break;
+
+       case PDCP_AUTH_TYPE_AES:
+               iv[0] = 0xFFFFFFFF;
+               iv[1] = swap ? swab32(0xFC000000) : 0xFC000000;
+               iv[2] = 0x00000000; /* unused */
+
+               KEY(p, KEY1, authdata->key_enc_flags, authdata->key,
+                   authdata->keylen, INLINE_KEY(authdata));
+               LOAD(p, (uintptr_t)&iv, MATH0, 0, 8, IMMED | COPY);
+               MOVE(p, MATH0, 0, IFIFOAB1, 0, 8, IMMED);
+               ALG_OPERATION(p, OP_ALG_ALGSEL_AES,
+                             OP_ALG_AAI_CMAC,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_DISABLE,
+                             DIR_ENC);
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+
+               if (rta_sec_era > RTA_SEC_ERA_2) {
+                       MOVE(p, AB2, 0, OFIFO, 0, MATH1, 0);
+               } else {
+                       SET_LABEL(p, local_offset);
+
+                       /* Shut off automatic Info FIFO entries */
+                       LOAD(p, 0, DCTRL, LDOFF_DISABLE_AUTO_NFIFO, 0, IMMED);
+
+                       /* Placeholder for MOVE command with length from M1
+                        * register
+                        */
+                       MOVE(p, IFIFOAB2, 0, OFIFO, 0, 0, IMMED);
+
+                       /* Enable automatic Info FIFO entries */
+                       LOAD(p, 0, DCTRL, LDOFF_ENABLE_AUTO_NFIFO, 0, IMMED);
+               }
+               SEQFIFOLOAD(p, MSGINSNOOP, 0, VLF | LAST1 | LAST2 | FLUSH1);
+               SEQSTORE(p, CONTEXT1, 0, 4, 0);
+
+               break;
+
+       case PDCP_AUTH_TYPE_ZUC:
+               if (rta_sec_era < RTA_SEC_ERA_5) {
+                       pr_err("Invalid era for selected algorithm\n");
+                       return -ENOTSUP;
+               }
+               iv[0] = 0xFFFFFFFF;
+               iv[1] = swap ? swab32(0xFC000000) : 0xFC000000;
+               iv[2] = 0x00000000; /* unused */
+
+               KEY(p, KEY2, authdata->key_enc_flags, authdata->key,
+                   authdata->keylen, INLINE_KEY(authdata));
+               LOAD(p, (uintptr_t)&iv, CONTEXT2, 0, 12, IMMED | COPY);
+               ALG_OPERATION(p, OP_ALG_ALGSEL_ZUCA,
+                             OP_ALG_AAI_F9,
+                             OP_ALG_AS_INITFINAL,
+                             ICV_CHECK_DISABLE,
+                             DIR_ENC);
+               SEQFIFOSTORE(p, MSG, 0, 0, VLF);
+               MOVE(p, AB1, 0, OFIFO, 0, MATH1, 0);
+               SEQFIFOLOAD(p, MSGINSNOOP, 0, VLF | LAST1 | LAST2 | FLUSH1);
+               SEQSTORE(p, CONTEXT2, 0, 4, 0);
+
+               break;
+
+       default:
+               pr_err("%s: Invalid integrity algorithm selected: %d\n",
+                      "cnstr_shdsc_pdcp_short_mac", authdata->algtype);
+               return -EINVAL;
+       }
+
+       if (rta_sec_era < RTA_SEC_ERA_3) {
+               PATCH_MOVE(p, move_cmd_read_descbuf, local_offset);
+               PATCH_MOVE(p, move_cmd_write_descbuf, local_offset);
+       }
+
+       return PROGRAM_FINALIZE(p);
+}
+
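+/*
+ * Example (sketch; the key and swap values are assumptions): a Short MAC
+ * descriptor needs no PDB parameters, only the integrity key:
+ *
+ *     uint32_t descbuf[64];
+ *     struct alginfo authdata = {
+ *             .algtype = PDCP_AUTH_TYPE_SNOW,
+ *             .key = (size_t)auth_key_va,
+ *             .keylen = 16,
+ *             .key_enc_flags = 0,
+ *             .key_type = RTA_DATA_IMM,
+ *     };
+ *     int len = cnstr_shdsc_pdcp_short_mac(descbuf, false, swap, &authdata);
+ */
+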
+#endif /* __DESC_PDCP_H__ */
index d9a5b0e..cf8dfb9 100644 (file)
@@ -14,178 +14,176 @@ static inline int
 __rta_ssl_proto(uint16_t protoinfo)
 {
        switch (protoinfo) {
-       case OP_PCL_SSL30_RC4_40_MD5_2:
-       case OP_PCL_SSL30_RC4_128_MD5_2:
-       case OP_PCL_SSL30_RC4_128_SHA_5:
-       case OP_PCL_SSL30_RC4_40_MD5_3:
-       case OP_PCL_SSL30_RC4_128_MD5_3:
-       case OP_PCL_SSL30_RC4_128_SHA:
-       case OP_PCL_SSL30_RC4_128_MD5:
-       case OP_PCL_SSL30_RC4_40_SHA:
-       case OP_PCL_SSL30_RC4_40_MD5:
-       case OP_PCL_SSL30_RC4_128_SHA_2:
-       case OP_PCL_SSL30_RC4_128_SHA_3:
-       case OP_PCL_SSL30_RC4_128_SHA_4:
-       case OP_PCL_SSL30_RC4_128_SHA_6:
-       case OP_PCL_SSL30_RC4_128_SHA_7:
-       case OP_PCL_SSL30_RC4_128_SHA_8:
-       case OP_PCL_SSL30_RC4_128_SHA_9:
-       case OP_PCL_SSL30_RC4_128_SHA_10:
-       case OP_PCL_TLS_ECDHE_PSK_RC4_128_SHA:
+       case OP_PCL_TLS_RSA_EXPORT_WITH_RC4_40_MD5:
+       case OP_PCL_TLS_RSA_WITH_RC4_128_MD5:
+       case OP_PCL_TLS_RSA_WITH_RC4_128_SHA:
+       case OP_PCL_TLS_DH_anon_EXPORT_WITH_RC4_40_MD5:
+       case OP_PCL_TLS_DH_anon_WITH_RC4_128_MD5:
+       case OP_PCL_TLS_KRB5_WITH_RC4_128_SHA:
+       case OP_PCL_TLS_KRB5_WITH_RC4_128_MD5:
+       case OP_PCL_TLS_KRB5_EXPORT_WITH_RC4_40_SHA:
+       case OP_PCL_TLS_KRB5_EXPORT_WITH_RC4_40_MD5:
+       case OP_PCL_TLS_PSK_WITH_RC4_128_SHA:
+       case OP_PCL_TLS_DHE_PSK_WITH_RC4_128_SHA:
+       case OP_PCL_TLS_RSA_PSK_WITH_RC4_128_SHA:
+       case OP_PCL_TLS_ECDH_ECDSA_WITH_RC4_128_SHA:
+       case OP_PCL_TLS_ECDHE_ECDSA_WITH_RC4_128_SHA:
+       case OP_PCL_TLS_ECDH_RSA_WITH_RC4_128_SHA:
+       case OP_PCL_TLS_ECDHE_RSA_WITH_RC4_128_SHA:
+       case OP_PCL_TLS_ECDH_anon_WITH_RC4_128_SHA:
+       case OP_PCL_TLS_ECDHE_PSK_WITH_RC4_128_SHA:
                if (rta_sec_era == RTA_SEC_ERA_7)
                        return -EINVAL;
                /* fall through if not Era 7 */
-       case OP_PCL_SSL30_DES40_CBC_SHA:
-       case OP_PCL_SSL30_DES_CBC_SHA_2:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_5:
-       case OP_PCL_SSL30_DES40_CBC_SHA_2:
-       case OP_PCL_SSL30_DES_CBC_SHA_3:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_6:
-       case OP_PCL_SSL30_DES40_CBC_SHA_3:
-       case OP_PCL_SSL30_DES_CBC_SHA_4:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_7:
-       case OP_PCL_SSL30_DES40_CBC_SHA_4:
-       case OP_PCL_SSL30_DES_CBC_SHA_5:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_8:
-       case OP_PCL_SSL30_DES40_CBC_SHA_5:
-       case OP_PCL_SSL30_DES_CBC_SHA_6:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_9:
-       case OP_PCL_SSL30_DES40_CBC_SHA_6:
-       case OP_PCL_SSL30_DES_CBC_SHA_7:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_10:
-       case OP_PCL_SSL30_DES_CBC_SHA:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA:
-       case OP_PCL_SSL30_DES_CBC_MD5:
-       case OP_PCL_SSL30_3DES_EDE_CBC_MD5:
-       case OP_PCL_SSL30_DES40_CBC_SHA_7:
-       case OP_PCL_SSL30_DES40_CBC_MD5:
-       case OP_PCL_SSL30_AES_128_CBC_SHA:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_2:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_3:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_4:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_5:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_6:
-       case OP_PCL_SSL30_AES_256_CBC_SHA:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_2:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_3:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_4:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_5:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_6:
-       case OP_PCL_TLS12_AES_128_CBC_SHA256_2:
-       case OP_PCL_TLS12_AES_128_CBC_SHA256_3:
-       case OP_PCL_TLS12_AES_128_CBC_SHA256_4:
-       case OP_PCL_TLS12_AES_128_CBC_SHA256_5:
-       case OP_PCL_TLS12_AES_256_CBC_SHA256_2:
-       case OP_PCL_TLS12_AES_256_CBC_SHA256_3:
-       case OP_PCL_TLS12_AES_256_CBC_SHA256_4:
-       case OP_PCL_TLS12_AES_256_CBC_SHA256_5:
-       case OP_PCL_TLS12_AES_128_CBC_SHA256_6:
-       case OP_PCL_TLS12_AES_256_CBC_SHA256_6:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_2:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_7:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_7:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_3:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_8:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_8:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_4:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_9:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_9:
-       case OP_PCL_SSL30_AES_128_GCM_SHA256_1:
-       case OP_PCL_SSL30_AES_256_GCM_SHA384_1:
-       case OP_PCL_SSL30_AES_128_GCM_SHA256_2:
-       case OP_PCL_SSL30_AES_256_GCM_SHA384_2:
-       case OP_PCL_SSL30_AES_128_GCM_SHA256_3:
-       case OP_PCL_SSL30_AES_256_GCM_SHA384_3:
-       case OP_PCL_SSL30_AES_128_GCM_SHA256_4:
-       case OP_PCL_SSL30_AES_256_GCM_SHA384_4:
-       case OP_PCL_SSL30_AES_128_GCM_SHA256_5:
-       case OP_PCL_SSL30_AES_256_GCM_SHA384_5:
-       case OP_PCL_SSL30_AES_128_GCM_SHA256_6:
-       case OP_PCL_TLS_DH_ANON_AES_256_GCM_SHA384:
-       case OP_PCL_TLS_PSK_AES_128_GCM_SHA256:
-       case OP_PCL_TLS_PSK_AES_256_GCM_SHA384:
-       case OP_PCL_TLS_DHE_PSK_AES_128_GCM_SHA256:
-       case OP_PCL_TLS_DHE_PSK_AES_256_GCM_SHA384:
-       case OP_PCL_TLS_RSA_PSK_AES_128_GCM_SHA256:
-       case OP_PCL_TLS_RSA_PSK_AES_256_GCM_SHA384:
-       case OP_PCL_TLS_PSK_AES_128_CBC_SHA256:
-       case OP_PCL_TLS_PSK_AES_256_CBC_SHA384:
-       case OP_PCL_TLS_DHE_PSK_AES_128_CBC_SHA256:
-       case OP_PCL_TLS_DHE_PSK_AES_256_CBC_SHA384:
-       case OP_PCL_TLS_RSA_PSK_AES_128_CBC_SHA256:
-       case OP_PCL_TLS_RSA_PSK_AES_256_CBC_SHA384:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_11:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_10:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_10:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_12:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_11:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_11:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_12:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_13:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_12:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_14:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_13:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_13:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_15:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_14:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_14:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_16:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_17:
-       case OP_PCL_SSL30_3DES_EDE_CBC_SHA_18:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_15:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_16:
-       case OP_PCL_SSL30_AES_128_CBC_SHA_17:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_15:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_16:
-       case OP_PCL_SSL30_AES_256_CBC_SHA_17:
-       case OP_PCL_TLS_ECDHE_ECDSA_AES_128_CBC_SHA256:
-       case OP_PCL_TLS_ECDHE_ECDSA_AES_256_CBC_SHA384:
-       case OP_PCL_TLS_ECDH_ECDSA_AES_128_CBC_SHA256:
-       case OP_PCL_TLS_ECDH_ECDSA_AES_256_CBC_SHA384:
-       case OP_PCL_TLS_ECDHE_RSA_AES_128_CBC_SHA256:
-       case OP_PCL_TLS_ECDHE_RSA_AES_256_CBC_SHA384:
-       case OP_PCL_TLS_ECDH_RSA_AES_128_CBC_SHA256:
-       case OP_PCL_TLS_ECDH_RSA_AES_256_CBC_SHA384:
-       case OP_PCL_TLS_ECDHE_ECDSA_AES_128_GCM_SHA256:
-       case OP_PCL_TLS_ECDHE_ECDSA_AES_256_GCM_SHA384:
-       case OP_PCL_TLS_ECDH_ECDSA_AES_128_GCM_SHA256:
-       case OP_PCL_TLS_ECDH_ECDSA_AES_256_GCM_SHA384:
-       case OP_PCL_TLS_ECDHE_RSA_AES_128_GCM_SHA256:
-       case OP_PCL_TLS_ECDHE_RSA_AES_256_GCM_SHA384:
-       case OP_PCL_TLS_ECDH_RSA_AES_128_GCM_SHA256:
-       case OP_PCL_TLS_ECDH_RSA_AES_256_GCM_SHA384:
-       case OP_PCL_TLS_ECDHE_PSK_3DES_EDE_CBC_SHA:
-       case OP_PCL_TLS_ECDHE_PSK_AES_128_CBC_SHA:
-       case OP_PCL_TLS_ECDHE_PSK_AES_256_CBC_SHA:
-       case OP_PCL_TLS_ECDHE_PSK_AES_128_CBC_SHA256:
-       case OP_PCL_TLS_ECDHE_PSK_AES_256_CBC_SHA384:
-       case OP_PCL_TLS12_3DES_EDE_CBC_MD5:
-       case OP_PCL_TLS12_3DES_EDE_CBC_SHA160:
-       case OP_PCL_TLS12_3DES_EDE_CBC_SHA224:
-       case OP_PCL_TLS12_3DES_EDE_CBC_SHA256:
-       case OP_PCL_TLS12_3DES_EDE_CBC_SHA384:
-       case OP_PCL_TLS12_3DES_EDE_CBC_SHA512:
-       case OP_PCL_TLS12_AES_128_CBC_SHA160:
-       case OP_PCL_TLS12_AES_128_CBC_SHA224:
-       case OP_PCL_TLS12_AES_128_CBC_SHA256:
-       case OP_PCL_TLS12_AES_128_CBC_SHA384:
-       case OP_PCL_TLS12_AES_128_CBC_SHA512:
-       case OP_PCL_TLS12_AES_192_CBC_SHA160:
-       case OP_PCL_TLS12_AES_192_CBC_SHA224:
-       case OP_PCL_TLS12_AES_192_CBC_SHA256:
-       case OP_PCL_TLS12_AES_192_CBC_SHA512:
-       case OP_PCL_TLS12_AES_256_CBC_SHA160:
-       case OP_PCL_TLS12_AES_256_CBC_SHA224:
-       case OP_PCL_TLS12_AES_256_CBC_SHA256:
-       case OP_PCL_TLS12_AES_256_CBC_SHA384:
-       case OP_PCL_TLS12_AES_256_CBC_SHA512:
-       case OP_PCL_TLS_PVT_AES_192_CBC_SHA160:
-       case OP_PCL_TLS_PVT_AES_192_CBC_SHA384:
-       case OP_PCL_TLS_PVT_AES_192_CBC_SHA224:
-       case OP_PCL_TLS_PVT_AES_192_CBC_SHA512:
-       case OP_PCL_TLS_PVT_AES_192_CBC_SHA256:
-       case OP_PCL_TLS_PVT_MASTER_SECRET_PRF_FE:
-       case OP_PCL_TLS_PVT_MASTER_SECRET_PRF_FF:
+       case OP_PCL_TLS_RSA_EXPORT_WITH_DES40_CBC_SHA:
+       case OP_PCL_TLS_RSA_WITH_DES_CBC_SHA:
+       case OP_PCL_TLS_RSA_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_DH_DSS_EXPORT_WITH_DES40_CBC_SHA:
+       case OP_PCL_TLS_DH_DSS_WITH_DES_CBC_SHA:
+       case OP_PCL_TLS_DH_DSS_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_DH_RSA_EXPORT_WITH_DES40_CBC_SHA:
+       case OP_PCL_TLS_DH_RSA_WITH_DES_CBC_SHA:
+       case OP_PCL_TLS_DH_RSA_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_DHE_DSS_EXPORT_WITH_DES40_CBC_SHA:
+       case OP_PCL_TLS_DHE_DSS_WITH_DES_CBC_SHA:
+       case OP_PCL_TLS_DHE_DSS_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_DHE_RSA_EXPORT_WITH_DES40_CBC_SHA:
+       case OP_PCL_TLS_DHE_RSA_WITH_DES_CBC_SHA:
+       case OP_PCL_TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_DH_anon_EXPORT_WITH_DES40_CBC_SHA:
+       case OP_PCL_TLS_DH_anon_WITH_DES_CBC_SHA:
+       case OP_PCL_TLS_DH_anon_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_KRB5_WITH_DES_CBC_SHA:
+       case OP_PCL_TLS_KRB5_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_KRB5_WITH_DES_CBC_MD5:
+       case OP_PCL_TLS_KRB5_WITH_3DES_EDE_CBC_MD5:
+       case OP_PCL_TLS_KRB5_EXPORT_WITH_DES_CBC_40_SHA:
+       case OP_PCL_TLS_KRB5_EXPORT_WITH_DES_CBC_40_MD5:
+       case OP_PCL_TLS_RSA_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_DH_DSS_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_DH_RSA_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_DHE_DSS_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_DHE_RSA_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_DH_anon_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_RSA_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_DH_DSS_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_DH_RSA_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_DHE_DSS_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_DHE_RSA_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_DH_anon_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_DH_DSS_WITH_AES_128_CBC_SHA256:
+       case OP_PCL_TLS_DH_RSA_WITH_AES_128_CBC_SHA256:
+       case OP_PCL_TLS_DHE_DSS_WITH_AES_128_CBC_SHA256:
+       case OP_PCL_TLS_DHE_RSA_WITH_AES_128_CBC_SHA256:
+       case OP_PCL_TLS_DH_DSS_WITH_AES_256_CBC_SHA256:
+       case OP_PCL_TLS_DH_RSA_WITH_AES_256_CBC_SHA256:
+       case OP_PCL_TLS_DHE_DSS_WITH_AES_256_CBC_SHA256:
+       case OP_PCL_TLS_DHE_RSA_WITH_AES_256_CBC_SHA256:
+       case OP_PCL_TLS_DH_anon_WITH_AES_128_CBC_SHA256:
+       case OP_PCL_TLS_DH_anon_WITH_AES_256_CBC_SHA256:
+       case OP_PCL_TLS_PSK_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_PSK_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_PSK_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_DHE_PSK_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_DHE_PSK_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_DHE_PSK_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_RSA_PSK_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_RSA_PSK_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_RSA_PSK_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_RSA_WITH_AES_128_GCM_SHA256:
+       case OP_PCL_TLS_RSA_WITH_AES_256_GCM_SHA384:
+       case OP_PCL_TLS_DHE_RSA_WITH_AES_128_GCM_SHA256:
+       case OP_PCL_TLS_DHE_RSA_WITH_AES_256_GCM_SHA384:
+       case OP_PCL_TLS_DH_RSA_WITH_AES_128_GCM_SHA256:
+       case OP_PCL_TLS_DH_RSA_WITH_AES_256_GCM_SHA384:
+       case OP_PCL_TLS_DHE_DSS_WITH_AES_128_GCM_SHA256:
+       case OP_PCL_TLS_DHE_DSS_WITH_AES_256_GCM_SHA384:
+       case OP_PCL_TLS_DH_DSS_WITH_AES_128_GCM_SHA256:
+       case OP_PCL_TLS_DH_DSS_WITH_AES_256_GCM_SHA384:
+       case OP_PCL_TLS_DH_anon_WITH_AES_128_GCM_SHA256:
+       case OP_PCL_TLS_DH_anon_WITH_AES_256_GCM_SHA384:
+       case OP_PCL_TLS_PSK_WITH_AES_128_GCM_SHA256:
+       case OP_PCL_TLS_PSK_WITH_AES_256_GCM_SHA384:
+       case OP_PCL_TLS_DHE_PSK_WITH_AES_128_GCM_SHA256:
+       case OP_PCL_TLS_DHE_PSK_WITH_AES_256_GCM_SHA384:
+       case OP_PCL_TLS_RSA_PSK_WITH_AES_128_GCM_SHA256:
+       case OP_PCL_TLS_RSA_PSK_WITH_AES_256_GCM_SHA384:
+       case OP_PCL_TLS_PSK_WITH_AES_128_CBC_SHA256:
+       case OP_PCL_TLS_PSK_WITH_AES_256_CBC_SHA384:
+       case OP_PCL_TLS_DHE_PSK_WITH_AES_128_CBC_SHA256:
+       case OP_PCL_TLS_DHE_PSK_WITH_AES_256_CBC_SHA384:
+       case OP_PCL_TLS_RSA_PSK_WITH_AES_128_CBC_SHA256:
+       case OP_PCL_TLS_RSA_PSK_WITH_AES_256_CBC_SHA384:
+       case OP_PCL_TLS_ECDH_ECDSA_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_ECDHE_ECDSA_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_ECDH_RSA_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_ECDH_anon_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_ECDH_anon_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_ECDH_anon_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_SRP_SHA_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_SRP_SHA_RSA_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_SRP_SHA_DSS_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_SRP_SHA_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_SRP_SHA_RSA_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_SRP_SHA_DSS_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_SRP_SHA_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_SRP_SHA_RSA_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_SRP_SHA_DSS_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256:
+       case OP_PCL_TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA384:
+       case OP_PCL_TLS_ECDH_ECDSA_WITH_AES_128_CBC_SHA256:
+       case OP_PCL_TLS_ECDH_ECDSA_WITH_AES_256_CBC_SHA384:
+       case OP_PCL_TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256:
+       case OP_PCL_TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA384:
+       case OP_PCL_TLS_ECDH_RSA_WITH_AES_128_CBC_SHA256:
+       case OP_PCL_TLS_ECDH_RSA_WITH_AES_256_CBC_SHA384:
+       case OP_PCL_TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256:
+       case OP_PCL_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384:
+       case OP_PCL_TLS_ECDH_ECDSA_WITH_AES_128_GCM_SHA256:
+       case OP_PCL_TLS_ECDH_ECDSA_WITH_AES_256_GCM_SHA384:
+       case OP_PCL_TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256:
+       case OP_PCL_TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384:
+       case OP_PCL_TLS_ECDH_RSA_WITH_AES_128_GCM_SHA256:
+       case OP_PCL_TLS_ECDH_RSA_WITH_AES_256_GCM_SHA384:
+       case OP_PCL_TLS_ECDHE_PSK_WITH_3DES_EDE_CBC_SHA:
+       case OP_PCL_TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA:
+       case OP_PCL_TLS_ECDHE_PSK_WITH_AES_256_CBC_SHA:
+       case OP_PCL_TLS_ECDHE_PSK_WITH_AES_128_CBC_SHA256:
+       case OP_PCL_TLS_ECDHE_PSK_WITH_AES_256_CBC_SHA384:
+       case OP_PCL_TLS_RSA_WITH_AES_128_CBC_SHA256:
+       case OP_PCL_TLS_RSA_WITH_AES_256_CBC_SHA256:
+       case OP_PCL_PVT_TLS_3DES_EDE_CBC_MD5:
+       case OP_PCL_PVT_TLS_3DES_EDE_CBC_SHA160:
+       case OP_PCL_PVT_TLS_3DES_EDE_CBC_SHA224:
+       case OP_PCL_PVT_TLS_3DES_EDE_CBC_SHA256:
+       case OP_PCL_PVT_TLS_3DES_EDE_CBC_SHA384:
+       case OP_PCL_PVT_TLS_3DES_EDE_CBC_SHA512:
+       case OP_PCL_PVT_TLS_AES_128_CBC_SHA160:
+       case OP_PCL_PVT_TLS_AES_128_CBC_SHA224:
+       case OP_PCL_PVT_TLS_AES_128_CBC_SHA256:
+       case OP_PCL_PVT_TLS_AES_128_CBC_SHA384:
+       case OP_PCL_PVT_TLS_AES_128_CBC_SHA512:
+       case OP_PCL_PVT_TLS_AES_192_CBC_SHA160:
+       case OP_PCL_PVT_TLS_AES_192_CBC_SHA224:
+       case OP_PCL_PVT_TLS_AES_192_CBC_SHA256:
+       case OP_PCL_PVT_TLS_AES_192_CBC_SHA512:
+       case OP_PCL_PVT_TLS_AES_256_CBC_SHA160:
+       case OP_PCL_PVT_TLS_AES_256_CBC_SHA224:
+       case OP_PCL_PVT_TLS_AES_256_CBC_SHA384:
+       case OP_PCL_PVT_TLS_AES_256_CBC_SHA512:
+       case OP_PCL_PVT_TLS_AES_256_CBC_SHA256:
+       case OP_PCL_PVT_TLS_AES_192_CBC_SHA384:
+       case OP_PCL_PVT_TLS_MASTER_SECRET_PRF_FE:
+       case OP_PCL_PVT_TLS_MASTER_SECRET_PRF_FF:
                return 0;
        }
 
@@ -322,6 +320,12 @@ static const uint32_t proto_blob_flags[] = {
                OP_PCL_BLOB_EKT | OP_PCL_BLOB_REG_MASK,
        OP_PCL_BLOB_FORMAT_MASK | OP_PCL_BLOB_BLACK | OP_PCL_BLOB_TKEK |
                OP_PCL_BLOB_EKT | OP_PCL_BLOB_REG_MASK | OP_PCL_BLOB_SEC_MEM,
+       OP_PCL_BLOB_FORMAT_MASK | OP_PCL_BLOB_BLACK | OP_PCL_BLOB_TKEK |
+               OP_PCL_BLOB_EKT | OP_PCL_BLOB_REG_MASK | OP_PCL_BLOB_SEC_MEM,
+       OP_PCL_BLOB_FORMAT_MASK | OP_PCL_BLOB_BLACK | OP_PCL_BLOB_TKEK |
+               OP_PCL_BLOB_EKT | OP_PCL_BLOB_REG_MASK | OP_PCL_BLOB_SEC_MEM,
+       OP_PCL_BLOB_FORMAT_MASK | OP_PCL_BLOB_BLACK | OP_PCL_BLOB_TKEK |
+               OP_PCL_BLOB_EKT | OP_PCL_BLOB_REG_MASK | OP_PCL_BLOB_SEC_MEM,
        OP_PCL_BLOB_FORMAT_MASK | OP_PCL_BLOB_BLACK | OP_PCL_BLOB_TKEK |
                OP_PCL_BLOB_EKT | OP_PCL_BLOB_REG_MASK | OP_PCL_BLOB_SEC_MEM
 };
@@ -556,7 +560,7 @@ static const struct proto_map proto_table[] = {
        {OP_TYPE_UNI_PROTOCOL,   OP_PCLID_TLS10_PRF,     __rta_ssl_proto},
        {OP_TYPE_UNI_PROTOCOL,   OP_PCLID_TLS11_PRF,     __rta_ssl_proto},
        {OP_TYPE_UNI_PROTOCOL,   OP_PCLID_TLS12_PRF,     __rta_ssl_proto},
-       {OP_TYPE_UNI_PROTOCOL,   OP_PCLID_DTLS10_PRF,    __rta_ssl_proto},
+       {OP_TYPE_UNI_PROTOCOL,   OP_PCLID_DTLS_PRF,      __rta_ssl_proto},
        {OP_TYPE_UNI_PROTOCOL,   OP_PCLID_IKEV1_PRF,     __rta_ike_proto},
        {OP_TYPE_UNI_PROTOCOL,   OP_PCLID_IKEV2_PRF,     __rta_ike_proto},
        {OP_TYPE_UNI_PROTOCOL,   OP_PCLID_PUBLICKEYPAIR, __rta_dlc_proto},
@@ -568,7 +572,7 @@ static const struct proto_map proto_table[] = {
        {OP_TYPE_DECAP_PROTOCOL, OP_PCLID_TLS10,         __rta_ssl_proto},
        {OP_TYPE_DECAP_PROTOCOL, OP_PCLID_TLS11,         __rta_ssl_proto},
        {OP_TYPE_DECAP_PROTOCOL, OP_PCLID_TLS12,         __rta_ssl_proto},
-       {OP_TYPE_DECAP_PROTOCOL, OP_PCLID_DTLS10,        __rta_ssl_proto},
+       {OP_TYPE_DECAP_PROTOCOL, OP_PCLID_DTLS,          __rta_ssl_proto},
        {OP_TYPE_DECAP_PROTOCOL, OP_PCLID_MACSEC,        __rta_macsec_proto},
        {OP_TYPE_DECAP_PROTOCOL, OP_PCLID_WIFI,          __rta_wifi_proto},
        {OP_TYPE_DECAP_PROTOCOL, OP_PCLID_WIMAX,         __rta_wimax_proto},
index 6e66610..5357187 100644 (file)
@@ -497,6 +497,28 @@ __rta_out64(struct program *program, bool is_ext, uint64_t val)
        }
 }
 
+static inline void __rta_out_be64(struct program *program, bool is_ext,
+                                 uint64_t val)
+{
+       if (is_ext) {
+               __rta_out_be32(program, upper_32_bits(val));
+               __rta_out_be32(program, lower_32_bits(val));
+       } else {
+               __rta_out_be32(program, lower_32_bits(val));
+       }
+}
+
+static inline void __rta_out_le64(struct program *program, bool is_ext,
+                                 uint64_t val)
+{
+       if (is_ext) {
+               __rta_out_le32(program, lower_32_bits(val));
+               __rta_out_le32(program, upper_32_bits(val));
+       } else {
+               __rta_out_le32(program, lower_32_bits(val));
+       }
+}
+
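+/*
+ * Word-order sketch for the two helpers above (the value is illustrative):
+ * emitting val = 0x1122334455667788 with is_ext = true gives
+ *   __rta_out_be64: 0x11223344 then 0x55667788 (upper word first)
+ *   __rta_out_le64: 0x55667788 then 0x11223344 (lower word first)
+ * With is_ext = false both emit only the lower word, 0x55667788.
+ */
+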
 static inline unsigned int
 rta_word(struct program *program, uint32_t val)
 {
index de8ca97..87e0def 100644 (file)
@@ -6,6 +6,7 @@
  */
 #include <fsl_mc_sys.h>
 #include <fsl_mc_cmd.h>
+#include <fsl_dpopr.h>
 #include <fsl_dpseci.h>
 #include <fsl_dpseci_cmd.h>
 
@@ -116,11 +117,13 @@ int dpseci_create(struct fsl_mc_io *mc_io,
                                          cmd_flags,
                                          dprc_token);
        cmd_params = (struct dpseci_cmd_create *)cmd.params;
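+       /*
+        * The v3 CREATE command carries up to 16 priorities, split across
+        * two 8-entry arrays; entries 8..15 travel in priorities2.
+        */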
-       for (i = 0; i < DPSECI_PRIO_NUM; i++)
+       for (i = 0; i < 8; i++)
                cmd_params->priorities[i] = cfg->priorities[i];
+       for (i = 0; i < 8; i++)
+               cmd_params->priorities2[i] = cfg->priorities[8 + i];
        cmd_params->num_tx_queues = cfg->num_tx_queues;
        cmd_params->num_rx_queues = cfg->num_rx_queues;
-       cmd_params->options = cfg->options;
+       cmd_params->options = cpu_to_le32(cfg->options);
 
        /* send command to mc*/
        err = mc_send_command(mc_io, &cmd);
@@ -302,7 +305,7 @@ int dpseci_get_attributes(struct fsl_mc_io *mc_io,
        /* retrieve response parameters */
        rsp_params = (struct dpseci_rsp_get_attr *)cmd.params;
        attr->id = le32_to_cpu(rsp_params->id);
-       attr->options = rsp_params->options;
+       attr->options = le32_to_cpu(rsp_params->options);
        attr->num_tx_queues = rsp_params->num_tx_queues;
        attr->num_rx_queues = rsp_params->num_rx_queues;
 
@@ -490,6 +493,8 @@ int dpseci_get_sec_attr(struct fsl_mc_io *mc_io,
        attr->arc4_acc_num = rsp_params->arc4_acc_num;
        attr->des_acc_num = rsp_params->des_acc_num;
        attr->aes_acc_num = rsp_params->aes_acc_num;
+       attr->ccha_acc_num = rsp_params->ccha_acc_num;
+       attr->ptha_acc_num = rsp_params->ptha_acc_num;
 
        return 0;
 }
@@ -569,6 +574,113 @@ int dpseci_get_api_version(struct fsl_mc_io *mc_io,
        return 0;
 }
 
+/**
+ * dpseci_set_opr() - Set Order Restoration configuration.
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPSECI object
+ * @index:     The queue index
+ * @options:   Configuration mode options
+ *                     can be OPR_OPT_CREATE or OPR_OPT_RETIRE
+ * @cfg:       Configuration options for the OPR
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ */
+int dpseci_set_opr(struct fsl_mc_io *mc_io,
+                  uint32_t cmd_flags,
+                  uint16_t token,
+                  uint8_t index,
+                  uint8_t options,
+                  struct opr_cfg *cfg)
+{
+       struct dpseci_cmd_set_opr *cmd_params;
+       struct mc_command cmd = { 0 };
+
+       /* prepare command */
+       cmd.header = mc_encode_cmd_header(DPSECI_CMDID_SET_OPR,
+                                         cmd_flags,
+                                         token);
+       cmd_params = (struct dpseci_cmd_set_opr *)cmd.params;
+       cmd_params->index = index;
+       cmd_params->options = options;
+       cmd_params->oloe = cfg->oloe;
+       cmd_params->oeane = cfg->oeane;
+       cmd_params->olws = cfg->olws;
+       cmd_params->oa = cfg->oa;
+       cmd_params->oprrws = cfg->oprrws;
+
+       /* send command to mc */
+       return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpseci_get_opr() - Retrieve Order Restoration config and query.
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPSECI object
+ * @index:     The queue index
+ * @cfg:       Returned OPR configuration
+ * @qry:       Returned OPR query
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ */
+int dpseci_get_opr(struct fsl_mc_io *mc_io,
+                  uint32_t cmd_flags,
+                  uint16_t token,
+                  uint8_t index,
+                  struct opr_cfg *cfg,
+                  struct opr_qry *qry)
+{
+       struct dpseci_rsp_get_opr *rsp_params;
+       struct dpseci_cmd_get_opr *cmd_params;
+       struct mc_command cmd = { 0 };
+       int err;
+
+       /* prepare command */
+       cmd.header = mc_encode_cmd_header(DPSECI_CMDID_GET_OPR,
+                                         cmd_flags,
+                                         token);
+       cmd_params = (struct dpseci_cmd_get_opr *)cmd.params;
+       cmd_params->index = index;
+
+       /* send command to mc */
+       err = mc_send_command(mc_io, &cmd);
+       if (err)
+               return err;
+
+       /* retrieve response parameters */
+       rsp_params = (struct dpseci_rsp_get_opr *)cmd.params;
+       cfg->oloe = rsp_params->oloe;
+       cfg->oeane = rsp_params->oeane;
+       cfg->olws = rsp_params->olws;
+       cfg->oa = rsp_params->oa;
+       cfg->oprrws = rsp_params->oprrws;
+       qry->rip = dpseci_get_field(rsp_params->flags, RIP);
+       qry->enable = dpseci_get_field(rsp_params->flags, OPR_ENABLE);
+       qry->nesn = le16_to_cpu(rsp_params->nesn);
+       qry->ndsn = le16_to_cpu(rsp_params->ndsn);
+       qry->ea_tseq = le16_to_cpu(rsp_params->ea_tseq);
+       qry->tseq_nlis = dpseci_get_field(rsp_params->tseq_nlis, TSEQ_NLIS);
+       qry->ea_hseq = le16_to_cpu(rsp_params->ea_hseq);
+       qry->hseq_nlis = dpseci_get_field(rsp_params->hseq_nlis, HSEQ_NLIS);
+       qry->ea_hptr = le16_to_cpu(rsp_params->ea_hptr);
+       qry->ea_tptr = le16_to_cpu(rsp_params->ea_tptr);
+       qry->opr_vid = le16_to_cpu(rsp_params->opr_vid);
+       qry->opr_id = le16_to_cpu(rsp_params->opr_id);
+
+       return 0;
+}
+
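+/*
+ * Example call sequence (sketch; the portal, token and cfg values are
+ * assumptions): create an order restoration point on queue index 0,
+ * then read its state back:
+ *
+ *     struct opr_cfg cfg = { .oprrws = 3 };
+ *     struct opr_qry qry;
+ *     err = dpseci_set_opr(mc_io, CMD_PRI_LOW, token, 0,
+ *                          OPR_OPT_CREATE, &cfg);
+ *     if (!err)
+ *             err = dpseci_get_opr(mc_io, CMD_PRI_LOW, token, 0,
+ *                                  &cfg, &qry);
+ */
+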
+/**
+ * dpseci_set_congestion_notification() - Set congestion group
+ *     notification configuration
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPSECI object
+ * @cfg:       congestion notification configuration
+ *
+ * Return:     '0' on success, error code otherwise
+ */
 int dpseci_set_congestion_notification(
                        struct fsl_mc_io *mc_io,
                        uint32_t cmd_flags,
@@ -604,6 +716,16 @@ int dpseci_set_congestion_notification(
        return mc_send_command(mc_io, &cmd);
 }
 
+/**
+ * dpseci_get_congestion_notification() - Get congestion group
+ *     notification configuration
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPSECI object
+ * @cfg:       congestion notification configuration
+ *
+ * Return:     '0' on success, error code otherwise
+ */
 int dpseci_get_congestion_notification(
                                struct fsl_mc_io *mc_io,
                                uint32_t cmd_flags,
index 12ac005..279e8f4 100644 (file)
@@ -20,7 +20,7 @@ struct fsl_mc_io;
 /**
- * Maximum number of Tx/Rx priorities per DPSECI object
+ * Maximum number of Tx/Rx queues per DPSECI object
  */
-#define DPSECI_PRIO_NUM                8
+#define DPSECI_MAX_QUEUE_NUM           16
 
 /**
  * All queues considered; see dpseci_set_rx_queue()
@@ -58,7 +58,7 @@ struct dpseci_cfg {
        uint32_t options;
        uint8_t num_tx_queues;
        uint8_t num_rx_queues;
-       uint8_t priorities[DPSECI_PRIO_NUM];
+       uint8_t priorities[DPSECI_MAX_QUEUE_NUM];
 };
 
 int dpseci_create(struct fsl_mc_io *mc_io,
@@ -259,6 +259,10 @@ int dpseci_get_tx_queue(struct fsl_mc_io *mc_io,
  *                     implemented in this version of SEC.
  * @aes_acc_num:       The number of copies of the AES module that are
  *                     implemented in this version of SEC.
+ * @ccha_acc_num:      The number of copies of the ChaCha20 module that are
+ *                     implemented in this version of SEC.
+ * @ptha_acc_num:      The number of copies of the Poly1305 module that are
+ *                     implemented in this version of SEC.
  **/
 
 struct dpseci_sec_attr {
@@ -279,6 +283,8 @@ struct dpseci_sec_attr {
        uint8_t arc4_acc_num;
        uint8_t des_acc_num;
        uint8_t aes_acc_num;
+       uint8_t ccha_acc_num;
+       uint8_t ptha_acc_num;
 };
 
 int dpseci_get_sec_attr(struct fsl_mc_io *mc_io,
@@ -316,6 +322,21 @@ int dpseci_get_api_version(struct fsl_mc_io *mc_io,
                           uint32_t cmd_flags,
                           uint16_t *major_ver,
                           uint16_t *minor_ver);
+
+int dpseci_set_opr(struct fsl_mc_io *mc_io,
+                  uint32_t cmd_flags,
+                  uint16_t token,
+                  uint8_t index,
+                  uint8_t options,
+                  struct opr_cfg *cfg);
+
+int dpseci_get_opr(struct fsl_mc_io *mc_io,
+                  uint32_t cmd_flags,
+                  uint16_t token,
+                  uint8_t index,
+                  struct opr_cfg *cfg,
+                  struct opr_qry *qry);
+
 /**
  * enum dpseci_congestion_unit - DPSECI congestion units
  * @DPSECI_CONGESTION_UNIT_BYTES: bytes units
index 26cef0f..af3518a 100644 (file)
@@ -9,22 +9,25 @@
 
 /* DPSECI Version */
 #define DPSECI_VER_MAJOR               5
-#define DPSECI_VER_MINOR               1
+#define DPSECI_VER_MINOR               3
 
 /* Command versioning */
 #define DPSECI_CMD_BASE_VERSION                1
 #define DPSECI_CMD_BASE_VERSION_V2     2
+#define DPSECI_CMD_BASE_VERSION_V3     3
 #define DPSECI_CMD_ID_OFFSET           4
 
 #define DPSECI_CMD_V1(id) \
        ((id << DPSECI_CMD_ID_OFFSET) | DPSECI_CMD_BASE_VERSION)
 #define DPSECI_CMD_V2(id) \
        ((id << DPSECI_CMD_ID_OFFSET) | DPSECI_CMD_BASE_VERSION_V2)
+#define DPSECI_CMD_V3(id) \
+       ((id << DPSECI_CMD_ID_OFFSET) | DPSECI_CMD_BASE_VERSION_V3)
 
 /* Command IDs */
 #define DPSECI_CMDID_CLOSE             DPSECI_CMD_V1(0x800)
 #define DPSECI_CMDID_OPEN              DPSECI_CMD_V1(0x809)
-#define DPSECI_CMDID_CREATE            DPSECI_CMD_V2(0x909)
+#define DPSECI_CMDID_CREATE            DPSECI_CMD_V3(0x909)
 #define DPSECI_CMDID_DESTROY           DPSECI_CMD_V1(0x989)
 #define DPSECI_CMDID_GET_API_VERSION   DPSECI_CMD_V1(0xa09)
 
 #define DPSECI_CMDID_SET_RX_QUEUE      DPSECI_CMD_V1(0x194)
 #define DPSECI_CMDID_GET_RX_QUEUE      DPSECI_CMD_V1(0x196)
 #define DPSECI_CMDID_GET_TX_QUEUE      DPSECI_CMD_V1(0x197)
-#define DPSECI_CMDID_GET_SEC_ATTR      DPSECI_CMD_V1(0x198)
+#define DPSECI_CMDID_GET_SEC_ATTR      DPSECI_CMD_V2(0x198)
 #define DPSECI_CMDID_GET_SEC_COUNTERS  DPSECI_CMD_V1(0x199)
-
+#define DPSECI_CMDID_SET_OPR           DPSECI_CMD_V1(0x19A)
+#define DPSECI_CMDID_GET_OPR           DPSECI_CMD_V1(0x19B)
 #define DPSECI_CMDID_SET_CONGESTION_NOTIFICATION       DPSECI_CMD_V1(0x170)
 #define DPSECI_CMDID_GET_CONGESTION_NOTIFICATION       DPSECI_CMD_V1(0x171)
 
@@ -63,6 +67,8 @@ struct dpseci_cmd_create {
        uint8_t num_rx_queues;
        uint8_t pad[6];
        uint32_t options;
+       uint32_t pad2;
+       uint8_t priorities2[8];
 };
 
 struct dpseci_cmd_destroy {
@@ -152,6 +158,8 @@ struct dpseci_rsp_get_sec_attr {
        uint8_t arc4_acc_num;
        uint8_t des_acc_num;
        uint8_t aes_acc_num;
+       uint8_t ccha_acc_num;
+       uint8_t ptha_acc_num;
 };
 
 struct dpseci_rsp_get_sec_counters {
@@ -169,6 +177,63 @@ struct dpseci_rsp_get_api_version {
        uint16_t minor;
 };
 
+struct dpseci_cmd_set_opr {
+       uint16_t pad0;
+       uint8_t index;
+       uint8_t options;
+       uint8_t pad1[7];
+       uint8_t oloe;
+       uint8_t oeane;
+       uint8_t olws;
+       uint8_t oa;
+       uint8_t oprrws;
+};
+
+struct dpseci_cmd_get_opr {
+       uint16_t pad;
+       uint8_t index;
+};
+
+#define DPSECI_RIP_SHIFT       0
+#define DPSECI_RIP_SIZE                1
+#define DPSECI_OPR_ENABLE_SHIFT        1
+#define DPSECI_OPR_ENABLE_SIZE 1
+#define DPSECI_TSEQ_NLIS_SHIFT 0
+#define DPSECI_TSEQ_NLIS_SIZE  1
+#define DPSECI_HSEQ_NLIS_SHIFT 0
+#define DPSECI_HSEQ_NLIS_SIZE  1
+
+struct dpseci_rsp_get_opr {
+       uint64_t pad0;
+       /* from LSB: rip:1 enable:1 */
+       uint8_t flags;
+       uint16_t pad1;
+       uint8_t oloe;
+       uint8_t oeane;
+       uint8_t olws;
+       uint8_t oa;
+       uint8_t oprrws;
+       uint16_t nesn;
+       uint16_t pad8;
+       uint16_t ndsn;
+       uint16_t pad2;
+       uint16_t ea_tseq;
+       /* only the LSB */
+       uint8_t tseq_nlis;
+       uint8_t pad3;
+       uint16_t ea_hseq;
+       /* only the LSB */
+       uint8_t hseq_nlis;
+       uint8_t pad4;
+       uint16_t ea_hptr;
+       uint16_t pad5;
+       uint16_t ea_tptr;
+       uint16_t pad6;
+       uint16_t opr_vid;
+       uint16_t pad7;
+       uint16_t opr_id;
+};
+
 #define DPSECI_DEST_TYPE_SHIFT         0
 #define DPSECI_DEST_TYPE_SIZE          4
 #define DPSECI_CG_UNITS_SHIFT          4
index 01afc58..8fa4827 100644 (file)
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright 2018 NXP
 
+version = 2
+
 if host_machine.system() != 'linux'
         build = false
 endif
index 8591cc0..0bfb986 100644 (file)
@@ -2,3 +2,11 @@ DPDK_17.05 {
 
        local: *;
 };
+
+DPDK_18.11 {
+       global:
+
+       dpaa2_sec_eventq_attach;
+       dpaa2_sec_eventq_detach;
+
+} DPDK_17.05;
index 9be4470..5ce95c2 100644 (file)
@@ -11,7 +11,6 @@ LIB = librte_pmd_dpaa_sec.a
 
 # build flags
 CFLAGS += -DALLOW_EXPERIMENTAL_API
-CFLAGS += -D _GNU_SOURCE
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
 
@@ -38,5 +37,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_DPAA_SEC) += dpaa_sec.c
 
 LDLIBS += -lrte_bus_dpaa
 LDLIBS += -lrte_mempool_dpaa
+LDLIBS += -lrte_common_dpaax
 
 include $(RTE_SDK)/mk/rte.lib.mk
index f571050..d83e745 100644 (file)
@@ -23,6 +23,7 @@
 #include <rte_mbuf.h>
 #include <rte_memcpy.h>
 #include <rte_string_fns.h>
+#include <rte_spinlock.h>
 
 #include <fsl_usd.h>
 #include <fsl_qman.h>
@@ -106,6 +107,12 @@ dpaa_mem_vtop(void *vaddr)
 static inline void *
 dpaa_mem_ptov(rte_iova_t paddr)
 {
+       void *va;
+
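+       /*
+        * Fast path: look the address up in the dpaax IOVA table first;
+        * fall back to the slower rte_mem_iova2virt() translation only
+        * when the table has no entry for it.
+        */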
+       va = (void *)dpaax_iova_table_get_va(paddr);
+       if (likely(va))
+               return va;
+
        return rte_mem_iova2virt(paddr);
 }
 
@@ -274,6 +281,9 @@ caam_auth_alg(dpaa_sec_session *ses, struct alginfo *alginfo_a)
 {
        switch (ses->auth_alg) {
        case RTE_CRYPTO_AUTH_NULL:
+               alginfo_a->algtype =
+                       (ses->proto_alg == RTE_SECURITY_PROTOCOL_IPSEC) ?
+                       OP_PCL_IPSEC_HMAC_NULL : 0;
                ses->digest_length = 0;
                break;
        case RTE_CRYPTO_AUTH_MD5_HMAC:
@@ -322,6 +332,9 @@ caam_cipher_alg(dpaa_sec_session *ses, struct alginfo *alginfo_c)
 {
        switch (ses->cipher_alg) {
        case RTE_CRYPTO_CIPHER_NULL:
+               alginfo_c->algtype =
+                       (ses->proto_alg == RTE_SECURITY_PROTOCOL_IPSEC) ?
+                       OP_PCL_IPSEC_NULL : 0;
                break;
        case RTE_CRYPTO_CIPHER_AES_CBC:
                alginfo_c->algtype =
@@ -359,6 +372,87 @@ caam_aead_alg(dpaa_sec_session *ses, struct alginfo *alginfo)
        }
 }
 
+/* prepare ipsec proto command block of the session */
+static int
+dpaa_sec_prep_ipsec_cdb(dpaa_sec_session *ses)
+{
+       struct alginfo cipherdata = {0}, authdata = {0};
+       struct sec_cdb *cdb = &ses->cdb;
+       int32_t shared_desc_len = 0;
+       int err;
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+       int swap = false;
+#else
+       int swap = true;
+#endif
+
+       caam_cipher_alg(ses, &cipherdata);
+       if (cipherdata.algtype == (unsigned int)DPAA_SEC_ALG_UNSUPPORT) {
+               DPAA_SEC_ERR("not supported cipher alg");
+               return -ENOTSUP;
+       }
+
+       cipherdata.key = (size_t)ses->cipher_key.data;
+       cipherdata.keylen = ses->cipher_key.length;
+       cipherdata.key_enc_flags = 0;
+       cipherdata.key_type = RTA_DATA_IMM;
+
+       caam_auth_alg(ses, &authdata);
+       if (authdata.algtype == (unsigned int)DPAA_SEC_ALG_UNSUPPORT) {
+               DPAA_SEC_ERR("not supported auth alg");
+               return -ENOTSUP;
+       }
+
+       authdata.key = (size_t)ses->auth_key.data;
+       authdata.keylen = ses->auth_key.length;
+       authdata.key_enc_flags = 0;
+       authdata.key_type = RTA_DATA_IMM;
+
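+       /*
+        * sh_desc[0..1] temporarily hold the key lengths so that
+        * rta_inline_query() can decide, within the descriptor space
+        * budget, which keys fit inline; the verdict comes back as a
+        * bitmask in sh_desc[2] (bit 0: cipher key, bit 1: auth key).
+        */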
+       cdb->sh_desc[0] = cipherdata.keylen;
+       cdb->sh_desc[1] = authdata.keylen;
+       err = rta_inline_query(IPSEC_AUTH_VAR_AES_DEC_BASE_DESC_LEN,
+                              MIN_JOB_DESC_SIZE,
+                              (unsigned int *)cdb->sh_desc,
+                              &cdb->sh_desc[2], 2);
+
+       if (err < 0) {
+               DPAA_SEC_ERR("Crypto: Incorrect key lengths");
+               return err;
+       }
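+       /*
+        * A key that does not fit inline is passed by physical address
+        * instead, hence the dpaa_mem_vtop() conversions below.
+        */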
+       if (cdb->sh_desc[2] & 1)
+               cipherdata.key_type = RTA_DATA_IMM;
+       else {
+               cipherdata.key = (size_t)dpaa_mem_vtop(
+                                       (void *)(size_t)cipherdata.key);
+               cipherdata.key_type = RTA_DATA_PTR;
+       }
+       if (cdb->sh_desc[2] & (1<<1))
+               authdata.key_type = RTA_DATA_IMM;
+       else {
+               authdata.key = (size_t)dpaa_mem_vtop(
+                                       (void *)(size_t)authdata.key);
+               authdata.key_type = RTA_DATA_PTR;
+       }
+
+       cdb->sh_desc[0] = 0;
+       cdb->sh_desc[1] = 0;
+       cdb->sh_desc[2] = 0;
+       if (ses->dir == DIR_ENC) {
+               shared_desc_len = cnstr_shdsc_ipsec_new_encap(
+                               cdb->sh_desc,
+                               true, swap, SHR_SERIAL,
+                               &ses->encap_pdb,
+                               (uint8_t *)&ses->ip4_hdr,
+                               &cipherdata, &authdata);
+       } else if (ses->dir == DIR_DEC) {
+               shared_desc_len = cnstr_shdsc_ipsec_new_decap(
+                               cdb->sh_desc,
+                               true, swap, SHR_SERIAL,
+                               &ses->decap_pdb,
+                               &cipherdata, &authdata);
+       }
+       return shared_desc_len;
+}
 
 /* prepare command block of the session */
 static int
@@ -376,7 +470,9 @@ dpaa_sec_prep_cdb(dpaa_sec_session *ses)
 
        memset(cdb, 0, sizeof(struct sec_cdb));
 
-       if (is_cipher_only(ses)) {
+       if (is_proto_ipsec(ses)) {
+               shared_desc_len = dpaa_sec_prep_ipsec_cdb(ses);
+       } else if (is_cipher_only(ses)) {
                caam_cipher_alg(ses, &alginfo_c);
                if (alginfo_c.algtype == (unsigned int)DPAA_SEC_ALG_UNSUPPORT) {
                        DPAA_SEC_ERR("not supported cipher alg");
@@ -484,28 +580,13 @@ dpaa_sec_prep_cdb(dpaa_sec_session *ses)
                cdb->sh_desc[0] = 0;
                cdb->sh_desc[1] = 0;
                cdb->sh_desc[2] = 0;
-               if (is_proto_ipsec(ses)) {
-                       if (ses->dir == DIR_ENC) {
-                               shared_desc_len = cnstr_shdsc_ipsec_new_encap(
-                                               cdb->sh_desc,
-                                               true, swap, &ses->encap_pdb,
-                                               (uint8_t *)&ses->ip4_hdr,
-                                               &alginfo_c, &alginfo_a);
-                       } else if (ses->dir == DIR_DEC) {
-                               shared_desc_len = cnstr_shdsc_ipsec_new_decap(
-                                               cdb->sh_desc,
-                                               true, swap, &ses->decap_pdb,
-                                               &alginfo_c, &alginfo_a);
-                       }
-               } else {
-                       /* Auth_only_len is set as 0 here and it will be
-                        * overwritten in fd for each packet.
-                        */
-                       shared_desc_len = cnstr_shdsc_authenc(cdb->sh_desc,
-                                       true, swap, &alginfo_c, &alginfo_a,
-                                       ses->iv.length, 0,
-                                       ses->digest_length, ses->dir);
-               }
+               /* Auth_only_len is set as 0 here and it will be
+                * overwritten in fd for each packet.
+                */
+               shared_desc_len = cnstr_shdsc_authenc(cdb->sh_desc,
+                               true, swap, &alginfo_c, &alginfo_a,
+                               ses->iv.length, 0,
+                               ses->digest_length, ses->dir);
        }
 
        if (shared_desc_len < 0) {
@@ -1445,20 +1526,26 @@ dpaa_sec_enqueue_burst(void *qp, struct rte_crypto_op **ops,
                                nb_ops = loop;
                                goto send_pkts;
                        }
-                       if (unlikely(!ses->qp || ses->qp != qp)) {
-                               DPAA_SEC_DP_ERR("sess->qp - %p qp %p",
-                                            ses->qp, qp);
+                       if (unlikely(!ses->qp)) {
                                if (dpaa_sec_attach_sess_q(qp, ses)) {
                                        frames_to_send = loop;
                                        nb_ops = loop;
                                        goto send_pkts;
                                }
+                       } else if (unlikely(ses->qp != qp)) {
+                               DPAA_SEC_DP_ERR("Old:sess->qp = %p"
+                                       " New qp = %p\n", ses->qp, qp);
+                               frames_to_send = loop;
+                               nb_ops = loop;
+                               goto send_pkts;
                        }
 
                        auth_only_len = op->sym->auth.data.length -
                                                op->sym->cipher.data.length;
                        if (rte_pktmbuf_is_contiguous(op->sym->m_src)) {
-                               if (is_auth_only(ses)) {
+                               if (is_proto_ipsec(ses)) {
+                                       cf = build_proto(op, ses);
+                               } else if (is_auth_only(ses)) {
                                        cf = build_auth_only(op, ses);
                                } else if (is_cipher_only(ses)) {
                                        cf = build_cipher_only(op, ses);
@@ -1467,8 +1554,6 @@ dpaa_sec_enqueue_burst(void *qp, struct rte_crypto_op **ops,
                                        auth_only_len = ses->auth_only_len;
                                } else if (is_auth_cipher(ses)) {
                                        cf = build_cipher_auth(op, ses);
-                               } else if (is_proto_ipsec(ses)) {
-                                       cf = build_proto(op, ses);
                                } else {
                                        DPAA_SEC_DP_ERR("not supported ops");
                                        frames_to_send = loop;
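
The two hunks above tighten the enqueue path: a session that has never been bound is attached lazily to the current queue pair, while a session already bound to a different queue pair now cuts the burst short instead of being re-attached behind the caller's back (the IPsec-protocol check is also moved to the front of the dispatch chain). A minimal sketch of the binding rule, using a hypothetical stand-in type rather than the dpaa_sec structures:

    /* Hypothetical session type; only the queue-pair binding is modelled. */
    struct toy_session {
            void *qp;
    };

    /*
     * Returns 0 when the op may proceed on this queue pair,
     * -1 when the burst must be cut short.
     */
    static int
    toy_bind_session(struct toy_session *ses, void *qp)
    {
            if (ses->qp == NULL) {
                    ses->qp = qp;           /* first use: attach lazily */
                    return 0;
            }
            if (ses->qp != qp)              /* bound to another queue pair */
                    return -1;
            return 0;                       /* already bound here */
    }
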
@@ -1760,6 +1845,7 @@ dpaa_sec_set_session_parameters(struct rte_cryptodev *dev,
                DPAA_SEC_ERR("invalid session struct");
                return -EINVAL;
        }
+       memset(session, 0, sizeof(dpaa_sec_session));
 
        /* Default IV length = 0 */
        session->iv.length = 0;
@@ -1807,7 +1893,9 @@ dpaa_sec_set_session_parameters(struct rte_cryptodev *dev,
                return -EINVAL;
        }
        session->ctx_pool = internals->ctx_pool;
+       rte_spinlock_lock(&internals->lock);
        session->inq = dpaa_sec_attach_rxq(internals);
+       rte_spinlock_unlock(&internals->lock);
        if (session->inq == NULL) {
                DPAA_SEC_ERR("unable to attach sec queue");
                goto err1;
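
Sessions created from multiple lcores can race to grab an rx queue from the shared per-device pool, which is why the attach call is now wrapped in the new internals->lock (initialized with rte_spinlock_init() further down in dpaa_sec_dev_init). A hedged sketch of the claim-under-lock pattern; the pool layout and size are illustrative, not the driver's actual definitions:

    #include <rte_spinlock.h>

    #define TOY_NUM_RXQ 8   /* illustrative pool size */

    struct toy_dev_private {
            rte_spinlock_t lock;    /* guards inq_attach[], init'ed at probe */
            unsigned char inq_attach[TOY_NUM_RXQ];
    };

    /*
     * Claim a free rx queue slot atomically; returns the slot index,
     * or -1 when the pool is exhausted.
     */
    static int
    toy_attach_rxq(struct toy_dev_private *priv)
    {
            int i, slot = -1;

            rte_spinlock_lock(&priv->lock);
            for (i = 0; i < TOY_NUM_RXQ; i++) {
                    if (!priv->inq_attach[i]) {
                            priv->inq_attach[i] = 1;
                            slot = i;
                            break;
                    }
            }
            rte_spinlock_unlock(&priv->lock);
            return slot;
    }
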
@@ -1888,111 +1976,86 @@ dpaa_sec_set_ipsec_session(__rte_unused struct rte_cryptodev *dev,
 {
        struct dpaa_sec_dev_private *internals = dev->data->dev_private;
        struct rte_security_ipsec_xform *ipsec_xform = &conf->ipsec;
-       struct rte_crypto_auth_xform *auth_xform;
-       struct rte_crypto_cipher_xform *cipher_xform;
+       struct rte_crypto_auth_xform *auth_xform = NULL;
+       struct rte_crypto_cipher_xform *cipher_xform = NULL;
        dpaa_sec_session *session = (dpaa_sec_session *)sess;
 
        PMD_INIT_FUNC_TRACE();
 
+       memset(session, 0, sizeof(dpaa_sec_session));
        if (ipsec_xform->direction == RTE_SECURITY_IPSEC_SA_DIR_EGRESS) {
                cipher_xform = &conf->crypto_xform->cipher;
-               auth_xform = &conf->crypto_xform->next->auth;
+               if (conf->crypto_xform->next)
+                       auth_xform = &conf->crypto_xform->next->auth;
        } else {
                auth_xform = &conf->crypto_xform->auth;
-               cipher_xform = &conf->crypto_xform->next->cipher;
+               if (conf->crypto_xform->next)
+                       cipher_xform = &conf->crypto_xform->next->cipher;
        }
        session->proto_alg = conf->protocol;
-       session->cipher_key.data = rte_zmalloc(NULL,
-                                              cipher_xform->key.length,
-                                              RTE_CACHE_LINE_SIZE);
-       if (session->cipher_key.data == NULL &&
-                       cipher_xform->key.length > 0) {
-               DPAA_SEC_ERR("No Memory for cipher key");
-               return -ENOMEM;
-       }
 
-       session->cipher_key.length = cipher_xform->key.length;
-       session->auth_key.data = rte_zmalloc(NULL,
-                                       auth_xform->key.length,
-                                       RTE_CACHE_LINE_SIZE);
-       if (session->auth_key.data == NULL &&
-                       auth_xform->key.length > 0) {
-               DPAA_SEC_ERR("No Memory for auth key");
-               rte_free(session->cipher_key.data);
-               return -ENOMEM;
+       if (cipher_xform && cipher_xform->algo != RTE_CRYPTO_CIPHER_NULL) {
+               session->cipher_key.data = rte_zmalloc(NULL,
+                                                      cipher_xform->key.length,
+                                                      RTE_CACHE_LINE_SIZE);
+               if (session->cipher_key.data == NULL &&
+                               cipher_xform->key.length > 0) {
+                       DPAA_SEC_ERR("No Memory for cipher key");
+                       return -ENOMEM;
+               }
+               memcpy(session->cipher_key.data, cipher_xform->key.data,
+                               cipher_xform->key.length);
+               session->cipher_key.length = cipher_xform->key.length;
+
+               switch (cipher_xform->algo) {
+               case RTE_CRYPTO_CIPHER_AES_CBC:
+               case RTE_CRYPTO_CIPHER_3DES_CBC:
+               case RTE_CRYPTO_CIPHER_AES_CTR:
+                       break;
+               default:
+                       DPAA_SEC_ERR("Crypto: Unsupported Cipher alg %u",
+                               cipher_xform->algo);
+                       goto out;
+               }
+               session->cipher_alg = cipher_xform->algo;
+       } else {
+               session->cipher_key.data = NULL;
+               session->cipher_key.length = 0;
+               session->cipher_alg = RTE_CRYPTO_CIPHER_NULL;
        }
-       session->auth_key.length = auth_xform->key.length;
-       memcpy(session->cipher_key.data, cipher_xform->key.data,
-                       cipher_xform->key.length);
-       memcpy(session->auth_key.data, auth_xform->key.data,
-                       auth_xform->key.length);
 
-       switch (auth_xform->algo) {
-       case RTE_CRYPTO_AUTH_SHA1_HMAC:
-               session->auth_alg = RTE_CRYPTO_AUTH_SHA1_HMAC;
-               break;
-       case RTE_CRYPTO_AUTH_MD5_HMAC:
-               session->auth_alg = RTE_CRYPTO_AUTH_MD5_HMAC;
-               break;
-       case RTE_CRYPTO_AUTH_SHA256_HMAC:
-               session->auth_alg = RTE_CRYPTO_AUTH_SHA256_HMAC;
-               break;
-       case RTE_CRYPTO_AUTH_SHA384_HMAC:
-               session->auth_alg = RTE_CRYPTO_AUTH_SHA384_HMAC;
-               break;
-       case RTE_CRYPTO_AUTH_SHA512_HMAC:
-               session->auth_alg = RTE_CRYPTO_AUTH_SHA512_HMAC;
-               break;
-       case RTE_CRYPTO_AUTH_AES_CMAC:
-               session->auth_alg = RTE_CRYPTO_AUTH_AES_CMAC;
-               break;
-       case RTE_CRYPTO_AUTH_NULL:
+       if (auth_xform && auth_xform->algo != RTE_CRYPTO_AUTH_NULL) {
+               session->auth_key.data = rte_zmalloc(NULL,
+                                               auth_xform->key.length,
+                                               RTE_CACHE_LINE_SIZE);
+               if (session->auth_key.data == NULL &&
+                               auth_xform->key.length > 0) {
+                       DPAA_SEC_ERR("No Memory for auth key");
+                       rte_free(session->cipher_key.data);
+                       return -ENOMEM;
+               }
+               memcpy(session->auth_key.data, auth_xform->key.data,
+                               auth_xform->key.length);
+               session->auth_key.length = auth_xform->key.length;
+
+               switch (auth_xform->algo) {
+               case RTE_CRYPTO_AUTH_SHA1_HMAC:
+               case RTE_CRYPTO_AUTH_MD5_HMAC:
+               case RTE_CRYPTO_AUTH_SHA256_HMAC:
+               case RTE_CRYPTO_AUTH_SHA384_HMAC:
+               case RTE_CRYPTO_AUTH_SHA512_HMAC:
+               case RTE_CRYPTO_AUTH_AES_CMAC:
+                       break;
+               default:
+                       DPAA_SEC_ERR("Crypto: Unsupported auth alg %u",
+                               auth_xform->algo);
+                       goto out;
+               }
+               session->auth_alg = auth_xform->algo;
+       } else {
+               session->auth_key.data = NULL;
+               session->auth_key.length = 0;
                session->auth_alg = RTE_CRYPTO_AUTH_NULL;
-               break;
-       case RTE_CRYPTO_AUTH_SHA224_HMAC:
-       case RTE_CRYPTO_AUTH_AES_XCBC_MAC:
-       case RTE_CRYPTO_AUTH_SNOW3G_UIA2:
-       case RTE_CRYPTO_AUTH_SHA1:
-       case RTE_CRYPTO_AUTH_SHA256:
-       case RTE_CRYPTO_AUTH_SHA512:
-       case RTE_CRYPTO_AUTH_SHA224:
-       case RTE_CRYPTO_AUTH_SHA384:
-       case RTE_CRYPTO_AUTH_MD5:
-       case RTE_CRYPTO_AUTH_AES_GMAC:
-       case RTE_CRYPTO_AUTH_KASUMI_F9:
-       case RTE_CRYPTO_AUTH_AES_CBC_MAC:
-       case RTE_CRYPTO_AUTH_ZUC_EIA3:
-               DPAA_SEC_ERR("Crypto: Unsupported auth alg %u",
-                       auth_xform->algo);
-               goto out;
-       default:
-               DPAA_SEC_ERR("Crypto: Undefined Auth specified %u",
-                       auth_xform->algo);
-               goto out;
-       }
-
-       switch (cipher_xform->algo) {
-       case RTE_CRYPTO_CIPHER_AES_CBC:
-               session->cipher_alg = RTE_CRYPTO_CIPHER_AES_CBC;
-               break;
-       case RTE_CRYPTO_CIPHER_3DES_CBC:
-               session->cipher_alg = RTE_CRYPTO_CIPHER_3DES_CBC;
-               break;
-       case RTE_CRYPTO_CIPHER_AES_CTR:
-               session->cipher_alg = RTE_CRYPTO_CIPHER_AES_CTR;
-               break;
-       case RTE_CRYPTO_CIPHER_NULL:
-       case RTE_CRYPTO_CIPHER_SNOW3G_UEA2:
-       case RTE_CRYPTO_CIPHER_3DES_ECB:
-       case RTE_CRYPTO_CIPHER_AES_ECB:
-       case RTE_CRYPTO_CIPHER_KASUMI_F8:
-               DPAA_SEC_ERR("Crypto: Unsupported Cipher alg %u",
-                       cipher_xform->algo);
-               goto out;
-       default:
-               DPAA_SEC_ERR("Crypto: Undefined Cipher specified %u",
-                       cipher_xform->algo);
-               goto out;
        }
 
        if (ipsec_xform->direction == RTE_SECURITY_IPSEC_SA_DIR_EGRESS) {
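
The rewritten parameter parsing accepts chains where either transform leg is missing or NULL: on egress the cipher transform leads and auth is optional, on ingress auth leads and cipher is optional, and an absent leg degrades to the corresponding RTE_CRYPTO_*_NULL algorithm with no key allocated. A compact sketch of the selection rule, with hypothetical stand-in types in place of the rte_security/rte_crypto structures (the chain head is assumed non-NULL, as in the driver):

    /* Hypothetical transform node; mirrors the next-pointer chaining only. */
    struct toy_xform {
            struct toy_xform *next;
    };

    enum toy_dir { TOY_EGRESS, TOY_INGRESS };

    static void
    toy_pick_xforms(enum toy_dir dir, struct toy_xform *chain,
                    struct toy_xform **cipher, struct toy_xform **auth)
    {
            *cipher = NULL;
            *auth = NULL;
            if (dir == TOY_EGRESS) {
                    *cipher = chain;                /* cipher leads on egress */
                    if (chain->next != NULL)
                            *auth = chain->next;    /* auth leg is optional */
            } else {
                    *auth = chain;                  /* auth leads on ingress */
                    if (chain->next != NULL)
                            *cipher = chain->next;  /* cipher leg is optional */
            }
    }
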
@@ -2020,7 +2083,8 @@ dpaa_sec_set_ipsec_session(__rte_unused struct rte_cryptodev *dev,
                        (IPVERSION << PDBNH_ESP_ENCAP_SHIFT) |
                        PDBOPTS_ESP_OIHI_PDB_INL |
                        PDBOPTS_ESP_IVSRC |
-                       PDBHMO_ESP_ENCAP_DTTL;
+                       PDBHMO_ESP_ENCAP_DTTL |
+                       PDBHMO_ESP_SNR;
                session->encap_pdb.spi = ipsec_xform->spi;
                session->encap_pdb.ip_hdr_len = sizeof(struct ip);
 
@@ -2033,7 +2097,9 @@ dpaa_sec_set_ipsec_session(__rte_unused struct rte_cryptodev *dev,
        } else
                goto out;
        session->ctx_pool = internals->ctx_pool;
+       rte_spinlock_lock(&internals->lock);
        session->inq = dpaa_sec_attach_rxq(internals);
+       rte_spinlock_unlock(&internals->lock);
        if (session->inq == NULL) {
                DPAA_SEC_ERR("unable to attach sec queue");
                goto out;
@@ -2204,7 +2270,7 @@ dpaa_sec_capabilities_get(void *device __rte_unused)
        return dpaa_sec_security_cap;
 }
 
-struct rte_security_ops dpaa_sec_security_ops = {
+static const struct rte_security_ops dpaa_sec_security_ops = {
        .session_create = dpaa_sec_security_session_create,
        .session_update = NULL,
        .session_stats_get = NULL,
@@ -2284,6 +2350,7 @@ dpaa_sec_dev_init(struct rte_cryptodev *cryptodev)
        security_instance->sess_cnt = 0;
        cryptodev->security_ctx = security_instance;
 
+       rte_spinlock_init(&internals->lock);
        for (i = 0; i < internals->max_nb_queue_pairs; i++) {
                /* init qman fq for queue pair */
                qp = &internals->qps[i];
@@ -2316,7 +2383,7 @@ init_error:
 }
 
 static int
-cryptodev_dpaa_sec_probe(struct rte_dpaa_driver *dpaa_drv,
+cryptodev_dpaa_sec_probe(struct rte_dpaa_driver *dpaa_drv __rte_unused,
                                struct rte_dpaa_device *dpaa_dev)
 {
        struct rte_cryptodev *cryptodev;
@@ -2344,7 +2411,6 @@ cryptodev_dpaa_sec_probe(struct rte_dpaa_driver *dpaa_drv,
 
        dpaa_dev->crypto_dev = cryptodev;
        cryptodev->device = &dpaa_dev->device;
-       cryptodev->device->driver = &dpaa_drv->driver;
 
        /* init user callbacks */
        TAILQ_INIT(&(cryptodev->link_intr_cbs));
index ac6c00a..f4b8784 100644 (file)
@@ -137,7 +137,7 @@ struct dpaa_sec_qp {
        int tx_errs;
 };
 
-#define RTE_DPAA_MAX_NB_SEC_QPS 8
+#define RTE_DPAA_MAX_NB_SEC_QPS 2
 #define RTE_DPAA_MAX_RX_QUEUE RTE_DPAA_SEC_PMD_MAX_NB_SESSIONS
 #define DPAA_MAX_DEQUEUE_NUM_FRAMES 63
 
@@ -150,6 +150,7 @@ struct dpaa_sec_dev_private {
        unsigned char inq_attach[RTE_DPAA_MAX_RX_QUEUE];
        unsigned int max_nb_queue_pairs;
        unsigned int max_nb_sessions;
+       rte_spinlock_t lock;
 };
 
 #define MAX_SG_ENTRIES         16
diff --git a/drivers/crypto/kasumi/meson.build b/drivers/crypto/kasumi/meson.build
new file mode 100644 (file)
index 0000000..a09b0e2
--- /dev/null
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+lib = cc.find_library('libsso_kasumi', required: false)
+if not lib.found()
+       build = false
+else
+       ext_deps += lib
+endif
+
+sources = files('rte_kasumi_pmd.c', 'rte_kasumi_pmd_ops.c')
+deps += ['bus_vdev']
index d64ca41..bf1bd92 100644 (file)
@@ -1,8 +1,9 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-drivers = ['ccp', 'dpaa_sec', 'dpaa2_sec', 'mvsam',
-       'null', 'openssl', 'qat', 'virtio']
+drivers = ['aesni_gcm', 'aesni_mb', 'caam_jr', 'ccp', 'dpaa_sec', 'dpaa2_sec',
+       'kasumi', 'mvsam', 'null', 'octeontx', 'openssl', 'qat', 'scheduler',
+       'virtio', 'zuc']
 
 std_deps = ['cryptodev'] # cryptodev pulls in all other needed deps
 config_flag_fmt = 'RTE_LIBRTE_@0@_PMD'
index c3dc72c..2b4d036 100644 (file)
@@ -19,6 +19,7 @@ LIB = librte_pmd_mvsam_crypto.a
 # build flags
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -I$(RTE_SDK)/drivers/common/mvep
 CFLAGS += -I$(LIBMUSDK_PATH)/include
 CFLAGS += -DMVCONF_TYPES_PUBLIC
 CFLAGS += -DMVCONF_DMA_PHYS_ADDR_T_PUBLIC
@@ -31,9 +32,9 @@ EXPORT_MAP := rte_pmd_mvsam_version.map
 
 # external library dependencies
 LDLIBS += -L$(LIBMUSDK_PATH)/lib -lmusdk
-LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_kvargs
 LDLIBS += -lrte_cryptodev
-LDLIBS += -lrte_bus_vdev
+LDLIBS += -lrte_bus_vdev -lrte_common_mvep
 
 # library source files
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_MVSAM_CRYPTO) += rte_mrvl_pmd.c
index 3c8ea3c..f1c8796 100644 (file)
@@ -18,4 +18,4 @@ endif
 
 sources = files('rte_mrvl_pmd.c', 'rte_mrvl_pmd_ops.c')
 
-deps += ['bus_vdev']
+deps += ['bus_vdev', 'common_mvep']
index 73eff75..c2ae82a 100644 (file)
 #include <rte_bus_vdev.h>
 #include <rte_malloc.h>
 #include <rte_cpuflags.h>
+#include <rte_kvargs.h>
+#include <rte_mvep_common.h>
 
 #include "rte_mrvl_pmd_private.h"
 
-#define MRVL_MUSDK_DMA_MEMSIZE 41943040
-
 #define MRVL_PMD_MAX_NB_SESS_ARG               ("max_nb_sessions")
 #define MRVL_PMD_DEFAULT_MAX_NB_SESSIONS       2048
 
@@ -68,6 +68,9 @@ __rte_aligned(32);
  */
 static const
 struct cipher_params_mapping cipher_map[RTE_CRYPTO_CIPHER_LIST_END] = {
+       [RTE_CRYPTO_CIPHER_NULL] = {
+               .supported = ALGO_SUPPORTED,
+               .cipher_alg = SAM_CIPHER_NONE },
        [RTE_CRYPTO_CIPHER_3DES_CBC] = {
                .supported = ALGO_SUPPORTED,
                .cipher_alg = SAM_CIPHER_3DES,
@@ -93,6 +96,11 @@ struct cipher_params_mapping cipher_map[RTE_CRYPTO_CIPHER_LIST_END] = {
                .cipher_alg = SAM_CIPHER_AES,
                .cipher_mode = SAM_CIPHER_CTR,
                .max_key_len = BITS2BYTES(256) },
+       [RTE_CRYPTO_CIPHER_AES_ECB] = {
+               .supported = ALGO_SUPPORTED,
+               .cipher_alg = SAM_CIPHER_AES,
+               .cipher_mode = SAM_CIPHER_ECB,
+               .max_key_len = BITS2BYTES(256) },
 };
 
 /**
@@ -100,6 +108,9 @@ struct cipher_params_mapping cipher_map[RTE_CRYPTO_CIPHER_LIST_END] = {
  */
 static const
 struct auth_params_mapping auth_map[RTE_CRYPTO_AUTH_LIST_END] = {
+       [RTE_CRYPTO_AUTH_NULL] = {
+               .supported = ALGO_SUPPORTED,
+               .auth_alg = SAM_AUTH_NONE },
        [RTE_CRYPTO_AUTH_MD5_HMAC] = {
                .supported = ALGO_SUPPORTED,
                .auth_alg = SAM_AUTH_HMAC_MD5 },
@@ -112,6 +123,9 @@ struct auth_params_mapping auth_map[RTE_CRYPTO_AUTH_LIST_END] = {
        [RTE_CRYPTO_AUTH_SHA1] = {
                .supported = ALGO_SUPPORTED,
                .auth_alg = SAM_AUTH_HASH_SHA1 },
+       [RTE_CRYPTO_AUTH_SHA224_HMAC] = {
+               .supported = ALGO_SUPPORTED,
+               .auth_alg = SAM_AUTH_HMAC_SHA2_224 },
        [RTE_CRYPTO_AUTH_SHA224] = {
                .supported = ALGO_SUPPORTED,
                .auth_alg = SAM_AUTH_HASH_SHA2_224 },
@@ -210,7 +224,7 @@ mrvl_crypto_set_cipher_session_parameters(struct mrvl_crypto_session *sess,
 {
        /* Make sure we've got proper struct */
        if (cipher_xform->type != RTE_CRYPTO_SYM_XFORM_CIPHER) {
-               MRVL_CRYPTO_LOG_ERR("Wrong xform struct provided!");
+               MRVL_LOG(ERR, "Wrong xform struct provided!");
                return -EINVAL;
        }
 
@@ -218,7 +232,7 @@ mrvl_crypto_set_cipher_session_parameters(struct mrvl_crypto_session *sess,
        if ((cipher_xform->cipher.algo > RTE_DIM(cipher_map)) ||
                (cipher_map[cipher_xform->cipher.algo].supported
                        != ALGO_SUPPORTED)) {
-               MRVL_CRYPTO_LOG_ERR("Cipher algorithm not supported!");
+               MRVL_LOG(ERR, "Cipher algorithm not supported!");
                return -EINVAL;
        }
 
@@ -238,7 +252,7 @@ mrvl_crypto_set_cipher_session_parameters(struct mrvl_crypto_session *sess,
        /* Get max key length. */
        if (cipher_xform->cipher.key.length >
                cipher_map[cipher_xform->cipher.algo].max_key_len) {
-               MRVL_CRYPTO_LOG_ERR("Wrong key length!");
+               MRVL_LOG(ERR, "Wrong key length!");
                return -EINVAL;
        }
 
@@ -261,14 +275,14 @@ mrvl_crypto_set_auth_session_parameters(struct mrvl_crypto_session *sess,
 {
        /* Make sure we've got proper struct */
        if (auth_xform->type != RTE_CRYPTO_SYM_XFORM_AUTH) {
-               MRVL_CRYPTO_LOG_ERR("Wrong xform struct provided!");
+               MRVL_LOG(ERR, "Wrong xform struct provided!");
                return -EINVAL;
        }
 
        /* See if map data is present and valid */
        if ((auth_xform->auth.algo > RTE_DIM(auth_map)) ||
                (auth_map[auth_xform->auth.algo].supported != ALGO_SUPPORTED)) {
-               MRVL_CRYPTO_LOG_ERR("Auth algorithm not supported!");
+               MRVL_LOG(ERR, "Auth algorithm not supported!");
                return -EINVAL;
        }
 
@@ -300,7 +314,7 @@ mrvl_crypto_set_aead_session_parameters(struct mrvl_crypto_session *sess,
 {
        /* Make sure we've got proper struct */
        if (aead_xform->type != RTE_CRYPTO_SYM_XFORM_AEAD) {
-               MRVL_CRYPTO_LOG_ERR("Wrong xform struct provided!");
+               MRVL_LOG(ERR, "Wrong xform struct provided!");
                return -EINVAL;
        }
 
@@ -308,7 +322,7 @@ mrvl_crypto_set_aead_session_parameters(struct mrvl_crypto_session *sess,
        if ((aead_xform->aead.algo > RTE_DIM(aead_map)) ||
                (aead_map[aead_xform->aead.algo].supported
                        != ALGO_SUPPORTED)) {
-               MRVL_CRYPTO_LOG_ERR("AEAD algorithm not supported!");
+               MRVL_LOG(ERR, "AEAD algorithm not supported!");
                return -EINVAL;
        }
 
@@ -326,7 +340,7 @@ mrvl_crypto_set_aead_session_parameters(struct mrvl_crypto_session *sess,
        /* Get max key length. */
        if (aead_xform->aead.key.length >
                aead_map[aead_xform->aead.algo].max_key_len) {
-               MRVL_CRYPTO_LOG_ERR("Wrong key length!");
+               MRVL_LOG(ERR, "Wrong key length!");
                return -EINVAL;
        }
 
@@ -391,21 +405,21 @@ mrvl_crypto_set_session_parameters(struct mrvl_crypto_session *sess,
        if ((cipher_xform != NULL) &&
                (mrvl_crypto_set_cipher_session_parameters(
                        sess, cipher_xform) < 0)) {
-               MRVL_CRYPTO_LOG_ERR("Invalid/unsupported cipher parameters");
+               MRVL_LOG(ERR, "Invalid/unsupported cipher parameters!");
                return -EINVAL;
        }
 
        if ((auth_xform != NULL) &&
                (mrvl_crypto_set_auth_session_parameters(
                        sess, auth_xform) < 0)) {
-               MRVL_CRYPTO_LOG_ERR("Invalid/unsupported auth parameters");
+               MRVL_LOG(ERR, "Invalid/unsupported auth parameters!");
                return -EINVAL;
        }
 
        if ((aead_xform != NULL) &&
                (mrvl_crypto_set_aead_session_parameters(
                        sess, aead_xform) < 0)) {
-               MRVL_CRYPTO_LOG_ERR("Invalid/unsupported aead parameters");
+               MRVL_LOG(ERR, "Invalid/unsupported aead parameters!");
                return -EINVAL;
        }
 
@@ -437,12 +451,14 @@ mrvl_request_prepare(struct sam_cio_op_params *request,
                struct rte_crypto_op *op)
 {
        struct mrvl_crypto_session *sess;
-       struct rte_mbuf *dst_mbuf;
+       struct rte_mbuf *src_mbuf, *dst_mbuf;
+       uint16_t segments_nb;
        uint8_t *digest;
+       int i;
 
        if (unlikely(op->sess_type == RTE_CRYPTO_OP_SESSIONLESS)) {
-               MRVL_CRYPTO_LOG_ERR("MRVL CRYPTO PMD only supports session "
-                               "oriented requests, op (%p) is sessionless.",
+               MRVL_LOG(ERR, "MRVL CRYPTO PMD only supports session "
+                               "oriented requests, op (%p) is sessionless!",
                                op);
                return -EINVAL;
        }
@@ -450,39 +466,56 @@ mrvl_request_prepare(struct sam_cio_op_params *request,
        sess = (struct mrvl_crypto_session *)get_sym_session_private_data(
                        op->sym->session, cryptodev_driver_id);
        if (unlikely(sess == NULL)) {
-               MRVL_CRYPTO_LOG_ERR("Session was not created for this device");
+               MRVL_LOG(ERR, "Session was not created for this device!");
                return -EINVAL;
        }
 
-       /*
+       request->sa = sess->sam_sess;
+       request->cookie = op;
+
+       src_mbuf = op->sym->m_src;
+       segments_nb = src_mbuf->nb_segs;
+       /* The following conditions must be met:
+        * - A destination buffer is required when the source buffer
+        *   is segmented
+        * - A segmented destination buffer is not supported
+        */
+       if ((segments_nb > 1) && (!op->sym->m_dst)) {
+               MRVL_LOG(ERR, "op->sym->m_dst = NULL!");
+               return -1;
+       }
+       /* For the non-SG case:
         * If application delivered us null dst buffer, it means it expects
         * us to deliver the result in src buffer.
         */
        dst_mbuf = op->sym->m_dst ? op->sym->m_dst : op->sym->m_src;
 
-       request->sa = sess->sam_sess;
-       request->cookie = op;
-
-       /* Single buffers only, sorry. */
-       request->num_bufs = 1;
-       request->src = src_bd;
-       src_bd->vaddr = rte_pktmbuf_mtod(op->sym->m_src, void *);
-       src_bd->paddr = rte_pktmbuf_iova(op->sym->m_src);
-       src_bd->len = rte_pktmbuf_data_len(op->sym->m_src);
-
-       /* Empty source. */
-       if (rte_pktmbuf_data_len(op->sym->m_src) == 0) {
-               /* EIP does not support 0 length buffers. */
-               MRVL_CRYPTO_LOG_ERR("Buffer length == 0 not supported!");
+       if (!rte_pktmbuf_is_contiguous(dst_mbuf)) {
+               MRVL_LOG(ERR, "Segmented destination buffer not supported!");
                return -1;
        }
 
+       request->num_bufs = segments_nb;
+       for (i = 0; i < segments_nb; i++) {
+               /* Empty source. */
+               if (rte_pktmbuf_data_len(src_mbuf) == 0) {
+                       /* EIP does not support 0 length buffers. */
+                       MRVL_LOG(ERR, "Buffer length == 0 not supported!");
+                       return -1;
+               }
+               src_bd[i].vaddr = rte_pktmbuf_mtod(src_mbuf, void *);
+               src_bd[i].paddr = rte_pktmbuf_iova(src_mbuf);
+               src_bd[i].len = rte_pktmbuf_data_len(src_mbuf);
+
+               src_mbuf = src_mbuf->next;
+       }
+       request->src = src_bd;
+
        /* Empty destination. */
        if (rte_pktmbuf_data_len(dst_mbuf) == 0) {
                /* Make dst buffer fit at least source data. */
                if (rte_pktmbuf_append(dst_mbuf,
                        rte_pktmbuf_data_len(op->sym->m_src)) == NULL) {
-                       MRVL_CRYPTO_LOG_ERR("Unable to set big enough dst buffer!");
+                       MRVL_LOG(ERR, "Unable to set big enough dst buffer!");
                        return -1;
                }
        }
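
With scatter-gather input the request now carries one buffer descriptor per mbuf segment rather than a single descriptor for the whole packet, and zero-length segments are still rejected since the engine cannot handle them. The loop below mirrors that walk, with a toy descriptor type standing in for sam_buf_info:

    #include <rte_mbuf.h>

    /* Toy descriptor; stands in for the MUSDK sam_buf_info layout. */
    struct toy_buf_desc {
            void *vaddr;
            rte_iova_t paddr;
            uint32_t len;
    };

    /*
     * Fill one descriptor per segment; returns the segment count, or
     * -1 on overflow or on an empty segment.
     */
    static int
    toy_fill_src_descs(struct rte_mbuf *m, struct toy_buf_desc *bd,
                       int max_descs)
    {
            int i, n = m->nb_segs;

            if (n > max_descs)
                    return -1;
            for (i = 0; i < n; i++, m = m->next) {
                    if (rte_pktmbuf_data_len(m) == 0)
                            return -1;      /* engine rejects empty buffers */
                    bd[i].vaddr = rte_pktmbuf_mtod(m, void *);
                    bd[i].paddr = rte_pktmbuf_iova(m);
                    bd[i].len = rte_pktmbuf_data_len(m);
            }
            return n;
    }
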
@@ -527,7 +560,7 @@ mrvl_request_prepare(struct sam_cio_op_params *request,
 
        /*
         * EIP supports only scenarios where ICV(digest buffer) is placed at
-        * auth_icv_offset. Any other placement means risking errors.
+        * auth_icv_offset.
         */
        if (sess->sam_sess_params.dir == SAM_DIR_ENCRYPT) {
                /*
@@ -536,17 +569,36 @@ mrvl_request_prepare(struct sam_cio_op_params *request,
                 */
                if (rte_pktmbuf_mtod_offset(
                                dst_mbuf, uint8_t *,
-                               request->auth_icv_offset) == digest) {
+                               request->auth_icv_offset) == digest)
                        return 0;
-               }
        } else {/* sess->sam_sess_params.dir == SAM_DIR_DECRYPT */
                /*
                 * EIP will look for digest at auth_icv_offset
-                * offset in SRC buffer.
+                * offset in the SRC buffer. The digest must sit in the
+                * last segment, and the offset must be adjusted so that
+                * it is reachable within that segment.
+                */
-               if (rte_pktmbuf_mtod_offset(
-                               op->sym->m_src, uint8_t *,
-                               request->auth_icv_offset) == digest) {
+               struct rte_mbuf *last_seg = op->sym->m_src;
+               uint32_t d_offset = request->auth_icv_offset;
+               u32 d_size = sess->sam_sess_params.u.basic.auth_icv_len;
+               unsigned char *d_ptr;
+
+               /* Find the last segment and the offset for the last segment */
+               while ((last_seg->next != NULL) &&
+                               (d_offset >= last_seg->data_len)) {
+                       d_offset -= last_seg->data_len;
+                       last_seg = last_seg->next;
+               }
+
+               if (rte_pktmbuf_mtod_offset(last_seg, uint8_t *,
+                                           d_offset) == digest)
+                       return 0;
+
+               /* copy digest to last segment */
+               if (last_seg->buf_len >= (d_size + d_offset)) {
+                       d_ptr = (unsigned char *)last_seg->buf_addr +
+                                d_offset;
+                       rte_memcpy(d_ptr, digest, d_size);
                        return 0;
                }
        }
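
For decrypt the engine reads the ICV at auth_icv_offset in the source buffer, so with a chained mbuf the packet-global offset must be translated into the segment that actually holds the digest plus a segment-local offset, copying the digest there if it is not already in place. A sketch of just the offset translation:

    #include <rte_mbuf.h>

    /*
     * Walk the chain until *offset falls inside a segment; returns that
     * segment and rewrites *offset to be segment-local.
     */
    static struct rte_mbuf *
    toy_seek_digest_seg(struct rte_mbuf *m, uint32_t *offset)
    {
            while (m->next != NULL && *offset >= m->data_len) {
                    *offset -= m->data_len;
                    m = m->next;
            }
            return m;
    }
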
@@ -582,11 +634,10 @@ mrvl_crypto_pmd_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops,
        int ret;
        struct sam_cio_op_params requests[nb_ops];
        /*
-        * DPDK uses single fragment buffers, so we can KISS descriptors.
         * SAM does not store bd pointers, so on-stack scope will be enough.
         */
-       struct sam_buf_info src_bd[nb_ops];
-       struct sam_buf_info dst_bd[nb_ops];
+       struct mrvl_crypto_src_table src_bd[nb_ops];
+       struct sam_buf_info          dst_bd[nb_ops];
        struct mrvl_crypto_qp *qp = (struct mrvl_crypto_qp *)queue_pair;
 
        if (nb_ops == 0)
@@ -594,15 +645,17 @@ mrvl_crypto_pmd_enqueue_burst(void *queue_pair, struct rte_crypto_op **ops,
 
        /* Prepare the burst. */
        memset(&requests, 0, sizeof(requests));
+       memset(&src_bd, 0, sizeof(src_bd));
 
        /* Iterate through */
        for (; iter_ops < nb_ops; ++iter_ops) {
+               /* store the op id for debug */
+               src_bd[iter_ops].iter_ops = iter_ops;
                if (mrvl_request_prepare(&requests[iter_ops],
-                                       &src_bd[iter_ops],
+                                       src_bd[iter_ops].src_bd,
                                        &dst_bd[iter_ops],
                                        ops[iter_ops]) < 0) {
-                       MRVL_CRYPTO_LOG_ERR(
-                               "Error while parameters preparation!");
+                       MRVL_LOG(ERR, "Error while preparing parameters!");
                        qp->stats.enqueue_err_count++;
                        ops[iter_ops]->status = RTE_CRYPTO_OP_STATUS_ERROR;
 
@@ -680,12 +733,12 @@ mrvl_crypto_pmd_dequeue_burst(void *queue_pair,
                        ops[i]->status = RTE_CRYPTO_OP_STATUS_SUCCESS;
                        break;
                case SAM_CIO_ERR_ICV:
-                       MRVL_CRYPTO_LOG_DBG("CIO returned SAM_CIO_ERR_ICV.");
+                       MRVL_LOG(DEBUG, "CIO returned SAM_CIO_ERR_ICV.");
                        ops[i]->status = RTE_CRYPTO_OP_STATUS_AUTH_FAILED;
                        break;
                default:
-                       MRVL_CRYPTO_LOG_DBG(
-                               "CIO returned Error: %d", results[i].status);
+                       MRVL_LOG(DEBUG,
+                               "CIO returned Error: %d.", results[i].status);
                        ops[i]->status = RTE_CRYPTO_OP_STATUS_ERROR;
                        break;
                }
@@ -711,12 +764,12 @@ cryptodev_mrvl_crypto_create(const char *name,
        struct rte_cryptodev *dev;
        struct mrvl_crypto_private *internals;
        struct sam_init_params  sam_params;
-       int ret;
+       int ret = -EINVAL;
 
        dev = rte_cryptodev_pmd_create(name, &vdev->device,
                        &init_params->common);
        if (dev == NULL) {
-               MRVL_CRYPTO_LOG_ERR("failed to create cryptodev vdev");
+               MRVL_LOG(ERR, "Failed to create cryptodev vdev!");
                goto init_error;
        }
 
@@ -729,7 +782,9 @@ cryptodev_mrvl_crypto_create(const char *name,
 
        dev->feature_flags = RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO |
                        RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING |
-                       RTE_CRYPTODEV_FF_HW_ACCELERATED;
+                       RTE_CRYPTODEV_FF_HW_ACCELERATED |
+                       RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT |
+                       RTE_CRYPTODEV_FF_OOP_LB_IN_LB_OUT;
 
        /* Set vector instructions mode supported */
        internals = dev->data->dev_private;
@@ -737,29 +792,26 @@ cryptodev_mrvl_crypto_create(const char *name,
        internals->max_nb_qpairs = init_params->common.max_nb_queue_pairs;
        internals->max_nb_sessions = init_params->max_nb_sessions;
 
-       /*
-        * ret == -EEXIST is correct, it means DMA
-        * has been already initialized.
-        */
-       ret = mv_sys_dma_mem_init(MRVL_MUSDK_DMA_MEMSIZE);
-       if (ret < 0) {
-               if (ret != -EEXIST)
-                       return ret;
-
-               MRVL_CRYPTO_LOG_INFO(
-                       "DMA memory has been already initialized by a different driver.");
-       }
+       ret = rte_mvep_init(MVEP_MOD_T_SAM, NULL);
+       if (ret)
+               goto init_error;
 
        sam_params.max_num_sessions = internals->max_nb_sessions;
 
-       return sam_init(&sam_params);
+       /* sam_set_debug_flags(3); */
+
+       ret = sam_init(&sam_params);
+       if (ret)
+               goto init_error;
+
+       return 0;
 
 init_error:
-       MRVL_CRYPTO_LOG_ERR(
-               "driver %s: %s failed", init_params->common.name, __func__);
+       MRVL_LOG(ERR,
+               "Driver %s: %s failed!", init_params->common.name, __func__);
 
        cryptodev_mrvl_crypto_uninit(vdev);
-       return -EFAULT;
+       return ret;
 }
 
 /** Parse integer from integer argument */
@@ -771,7 +823,7 @@ parse_integer_arg(const char *key __rte_unused,
 
        *i = atoi(value);
        if (*i < 0) {
-               MRVL_CRYPTO_LOG_ERR("Argument has to be positive.\n");
+               MRVL_LOG(ERR, "Argument has to be positive!");
                return -EINVAL;
        }
 
@@ -786,9 +838,8 @@ parse_name_arg(const char *key __rte_unused,
        struct rte_cryptodev_pmd_init_params *params = extra_args;
 
        if (strlen(value) >= RTE_CRYPTODEV_NAME_MAX_LEN - 1) {
-               MRVL_CRYPTO_LOG_ERR("Invalid name %s, should be less than "
-                               "%u bytes.\n", value,
-                               RTE_CRYPTODEV_NAME_MAX_LEN - 1);
+               MRVL_LOG(ERR, "Invalid name %s, should be less than %u bytes!",
+                        value, RTE_CRYPTODEV_NAME_MAX_LEN - 1);
                return -EINVAL;
        }
 
@@ -864,7 +915,7 @@ cryptodev_mrvl_crypto_init(struct rte_vdev_device *vdev)
                        .private_data_size =
                                sizeof(struct mrvl_crypto_private),
                        .max_nb_queue_pairs =
-                               sam_get_num_inst() * SAM_HW_RING_NUM,
+                               sam_get_num_inst() * sam_get_num_cios(0),
                        .socket_id = rte_socket_id()
                },
                .max_nb_sessions = MRVL_PMD_DEFAULT_MAX_NB_SESSIONS
@@ -880,9 +931,8 @@ cryptodev_mrvl_crypto_init(struct rte_vdev_device *vdev)
 
        ret = mrvl_pmd_parse_input_args(&init_params, args);
        if (ret) {
-               RTE_LOG(ERR, PMD,
-                       "Failed to parse initialisation arguments[%s]\n",
-                       args);
+               MRVL_LOG(ERR, "Failed to parse initialisation arguments[%s]!",
+                        args);
                return -EINVAL;
        }
 
@@ -904,11 +954,11 @@ cryptodev_mrvl_crypto_uninit(struct rte_vdev_device *vdev)
        if (name == NULL)
                return -EINVAL;
 
-       RTE_LOG(INFO, PMD,
-               "Closing Marvell crypto device %s on numa socket %u\n",
-               name, rte_socket_id());
+       MRVL_LOG(INFO, "Closing Marvell crypto device %s on numa socket %u.",
+                name, rte_socket_id());
 
        sam_deinit();
+       rte_mvep_deinit(MVEP_MOD_T_SAM);
 
        cryptodev = rte_cryptodev_pmd_get_named_dev(name);
        if (cryptodev == NULL)
@@ -935,3 +985,8 @@ RTE_PMD_REGISTER_PARAM_STRING(CRYPTODEV_NAME_MRVL_PMD,
        "socket_id=<int>");
 RTE_PMD_REGISTER_CRYPTO_DRIVER(mrvl_crypto_drv, cryptodev_mrvl_pmd_drv.driver,
                cryptodev_driver_id);
+
+RTE_INIT(crypto_mrvl_init_log)
+{
+       mrvl_logtype_driver = rte_log_register("pmd.crypto.mvsam");
+}
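
The wholesale MRVL_CRYPTO_LOG_* to MRVL_LOG conversion rides on the dynamically registered logtype above, so the driver's verbosity can now be adjusted at runtime (for example with --log-level=pmd.crypto.mvsam:debug) instead of being fixed at build time. A minimal sketch of the idiom, with stand-in names rather than the driver's:

    #include <rte_common.h>
    #include <rte_log.h>

    static int toy_logtype;         /* stand-in for mrvl_logtype_driver */

    #define TOY_LOG(level, fmt, ...) \
            rte_log(RTE_LOG_ ## level, toy_logtype, \
                    "%s() line %u: " fmt "\n", __func__, __LINE__, \
                    ## __VA_ARGS__)

    RTE_INIT(toy_init_log)
    {
            toy_logtype = rte_log_register("pmd.crypto.toy");
            if (toy_logtype >= 0)
                    rte_log_set_level(toy_logtype, RTE_LOG_NOTICE);
    }
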
index c045562..9956f05 100644 (file)
@@ -30,9 +30,9 @@ static const struct rte_cryptodev_capabilities
                                        .increment = 1
                                },
                                .digest_size = {
-                                       .min = 16,
+                                       .min = 12,
                                        .max = 16,
-                                       .increment = 0
+                                       .increment = 4
                                },
                        }, }
                }, }
@@ -50,9 +50,9 @@ static const struct rte_cryptodev_capabilities
                                                .increment = 0
                                        },
                                        .digest_size = {
-                                               .min = 16,
+                                               .min = 12,
                                                .max = 16,
-                                               .increment = 0
+                                               .increment = 4
                                        },
                                }, }
                        }, }
@@ -70,9 +70,9 @@ static const struct rte_cryptodev_capabilities
                                                .increment = 1
                                        },
                                        .digest_size = {
-                                               .min = 20,
+                                               .min = 12,
                                                .max = 20,
-                                               .increment = 0
+                                               .increment = 4
                                        },
                                }, }
                        }, }
@@ -90,8 +90,29 @@ static const struct rte_cryptodev_capabilities
                                        .increment = 0
                                },
                                .digest_size = {
-                                       .min = 20,
+                                       .min = 12,
                                        .max = 20,
+                                       .increment = 4
+                               },
+                       }, }
+               }, }
+       },
+       {
+               /* SHA224 HMAC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA224_HMAC,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 1,
+                                       .max = 64,
+                                       .increment = 1
+                               },
+                               .digest_size = {
+                                       .min = 28,
+                                       .max = 28,
                                        .increment = 0
                                },
                        }, }
@@ -110,9 +131,9 @@ static const struct rte_cryptodev_capabilities
                                        .increment = 0
                                },
                                .digest_size = {
-                                       .min = 28,
+                                       .min = 12,
                                        .max = 28,
-                                       .increment = 0
+                                       .increment = 4
                                },
                        }, }
                }, }
@@ -130,9 +151,9 @@ static const struct rte_cryptodev_capabilities
                                                .increment = 1
                                        },
                                        .digest_size = {
-                                               .min = 32,
+                                               .min = 12,
                                                .max = 32,
-                                               .increment = 0
+                                               .increment = 4
                                        },
                                }, }
                        }, }
@@ -150,9 +171,9 @@ static const struct rte_cryptodev_capabilities
                                                .increment = 0
                                        },
                                        .digest_size = {
-                                               .min = 32,
+                                               .min = 12,
                                                .max = 32,
-                                               .increment = 0
+                                               .increment = 4
                                        },
                                }, }
                        }, }
@@ -170,9 +191,9 @@ static const struct rte_cryptodev_capabilities
                                        .increment = 1
                                },
                                .digest_size = {
-                                       .min = 48,
+                                       .min = 12,
                                        .max = 48,
-                                       .increment = 0
+                                       .increment = 4
                                },
                        }, }
                }, }
@@ -190,9 +211,9 @@ static const struct rte_cryptodev_capabilities
                                        .increment = 0
                                },
                                .digest_size = {
-                                       .min = 48,
+                                       .min = 12,
                                        .max = 48,
-                                       .increment = 0
+                                       .increment = 4
                                },
                        }, }
                }, }
@@ -210,9 +231,9 @@ static const struct rte_cryptodev_capabilities
                                        .increment = 1
                                },
                                .digest_size = {
-                                       .min = 64,
-                                       .max = 64,
-                                       .increment = 0
+                                       .min = 12,
+                                       .max = 48,
+                                       .increment = 4
                                },
                        }, }
                }, }
@@ -230,8 +251,8 @@ static const struct rte_cryptodev_capabilities
                                        .increment = 0
                                },
                                .digest_size = {
-                                       .min = 64,
-                                       .max = 64,
+                                       .min = 12,
+                                       .max = 48,
                                        .increment = 0
                                },
                        }, }
@@ -277,6 +298,26 @@ static const struct rte_cryptodev_capabilities
                        }, }
                }, }
        },
+       {       /* AES ECB */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_AES_ECB,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 32,
+                                       .increment = 8
+                               },
+                               .iv_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
        {       /* AES GCM */
                .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
                {.sym = {
@@ -372,6 +413,71 @@ static const struct rte_cryptodev_capabilities
                        }, }
                }, }
        },
+       {       /* 3DES ECB */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_3DES_ECB,
+                               .block_size = 8,
+                               .key_size = {
+                                       .min = 24,
+                                       .max = 24,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* NULL (AUTH) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_NULL,
+                               .block_size = 1,
+                               .key_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               }
+                       }, },
+               }, },
+       },
+       {       /* NULL (CIPHER) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_NULL,
+                               .block_size = 1,
+                               .key_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               }
+                       }, },
+               }, }
+       },
 
        RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST()
 };
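
The relaxed digest_size ranges above (minimum 12 bytes, 4-byte steps) advertise truncated HMAC tags rather than only full-length digests. An application can probe whether a particular truncation is acceptable through the generic capability helpers; a hedged usage sketch, with the device id and sizes chosen arbitrarily:

    #include <rte_cryptodev.h>

    /*
     * Check whether a device accepts SHA1-HMAC with a 20-byte key and a
     * 12-byte (truncated) digest; returns 1 if supported, 0 otherwise.
     */
    static int
    toy_supports_trunc_sha1(uint8_t dev_id)
    {
            const struct rte_cryptodev_symmetric_capability *cap;
            struct rte_cryptodev_sym_capability_idx idx = {
                    .type = RTE_CRYPTO_SYM_XFORM_AUTH,
                    .algo.auth = RTE_CRYPTO_AUTH_SHA1_HMAC,
            };

            cap = rte_cryptodev_sym_capability_get(dev_id, &idx);
            if (cap == NULL)
                    return 0;
            return rte_cryptodev_sym_capability_check_auth(cap,
                            20 /* key */, 12 /* digest */, 0 /* iv */) == 0;
    }
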
@@ -551,7 +657,7 @@ mrvl_crypto_pmd_qp_setup(struct rte_cryptodev *dev, uint16_t qp_id,
                 */
                int num = sam_get_num_inst();
                if (num == 0) {
-                       MRVL_CRYPTO_LOG_ERR("No crypto engines detected.\n");
+                       MRVL_LOG(ERR, "No crypto engines detected!");
                        return -1;
                }
 
@@ -635,7 +741,7 @@ mrvl_crypto_pmd_sym_session_configure(__rte_unused struct rte_cryptodev *dev,
        int ret;
 
        if (sess == NULL) {
-               MRVL_CRYPTO_LOG_ERR("Invalid session struct.");
+               MRVL_LOG(ERR, "Invalid session struct!");
                return -EINVAL;
        }
 
@@ -646,7 +752,7 @@ mrvl_crypto_pmd_sym_session_configure(__rte_unused struct rte_cryptodev *dev,
 
        ret = mrvl_crypto_set_session_parameters(sess_private_data, xform);
        if (ret != 0) {
-               MRVL_CRYPTO_LOG_ERR("Failed to configure session parameters.");
+               MRVL_LOG(ERR, "Failed to configure session parameters!");
 
                /* Return session to mempool */
                rte_mempool_put(mp, sess_private_data);
@@ -658,7 +764,7 @@ mrvl_crypto_pmd_sym_session_configure(__rte_unused struct rte_cryptodev *dev,
        mrvl_sess = (struct mrvl_crypto_session *)sess_private_data;
        if (sam_session_create(&mrvl_sess->sam_sess_params,
                                &mrvl_sess->sam_sess) < 0) {
-               MRVL_CRYPTO_LOG_DBG("Failed to create session!");
+               MRVL_LOG(DEBUG, "Failed to create session!");
                return -EIO;
        }
 
@@ -686,7 +792,7 @@ mrvl_crypto_pmd_sym_session_clear(struct rte_cryptodev *dev,
 
                if (mrvl_sess->sam_sess &&
                    sam_session_destroy(mrvl_sess->sam_sess) < 0) {
-                       MRVL_CRYPTO_LOG_INFO("Error while destroying session!");
+                       MRVL_LOG(ERR, "Error while destroying session!");
                }
 
                memset(sess, 0, sizeof(struct mrvl_crypto_session));
index c16d95b..6f8cf56 100644 (file)
 #define CRYPTODEV_NAME_MRVL_PMD crypto_mvsam
 /**< Marvell PMD device name */
 
-#define MRVL_CRYPTO_LOG_ERR(fmt, args...) \
-       RTE_LOG(ERR, CRYPTODEV, "[%s] %s() line %u: " fmt "\n",  \
-                       RTE_STR(CRYPTODEV_NAME_MRVL_PMD), \
-                       __func__, __LINE__, ## args)
-
-#ifdef RTE_LIBRTE_PMD_MRVL_CRYPTO_DEBUG
-#define MRVL_CRYPTO_LOG_INFO(fmt, args...) \
-       RTE_LOG(INFO, CRYPTODEV, "[%s] %s() line %u: " fmt "\n", \
-                       RTE_STR(CRYPTODEV_NAME_MRVL_PMD), \
-                       __func__, __LINE__, ## args)
-
-#define MRVL_CRYPTO_LOG_DBG(fmt, args...) \
-       RTE_LOG(DEBUG, CRYPTODEV, "[%s] %s() line %u: " fmt "\n", \
-                       RTE_STR(CRYPTODEV_NAME_MRVL_PMD), \
-                       __func__, __LINE__, ## args)
-
-#else
-#define MRVL_CRYPTO_LOG_INFO(fmt, args...)
-#define MRVL_CRYPTO_LOG_DBG(fmt, args...)
-#endif
+/** MRVL PMD LOGTYPE DRIVER */
+int mrvl_logtype_driver;
+
+#define MRVL_LOG(level, fmt, ...) \
+       rte_log(RTE_LOG_ ## level, mrvl_logtype_driver, \
+                       "%s() line %u: " fmt "\n", __func__, __LINE__, \
+                                       ## __VA_ARGS__)
 
 /**
  * Handy bits->bytes conversion macro.
  */
 #define BITS2BYTES(x) ((x) >> 3)
 
+#define MRVL_MAX_SEGMENTS 16
+
 /** The operation order mode enumerator. */
 enum mrvl_crypto_chain_order {
        MRVL_CRYPTO_CHAIN_CIPHER_ONLY,
@@ -84,6 +73,11 @@ struct mrvl_crypto_session {
        uint16_t cipher_iv_offset;
 } __rte_cache_aligned;
 
+struct mrvl_crypto_src_table {
+       uint16_t iter_ops;
+       struct sam_buf_info src_bd[MRVL_MAX_SEGMENTS];
+} __rte_cache_aligned;
+
 /** Set and validate MRVL crypto session parameters */
 extern int
 mrvl_crypto_set_session_parameters(struct mrvl_crypto_session *sess,
index bb2b6e1..2bdcd01 100644 (file)
@@ -308,7 +308,7 @@ null_crypto_pmd_sym_session_clear(struct rte_cryptodev *dev,
        }
 }
 
-struct rte_cryptodev_ops pmd_ops = {
+static struct rte_cryptodev_ops pmd_ops = {
                .dev_configure          = null_crypto_pmd_config,
                .dev_start              = null_crypto_pmd_start,
                .dev_stop               = null_crypto_pmd_stop,
diff --git a/drivers/crypto/octeontx/Makefile b/drivers/crypto/octeontx/Makefile
new file mode 100644 (file)
index 0000000..2e78e69
--- /dev/null
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Cavium, Inc
+#
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_pmd_octeontx_crypto.a
+
+# library version
+LIBABIVER := 1
+
+# build flags
+CFLAGS += $(WERROR_FLAGS)
+
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_cryptodev
+LDLIBS += -lrte_pci -lrte_bus_pci
+LDLIBS += -lrte_common_cpt
+
+VPATH += $(RTE_SDK)/drivers/crypto/octeontx
+
+CFLAGS += -O3 -DCPT_MODEL=CRYPTO_OCTEONTX
+CFLAGS += -I$(RTE_SDK)/drivers/common/cpt
+
+# PMD code
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO) += otx_cryptodev.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO) += otx_cryptodev_capabilities.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO) += otx_cryptodev_hw_access.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO) += otx_cryptodev_mbox.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO) += otx_cryptodev_ops.c
+
+# export include files
+SYMLINK-y-include +=
+
+# versioning export map
+EXPORT_MAP := rte_pmd_octeontx_crypto_version.map
+
+# library dependencies
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO) += lib/librte_cryptodev
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO) += lib/librte_mempool
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO) += lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/crypto/octeontx/meson.build b/drivers/crypto/octeontx/meson.build
new file mode 100644 (file)
index 0000000..6511b40
--- /dev/null
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Cavium, Inc
+if host_machine.system() != 'linux'
+       build = false
+endif
+
+deps += ['bus_pci']
+deps += ['common_cpt']
+name = 'octeontx_crypto'
+
+sources = files('otx_cryptodev.c',
+               'otx_cryptodev_capabilities.c',
+               'otx_cryptodev_hw_access.c',
+               'otx_cryptodev_mbox.c',
+               'otx_cryptodev_ops.c')
+
+includes += include_directories('../../common/cpt')
+cflags += '-DCPT_MODEL=CRYPTO_OCTEONTX'
diff --git a/drivers/crypto/octeontx/otx_cryptodev.c b/drivers/crypto/octeontx/otx_cryptodev.c
new file mode 100644 (file)
index 0000000..269f045
--- /dev/null
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#include <rte_bus_pci.h>
+#include <rte_common.h>
+#include <rte_cryptodev.h>
+#include <rte_cryptodev_pmd.h>
+#include <rte_log.h>
+#include <rte_pci.h>
+
+/* CPT common headers */
+#include "cpt_pmd_logs.h"
+
+#include "otx_cryptodev.h"
+#include "otx_cryptodev_ops.h"
+
+static int otx_cryptodev_logtype;
+
+static struct rte_pci_id pci_id_cpt_table[] = {
+       {
+               RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, CPT_81XX_PCI_VF_DEVICE_ID),
+       },
+       /* sentinel */
+       {
+               .device_id = 0
+       },
+};
+
+static void
+otx_cpt_logtype_init(void)
+{
+       cpt_logtype = otx_cryptodev_logtype;
+}
+
+static int
+otx_cpt_pci_probe(struct rte_pci_driver *pci_drv,
+                       struct rte_pci_device *pci_dev)
+{
+       struct rte_cryptodev *cryptodev;
+       char name[RTE_CRYPTODEV_NAME_MAX_LEN];
+       int retval;
+
+       if (pci_drv == NULL)
+               return -ENODEV;
+
+       rte_pci_device_name(&pci_dev->addr, name, sizeof(name));
+
+       cryptodev = rte_cryptodev_pmd_allocate(name, rte_socket_id());
+       if (cryptodev == NULL)
+               return -ENOMEM;
+
+       cryptodev->device = &pci_dev->device;
+       cryptodev->device->driver = &pci_drv->driver;
+       cryptodev->driver_id = otx_cryptodev_driver_id;
+
+       /* init user callbacks */
+       TAILQ_INIT(&(cryptodev->link_intr_cbs));
+
+       /* init logtype used in common */
+       otx_cpt_logtype_init();
+
+       /* Invoke PMD device initialization function */
+       retval = otx_cpt_dev_create(cryptodev);
+       if (retval == 0)
+               return 0;
+
+       CPT_LOG_ERR("[DRV %s]: Failed to create device "
+                       "(vendor_id: 0x%x, device_id: 0x%x)",
+                       pci_drv->driver.name,
+                       (unsigned int) pci_dev->id.vendor_id,
+                       (unsigned int) pci_dev->id.device_id);
+
+       cryptodev->attached = RTE_CRYPTODEV_DETACHED;
+
+       return -ENXIO;
+}
+
+static int
+otx_cpt_pci_remove(struct rte_pci_device *pci_dev)
+{
+       struct rte_cryptodev *cryptodev;
+       char name[RTE_CRYPTODEV_NAME_MAX_LEN];
+
+       if (pci_dev == NULL)
+               return -EINVAL;
+
+       rte_pci_device_name(&pci_dev->addr, name, sizeof(name));
+
+       cryptodev = rte_cryptodev_pmd_get_named_dev(name);
+       if (cryptodev == NULL)
+               return -ENODEV;
+
+       if (pci_dev->driver == NULL)
+               return -ENODEV;
+
+       /* free crypto device */
+       rte_cryptodev_pmd_release_device(cryptodev);
+
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+               rte_free(cryptodev->data->dev_private);
+
+       /* Detach the driver before dropping the device reference. */
+       cryptodev->device->driver = NULL;
+       cryptodev->device = NULL;
+       cryptodev->data = NULL;
+
+       /* free metapool memory */
+       cleanup_global_resources();
+
+       return 0;
+}
+
+static struct rte_pci_driver otx_cryptodev_pmd = {
+       .id_table = pci_id_cpt_table,
+       .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+       .probe = otx_cpt_pci_probe,
+       .remove = otx_cpt_pci_remove,
+};
+
+static struct cryptodev_driver otx_cryptodev_drv;
+
+RTE_PMD_REGISTER_PCI(CRYPTODEV_NAME_OCTEONTX_PMD, otx_cryptodev_pmd);
+RTE_PMD_REGISTER_PCI_TABLE(CRYPTODEV_NAME_OCTEONTX_PMD, pci_id_cpt_table);
+RTE_PMD_REGISTER_CRYPTO_DRIVER(otx_cryptodev_drv, otx_cryptodev_pmd.driver,
+               otx_cryptodev_driver_id);
+
+RTE_INIT(otx_cpt_init_log)
+{
+       /* Bus level logs */
+       otx_cryptodev_logtype = rte_log_register("pmd.crypto.octeontx");
+       if (otx_cryptodev_logtype >= 0)
+               rte_log_set_level(otx_cryptodev_logtype, RTE_LOG_NOTICE);
+}
diff --git a/drivers/crypto/octeontx/otx_cryptodev.h b/drivers/crypto/octeontx/otx_cryptodev.h
new file mode 100644 (file)
index 0000000..6c2871d
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#ifndef _OTX_CRYPTODEV_H_
+#define _OTX_CRYPTODEV_H_
+
+/* Cavium OCTEON TX crypto PMD device name */
+#define CRYPTODEV_NAME_OCTEONTX_PMD    crypto_octeontx
+
+/* Device ID */
+#define PCI_VENDOR_ID_CAVIUM           0x177d
+#define CPT_81XX_PCI_VF_DEVICE_ID      0xa041
+
+/*
+ * Crypto device driver ID
+ */
+uint8_t otx_cryptodev_driver_id;
+
+#endif /* _OTX_CRYPTODEV_H_ */
diff --git a/drivers/crypto/octeontx/otx_cryptodev_capabilities.c b/drivers/crypto/octeontx/otx_cryptodev_capabilities.c
new file mode 100644 (file)
index 0000000..946571c
--- /dev/null
@@ -0,0 +1,604 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#include <rte_cryptodev.h>
+
+#include "otx_cryptodev_capabilities.h"
+
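+/*
+ * Each entry below advertises one algorithm with {min, max, increment}
+ * ranges for its key, digest, IV and AAD sizes. For example, the AES CBC
+ * entry (key 16..32, step 8) advertises 128/192/256-bit keys with a
+ * fixed 16-byte IV.
+ */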
+static const struct rte_cryptodev_capabilities otx_capabilities[] = {
+       /* Symmetric capabilities */
+       {       /* NULL (AUTH) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_NULL,
+                               .block_size = 1,
+                               .key_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                       }, },
+               }, },
+       },
+       {       /* AES GMAC (AUTH) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_AES_GMAC,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 32,
+                                       .increment = 8
+                               },
+                               .digest_size = {
+                                       .min = 8,
+                                       .max = 16,
+                                       .increment = 4
+                               },
+                               .iv_size = {
+                                       .min = 12,
+                                       .max = 12,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* KASUMI (F9) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_KASUMI_F9,
+                               .block_size = 8,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 4,
+                                       .max = 4,
+                                       .increment = 0
+                               },
+                       }, }
+               }, }
+       },
+       {       /* MD5 */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_MD5,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 16,
+                                       .increment = 1
+                               },
+                       }, }
+               }, }
+       },
+       {       /* MD5 HMAC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_MD5_HMAC,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 8,
+                                       .max = 64,
+                                       .increment = 8
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 16,
+                                       .increment = 1
+                               },
+                       }, }
+               }, }
+       },
+       {       /* SHA1 */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA1,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 20,
+                                       .increment = 1
+                               },
+                       }, }
+               }, }
+       },
+       {       /* SHA1 HMAC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA1_HMAC,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 64,
+                                       .max = 64,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 20,
+                                       .increment = 1
+                               },
+                       }, }
+               }, }
+       },
+       {       /* SHA224 */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA224,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 28,
+                                       .increment = 1
+                               },
+                       }, }
+               }, }
+       },
+       {       /* SHA224 HMAC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA224_HMAC,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 64,
+                                       .max = 64,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 28,
+                                       .increment = 1
+                               },
+                       }, }
+               }, }
+       },
+       {       /* SHA256 */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA256,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 32,
+                                       .increment = 1
+                               },
+                       }, }
+               }, }
+       },
+       {       /* SHA256 HMAC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA256_HMAC,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 64,
+                                       .max = 64,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 32,
+                                       .increment = 1
+                               },
+                       }, }
+               }, }
+       },
+       {       /* SHA384 */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA384,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 48,
+                                       .increment = 1
+                               },
+                       }, }
+               }, }
+       },
+       {       /* SHA384 HMAC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA384_HMAC,
+                               .block_size = 64,
+                               .key_size = {
+                                       .min = 64,
+                                       .max = 64,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 48,
+                                       .increment = 1
+                               },
+                       }, }
+               }, }
+       },
+       {       /* SHA512 */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA512,
+                               .block_size = 128,
+                               .key_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 64,
+                                       .increment = 1
+                               },
+                       }, }
+               }, }
+       },
+       {       /* SHA512 HMAC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SHA512_HMAC,
+                               .block_size = 128,
+                               .key_size = {
+                                       .min = 64,
+                                       .max = 64,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 1,
+                                       .max = 64,
+                                       .increment = 1
+                               },
+                       }, }
+               }, }
+       },
+       {       /* SNOW 3G (UIA2) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_SNOW3G_UIA2,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 4,
+                                       .max = 4,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* ZUC (EIA3) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,
+                       {.auth = {
+                               .algo = RTE_CRYPTO_AUTH_ZUC_EIA3,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               },
+                               .digest_size = {
+                                       .min = 4,
+                                       .max = 4,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* NULL (CIPHER) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_NULL,
+                               .block_size = 1,
+                               .key_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               }
+                       }, },
+               }, }
+       },
+       {       /* 3DES CBC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_3DES_CBC,
+                               .block_size = 8,
+                               .key_size = {
+                                       .min = 24,
+                                       .max = 24,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 8,
+                                       .max = 16,
+                                       .increment = 8
+                               }
+                       }, }
+               }, }
+       },
+       {       /* 3DES ECB */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_3DES_ECB,
+                               .block_size = 8,
+                               .key_size = {
+                                       .min = 24,
+                                       .max = 24,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 0,
+                                       .max = 0,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* AES CBC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_AES_CBC,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 32,
+                                       .increment = 8
+                               },
+                               .iv_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* AES CTR */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_AES_CTR,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 32,
+                                       .increment = 8
+                               },
+                               .iv_size = {
+                                       .min = 12,
+                                       .max = 16,
+                                       .increment = 4
+                               }
+                       }, }
+               }, }
+       },
+       {       /* AES XTS */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_AES_XTS,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 32,
+                                       .max = 64,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* DES CBC */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_DES_CBC,
+                               .block_size = 8,
+                               .key_size = {
+                                       .min = 8,
+                                       .max = 8,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 8,
+                                       .max = 8,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* KASUMI (F8) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_KASUMI_F8,
+                               .block_size = 8,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 8,
+                                       .max = 8,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* SNOW 3G (UEA2) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_SNOW3G_UEA2,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* ZUC (EEA3) */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_CIPHER,
+                       {.cipher = {
+                               .algo = RTE_CRYPTO_CIPHER_ZUC_EEA3,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               },
+                               .iv_size = {
+                                       .min = 16,
+                                       .max = 16,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       {       /* AES GCM */
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+               {.sym = {
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AEAD,
+                       {.aead = {
+                               .algo = RTE_CRYPTO_AEAD_AES_GCM,
+                               .block_size = 16,
+                               .key_size = {
+                                       .min = 16,
+                                       .max = 32,
+                                       .increment = 8
+                               },
+                               .digest_size = {
+                                       .min = 8,
+                                       .max = 16,
+                                       .increment = 4
+                               },
+                               .aad_size = {
+                                       .min = 0,
+                                       .max = 1024,
+                                       .increment = 1
+                               },
+                               .iv_size = {
+                                       .min = 12,
+                                       .max = 12,
+                                       .increment = 0
+                               }
+                       }, }
+               }, }
+       },
+       /* End of symmetric capabilities */
+       RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST()
+};
+
+const struct rte_cryptodev_capabilities *
+otx_get_capabilities(void)
+{
+       return otx_capabilities;
+}
diff --git a/drivers/crypto/octeontx/otx_cryptodev_capabilities.h b/drivers/crypto/octeontx/otx_cryptodev_capabilities.h
new file mode 100644 (file)
index 0000000..fc62821
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#ifndef _OTX_CRYPTODEV_CAPABILITIES_H_
+#define _OTX_CRYPTODEV_CAPABILITIES_H_
+
+#include <rte_cryptodev.h>
+
+/*
+ * Get the capabilities list for the device.
+ */
+const struct rte_cryptodev_capabilities *
+otx_get_capabilities(void);
+
+#endif /* _OTX_CRYPTODEV_CAPABILITIES_H_ */
diff --git a/drivers/crypto/octeontx/otx_cryptodev_hw_access.c b/drivers/crypto/octeontx/otx_cryptodev_hw_access.c
new file mode 100644 (file)
index 0000000..5e705a8
--- /dev/null
@@ -0,0 +1,598 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+#include <assert.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_memzone.h>
+
+#include "otx_cryptodev_hw_access.h"
+#include "otx_cryptodev_mbox.h"
+
+#include "cpt_pmd_logs.h"
+#include "cpt_hw_types.h"
+
+/*
+ * VF HAL functions.
+ * A VF accesses its own BAR0/4 registers by passing VF number as 0;
+ * the OS/PCI layer maps them accordingly.
+ */
+
+static int
+otx_cpt_vf_init(struct cpt_vf *cptvf)
+{
+       int ret = 0;
+
+       /* Check ready with PF */
+       /* Gets chip ID / device ID from PF if ready */
+       ret = otx_cpt_check_pf_ready(cptvf);
+       if (ret) {
+               CPT_LOG_ERR("%s: PF not responding to READY msg",
+                               cptvf->dev_name);
+               ret = -EBUSY;
+               goto exit;
+       }
+
+       CPT_LOG_DP_DEBUG("%s: %s done", cptvf->dev_name, __func__);
+
+exit:
+       return ret;
+}
+
+/*
+ * Read Interrupt status of the VF
+ *
+ * @param   cptvf      cptvf structure
+ */
+static uint64_t
+otx_cpt_read_vf_misc_intr_status(struct cpt_vf *cptvf)
+{
+       return CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf), CPTX_VQX_MISC_INT(0, 0));
+}
+
+/*
+ * Clear mailbox interrupt of the VF
+ *
+ * @param   cptvf      cptvf structure
+ */
+static void
+otx_cpt_clear_mbox_intr(struct cpt_vf *cptvf)
+{
+       cptx_vqx_misc_int_t vqx_misc_int;
+
+       vqx_misc_int.u = CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf),
+                                     CPTX_VQX_MISC_INT(0, 0));
+       /* W1C for the VF */
+       vqx_misc_int.s.mbox = 1;
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+                     CPTX_VQX_MISC_INT(0, 0), vqx_misc_int.u);
+}
+
+/*
+ * Clear instruction NCB read error interrupt of the VF
+ *
+ * @param   cptvf      cptvf structure
+ */
+static void
+otx_cpt_clear_irde_intr(struct cpt_vf *cptvf)
+{
+       cptx_vqx_misc_int_t vqx_misc_int;
+
+       vqx_misc_int.u = CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf),
+                                     CPTX_VQX_MISC_INT(0, 0));
+       /* W1C for the VF */
+       vqx_misc_int.s.irde = 1;
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+                     CPTX_VQX_MISC_INT(0, 0), vqx_misc_int.u);
+}
+
+/*
+ * Clear NCB result write response error interrupt of the VF
+ *
+ * @param   cptvf      cptvf structure
+ */
+static void
+otx_cpt_clear_nwrp_intr(struct cpt_vf *cptvf)
+{
+       cptx_vqx_misc_int_t vqx_misc_int;
+
+       vqx_misc_int.u = CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf),
+                                     CPTX_VQX_MISC_INT(0, 0));
+       /* W1C for the VF */
+       vqx_misc_int.s.nwrp = 1;
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+                     CPTX_VQX_MISC_INT(0, 0), vqx_misc_int.u);
+}
+
+/*
+ * Clear swerr interrupt of the VF
+ *
+ * @param   cptvf      cptvf structure
+ */
+static void
+otx_cpt_clear_swerr_intr(struct cpt_vf *cptvf)
+{
+       cptx_vqx_misc_int_t vqx_misc_int;
+
+       vqx_misc_int.u = CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf),
+                                     CPTX_VQX_MISC_INT(0, 0));
+       /* W1C for the VF */
+       vqx_misc_int.s.swerr = 1;
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+                     CPTX_VQX_MISC_INT(0, 0), vqx_misc_int.u);
+}
+
+/*
+ * Clear hwerr interrupt of the VF
+ *
+ * @param   cptvf      cptvf structure
+ */
+static void
+otx_cpt_clear_hwerr_intr(struct cpt_vf *cptvf)
+{
+       cptx_vqx_misc_int_t vqx_misc_int;
+
+       vqx_misc_int.u = CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf),
+                                     CPTX_VQX_MISC_INT(0, 0));
+       /* W1C for the VF */
+       vqx_misc_int.s.hwerr = 1;
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+                     CPTX_VQX_MISC_INT(0, 0), vqx_misc_int.u);
+}
+
+/*
+ * Clear translation fault interrupt of the VF
+ *
+ * @param   cptvf      cptvf structure
+ */
+static void
+otx_cpt_clear_fault_intr(struct cpt_vf *cptvf)
+{
+       cptx_vqx_misc_int_t vqx_misc_int;
+
+       vqx_misc_int.u = CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf),
+                               CPTX_VQX_MISC_INT(0, 0));
+       /* W1C for the VF */
+       vqx_misc_int.s.fault = 1;
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+               CPTX_VQX_MISC_INT(0, 0), vqx_misc_int.u);
+}
+
+/*
+ * Clear doorbell overflow interrupt of the VF
+ *
+ * @param   cptvf      cptvf structure
+ */
+static void
+otx_cpt_clear_dovf_intr(struct cpt_vf *cptvf)
+{
+       cptx_vqx_misc_int_t vqx_misc_int;
+
+       vqx_misc_int.u = CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf),
+                                     CPTX_VQX_MISC_INT(0, 0));
+       /* W1C for the VF */
+       vqx_misc_int.s.dovf = 1;
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+                     CPTX_VQX_MISC_INT(0, 0), vqx_misc_int.u);
+}
+
+/* Write to VQX_CTL register
+ */
+static void
+otx_cpt_write_vq_ctl(struct cpt_vf *cptvf, bool val)
+{
+       cptx_vqx_ctl_t vqx_ctl;
+
+       vqx_ctl.u = CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf),
+                                CPTX_VQX_CTL(0, 0));
+       vqx_ctl.s.ena = val;
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+                     CPTX_VQX_CTL(0, 0), vqx_ctl.u);
+}
+
+/* Write to VQX_INPROG register
+ */
+static void
+otx_cpt_write_vq_inprog(struct cpt_vf *cptvf, uint8_t val)
+{
+       cptx_vqx_inprog_t vqx_inprg;
+
+       vqx_inprg.u = CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf),
+                                  CPTX_VQX_INPROG(0, 0));
+       vqx_inprg.s.inflight = val;
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+                     CPTX_VQX_INPROG(0, 0), vqx_inprg.u);
+}
+
+/* Write to the NUM_WAIT field of VQX_DONE_WAIT register
+ */
+static void
+otx_cpt_write_vq_done_numwait(struct cpt_vf *cptvf, uint32_t val)
+{
+       cptx_vqx_done_wait_t vqx_dwait;
+
+       vqx_dwait.u = CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf),
+                                  CPTX_VQX_DONE_WAIT(0, 0));
+       vqx_dwait.s.num_wait = val;
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+                     CPTX_VQX_DONE_WAIT(0, 0), vqx_dwait.u);
+}
+
+/* Write to the TIME_WAIT field of VQX_DONE_WAIT register
+ */
+static void
+otx_cpt_write_vq_done_timewait(struct cpt_vf *cptvf, uint16_t val)
+{
+       cptx_vqx_done_wait_t vqx_dwait;
+
+       vqx_dwait.u = CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf),
+                                  CPTX_VQX_DONE_WAIT(0, 0));
+       vqx_dwait.s.time_wait = val;
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+                     CPTX_VQX_DONE_WAIT(0, 0), vqx_dwait.u);
+}
+
+/* Write to VQX_SADDR register
+ */
+static void
+otx_cpt_write_vq_saddr(struct cpt_vf *cptvf, uint64_t val)
+{
+       cptx_vqx_saddr_t vqx_saddr;
+
+       vqx_saddr.u = val;
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+                     CPTX_VQX_SADDR(0, 0), vqx_saddr.u);
+}
+
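+/*
+ * Bring up this VF's virtual queue: disable the VQ, clear the doorbell
+ * and in-flight counts, program the first command chunk's DMA address
+ * as the queue start, set the completion coalescing thresholds, then
+ * re-enable the VQ.
+ */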
+static void
+otx_cpt_vfvq_init(struct cpt_vf *cptvf)
+{
+       uint64_t base_addr = 0;
+
+       /* Disable the VQ */
+       otx_cpt_write_vq_ctl(cptvf, 0);
+
+       /* Reset the doorbell */
+       otx_cpt_write_vq_doorbell(cptvf, 0);
+       /* Clear inflight */
+       otx_cpt_write_vq_inprog(cptvf, 0);
+
+       /* Write VQ SADDR */
+       base_addr = (uint64_t)(cptvf->cqueue.chead[0].dma_addr);
+       otx_cpt_write_vq_saddr(cptvf, base_addr);
+
+       /* Configure timerhold / coalescence */
+       otx_cpt_write_vq_done_timewait(cptvf, CPT_TIMER_THOLD);
+       otx_cpt_write_vq_done_numwait(cptvf, CPT_COUNT_THOLD);
+
+       /* Enable the VQ */
+       otx_cpt_write_vq_ctl(cptvf, 1);
+}
+
+static int
+cpt_vq_init(struct cpt_vf *cptvf, uint8_t group)
+{
+       int err;
+
+       /* Convey VQ LEN to PF */
+       err = otx_cpt_send_vq_size_msg(cptvf);
+       if (err) {
+               CPT_LOG_ERR("%s: PF not responding to QLEN msg",
+                           cptvf->dev_name);
+               err = -EBUSY;
+               goto cleanup;
+       }
+
+       /* CPT VF device initialization */
+       otx_cpt_vfvq_init(cptvf);
+
+       /* Send msg to PF to assign current Q to required group */
+       cptvf->vfgrp = group;
+       err = otx_cpt_send_vf_grp_msg(cptvf, group);
+       if (err) {
+               CPT_LOG_ERR("%s: PF not responding to VF_GRP msg",
+                           cptvf->dev_name);
+               err = -EBUSY;
+               goto cleanup;
+       }
+
+       CPT_LOG_DP_DEBUG("%s: %s done", cptvf->dev_name, __func__);
+       return 0;
+
+cleanup:
+       return err;
+}
+
+void
+otx_cpt_poll_misc(struct cpt_vf *cptvf)
+{
+       uint64_t intr;
+
+       intr = otx_cpt_read_vf_misc_intr_status(cptvf);
+
+       if (!intr)
+               return;
+
+       /* Check for MISC interrupt types */
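+       /*
+        * The else-if ladder below services one cause per call; the
+        * mailbox case is tested first as the common (likely) one.
+        */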
+       if (likely(intr & CPT_VF_INTR_MBOX_MASK)) {
+               CPT_LOG_DP_DEBUG("%s: Mailbox interrupt 0x%lx on CPT VF %d",
+                       cptvf->dev_name, (unsigned long)intr, cptvf->vfid);
+               otx_cpt_handle_mbox_intr(cptvf);
+               otx_cpt_clear_mbox_intr(cptvf);
+       } else if (unlikely(intr & CPT_VF_INTR_IRDE_MASK)) {
+               otx_cpt_clear_irde_intr(cptvf);
+               CPT_LOG_DP_DEBUG("%s: Instruction NCB read error interrupt "
+                               "0x%lx on CPT VF %d", cptvf->dev_name,
+                               (unsigned long)intr, cptvf->vfid);
+       } else if (unlikely(intr & CPT_VF_INTR_NWRP_MASK)) {
+               otx_cpt_clear_nwrp_intr(cptvf);
+               CPT_LOG_DP_DEBUG("%s: NCB response write error interrupt 0x%lx"
+                               " on CPT VF %d", cptvf->dev_name,
+                               (unsigned long)intr, cptvf->vfid);
+       } else if (unlikely(intr & CPT_VF_INTR_SWERR_MASK)) {
+               otx_cpt_clear_swerr_intr(cptvf);
+               CPT_LOG_DP_DEBUG("%s: Software error interrupt 0x%lx on CPT VF "
+                               "%d", cptvf->dev_name, (unsigned long)intr,
+                               cptvf->vfid);
+       } else if (unlikely(intr & CPT_VF_INTR_HWERR_MASK)) {
+               otx_cpt_clear_hwerr_intr(cptvf);
+               CPT_LOG_DP_DEBUG("%s: Hardware error interrupt 0x%lx on CPT VF "
+                               "%d", cptvf->dev_name, (unsigned long)intr,
+                               cptvf->vfid);
+       } else if (unlikely(intr & CPT_VF_INTR_FAULT_MASK)) {
+               otx_cpt_clear_fault_intr(cptvf);
+               CPT_LOG_DP_DEBUG("%s: Translation fault interrupt 0x%lx on CPT VF "
+                               "%d", cptvf->dev_name, (unsigned long)intr,
+                               cptvf->vfid);
+       } else if (unlikely(intr & CPT_VF_INTR_DOVF_MASK)) {
+               otx_cpt_clear_dovf_intr(cptvf);
+               CPT_LOG_DP_DEBUG("%s: Doorbell overflow interrupt 0x%lx on CPT VF "
+                               "%d", cptvf->dev_name, (unsigned long)intr,
+                               cptvf->vfid);
+       } else
+               CPT_LOG_DP_ERR("%s: Unhandled interrupt 0x%lx in CPT VF %d",
+                               cptvf->dev_name, (unsigned long)intr,
+                               cptvf->vfid);
+}
+
+int
+otx_cpt_hw_init(struct cpt_vf *cptvf, void *pdev, void *reg_base, char *name)
+{
+       memset(cptvf, 0, sizeof(struct cpt_vf));
+
+       /* Bar0 base address */
+       cptvf->reg_base = reg_base;
+       /* Struct was zeroed above, so the name stays NUL-terminated */
+       strncpy(cptvf->dev_name, name, sizeof(cptvf->dev_name) - 1);
+
+       cptvf->pdev = pdev;
+
+       /* To clear if there are any pending mbox msgs */
+       otx_cpt_poll_misc(cptvf);
+
+       if (otx_cpt_vf_init(cptvf)) {
+               CPT_LOG_ERR("Failed to initialize CPT VF device");
+               return -1;
+       }
+
+       return 0;
+}
+
+int
+otx_cpt_deinit_device(void *dev)
+{
+       struct cpt_vf *cptvf = (struct cpt_vf *)dev;
+
+       /* Do misc work one last time */
+       otx_cpt_poll_misc(cptvf);
+
+       return 0;
+}
+
+int
+otx_cpt_get_resource(void *dev, uint8_t group, struct cpt_instance **instance)
+{
+       int ret = -ENOENT, len, qlen, i;
+       int chunk_len, chunks, chunk_size;
+       struct cpt_vf *cptvf = (struct cpt_vf *)dev;
+       struct cpt_instance *cpt_instance;
+       struct command_chunk *chunk_head = NULL, *chunk_prev = NULL;
+       struct command_chunk *chunk = NULL;
+       uint8_t *mem;
+       const struct rte_memzone *rz;
+       uint64_t dma_addr = 0, alloc_len, used_len;
+       uint64_t *next_ptr;
+       uint64_t pg_sz = sysconf(_SC_PAGESIZE);
+
+       CPT_LOG_DP_DEBUG("Initializing cpt resource %s", cptvf->dev_name);
+
+       cpt_instance = &cptvf->instance;
+
+       memset(&cptvf->cqueue, 0, sizeof(cptvf->cqueue));
+       memset(&cptvf->pqueue, 0, sizeof(cptvf->pqueue));
+
+       /* Chunks are fixed-size buffers */
+       chunks = DEFAULT_CMD_QCHUNKS;
+       chunk_len = DEFAULT_CMD_QCHUNK_SIZE;
+
+       qlen = chunks * chunk_len;
+       /* Chunk size includes 8 bytes of next chunk ptr */
+       chunk_size = chunk_len * CPT_INST_SIZE + CPT_NEXT_CHUNK_PTR_SIZE;
+
+       /* For command chunk structures */
+       len = chunks * RTE_ALIGN(sizeof(struct command_chunk), 8);
+
+       /* For pending queue */
+       len += qlen * RTE_ALIGN(sizeof(struct rid), 8);
+
+       /* So that instruction queues start page-size aligned */
+       len = RTE_ALIGN(len, pg_sz);
+
+       /* For Instruction queues */
+       len += chunks * RTE_ALIGN(chunk_size, 128);
+
+       /* Wastage after instruction queues */
+       len = RTE_ALIGN(len, pg_sz);
+
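+       /*
+        * Layout carved out of the memzone below: the pending-queue rid
+        * array first, padding up to a page boundary, then the 128-byte
+        * aligned instruction chunks (each chunk_len * CPT_INST_SIZE bytes
+        * plus an 8-byte next-chunk pointer).
+        */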
+       rz = rte_memzone_reserve_aligned(cptvf->dev_name, len, cptvf->node,
+                                        RTE_MEMZONE_SIZE_HINT_ONLY |
+                                        RTE_MEMZONE_256MB,
+                                        RTE_CACHE_LINE_SIZE);
+       if (!rz) {
+               ret = -rte_errno;
+               goto cleanup;
+       }
+
+       mem = rz->addr;
+       dma_addr = rz->phys_addr;
+       alloc_len = len;
+
+       memset(mem, 0, len);
+
+       cpt_instance->rsvd = (uintptr_t)rz;
+
+       /* Pending queue setup */
+       cptvf->pqueue.rid_queue = (struct rid *)mem;
+       cptvf->pqueue.enq_tail = 0;
+       cptvf->pqueue.deq_head = 0;
+       cptvf->pqueue.pending_count = 0;
+
+       mem +=  qlen * RTE_ALIGN(sizeof(struct rid), 8);
+       len -=  qlen * RTE_ALIGN(sizeof(struct rid), 8);
+       dma_addr += qlen * RTE_ALIGN(sizeof(struct rid), 8);
+
+       /* Alignment wastage */
+       used_len = alloc_len - len;
+       mem += RTE_ALIGN(used_len, pg_sz) - used_len;
+       len -= RTE_ALIGN(used_len, pg_sz) - used_len;
+       dma_addr += RTE_ALIGN(used_len, pg_sz) - used_len;
+
+       /* Init instruction queues */
+       chunk_head = &cptvf->cqueue.chead[0];
+
+       chunk_prev = NULL;
+       for (i = 0; i < DEFAULT_CMD_QCHUNKS; i++) {
+               int csize;
+
+               chunk = &cptvf->cqueue.chead[i];
+               chunk->head = mem;
+               chunk->dma_addr = dma_addr;
+
+               csize = RTE_ALIGN(chunk_size, 128);
+               mem += csize;
+               dma_addr += csize;
+               len -= csize;
+
+               if (chunk_prev) {
+                       next_ptr = (uint64_t *)(chunk_prev->head +
+                                               chunk_size - 8);
+                       *next_ptr = (uint64_t)chunk->dma_addr;
+               }
+               chunk_prev = chunk;
+       }
+       /* Close the ring: point the last chunk back at the first */
+       next_ptr = (uint64_t *)(chunk_prev->head + chunk_size - 8);
+       *next_ptr = (uint64_t)chunk_head->dma_addr;
+
+       assert(!len);
+
+       /* This is used for CPT(0)_PF_Q(0..15)_CTL.size config */
+       cptvf->qsize = chunk_size / 8;
+       cptvf->cqueue.qhead = chunk_head->head;
+       cptvf->cqueue.idx = 0;
+       cptvf->cqueue.cchunk = 0;
+
+       if (cpt_vq_init(cptvf, group)) {
+               CPT_LOG_ERR("Failed to initialize CPT VQ of device %s",
+                           cptvf->dev_name);
+               ret = -EBUSY;
+               goto cleanup;
+       }
+
+       *instance = cpt_instance;
+
+       CPT_LOG_DP_DEBUG("Crypto device (%s) initialized", cptvf->dev_name);
+
+       return 0;
+cleanup:
+       rte_memzone_free(rz);
+       *instance = NULL;
+       return ret;
+}
+
+int
+otx_cpt_put_resource(struct cpt_instance *instance)
+{
+       struct cpt_vf *cptvf = (struct cpt_vf *)instance;
+       struct rte_memzone *rz;
+
+       if (!cptvf) {
+               CPT_LOG_ERR("Invalid CPTVF handle");
+               return -EINVAL;
+       }
+
+       CPT_LOG_DP_DEBUG("Releasing cpt device %s", cptvf->dev_name);
+
+       rz = (struct rte_memzone *)instance->rsvd;
+       rte_memzone_free(rz);
+       return 0;
+}
+
+int
+otx_cpt_start_device(void *dev)
+{
+       int rc;
+       struct cpt_vf *cptvf = (struct cpt_vf *)dev;
+
+       rc = otx_cpt_send_vf_up(cptvf);
+       if (rc) {
+               CPT_LOG_ERR("Failed to mark CPT VF device %s UP, rc = %d",
+                           cptvf->dev_name, rc);
+               return -EFAULT;
+       }
+
+       if ((cptvf->vftype != SE_TYPE) && (cptvf->vftype != AE_TYPE)) {
+               CPT_LOG_ERR("Fatal error, unexpected vf type %u, for CPT VF "
+                           "device %s", cptvf->vftype, cptvf->dev_name);
+               return -ENOENT;
+       }
+
+       return 0;
+}
+
+void
+otx_cpt_stop_device(void *dev)
+{
+       int rc;
+       uint32_t pending, retries = 5;
+       struct cpt_vf *cptvf = (struct cpt_vf *)dev;
+
+       /* Wait for pending entries to complete */
+       pending = otx_cpt_read_vq_doorbell(cptvf);
+       while (pending) {
+               CPT_LOG_DP_DEBUG("%s: Waiting for pending %u cmds to complete",
+                                cptvf->dev_name, pending);
+               sleep(1);
+               pending = otx_cpt_read_vq_doorbell(cptvf);
+               retries--;
+               if (!retries)
+                       break;
+       }
+
+       if (!retries && pending) {
+               CPT_LOG_ERR("%s: Timeout waiting for commands(%u)",
+                           cptvf->dev_name, pending);
+               return;
+       }
+
+       rc = otx_cpt_send_vf_down(cptvf);
+       if (rc) {
+               CPT_LOG_ERR("Failed to bring down vf %s, rc %d",
+                           cptvf->dev_name, rc);
+               return;
+       }
+}
diff --git a/drivers/crypto/octeontx/otx_cryptodev_hw_access.h b/drivers/crypto/octeontx/otx_cryptodev_hw_access.h
new file mode 100644 (file)
index 0000000..82b15ee
--- /dev/null
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+#ifndef _OTX_CRYPTODEV_HW_ACCESS_H_
+#define _OTX_CRYPTODEV_HW_ACCESS_H_
+
+#include <stdbool.h>
+
+#include <rte_branch_prediction.h>
+#include <rte_cycles.h>
+#include <rte_io.h>
+#include <rte_memory.h>
+#include <rte_prefetch.h>
+
+#include "cpt_common.h"
+#include "cpt_hw_types.h"
+#include "cpt_mcode_defines.h"
+#include "cpt_pmd_logs.h"
+
+#define CPT_INTR_POLL_INTERVAL_MS      (50)
+
+/* Default command queue length */
+#define DEFAULT_CMD_QCHUNKS            2
+#define DEFAULT_CMD_QCHUNK_SIZE                1023
+#define DEFAULT_CMD_QLEN \
+               (DEFAULT_CMD_QCHUNK_SIZE * DEFAULT_CMD_QCHUNKS)
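+
+/*
+ * With these defaults the queue holds 2 * 1023 = 2046 instructions: each
+ * chunk stores 1023 instructions (CPT_INST_SIZE bytes, i.e. 8 words each
+ * per the doorbell accounting below) followed by an 8-byte next-chunk
+ * pointer.
+ */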
+
+#define CPT_CSR_REG_BASE(cpt)          ((cpt)->reg_base)
+
+/* Read hw register */
+#define CPT_READ_CSR(__hw_addr, __offset) \
+       rte_read64_relaxed((uint8_t *)__hw_addr + __offset)
+
+/* Write hw register */
+#define CPT_WRITE_CSR(__hw_addr, __offset, __val) \
+       rte_write64_relaxed((__val), ((uint8_t *)__hw_addr + __offset))
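+
+/*
+ * Both accessors are relaxed; ordering against instruction-queue memory
+ * writes is enforced explicitly with rte_smp_wmb() before the doorbell
+ * is rung (see otx_cpt_ring_dbell()).
+ */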
+
+/* cpt instance */
+struct cpt_instance {
+       uint32_t queue_id;
+       uintptr_t rsvd;
+};
+
+struct command_chunk {
+       /** 128-byte aligned real_vaddr */
+       uint8_t *head;
+       /** 128-byte aligned real_dma_addr */
+       phys_addr_t dma_addr;
+};
+
+/**
+ * Command queue structure
+ */
+struct command_queue {
+       /** Command queue host write idx */
+       uint32_t idx;
+       /** Command queue chunk */
+       uint32_t cchunk;
+       /** Command queue head; instructions are inserted here */
+       uint8_t *qhead;
+       /** Command chunk list head */
+       struct command_chunk chead[DEFAULT_CMD_QCHUNKS];
+};
+
+/**
+ * CPT VF device structure
+ */
+struct cpt_vf {
+       /** CPT instance */
+       struct cpt_instance instance;
+       /** Register start address */
+       uint8_t *reg_base;
+       /** Command queue information */
+       struct command_queue cqueue;
+       /** Pending queue information */
+       struct pending_queue pqueue;
+       /** Meta information per vf */
+       struct cptvf_meta_info meta_info;
+
+       /** Below fields are accessed only in control path */
+
+       /** Env specific pdev representing the pci dev */
+       void *pdev;
+       /** Calculated queue size */
+       uint32_t qsize;
+       /** Device index (0...CPT_MAX_VQ_NUM) */
+       uint8_t  vfid;
+       /** VF type of cpt_vf_type_t (SE_TYPE(2) or AE_TYPE(1)) */
+       uint8_t  vftype;
+       /** VF group (0 - 8) */
+       uint8_t  vfgrp;
+       /** Operating node: Bits (46:44) in BAR0 address */
+       uint8_t  node;
+
+       /** VF-PF mailbox communication */
+
+       /** Flag if acked */
+       bool pf_acked;
+       /** Flag if not acked */
+       bool pf_nacked;
+
+       /** Device name */
+       char dev_name[32];
+} __rte_cache_aligned;
+
+/*
+ * CPT Registers map for 81xx
+ */
+
+/* VF registers */
+#define CPTX_VQX_CTL(a, b)             (0x0000100ll + 0x1000000000ll * \
+                                        ((a) & 0x0) + 0x100000ll * (b))
+#define CPTX_VQX_SADDR(a, b)           (0x0000200ll + 0x1000000000ll * \
+                                        ((a) & 0x0) + 0x100000ll * (b))
+#define CPTX_VQX_DONE_WAIT(a, b)       (0x0000400ll + 0x1000000000ll * \
+                                        ((a) & 0x0) + 0x100000ll * (b))
+#define CPTX_VQX_INPROG(a, b)          (0x0000410ll + 0x1000000000ll * \
+                                        ((a) & 0x0) + 0x100000ll * (b))
+#define CPTX_VQX_DONE(a, b)            (0x0000420ll + 0x1000000000ll * \
+                                        ((a) & 0x1) + 0x100000ll * (b))
+#define CPTX_VQX_DONE_ACK(a, b)                (0x0000440ll + 0x1000000000ll * \
+                                        ((a) & 0x1) + 0x100000ll * (b))
+#define CPTX_VQX_DONE_INT_W1S(a, b)    (0x0000460ll + 0x1000000000ll * \
+                                        ((a) & 0x1) + 0x100000ll * (b))
+#define CPTX_VQX_DONE_INT_W1C(a, b)    (0x0000468ll + 0x1000000000ll * \
+                                        ((a) & 0x1) + 0x100000ll * (b))
+#define CPTX_VQX_DONE_ENA_W1S(a, b)    (0x0000470ll + 0x1000000000ll * \
+                                        ((a) & 0x1) + 0x100000ll * (b))
+#define CPTX_VQX_DONE_ENA_W1C(a, b)    (0x0000478ll + 0x1000000000ll * \
+                                        ((a) & 0x1) + 0x100000ll * (b))
+#define CPTX_VQX_MISC_INT(a, b)                (0x0000500ll + 0x1000000000ll * \
+                                        ((a) & 0x1) + 0x100000ll * (b))
+#define CPTX_VQX_MISC_INT_W1S(a, b)    (0x0000508ll + 0x1000000000ll * \
+                                        ((a) & 0x1) + 0x100000ll * (b))
+#define CPTX_VQX_MISC_ENA_W1S(a, b)    (0x0000510ll + 0x1000000000ll * \
+                                        ((a) & 0x1) + 0x100000ll * (b))
+#define CPTX_VQX_MISC_ENA_W1C(a, b)    (0x0000518ll + 0x1000000000ll * \
+                                        ((a) & 0x1) + 0x100000ll * (b))
+#define CPTX_VQX_DOORBELL(a, b)                (0x0000600ll + 0x1000000000ll * \
+                                        ((a) & 0x1) + 0x100000ll * (b))
+#define CPTX_VFX_PF_MBOXX(a, b, c)     (0x0001000ll + 0x1000000000ll * \
+                                        ((a) & 0x1) + 0x100000ll * (b) + \
+                                        8ll * ((c) & 0x1))
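+
+/*
+ * From a VF both indices are passed as 0, so e.g. CPTX_VQX_DOORBELL(0, 0)
+ * resolves to offset 0x600 within this VF's BAR0 mapping.
+ */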
+
+/* VF HAL functions */
+
+void
+otx_cpt_poll_misc(struct cpt_vf *cptvf);
+
+int
+otx_cpt_hw_init(struct cpt_vf *cptvf, void *pdev, void *reg_base, char *name);
+
+int
+otx_cpt_deinit_device(void *dev);
+
+int
+otx_cpt_get_resource(void *dev, uint8_t group, struct cpt_instance **instance);
+
+int
+otx_cpt_put_resource(struct cpt_instance *instance);
+
+int
+otx_cpt_start_device(void *cptvf);
+
+void
+otx_cpt_stop_device(void *cptvf);
+
+/* Write to VQX_DOORBELL register
+ */
+static __rte_always_inline void
+otx_cpt_write_vq_doorbell(struct cpt_vf *cptvf, uint32_t val)
+{
+       cptx_vqx_doorbell_t vqx_dbell;
+
+       vqx_dbell.u = 0;
+       vqx_dbell.s.dbell_cnt = val * 8; /* Num of Instructions * 8 words */
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+                     CPTX_VQX_DOORBELL(0, 0), vqx_dbell.u);
+}
+
+static __rte_always_inline uint32_t
+otx_cpt_read_vq_doorbell(struct cpt_vf *cptvf)
+{
+       cptx_vqx_doorbell_t vqx_dbell;
+
+       vqx_dbell.u = CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf),
+                                  CPTX_VQX_DOORBELL(0, 0));
+       return vqx_dbell.s.dbell_cnt;
+}
+
+static __rte_always_inline void
+otx_cpt_ring_dbell(struct cpt_instance *instance, uint16_t count)
+{
+       struct cpt_vf *cptvf = (struct cpt_vf *)instance;
+       /* Memory barrier to flush pending writes */
+       rte_smp_wmb();
+       otx_cpt_write_vq_doorbell(cptvf, count);
+}
+
+static __rte_always_inline void *
+get_cpt_inst(struct command_queue *cqueue)
+{
+       CPT_LOG_DP_DEBUG("CPT queue idx %u", cqueue->idx);
+       return &cqueue->qhead[cqueue->idx * CPT_INST_SIZE];
+}
+
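+/*
+ * Build a 64-byte CPT instruction in place: words 0-3 are hardware
+ * control words (completion/result address in W1; the work-queue
+ * pointer is unused here), words 4-7 (EI0-EI3) carry the microcode
+ * arguments already prepared in the request.
+ */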
+static __rte_always_inline void
+fill_cpt_inst(struct cpt_instance *instance, void *req)
+{
+       struct command_queue *cqueue;
+       cpt_inst_s_t *cpt_ist_p;
+       struct cpt_vf *cptvf = (struct cpt_vf *)instance;
+       struct cpt_request_info *user_req = (struct cpt_request_info *)req;
+       cqueue = &cptvf->cqueue;
+       cpt_ist_p = get_cpt_inst(cqueue);
+       rte_prefetch_non_temporal(cpt_ist_p);
+
+       /* EI0, EI1, EI2, EI3 are already prepared */
+       /* HW W0 */
+       cpt_ist_p->u[0] = 0;
+       /* HW W1 */
+       cpt_ist_p->s8x.res_addr = user_req->comp_baddr;
+       /* HW W2 */
+       cpt_ist_p->u[2] = 0;
+       /* HW W3 */
+       cpt_ist_p->s8x.wq_ptr = 0;
+
+       /* MC EI0 */
+       cpt_ist_p->s8x.ei0 = user_req->ist.ei0;
+       /* MC EI1 */
+       cpt_ist_p->s8x.ei1 = user_req->ist.ei1;
+       /* MC EI2 */
+       cpt_ist_p->s8x.ei2 = user_req->ist.ei2;
+       /* MC EI3 */
+       cpt_ist_p->s8x.ei3 = user_req->ist.ei3;
+}
+
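+/*
+ * Advance the queue write index; once a chunk's DEFAULT_CMD_QCHUNK_SIZE
+ * instruction slots are consumed, hop to the next chunk in the ring
+ * (the trailing 8 bytes of each chunk hold the next-chunk pointer).
+ */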
+static __rte_always_inline void
+mark_cpt_inst(struct cpt_instance *instance)
+{
+       struct cpt_vf *cptvf = (struct cpt_vf *)instance;
+       struct command_queue *queue = &cptvf->cqueue;
+       if (unlikely(++queue->idx >= DEFAULT_CMD_QCHUNK_SIZE)) {
+               uint32_t cchunk = queue->cchunk;
+               MOD_INC(cchunk, DEFAULT_CMD_QCHUNKS);
+               queue->qhead = queue->chead[cchunk].head;
+               queue->idx = 0;
+               queue->cchunk = cchunk;
+       }
+}
+
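+/*
+ * Poll a request's completion word: returns 0 on success, a microcode
+ * or ERR_* code on failure, and ERR_REQ_PENDING while the hardware has
+ * not finished (with a bounded extra wait once the soft timeout
+ * expires).
+ */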
+static __rte_always_inline uint8_t
+check_nb_command_id(struct cpt_request_info *user_req,
+               struct cpt_instance *instance)
+{
+       uint8_t ret = ERR_REQ_PENDING;
+       struct cpt_vf *cptvf = (struct cpt_vf *)instance;
+       volatile cpt_res_s_t *cptres;
+
+       cptres = (volatile cpt_res_s_t *)user_req->completion_addr;
+
+       if (unlikely(cptres->s8x.compcode == CPT_8X_COMP_E_NOTDONE)) {
+               /*
+                * Wait for some time for this command to get completed
+                * before timing out
+                */
+               if (rte_get_timer_cycles() < user_req->time_out)
+                       return ret;
+               /*
+                * TODO: See if alternate caddr can be used to not loop
+                * longer than needed.
+                */
+               if ((cptres->s8x.compcode == CPT_8X_COMP_E_NOTDONE) &&
+                   (user_req->extra_time < TIME_IN_RESET_COUNT)) {
+                       user_req->extra_time++;
+                       return ret;
+               }
+
+               if (cptres->s8x.compcode != CPT_8X_COMP_E_NOTDONE)
+                       goto complete;
+
+               ret = ERR_REQ_TIMEOUT;
+               CPT_LOG_DP_ERR("Request %p timed out", user_req);
+               otx_cpt_poll_misc(cptvf);
+               goto exit;
+       }
+
+complete:
+       if (likely(cptres->s8x.compcode == CPT_8X_COMP_E_GOOD)) {
+               ret = 0; /* success */
+               if (unlikely((uint8_t)*user_req->alternate_caddr)) {
+                       ret = (uint8_t)*user_req->alternate_caddr;
+                       CPT_LOG_DP_ERR("Request %p: failed with microcode"
+                               " error, MC completion code: 0x%x", user_req,
+                               ret);
+               }
+               CPT_LOG_DP_DEBUG("MC status %.8x",
+                          *((volatile uint32_t *)user_req->alternate_caddr));
+               CPT_LOG_DP_DEBUG("HW status %.8x",
+                          *((volatile uint32_t *)user_req->completion_addr));
+       } else if ((cptres->s8x.compcode == CPT_8X_COMP_E_SWERR) ||
+                  (cptres->s8x.compcode == CPT_8X_COMP_E_FAULT)) {
+               ret = (uint8_t)*user_req->alternate_caddr;
+               if (!ret)
+                       ret = ERR_BAD_ALT_CCODE;
+               CPT_LOG_DP_DEBUG("Request %p: failed with %s, err code: 0x%x",
+                          user_req,
+                          (cptres->s8x.compcode == CPT_8X_COMP_E_FAULT) ?
+                          "DMA fault" : "software error", ret);
+       } else {
+               CPT_LOG_DP_ERR("Request %p: unexpected completion code %d",
+                          user_req, cptres->s8x.compcode);
+               ret = (uint8_t)*user_req->alternate_caddr;
+       }
+
+exit:
+       return ret;
+}
+
+#endif /* _OTX_CRYPTODEV_HW_ACCESS_H_ */
diff --git a/drivers/crypto/octeontx/otx_cryptodev_mbox.c b/drivers/crypto/octeontx/otx_cryptodev_mbox.c
new file mode 100644 (file)
index 0000000..a8e51a8
--- /dev/null
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#include <unistd.h>
+
+#include "otx_cryptodev_hw_access.h"
+#include "otx_cryptodev_mbox.h"
+
+void
+otx_cpt_handle_mbox_intr(struct cpt_vf *cptvf)
+{
+       struct cpt_mbox mbx = {0, 0};
+
+       /*
+        * MBOX[0] contains msg
+        * MBOX[1] contains data
+        */
+       mbx.msg  = CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf),
+                               CPTX_VFX_PF_MBOXX(0, 0, 0));
+       mbx.data = CPT_READ_CSR(CPT_CSR_REG_BASE(cptvf),
+                               CPTX_VFX_PF_MBOXX(0, 0, 1));
+
+       CPT_LOG_DP_DEBUG("%s: Mailbox msg 0x%lx from PF",
+                        cptvf->dev_name, (unsigned long)mbx.msg);
+       switch (mbx.msg) {
+       case OTX_CPT_MSG_READY:
+               {
+                       otx_cpt_chipid_vfid_t cid;
+
+                       cid.u64 = mbx.data;
+                       cptvf->pf_acked = true;
+                       cptvf->vfid = cid.s.vfid;
+                       CPT_LOG_DP_DEBUG("%s: Received VFID %d chip_id %d",
+                                        cptvf->dev_name,
+                                        cptvf->vfid, cid.s.chip_id);
+               }
+               break;
+       case OTX_CPT_MSG_QBIND_GRP:
+               cptvf->pf_acked = true;
+               cptvf->vftype = mbx.data;
+               CPT_LOG_DP_DEBUG("%s: VF %d type %s group %d",
+                                cptvf->dev_name, cptvf->vfid,
+                                ((mbx.data == SE_TYPE) ? "SE" : "AE"),
+                                cptvf->vfgrp);
+               break;
+       case OTX_CPT_MBOX_MSG_TYPE_ACK:
+               cptvf->pf_acked = true;
+               break;
+       case OTX_CPT_MBOX_MSG_TYPE_NACK:
+               cptvf->pf_nacked = true;
+               break;
+       default:
+               CPT_LOG_DP_DEBUG("%s: Invalid msg from PF, msg 0x%lx",
+                                cptvf->dev_name, (unsigned long)mbx.msg);
+               break;
+       }
+}
+
+/* Send a mailbox message to the PF
+ * @cptvf: VF from which this message is to be sent
+ * @mbx: message to be sent
+ */
+static void
+otx_cpt_send_msg_to_pf(struct cpt_vf *cptvf, struct cpt_mbox *mbx)
+{
+       /* Writing mbox(1) causes interrupt */
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+                     CPTX_VFX_PF_MBOXX(0, 0, 0), mbx->msg);
+       CPT_WRITE_CSR(CPT_CSR_REG_BASE(cptvf),
+                     CPTX_VFX_PF_MBOXX(0, 0, 1), mbx->data);
+}
+
+static int32_t
+otx_cpt_send_msg_to_pf_timeout(struct cpt_vf *cptvf, struct cpt_mbox *mbx)
+{
+       int timeout = OTX_CPT_MBOX_MSG_TIMEOUT;
+       int sleep_ms = 10;
+
+       cptvf->pf_acked = false;
+       cptvf->pf_nacked = false;
+
+       otx_cpt_send_msg_to_pf(cptvf, mbx);
+
+       /* Wait for the message just sent to be acked, timeout 2 sec */
+       while (!cptvf->pf_acked) {
+               if (cptvf->pf_nacked)
+                       return -EINVAL;
+               usleep(sleep_ms * 1000);
+               otx_cpt_poll_misc(cptvf);
+               if (cptvf->pf_acked)
+                       break;
+               timeout -= sleep_ms;
+               if (!timeout) {
+                       CPT_LOG_ERR("%s: PF didn't ack mbox msg %lx(vfid %u)",
+                                   cptvf->dev_name,
+                                   (unsigned long)(mbx->msg & 0xFF),
+                                   cptvf->vfid);
+                       return -EBUSY;
+               }
+       }
+       return 0;
+}
+
+int
+otx_cpt_check_pf_ready(struct cpt_vf *cptvf)
+{
+       struct cpt_mbox mbx = {0, 0};
+
+       mbx.msg = OTX_CPT_MSG_READY;
+       if (otx_cpt_send_msg_to_pf_timeout(cptvf, &mbx)) {
+               CPT_LOG_ERR("%s: PF didn't respond to READY msg",
+                           cptvf->dev_name);
+               return 1;
+       }
+       return 0;
+}
+
+int
+otx_cpt_send_vq_size_msg(struct cpt_vf *cptvf)
+{
+       struct cpt_mbox mbx = {0, 0};
+
+       mbx.msg = OTX_CPT_MSG_QLEN;
+
+       mbx.data = cptvf->qsize;
+       if (otx_cpt_send_msg_to_pf_timeout(cptvf, &mbx)) {
+               CPT_LOG_ERR("%s: PF didn't respond to vq_size msg",
+                           cptvf->dev_name);
+               return 1;
+       }
+       return 0;
+}
+
+int
+otx_cpt_send_vf_grp_msg(struct cpt_vf *cptvf, uint32_t group)
+{
+       struct cpt_mbox mbx = {0, 0};
+
+       mbx.msg = OTX_CPT_MSG_QBIND_GRP;
+
+       /* Convey group of the VF */
+       mbx.data = group;
+       if (otx_cpt_send_msg_to_pf_timeout(cptvf, &mbx)) {
+               CPT_LOG_ERR("%s: PF didn't respond to vf_type msg",
+                           cptvf->dev_name);
+               return 1;
+       }
+       return 0;
+}
+
+int
+otx_cpt_send_vf_up(struct cpt_vf *cptvf)
+{
+       struct cpt_mbox mbx = {0, 0};
+
+       mbx.msg = OTX_CPT_MSG_VF_UP;
+       if (otx_cpt_send_msg_to_pf_timeout(cptvf, &mbx)) {
+               CPT_LOG_ERR("%s: PF didn't respond to UP msg",
+                           cptvf->dev_name);
+               return 1;
+       }
+       return 0;
+}
+
+int
+otx_cpt_send_vf_down(struct cpt_vf *cptvf)
+{
+       struct cpt_mbox mbx = {0, 0};
+
+       mbx.msg = OTX_CPT_MSG_VF_DOWN;
+       if (otx_cpt_send_msg_to_pf_timeout(cptvf, &mbx)) {
+               CPT_LOG_ERR("%s: PF didn't respond to DOWN msg",
+                           cptvf->dev_name);
+               return 1;
+       }
+       return 0;
+}
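Together these helpers implement the VF's bring-up handshake with the PF. A minimal sketch of one plausible ordering, matching the function comments in otx_cryptodev_mbox.h below; cptvf_bring_up() is a hypothetical wrapper and 'group' an assumed engine-group number:

static int
cptvf_bring_up(struct cpt_vf *cptvf, uint32_t group)
{
        if (otx_cpt_check_pf_ready(cptvf))          /* READY handshake */
                return -1;
        if (otx_cpt_send_vq_size_msg(cptvf))        /* program VQ size */
                return -1;
        if (otx_cpt_send_vf_grp_msg(cptvf, group))  /* bind VQ to group */
                return -1;
        return otx_cpt_send_vf_up(cptvf) ? -1 : 0;  /* declare VF UP */
}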
diff --git a/drivers/crypto/octeontx/otx_cryptodev_mbox.h b/drivers/crypto/octeontx/otx_cryptodev_mbox.h
new file mode 100644 (file)
index 0000000..b05d1c5
--- /dev/null
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#ifndef _OTX_CRYPTODEV_MBOX_H_
+#define _OTX_CRYPTODEV_MBOX_H_
+
+#include <rte_byteorder.h>
+#include <rte_common.h>
+
+#include "cpt_common.h"
+#include "cpt_pmd_logs.h"
+
+#include "otx_cryptodev_hw_access.h"
+
+#define OTX_CPT_MBOX_MSG_TIMEOUT    2000 /* in milliseconds */
+
+#define OTX_CPT_MBOX_MSG_TYPE_REQ      0
+#define OTX_CPT_MBOX_MSG_TYPE_ACK      1
+#define OTX_CPT_MBOX_MSG_TYPE_NACK     2
+#define OTX_CPT_MBOX_MSG_TYPE_NOP      3
+
+/* CPT mailbox structure */
+struct cpt_mbox {
+       /** Message type MBOX[0] */
+       uint64_t msg;
+       /** Data         MBOX[1] */
+       uint64_t data;
+};
+
+typedef enum {
+       OTX_CPT_MSG_VF_UP = 1,
+       OTX_CPT_MSG_VF_DOWN,
+       OTX_CPT_MSG_READY,
+       OTX_CPT_MSG_QLEN,
+       OTX_CPT_MSG_QBIND_GRP,
+       OTX_CPT_MSG_VQ_PRIORITY,
+       OTX_CPT_MSG_PF_TYPE,
+} otx_cpt_mbox_opcode_t;
+
+typedef union {
+       uint64_t u64;
+       struct {
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+               uint32_t chip_id;
+               uint8_t vfid;
+               uint8_t reserved[3];
+#else
+               uint8_t reserved[3];
+               uint8_t vfid;
+               uint32_t chip_id;
+#endif
+       } s;
+} otx_cpt_chipid_vfid_t;
+
+/* Poll handler for mailbox messages from the PF */
+void
+otx_cpt_handle_mbox_intr(struct cpt_vf *cptvf);
+
+/*
+ * Checks whether the VF is able to communicate with the PF
+ * and also gets the CPT number this VF is associated with.
+ */
+int
+otx_cpt_check_pf_ready(struct cpt_vf *cptvf);
+
+/*
+ * Communicate VQ size to the PF to program CPT(0)_PF_Q(0-15)_CTL of the VF.
+ * Must be ACKed.
+ */
+int
+otx_cpt_send_vq_size_msg(struct cpt_vf *cptvf);
+
+/*
+ * Communicate the VF group required to the PF and get the VQ bound to it
+ */
+int
+otx_cpt_send_vf_grp_msg(struct cpt_vf *cptvf, uint32_t group);
+
+/*
+ * Communicate to PF that VF is UP and running
+ */
+int
+otx_cpt_send_vf_up(struct cpt_vf *cptvf);
+
+/*
+ * Communicate to PF that VF is going DOWN
+ */
+int
+otx_cpt_send_vf_down(struct cpt_vf *cptvf);
+
+#endif /* _OTX_CRYPTODEV_MBOX_H_ */
diff --git a/drivers/crypto/octeontx/otx_cryptodev_ops.c b/drivers/crypto/octeontx/otx_cryptodev_ops.c
new file mode 100644 (file)
index 0000000..23f9659
--- /dev/null
@@ -0,0 +1,531 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#include <rte_alarm.h>
+#include <rte_bus_pci.h>
+#include <rte_cryptodev.h>
+#include <rte_cryptodev_pmd.h>
+#include <rte_malloc.h>
+
+#include "cpt_pmd_logs.h"
+#include "cpt_pmd_ops_helper.h"
+#include "cpt_ucode.h"
+#include "cpt_request_mgr.h"
+
+#include "otx_cryptodev.h"
+#include "otx_cryptodev_capabilities.h"
+#include "otx_cryptodev_hw_access.h"
+#include "otx_cryptodev_ops.h"
+
+static int otx_cryptodev_probe_count;
+static rte_spinlock_t otx_probe_count_lock = RTE_SPINLOCK_INITIALIZER;
+
+static struct rte_mempool *otx_cpt_meta_pool;
+static int otx_cpt_op_mlen;
+static int otx_cpt_op_sb_mlen;
+
+/* Forward declarations */
+
+static int
+otx_cpt_que_pair_release(struct rte_cryptodev *dev, uint16_t que_pair_id);
+
+/*
+ * Initializes global variables used by fast-path code
+ *
+ * @return
+ *   - 0 on success, errcode on error
+ */
+static int
+init_global_resources(void)
+{
+       /* Get meta len for scatter gather mode */
+       otx_cpt_op_mlen = cpt_pmd_ops_helper_get_mlen_sg_mode();
+
+       /* Extra 32 B (4 x 8 B) reserved for future use */
+       otx_cpt_op_mlen += 4 * sizeof(uint64_t);
+
+       otx_cpt_meta_pool = rte_mempool_create("cpt_metabuf-pool", 4096 * 16,
+                                              otx_cpt_op_mlen, 512, 0,
+                                              NULL, NULL, NULL, NULL,
+                                              SOCKET_ID_ANY, 0);
+       if (!otx_cpt_meta_pool) {
+               CPT_LOG_ERR("cpt metabuf pool not created");
+               return -ENOMEM;
+       }
+
+       /* Get meta len for direct mode */
+       otx_cpt_op_sb_mlen = cpt_pmd_ops_helper_get_mlen_direct_mode();
+
+       /* Extra 32 B (4 x 8 B) reserved for future use */
+       otx_cpt_op_sb_mlen += 4 * sizeof(uint64_t);
+
+       return 0;
+}
+
+void
+cleanup_global_resources(void)
+{
+       /* Take lock */
+       rte_spinlock_lock(&otx_probe_count_lock);
+
+       /* Decrement the cryptodev count */
+       otx_cryptodev_probe_count--;
+
+       /* Free buffers */
+       if (otx_cpt_meta_pool && otx_cryptodev_probe_count == 0)
+               rte_mempool_free(otx_cpt_meta_pool);
+
+       /* Release lock */
+       rte_spinlock_unlock(&otx_probe_count_lock);
+}
+
+/* Alarm routines */
+
+static void
+otx_cpt_alarm_cb(void *arg)
+{
+       struct cpt_vf *cptvf = arg;
+       otx_cpt_poll_misc(cptvf);
+       rte_eal_alarm_set(CPT_INTR_POLL_INTERVAL_MS * 1000,
+                         otx_cpt_alarm_cb, cptvf);
+}
+
+static int
+otx_cpt_periodic_alarm_start(void *arg)
+{
+       return rte_eal_alarm_set(CPT_INTR_POLL_INTERVAL_MS * 1000,
+                                otx_cpt_alarm_cb, arg);
+}
+
+static int
+otx_cpt_periodic_alarm_stop(void *arg)
+{
+       return rte_eal_alarm_cancel(otx_cpt_alarm_cb, arg);
+}
+
+/* PMD ops */
+
+static int
+otx_cpt_dev_config(struct rte_cryptodev *dev __rte_unused,
+                  struct rte_cryptodev_config *config __rte_unused)
+{
+       CPT_PMD_INIT_FUNC_TRACE();
+       return 0;
+}
+
+static int
+otx_cpt_dev_start(struct rte_cryptodev *c_dev)
+{
+       void *cptvf = c_dev->data->dev_private;
+
+       CPT_PMD_INIT_FUNC_TRACE();
+
+       return otx_cpt_start_device(cptvf);
+}
+
+static void
+otx_cpt_dev_stop(struct rte_cryptodev *c_dev)
+{
+       void *cptvf = c_dev->data->dev_private;
+
+       CPT_PMD_INIT_FUNC_TRACE();
+
+       otx_cpt_stop_device(cptvf);
+}
+
+static int
+otx_cpt_dev_close(struct rte_cryptodev *c_dev)
+{
+       void *cptvf = c_dev->data->dev_private;
+       int i, ret;
+
+       CPT_PMD_INIT_FUNC_TRACE();
+
+       for (i = 0; i < c_dev->data->nb_queue_pairs; i++) {
+               ret = otx_cpt_que_pair_release(c_dev, i);
+               if (ret)
+                       return ret;
+       }
+
+       otx_cpt_periodic_alarm_stop(cptvf);
+       otx_cpt_deinit_device(cptvf);
+
+       return 0;
+}
+
+static void
+otx_cpt_dev_info_get(struct rte_cryptodev *dev, struct rte_cryptodev_info *info)
+{
+       CPT_PMD_INIT_FUNC_TRACE();
+       if (info != NULL) {
+               info->max_nb_queue_pairs = CPT_NUM_QS_PER_VF;
+               info->feature_flags = dev->feature_flags;
+               info->capabilities = otx_get_capabilities();
+               info->sym.max_nb_sessions = 0;
+               info->driver_id = otx_cryptodev_driver_id;
+               info->min_mbuf_headroom_req = OTX_CPT_MIN_HEADROOM_REQ;
+               info->min_mbuf_tailroom_req = OTX_CPT_MIN_TAILROOM_REQ;
+       }
+}
+
+static void
+otx_cpt_stats_get(struct rte_cryptodev *dev __rte_unused,
+                 struct rte_cryptodev_stats *stats __rte_unused)
+{
+       CPT_PMD_INIT_FUNC_TRACE();
+}
+
+static void
+otx_cpt_stats_reset(struct rte_cryptodev *dev __rte_unused)
+{
+       CPT_PMD_INIT_FUNC_TRACE();
+}
+
+static int
+otx_cpt_que_pair_setup(struct rte_cryptodev *dev,
+                      uint16_t que_pair_id,
+                      const struct rte_cryptodev_qp_conf *qp_conf,
+                      int socket_id __rte_unused,
+                      struct rte_mempool *session_pool __rte_unused)
+{
+       void *cptvf = dev->data->dev_private;
+       struct cpt_instance *instance = NULL;
+       struct rte_pci_device *pci_dev;
+       int ret = -1;
+
+       CPT_PMD_INIT_FUNC_TRACE();
+
+       if (dev->data->queue_pairs[que_pair_id] != NULL) {
+               ret = otx_cpt_que_pair_release(dev, que_pair_id);
+               if (ret)
+                       return ret;
+       }
+
+       if (qp_conf->nb_descriptors > DEFAULT_CMD_QLEN) {
+               CPT_LOG_INFO("Number of descriptors too big %d, using default "
+                            "queue length of %d", qp_conf->nb_descriptors,
+                            DEFAULT_CMD_QLEN);
+       }
+
+       pci_dev = RTE_DEV_TO_PCI(dev->device);
+
+       if (pci_dev->mem_resource[0].addr == NULL) {
+               CPT_LOG_ERR("PCI mem address null");
+               return -EIO;
+       }
+
+       ret = otx_cpt_get_resource(cptvf, 0, &instance);
+       if (ret != 0) {
+               CPT_LOG_ERR("Error getting instance handle from device %s : "
+                           "ret = %d", dev->data->name, ret);
+               return ret;
+       }
+
+       instance->queue_id = que_pair_id;
+       dev->data->queue_pairs[que_pair_id] = instance;
+
+       return 0;
+}
+
+static int
+otx_cpt_que_pair_release(struct rte_cryptodev *dev, uint16_t que_pair_id)
+{
+       struct cpt_instance *instance = dev->data->queue_pairs[que_pair_id];
+       int ret;
+
+       CPT_PMD_INIT_FUNC_TRACE();
+
+       ret = otx_cpt_put_resource(instance);
+       if (ret != 0) {
+               CPT_LOG_ERR("Error putting instance handle of device %s : "
+                           "ret = %d", dev->data->name, ret);
+               return ret;
+       }
+
+       dev->data->queue_pairs[que_pair_id] = NULL;
+
+       return 0;
+}
+
+static unsigned int
+otx_cpt_get_session_size(struct rte_cryptodev *dev __rte_unused)
+{
+       return cpt_get_session_size();
+}
+
+static void
+otx_cpt_session_init(void *sym_sess, uint8_t driver_id)
+{
+       struct rte_cryptodev_sym_session *sess = sym_sess;
+       struct cpt_sess_misc *cpt_sess =
+        (struct cpt_sess_misc *) get_sym_session_private_data(sess, driver_id);
+
+       CPT_PMD_INIT_FUNC_TRACE();
+       cpt_sess->ctx_dma_addr = rte_mempool_virt2iova(cpt_sess) +
+                       sizeof(struct cpt_sess_misc);
+}
+
+static int
+otx_cpt_session_cfg(struct rte_cryptodev *dev,
+                   struct rte_crypto_sym_xform *xform,
+                   struct rte_cryptodev_sym_session *sess,
+                   struct rte_mempool *mempool)
+{
+       struct rte_crypto_sym_xform *chain;
+       void *sess_private_data = NULL;
+
+       CPT_PMD_INIT_FUNC_TRACE();
+
+       if (cpt_is_algo_supported(xform))
+               goto err;
+
+       if (unlikely(sess == NULL)) {
+               CPT_LOG_ERR("invalid session struct");
+               return -EINVAL;
+       }
+
+       if (rte_mempool_get(mempool, &sess_private_data)) {
+               CPT_LOG_ERR("Could not allocate sess_private_data");
+               return -ENOMEM;
+       }
+
+       chain = xform;
+       while (chain) {
+               switch (chain->type) {
+               case RTE_CRYPTO_SYM_XFORM_AEAD:
+                       if (fill_sess_aead(chain, sess_private_data))
+                               goto err;
+                       break;
+               case RTE_CRYPTO_SYM_XFORM_CIPHER:
+                       if (fill_sess_cipher(chain, sess_private_data))
+                               goto err;
+                       break;
+               case RTE_CRYPTO_SYM_XFORM_AUTH:
+                       if (chain->auth.algo == RTE_CRYPTO_AUTH_AES_GMAC) {
+                               if (fill_sess_gmac(chain, sess_private_data))
+                                       goto err;
+                       } else {
+                               if (fill_sess_auth(chain, sess_private_data))
+                                       goto err;
+                       }
+                       break;
+               default:
+                       CPT_LOG_ERR("Invalid crypto xform type");
+                       break;
+               }
+               chain = chain->next;
+       }
+       set_sym_session_private_data(sess, dev->driver_id, sess_private_data);
+       otx_cpt_session_init(sess, dev->driver_id);
+       return 0;
+
+err:
+       if (sess_private_data)
+               rte_mempool_put(mempool, sess_private_data);
+       return -EPERM;
+}
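The while (chain) loop above walks an application-supplied transform list. As a reference, a minimal sketch of the two-element cipher-then-auth chain it expects; field values are placeholders, not driver requirements:

/* A cipher->auth chain as walked by otx_cpt_session_cfg() above. */
struct rte_crypto_sym_xform auth_xf = {
        .type = RTE_CRYPTO_SYM_XFORM_AUTH,
        .next = NULL,               /* end of chain */
};
struct rte_crypto_sym_xform cipher_xf = {
        .type = RTE_CRYPTO_SYM_XFORM_CIPHER,
        .next = &auth_xf,           /* chained via ->next */
};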
+
+static void
+otx_cpt_session_clear(struct rte_cryptodev *dev,
+                 struct rte_cryptodev_sym_session *sess)
+{
+       void *sess_priv = get_sym_session_private_data(sess, dev->driver_id);
+
+       CPT_PMD_INIT_FUNC_TRACE();
+       if (sess_priv) {
+               memset(sess_priv, 0, otx_cpt_get_session_size(dev));
+               struct rte_mempool *sess_mp = rte_mempool_from_obj(sess_priv);
+               set_sym_session_private_data(sess, dev->driver_id, NULL);
+               rte_mempool_put(sess_mp, sess_priv);
+       }
+}
+
+static uint16_t
+otx_cpt_pkt_enqueue(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops)
+{
+       struct cpt_instance *instance = (struct cpt_instance *)qptr;
+       uint16_t count = 0;
+       int ret;
+       struct cpt_vf *cptvf = (struct cpt_vf *)instance;
+       struct pending_queue *pqueue = &cptvf->pqueue;
+
+       count = DEFAULT_CMD_QLEN - pqueue->pending_count;
+       if (nb_ops > count)
+               nb_ops = count;
+
+       count = 0;
+       while (likely(count < nb_ops)) {
+               ret = cpt_pmd_crypto_operation(instance, ops[count], pqueue,
+                                               otx_cryptodev_driver_id);
+               if (unlikely(ret))
+                       break;
+               count++;
+       }
+       otx_cpt_ring_dbell(instance, count);
+       return count;
+}
+
+static uint16_t
+otx_cpt_pkt_dequeue(void *qptr, struct rte_crypto_op **ops, uint16_t nb_ops)
+{
+       struct cpt_instance *instance = (struct cpt_instance *)qptr;
+       struct cpt_vf *cptvf = (struct cpt_vf *)instance;
+       struct pending_queue *pqueue = &cptvf->pqueue;
+       uint16_t nb_completed, i = 0;
+       uint8_t compcode[nb_ops];
+
+       nb_completed = cpt_dequeue_burst(instance, nb_ops,
+                                        (void **)ops, compcode, pqueue);
+       while (likely(i < nb_completed)) {
+               struct rte_crypto_op *cop;
+               void *metabuf;
+               uintptr_t *rsp;
+               uint8_t status;
+
+               rsp = (void *)ops[i];
+               status = compcode[i];
+               if (likely((i + 1) < nb_completed))
+                       rte_prefetch0(ops[i+1]);
+               metabuf = (void *)rsp[0];
+               cop = (void *)rsp[1];
+
+               ops[i] = cop;
+
+               if (likely(status == 0)) {
+                       if (likely(!rsp[2]))
+                               cop->status =
+                                       RTE_CRYPTO_OP_STATUS_SUCCESS;
+                       else
+                               compl_auth_verify(cop, (uint8_t *)rsp[2],
+                                                 rsp[3]);
+               } else if (status == ERR_GC_ICV_MISCOMPARE) {
+                       /* auth data mismatch */
+                       cop->status = RTE_CRYPTO_OP_STATUS_AUTH_FAILED;
+               } else {
+                       cop->status = RTE_CRYPTO_OP_STATUS_ERROR;
+               }
+               free_op_meta(metabuf, cptvf->meta_info.cptvf_meta_pool);
+               i++;
+       }
+       return nb_completed;
+}
+
+static struct rte_cryptodev_ops cptvf_ops = {
+       /* Device related operations */
+       .dev_configure = otx_cpt_dev_config,
+       .dev_start = otx_cpt_dev_start,
+       .dev_stop = otx_cpt_dev_stop,
+       .dev_close = otx_cpt_dev_close,
+       .dev_infos_get = otx_cpt_dev_info_get,
+
+       .stats_get = otx_cpt_stats_get,
+       .stats_reset = otx_cpt_stats_reset,
+       .queue_pair_setup = otx_cpt_que_pair_setup,
+       .queue_pair_release = otx_cpt_que_pair_release,
+       .queue_pair_count = NULL,
+
+       /* Crypto related operations */
+       .sym_session_get_size = otx_cpt_get_session_size,
+       .sym_session_configure = otx_cpt_session_cfg,
+       .sym_session_clear = otx_cpt_session_clear
+};
+
+static void
+otx_cpt_common_vars_init(struct cpt_vf *cptvf)
+{
+       cptvf->meta_info.cptvf_meta_pool = otx_cpt_meta_pool;
+       cptvf->meta_info.cptvf_op_mlen = otx_cpt_op_mlen;
+       cptvf->meta_info.cptvf_op_sb_mlen = otx_cpt_op_sb_mlen;
+}
+
+int
+otx_cpt_dev_create(struct rte_cryptodev *c_dev)
+{
+       struct rte_pci_device *pdev = RTE_DEV_TO_PCI(c_dev->device);
+       struct cpt_vf *cptvf = NULL;
+       void *reg_base;
+       char dev_name[32];
+       int ret;
+
+       if (pdev->mem_resource[0].phys_addr == 0ULL)
+               return -EIO;
+
+       /* for secondary processes, we don't initialise any further as primary
+        * has already done this work.
+        */
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return 0;
+
+       cptvf = rte_zmalloc_socket("otx_cryptodev_private_mem",
+                       sizeof(struct cpt_vf), RTE_CACHE_LINE_SIZE,
+                       rte_socket_id());
+
+       if (cptvf == NULL) {
+               CPT_LOG_ERR("Cannot allocate memory for device private data");
+               return -ENOMEM;
+       }
+
+       snprintf(dev_name, 32, "%02x:%02x.%x",
+                       pdev->addr.bus, pdev->addr.devid, pdev->addr.function);
+
+       reg_base = pdev->mem_resource[0].addr;
+       if (!reg_base) {
+               CPT_LOG_ERR("Failed to map BAR0 of %s", dev_name);
+               ret = -ENODEV;
+               goto fail;
+       }
+
+       ret = otx_cpt_hw_init(cptvf, pdev, reg_base, dev_name);
+       if (ret) {
+               CPT_LOG_ERR("Failed to init cptvf %s", dev_name);
+               ret = -EIO;
+               goto fail;
+       }
+
+       /* Start off timer for mailbox interrupts */
+       otx_cpt_periodic_alarm_start(cptvf);
+
+       rte_spinlock_lock(&otx_probe_count_lock);
+       if (!otx_cryptodev_probe_count) {
+               ret = init_global_resources();
+               if (ret) {
+                       rte_spinlock_unlock(&otx_probe_count_lock);
+                       goto init_fail;
+               }
+       }
+       otx_cryptodev_probe_count++;
+       rte_spinlock_unlock(&otx_probe_count_lock);
+
+       /* Initialize data path variables used by common code */
+       otx_cpt_common_vars_init(cptvf);
+
+       c_dev->dev_ops = &cptvf_ops;
+
+       c_dev->enqueue_burst = otx_cpt_pkt_enqueue;
+       c_dev->dequeue_burst = otx_cpt_pkt_dequeue;
+
+       c_dev->feature_flags = RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO |
+                       RTE_CRYPTODEV_FF_HW_ACCELERATED |
+                       RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING |
+                       RTE_CRYPTODEV_FF_IN_PLACE_SGL |
+                       RTE_CRYPTODEV_FF_OOP_SGL_IN_LB_OUT |
+                       RTE_CRYPTODEV_FF_OOP_SGL_IN_SGL_OUT;
+
+       /* Save dev private data */
+       c_dev->data->dev_private = cptvf;
+
+       return 0;
+
+init_fail:
+       otx_cpt_periodic_alarm_stop(cptvf);
+       otx_cpt_deinit_device(cptvf);
+
+fail:
+       if (cptvf) {
+               /* Free private data allocated */
+               rte_free(cptvf);
+       }
+
+       return ret;
+}
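On the application side, the two fast-path handlers installed above are reached through the generic cryptodev burst API. A minimal sketch, assuming a configured device dev_id, queue pair 0, and prepared ops[]/deq_ops[] arrays (both names are placeholders):

uint16_t sent, done = 0;

sent = rte_cryptodev_enqueue_burst(dev_id, 0, ops, nb_ops);
while (done < sent)
        done += rte_cryptodev_dequeue_burst(dev_id, 0,
                                            deq_ops + done, sent - done);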
diff --git a/drivers/crypto/octeontx/otx_cryptodev_ops.h b/drivers/crypto/octeontx/otx_cryptodev_ops.h
new file mode 100644 (file)
index 0000000..b3efecf
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Cavium, Inc
+ */
+
+#ifndef _OTX_CRYPTODEV_OPS_H_
+#define _OTX_CRYPTODEV_OPS_H_
+
+#define OTX_CPT_MIN_HEADROOM_REQ       (24)
+#define OTX_CPT_MIN_TAILROOM_REQ       (8)
+#define CPT_NUM_QS_PER_VF              (1)
+
+void
+cleanup_global_resources(void);
+
+int
+otx_cpt_dev_create(struct rte_cryptodev *c_dev);
+
+#endif /* _OTX_CRYPTODEV_OPS_H_ */
diff --git a/drivers/crypto/octeontx/rte_pmd_octeontx_crypto_version.map b/drivers/crypto/octeontx/rte_pmd_octeontx_crypto_version.map
new file mode 100644 (file)
index 0000000..521e51f
--- /dev/null
@@ -0,0 +1,4 @@
+DPDK_18.11 {
+
+       local: *;
+};
index 45f9a33..eecb7d3 100644 (file)
 
 #if (OPENSSL_VERSION_NUMBER < 0x10100000L)
 
-#define set_rsa_params(rsa, p, q, ret) \
-       do {rsa->p = p; rsa->q = q; ret = 0; } while (0)
-
-#define set_rsa_crt_params(rsa, dmp1, dmq1, iqmp, ret) \
-       do { \
-               rsa->dmp1 = dmp1; \
-               rsa->dmq1 = dmq1; \
-               rsa->iqmp = iqmp; \
-               ret = 0; \
-       } while (0)
-
-#define set_rsa_keys(rsa, n, e, d, ret) \
-       do { \
-               rsa->n = n; rsa->e = e; rsa->d = d; ret = 0; \
-       } while (0)
-
-#define set_dh_params(dh, p, g, ret) \
-       do { \
-               dh->p = p; \
-               dh->q = NULL; \
-               dh->g = g; \
-               ret = 0; \
-       } while (0)
-
-#define set_dh_priv_key(dh, priv_key, ret) \
-       do { dh->priv_key = priv_key; ret = 0; } while (0)
-
-#define set_dsa_params(dsa, p, q, g, ret) \
-       do { dsa->p = p; dsa->q = q; dsa->g = g; ret = 0; } while (0)
-
-#define get_dh_pub_key(dh, pub_key) \
-       (pub_key = dh->pub_key)
-
-#define get_dh_priv_key(dh, priv_key) \
-       (priv_key = dh->priv_key)
-
-#define set_dsa_sign(sign, r, s) \
-       do { sign->r = r; sign->s = s; } while (0)
-
-#define get_dsa_sign(sign, r, s) \
-       do { r = sign->r; s = sign->s; } while (0)
-
-#define set_dsa_keys(dsa, pub, priv, ret) \
-       do { dsa->pub_key = pub; dsa->priv_key = priv; ret = 0; } while (0)
-
-#define set_dsa_pub_key(dsa, pub_key) \
-       (dsa->pub_key = pub_key)
-
-#define get_dsa_priv_key(dsa, priv_key) \
-       (priv_key = dsa->priv_key)
+static __rte_always_inline int
+set_rsa_params(RSA *rsa, BIGNUM *p, BIGNUM *q)
+{
+       rsa->p = p;
+       rsa->q = q;
+       return 0;
+}
+
+static __rte_always_inline int
+set_rsa_crt_params(RSA *rsa, BIGNUM *dmp1, BIGNUM *dmq1, BIGNUM *iqmp)
+{
+       rsa->dmp1 = dmp1;
+       rsa->dmq1 = dmq1;
+       rsa->iqmp = iqmp;
+       return 0;
+}
+
+static __rte_always_inline int
+set_rsa_keys(RSA *rsa, BIGNUM *n, BIGNUM *e, BIGNUM *d)
+{
+       rsa->n = n;
+       rsa->e = e;
+       rsa->d = d;
+       return 0;
+}
+
+static __rte_always_inline int
+set_dh_params(DH *dh, BIGNUM *p, BIGNUM *g)
+{
+       dh->p = p;
+       dh->q = NULL;
+       dh->g = g;
+       return 0;
+}
+
+static __rte_always_inline int
+set_dh_priv_key(DH *dh, BIGNUM *priv_key)
+{
+       dh->priv_key = priv_key;
+       return 0;
+}
+
+static __rte_always_inline int
+set_dsa_params(DSA *dsa, BIGNUM *p, BIGNUM *q, BIGNUM *g)
+{
+       dsa->p = p;
+       dsa->q = q;
+       dsa->g = g;
+       return 0;
+}
+
+static __rte_always_inline void
+get_dh_pub_key(DH *dh, const BIGNUM **pub_key)
+{
+       *pub_key = dh->pub_key;
+}
+
+static __rte_always_inline void
+get_dh_priv_key(DH *dh, const BIGNUM **priv_key)
+{
+       *priv_key = dh->priv_key;
+}
+
+static __rte_always_inline void
+set_dsa_sign(DSA_SIG *sign, BIGNUM *r, BIGNUM *s)
+{
+       sign->r = r;
+       sign->s = s;
+}
+
+static __rte_always_inline void
+get_dsa_sign(DSA_SIG *sign, const BIGNUM **r, const BIGNUM **s)
+{
+       *r = sign->r;
+       *s = sign->s;
+}
+
+static __rte_always_inline int
+set_dsa_keys(DSA *dsa, BIGNUM *pub, BIGNUM *priv)
+{
+       dsa->pub_key = pub;
+       dsa->priv_key = priv;
+       return 0;
+}
+
+static __rte_always_inline void
+set_dsa_pub_key(DSA *dsa, BIGNUM *pub)
+{
+       dsa->pub_key = pub;
+}
+
+static __rte_always_inline void
+get_dsa_priv_key(DSA *dsa, BIGNUM **priv_key)
+{
+       *priv_key = dsa->priv_key;
+}
 
 #else
 
-#define set_rsa_params(rsa, p, q, ret) \
-       (ret = !RSA_set0_factors(rsa, p, q))
+static __rte_always_inline int
+set_rsa_params(RSA *rsa, BIGNUM *p, BIGNUM *q)
+{
+       return !(RSA_set0_factors(rsa, p, q));
+}
 
-#define set_rsa_crt_params(rsa, dmp1, dmq1, iqmp, ret) \
-       (ret = !RSA_set0_crt_params(rsa, dmp1, dmq1, iqmp))
+static __rte_always_inline int
+set_rsa_crt_params(RSA *rsa, BIGNUM *dmp1, BIGNUM *dmq1, BIGNUM *iqmp)
+{
+       return !(RSA_set0_crt_params(rsa, dmp1, dmq1, iqmp));
+}
 
 /* n, e must be non-null, d can be NULL */
-#define set_rsa_keys(rsa, n, e, d, ret) \
-       (ret = !RSA_set0_key(rsa, n, e, d))
-
-#define set_dh_params(dh, p, g, ret) \
-       (ret = !DH_set0_pqg(dh, p, NULL, g))
-
-#define set_dh_priv_key(dh, priv_key, ret) \
-       (ret = !DH_set0_key(dh, NULL, priv_key))
-
-#define get_dh_pub_key(dh, pub_key) \
-       (DH_get0_key(dh_key, &pub_key, NULL))
-
-#define get_dh_priv_key(dh, priv_key) \
-       (DH_get0_key(dh_key, NULL, &priv_key))
-
-#define set_dsa_params(dsa, p, q, g, ret) \
-       (ret = !DSA_set0_pqg(dsa, p, q, g))
-
-#define set_dsa_priv_key(dsa, priv_key) \
-       (DSA_set0_key(dsa, NULL, priv_key))
-
-#define set_dsa_sign(sign, r, s) \
-       (DSA_SIG_set0(sign, r, s))
-
-#define get_dsa_sign(sign, r, s) \
-       (DSA_SIG_get0(sign, &r, &s))
-
-#define set_dsa_keys(dsa, pub, priv, ret) \
-       (ret = !DSA_set0_key(dsa, pub, priv))
-
-#define set_dsa_pub_key(dsa, pub_key) \
-       (DSA_set0_key(dsa, pub_key, NULL))
 
-#define get_dsa_priv_key(dsa, priv_key) \
-       (DSA_get0_key(dsa, NULL, &priv_key))
+static __rte_always_inline int
+set_rsa_keys(RSA *rsa, BIGNUM *n, BIGNUM *e, BIGNUM *d)
+{
+       return !(RSA_set0_key(rsa, n, e, d));
+}
+
+static __rte_always_inline int
+set_dh_params(DH *dh, BIGNUM *p, BIGNUM *g)
+{
+       return !(DH_set0_pqg(dh, p, NULL, g));
+}
+
+static __rte_always_inline int
+set_dh_priv_key(DH *dh, BIGNUM *priv_key)
+{
+       return !(DH_set0_key(dh, NULL, priv_key));
+}
+
+static __rte_always_inline void
+get_dh_pub_key(DH *dh_key, const BIGNUM **pub_key)
+{
+       DH_get0_key(dh_key, pub_key, NULL);
+}
+
+static __rte_always_inline void
+get_dh_priv_key(DH *dh_key, const BIGNUM **priv_key)
+{
+       DH_get0_key(dh_key, NULL, priv_key);
+}
+
+static __rte_always_inline int
+set_dsa_params(DSA *dsa, BIGNUM *p, BIGNUM *q, BIGNUM *g)
+{
+       return !(DSA_set0_pqg(dsa, p, q, g));
+}
+
+static __rte_always_inline void
+set_dsa_priv_key(DSA *dsa, BIGNUM *priv_key)
+{
+       DSA_set0_key(dsa, NULL, priv_key);
+}
+
+static __rte_always_inline void
+set_dsa_sign(DSA_SIG *sign, BIGNUM *r, BIGNUM *s)
+{
+       DSA_SIG_set0(sign, r, s);
+}
+
+static __rte_always_inline void
+get_dsa_sign(DSA_SIG *sign, const BIGNUM **r, const BIGNUM **s)
+{
+       DSA_SIG_get0(sign, r, s);
+}
+
+static __rte_always_inline int
+set_dsa_keys(DSA *dsa, BIGNUM *pub, BIGNUM *priv)
+{
+       return !(DSA_set0_key(dsa, pub, priv));
+}
+
+static __rte_always_inline void
+set_dsa_pub_key(DSA *dsa, BIGNUM *pub_key)
+{
+       DSA_set0_key(dsa, pub_key, NULL);
+}
+
+static __rte_always_inline void
+get_dsa_priv_key(DSA *dsa, const BIGNUM **priv_key)
+{
+       DSA_get0_key(dsa, NULL, priv_key);
+}
 
 #endif /* version < 10100000 */
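The macro-to-function conversion above replaces an output-parameter idiom with an ordinary, type-checked return value; the corresponding call-site change appears verbatim later in this patch:

/* before: ret assigned as a macro side effect */
set_rsa_params(rsa, p, q, ret);
/* after: plain function call, arguments type-checked */
ret = set_rsa_params(rsa, p, q);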
 
index 7d263ab..003116d 100644 (file)
@@ -1509,15 +1509,7 @@ process_openssl_auth_op(struct openssl_qp *qp, struct rte_crypto_op *op,
 
        srclen = op->sym->auth.data.length;
 
-       if (sess->auth.operation == RTE_CRYPTO_AUTH_OP_VERIFY)
-               dst = qp->temp_digest;
-       else {
-               dst = op->sym->auth.digest.data;
-               if (dst == NULL)
-                       dst = rte_pktmbuf_mtod_offset(mbuf_dst, uint8_t *,
-                                       op->sym->auth.data.offset +
-                                       op->sym->auth.data.length);
-       }
+       dst = qp->temp_digest;
 
        switch (sess->auth.mode) {
        case OPENSSL_AUTH_AS_AUTH:
@@ -1540,6 +1532,15 @@ process_openssl_auth_op(struct openssl_qp *qp, struct rte_crypto_op *op,
                                sess->auth.digest_length) != 0) {
                        op->status = RTE_CRYPTO_OP_STATUS_AUTH_FAILED;
                }
+       } else {
+               uint8_t *auth_dst;
+
+               auth_dst = op->sym->auth.digest.data;
+               if (auth_dst == NULL)
+                       auth_dst = rte_pktmbuf_mtod_offset(mbuf_dst, uint8_t *,
+                                       op->sym->auth.data.offset +
+                                       op->sym->auth.data.length);
+               memcpy(auth_dst, dst, sess->auth.digest_length);
        }
 
        if (status != 0)
@@ -1564,7 +1565,7 @@ process_openssl_dsa_sign_op(struct rte_crypto_op *cop,
                cop->status = RTE_CRYPTO_OP_STATUS_ERROR;
        } else {
                const BIGNUM *r = NULL, *s = NULL;
-               get_dsa_sign(sign, r, s);
+               get_dsa_sign(sign, &r, &s);
 
                op->r.length = BN_bn2bin(r, op->r.data);
                op->s.length = BN_bn2bin(s, op->s.data);
@@ -1666,7 +1667,7 @@ process_openssl_dh_op(struct rte_crypto_op *cop,
                        cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
                        return -1;
                }
-               set_dh_priv_key(dh_key, priv_key, ret);
+               ret = set_dh_priv_key(dh_key, priv_key);
                if (ret) {
                        OPENSSL_LOG(ERR, "Failed to set private key\n");
                        cop->status = RTE_CRYPTO_OP_STATUS_ERROR;
@@ -1715,7 +1716,7 @@ process_openssl_dh_op(struct rte_crypto_op *cop,
                        cop->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
                        return -1;
                }
-               set_dh_priv_key(dh_key, priv_key, ret);
+               ret = set_dh_priv_key(dh_key, priv_key);
                if (ret) {
                        OPENSSL_LOG(ERR, "Failed to set private key\n");
                        cop->status = RTE_CRYPTO_OP_STATUS_ERROR;
@@ -1743,7 +1744,7 @@ process_openssl_dh_op(struct rte_crypto_op *cop,
                                __func__, __LINE__);
 
                /* get the generated keys */
-               get_dh_pub_key(dh_key, pub_key);
+               get_dh_pub_key(dh_key, &pub_key);
 
                /* output public key */
                op->pub_key.length = BN_bn2bin(pub_key,
@@ -1758,7 +1759,7 @@ process_openssl_dh_op(struct rte_crypto_op *cop,
                                __func__, __LINE__);
 
                /* get the generated keys */
-               get_dh_priv_key(dh_key, priv_key);
+               get_dh_priv_key(dh_key, &priv_key);
 
                /* provide generated private key back to user */
                op->priv_key.length = BN_bn2bin(priv_key,
index de22843..c2b029e 100644 (file)
@@ -26,9 +26,9 @@ static const struct rte_cryptodev_capabilities openssl_pmd_capabilities[] = {
                                        .increment = 1
                                },
                                .digest_size = {
-                                       .min = 16,
+                                       .min = 1,
                                        .max = 16,
-                                       .increment = 0
+                                       .increment = 1
                                },
                                .iv_size = { 0 }
                        }, }
@@ -68,9 +68,9 @@ static const struct rte_cryptodev_capabilities openssl_pmd_capabilities[] = {
                                        .increment = 1
                                },
                                .digest_size = {
-                                       .min = 20,
+                                       .min = 1,
                                        .max = 20,
-                                       .increment = 0
+                                       .increment = 1
                                },
                                .iv_size = { 0 }
                        }, }
@@ -110,9 +110,9 @@ static const struct rte_cryptodev_capabilities openssl_pmd_capabilities[] = {
                                        .increment = 1
                                },
                                .digest_size = {
-                                       .min = 28,
+                                       .min = 1,
                                        .max = 28,
-                                       .increment = 0
+                                       .increment = 1
                                },
                                .iv_size = { 0 }
                        }, }
@@ -131,9 +131,9 @@ static const struct rte_cryptodev_capabilities openssl_pmd_capabilities[] = {
                                        .increment = 0
                                },
                                .digest_size = {
-                                       .min = 28,
+                                       .min = 1,
                                        .max = 28,
-                                       .increment = 0
+                                       .increment = 1
                                },
                                .iv_size = { 0 }
                        }, }
@@ -152,9 +152,9 @@ static const struct rte_cryptodev_capabilities openssl_pmd_capabilities[] = {
                                        .increment = 1
                                },
                                .digest_size = {
-                                       .min = 32,
+                                       .min = 1,
                                        .max = 32,
-                                       .increment = 0
+                                       .increment = 1
                                },
                                .iv_size = { 0 }
                        }, }
@@ -194,9 +194,9 @@ static const struct rte_cryptodev_capabilities openssl_pmd_capabilities[] = {
                                        .increment = 1
                                },
                                .digest_size = {
-                                       .min = 48,
+                                       .min = 1,
                                        .max = 48,
-                                       .increment = 0
+                                       .increment = 1
                                },
                                .iv_size = { 0 }
                        }, }
@@ -236,9 +236,9 @@ static const struct rte_cryptodev_capabilities openssl_pmd_capabilities[] = {
                                        .increment = 1
                                },
                                .digest_size = {
-                                       .min = 64,
+                                       .min = 1,
                                        .max = 64,
-                                       .increment = 0
+                                       .increment = 1
                                },
                                .iv_size = { 0 }
                        }, }
@@ -875,14 +875,14 @@ static int openssl_set_asym_session_parameters(
                                RSA_free(rsa);
                                goto err_rsa;
                        }
-                       set_rsa_params(rsa, p, q, ret);
+                       ret = set_rsa_params(rsa, p, q);
                        if (ret) {
                                OPENSSL_LOG(ERR,
                                        "failed to set rsa params\n");
                                RSA_free(rsa);
                                goto err_rsa;
                        }
-                       set_rsa_crt_params(rsa, dmp1, dmq1, iqmp, ret);
+                       ret = set_rsa_crt_params(rsa, dmp1, dmq1, iqmp);
                        if (ret) {
                                OPENSSL_LOG(ERR,
                                        "failed to set crt params\n");
@@ -896,7 +896,7 @@ static int openssl_set_asym_session_parameters(
                        }
                }
 
-               set_rsa_keys(rsa, n, e, d, ret);
+               ret = set_rsa_keys(rsa, n, e, d);
                if (ret) {
                        OPENSSL_LOG(ERR, "Failed to load rsa keys\n");
                        RSA_free(rsa);
@@ -1005,7 +1005,7 @@ err_rsa:
                                "failed to allocate resources\n");
                        goto err_dh;
                }
-               set_dh_params(dh, p, g, ret);
+               ret = set_dh_params(dh, p, g);
                if (ret) {
                        DH_free(dh);
                        goto err_dh;
@@ -1087,7 +1087,7 @@ err_dh:
                        goto err_dsa;
                }
 
-               set_dsa_params(dsa, p, q, g, ret);
+               ret = set_dsa_params(dsa, p, q, g);
                if (ret) {
                        DSA_free(dsa);
                        OPENSSL_LOG(ERR, "Failed to dsa params\n");
@@ -1101,7 +1101,7 @@ err_dh:
                 * both versions
                 */
                /* just set dummy public for very 1st call */
-               set_dsa_keys(dsa, pub_key, priv_key, ret);
+               ret = set_dsa_keys(dsa, pub_key, priv_key);
                if (ret) {
                        DSA_free(dsa);
                        OPENSSL_LOG(ERR, "Failed to set keys\n");
index eea08bc..7cba87d 100644 (file)
                        }, }                                            \
                }, }                                                    \
        },                                                              \
+       {       /* AES CMAC */                                          \
+               .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,                     \
+               {.sym = {                                               \
+                       .xform_type = RTE_CRYPTO_SYM_XFORM_AUTH,        \
+                       {.auth = {                                      \
+                               .algo = RTE_CRYPTO_AUTH_AES_CMAC,       \
+                               .block_size = 16,                       \
+                               .key_size = {                           \
+                                       .min = 16,                      \
+                                       .max = 16,                      \
+                                       .increment = 0                  \
+                               },                                      \
+                               .digest_size = {                        \
+                                       .min = 12,                      \
+                                       .max = 16,                      \
+                                       .increment = 4                  \
+                               }                                       \
+                       }, }                                            \
+               }, }                                                    \
+       },                                                              \
        {       /* AES CCM */                                           \
                .op = RTE_CRYPTO_OP_TYPE_SYMMETRIC,                     \
                {.sym = {                                               \
index 96f442e..c3f7004 100644 (file)
@@ -290,6 +290,7 @@ qat_sym_dev_create(struct qat_pci_device *qat_pci_dev)
                internals->qat_dev_capabilities = qat_gen1_sym_capabilities;
                break;
        case QAT_GEN2:
+       case QAT_GEN3:
                internals->qat_dev_capabilities = qat_gen2_sym_capabilities;
                break;
        default:
index d343285..5563d5b 100644 (file)
@@ -12,7 +12,7 @@
 #include "qat_sym_capabilities.h"
 #include "qat_device.h"
 
-/**< Intel(R) QAT Symmetric Crypto PMD device name */
+/** Intel(R) QAT Symmetric Crypto PMD driver name */
 #define CRYPTODEV_NAME_QAT_SYM_PMD     crypto_qat
 
 extern uint8_t cryptodev_qat_driver_id;
index 1d58220..8196e23 100644 (file)
@@ -498,6 +498,7 @@ qat_sym_session_configure_auth(struct rte_cryptodev *dev,
        struct qat_sym_dev_private *internals = dev->data->dev_private;
        uint8_t *key_data = auth_xform->key.data;
        uint8_t key_length = auth_xform->key.length;
+       session->aes_cmac = 0;
 
        switch (auth_xform->algo) {
        case RTE_CRYPTO_AUTH_SHA1_HMAC:
@@ -518,6 +519,10 @@ qat_sym_session_configure_auth(struct rte_cryptodev *dev,
        case RTE_CRYPTO_AUTH_AES_XCBC_MAC:
                session->qat_hash_alg = ICP_QAT_HW_AUTH_ALGO_AES_XCBC_MAC;
                break;
+       case RTE_CRYPTO_AUTH_AES_CMAC:
+               session->qat_hash_alg = ICP_QAT_HW_AUTH_ALGO_AES_XCBC_MAC;
+               session->aes_cmac = 1;
+               break;
        case RTE_CRYPTO_AUTH_AES_GMAC:
                if (qat_sym_validate_aes_key(auth_xform->key.length,
                                &session->qat_cipher_alg) != 0) {
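The new case above services AES-CMAC by reusing the XCBC-MAC hardware algorithm and flagging the session; the flag later steers qat_sym_do_precomputes() into the CMAC subkey path. A minimal sketch of the transform an application would submit to hit this case (key16 and the 16-byte digest are placeholder choices):

struct rte_crypto_sym_xform xform = {
        .type = RTE_CRYPTO_SYM_XFORM_AUTH,
        .auth = {
                .op = RTE_CRYPTO_AUTH_OP_GENERATE,
                .algo = RTE_CRYPTO_AUTH_AES_CMAC,
                .key = { .data = key16, .length = 16 },
                .digest_length = 16,
        },
};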
@@ -555,7 +560,6 @@ qat_sym_session_configure_auth(struct rte_cryptodev *dev,
        case RTE_CRYPTO_AUTH_SHA224:
        case RTE_CRYPTO_AUTH_SHA384:
        case RTE_CRYPTO_AUTH_MD5:
-       case RTE_CRYPTO_AUTH_AES_CMAC:
        case RTE_CRYPTO_AUTH_AES_CBC_MAC:
                QAT_LOG(ERR, "Crypto: Unsupported hash alg %u",
                                auth_xform->algo);
@@ -817,6 +821,8 @@ static int qat_hash_get_digest_size(enum icp_qat_hw_auth_algo qat_hash_alg)
                return ICP_QAT_HW_SHA512_STATE1_SZ;
        case ICP_QAT_HW_AUTH_ALGO_MD5:
                return ICP_QAT_HW_MD5_STATE1_SZ;
+       case ICP_QAT_HW_AUTH_ALGO_AES_XCBC_MAC:
+               return ICP_QAT_HW_AES_XCBC_MAC_STATE1_SZ;
        case ICP_QAT_HW_AUTH_ALGO_DELIMITER:
                /* return maximum digest size in this case */
                return ICP_QAT_HW_SHA512_STATE1_SZ;
@@ -843,6 +849,8 @@ static int qat_hash_get_block_size(enum icp_qat_hw_auth_algo qat_hash_alg)
                return SHA512_CBLOCK;
        case ICP_QAT_HW_AUTH_ALGO_GALOIS_128:
                return 16;
+       case ICP_QAT_HW_AUTH_ALGO_AES_XCBC_MAC:
+               return ICP_QAT_HW_AES_BLK_SZ;
        case ICP_QAT_HW_AUTH_ALGO_MD5:
                return MD5_CBLOCK;
        case ICP_QAT_HW_AUTH_ALGO_DELIMITER:
@@ -991,11 +999,28 @@ static int partial_hash_compute(enum icp_qat_hw_auth_algo hash_alg,
 #define HMAC_OPAD_VALUE        0x5c
 #define HASH_XCBC_PRECOMP_KEY_NUM 3
 
+static const uint8_t AES_CMAC_SEED[ICP_QAT_HW_AES_128_KEY_SZ];
+
+static void aes_cmac_key_derive(uint8_t *base, uint8_t *derived)
+{
+       int i;
+
+       derived[0] = base[0] << 1;
+       for (i = 1; i < ICP_QAT_HW_AES_BLK_SZ ; i++) {
+               derived[i] = base[i] << 1;
+               derived[i - 1] |= base[i] >> 7;
+       }
+
+       if (base[0] & 0x80)
+               derived[ICP_QAT_HW_AES_BLK_SZ - 1] ^= QAT_AES_CMAC_CONST_RB;
+}
+
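aes_cmac_key_derive() above is the subkey "doubling" step of RFC 4493: shift the 16-byte block left by one bit and, if the original most-significant bit was set, fold the constant Rb (0x87, which QAT_AES_CMAC_CONST_RB is assumed to equal) into the last byte. An equivalent standalone sketch, scanning from the low end with an explicit carry:

static void
cmac_double(const uint8_t in[16], uint8_t out[16])
{
        uint8_t carry = 0;
        int i;

        /* shift the whole 128-bit block left by one bit */
        for (i = 15; i >= 0; i--) {
                out[i] = (uint8_t)((in[i] << 1) | carry);
                carry = in[i] >> 7;
        }
        if (in[0] & 0x80)       /* MSB was set: reduce by Rb */
                out[15] ^= 0x87;
}

In the precompute path below, K1 is derived from AES_K(0^128) and K2 from K1, matching the two aes_cmac_key_derive() calls.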
 static int qat_sym_do_precomputes(enum icp_qat_hw_auth_algo hash_alg,
                                const uint8_t *auth_key,
                                uint16_t auth_keylen,
                                uint8_t *p_state_buf,
-                               uint16_t *p_state_len)
+                               uint16_t *p_state_len,
+                               uint8_t aes_cmac)
 {
        int block_size;
        uint8_t ipad[qat_hash_get_block_size(ICP_QAT_HW_AUTH_ALGO_DELIMITER)];
@@ -1003,47 +1028,91 @@ static int qat_sym_do_precomputes(enum icp_qat_hw_auth_algo hash_alg,
        int i;
 
        if (hash_alg == ICP_QAT_HW_AUTH_ALGO_AES_XCBC_MAC) {
-               static uint8_t qat_aes_xcbc_key_seed[
-                                       ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ] = {
-                       0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
-                       0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
-                       0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
-                       0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
-                       0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
-                       0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
-               };
 
-               uint8_t *in = NULL;
-               uint8_t *out = p_state_buf;
-               int x;
-               AES_KEY enc_key;
+               /* CMAC */
+               if (aes_cmac) {
+                       AES_KEY enc_key;
+                       uint8_t *in = NULL;
+                       uint8_t k0[ICP_QAT_HW_AES_128_KEY_SZ];
+                       uint8_t *k1, *k2;
 
-               in = rte_zmalloc("working mem for key",
-                               ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ, 16);
-               if (in == NULL) {
-                       QAT_LOG(ERR, "Failed to alloc memory");
-                       return -ENOMEM;
-               }
+                       auth_keylen = ICP_QAT_HW_AES_128_KEY_SZ;
+
+                       in = rte_zmalloc("AES CMAC K1",
+                                        ICP_QAT_HW_AES_128_KEY_SZ, 16);
+
+                       if (in == NULL) {
+                               QAT_LOG(ERR, "Failed to alloc memory");
+                               return -ENOMEM;
+                       }
+
+                       rte_memcpy(in, AES_CMAC_SEED,
+                                  ICP_QAT_HW_AES_128_KEY_SZ);
+                       rte_memcpy(p_state_buf, auth_key, auth_keylen);
 
-               rte_memcpy(in, qat_aes_xcbc_key_seed,
-                               ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ);
-               for (x = 0; x < HASH_XCBC_PRECOMP_KEY_NUM; x++) {
                        if (AES_set_encrypt_key(auth_key, auth_keylen << 3,
                                &enc_key) != 0) {
-                               rte_free(in -
-                                       (x * ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ));
-                               memset(out -
-                                       (x * ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ),
-                                       0, ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ);
+                               rte_free(in);
                                return -EFAULT;
                        }
-                       AES_encrypt(in, out, &enc_key);
-                       in += ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ;
-                       out += ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ;
+
+                       AES_encrypt(in, k0, &enc_key);
+
+                       k1 = p_state_buf + ICP_QAT_HW_AES_XCBC_MAC_STATE1_SZ;
+                       k2 = k1 + ICP_QAT_HW_AES_XCBC_MAC_STATE1_SZ;
+
+                       aes_cmac_key_derive(k0, k1);
+                       aes_cmac_key_derive(k1, k2);
+
+                       memset(k0, 0, ICP_QAT_HW_AES_128_KEY_SZ);
+                       *p_state_len = ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ;
+                       rte_free(in);
+                       return 0;
+               } else {
+                       static uint8_t qat_aes_xcbc_key_seed[
+                                       ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ] = {
+                               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+                               0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+                               0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+                               0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+                               0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+                               0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+                       };
+
+                       uint8_t *in = NULL;
+                       uint8_t *out = p_state_buf;
+                       int x;
+                       AES_KEY enc_key;
+
+                       in = rte_zmalloc("working mem for key",
+                                       ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ, 16);
+                       if (in == NULL) {
+                               QAT_LOG(ERR, "Failed to alloc memory");
+                               return -ENOMEM;
+                       }
+
+                       rte_memcpy(in, qat_aes_xcbc_key_seed,
+                                       ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ);
+                       for (x = 0; x < HASH_XCBC_PRECOMP_KEY_NUM; x++) {
+                               if (AES_set_encrypt_key(auth_key,
+                                                       auth_keylen << 3,
+                                                       &enc_key) != 0) {
+                                       rte_free(in -
+                                         (x * ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ));
+                                       memset(out -
+                                          (x * ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ),
+                                         0, ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ);
+                                       return -EFAULT;
+                               }
+                               AES_encrypt(in, out, &enc_key);
+                               in += ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ;
+                               out += ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ;
+                       }
+                       *p_state_len = ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ;
+                       rte_free(in - x*ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ);
+                       return 0;
                }
-               *p_state_len = ICP_QAT_HW_AES_XCBC_MAC_STATE2_SZ;
-               rte_free(in - x*ICP_QAT_HW_AES_XCBC_MAC_KEY_SZ);
-               return 0;
+
        } else if ((hash_alg == ICP_QAT_HW_AUTH_ALGO_GALOIS_128) ||
                (hash_alg == ICP_QAT_HW_AUTH_ALGO_GALOIS_64)) {
                uint8_t *in = NULL;
@@ -1417,7 +1486,9 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc,
 
        if (cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_SNOW_3G_UIA2
                || cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_KASUMI_F9
-               || cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_ZUC_3G_128_EIA3)
+               || cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_ZUC_3G_128_EIA3
+               || cdesc->qat_hash_alg == ICP_QAT_HW_AUTH_ALGO_AES_XCBC_MAC
+                       )
                hash->auth_counter.counter = 0;
        else
                hash->auth_counter.counter = rte_bswap32(
@@ -1430,40 +1501,45 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc,
         */
        switch (cdesc->qat_hash_alg) {
        case ICP_QAT_HW_AUTH_ALGO_SHA1:
-               if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA1,
-                       authkey, authkeylen, cdesc->cd_cur_ptr, &state1_size)) {
+               if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA1, authkey,
+                       authkeylen, cdesc->cd_cur_ptr, &state1_size,
+                       cdesc->aes_cmac)) {
                        QAT_LOG(ERR, "(SHA)precompute failed");
                        return -EFAULT;
                }
                state2_size = RTE_ALIGN_CEIL(ICP_QAT_HW_SHA1_STATE2_SZ, 8);
                break;
        case ICP_QAT_HW_AUTH_ALGO_SHA224:
-               if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA224,
-                       authkey, authkeylen, cdesc->cd_cur_ptr, &state1_size)) {
+               if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA224, authkey,
+                       authkeylen, cdesc->cd_cur_ptr, &state1_size,
+                       cdesc->aes_cmac)) {
                        QAT_LOG(ERR, "(SHA)precompute failed");
                        return -EFAULT;
                }
                state2_size = ICP_QAT_HW_SHA224_STATE2_SZ;
                break;
        case ICP_QAT_HW_AUTH_ALGO_SHA256:
-               if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA256,
-                       authkey, authkeylen, cdesc->cd_cur_ptr, &state1_size)) {
+               if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA256, authkey,
+                       authkeylen, cdesc->cd_cur_ptr, &state1_size,
+                       cdesc->aes_cmac)) {
                        QAT_LOG(ERR, "(SHA)precompute failed");
                        return -EFAULT;
                }
                state2_size = ICP_QAT_HW_SHA256_STATE2_SZ;
                break;
        case ICP_QAT_HW_AUTH_ALGO_SHA384:
-               if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA384,
-                       authkey, authkeylen, cdesc->cd_cur_ptr, &state1_size)) {
+               if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA384, authkey,
+                       authkeylen, cdesc->cd_cur_ptr, &state1_size,
+                       cdesc->aes_cmac)) {
                        QAT_LOG(ERR, "(SHA)precompute failed");
                        return -EFAULT;
                }
                state2_size = ICP_QAT_HW_SHA384_STATE2_SZ;
                break;
        case ICP_QAT_HW_AUTH_ALGO_SHA512:
-               if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA512,
-                       authkey, authkeylen, cdesc->cd_cur_ptr, &state1_size)) {
+               if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_SHA512, authkey,
+                       authkeylen, cdesc->cd_cur_ptr, &state1_size,
+                       cdesc->aes_cmac)) {
                        QAT_LOG(ERR, "(SHA)precompute failed");
                        return -EFAULT;
                }
@@ -1471,10 +1547,16 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc,
                break;
        case ICP_QAT_HW_AUTH_ALGO_AES_XCBC_MAC:
                state1_size = ICP_QAT_HW_AES_XCBC_MAC_STATE1_SZ;
+
+               if (cdesc->aes_cmac)
+                       memset(cdesc->cd_cur_ptr, 0, state1_size);
                if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_AES_XCBC_MAC,
                        authkey, authkeylen, cdesc->cd_cur_ptr + state1_size,
-                       &state2_size)) {
-                       QAT_LOG(ERR, "(XCBC)precompute failed");
+                       &state2_size, cdesc->aes_cmac)) {
+                       QAT_LOG(ERR, "(%s)precompute failed",
+                               cdesc->aes_cmac ? "CMAC" : "XCBC");
                        return -EFAULT;
                }
                break;
@@ -1482,9 +1564,9 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc,
        case ICP_QAT_HW_AUTH_ALGO_GALOIS_64:
                qat_proto_flag = QAT_CRYPTO_PROTO_FLAG_GCM;
                state1_size = ICP_QAT_HW_GALOIS_128_STATE1_SZ;
-               if (qat_sym_do_precomputes(cdesc->qat_hash_alg,
-                       authkey, authkeylen, cdesc->cd_cur_ptr + state1_size,
-                       &state2_size)) {
+               if (qat_sym_do_precomputes(cdesc->qat_hash_alg, authkey,
+                       authkeylen, cdesc->cd_cur_ptr + state1_size,
+                       &state2_size, cdesc->aes_cmac)) {
                        QAT_LOG(ERR, "(GCM)precompute failed");
                        return -EFAULT;
                }
@@ -1542,9 +1624,9 @@ int qat_sym_session_aead_create_cd_auth(struct qat_sym_session *cdesc,
 
                break;
        case ICP_QAT_HW_AUTH_ALGO_MD5:
-               if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_MD5,
-                       authkey, authkeylen, cdesc->cd_cur_ptr,
-                       &state1_size)) {
+               if (qat_sym_do_precomputes(ICP_QAT_HW_AUTH_ALGO_MD5, authkey,
+                       authkeylen, cdesc->cd_cur_ptr, &state1_size,
+                       cdesc->aes_cmac)) {
                        QAT_LOG(ERR, "(MD5)precompute failed");
                        return -EFAULT;
                }
index e8f51e5..43e25ce 100644 (file)
@@ -36,6 +36,8 @@
                                        ICP_QAT_HW_CIPHER_KEY_CONVERT, \
                                        ICP_QAT_HW_CIPHER_DECRYPT)
 
+#define QAT_AES_CMAC_CONST_RB 0x87
+
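
QAT_AES_CMAC_CONST_RB is the Rb constant of NIST SP 800-38B: CMAC subkeys K1 and K2 are derived from L = AES-ECB(key, 0^128) by a one-bit left shift in GF(2^128), XOR-ing Rb into the last byte whenever the shifted-out bit was set. A sketch of that doubling step; cmac_double is a hypothetical helper, not part of this patch:

	#include <stdint.h>

	/* GF(2^128) doubling used for CMAC subkey derivation (SP 800-38B) */
	static void
	cmac_double(uint8_t blk[16])
	{
		uint8_t carry = blk[0] & 0x80;
		int i;

		for (i = 0; i < 15; i++)
			blk[i] = (uint8_t)((blk[i] << 1) | (blk[i + 1] >> 7));
		blk[15] <<= 1;
		if (carry)
			blk[15] ^= 0x87; /* QAT_AES_CMAC_CONST_RB */
	}
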
 enum qat_sym_proto_flag {
        QAT_CRYPTO_PROTO_FLAG_NONE = 0,
        QAT_CRYPTO_PROTO_FLAG_CCM = 1,
@@ -75,6 +77,7 @@ struct qat_sym_session {
        uint16_t digest_length;
        rte_spinlock_t lock;    /* protects this struct */
        enum qat_device_gen min_qat_dev_gen;
+       uint8_t aes_cmac;
 };
 
 int
diff --git a/drivers/crypto/scheduler/meson.build b/drivers/crypto/scheduler/meson.build
new file mode 100644 (file)
index 0000000..c5ba2d6
--- /dev/null
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+deps += ['bus_vdev', 'reorder']
+name = 'crypto_scheduler'
+sources = files(
+       'rte_cryptodev_scheduler.c',
+       'scheduler_failover.c',
+       'scheduler_multicore.c',
+       'scheduler_pkt_size_distr.c',
+       'scheduler_pmd.c',
+       'scheduler_pmd_ops.c',
+       'scheduler_roundrobin.c',
+)
+
+headers = files(
+       'rte_cryptodev_scheduler.h',
+       'rte_cryptodev_scheduler_operations.h',
+)
index 6e4919c..a214286 100644 (file)
@@ -174,7 +174,7 @@ rte_cryptodev_scheduler_slave_attach(uint8_t scheduler_id, uint8_t slave_id)
                return -ENOTSUP;
        }
 
-       if (dev->driver_id != cryptodev_driver_id) {
+       if (dev->driver_id != cryptodev_scheduler_driver_id) {
                CR_SCHED_LOG(ERR, "Operation not supported");
                return -ENOTSUP;
        }
@@ -233,7 +233,7 @@ rte_cryptodev_scheduler_slave_detach(uint8_t scheduler_id, uint8_t slave_id)
                return -ENOTSUP;
        }
 
-       if (dev->driver_id != cryptodev_driver_id) {
+       if (dev->driver_id != cryptodev_scheduler_driver_id) {
                CR_SCHED_LOG(ERR, "Operation not supported");
                return -ENOTSUP;
        }
@@ -290,7 +290,7 @@ rte_cryptodev_scheduler_mode_set(uint8_t scheduler_id,
                return -ENOTSUP;
        }
 
-       if (dev->driver_id != cryptodev_driver_id) {
+       if (dev->driver_id != cryptodev_scheduler_driver_id) {
                CR_SCHED_LOG(ERR, "Operation not supported");
                return -ENOTSUP;
        }
@@ -308,28 +308,28 @@ rte_cryptodev_scheduler_mode_set(uint8_t scheduler_id,
        switch (mode) {
        case CDEV_SCHED_MODE_ROUNDROBIN:
                if (rte_cryptodev_scheduler_load_user_scheduler(scheduler_id,
-                               roundrobin_scheduler) < 0) {
+                               crypto_scheduler_roundrobin) < 0) {
                        CR_SCHED_LOG(ERR, "Failed to load scheduler");
                        return -1;
                }
                break;
        case CDEV_SCHED_MODE_PKT_SIZE_DISTR:
                if (rte_cryptodev_scheduler_load_user_scheduler(scheduler_id,
-                               pkt_size_based_distr_scheduler) < 0) {
+                               crypto_scheduler_pkt_size_based_distr) < 0) {
                        CR_SCHED_LOG(ERR, "Failed to load scheduler");
                        return -1;
                }
                break;
        case CDEV_SCHED_MODE_FAILOVER:
                if (rte_cryptodev_scheduler_load_user_scheduler(scheduler_id,
-                               failover_scheduler) < 0) {
+                               crypto_scheduler_failover) < 0) {
                        CR_SCHED_LOG(ERR, "Failed to load scheduler");
                        return -1;
                }
                break;
        case CDEV_SCHED_MODE_MULTICORE:
                if (rte_cryptodev_scheduler_load_user_scheduler(scheduler_id,
-                               multicore_scheduler) < 0) {
+                               crypto_scheduler_multicore) < 0) {
                        CR_SCHED_LOG(ERR, "Failed to load scheduler");
                        return -1;
                }
@@ -353,7 +353,7 @@ rte_cryptodev_scheduler_mode_get(uint8_t scheduler_id)
                return -ENOTSUP;
        }
 
-       if (dev->driver_id != cryptodev_driver_id) {
+       if (dev->driver_id != cryptodev_scheduler_driver_id) {
                CR_SCHED_LOG(ERR, "Operation not supported");
                return -ENOTSUP;
        }
@@ -375,7 +375,7 @@ rte_cryptodev_scheduler_ordering_set(uint8_t scheduler_id,
                return -ENOTSUP;
        }
 
-       if (dev->driver_id != cryptodev_driver_id) {
+       if (dev->driver_id != cryptodev_scheduler_driver_id) {
                CR_SCHED_LOG(ERR, "Operation not supported");
                return -ENOTSUP;
        }
@@ -403,7 +403,7 @@ rte_cryptodev_scheduler_ordering_get(uint8_t scheduler_id)
                return -ENOTSUP;
        }
 
-       if (dev->driver_id != cryptodev_driver_id) {
+       if (dev->driver_id != cryptodev_scheduler_driver_id) {
                CR_SCHED_LOG(ERR, "Operation not supported");
                return -ENOTSUP;
        }
@@ -425,7 +425,7 @@ rte_cryptodev_scheduler_load_user_scheduler(uint8_t scheduler_id,
                return -ENOTSUP;
        }
 
-       if (dev->driver_id != cryptodev_driver_id) {
+       if (dev->driver_id != cryptodev_scheduler_driver_id) {
                CR_SCHED_LOG(ERR, "Operation not supported");
                return -ENOTSUP;
        }
@@ -498,7 +498,7 @@ rte_cryptodev_scheduler_slaves_get(uint8_t scheduler_id, uint8_t *slaves)
                return -ENOTSUP;
        }
 
-       if (dev->driver_id != cryptodev_driver_id) {
+       if (dev->driver_id != cryptodev_scheduler_driver_id) {
                CR_SCHED_LOG(ERR, "Operation not supported");
                return -ENOTSUP;
        }
@@ -566,7 +566,7 @@ rte_cryptodev_scheduler_option_get(uint8_t scheduler_id,
                return -EINVAL;
        }
 
-       if (dev->driver_id != cryptodev_driver_id) {
+       if (dev->driver_id != cryptodev_scheduler_driver_id) {
                CR_SCHED_LOG(ERR, "Operation not supported");
                return -ENOTSUP;
        }
index 3faea40..9a72a90 100644 (file)
@@ -270,13 +270,13 @@ struct rte_cryptodev_scheduler {
 };
 
 /** Round-robin mode scheduler */
-extern struct rte_cryptodev_scheduler *roundrobin_scheduler;
+extern struct rte_cryptodev_scheduler *crypto_scheduler_roundrobin;
 /** Packet-size based distribution mode scheduler */
-extern struct rte_cryptodev_scheduler *pkt_size_based_distr_scheduler;
+extern struct rte_cryptodev_scheduler *crypto_scheduler_pkt_size_based_distr;
 /** Fail-over mode scheduler */
-extern struct rte_cryptodev_scheduler *failover_scheduler;
+extern struct rte_cryptodev_scheduler *crypto_scheduler_failover;
 /** multi-core mode scheduler */
-extern struct rte_cryptodev_scheduler *multicore_scheduler;
+extern struct rte_cryptodev_scheduler *crypto_scheduler_multicore;
 
 #ifdef __cplusplus
 }
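
Applications normally reach these renamed handles through the scheduler API rather than referencing them directly. A minimal usage sketch, assuming a scheduler vdev and one slave cryptodev already exist (both IDs are placeholders):

	#include <rte_cryptodev.h>
	#include "rte_cryptodev_scheduler.h"

	static int
	setup_roundrobin(uint8_t scheduler_id, uint8_t slave_id)
	{
		int ret;

		ret = rte_cryptodev_scheduler_slave_attach(scheduler_id, slave_id);
		if (ret < 0)
			return ret;

		/* Loads crypto_scheduler_roundrobin behind the scenes */
		return rte_cryptodev_scheduler_mode_set(scheduler_id,
				CDEV_SCHED_MODE_ROUNDROBIN);
	}
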
index ddfb5b8..3a023b8 100644 (file)
@@ -197,7 +197,7 @@ scheduler_create_private_ctx(__rte_unused struct rte_cryptodev *dev)
        return 0;
 }
 
-struct rte_cryptodev_scheduler_ops scheduler_fo_ops = {
+static struct rte_cryptodev_scheduler_ops scheduler_fo_ops = {
        slave_attach,
        slave_detach,
        scheduler_start,
@@ -208,7 +208,7 @@ struct rte_cryptodev_scheduler_ops scheduler_fo_ops = {
        NULL    /*option_get */
 };
 
-struct rte_cryptodev_scheduler fo_scheduler = {
+static struct rte_cryptodev_scheduler fo_scheduler = {
                .name = "failover-scheduler",
                .description = "scheduler which enqueues to the primary slave, "
                                "and only then enqueues to the secondary slave "
@@ -217,4 +217,4 @@ struct rte_cryptodev_scheduler fo_scheduler = {
                .ops = &scheduler_fo_ops
 };
 
-struct rte_cryptodev_scheduler *failover_scheduler = &fo_scheduler;
+struct rte_cryptodev_scheduler *crypto_scheduler_failover = &fo_scheduler;
index d410e69..7808e9a 100644 (file)
@@ -392,7 +392,7 @@ exit:
        return -1;
 }
 
-struct rte_cryptodev_scheduler_ops scheduler_mc_ops = {
+static struct rte_cryptodev_scheduler_ops scheduler_mc_ops = {
        slave_attach,
        slave_detach,
        scheduler_start,
@@ -403,11 +403,11 @@ struct rte_cryptodev_scheduler_ops scheduler_mc_ops = {
        NULL    /* option_get */
 };
 
-struct rte_cryptodev_scheduler mc_scheduler = {
+static struct rte_cryptodev_scheduler mc_scheduler = {
                .name = "multicore-scheduler",
                .description = "scheduler which will run burst across multiple cpu cores",
                .mode = CDEV_SCHED_MODE_MULTICORE,
                .ops = &scheduler_mc_ops
 };
 
-struct rte_cryptodev_scheduler *multicore_scheduler = &mc_scheduler;
+struct rte_cryptodev_scheduler *crypto_scheduler_multicore = &mc_scheduler;
index 74129b6..45c8dce 100644 (file)
@@ -398,7 +398,7 @@ scheduler_option_get(struct rte_cryptodev *dev, uint32_t option_type,
        return 0;
 }
 
-struct rte_cryptodev_scheduler_ops scheduler_ps_ops = {
+static struct rte_cryptodev_scheduler_ops scheduler_ps_ops = {
        slave_attach,
        slave_detach,
        scheduler_start,
@@ -409,7 +409,7 @@ struct rte_cryptodev_scheduler_ops scheduler_ps_ops = {
        scheduler_option_get
 };
 
-struct rte_cryptodev_scheduler psd_scheduler = {
+static struct rte_cryptodev_scheduler psd_scheduler = {
                .name = "packet-size-based-scheduler",
                .description = "scheduler which will distribute crypto op "
                                "burst based on the packet size",
@@ -417,4 +417,4 @@ struct rte_cryptodev_scheduler psd_scheduler = {
                .ops = &scheduler_ps_ops
 };
 
-struct rte_cryptodev_scheduler *pkt_size_based_distr_scheduler = &psd_scheduler;
+struct rte_cryptodev_scheduler *crypto_scheduler_pkt_size_based_distr = &psd_scheduler;
index a9221a9..20198cc 100644 (file)
@@ -14,7 +14,7 @@
 #include "rte_cryptodev_scheduler.h"
 #include "scheduler_pmd_private.h"
 
-uint8_t cryptodev_driver_id;
+uint8_t cryptodev_scheduler_driver_id;
 
 struct scheduler_init_params {
        struct rte_cryptodev_pmd_init_params def_p;
@@ -38,7 +38,7 @@ struct scheduler_init_params {
 #define RTE_CRYPTODEV_VDEV_COREMASK            ("coremask")
 #define RTE_CRYPTODEV_VDEV_CORELIST            ("corelist")
 
-const char *scheduler_valid_params[] = {
+static const char * const scheduler_valid_params[] = {
        RTE_CRYPTODEV_VDEV_NAME,
        RTE_CRYPTODEV_VDEV_SLAVE,
        RTE_CRYPTODEV_VDEV_MODE,
@@ -91,7 +91,7 @@ cryptodev_scheduler_create(const char *name,
                return -EFAULT;
        }
 
-       dev->driver_id = cryptodev_driver_id;
+       dev->driver_id = cryptodev_scheduler_driver_id;
        dev->dev_ops = rte_crypto_scheduler_pmd_ops;
 
        sched_ctx = dev->data->dev_private;
@@ -569,4 +569,4 @@ RTE_PMD_REGISTER_PARAM_STRING(CRYPTODEV_NAME_SCHEDULER_PMD,
        "slave=<name>");
 RTE_PMD_REGISTER_CRYPTO_DRIVER(scheduler_crypto_drv,
                cryptodev_scheduler_pmd_drv.driver,
-               cryptodev_driver_id);
+               cryptodev_scheduler_driver_id);
index 778071c..939105a 100644 (file)
@@ -522,7 +522,7 @@ scheduler_pmd_sym_session_clear(struct rte_cryptodev *dev,
        }
 }
 
-struct rte_cryptodev_ops scheduler_pmd_ops = {
+static struct rte_cryptodev_ops scheduler_pmd_ops = {
                .dev_configure          = scheduler_pmd_config,
                .dev_start              = scheduler_pmd_start,
                .dev_stop               = scheduler_pmd_stop,
index d5e602a..3ed480c 100644 (file)
@@ -63,7 +63,7 @@ struct scheduler_qp_ctx {
 } __rte_cache_aligned;
 
 
-extern uint8_t cryptodev_driver_id;
+extern uint8_t cryptodev_scheduler_driver_id;
 
 static __rte_always_inline uint16_t
 get_max_enqueue_order_count(struct rte_ring *order_ring, uint16_t nb_ops)
index c7082a6..9b891d9 100644 (file)
@@ -190,7 +190,7 @@ scheduler_create_private_ctx(__rte_unused struct rte_cryptodev *dev)
        return 0;
 }
 
-struct rte_cryptodev_scheduler_ops scheduler_rr_ops = {
+static struct rte_cryptodev_scheduler_ops scheduler_rr_ops = {
        slave_attach,
        slave_detach,
        scheduler_start,
@@ -201,7 +201,7 @@ struct rte_cryptodev_scheduler_ops scheduler_rr_ops = {
        NULL    /* option_get */
 };
 
-struct rte_cryptodev_scheduler scheduler = {
+static struct rte_cryptodev_scheduler scheduler = {
                .name = "roundrobin-scheduler",
                .description = "scheduler which will round robin burst across "
                                "slave crypto devices",
@@ -209,4 +209,4 @@ struct rte_cryptodev_scheduler scheduler = {
                .ops = &scheduler_rr_ops
 };
 
-struct rte_cryptodev_scheduler *roundrobin_scheduler = &scheduler;
+struct rte_cryptodev_scheduler *crypto_scheduler_roundrobin = &scheduler;
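
Taken together with the static qualifiers added to the per-mode ops tables, these renames move what used to be generically named globals (cryptodev_driver_id, roundrobin_scheduler and friends) into a crypto_scheduler_* namespace, so the scheduler PMD no longer risks symbol clashes with other crypto PMDs in static builds.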
diff --git a/drivers/crypto/zuc/meson.build b/drivers/crypto/zuc/meson.build
new file mode 100644 (file)
index 0000000..b8ca710
--- /dev/null
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+lib = cc.find_library('libsso_zuc', required: false)
+if not lib.found()
+       build = false
+else
+       ext_deps += lib
+endif
+
+sources = files('rte_zuc_pmd.c', 'rte_zuc_pmd_ops.c')
+deps += ['bus_vdev']
index f301d8d..03ad1b6 100644 (file)
@@ -6,6 +6,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV) += skeleton
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += sw
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_DSW_EVENTDEV) += dsw
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF) += octeontx
 ifeq ($(CONFIG_RTE_LIBRTE_DPAA_BUS),y)
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_DPAA_EVENTDEV) += dpaa
index ddd8552..6f93e7f 100644 (file)
@@ -34,5 +34,6 @@ LDLIBS += -lrte_mempool_dpaa
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
 LDLIBS += -lrte_eventdev -lrte_pmd_dpaa -lrte_bus_vdev
+LDLIBS += -lrte_common_dpaax
 
 include $(RTE_SDK)/mk/rte.lib.mk
index 5443ef5..1e247e4 100644 (file)
@@ -30,6 +30,7 @@
 #include <rte_dpaa_bus.h>
 #include <rte_dpaa_logs.h>
 #include <rte_cycles.h>
+#include <rte_kvargs.h>
 
 #include <dpaa_ethdev.h>
 #include "dpaa_eventdev.h"
  * One eventdev can have N event queues
  */
 
+#define DISABLE_INTR_MODE "disable_intr"
+
 static int
 dpaa_event_dequeue_timeout_ticks(struct rte_eventdev *dev, uint64_t ns,
                                 uint64_t *timeout_ticks)
 {
-       uint64_t cycles_per_second;
-
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
 
        RTE_SET_USED(dev);
 
+       uint64_t cycles_per_second;
+
        cycles_per_second = rte_get_timer_hz();
-       *timeout_ticks = ns * (cycles_per_second / NS_PER_S);
+       *timeout_ticks = (ns * cycles_per_second) / NS_PER_S;
+
+       return 0;
+}
+
+static int
+dpaa_event_dequeue_timeout_ticks_intr(struct rte_eventdev *dev, uint64_t ns,
+                                uint64_t *timeout_ticks)
+{
+       RTE_SET_USED(dev);
 
+       *timeout_ticks = ns / 1000;
        return 0;
 }
 
@@ -100,6 +113,56 @@ dpaa_event_enqueue(void *port, const struct rte_event *ev)
        return dpaa_event_enqueue_burst(port, ev, 1);
 }
 
+static void drain_4_bytes(int fd, fd_set *fdset)
+{
+       if (FD_ISSET(fd, fdset)) {
+               /* drain 4 bytes */
+               uint32_t junk;
+               ssize_t sjunk = read(qman_thread_fd(), &junk, sizeof(junk));
+               if (sjunk != sizeof(junk))
+                       DPAA_EVENTDEV_ERR("UIO irq read error");
+       }
+}
+
+static inline int
+dpaa_event_dequeue_wait(uint64_t timeout_ticks)
+{
+       int fd_qman, nfds;
+       int ret;
+       fd_set readset;
+
+       /* Go into (and back out of) IRQ mode for each select,
+        * it simplifies exit-path considerations and other
+        * potential nastiness.
+        */
+       struct timeval tv = {
+               .tv_sec = timeout_ticks / 1000000,
+               .tv_usec = timeout_ticks % 1000000
+       };
+
+       fd_qman = qman_thread_fd();
+       nfds = fd_qman + 1;
+       FD_ZERO(&readset);
+       FD_SET(fd_qman, &readset);
+
+       qman_irqsource_add(QM_PIRQ_DQRI);
+
+       ret = select(nfds, &readset, NULL, NULL, &tv);
+       if (ret < 0)
+               return ret;
+       /* Calling irqsource_remove() prior to thread_irq()
+        * means thread_irq() will not process whatever caused
+        * the interrupts, however it does ensure that, once
+        * thread_irq() re-enables interrupts, they won't fire
+        * again immediately.
+        */
+       qman_irqsource_remove(~0);
+       drain_4_bytes(fd_qman, &readset);
+       qman_thread_irq();
+
+       return ret;
+}
+
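dpaa_event_dequeue_wait() simply propagates the select() result: positive when the QMan portal fd signalled pending work, zero on timeout, negative on error. The interrupt-mode burst handler added below (dpaa_event_dequeue_burst_intr) relies on that contract, retrying the dequeue on a positive return and giving up once the wait times out.
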
 static uint16_t
 dpaa_event_dequeue_burst(void *port, struct rte_event ev[],
                         uint16_t nb_events, uint64_t timeout_ticks)
@@ -107,8 +170,8 @@ dpaa_event_dequeue_burst(void *port, struct rte_event ev[],
        int ret;
        u16 ch_id;
        void *buffers[8];
-       u32 num_frames, i;
-       uint64_t wait_time, cur_ticks, start_ticks;
+       u32 num_frames, i, irq = 0;
+       uint64_t cur_ticks = 0, wait_time_ticks = 0;
        struct dpaa_port *portal = (struct dpaa_port *)port;
        struct rte_mbuf *mbuf;
 
@@ -147,20 +210,21 @@ dpaa_event_dequeue_burst(void *port, struct rte_event ev[],
        }
        DPAA_PER_LCORE_DQRR_HELD = 0;
 
-       if (portal->timeout == DPAA_EVENT_PORT_DEQUEUE_TIMEOUT_INVALID)
-               wait_time = timeout_ticks;
+       if (timeout_ticks)
+               wait_time_ticks = timeout_ticks;
        else
-               wait_time = portal->timeout;
+               wait_time_ticks = portal->timeout_us;
 
-       /* Lets dequeue the frames */
-       start_ticks = rte_get_timer_cycles();
-       wait_time += start_ticks;
+       wait_time_ticks += rte_get_timer_cycles();
        do {
+               /* Let's dequeue the frames */
                num_frames = qman_portal_dequeue(ev, nb_events, buffers);
-               if (num_frames != 0)
+               if (irq)
+                       irq = 0;
+               if (num_frames)
                        break;
                cur_ticks = rte_get_timer_cycles();
-       } while (cur_ticks < wait_time);
+       } while (cur_ticks < wait_time_ticks);
 
        return num_frames;
 }
@@ -171,11 +235,91 @@ dpaa_event_dequeue(void *port, struct rte_event *ev, uint64_t timeout_ticks)
        return dpaa_event_dequeue_burst(port, ev, 1, timeout_ticks);
 }
 
+static uint16_t
+dpaa_event_dequeue_burst_intr(void *port, struct rte_event ev[],
+                             uint16_t nb_events, uint64_t timeout_ticks)
+{
+       int ret;
+       u16 ch_id;
+       void *buffers[8];
+       u32 num_frames, i, irq = 0;
+       uint64_t cur_ticks = 0, wait_time_ticks = 0;
+       struct dpaa_port *portal = (struct dpaa_port *)port;
+       struct rte_mbuf *mbuf;
+
+       if (unlikely(!RTE_PER_LCORE(dpaa_io))) {
+               /* Affine current thread context to a qman portal */
+               ret = rte_dpaa_portal_init((void *)0);
+               if (ret) {
+                       DPAA_EVENTDEV_ERR("Unable to initialize portal");
+                       return 0;
+               }
+       }
+
+       if (unlikely(!portal->is_port_linked)) {
+               /*
+                * Affine event queue for current thread context
+                * to a qman portal.
+                */
+               for (i = 0; i < portal->num_linked_evq; i++) {
+                       ch_id = portal->evq_info[i].ch_id;
+                       dpaa_eventq_portal_add(ch_id);
+               }
+               portal->is_port_linked = true;
+       }
+
+       /* Check if there are atomic contexts to be released */
+       i = 0;
+       while (DPAA_PER_LCORE_DQRR_SIZE) {
+               if (DPAA_PER_LCORE_DQRR_HELD & (1 << i)) {
+                       qman_dca_index(i, 0);
+                       mbuf = DPAA_PER_LCORE_DQRR_MBUF(i);
+                       mbuf->seqn = DPAA_INVALID_MBUF_SEQN;
+                       DPAA_PER_LCORE_DQRR_HELD &= ~(1 << i);
+                       DPAA_PER_LCORE_DQRR_SIZE--;
+               }
+               i++;
+       }
+       DPAA_PER_LCORE_DQRR_HELD = 0;
+
+       if (timeout_ticks)
+               wait_time_ticks = timeout_ticks;
+       else
+               wait_time_ticks = portal->timeout_us;
+
+       do {
+               /* Let's dequeue the frames */
+               num_frames = qman_portal_dequeue(ev, nb_events, buffers);
+               if (irq)
+                       irq = 0;
+               if (num_frames)
+                       break;
+               if (wait_time_ticks) { /* wait for time */
+                       if (dpaa_event_dequeue_wait(wait_time_ticks) > 0) {
+                               irq = 1;
+                               continue;
+                       }
+                       break; /* no event after waiting */
+               }
+               cur_ticks = rte_get_timer_cycles();
+       } while (cur_ticks < wait_time_ticks);
+
+       return num_frames;
+}
+
+static uint16_t
+dpaa_event_dequeue_intr(void *port,
+                       struct rte_event *ev,
+                       uint64_t timeout_ticks)
+{
+       return dpaa_event_dequeue_burst_intr(port, ev, 1, timeout_ticks);
+}
+
 static void
 dpaa_event_dev_info_get(struct rte_eventdev *dev,
                        struct rte_event_dev_info *dev_info)
 {
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
 
        RTE_SET_USED(dev);
        dev_info->driver_name = "event_dpaa";
@@ -184,7 +328,7 @@ dpaa_event_dev_info_get(struct rte_eventdev *dev,
        dev_info->max_dequeue_timeout_ns =
                DPAA_EVENT_MAX_DEQUEUE_TIMEOUT;
        dev_info->dequeue_timeout_ns =
-               DPAA_EVENT_MIN_DEQUEUE_TIMEOUT;
+               DPAA_EVENT_PORT_DEQUEUE_TIMEOUT_NS;
        dev_info->max_event_queues =
                DPAA_EVENT_MAX_QUEUES;
        dev_info->max_event_queue_flows =
@@ -220,8 +364,7 @@ dpaa_event_dev_configure(const struct rte_eventdev *dev)
        int ret, i;
        uint32_t *ch_id;
 
-       EVENTDEV_DRV_FUNC_TRACE();
-
+       EVENTDEV_INIT_FUNC_TRACE();
        priv->dequeue_timeout_ns = conf->dequeue_timeout_ns;
        priv->nb_events_limit = conf->nb_events_limit;
        priv->nb_event_queues = conf->nb_event_queues;
@@ -231,26 +374,18 @@ dpaa_event_dev_configure(const struct rte_eventdev *dev)
        priv->nb_event_port_enqueue_depth = conf->nb_event_port_enqueue_depth;
        priv->event_dev_cfg = conf->event_dev_cfg;
 
-       /* Check dequeue timeout method is per dequeue or global */
-       if (priv->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT) {
-               /*
-                * Use timeout value as given in dequeue operation.
-                * So invalidating this timetout value.
-                */
-               priv->dequeue_timeout_ns = 0;
-       }
-
        ch_id = rte_malloc("dpaa-channels",
                          sizeof(uint32_t) * priv->nb_event_queues,
                          RTE_CACHE_LINE_SIZE);
        if (ch_id == NULL) {
-               EVENTDEV_DRV_ERR("Fail to allocate memory for dpaa channels\n");
+               DPAA_EVENTDEV_ERR("Failed to allocate memory for dpaa channels\n");
                return -ENOMEM;
        }
        /* Create requested event queues within the given event device */
        ret = qman_alloc_pool_range(ch_id, priv->nb_event_queues, 1, 0);
        if (ret < 0) {
-               EVENTDEV_DRV_ERR("Failed to create internal channel\n");
+               DPAA_EVENTDEV_ERR("qman_alloc_pool_range %u, err=%d\n",
+                                priv->nb_event_queues, ret);
                rte_free(ch_id);
                return ret;
        }
@@ -260,30 +395,41 @@ dpaa_event_dev_configure(const struct rte_eventdev *dev)
        /* Let's prepare event ports */
        memset(&priv->ports[0], 0,
              sizeof(struct dpaa_port) * priv->nb_event_ports);
+
+       /* Check dequeue timeout method is per dequeue or global */
        if (priv->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT) {
-               for (i = 0; i < priv->nb_event_ports; i++) {
-                       priv->ports[i].timeout =
-                               DPAA_EVENT_PORT_DEQUEUE_TIMEOUT_INVALID;
-               }
-       } else if (priv->dequeue_timeout_ns == 0) {
-               for (i = 0; i < priv->nb_event_ports; i++) {
-                       dpaa_event_dequeue_timeout_ticks(NULL,
-                               DPAA_EVENT_PORT_DEQUEUE_TIMEOUT_NS,
-                               &priv->ports[i].timeout);
-               }
+               /*
+                * Use the timeout value given in each dequeue operation,
+                * so invalidate the global timeout value here.
+                */
+               priv->dequeue_timeout_ns = 0;
+
+       } else if (conf->dequeue_timeout_ns == 0) {
+               priv->dequeue_timeout_ns = DPAA_EVENT_PORT_DEQUEUE_TIMEOUT_NS;
        } else {
-               for (i = 0; i < priv->nb_event_ports; i++) {
-                       dpaa_event_dequeue_timeout_ticks(NULL,
-                               priv->dequeue_timeout_ns,
-                               &priv->ports[i].timeout);
+               priv->dequeue_timeout_ns = conf->dequeue_timeout_ns;
+       }
+
+       for (i = 0; i < priv->nb_event_ports; i++) {
+               if (priv->intr_mode) {
+                       priv->ports[i].timeout_us =
+                               priv->dequeue_timeout_ns/1000;
+               } else {
+                       uint64_t cycles_per_second;
+
+                       cycles_per_second = rte_get_timer_hz();
+                       priv->ports[i].timeout_us =
+                               (priv->dequeue_timeout_ns * cycles_per_second)
+                                       / NS_PER_S;
                }
        }
+
        /*
         * TODO: Currently portals are affined with threads. Maximum threads
         * can be created equals to number of lcore.
         */
        rte_free(ch_id);
-       EVENTDEV_DRV_LOG("Configured eventdev devid=%d", dev->data->dev_id);
+       DPAA_EVENTDEV_INFO("Configured eventdev devid=%d", dev->data->dev_id);
 
        return 0;
 }
@@ -291,7 +437,7 @@ dpaa_event_dev_configure(const struct rte_eventdev *dev)
 static int
 dpaa_event_dev_start(struct rte_eventdev *dev)
 {
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
        RTE_SET_USED(dev);
 
        return 0;
@@ -300,14 +446,14 @@ dpaa_event_dev_start(struct rte_eventdev *dev)
 static void
 dpaa_event_dev_stop(struct rte_eventdev *dev)
 {
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
        RTE_SET_USED(dev);
 }
 
 static int
 dpaa_event_dev_close(struct rte_eventdev *dev)
 {
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
        RTE_SET_USED(dev);
 
        return 0;
@@ -317,7 +463,7 @@ static void
 dpaa_event_queue_def_conf(struct rte_eventdev *dev, uint8_t queue_id,
                          struct rte_event_queue_conf *queue_conf)
 {
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
 
        RTE_SET_USED(dev);
        RTE_SET_USED(queue_id);
@@ -334,14 +480,14 @@ dpaa_event_queue_setup(struct rte_eventdev *dev, uint8_t queue_id,
        struct dpaa_eventdev *priv = dev->data->dev_private;
        struct dpaa_eventq *evq_info = &priv->evq_info[queue_id];
 
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
 
        switch (queue_conf->schedule_type) {
        case RTE_SCHED_TYPE_PARALLEL:
        case RTE_SCHED_TYPE_ATOMIC:
                break;
        case RTE_SCHED_TYPE_ORDERED:
-               EVENTDEV_DRV_ERR("Schedule type is not supported.");
+               DPAA_EVENTDEV_ERR("Schedule type is not supported.");
                return -1;
        }
        evq_info->event_queue_cfg = queue_conf->event_queue_cfg;
@@ -353,7 +499,7 @@ dpaa_event_queue_setup(struct rte_eventdev *dev, uint8_t queue_id,
 static void
 dpaa_event_queue_release(struct rte_eventdev *dev, uint8_t queue_id)
 {
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
 
        RTE_SET_USED(dev);
        RTE_SET_USED(queue_id);
@@ -363,7 +509,7 @@ static void
 dpaa_event_port_default_conf_get(struct rte_eventdev *dev, uint8_t port_id,
                                 struct rte_event_port_conf *port_conf)
 {
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
 
        RTE_SET_USED(dev);
        RTE_SET_USED(port_id);
@@ -379,7 +525,7 @@ dpaa_event_port_setup(struct rte_eventdev *dev, uint8_t port_id,
 {
        struct dpaa_eventdev *eventdev = dev->data->dev_private;
 
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
 
        RTE_SET_USED(port_conf);
        dev->data->ports[port_id] = &eventdev->ports[port_id];
@@ -390,7 +536,7 @@ dpaa_event_port_setup(struct rte_eventdev *dev, uint8_t port_id,
 static void
 dpaa_event_port_release(void *port)
 {
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
 
        RTE_SET_USED(port);
 }
@@ -454,7 +600,8 @@ dpaa_event_port_unlink(struct rte_eventdev *dev, void *port,
                event_queue->event_port = NULL;
        }
 
-       event_port->num_linked_evq = event_port->num_linked_evq - i;
+       if (event_port->num_linked_evq)
+               event_port->num_linked_evq -= i;
 
        return (int)i;
 }
@@ -466,7 +613,7 @@ dpaa_event_eth_rx_adapter_caps_get(const struct rte_eventdev *dev,
 {
        const char *ethdev_driver = eth_dev->device->driver->name;
 
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
 
        RTE_SET_USED(dev);
 
@@ -491,14 +638,14 @@ dpaa_event_eth_rx_adapter_queue_add(
        struct dpaa_if *dpaa_intf = eth_dev->data->dev_private;
        int ret, i;
 
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
 
        if (rx_queue_id == -1) {
                for (i = 0; i < dpaa_intf->nb_rx_queues; i++) {
                        ret = dpaa_eth_eventq_attach(eth_dev, i, ch_id,
                                                     queue_conf);
                        if (ret) {
-                               EVENTDEV_DRV_ERR(
+                               DPAA_EVENTDEV_ERR(
                                        "Event Queue attach failed:%d\n", ret);
                                goto detach_configured_queues;
                        }
@@ -508,7 +655,7 @@ dpaa_event_eth_rx_adapter_queue_add(
 
        ret = dpaa_eth_eventq_attach(eth_dev, rx_queue_id, ch_id, queue_conf);
        if (ret)
-               EVENTDEV_DRV_ERR("dpaa_eth_eventq_attach failed:%d\n", ret);
+               DPAA_EVENTDEV_ERR("dpaa_eth_eventq_attach failed:%d\n", ret);
        return ret;
 
 detach_configured_queues:
@@ -527,14 +674,14 @@ dpaa_event_eth_rx_adapter_queue_del(const struct rte_eventdev *dev,
        int ret, i;
        struct dpaa_if *dpaa_intf = eth_dev->data->dev_private;
 
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
 
        RTE_SET_USED(dev);
        if (rx_queue_id == -1) {
                for (i = 0; i < dpaa_intf->nb_rx_queues; i++) {
                        ret = dpaa_eth_eventq_detach(eth_dev, i);
                        if (ret)
-                               EVENTDEV_DRV_ERR(
+                               DPAA_EVENTDEV_ERR(
                                        "Event Queue detach failed:%d\n", ret);
                }
 
@@ -543,7 +690,7 @@ dpaa_event_eth_rx_adapter_queue_del(const struct rte_eventdev *dev,
 
        ret = dpaa_eth_eventq_detach(eth_dev, rx_queue_id);
        if (ret)
-               EVENTDEV_DRV_ERR("dpaa_eth_eventq_detach failed:%d\n", ret);
+               DPAA_EVENTDEV_ERR("dpaa_eth_eventq_detach failed:%d\n", ret);
        return ret;
 }
 
@@ -551,7 +698,7 @@ static int
 dpaa_event_eth_rx_adapter_start(const struct rte_eventdev *dev,
                                const struct rte_eth_dev *eth_dev)
 {
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
 
        RTE_SET_USED(dev);
        RTE_SET_USED(eth_dev);
@@ -563,7 +710,7 @@ static int
 dpaa_event_eth_rx_adapter_stop(const struct rte_eventdev *dev,
                               const struct rte_eth_dev *eth_dev)
 {
-       EVENTDEV_DRV_FUNC_TRACE();
+       EVENTDEV_INIT_FUNC_TRACE();
 
        RTE_SET_USED(dev);
        RTE_SET_USED(eth_dev);
@@ -593,8 +740,44 @@ static struct rte_eventdev_ops dpaa_eventdev_ops = {
        .eth_rx_adapter_stop = dpaa_event_eth_rx_adapter_stop,
 };
 
+static int flag_check_handler(__rte_unused const char *key,
+               const char *value, __rte_unused void *opaque)
+{
+       if (strcmp(value, "1"))
+               return -1;
+
+       return 0;
+}
+
+static int
+dpaa_event_check_flags(const char *params)
+{
+       struct rte_kvargs *kvlist;
+
+       if (params == NULL || params[0] == '\0')
+               return 0;
+
+       kvlist = rte_kvargs_parse(params, NULL);
+       if (kvlist == NULL)
+               return 0;
+
+       if (!rte_kvargs_count(kvlist, DISABLE_INTR_MODE)) {
+               rte_kvargs_free(kvlist);
+               return 0;
+       }
+       /* Interrupt mode is disabled when the key-value pair disable_intr=1 is present */
+       if (rte_kvargs_process(kvlist, DISABLE_INTR_MODE,
+                               flag_check_handler, NULL) < 0) {
+               rte_kvargs_free(kvlist);
+               return 0;
+       }
+       rte_kvargs_free(kvlist);
+
+       return 1;
+}
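
With this parser in place, interrupt mode is opt-out through the vdev device arguments. For example (a hypothetical EAL invocation), passing

	--vdev=event_dpaa1,disable_intr=1

creates the event device with the original polling dequeue handlers, while omitting the key selects the new interrupt-mode handlers (see dpaa_event_dev_create() below).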
+
 static int
-dpaa_event_dev_create(const char *name)
+dpaa_event_dev_create(const char *name, const char *params)
 {
        struct rte_eventdev *eventdev;
        struct dpaa_eventdev *priv;
@@ -603,21 +786,30 @@ dpaa_event_dev_create(const char *name)
                                           sizeof(struct dpaa_eventdev),
                                           rte_socket_id());
        if (eventdev == NULL) {
-               EVENTDEV_DRV_ERR("Failed to create eventdev vdev %s", name);
+               DPAA_EVENTDEV_ERR("Failed to create eventdev vdev %s", name);
                goto fail;
        }
+       priv = eventdev->data->dev_private;
 
        eventdev->dev_ops       = &dpaa_eventdev_ops;
        eventdev->enqueue       = dpaa_event_enqueue;
        eventdev->enqueue_burst = dpaa_event_enqueue_burst;
-       eventdev->dequeue       = dpaa_event_dequeue;
-       eventdev->dequeue_burst = dpaa_event_dequeue_burst;
+
+       if (dpaa_event_check_flags(params)) {
+               eventdev->dequeue       = dpaa_event_dequeue;
+               eventdev->dequeue_burst = dpaa_event_dequeue_burst;
+       } else {
+               priv->intr_mode = 1;
+               eventdev->dev_ops->timeout_ticks =
+                               dpaa_event_dequeue_timeout_ticks_intr;
+               eventdev->dequeue       = dpaa_event_dequeue_intr;
+               eventdev->dequeue_burst = dpaa_event_dequeue_burst_intr;
+       }
 
        /* For secondary processes, the primary has done all the work */
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return 0;
 
-       priv = eventdev->data->dev_private;
        priv->max_event_queues = DPAA_EVENT_MAX_QUEUES;
 
        return 0;
@@ -629,11 +821,14 @@ static int
 dpaa_event_dev_probe(struct rte_vdev_device *vdev)
 {
        const char *name;
+       const char *params;
 
        name = rte_vdev_device_name(vdev);
-       EVENTDEV_DRV_LOG("Initializing %s", name);
+       DPAA_EVENTDEV_INFO("Initializing %s", name);
+
+       params = rte_vdev_device_args(vdev);
 
-       return dpaa_event_dev_create(name);
+       return dpaa_event_dev_create(name, params);
 }
 
 static int
@@ -642,7 +837,7 @@ dpaa_event_dev_remove(struct rte_vdev_device *vdev)
        const char *name;
 
        name = rte_vdev_device_name(vdev);
-       EVENTDEV_DRV_LOG("Closing %s", name);
+       DPAA_EVENTDEV_INFO("Closing %s", name);
 
        return rte_event_pmd_vdev_uninit(name);
 }
@@ -653,3 +848,5 @@ static struct rte_vdev_driver vdev_eventdev_dpaa_pmd = {
 };
 
 RTE_PMD_REGISTER_VDEV(EVENTDEV_NAME_DPAA_PMD, vdev_eventdev_dpaa_pmd);
+RTE_PMD_REGISTER_PARAM_STRING(EVENTDEV_NAME_DPAA_PMD,
+               DISABLE_INTR_MODE "=<int>");
index 583e46c..8134e6b 100644 (file)
 
 #define EVENTDEV_NAME_DPAA_PMD         event_dpaa1
 
-#define EVENTDEV_DRV_LOG(fmt, args...) \
-               DPAA_EVENTDEV_INFO(fmt, ## args)
-#define EVENTDEV_DRV_FUNC_TRACE()      \
-               DPAA_EVENTDEV_DEBUG("%s() Called:\n", __func__)
-#define EVENTDEV_DRV_ERR(fmt, args...) \
-               DPAA_EVENTDEV_ERR("%s(): " fmt "\n", __func__, ## args)
-
-#define DPAA_EVENT_MAX_PORTS                   8
-#define DPAA_EVENT_MAX_QUEUES                  16
+#define DPAA_EVENT_MAX_PORTS                   4
+#define DPAA_EVENT_MAX_QUEUES                  8
 #define DPAA_EVENT_MIN_DEQUEUE_TIMEOUT 1
 #define DPAA_EVENT_MAX_DEQUEUE_TIMEOUT (UINT32_MAX - 1)
 #define DPAA_EVENT_MAX_QUEUE_FLOWS             2048
@@ -28,7 +21,7 @@
 #define DPAA_EVENT_MAX_EVENT_PRIORITY_LEVELS   0
 #define DPAA_EVENT_MAX_EVENT_PORT              RTE_MIN(RTE_MAX_LCORE, INT8_MAX)
 #define DPAA_EVENT_MAX_PORT_DEQUEUE_DEPTH      8
-#define DPAA_EVENT_PORT_DEQUEUE_TIMEOUT_NS     100UL
+#define DPAA_EVENT_PORT_DEQUEUE_TIMEOUT_NS     100000UL
 #define DPAA_EVENT_PORT_DEQUEUE_TIMEOUT_INVALID        ((uint64_t)-1)
 #define DPAA_EVENT_MAX_PORT_ENQUEUE_DEPTH      1
 #define DPAA_EVENT_MAX_NUM_EVENTS              (INT32_MAX - 1)
@@ -61,7 +54,7 @@ struct dpaa_port {
        struct dpaa_eventq evq_info[DPAA_EVENT_MAX_QUEUES];
        uint8_t num_linked_evq;
        uint8_t is_port_linked;
-       uint64_t timeout;
+       uint64_t timeout_us;
 };
 
 struct dpaa_eventdev {
@@ -72,7 +65,7 @@ struct dpaa_eventdev {
        uint8_t max_event_queues;
        uint8_t nb_event_queues;
        uint8_t nb_event_ports;
-       uint8_t resvd;
+       uint8_t intr_mode;
        uint32_t nb_event_queue_flows;
        uint32_t nb_event_port_dequeue_depth;
        uint32_t nb_event_port_enqueue_depth;
index 5e1a632..e0134cc 100644 (file)
@@ -21,13 +21,19 @@ CFLAGS += -I$(RTE_SDK)/lib/librte_eal/linuxapp/eal
 LDLIBS += -lrte_eal -lrte_eventdev
 LDLIBS += -lrte_bus_fslmc -lrte_mempool_dpaa2 -lrte_pmd_dpaa2
 LDLIBS += -lrte_bus_vdev
+LDLIBS += -lrte_common_dpaax
 CFLAGS += -I$(RTE_SDK)/drivers/net/dpaa2
 CFLAGS += -I$(RTE_SDK)/drivers/net/dpaa2/mc
 
+ifeq ($(CONFIG_RTE_LIBRTE_SECURITY),y)
+LDLIBS += -lrte_pmd_dpaa2_sec
+CFLAGS += -I$(RTE_SDK)/drivers/crypto/dpaa2_sec
+endif
+
 # versioning export map
 EXPORT_MAP := rte_pmd_dpaa2_event_version.map
 
-LIBABIVER := 1
+LIBABIVER := 2
 
 # depends on fslmc bus which uses experimental API
 CFLAGS += -DALLOW_EXPERIMENTAL_API
index ea1e5cc..8d168b0 100644 (file)
@@ -27,6 +27,7 @@
 #include <rte_pci.h>
 #include <rte_bus_vdev.h>
 #include <rte_ethdev_driver.h>
+#include <rte_cryptodev.h>
 #include <rte_event_eth_rx_adapter.h>
 
 #include <fslmc_vfio.h>
@@ -34,6 +35,9 @@
 #include <dpaa2_hw_mempool.h>
 #include <dpaa2_hw_dpio.h>
 #include <dpaa2_ethdev.h>
+#ifdef RTE_LIBRTE_SECURITY
+#include <dpaa2_sec_event.h>
+#endif
 #include "dpaa2_eventdev.h"
 #include "dpaa2_eventdev_logs.h"
 #include <portal/dpaa2_hw_pvt.h>
@@ -54,34 +58,63 @@ static uint16_t
 dpaa2_eventdev_enqueue_burst(void *port, const struct rte_event ev[],
                             uint16_t nb_events)
 {
-       struct rte_eventdev *ev_dev =
-                       ((struct dpaa2_io_portal_t *)port)->eventdev;
-       struct dpaa2_eventdev *priv = ev_dev->data->dev_private;
+
+       struct dpaa2_port *dpaa2_portal = port;
+       struct dpaa2_dpio_dev *dpio_dev;
        uint32_t queue_id = ev[0].queue_id;
-       struct evq_info_t *evq_info = &priv->evq_info[queue_id];
+       struct dpaa2_eventq *evq_info;
        uint32_t fqid;
        struct qbman_swp *swp;
        struct qbman_fd fd_arr[MAX_TX_RING_SLOTS];
        uint32_t loop, frames_to_send;
        struct qbman_eq_desc eqdesc[MAX_TX_RING_SLOTS];
        uint16_t num_tx = 0;
-       int ret;
-
-       RTE_SET_USED(port);
+       int i, n, ret;
+       uint8_t channel_index;
 
        if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+               /* Affine current thread context to a qman portal */
                ret = dpaa2_affine_qbman_swp();
-               if (ret) {
+               if (ret < 0) {
                        DPAA2_EVENTDEV_ERR("Failure in affining portal");
                        return 0;
                }
        }
-
+       /* TODO: dpaa2_portal should hold its own dpio_dev rather than a per-thread variable */
+       dpio_dev = DPAA2_PER_LCORE_DPIO;
        swp = DPAA2_PER_LCORE_PORTAL;
 
+       if (likely(dpaa2_portal->is_port_linked))
+               goto skip_linking;
+
+       /* Create mapping between portal and channel to receive packets */
+       for (i = 0; i < DPAA2_EVENT_MAX_QUEUES; i++) {
+               evq_info = &dpaa2_portal->evq_info[i];
+               if (!evq_info->event_port)
+                       continue;
+
+               ret = dpio_add_static_dequeue_channel(dpio_dev->dpio,
+                                                     CMD_PRI_LOW,
+                                                     dpio_dev->token,
+                                                     evq_info->dpcon->dpcon_id,
+                                                     &channel_index);
+               if (ret < 0) {
+                       DPAA2_EVENTDEV_ERR(
+                               "Static dequeue config failed: err(%d)", ret);
+                       goto err;
+               }
+
+               qbman_swp_push_set(swp, channel_index, 1);
+               evq_info->dpcon->channel_index = channel_index;
+       }
+       dpaa2_portal->is_port_linked = true;
+
+skip_linking:
+       evq_info = &dpaa2_portal->evq_info[queue_id];
+
        while (nb_events) {
-               frames_to_send = (nb_events >> 3) ?
-                       MAX_TX_RING_SLOTS : nb_events;
+               frames_to_send = (nb_events > dpaa2_eqcr_size) ?
+                       dpaa2_eqcr_size : nb_events;
 
                for (loop = 0; loop < frames_to_send; loop++) {
                        const struct rte_event *event = &ev[num_tx + loop];
@@ -99,14 +132,14 @@ dpaa2_eventdev_enqueue_burst(void *port, const struct rte_event ev[],
                        qbman_eq_desc_set_no_orp(&eqdesc[loop], 0);
                        qbman_eq_desc_set_response(&eqdesc[loop], 0, 0);
 
-                       if (event->mbuf->seqn) {
+                       if (event->sched_type == RTE_SCHED_TYPE_ATOMIC
+                               && event->mbuf->seqn) {
                                uint8_t dqrr_index = event->mbuf->seqn - 1;
 
                                qbman_eq_desc_set_dca(&eqdesc[loop], 1,
                                                      dqrr_index, 0);
                                DPAA2_PER_LCORE_DQRR_SIZE--;
-                               DPAA2_PER_LCORE_DQRR_HELD &=
-                                       ~(1 << dqrr_index);
+                               DPAA2_PER_LCORE_DQRR_HELD &= ~(1 << dqrr_index);
                        }
 
                        memset(&fd_arr[loop], 0, sizeof(struct qbman_fd));
@@ -116,7 +149,7 @@ dpaa2_eventdev_enqueue_burst(void *port, const struct rte_event ev[],
                         * to avoid copy
                         */
                        struct rte_event *ev_temp = rte_malloc(NULL,
-                               sizeof(struct rte_event), 0);
+                                               sizeof(struct rte_event), 0);
 
                        if (!ev_temp) {
                                if (!loop)
@@ -143,6 +176,18 @@ send_partial:
        }
 
        return num_tx;
+err:
+       for (n = 0; n < i; n++) {
+               evq_info = &dpaa2_portal->evq_info[n];
+               if (!evq_info->event_port)
+                       continue;
+               qbman_swp_push_set(swp, evq_info->dpcon->channel_index, 0);
+               dpio_remove_static_dequeue_channel(dpio_dev->dpio, 0,
+                                               dpio_dev->token,
+                                               evq_info->dpcon->dpcon_id);
+       }
+       return 0;
+
 }
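
Note that both fast-path entry points now bind the event port's DPCON channels to the calling thread's QBMAN portal lazily, on first enqueue or dequeue: each linked queue's channel is registered via dpio_add_static_dequeue_channel() and pushed with qbman_swp_push_set(), and the err: path unwinds every channel registered so far if any registration fails. This replaces the eager per-portal linking that the removed port_link()/port_unlink() code performed.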
 
 static uint16_t
@@ -197,6 +242,7 @@ static void dpaa2_eventdev_process_atomic(struct qbman_swp *swp,
        ev->mbuf->seqn = dqrr_index + 1;
        DPAA2_PER_LCORE_DQRR_SIZE++;
        DPAA2_PER_LCORE_DQRR_HELD |= 1 << dqrr_index;
+       DPAA2_PER_LCORE_DQRR_MBUF(dqrr_index) = ev->mbuf;
 }
 
 static uint16_t
@@ -204,22 +250,53 @@ dpaa2_eventdev_dequeue_burst(void *port, struct rte_event ev[],
                             uint16_t nb_events, uint64_t timeout_ticks)
 {
        const struct qbman_result *dq;
+       struct dpaa2_dpio_dev *dpio_dev = NULL;
+       struct dpaa2_port *dpaa2_portal = port;
+       struct dpaa2_eventq *evq_info;
        struct qbman_swp *swp;
        const struct qbman_fd *fd;
        struct dpaa2_queue *rxq;
-       int num_pkts = 0, ret, i = 0;
-
-       RTE_SET_USED(port);
+       int num_pkts = 0, ret, i = 0, n;
+       uint8_t channel_index;
 
        if (unlikely(!DPAA2_PER_LCORE_DPIO)) {
+               /* Affine current thread context to a qman portal */
                ret = dpaa2_affine_qbman_swp();
-               if (ret) {
+               if (ret < 0) {
                        DPAA2_EVENTDEV_ERR("Failure in affining portal");
                        return 0;
                }
        }
+
+       dpio_dev = DPAA2_PER_LCORE_DPIO;
        swp = DPAA2_PER_LCORE_PORTAL;
 
+       if (likely(dpaa2_portal->is_port_linked))
+               goto skip_linking;
+
+       /* Create mapping between portal and channel to receive packets */
+       for (i = 0; i < DPAA2_EVENT_MAX_QUEUES; i++) {
+               evq_info = &dpaa2_portal->evq_info[i];
+               if (!evq_info->event_port)
+                       continue;
+
+               ret = dpio_add_static_dequeue_channel(dpio_dev->dpio,
+                                                     CMD_PRI_LOW,
+                                                     dpio_dev->token,
+                                                     evq_info->dpcon->dpcon_id,
+                                                     &channel_index);
+               if (ret < 0) {
+                       DPAA2_EVENTDEV_ERR(
+                               "Static dequeue config failed: err(%d)", ret);
+                       goto err;
+               }
+
+               qbman_swp_push_set(swp, channel_index, 1);
+               evq_info->dpcon->channel_index = channel_index;
+       }
+       dpaa2_portal->is_port_linked = true;
+
+skip_linking:
        /* Check if there are atomic contexts to be released */
        while (DPAA2_PER_LCORE_DQRR_SIZE) {
                if (DPAA2_PER_LCORE_DQRR_HELD & (1 << i)) {
@@ -258,6 +335,18 @@ dpaa2_eventdev_dequeue_burst(void *port, struct rte_event ev[],
        } while (num_pkts < nb_events);
 
        return num_pkts;
+err:
+       for (n = 0; n < i; n++) {
+               evq_info = &dpaa2_portal->evq_info[n];
+               if (!evq_info->event_port)
+                       continue;
+
+               qbman_swp_push_set(swp, evq_info->dpcon->channel_index, 0);
+               dpio_remove_static_dequeue_channel(dpio_dev->dpio, 0,
+                                                       dpio_dev->token,
+                                               evq_info->dpcon->dpcon_id);
+       }
+       return 0;
 }
 
 static uint16_t
@@ -283,7 +372,7 @@ dpaa2_eventdev_info_get(struct rte_eventdev *dev,
        dev_info->max_dequeue_timeout_ns =
                DPAA2_EVENT_MAX_DEQUEUE_TIMEOUT;
        dev_info->dequeue_timeout_ns =
-               DPAA2_EVENT_MIN_DEQUEUE_TIMEOUT;
+               DPAA2_EVENT_PORT_DEQUEUE_TIMEOUT_NS;
        dev_info->max_event_queues = priv->max_event_queues;
        dev_info->max_event_queue_flows =
                DPAA2_EVENT_MAX_QUEUE_FLOWS;
@@ -292,6 +381,9 @@ dpaa2_eventdev_info_get(struct rte_eventdev *dev,
        dev_info->max_event_priority_levels =
                DPAA2_EVENT_MAX_EVENT_PRIORITY_LEVELS;
        dev_info->max_event_ports = rte_fslmc_get_device_count(DPAA2_IO);
+       /* We only support as many DPIOs as there are cores */
+       if (dev_info->max_event_ports > rte_lcore_count())
+               dev_info->max_event_ports = rte_lcore_count();
        dev_info->max_event_port_dequeue_depth =
                DPAA2_EVENT_MAX_PORT_DEQUEUE_DEPTH;
        dev_info->max_event_port_enqueue_depth =
@@ -313,7 +405,6 @@ dpaa2_eventdev_configure(const struct rte_eventdev *dev)
 
        EVENTDEV_INIT_FUNC_TRACE();
 
-       priv->dequeue_timeout_ns = conf->dequeue_timeout_ns;
        priv->nb_event_queues = conf->nb_event_queues;
        priv->nb_event_ports = conf->nb_event_ports;
        priv->nb_event_queue_flows = conf->nb_event_queue_flows;
@@ -321,6 +412,20 @@ dpaa2_eventdev_configure(const struct rte_eventdev *dev)
        priv->nb_event_port_enqueue_depth = conf->nb_event_port_enqueue_depth;
        priv->event_dev_cfg = conf->event_dev_cfg;
 
+       /* Check dequeue timeout method is per dequeue or global */
+       if (priv->event_dev_cfg & RTE_EVENT_DEV_CFG_PER_DEQUEUE_TIMEOUT) {
+               /*
+                * Use the timeout value given in each dequeue operation,
+                * so invalidate the global timeout value here.
+                */
+               priv->dequeue_timeout_ns = 0;
+
+       } else if (conf->dequeue_timeout_ns == 0) {
+               priv->dequeue_timeout_ns = DPAA2_EVENT_PORT_DEQUEUE_TIMEOUT_NS;
+       } else {
+               priv->dequeue_timeout_ns = conf->dequeue_timeout_ns;
+       }
+
        DPAA2_EVENTDEV_DEBUG("Configured eventdev devid=%d",
                             dev->data->dev_id);
        return 0;
@@ -370,30 +475,38 @@ dpaa2_eventdev_queue_def_conf(struct rte_eventdev *dev, uint8_t queue_id,
        queue_conf->priority = RTE_EVENT_DEV_PRIORITY_NORMAL;
 }
 
-static void
-dpaa2_eventdev_queue_release(struct rte_eventdev *dev, uint8_t queue_id)
-{
-       EVENTDEV_INIT_FUNC_TRACE();
-
-       RTE_SET_USED(dev);
-       RTE_SET_USED(queue_id);
-}
-
 static int
 dpaa2_eventdev_queue_setup(struct rte_eventdev *dev, uint8_t queue_id,
                           const struct rte_event_queue_conf *queue_conf)
 {
        struct dpaa2_eventdev *priv = dev->data->dev_private;
-       struct evq_info_t *evq_info =
-               &priv->evq_info[queue_id];
+       struct dpaa2_eventq *evq_info = &priv->evq_info[queue_id];
 
        EVENTDEV_INIT_FUNC_TRACE();
 
+       switch (queue_conf->schedule_type) {
+       case RTE_SCHED_TYPE_PARALLEL:
+       case RTE_SCHED_TYPE_ATOMIC:
+               break;
+       case RTE_SCHED_TYPE_ORDERED:
+               DPAA2_EVENTDEV_ERR("Schedule type is not supported.");
+               return -1;
+       }
        evq_info->event_queue_cfg = queue_conf->event_queue_cfg;
+       evq_info->event_queue_id = queue_id;
 
        return 0;
 }
 
+static void
+dpaa2_eventdev_queue_release(struct rte_eventdev *dev, uint8_t queue_id)
+{
+       EVENTDEV_INIT_FUNC_TRACE();
+
+       RTE_SET_USED(dev);
+       RTE_SET_USED(queue_id);
+}
+
 static void
 dpaa2_eventdev_port_def_conf(struct rte_eventdev *dev, uint8_t port_id,
                             struct rte_event_port_conf *port_conf)
@@ -402,7 +515,6 @@ dpaa2_eventdev_port_def_conf(struct rte_eventdev *dev, uint8_t port_id,
 
        RTE_SET_USED(dev);
        RTE_SET_USED(port_id);
-       RTE_SET_USED(port_conf);
 
        port_conf->new_event_threshold =
                DPAA2_EVENT_MAX_NUM_EVENTS;
@@ -413,56 +525,44 @@ dpaa2_eventdev_port_def_conf(struct rte_eventdev *dev, uint8_t port_id,
        port_conf->disable_implicit_release = 0;
 }
 
-static void
-dpaa2_eventdev_port_release(void *port)
-{
-       EVENTDEV_INIT_FUNC_TRACE();
-
-       RTE_SET_USED(port);
-}
-
 static int
 dpaa2_eventdev_port_setup(struct rte_eventdev *dev, uint8_t port_id,
                          const struct rte_event_port_conf *port_conf)
 {
+       char event_port_name[32];
+       struct dpaa2_port *portal;
+
        EVENTDEV_INIT_FUNC_TRACE();
 
        RTE_SET_USED(port_conf);
 
-       if (!dpaa2_io_portal[port_id].dpio_dev) {
-               dpaa2_io_portal[port_id].dpio_dev =
-                               dpaa2_get_qbman_swp(port_id);
-               rte_atomic16_inc(&dpaa2_io_portal[port_id].dpio_dev->ref_count);
-               if (!dpaa2_io_portal[port_id].dpio_dev)
-                       return -1;
+       snprintf(event_port_name, sizeof(event_port_name),
+                "event-port-%d", port_id);
+       portal = rte_malloc(event_port_name, sizeof(struct dpaa2_port), 0);
+       if (!portal) {
+               DPAA2_EVENTDEV_ERR("Memory allocation failure");
+               return -ENOMEM;
        }
 
-       dpaa2_io_portal[port_id].eventdev = dev;
-       dev->data->ports[port_id] = &dpaa2_io_portal[port_id];
+       memset(portal, 0, sizeof(struct dpaa2_port));
+       dev->data->ports[port_id] = portal;
        return 0;
 }
 
-static int
-dpaa2_eventdev_port_unlink(struct rte_eventdev *dev, void *port,
-                          uint8_t queues[], uint16_t nb_unlinks)
+static void
+dpaa2_eventdev_port_release(void *port)
 {
-       struct dpaa2_eventdev *priv = dev->data->dev_private;
-       struct dpaa2_io_portal_t *dpaa2_portal = port;
-       struct evq_info_t *evq_info;
-       int i;
+       struct dpaa2_port *portal = port;
 
        EVENTDEV_INIT_FUNC_TRACE();
 
-       for (i = 0; i < nb_unlinks; i++) {
-               evq_info = &priv->evq_info[queues[i]];
-               qbman_swp_push_set(dpaa2_portal->dpio_dev->sw_portal,
-                                  evq_info->dpcon->channel_index, 0);
-               dpio_remove_static_dequeue_channel(dpaa2_portal->dpio_dev->dpio,
-                                       0, dpaa2_portal->dpio_dev->token,
-                       evq_info->dpcon->dpcon_id);
-       }
+       /* TODO: Cleanup is required when ports are in linked state. */
+       if (portal->is_port_linked)
+               DPAA2_EVENTDEV_WARN("Event port must be unlinked before release");
 
-       return (int)nb_unlinks;
+       rte_free(portal);
 }
 
 static int
@@ -471,51 +571,71 @@ dpaa2_eventdev_port_link(struct rte_eventdev *dev, void *port,
                        uint16_t nb_links)
 {
        struct dpaa2_eventdev *priv = dev->data->dev_private;
-       struct dpaa2_io_portal_t *dpaa2_portal = port;
-       struct evq_info_t *evq_info;
-       uint8_t channel_index;
-       int ret, i, n;
+       struct dpaa2_port *dpaa2_portal = port;
+       struct dpaa2_eventq *evq_info;
+       uint16_t i;
 
        EVENTDEV_INIT_FUNC_TRACE();
 
+       RTE_SET_USED(priorities);
+
        for (i = 0; i < nb_links; i++) {
                evq_info = &priv->evq_info[queues[i]];
+               memcpy(&dpaa2_portal->evq_info[queues[i]], evq_info,
+                          sizeof(struct dpaa2_eventq));
+               dpaa2_portal->evq_info[queues[i]].event_port = port;
+               dpaa2_portal->num_linked_evq++;
+       }
 
-               ret = dpio_add_static_dequeue_channel(
-                       dpaa2_portal->dpio_dev->dpio,
-                       CMD_PRI_LOW, dpaa2_portal->dpio_dev->token,
-                       evq_info->dpcon->dpcon_id, &channel_index);
-               if (ret < 0) {
-                       DPAA2_EVENTDEV_ERR(
-                               "Static dequeue config failed: err(%d)", ret);
-                       goto err;
-               }
+       return (int)nb_links;
+}
 
-               qbman_swp_push_set(dpaa2_portal->dpio_dev->sw_portal,
-                                  channel_index, 1);
-               evq_info->dpcon->channel_index = channel_index;
-       }
+static int
+dpaa2_eventdev_port_unlink(struct rte_eventdev *dev, void *port,
+                          uint8_t queues[], uint16_t nb_unlinks)
+{
+       struct dpaa2_port *dpaa2_portal = port;
+       int i;
+       struct dpaa2_dpio_dev *dpio_dev = NULL;
+       struct dpaa2_eventq *evq_info;
+       struct qbman_swp *swp;
 
-       RTE_SET_USED(priorities);
+       EVENTDEV_INIT_FUNC_TRACE();
 
-       return (int)nb_links;
-err:
-       for (n = 0; n < i; n++) {
-               evq_info = &priv->evq_info[queues[n]];
-               qbman_swp_push_set(dpaa2_portal->dpio_dev->sw_portal,
-                                  evq_info->dpcon->channel_index, 0);
-               dpio_remove_static_dequeue_channel(dpaa2_portal->dpio_dev->dpio,
-                                       0, dpaa2_portal->dpio_dev->token,
-                       evq_info->dpcon->dpcon_id);
+       RTE_SET_USED(dev);
+
+       for (i = 0; i < nb_unlinks; i++) {
+               evq_info = &dpaa2_portal->evq_info[queues[i]];
+
+               if (DPAA2_PER_LCORE_DPIO && evq_info->dpcon) {
+                       /* TODO: dpaa2_portal should hold a per-lcore dpio_dev number */
+                       dpio_dev = DPAA2_PER_LCORE_DPIO;
+                       swp = DPAA2_PER_LCORE_PORTAL;
+
+                       qbman_swp_push_set(swp,
+                                       evq_info->dpcon->channel_index, 0);
+                       dpio_remove_static_dequeue_channel(dpio_dev->dpio, 0,
+                                               dpio_dev->token,
+                                               evq_info->dpcon->dpcon_id);
+               }
+               memset(evq_info, 0, sizeof(struct dpaa2_eventq));
+               if (dpaa2_portal->num_linked_evq)
+                       dpaa2_portal->num_linked_evq--;
        }
-       return ret;
+
+       if (!dpaa2_portal->num_linked_evq)
+               dpaa2_portal->is_port_linked = false;
+
+       return (int)nb_unlinks;
 }
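
A small usage sketch of the link/unlink pair these callbacks
implement, against the public API with illustrative ids; the return
value is the number of (un)links actually performed:

#include <rte_eventdev.h>

static int
relink_port(uint8_t dev_id)
{
        uint8_t queues[] = { 0, 1 };
        uint8_t q1 = 1;

        /* NULL priorities means normal priority for both links. */
        if (rte_event_port_link(dev_id, 0, queues, NULL, 2) != 2)
                return -1;

        return rte_event_port_unlink(dev_id, 0, &q1, 1) == 1 ? 0 : -1;
}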
 
+
 static int
 dpaa2_eventdev_timeout_ticks(struct rte_eventdev *dev, uint64_t ns,
                             uint64_t *timeout_ticks)
 {
-       uint32_t scale = 1;
+       uint32_t scale = 1000*1000;
 
        EVENTDEV_INIT_FUNC_TRACE();
 
@@ -677,6 +797,151 @@ dpaa2_eventdev_eth_stop(const struct rte_eventdev *dev,
        return 0;
 }
 
+#ifdef RTE_LIBRTE_SECURITY
+static int
+dpaa2_eventdev_crypto_caps_get(const struct rte_eventdev *dev,
+                           const struct rte_cryptodev *cdev,
+                           uint32_t *caps)
+{
+       const char *name = cdev->data->name;
+
+       EVENTDEV_INIT_FUNC_TRACE();
+
+       RTE_SET_USED(dev);
+
+       if (!strncmp(name, "dpsec-", 6))
+               *caps = RTE_EVENT_CRYPTO_ADAPTER_DPAA2_CAP;
+       else
+               return -1;
+
+       return 0;
+}
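
An application would discover these capabilities roughly as follows
(a sketch; the device ids are illustrative):

#include <rte_eventdev.h>

static int
has_internal_port(uint8_t dev_id, uint8_t cdev_id)
{
        uint32_t caps = 0;

        if (rte_event_crypto_adapter_caps_get(dev_id, cdev_id, &caps) < 0)
                return -1;

        return !!(caps & RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW);
}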
+
+static int
+dpaa2_eventdev_crypto_queue_add_all(const struct rte_eventdev *dev,
+               const struct rte_cryptodev *cryptodev,
+               const struct rte_event *ev)
+{
+       struct dpaa2_eventdev *priv = dev->data->dev_private;
+       uint8_t ev_qid = ev->queue_id;
+       uint16_t dpcon_id = priv->evq_info[ev_qid].dpcon->dpcon_id;
+       int i, ret;
+
+       EVENTDEV_INIT_FUNC_TRACE();
+
+       for (i = 0; i < cryptodev->data->nb_queue_pairs; i++) {
+               ret = dpaa2_sec_eventq_attach(cryptodev, i,
+                               dpcon_id, ev);
+               if (ret) {
+                       DPAA2_EVENTDEV_ERR(
+                               "dpaa2_sec_eventq_attach failed: ret %d", ret);
+                       goto fail;
+               }
+       }
+       return 0;
+fail:
+       for (i = (i - 1); i >= 0 ; i--)
+               dpaa2_sec_eventq_detach(cryptodev, i);
+
+       return ret;
+}
+
+static int
+dpaa2_eventdev_crypto_queue_add(const struct rte_eventdev *dev,
+               const struct rte_cryptodev *cryptodev,
+               int32_t rx_queue_id,
+               const struct rte_event *ev)
+{
+       struct dpaa2_eventdev *priv = dev->data->dev_private;
+       uint8_t ev_qid = ev->queue_id;
+       uint16_t dpcon_id = priv->evq_info[ev_qid].dpcon->dpcon_id;
+       int ret;
+
+       EVENTDEV_INIT_FUNC_TRACE();
+
+       if (rx_queue_id == -1)
+               return dpaa2_eventdev_crypto_queue_add_all(dev,
+                               cryptodev, ev);
+
+       ret = dpaa2_sec_eventq_attach(cryptodev, rx_queue_id,
+                       dpcon_id, ev);
+       if (ret) {
+               DPAA2_EVENTDEV_ERR(
+                       "dpaa2_sec_eventq_attach failed: ret: %d", ret);
+               return ret;
+       }
+       return 0;
+}
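
The rx_queue_id == -1 convention mirrors the public adapter API,
where -1 means "all queue pairs". A sketch with hypothetical adapter
and cryptodev ids:

#include <rte_event_crypto_adapter.h>

static int
bind_all_qps(uint8_t adapter_id, uint8_t cdev_id)
{
        struct rte_event ev = {
                .queue_id = 0,
                .sched_type = RTE_SCHED_TYPE_ATOMIC,
                .priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
        };

        /* -1: bind every queue pair of the cryptodev to this event. */
        return rte_event_crypto_adapter_queue_pair_add(adapter_id, cdev_id,
                                                       -1, &ev);
}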
+
+static int
+dpaa2_eventdev_crypto_queue_del_all(const struct rte_eventdev *dev,
+                            const struct rte_cryptodev *cdev)
+{
+       int i, ret;
+
+       EVENTDEV_INIT_FUNC_TRACE();
+
+       RTE_SET_USED(dev);
+
+       for (i = 0; i < cdev->data->nb_queue_pairs; i++) {
+               ret = dpaa2_sec_eventq_detach(cdev, i);
+               if (ret) {
+                       DPAA2_EVENTDEV_ERR(
+                               "dpaa2_sec_eventq_detach failed: ret %d", ret);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+static int
+dpaa2_eventdev_crypto_queue_del(const struct rte_eventdev *dev,
+                            const struct rte_cryptodev *cryptodev,
+                            int32_t rx_queue_id)
+{
+       int ret;
+
+       EVENTDEV_INIT_FUNC_TRACE();
+
+       if (rx_queue_id == -1)
+               return dpaa2_eventdev_crypto_queue_del_all(dev, cryptodev);
+
+       ret = dpaa2_sec_eventq_detach(cryptodev, rx_queue_id);
+       if (ret) {
+               DPAA2_EVENTDEV_ERR(
+                       "dpaa2_sec_eventq_detach failed: ret: %d", ret);
+               return ret;
+       }
+
+       return 0;
+}
+
+static int
+dpaa2_eventdev_crypto_start(const struct rte_eventdev *dev,
+                           const struct rte_cryptodev *cryptodev)
+{
+       EVENTDEV_INIT_FUNC_TRACE();
+
+       RTE_SET_USED(dev);
+       RTE_SET_USED(cryptodev);
+
+       return 0;
+}
+
+static int
+dpaa2_eventdev_crypto_stop(const struct rte_eventdev *dev,
+                          const struct rte_cryptodev *cryptodev)
+{
+       EVENTDEV_INIT_FUNC_TRACE();
+
+       RTE_SET_USED(dev);
+       RTE_SET_USED(cryptodev);
+
+       return 0;
+}
+#endif
+
 static struct rte_eventdev_ops dpaa2_eventdev_ops = {
        .dev_infos_get    = dpaa2_eventdev_info_get,
        .dev_configure    = dpaa2_eventdev_configure,
@@ -698,6 +963,13 @@ static struct rte_eventdev_ops dpaa2_eventdev_ops = {
        .eth_rx_adapter_queue_del = dpaa2_eventdev_eth_queue_del,
        .eth_rx_adapter_start = dpaa2_eventdev_eth_start,
        .eth_rx_adapter_stop = dpaa2_eventdev_eth_stop,
+#ifdef RTE_LIBRTE_SECURITY
+       .crypto_adapter_caps_get        = dpaa2_eventdev_crypto_caps_get,
+       .crypto_adapter_queue_pair_add  = dpaa2_eventdev_crypto_queue_add,
+       .crypto_adapter_queue_pair_del  = dpaa2_eventdev_crypto_queue_del,
+       .crypto_adapter_start           = dpaa2_eventdev_crypto_start,
+       .crypto_adapter_stop            = dpaa2_eventdev_crypto_stop,
+#endif
 };
 
 static int
@@ -789,6 +1061,8 @@ dpaa2_eventdev_create(const char *name)
                priv->max_event_queues++;
        } while (dpcon_dev && dpci_dev);
 
+       RTE_LOG(INFO, PMD, "%s eventdev created\n", name);
+
        return 0;
 fail:
        return -EFAULT;
index 229f66a..c847b3e 100644 (file)
@@ -21,6 +21,7 @@
 #define DPAA2_EVENT_MAX_QUEUES                 16
 #define DPAA2_EVENT_MIN_DEQUEUE_TIMEOUT                1
 #define DPAA2_EVENT_MAX_DEQUEUE_TIMEOUT                (UINT32_MAX - 1)
+#define DPAA2_EVENT_PORT_DEQUEUE_TIMEOUT_NS    100UL
 #define DPAA2_EVENT_MAX_QUEUE_FLOWS            2048
 #define DPAA2_EVENT_MAX_QUEUE_PRIORITY_LEVELS  8
 #define DPAA2_EVENT_MAX_EVENT_PRIORITY_LEVELS  0
@@ -41,6 +42,15 @@ enum {
                (RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT | \
                RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ | \
                RTE_EVENT_ETH_RX_ADAPTER_CAP_OVERRIDE_FLOW_ID)
+
+/**< Crypto adapter capabilities to return when packets are
+ * transferred from the cryptodev to the eventdev with DPAA2 devices.
+ */
+#define RTE_EVENT_CRYPTO_ADAPTER_DPAA2_CAP \
+               (RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_OP_NEW | \
+               RTE_EVENT_CRYPTO_ADAPTER_CAP_INTERNAL_PORT_QP_EV_BIND | \
+               RTE_EVENT_CRYPTO_ADAPTER_CAP_SESSION_PRIVATE_DATA)
+
 /**< Ethernet Rx adapter capabilities to return when packets are
  * transferred from the ethdev to the eventdev with DPAA2 devices.
  */
@@ -56,17 +66,27 @@ struct dpaa2_dpcon_dev {
        uint8_t channel_index;
 };
 
-struct evq_info_t {
+struct dpaa2_eventq {
        /* DPcon device */
        struct dpaa2_dpcon_dev *dpcon;
        /* Attached DPCI device */
        struct dpaa2_dpci_dev *dpci;
+       /* Mapped event port */
+       struct dpaa2_io_portal_t *event_port;
        /* Configuration provided by the user */
        uint32_t event_queue_cfg;
+       uint32_t event_queue_id;
+};
+
+struct dpaa2_port {
+       struct dpaa2_eventq evq_info[DPAA2_EVENT_MAX_QUEUES];
+       uint8_t num_linked_evq;
+       uint8_t is_port_linked;
+       uint64_t timeout_us;
 };
 
 struct dpaa2_eventdev {
-       struct evq_info_t evq_info[DPAA2_EVENT_MAX_QUEUES];
+       struct dpaa2_eventq evq_info[DPAA2_EVENT_MAX_QUEUES];
        uint32_t dequeue_timeout_ns;
        uint8_t max_event_queues;
        uint8_t nb_event_queues;
index de7a461..a0db6fc 100644 (file)
@@ -1,11 +1,14 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright 2018 NXP
 
+version = 2
+
 if host_machine.system() != 'linux'
        build = false
 endif
-deps += ['bus_vdev', 'pmd_dpaa2']
+deps += ['bus_vdev', 'pmd_dpaa2', 'pmd_dpaa2_sec']
 sources = files('dpaa2_hw_dpcon.c',
                'dpaa2_eventdev.c')
 
 allow_experimental_apis = true
+includes += include_directories('../../crypto/dpaa2_sec/')
diff --git a/drivers/event/dsw/Makefile b/drivers/event/dsw/Makefile
new file mode 100644 (file)
index 0000000..490ed0b
--- /dev/null
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Ericsson AB
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+LIB = librte_pmd_dsw_event.a
+
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+ifneq ($(CONFIG_RTE_TOOLCHAIN_ICC),y)
+CFLAGS += -Wno-format-nonliteral
+endif
+
+LDLIBS += -lrte_eal
+LDLIBS += -lrte_mbuf
+LDLIBS += -lrte_mempool
+LDLIBS += -lrte_ring
+LDLIBS += -lrte_eventdev
+LDLIBS += -lrte_bus_vdev
+
+LIBABIVER := 1
+
+EXPORT_MAP := rte_pmd_dsw_event_version.map
+
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_DSW_EVENTDEV) += \
+       dsw_evdev.c dsw_event.c dsw_xstats.c
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/event/dsw/dsw_evdev.c b/drivers/event/dsw/dsw_evdev.c
new file mode 100644 (file)
index 0000000..33ba136
--- /dev/null
@@ -0,0 +1,435 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Ericsson AB
+ */
+
+#include <stdbool.h>
+
+#include <rte_cycles.h>
+#include <rte_eventdev_pmd.h>
+#include <rte_eventdev_pmd_vdev.h>
+#include <rte_random.h>
+
+#include "dsw_evdev.h"
+
+#define EVENTDEV_NAME_DSW_PMD event_dsw
+
+static int
+dsw_port_setup(struct rte_eventdev *dev, uint8_t port_id,
+              const struct rte_event_port_conf *conf)
+{
+       struct dsw_evdev *dsw = dsw_pmd_priv(dev);
+       struct dsw_port *port;
+       struct rte_event_ring *in_ring;
+       struct rte_ring *ctl_in_ring;
+       char ring_name[RTE_RING_NAMESIZE];
+
+       port = &dsw->ports[port_id];
+
+       *port = (struct dsw_port) {
+               .id = port_id,
+               .dsw = dsw,
+               .dequeue_depth = conf->dequeue_depth,
+               .enqueue_depth = conf->enqueue_depth,
+               .new_event_threshold = conf->new_event_threshold
+       };
+
+       snprintf(ring_name, sizeof(ring_name), "dsw%d_p%u", dev->data->dev_id,
+                port_id);
+
+       in_ring = rte_event_ring_create(ring_name, DSW_IN_RING_SIZE,
+                                       dev->data->socket_id,
+                                       RING_F_SC_DEQ|RING_F_EXACT_SZ);
+
+       if (in_ring == NULL)
+               return -ENOMEM;
+
+       snprintf(ring_name, sizeof(ring_name), "dswctl%d_p%u",
+                dev->data->dev_id, port_id);
+
+       ctl_in_ring = rte_ring_create(ring_name, DSW_CTL_IN_RING_SIZE,
+                                     dev->data->socket_id,
+                                     RING_F_SC_DEQ|RING_F_EXACT_SZ);
+
+       if (ctl_in_ring == NULL) {
+               rte_event_ring_free(in_ring);
+               return -ENOMEM;
+       }
+
+       port->in_ring = in_ring;
+       port->ctl_in_ring = ctl_in_ring;
+
+       rte_atomic16_init(&port->load);
+
+       port->load_update_interval =
+               (DSW_LOAD_UPDATE_INTERVAL * rte_get_timer_hz()) / US_PER_S;
+
+       port->migration_interval =
+               (DSW_MIGRATION_INTERVAL * rte_get_timer_hz()) / US_PER_S;
+
+       dev->data->ports[port_id] = port;
+
+       return 0;
+}
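
A standalone sketch of the input ring created above; the ring name is
illustrative. Without RING_F_EXACT_SZ the ring size must be a power
of two and one slot is lost, so all DSW_IN_RING_SIZE (16384) events
would not fit; with it, the usable capacity is exactly the requested
count:

#include <rte_event_ring.h>
#include <rte_lcore.h>

static struct rte_event_ring *
make_in_ring(void)
{
        return rte_event_ring_create("sketch_in_ring", 16384,
                                     rte_socket_id(),
                                     RING_F_SC_DEQ | RING_F_EXACT_SZ);
}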
+
+static void
+dsw_port_def_conf(struct rte_eventdev *dev __rte_unused,
+                 uint8_t port_id __rte_unused,
+                 struct rte_event_port_conf *port_conf)
+{
+       *port_conf = (struct rte_event_port_conf) {
+               .new_event_threshold = 1024,
+               .dequeue_depth = DSW_MAX_PORT_DEQUEUE_DEPTH / 4,
+               .enqueue_depth = DSW_MAX_PORT_ENQUEUE_DEPTH / 4
+       };
+}
+
+static void
+dsw_port_release(void *p)
+{
+       struct dsw_port *port = p;
+
+       rte_event_ring_free(port->in_ring);
+       rte_ring_free(port->ctl_in_ring);
+}
+
+static int
+dsw_queue_setup(struct rte_eventdev *dev, uint8_t queue_id,
+               const struct rte_event_queue_conf *conf)
+{
+       struct dsw_evdev *dsw = dsw_pmd_priv(dev);
+       struct dsw_queue *queue = &dsw->queues[queue_id];
+
+       if (RTE_EVENT_QUEUE_CFG_ALL_TYPES & conf->event_queue_cfg)
+               return -ENOTSUP;
+
+       if (conf->schedule_type == RTE_SCHED_TYPE_ORDERED)
+               return -ENOTSUP;
+
+       /* SINGLE_LINK is better off treated as TYPE_ATOMIC, since it
+        * avoids the "fake" TYPE_PARALLEL flow_id assignment. Since
+        * the queue will only have a single serving port, no
+        * migration will ever happen, so the extra TYPE_ATOMIC
+        * migration overhead is avoided.
+        */
+       if (RTE_EVENT_QUEUE_CFG_SINGLE_LINK & conf->event_queue_cfg)
+               queue->schedule_type = RTE_SCHED_TYPE_ATOMIC;
+       else /* atomic or parallel */
+               queue->schedule_type = conf->schedule_type;
+
+       queue->num_serving_ports = 0;
+
+       return 0;
+}
+
+static void
+dsw_queue_def_conf(struct rte_eventdev *dev __rte_unused,
+                  uint8_t queue_id __rte_unused,
+                  struct rte_event_queue_conf *queue_conf)
+{
+       *queue_conf = (struct rte_event_queue_conf) {
+               .nb_atomic_flows = 4096,
+               .schedule_type = RTE_SCHED_TYPE_ATOMIC,
+               .priority = RTE_EVENT_DEV_PRIORITY_NORMAL
+       };
+}
+
+static void
+dsw_queue_release(struct rte_eventdev *dev __rte_unused,
+                 uint8_t queue_id __rte_unused)
+{
+}
+
+static void
+queue_add_port(struct dsw_queue *queue, uint16_t port_id)
+{
+       queue->serving_ports[queue->num_serving_ports] = port_id;
+       queue->num_serving_ports++;
+}
+
+static bool
+queue_remove_port(struct dsw_queue *queue, uint16_t port_id)
+{
+       uint16_t i;
+
+       for (i = 0; i < queue->num_serving_ports; i++)
+               if (queue->serving_ports[i] == port_id) {
+                       uint16_t last_idx = queue->num_serving_ports - 1;
+                       if (i != last_idx)
+                               queue->serving_ports[i] =
+                                       queue->serving_ports[last_idx];
+                       queue->num_serving_ports--;
+                       return true;
+               }
+       return false;
+}
+
+static int
+dsw_port_link_unlink(struct rte_eventdev *dev, void *port,
+                    const uint8_t queues[], uint16_t num, bool link)
+{
+       struct dsw_evdev *dsw = dsw_pmd_priv(dev);
+       struct dsw_port *p = port;
+       uint16_t i;
+       uint16_t count = 0;
+
+       for (i = 0; i < num; i++) {
+               uint8_t qid = queues[i];
+               struct dsw_queue *q = &dsw->queues[qid];
+               if (link) {
+                       queue_add_port(q, p->id);
+                       count++;
+               } else {
+                       bool removed = queue_remove_port(q, p->id);
+                       if (removed)
+                               count++;
+               }
+       }
+
+       return count;
+}
+
+static int
+dsw_port_link(struct rte_eventdev *dev, void *port, const uint8_t queues[],
+             const uint8_t priorities[] __rte_unused, uint16_t num)
+{
+       return dsw_port_link_unlink(dev, port, queues, num, true);
+}
+
+static int
+dsw_port_unlink(struct rte_eventdev *dev, void *port, uint8_t queues[],
+               uint16_t num)
+{
+       return dsw_port_link_unlink(dev, port, queues, num, false);
+}
+
+static void
+dsw_info_get(struct rte_eventdev *dev __rte_unused,
+            struct rte_event_dev_info *info)
+{
+       *info = (struct rte_event_dev_info) {
+               .driver_name = DSW_PMD_NAME,
+               .max_event_queues = DSW_MAX_QUEUES,
+               .max_event_queue_flows = DSW_MAX_FLOWS,
+               .max_event_queue_priority_levels = 1,
+               .max_event_priority_levels = 1,
+               .max_event_ports = DSW_MAX_PORTS,
+               .max_event_port_dequeue_depth = DSW_MAX_PORT_DEQUEUE_DEPTH,
+               .max_event_port_enqueue_depth = DSW_MAX_PORT_ENQUEUE_DEPTH,
+               .max_num_events = DSW_MAX_EVENTS,
+               .event_dev_cap = RTE_EVENT_DEV_CAP_BURST_MODE|
+               RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED
+       };
+}
+
+static int
+dsw_configure(const struct rte_eventdev *dev)
+{
+       struct dsw_evdev *dsw = dsw_pmd_priv(dev);
+       const struct rte_event_dev_config *conf = &dev->data->dev_conf;
+       int32_t min_max_in_flight;
+
+       dsw->num_ports = conf->nb_event_ports;
+       dsw->num_queues = conf->nb_event_queues;
+
+       /* Avoid a situation where consumer ports are holding all the
+        * credits, without making use of them.
+        */
+       min_max_in_flight = conf->nb_event_ports * DSW_PORT_MAX_CREDITS;
+
+       dsw->max_inflight = RTE_MAX(conf->nb_events_limit, min_max_in_flight);
+
+       return 0;
+}
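
A worked example of the limit computed above, with illustrative
numbers (4 event ports, DSW_PORT_MAX_CREDITS == 128):

#include <stdint.h>
#include <rte_common.h>

int
main(void)
{
        int32_t min_max_in_flight = 4 * 128;          /* = 512 */
        int32_t a = RTE_MAX(4096, min_max_in_flight); /* limit 4096 wins */
        int32_t b = RTE_MAX(256, min_max_in_flight);  /* raised to 512 */

        return !(a == 4096 && b == 512);
}

Raising a small nb_events_limit up to min_max_in_flight ensures the
ports' private credit caches alone cannot exhaust the pool.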
+
+
+static void
+initial_flow_to_port_assignment(struct dsw_evdev *dsw)
+{
+       uint8_t queue_id;
+       for (queue_id = 0; queue_id < dsw->num_queues; queue_id++) {
+               struct dsw_queue *queue = &dsw->queues[queue_id];
+               uint16_t flow_hash;
+               for (flow_hash = 0; flow_hash < DSW_MAX_FLOWS; flow_hash++) {
+                       uint8_t port_idx =
+                               rte_rand() % queue->num_serving_ports;
+                       uint8_t port_id =
+                               queue->serving_ports[port_idx];
+                       dsw->queues[queue_id].flow_to_port_map[flow_hash] =
+                               port_id;
+               }
+       }
+}
+
+static int
+dsw_start(struct rte_eventdev *dev)
+{
+       struct dsw_evdev *dsw = dsw_pmd_priv(dev);
+       uint16_t i;
+       uint64_t now;
+
+       rte_atomic32_init(&dsw->credits_on_loan);
+
+       initial_flow_to_port_assignment(dsw);
+
+       now = rte_get_timer_cycles();
+       for (i = 0; i < dsw->num_ports; i++) {
+               dsw->ports[i].measurement_start = now;
+               dsw->ports[i].busy_start = now;
+       }
+
+       return 0;
+}
+
+static void
+dsw_port_drain_buf(uint8_t dev_id, struct rte_event *buf, uint16_t buf_len,
+                  eventdev_stop_flush_t flush, void *flush_arg)
+{
+       uint16_t i;
+
+       for (i = 0; i < buf_len; i++)
+               flush(dev_id, buf[i], flush_arg);
+}
+
+static void
+dsw_port_drain_paused(uint8_t dev_id, struct dsw_port *port,
+                     eventdev_stop_flush_t flush, void *flush_arg)
+{
+       dsw_port_drain_buf(dev_id, port->paused_events, port->paused_events_len,
+                          flush, flush_arg);
+}
+
+static void
+dsw_port_drain_out(uint8_t dev_id, struct dsw_evdev *dsw, struct dsw_port *port,
+                  eventdev_stop_flush_t flush, void *flush_arg)
+{
+       uint16_t dport_id;
+
+       for (dport_id = 0; dport_id < dsw->num_ports; dport_id++)
+               if (dport_id != port->id)
+                       dsw_port_drain_buf(dev_id, port->out_buffer[dport_id],
+                                          port->out_buffer_len[dport_id],
+                                          flush, flush_arg);
+}
+
+static void
+dsw_port_drain_in_ring(uint8_t dev_id, struct dsw_port *port,
+                      eventdev_stop_flush_t flush, void *flush_arg)
+{
+       struct rte_event ev;
+
+       while (rte_event_ring_dequeue_burst(port->in_ring, &ev, 1, NULL))
+               flush(dev_id, ev, flush_arg);
+}
+
+static void
+dsw_drain(uint8_t dev_id, struct dsw_evdev *dsw,
+         eventdev_stop_flush_t flush, void *flush_arg)
+{
+       uint16_t port_id;
+
+       if (flush == NULL)
+               return;
+
+       for (port_id = 0; port_id < dsw->num_ports; port_id++) {
+               struct dsw_port *port = &dsw->ports[port_id];
+
+               dsw_port_drain_out(dev_id, dsw, port, flush, flush_arg);
+               dsw_port_drain_paused(dev_id, port, flush, flush_arg);
+               dsw_port_drain_in_ring(dev_id, port, flush, flush_arg);
+       }
+}
+
+static void
+dsw_stop(struct rte_eventdev *dev)
+{
+       struct dsw_evdev *dsw = dsw_pmd_priv(dev);
+       uint8_t dev_id;
+       eventdev_stop_flush_t flush;
+       void *flush_arg;
+
+       dev_id = dev->data->dev_id;
+       flush = dev->dev_ops->dev_stop_flush;
+       flush_arg = dev->data->dev_stop_flush_arg;
+
+       dsw_drain(dev_id, dsw, flush, flush_arg);
+}
+
+static int
+dsw_close(struct rte_eventdev *dev)
+{
+       struct dsw_evdev *dsw = dsw_pmd_priv(dev);
+
+       dsw->num_ports = 0;
+       dsw->num_queues = 0;
+
+       return 0;
+}
+
+static struct rte_eventdev_ops dsw_evdev_ops = {
+       .port_setup = dsw_port_setup,
+       .port_def_conf = dsw_port_def_conf,
+       .port_release = dsw_port_release,
+       .queue_setup = dsw_queue_setup,
+       .queue_def_conf = dsw_queue_def_conf,
+       .queue_release = dsw_queue_release,
+       .port_link = dsw_port_link,
+       .port_unlink = dsw_port_unlink,
+       .dev_infos_get = dsw_info_get,
+       .dev_configure = dsw_configure,
+       .dev_start = dsw_start,
+       .dev_stop = dsw_stop,
+       .dev_close = dsw_close,
+       .xstats_get = dsw_xstats_get,
+       .xstats_get_names = dsw_xstats_get_names,
+       .xstats_get_by_name = dsw_xstats_get_by_name
+};
+
+static int
+dsw_probe(struct rte_vdev_device *vdev)
+{
+       const char *name;
+       struct rte_eventdev *dev;
+       struct dsw_evdev *dsw;
+
+       name = rte_vdev_device_name(vdev);
+
+       dev = rte_event_pmd_vdev_init(name, sizeof(struct dsw_evdev),
+                                     rte_socket_id());
+       if (dev == NULL)
+               return -EFAULT;
+
+       dev->dev_ops = &dsw_evdev_ops;
+       dev->enqueue = dsw_event_enqueue;
+       dev->enqueue_burst = dsw_event_enqueue_burst;
+       dev->enqueue_new_burst = dsw_event_enqueue_new_burst;
+       dev->enqueue_forward_burst = dsw_event_enqueue_forward_burst;
+       dev->dequeue = dsw_event_dequeue;
+       dev->dequeue_burst = dsw_event_dequeue_burst;
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return 0;
+
+       dsw = dev->data->dev_private;
+       dsw->data = dev->data;
+
+       return 0;
+}
+
+static int
+dsw_remove(struct rte_vdev_device *vdev)
+{
+       const char *name;
+
+       name = rte_vdev_device_name(vdev);
+       if (name == NULL)
+               return -EINVAL;
+
+       return rte_event_pmd_vdev_uninit(name);
+}
+
+static struct rte_vdev_driver evdev_dsw_pmd_drv = {
+       .probe = dsw_probe,
+       .remove = dsw_remove
+};
+
+RTE_PMD_REGISTER_VDEV(EVENTDEV_NAME_DSW_PMD, evdev_dsw_pmd_drv);
diff --git a/drivers/event/dsw/dsw_evdev.h b/drivers/event/dsw/dsw_evdev.h
new file mode 100644 (file)
index 0000000..dc28ab1
--- /dev/null
@@ -0,0 +1,279 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Ericsson AB
+ */
+
+#ifndef _DSW_EVDEV_H_
+#define _DSW_EVDEV_H_
+
+#include <rte_event_ring.h>
+#include <rte_eventdev.h>
+
+#define DSW_PMD_NAME RTE_STR(event_dsw)
+
+/* Code changes are required to allow more ports. */
+#define DSW_MAX_PORTS (64)
+#define DSW_MAX_PORT_DEQUEUE_DEPTH (128)
+#define DSW_MAX_PORT_ENQUEUE_DEPTH (128)
+#define DSW_MAX_PORT_OUT_BUFFER (32)
+
+#define DSW_MAX_QUEUES (16)
+
+#define DSW_MAX_EVENTS (16384)
+
+/* Code changes are required to allow more flows than 32k. */
+#define DSW_MAX_FLOWS_BITS (15)
+#define DSW_MAX_FLOWS (1<<(DSW_MAX_FLOWS_BITS))
+#define DSW_MAX_FLOWS_MASK (DSW_MAX_FLOWS-1)
+
+/* Eventdev RTE_SCHED_TYPE_PARALLEL doesn't have a concept of flows,
+ * but the 'dsw' scheduler (more or less) randomly assigns flow ids to
+ * events on parallel queues, to be able to reuse some of the
+ * migration mechanism and scheduling logic from
+ * RTE_SCHED_TYPE_ATOMIC. By moving one of the parallel "flows" away
+ * from a particular port, the likelihood of events being scheduled to
+ * this port is reduced, and thus a kind of statistical load balancing
+ * is achieved.
+ */
+#define DSW_PARALLEL_FLOWS (1024)
+
+/* 'Background tasks' poll the control rings for migration-related
+ * messages and flush the output buffers (so buffered events don't
+ * linger too long). This value shouldn't be too low, since the
+ * system then won't benefit from the 'batching' effects of the
+ * output buffer, and shouldn't be too high, since that would make
+ * buffered events linger too long in case the port goes idle.
+ */
+#define DSW_MAX_PORT_OPS_PER_BG_TASK (128)
+
+/* Avoid making small 'loans' from the central in-flight event credit
+ * pool, to improve efficiency.
+ */
+#define DSW_MIN_CREDIT_LOAN (64)
+#define DSW_PORT_MAX_CREDITS (2*DSW_MIN_CREDIT_LOAN)
+#define DSW_PORT_MIN_CREDITS (DSW_MIN_CREDIT_LOAN)
+
+/* The rings are dimensioned so that all in-flight events can reside
+ * on any one of the port rings, to avoid the trouble of having to
+ * care about the case where there's no room on the destination port's
+ * input ring.
+ */
+#define DSW_IN_RING_SIZE (DSW_MAX_EVENTS)
+
+#define DSW_MAX_LOAD (INT16_MAX)
+#define DSW_LOAD_FROM_PERCENT(x) ((int16_t)(((x)*DSW_MAX_LOAD)/100))
+#define DSW_LOAD_TO_PERCENT(x) ((100*x)/DSW_MAX_LOAD)
+
+/* The thought behind keeping the load update interval shorter than
+ * the migration interval is that the load from newly migrated flows
+ * should 'show up' on the load measurement before new migrations are
+ * considered. This is to avoid having too many flows, from too many
+ * source ports, being migrated too quickly to a lightly loaded port -
+ * in particular since this might cause the system to oscillate.
+ */
+#define DSW_LOAD_UPDATE_INTERVAL (DSW_MIGRATION_INTERVAL/4)
+#define DSW_OLD_LOAD_WEIGHT (1)
+
+/* The minimum time (in us) between two flow migrations. What puts an
+ * upper limit on the actual migration rate is primarily the pace at
+ * which the ports send and receive control messages, which in turn is
+ * largely a function of how many cycles are spent on the processing of
+ * an event burst.
+ */
+#define DSW_MIGRATION_INTERVAL (1000)
+#define DSW_MIN_SOURCE_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(70))
+#define DSW_MAX_TARGET_LOAD_FOR_MIGRATION (DSW_LOAD_FROM_PERCENT(95))
+
+#define DSW_MAX_EVENTS_RECORDED (128)
+
+/* Only one outstanding migration per port is allowed */
+#define DSW_MAX_PAUSED_FLOWS (DSW_MAX_PORTS)
+
+/* Enough room for pause request/confirm and unpause request/confirm
+ * messages from all possible senders.
+ */
+#define DSW_CTL_IN_RING_SIZE ((DSW_MAX_PORTS-1)*4)
+
+/* With DSW_SORT_DEQUEUED enabled, the scheduler will, at the point of
+ * dequeue(), arrange events so that events with the same flow id on
+ * the same queue form a back-to-back "burst", and so that such
+ * bursts of different flow ids, but on the same queue, come
+ * consecutively. All this in an attempt to improve data and
+ * instruction cache usage for the application, at the cost of a
+ * scheduler overhead increase.
+ */
+
+/* #define DSW_SORT_DEQUEUED */
+
+struct dsw_queue_flow {
+       uint8_t queue_id;
+       uint16_t flow_hash;
+};
+
+enum dsw_migration_state {
+       DSW_MIGRATION_STATE_IDLE,
+       DSW_MIGRATION_STATE_PAUSING,
+       DSW_MIGRATION_STATE_FORWARDING,
+       DSW_MIGRATION_STATE_UNPAUSING
+};
+
+struct dsw_port {
+       uint16_t id;
+
+       /* Keeping a pointer here to avoid container_of() calls, which
+        * are expensive since they are very frequent and will result
+        * in an integer multiplication (since the port id is an index
+        * into the dsw_evdev port array).
+        */
+       struct dsw_evdev *dsw;
+
+       uint16_t dequeue_depth;
+       uint16_t enqueue_depth;
+
+       int32_t inflight_credits;
+
+       int32_t new_event_threshold;
+
+       uint16_t pending_releases;
+
+       uint16_t next_parallel_flow_id;
+
+       uint16_t ops_since_bg_task;
+
+       /* most recent 'background' processing */
+       uint64_t last_bg;
+
+       /* For port load measurement. */
+       uint64_t next_load_update;
+       uint64_t load_update_interval;
+       uint64_t measurement_start;
+       uint64_t busy_start;
+       uint64_t busy_cycles;
+       uint64_t total_busy_cycles;
+
+       /* For the ctl interface and flow migration mechanism. */
+       uint64_t next_migration;
+       uint64_t migration_interval;
+       enum dsw_migration_state migration_state;
+
+       uint64_t migration_start;
+       uint64_t migrations;
+       uint64_t migration_latency;
+
+       uint8_t migration_target_port_id;
+       struct dsw_queue_flow migration_target_qf;
+       uint8_t cfm_cnt;
+
+       uint16_t paused_flows_len;
+       struct dsw_queue_flow paused_flows[DSW_MAX_PAUSED_FLOWS];
+
+       /* In a very contrived worst case all inflight events can be
+        * lying around paused here.
+        */
+       uint16_t paused_events_len;
+       struct rte_event paused_events[DSW_MAX_EVENTS];
+
+       uint16_t seen_events_len;
+       uint16_t seen_events_idx;
+       struct dsw_queue_flow seen_events[DSW_MAX_EVENTS_RECORDED];
+
+       uint64_t new_enqueued;
+       uint64_t forward_enqueued;
+       uint64_t release_enqueued;
+       uint64_t queue_enqueued[DSW_MAX_QUEUES];
+
+       uint64_t dequeued;
+       uint64_t queue_dequeued[DSW_MAX_QUEUES];
+
+       uint16_t out_buffer_len[DSW_MAX_PORTS];
+       struct rte_event out_buffer[DSW_MAX_PORTS][DSW_MAX_PORT_OUT_BUFFER];
+
+       uint16_t in_buffer_len;
+       uint16_t in_buffer_start;
+       /* This buffer may contain events that were read up from the
+        * in_ring during the flow migration process.
+        */
+       struct rte_event in_buffer[DSW_MAX_EVENTS];
+
+       struct rte_event_ring *in_ring __rte_cache_aligned;
+
+       struct rte_ring *ctl_in_ring __rte_cache_aligned;
+
+       /* Estimate of current port load. */
+       rte_atomic16_t load __rte_cache_aligned;
+} __rte_cache_aligned;
+
+struct dsw_queue {
+       uint8_t schedule_type;
+       uint8_t serving_ports[DSW_MAX_PORTS];
+       uint16_t num_serving_ports;
+
+       uint8_t flow_to_port_map[DSW_MAX_FLOWS] __rte_cache_aligned;
+};
+
+struct dsw_evdev {
+       struct rte_eventdev_data *data;
+
+       struct dsw_port ports[DSW_MAX_PORTS];
+       uint16_t num_ports;
+       struct dsw_queue queues[DSW_MAX_QUEUES];
+       uint8_t num_queues;
+       int32_t max_inflight;
+
+       rte_atomic32_t credits_on_loan __rte_cache_aligned;
+};
+
+#define DSW_CTL_PAUS_REQ (0)
+#define DSW_CTL_UNPAUS_REQ (1)
+#define DSW_CTL_CFM (2)
+
+/* sizeof(struct dsw_ctl_msg) must be less than or equal to
+ * sizeof(void *), to fit on the control ring.
+ */
+struct dsw_ctl_msg {
+       uint8_t type:2;
+       uint8_t originating_port_id:6;
+       uint8_t queue_id;
+       uint16_t flow_hash;
+} __rte_packed;
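
The comment above requires the message to fit in a ring slot; a
build-time check along these lines would enforce it (an editorial
sketch using DPDK's RTE_BUILD_BUG_ON, placed after the definition
above; the function name is illustrative):

#include <rte_common.h>

static inline void
dsw_ctl_msg_size_check(void)
{
        /* Fails to compile if the ctl message outgrows a void *. */
        RTE_BUILD_BUG_ON(sizeof(struct dsw_ctl_msg) > sizeof(void *));
}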
+
+uint16_t dsw_event_enqueue(void *port, const struct rte_event *event);
+uint16_t dsw_event_enqueue_burst(void *port,
+                                const struct rte_event events[],
+                                uint16_t events_len);
+uint16_t dsw_event_enqueue_new_burst(void *port,
+                                    const struct rte_event events[],
+                                    uint16_t events_len);
+uint16_t dsw_event_enqueue_forward_burst(void *port,
+                                        const struct rte_event events[],
+                                        uint16_t events_len);
+
+uint16_t dsw_event_dequeue(void *port, struct rte_event *ev, uint64_t wait);
+uint16_t dsw_event_dequeue_burst(void *port, struct rte_event *events,
+                                uint16_t num, uint64_t wait);
+
+int dsw_xstats_get_names(const struct rte_eventdev *dev,
+                        enum rte_event_dev_xstats_mode mode,
+                        uint8_t queue_port_id,
+                        struct rte_event_dev_xstats_name *xstats_names,
+                        unsigned int *ids, unsigned int size);
+int dsw_xstats_get(const struct rte_eventdev *dev,
+                  enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id,
+                  const unsigned int ids[], uint64_t values[], unsigned int n);
+uint64_t dsw_xstats_get_by_name(const struct rte_eventdev *dev,
+                               const char *name, unsigned int *id);
+
+static inline struct dsw_evdev *
+dsw_pmd_priv(const struct rte_eventdev *eventdev)
+{
+       return eventdev->data->dev_private;
+}
+
+#define DSW_LOG_DP(level, fmt, args...)                                        \
+       RTE_LOG_DP(level, EVENTDEV, "[%s] %s() line %u: " fmt,          \
+                  DSW_PMD_NAME,                                        \
+                  __func__, __LINE__, ## args)
+
+#define DSW_LOG_DP_PORT(level, port_id, fmt, args...)          \
+       DSW_LOG_DP(level, "<Port %d> " fmt, port_id, ## args)
+
+#endif
diff --git a/drivers/event/dsw/dsw_event.c b/drivers/event/dsw/dsw_event.c
new file mode 100644 (file)
index 0000000..61a66fa
--- /dev/null
@@ -0,0 +1,1253 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Ericsson AB
+ */
+
+#include "dsw_evdev.h"
+
+#ifdef DSW_SORT_DEQUEUED
+#include "dsw_sort.h"
+#endif
+
+#include <stdbool.h>
+#include <string.h>
+
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_memcpy.h>
+#include <rte_random.h>
+
+static bool
+dsw_port_acquire_credits(struct dsw_evdev *dsw, struct dsw_port *port,
+                        int32_t credits)
+{
+       int32_t inflight_credits = port->inflight_credits;
+       int32_t missing_credits = credits - inflight_credits;
+       int32_t total_on_loan;
+       int32_t available;
+       int32_t acquired_credits;
+       int32_t new_total_on_loan;
+
+       if (likely(missing_credits <= 0)) {
+               port->inflight_credits -= credits;
+               return true;
+       }
+
+       total_on_loan = rte_atomic32_read(&dsw->credits_on_loan);
+       available = dsw->max_inflight - total_on_loan;
+       acquired_credits = RTE_MAX(missing_credits, DSW_PORT_MIN_CREDITS);
+
+       if (available < acquired_credits)
+               return false;
+
+       /* This is a race; no locks are involved, and thus some other
+        * thread can allocate tokens in between the check and the
+        * allocation.
+        */
+       new_total_on_loan = rte_atomic32_add_return(&dsw->credits_on_loan,
+                                                   acquired_credits);
+
+       if (unlikely(new_total_on_loan > dsw->max_inflight)) {
+               /* Some other port took the last credits */
+               rte_atomic32_sub(&dsw->credits_on_loan, acquired_credits);
+               return false;
+       }
+
+       DSW_LOG_DP_PORT(DEBUG, port->id, "Acquired %d tokens from pool.\n",
+                       acquired_credits);
+
+       port->inflight_credits += acquired_credits;
+       port->inflight_credits -= credits;
+
+       return true;
+}
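
A worked example of the loan logic above, with illustrative numbers:
a port holding 10 cached credits enqueues 16 new events.
missing_credits is 16 - 10 = 6, but the loan is rounded up to
DSW_PORT_MIN_CREDITS (64), so 64 credits are taken from the shared
pool and the port's cache ends up at 10 + 64 - 16 = 58. The next few
enqueues are then served entirely from the cache, without touching
the shared atomic counter.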
+
+static void
+dsw_port_return_credits(struct dsw_evdev *dsw, struct dsw_port *port,
+                       int32_t credits)
+{
+       port->inflight_credits += credits;
+
+       if (unlikely(port->inflight_credits > DSW_PORT_MAX_CREDITS)) {
+               int32_t leave_credits = DSW_PORT_MIN_CREDITS;
+               int32_t return_credits =
+                       port->inflight_credits - leave_credits;
+
+               port->inflight_credits = leave_credits;
+
+               rte_atomic32_sub(&dsw->credits_on_loan, return_credits);
+
+               DSW_LOG_DP_PORT(DEBUG, port->id,
+                               "Returned %d tokens to pool.\n",
+                               return_credits);
+       }
+}
+
+static void
+dsw_port_enqueue_stats(struct dsw_port *port, uint16_t num_new,
+                      uint16_t num_forward, uint16_t num_release)
+{
+       port->new_enqueued += num_new;
+       port->forward_enqueued += num_forward;
+       port->release_enqueued += num_release;
+}
+
+static void
+dsw_port_queue_enqueue_stats(struct dsw_port *source_port, uint8_t queue_id)
+{
+       source_port->queue_enqueued[queue_id]++;
+}
+
+static void
+dsw_port_dequeue_stats(struct dsw_port *port, uint16_t num)
+{
+       port->dequeued += num;
+}
+
+static void
+dsw_port_queue_dequeued_stats(struct dsw_port *source_port, uint8_t queue_id)
+{
+       source_port->queue_dequeued[queue_id]++;
+}
+
+static void
+dsw_port_load_record(struct dsw_port *port, unsigned int dequeued)
+{
+       if (dequeued > 0 && port->busy_start == 0)
+               /* work period begins */
+               port->busy_start = rte_get_timer_cycles();
+       else if (dequeued == 0 && port->busy_start > 0) {
+               /* work period ends */
+               uint64_t work_period =
+                       rte_get_timer_cycles() - port->busy_start;
+               port->busy_cycles += work_period;
+               port->busy_start = 0;
+       }
+}
+
+static int16_t
+dsw_port_load_close_period(struct dsw_port *port, uint64_t now)
+{
+       uint64_t passed = now - port->measurement_start;
+       uint64_t busy_cycles = port->busy_cycles;
+
+       if (port->busy_start > 0) {
+               busy_cycles += (now - port->busy_start);
+               port->busy_start = now;
+       }
+
+       int16_t load = (DSW_MAX_LOAD * busy_cycles) / passed;
+
+       port->measurement_start = now;
+       port->busy_cycles = 0;
+
+       port->total_busy_cycles += busy_cycles;
+
+       return load;
+}
+
+static void
+dsw_port_load_update(struct dsw_port *port, uint64_t now)
+{
+       int16_t old_load;
+       int16_t period_load;
+       int16_t new_load;
+
+       old_load = rte_atomic16_read(&port->load);
+
+       period_load = dsw_port_load_close_period(port, now);
+
+       new_load = (period_load + old_load*DSW_OLD_LOAD_WEIGHT) /
+               (DSW_OLD_LOAD_WEIGHT+1);
+
+       rte_atomic16_set(&port->load, new_load);
+}
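
With DSW_OLD_LOAD_WEIGHT == 1, this is an exponentially weighted
moving average with coefficient 1/2: for example, an old load of
16383 (about 50% of DSW_MAX_LOAD) and a measured period load of
22936 (about 70%) yield (22936 + 16383) / 2 = 19659, roughly 60%.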
+
+static void
+dsw_port_consider_load_update(struct dsw_port *port, uint64_t now)
+{
+       if (now < port->next_load_update)
+               return;
+
+       port->next_load_update = now + port->load_update_interval;
+
+       dsw_port_load_update(port, now);
+}
+
+static void
+dsw_port_ctl_enqueue(struct dsw_port *port, struct dsw_ctl_msg *msg)
+{
+       void *raw_msg;
+
+       memcpy(&raw_msg, msg, sizeof(*msg));
+
+       /* there's always room on the ring */
+       while (rte_ring_enqueue(port->ctl_in_ring, raw_msg) != 0)
+               rte_pause();
+}
+
+static int
+dsw_port_ctl_dequeue(struct dsw_port *port, struct dsw_ctl_msg *msg)
+{
+       void *raw_msg;
+       int rc;
+
+       rc = rte_ring_dequeue(port->ctl_in_ring, &raw_msg);
+
+       if (rc == 0)
+               memcpy(msg, &raw_msg, sizeof(*msg));
+
+       return rc;
+}
+
+static void
+dsw_port_ctl_broadcast(struct dsw_evdev *dsw, struct dsw_port *source_port,
+                      uint8_t type, uint8_t queue_id, uint16_t flow_hash)
+{
+       uint16_t port_id;
+       struct dsw_ctl_msg msg = {
+               .type = type,
+               .originating_port_id = source_port->id,
+               .queue_id = queue_id,
+               .flow_hash = flow_hash
+       };
+
+       for (port_id = 0; port_id < dsw->num_ports; port_id++)
+               if (port_id != source_port->id)
+                       dsw_port_ctl_enqueue(&dsw->ports[port_id], &msg);
+}
+
+static bool
+dsw_port_is_flow_paused(struct dsw_port *port, uint8_t queue_id,
+                       uint16_t flow_hash)
+{
+       uint16_t i;
+
+       for (i = 0; i < port->paused_flows_len; i++) {
+               struct dsw_queue_flow *qf = &port->paused_flows[i];
+               if (qf->queue_id == queue_id &&
+                   qf->flow_hash == flow_hash)
+                       return true;
+       }
+       return false;
+}
+
+static void
+dsw_port_add_paused_flow(struct dsw_port *port, uint8_t queue_id,
+                        uint16_t paused_flow_hash)
+{
+       port->paused_flows[port->paused_flows_len] = (struct dsw_queue_flow) {
+               .queue_id = queue_id,
+               .flow_hash = paused_flow_hash
+       };
+       port->paused_flows_len++;
+}
+
+static void
+dsw_port_remove_paused_flow(struct dsw_port *port, uint8_t queue_id,
+                           uint16_t paused_flow_hash)
+{
+       uint16_t i;
+
+       for (i = 0; i < port->paused_flows_len; i++) {
+               struct dsw_queue_flow *qf = &port->paused_flows[i];
+
+               if (qf->queue_id == queue_id &&
+                   qf->flow_hash == paused_flow_hash) {
+                       uint16_t last_idx = port->paused_flows_len-1;
+                       if (i != last_idx)
+                               port->paused_flows[i] =
+                                       port->paused_flows[last_idx];
+                       port->paused_flows_len--;
+                       break;
+               }
+       }
+}
+
+static void
+dsw_port_flush_out_buffers(struct dsw_evdev *dsw, struct dsw_port *source_port);
+
+static void
+dsw_port_handle_pause_flow(struct dsw_evdev *dsw, struct dsw_port *port,
+                          uint8_t originating_port_id, uint8_t queue_id,
+                          uint16_t paused_flow_hash)
+{
+       struct dsw_ctl_msg cfm = {
+               .type = DSW_CTL_CFM,
+               .originating_port_id = port->id,
+               .queue_id = queue_id,
+               .flow_hash = paused_flow_hash
+       };
+
+       DSW_LOG_DP_PORT(DEBUG, port->id, "Pausing queue_id %d flow_hash %d.\n",
+                       queue_id, paused_flow_hash);
+
+       /* There might be already-scheduled events belonging to the
+        * paused flow in the output buffers.
+        */
+       dsw_port_flush_out_buffers(dsw, port);
+
+       dsw_port_add_paused_flow(port, queue_id, paused_flow_hash);
+
+       /* Make sure any stores to the original port's in_ring are seen
+        * before the ctl message.
+        */
+       rte_smp_wmb();
+
+       dsw_port_ctl_enqueue(&dsw->ports[originating_port_id], &cfm);
+}
+
+static void
+dsw_find_lowest_load_port(uint8_t *port_ids, uint16_t num_port_ids,
+                         uint8_t exclude_port_id, int16_t *port_loads,
+                         uint8_t *target_port_id, int16_t *target_load)
+{
+       int16_t candidate_port_id = -1;
+       int16_t candidate_load = DSW_MAX_LOAD;
+       uint16_t i;
+
+       for (i = 0; i < num_port_ids; i++) {
+               uint8_t port_id = port_ids[i];
+               if (port_id != exclude_port_id) {
+                       int16_t load = port_loads[port_id];
+                       if (candidate_port_id == -1 ||
+                           load < candidate_load) {
+                               candidate_port_id = port_id;
+                               candidate_load = load;
+                       }
+               }
+       }
+       *target_port_id = candidate_port_id;
+       *target_load = candidate_load;
+}
+
+struct dsw_queue_flow_burst {
+       struct dsw_queue_flow queue_flow;
+       uint16_t count;
+};
+
+static inline int
+dsw_cmp_burst(const void *v_burst_a, const void *v_burst_b)
+{
+       const struct dsw_queue_flow_burst *burst_a = v_burst_a;
+       const struct dsw_queue_flow_burst *burst_b = v_burst_b;
+
+       int a_count = burst_a->count;
+       int b_count = burst_b->count;
+
+       return a_count - b_count;
+}
+
+#define DSW_QF_TO_INT(_qf)                                     \
+       ((int)((((_qf)->queue_id)<<16)|((_qf)->flow_hash)))
+
+static inline int
+dsw_cmp_qf(const void *v_qf_a, const void *v_qf_b)
+{
+       const struct dsw_queue_flow *qf_a = v_qf_a;
+       const struct dsw_queue_flow *qf_b = v_qf_b;
+
+       return DSW_QF_TO_INT(qf_a) - DSW_QF_TO_INT(qf_b);
+}
+
+static uint16_t
+dsw_sort_qfs_to_bursts(struct dsw_queue_flow *qfs, uint16_t qfs_len,
+                      struct dsw_queue_flow_burst *bursts)
+{
+       uint16_t i;
+       struct dsw_queue_flow_burst *current_burst = NULL;
+       uint16_t num_bursts = 0;
+
+       /* We don't need the stable property, and the list is likely
+        * large enough for qsort() to outperform dsw_stable_sort(),
+        * so we use qsort() here.
+        */
+       qsort(qfs, qfs_len, sizeof(qfs[0]), dsw_cmp_qf);
+
+       /* arrange the (now-consecutive) events into bursts */
+       for (i = 0; i < qfs_len; i++) {
+               if (i == 0 ||
+                   dsw_cmp_qf(&qfs[i], &current_burst->queue_flow) != 0) {
+                       current_burst = &bursts[num_bursts];
+                       current_burst->queue_flow = qfs[i];
+                       current_burst->count = 0;
+                       num_bursts++;
+               }
+               current_burst->count++;
+       }
+
+       qsort(bursts, num_bursts, sizeof(bursts[0]), dsw_cmp_burst);
+
+       return num_bursts;
+}
+
+static bool
+dsw_retrieve_port_loads(struct dsw_evdev *dsw, int16_t *port_loads,
+                       int16_t load_limit)
+{
+       bool below_limit = false;
+       uint16_t i;
+
+       for (i = 0; i < dsw->num_ports; i++) {
+               int16_t load = rte_atomic16_read(&dsw->ports[i].load);
+               if (load < load_limit)
+                       below_limit = true;
+               port_loads[i] = load;
+       }
+       return below_limit;
+}
+
+static bool
+dsw_select_migration_target(struct dsw_evdev *dsw,
+                           struct dsw_port *source_port,
+                           struct dsw_queue_flow_burst *bursts,
+                           uint16_t num_bursts, int16_t *port_loads,
+                           int16_t max_load, struct dsw_queue_flow *target_qf,
+                           uint8_t *target_port_id)
+{
+       uint16_t source_load = port_loads[source_port->id];
+       uint16_t i;
+
+       for (i = 0; i < num_bursts; i++) {
+               struct dsw_queue_flow *qf = &bursts[i].queue_flow;
+
+               if (dsw_port_is_flow_paused(source_port, qf->queue_id,
+                                           qf->flow_hash))
+                       continue;
+
+               struct dsw_queue *queue = &dsw->queues[qf->queue_id];
+               int16_t target_load;
+
+               dsw_find_lowest_load_port(queue->serving_ports,
+                                         queue->num_serving_ports,
+                                         source_port->id, port_loads,
+                                         target_port_id, &target_load);
+
+               if (target_load < source_load &&
+                   target_load < max_load) {
+                       *target_qf = *qf;
+                       return true;
+               }
+       }
+
+       DSW_LOG_DP_PORT(DEBUG, source_port->id, "For the %d flows considered, "
+                       "no target port found with load less than %d.\n",
+                       num_bursts, DSW_LOAD_TO_PERCENT(max_load));
+
+       return false;
+}
+
+static uint8_t
+dsw_schedule(struct dsw_evdev *dsw, uint8_t queue_id, uint16_t flow_hash)
+{
+       struct dsw_queue *queue = &dsw->queues[queue_id];
+       uint8_t port_id;
+
+       if (queue->num_serving_ports > 1)
+               port_id = queue->flow_to_port_map[flow_hash];
+       else
+               /* A single-link queue, or atomic/ordered/parallel but
+                * with just a single serving port.
+                */
+               port_id = queue->serving_ports[0];
+
+       DSW_LOG_DP(DEBUG, "Event with queue_id %d flow_hash %d is scheduled "
+                  "to port %d.\n", queue_id, flow_hash, port_id);
+
+       return port_id;
+}
+
+static void
+dsw_port_transmit_buffered(struct dsw_evdev *dsw, struct dsw_port *source_port,
+                          uint8_t dest_port_id)
+{
+       struct dsw_port *dest_port = &(dsw->ports[dest_port_id]);
+       uint16_t *buffer_len = &source_port->out_buffer_len[dest_port_id];
+       struct rte_event *buffer = source_port->out_buffer[dest_port_id];
+       uint16_t enqueued = 0;
+
+       if (*buffer_len == 0)
+               return;
+
+       /* The rings are dimensioned to fit all in-flight events (even
+        * on a single ring), so looping will work.
+        */
+       do {
+               enqueued +=
+                       rte_event_ring_enqueue_burst(dest_port->in_ring,
+                                                    buffer+enqueued,
+                                                    *buffer_len-enqueued,
+                                                    NULL);
+       } while (unlikely(enqueued != *buffer_len));
+
+       (*buffer_len) = 0;
+}
+
+static uint16_t
+dsw_port_get_parallel_flow_id(struct dsw_port *port)
+{
+       uint16_t flow_id = port->next_parallel_flow_id;
+
+       port->next_parallel_flow_id =
+               (port->next_parallel_flow_id + 1) % DSW_PARALLEL_FLOWS;
+
+       return flow_id;
+}
+
+static void
+dsw_port_buffer_paused(struct dsw_port *port,
+                      const struct rte_event *paused_event)
+{
+       port->paused_events[port->paused_events_len] = *paused_event;
+       port->paused_events_len++;
+}
+
+static void
+dsw_port_buffer_non_paused(struct dsw_evdev *dsw, struct dsw_port *source_port,
+                          uint8_t dest_port_id, const struct rte_event *event)
+{
+       struct rte_event *buffer = source_port->out_buffer[dest_port_id];
+       uint16_t *buffer_len = &source_port->out_buffer_len[dest_port_id];
+
+       if (*buffer_len == DSW_MAX_PORT_OUT_BUFFER)
+               dsw_port_transmit_buffered(dsw, source_port, dest_port_id);
+
+       buffer[*buffer_len] = *event;
+
+       (*buffer_len)++;
+}
+
+#define DSW_FLOW_ID_BITS (24)
+static uint16_t
+dsw_flow_id_hash(uint32_t flow_id)
+{
+       uint16_t hash = 0;
+       uint16_t offset = 0;
+
+       do {
+               hash ^= ((flow_id >> offset) & DSW_MAX_FLOWS_MASK);
+               offset += DSW_MAX_FLOWS_BITS;
+       } while (offset < DSW_FLOW_ID_BITS);
+
+       return hash;
+}
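
A standalone sketch of the fold above with a concrete value: since
DSW_FLOW_ID_BITS is 24 and DSW_MAX_FLOWS_BITS is 15, exactly two
15-bit chunks are XOR-ed. The helper name and the sample flow id are
illustrative.

#include <assert.h>
#include <stdint.h>

static uint16_t
fold_flow_id(uint32_t flow_id)
{
        return (flow_id & 0x7fff) ^ ((flow_id >> 15) & 0x7fff);
}

int
main(void)
{
        /* 0xabcdef: low chunk 0x4def, high chunk 0x157 -> 0x4cb8 */
        assert(fold_flow_id(0xabcdef) == 0x4cb8);
        return 0;
}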
+
+static void
+dsw_port_buffer_parallel(struct dsw_evdev *dsw, struct dsw_port *source_port,
+                        struct rte_event event)
+{
+       uint8_t dest_port_id;
+
+       event.flow_id = dsw_port_get_parallel_flow_id(source_port);
+
+       dest_port_id = dsw_schedule(dsw, event.queue_id,
+                                   dsw_flow_id_hash(event.flow_id));
+
+       dsw_port_buffer_non_paused(dsw, source_port, dest_port_id, &event);
+}
+
+static void
+dsw_port_buffer_event(struct dsw_evdev *dsw, struct dsw_port *source_port,
+                     const struct rte_event *event)
+{
+       uint16_t flow_hash;
+       uint8_t dest_port_id;
+
+       if (unlikely(dsw->queues[event->queue_id].schedule_type ==
+                    RTE_SCHED_TYPE_PARALLEL)) {
+               dsw_port_buffer_parallel(dsw, source_port, *event);
+               return;
+       }
+
+       flow_hash = dsw_flow_id_hash(event->flow_id);
+
+       if (unlikely(dsw_port_is_flow_paused(source_port, event->queue_id,
+                                            flow_hash))) {
+               dsw_port_buffer_paused(source_port, event);
+               return;
+       }
+
+       dest_port_id = dsw_schedule(dsw, event->queue_id, flow_hash);
+
+       dsw_port_buffer_non_paused(dsw, source_port, dest_port_id, event);
+}
+
+static void
+dsw_port_flush_paused_events(struct dsw_evdev *dsw,
+                            struct dsw_port *source_port,
+                            uint8_t queue_id, uint16_t paused_flow_hash)
+{
+       uint16_t paused_events_len = source_port->paused_events_len;
+       struct rte_event paused_events[paused_events_len];
+       uint8_t dest_port_id;
+       uint16_t i;
+
+       if (paused_events_len == 0)
+               return;
+
+       if (dsw_port_is_flow_paused(source_port, queue_id, paused_flow_hash))
+               return;
+
+       rte_memcpy(paused_events, source_port->paused_events,
+                  paused_events_len * sizeof(struct rte_event));
+
+       source_port->paused_events_len = 0;
+
+       dest_port_id = dsw_schedule(dsw, queue_id, paused_flow_hash);
+
+       for (i = 0; i < paused_events_len; i++) {
+               struct rte_event *event = &paused_events[i];
+               uint16_t flow_hash;
+
+               flow_hash = dsw_flow_id_hash(event->flow_id);
+
+               if (event->queue_id == queue_id &&
+                   flow_hash == paused_flow_hash)
+                       dsw_port_buffer_non_paused(dsw, source_port,
+                                                  dest_port_id, event);
+               else
+                       dsw_port_buffer_paused(source_port, event);
+       }
+}
+
+static void
+dsw_port_migration_stats(struct dsw_port *port)
+{
+       uint64_t migration_latency;
+
+       migration_latency = (rte_get_timer_cycles() - port->migration_start);
+       port->migration_latency += migration_latency;
+       port->migrations++;
+}
+
+static void
+dsw_port_end_migration(struct dsw_evdev *dsw, struct dsw_port *port)
+{
+       uint8_t queue_id = port->migration_target_qf.queue_id;
+       uint16_t flow_hash = port->migration_target_qf.flow_hash;
+
+       port->migration_state = DSW_MIGRATION_STATE_IDLE;
+       port->seen_events_len = 0;
+
+       dsw_port_migration_stats(port);
+
+       if (dsw->queues[queue_id].schedule_type != RTE_SCHED_TYPE_PARALLEL) {
+               dsw_port_remove_paused_flow(port, queue_id, flow_hash);
+               dsw_port_flush_paused_events(dsw, port, queue_id, flow_hash);
+       }
+
+       DSW_LOG_DP_PORT(DEBUG, port->id, "Migration completed for queue_id "
+                       "%d flow_hash %d.\n", queue_id, flow_hash);
+}
+
+static void
+dsw_port_consider_migration(struct dsw_evdev *dsw,
+                           struct dsw_port *source_port,
+                           uint64_t now)
+{
+       bool any_port_below_limit;
+       struct dsw_queue_flow *seen_events = source_port->seen_events;
+       uint16_t seen_events_len = source_port->seen_events_len;
+       struct dsw_queue_flow_burst bursts[DSW_MAX_EVENTS_RECORDED];
+       uint16_t num_bursts;
+       int16_t source_port_load;
+       int16_t port_loads[dsw->num_ports];
+
+       if (now < source_port->next_migration)
+               return;
+
+       if (dsw->num_ports == 1)
+               return;
+
+       DSW_LOG_DP_PORT(DEBUG, source_port->id, "Considering migration.\n");
+
+       /* Randomize the interval to avoid having all threads consider
+        * migration at the same point in time, which might lead to
+        * them all choosing the same target port.
+        */
+       source_port->next_migration = now +
+               source_port->migration_interval / 2 +
+               rte_rand() % source_port->migration_interval;
+
+       if (source_port->migration_state != DSW_MIGRATION_STATE_IDLE) {
+               DSW_LOG_DP_PORT(DEBUG, source_port->id,
+                               "Migration already in progress.\n");
+               return;
+       }
+
+       /* For simplicity, avoid migration in the unlikely case there
+        * are still events to consume in the in_buffer (from the last
+        * migration).
+        */
+       if (source_port->in_buffer_len > 0) {
+               DSW_LOG_DP_PORT(DEBUG, source_port->id, "There are still "
+                               "events in the input buffer.\n");
+               return;
+       }
+
+       source_port_load = rte_atomic16_read(&source_port->load);
+       if (source_port_load < DSW_MIN_SOURCE_LOAD_FOR_MIGRATION) {
+               DSW_LOG_DP_PORT(DEBUG, source_port->id,
+                               "Load %d is below threshold level %d.\n",
+                               DSW_LOAD_TO_PERCENT(source_port_load),
+                      DSW_LOAD_TO_PERCENT(DSW_MIN_SOURCE_LOAD_FOR_MIGRATION));
+               return;
+       }
+
+       /* Avoid starting any expensive operations (sorting etc), in
+        * case of a scenario with all ports above the load limit.
+        */
+       any_port_below_limit =
+               dsw_retrieve_port_loads(dsw, port_loads,
+                                       DSW_MAX_TARGET_LOAD_FOR_MIGRATION);
+       if (!any_port_below_limit) {
+               DSW_LOG_DP_PORT(DEBUG, source_port->id,
+                               "Candidate target ports are all too highly "
+                               "loaded.\n");
+               return;
+       }
+
+       /* Sort flows into 'bursts' to allow attempting to migrate
+        * small (but still active) flows first - this is to avoid
+        * having large flows moving around the worker cores too much
+        * (to avoid cache misses, among other things). Of course, the
+        * number of recorded events (queue+flow ids) is limited, and
+        * provides only a snapshot, so only so many conclusions can
+        * be drawn from this data.
+        */
+       num_bursts = dsw_sort_qfs_to_bursts(seen_events, seen_events_len,
+                                           bursts);
+       /* For non-big-little systems, there's no point in moving the
+        * only (known) flow.
+        */
+       if (num_bursts < 2) {
+               DSW_LOG_DP_PORT(DEBUG, source_port->id, "Only a single flow "
+                               "queue_id %d flow_hash %d has been seen.\n",
+                               bursts[0].queue_flow.queue_id,
+                               bursts[0].queue_flow.flow_hash);
+               return;
+       }
+
+       /* The strategy is to first try to find a flow to move to a
+        * port with low load (below the migration-attempt
+        * threshold). If that fails, we try to find a port which is
+        * below the max threshold, and also less loaded than this
+        * port is.
+        */
+       if (!dsw_select_migration_target(dsw, source_port, bursts, num_bursts,
+                                        port_loads,
+                                        DSW_MIN_SOURCE_LOAD_FOR_MIGRATION,
+                                        &source_port->migration_target_qf,
+                                        &source_port->migration_target_port_id)
+           &&
+           !dsw_select_migration_target(dsw, source_port, bursts, num_bursts,
+                                        port_loads,
+                                        DSW_MAX_TARGET_LOAD_FOR_MIGRATION,
+                                        &source_port->migration_target_qf,
+                                      &source_port->migration_target_port_id))
+               return;
+
+       DSW_LOG_DP_PORT(DEBUG, source_port->id, "Migrating queue_id %d "
+                       "flow_hash %d from port %d to port %d.\n",
+                       source_port->migration_target_qf.queue_id,
+                       source_port->migration_target_qf.flow_hash,
+                       source_port->id, source_port->migration_target_port_id);
+
+       /* We have a winner. */
+
+       source_port->migration_state = DSW_MIGRATION_STATE_PAUSING;
+       source_port->migration_start = rte_get_timer_cycles();
+
+       /* No need to go through the whole pause procedure for
+        * parallel queues, since atomic/ordered semantics need not
+        * be maintained.
+        */
+
+       if (dsw->queues[source_port->migration_target_qf.queue_id].schedule_type
+           == RTE_SCHED_TYPE_PARALLEL) {
+               uint8_t queue_id = source_port->migration_target_qf.queue_id;
+               uint16_t flow_hash = source_port->migration_target_qf.flow_hash;
+               uint8_t dest_port_id = source_port->migration_target_port_id;
+
+               /* Single byte-sized stores are always atomic. */
+               dsw->queues[queue_id].flow_to_port_map[flow_hash] =
+                       dest_port_id;
+               rte_smp_wmb();
+
+               dsw_port_end_migration(dsw, source_port);
+
+               return;
+       }
+
+       /* There might be 'loopback' events already scheduled in the
+        * output buffers.
+        */
+       dsw_port_flush_out_buffers(dsw, source_port);
+
+       dsw_port_add_paused_flow(source_port,
+                                source_port->migration_target_qf.queue_id,
+                                source_port->migration_target_qf.flow_hash);
+
+       dsw_port_ctl_broadcast(dsw, source_port, DSW_CTL_PAUS_REQ,
+                              source_port->migration_target_qf.queue_id,
+                              source_port->migration_target_qf.flow_hash);
+       source_port->cfm_cnt = 0;
+}
+
+static void
+dsw_port_flush_paused_events(struct dsw_evdev *dsw,
+                            struct dsw_port *source_port,
+                            uint8_t queue_id, uint16_t paused_flow_hash);
+
+static void
+dsw_port_handle_unpause_flow(struct dsw_evdev *dsw, struct dsw_port *port,
+                            uint8_t originating_port_id, uint8_t queue_id,
+                            uint16_t paused_flow_hash)
+{
+       struct dsw_ctl_msg cfm = {
+               .type = DSW_CTL_CFM,
+               .originating_port_id = port->id,
+               .queue_id = queue_id,
+               .flow_hash = paused_flow_hash
+       };
+
+       DSW_LOG_DP_PORT(DEBUG, port->id, "Un-pausing queue_id %d flow_hash %d.\n",
+                       queue_id, paused_flow_hash);
+
+       dsw_port_remove_paused_flow(port, queue_id, paused_flow_hash);
+
+       rte_smp_rmb();
+
+       dsw_port_ctl_enqueue(&dsw->ports[originating_port_id], &cfm);
+
+       dsw_port_flush_paused_events(dsw, port, queue_id, paused_flow_hash);
+}
+
+#define FORWARD_BURST_SIZE (32)
+
+static void
+dsw_port_forward_migrated_flow(struct dsw_port *source_port,
+                              struct rte_event_ring *dest_ring,
+                              uint8_t queue_id,
+                              uint16_t flow_hash)
+{
+       uint16_t events_left;
+
+       /* The control ring message should be seen before the ring
+        * count is read on the port's in_ring.
+        */
+       rte_smp_rmb();
+
+       events_left = rte_event_ring_count(source_port->in_ring);
+
+       while (events_left > 0) {
+               uint16_t in_burst_size =
+                       RTE_MIN(FORWARD_BURST_SIZE, events_left);
+               struct rte_event in_burst[in_burst_size];
+               uint16_t in_len;
+               uint16_t i;
+
+               in_len = rte_event_ring_dequeue_burst(source_port->in_ring,
+                                                     in_burst,
+                                                     in_burst_size, NULL);
+               /* No need to care about bursting forwarded events (to
+                * the destination port's in_ring), since migration
+                * doesn't happen very often, and the majority of the
+                * dequeued events will likely *not* be forwarded.
+                */
+               for (i = 0; i < in_len; i++) {
+                       struct rte_event *e = &in_burst[i];
+                       if (e->queue_id == queue_id &&
+                           dsw_flow_id_hash(e->flow_id) == flow_hash) {
+                               while (rte_event_ring_enqueue_burst(dest_ring,
+                                                                   e, 1,
+                                                                   NULL) != 1)
+                                       rte_pause();
+                       } else {
+                               uint16_t last_idx = source_port->in_buffer_len;
+                               source_port->in_buffer[last_idx] = *e;
+                               source_port->in_buffer_len++;
+                       }
+               }
+
+               events_left -= in_len;
+       }
+}
+
+static void
+dsw_port_move_migrating_flow(struct dsw_evdev *dsw,
+                            struct dsw_port *source_port)
+{
+       uint8_t queue_id = source_port->migration_target_qf.queue_id;
+       uint16_t flow_hash = source_port->migration_target_qf.flow_hash;
+       uint8_t dest_port_id = source_port->migration_target_port_id;
+       struct dsw_port *dest_port = &dsw->ports[dest_port_id];
+
+       dsw_port_flush_out_buffers(dsw, source_port);
+
+       rte_smp_wmb();
+
+       dsw->queues[queue_id].flow_to_port_map[flow_hash] =
+               dest_port_id;
+
+       dsw_port_forward_migrated_flow(source_port, dest_port->in_ring,
+                                      queue_id, flow_hash);
+
+       /* Flow table update and migration destination port's enqueues
+        * must be seen before the control message.
+        */
+       rte_smp_wmb();
+
+       dsw_port_ctl_broadcast(dsw, source_port, DSW_CTL_UNPAUS_REQ, queue_id,
+                              flow_hash);
+       source_port->cfm_cnt = 0;
+       source_port->migration_state = DSW_MIGRATION_STATE_UNPAUSING;
+}
+
+static void
+dsw_port_handle_confirm(struct dsw_evdev *dsw, struct dsw_port *port)
+{
+       port->cfm_cnt++;
+
+       if (port->cfm_cnt == (dsw->num_ports-1)) {
+               switch (port->migration_state) {
+               case DSW_MIGRATION_STATE_PAUSING:
+                       DSW_LOG_DP_PORT(DEBUG, port->id, "Going into forwarding "
+                                       "migration state.\n");
+                       port->migration_state = DSW_MIGRATION_STATE_FORWARDING;
+                       break;
+               case DSW_MIGRATION_STATE_UNPAUSING:
+                       dsw_port_end_migration(dsw, port);
+                       break;
+               default:
+                       RTE_ASSERT(0);
+                       break;
+               }
+       }
+}
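
Pieced together from dsw_port_consider_migration(), dsw_port_move_migrating_flow() and dsw_port_handle_confirm() above, the per-port migration state machine is roughly the following (a reading aid only, not authoritative):

    IDLE -------(target found, PAUS_REQ broadcast)------> PAUSING
    PAUSING ----(num_ports-1 CFMs received)-------------> FORWARDING
    FORWARDING -(flow moved, UNPAUS_REQ broadcast)------> UNPAUSING
    UNPAUSING --(num_ports-1 CFMs received)-------------> IDLE

For parallel queues, the flow-to-port map is updated directly and the port transitions straight back to IDLE.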
+
+static void
+dsw_port_ctl_process(struct dsw_evdev *dsw, struct dsw_port *port)
+{
+       struct dsw_ctl_msg msg;
+
+       /* So that any table loads happen before the ring dequeue, in
+        * the case of a pause request message.
+        */
+       rte_smp_rmb();
+
+       if (dsw_port_ctl_dequeue(port, &msg) == 0) {
+               switch (msg.type) {
+               case DSW_CTL_PAUS_REQ:
+                       dsw_port_handle_pause_flow(dsw, port,
+                                                  msg.originating_port_id,
+                                                  msg.queue_id, msg.flow_hash);
+                       break;
+               case DSW_CTL_UNPAUS_REQ:
+                       dsw_port_handle_unpause_flow(dsw, port,
+                                                    msg.originating_port_id,
+                                                    msg.queue_id,
+                                                    msg.flow_hash);
+                       break;
+               case DSW_CTL_CFM:
+                       dsw_port_handle_confirm(dsw, port);
+                       break;
+               }
+       }
+}
+
+static void
+dsw_port_note_op(struct dsw_port *port, uint16_t num_events)
+{
+       /* To pull the control ring reasonably often on busy ports,
+        * each dequeued/enqueued event is considered an 'op' too.
+        */
+       port->ops_since_bg_task += (num_events+1);
+}
+
+static void
+dsw_port_bg_process(struct dsw_evdev *dsw, struct dsw_port *port)
+{
+       if (unlikely(port->migration_state == DSW_MIGRATION_STATE_FORWARDING &&
+                    port->pending_releases == 0))
+               dsw_port_move_migrating_flow(dsw, port);
+
+       /* Polling the control ring is relatively inexpensive, and
+        * polling it often helps bring down migration latency, so
+        * do this on every iteration.
+        */
+       dsw_port_ctl_process(dsw, port);
+
+       /* To avoid considering migration and flushing output buffers
+        * on every dequeue/enqueue call, the scheduler only performs
+        * such 'background' tasks every nth
+        * (i.e. DSW_MAX_PORT_OPS_PER_BG_TASK) operation.
+        */
+       if (unlikely(port->ops_since_bg_task >= DSW_MAX_PORT_OPS_PER_BG_TASK)) {
+               uint64_t now;
+
+               now = rte_get_timer_cycles();
+
+               port->last_bg = now;
+
+               /* Logic to avoid having events linger in the output
+                * buffer too long.
+                */
+               dsw_port_flush_out_buffers(dsw, port);
+
+               dsw_port_consider_load_update(port, now);
+
+               dsw_port_consider_migration(dsw, port, now);
+
+               port->ops_since_bg_task = 0;
+       }
+}
+
+static void
+dsw_port_flush_out_buffers(struct dsw_evdev *dsw, struct dsw_port *source_port)
+{
+       uint16_t dest_port_id;
+
+       for (dest_port_id = 0; dest_port_id < dsw->num_ports; dest_port_id++)
+               dsw_port_transmit_buffered(dsw, source_port, dest_port_id);
+}
+
+uint16_t
+dsw_event_enqueue(void *port, const struct rte_event *ev)
+{
+       return dsw_event_enqueue_burst(port, ev, unlikely(ev == NULL) ? 0 : 1);
+}
+
+static __rte_always_inline uint16_t
+dsw_event_enqueue_burst_generic(void *port, const struct rte_event events[],
+                               uint16_t events_len, bool op_types_known,
+                               uint16_t num_new, uint16_t num_release,
+                               uint16_t num_non_release)
+{
+       struct dsw_port *source_port = port;
+       struct dsw_evdev *dsw = source_port->dsw;
+       bool enough_credits;
+       uint16_t i;
+
+       DSW_LOG_DP_PORT(DEBUG, source_port->id, "Attempting to enqueue %d "
+                       "events to port %d.\n", events_len, source_port->id);
+
+       dsw_port_bg_process(dsw, source_port);
+
+       /* XXX: For performance (=ring efficiency) reasons, the
+        * scheduler relies on internal non-ring buffers instead of
+        * immediately sending the event to the destination ring. For
+        * a producer that doesn't intend to produce or consume any
+        * more events, the scheduler provides a way to flush the
+        * buffer, by means of doing an enqueue of zero events (see
+        * the usage sketch after this function). In addition, a port
+        * cannot be left "unattended" (e.g. unused) for long periods
+        * of time, since that would stall migration. Eventdev API
+        * extensions to provide a cleaner way to achieve both of
+        * these functions should be considered.
+        */
+       if (unlikely(events_len == 0)) {
+               dsw_port_note_op(source_port, DSW_MAX_PORT_OPS_PER_BG_TASK);
+               return 0;
+       }
+
+       if (unlikely(events_len > source_port->enqueue_depth))
+               events_len = source_port->enqueue_depth;
+
+       dsw_port_note_op(source_port, events_len);
+
+       if (!op_types_known)
+               for (i = 0; i < events_len; i++) {
+                       switch (events[i].op) {
+                       case RTE_EVENT_OP_RELEASE:
+                               num_release++;
+                               break;
+                       case RTE_EVENT_OP_NEW:
+                               num_new++;
+                               /* Falls through. */
+                       default:
+                               num_non_release++;
+                               break;
+                       }
+               }
+
+       /* Technically, we could allow the non-new events up to the
+        * first new event in the array into the system, but for
+        * simplicity reasons, we deny the whole burst if the port is
+        * above the water mark.
+        */
+       if (unlikely(num_new > 0 && rte_atomic32_read(&dsw->credits_on_loan) >
+                    source_port->new_event_threshold))
+               return 0;
+
+       enough_credits = dsw_port_acquire_credits(dsw, source_port,
+                                                 num_non_release);
+       if (unlikely(!enough_credits))
+               return 0;
+
+       source_port->pending_releases -= num_release;
+
+       dsw_port_enqueue_stats(source_port, num_new,
+                              num_non_release-num_new, num_release);
+
+       for (i = 0; i < events_len; i++) {
+               const struct rte_event *event = &events[i];
+
+               if (likely(num_release == 0 ||
+                          event->op != RTE_EVENT_OP_RELEASE))
+                       dsw_port_buffer_event(dsw, source_port, event);
+               dsw_port_queue_enqueue_stats(source_port, event->queue_id);
+       }
+
+       DSW_LOG_DP_PORT(DEBUG, source_port->id, "%d non-release events "
+                       "accepted.\n", num_non_release);
+
+       return num_non_release;
+}
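
As promised in the XXX comment above, a hedged application-side sketch of the flush-by-empty-enqueue convention; dev_id and port_id are hypothetical, assumed-valid identifiers:

    #include <rte_eventdev.h>

    static void
    flush_dsw_port(uint8_t dev_id, uint8_t port_id)
    {
            /* Per the driver comment above, an enqueue of zero
             * events is the way to request that the port's internal
             * output buffers be flushed to the destination rings.
             */
            rte_event_enqueue_burst(dev_id, port_id, NULL, 0);
    }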
+
+uint16_t
+dsw_event_enqueue_burst(void *port, const struct rte_event events[],
+                       uint16_t events_len)
+{
+       return dsw_event_enqueue_burst_generic(port, events, events_len, false,
+                                              0, 0, 0);
+}
+
+uint16_t
+dsw_event_enqueue_new_burst(void *port, const struct rte_event events[],
+                           uint16_t events_len)
+{
+       return dsw_event_enqueue_burst_generic(port, events, events_len, true,
+                                              events_len, 0, events_len);
+}
+
+uint16_t
+dsw_event_enqueue_forward_burst(void *port, const struct rte_event events[],
+                               uint16_t events_len)
+{
+       return dsw_event_enqueue_burst_generic(port, events, events_len, true,
+                                              0, 0, events_len);
+}
+
+uint16_t
+dsw_event_dequeue(void *port, struct rte_event *events, uint64_t wait)
+{
+       return dsw_event_dequeue_burst(port, events, 1, wait);
+}
+
+static void
+dsw_port_record_seen_events(struct dsw_port *port, struct rte_event *events,
+                           uint16_t num)
+{
+       uint16_t i;
+
+       dsw_port_dequeue_stats(port, num);
+
+       for (i = 0; i < num; i++) {
+               uint16_t l_idx = port->seen_events_idx;
+               struct dsw_queue_flow *qf = &port->seen_events[l_idx];
+               struct rte_event *event = &events[i];
+               qf->queue_id = event->queue_id;
+               qf->flow_hash = dsw_flow_id_hash(event->flow_id);
+
+               port->seen_events_idx = (l_idx+1) % DSW_MAX_EVENTS_RECORDED;
+
+               dsw_port_queue_dequeued_stats(port, event->queue_id);
+       }
+
+       if (unlikely(port->seen_events_len != DSW_MAX_EVENTS_RECORDED))
+               port->seen_events_len =
+                       RTE_MIN(port->seen_events_len + num,
+                               DSW_MAX_EVENTS_RECORDED);
+}
+
+#ifdef DSW_SORT_DEQUEUED
+
+#define DSW_EVENT_TO_INT(_event)                               \
+       ((int)((((_event)->queue_id)<<16)|((_event)->flow_id)))
+
+static inline int
+dsw_cmp_event(const void *v_event_a, const void *v_event_b)
+{
+       const struct rte_event *event_a = v_event_a;
+       const struct rte_event *event_b = v_event_b;
+
+       return DSW_EVENT_TO_INT(event_a) - DSW_EVENT_TO_INT(event_b);
+}
+#endif
+
+static uint16_t
+dsw_port_dequeue_burst(struct dsw_port *port, struct rte_event *events,
+                      uint16_t num)
+{
+       struct dsw_port *source_port = port;
+       struct dsw_evdev *dsw = source_port->dsw;
+
+       dsw_port_ctl_process(dsw, source_port);
+
+       if (unlikely(port->in_buffer_len > 0)) {
+               uint16_t dequeued = RTE_MIN(num, port->in_buffer_len);
+
+               rte_memcpy(events, &port->in_buffer[port->in_buffer_start],
+                          dequeued * sizeof(struct rte_event));
+
+               port->in_buffer_start += dequeued;
+               port->in_buffer_len -= dequeued;
+
+               if (port->in_buffer_len == 0)
+                       port->in_buffer_start = 0;
+
+               return dequeued;
+       }
+
+       return rte_event_ring_dequeue_burst(port->in_ring, events, num, NULL);
+}
+
+uint16_t
+dsw_event_dequeue_burst(void *port, struct rte_event *events, uint16_t num,
+                       uint64_t wait __rte_unused)
+{
+       struct dsw_port *source_port = port;
+       struct dsw_evdev *dsw = source_port->dsw;
+       uint16_t dequeued;
+
+       source_port->pending_releases = 0;
+
+       dsw_port_bg_process(dsw, source_port);
+
+       if (unlikely(num > source_port->dequeue_depth))
+               num = source_port->dequeue_depth;
+
+       dequeued = dsw_port_dequeue_burst(source_port, events, num);
+
+       source_port->pending_releases = dequeued;
+
+       dsw_port_load_record(source_port, dequeued);
+
+       dsw_port_note_op(source_port, dequeued);
+
+       if (dequeued > 0) {
+               DSW_LOG_DP_PORT(DEBUG, source_port->id, "Dequeued %d events.\n",
+                               dequeued);
+
+               dsw_port_return_credits(dsw, source_port, dequeued);
+
+               /* One potential optimization one might think of is to
+                * add a migration state (prior to 'pausing'), and
+                * only record seen events when the port is in this
+                * state (and transition to 'pausing' when enough
+                * events have been gathered). However, that scheme
+                * doesn't seem to improve performance.
+                */
+               dsw_port_record_seen_events(port, events, dequeued);
+       }
+       /* XXX: Assuming the port can't produce any more work,
+        *      consider flushing the output buffer, on dequeued ==
+        *      0.
+        */
+
+#ifdef DSW_SORT_DEQUEUED
+       dsw_stable_sort(events, dequeued, sizeof(events[0]), dsw_cmp_event);
+#endif
+
+       return dequeued;
+}
diff --git a/drivers/event/dsw/dsw_sort.h b/drivers/event/dsw/dsw_sort.h
new file mode 100644 (file)
index 0000000..609767f
--- /dev/null
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Ericsson AB
+ */
+
+#ifndef _DSW_SORT_
+#define _DSW_SORT_
+
+#include <string.h>
+
+#include <rte_common.h>
+
+#define DSW_ARY_ELEM_PTR(_ary, _idx, _elem_size)       \
+       RTE_PTR_ADD(_ary, (_idx) * (_elem_size))
+
+#define DSW_ARY_ELEM_SWAP(_ary, _a_idx, _b_idx, _elem_size)            \
+       do {                                                            \
+               char tmp[_elem_size];                                   \
+               void *_a_ptr = DSW_ARY_ELEM_PTR(_ary, _a_idx, _elem_size); \
+               void *_b_ptr = DSW_ARY_ELEM_PTR(_ary, _b_idx, _elem_size); \
+               memcpy(tmp, _a_ptr, _elem_size);                        \
+               memcpy(_a_ptr, _b_ptr, _elem_size);                     \
+               memcpy(_b_ptr, tmp, _elem_size);                        \
+       } while (0)
+
+static inline void
+dsw_insertion_sort(void *ary, uint16_t len, uint16_t elem_size,
+                  int (*cmp_fn)(const void *, const void *))
+{
+       uint16_t i;
+
+       for (i = 1; i < len; i++) {
+               uint16_t j;
+               for (j = i; j > 0 &&
+                            cmp_fn(DSW_ARY_ELEM_PTR(ary, j-1, elem_size),
+                                   DSW_ARY_ELEM_PTR(ary, j, elem_size)) > 0;
+                    j--)
+                       DSW_ARY_ELEM_SWAP(ary, j, j-1, elem_size);
+       }
+}
+
+static inline void
+dsw_stable_sort(void *ary, uint16_t len, uint16_t elem_size,
+               int (*cmp_fn)(const void *, const void *))
+{
+       dsw_insertion_sort(ary, len, elem_size, cmp_fn);
+}
+
+#endif
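
A minimal usage sketch of the sort above, mirroring how dsw_event.c applies it under DSW_SORT_DEQUEUED; the comparison function and data are made up for illustration:

    #include <stdint.h>
    /* #include "dsw_sort.h" */

    static int
    cmp_u16(const void *a, const void *b)
    {
            return (int)*(const uint16_t *)a - (int)*(const uint16_t *)b;
    }

    /* After the call, v is { 1, 2, 3 }. Insertion sort never swaps
     * equal elements past each other, hence the "stable" name.
     */
    static void
    sort_example(void)
    {
            uint16_t v[] = { 3, 1, 2 };

            dsw_stable_sort(v, 3, sizeof(v[0]), cmp_u16);
    }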
diff --git a/drivers/event/dsw/dsw_xstats.c b/drivers/event/dsw/dsw_xstats.c
new file mode 100644 (file)
index 0000000..bf2eec5
--- /dev/null
@@ -0,0 +1,288 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Ericsson AB
+ */
+
+#include "dsw_evdev.h"
+
+#include <stdbool.h>
+#include <string.h>
+
+#include <rte_debug.h>
+
+/* The high bits in the xstats id are used to store an additional
+ * parameter (beyond the queue or port id already in the xstats
+ * interface).
+ */
+#define DSW_XSTATS_ID_PARAM_BITS (8)
+#define DSW_XSTATS_ID_STAT_BITS                                        \
+       (sizeof(unsigned int)*CHAR_BIT - DSW_XSTATS_ID_PARAM_BITS)
+#define DSW_XSTATS_ID_STAT_MASK ((1 << DSW_XSTATS_ID_STAT_BITS) - 1)
+
+#define DSW_XSTATS_ID_GET_PARAM(id)            \
+       ((id)>>DSW_XSTATS_ID_STAT_BITS)
+
+#define DSW_XSTATS_ID_GET_STAT(id)             \
+       ((id) & DSW_XSTATS_ID_STAT_MASK)
+
+#define DSW_XSTATS_ID_CREATE(id, param_value)                  \
+       (((param_value) << DSW_XSTATS_ID_STAT_BITS) | id)
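
Assuming a 32-bit unsigned int (so DSW_XSTATS_ID_STAT_BITS == 24 and the stat mask is 0xffffff), a worked example of the id encoding:

    /* stat_idx 3 ("port_%u_queue_%u_enqueued" below), queue_id 2:
     *
     *   id = DSW_XSTATS_ID_CREATE(3, 2) = (2 << 24) | 3 = 0x02000003
     *
     *   DSW_XSTATS_ID_GET_STAT(id)  == 0x02000003 & 0xffffff == 3
     *   DSW_XSTATS_ID_GET_PARAM(id) == 0x02000003 >> 24      == 2
     */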
+
+typedef
+uint64_t (*dsw_xstats_dev_get_value_fn)(struct dsw_evdev *dsw);
+
+struct dsw_xstat_dev {
+       const char *name;
+       dsw_xstats_dev_get_value_fn get_value_fn;
+};
+
+typedef
+uint64_t (*dsw_xstats_port_get_value_fn)(struct dsw_evdev *dsw,
+                                        uint8_t port_id, uint8_t queue_id);
+
+struct dsw_xstats_port {
+       const char *name_fmt;
+       dsw_xstats_port_get_value_fn get_value_fn;
+       bool per_queue;
+};
+
+static uint64_t
+dsw_xstats_dev_credits_on_loan(struct dsw_evdev *dsw)
+{
+       return rte_atomic32_read(&dsw->credits_on_loan);
+}
+
+static struct dsw_xstat_dev dsw_dev_xstats[] = {
+       { "dev_credits_on_loan", dsw_xstats_dev_credits_on_loan }
+};
+
+#define DSW_GEN_PORT_ACCESS_FN(_variable)                              \
+       static uint64_t                                                 \
+       dsw_xstats_port_get_ ## _variable(struct dsw_evdev *dsw,        \
+                                         uint8_t port_id,              \
+                                         uint8_t queue_id __rte_unused) \
+       {                                                               \
+               return dsw->ports[port_id]._variable;                   \
+       }
+
+DSW_GEN_PORT_ACCESS_FN(new_enqueued)
+DSW_GEN_PORT_ACCESS_FN(forward_enqueued)
+DSW_GEN_PORT_ACCESS_FN(release_enqueued)
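
For reference, the DSW_GEN_PORT_ACCESS_FN(new_enqueued) invocation above expands to (whitespace adjusted):

    static uint64_t
    dsw_xstats_port_get_new_enqueued(struct dsw_evdev *dsw,
                                     uint8_t port_id,
                                     uint8_t queue_id __rte_unused)
    {
            return dsw->ports[port_id].new_enqueued;
    }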
+
+static uint64_t
+dsw_xstats_port_get_queue_enqueued(struct dsw_evdev *dsw, uint8_t port_id,
+                                  uint8_t queue_id)
+{
+       return dsw->ports[port_id].queue_enqueued[queue_id];
+}
+
+DSW_GEN_PORT_ACCESS_FN(dequeued)
+
+static uint64_t
+dsw_xstats_port_get_queue_dequeued(struct dsw_evdev *dsw, uint8_t port_id,
+                                  uint8_t queue_id)
+{
+       return dsw->ports[port_id].queue_dequeued[queue_id];
+}
+
+DSW_GEN_PORT_ACCESS_FN(migrations)
+
+static uint64_t
+dsw_xstats_port_get_migration_latency(struct dsw_evdev *dsw, uint8_t port_id,
+                                     uint8_t queue_id __rte_unused)
+{
+       uint64_t total_latency = dsw->ports[port_id].migration_latency;
+       uint64_t num_migrations = dsw->ports[port_id].migrations;
+
+       return num_migrations > 0 ? total_latency / num_migrations : 0;
+}
+
+static uint64_t
+dsw_xstats_port_get_event_proc_latency(struct dsw_evdev *dsw, uint8_t port_id,
+                                      uint8_t queue_id __rte_unused)
+{
+       uint64_t total_busy_cycles =
+               dsw->ports[port_id].total_busy_cycles;
+       uint64_t dequeued =
+               dsw->ports[port_id].dequeued;
+
+       return dequeued > 0 ? total_busy_cycles / dequeued : 0;
+}
+
+DSW_GEN_PORT_ACCESS_FN(inflight_credits)
+
+static uint64_t
+dsw_xstats_port_get_load(struct dsw_evdev *dsw, uint8_t port_id,
+                        uint8_t queue_id __rte_unused)
+{
+       int16_t load;
+
+       load = rte_atomic16_read(&dsw->ports[port_id].load);
+
+       return DSW_LOAD_TO_PERCENT(load);
+}
+
+DSW_GEN_PORT_ACCESS_FN(last_bg)
+
+static struct dsw_xstats_port dsw_port_xstats[] = {
+       { "port_%u_new_enqueued", dsw_xstats_port_get_new_enqueued,
+         false },
+       { "port_%u_forward_enqueued", dsw_xstats_port_get_forward_enqueued,
+         false },
+       { "port_%u_release_enqueued", dsw_xstats_port_get_release_enqueued,
+         false },
+       { "port_%u_queue_%u_enqueued", dsw_xstats_port_get_queue_enqueued,
+         true },
+       { "port_%u_dequeued", dsw_xstats_port_get_dequeued,
+         false },
+       { "port_%u_queue_%u_dequeued", dsw_xstats_port_get_queue_dequeued,
+         true },
+       { "port_%u_migrations", dsw_xstats_port_get_migrations,
+         false },
+       { "port_%u_migration_latency", dsw_xstats_port_get_migration_latency,
+         false },
+       { "port_%u_event_proc_latency", dsw_xstats_port_get_event_proc_latency,
+         false },
+       { "port_%u_inflight_credits", dsw_xstats_port_get_inflight_credits,
+         false },
+       { "port_%u_load", dsw_xstats_port_get_load,
+         false },
+       { "port_%u_last_bg", dsw_xstats_port_get_last_bg,
+         false }
+};
+
+static int
+dsw_xstats_dev_get_names(struct rte_event_dev_xstats_name *xstats_names,
+                        unsigned int *ids, unsigned int size)
+{
+       unsigned int i;
+
+       for (i = 0; i < RTE_DIM(dsw_dev_xstats) && i < size; i++) {
+               ids[i] = i;
+               strcpy(xstats_names[i].name, dsw_dev_xstats[i].name);
+       }
+
+       return i;
+}
+
+static int
+dsw_xstats_port_get_names(struct dsw_evdev *dsw, uint8_t port_id,
+                         struct rte_event_dev_xstats_name *xstats_names,
+                         unsigned int *ids, unsigned int size)
+{
+       uint8_t queue_id = 0;
+       unsigned int id_idx;
+       unsigned int stat_idx;
+
+       for (id_idx = 0, stat_idx = 0;
+            id_idx < size && stat_idx < RTE_DIM(dsw_port_xstats);
+            id_idx++) {
+               struct dsw_xstats_port *xstat = &dsw_port_xstats[stat_idx];
+
+               if (xstat->per_queue) {
+                       ids[id_idx] = DSW_XSTATS_ID_CREATE(stat_idx, queue_id);
+                       snprintf(xstats_names[id_idx].name,
+                                RTE_EVENT_DEV_XSTATS_NAME_SIZE,
+                                dsw_port_xstats[stat_idx].name_fmt, port_id,
+                                queue_id);
+                       queue_id++;
+               } else {
+                       ids[id_idx] = stat_idx;
+                       snprintf(xstats_names[id_idx].name,
+                                RTE_EVENT_DEV_XSTATS_NAME_SIZE,
+                                dsw_port_xstats[stat_idx].name_fmt, port_id);
+               }
+
+               if (!(xstat->per_queue && queue_id < dsw->num_queues)) {
+                       stat_idx++;
+                       queue_id = 0;
+               }
+       }
+       return id_idx;
+}
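
To illustrate the loop above: with two configured queues, the names generated for port 1 would start as follows (per-queue stats expand once per queue before stat_idx advances):

    port_1_new_enqueued
    port_1_forward_enqueued
    port_1_release_enqueued
    port_1_queue_0_enqueued
    port_1_queue_1_enqueued
    port_1_dequeued
    port_1_queue_0_dequeued
    port_1_queue_1_dequeued
    port_1_migrations
    ...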
+
+int
+dsw_xstats_get_names(const struct rte_eventdev *dev,
+                    enum rte_event_dev_xstats_mode mode,
+                    uint8_t queue_port_id,
+                    struct rte_event_dev_xstats_name *xstats_names,
+                    unsigned int *ids, unsigned int size)
+{
+       struct dsw_evdev *dsw = dsw_pmd_priv(dev);
+
+       switch (mode) {
+       case RTE_EVENT_DEV_XSTATS_DEVICE:
+               return dsw_xstats_dev_get_names(xstats_names, ids, size);
+       case RTE_EVENT_DEV_XSTATS_PORT:
+               return dsw_xstats_port_get_names(dsw, queue_port_id,
+                                                xstats_names, ids, size);
+       case RTE_EVENT_DEV_XSTATS_QUEUE:
+               return 0;
+       default:
+               RTE_ASSERT(false);
+               return -1;
+       }
+}
+
+static int
+dsw_xstats_dev_get(const struct rte_eventdev *dev,
+                  const unsigned int ids[], uint64_t values[], unsigned int n)
+{
+       struct dsw_evdev *dsw = dsw_pmd_priv(dev);
+       unsigned int i;
+
+       for (i = 0; i < n; i++) {
+               unsigned int id = ids[i];
+               struct dsw_xstat_dev *xstat = &dsw_dev_xstats[id];
+               values[i] = xstat->get_value_fn(dsw);
+       }
+       return n;
+}
+
+static int
+dsw_xstats_port_get(const struct rte_eventdev *dev, uint8_t port_id,
+                   const unsigned int ids[], uint64_t values[], unsigned int n)
+{
+       struct dsw_evdev *dsw = dsw_pmd_priv(dev);
+       unsigned int i;
+
+       for (i = 0; i < n; i++) {
+               unsigned int id = ids[i];
+               unsigned int stat_idx = DSW_XSTATS_ID_GET_STAT(id);
+               struct dsw_xstats_port *xstat = &dsw_port_xstats[stat_idx];
+               uint8_t queue_id = 0;
+
+               if (xstat->per_queue)
+                       queue_id = DSW_XSTATS_ID_GET_PARAM(id);
+
+               values[i] = xstat->get_value_fn(dsw, port_id, queue_id);
+       }
+       return n;
+}
+
+int
+dsw_xstats_get(const struct rte_eventdev *dev,
+              enum rte_event_dev_xstats_mode mode, uint8_t queue_port_id,
+              const unsigned int ids[], uint64_t values[], unsigned int n)
+{
+       switch (mode) {
+       case RTE_EVENT_DEV_XSTATS_DEVICE:
+               return dsw_xstats_dev_get(dev, ids, values, n);
+       case RTE_EVENT_DEV_XSTATS_PORT:
+               return dsw_xstats_port_get(dev, queue_port_id, ids, values, n);
+       case RTE_EVENT_DEV_XSTATS_QUEUE:
+               return 0;
+       default:
+               RTE_ASSERT(false);
+               return -1;
+       }
+       return 0;
+}
+
+uint64_t dsw_xstats_get_by_name(const struct rte_eventdev *dev,
+                               const char *name, unsigned int *id)
+{
+       RTE_SET_USED(dev);
+       RTE_SET_USED(name);
+       RTE_SET_USED(id);
+       return 0;
+}
diff --git a/drivers/event/dsw/meson.build b/drivers/event/dsw/meson.build
new file mode 100644 (file)
index 0000000..a6b7bfa
--- /dev/null
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Ericsson AB
+
+allow_experimental_apis = true
+deps += ['bus_vdev']
+sources = files('dsw_evdev.c', 'dsw_event.c', 'dsw_xstats.c')
diff --git a/drivers/event/dsw/rte_pmd_dsw_event_version.map b/drivers/event/dsw/rte_pmd_dsw_event_version.map
new file mode 100644 (file)
index 0000000..24bd5cd
--- /dev/null
@@ -0,0 +1,3 @@
+DPDK_18.11 {
+       local: *;
+};
index e951199..836ecbb 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-drivers = ['dpaa', 'dpaa2', 'octeontx', 'skeleton', 'sw']
+drivers = ['dpaa', 'dpaa2', 'octeontx', 'opdl', 'skeleton', 'sw', 'dsw']
 std_deps = ['eventdev', 'kvargs']
 config_flag_fmt = 'RTE_LIBRTE_@0@_EVENTDEV_PMD'
 driver_name_fmt = 'rte_pmd_@0@_event'
index 90ad221..2e07890 100644 (file)
@@ -17,7 +17,7 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API
 
 LDLIBS += -lrte_eal -lrte_eventdev -lrte_common_octeontx -lrte_pmd_octeontx
 LDLIBS += -lrte_bus_pci -lrte_mempool -lrte_mbuf -lrte_kvargs
-LDLIBS += -lrte_bus_vdev
+LDLIBS += -lrte_bus_vdev -lrte_ethdev
 
 EXPORT_MAP := rte_pmd_octeontx_event_version.map
 
index 16a3a04..a273d4c 100644 (file)
@@ -146,6 +146,7 @@ ssovf_fastpath_fns_set(struct rte_eventdev *dev)
        dev->enqueue_forward_burst = ssows_enq_fwd_burst;
        dev->dequeue       = ssows_deq;
        dev->dequeue_burst = ssows_deq_burst;
+       dev->txa_enqueue = sso_event_tx_adapter_enqueue;
 
        if (edev->is_timeout_deq) {
                dev->dequeue       = ssows_deq_timeout;
@@ -454,7 +455,6 @@ ssovf_eth_rx_adapter_queue_del(const struct rte_eventdev *dev,
        const struct octeontx_nic *nic = eth_dev->data->dev_private;
        pki_del_qos_t pki_qos;
        RTE_SET_USED(dev);
-       RTE_SET_USED(rx_queue_id);
 
        ret = strncmp(eth_dev->data->name, "eth_octeontx", 12);
        if (ret)
@@ -466,7 +466,7 @@ ssovf_eth_rx_adapter_queue_del(const struct rte_eventdev *dev,
        ret = octeontx_pki_port_delete_qos(nic->port_id, &pki_qos);
        if (ret < 0)
                ssovf_log_err("Failed to delete QOS port=%d, q=%d",
-                               nic->port_id, queue_conf->ev.queue_id);
+                               nic->port_id, rx_queue_id);
        return ret;
 }
 
@@ -491,6 +491,77 @@ ssovf_eth_rx_adapter_stop(const struct rte_eventdev *dev,
        return 0;
 }
 
+static int
+ssovf_eth_tx_adapter_caps_get(const struct rte_eventdev *dev,
+               const struct rte_eth_dev *eth_dev, uint32_t *caps)
+{
+       int ret;
+       RTE_SET_USED(dev);
+
+       ret = strncmp(eth_dev->data->name, "eth_octeontx", 12);
+       if (ret)
+               *caps = 0;
+       else
+               *caps = RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT;
+
+       return 0;
+}
+
+static int
+ssovf_eth_tx_adapter_create(uint8_t id, const struct rte_eventdev *dev)
+{
+       RTE_SET_USED(id);
+       RTE_SET_USED(dev);
+       return 0;
+}
+
+static int
+ssovf_eth_tx_adapter_free(uint8_t id, const struct rte_eventdev *dev)
+{
+       RTE_SET_USED(id);
+       RTE_SET_USED(dev);
+       return 0;
+}
+
+static int
+ssovf_eth_tx_adapter_queue_add(uint8_t id, const struct rte_eventdev *dev,
+               const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+       RTE_SET_USED(id);
+       RTE_SET_USED(dev);
+       RTE_SET_USED(eth_dev);
+       RTE_SET_USED(tx_queue_id);
+       return 0;
+}
+
+static int
+ssovf_eth_tx_adapter_queue_del(uint8_t id, const struct rte_eventdev *dev,
+               const struct rte_eth_dev *eth_dev, int32_t tx_queue_id)
+{
+       RTE_SET_USED(id);
+       RTE_SET_USED(dev);
+       RTE_SET_USED(eth_dev);
+       RTE_SET_USED(tx_queue_id);
+       return 0;
+}
+
+static int
+ssovf_eth_tx_adapter_start(uint8_t id, const struct rte_eventdev *dev)
+{
+       RTE_SET_USED(id);
+       RTE_SET_USED(dev);
+       return 0;
+}
+
+static int
+ssovf_eth_tx_adapter_stop(uint8_t id, const struct rte_eventdev *dev)
+{
+       RTE_SET_USED(id);
+       RTE_SET_USED(dev);
+       return 0;
+}
+
+
 static void
 ssovf_dump(struct rte_eventdev *dev, FILE *f)
 {
@@ -619,6 +690,14 @@ static struct rte_eventdev_ops ssovf_ops = {
        .eth_rx_adapter_start = ssovf_eth_rx_adapter_start,
        .eth_rx_adapter_stop = ssovf_eth_rx_adapter_stop,
 
+       .eth_tx_adapter_caps_get = ssovf_eth_tx_adapter_caps_get,
+       .eth_tx_adapter_create = ssovf_eth_tx_adapter_create,
+       .eth_tx_adapter_free = ssovf_eth_tx_adapter_free,
+       .eth_tx_adapter_queue_add = ssovf_eth_tx_adapter_queue_add,
+       .eth_tx_adapter_queue_del = ssovf_eth_tx_adapter_queue_del,
+       .eth_tx_adapter_start = ssovf_eth_tx_adapter_start,
+       .eth_tx_adapter_stop = ssovf_eth_tx_adapter_stop,
+
        .timer_adapter_caps_get = ssovf_timvf_caps_get,
 
        .dev_selftest = test_eventdev_octeontx,
index 18293e9..0e62215 100644 (file)
@@ -5,6 +5,7 @@
 #ifndef __SSOVF_EVDEV_H__
 #define __SSOVF_EVDEV_H__
 
+#include <rte_event_eth_tx_adapter.h>
 #include <rte_eventdev_pmd_vdev.h>
 #include <rte_io.h>
 
@@ -83,7 +84,7 @@
 #define SSOVF_SELFTEST_ARG               ("selftest")
 
 /*
- * In Cavium OcteonTX SoC, all accesses to the device registers are
+ * In Cavium OCTEON TX SoC, all accesses to the device registers are
  * implictly strongly ordered. So, The relaxed version of IO operation is
  * safe to use with out any IO memory barriers.
  */
@@ -179,6 +180,8 @@ typedef void (*ssows_handle_event_t)(void *arg, struct rte_event ev);
 void ssows_flush_events(struct ssows *ws, uint8_t queue_id,
                ssows_handle_event_t fn, void *arg);
 void ssows_reset(struct ssows *ws);
+uint16_t sso_event_tx_adapter_enqueue(void *port,
+               struct rte_event ev[], uint16_t nb_events);
 int ssovf_info(struct ssovf_info *info);
 void *ssovf_bar(enum ssovf_type, uint8_t id, uint8_t bar);
 int test_eventdev_octeontx(void);
index fffa902..d940b5d 100644 (file)
@@ -261,3 +261,47 @@ ssows_reset(struct ssows *ws)
                        ssows_swtag_untag(ws);
        }
 }
+
+uint16_t
+sso_event_tx_adapter_enqueue(void *port,
+               struct rte_event ev[], uint16_t nb_events)
+{
+       uint16_t port_id;
+       uint16_t queue_id;
+       struct rte_mbuf *m;
+       struct rte_eth_dev *ethdev;
+       struct ssows *ws = port;
+       struct octeontx_txq *txq;
+       octeontx_dq_t *dq;
+
+       RTE_SET_USED(nb_events);
+       switch (ev->sched_type) {
+       case SSO_SYNC_ORDERED:
+               ssows_swtag_norm(ws, ev->event, SSO_SYNC_ATOMIC);
+               rte_cio_wmb();
+               ssows_swtag_wait(ws);
+               break;
+       case SSO_SYNC_UNTAGGED:
+               ssows_swtag_full(ws, ev->u64, ev->event, SSO_SYNC_ATOMIC,
+                               ev->queue_id);
+               rte_cio_wmb();
+               ssows_swtag_wait(ws);
+               break;
+       case SSO_SYNC_ATOMIC:
+               rte_cio_wmb();
+               break;
+       }
+
+       m = ev[0].mbuf;
+       port_id = m->port;
+       queue_id = rte_event_eth_tx_adapter_txq_get(m);
+       ethdev = &rte_eth_devices[port_id];
+       txq = ethdev->data->tx_queues[queue_id];
+       dq = &txq->dq;
+
+       if (__octeontx_xmit_pkts(dq->lmtline_va, dq->ioreg_va, dq->fc_status_va,
+                               m) < 0)
+               return 0;
+
+       return 1;
+}
index 7c7306b..d1d3a52 100644 (file)
@@ -42,6 +42,7 @@ ssovf_octeontx_wqe_to_pkt(uint64_t work, uint16_t port_info)
        mbuf->ol_flags = 0;
        mbuf->port = rte_octeontx_pchan_map[port_info >> 4][port_info & 0xF];
        rte_mbuf_refcnt_set(mbuf, 1);
+
        return mbuf;
 }
 
index cea8118..bf50a60 100644 (file)
@@ -24,7 +24,7 @@ LDLIBS += -lrte_bus_vdev -lrte_mbuf -lrte_mempool
 LIBABIVER := 1
 
 # versioning export map
-EXPORT_MAP := rte_pmd_evdev_opdl_version.map
+EXPORT_MAP := rte_pmd_opdl_event_version.map
 
 # library source files
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV) += opdl_ring.c
diff --git a/drivers/event/opdl/meson.build b/drivers/event/opdl/meson.build
new file mode 100644 (file)
index 0000000..cc6029c
--- /dev/null
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+sources = files(
+       'opdl_evdev.c',
+       'opdl_evdev_init.c',
+       'opdl_evdev_xstats.c',
+       'opdl_ring.c',
+       'opdl_test.c',
+)
+deps += ['bus_vdev']
index a6bb913..1175d6c 100644 (file)
@@ -113,9 +113,21 @@ sw_port_unlink(struct rte_eventdev *dev, void *port, uint8_t queues[],
                        }
                }
        }
+
+       p->unlinks_in_progress += unlinked;
+       rte_smp_mb();
+
        return unlinked;
 }
 
+static int
+sw_port_unlinks_in_progress(struct rte_eventdev *dev, void *port)
+{
+       RTE_SET_USED(dev);
+       struct sw_port *p = port;
+       return p->unlinks_in_progress;
+}
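
A hedged sketch of the application-side pattern this op supports: request the unlinks, then poll until the scheduler core has acked them (dev_id and port_id are assumed-valid identifiers):

    #include <rte_eventdev.h>
    #include <rte_pause.h>

    static void
    unlink_all_and_wait(uint8_t dev_id, uint8_t port_id)
    {
            /* Request unlinking of all queues from the port. */
            rte_event_port_unlink(dev_id, port_id, NULL, 0);

            /* Only once the scheduler has acked the unlinks is it
             * guaranteed that no further events will be pushed to
             * the port.
             */
            while (rte_event_port_unlinks_in_progress(dev_id, port_id) > 0)
                    rte_pause();
    }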
+
 static int
 sw_port_setup(struct rte_eventdev *dev, uint8_t port_id,
                const struct rte_event_port_conf *conf)
@@ -925,6 +937,7 @@ sw_probe(struct rte_vdev_device *vdev)
                        .port_release = sw_port_release,
                        .port_link = sw_port_link,
                        .port_unlink = sw_port_unlink,
+                       .port_unlinks_in_progress = sw_port_unlinks_in_progress,
 
                        .eth_rx_adapter_caps_get = sw_eth_rx_adapter_caps_get,
 
index d90b96d..7c77b24 100644 (file)
@@ -148,6 +148,14 @@ struct sw_port {
        /* A numeric ID for the port */
        uint8_t id;
 
+       /* An atomic counter of unlinks that the port has requested but that
+        * the scheduler has not yet acked - hence there may still be events
+        * in the buffers going to the port. Once the unlinks-in-progress
+        * count has been read by the scheduler, no more events will be
+        * pushed to the port, so the scheduler core can simply assign zero.
+        */
+       uint8_t unlinks_in_progress;
+
        int16_t is_directed; /** Takes from a single directed QID */
        /**
         * For loadbalanced we can optimise pulling packets from
index e3a41e0..cff747d 100644 (file)
@@ -51,9 +51,11 @@ sw_schedule_atomic_to_cq(struct sw_evdev *sw, struct sw_qid * const qid,
                int cq = fid->cq;
 
                if (cq < 0) {
-                       uint32_t cq_idx = qid->cq_next_tx++;
-                       if (qid->cq_next_tx == qid->cq_num_mapped_cqs)
+                       uint32_t cq_idx;
+                       if (qid->cq_next_tx >= qid->cq_num_mapped_cqs)
                                qid->cq_next_tx = 0;
+                       cq_idx = qid->cq_next_tx++;
+
                        cq = qid->cq_map[cq_idx];
 
                        /* find least used */
@@ -140,9 +142,10 @@ sw_schedule_parallel_to_cq(struct sw_evdev *sw, struct sw_qid * const qid,
                do {
                        if (++cq_check_count > qid->cq_num_mapped_cqs)
                                goto exit;
-                       cq = qid->cq_map[cq_idx];
-                       if (++cq_idx == qid->cq_num_mapped_cqs)
+                       if (cq_idx >= qid->cq_num_mapped_cqs)
                                cq_idx = 0;
+                       cq = qid->cq_map[cq_idx++];
+
                } while (rte_event_ring_free_count(
                                sw->ports[cq].cq_worker_ring) == 0 ||
                                sw->ports[cq].inflights == SW_PORT_HIST_LIST);
@@ -220,7 +223,7 @@ sw_schedule_qid_to_cq(struct sw_evdev *sw)
                int iq_num = PKT_MASK_TO_IQ(qid->iq_pkt_mask);
 
                /* zero mapped CQs indicates directed */
-               if (iq_num >= SW_IQS_MAX)
+               if (iq_num >= SW_IQS_MAX || qid->cq_num_mapped_cqs == 0)
                        continue;
 
                uint32_t pkts_done = 0;
@@ -517,13 +520,18 @@ sw_event_schedule(struct rte_eventdev *dev)
                /* Pull from rx_ring for ports */
                do {
                        in_pkts = 0;
-                       for (i = 0; i < sw->port_count; i++)
+                       for (i = 0; i < sw->port_count; i++) {
+                               /* ack the unlinks in progress as done */
+                               if (sw->ports[i].unlinks_in_progress)
+                                       sw->ports[i].unlinks_in_progress = 0;
+
                                if (sw->ports[i].is_directed)
                                        in_pkts += sw_schedule_pull_port_dir(sw, i);
                                else if (sw->ports[i].num_ordered_qids > 0)
                                        in_pkts += sw_schedule_pull_port_lb(sw, i);
                                else
                                        in_pkts += sw_schedule_pull_port_no_reorder(sw, i);
+                       }
 
                        /* QID scan for re-ordered */
                        in_pkts += sw_schedule_reorder(sw, 0,
index c40912d..d00d5de 100644 (file)
@@ -1903,6 +1903,77 @@ qid_priorities(struct test *t)
        return 0;
 }
 
+static int
+unlink_in_progress(struct test *t)
+{
+       /* Test the unlinking API, in particular that, while an unlink
+        * request has not yet been seen by the scheduler thread,
+        * rte_event_port_unlinks_in_progress() returns the number of
+        * outstanding unlinks.
+        */
+       unsigned int i;
+       /* Create instance with 1 port and 3 qids */
+       if (init(t, 3, 1) < 0 ||
+                       create_ports(t, 1) < 0) {
+               printf("%d: Error initializing device\n", __LINE__);
+               return -1;
+       }
+
+       for (i = 0; i < 3; i++) {
+               /* Create QID */
+               const struct rte_event_queue_conf conf = {
+                       .schedule_type = RTE_SCHED_TYPE_ATOMIC,
+                       /* increase priority (0 == highest), as we go */
+                       .priority = RTE_EVENT_DEV_PRIORITY_NORMAL - i,
+                       .nb_atomic_flows = 1024,
+                       .nb_atomic_order_sequences = 1024,
+               };
+
+               if (rte_event_queue_setup(evdev, i, &conf) < 0) {
+                       printf("%d: error creating qid %d\n", __LINE__, i);
+                       return -1;
+               }
+               t->qid[i] = i;
+       }
+       t->nb_qids = i;
+       /* map all QIDs to port */
+       rte_event_port_link(evdev, t->port[0], NULL, NULL, 0);
+
+       if (rte_event_dev_start(evdev) < 0) {
+               printf("%d: Error with start call\n", __LINE__);
+               return -1;
+       }
+
+       /* unlink all ports to have outstanding unlink requests */
+       int ret = rte_event_port_unlink(evdev, t->port[0], NULL, 0);
+       if (ret < 0) {
+               printf("%d: Failed to unlink queues\n", __LINE__);
+               return -1;
+       }
+
+       /* get active unlinks here, expect 3 */
+       int unlinks_in_progress =
+               rte_event_port_unlinks_in_progress(evdev, t->port[0]);
+       if (unlinks_in_progress != 3) {
+               printf("%d: Expected num unlinks in progress == 3, got %d\n",
+                               __LINE__, unlinks_in_progress);
+               return -1;
+       }
+
+       /* run scheduler service on this thread to ack the unlinks */
+       rte_service_run_iter_on_app_lcore(t->service_id, 1);
+
+       /* active unlinks expected to be 0, as the scheduler thread has acked */
+       unlinks_in_progress =
+               rte_event_port_unlinks_in_progress(evdev, t->port[0]);
+       if (unlinks_in_progress != 0) {
+               printf("%d: Expected num unlinks in progress == 0, got %d\n",
+                               __LINE__, unlinks_in_progress);
+       }
+
+       cleanup(t);
+       return 0;
+}
+
 static int
 load_balancing(struct test *t)
 {
@@ -3260,6 +3331,12 @@ test_sw_eventdev(void)
                printf("ERROR - QID Priority test FAILED.\n");
                goto test_fail;
        }
+       printf("*** Running Unlink-in-progress test...\n");
+       ret = unlink_in_progress(t);
+       if (ret != 0) {
+               printf("ERROR - Unlink in progress test FAILED.\n");
+               goto test_fail;
+       }
        printf("*** Running Ordered Reconfigure test...\n");
        ret = ordered_reconfigure(t);
        if (ret != 0) {
index da8da1e..ead5029 100644 (file)
@@ -10,7 +10,6 @@ LIB = librte_mempool_dpaa.a
 
 CFLAGS := -I$(SRCDIR) $(CFLAGS)
 CFLAGS += -O3 $(WERROR_FLAGS)
-CFLAGS += -D _GNU_SOURCE
 CFLAGS += -I$(RTE_SDK)/drivers/bus/dpaa
 CFLAGS += -I$(RTE_SDK)/drivers/bus/dpaa/include/
 CFLAGS += -I$(RTE_SDK)/drivers/mempool/dpaa
@@ -31,5 +30,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_DPAA_MEMPOOL) += dpaa_mempool.c
 
 LDLIBS += -lrte_bus_dpaa
 LDLIBS += -lrte_eal -lrte_mempool -lrte_ring
+LDLIBS += -lrte_common_dpaax
 
 include $(RTE_SDK)/mk/rte.lib.mk
index 10c536b..021b366 100644 (file)
@@ -26,6 +26,7 @@
 #include <rte_ring.h>
 
 #include <dpaa_mempool.h>
+#include <dpaax_iova_table.h>
 
 /* List of all the memseg information locally maintained in dpaa driver. This
  * is to optimize the PA_to_VA searches until a better mechanism (algo) is
@@ -122,7 +123,7 @@ dpaa_buf_free(struct dpaa_bp_info *bp_info, uint64_t addr)
        struct bm_buffer buf;
        int ret;
 
-       DPAA_MEMPOOL_DEBUG("Free 0x%" PRIx64 " to bpid: %d",
+       DPAA_MEMPOOL_DPDEBUG("Free 0x%" PRIx64 " to bpid: %d",
                           addr, bp_info->bpid);
 
        bm_buffer_set64(&buf, addr);
@@ -285,6 +286,9 @@ dpaa_populate(struct rte_mempool *mp, unsigned int max_objs,
                return 0;
        }
 
+       /* Update the PA-VA Table */
+       dpaax_iova_table_update(paddr, vaddr, len);
+
        bp_info = DPAA_MEMPOOL_TO_POOL_INFO(mp);
        total_elt_sz = mp->header_size + mp->elt_size + mp->trailer_size;
 
@@ -324,7 +328,7 @@ dpaa_populate(struct rte_mempool *mp, unsigned int max_objs,
                                               obj_cb, obj_cb_arg);
 }
 
-struct rte_mempool_ops dpaa_mpool_ops = {
+static const struct rte_mempool_ops dpaa_mpool_ops = {
        .name = DPAA_MEMPOOL_OPS_NAME,
        .alloc = dpaa_mbuf_create_pool,
        .free = dpaa_mbuf_free_pool,
index 092f326..533e1c6 100644 (file)
@@ -43,10 +43,8 @@ struct dpaa_bp_info {
 };
 
 static inline void *
-DPAA_MEMPOOL_PTOV(struct dpaa_bp_info *bp_info, uint64_t addr)
+DPAA_MEMPOOL_PTOV(struct dpaa_bp_info *bp_info __rte_unused, uint64_t addr)
 {
-       if (bp_info->ptov_off)
-               return ((void *) (size_t)(addr + bp_info->ptov_off));
        return rte_dpaa_mem_ptov(addr);
 }
 
index 9e4c87d..96c0f2b 100644 (file)
@@ -19,7 +19,7 @@ CFLAGS += -I$(RTE_SDK)/lib/librte_eal/linuxapp/eal
 EXPORT_MAP := rte_mempool_dpaa2_version.map
 
 # Lbrary version
-LIBABIVER := 1
+LIBABIVER := 2
 
 # depends on fslmc bus which uses experimental API
 CFLAGS += -DALLOW_EXPERIMENTAL_API
@@ -30,6 +30,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_DPAA2_MEMPOOL) += dpaa2_hw_mempool.c
 
 LDLIBS += -lrte_bus_fslmc
 LDLIBS += -lrte_eal -lrte_mempool -lrte_ring
+LDLIBS += -lrte_common_dpaax
 
 SYMLINK-$(CONFIG_RTE_LIBRTE_DPAA2_MEMPOOL)-include := rte_dpaa2_mempool.h
 
index 7d0435f..790cded 100644 (file)
 #include "dpaa2_hw_mempool.h"
 #include "dpaa2_hw_mempool_logs.h"
 
+#include <dpaax_iova_table.h>
+
 struct dpaa2_bp_info rte_dpaa2_bpid_info[MAX_BPID];
 static struct dpaa2_bp_list *h_bp_list;
 
-/* List of all the memseg information locally maintained in dpaa2 driver. This
- * is to optimize the PA_to_VA searches until a better mechanism (algo) is
- * available.
- */
-struct dpaa2_memseg_list rte_dpaa2_memsegs
-       = TAILQ_HEAD_INITIALIZER(rte_dpaa2_memsegs);
-
 /* Dynamic logging identified for mempool */
 int dpaa2_logtype_mempool;
 
@@ -400,37 +395,14 @@ dpaa2_populate(struct rte_mempool *mp, unsigned int max_objs,
              void *vaddr, rte_iova_t paddr, size_t len,
              rte_mempool_populate_obj_cb_t *obj_cb, void *obj_cb_arg)
 {
-       struct dpaa2_memseg *ms;
-
-       /* For each memory chunk pinned to the Mempool, a linked list of the
-        * contained memsegs is created for searching when PA to VA
-        * conversion is required.
-        */
-       ms = rte_zmalloc(NULL, sizeof(struct dpaa2_memseg), 0);
-       if (!ms) {
-               DPAA2_MEMPOOL_ERR("Unable to allocate internal memory.");
-               DPAA2_MEMPOOL_WARN("Fast Physical to Virtual Addr translation would not be available.");
-               /* If the element is not added, it would only lead to failure
-                * in searching for the element and the logic would Fallback
-                * to traditional DPDK memseg traversal code. So, this is not
-                * a blocking error - but, error would be printed on screen.
-                */
-               return 0;
-       }
-
-       ms->vaddr = vaddr;
-       ms->iova = paddr;
-       ms->len = len;
-       /* Head insertions are generally faster than tail insertions as the
-        * buffers pinned are picked from rear end.
-        */
-       TAILQ_INSERT_HEAD(&rte_dpaa2_memsegs, ms, next);
+       /* Insert entry into the PA->VA Table */
+       dpaax_iova_table_update(paddr, vaddr, len);
 
        return rte_mempool_op_populate_default(mp, max_objs, vaddr, paddr, len,
                                               obj_cb, obj_cb_arg);
 }
 
-struct rte_mempool_ops dpaa2_mpool_ops = {
+static const struct rte_mempool_ops dpaa2_mpool_ops = {
        .name = DPAA2_MEMPOOL_OPS_NAME,
        .alloc = rte_hw_mbuf_create_pool,
        .free = rte_hw_mbuf_free_pool,
index 90bab60..6b6ead6 100644 (file)
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright 2018 NXP
 
+version = 2
+
 if host_machine.system() != 'linux'
         build = false
 endif
index b9d996a..b45e7a9 100644 (file)
@@ -3,7 +3,6 @@ DPDK_17.05 {
 
        rte_dpaa2_bpid_info;
        rte_dpaa2_mbuf_alloc_bulk;
-       rte_dpaa2_memsegs;
 
        local: *;
 };
index b00be13..e27c437 100644 (file)
@@ -50,7 +50,7 @@
 #define OCTEONTX_FPAVF_BUF_OFFSET      128
 
 /*
- * In Cavium OcteonTX SoC, all accesses to the device registers are
+ * In Cavium OCTEON TX SoC, all accesses to the device registers are
  * implicitly strongly ordered. So, the relaxed version of IO operation is
  * safe to use without any IO memory barriers.
  */
index f94e2fe..c3c66bb 100644 (file)
@@ -9,12 +9,17 @@ driver_classes = ['common',
               'crypto',  # depends on common, bus and mempool (net in future).
               'compress', # depends on common, bus, mempool.
               'event',   # depends on common, bus, mempool and net.
+              'baseband', # depends on common and bus.
               'raw']     # depends on common, bus, mempool, net and event.
 
 default_cflags = machine_args
 if cc.has_argument('-Wno-format-truncation')
        default_cflags += '-Wno-format-truncation'
 endif
+
+# specify -D_GNU_SOURCE unconditionally
+default_cflags += '-D_GNU_SOURCE'
+
 foreach class:driver_classes
        drivers = []
        std_deps = []
@@ -24,6 +29,7 @@ foreach class:driver_classes
                             # version file for linking
 
        subdir(class)
+       class_drivers = []
 
        foreach drv:drivers
                drv_path = join_paths(class, drv)
@@ -51,6 +57,8 @@ foreach class:driver_classes
                subdir(drv_path)
 
                if build
+                       class_drivers += name
+
                        dpdk_conf.set(config_flag_fmt.format(name.to_upper()),1)
                        lib_name = driver_name_fmt.format(name)
 
@@ -94,10 +102,8 @@ foreach class:driver_classes
                                lib_version = '@0@.1'.format(version)
                                so_version = '@0@'.format(version)
                        else
-                               pver = meson.project_version().split('.')
-                               lib_version = '@0@.@1@'.format(pver.get(0),
-                                               pver.get(1))
-                               so_version = lib_version
+                               lib_version = major_version
+                               so_version = major_version
                        endif
 
                        # now build the static driver
@@ -141,4 +147,9 @@ foreach class:driver_classes
                        set_variable('static_@0@'.format(lib_name), static_dep)
                endif # build
        endforeach
+
+       if meson.version().version_compare('>=0.47')
+               # prior to 0.47, set_variable can't take array params
+               set_variable(class + '_drivers', class_drivers)
+       endif
 endforeach
index 664398d..c0386fe 100644 (file)
@@ -10,6 +10,7 @@ endif
 
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += af_packet
 DIRS-$(CONFIG_RTE_LIBRTE_ARK_PMD) += ark
+DIRS-$(CONFIG_RTE_LIBRTE_ATLANTIC_PMD) += atlantic
 DIRS-$(CONFIG_RTE_LIBRTE_AVF_PMD) += avf
 DIRS-$(CONFIG_RTE_LIBRTE_AVP_PMD) += avp
 DIRS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD) += axgbe
@@ -24,6 +25,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD) += dpaa2
 endif
 DIRS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += e1000
 DIRS-$(CONFIG_RTE_LIBRTE_ENA_PMD) += ena
+DIRS-$(CONFIG_RTE_LIBRTE_ENETC_PMD) += enetc
 DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE) += failsafe
 DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
@@ -32,6 +34,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
 DIRS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += liquidio
 DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
 DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
+DIRS-$(CONFIG_RTE_LIBRTE_MVNETA_PMD) += mvneta
 DIRS-$(CONFIG_RTE_LIBRTE_MVPP2_PMD) += mvpp2
 DIRS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += netvsc
 DIRS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp
index eb3cce3..95a98c6 100644 (file)
@@ -305,7 +305,6 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        dev_info->max_rx_queues = (uint16_t)internals->nb_queues;
        dev_info->max_tx_queues = (uint16_t)internals->nb_queues;
        dev_info->min_rx_bufsize = 0;
-       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_CRC_STRIP;
 }
 
 static int
@@ -927,8 +926,7 @@ rte_pmd_af_packet_probe(struct rte_vdev_device *dev)
 
        PMD_LOG(INFO, "Initializing pmd_af_packet for %s", name);
 
-       if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
-           strlen(rte_vdev_device_args(dev)) == 0) {
+       if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                eth_dev = rte_eth_dev_attach_secondary(name);
                if (!eth_dev) {
                        PMD_LOG(ERR, "Failed to probe %s", name);
@@ -988,6 +986,12 @@ rte_pmd_af_packet_remove(struct rte_vdev_device *dev)
        if (eth_dev == NULL)
                return -1;
 
+       /* mac_addrs must not be freed alone because part of dev_private */
+       eth_dev->data->mac_addrs = NULL;
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return rte_eth_dev_release_port(eth_dev);
+
        internals = eth_dev->data->dev_private;
        for (q = 0; q < internals->nb_queues; q++) {
                rte_free(internals->rx_queue[q].rd);
@@ -995,8 +999,6 @@ rte_pmd_af_packet_remove(struct rte_vdev_device *dev)
        }
        free(internals->if_name);
 
-       rte_free(eth_dev->data->dev_private);
-
        rte_eth_dev_release_port(eth_dev);
 
        return 0;
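
The af_packet hunks above apply the multi-process ownership rule for vdev teardown. A hedged, self-contained sketch of that rule (names hypothetical; it assumes rte_eth_dev_release_port() frees only the generic port data, not driver private memory):

/* Hedged sketch: a vdev remove path that is safe in secondary
 * processes.
 */
#include <rte_eal.h>
#include <rte_ethdev_driver.h>
#include <rte_bus_vdev.h>

static int
example_remove(struct rte_vdev_device *dev)
{
        struct rte_eth_dev *eth_dev;

        eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
        if (eth_dev == NULL)
                return -1;

        /* mac_addrs points into dev_private: clear it so the generic
         * release path cannot free it a second time.
         */
        eth_dev->data->mac_addrs = NULL;

        /* Secondary processes detach only their local ethdev entry;
         * shared resources remain owned by the primary.
         */
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
                return rte_eth_dev_release_port(eth_dev);

        /* ...primary-only teardown of queues and private state... */

        return rte_eth_dev_release_port(eth_dev);
}
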
index eea388a..57026f8 100644 (file)
 int
 ark_ddm_verify(struct ark_ddm_t *ddm)
 {
+       uint32_t hw_const;
        if (sizeof(struct ark_ddm_t) != ARK_DDM_EXPECTED_SIZE) {
                PMD_DRV_LOG(ERR, "ARK: DDM structure looks incorrect %d vs %zd\n",
                            ARK_DDM_EXPECTED_SIZE, sizeof(struct ark_ddm_t));
                return -1;
        }
 
-       if (ddm->cfg.const0 != ARK_DDM_CONST) {
-               PMD_DRV_LOG(ERR, "ARK: DDM module not found as expected 0x%08x\n",
+       hw_const = ddm->cfg.const0;
+       if (hw_const == ARK_DDM_CONST1) {
+               PMD_DRV_LOG(ERR,
+                           "ARK: DDM module is version 1, "
+                           "PMD expects version 2\n");
+               return -1;
+       } else if (hw_const != ARK_DDM_CONST2) {
+               PMD_DRV_LOG(ERR,
+                           "ARK: DDM module not found as expected 0x%08x\n",
                            ddm->cfg.const0);
                return -1;
        }
index b37d1e0..5456b4b 100644 (file)
@@ -19,7 +19,7 @@
 /* struct defining Tx meta data --  fixed in FPGA -- 16 bytes */
 struct ark_tx_meta {
        uint64_t physaddr;
-       uint32_t delta_ns;
+       uint32_t user1;
        uint16_t data_len;              /* of this MBUF */
 #define   ARK_DDM_EOP   0x01
 #define   ARK_DDM_SOP   0x02
@@ -34,7 +34,10 @@ struct ark_tx_meta {
  * structs will never be instantiated in ram memory
  */
 #define ARK_DDM_CFG 0x0000
-#define ARK_DDM_CONST 0xfacecafe
+/* Unique HW IDs identifying the hardware version */
+#define ARK_DDM_CONST2 (0x324d4444)
+#define ARK_DDM_CONST1 (0xfacecafe)
+
 struct ark_ddm_cfg_t {
        uint32_t r0;
        volatile uint32_t tlp_stats_clear;
index 552ca01..4f52e2b 100644 (file)
@@ -313,6 +313,9 @@ eth_ark_dev_init(struct rte_eth_dev *dev)
 
        /* We are a single function multi-port device. */
        ret = ark_config_device(dev);
+       if (ret)
+               return -1;
+
        dev->dev_ops = &ark_eth_dev_ops;
 
        dev->data->mac_addrs = rte_zmalloc("ark", ETHER_ADDR_LEN, 0);
@@ -506,7 +509,6 @@ eth_ark_dev_uninit(struct rte_eth_dev *dev)
        dev->dev_ops = NULL;
        dev->rx_pkt_burst = NULL;
        dev->tx_pkt_burst = NULL;
-       rte_free(dev->data->mac_addrs);
        return 0;
 }
 
index 16f0d11..300029d 100644 (file)
@@ -25,6 +25,9 @@ static uint32_t eth_ark_rx_jumbo(struct ark_rx_queue *queue,
                                 struct rte_mbuf *mbuf0,
                                 uint32_t cons_index);
 static inline int eth_ark_rx_seed_mbufs(struct ark_rx_queue *queue);
+static int eth_ark_rx_seed_recovery(struct ark_rx_queue *queue,
+                                   uint32_t *pnb,
+                                   struct rte_mbuf **mbufs);
 
 /* ************************************************************************* */
 struct ark_rx_queue {
@@ -50,7 +53,7 @@ struct ark_rx_queue {
        /* The queue Index is used within the dpdk device structures */
        uint16_t queue_index;
 
-       uint32_t pad1;
+       uint32_t last_cons;
 
        /* separate cache line */
        /* second cache line - fields only used in slow path */
@@ -102,7 +105,10 @@ eth_ark_rx_update_cons_index(struct ark_rx_queue *queue, uint32_t cons_index)
 {
        queue->cons_index = cons_index;
        eth_ark_rx_seed_mbufs(queue);
-       ark_mpu_set_producer(queue->mpu, queue->seed_index);
+       if ((cons_index - queue->last_cons) >= 64U) {
+               queue->last_cons = cons_index;
+               ark_mpu_set_producer(queue->mpu, queue->seed_index);
+       }
 }
 
 /* ************************************************************************* */
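
The hunk above coalesces the MPU producer-index (doorbell) write so it fires at most once per 64 consumed descriptors. A generic sketch of that batching pattern, with hypothetical names; the 64 threshold mirrors the value chosen above, and unsigned subtraction keeps the test correct across index wraparound:

/* Hedged sketch: batch MMIO doorbell writes, since the uncached
 * register write is expensive relative to the per-packet work.
 */
#include <stdint.h>

#define EXAMPLE_DOORBELL_BATCH 64U

struct example_ring {
        uint32_t cons_index;    /* software consumer index */
        uint32_t last_cons;     /* index at the last doorbell write */
};

static inline void
example_update_cons(struct example_ring *r, uint32_t cons_index,
                    void (*ring_doorbell)(uint32_t index))
{
        r->cons_index = cons_index;
        if ((cons_index - r->last_cons) >= EXAMPLE_DOORBELL_BATCH) {
                r->last_cons = cons_index;
                ring_doorbell(cons_index);
        }
}
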
@@ -196,20 +202,25 @@ eth_ark_dev_rx_queue_setup(struct rte_eth_dev *dev,
        /* populate mbuf reserve */
        status = eth_ark_rx_seed_mbufs(queue);
 
+       if (queue->seed_index != nb_desc) {
+               PMD_DRV_LOG(ERR, "ARK: Failed to allocate %u mbufs for RX queue %d\n",
+                           nb_desc, qidx);
+               status = -1;
+       }
        /* MPU Setup */
        if (status == 0)
                status = eth_ark_rx_hw_setup(dev, queue, qidx, queue_idx);
 
        if (unlikely(status != 0)) {
-               struct rte_mbuf *mbuf;
+               struct rte_mbuf **mbuf;
 
                PMD_DRV_LOG(ERR, "Failed to initialize RX queue %d %s\n",
                            qidx,
                            __func__);
                /* Free the mbufs allocated */
-               for (i = 0, mbuf = queue->reserve_q[0];
-                    i < nb_desc; ++i, mbuf++) {
-                       rte_pktmbuf_free(mbuf);
+               for (i = 0, mbuf = queue->reserve_q;
+                    i < queue->seed_index; ++i, mbuf++) {
+                       rte_pktmbuf_free(*mbuf);
                }
                rte_free(queue->reserve_q);
                rte_free(queue->paddress_q);
@@ -446,8 +457,13 @@ eth_ark_rx_seed_mbufs(struct ark_rx_queue *queue)
        struct rte_mbuf **mbufs = &queue->reserve_q[seed_m];
        int status = rte_pktmbuf_alloc_bulk(queue->mb_pool, mbufs, nb);
 
-       if (unlikely(status != 0))
-               return -1;
+       if (unlikely(status != 0)) {
+               /* Try to recover from lack of mbufs in pool */
+               status = eth_ark_rx_seed_recovery(queue, &nb, mbufs);
+               if (unlikely(status != 0)) {
+                       return -1;
+               }
+       }
 
        if (ARK_RX_DEBUG) {             /* DEBUG */
                while (count != nb) {
@@ -495,6 +511,29 @@ eth_ark_rx_seed_mbufs(struct ark_rx_queue *queue)
        return 0;
 }
 
+int
+eth_ark_rx_seed_recovery(struct ark_rx_queue *queue,
+                        uint32_t *pnb,
+                        struct rte_mbuf **mbufs)
+{
+       int status = -1;
+
+       /* Ignore small allocation failures */
+       if (*pnb <= 64)
+               return -1;
+
+       *pnb = 64U;
+       status = rte_pktmbuf_alloc_bulk(queue->mb_pool, mbufs, *pnb);
+       if (status != 0) {
+               PMD_DRV_LOG(ERR,
+                           "ARK: Could not allocate %u mbufs from pool for RX queue %u;"
+                           " %u free buffers remaining in queue\n",
+                           *pnb, queue->queue_index,
+                           queue->seed_index - queue->cons_index);
+       }
+       return status;
+}
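
eth_ark_rx_seed_recovery() degrades gracefully when the pool cannot satisfy a large bulk request. The same fallback, as a hedged self-contained sketch:

/* Hedged sketch: retry a failed bulk mbuf allocation with a smaller
 * batch instead of failing outright, so RX keeps running while the
 * pool is under pressure.
 */
#include <rte_mbuf.h>

static int
example_alloc_with_fallback(struct rte_mempool *mp,
                            struct rte_mbuf **mbufs, uint32_t *nb)
{
        if (rte_pktmbuf_alloc_bulk(mp, mbufs, *nb) == 0)
                return 0;

        if (*nb <= 64U)         /* already small: nothing left to trim */
                return -1;

        *nb = 64U;              /* shrink the request and retry once */
        return rte_pktmbuf_alloc_bulk(mp, mbufs, *nb);
}
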
+
 void
 eth_ark_rx_dump_queue(struct rte_eth_dev *dev, uint16_t queue_id,
                      const char *msg)
index 57188c2..94da5f9 100644 (file)
@@ -65,7 +65,7 @@ eth_ark_tx_meta_from_mbuf(struct ark_tx_meta *meta,
                          uint8_t flags)
 {
        meta->physaddr = rte_mbuf_data_iova(mbuf);
-       meta->delta_ns = 0;
+       meta->user1 = (uint32_t)mbuf->udata64;
        meta->data_len = rte_pktmbuf_data_len(mbuf);
        meta->flags = flags;
 }
diff --git a/drivers/net/atlantic/Makefile b/drivers/net/atlantic/Makefile
new file mode 100644 (file)
index 0000000..62dcdbf
--- /dev/null
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Aquantia Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_atlantic.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+EXPORT_MAP := rte_pmd_atlantic_version.map
+
+LIBABIVER := 1
+
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net
+LDLIBS += -lrte_bus_pci
+
+VPATH += $(SRCDIR)/hw_atl
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_ATLANTIC_PMD) += atl_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_ATLANTIC_PMD) += atl_ethdev.c
+SRCS-$(CONFIG_RTE_LIBRTE_ATLANTIC_PMD) += atl_hw_regs.c
+SRCS-$(CONFIG_RTE_LIBRTE_ATLANTIC_PMD) += hw_atl_utils.c
+SRCS-$(CONFIG_RTE_LIBRTE_ATLANTIC_PMD) += hw_atl_llh.c
+SRCS-$(CONFIG_RTE_LIBRTE_ATLANTIC_PMD) += hw_atl_utils_fw2x.c
+SRCS-$(CONFIG_RTE_LIBRTE_ATLANTIC_PMD) += hw_atl_b0.c
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/atlantic/atl_common.h b/drivers/net/atlantic/atl_common.h
new file mode 100644 (file)
index 0000000..b3a0aa5
--- /dev/null
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Aquantia Corporation
+ */
+
+#ifndef AQ_COMMON_H
+#define AQ_COMMON_H
+
+#define ATL_PMD_DRIVER_VERSION "0.4.1"
+
+#define PCI_VENDOR_ID_AQUANTIA  0x1D6A
+
+#define AQ_DEVICE_ID_0001      0x0001
+#define AQ_DEVICE_ID_D100      0xD100
+#define AQ_DEVICE_ID_D107      0xD107
+#define AQ_DEVICE_ID_D108      0xD108
+#define AQ_DEVICE_ID_D109      0xD109
+
+#define AQ_DEVICE_ID_AQC100    0x00B1
+#define AQ_DEVICE_ID_AQC107    0x07B1
+#define AQ_DEVICE_ID_AQC108    0x08B1
+#define AQ_DEVICE_ID_AQC109    0x09B1
+#define AQ_DEVICE_ID_AQC111    0x11B1
+#define AQ_DEVICE_ID_AQC112    0x12B1
+
+#define AQ_DEVICE_ID_AQC100S   0x80B1
+#define AQ_DEVICE_ID_AQC107S   0x87B1
+#define AQ_DEVICE_ID_AQC108S   0x88B1
+#define AQ_DEVICE_ID_AQC109S   0x89B1
+#define AQ_DEVICE_ID_AQC111S   0x91B1
+#define AQ_DEVICE_ID_AQC112S   0x92B1
+
+#define AQ_DEVICE_ID_AQC111E   0x51B1
+#define AQ_DEVICE_ID_AQC112E   0x52B1
+
+#define HW_ATL_NIC_NAME "aQuantia AQtion 10Gbit Network Adapter"
+
+#define AQ_HWREV_ANY   0
+#define AQ_HWREV_1     1
+#define AQ_HWREV_2     2
+
+#define AQ_NIC_RATE_10G                BIT(0)
+#define AQ_NIC_RATE_5G         BIT(1)
+#define AQ_NIC_RATE_5G5R       BIT(2)
+#define AQ_NIC_RATE_2G5                BIT(3)
+#define AQ_NIC_RATE_1G         BIT(4)
+#define AQ_NIC_RATE_100M       BIT(5)
+
+#define AQ_NIC_RATE_EEE_10G    BIT(6)
+#define AQ_NIC_RATE_EEE_5G     BIT(7)
+#define AQ_NIC_RATE_EEE_2G5    BIT(8)
+#define AQ_NIC_RATE_EEE_1G     BIT(9)
+
+
+#define ATL_MAX_RING_DESC      (8 * 1024 - 8)
+#define ATL_MIN_RING_DESC      32
+#define ATL_RXD_ALIGN          8
+#define ATL_TXD_ALIGN          8
+#define ATL_TX_MAX_SEG         16
+
+#define ATL_MAX_INTR_QUEUE_NUM  15
+
+#define ATL_MISC_VEC_ID 10
+#define ATL_RX_VEC_START 0
+
+#define AQ_NIC_WOL_ENABLED           BIT(0)
+
+
+#define AQ_NIC_FC_OFF    0U
+#define AQ_NIC_FC_TX     1U
+#define AQ_NIC_FC_RX     2U
+#define AQ_NIC_FC_FULL   3U
+#define AQ_NIC_FC_AUTO   4U
+
+
+#define AQ_CFG_TX_FRAME_MAX  (16U * 1024U)
+#define AQ_CFG_RX_FRAME_MAX  (2U * 1024U)
+
+#define AQ_HW_MULTICAST_ADDRESS_MAX     32
+#define AQ_HW_MAX_SEGS_SIZE    40
+
+#define AQ_HW_MAX_RX_QUEUES    8
+#define AQ_HW_MAX_TX_QUEUES    8
+#define AQ_HW_MIN_RX_RING_SIZE 512
+#define AQ_HW_MAX_RX_RING_SIZE 8192
+#define AQ_HW_MIN_TX_RING_SIZE 512
+#define AQ_HW_MAX_TX_RING_SIZE 8192
+
+#define ATL_DEFAULT_RX_FREE_THRESH 64
+#define ATL_DEFAULT_TX_FREE_THRESH 64
+
+#define ATL_IRQ_CAUSE_LINK 0x8
+
+#define AQ_HW_LED_BLINK    0x2U
+#define AQ_HW_LED_DEFAULT  0x0U
+
+#endif /* AQ_COMMON_H */
diff --git a/drivers/net/atlantic/atl_ethdev.c b/drivers/net/atlantic/atl_ethdev.c
new file mode 100644 (file)
index 0000000..5bc04f5
--- /dev/null
@@ -0,0 +1,1539 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Aquantia Corporation
+ */
+
+#include <rte_ethdev_pci.h>
+
+#include "atl_ethdev.h"
+#include "atl_common.h"
+#include "atl_hw_regs.h"
+#include "atl_logs.h"
+#include "hw_atl/hw_atl_llh.h"
+#include "hw_atl/hw_atl_b0.h"
+#include "hw_atl/hw_atl_b0_internal.h"
+
+static int eth_atl_dev_init(struct rte_eth_dev *eth_dev);
+static int eth_atl_dev_uninit(struct rte_eth_dev *eth_dev);
+
+static int  atl_dev_configure(struct rte_eth_dev *dev);
+static int  atl_dev_start(struct rte_eth_dev *dev);
+static void atl_dev_stop(struct rte_eth_dev *dev);
+static int  atl_dev_set_link_up(struct rte_eth_dev *dev);
+static int  atl_dev_set_link_down(struct rte_eth_dev *dev);
+static void atl_dev_close(struct rte_eth_dev *dev);
+static int  atl_dev_reset(struct rte_eth_dev *dev);
+static void atl_dev_promiscuous_enable(struct rte_eth_dev *dev);
+static void atl_dev_promiscuous_disable(struct rte_eth_dev *dev);
+static void atl_dev_allmulticast_enable(struct rte_eth_dev *dev);
+static void atl_dev_allmulticast_disable(struct rte_eth_dev *dev);
+static int  atl_dev_link_update(struct rte_eth_dev *dev, int wait);
+
+static int atl_dev_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
+                                   struct rte_eth_xstat_name *xstats_names,
+                                   unsigned int size);
+
+static int atl_dev_stats_get(struct rte_eth_dev *dev,
+                               struct rte_eth_stats *stats);
+
+static int atl_dev_xstats_get(struct rte_eth_dev *dev,
+                             struct rte_eth_xstat *stats, unsigned int n);
+
+static void atl_dev_stats_reset(struct rte_eth_dev *dev);
+
+static int atl_fw_version_get(struct rte_eth_dev *dev, char *fw_version,
+                             size_t fw_size);
+
+static void atl_dev_info_get(struct rte_eth_dev *dev,
+                              struct rte_eth_dev_info *dev_info);
+
+static const uint32_t *atl_dev_supported_ptypes_get(struct rte_eth_dev *dev);
+
+static int atl_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu);
+
+/* VLAN stuff */
+static int atl_vlan_filter_set(struct rte_eth_dev *dev,
+               uint16_t vlan_id, int on);
+
+static int atl_vlan_offload_set(struct rte_eth_dev *dev, int mask);
+
+static void atl_vlan_strip_queue_set(struct rte_eth_dev *dev,
+                                    uint16_t queue_id, int on);
+
+static int atl_vlan_tpid_set(struct rte_eth_dev *dev,
+                            enum rte_vlan_type vlan_type, uint16_t tpid);
+
+/* EEPROM */
+static int atl_dev_get_eeprom_length(struct rte_eth_dev *dev);
+static int atl_dev_get_eeprom(struct rte_eth_dev *dev,
+                             struct rte_dev_eeprom_info *eeprom);
+static int atl_dev_set_eeprom(struct rte_eth_dev *dev,
+                             struct rte_dev_eeprom_info *eeprom);
+
+/* Regs */
+static int atl_dev_get_regs(struct rte_eth_dev *dev,
+                           struct rte_dev_reg_info *regs);
+
+/* Flow control */
+static int atl_flow_ctrl_get(struct rte_eth_dev *dev,
+                              struct rte_eth_fc_conf *fc_conf);
+static int atl_flow_ctrl_set(struct rte_eth_dev *dev,
+                              struct rte_eth_fc_conf *fc_conf);
+
+static void atl_dev_link_status_print(struct rte_eth_dev *dev);
+
+/* Interrupts */
+static int atl_dev_rxq_interrupt_setup(struct rte_eth_dev *dev);
+static int atl_dev_lsc_interrupt_setup(struct rte_eth_dev *dev, uint8_t on);
+static int atl_dev_interrupt_get_status(struct rte_eth_dev *dev);
+static int atl_dev_interrupt_action(struct rte_eth_dev *dev,
+                                   struct rte_intr_handle *handle);
+static void atl_dev_interrupt_handler(void *param);
+
+
+static int atl_add_mac_addr(struct rte_eth_dev *dev,
+                           struct ether_addr *mac_addr,
+                           uint32_t index, uint32_t pool);
+static void atl_remove_mac_addr(struct rte_eth_dev *dev, uint32_t index);
+static int atl_set_default_mac_addr(struct rte_eth_dev *dev,
+                                          struct ether_addr *mac_addr);
+
+static int atl_dev_set_mc_addr_list(struct rte_eth_dev *dev,
+                                   struct ether_addr *mc_addr_set,
+                                   uint32_t nb_mc_addr);
+
+/* RSS */
+static int atl_reta_update(struct rte_eth_dev *dev,
+                            struct rte_eth_rss_reta_entry64 *reta_conf,
+                            uint16_t reta_size);
+static int atl_reta_query(struct rte_eth_dev *dev,
+                           struct rte_eth_rss_reta_entry64 *reta_conf,
+                           uint16_t reta_size);
+static int atl_rss_hash_update(struct rte_eth_dev *dev,
+                                struct rte_eth_rss_conf *rss_conf);
+static int atl_rss_hash_conf_get(struct rte_eth_dev *dev,
+                                  struct rte_eth_rss_conf *rss_conf);
+
+
+static int eth_atl_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+       struct rte_pci_device *pci_dev);
+static int eth_atl_pci_remove(struct rte_pci_device *pci_dev);
+
+static void atl_dev_info_get(struct rte_eth_dev *dev,
+                               struct rte_eth_dev_info *dev_info);
+
+int atl_logtype_init;
+int atl_logtype_driver;
+
+/*
+ * The set of PCI devices this driver supports
+ */
+static const struct rte_pci_id pci_id_atl_map[] = {
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_0001) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_D100) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_D107) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_D108) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_D109) },
+
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_AQC100) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_AQC107) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_AQC108) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_AQC109) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_AQC111) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_AQC112) },
+
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_AQC100S) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_AQC107S) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_AQC108S) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_AQC109S) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_AQC111S) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_AQC112S) },
+
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_AQC111E) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_AQUANTIA, AQ_DEVICE_ID_AQC112E) },
+       { .vendor_id = 0, /* sentinel */ },
+};
+
+static struct rte_pci_driver rte_atl_pmd = {
+       .id_table = pci_id_atl_map,
+       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+                    RTE_PCI_DRV_IOVA_AS_VA,
+       .probe = eth_atl_pci_probe,
+       .remove = eth_atl_pci_remove,
+};
+
+#define ATL_RX_OFFLOADS (DEV_RX_OFFLOAD_VLAN_STRIP \
+                       | DEV_RX_OFFLOAD_IPV4_CKSUM \
+                       | DEV_RX_OFFLOAD_UDP_CKSUM \
+                       | DEV_RX_OFFLOAD_TCP_CKSUM \
+                       | DEV_RX_OFFLOAD_JUMBO_FRAME)
+
+#define ATL_TX_OFFLOADS (DEV_TX_OFFLOAD_VLAN_INSERT \
+                       | DEV_TX_OFFLOAD_IPV4_CKSUM \
+                       | DEV_TX_OFFLOAD_UDP_CKSUM \
+                       | DEV_TX_OFFLOAD_TCP_CKSUM \
+                       | DEV_TX_OFFLOAD_TCP_TSO \
+                       | DEV_TX_OFFLOAD_MULTI_SEGS)
+
+static const struct rte_eth_desc_lim rx_desc_lim = {
+       .nb_max = ATL_MAX_RING_DESC,
+       .nb_min = ATL_MIN_RING_DESC,
+       .nb_align = ATL_RXD_ALIGN,
+};
+
+static const struct rte_eth_desc_lim tx_desc_lim = {
+       .nb_max = ATL_MAX_RING_DESC,
+       .nb_min = ATL_MIN_RING_DESC,
+       .nb_align = ATL_TXD_ALIGN,
+       .nb_seg_max = ATL_TX_MAX_SEG,
+       .nb_mtu_seg_max = ATL_TX_MAX_SEG,
+};
+
+#define ATL_XSTATS_FIELD(name) { \
+       #name, \
+       offsetof(struct aq_stats_s, name) \
+}
+
+struct atl_xstats_tbl_s {
+       const char *name;
+       unsigned int offset;
+};
+
+static struct atl_xstats_tbl_s atl_xstats_tbl[] = {
+       ATL_XSTATS_FIELD(uprc),
+       ATL_XSTATS_FIELD(mprc),
+       ATL_XSTATS_FIELD(bprc),
+       ATL_XSTATS_FIELD(erpt),
+       ATL_XSTATS_FIELD(uptc),
+       ATL_XSTATS_FIELD(mptc),
+       ATL_XSTATS_FIELD(bptc),
+       ATL_XSTATS_FIELD(erpr),
+       ATL_XSTATS_FIELD(ubrc),
+       ATL_XSTATS_FIELD(ubtc),
+       ATL_XSTATS_FIELD(mbrc),
+       ATL_XSTATS_FIELD(mbtc),
+       ATL_XSTATS_FIELD(bbrc),
+       ATL_XSTATS_FIELD(bbtc),
+};
+
+static const struct eth_dev_ops atl_eth_dev_ops = {
+       .dev_configure        = atl_dev_configure,
+       .dev_start            = atl_dev_start,
+       .dev_stop             = atl_dev_stop,
+       .dev_set_link_up      = atl_dev_set_link_up,
+       .dev_set_link_down    = atl_dev_set_link_down,
+       .dev_close            = atl_dev_close,
+       .dev_reset            = atl_dev_reset,
+
+       /* PROMISC */
+       .promiscuous_enable   = atl_dev_promiscuous_enable,
+       .promiscuous_disable  = atl_dev_promiscuous_disable,
+       .allmulticast_enable  = atl_dev_allmulticast_enable,
+       .allmulticast_disable = atl_dev_allmulticast_disable,
+
+       /* Link */
+       .link_update          = atl_dev_link_update,
+
+       .get_reg              = atl_dev_get_regs,
+
+       /* Stats */
+       .stats_get            = atl_dev_stats_get,
+       .xstats_get           = atl_dev_xstats_get,
+       .xstats_get_names     = atl_dev_xstats_get_names,
+       .stats_reset          = atl_dev_stats_reset,
+       .xstats_reset         = atl_dev_stats_reset,
+
+       .fw_version_get       = atl_fw_version_get,
+       .dev_infos_get        = atl_dev_info_get,
+       .dev_supported_ptypes_get = atl_dev_supported_ptypes_get,
+
+       .mtu_set              = atl_dev_mtu_set,
+
+       /* VLAN */
+       .vlan_filter_set      = atl_vlan_filter_set,
+       .vlan_offload_set     = atl_vlan_offload_set,
+       .vlan_tpid_set        = atl_vlan_tpid_set,
+       .vlan_strip_queue_set = atl_vlan_strip_queue_set,
+
+       /* Queue Control */
+       .rx_queue_start       = atl_rx_queue_start,
+       .rx_queue_stop        = atl_rx_queue_stop,
+       .rx_queue_setup       = atl_rx_queue_setup,
+       .rx_queue_release     = atl_rx_queue_release,
+
+       .tx_queue_start       = atl_tx_queue_start,
+       .tx_queue_stop        = atl_tx_queue_stop,
+       .tx_queue_setup       = atl_tx_queue_setup,
+       .tx_queue_release     = atl_tx_queue_release,
+
+       .rx_queue_intr_enable = atl_dev_rx_queue_intr_enable,
+       .rx_queue_intr_disable = atl_dev_rx_queue_intr_disable,
+
+       .rx_queue_count       = atl_rx_queue_count,
+       .rx_descriptor_status = atl_dev_rx_descriptor_status,
+       .tx_descriptor_status = atl_dev_tx_descriptor_status,
+
+       /* EEPROM */
+       .get_eeprom_length    = atl_dev_get_eeprom_length,
+       .get_eeprom           = atl_dev_get_eeprom,
+       .set_eeprom           = atl_dev_set_eeprom,
+
+       /* Flow Control */
+       .flow_ctrl_get        = atl_flow_ctrl_get,
+       .flow_ctrl_set        = atl_flow_ctrl_set,
+
+       /* MAC */
+       .mac_addr_add         = atl_add_mac_addr,
+       .mac_addr_remove      = atl_remove_mac_addr,
+       .mac_addr_set         = atl_set_default_mac_addr,
+       .set_mc_addr_list     = atl_dev_set_mc_addr_list,
+       .rxq_info_get         = atl_rxq_info_get,
+       .txq_info_get         = atl_txq_info_get,
+
+       .reta_update          = atl_reta_update,
+       .reta_query           = atl_reta_query,
+       .rss_hash_update      = atl_rss_hash_update,
+       .rss_hash_conf_get    = atl_rss_hash_conf_get,
+};
+
+static inline int32_t
+atl_reset_hw(struct aq_hw_s *hw)
+{
+       return hw_atl_b0_hw_reset(hw);
+}
+
+static inline void
+atl_enable_intr(struct rte_eth_dev *dev)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       hw_atl_itr_irq_msk_setlsw_set(hw, 0xffffffff);
+}
+
+static void
+atl_disable_intr(struct aq_hw_s *hw)
+{
+       PMD_INIT_FUNC_TRACE();
+       hw_atl_itr_irq_msk_clearlsw_set(hw, 0xffffffff);
+}
+
+static int
+eth_atl_dev_init(struct rte_eth_dev *eth_dev)
+{
+       struct atl_adapter *adapter =
+               (struct atl_adapter *)eth_dev->data->dev_private;
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+       int err = 0;
+
+       PMD_INIT_FUNC_TRACE();
+
+       eth_dev->dev_ops = &atl_eth_dev_ops;
+       eth_dev->rx_pkt_burst = &atl_recv_pkts;
+       eth_dev->tx_pkt_burst = &atl_xmit_pkts;
+       eth_dev->tx_pkt_prepare = &atl_prep_pkts;
+
+       /* For secondary processes, the primary process has done all the work */
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return 0;
+
+       /* Vendor and Device ID need to be set before init of shared code */
+       hw->device_id = pci_dev->id.device_id;
+       hw->vendor_id = pci_dev->id.vendor_id;
+       hw->mmio = (void *)pci_dev->mem_resource[0].addr;
+
+       /* Hardware configuration - hardcode */
+       adapter->hw_cfg.is_lro = false;
+       adapter->hw_cfg.wol = false;
+       adapter->hw_cfg.is_rss = false;
+       adapter->hw_cfg.num_rss_queues = HW_ATL_B0_RSS_MAX;
+
+       adapter->hw_cfg.link_speed_msk = AQ_NIC_RATE_10G |
+                         AQ_NIC_RATE_5G |
+                         AQ_NIC_RATE_2G5 |
+                         AQ_NIC_RATE_1G |
+                         AQ_NIC_RATE_100M;
+
+       adapter->hw_cfg.flow_control = (AQ_NIC_FC_RX | AQ_NIC_FC_TX);
+       adapter->hw_cfg.aq_rss.indirection_table_size =
+               HW_ATL_B0_RSS_REDIRECTION_MAX;
+
+       hw->aq_nic_cfg = &adapter->hw_cfg;
+
+       /* disable interrupt */
+       atl_disable_intr(hw);
+
+       /* Allocate memory for storing MAC addresses */
+       eth_dev->data->mac_addrs = rte_zmalloc("atlantic", ETHER_ADDR_LEN, 0);
+       if (eth_dev->data->mac_addrs == NULL) {
+               PMD_INIT_LOG(ERR, "MAC Malloc failed");
+               return -ENOMEM;
+       }
+
+       err = hw_atl_utils_initfw(hw, &hw->aq_fw_ops);
+       if (err)
+               return err;
+
+       /* Copy the permanent MAC address */
+       if (hw->aq_fw_ops->get_mac_permanent(hw,
+                       eth_dev->data->mac_addrs->addr_bytes) != 0)
+               return -EINVAL;
+
+       /* Reset the hw statistics */
+       atl_dev_stats_reset(eth_dev);
+
+       rte_intr_callback_register(intr_handle,
+                                  atl_dev_interrupt_handler, eth_dev);
+
+       /* enable uio/vfio intr/eventfd mapping */
+       rte_intr_enable(intr_handle);
+
+       /* enable support intr */
+       atl_enable_intr(eth_dev);
+
+       return err;
+}
+
+static int
+eth_atl_dev_uninit(struct rte_eth_dev *eth_dev)
+{
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+       struct aq_hw_s *hw;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return -EPERM;
+
+       hw = ATL_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+
+       if (hw->adapter_stopped == 0)
+               atl_dev_close(eth_dev);
+
+       eth_dev->dev_ops = NULL;
+       eth_dev->rx_pkt_burst = NULL;
+       eth_dev->tx_pkt_burst = NULL;
+
+       /* disable uio intr before callback unregister */
+       rte_intr_disable(intr_handle);
+       rte_intr_callback_unregister(intr_handle,
+                                    atl_dev_interrupt_handler, eth_dev);
+
+       rte_free(eth_dev->data->mac_addrs);
+       eth_dev->data->mac_addrs = NULL;
+
+       return 0;
+}
+
+static int
+eth_atl_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+       struct rte_pci_device *pci_dev)
+{
+       return rte_eth_dev_pci_generic_probe(pci_dev,
+               sizeof(struct atl_adapter), eth_atl_dev_init);
+}
+
+static int
+eth_atl_pci_remove(struct rte_pci_device *pci_dev)
+{
+       return rte_eth_dev_pci_generic_remove(pci_dev, eth_atl_dev_uninit);
+}
+
+static int
+atl_dev_configure(struct rte_eth_dev *dev)
+{
+       struct atl_interrupt *intr =
+               ATL_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+       PMD_INIT_FUNC_TRACE();
+
+       /* set flag to update link status after init */
+       intr->flags |= ATL_FLAG_NEED_LINK_UPDATE;
+
+       return 0;
+}
+
+/*
+ * Configure device link speed and setup link.
+ * It returns 0 on success.
+ */
+static int
+atl_dev_start(struct rte_eth_dev *dev)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+       uint32_t intr_vector = 0;
+       uint32_t *link_speeds;
+       uint32_t speed = 0;
+       int status;
+       int err;
+
+       PMD_INIT_FUNC_TRACE();
+
+       /* set adapter started */
+       hw->adapter_stopped = 0;
+
+       if (dev->data->dev_conf.link_speeds & ETH_LINK_SPEED_FIXED) {
+               PMD_INIT_LOG(ERR,
+               "Invalid link_speeds for port %u, fix speed not supported",
+                               dev->data->port_id);
+               return -EINVAL;
+       }
+
+       /* disable uio/vfio intr/eventfd mapping */
+       rte_intr_disable(intr_handle);
+
+       /* reinitialize adapter
+        * this calls reset and start
+        */
+       status = atl_reset_hw(hw);
+       if (status != 0)
+               return -EIO;
+
+       err = hw_atl_b0_hw_init(hw, dev->data->mac_addrs->addr_bytes);
+
+       hw_atl_b0_hw_start(hw);
+       /* check and configure queue intr-vector mapping */
+       if ((rte_intr_cap_multiple(intr_handle) ||
+           !RTE_ETH_DEV_SRIOV(dev).active) &&
+           dev->data->dev_conf.intr_conf.rxq != 0) {
+               intr_vector = dev->data->nb_rx_queues;
+               if (intr_vector > ATL_MAX_INTR_QUEUE_NUM) {
+                       PMD_INIT_LOG(ERR, "At most %d intr queues supported",
+                                       ATL_MAX_INTR_QUEUE_NUM);
+                       return -ENOTSUP;
+               }
+               if (rte_intr_efd_enable(intr_handle, intr_vector)) {
+                       PMD_INIT_LOG(ERR, "rte_intr_efd_enable failed");
+                       return -1;
+               }
+       }
+
+       if (rte_intr_dp_is_en(intr_handle) && !intr_handle->intr_vec) {
+               intr_handle->intr_vec = rte_zmalloc("intr_vec",
+                                   dev->data->nb_rx_queues * sizeof(int), 0);
+               if (intr_handle->intr_vec == NULL) {
+                       PMD_INIT_LOG(ERR, "Failed to allocate %d rx_queues"
+                                    " intr_vec", dev->data->nb_rx_queues);
+                       return -ENOMEM;
+               }
+       }
+
+       /* initialize transmission unit */
+       atl_tx_init(dev);
+
+       /* This can fail when allocating mbufs for descriptor rings */
+       err = atl_rx_init(dev);
+       if (err) {
+               PMD_INIT_LOG(ERR, "Unable to initialize RX hardware");
+               goto error;
+       }
+
+       PMD_INIT_LOG(DEBUG, "FW version: %u.%u.%u",
+               hw->fw_ver_actual >> 24,
+               (hw->fw_ver_actual >> 16) & 0xFF,
+               hw->fw_ver_actual & 0xFFFF);
+       PMD_INIT_LOG(DEBUG, "Driver version: %s", ATL_PMD_DRIVER_VERSION);
+
+       err = atl_start_queues(dev);
+       if (err < 0) {
+               PMD_INIT_LOG(ERR, "Unable to start rxtx queues");
+               goto error;
+       }
+
+       err = hw->aq_fw_ops->update_link_status(hw);
+
+       if (err)
+               goto error;
+
+       dev->data->dev_link.link_status = hw->aq_link_status.mbps != 0;
+
+       link_speeds = &dev->data->dev_conf.link_speeds;
+
+       speed = 0x0;
+
+       if (*link_speeds == ETH_LINK_SPEED_AUTONEG) {
+               speed = hw->aq_nic_cfg->link_speed_msk;
+       } else {
+               if (*link_speeds & ETH_LINK_SPEED_10G)
+                       speed |= AQ_NIC_RATE_10G;
+               if (*link_speeds & ETH_LINK_SPEED_5G)
+                       speed |= AQ_NIC_RATE_5G;
+               if (*link_speeds & ETH_LINK_SPEED_1G)
+                       speed |= AQ_NIC_RATE_1G;
+               if (*link_speeds & ETH_LINK_SPEED_2_5G)
+                       speed |=  AQ_NIC_RATE_2G5;
+               if (*link_speeds & ETH_LINK_SPEED_100M)
+                       speed |= AQ_NIC_RATE_100M;
+       }
+
+       err = hw->aq_fw_ops->set_link_speed(hw, speed);
+       if (err)
+               goto error;
+
+       if (rte_intr_allow_others(intr_handle)) {
+               /* check if lsc interrupt is enabled */
+               if (dev->data->dev_conf.intr_conf.lsc != 0)
+                       atl_dev_lsc_interrupt_setup(dev, true);
+               else
+                       atl_dev_lsc_interrupt_setup(dev, false);
+       } else {
+               rte_intr_callback_unregister(intr_handle,
+                                            atl_dev_interrupt_handler, dev);
+               if (dev->data->dev_conf.intr_conf.lsc != 0)
+                       PMD_INIT_LOG(INFO, "lsc won't enable because of"
+                                    " no intr multiplex");
+       }
+
+       /* check if rxq interrupt is enabled */
+       if (dev->data->dev_conf.intr_conf.rxq != 0 &&
+           rte_intr_dp_is_en(intr_handle))
+               atl_dev_rxq_interrupt_setup(dev);
+
+       /* enable uio/vfio intr/eventfd mapping */
+       rte_intr_enable(intr_handle);
+
+       /* resume enabled intr since hw reset */
+       atl_enable_intr(dev);
+
+       return 0;
+
+error:
+       atl_stop_queues(dev);
+       return -EIO;
+}
+
+/*
+ * Stop device: disable rx and tx functions to allow for reconfiguring.
+ */
+static void
+atl_dev_stop(struct rte_eth_dev *dev)
+{
+       struct rte_eth_link link;
+       struct aq_hw_s *hw =
+               ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+       struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+
+       PMD_INIT_FUNC_TRACE();
+
+       /* disable interrupts */
+       atl_disable_intr(hw);
+
+       /* reset the NIC */
+       atl_reset_hw(hw);
+       hw->adapter_stopped = 1;
+
+       atl_stop_queues(dev);
+
+       /* Clear stored conf */
+       dev->data->scattered_rx = 0;
+       dev->data->lro = 0;
+
+       /* Clear recorded link status */
+       memset(&link, 0, sizeof(link));
+       rte_eth_linkstatus_set(dev, &link);
+
+       if (!rte_intr_allow_others(intr_handle))
+               /* resume to the default handler */
+               rte_intr_callback_register(intr_handle,
+                                          atl_dev_interrupt_handler,
+                                          (void *)dev);
+
+       /* Clean datapath event and queue/vec mapping */
+       rte_intr_efd_disable(intr_handle);
+       if (intr_handle->intr_vec != NULL) {
+               rte_free(intr_handle->intr_vec);
+               intr_handle->intr_vec = NULL;
+       }
+}
+
+/*
+ * Set device link up: enable tx.
+ */
+static int
+atl_dev_set_link_up(struct rte_eth_dev *dev)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       return hw->aq_fw_ops->set_link_speed(hw,
+                       hw->aq_nic_cfg->link_speed_msk);
+}
+
+/*
+ * Set device link down: disable tx.
+ */
+static int
+atl_dev_set_link_down(struct rte_eth_dev *dev)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       return hw->aq_fw_ops->set_link_speed(hw, 0);
+}
+
+/*
+ * Reset and stop device.
+ */
+static void
+atl_dev_close(struct rte_eth_dev *dev)
+{
+       PMD_INIT_FUNC_TRACE();
+
+       atl_dev_stop(dev);
+
+       atl_free_queues(dev);
+}
+
+static int
+atl_dev_reset(struct rte_eth_dev *dev)
+{
+       int ret;
+
+       ret = eth_atl_dev_uninit(dev);
+       if (ret)
+               return ret;
+
+       ret = eth_atl_dev_init(dev);
+
+       return ret;
+}
+
+
+static int
+atl_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+       struct atl_adapter *adapter = ATL_DEV_TO_ADAPTER(dev);
+       struct aq_hw_s *hw = &adapter->hw;
+       struct atl_sw_stats *swstats = &adapter->sw_stats;
+       unsigned int i;
+
+       hw->aq_fw_ops->update_stats(hw);
+
+       /* Fill out the rte_eth_stats statistics structure */
+       stats->ipackets = hw->curr_stats.dma_pkt_rc;
+       stats->ibytes = hw->curr_stats.dma_oct_rc;
+       stats->imissed = hw->curr_stats.dpc;
+       stats->ierrors = hw->curr_stats.erpt;
+
+       stats->opackets = hw->curr_stats.dma_pkt_tc;
+       stats->obytes = hw->curr_stats.dma_oct_tc;
+       stats->oerrors = 0;
+
+       stats->rx_nombuf = swstats->rx_nombuf;
+
+       for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) {
+               stats->q_ipackets[i] = swstats->q_ipackets[i];
+               stats->q_opackets[i] = swstats->q_opackets[i];
+               stats->q_ibytes[i] = swstats->q_ibytes[i];
+               stats->q_obytes[i] = swstats->q_obytes[i];
+               stats->q_errors[i] = swstats->q_errors[i];
+       }
+       return 0;
+}
+
+static void
+atl_dev_stats_reset(struct rte_eth_dev *dev)
+{
+       struct atl_adapter *adapter = ATL_DEV_TO_ADAPTER(dev);
+       struct aq_hw_s *hw = &adapter->hw;
+
+       hw->aq_fw_ops->update_stats(hw);
+
+       /* Reset software totals */
+       memset(&hw->curr_stats, 0, sizeof(hw->curr_stats));
+
+       memset(&adapter->sw_stats, 0, sizeof(adapter->sw_stats));
+}
+
+static int
+atl_dev_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
+                        struct rte_eth_xstat_name *xstats_names,
+                        unsigned int size)
+{
+       unsigned int i;
+
+       if (!xstats_names)
+               return RTE_DIM(atl_xstats_tbl);
+
+       for (i = 0; i < size && i < RTE_DIM(atl_xstats_tbl); i++)
+               snprintf(xstats_names[i].name, RTE_ETH_XSTATS_NAME_SIZE, "%s",
+                       atl_xstats_tbl[i].name);
+
+       return size;
+}
+
+static int
+atl_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *stats,
+                  unsigned int n)
+{
+       struct atl_adapter *adapter = ATL_DEV_TO_ADAPTER(dev);
+       struct aq_hw_s *hw = &adapter->hw;
+       unsigned int i;
+
+       if (!stats)
+               return 0;
+
+       for (i = 0; i < n && i < RTE_DIM(atl_xstats_tbl); i++) {
+               stats[i].id = i;
+               stats[i].value = *(u64 *)((uint8_t *)&hw->curr_stats +
+                                       atl_xstats_tbl[i].offset);
+       }
+
+       return n;
+}
+
+static int
+atl_fw_version_get(struct rte_eth_dev *dev, char *fw_version, size_t fw_size)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint32_t fw_ver = 0;
+       unsigned int ret = 0;
+
+       ret = hw_atl_utils_get_fw_version(hw, &fw_ver);
+       if (ret)
+               return -EIO;
+
+       ret = snprintf(fw_version, fw_size, "%u.%u.%u", fw_ver >> 24,
+                      (fw_ver >> 16) & 0xFFU, fw_ver & 0xFFFFU);
+
+       ret += 1; /* include the terminating null byte */
+
+       if (fw_size < ret)
+               return ret;
+
+       return 0;
+}
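
atl_fw_version_get() follows the ethdev convention of returning the buffer size the caller would need (including the terminator) when fw_size is too small. A hedged sketch of a caller honouring that convention through the public API:

/* Hedged sketch of a caller using the size convention above. */
#include <stdio.h>
#include <rte_ethdev.h>

static void
example_print_fw_version(uint16_t port_id)
{
        char fw[32];
        int ret = rte_eth_dev_fw_version_get(port_id, fw, sizeof(fw));

        if (ret == 0)
                printf("firmware %s\n", fw);
        else if (ret > 0)
                printf("buffer too small, need %d bytes\n", ret);
}
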
+
+static void
+atl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+
+       dev_info->max_rx_queues = AQ_HW_MAX_RX_QUEUES;
+       dev_info->max_tx_queues = AQ_HW_MAX_TX_QUEUES;
+
+       dev_info->min_rx_bufsize = 1024;
+       dev_info->max_rx_pktlen = HW_ATL_B0_MTU_JUMBO;
+       dev_info->max_mac_addrs = HW_ATL_B0_MAC_MAX;
+       dev_info->max_vfs = pci_dev->max_vfs;
+
+       dev_info->max_hash_mac_addrs = 0;
+       dev_info->max_vmdq_pools = 0;
+       dev_info->vmdq_queue_num = 0;
+
+       dev_info->rx_offload_capa = ATL_RX_OFFLOADS;
+
+       dev_info->tx_offload_capa = ATL_TX_OFFLOADS;
+
+
+       dev_info->default_rxconf = (struct rte_eth_rxconf) {
+               .rx_free_thresh = ATL_DEFAULT_RX_FREE_THRESH,
+       };
+
+       dev_info->default_txconf = (struct rte_eth_txconf) {
+               .tx_free_thresh = ATL_DEFAULT_TX_FREE_THRESH,
+       };
+
+       dev_info->rx_desc_lim = rx_desc_lim;
+       dev_info->tx_desc_lim = tx_desc_lim;
+
+       dev_info->hash_key_size = HW_ATL_B0_RSS_HASHKEY_BITS / 8;
+       dev_info->reta_size = HW_ATL_B0_RSS_REDIRECTION_MAX;
+       dev_info->flow_type_rss_offloads = ATL_RSS_OFFLOAD_ALL;
+
+       dev_info->speed_capa = ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G;
+       dev_info->speed_capa |= ETH_LINK_SPEED_100M;
+       dev_info->speed_capa |= ETH_LINK_SPEED_2_5G;
+       dev_info->speed_capa |= ETH_LINK_SPEED_5G;
+}
+
+static const uint32_t *
+atl_dev_supported_ptypes_get(struct rte_eth_dev *dev)
+{
+       static const uint32_t ptypes[] = {
+               RTE_PTYPE_L2_ETHER,
+               RTE_PTYPE_L2_ETHER_ARP,
+               RTE_PTYPE_L2_ETHER_VLAN,
+               RTE_PTYPE_L3_IPV4,
+               RTE_PTYPE_L3_IPV6,
+               RTE_PTYPE_L4_TCP,
+               RTE_PTYPE_L4_UDP,
+               RTE_PTYPE_L4_SCTP,
+               RTE_PTYPE_L4_ICMP,
+               RTE_PTYPE_UNKNOWN
+       };
+
+       if (dev->rx_pkt_burst == atl_recv_pkts)
+               return ptypes;
+
+       return NULL;
+}
+
+/* Return 0 if the link status changed, -1 if it is unchanged */
+static int
+atl_dev_link_update(struct rte_eth_dev *dev, int wait __rte_unused)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct atl_interrupt *intr =
+               ATL_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+       struct rte_eth_link link, old;
+       int err = 0;
+
+       link.link_status = ETH_LINK_DOWN;
+       link.link_speed = 0;
+       link.link_duplex = ETH_LINK_FULL_DUPLEX;
+       link.link_autoneg = hw->is_autoneg ? ETH_LINK_AUTONEG : ETH_LINK_FIXED;
+       memset(&old, 0, sizeof(old));
+
+       /* load old link status */
+       rte_eth_linkstatus_get(dev, &old);
+
+       /* read current link status */
+       err = hw->aq_fw_ops->update_link_status(hw);
+
+       if (err)
+               return 0;
+
+       if (hw->aq_link_status.mbps == 0) {
+               /* write default (down) link status */
+               rte_eth_linkstatus_set(dev, &link);
+               if (link.link_status == old.link_status)
+                       return -1;
+               return 0;
+       }
+
+       intr->flags &= ~ATL_FLAG_NEED_LINK_CONFIG;
+
+       link.link_status = ETH_LINK_UP;
+       link.link_duplex = ETH_LINK_FULL_DUPLEX;
+       link.link_speed = hw->aq_link_status.mbps;
+
+       rte_eth_linkstatus_set(dev, &link);
+
+       if (link.link_status == old.link_status)
+               return -1;
+
+       return 0;
+}
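
Per the comment above, the return convention is inverted relative to most APIs: 0 means the link status changed, -1 means it did not. A hedged sketch of a caller that depends on it, reusing the static helpers defined in this file:

/* Hedged sketch: act only when atl_dev_link_update() reports a
 * change (returns 0), per the convention documented above.
 */
static void
example_poll_link(struct rte_eth_dev *dev)
{
        if (atl_dev_link_update(dev, 0) == 0)
                atl_dev_link_status_print(dev);
}
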
+
+static void
+atl_dev_promiscuous_enable(struct rte_eth_dev *dev)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       hw_atl_rpfl2promiscuous_mode_en_set(hw, true);
+}
+
+static void
+atl_dev_promiscuous_disable(struct rte_eth_dev *dev)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       hw_atl_rpfl2promiscuous_mode_en_set(hw, false);
+}
+
+static void
+atl_dev_allmulticast_enable(struct rte_eth_dev *dev)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       hw_atl_rpfl2_accept_all_mc_packets_set(hw, true);
+}
+
+static void
+atl_dev_allmulticast_disable(struct rte_eth_dev *dev)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       if (dev->data->promiscuous == 1)
+               return; /* promiscuous implies all-multicast; keep it on */
+
+       hw_atl_rpfl2_accept_all_mc_packets_set(hw, false);
+}
+
+/**
+ * It clears the interrupt causes and enables the interrupt.
+ * It is called only once during NIC initialization.
+ *
+ * @param dev
+ *  Pointer to struct rte_eth_dev.
+ * @param on
+ *  Enable or Disable.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+
+static int
+atl_dev_lsc_interrupt_setup(struct rte_eth_dev *dev, uint8_t on __rte_unused)
+{
+       atl_dev_link_status_print(dev);
+       return 0;
+}
+
+static int
+atl_dev_rxq_interrupt_setup(struct rte_eth_dev *dev __rte_unused)
+{
+       return 0;
+}
+
+
+static int
+atl_dev_interrupt_get_status(struct rte_eth_dev *dev)
+{
+       struct atl_interrupt *intr =
+               ATL_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       u64 cause = 0;
+
+       hw_atl_b0_hw_irq_read(hw, &cause);
+
+       atl_disable_intr(hw);
+       intr->flags = cause & BIT(ATL_IRQ_CAUSE_LINK) ?
+                       ATL_FLAG_NEED_LINK_UPDATE : 0;
+
+       return 0;
+}
+
+/**
+ * It gets and then prints the link status.
+ *
+ * @param dev
+ *  Pointer to struct rte_eth_dev.
+ */
+static void
+atl_dev_link_status_print(struct rte_eth_dev *dev)
+{
+       struct rte_eth_link link;
+
+       memset(&link, 0, sizeof(link));
+       rte_eth_linkstatus_get(dev, &link);
+       if (link.link_status) {
+               PMD_DRV_LOG(INFO, "Port %d: Link Up - speed %u Mbps - %s",
+                                       (int)(dev->data->port_id),
+                                       (unsigned int)link.link_speed,
+                       link.link_duplex == ETH_LINK_FULL_DUPLEX ?
+                                       "full-duplex" : "half-duplex");
+       } else {
+               PMD_DRV_LOG(INFO, " Port %d: Link Down",
+                               (int)(dev->data->port_id));
+       }
+
+
+#ifdef DEBUG
+{
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
+
+       PMD_DRV_LOG(DEBUG, "PCI Address: " PCI_PRI_FMT,
+                               pci_dev->addr.domain,
+                               pci_dev->addr.bus,
+                               pci_dev->addr.devid,
+                               pci_dev->addr.function);
+}
+#endif
+
+       PMD_DRV_LOG(INFO, "Link speed:%d", link.link_speed);
+}
+
+/*
+ * It executes link_update after knowing an interrupt occurred.
+ *
+ * @param dev
+ *  Pointer to struct rte_eth_dev.
+ *
+ * @return
+ *  - On success, zero.
+ *  - On failure, a negative value.
+ */
+static int
+atl_dev_interrupt_action(struct rte_eth_dev *dev,
+                          struct rte_intr_handle *intr_handle)
+{
+       struct atl_interrupt *intr =
+               ATL_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
+
+       if (intr->flags & ATL_FLAG_NEED_LINK_UPDATE) {
+               atl_dev_link_update(dev, 0);
+               intr->flags &= ~ATL_FLAG_NEED_LINK_UPDATE;
+               atl_dev_link_status_print(dev);
+               _rte_eth_dev_callback_process(dev,
+                       RTE_ETH_EVENT_INTR_LSC, NULL);
+       }
+
+       atl_enable_intr(dev);
+       rte_intr_enable(intr_handle);
+
+       return 0;
+}
+
+/**
+ * Interrupt handler triggered by the NIC for handling a
+ * specific interrupt.
+ *
+ * @param handle
+ *  Pointer to interrupt handle.
+ * @param param
+ *  The address of the parameter (struct rte_eth_dev *) registered before.
+ *
+ * @return
+ *  void
+ */
+static void
+atl_dev_interrupt_handler(void *param)
+{
+       struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+
+       atl_dev_interrupt_get_status(dev);
+       atl_dev_interrupt_action(dev, dev->intr_handle);
+}
+
+#define SFP_EEPROM_SIZE 0xff
+
+static int
+atl_dev_get_eeprom_length(struct rte_eth_dev *dev __rte_unused)
+{
+       return SFP_EEPROM_SIZE;
+}
+
+static int
+atl_dev_get_eeprom(struct rte_eth_dev *dev, struct rte_dev_eeprom_info *eeprom)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       if (hw->aq_fw_ops->get_eeprom == NULL)
+               return -ENOTSUP;
+
+       if (eeprom->length != SFP_EEPROM_SIZE || eeprom->data == NULL)
+               return -EINVAL;
+
+       return hw->aq_fw_ops->get_eeprom(hw, eeprom->data, eeprom->length);
+}
+
+static int
+atl_dev_set_eeprom(struct rte_eth_dev *dev, struct rte_dev_eeprom_info *eeprom)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       if (hw->aq_fw_ops->set_eeprom == NULL)
+               return -ENOTSUP;
+
+       if (eeprom->length != SFP_EEPROM_SIZE || eeprom->data == NULL)
+               return -EINVAL;
+
+       return hw->aq_fw_ops->set_eeprom(hw, eeprom->data, eeprom->length);
+}
+
+static int
+atl_dev_get_regs(struct rte_eth_dev *dev, struct rte_dev_reg_info *regs)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       u32 mif_id;
+       int err;
+
+       if (regs->data == NULL) {
+               regs->length = hw_atl_utils_hw_get_reg_length();
+               regs->width = sizeof(u32);
+               return 0;
+       }
+
+       /* Only full register dump is supported */
+       if (regs->length && regs->length != hw_atl_utils_hw_get_reg_length())
+               return -ENOTSUP;
+
+       err = hw_atl_utils_hw_get_regs(hw, regs->data);
+
+       /* Device version */
+       mif_id = hw_atl_reg_glb_mif_id_get(hw);
+       regs->version = mif_id & 0xFFU;
+
+       return err;
+}
+
+static int
+atl_flow_ctrl_get(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       if (hw->aq_nic_cfg->flow_control == AQ_NIC_FC_OFF)
+               fc_conf->mode = RTE_FC_NONE;
+       else if (hw->aq_nic_cfg->flow_control & (AQ_NIC_FC_RX | AQ_NIC_FC_TX))
+               fc_conf->mode = RTE_FC_FULL;
+       else if (hw->aq_nic_cfg->flow_control & AQ_NIC_FC_RX)
+               fc_conf->mode = RTE_FC_RX_PAUSE;
+       else if (hw->aq_nic_cfg->flow_control & AQ_NIC_FC_RX)
+               fc_conf->mode = RTE_FC_TX_PAUSE;
+
+       return 0;
+}
+
+static int
+atl_flow_ctrl_set(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint32_t old_flow_control = hw->aq_nic_cfg->flow_control;
+
+       if (hw->aq_fw_ops->set_flow_control == NULL)
+               return -ENOTSUP;
+
+       if (fc_conf->mode == RTE_FC_NONE)
+               hw->aq_nic_cfg->flow_control = AQ_NIC_FC_OFF;
+       else if (fc_conf->mode == RTE_FC_RX_PAUSE)
+               hw->aq_nic_cfg->flow_control = AQ_NIC_FC_RX;
+       else if (fc_conf->mode == RTE_FC_TX_PAUSE)
+               hw->aq_nic_cfg->flow_control = AQ_NIC_FC_TX;
+       else if (fc_conf->mode == RTE_FC_FULL)
+               hw->aq_nic_cfg->flow_control = (AQ_NIC_FC_RX | AQ_NIC_FC_TX);
+
+       if (old_flow_control != hw->aq_nic_cfg->flow_control)
+               return hw->aq_fw_ops->set_flow_control(hw);
+
+       return 0;
+}
+
+static int
+atl_update_mac_addr(struct rte_eth_dev *dev, uint32_t index,
+                   u8 *mac_addr, bool enable)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       unsigned int h = 0U;
+       unsigned int l = 0U;
+       int err;
+
+       if (mac_addr) {
+               h = (mac_addr[0] << 8) | (mac_addr[1]);
+               l = (mac_addr[2] << 24) | (mac_addr[3] << 16) |
+                       (mac_addr[4] << 8) | mac_addr[5];
+       }
+
+       hw_atl_rpfl2_uc_flr_en_set(hw, 0U, index);
+       hw_atl_rpfl2unicast_dest_addresslsw_set(hw, l, index);
+       hw_atl_rpfl2unicast_dest_addressmsw_set(hw, h, index);
+
+       if (enable)
+               hw_atl_rpfl2_uc_flr_en_set(hw, 1U, index);
+
+       err = aq_hw_err_from_flags(hw);
+
+       return err;
+}
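The L2 filter registers take the station address as a 16-bit high word (first two octets) and a 32-bit low word (remaining four octets). A standalone sketch of the same packing, using an illustrative address:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* 00:17:b6:01:02:03 - illustrative address */
	uint8_t mac[6] = { 0x00, 0x17, 0xb6, 0x01, 0x02, 0x03 };
	uint32_t h = (mac[0] << 8) | mac[1];
	uint32_t l = ((uint32_t)mac[2] << 24) | (mac[3] << 16) |
		     (mac[4] << 8) | mac[5];

	printf("msw=0x%04x lsw=0x%08x\n", h, l); /* 0x0017 / 0xb6010203 */
	return 0;
}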
+
+static int
+atl_add_mac_addr(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
+                       uint32_t index __rte_unused, uint32_t pool __rte_unused)
+{
+       if (is_zero_ether_addr(mac_addr)) {
+               PMD_DRV_LOG(ERR, "Invalid Ethernet Address");
+               return -EINVAL;
+       }
+
+       return atl_update_mac_addr(dev, index, (u8 *)mac_addr, true);
+}
+
+static void
+atl_remove_mac_addr(struct rte_eth_dev *dev, uint32_t index)
+{
+       atl_update_mac_addr(dev, index, NULL, false);
+}
+
+static int
+atl_set_default_mac_addr(struct rte_eth_dev *dev, struct ether_addr *addr)
+{
+       atl_remove_mac_addr(dev, 0);
+       atl_add_mac_addr(dev, addr, 0, 0);
+       return 0;
+}
+
+static int
+atl_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+       struct rte_eth_dev_info dev_info;
+       uint32_t frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
+
+       atl_dev_info_get(dev, &dev_info);
+
+       if ((mtu < ETHER_MIN_MTU) || (frame_size > dev_info.max_rx_pktlen))
+               return -EINVAL;
+
+       /* update max frame size */
+       dev->data->dev_conf.rxmode.max_rx_pkt_len = frame_size;
+
+       return 0;
+}
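The check converts the requested MTU to an L2 frame size before comparing it against max_rx_pktlen. A worked arithmetic example, with the header and CRC constants as defined in rte_ether.h:

#include <stdio.h>
#include <stdint.h>

#define ETHER_HDR_LEN 14 /* dst MAC + src MAC + EtherType */
#define ETHER_CRC_LEN 4

int main(void)
{
	uint16_t mtu = 1500;
	uint32_t frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;

	/* A 1500-byte MTU needs a 1518-byte max frame */
	printf("mtu=%u -> frame_size=%u\n", mtu, frame_size);
	return 0;
}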
+
+static int
+atl_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
+{
+       struct aq_hw_cfg_s *cfg =
+               ATL_DEV_PRIVATE_TO_CFG(dev->data->dev_private);
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       int err = 0;
+       int i = 0;
+
+       PMD_INIT_FUNC_TRACE();
+
+       for (i = 0; i < HW_ATL_B0_MAX_VLAN_IDS; i++) {
+               if (cfg->vlan_filter[i] == vlan_id) {
+                       if (!on) {
+                               /* Disable VLAN filter. */
+                               hw_atl_rpf_vlan_flr_en_set(hw, 0U, i);
+
+                               /* Clear VLAN filter entry */
+                               cfg->vlan_filter[i] = 0;
+                       }
+                       break;
+               }
+       }
+
+       /* VLAN_ID was not found. So, nothing to delete. */
+       if (i == HW_ATL_B0_MAX_VLAN_IDS && !on)
+               goto exit;
+
+       /* VLAN_ID already exists, or was removed above. Nothing to do. */
+       if (i != HW_ATL_B0_MAX_VLAN_IDS)
+               goto exit;
+
+       /* Try to find a free VLAN filter entry to add the new VLAN_ID */
+       for (i = 0; i < HW_ATL_B0_MAX_VLAN_IDS; i++) {
+               if (cfg->vlan_filter[i] == 0)
+                       break;
+       }
+
+       if (i == HW_ATL_B0_MAX_VLAN_IDS) {
+               /* No free VLAN filter entry to add the new VLAN_ID */
+               err = -ENOMEM;
+               goto exit;
+       }
+
+       cfg->vlan_filter[i] = vlan_id;
+       hw_atl_rpf_vlan_flr_act_set(hw, 1U, i);
+       hw_atl_rpf_vlan_id_flr_set(hw, vlan_id, i);
+       hw_atl_rpf_vlan_flr_en_set(hw, 1U, i);
+
+exit:
+       /* Enable VLAN promiscuous mode if the VLAN filter table is empty */
+       for (i = 0; i < HW_ATL_B0_MAX_VLAN_IDS; i++) {
+               if (cfg->vlan_filter[i] != 0)
+                       break;
+       }
+
+       hw_atl_rpf_vlan_prom_mode_en_set(hw, i == HW_ATL_B0_MAX_VLAN_IDS);
+
+       return err;
+}
+
+static int
+atl_enable_vlan_filter(struct rte_eth_dev *dev, int en)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct aq_hw_cfg_s *cfg =
+               ATL_DEV_PRIVATE_TO_CFG(dev->data->dev_private);
+       int i;
+
+       PMD_INIT_FUNC_TRACE();
+
+       for (i = 0; i < HW_ATL_B0_MAX_VLAN_IDS; i++) {
+               if (cfg->vlan_filter[i])
+                       hw_atl_rpf_vlan_flr_en_set(hw, en, i);
+       }
+       return 0;
+}
+
+static int
+atl_vlan_offload_set(struct rte_eth_dev *dev, int mask)
+{
+       struct aq_hw_cfg_s *cfg =
+               ATL_DEV_PRIVATE_TO_CFG(dev->data->dev_private);
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       int ret = 0;
+       int i;
+
+       PMD_INIT_FUNC_TRACE();
+
+       ret = atl_enable_vlan_filter(dev, mask & ETH_VLAN_FILTER_MASK);
+
+       cfg->vlan_strip = !!(mask & ETH_VLAN_STRIP_MASK);
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++)
+               hw_atl_rpo_rx_desc_vlan_stripping_set(hw, cfg->vlan_strip, i);
+
+       if (mask & ETH_VLAN_EXTEND_MASK)
+               ret = -ENOTSUP;
+
+       return ret;
+}
+
+static int
+atl_vlan_tpid_set(struct rte_eth_dev *dev, enum rte_vlan_type vlan_type,
+                 uint16_t tpid)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       int err = 0;
+
+       PMD_INIT_FUNC_TRACE();
+
+       switch (vlan_type) {
+       case ETH_VLAN_TYPE_INNER:
+               hw_atl_rpf_vlan_inner_etht_set(hw, tpid);
+               break;
+       case ETH_VLAN_TYPE_OUTER:
+               hw_atl_rpf_vlan_outer_etht_set(hw, tpid);
+               break;
+       default:
+               PMD_DRV_LOG(ERR, "Unsupported VLAN type");
+               err = -ENOTSUP;
+       }
+
+       return err;
+}
+
+static void
+atl_vlan_strip_queue_set(struct rte_eth_dev *dev, uint16_t queue_id, int on)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (queue_id >= dev->data->nb_rx_queues) {
+               PMD_DRV_LOG(ERR, "Invalid queue id");
+               return;
+       }
+
+       hw_atl_rpo_rx_desc_vlan_stripping_set(hw, on, queue_id);
+}
+
+static int
+atl_dev_set_mc_addr_list(struct rte_eth_dev *dev,
+                         struct ether_addr *mc_addr_set,
+                         uint32_t nb_mc_addr)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       u32 i;
+
+       if (nb_mc_addr > AQ_HW_MULTICAST_ADDRESS_MAX - HW_ATL_B0_MAC_MIN)
+               return -EINVAL;
+
+       /* Update whole uc filters table */
+       for (i = 0; i < AQ_HW_MULTICAST_ADDRESS_MAX - HW_ATL_B0_MAC_MIN; i++) {
+               u8 *mac_addr = NULL;
+               u32 l = 0, h = 0;
+
+               if (i < nb_mc_addr) {
+                       mac_addr = mc_addr_set[i].addr_bytes;
+                       l = (mac_addr[2] << 24) | (mac_addr[3] << 16) |
+                               (mac_addr[4] << 8) | mac_addr[5];
+                       h = (mac_addr[0] << 8) | mac_addr[1];
+               }
+
+               hw_atl_rpfl2_uc_flr_en_set(hw, 0U, HW_ATL_B0_MAC_MIN + i);
+               hw_atl_rpfl2unicast_dest_addresslsw_set(hw, l,
+                                                       HW_ATL_B0_MAC_MIN + i);
+               hw_atl_rpfl2unicast_dest_addressmsw_set(hw, h,
+                                                       HW_ATL_B0_MAC_MIN + i);
+               hw_atl_rpfl2_uc_flr_en_set(hw, !!mac_addr,
+                                          HW_ATL_B0_MAC_MIN + i);
+       }
+
+       return 0;
+}
+
+static int
+atl_reta_update(struct rte_eth_dev *dev,
+                  struct rte_eth_rss_reta_entry64 *reta_conf,
+                  uint16_t reta_size)
+{
+       int i;
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct aq_hw_cfg_s *cf = ATL_DEV_PRIVATE_TO_CFG(dev->data->dev_private);
+
+       for (i = 0; i < reta_size && i < cf->aq_rss.indirection_table_size; i++)
+               cf->aq_rss.indirection_table[i] = min(reta_conf->reta[i],
+                                       dev->data->nb_rx_queues - 1);
+
+       hw_atl_b0_hw_rss_set(hw, &cf->aq_rss);
+       return 0;
+}
+
+static int
+atl_reta_query(struct rte_eth_dev *dev,
+                   struct rte_eth_rss_reta_entry64 *reta_conf,
+                   uint16_t reta_size)
+{
+       int i;
+       struct aq_hw_cfg_s *cf = ATL_DEV_PRIVATE_TO_CFG(dev->data->dev_private);
+
+       for (i = 0; i < reta_size && i < cf->aq_rss.indirection_table_size; i++)
+               reta_conf->reta[i] = cf->aq_rss.indirection_table[i];
+       reta_conf->mask = ~0U;
+       return 0;
+}
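Note that reta_update clamps out-of-range entries with min() instead of rejecting them. A standalone illustration of the clamping, with an illustrative 8-entry table and 4 Rx queues:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Illustrative: 8-entry indirection table, 4 Rx queues */
	uint16_t reta[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
	uint16_t nb_rx_queues = 4;
	int i;

	for (i = 0; i < 8; i++) {
		uint16_t q = reta[i] < nb_rx_queues ?
			     reta[i] : (uint16_t)(nb_rx_queues - 1);
		/* Entries 4..7 all collapse onto queue 3 */
		printf("reta[%d]=%u -> queue %u\n", i, reta[i], q);
	}
	return 0;
}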
+
+static int
+atl_rss_hash_update(struct rte_eth_dev *dev,
+                                struct rte_eth_rss_conf *rss_conf)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct aq_hw_cfg_s *cfg =
+               ATL_DEV_PRIVATE_TO_CFG(dev->data->dev_private);
+       static u8 def_rss_key[40] = {
+               0x1e, 0xad, 0x71, 0x87, 0x65, 0xfc, 0x26, 0x7d,
+               0x0d, 0x45, 0x67, 0x74, 0xcd, 0x06, 0x1a, 0x18,
+               0xb6, 0xc1, 0xf0, 0xc7, 0xbb, 0x18, 0xbe, 0xf8,
+               0x19, 0x13, 0x4b, 0xa9, 0xd0, 0x3e, 0xfe, 0x70,
+               0x25, 0x03, 0xab, 0x50, 0x6a, 0x8b, 0x82, 0x0c
+       };
+
+       cfg->is_rss = !!rss_conf->rss_hf;
+       if (rss_conf->rss_key) {
+               if (rss_conf->rss_key_len >
+                   sizeof(cfg->aq_rss.hash_secret_key))
+                       return -EINVAL;
+               memcpy(cfg->aq_rss.hash_secret_key, rss_conf->rss_key,
+                      rss_conf->rss_key_len);
+               cfg->aq_rss.hash_secret_key_size = rss_conf->rss_key_len;
+       } else {
+               memcpy(cfg->aq_rss.hash_secret_key, def_rss_key,
+                      sizeof(def_rss_key));
+               cfg->aq_rss.hash_secret_key_size = sizeof(def_rss_key);
+       }
+
+       hw_atl_b0_hw_rss_set(hw, &cfg->aq_rss);
+       hw_atl_b0_hw_rss_hash_set(hw, &cfg->aq_rss);
+       return 0;
+}
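Applications reach this callback through rte_eth_dev_rss_hash_update(). A hedged sketch with an illustrative key (the supplied length must not exceed the 40-byte hardware key, per the check above):

#include <string.h>
#include <rte_ethdev.h>

/* Sketch: enable RSS with an application-supplied 40-byte key on an
 * initialized, configured port. */
static int set_rss_key(uint16_t port_id)
{
	uint8_t key[40];
	struct rte_eth_rss_conf conf;

	memset(key, 0x6d, sizeof(key)); /* illustrative key bytes */
	memset(&conf, 0, sizeof(conf));
	conf.rss_key = key;
	conf.rss_key_len = sizeof(key);
	conf.rss_hf = ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP;

	return rte_eth_dev_rss_hash_update(port_id, &conf);
}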
+
+static int
+atl_rss_hash_conf_get(struct rte_eth_dev *dev,
+                                struct rte_eth_rss_conf *rss_conf)
+{
+       struct aq_hw_cfg_s *cfg =
+               ATL_DEV_PRIVATE_TO_CFG(dev->data->dev_private);
+
+       rss_conf->rss_hf = cfg->is_rss ? ATL_RSS_OFFLOAD_ALL : 0;
+       if (rss_conf->rss_key) {
+               rss_conf->rss_key_len = cfg->aq_rss.hash_secret_key_size;
+               memcpy(rss_conf->rss_key, cfg->aq_rss.hash_secret_key,
+                      rss_conf->rss_key_len);
+       }
+
+       return 0;
+}
+
+RTE_PMD_REGISTER_PCI(net_atlantic, rte_atl_pmd);
+RTE_PMD_REGISTER_PCI_TABLE(net_atlantic, pci_id_atl_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_atlantic, "* igb_uio | uio_pci_generic");
+
+RTE_INIT(atl_init_log)
+{
+       atl_logtype_init = rte_log_register("pmd.net.atlantic.init");
+       if (atl_logtype_init >= 0)
+               rte_log_set_level(atl_logtype_init, RTE_LOG_NOTICE);
+       atl_logtype_driver = rte_log_register("pmd.net.atlantic.driver");
+       if (atl_logtype_driver >= 0)
+               rte_log_set_level(atl_logtype_driver, RTE_LOG_NOTICE);
+}
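Both logtypes default to NOTICE, so the PMD_INIT_FUNC_TRACE() calls throughout the driver stay silent. A minimal sketch of raising the driver log level at runtime (the EAL --log-level option achieves the same from the command line):

#include <rte_log.h>

/* Sketch: turn on DEBUG output for the atlantic driver logtype.
 * rte_log_register() returns the id already registered above. */
static void atl_logs_verbose(void)
{
	int lt = rte_log_register("pmd.net.atlantic.driver");

	if (lt >= 0)
		rte_log_set_level(lt, RTE_LOG_DEBUG);
}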
+
diff --git a/drivers/net/atlantic/atl_ethdev.h b/drivers/net/atlantic/atl_ethdev.h
new file mode 100644 (file)
index 0000000..1e29999
--- /dev/null
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Aquantia Corporation
+ */
+
+#ifndef _ATLANTIC_ETHDEV_H_
+#define _ATLANTIC_ETHDEV_H_
+#include <rte_errno.h>
+#include "rte_ethdev.h"
+
+#include "atl_types.h"
+#include "hw_atl/hw_atl_utils.h"
+
+#define ATL_RSS_OFFLOAD_ALL ( \
+       ETH_RSS_IPV4 | \
+       ETH_RSS_NONFRAG_IPV4_TCP | \
+       ETH_RSS_NONFRAG_IPV4_UDP | \
+       ETH_RSS_IPV6 | \
+       ETH_RSS_NONFRAG_IPV6_TCP | \
+       ETH_RSS_NONFRAG_IPV6_UDP | \
+       ETH_RSS_IPV6_EX | \
+       ETH_RSS_IPV6_TCP_EX | \
+       ETH_RSS_IPV6_UDP_EX)
+
+#define ATL_DEV_PRIVATE_TO_HW(adapter) \
+       (&((struct atl_adapter *)adapter)->hw)
+
+#define ATL_DEV_TO_ADAPTER(dev) \
+       ((struct atl_adapter *)(dev)->data->dev_private)
+
+#define ATL_DEV_PRIVATE_TO_INTR(adapter) \
+       (&((struct atl_adapter *)adapter)->intr)
+
+#define ATL_DEV_PRIVATE_TO_CFG(adapter) \
+       (&((struct atl_adapter *)adapter)->hw_cfg)
+
+#define ATL_FLAG_NEED_LINK_UPDATE (uint32_t)(1 << 0)
+#define ATL_FLAG_NEED_LINK_CONFIG (uint32_t)(1 << 2)
+
+struct atl_interrupt {
+       uint32_t flags;
+       uint32_t mask;
+};
+
+/*
+ * Structure to store private data for each driver instance (for each port).
+ */
+struct atl_adapter {
+       struct aq_hw_s             hw;
+       struct aq_hw_cfg_s         hw_cfg;
+       struct atl_sw_stats        sw_stats;
+       struct atl_interrupt       intr;
+};
+
+/*
+ * RX/TX function prototypes
+ */
+void atl_rx_queue_release(void *rxq);
+void atl_tx_queue_release(void *txq);
+
+int atl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+               uint16_t nb_rx_desc, unsigned int socket_id,
+               const struct rte_eth_rxconf *rx_conf,
+               struct rte_mempool *mb_pool);
+
+int atl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+               uint16_t nb_tx_desc, unsigned int socket_id,
+               const struct rte_eth_txconf *tx_conf);
+
+uint32_t atl_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id);
+
+int atl_dev_rx_descriptor_status(void *rx_queue, uint16_t offset);
+int atl_dev_tx_descriptor_status(void *tx_queue, uint16_t offset);
+
+int atl_dev_rx_queue_intr_enable(struct rte_eth_dev *eth_dev,
+                                uint16_t queue_id);
+int atl_dev_rx_queue_intr_disable(struct rte_eth_dev *eth_dev,
+                                 uint16_t queue_id);
+
+int atl_rx_init(struct rte_eth_dev *dev);
+int atl_tx_init(struct rte_eth_dev *dev);
+
+int atl_start_queues(struct rte_eth_dev *dev);
+int atl_stop_queues(struct rte_eth_dev *dev);
+void atl_free_queues(struct rte_eth_dev *dev);
+
+int atl_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id);
+int atl_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id);
+
+int atl_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id);
+int atl_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id);
+
+void atl_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+       struct rte_eth_rxq_info *qinfo);
+
+void atl_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+       struct rte_eth_txq_info *qinfo);
+
+uint16_t atl_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+               uint16_t nb_pkts);
+
+uint16_t atl_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+               uint16_t nb_pkts);
+
+uint16_t atl_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+               uint16_t nb_pkts);
+
+#endif /* _ATLANTIC_ETHDEV_H_ */
diff --git a/drivers/net/atlantic/atl_hw_regs.c b/drivers/net/atlantic/atl_hw_regs.c
new file mode 100644 (file)
index 0000000..bd42c83
--- /dev/null
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+/* Copyright (C) 2014-2017 aQuantia Corporation. */
+
+/* File aq_hw_utils.c: Definitions of helper functions used across the
+ * hardware layer.
+ */
+
+#include "atl_hw_regs.h"
+
+#include <rte_io.h>
+#include <rte_byteorder.h>
+
+void aq_hw_write_reg_bit(struct aq_hw_s *aq_hw, u32 addr, u32 msk,
+                        u32 shift, u32 val)
+{
+       if (msk ^ ~0) {
+               u32 reg_old, reg_new;
+
+               reg_old = aq_hw_read_reg(aq_hw, addr);
+               reg_new = (reg_old & (~msk)) | (val << shift);
+
+               if (reg_old != reg_new)
+                       aq_hw_write_reg(aq_hw, addr, reg_new);
+       } else {
+               aq_hw_write_reg(aq_hw, addr, val);
+       }
+}
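This is a standard read-modify-write: clear the masked field, OR in the shifted value, and skip the bus write when nothing changed; an all-ones mask degenerates to a plain write. The same bit arithmetic in isolation:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t reg = 0x12345678; /* illustrative register contents */
	uint32_t msk = 0x0000ff00; /* field occupies bits 8..15 */
	uint32_t shift = 8;
	uint32_t val = 0xab;

	uint32_t reg_new = (reg & ~msk) | (val << shift);

	printf("0x%08x\n", reg_new); /* prints 0x1234ab78 */
	return 0;
}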
+
+u32 aq_hw_read_reg_bit(struct aq_hw_s *aq_hw, u32 addr, u32 msk, u32 shift)
+{
+       return ((aq_hw_read_reg(aq_hw, addr) & msk) >> shift);
+}
+
+u32 aq_hw_read_reg(struct aq_hw_s *hw, u32 reg)
+{
+       return rte_le_to_cpu_32(rte_read32((u8 *)hw->mmio + reg));
+}
+
+void aq_hw_write_reg(struct aq_hw_s *hw, u32 reg, u32 value)
+{
+       rte_write32((rte_cpu_to_le_32(value)), (u8 *)hw->mmio + reg);
+}
+
+int aq_hw_err_from_flags(struct aq_hw_s *hw)
+{
+       int err = 0;
+
+       if (aq_hw_read_reg(hw, 0x10U) == ~0U)
+               return -ENXIO;
+
+       return err;
+}
diff --git a/drivers/net/atlantic/atl_hw_regs.h b/drivers/net/atlantic/atl_hw_regs.h
new file mode 100644 (file)
index 0000000..a2d6ca8
--- /dev/null
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0) */
+/* Copyright (C) 2014-2017 aQuantia Corporation. */
+
+/* File aq_hw_utils.h: Declarations of helper functions used across the
+ * hardware layer.
+ */
+
+#ifndef AQ_HW_UTILS_H
+#define AQ_HW_UTILS_H
+
+#include <rte_common.h>
+#include <rte_io.h>
+#include <rte_byteorder.h>
+#include <rte_random.h>
+#include <rte_cycles.h>
+#include "atl_common.h"
+#include "atl_types.h"
+
+#ifndef HIDWORD
+#define LODWORD(_qw)    ((u32)(_qw))
+#define HIDWORD(_qw)    ((u32)(((_qw) >> 32) & 0xffffffff))
+#endif
+
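+/* Note: despite the _US_ parameter name, AQ_HW_SLEEP() delays in milliseconds */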
+#define AQ_HW_SLEEP(_US_) rte_delay_ms(_US_)
+
+#define mdelay rte_delay_ms
+#define udelay rte_delay_us
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define BIT(x) (1UL << (x))
+
+#define AQ_HW_WAIT_FOR(_B_, _US_, _N_) \
+do { \
+       unsigned int AQ_HW_WAIT_FOR_i; \
+       for (AQ_HW_WAIT_FOR_i = _N_; (!(_B_)) && (AQ_HW_WAIT_FOR_i);\
+       --AQ_HW_WAIT_FOR_i) {\
+               udelay(_US_); \
+       } \
+       if (!AQ_HW_WAIT_FOR_i) {\
+               err = -ETIMEDOUT; \
+       } \
+} while (0)
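AQ_HW_WAIT_FOR() polls _B_ up to _N_ times with _US_ microseconds between attempts and, on timeout, writes -ETIMEDOUT into a variable named err that must already exist in the caller's scope. A self-contained usage sketch with the delay and hardware read stubbed out:

#include <stdio.h>
#include <errno.h>

static unsigned int fake_reads;

/* Stubs standing in for udelay() and a hardware status read */
static void udelay(unsigned int us) { (void)us; }
static int hw_ready(void) { return ++fake_reads >= 3; }

#define AQ_HW_WAIT_FOR(_B_, _US_, _N_) \
do { \
	unsigned int AQ_HW_WAIT_FOR_i; \
	for (AQ_HW_WAIT_FOR_i = _N_; (!(_B_)) && (AQ_HW_WAIT_FOR_i); \
	--AQ_HW_WAIT_FOR_i) { \
		udelay(_US_); \
	} \
	if (!AQ_HW_WAIT_FOR_i) { \
		err = -ETIMEDOUT; \
	} \
} while (0)

int main(void)
{
	int err = 0; /* the macro requires this name in scope */

	AQ_HW_WAIT_FOR(hw_ready(), 10U, 1000U);
	printf("err=%d\n", err); /* 0: condition met on the third poll */
	return err;
}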
+
+#define ATL_WRITE_FLUSH(aq_hw) { (void)aq_hw_read_reg(aq_hw, 0x10); }
+
+void aq_hw_write_reg_bit(struct aq_hw_s *aq_hw, u32 addr, u32 msk,
+                        u32 shift, u32 val);
+u32 aq_hw_read_reg_bit(struct aq_hw_s *aq_hw, u32 addr, u32 msk, u32 shift);
+u32 aq_hw_read_reg(struct aq_hw_s *hw, u32 reg);
+void aq_hw_write_reg(struct aq_hw_s *hw, u32 reg, u32 value);
+int aq_hw_err_from_flags(struct aq_hw_s *hw);
+
+#endif /* AQ_HW_UTILS_H */
diff --git a/drivers/net/atlantic/atl_logs.h b/drivers/net/atlantic/atl_logs.h
new file mode 100644 (file)
index 0000000..e3dba33
--- /dev/null
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Aquantia Corporation
+ */
+#ifndef ATL_LOGS_H
+#define ATL_LOGS_H
+
+#include <rte_log.h>
+
+extern int atl_logtype_init;
+
+#define PMD_INIT_LOG(level, fmt, args...) \
+       rte_log(RTE_LOG_ ## level, atl_logtype_init, \
+               "%s(): " fmt "\n", __func__, ##args)
+
+#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
+
+#define PMD_RX_LOG(level, fmt, args...) \
+       RTE_LOG_DP(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+
+#define PMD_TX_LOG(level, fmt, args...) \
+       RTE_LOG_DP(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+
+extern int atl_logtype_driver;
+#define PMD_DRV_LOG_RAW(level, fmt, args...) \
+       rte_log(RTE_LOG_ ## level, atl_logtype_driver, "%s(): " fmt, \
+               __func__, ## args)
+
+#define PMD_DRV_LOG(level, fmt, args...) \
+       PMD_DRV_LOG_RAW(level, fmt "\n", ## args)
+
+#endif
diff --git a/drivers/net/atlantic/atl_rxtx.c b/drivers/net/atlantic/atl_rxtx.c
new file mode 100644 (file)
index 0000000..fd90947
--- /dev/null
@@ -0,0 +1,1357 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Aquantia Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_ethdev_driver.h>
+#include <rte_net.h>
+
+#include "atl_ethdev.h"
+#include "atl_hw_regs.h"
+
+#include "atl_logs.h"
+#include "hw_atl/hw_atl_llh.h"
+#include "hw_atl/hw_atl_b0.h"
+#include "hw_atl/hw_atl_b0_internal.h"
+
+#define ATL_TX_CKSUM_OFFLOAD_MASK (                     \
+       PKT_TX_IP_CKSUM |                                \
+       PKT_TX_L4_MASK |                                 \
+       PKT_TX_TCP_SEG)
+
+#define ATL_TX_OFFLOAD_MASK (                           \
+       PKT_TX_VLAN |                                    \
+       PKT_TX_IP_CKSUM |                                \
+       PKT_TX_L4_MASK |                                 \
+       PKT_TX_TCP_SEG)
+
+#define ATL_TX_OFFLOAD_NOTSUP_MASK \
+       (PKT_TX_OFFLOAD_MASK ^ ATL_TX_OFFLOAD_MASK)
+
+/**
+ * Structure associated with each descriptor of the RX ring of a RX queue.
+ */
+struct atl_rx_entry {
+       struct rte_mbuf *mbuf;
+};
+
+/**
+ * Structure associated with each descriptor of the TX ring of a TX queue.
+ */
+struct atl_tx_entry {
+       struct rte_mbuf *mbuf;
+       uint16_t next_id;
+       uint16_t last_id;
+};
+
+/**
+ * Structure associated with each RX queue.
+ */
+struct atl_rx_queue {
+       struct rte_mempool      *mb_pool;
+       struct hw_atl_rxd_s     *hw_ring;
+       uint64_t                hw_ring_phys_addr;
+       struct atl_rx_entry     *sw_ring;
+       uint16_t                nb_rx_desc;
+       uint16_t                rx_tail;
+       uint16_t                nb_rx_hold;
+       uint16_t                rx_free_thresh;
+       uint16_t                queue_id;
+       uint16_t                port_id;
+       uint16_t                buff_size;
+       bool                    l3_csum_enabled;
+       bool                    l4_csum_enabled;
+};
+
+/**
+ * Structure associated with each TX queue.
+ */
+struct atl_tx_queue {
+       struct hw_atl_txd_s     *hw_ring;
+       uint64_t                hw_ring_phys_addr;
+       struct atl_tx_entry     *sw_ring;
+       uint16_t                nb_tx_desc;
+       uint16_t                tx_tail;
+       uint16_t                tx_head;
+       uint16_t                queue_id;
+       uint16_t                port_id;
+       uint16_t                tx_free_thresh;
+       uint16_t                tx_free;
+};
+
+static inline void
+atl_reset_rx_queue(struct atl_rx_queue *rxq)
+{
+       struct hw_atl_rxd_s *rxd = NULL;
+       int i;
+
+       PMD_INIT_FUNC_TRACE();
+
+       for (i = 0; i < rxq->nb_rx_desc; i++) {
+               rxd = (struct hw_atl_rxd_s *)&rxq->hw_ring[i];
+               rxd->buf_addr = 0;
+               rxd->hdr_addr = 0;
+       }
+
+       rxq->rx_tail = 0;
+}
+
+int
+atl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+                  uint16_t nb_rx_desc, unsigned int socket_id,
+                  const struct rte_eth_rxconf *rx_conf,
+                  struct rte_mempool *mb_pool)
+{
+       struct atl_rx_queue *rxq;
+       const struct rte_memzone *mz;
+
+       PMD_INIT_FUNC_TRACE();
+
+       /* make sure a valid number of descriptors have been requested */
+       if (nb_rx_desc < AQ_HW_MIN_RX_RING_SIZE ||
+                       nb_rx_desc > AQ_HW_MAX_RX_RING_SIZE) {
+               PMD_INIT_LOG(ERR, "Number of Rx descriptors must be "
+               "less than or equal to %d, "
+               "greater than or equal to %d", AQ_HW_MAX_RX_RING_SIZE,
+               AQ_HW_MIN_RX_RING_SIZE);
+               return -EINVAL;
+       }
+
+       /*
+        * If this queue existed already, free the associated memory. The
+        * queue cannot be reused in case we need to allocate memory on a
+        * different socket than was previously used.
+        */
+       if (dev->data->rx_queues[rx_queue_id] != NULL) {
+               atl_rx_queue_release(dev->data->rx_queues[rx_queue_id]);
+               dev->data->rx_queues[rx_queue_id] = NULL;
+       }
+
+       /* allocate memory for the queue structure */
+       rxq = rte_zmalloc_socket("atlantic Rx queue", sizeof(*rxq),
+                                RTE_CACHE_LINE_SIZE, socket_id);
+       if (rxq == NULL) {
+               PMD_INIT_LOG(ERR, "Cannot allocate queue structure");
+               return -ENOMEM;
+       }
+
+       /* setup queue */
+       rxq->mb_pool = mb_pool;
+       rxq->nb_rx_desc = nb_rx_desc;
+       rxq->port_id = dev->data->port_id;
+       rxq->queue_id = rx_queue_id;
+       rxq->rx_free_thresh = rx_conf->rx_free_thresh;
+
+       rxq->l3_csum_enabled = dev->data->dev_conf.rxmode.offloads &
+               DEV_RX_OFFLOAD_IPV4_CKSUM;
+       rxq->l4_csum_enabled = dev->data->dev_conf.rxmode.offloads &
+               (DEV_RX_OFFLOAD_UDP_CKSUM | DEV_RX_OFFLOAD_TCP_CKSUM);
+       if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
+               PMD_DRV_LOG(ERR, "PMD does not support KEEP_CRC offload");
+
+       /* allocate memory for the software ring */
+       rxq->sw_ring = rte_zmalloc_socket("atlantic sw rx ring",
+                               nb_rx_desc * sizeof(struct atl_rx_entry),
+                               RTE_CACHE_LINE_SIZE, socket_id);
+       if (rxq->sw_ring == NULL) {
+               PMD_INIT_LOG(ERR,
+                       "Port %d: Cannot allocate software ring for queue %d",
+                       rxq->port_id, rxq->queue_id);
+               rte_free(rxq);
+               return -ENOMEM;
+       }
+
+       /*
+        * allocate memory for the hardware descriptor ring. A memzone large
+        * enough to hold the maximum ring size is requested to allow for
+        * resizing in later calls to the queue setup function.
+        */
+       mz = rte_eth_dma_zone_reserve(dev, "rx hw_ring", rx_queue_id,
+                                     HW_ATL_B0_MAX_RXD *
+                                       sizeof(struct hw_atl_rxd_s),
+                                     128, socket_id);
+       if (mz == NULL) {
+               PMD_INIT_LOG(ERR,
+                       "Port %d: Cannot allocate hardware ring for queue %d",
+                       rxq->port_id, rxq->queue_id);
+               rte_free(rxq->sw_ring);
+               rte_free(rxq);
+               return -ENOMEM;
+       }
+       rxq->hw_ring = mz->addr;
+       rxq->hw_ring_phys_addr = mz->iova;
+
+       atl_reset_rx_queue(rxq);
+
+       dev->data->rx_queues[rx_queue_id] = rxq;
+       return 0;
+}
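The callback is reached through the generic rte_eth_rx_queue_setup(). A hedged configuration sketch (queue index, descriptor count, and pool are illustrative; the count must fall within the AQ_HW_MIN/MAX_RX_RING_SIZE bounds checked above):

#include <rte_ethdev.h>
#include <rte_mempool.h>

/* Sketch: set up Rx queue 0 with 1024 descriptors. Assumes a
 * configured port and an existing mbuf pool. */
static int setup_rx(uint16_t port_id, struct rte_mempool *pool)
{
	return rte_eth_rx_queue_setup(port_id, 0, 1024,
				      rte_eth_dev_socket_id(port_id),
				      NULL /* default rx_conf */, pool);
}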
+
+static inline void
+atl_reset_tx_queue(struct atl_tx_queue *txq)
+{
+       struct atl_tx_entry *tx_entry;
+       union hw_atl_txc_s *txc;
+       uint16_t i;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (!txq) {
+               PMD_DRV_LOG(ERR, "Pointer to txq is NULL");
+               return;
+       }
+
+       tx_entry = txq->sw_ring;
+
+       for (i = 0; i < txq->nb_tx_desc; i++) {
+               txc = (union hw_atl_txc_s *)&txq->hw_ring[i];
+               txc->flags1 = 0;
+               txc->flags2 = 2;
+       }
+
+       for (i = 0; i < txq->nb_tx_desc; i++) {
+               txq->hw_ring[i].dd = 1;
+               tx_entry[i].mbuf = NULL;
+       }
+
+       txq->tx_tail = 0;
+       txq->tx_head = 0;
+       txq->tx_free = txq->nb_tx_desc - 1;
+}
+
+int
+atl_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+                  uint16_t nb_tx_desc, unsigned int socket_id,
+                  const struct rte_eth_txconf *tx_conf)
+{
+       struct atl_tx_queue *txq;
+       const struct rte_memzone *mz;
+
+       PMD_INIT_FUNC_TRACE();
+
+       /* make sure a valid number of descriptors have been requested */
+       if (nb_tx_desc < AQ_HW_MIN_TX_RING_SIZE ||
+               nb_tx_desc > AQ_HW_MAX_TX_RING_SIZE) {
+               PMD_INIT_LOG(ERR, "Number of Tx descriptors must be "
+                       "less than or equal to %d, "
+                       "greater than or equal to %d", AQ_HW_MAX_TX_RING_SIZE,
+                       AQ_HW_MIN_TX_RING_SIZE);
+               return -EINVAL;
+       }
+
+       /*
+        * If this queue existed already, free the associated memory. The
+        * queue cannot be reused in case we need to allocate memory on a
+        * different socket than was previously used.
+        */
+       if (dev->data->tx_queues[tx_queue_id] != NULL) {
+               atl_tx_queue_release(dev->data->tx_queues[tx_queue_id]);
+               dev->data->tx_queues[tx_queue_id] = NULL;
+       }
+
+       /* allocate memory for the queue structure */
+       txq = rte_zmalloc_socket("atlantic Tx queue", sizeof(*txq),
+                                RTE_CACHE_LINE_SIZE, socket_id);
+       if (txq == NULL) {
+               PMD_INIT_LOG(ERR, "Cannot allocate queue structure");
+               return -ENOMEM;
+       }
+
+       /* setup queue */
+       txq->nb_tx_desc = nb_tx_desc;
+       txq->port_id = dev->data->port_id;
+       txq->queue_id = tx_queue_id;
+       txq->tx_free_thresh = tx_conf->tx_free_thresh;
+
+       /* allocate memory for the software ring */
+       txq->sw_ring = rte_zmalloc_socket("atlantic sw tx ring",
+                               nb_tx_desc * sizeof(struct atl_tx_entry),
+                               RTE_CACHE_LINE_SIZE, socket_id);
+       if (txq->sw_ring == NULL) {
+               PMD_INIT_LOG(ERR,
+                       "Port %d: Cannot allocate software ring for queue %d",
+                       txq->port_id, txq->queue_id);
+               rte_free(txq);
+               return -ENOMEM;
+       }
+
+       /*
+        * allocate memory for the hardware descriptor ring. A memzone large
+        * enough to hold the maximum ring size is requested to allow for
+        * resizing in later calls to the queue setup function.
+        */
+       mz = rte_eth_dma_zone_reserve(dev, "tx hw_ring", tx_queue_id,
+                               HW_ATL_B0_MAX_TXD * sizeof(struct hw_atl_txd_s),
+                               128, socket_id);
+       if (mz == NULL) {
+               PMD_INIT_LOG(ERR,
+                       "Port %d: Cannot allocate hardware ring for queue %d",
+                       txq->port_id, txq->queue_id);
+               rte_free(txq->sw_ring);
+               rte_free(txq);
+               return -ENOMEM;
+       }
+       txq->hw_ring = mz->addr;
+       txq->hw_ring_phys_addr = mz->iova;
+
+       atl_reset_tx_queue(txq);
+
+       dev->data->tx_queues[tx_queue_id] = txq;
+       return 0;
+}
+
+int
+atl_tx_init(struct rte_eth_dev *eth_dev)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+       struct atl_tx_queue *txq;
+       uint64_t base_addr = 0;
+       int i = 0;
+       int err = 0;
+
+       PMD_INIT_FUNC_TRACE();
+
+       for (i = 0; i < eth_dev->data->nb_tx_queues; i++) {
+               txq = eth_dev->data->tx_queues[i];
+               base_addr = txq->hw_ring_phys_addr;
+
+               err = hw_atl_b0_hw_ring_tx_init(hw, base_addr,
+                                               txq->queue_id,
+                                               txq->nb_tx_desc, 0,
+                                               txq->port_id);
+
+               if (err) {
+                       PMD_INIT_LOG(ERR,
+                               "Port %d: Cannot init TX queue %d",
+                               txq->port_id, txq->queue_id);
+                       break;
+               }
+       }
+
+       return err;
+}
+
+int
+atl_rx_init(struct rte_eth_dev *eth_dev)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+       struct aq_rss_parameters *rss_params = &hw->aq_nic_cfg->aq_rss;
+       struct atl_rx_queue *rxq;
+       uint64_t base_addr = 0;
+       int i = 0;
+       int err = 0;
+
+       PMD_INIT_FUNC_TRACE();
+
+       for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+               rxq = eth_dev->data->rx_queues[i];
+               base_addr = rxq->hw_ring_phys_addr;
+
+               /* Take the requested pool mbuf size and adapt the
+                * descriptor buffer size to best fit it
+                */
+               int buff_size = rte_pktmbuf_data_room_size(rxq->mb_pool) -
+                               RTE_PKTMBUF_HEADROOM;
+
+               buff_size = RTE_ALIGN_FLOOR(buff_size, 1024);
+               if (buff_size > HW_ATL_B0_RXD_BUF_SIZE_MAX) {
+                       PMD_INIT_LOG(WARNING,
+                               "Port %d queue %d: mem pool buff size is too big",
+                               rxq->port_id, rxq->queue_id);
+                       buff_size = HW_ATL_B0_RXD_BUF_SIZE_MAX;
+               }
+               if (buff_size < 1024) {
+                       PMD_INIT_LOG(ERR,
+                               "Port %d queue %d: mem pool buff size is too small",
+                               rxq->port_id, rxq->queue_id);
+                       return -EINVAL;
+               }
+               rxq->buff_size = buff_size;
+
+               err = hw_atl_b0_hw_ring_rx_init(hw, base_addr, rxq->queue_id,
+                                               rxq->nb_rx_desc, buff_size, 0,
+                                               rxq->port_id);
+
+               if (err) {
+                       PMD_INIT_LOG(ERR, "Port %d: Cannot init RX queue %d",
+                                    rxq->port_id, rxq->queue_id);
+                       break;
+               }
+       }
+
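+       /* Default RETA: identity mapping folded by an AND mask; this
+        * assumes nb_rx_queues is a power of two.
+        */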
+       for (i = rss_params->indirection_table_size; i--;)
+               rss_params->indirection_table[i] = i &
+                       (eth_dev->data->nb_rx_queues - 1);
+       hw_atl_b0_hw_rss_set(hw, rss_params);
+       return err;
+}
+
+static int
+atl_alloc_rx_queue_mbufs(struct atl_rx_queue *rxq)
+{
+       struct atl_rx_entry *rx_entry = rxq->sw_ring;
+       struct hw_atl_rxd_s *rxd;
+       uint64_t dma_addr = 0;
+       uint32_t i = 0;
+
+       PMD_INIT_FUNC_TRACE();
+
+       /* fill Rx ring */
+       for (i = 0; i < rxq->nb_rx_desc; i++) {
+               struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
+
+               if (mbuf == NULL) {
+                       PMD_INIT_LOG(ERR,
+                               "Port %d: mbuf alloc failed for rx queue %d",
+                               rxq->port_id, rxq->queue_id);
+                       return -ENOMEM;
+               }
+
+               mbuf->data_off = RTE_PKTMBUF_HEADROOM;
+               mbuf->port = rxq->port_id;
+
+               dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
+               rxd = (struct hw_atl_rxd_s *)&rxq->hw_ring[i];
+               rxd->buf_addr = dma_addr;
+               rxd->hdr_addr = 0;
+               rx_entry[i].mbuf = mbuf;
+       }
+
+       return 0;
+}
+
+static void
+atl_rx_queue_release_mbufs(struct atl_rx_queue *rxq)
+{
+       int i;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (rxq->sw_ring != NULL) {
+               for (i = 0; i < rxq->nb_rx_desc; i++) {
+                       if (rxq->sw_ring[i].mbuf != NULL) {
+                               rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
+                               rxq->sw_ring[i].mbuf = NULL;
+                       }
+               }
+       }
+}
+
+int
+atl_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct atl_rx_queue *rxq = NULL;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (rx_queue_id < dev->data->nb_rx_queues) {
+               rxq = dev->data->rx_queues[rx_queue_id];
+
+               if (atl_alloc_rx_queue_mbufs(rxq) != 0) {
+                       PMD_INIT_LOG(ERR,
+                               "Port %d: Allocate mbufs for queue %d failed",
+                               rxq->port_id, rxq->queue_id);
+                       return -1;
+               }
+
+               hw_atl_b0_hw_ring_rx_start(hw, rx_queue_id);
+
+               rte_wmb();
+               hw_atl_reg_rx_dma_desc_tail_ptr_set(hw, rxq->nb_rx_desc - 1,
+                                                   rx_queue_id);
+               dev->data->rx_queue_state[rx_queue_id] =
+                       RTE_ETH_QUEUE_STATE_STARTED;
+       } else {
+               return -1;
+       }
+
+       return 0;
+}
+
+int
+atl_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct atl_rx_queue *rxq = NULL;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (rx_queue_id < dev->data->nb_rx_queues) {
+               rxq = dev->data->rx_queues[rx_queue_id];
+
+               hw_atl_b0_hw_ring_rx_stop(hw, rx_queue_id);
+
+               atl_rx_queue_release_mbufs(rxq);
+               atl_reset_rx_queue(rxq);
+
+               dev->data->rx_queue_state[rx_queue_id] =
+                       RTE_ETH_QUEUE_STATE_STOPPED;
+       } else {
+               return -1;
+       }
+
+       return 0;
+}
+
+void
+atl_rx_queue_release(void *rx_queue)
+{
+       PMD_INIT_FUNC_TRACE();
+
+       if (rx_queue != NULL) {
+               struct atl_rx_queue *rxq = (struct atl_rx_queue *)rx_queue;
+
+               atl_rx_queue_release_mbufs(rxq);
+               rte_free(rxq->sw_ring);
+               rte_free(rxq);
+       }
+}
+
+static void
+atl_tx_queue_release_mbufs(struct atl_tx_queue *txq)
+{
+       int i;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (txq->sw_ring != NULL) {
+               for (i = 0; i < txq->nb_tx_desc; i++) {
+                       if (txq->sw_ring[i].mbuf != NULL) {
+                               rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
+                               txq->sw_ring[i].mbuf = NULL;
+                       }
+               }
+       }
+}
+
+int
+atl_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (tx_queue_id < dev->data->nb_tx_queues) {
+               hw_atl_b0_hw_ring_tx_start(hw, tx_queue_id);
+
+               rte_wmb();
+               hw_atl_b0_hw_tx_ring_tail_update(hw, 0, tx_queue_id);
+               dev->data->tx_queue_state[tx_queue_id] =
+                       RTE_ETH_QUEUE_STATE_STARTED;
+       } else {
+               return -1;
+       }
+
+       return 0;
+}
+
+int
+atl_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct atl_tx_queue *txq;
+
+       PMD_INIT_FUNC_TRACE();
+
+       txq = dev->data->tx_queues[tx_queue_id];
+
+       hw_atl_b0_hw_ring_tx_stop(hw, tx_queue_id);
+
+       atl_tx_queue_release_mbufs(txq);
+       atl_reset_tx_queue(txq);
+       dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
+
+       return 0;
+}
+
+void
+atl_tx_queue_release(void *tx_queue)
+{
+       PMD_INIT_FUNC_TRACE();
+
+       if (tx_queue != NULL) {
+               struct atl_tx_queue *txq = (struct atl_tx_queue *)tx_queue;
+
+               atl_tx_queue_release_mbufs(txq);
+               rte_free(txq->sw_ring);
+               rte_free(txq);
+       }
+}
+
+void
+atl_free_queues(struct rte_eth_dev *dev)
+{
+       unsigned int i;
+
+       PMD_INIT_FUNC_TRACE();
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               atl_rx_queue_release(dev->data->rx_queues[i]);
+               dev->data->rx_queues[i] = 0;
+       }
+       dev->data->nb_rx_queues = 0;
+
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               atl_tx_queue_release(dev->data->tx_queues[i]);
+               dev->data->tx_queues[i] = 0;
+       }
+       dev->data->nb_tx_queues = 0;
+}
+
+int
+atl_start_queues(struct rte_eth_dev *dev)
+{
+       int i;
+
+       PMD_INIT_FUNC_TRACE();
+
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               if (atl_tx_queue_start(dev, i) != 0) {
+                       PMD_DRV_LOG(ERR,
+                               "Port %d: Start Tx queue %d failed",
+                               dev->data->port_id, i);
+                       return -1;
+               }
+       }
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               if (atl_rx_queue_start(dev, i) != 0) {
+                       PMD_DRV_LOG(ERR,
+                               "Port %d: Start Rx queue %d failed",
+                               dev->data->port_id, i);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+int
+atl_stop_queues(struct rte_eth_dev *dev)
+{
+       int i;
+
+       PMD_INIT_FUNC_TRACE();
+
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               if (atl_tx_queue_stop(dev, i) != 0) {
+                       PMD_DRV_LOG(ERR,
+                               "Port %d: Stop Tx queue %d failed",
+                               dev->data->port_id, i);
+                       return -1;
+               }
+       }
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               if (atl_rx_queue_stop(dev, i) != 0) {
+                       PMD_DRV_LOG(ERR,
+                               "Port %d: Stop Rx queue %d failed",
+                               dev->data->port_id, i);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+void
+atl_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+                struct rte_eth_rxq_info *qinfo)
+{
+       struct atl_rx_queue *rxq;
+
+       PMD_INIT_FUNC_TRACE();
+
+       rxq = dev->data->rx_queues[queue_id];
+
+       qinfo->mp = rxq->mb_pool;
+       qinfo->scattered_rx = dev->data->scattered_rx;
+       qinfo->nb_desc = rxq->nb_rx_desc;
+}
+
+void
+atl_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+                struct rte_eth_txq_info *qinfo)
+{
+       struct atl_tx_queue *txq;
+
+       PMD_INIT_FUNC_TRACE();
+
+       txq = dev->data->tx_queues[queue_id];
+
+       qinfo->nb_desc = txq->nb_tx_desc;
+}
+
+/* Return Rx queue avail count */
+
+uint32_t
+atl_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+{
+       struct atl_rx_queue *rxq;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (rx_queue_id >= dev->data->nb_rx_queues) {
+               PMD_DRV_LOG(ERR, "Invalid RX queue id=%d", rx_queue_id);
+               return 0;
+       }
+
+       rxq = dev->data->rx_queues[rx_queue_id];
+
+       if (rxq == NULL)
+               return 0;
+
+       return rxq->nb_rx_desc - rxq->nb_rx_hold;
+}
+
+int
+atl_dev_rx_descriptor_status(void *rx_queue, uint16_t offset)
+{
+       struct atl_rx_queue *rxq = rx_queue;
+       struct hw_atl_rxd_wb_s *rxd;
+       uint32_t idx;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (unlikely(offset >= rxq->nb_rx_desc))
+               return -EINVAL;
+
+       if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
+               return RTE_ETH_RX_DESC_UNAVAIL;
+
+       idx = rxq->rx_tail + offset;
+
+       if (idx >= rxq->nb_rx_desc)
+               idx -= rxq->nb_rx_desc;
+
+       rxd = (struct hw_atl_rxd_wb_s *)&rxq->hw_ring[idx];
+
+       if (rxd->dd)
+               return RTE_ETH_RX_DESC_DONE;
+
+       return RTE_ETH_RX_DESC_AVAIL;
+}
+
+int
+atl_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
+{
+       struct atl_tx_queue *txq = tx_queue;
+       struct hw_atl_txd_s *txd;
+       uint32_t idx;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (unlikely(offset >= txq->nb_tx_desc))
+               return -EINVAL;
+
+       idx = txq->tx_tail + offset;
+
+       if (idx >= txq->nb_tx_desc)
+               idx -= txq->nb_tx_desc;
+
+       txd = &txq->hw_ring[idx];
+
+       if (txd->dd)
+               return RTE_ETH_TX_DESC_DONE;
+
+       return RTE_ETH_TX_DESC_FULL;
+}
+
+static int
+atl_rx_enable_intr(struct rte_eth_dev *dev, uint16_t queue_id, bool enable)
+{
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct atl_rx_queue *rxq;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (queue_id >= dev->data->nb_rx_queues) {
+               PMD_DRV_LOG(ERR, "Invalid RX queue id=%d", queue_id);
+               return -EINVAL;
+       }
+
+       rxq = dev->data->rx_queues[queue_id];
+
+       if (rxq == NULL)
+               return 0;
+
+       /* Mapping interrupt vector */
+       hw_atl_itr_irq_map_en_rx_set(hw, enable, queue_id);
+
+       return 0;
+}
+
+int
+atl_dev_rx_queue_intr_enable(struct rte_eth_dev *eth_dev, uint16_t queue_id)
+{
+       return atl_rx_enable_intr(eth_dev, queue_id, true);
+}
+
+int
+atl_dev_rx_queue_intr_disable(struct rte_eth_dev *eth_dev, uint16_t queue_id)
+{
+       return atl_rx_enable_intr(eth_dev, queue_id, false);
+}
+
+uint16_t
+atl_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+             uint16_t nb_pkts)
+{
+       int i, ret;
+       uint64_t ol_flags;
+       struct rte_mbuf *m;
+
+       PMD_INIT_FUNC_TRACE();
+
+       for (i = 0; i < nb_pkts; i++) {
+               m = tx_pkts[i];
+               ol_flags = m->ol_flags;
+
+               if (m->nb_segs > AQ_HW_MAX_SEGS_SIZE) {
+                       rte_errno = EINVAL;
+                       return i;
+               }
+
+               if (ol_flags & ATL_TX_OFFLOAD_NOTSUP_MASK) {
+                       rte_errno = ENOTSUP;
+                       return i;
+               }
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+               ret = rte_validate_tx_offload(m);
+               if (ret != 0) {
+                       rte_errno = -ret;
+                       return i;
+               }
+#endif
+               ret = rte_net_intel_cksum_prepare(m);
+               if (ret != 0) {
+                       rte_errno = -ret;
+                       return i;
+               }
+       }
+
+       return i;
+}
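Assuming atl_prep_pkts() is installed as the port's tx_pkt_prepare callback (as the header's prototype suggests), applications invoke it via rte_eth_tx_prepare() ahead of rte_eth_tx_burst(); it returns the count of leading packets that passed validation. A hedged usage sketch:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

/* Sketch: validate offloads before transmitting a burst. */
static uint16_t send_burst(uint16_t port_id, uint16_t queue_id,
			   struct rte_mbuf **pkts, uint16_t n)
{
	uint16_t nb_prep = rte_eth_tx_prepare(port_id, queue_id, pkts, n);

	/* On partial success pkts[nb_prep] failed; rte_errno says why */
	return rte_eth_tx_burst(port_id, queue_id, pkts, nb_prep);
}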
+
+static uint64_t
+atl_desc_to_offload_flags(struct atl_rx_queue *rxq,
+                         struct hw_atl_rxd_wb_s *rxd_wb)
+{
+       uint64_t mbuf_flags = 0;
+
+       PMD_INIT_FUNC_TRACE();
+
+       /* IPv4 ? */
+       if (rxq->l3_csum_enabled && ((rxd_wb->pkt_type & 0x3) == 0)) {
+               /* IPv4 csum error ? */
+               if (rxd_wb->rx_stat & BIT(1))
+                       mbuf_flags |= PKT_RX_IP_CKSUM_BAD;
+               else
+                       mbuf_flags |= PKT_RX_IP_CKSUM_GOOD;
+       } else {
+               mbuf_flags |= PKT_RX_IP_CKSUM_UNKNOWN;
+       }
+
+       /* CSUM calculated ? */
+       if (rxq->l4_csum_enabled && (rxd_wb->rx_stat & BIT(3))) {
+               if (rxd_wb->rx_stat & BIT(2))
+                       mbuf_flags |= PKT_RX_L4_CKSUM_BAD;
+               else
+                       mbuf_flags |= PKT_RX_L4_CKSUM_GOOD;
+       } else {
+               mbuf_flags |= PKT_RX_L4_CKSUM_UNKNOWN;
+       }
+
+       return mbuf_flags;
+}
+
+static uint32_t
+atl_desc_to_pkt_type(struct hw_atl_rxd_wb_s *rxd_wb)
+{
+       uint32_t type = RTE_PTYPE_UNKNOWN;
+       uint16_t l2_l3_type = rxd_wb->pkt_type & 0x3;
+       uint16_t l4_type = (rxd_wb->pkt_type & 0x1C) >> 2;
+
+       switch (l2_l3_type) {
+       case 0:
+               type = RTE_PTYPE_L3_IPV4;
+               break;
+       case 1:
+               type = RTE_PTYPE_L3_IPV6;
+               break;
+       case 2:
+               type = RTE_PTYPE_L2_ETHER;
+               break;
+       case 3:
+               type = RTE_PTYPE_L2_ETHER_ARP;
+               break;
+       }
+
+       switch (l4_type) {
+       case 0:
+               type |= RTE_PTYPE_L4_TCP;
+               break;
+       case 1:
+               type |= RTE_PTYPE_L4_UDP;
+               break;
+       case 2:
+               type |= RTE_PTYPE_L4_SCTP;
+               break;
+       case 3:
+               type |= RTE_PTYPE_L4_ICMP;
+               break;
+       }
+
+       if (rxd_wb->pkt_type & BIT(5))
+               type |= RTE_PTYPE_L2_ETHER_VLAN;
+
+       return type;
+}
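The write-back descriptor packs the L2/L3 kind into the two low bits of pkt_type, the L4 kind into bits 2..4, and VLAN presence into bit 5. A standalone decoder over the same layout (the name tables are local stand-ins for the RTE_PTYPE_* constants):

#include <stdio.h>
#include <stdint.h>

static const char *l3_names[4] = { "IPV4", "IPV6", "ETHER", "ARP" };
static const char *l4_names[4] = { "TCP", "UDP", "SCTP", "ICMP" };

int main(void)
{
	uint8_t pkt_type = 0x24;              /* illustrative: 0b100100 */
	uint16_t l2_l3 = pkt_type & 0x3;      /* 0 -> IPV4 */
	uint16_t l4 = (pkt_type & 0x1C) >> 2; /* 1 -> UDP */
	int vlan = !!(pkt_type & (1u << 5));  /* bit 5 -> VLAN present */

	/* Only the four values named in the switch above are decoded */
	printf("%s/%s%s\n", l3_names[l2_l3],
	       l4 < 4 ? l4_names[l4] : "?",
	       vlan ? " +VLAN" : "");
	return 0;
}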
+
+uint16_t
+atl_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+       struct atl_rx_queue *rxq = (struct atl_rx_queue *)rx_queue;
+       struct rte_eth_dev *dev = &rte_eth_devices[rxq->port_id];
+       struct atl_adapter *adapter =
+               ATL_DEV_TO_ADAPTER(&rte_eth_devices[rxq->port_id]);
+       struct aq_hw_s *hw = ATL_DEV_PRIVATE_TO_HW(adapter);
+       struct aq_hw_cfg_s *cfg =
+               ATL_DEV_PRIVATE_TO_CFG(dev->data->dev_private);
+       struct atl_rx_entry *sw_ring = rxq->sw_ring;
+
+       struct rte_mbuf *new_mbuf;
+       struct rte_mbuf *rx_mbuf, *rx_mbuf_prev, *rx_mbuf_first;
+       struct atl_rx_entry *rx_entry;
+       uint16_t nb_rx = 0;
+       uint16_t nb_hold = 0;
+       struct hw_atl_rxd_wb_s rxd_wb;
+       struct hw_atl_rxd_s *rxd = NULL;
+       uint16_t tail = rxq->rx_tail;
+       uint64_t dma_addr;
+       uint16_t pkt_len = 0;
+
+       while (nb_rx < nb_pkts) {
+               uint16_t eop_tail = tail;
+
+               rxd = (struct hw_atl_rxd_s *)&rxq->hw_ring[tail];
+               rxd_wb = *(struct hw_atl_rxd_wb_s *)rxd;
+
+               if (!rxd_wb.dd) { /* RxD is not done */
+                       break;
+               }
+
+               PMD_RX_LOG(ERR, "port_id=%u queue_id=%u tail=%u "
+                          "eop=0x%x pkt_len=%u hash=0x%x hash_type=0x%x",
+                          (unsigned int)rxq->port_id,
+                          (unsigned int)rxq->queue_id,
+                          (unsigned int)tail, (unsigned int)rxd_wb.eop,
+                          (unsigned int)rte_le_to_cpu_16(rxd_wb.pkt_len),
+                       rxd_wb.rss_hash, rxd_wb.rss_type);
+
+               /* Not the last descriptor: look ahead for the packet's EOP */
+               if (!rxd_wb.eop) {
+                       while (true) {
+                               struct hw_atl_rxd_wb_s *eop_rxwbd;
+
+                               eop_tail = (eop_tail + 1) % rxq->nb_rx_desc;
+                               eop_rxwbd = (struct hw_atl_rxd_wb_s *)
+                                       &rxq->hw_ring[eop_tail];
+                               if (!eop_rxwbd->dd) {
+                                       /* no EOP received yet */
+                                       eop_tail = tail;
+                                       break;
+                               }
+                               if (eop_rxwbd->dd && eop_rxwbd->eop)
+                                       break;
+                       }
+                       /* No EOP in ring */
+                       if (eop_tail == tail)
+                               break;
+               }
+               rx_mbuf_prev = NULL;
+               rx_mbuf_first = NULL;
+
+               /* Run through packet segments */
+               while (true) {
+                       new_mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
+                       if (new_mbuf == NULL) {
+                               PMD_RX_LOG(ERR,
+                                  "RX mbuf alloc failed port_id=%u "
+                                  "queue_id=%u", (unsigned int)rxq->port_id,
+                                  (unsigned int)rxq->queue_id);
+                               dev->data->rx_mbuf_alloc_failed++;
+                               adapter->sw_stats.rx_nombuf++;
+                               goto err_stop;
+                       }
+
+                       nb_hold++;
+                       rx_entry = &sw_ring[tail];
+
+                       rx_mbuf = rx_entry->mbuf;
+                       rx_entry->mbuf = new_mbuf;
+                       dma_addr = rte_cpu_to_le_64(
+                               rte_mbuf_data_iova_default(new_mbuf));
+
+                       /* setup RX descriptor */
+                       rxd->hdr_addr = 0;
+                       rxd->buf_addr = dma_addr;
+
+                       /*
+                        * Initialize the returned mbuf.
+                        * 1) setup generic mbuf fields:
+                        *        - number of segments,
+                        *        - next segment,
+                        *        - packet length,
+                        *        - RX port identifier.
+                        * 2) integrate hardware offload data, if any:
+                        *        - RSS flag & hash,
+                        *        - IP checksum flag,
+                        *        - VLAN TCI, if any,
+                        *        - error flags.
+                        */
+                       pkt_len = (uint16_t)rte_le_to_cpu_16(rxd_wb.pkt_len);
+                       rx_mbuf->data_off = RTE_PKTMBUF_HEADROOM;
+                       rte_prefetch1((char *)rx_mbuf->buf_addr +
+                               rx_mbuf->data_off);
+                       rx_mbuf->nb_segs = 0;
+                       rx_mbuf->next = NULL;
+                       rx_mbuf->pkt_len = pkt_len;
+                       rx_mbuf->data_len = pkt_len;
+                       if (rxd_wb.eop) {
+                               u16 remainder_len = pkt_len % rxq->buff_size;
+                               if (!remainder_len)
+                                       remainder_len = rxq->buff_size;
+                               rx_mbuf->data_len = remainder_len;
+                       } else {
+                               rx_mbuf->data_len = pkt_len > rxq->buff_size ?
+                                               rxq->buff_size : pkt_len;
+                       }
+                       rx_mbuf->port = rxq->port_id;
+
+                       rx_mbuf->hash.rss = rxd_wb.rss_hash;
+
+                       rx_mbuf->vlan_tci = rxd_wb.vlan;
+
+                       rx_mbuf->ol_flags =
+                               atl_desc_to_offload_flags(rxq, &rxd_wb);
+
+                       rx_mbuf->packet_type = atl_desc_to_pkt_type(&rxd_wb);
+
+                       if (rx_mbuf->packet_type & RTE_PTYPE_L2_ETHER_VLAN) {
+                               rx_mbuf->ol_flags |= PKT_RX_VLAN;
+                               rx_mbuf->vlan_tci = rxd_wb.vlan;
+
+                               if (cfg->vlan_strip)
+                                       rx_mbuf->ol_flags |=
+                                               PKT_RX_VLAN_STRIPPED;
+                       }
+
+                       if (!rx_mbuf_first)
+                               rx_mbuf_first = rx_mbuf;
+                       rx_mbuf_first->nb_segs++;
+
+                       if (rx_mbuf_prev)
+                               rx_mbuf_prev->next = rx_mbuf;
+                       rx_mbuf_prev = rx_mbuf;
+
+                       tail = (tail + 1) % rxq->nb_rx_desc;
+                       /* Prefetch next mbufs */
+                       rte_prefetch0(sw_ring[tail].mbuf);
+                       if ((tail & 0x3) == 0) {
+                               rte_prefetch0(&sw_ring[tail]);
+                               rte_prefetch0(&rxq->hw_ring[tail]);
+                       }
+
+                       /* filled mbuf_first */
+                       if (rxd_wb.eop)
+                               break;
+                       rxd = (struct hw_atl_rxd_s *)&rxq->hw_ring[tail];
+                       rxd_wb = *(struct hw_atl_rxd_wb_s *)rxd;
+               }
+
+               /*
+                * Store the mbuf address into the next entry of the array
+                * of returned packets.
+                */
+               rx_pkts[nb_rx++] = rx_mbuf_first;
+               adapter->sw_stats.q_ipackets[rxq->queue_id]++;
+               adapter->sw_stats.q_ibytes[rxq->queue_id] +=
+                       rx_mbuf_first->pkt_len;
+
+               PMD_RX_LOG(ERR, "add mbuf segs=%d pkt_len=%d",
+                       rx_mbuf_first->nb_segs,
+                       rx_mbuf_first->pkt_len);
+       }
+
+err_stop:
+
+       rxq->rx_tail = tail;
+
+       /*
+        * If the number of free RX descriptors is greater than the RX free
+        * threshold of the queue, advance the Receive Descriptor Tail (RDT)
+        * register.
+        * Update the RDT with the value of the last processed RX descriptor
+        * minus 1, to guarantee that the RDT register is never equal to the
+        * RDH register, which creates a "full" ring situation from the
+        * hardware point of view...
+        */
+       nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
+       if (nb_hold > rxq->rx_free_thresh) {
+               PMD_RX_LOG(ERR, "port_id=%u queue_id=%u rx_tail=%u "
+                       "nb_hold=%u nb_rx=%u",
+                       (unsigned int)rxq->port_id, (unsigned int)rxq->queue_id,
+                       (unsigned int)tail, (unsigned int)nb_hold,
+                       (unsigned int)nb_rx);
+               tail = (uint16_t)((tail == 0) ?
+                       (rxq->nb_rx_desc - 1) : (tail - 1));
+
+               hw_atl_reg_rx_dma_desc_tail_ptr_set(hw, tail, rxq->queue_id);
+
+               nb_hold = 0;
+       }
+
+       rxq->nb_rx_hold = nb_hold;
+
+       return nb_rx;
+}
+
+static void
+atl_xmit_cleanup(struct atl_tx_queue *txq)
+{
+       struct atl_tx_entry *sw_ring;
+       struct hw_atl_txd_s *txd;
+       int to_clean = 0;
+
+       PMD_INIT_FUNC_TRACE();
+
+       if (txq != NULL) {
+               sw_ring = txq->sw_ring;
+               int head = txq->tx_head;
+               int cnt;
+
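+               /* Count descriptors between head and tail with DD (done) set */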
+               for (cnt = head;;) {
+                       txd = &txq->hw_ring[cnt];
+
+                       if (txd->dd)
+                               to_clean++;
+
+                       cnt = (cnt + 1) % txq->nb_tx_desc;
+                       if (cnt == txq->tx_tail)
+                               break;
+               }
+
+               if (to_clean == 0)
+                       return;
+
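+               /* Free mbufs and reset descriptors up to the last completed one */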
+               while (to_clean) {
+                       txd = &txq->hw_ring[head];
+
+                       struct atl_tx_entry *tx_entry = &sw_ring[head];
+
+                       if (tx_entry->mbuf) {
+                               rte_pktmbuf_free_seg(tx_entry->mbuf);
+                               tx_entry->mbuf = NULL;
+                       }
+
+                       if (txd->dd)
+                               to_clean--;
+
+                       txd->buf_addr = 0;
+                       txd->flags = 0;
+
+                       head = (head + 1) % txq->nb_tx_desc;
+                       txq->tx_free++;
+               }
+
+               txq->tx_head = head;
+       }
+}
+
+static int
+atl_tso_setup(struct rte_mbuf *tx_pkt, union hw_atl_txc_s *txc)
+{
+       uint32_t tx_cmd = 0;
+       uint64_t ol_flags = tx_pkt->ol_flags;
+
+       PMD_INIT_FUNC_TRACE();
+
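+       /* A context descriptor is built only for TSO or VLAN insertion */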
+       if (ol_flags & PKT_TX_TCP_SEG) {
+               PMD_DRV_LOG(DEBUG, "xmit TSO pkt");
+
+               tx_cmd |= tx_desc_cmd_lso | tx_desc_cmd_l4cs;
+
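+               /* TSO context command; bit 0x2 is OR'ed in for IPv6 */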
+               txc->cmd = 0x4;
+
+               if (ol_flags & PKT_TX_IPV6)
+                       txc->cmd |= 0x2;
+
+               txc->l2_len = tx_pkt->l2_len;
+               txc->l3_len = tx_pkt->l3_len;
+               txc->l4_len = tx_pkt->l4_len;
+
+               txc->mss_len = tx_pkt->tso_segsz;
+       }
+
+       if (ol_flags & PKT_TX_VLAN) {
+               tx_cmd |= tx_desc_cmd_vlan;
+               txc->vlan_tag = tx_pkt->vlan_tci;
+       }
+
+       if (tx_cmd) {
+               txc->type = tx_desc_type_ctx;
+               txc->idx = 0;
+       }
+
+       return tx_cmd;
+}
+
+static inline void
+atl_setup_csum_offload(struct rte_mbuf *mbuf, struct hw_atl_txd_s *txd,
+                      uint32_t tx_cmd)
+{
+       txd->cmd |= tx_desc_cmd_fcs;
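+       /* IPv4 header csum requested */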
+       txd->cmd |= (mbuf->ol_flags & PKT_TX_IP_CKSUM) ? tx_desc_cmd_ipv4 : 0;
+       /* L4 csum requested */
+       txd->cmd |= (mbuf->ol_flags & PKT_TX_L4_MASK) ? tx_desc_cmd_l4cs : 0;
+       txd->cmd |= tx_cmd;
+}
+
+static inline void
+atl_xmit_pkt(struct aq_hw_s *hw, struct atl_tx_queue *txq,
+            struct rte_mbuf *tx_pkt)
+{
+       struct atl_adapter *adapter =
+               ATL_DEV_TO_ADAPTER(&rte_eth_devices[txq->port_id]);
+       uint32_t pay_len = 0;
+       int tail = 0;
+       struct atl_tx_entry *tx_entry;
+       uint64_t buf_dma_addr;
+       struct rte_mbuf *m_seg;
+       union hw_atl_txc_s *txc = NULL;
+       struct hw_atl_txd_s *txd = NULL;
+       u32 tx_cmd = 0U;
+       int desc_count = 0;
+
+       PMD_INIT_FUNC_TRACE();
+
+       tail = txq->tx_tail;
+
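+       /* The slot at the tail becomes a context descriptor if offloads need one */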
+       txc = (union hw_atl_txc_s *)&txq->hw_ring[tail];
+
+       txc->flags1 = 0U;
+       txc->flags2 = 0U;
+
+       tx_cmd = atl_tso_setup(tx_pkt, txc);
+
+       if (tx_cmd) {
+               /* We've consumed the first desc, adjust counters */
+               tail = (tail + 1) % txq->nb_tx_desc;
+               txq->tx_tail = tail;
+               txq->tx_free -= 1;
+
+               txd = &txq->hw_ring[tail];
+               txd->flags = 0U;
+       } else {
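+               /* No context needed: reuse the slot as a data descriptor */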
+               txd = (struct hw_atl_txd_s *)txc;
+       }
+
+       txd->ct_en = !!tx_cmd;
+
+       txd->type = tx_desc_type_desc;
+
+       atl_setup_csum_offload(tx_pkt, txd, tx_cmd);
+
+       if (tx_cmd)
+               txd->ct_idx = 0;
+
+       pay_len = tx_pkt->pkt_len;
+
+       txd->pay_len = pay_len;
+
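+       /* One data descriptor per mbuf segment */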
+       for (m_seg = tx_pkt; m_seg; m_seg = m_seg->next) {
+               if (desc_count > 0) {
+                       txd = &txq->hw_ring[tail];
+                       txd->flags = 0U;
+               }
+
+               buf_dma_addr = rte_mbuf_data_iova(m_seg);
+               txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
+
+               txd->type = tx_desc_type_desc;
+               txd->len = m_seg->data_len;
+               txd->pay_len = pay_len;
+
+               /* Store mbuf for freeing later */
+               tx_entry = &txq->sw_ring[tail];
+
+               if (tx_entry->mbuf)
+                       rte_pktmbuf_free_seg(tx_entry->mbuf);
+               tx_entry->mbuf = m_seg;
+
+               tail = (tail + 1) % txq->nb_tx_desc;
+
+               desc_count++;
+       }
+
+       /* The last descriptor requires EOP and write-back */
+       txd->eop = 1U;
+       txd->cmd |= tx_desc_cmd_wb;
+
+       hw_atl_b0_hw_tx_ring_tail_update(hw, tail, txq->queue_id);
+
+       txq->tx_tail = tail;
+
+       txq->tx_free -= desc_count;
+
+       adapter->sw_stats.q_opackets[txq->queue_id]++;
+       adapter->sw_stats.q_obytes[txq->queue_id] += pay_len;
+}
+
+uint16_t
+atl_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+       struct rte_eth_dev *dev = NULL;
+       struct aq_hw_s *hw = NULL;
+       struct atl_tx_queue *txq = tx_queue;
+       struct rte_mbuf *tx_pkt;
+       uint16_t nb_tx;
+
+       dev = &rte_eth_devices[txq->port_id];
+       hw = ATL_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+       PMD_TX_LOG(DEBUG,
+               "port %d txq %d pkts: %d tx_free=%d tx_tail=%d tx_head=%d",
+               txq->port_id, txq->queue_id, nb_pkts, txq->tx_free,
+               txq->tx_tail, txq->tx_head);
+
+       for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+               tx_pkt = *tx_pkts++;
+
+               /* Clean Tx queue if needed */
+               if (txq->tx_free < txq->tx_free_thresh)
+                       atl_xmit_cleanup(txq);
+
+               /* Check if we have enough free descriptors */
+               if (txq->tx_free < tx_pkt->nb_segs)
+                       break;
+
+               /* Check that the mbuf is valid */
+               if ((tx_pkt->nb_segs == 0) ||
+                       ((tx_pkt->nb_segs > 1) && (tx_pkt->next == NULL)))
+                       break;
+
+               /* Send the packet */
+               atl_xmit_pkt(hw, txq, tx_pkt);
+       }
+
+       PMD_TX_LOG(DEBUG, "atl_xmit_pkts %d transmitted", nb_tx);
+
+       return nb_tx;
+}
+
diff --git a/drivers/net/atlantic/atl_types.h b/drivers/net/atlantic/atl_types.h
new file mode 100644 (file)
index 0000000..3d90f6c
--- /dev/null
@@ -0,0 +1,187 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Aquantia Corporation
+ */
+#ifndef ATL_TYPES_H
+#define ATL_TYPES_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdbool.h>
+#include <netinet/in.h>
+
+typedef uint8_t                u8;
+typedef int8_t         s8;
+typedef uint16_t       u16;
+typedef int16_t                s16;
+typedef uint32_t       u32;
+typedef int32_t                s32;
+typedef uint64_t       u64;
+
+#define min(a, b)      RTE_MIN(a, b)
+#define max(a, b)      RTE_MAX(a, b)
+
+#include "hw_atl/hw_atl_b0_internal.h"
+#include "hw_atl/hw_atl_utils.h"
+
+struct aq_hw_link_status_s {
+       unsigned int mbps;
+};
+
+struct aq_stats_s {
+       u64 uprc;
+       u64 mprc;
+       u64 bprc;
+       u64 erpt;
+       u64 uptc;
+       u64 mptc;
+       u64 bptc;
+       u64 erpr;
+       u64 mbtc;
+       u64 bbtc;
+       u64 mbrc;
+       u64 bbrc;
+       u64 ubrc;
+       u64 ubtc;
+       u64 dpc;
+       u64 dma_pkt_rc;
+       u64 dma_pkt_tc;
+       u64 dma_oct_rc;
+       u64 dma_oct_tc;
+};
+
+struct aq_rss_parameters {
+       u16 base_cpu_number;
+       u16 indirection_table_size;
+       u16 hash_secret_key_size;
+       u32 hash_secret_key[HW_ATL_B0_RSS_HASHKEY_BITS / 8];
+       u8 indirection_table[HW_ATL_B0_RSS_REDIRECTION_MAX];
+};
+
+struct aq_hw_cfg_s {
+       bool is_lro;
+       bool is_rss;
+       unsigned int num_rss_queues;
+       int wol;
+
+       int link_speed_msk;
+       int irq_type;
+       int irq_mask;
+       unsigned int vecs;
+
+       bool vlan_strip;
+       uint32_t vlan_filter[HW_ATL_B0_MAX_VLAN_IDS];
+       uint32_t flow_control;
+
+       struct aq_rss_parameters aq_rss;
+};
+
+struct aq_hw_s {
+       u16 device_id;
+       u16 vendor_id;
+       bool adapter_stopped;
+
+       u8 rbl_enabled:1;
+       struct aq_hw_cfg_s *aq_nic_cfg;
+       const struct aq_fw_ops *aq_fw_ops;
+       void *mmio;
+
+       struct aq_hw_link_status_s aq_link_status;
+       bool is_autoneg;
+
+       struct hw_aq_atl_utils_mbox mbox;
+       struct hw_atl_stats_s last_stats;
+       struct aq_stats_s curr_stats;
+
+       u64 speed;
+       unsigned int chip_features;
+       u32 fw_ver_actual;
+       u32 mbox_addr;
+       u32 rpc_addr;
+       u32 rpc_tid;
+       struct hw_aq_atl_utils_fw_rpc rpc;
+};
+
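+/* Callbacks into the NIC firmware interface */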
+struct aq_fw_ops {
+       int (*init)(struct aq_hw_s *self);
+
+       int (*deinit)(struct aq_hw_s *self);
+
+       int (*reset)(struct aq_hw_s *self);
+
+       int (*get_mac_permanent)(struct aq_hw_s *self, u8 *mac);
+
+       int (*set_link_speed)(struct aq_hw_s *self, u32 speed);
+
+       int (*set_state)(struct aq_hw_s *self,
+                       enum hal_atl_utils_fw_state_e state);
+
+       int (*update_link_status)(struct aq_hw_s *self);
+
+       int (*update_stats)(struct aq_hw_s *self);
+
+       int (*set_power)(struct aq_hw_s *self, unsigned int power_state,
+                       u8 *mac);
+
+       int (*get_temp)(struct aq_hw_s *self, int *temp);
+
+       int (*get_cable_len)(struct aq_hw_s *self, int *cable_len);
+
+       int (*set_eee_rate)(struct aq_hw_s *self, u32 speed);
+
+       int (*get_eee_rate)(struct aq_hw_s *self, u32 *rate,
+                       u32 *supported_rates);
+
+       int (*set_flow_control)(struct aq_hw_s *self);
+
+       int (*led_control)(struct aq_hw_s *self, u32 mode);
+
+       int (*get_eeprom)(struct aq_hw_s *self, u32 *data, u32 len);
+
+       int (*set_eeprom)(struct aq_hw_s *self, u32 *data, u32 len);
+};
+
+struct atl_sw_stats {
+       u64 crcerrs;
+       u64 errbc;
+       u64 mspdc;
+       u64 mpctotal;
+       u64 mpc[8];
+       u64 mlfc;
+       u64 mrfc;
+       u64 rlec;
+       u64 lxontxc;
+       u64 lxonrxc;
+       u64 lxofftxc;
+       u64 lxoffrxc;
+       u64 pxontxc[8];
+       u64 pxonrxc[8];
+       u64 pxofftxc[8];
+       u64 pxoffrxc[8];
+       u64 gprc;
+       u64 bprc;
+       u64 mprc;
+       u64 gptc;
+       u64 gorc;
+       u64 gotc;
+       u64 tor;
+       u64 tpr;
+       u64 tpt;
+       u64 mptc;
+       u64 bptc;
+       u64 xec;
+       u64 fccrc;
+       u64 ldpcec;
+       u64 pcrc8ec;
+
+       u64 rx_nombuf;
+       u64 q_ipackets[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+       u64 q_opackets[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+       u64 q_ibytes[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+       u64 q_obytes[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+       u64 q_errors[RTE_ETHDEV_QUEUE_STAT_CNTRS];
+};
+
+#endif
diff --git a/drivers/net/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/atlantic/hw_atl/hw_atl_b0.c
new file mode 100644 (file)
index 0000000..9400e0e
--- /dev/null
@@ -0,0 +1,510 @@
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+/* Copyright (C) 2014-2017 aQuantia Corporation. */
+
+/* File hw_atl_b0.c: Definition of Atlantic hardware specific functions. */
+
+#include "../atl_types.h"
+#include "hw_atl_b0.h"
+
+#include "../atl_hw_regs.h"
+#include "hw_atl_utils.h"
+#include "hw_atl_llh.h"
+#include "hw_atl_b0_internal.h"
+#include "hw_atl_llh_internal.h"
+#include "../atl_logs.h"
+
+int hw_atl_b0_hw_reset(struct aq_hw_s *self)
+{
+       int err = 0;
+
+       err = hw_atl_utils_soft_reset(self);
+       if (err)
+               return err;
+
+       self->aq_fw_ops->set_state(self, MPI_RESET);
+
+       return err;
+}
+
+static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
+{
+       u32 tc = 0U;
+       u32 buff_size = 0U;
+       unsigned int i_priority = 0U;
+       bool is_rx_flow_control = false;
+
+       /* TPS Descriptor rate init */
+       hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(self, 0x0U);
+       hw_atl_tps_tx_pkt_shed_desc_rate_lim_set(self, 0xA);
+
+       /* TPS VM init */
+       hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(self, 0U);
+
+       /* TPS TC credits init */
+       hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
+       hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U);
+
+       hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF, 0U);
+       hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0x64, 0U);
+       hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, 0U);
+       hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, 0U);
+
+       /* Tx buf size */
+       buff_size = HW_ATL_B0_TXBUF_MAX;
+
+       hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, buff_size, tc);
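+       /* Watermarks at 66% / 50% of the buffer, in 32-byte units */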
+       hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(self,
+                                                  (buff_size *
+                                                  (1024 / 32U) * 66U) /
+                                                  100U, tc);
+       hw_atl_tpb_tx_buff_lo_threshold_per_tc_set(self,
+                                                  (buff_size *
+                                                  (1024 / 32U) * 50U) /
+                                                  100U, tc);
+
+       /* QoS Rx buf size per TC */
+       tc = 0;
+       is_rx_flow_control = false;
+       buff_size = HW_ATL_B0_RXBUF_MAX;
+
+       hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, buff_size, tc);
+       hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(self,
+                                                  (buff_size *
+                                                  (1024U / 32U) * 66U) /
+                                                  100U, tc);
+       hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(self,
+                                                  (buff_size *
+                                                  (1024U / 32U) * 50U) /
+                                                  100U, tc);
+       hw_atl_rpb_rx_xoff_en_per_tc_set(self,
+                                        is_rx_flow_control ? 1U : 0U,
+                                        tc);
+
+       /* QoS 802.1p priority -> TC mapping */
+       for (i_priority = 8U; i_priority--;)
+               hw_atl_rpf_rpb_user_priority_tc_map_set(self, i_priority, 0U);
+
+       return aq_hw_err_from_flags(self);
+}
+
+/* calc hash only in IPv4 header, regardless of presence of TCP */
+#define pif_rpf_rss_ipv4_hdr_only_i     (1 << 4)
+/* calc hash only if TCP header and IPv4 */
+#define pif_rpf_rss_ipv4_tcp_hdr_only_i (1 << 3)
+/* calc hash only in IPv6 header, regardless of presence of TCP */
+#define pif_rpf_rss_ipv6_hdr_only_i     (1 << 2)
+/* calc hash only if TCP header and IPv6 */
+#define pif_rpf_rss_ipv6_tcp_hdr_only_i (1 << 1)
+/* bug 5124 - rss hashing types - FIXME */
+#define pif_rpf_rss_dont_use_udp_i      (1 << 0)
+
+static int hw_atl_b0_hw_rss_hash_type_set(struct aq_hw_s *self)
+{
+       /* misc */
+       unsigned int control_reg_val =
+               IS_CHIP_FEATURE(RPF2) ? 0x000F0000U : 0x00000000U;
+
+       /* RSS hash type set for IP/TCP */
+       control_reg_val |= pif_rpf_rss_ipv4_hdr_only_i; /* 0x1EU */
+
+       aq_hw_write_reg(self, 0x5040U, control_reg_val);
+
+       return aq_hw_err_from_flags(self);
+}
+
+int hw_atl_b0_hw_rss_hash_set(struct aq_hw_s *self,
+                                    struct aq_rss_parameters *rss_params)
+{
+       struct aq_hw_cfg_s *cfg = self->aq_nic_cfg;
+       int err = 0;
+       unsigned int i = 0U;
+       unsigned int addr = 0U;
+
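+       /* Write the 10-word hash key; words are sent in reverse order */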
+       for (i = 10, addr = 0U; i--; ++addr) {
+               u32 key_data = cfg->is_rss ?
+                       htonl(rss_params->hash_secret_key[i]) : 0U;
+               hw_atl_rpf_rss_key_wr_data_set(self, key_data);
+               hw_atl_rpf_rss_key_addr_set(self, addr);
+               hw_atl_rpf_rss_key_wr_en_set(self, 1U);
+               AQ_HW_WAIT_FOR(hw_atl_rpf_rss_key_wr_en_get(self) == 0,
+                              1000U, 10U);
+               if (err < 0)
+                       goto err_exit;
+       }
+
+       /* RSS Ring selection */
+       hw_atl_reg_rx_flr_rss_control1set(self,
+                               cfg->is_rss ? 0xB3333333U : 0x00000000U);
+       hw_atl_b0_hw_rss_hash_type_set(self);
+
+       err = aq_hw_err_from_flags(self);
+
+err_exit:
+       return err;
+}
+
+int hw_atl_b0_hw_rss_set(struct aq_hw_s *self,
+                       struct aq_rss_parameters *rss_params)
+{
+       u8 *indirection_table = rss_params->indirection_table;
+       u32 num_rss_queues = max(1U, self->aq_nic_cfg->num_rss_queues);
+       u32 i = 0;
+       u32 addr = 0;
+       u32 val = 0;
+       u32 shift = 0;
+       int err = 0;
+
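+       /* Pack 3-bit queue indices and write the table 16 bits at a time */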
+       for (i = 0; i < HW_ATL_B0_RSS_REDIRECTION_MAX; i++) {
+               val |= (u32)(indirection_table[i] % num_rss_queues) << shift;
+               shift += 3;
+
+               if (shift < 16)
+                       continue;
+
+               hw_atl_rpf_rss_redir_tbl_wr_data_set(self, val & 0xffff);
+               hw_atl_rpf_rss_redir_tbl_addr_set(self, addr);
+
+               hw_atl_rpf_rss_redir_wr_en_set(self, 1U);
+               AQ_HW_WAIT_FOR(hw_atl_rpf_rss_redir_wr_en_get(self) == 0,
+                       1000U, 10U);
+
+               if (err < 0)
+                       goto err_exit;
+
+               shift -= 16;
+               val >>= 16;
+               addr++;
+       }
+
+err_exit:
+       return err;
+}
+
+static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self)
+{
+       unsigned int i;
+
+       /* Tx checksum offloads */
+       hw_atl_tpo_ipv4header_crc_offload_en_set(self, 1);
+       hw_atl_tpo_tcp_udp_crc_offload_en_set(self, 1);
+
+       /* Rx checksum offloads */
+       hw_atl_rpo_ipv4header_crc_offload_en_set(self, 1);
+       hw_atl_rpo_tcp_udp_crc_offload_en_set(self, 1);
+
+       /* LSO offloads */
+       hw_atl_tdm_large_send_offload_en_set(self, 0xFFFFFFFFU);
+
+       /* LRO offloads */
+       {
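+               /* Encode the per-ring LRO descriptor limit as a 2-bit code */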
+               unsigned int val = (8U < HW_ATL_B0_LRO_RXD_MAX) ? 0x3U :
+                       ((4U < HW_ATL_B0_LRO_RXD_MAX) ? 0x2U :
+                       ((2U < HW_ATL_B0_LRO_RXD_MAX) ? 0x1U : 0x0));
+
+               for (i = 0; i < HW_ATL_B0_RINGS_MAX; i++)
+                       hw_atl_rpo_lro_max_num_of_descriptors_set(self, val, i);
+
+               hw_atl_rpo_lro_time_base_divider_set(self, 0x61AU);
+               hw_atl_rpo_lro_inactive_interval_set(self, 0);
+               hw_atl_rpo_lro_max_coalescing_interval_set(self, 2);
+
+               hw_atl_rpo_lro_qsessions_lim_set(self, 1U);
+
+               hw_atl_rpo_lro_total_desc_lim_set(self, 2U);
+
+               hw_atl_rpo_lro_patch_optimization_en_set(self, 0U);
+
+               hw_atl_rpo_lro_min_pay_of_first_pkt_set(self, 10U);
+
+               hw_atl_rpo_lro_pkt_lim_set(self, 1U);
+
+               hw_atl_rpo_lro_en_set(self,
+                               self->aq_nic_cfg->is_lro ? 0xFFFFFFFFU : 0U);
+       }
+       return aq_hw_err_from_flags(self);
+}
+
+static
+int hw_atl_b0_hw_init_tx_path(struct aq_hw_s *self)
+{
+       /* Tx TC/RSS number config */
+       hw_atl_rpb_tps_tx_tc_mode_set(self, 1U);
+
+       hw_atl_thm_lso_tcp_flag_of_first_pkt_set(self, 0x0FF6U);
+       hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(self, 0x0FF6U);
+       hw_atl_thm_lso_tcp_flag_of_last_pkt_set(self, 0x0F7FU);
+
+       /* Tx interrupts */
+       hw_atl_tdm_tx_desc_wr_wb_irq_en_set(self, 0U);
+
+       /* misc */
+       aq_hw_write_reg(self, 0x00007040U, IS_CHIP_FEATURE(TPO2) ?
+                       0x00010000U : 0x00000000U);
+       hw_atl_tdm_tx_dca_en_set(self, 0U);
+       hw_atl_tdm_tx_dca_mode_set(self, 0U);
+
+       hw_atl_tpb_tx_path_scp_ins_en_set(self, 1U);
+
+       return aq_hw_err_from_flags(self);
+}
+
+static
+int hw_atl_b0_hw_init_rx_path(struct aq_hw_s *self)
+{
+       struct aq_hw_cfg_s *cfg = self->aq_nic_cfg;
+       int i;
+
+       /* Rx TC/RSS number config */
+       hw_atl_rpb_rpf_rx_traf_class_mode_set(self, 1U); /* 1: 4TC/8Queues */
+
+       /* Rx flow control */
+       hw_atl_rpb_rx_flow_ctl_mode_set(self, 1U);
+
+       /* RSS Ring selection */
+       hw_atl_reg_rx_flr_rss_control1set(self, cfg->is_rss ?
+                                       0xB3333333U : 0x00000000U);
+
+       /* Multicast filters */
+       for (i = HW_ATL_B0_MAC_MAX; i--;) {
+               hw_atl_rpfl2_uc_flr_en_set(self, (i == 0U) ? 1U : 0U, i);
+               hw_atl_rpfl2unicast_flr_act_set(self, 1U, i);
+       }
+
+       hw_atl_reg_rx_flr_mcst_flr_msk_set(self, 0x00000000U);
+       hw_atl_reg_rx_flr_mcst_flr_set(self, 0x00010FFFU, 0U);
+
+       /* Vlan filters */
+       hw_atl_rpf_vlan_outer_etht_set(self, 0x88A8U);
+       hw_atl_rpf_vlan_inner_etht_set(self, 0x8100U);
+
+       /* VLAN promiscuous mode by default */
+       hw_atl_rpf_vlan_prom_mode_en_set(self, 1);
+
+       /* Rx Interrupts */
+       hw_atl_rdm_rx_desc_wr_wb_irq_en_set(self, 0U);
+
+       hw_atl_b0_hw_rss_hash_type_set(self);
+
+       hw_atl_rpfl2broadcast_flr_act_set(self, 1U);
+       hw_atl_rpfl2broadcast_count_threshold_set(self, 0xFFFFU & (~0U / 256U));
+
+       hw_atl_rdm_rx_dca_en_set(self, 0U);
+       hw_atl_rdm_rx_dca_mode_set(self, 0U);
+
+       return aq_hw_err_from_flags(self);
+}
+
+static int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr)
+{
+       int err = 0;
+       unsigned int h = 0U;
+       unsigned int l = 0U;
+
+       if (!mac_addr) {
+               err = -EINVAL;
+               goto err_exit;
+       }
+       h = (mac_addr[0] << 8) | (mac_addr[1]);
+       l = (mac_addr[2] << 24) | (mac_addr[3] << 16) |
+               (mac_addr[4] << 8) | mac_addr[5];
+
+       hw_atl_rpfl2_uc_flr_en_set(self, 0U, HW_ATL_B0_MAC);
+       hw_atl_rpfl2unicast_dest_addresslsw_set(self, l, HW_ATL_B0_MAC);
+       hw_atl_rpfl2unicast_dest_addressmsw_set(self, h, HW_ATL_B0_MAC);
+       hw_atl_rpfl2_uc_flr_en_set(self, 1U, HW_ATL_B0_MAC);
+
+       err = aq_hw_err_from_flags(self);
+
+err_exit:
+       return err;
+}
+
+int hw_atl_b0_hw_init(struct aq_hw_s *self, u8 *mac_addr)
+{
+       static const u32 aq_hw_atl_igcr_table_[4][2] = {
+               { 0x20000080U, 0x20000080U }, /* AQ_IRQ_INVALID */
+               { 0x20000080U, 0x20000080U }, /* AQ_IRQ_LEGACY */
+               { 0x20000021U, 0x20000025U }, /* AQ_IRQ_MSI */
+               { 0x200000A2U, 0x200000A6U }  /* AQ_IRQ_MSIX */
+       };
+
+       int err = 0;
+       u32 val;
+
+       struct aq_hw_cfg_s *aq_nic_cfg = self->aq_nic_cfg;
+
+       hw_atl_b0_hw_init_tx_path(self);
+       hw_atl_b0_hw_init_rx_path(self);
+
+       hw_atl_b0_hw_mac_addr_set(self, mac_addr);
+
+       self->aq_fw_ops->set_link_speed(self, aq_nic_cfg->link_speed_msk);
+       self->aq_fw_ops->set_state(self, MPI_INIT);
+
+       hw_atl_b0_hw_qos_set(self);
+       hw_atl_b0_hw_rss_set(self, &aq_nic_cfg->aq_rss);
+       hw_atl_b0_hw_rss_hash_set(self, &aq_nic_cfg->aq_rss);
+
+       /* Force limit MRRS on RDM/TDM to 2K */
+       val = aq_hw_read_reg(self, HW_ATL_PCI_REG_CONTROL6_ADR);
+       aq_hw_write_reg(self, HW_ATL_PCI_REG_CONTROL6_ADR,
+                       (val & ~0x707) | 0x404);
+
+       /* TX DMA total request limit. B0 hardware is not capable of
+        * handling more than (8K-MRRS) of incoming DMA data.
+        * Value 24 in 256-byte units.
+        */
+       aq_hw_write_reg(self, HW_ATL_TX_DMA_TOTAL_REQ_LIMIT_ADR, 24);
+
+       /* Reset link status and read out initial hardware counters */
+       self->aq_link_status.mbps = 0;
+       self->aq_fw_ops->update_stats(self);
+
+       err = aq_hw_err_from_flags(self);
+       if (err < 0)
+               goto err_exit;
+
+       /* Interrupts */
+       hw_atl_reg_irq_glb_ctl_set(self,
+                                  aq_hw_atl_igcr_table_[aq_nic_cfg->irq_type]
+                                        [(aq_nic_cfg->vecs > 1U) ?
+                                        1 : 0]);
+
+       hw_atl_itr_irq_auto_masklsw_set(self, 0xffffffff);
+
+       /* Interrupts */
+       hw_atl_reg_gen_irq_map_set(self, 0, 0);
+       hw_atl_reg_gen_irq_map_set(self, 0x80 | ATL_IRQ_CAUSE_LINK, 3);
+
+       hw_atl_b0_hw_offload_set(self);
+
+err_exit:
+       return err;
+}
+
+int hw_atl_b0_hw_ring_tx_start(struct aq_hw_s *self, int index)
+{
+       hw_atl_tdm_tx_desc_en_set(self, 1, index);
+       return aq_hw_err_from_flags(self);
+}
+
+int hw_atl_b0_hw_ring_rx_start(struct aq_hw_s *self, int index)
+{
+       hw_atl_rdm_rx_desc_en_set(self, 1, index);
+       return aq_hw_err_from_flags(self);
+}
+
+int hw_atl_b0_hw_start(struct aq_hw_s *self)
+{
+       hw_atl_tpb_tx_buff_en_set(self, 1);
+       hw_atl_rpb_rx_buff_en_set(self, 1);
+       return aq_hw_err_from_flags(self);
+}
+
+int hw_atl_b0_hw_tx_ring_tail_update(struct aq_hw_s *self, int tail, int index)
+{
+       hw_atl_reg_tx_dma_desc_tail_ptr_set(self, tail, index);
+       return 0;
+}
+
+int hw_atl_b0_hw_ring_rx_init(struct aq_hw_s *self, uint64_t base_addr,
+               int index, int size, int buff_size, int cpu, int vec)
+{
+       u32 dma_desc_addr_lsw = (u32)base_addr;
+       u32 dma_desc_addr_msw = (u32)(base_addr >> 32);
+
+       hw_atl_rdm_rx_desc_en_set(self, false, index);
+
+       hw_atl_rdm_rx_desc_head_splitting_set(self, 0U, index);
+
+       hw_atl_reg_rx_dma_desc_base_addresslswset(self, dma_desc_addr_lsw,
+                                                 index);
+
+       hw_atl_reg_rx_dma_desc_base_addressmswset(self, dma_desc_addr_msw,
+                                                 index);
+
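+       /* The ring length register is programmed in units of 8 descriptors */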
+       hw_atl_rdm_rx_desc_len_set(self, size / 8U, index);
+
+       hw_atl_rdm_rx_desc_data_buff_size_set(self, buff_size / 1024U, index);
+
+       hw_atl_rdm_rx_desc_head_buff_size_set(self, 0U, index);
+       hw_atl_rdm_rx_desc_head_splitting_set(self, 0U, index);
+       hw_atl_rpo_rx_desc_vlan_stripping_set(self, 0U, index);
+
+       /* Mapping interrupt vector */
+       hw_atl_itr_irq_map_rx_set(self, vec, index);
+       hw_atl_itr_irq_map_en_rx_set(self, true, index);
+
+       hw_atl_rdm_cpu_id_set(self, cpu, index);
+       hw_atl_rdm_rx_desc_dca_en_set(self, 0U, index);
+       hw_atl_rdm_rx_head_dca_en_set(self, 0U, index);
+       hw_atl_rdm_rx_pld_dca_en_set(self, 0U, index);
+
+       return aq_hw_err_from_flags(self);
+}
+
+int hw_atl_b0_hw_ring_tx_init(struct aq_hw_s *self, uint64_t base_addr,
+                             int index, int size, int cpu, int vec)
+{
+       u32 dma_desc_lsw_addr = (u32)base_addr;
+       u32 dma_desc_msw_addr = (u32)(base_addr >> 32);
+
+       hw_atl_reg_tx_dma_desc_base_addresslswset(self, dma_desc_lsw_addr,
+                                                 index);
+
+       hw_atl_reg_tx_dma_desc_base_addressmswset(self, dma_desc_msw_addr,
+                                                 index);
+
+       hw_atl_tdm_tx_desc_len_set(self, size / 8U, index);
+
+       hw_atl_b0_hw_tx_ring_tail_update(self, 0, index);
+
+       /* Set Tx threshold */
+       hw_atl_tdm_tx_desc_wr_wb_threshold_set(self, 0U, index);
+
+       /* Mapping interrupt vector */
+       hw_atl_itr_irq_map_tx_set(self, vec, index);
+       hw_atl_itr_irq_map_en_tx_set(self, true, index);
+
+       hw_atl_tdm_cpu_id_set(self, cpu, index);
+       hw_atl_tdm_tx_desc_dca_en_set(self, 0U, index);
+
+       return aq_hw_err_from_flags(self);
+}
+
+int hw_atl_b0_hw_irq_enable(struct aq_hw_s *self, u64 mask)
+{
+       hw_atl_itr_irq_msk_setlsw_set(self, LODWORD(mask));
+       return aq_hw_err_from_flags(self);
+}
+
+int hw_atl_b0_hw_irq_disable(struct aq_hw_s *self, u64 mask)
+{
+       hw_atl_itr_irq_msk_clearlsw_set(self, LODWORD(mask));
+       hw_atl_itr_irq_status_clearlsw_set(self, LODWORD(mask));
+
+       return aq_hw_err_from_flags(self);
+}
+
+int hw_atl_b0_hw_irq_read(struct aq_hw_s *self, u64 *mask)
+{
+       *mask = hw_atl_itr_irq_statuslsw_get(self);
+       return aq_hw_err_from_flags(self);
+}
+
+int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self, int index)
+{
+       hw_atl_tdm_tx_desc_en_set(self, 0U, index);
+       return aq_hw_err_from_flags(self);
+}
+
+int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self, int index)
+{
+       hw_atl_rdm_rx_desc_en_set(self, 0U, index);
+       return aq_hw_err_from_flags(self);
+}
+
diff --git a/drivers/net/atlantic/hw_atl/hw_atl_b0.h b/drivers/net/atlantic/hw_atl/hw_atl_b0.h
new file mode 100644 (file)
index 0000000..06feb56
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0) */
+/* Copyright (C) 2014-2017 aQuantia Corporation. */
+
+/* File hw_atl_b0.h: Declaration of abstract interface for Atlantic hardware
+ * specific functions.
+ */
+
+#ifndef HW_ATL_B0_H
+#define HW_ATL_B0_H
+
+int hw_atl_b0_hw_reset(struct aq_hw_s *self);
+int hw_atl_b0_hw_init(struct aq_hw_s *self, u8 *mac_addr);
+
+int hw_atl_b0_hw_ring_tx_init(struct aq_hw_s *self, uint64_t base_addr,
+               int index, int size, int cpu, int vec);
+int hw_atl_b0_hw_ring_rx_init(struct aq_hw_s *self, uint64_t base_addr,
+               int index, int size, int buff_size, int cpu, int vec);
+
+int hw_atl_b0_hw_start(struct aq_hw_s *self);
+
+int hw_atl_b0_hw_ring_rx_start(struct aq_hw_s *self, int index);
+int hw_atl_b0_hw_ring_tx_start(struct aq_hw_s *self, int index);
+
+int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self, int index);
+int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self, int index);
+
+int hw_atl_b0_hw_tx_ring_tail_update(struct aq_hw_s *self, int tail, int index);
+
+int hw_atl_b0_hw_rss_hash_set(struct aq_hw_s *self,
+                                    struct aq_rss_parameters *rss_params);
+int hw_atl_b0_hw_rss_set(struct aq_hw_s *self,
+                               struct aq_rss_parameters *rss_params);
+
+int hw_atl_b0_hw_irq_enable(struct aq_hw_s *self, u64 mask);
+int hw_atl_b0_hw_irq_disable(struct aq_hw_s *self, u64 mask);
+int hw_atl_b0_hw_irq_read(struct aq_hw_s *self, u64 *mask);
+
+#endif /* HW_ATL_B0_H */
diff --git a/drivers/net/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/atlantic/hw_atl/hw_atl_b0_internal.h
new file mode 100644 (file)
index 0000000..48152ea
--- /dev/null
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0) */
+/* Copyright (C) 2014-2017 aQuantia Corporation. */
+
+/* File hw_atl_b0_internal.h: Definition of Atlantic B0 chip specific
+ * constants.
+ */
+
+#ifndef HW_ATL_B0_INTERNAL_H
+#define HW_ATL_B0_INTERNAL_H
+
+#define HW_ATL_B0_MTU_JUMBO  16352U
+#define HW_ATL_B0_MTU        1514U
+
+#define HW_ATL_B0_TX_RINGS 4U
+#define HW_ATL_B0_RX_RINGS 4U
+
+#define HW_ATL_B0_RINGS_MAX 32U
+#define HW_ATL_B0_TXD_SIZE       (16U)
+#define HW_ATL_B0_RXD_SIZE       (16U)
+
+#define HW_ATL_B0_MAC      0U
+#define HW_ATL_B0_MAC_MIN  1U
+#define HW_ATL_B0_MAC_MAX  33U
+
+/* Maximum supported VLAN filters */
+#define HW_ATL_B0_MAX_VLAN_IDS 16
+
+/* UCAST/MCAST filters */
+#define HW_ATL_B0_UCAST_FILTERS_MAX 38
+#define HW_ATL_B0_MCAST_FILTERS_MAX 8
+
+/* interrupts */
+#define HW_ATL_B0_ERR_INT 8U
+#define HW_ATL_B0_INT_MASK  (0xFFFFFFFFU)
+
+#define HW_ATL_B0_TXD_CTL2_LEN        (0xFFFFC000)
+#define HW_ATL_B0_TXD_CTL2_CTX_EN     (0x00002000)
+#define HW_ATL_B0_TXD_CTL2_CTX_IDX    (0x00001000)
+
+#define HW_ATL_B0_TXD_CTL_DESC_TYPE_TXD   (0x00000001)
+#define HW_ATL_B0_TXD_CTL_DESC_TYPE_TXC   (0x00000002)
+#define HW_ATL_B0_TXD_CTL_BLEN        (0x000FFFF0)
+#define HW_ATL_B0_TXD_CTL_DD          (0x00100000)
+#define HW_ATL_B0_TXD_CTL_EOP         (0x00200000)
+
+#define HW_ATL_B0_TXD_CTL_CMD_X       (0x3FC00000)
+
+#define HW_ATL_B0_TXD_CTL_CMD_VLAN    BIT(22)
+#define HW_ATL_B0_TXD_CTL_CMD_FCS     BIT(23)
+#define HW_ATL_B0_TXD_CTL_CMD_IPCSO   BIT(24)
+#define HW_ATL_B0_TXD_CTL_CMD_TUCSO   BIT(25)
+#define HW_ATL_B0_TXD_CTL_CMD_LSO     BIT(26)
+#define HW_ATL_B0_TXD_CTL_CMD_WB      BIT(27)
+#define HW_ATL_B0_TXD_CTL_CMD_VXLAN   BIT(28)
+
+#define HW_ATL_B0_TXD_CTL_CMD_IPV6    BIT(21)
+#define HW_ATL_B0_TXD_CTL_CMD_TCP     BIT(22)
+
+#define HW_ATL_B0_MPI_CONTROL_ADR       0x0368U
+#define HW_ATL_B0_MPI_STATE_ADR         0x036CU
+
+#define HW_ATL_B0_MPI_SPEED_MSK         0xFFFFU
+#define HW_ATL_B0_MPI_SPEED_SHIFT       16U
+
+#define HW_ATL_B0_TXBUF_MAX  160U
+#define HW_ATL_B0_RXBUF_MAX  320U
+
+#define HW_ATL_B0_RXD_BUF_SIZE_MAX  (16 * 1024)
+
+#define HW_ATL_B0_RSS_REDIRECTION_MAX 64U
+#define HW_ATL_B0_RSS_REDIRECTION_BITS 3U
+#define HW_ATL_B0_RSS_HASHKEY_BITS 320U
+
+#define HW_ATL_B0_TCRSS_4_8  1
+#define HW_ATL_B0_TC_MAX 1U
+#define HW_ATL_B0_RSS_MAX 8U
+
+#define HW_ATL_B0_LRO_RXD_MAX 2U
+#define HW_ATL_B0_RS_SLIP_ENABLED  0U
+
+/* (256k -1(max pay_len) - 54(header)) */
+#define HAL_ATL_B0_LSO_MAX_SEGMENT_SIZE 262089U
+
+/* (256k -1(max pay_len) - 74(header)) */
+#define HAL_ATL_B0_LSO_IPV6_MAX_SEGMENT_SIZE 262069U
+
+#define HW_ATL_B0_CHIP_REVISION_B0      0xA0U
+#define HW_ATL_B0_CHIP_REVISION_UNKNOWN 0xFFU
+
+#define HW_ATL_B0_FW_SEMA_RAM           0x2U
+
+#define HW_ATL_B0_TXC_LEN_TUNLEN    (0x0000FF00)
+#define HW_ATL_B0_TXC_LEN_OUTLEN    (0xFFFF0000)
+
+#define HW_ATL_B0_TXC_CTL_DESC_TYPE (0x00000007)
+#define HW_ATL_B0_TXC_CTL_CTX_ID    (0x00000008)
+#define HW_ATL_B0_TXC_CTL_VLAN      (0x000FFFF0)
+#define HW_ATL_B0_TXC_CTL_CMD       (0x00F00000)
+#define HW_ATL_B0_TXC_CTL_L2LEN     (0x7F000000)
+
+#define HW_ATL_B0_TXC_CTL_L3LEN     (0x80000000)       /* L3LEN lsb */
+#define HW_ATL_B0_TXC_LEN2_L3LEN    (0x000000FF)       /* L3LEN upper bits */
+#define HW_ATL_B0_TXC_LEN2_L4LEN    (0x0000FF00)
+#define HW_ATL_B0_TXC_LEN2_MSSLEN   (0xFFFF0000)
+
+#define HW_ATL_B0_RXD_DD    (0x1)
+#define HW_ATL_B0_RXD_NCEA0 (0x1)
+
+#define HW_ATL_B0_RXD_WB_STAT_RSSTYPE (0x0000000F)
+#define HW_ATL_B0_RXD_WB_STAT_PKTTYPE (0x00000FF0)
+#define HW_ATL_B0_RXD_WB_STAT_RXCTRL  (0x00180000)
+#define HW_ATL_B0_RXD_WB_STAT_SPLHDR  (0x00200000)
+#define HW_ATL_B0_RXD_WB_STAT_HDRLEN  (0xFFC00000)
+
+#define HW_ATL_B0_RXD_WB_STAT2_DD      (0x0001)
+#define HW_ATL_B0_RXD_WB_STAT2_EOP     (0x0002)
+#define HW_ATL_B0_RXD_WB_STAT2_RXSTAT  (0x003C)
+#define HW_ATL_B0_RXD_WB_STAT2_MACERR  (0x0004)
+#define HW_ATL_B0_RXD_WB_STAT2_IP4ERR  (0x0008)
+#define HW_ATL_B0_RXD_WB_STAT2_TCPUPDERR  (0x0010)
+#define HW_ATL_B0_RXD_WB_STAT2_RXESTAT (0x0FC0)
+#define HW_ATL_B0_RXD_WB_STAT2_RSCCNT  (0xF000)
+
+#define L2_FILTER_ACTION_DISCARD (0x0)
+#define L2_FILTER_ACTION_HOST    (0x1)
+
+#define HW_ATL_B0_UCP_0X370_REG  (0x370)
+
+#define HW_ATL_B0_FLUSH() AQ_HW_READ_REG(self, 0x10)
+
+#define HW_ATL_INTR_MODER_MAX  0x1FF
+#define HW_ATL_INTR_MODER_MIN  0xFF
+
+#define HW_ATL_B0_MIN_RXD \
+       (ALIGN(AQ_CFG_SKB_FRAGS_MAX + 1U, AQ_HW_RXD_MULTIPLE))
+#define HW_ATL_B0_MIN_TXD \
+       (ALIGN(AQ_CFG_SKB_FRAGS_MAX + 1U, AQ_HW_TXD_MULTIPLE))
+
+#define HW_ATL_B0_MAX_RXD 8184U
+#define HW_ATL_B0_MAX_TXD 8184U
+
+/* HW layer capabilities */
+
+#endif /* HW_ATL_B0_INTERNAL_H */
diff --git a/drivers/net/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/atlantic/hw_atl/hw_atl_llh.c
new file mode 100644 (file)
index 0000000..2dc5be2
--- /dev/null
@@ -0,0 +1,1491 @@
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+/* Copyright (C) 2014-2017 aQuantia Corporation. */
+
+/* File hw_atl_llh.c: Definitions of bitfield and register access functions for
+ * Atlantic registers.
+ */
+
+#include "hw_atl_llh.h"
+
+#include "../atl_hw_regs.h"
+#include "hw_atl_llh_internal.h"
+
+/* global */
+void hw_atl_reg_glb_cpu_sem_set(struct aq_hw_s *aq_hw, u32 glb_cpu_sem,
+                               u32 semaphore)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_GLB_CPU_SEM_ADR(semaphore), glb_cpu_sem);
+}
+
+u32 hw_atl_reg_glb_cpu_sem_get(struct aq_hw_s *aq_hw, u32 semaphore)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_GLB_CPU_SEM_ADR(semaphore));
+}
+
+void hw_atl_glb_glb_reg_res_dis_set(struct aq_hw_s *aq_hw, u32 glb_reg_res_dis)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_GLB_REG_RES_DIS_ADR,
+                           HW_ATL_GLB_REG_RES_DIS_MSK,
+                           HW_ATL_GLB_REG_RES_DIS_SHIFT,
+                           glb_reg_res_dis);
+}
+
+void hw_atl_glb_soft_res_set(struct aq_hw_s *aq_hw, u32 soft_res)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_GLB_SOFT_RES_ADR,
+                           HW_ATL_GLB_SOFT_RES_MSK,
+                           HW_ATL_GLB_SOFT_RES_SHIFT, soft_res);
+}
+
+u32 hw_atl_glb_soft_res_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg_bit(aq_hw, HW_ATL_GLB_SOFT_RES_ADR,
+                                 HW_ATL_GLB_SOFT_RES_MSK,
+                                 HW_ATL_GLB_SOFT_RES_SHIFT);
+}
+
+u32 hw_atl_reg_glb_mif_id_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_GLB_MIF_ID_ADR);
+}
+
+/* stats */
+u32 hw_atl_rpb_rx_dma_drop_pkt_cnt_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_RPB_RX_DMA_DROP_PKT_CNT_ADR);
+}
+
+u32 hw_atl_stats_rx_dma_good_octet_counterlsw_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_STATS_RX_DMA_GOOD_OCTET_COUNTERLSW);
+}
+
+u32 hw_atl_stats_rx_dma_good_pkt_counterlsw_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_STATS_RX_DMA_GOOD_PKT_COUNTERLSW);
+}
+
+u32 hw_atl_stats_tx_dma_good_octet_counterlsw_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_STATS_TX_DMA_GOOD_OCTET_COUNTERLSW);
+}
+
+u32 hw_atl_stats_tx_dma_good_pkt_counterlsw_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_STATS_TX_DMA_GOOD_PKT_COUNTERLSW);
+}
+
+u32 hw_atl_stats_rx_dma_good_octet_countermsw_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_STATS_RX_DMA_GOOD_OCTET_COUNTERMSW);
+}
+
+u32 hw_atl_stats_rx_dma_good_pkt_countermsw_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_STATS_RX_DMA_GOOD_PKT_COUNTERMSW);
+}
+
+u32 hw_atl_stats_tx_dma_good_octet_countermsw_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_STATS_TX_DMA_GOOD_OCTET_COUNTERMSW);
+}
+
+u32 hw_atl_stats_tx_dma_good_pkt_countermsw_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_STATS_TX_DMA_GOOD_PKT_COUNTERMSW);
+}
+
+/* interrupt */
+void hw_atl_itr_irq_auto_masklsw_set(struct aq_hw_s *aq_hw,
+                                    u32 irq_auto_masklsw)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_ITR_IAMRLSW_ADR, irq_auto_masklsw);
+}
+
+void hw_atl_itr_irq_map_en_rx_set(struct aq_hw_s *aq_hw, u32 irq_map_en_rx,
+                                 u32 rx)
+{
+/* register address for bitfield imr_rx{r}_en */
+       static const u32 itr_imr_rxren_adr[32] = {
+                       0x00002100U, 0x00002100U, 0x00002104U, 0x00002104U,
+                       0x00002108U, 0x00002108U, 0x0000210CU, 0x0000210CU,
+                       0x00002110U, 0x00002110U, 0x00002114U, 0x00002114U,
+                       0x00002118U, 0x00002118U, 0x0000211CU, 0x0000211CU,
+                       0x00002120U, 0x00002120U, 0x00002124U, 0x00002124U,
+                       0x00002128U, 0x00002128U, 0x0000212CU, 0x0000212CU,
+                       0x00002130U, 0x00002130U, 0x00002134U, 0x00002134U,
+                       0x00002138U, 0x00002138U, 0x0000213CU, 0x0000213CU
+               };
+
+/* bitmask for bitfield imr_rx{r}_en */
+       static const u32 itr_imr_rxren_msk[32] = {
+                       0x00008000U, 0x00000080U, 0x00008000U, 0x00000080U,
+                       0x00008000U, 0x00000080U, 0x00008000U, 0x00000080U,
+                       0x00008000U, 0x00000080U, 0x00008000U, 0x00000080U,
+                       0x00008000U, 0x00000080U, 0x00008000U, 0x00000080U,
+                       0x00008000U, 0x00000080U, 0x00008000U, 0x00000080U,
+                       0x00008000U, 0x00000080U, 0x00008000U, 0x00000080U,
+                       0x00008000U, 0x00000080U, 0x00008000U, 0x00000080U,
+                       0x00008000U, 0x00000080U, 0x00008000U, 0x00000080U
+               };
+
+/* lower bit position of bitfield imr_rx{r}_en */
+       static const u32 itr_imr_rxren_shift[32] = {
+                       15U, 7U, 15U, 7U, 15U, 7U, 15U, 7U,
+                       15U, 7U, 15U, 7U, 15U, 7U, 15U, 7U,
+                       15U, 7U, 15U, 7U, 15U, 7U, 15U, 7U,
+                       15U, 7U, 15U, 7U, 15U, 7U, 15U, 7U
+               };
+
+       aq_hw_write_reg_bit(aq_hw, itr_imr_rxren_adr[rx],
+                           itr_imr_rxren_msk[rx],
+                           itr_imr_rxren_shift[rx],
+                           irq_map_en_rx);
+}
+
+void hw_atl_itr_irq_map_en_tx_set(struct aq_hw_s *aq_hw, u32 irq_map_en_tx,
+                                 u32 tx)
+{
+/* register address for bitfield imr_tx{t}_en */
+       static const u32 itr_imr_txten_adr[32] = {
+                       0x00002100U, 0x00002100U, 0x00002104U, 0x00002104U,
+                       0x00002108U, 0x00002108U, 0x0000210CU, 0x0000210CU,
+                       0x00002110U, 0x00002110U, 0x00002114U, 0x00002114U,
+                       0x00002118U, 0x00002118U, 0x0000211CU, 0x0000211CU,
+                       0x00002120U, 0x00002120U, 0x00002124U, 0x00002124U,
+                       0x00002128U, 0x00002128U, 0x0000212CU, 0x0000212CU,
+                       0x00002130U, 0x00002130U, 0x00002134U, 0x00002134U,
+                       0x00002138U, 0x00002138U, 0x0000213CU, 0x0000213CU
+               };
+
+/* bitmask for bitfield imr_tx{t}_en */
+       static const u32 itr_imr_txten_msk[32] = {
+                       0x80000000U, 0x00800000U, 0x80000000U, 0x00800000U,
+                       0x80000000U, 0x00800000U, 0x80000000U, 0x00800000U,
+                       0x80000000U, 0x00800000U, 0x80000000U, 0x00800000U,
+                       0x80000000U, 0x00800000U, 0x80000000U, 0x00800000U,
+                       0x80000000U, 0x00800000U, 0x80000000U, 0x00800000U,
+                       0x80000000U, 0x00800000U, 0x80000000U, 0x00800000U,
+                       0x80000000U, 0x00800000U, 0x80000000U, 0x00800000U,
+                       0x80000000U, 0x00800000U, 0x80000000U, 0x00800000U
+               };
+
+/* lower bit position of bitfield imr_tx{t}_en */
+       static const u32 itr_imr_txten_shift[32] = {
+                       31U, 23U, 31U, 23U, 31U, 23U, 31U, 23U,
+                       31U, 23U, 31U, 23U, 31U, 23U, 31U, 23U,
+                       31U, 23U, 31U, 23U, 31U, 23U, 31U, 23U,
+                       31U, 23U, 31U, 23U, 31U, 23U, 31U, 23U
+               };
+
+       aq_hw_write_reg_bit(aq_hw, itr_imr_txten_adr[tx],
+                           itr_imr_txten_msk[tx],
+                           itr_imr_txten_shift[tx],
+                           irq_map_en_tx);
+}
+
+void hw_atl_itr_irq_map_rx_set(struct aq_hw_s *aq_hw, u32 irq_map_rx, u32 rx)
+{
+/* register address for bitfield imr_rx{r}[4:0] */
+       static const u32 itr_imr_rxr_adr[32] = {
+                       0x00002100U, 0x00002100U, 0x00002104U, 0x00002104U,
+                       0x00002108U, 0x00002108U, 0x0000210CU, 0x0000210CU,
+                       0x00002110U, 0x00002110U, 0x00002114U, 0x00002114U,
+                       0x00002118U, 0x00002118U, 0x0000211CU, 0x0000211CU,
+                       0x00002120U, 0x00002120U, 0x00002124U, 0x00002124U,
+                       0x00002128U, 0x00002128U, 0x0000212CU, 0x0000212CU,
+                       0x00002130U, 0x00002130U, 0x00002134U, 0x00002134U,
+                       0x00002138U, 0x00002138U, 0x0000213CU, 0x0000213CU
+               };
+
+/* bitmask for bitfield imr_rx{r}[4:0] */
+       static const u32 itr_imr_rxr_msk[32] = {
+                       0x00001f00U, 0x0000001FU, 0x00001F00U, 0x0000001FU,
+                       0x00001f00U, 0x0000001FU, 0x00001F00U, 0x0000001FU,
+                       0x00001f00U, 0x0000001FU, 0x00001F00U, 0x0000001FU,
+                       0x00001f00U, 0x0000001FU, 0x00001F00U, 0x0000001FU,
+                       0x00001f00U, 0x0000001FU, 0x00001F00U, 0x0000001FU,
+                       0x00001f00U, 0x0000001FU, 0x00001F00U, 0x0000001FU,
+                       0x00001f00U, 0x0000001FU, 0x00001F00U, 0x0000001FU,
+                       0x00001f00U, 0x0000001FU, 0x00001F00U, 0x0000001FU
+               };
+
+/* lower bit position of bitfield imr_rx{r}[4:0] */
+       static const u32 itr_imr_rxr_shift[32] = {
+                       8U, 0U, 8U, 0U, 8U, 0U, 8U, 0U,
+                       8U, 0U, 8U, 0U, 8U, 0U, 8U, 0U,
+                       8U, 0U, 8U, 0U, 8U, 0U, 8U, 0U,
+                       8U, 0U, 8U, 0U, 8U, 0U, 8U, 0U
+               };
+
+       aq_hw_write_reg_bit(aq_hw, itr_imr_rxr_adr[rx],
+                           itr_imr_rxr_msk[rx],
+                           itr_imr_rxr_shift[rx],
+                           irq_map_rx);
+}
+
+void hw_atl_itr_irq_map_tx_set(struct aq_hw_s *aq_hw, u32 irq_map_tx, u32 tx)
+{
+/* register address for bitfield imr_tx{t}[4:0] */
+       static const u32 itr_imr_txt_adr[32] = {
+                       0x00002100U, 0x00002100U, 0x00002104U, 0x00002104U,
+                       0x00002108U, 0x00002108U, 0x0000210CU, 0x0000210CU,
+                       0x00002110U, 0x00002110U, 0x00002114U, 0x00002114U,
+                       0x00002118U, 0x00002118U, 0x0000211CU, 0x0000211CU,
+                       0x00002120U, 0x00002120U, 0x00002124U, 0x00002124U,
+                       0x00002128U, 0x00002128U, 0x0000212CU, 0x0000212CU,
+                       0x00002130U, 0x00002130U, 0x00002134U, 0x00002134U,
+                       0x00002138U, 0x00002138U, 0x0000213CU, 0x0000213CU
+               };
+
+/* bitmask for bitfield imr_tx{t}[4:0] */
+       static const u32 itr_imr_txt_msk[32] = {
+                       0x1f000000U, 0x001F0000U, 0x1F000000U, 0x001F0000U,
+                       0x1f000000U, 0x001F0000U, 0x1F000000U, 0x001F0000U,
+                       0x1f000000U, 0x001F0000U, 0x1F000000U, 0x001F0000U,
+                       0x1f000000U, 0x001F0000U, 0x1F000000U, 0x001F0000U,
+                       0x1f000000U, 0x001F0000U, 0x1F000000U, 0x001F0000U,
+                       0x1f000000U, 0x001F0000U, 0x1F000000U, 0x001F0000U,
+                       0x1f000000U, 0x001F0000U, 0x1F000000U, 0x001F0000U,
+                       0x1f000000U, 0x001F0000U, 0x1F000000U, 0x001F0000U
+               };
+
+/* lower bit position of bitfield imr_tx{t}[4:0] */
+       static const u32 itr_imr_txt_shift[32] = {
+                       24U, 16U, 24U, 16U, 24U, 16U, 24U, 16U,
+                       24U, 16U, 24U, 16U, 24U, 16U, 24U, 16U,
+                       24U, 16U, 24U, 16U, 24U, 16U, 24U, 16U,
+                       24U, 16U, 24U, 16U, 24U, 16U, 24U, 16U
+               };
+
+       aq_hw_write_reg_bit(aq_hw, itr_imr_txt_adr[tx],
+                           itr_imr_txt_msk[tx],
+                           itr_imr_txt_shift[tx],
+                           irq_map_tx);
+}
+
+void hw_atl_itr_irq_msk_clearlsw_set(struct aq_hw_s *aq_hw,
+                                    u32 irq_msk_clearlsw)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_ITR_IMCRLSW_ADR, irq_msk_clearlsw);
+}
+
+void hw_atl_itr_irq_msk_setlsw_set(struct aq_hw_s *aq_hw, u32 irq_msk_setlsw)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_ITR_IMSRLSW_ADR, irq_msk_setlsw);
+}
+
+void hw_atl_itr_irq_reg_res_dis_set(struct aq_hw_s *aq_hw, u32 irq_reg_res_dis)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_ITR_REG_RES_DSBL_ADR,
+                           HW_ATL_ITR_REG_RES_DSBL_MSK,
+                           HW_ATL_ITR_REG_RES_DSBL_SHIFT, irq_reg_res_dis);
+}
+
+void hw_atl_itr_irq_status_clearlsw_set(struct aq_hw_s *aq_hw,
+                                       u32 irq_status_clearlsw)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_ITR_ISCRLSW_ADR, irq_status_clearlsw);
+}
+
+u32 hw_atl_itr_irq_statuslsw_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_ITR_ISRLSW_ADR);
+}
+
+u32 hw_atl_itr_res_irq_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg_bit(aq_hw, HW_ATL_ITR_RES_ADR, HW_ATL_ITR_RES_MSK,
+                                 HW_ATL_ITR_RES_SHIFT);
+}
+
+void hw_atl_itr_res_irq_set(struct aq_hw_s *aq_hw, u32 res_irq)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_ITR_RES_ADR, HW_ATL_ITR_RES_MSK,
+                           HW_ATL_ITR_RES_SHIFT, res_irq);
+}
+
+/* rdm */
+void hw_atl_rdm_cpu_id_set(struct aq_hw_s *aq_hw, u32 cpuid, u32 dca)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_DCADCPUID_ADR(dca),
+                           HW_ATL_RDM_DCADCPUID_MSK,
+                           HW_ATL_RDM_DCADCPUID_SHIFT, cpuid);
+}
+
+void hw_atl_rdm_rx_dca_en_set(struct aq_hw_s *aq_hw, u32 rx_dca_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_DCA_EN_ADR, HW_ATL_RDM_DCA_EN_MSK,
+                           HW_ATL_RDM_DCA_EN_SHIFT, rx_dca_en);
+}
+
+void hw_atl_rdm_rx_dca_mode_set(struct aq_hw_s *aq_hw, u32 rx_dca_mode)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_DCA_MODE_ADR,
+                           HW_ATL_RDM_DCA_MODE_MSK,
+                           HW_ATL_RDM_DCA_MODE_SHIFT, rx_dca_mode);
+}
+
+void hw_atl_rdm_rx_desc_data_buff_size_set(struct aq_hw_s *aq_hw,
+                                          u32 rx_desc_data_buff_size,
+                                          u32 descriptor)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_DESCDDATA_SIZE_ADR(descriptor),
+                           HW_ATL_RDM_DESCDDATA_SIZE_MSK,
+                           HW_ATL_RDM_DESCDDATA_SIZE_SHIFT,
+                           rx_desc_data_buff_size);
+}
+
+void hw_atl_rdm_rx_desc_dca_en_set(struct aq_hw_s *aq_hw, u32 rx_desc_dca_en,
+                                  u32 dca)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_DCADDESC_EN_ADR(dca),
+                           HW_ATL_RDM_DCADDESC_EN_MSK,
+                           HW_ATL_RDM_DCADDESC_EN_SHIFT,
+                           rx_desc_dca_en);
+}
+
+void hw_atl_rdm_rx_desc_en_set(struct aq_hw_s *aq_hw, u32 rx_desc_en,
+                              u32 descriptor)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_DESCDEN_ADR(descriptor),
+                           HW_ATL_RDM_DESCDEN_MSK,
+                           HW_ATL_RDM_DESCDEN_SHIFT,
+                           rx_desc_en);
+}
+
+void hw_atl_rdm_rx_desc_head_buff_size_set(struct aq_hw_s *aq_hw,
+                                          u32 rx_desc_head_buff_size,
+                                          u32 descriptor)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_DESCDHDR_SIZE_ADR(descriptor),
+                           HW_ATL_RDM_DESCDHDR_SIZE_MSK,
+                           HW_ATL_RDM_DESCDHDR_SIZE_SHIFT,
+                           rx_desc_head_buff_size);
+}
+
+void hw_atl_rdm_rx_desc_head_splitting_set(struct aq_hw_s *aq_hw,
+                                          u32 rx_desc_head_splitting,
+                                          u32 descriptor)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_DESCDHDR_SPLIT_ADR(descriptor),
+                           HW_ATL_RDM_DESCDHDR_SPLIT_MSK,
+                           HW_ATL_RDM_DESCDHDR_SPLIT_SHIFT,
+                           rx_desc_head_splitting);
+}
+
+u32 hw_atl_rdm_rx_desc_head_ptr_get(struct aq_hw_s *aq_hw, u32 descriptor)
+{
+       return aq_hw_read_reg_bit(aq_hw, HW_ATL_RDM_DESCDHD_ADR(descriptor),
+                                 HW_ATL_RDM_DESCDHD_MSK,
+                                 HW_ATL_RDM_DESCDHD_SHIFT);
+}
+
+void hw_atl_rdm_rx_desc_len_set(struct aq_hw_s *aq_hw, u32 rx_desc_len,
+                               u32 descriptor)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_DESCDLEN_ADR(descriptor),
+                           HW_ATL_RDM_DESCDLEN_MSK, HW_ATL_RDM_DESCDLEN_SHIFT,
+                           rx_desc_len);
+}
+
+void hw_atl_rdm_rx_desc_res_set(struct aq_hw_s *aq_hw, u32 rx_desc_res,
+                               u32 descriptor)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_DESCDRESET_ADR(descriptor),
+                           HW_ATL_RDM_DESCDRESET_MSK,
+                           HW_ATL_RDM_DESCDRESET_SHIFT,
+                           rx_desc_res);
+}
+
+void hw_atl_rdm_rx_desc_wr_wb_irq_en_set(struct aq_hw_s *aq_hw,
+                                        u32 rx_desc_wr_wb_irq_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_INT_DESC_WRB_EN_ADR,
+                           HW_ATL_RDM_INT_DESC_WRB_EN_MSK,
+                           HW_ATL_RDM_INT_DESC_WRB_EN_SHIFT,
+                           rx_desc_wr_wb_irq_en);
+}
+
+void hw_atl_rdm_rx_head_dca_en_set(struct aq_hw_s *aq_hw, u32 rx_head_dca_en,
+                                  u32 dca)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_DCADHDR_EN_ADR(dca),
+                           HW_ATL_RDM_DCADHDR_EN_MSK,
+                           HW_ATL_RDM_DCADHDR_EN_SHIFT,
+                           rx_head_dca_en);
+}
+
+void hw_atl_rdm_rx_pld_dca_en_set(struct aq_hw_s *aq_hw, u32 rx_pld_dca_en,
+                                 u32 dca)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_DCADPAY_EN_ADR(dca),
+                           HW_ATL_RDM_DCADPAY_EN_MSK,
+                           HW_ATL_RDM_DCADPAY_EN_SHIFT,
+                           rx_pld_dca_en);
+}
+
+void hw_atl_rdm_rdm_intr_moder_en_set(struct aq_hw_s *aq_hw,
+                                     u32 rdm_intr_moder_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RDM_INT_RIM_EN_ADR,
+                           HW_ATL_RDM_INT_RIM_EN_MSK,
+                           HW_ATL_RDM_INT_RIM_EN_SHIFT,
+                           rdm_intr_moder_en);
+}
+
+/* reg */
+void hw_atl_reg_gen_irq_map_set(struct aq_hw_s *aq_hw, u32 gen_intr_map,
+                               u32 regidx)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_GEN_INTR_MAP_ADR(regidx), gen_intr_map);
+}
+
+u32 hw_atl_reg_gen_irq_status_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_GEN_INTR_STAT_ADR);
+}
+
+void hw_atl_reg_irq_glb_ctl_set(struct aq_hw_s *aq_hw, u32 intr_glb_ctl)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_INTR_GLB_CTL_ADR, intr_glb_ctl);
+}
+
+void hw_atl_reg_irq_thr_set(struct aq_hw_s *aq_hw, u32 intr_thr, u32 throttle)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_INTR_THR_ADR(throttle), intr_thr);
+}
+
+void hw_atl_reg_rx_dma_desc_base_addresslswset(struct aq_hw_s *aq_hw,
+                                              u32 rx_dma_desc_base_addrlsw,
+                                              u32 descriptor)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_RX_DMA_DESC_BASE_ADDRLSW_ADR(descriptor),
+                       rx_dma_desc_base_addrlsw);
+}
+
+void hw_atl_reg_rx_dma_desc_base_addressmswset(struct aq_hw_s *aq_hw,
+                                              u32 rx_dma_desc_base_addrmsw,
+                                              u32 descriptor)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_RX_DMA_DESC_BASE_ADDRMSW_ADR(descriptor),
+                       rx_dma_desc_base_addrmsw);
+}
+
+u32 hw_atl_reg_rx_dma_desc_status_get(struct aq_hw_s *aq_hw, u32 descriptor)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_RX_DMA_DESC_STAT_ADR(descriptor));
+}
+
+void hw_atl_reg_rx_dma_desc_tail_ptr_set(struct aq_hw_s *aq_hw,
+                                        u32 rx_dma_desc_tail_ptr,
+                                        u32 descriptor)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_RX_DMA_DESC_TAIL_PTR_ADR(descriptor),
+                       rx_dma_desc_tail_ptr);
+}
+
+void hw_atl_reg_rx_flr_mcst_flr_msk_set(struct aq_hw_s *aq_hw,
+                                       u32 rx_flr_mcst_flr_msk)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_RX_FLR_MCST_FLR_MSK_ADR,
+                       rx_flr_mcst_flr_msk);
+}
+
+void hw_atl_reg_rx_flr_mcst_flr_set(struct aq_hw_s *aq_hw, u32 rx_flr_mcst_flr,
+                                   u32 filter)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_RX_FLR_MCST_FLR_ADR(filter),
+                       rx_flr_mcst_flr);
+}
+
+void hw_atl_reg_rx_flr_rss_control1set(struct aq_hw_s *aq_hw,
+                                      u32 rx_flr_rss_control1)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_RX_FLR_RSS_CONTROL1_ADR,
+                       rx_flr_rss_control1);
+}
+
+void hw_atl_reg_rx_flr_control2_set(struct aq_hw_s *aq_hw,
+                                   u32 rx_filter_control2)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_RX_FLR_CONTROL2_ADR, rx_filter_control2);
+}
+
+void hw_atl_reg_rx_intr_moder_ctrl_set(struct aq_hw_s *aq_hw,
+                                      u32 rx_intr_moderation_ctl,
+                                      u32 queue)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_RX_INTR_MODERATION_CTL_ADR(queue),
+                       rx_intr_moderation_ctl);
+}
+
+void hw_atl_reg_tx_dma_debug_ctl_set(struct aq_hw_s *aq_hw,
+                                    u32 tx_dma_debug_ctl)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_TX_DMA_DEBUG_CTL_ADR, tx_dma_debug_ctl);
+}
+
+void hw_atl_reg_tx_dma_desc_base_addresslswset(struct aq_hw_s *aq_hw,
+                                              u32 tx_dma_desc_base_addrlsw,
+                                              u32 descriptor)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_TX_DMA_DESC_BASE_ADDRLSW_ADR(descriptor),
+                       tx_dma_desc_base_addrlsw);
+}
+
+void hw_atl_reg_tx_dma_desc_base_addressmswset(struct aq_hw_s *aq_hw,
+                                              u32 tx_dma_desc_base_addrmsw,
+                                              u32 descriptor)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_TX_DMA_DESC_BASE_ADDRMSW_ADR(descriptor),
+                       tx_dma_desc_base_addrmsw);
+}
+
+void hw_atl_reg_tx_dma_desc_tail_ptr_set(struct aq_hw_s *aq_hw,
+                                        u32 tx_dma_desc_tail_ptr,
+                                        u32 descriptor)
+{
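+       /*
+        * Ensure all descriptor stores in host memory are globally
+        * visible before the tail-pointer (doorbell) write below hands
+        * the descriptors to the NIC.
+        */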
+       rte_wmb();
+
+       aq_hw_write_reg(aq_hw, HW_ATL_TX_DMA_DESC_TAIL_PTR_ADR(descriptor),
+                       tx_dma_desc_tail_ptr);
+}
+
+void hw_atl_reg_tx_intr_moder_ctrl_set(struct aq_hw_s *aq_hw,
+                                      u32 tx_intr_moderation_ctl,
+                                      u32 queue)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_TX_INTR_MODERATION_CTL_ADR(queue),
+                       tx_intr_moderation_ctl);
+}
+
+/* RPB: rx packet buffer */
+void hw_atl_rpb_dma_sys_lbk_set(struct aq_hw_s *aq_hw, u32 dma_sys_lbk)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPB_DMA_SYS_LBK_ADR,
+                           HW_ATL_RPB_DMA_SYS_LBK_MSK,
+                           HW_ATL_RPB_DMA_SYS_LBK_SHIFT, dma_sys_lbk);
+}
+
+void hw_atl_rpb_rpf_rx_traf_class_mode_set(struct aq_hw_s *aq_hw,
+                                          u32 rx_traf_class_mode)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPB_RPF_RX_TC_MODE_ADR,
+                           HW_ATL_RPB_RPF_RX_TC_MODE_MSK,
+                           HW_ATL_RPB_RPF_RX_TC_MODE_SHIFT,
+                           rx_traf_class_mode);
+}
+
+u32 hw_atl_rpb_rpf_rx_traf_class_mode_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg_bit(aq_hw, HW_ATL_RPB_RPF_RX_TC_MODE_ADR,
+                       HW_ATL_RPB_RPF_RX_TC_MODE_MSK,
+                       HW_ATL_RPB_RPF_RX_TC_MODE_SHIFT);
+}
+
+void hw_atl_rpb_rx_buff_en_set(struct aq_hw_s *aq_hw, u32 rx_buff_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPB_RX_BUF_EN_ADR,
+                           HW_ATL_RPB_RX_BUF_EN_MSK,
+                           HW_ATL_RPB_RX_BUF_EN_SHIFT, rx_buff_en);
+}
+
+void hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(struct aq_hw_s *aq_hw,
+                                               u32 rx_buff_hi_threshold_per_tc,
+                                               u32 buffer)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPB_RXBHI_THRESH_ADR(buffer),
+                           HW_ATL_RPB_RXBHI_THRESH_MSK,
+                           HW_ATL_RPB_RXBHI_THRESH_SHIFT,
+                           rx_buff_hi_threshold_per_tc);
+}
+
+void hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(struct aq_hw_s *aq_hw,
+                                               u32 rx_buff_lo_threshold_per_tc,
+                                               u32 buffer)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPB_RXBLO_THRESH_ADR(buffer),
+                           HW_ATL_RPB_RXBLO_THRESH_MSK,
+                           HW_ATL_RPB_RXBLO_THRESH_SHIFT,
+                           rx_buff_lo_threshold_per_tc);
+}
+
+void hw_atl_rpb_rx_flow_ctl_mode_set(struct aq_hw_s *aq_hw,
+                                    u32 rx_flow_ctl_mode)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPB_RX_FC_MODE_ADR,
+                           HW_ATL_RPB_RX_FC_MODE_MSK,
+                           HW_ATL_RPB_RX_FC_MODE_SHIFT, rx_flow_ctl_mode);
+}
+
+void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
+                                           u32 rx_pkt_buff_size_per_tc,
+                                           u32 buffer)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPB_RXBBUF_SIZE_ADR(buffer),
+                           HW_ATL_RPB_RXBBUF_SIZE_MSK,
+                           HW_ATL_RPB_RXBBUF_SIZE_SHIFT,
+                           rx_pkt_buff_size_per_tc);
+}
+
+void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw,
+                                     u32 rx_xoff_en_per_tc,
+                                     u32 buffer)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPB_RXBXOFF_EN_ADR(buffer),
+                           HW_ATL_RPB_RXBXOFF_EN_MSK,
+                           HW_ATL_RPB_RXBXOFF_EN_SHIFT,
+                           rx_xoff_en_per_tc);
+}
+
+/* RPF: rx packet filter */
+
+void hw_atl_rpfl2broadcast_count_threshold_set(struct aq_hw_s *aq_hw,
+                                              u32 l2broadcast_count_threshold)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPFL2BC_THRESH_ADR,
+                           HW_ATL_RPFL2BC_THRESH_MSK,
+                           HW_ATL_RPFL2BC_THRESH_SHIFT,
+                           l2broadcast_count_threshold);
+}
+
+void hw_atl_rpfl2broadcast_en_set(struct aq_hw_s *aq_hw, u32 l2broadcast_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPFL2BC_EN_ADR, HW_ATL_RPFL2BC_EN_MSK,
+                           HW_ATL_RPFL2BC_EN_SHIFT, l2broadcast_en);
+}
+
+void hw_atl_rpfl2broadcast_flr_act_set(struct aq_hw_s *aq_hw,
+                                      u32 l2broadcast_flr_act)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPFL2BC_ACT_ADR,
+                           HW_ATL_RPFL2BC_ACT_MSK,
+                           HW_ATL_RPFL2BC_ACT_SHIFT, l2broadcast_flr_act);
+}
+
+void hw_atl_rpfl2multicast_flr_en_set(struct aq_hw_s *aq_hw,
+                                     u32 l2multicast_flr_en,
+                                     u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPFL2MC_ENF_ADR(filter),
+                           HW_ATL_RPFL2MC_ENF_MSK,
+                           HW_ATL_RPFL2MC_ENF_SHIFT, l2multicast_flr_en);
+}
+
+void hw_atl_rpfl2promiscuous_mode_en_set(struct aq_hw_s *aq_hw,
+                                        u32 l2promiscuous_mode_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPFL2PROMIS_MODE_ADR,
+                           HW_ATL_RPFL2PROMIS_MODE_MSK,
+                           HW_ATL_RPFL2PROMIS_MODE_SHIFT,
+                           l2promiscuous_mode_en);
+}
+
+void hw_atl_rpfl2unicast_flr_act_set(struct aq_hw_s *aq_hw,
+                                    u32 l2unicast_flr_act,
+                                    u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPFL2UC_ACTF_ADR(filter),
+                           HW_ATL_RPFL2UC_ACTF_MSK, HW_ATL_RPFL2UC_ACTF_SHIFT,
+                           l2unicast_flr_act);
+}
+
+void hw_atl_rpfl2_uc_flr_en_set(struct aq_hw_s *aq_hw, u32 l2unicast_flr_en,
+                               u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPFL2UC_ENF_ADR(filter),
+                           HW_ATL_RPFL2UC_ENF_MSK,
+                           HW_ATL_RPFL2UC_ENF_SHIFT, l2unicast_flr_en);
+}
+
+void hw_atl_rpfl2unicast_dest_addresslsw_set(struct aq_hw_s *aq_hw,
+                                            u32 l2unicast_dest_addresslsw,
+                                            u32 filter)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_RPFL2UC_DAFLSW_ADR(filter),
+                       l2unicast_dest_addresslsw);
+}
+
+void hw_atl_rpfl2unicast_dest_addressmsw_set(struct aq_hw_s *aq_hw,
+                                            u32 l2unicast_dest_addressmsw,
+                                            u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPFL2UC_DAFMSW_ADR(filter),
+                           HW_ATL_RPFL2UC_DAFMSW_MSK,
+                           HW_ATL_RPFL2UC_DAFMSW_SHIFT,
+                           l2unicast_dest_addressmsw);
+}
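+
+/* The station MAC address for a unicast filter is split across the two
+ * registers above: the low 32 bits go through the lsw register, the
+ * high 16 bits through the msw bitfield. A plausible programming order
+ * (an assumption, not mandated by this file) is: disable the filter via
+ * hw_atl_rpfl2_uc_flr_en_set(), write lsw, write msw, then re-enable.
+ */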
+
+void hw_atl_rpfl2_accept_all_mc_packets_set(struct aq_hw_s *aq_hw,
+                                           u32 l2_accept_all_mc_packets)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPFL2MC_ACCEPT_ALL_ADR,
+                           HW_ATL_RPFL2MC_ACCEPT_ALL_MSK,
+                           HW_ATL_RPFL2MC_ACCEPT_ALL_SHIFT,
+                           l2_accept_all_mc_packets);
+}
+
+void hw_atl_rpf_rpb_user_priority_tc_map_set(struct aq_hw_s *aq_hw,
+                                            u32 user_priority_tc_map, u32 tc)
+{
+       /* register address for bitfield rx_tc_up{t}[2:0] */
+       static const u32 rpf_rpb_rx_tc_upt_adr[8] = {
+                       0x000054C4U, 0x000054C4U, 0x000054C4U, 0x000054C4U,
+                       0x000054C4U, 0x000054C4U, 0x000054C4U, 0x000054C4U
+               };
+
+       /* bitmask for bitfield rx_tc_up{t}[2:0] */
+       static const u32 rpf_rpb_rx_tc_upt_msk[8] = {
+                       0x00000007U, 0x00000070U, 0x00000700U, 0x00007000U,
+                       0x00070000U, 0x00700000U, 0x07000000U, 0x70000000U
+               };
+
+       /* lower bit position of bitfield rx_tc_up{t}[2:0] */
+       static const u32 rpf_rpb_rx_tc_upt_shft[8] = {
+                       0U, 4U, 8U, 12U, 16U, 20U, 24U, 28U
+               };
+
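+       /* All eight 3-bit rx_tc_up{t} fields are packed into the single
+        * register at 0x54C4 at 4-bit strides, which is why the address
+        * table above repeats while the masks and shifts differ.
+        */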
+       aq_hw_write_reg_bit(aq_hw, rpf_rpb_rx_tc_upt_adr[tc],
+                           rpf_rpb_rx_tc_upt_msk[tc],
+                           rpf_rpb_rx_tc_upt_shft[tc],
+                           user_priority_tc_map);
+}
+
+void hw_atl_rpf_rss_key_addr_set(struct aq_hw_s *aq_hw, u32 rss_key_addr)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_RSS_KEY_ADDR_ADR,
+                           HW_ATL_RPF_RSS_KEY_ADDR_MSK,
+                           HW_ATL_RPF_RSS_KEY_ADDR_SHIFT,
+                           rss_key_addr);
+}
+
+void hw_atl_rpf_rss_key_wr_data_set(struct aq_hw_s *aq_hw, u32 rss_key_wr_data)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_RPF_RSS_KEY_WR_DATA_ADR,
+                       rss_key_wr_data);
+}
+
+u32 hw_atl_rpf_rss_key_wr_en_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg_bit(aq_hw, HW_ATL_RPF_RSS_KEY_WR_ENI_ADR,
+                                 HW_ATL_RPF_RSS_KEY_WR_ENI_MSK,
+                                 HW_ATL_RPF_RSS_KEY_WR_ENI_SHIFT);
+}
+
+void hw_atl_rpf_rss_key_wr_en_set(struct aq_hw_s *aq_hw, u32 rss_key_wr_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_RSS_KEY_WR_ENI_ADR,
+                           HW_ATL_RPF_RSS_KEY_WR_ENI_MSK,
+                           HW_ATL_RPF_RSS_KEY_WR_ENI_SHIFT,
+                           rss_key_wr_en);
+}
+
+void hw_atl_rpf_rss_redir_tbl_addr_set(struct aq_hw_s *aq_hw,
+                                      u32 rss_redir_tbl_addr)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_RSS_REDIR_ADDR_ADR,
+                           HW_ATL_RPF_RSS_REDIR_ADDR_MSK,
+                           HW_ATL_RPF_RSS_REDIR_ADDR_SHIFT,
+                           rss_redir_tbl_addr);
+}
+
+void hw_atl_rpf_rss_redir_tbl_wr_data_set(struct aq_hw_s *aq_hw,
+                                         u32 rss_redir_tbl_wr_data)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_RSS_REDIR_WR_DATA_ADR,
+                           HW_ATL_RPF_RSS_REDIR_WR_DATA_MSK,
+                           HW_ATL_RPF_RSS_REDIR_WR_DATA_SHIFT,
+                           rss_redir_tbl_wr_data);
+}
+
+u32 hw_atl_rpf_rss_redir_wr_en_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg_bit(aq_hw, HW_ATL_RPF_RSS_REDIR_WR_ENI_ADR,
+                                 HW_ATL_RPF_RSS_REDIR_WR_ENI_MSK,
+                                 HW_ATL_RPF_RSS_REDIR_WR_ENI_SHIFT);
+}
+
+void hw_atl_rpf_rss_redir_wr_en_set(struct aq_hw_s *aq_hw, u32 rss_redir_wr_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_RSS_REDIR_WR_ENI_ADR,
+                           HW_ATL_RPF_RSS_REDIR_WR_ENI_MSK,
+                           HW_ATL_RPF_RSS_REDIR_WR_ENI_SHIFT, rss_redir_wr_en);
+}
+
+void hw_atl_rpf_tpo_to_rpf_sys_lbk_set(struct aq_hw_s *aq_hw,
+                                      u32 tpo_to_rpf_sys_lbk)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_TPO_RPF_SYS_LBK_ADR,
+                           HW_ATL_RPF_TPO_RPF_SYS_LBK_MSK,
+                           HW_ATL_RPF_TPO_RPF_SYS_LBK_SHIFT,
+                           tpo_to_rpf_sys_lbk);
+}
+
+void hw_atl_rpf_vlan_inner_etht_set(struct aq_hw_s *aq_hw, u32 vlan_inner_etht)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_VL_INNER_TPID_ADR,
+                           HW_ATL_RPF_VL_INNER_TPID_MSK,
+                           HW_ATL_RPF_VL_INNER_TPID_SHIFT,
+                           vlan_inner_etht);
+}
+
+void hw_atl_rpf_vlan_outer_etht_set(struct aq_hw_s *aq_hw, u32 vlan_outer_etht)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_VL_OUTER_TPID_ADR,
+                           HW_ATL_RPF_VL_OUTER_TPID_MSK,
+                           HW_ATL_RPF_VL_OUTER_TPID_SHIFT,
+                           vlan_outer_etht);
+}
+
+void hw_atl_rpf_vlan_prom_mode_en_set(struct aq_hw_s *aq_hw,
+                                     u32 vlan_prom_mode_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_VL_PROMIS_MODE_ADR,
+                           HW_ATL_RPF_VL_PROMIS_MODE_MSK,
+                           HW_ATL_RPF_VL_PROMIS_MODE_SHIFT,
+                           vlan_prom_mode_en);
+}
+
+void hw_atl_rpf_vlan_accept_untagged_packets_set(struct aq_hw_s *aq_hw,
+                                                u32 vlan_acc_untagged_packets)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_VL_ACCEPT_UNTAGGED_MODE_ADR,
+                           HW_ATL_RPF_VL_ACCEPT_UNTAGGED_MODE_MSK,
+                           HW_ATL_RPF_VL_ACCEPT_UNTAGGED_MODE_SHIFT,
+                           vlan_acc_untagged_packets);
+}
+
+void hw_atl_rpf_vlan_untagged_act_set(struct aq_hw_s *aq_hw,
+                                     u32 vlan_untagged_act)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_VL_UNTAGGED_ACT_ADR,
+                           HW_ATL_RPF_VL_UNTAGGED_ACT_MSK,
+                           HW_ATL_RPF_VL_UNTAGGED_ACT_SHIFT,
+                           vlan_untagged_act);
+}
+
+void hw_atl_rpf_vlan_flr_en_set(struct aq_hw_s *aq_hw, u32 vlan_flr_en,
+                               u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_VL_EN_F_ADR(filter),
+                           HW_ATL_RPF_VL_EN_F_MSK,
+                           HW_ATL_RPF_VL_EN_F_SHIFT,
+                           vlan_flr_en);
+}
+
+void hw_atl_rpf_vlan_flr_act_set(struct aq_hw_s *aq_hw, u32 vlan_flr_act,
+                                u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_VL_ACT_F_ADR(filter),
+                           HW_ATL_RPF_VL_ACT_F_MSK,
+                           HW_ATL_RPF_VL_ACT_F_SHIFT,
+                           vlan_flr_act);
+}
+
+void hw_atl_rpf_vlan_id_flr_set(struct aq_hw_s *aq_hw, u32 vlan_id_flr,
+                               u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_VL_ID_F_ADR(filter),
+                           HW_ATL_RPF_VL_ID_F_MSK,
+                           HW_ATL_RPF_VL_ID_F_SHIFT,
+                           vlan_id_flr);
+}
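+
+/* A complete VLAN filter entry is programmed with the three helpers
+ * above; a minimal sketch (the enable-last ordering is an assumption):
+ *
+ *     hw_atl_rpf_vlan_id_flr_set(aq_hw, vlan_id, idx);
+ *     hw_atl_rpf_vlan_flr_act_set(aq_hw, action, idx);
+ *     hw_atl_rpf_vlan_flr_en_set(aq_hw, 1U, idx);
+ */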
+
+void hw_atl_rpf_etht_flr_en_set(struct aq_hw_s *aq_hw, u32 etht_flr_en,
+                               u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_ET_ENF_ADR(filter),
+                           HW_ATL_RPF_ET_ENF_MSK,
+                           HW_ATL_RPF_ET_ENF_SHIFT, etht_flr_en);
+}
+
+void hw_atl_rpf_etht_user_priority_en_set(struct aq_hw_s *aq_hw,
+                                         u32 etht_user_priority_en, u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_ET_UPFEN_ADR(filter),
+                           HW_ATL_RPF_ET_UPFEN_MSK, HW_ATL_RPF_ET_UPFEN_SHIFT,
+                           etht_user_priority_en);
+}
+
+void hw_atl_rpf_etht_rx_queue_en_set(struct aq_hw_s *aq_hw,
+                                    u32 etht_rx_queue_en,
+                                    u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_ET_RXQFEN_ADR(filter),
+                           HW_ATL_RPF_ET_RXQFEN_MSK,
+                           HW_ATL_RPF_ET_RXQFEN_SHIFT,
+                           etht_rx_queue_en);
+}
+
+void hw_atl_rpf_etht_user_priority_set(struct aq_hw_s *aq_hw,
+                                      u32 etht_user_priority,
+                                      u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_ET_UPF_ADR(filter),
+                           HW_ATL_RPF_ET_UPF_MSK,
+                           HW_ATL_RPF_ET_UPF_SHIFT, etht_user_priority);
+}
+
+void hw_atl_rpf_etht_rx_queue_set(struct aq_hw_s *aq_hw, u32 etht_rx_queue,
+                                 u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_ET_RXQF_ADR(filter),
+                           HW_ATL_RPF_ET_RXQF_MSK,
+                           HW_ATL_RPF_ET_RXQF_SHIFT, etht_rx_queue);
+}
+
+void hw_atl_rpf_etht_mgt_queue_set(struct aq_hw_s *aq_hw, u32 etht_mgt_queue,
+                                  u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_ET_MNG_RXQF_ADR(filter),
+                           HW_ATL_RPF_ET_MNG_RXQF_MSK,
+                           HW_ATL_RPF_ET_MNG_RXQF_SHIFT,
+                           etht_mgt_queue);
+}
+
+void hw_atl_rpf_etht_flr_act_set(struct aq_hw_s *aq_hw, u32 etht_flr_act,
+                                u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_ET_ACTF_ADR(filter),
+                           HW_ATL_RPF_ET_ACTF_MSK,
+                           HW_ATL_RPF_ET_ACTF_SHIFT, etht_flr_act);
+}
+
+void hw_atl_rpf_etht_flr_set(struct aq_hw_s *aq_hw, u32 etht_flr, u32 filter)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPF_ET_VALF_ADR(filter),
+                           HW_ATL_RPF_ET_VALF_MSK,
+                           HW_ATL_RPF_ET_VALF_SHIFT, etht_flr);
+}
+
+/* RPO: rx packet offload */
+void hw_atl_rpo_ipv4header_crc_offload_en_set(struct aq_hw_s *aq_hw,
+                                             u32 ipv4header_crc_offload_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPO_IPV4CHK_EN_ADR,
+                           HW_ATL_RPO_IPV4CHK_EN_MSK,
+                           HW_ATL_RPO_IPV4CHK_EN_SHIFT,
+                           ipv4header_crc_offload_en);
+}
+
+void hw_atl_rpo_rx_desc_vlan_stripping_set(struct aq_hw_s *aq_hw,
+                                          u32 rx_desc_vlan_stripping,
+                                          u32 descriptor)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPO_DESCDVL_STRIP_ADR(descriptor),
+                           HW_ATL_RPO_DESCDVL_STRIP_MSK,
+                           HW_ATL_RPO_DESCDVL_STRIP_SHIFT,
+                           rx_desc_vlan_stripping);
+}
+
+void hw_atl_rpo_tcp_udp_crc_offload_en_set(struct aq_hw_s *aq_hw,
+                                          u32 tcp_udp_crc_offload_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPOL4CHK_EN_ADR,
+                           HW_ATL_RPOL4CHK_EN_MSK,
+                           HW_ATL_RPOL4CHK_EN_SHIFT, tcp_udp_crc_offload_en);
+}
+
+void hw_atl_rpo_lro_en_set(struct aq_hw_s *aq_hw, u32 lro_en)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_RPO_LRO_EN_ADR, lro_en);
+}
+
+void hw_atl_rpo_lro_patch_optimization_en_set(struct aq_hw_s *aq_hw,
+                                             u32 lro_patch_optimization_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPO_LRO_PTOPT_EN_ADR,
+                           HW_ATL_RPO_LRO_PTOPT_EN_MSK,
+                           HW_ATL_RPO_LRO_PTOPT_EN_SHIFT,
+                           lro_patch_optimization_en);
+}
+
+void hw_atl_rpo_lro_qsessions_lim_set(struct aq_hw_s *aq_hw,
+                                     u32 lro_qsessions_lim)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPO_LRO_QSES_LMT_ADR,
+                           HW_ATL_RPO_LRO_QSES_LMT_MSK,
+                           HW_ATL_RPO_LRO_QSES_LMT_SHIFT,
+                           lro_qsessions_lim);
+}
+
+void hw_atl_rpo_lro_total_desc_lim_set(struct aq_hw_s *aq_hw,
+                                      u32 lro_total_desc_lim)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPO_LRO_TOT_DSC_LMT_ADR,
+                           HW_ATL_RPO_LRO_TOT_DSC_LMT_MSK,
+                           HW_ATL_RPO_LRO_TOT_DSC_LMT_SHIFT,
+                           lro_total_desc_lim);
+}
+
+void hw_atl_rpo_lro_min_pay_of_first_pkt_set(struct aq_hw_s *aq_hw,
+                                            u32 lro_min_pld_of_first_pkt)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPO_LRO_PKT_MIN_ADR,
+                           HW_ATL_RPO_LRO_PKT_MIN_MSK,
+                           HW_ATL_RPO_LRO_PKT_MIN_SHIFT,
+                           lro_min_pld_of_first_pkt);
+}
+
+void hw_atl_rpo_lro_pkt_lim_set(struct aq_hw_s *aq_hw, u32 lro_pkt_lim)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_RPO_LRO_RSC_MAX_ADR, lro_pkt_lim);
+}
+
+void hw_atl_rpo_lro_max_num_of_descriptors_set(struct aq_hw_s *aq_hw,
+                                       u32 lro_max_number_of_descriptors,
+                                       u32 lro)
+{
+       /* Register address for bitfield lro{L}_des_max[1:0] */
+       static const u32 rpo_lro_ldes_max_adr[32] = {
+                       0x000055A0U, 0x000055A0U, 0x000055A0U, 0x000055A0U,
+                       0x000055A0U, 0x000055A0U, 0x000055A0U, 0x000055A0U,
+                       0x000055A4U, 0x000055A4U, 0x000055A4U, 0x000055A4U,
+                       0x000055A4U, 0x000055A4U, 0x000055A4U, 0x000055A4U,
+                       0x000055A8U, 0x000055A8U, 0x000055A8U, 0x000055A8U,
+                       0x000055A8U, 0x000055A8U, 0x000055A8U, 0x000055A8U,
+                       0x000055ACU, 0x000055ACU, 0x000055ACU, 0x000055ACU,
+                       0x000055ACU, 0x000055ACU, 0x000055ACU, 0x000055ACU
+               };
+
+       /* Bitmask for bitfield lro{L}_des_max[1:0] */
+       static const u32 rpo_lro_ldes_max_msk[32] = {
+                       0x00000003U, 0x00000030U, 0x00000300U, 0x00003000U,
+                       0x00030000U, 0x00300000U, 0x03000000U, 0x30000000U,
+                       0x00000003U, 0x00000030U, 0x00000300U, 0x00003000U,
+                       0x00030000U, 0x00300000U, 0x03000000U, 0x30000000U,
+                       0x00000003U, 0x00000030U, 0x00000300U, 0x00003000U,
+                       0x00030000U, 0x00300000U, 0x03000000U, 0x30000000U,
+                       0x00000003U, 0x00000030U, 0x00000300U, 0x00003000U,
+                       0x00030000U, 0x00300000U, 0x03000000U, 0x30000000U
+               };
+
+       /* Lower bit position of bitfield lro{L}_des_max[1:0] */
+       static const u32 rpo_lro_ldes_max_shift[32] = {
+                       0U, 4U, 8U, 12U, 16U, 20U, 24U, 28U,
+                       0U, 4U, 8U, 12U, 16U, 20U, 24U, 28U,
+                       0U, 4U, 8U, 12U, 16U, 20U, 24U, 28U,
+                       0U, 4U, 8U, 12U, 16U, 20U, 24U, 28U
+               };
+
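+       /* The 32 LRO sessions map onto four registers (0x55A0..0x55AC),
+        * each packing eight 2-bit des_max fields at 4-bit strides, as
+        * encoded in the three lookup tables above.
+        */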
+       aq_hw_write_reg_bit(aq_hw, rpo_lro_ldes_max_adr[lro],
+                           rpo_lro_ldes_max_msk[lro],
+                           rpo_lro_ldes_max_shift[lro],
+                           lro_max_number_of_descriptors);
+}
+
+void hw_atl_rpo_lro_time_base_divider_set(struct aq_hw_s *aq_hw,
+                                         u32 lro_time_base_divider)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPO_LRO_TB_DIV_ADR,
+                           HW_ATL_RPO_LRO_TB_DIV_MSK,
+                           HW_ATL_RPO_LRO_TB_DIV_SHIFT,
+                           lro_time_base_divider);
+}
+
+void hw_atl_rpo_lro_inactive_interval_set(struct aq_hw_s *aq_hw,
+                                         u32 lro_inactive_interval)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPO_LRO_INA_IVAL_ADR,
+                           HW_ATL_RPO_LRO_INA_IVAL_MSK,
+                           HW_ATL_RPO_LRO_INA_IVAL_SHIFT,
+                           lro_inactive_interval);
+}
+
+void hw_atl_rpo_lro_max_coalescing_interval_set(struct aq_hw_s *aq_hw,
+                                               u32 lro_max_coal_interval)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RPO_LRO_MAX_IVAL_ADR,
+                           HW_ATL_RPO_LRO_MAX_IVAL_MSK,
+                           HW_ATL_RPO_LRO_MAX_IVAL_SHIFT,
+                           lro_max_coal_interval);
+}
+
+/* rx */
+void hw_atl_rx_rx_reg_res_dis_set(struct aq_hw_s *aq_hw, u32 rx_reg_res_dis)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_RX_REG_RES_DSBL_ADR,
+                           HW_ATL_RX_REG_RES_DSBL_MSK,
+                           HW_ATL_RX_REG_RES_DSBL_SHIFT,
+                           rx_reg_res_dis);
+}
+
+/* TDM: tx dma */
+void hw_atl_tdm_cpu_id_set(struct aq_hw_s *aq_hw, u32 cpuid, u32 dca)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TDM_DCADCPUID_ADR(dca),
+                           HW_ATL_TDM_DCADCPUID_MSK,
+                           HW_ATL_TDM_DCADCPUID_SHIFT, cpuid);
+}
+
+void hw_atl_tdm_large_send_offload_en_set(struct aq_hw_s *aq_hw,
+                                         u32 large_send_offload_en)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_TDM_LSO_EN_ADR, large_send_offload_en);
+}
+
+void hw_atl_tdm_tx_dca_en_set(struct aq_hw_s *aq_hw, u32 tx_dca_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TDM_DCA_EN_ADR, HW_ATL_TDM_DCA_EN_MSK,
+                           HW_ATL_TDM_DCA_EN_SHIFT, tx_dca_en);
+}
+
+void hw_atl_tdm_tx_dca_mode_set(struct aq_hw_s *aq_hw, u32 tx_dca_mode)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TDM_DCA_MODE_ADR,
+                           HW_ATL_TDM_DCA_MODE_MSK,
+                           HW_ATL_TDM_DCA_MODE_SHIFT, tx_dca_mode);
+}
+
+void hw_atl_tdm_tx_desc_dca_en_set(struct aq_hw_s *aq_hw, u32 tx_desc_dca_en,
+                                  u32 dca)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TDM_DCADDESC_EN_ADR(dca),
+                           HW_ATL_TDM_DCADDESC_EN_MSK,
+                           HW_ATL_TDM_DCADDESC_EN_SHIFT,
+                           tx_desc_dca_en);
+}
+
+void hw_atl_tdm_tx_desc_en_set(struct aq_hw_s *aq_hw, u32 tx_desc_en,
+                              u32 descriptor)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TDM_DESCDEN_ADR(descriptor),
+                           HW_ATL_TDM_DESCDEN_MSK,
+                           HW_ATL_TDM_DESCDEN_SHIFT,
+                           tx_desc_en);
+}
+
+u32 hw_atl_tdm_tx_desc_head_ptr_get(struct aq_hw_s *aq_hw, u32 descriptor)
+{
+       return aq_hw_read_reg_bit(aq_hw, HW_ATL_TDM_DESCDHD_ADR(descriptor),
+                                 HW_ATL_TDM_DESCDHD_MSK,
+                                 HW_ATL_TDM_DESCDHD_SHIFT);
+}
+
+void hw_atl_tdm_tx_desc_len_set(struct aq_hw_s *aq_hw, u32 tx_desc_len,
+                               u32 descriptor)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TDM_DESCDLEN_ADR(descriptor),
+                           HW_ATL_TDM_DESCDLEN_MSK,
+                           HW_ATL_TDM_DESCDLEN_SHIFT,
+                           tx_desc_len);
+}
+
+void hw_atl_tdm_tx_desc_wr_wb_irq_en_set(struct aq_hw_s *aq_hw,
+                                        u32 tx_desc_wr_wb_irq_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TDM_INT_DESC_WRB_EN_ADR,
+                           HW_ATL_TDM_INT_DESC_WRB_EN_MSK,
+                           HW_ATL_TDM_INT_DESC_WRB_EN_SHIFT,
+                           tx_desc_wr_wb_irq_en);
+}
+
+void hw_atl_tdm_tx_desc_wr_wb_threshold_set(struct aq_hw_s *aq_hw,
+                                           u32 tx_desc_wr_wb_threshold,
+                                           u32 descriptor)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TDM_DESCDWRB_THRESH_ADR(descriptor),
+                           HW_ATL_TDM_DESCDWRB_THRESH_MSK,
+                           HW_ATL_TDM_DESCDWRB_THRESH_SHIFT,
+                           tx_desc_wr_wb_threshold);
+}
+
+void hw_atl_tdm_tdm_intr_moder_en_set(struct aq_hw_s *aq_hw,
+                                     u32 tdm_irq_moderation_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TDM_INT_MOD_EN_ADR,
+                           HW_ATL_TDM_INT_MOD_EN_MSK,
+                           HW_ATL_TDM_INT_MOD_EN_SHIFT,
+                           tdm_irq_moderation_en);
+}
+
+/* thm */
+void hw_atl_thm_lso_tcp_flag_of_first_pkt_set(struct aq_hw_s *aq_hw,
+                                             u32 lso_tcp_flag_of_first_pkt)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_THM_LSO_TCP_FLAG_FIRST_ADR,
+                           HW_ATL_THM_LSO_TCP_FLAG_FIRST_MSK,
+                           HW_ATL_THM_LSO_TCP_FLAG_FIRST_SHIFT,
+                           lso_tcp_flag_of_first_pkt);
+}
+
+void hw_atl_thm_lso_tcp_flag_of_last_pkt_set(struct aq_hw_s *aq_hw,
+                                            u32 lso_tcp_flag_of_last_pkt)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_THM_LSO_TCP_FLAG_LAST_ADR,
+                           HW_ATL_THM_LSO_TCP_FLAG_LAST_MSK,
+                           HW_ATL_THM_LSO_TCP_FLAG_LAST_SHIFT,
+                           lso_tcp_flag_of_last_pkt);
+}
+
+void hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(struct aq_hw_s *aq_hw,
+                                              u32 lso_tcp_flag_of_middle_pkt)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_THM_LSO_TCP_FLAG_MID_ADR,
+                           HW_ATL_THM_LSO_TCP_FLAG_MID_MSK,
+                           HW_ATL_THM_LSO_TCP_FLAG_MID_SHIFT,
+                           lso_tcp_flag_of_middle_pkt);
+}
+
+/* TPB: tx packet buffer */
+void hw_atl_tpb_tx_buff_en_set(struct aq_hw_s *aq_hw, u32 tx_buff_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_TX_BUF_EN_ADR,
+                           HW_ATL_TPB_TX_BUF_EN_MSK,
+                           HW_ATL_TPB_TX_BUF_EN_SHIFT, tx_buff_en);
+}
+
+u32 hw_atl_rpb_tps_tx_tc_mode_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg_bit(aq_hw, HW_ATL_TPB_TX_TC_MODE_ADDR,
+                       HW_ATL_TPB_TX_TC_MODE_MSK,
+                       HW_ATL_TPB_TX_TC_MODE_SHIFT);
+}
+
+void hw_atl_rpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw,
+                                  u32 tx_traf_class_mode)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_TX_TC_MODE_ADDR,
+                       HW_ATL_TPB_TX_TC_MODE_MSK,
+                       HW_ATL_TPB_TX_TC_MODE_SHIFT,
+                       tx_traf_class_mode);
+}
+
+void hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(struct aq_hw_s *aq_hw,
+                                               u32 tx_buff_hi_threshold_per_tc,
+                                               u32 buffer)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_TXBHI_THRESH_ADR(buffer),
+                           HW_ATL_TPB_TXBHI_THRESH_MSK,
+                           HW_ATL_TPB_TXBHI_THRESH_SHIFT,
+                           tx_buff_hi_threshold_per_tc);
+}
+
+void hw_atl_tpb_tx_buff_lo_threshold_per_tc_set(struct aq_hw_s *aq_hw,
+                                               u32 tx_buff_lo_threshold_per_tc,
+                                               u32 buffer)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_TXBLO_THRESH_ADR(buffer),
+                           HW_ATL_TPB_TXBLO_THRESH_MSK,
+                           HW_ATL_TPB_TXBLO_THRESH_SHIFT,
+                           tx_buff_lo_threshold_per_tc);
+}
+
+void hw_atl_tpb_tx_dma_sys_lbk_en_set(struct aq_hw_s *aq_hw,
+                                     u32 tx_dma_sys_lbk_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_DMA_SYS_LBK_ADR,
+                           HW_ATL_TPB_DMA_SYS_LBK_MSK,
+                           HW_ATL_TPB_DMA_SYS_LBK_SHIFT,
+                           tx_dma_sys_lbk_en);
+}
+
+void hw_atl_tpb_tx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
+                                           u32 tx_pkt_buff_size_per_tc,
+                                           u32 buffer)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_TXBBUF_SIZE_ADR(buffer),
+                           HW_ATL_TPB_TXBBUF_SIZE_MSK,
+                           HW_ATL_TPB_TXBBUF_SIZE_SHIFT,
+                           tx_pkt_buff_size_per_tc);
+}
+
+void hw_atl_tpb_tx_path_scp_ins_en_set(struct aq_hw_s *aq_hw,
+                                      u32 tx_path_scp_ins_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_TX_SCP_INS_EN_ADR,
+                           HW_ATL_TPB_TX_SCP_INS_EN_MSK,
+                           HW_ATL_TPB_TX_SCP_INS_EN_SHIFT,
+                           tx_path_scp_ins_en);
+}
+
+/* TPO: tx packet offload */
+void hw_atl_tpo_ipv4header_crc_offload_en_set(struct aq_hw_s *aq_hw,
+                                             u32 ipv4header_crc_offload_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPO_IPV4CHK_EN_ADR,
+                           HW_ATL_TPO_IPV4CHK_EN_MSK,
+                           HW_ATL_TPO_IPV4CHK_EN_SHIFT,
+                           ipv4header_crc_offload_en);
+}
+
+void hw_atl_tpo_tcp_udp_crc_offload_en_set(struct aq_hw_s *aq_hw,
+                                          u32 tcp_udp_crc_offload_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPOL4CHK_EN_ADR,
+                           HW_ATL_TPOL4CHK_EN_MSK,
+                           HW_ATL_TPOL4CHK_EN_SHIFT,
+                           tcp_udp_crc_offload_en);
+}
+
+void hw_atl_tpo_tx_pkt_sys_lbk_en_set(struct aq_hw_s *aq_hw,
+                                     u32 tx_pkt_sys_lbk_en)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPO_PKT_SYS_LBK_ADR,
+                           HW_ATL_TPO_PKT_SYS_LBK_MSK,
+                           HW_ATL_TPO_PKT_SYS_LBK_SHIFT,
+                           tx_pkt_sys_lbk_en);
+}
+
+/* TPS: tx packet scheduler */
+void hw_atl_tps_tx_pkt_shed_data_arb_mode_set(struct aq_hw_s *aq_hw,
+                                             u32 tx_pkt_shed_data_arb_mode)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DATA_TC_ARB_MODE_ADR,
+                           HW_ATL_TPS_DATA_TC_ARB_MODE_MSK,
+                           HW_ATL_TPS_DATA_TC_ARB_MODE_SHIFT,
+                           tx_pkt_shed_data_arb_mode);
+}
+
+void hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(struct aq_hw_s *aq_hw,
+                                                       u32 curr_time_res)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_RATE_TA_RST_ADR,
+                           HW_ATL_TPS_DESC_RATE_TA_RST_MSK,
+                           HW_ATL_TPS_DESC_RATE_TA_RST_SHIFT,
+                           curr_time_res);
+}
+
+void hw_atl_tps_tx_pkt_shed_desc_rate_lim_set(struct aq_hw_s *aq_hw,
+                                             u32 tx_pkt_shed_desc_rate_lim)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_RATE_LIM_ADR,
+                           HW_ATL_TPS_DESC_RATE_LIM_MSK,
+                           HW_ATL_TPS_DESC_RATE_LIM_SHIFT,
+                           tx_pkt_shed_desc_rate_lim);
+}
+
+void hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(struct aq_hw_s *aq_hw,
+                                                u32 arb_mode)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_TC_ARB_MODE_ADR,
+                           HW_ATL_TPS_DESC_TC_ARB_MODE_MSK,
+                           HW_ATL_TPS_DESC_TC_ARB_MODE_SHIFT,
+                           arb_mode);
+}
+
+void hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(struct aq_hw_s *aq_hw,
+                                                  u32 max_credit,
+                                                  u32 tc)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_TCTCREDIT_MAX_ADR(tc),
+                           HW_ATL_TPS_DESC_TCTCREDIT_MAX_MSK,
+                           HW_ATL_TPS_DESC_TCTCREDIT_MAX_SHIFT,
+                           max_credit);
+}
+
+void hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(struct aq_hw_s *aq_hw,
+                                              u32 tx_pkt_shed_desc_tc_weight,
+                                              u32 tc)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_TCTWEIGHT_ADR(tc),
+                           HW_ATL_TPS_DESC_TCTWEIGHT_MSK,
+                           HW_ATL_TPS_DESC_TCTWEIGHT_SHIFT,
+                           tx_pkt_shed_desc_tc_weight);
+}
+
+void hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(struct aq_hw_s *aq_hw,
+                                                u32 arb_mode)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_VM_ARB_MODE_ADR,
+                           HW_ATL_TPS_DESC_VM_ARB_MODE_MSK,
+                           HW_ATL_TPS_DESC_VM_ARB_MODE_SHIFT,
+                           arb_mode);
+}
+
+void hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw,
+                                                  u32 max_credit,
+                                                  u32 tc)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DATA_TCTCREDIT_MAX_ADR(tc),
+                           HW_ATL_TPS_DATA_TCTCREDIT_MAX_MSK,
+                           HW_ATL_TPS_DATA_TCTCREDIT_MAX_SHIFT,
+                           max_credit);
+}
+
+void hw_atl_tps_tx_pkt_shed_tc_data_weight_set(struct aq_hw_s *aq_hw,
+                                              u32 tx_pkt_shed_tc_data_weight,
+                                              u32 tc)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DATA_TCTWEIGHT_ADR(tc),
+                           HW_ATL_TPS_DATA_TCTWEIGHT_MSK,
+                           HW_ATL_TPS_DATA_TCTWEIGHT_SHIFT,
+                           tx_pkt_shed_tc_data_weight);
+}
+
+/* tx */
+void hw_atl_tx_tx_reg_res_dis_set(struct aq_hw_s *aq_hw, u32 tx_reg_res_dis)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_TX_REG_RES_DSBL_ADR,
+                           HW_ATL_TX_REG_RES_DSBL_MSK,
+                           HW_ATL_TX_REG_RES_DSBL_SHIFT, tx_reg_res_dis);
+}
+
+/* msm */
+u32 hw_atl_msm_reg_access_status_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg_bit(aq_hw, HW_ATL_MSM_REG_ACCESS_BUSY_ADR,
+                                 HW_ATL_MSM_REG_ACCESS_BUSY_MSK,
+                                 HW_ATL_MSM_REG_ACCESS_BUSY_SHIFT);
+}
+
+void hw_atl_msm_reg_addr_for_indirect_addr_set(struct aq_hw_s *aq_hw,
+                                              u32 reg_addr_for_indirect_addr)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_MSM_REG_ADDR_ADR,
+                           HW_ATL_MSM_REG_ADDR_MSK,
+                           HW_ATL_MSM_REG_ADDR_SHIFT,
+                           reg_addr_for_indirect_addr);
+}
+
+void hw_atl_msm_reg_rd_strobe_set(struct aq_hw_s *aq_hw, u32 reg_rd_strobe)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_MSM_REG_RD_STROBE_ADR,
+                           HW_ATL_MSM_REG_RD_STROBE_MSK,
+                           HW_ATL_MSM_REG_RD_STROBE_SHIFT,
+                           reg_rd_strobe);
+}
+
+u32 hw_atl_msm_reg_rd_data_get(struct aq_hw_s *aq_hw)
+{
+       return aq_hw_read_reg(aq_hw, HW_ATL_MSM_REG_RD_DATA_ADR);
+}
+
+void hw_atl_msm_reg_wr_data_set(struct aq_hw_s *aq_hw, u32 reg_wr_data)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_MSM_REG_WR_DATA_ADR, reg_wr_data);
+}
+
+void hw_atl_msm_reg_wr_strobe_set(struct aq_hw_s *aq_hw, u32 reg_wr_strobe)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_MSM_REG_WR_STROBE_ADR,
+                           HW_ATL_MSM_REG_WR_STROBE_MSK,
+                           HW_ATL_MSM_REG_WR_STROBE_SHIFT,
+                           reg_wr_strobe);
+}
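+
+/* Sketch of an indirect MSM register read built from the accessors
+ * above (the busy-wait is an assumption, not taken from this file):
+ *
+ *     hw_atl_msm_reg_addr_for_indirect_addr_set(aq_hw, msm_addr);
+ *     hw_atl_msm_reg_rd_strobe_set(aq_hw, 1U);
+ *     poll hw_atl_msm_reg_access_status_get(aq_hw) until it returns 0;
+ *     val = hw_atl_msm_reg_rd_data_get(aq_hw);
+ */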
+
+/* pci */
+void hw_atl_pci_pci_reg_res_dis_set(struct aq_hw_s *aq_hw, u32 pci_reg_res_dis)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_PCI_REG_RES_DSBL_ADR,
+                           HW_ATL_PCI_REG_RES_DSBL_MSK,
+                           HW_ATL_PCI_REG_RES_DSBL_SHIFT,
+                           pci_reg_res_dis);
+}
+
+void hw_atl_reg_glb_cpu_scratch_scp_set(struct aq_hw_s *aq_hw,
+                                       u32 glb_cpu_scratch_scp,
+                                       u32 scratch_scp)
+{
+       aq_hw_write_reg(aq_hw, HW_ATL_GLB_CPU_SCRATCH_SCP_ADR(scratch_scp),
+                       glb_cpu_scratch_scp);
+}
+
+void hw_atl_mcp_up_force_intr_set(struct aq_hw_s *aq_hw, u32 up_force_intr)
+{
+       aq_hw_write_reg_bit(aq_hw, HW_ATL_MCP_UP_FORCE_INTERRUPT_ADR,
+                       HW_ATL_MCP_UP_FORCE_INTERRUPT_MSK,
+                       HW_ATL_MCP_UP_FORCE_INTERRUPT_SHIFT, up_force_intr);
+}
diff --git a/drivers/net/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/atlantic/hw_atl/hw_atl_llh.h
new file mode 100644 (file)
index 0000000..e30083c
--- /dev/null
@@ -0,0 +1,714 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0) */
+/* Copyright (C) 2014-2017 aQuantia Corporation. */
+
+/* File hw_atl_llh.h: Declarations of bitfield and register access functions for
+ * Atlantic registers.
+ */
+
+#ifndef HW_ATL_LLH_H
+#define HW_ATL_LLH_H
+
+#include "../atl_types.h"
+
+struct aq_hw_s;
+
+/* global */
+
+/* set global microprocessor semaphore */
+void hw_atl_reg_glb_cpu_sem_set(struct aq_hw_s *aq_hw, u32 glb_cpu_sem,
+                               u32 semaphore);
+
+/* get global microprocessor semaphore */
+u32 hw_atl_reg_glb_cpu_sem_get(struct aq_hw_s *aq_hw, u32 semaphore);
+
+/* set global register reset disable */
+void hw_atl_glb_glb_reg_res_dis_set(struct aq_hw_s *aq_hw, u32 glb_reg_res_dis);
+
+/* set soft reset */
+void hw_atl_glb_soft_res_set(struct aq_hw_s *aq_hw, u32 soft_res);
+
+/* get soft reset */
+u32 hw_atl_glb_soft_res_get(struct aq_hw_s *aq_hw);
+
+/* stats */
+
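+/* get rx dma dropped packet counter */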
+u32 hw_atl_rpb_rx_dma_drop_pkt_cnt_get(struct aq_hw_s *aq_hw);
+
+/* get rx dma good octet counter lsw */
+u32 hw_atl_stats_rx_dma_good_octet_counterlsw_get(struct aq_hw_s *aq_hw);
+
+/* get rx dma good packet counter lsw */
+u32 hw_atl_stats_rx_dma_good_pkt_counterlsw_get(struct aq_hw_s *aq_hw);
+
+/* get tx dma good octet counter lsw */
+u32 hw_atl_stats_tx_dma_good_octet_counterlsw_get(struct aq_hw_s *aq_hw);
+
+/* get tx dma good packet counter lsw */
+u32 hw_atl_stats_tx_dma_good_pkt_counterlsw_get(struct aq_hw_s *aq_hw);
+
+/* get rx dma good octet counter msw */
+u32 hw_atl_stats_rx_dma_good_octet_countermsw_get(struct aq_hw_s *aq_hw);
+
+/* get rx dma good packet counter msw */
+u32 hw_atl_stats_rx_dma_good_pkt_countermsw_get(struct aq_hw_s *aq_hw);
+
+/* get tx dma good octet counter msw */
+u32 hw_atl_stats_tx_dma_good_octet_countermsw_get(struct aq_hw_s *aq_hw);
+
+/* get tx dma good packet counter msw */
+u32 hw_atl_stats_tx_dma_good_pkt_countermsw_get(struct aq_hw_s *aq_hw);
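+
+/* The good octet/packet DMA counters are 64-bit values exposed as
+ * lsw/msw halves. One torn-read-safe way to combine them (a sketch,
+ * not taken from this file):
+ *
+ *     do {
+ *             msw = hw_atl_stats_rx_dma_good_pkt_countermsw_get(aq_hw);
+ *             lsw = hw_atl_stats_rx_dma_good_pkt_counterlsw_get(aq_hw);
+ *     } while (msw != hw_atl_stats_rx_dma_good_pkt_countermsw_get(aq_hw));
+ *     pkts = ((u64)msw << 32) | lsw;
+ */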
+
+/* get msm rx errors counter register */
+u32 hw_atl_reg_mac_msm_rx_errs_cnt_get(struct aq_hw_s *aq_hw);
+
+/* get msm rx unicast frames counter register */
+u32 hw_atl_reg_mac_msm_rx_ucst_frm_cnt_get(struct aq_hw_s *aq_hw);
+
+/* get msm rx multicast frames counter register */
+u32 hw_atl_reg_mac_msm_rx_mcst_frm_cnt_get(struct aq_hw_s *aq_hw);
+
+/* get msm rx broadcast frames counter register */
+u32 hw_atl_reg_mac_msm_rx_bcst_frm_cnt_get(struct aq_hw_s *aq_hw);
+
+/* get msm rx broadcast octets counter register 1 */
+u32 hw_atl_reg_mac_msm_rx_bcst_octets_counter1get(struct aq_hw_s *aq_hw);
+
+/* get msm rx unicast octets counter register 0 */
+u32 hw_atl_reg_mac_msm_rx_ucst_octets_counter0get(struct aq_hw_s *aq_hw);
+
+/* get rx dma statistics counter 7 */
+u32 hw_atl_reg_rx_dma_stat_counter7get(struct aq_hw_s *aq_hw);
+
+/* get msm tx errors counter register */
+u32 hw_atl_reg_mac_msm_tx_errs_cnt_get(struct aq_hw_s *aq_hw);
+
+/* get msm tx unicast frames counter register */
+u32 hw_atl_reg_mac_msm_tx_ucst_frm_cnt_get(struct aq_hw_s *aq_hw);
+
+/* get msm tx multicast frames counter register */
+u32 hw_atl_reg_mac_msm_tx_mcst_frm_cnt_get(struct aq_hw_s *aq_hw);
+
+/* get msm tx broadcast frames counter register */
+u32 hw_atl_reg_mac_msm_tx_bcst_frm_cnt_get(struct aq_hw_s *aq_hw);
+
+/* get msm tx multicast octets counter register 1 */
+u32 hw_atl_reg_mac_msm_tx_mcst_octets_counter1get(struct aq_hw_s *aq_hw);
+
+/* get msm tx broadcast octets counter register 1 */
+u32 hw_atl_reg_mac_msm_tx_bcst_octets_counter1get(struct aq_hw_s *aq_hw);
+
+/* get msm tx unicast octets counter register 0 */
+u32 hw_atl_reg_mac_msm_tx_ucst_octets_counter0get(struct aq_hw_s *aq_hw);
+
+/* get global mif identification */
+u32 hw_atl_reg_glb_mif_id_get(struct aq_hw_s *aq_hw);
+
+/* interrupt */
+
+/* set interrupt auto mask lsw */
+void hw_atl_itr_irq_auto_masklsw_set(struct aq_hw_s *aq_hw,
+                                    u32 irq_auto_masklsw);
+
+/* set interrupt mapping enable rx */
+void hw_atl_itr_irq_map_en_rx_set(struct aq_hw_s *aq_hw, u32 irq_map_en_rx,
+                                 u32 rx);
+
+/* set interrupt mapping enable tx */
+void hw_atl_itr_irq_map_en_tx_set(struct aq_hw_s *aq_hw, u32 irq_map_en_tx,
+                                 u32 tx);
+
+/* set interrupt mapping rx */
+void hw_atl_itr_irq_map_rx_set(struct aq_hw_s *aq_hw, u32 irq_map_rx, u32 rx);
+
+/* set interrupt mapping tx */
+void hw_atl_itr_irq_map_tx_set(struct aq_hw_s *aq_hw, u32 irq_map_tx, u32 tx);
+
+/* set interrupt mask clear lsw */
+void hw_atl_itr_irq_msk_clearlsw_set(struct aq_hw_s *aq_hw,
+                                    u32 irq_msk_clearlsw);
+
+/* set interrupt mask set lsw */
+void hw_atl_itr_irq_msk_setlsw_set(struct aq_hw_s *aq_hw, u32 irq_msk_setlsw);
+
+/* set interrupt register reset disable */
+void hw_atl_itr_irq_reg_res_dis_set(struct aq_hw_s *aq_hw, u32 irq_reg_res_dis);
+
+/* set interrupt status clear lsw */
+void hw_atl_itr_irq_status_clearlsw_set(struct aq_hw_s *aq_hw,
+                                       u32 irq_status_clearlsw);
+
+/* get interrupt status lsw */
+u32 hw_atl_itr_irq_statuslsw_get(struct aq_hw_s *aq_hw);
+
+/* get reset interrupt */
+u32 hw_atl_itr_res_irq_get(struct aq_hw_s *aq_hw);
+
+/* set reset interrupt */
+void hw_atl_itr_res_irq_set(struct aq_hw_s *aq_hw, u32 res_irq);
+
+/* RDM: rx dma */
+
+/* set cpu id */
+void hw_atl_rdm_cpu_id_set(struct aq_hw_s *aq_hw, u32 cpuid, u32 dca);
+
+/* set rx dca enable */
+void hw_atl_rdm_rx_dca_en_set(struct aq_hw_s *aq_hw, u32 rx_dca_en);
+
+/* set rx dca mode */
+void hw_atl_rdm_rx_dca_mode_set(struct aq_hw_s *aq_hw, u32 rx_dca_mode);
+
+/* set rx descriptor data buffer size */
+void hw_atl_rdm_rx_desc_data_buff_size_set(struct aq_hw_s *aq_hw,
+                                          u32 rx_desc_data_buff_size,
+                                          u32 descriptor);
+
+/* set rx descriptor dca enable */
+void hw_atl_rdm_rx_desc_dca_en_set(struct aq_hw_s *aq_hw, u32 rx_desc_dca_en,
+                                  u32 dca);
+
+/* set rx descriptor enable */
+void hw_atl_rdm_rx_desc_en_set(struct aq_hw_s *aq_hw, u32 rx_desc_en,
+                              u32 descriptor);
+
+/* set rx descriptor header splitting */
+void hw_atl_rdm_rx_desc_head_splitting_set(struct aq_hw_s *aq_hw,
+                                          u32 rx_desc_head_splitting,
+                                          u32 descriptor);
+
+/* get rx descriptor head pointer */
+u32 hw_atl_rdm_rx_desc_head_ptr_get(struct aq_hw_s *aq_hw, u32 descriptor);
+
+/* set rx descriptor length */
+void hw_atl_rdm_rx_desc_len_set(struct aq_hw_s *aq_hw, u32 rx_desc_len,
+                               u32 descriptor);
+
+/* set rx descriptor write-back interrupt enable */
+void hw_atl_rdm_rx_desc_wr_wb_irq_en_set(struct aq_hw_s *aq_hw,
+                                        u32 rx_desc_wr_wb_irq_en);
+
+/* set rx header dca enable */
+void hw_atl_rdm_rx_head_dca_en_set(struct aq_hw_s *aq_hw, u32 rx_head_dca_en,
+                                  u32 dca);
+
+/* set rx payload dca enable */
+void hw_atl_rdm_rx_pld_dca_en_set(struct aq_hw_s *aq_hw, u32 rx_pld_dca_en,
+                                 u32 dca);
+
+/* set rx descriptor header buffer size */
+void hw_atl_rdm_rx_desc_head_buff_size_set(struct aq_hw_s *aq_hw,
+                                          u32 rx_desc_head_buff_size,
+                                          u32 descriptor);
+
+/* set rx descriptor reset */
+void hw_atl_rdm_rx_desc_res_set(struct aq_hw_s *aq_hw, u32 rx_desc_res,
+                               u32 descriptor);
+
+/* Set RDM Interrupt Moderation Enable */
+void hw_atl_rdm_rdm_intr_moder_en_set(struct aq_hw_s *aq_hw,
+                                     u32 rdm_intr_moder_en);
+
+/* reg */
+
+/* set general interrupt mapping register */
+void hw_atl_reg_gen_irq_map_set(struct aq_hw_s *aq_hw, u32 gen_intr_map,
+                               u32 regidx);
+
+/* get general interrupt status register */
+u32 hw_atl_reg_gen_irq_status_get(struct aq_hw_s *aq_hw);
+
+/* set interrupt global control register */
+void hw_atl_reg_irq_glb_ctl_set(struct aq_hw_s *aq_hw, u32 intr_glb_ctl);
+
+/* set interrupt throttle register */
+void hw_atl_reg_irq_thr_set(struct aq_hw_s *aq_hw, u32 intr_thr, u32 throttle);
+
+/* set rx dma descriptor base address lsw */
+void hw_atl_reg_rx_dma_desc_base_addresslswset(struct aq_hw_s *aq_hw,
+                                              u32 rx_dma_desc_base_addrlsw,
+                                              u32 descriptor);
+
+/* set rx dma descriptor base address msw */
+void hw_atl_reg_rx_dma_desc_base_addressmswset(struct aq_hw_s *aq_hw,
+                                              u32 rx_dma_desc_base_addrmsw,
+                                              u32 descriptor);
+
+/* get rx dma descriptor status register */
+u32 hw_atl_reg_rx_dma_desc_status_get(struct aq_hw_s *aq_hw, u32 descriptor);
+
+/* set rx dma descriptor tail pointer register */
+void hw_atl_reg_rx_dma_desc_tail_ptr_set(struct aq_hw_s *aq_hw,
+                                        u32 rx_dma_desc_tail_ptr,
+                                        u32 descriptor);
+
+/* set rx filter multicast filter mask register */
+void hw_atl_reg_rx_flr_mcst_flr_msk_set(struct aq_hw_s *aq_hw,
+                                       u32 rx_flr_mcst_flr_msk);
+
+/* set rx filter multicast filter register */
+void hw_atl_reg_rx_flr_mcst_flr_set(struct aq_hw_s *aq_hw, u32 rx_flr_mcst_flr,
+                                   u32 filter);
+
+/* set rx filter rss control register 1 */
+void hw_atl_reg_rx_flr_rss_control1set(struct aq_hw_s *aq_hw,
+                                      u32 rx_flr_rss_control1);
+
+/* Set RX Filter Control Register 2 */
+void hw_atl_reg_rx_flr_control2_set(struct aq_hw_s *aq_hw, u32 rx_flr_control2);
+
+/* Set RX Interrupt Moderation Control Register */
+void hw_atl_reg_rx_intr_moder_ctrl_set(struct aq_hw_s *aq_hw,
+                                      u32 rx_intr_moderation_ctl,
+                                      u32 queue);
+
+/* set tx dma debug control */
+void hw_atl_reg_tx_dma_debug_ctl_set(struct aq_hw_s *aq_hw,
+                                    u32 tx_dma_debug_ctl);
+
+/* set tx dma descriptor base address lsw */
+void hw_atl_reg_tx_dma_desc_base_addresslswset(struct aq_hw_s *aq_hw,
+                                              u32 tx_dma_desc_base_addrlsw,
+                                              u32 descriptor);
+
+/* set tx dma descriptor base address msw */
+void hw_atl_reg_tx_dma_desc_base_addressmswset(struct aq_hw_s *aq_hw,
+                                              u32 tx_dma_desc_base_addrmsw,
+                                              u32 descriptor);
+
+/* set tx dma descriptor tail pointer register */
+void hw_atl_reg_tx_dma_desc_tail_ptr_set(struct aq_hw_s *aq_hw,
+                                        u32 tx_dma_desc_tail_ptr,
+                                        u32 descriptor);
+
+/* Set TX Interrupt Moderation Control Register */
+void hw_atl_reg_tx_intr_moder_ctrl_set(struct aq_hw_s *aq_hw,
+                                      u32 tx_intr_moderation_ctl,
+                                      u32 queue);
+
+/* set global microprocessor scratch pad */
+void hw_atl_reg_glb_cpu_scratch_scp_set(struct aq_hw_s *aq_hw,
+                                       u32 glb_cpu_scratch_scp,
+                                       u32 scratch_scp);
+
+/* rpb */
+
+/* set dma system loopback */
+void hw_atl_rpb_dma_sys_lbk_set(struct aq_hw_s *aq_hw, u32 dma_sys_lbk);
+
+/* set rx traffic class mode */
+void hw_atl_rpb_rpf_rx_traf_class_mode_set(struct aq_hw_s *aq_hw,
+                                          u32 rx_traf_class_mode);
+
+/* get rx traffic class mode */
+u32 hw_atl_rpb_rpf_rx_traf_class_mode_get(struct aq_hw_s *aq_hw);
+
+/* set rx buffer enable */
+void hw_atl_rpb_rx_buff_en_set(struct aq_hw_s *aq_hw, u32 rx_buff_en);
+
+/* set rx buffer high threshold (per tc) */
+void hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(struct aq_hw_s *aq_hw,
+                                               u32 rx_buff_hi_threshold_per_tc,
+                                               u32 buffer);
+
+/* set rx buffer low threshold (per tc) */
+void hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(struct aq_hw_s *aq_hw,
+                                               u32 rx_buff_lo_threshold_per_tc,
+                                               u32 buffer);
+
+/* set rx flow control mode */
+void hw_atl_rpb_rx_flow_ctl_mode_set(struct aq_hw_s *aq_hw,
+                                    u32 rx_flow_ctl_mode);
+
+/* set rx packet buffer size (per tc) */
+void hw_atl_rpb_rx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
+                                           u32 rx_pkt_buff_size_per_tc,
+                                           u32 buffer);
+
+/* set rx xoff enable (per tc) */
+void hw_atl_rpb_rx_xoff_en_per_tc_set(struct aq_hw_s *aq_hw,
+                                     u32 rx_xoff_en_per_tc,
+                                     u32 buffer);
+
+/* RPF: rx packet filter */
+
+/* set l2 broadcast count threshold */
+void hw_atl_rpfl2broadcast_count_threshold_set(struct aq_hw_s *aq_hw,
+                                              u32 l2broadcast_count_threshold);
+
+/* set l2 broadcast enable */
+void hw_atl_rpfl2broadcast_en_set(struct aq_hw_s *aq_hw, u32 l2broadcast_en);
+
+/* set l2 broadcast filter action */
+void hw_atl_rpfl2broadcast_flr_act_set(struct aq_hw_s *aq_hw,
+                                      u32 l2broadcast_flr_act);
+
+/* set l2 multicast filter enable */
+void hw_atl_rpfl2multicast_flr_en_set(struct aq_hw_s *aq_hw,
+                                     u32 l2multicast_flr_en,
+                                     u32 filter);
+
+/* set l2 promiscuous mode enable */
+void hw_atl_rpfl2promiscuous_mode_en_set(struct aq_hw_s *aq_hw,
+                                        u32 l2promiscuous_mode_en);
+
+/* set l2 unicast filter action */
+void hw_atl_rpfl2unicast_flr_act_set(struct aq_hw_s *aq_hw,
+                                    u32 l2unicast_flr_act,
+                                    u32 filter);
+
+/* set l2 unicast filter enable */
+void hw_atl_rpfl2_uc_flr_en_set(struct aq_hw_s *aq_hw, u32 l2unicast_flr_en,
+                               u32 filter);
+
+/* set l2 unicast destination address lsw */
+void hw_atl_rpfl2unicast_dest_addresslsw_set(struct aq_hw_s *aq_hw,
+                                            u32 l2unicast_dest_addresslsw,
+                                            u32 filter);
+
+/* set l2 unicast destination address msw */
+void hw_atl_rpfl2unicast_dest_addressmsw_set(struct aq_hw_s *aq_hw,
+                                            u32 l2unicast_dest_addressmsw,
+                                            u32 filter);
+
+/* Set L2 Accept all Multicast packets */
+void hw_atl_rpfl2_accept_all_mc_packets_set(struct aq_hw_s *aq_hw,
+                                           u32 l2_accept_all_mc_packets);
+
+/* set user-priority tc mapping */
+void hw_atl_rpf_rpb_user_priority_tc_map_set(struct aq_hw_s *aq_hw,
+                                            u32 user_priority_tc_map, u32 tc);
+
+/* set rss key address */
+void hw_atl_rpf_rss_key_addr_set(struct aq_hw_s *aq_hw, u32 rss_key_addr);
+
+/* set rss key write data */
+void hw_atl_rpf_rss_key_wr_data_set(struct aq_hw_s *aq_hw, u32 rss_key_wr_data);
+
+/* get rss key write enable */
+u32 hw_atl_rpf_rss_key_wr_en_get(struct aq_hw_s *aq_hw);
+
+/* set rss key write enable */
+void hw_atl_rpf_rss_key_wr_en_set(struct aq_hw_s *aq_hw, u32 rss_key_wr_en);
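+
+/* Typical sequence for loading one 32-bit word of the RSS key with the
+ * four accessors above (a sketch; the polling step is an assumption,
+ * not taken from this file):
+ *
+ *     hw_atl_rpf_rss_key_wr_data_set(aq_hw, key_word);
+ *     hw_atl_rpf_rss_key_addr_set(aq_hw, word_index);
+ *     hw_atl_rpf_rss_key_wr_en_set(aq_hw, 1U);
+ *     poll hw_atl_rpf_rss_key_wr_en_get(aq_hw) until it returns 0;
+ */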
+
+/* set rss redirection table address */
+void hw_atl_rpf_rss_redir_tbl_addr_set(struct aq_hw_s *aq_hw,
+                                      u32 rss_redir_tbl_addr);
+
+/* set rss redirection table write data */
+void hw_atl_rpf_rss_redir_tbl_wr_data_set(struct aq_hw_s *aq_hw,
+                                         u32 rss_redir_tbl_wr_data);
+
+/* get rss redirection write enable */
+u32 hw_atl_rpf_rss_redir_wr_en_get(struct aq_hw_s *aq_hw);
+
+/* set rss redirection write enable */
+void hw_atl_rpf_rss_redir_wr_en_set(struct aq_hw_s *aq_hw, u32 rss_redir_wr_en);
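+
+/* The redirection table is written with the same data/addr/wr_en/poll
+ * pattern sketched for the RSS key above, using the four redir
+ * accessors.
+ */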
+
+/* set tpo to rpf system loopback */
+void hw_atl_rpf_tpo_to_rpf_sys_lbk_set(struct aq_hw_s *aq_hw,
+                                      u32 tpo_to_rpf_sys_lbk);
+
+/* set vlan inner ethertype */
+void hw_atl_rpf_vlan_inner_etht_set(struct aq_hw_s *aq_hw, u32 vlan_inner_etht);
+
+/* set vlan outer ethertype */
+void hw_atl_rpf_vlan_outer_etht_set(struct aq_hw_s *aq_hw, u32 vlan_outer_etht);
+
+/* set vlan promiscuous mode enable */
+void hw_atl_rpf_vlan_prom_mode_en_set(struct aq_hw_s *aq_hw,
+                                     u32 vlan_prom_mode_en);
+
+/* Set VLAN untagged action */
+void hw_atl_rpf_vlan_untagged_act_set(struct aq_hw_s *aq_hw,
+                                     u32 vlan_untagged_act);
+
+/* Set VLAN accept untagged packets */
+void hw_atl_rpf_vlan_accept_untagged_packets_set(struct aq_hw_s *aq_hw,
+                                                u32 vlan_acc_untagged_packets);
+
+/* Set VLAN filter enable */
+void hw_atl_rpf_vlan_flr_en_set(struct aq_hw_s *aq_hw, u32 vlan_flr_en,
+                               u32 filter);
+
+/* Set VLAN Filter Action */
+void hw_atl_rpf_vlan_flr_act_set(struct aq_hw_s *aq_hw, u32 vlan_filter_act,
+                                u32 filter);
+
+/* Set VLAN ID Filter */
+void hw_atl_rpf_vlan_id_flr_set(struct aq_hw_s *aq_hw, u32 vlan_id_flr,
+                               u32 filter);
+
+/* set ethertype filter enable */
+void hw_atl_rpf_etht_flr_en_set(struct aq_hw_s *aq_hw, u32 etht_flr_en,
+                               u32 filter);
+
+/* set ethertype user-priority enable */
+void hw_atl_rpf_etht_user_priority_en_set(struct aq_hw_s *aq_hw,
+                                         u32 etht_user_priority_en,
+                                         u32 filter);
+
+/* set ethertype rx queue enable */
+void hw_atl_rpf_etht_rx_queue_en_set(struct aq_hw_s *aq_hw,
+                                    u32 etht_rx_queue_en,
+                                    u32 filter);
+
+/* set ethertype rx queue */
+void hw_atl_rpf_etht_rx_queue_set(struct aq_hw_s *aq_hw, u32 etht_rx_queue,
+                                 u32 filter);
+
+/* set ethertype user-priority */
+void hw_atl_rpf_etht_user_priority_set(struct aq_hw_s *aq_hw,
+                                      u32 etht_user_priority,
+                                      u32 filter);
+
+/* set ethertype management queue */
+void hw_atl_rpf_etht_mgt_queue_set(struct aq_hw_s *aq_hw, u32 etht_mgt_queue,
+                                  u32 filter);
+
+/* set ethertype filter action */
+void hw_atl_rpf_etht_flr_act_set(struct aq_hw_s *aq_hw, u32 etht_flr_act,
+                                u32 filter);
+
+/* set ethertype filter */
+void hw_atl_rpf_etht_flr_set(struct aq_hw_s *aq_hw, u32 etht_flr, u32 filter);
+
+/* rpo */
+
+/* set ipv4 header checksum offload enable */
+void hw_atl_rpo_ipv4header_crc_offload_en_set(struct aq_hw_s *aq_hw,
+                                             u32 ipv4header_crc_offload_en);
+
+/* set rx descriptor vlan stripping */
+void hw_atl_rpo_rx_desc_vlan_stripping_set(struct aq_hw_s *aq_hw,
+                                          u32 rx_desc_vlan_stripping,
+                                          u32 descriptor);
+
+/* set tcp/udp checksum offload enable */
+void hw_atl_rpo_tcp_udp_crc_offload_en_set(struct aq_hw_s *aq_hw,
+                                          u32 tcp_udp_crc_offload_en);
+
+/* Set LRO Patch Optimization Enable. */
+void hw_atl_rpo_lro_patch_optimization_en_set(struct aq_hw_s *aq_hw,
+                                             u32 lro_patch_optimization_en);
+
+/* Set Large Receive Offload Enable */
+void hw_atl_rpo_lro_en_set(struct aq_hw_s *aq_hw, u32 lro_en);
+
+/* Set LRO Q Sessions Limit */
+void hw_atl_rpo_lro_qsessions_lim_set(struct aq_hw_s *aq_hw,
+                                     u32 lro_qsessions_lim);
+
+/* Set LRO Total Descriptor Limit */
+void hw_atl_rpo_lro_total_desc_lim_set(struct aq_hw_s *aq_hw,
+                                      u32 lro_total_desc_lim);
+
+/* Set LRO Min Payload of First Packet */
+void hw_atl_rpo_lro_min_pay_of_first_pkt_set(struct aq_hw_s *aq_hw,
+                                            u32 lro_min_pld_of_first_pkt);
+
+/* Set LRO Packet Limit */
+void hw_atl_rpo_lro_pkt_lim_set(struct aq_hw_s *aq_hw, u32 lro_packet_lim);
+
+/* Set LRO Max Number of Descriptors */
+void hw_atl_rpo_lro_max_num_of_descriptors_set(struct aq_hw_s *aq_hw,
+                                              u32 lro_max_desc_num, u32 lro);
+
+/* Set LRO Time Base Divider */
+void hw_atl_rpo_lro_time_base_divider_set(struct aq_hw_s *aq_hw,
+                                         u32 lro_time_base_divider);
+
+/* Set LRO Inactive Interval */
+void hw_atl_rpo_lro_inactive_interval_set(struct aq_hw_s *aq_hw,
+                                         u32 lro_inactive_interval);
+
+/* Set LRO Max Coalescing Interval */
+void hw_atl_rpo_lro_max_coalescing_interval_set(struct aq_hw_s *aq_hw,
+                                               u32 lro_max_coal_interval);
+
+/* rx */
+
+/* set rx register reset disable */
+void hw_atl_rx_rx_reg_res_dis_set(struct aq_hw_s *aq_hw, u32 rx_reg_res_dis);
+
+/* tdm */
+
+/* set cpu id */
+void hw_atl_tdm_cpu_id_set(struct aq_hw_s *aq_hw, u32 cpuid, u32 dca);
+
+/* set large send offload enable */
+void hw_atl_tdm_large_send_offload_en_set(struct aq_hw_s *aq_hw,
+                                         u32 large_send_offload_en);
+
+/* set tx descriptor enable */
+void hw_atl_tdm_tx_desc_en_set(struct aq_hw_s *aq_hw, u32 tx_desc_en,
+                              u32 descriptor);
+
+/* set tx dca enable */
+void hw_atl_tdm_tx_dca_en_set(struct aq_hw_s *aq_hw, u32 tx_dca_en);
+
+/* set tx dca mode */
+void hw_atl_tdm_tx_dca_mode_set(struct aq_hw_s *aq_hw, u32 tx_dca_mode);
+
+/* set tx descriptor dca enable */
+void hw_atl_tdm_tx_desc_dca_en_set(struct aq_hw_s *aq_hw, u32 tx_desc_dca_en,
+                                  u32 dca);
+
+/* get tx descriptor head pointer */
+u32 hw_atl_tdm_tx_desc_head_ptr_get(struct aq_hw_s *aq_hw, u32 descriptor);
+
+/* set tx descriptor length */
+void hw_atl_tdm_tx_desc_len_set(struct aq_hw_s *aq_hw, u32 tx_desc_len,
+                               u32 descriptor);
+
+/* set tx descriptor write-back interrupt enable */
+void hw_atl_tdm_tx_desc_wr_wb_irq_en_set(struct aq_hw_s *aq_hw,
+                                        u32 tx_desc_wr_wb_irq_en);
+
+/* set tx descriptor write-back threshold */
+void hw_atl_tdm_tx_desc_wr_wb_threshold_set(struct aq_hw_s *aq_hw,
+                                           u32 tx_desc_wr_wb_threshold,
+                                           u32 descriptor);
+
+/* Set TDM Interrupt Moderation Enable */
+void hw_atl_tdm_tdm_intr_moder_en_set(struct aq_hw_s *aq_hw,
+                                     u32 tdm_irq_moderation_en);
+
+/* thm */
+
+/* set lso tcp flag of first packet */
+void hw_atl_thm_lso_tcp_flag_of_first_pkt_set(struct aq_hw_s *aq_hw,
+                                             u32 lso_tcp_flag_of_first_pkt);
+
+/* set lso tcp flag of last packet */
+void hw_atl_thm_lso_tcp_flag_of_last_pkt_set(struct aq_hw_s *aq_hw,
+                                            u32 lso_tcp_flag_of_last_pkt);
+
+/* set lso tcp flag of middle packet */
+void hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(struct aq_hw_s *aq_hw,
+                                              u32 lso_tcp_flag_of_middle_pkt);
+
+/* tpb */
+
+/* set TX Traffic Class Mode */
+void hw_atl_rpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw,
+                                  u32 tx_traf_class_mode);
+
+/* get TX Traffic Class Mode */
+u32 hw_atl_rpb_tps_tx_tc_mode_get(struct aq_hw_s *aq_hw);
+
+/* set tx buffer enable */
+void hw_atl_tpb_tx_buff_en_set(struct aq_hw_s *aq_hw, u32 tx_buff_en);
+
+/* set tx buffer high threshold (per tc) */
+void hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(struct aq_hw_s *aq_hw,
+                                               u32 tx_buff_hi_threshold_per_tc,
+                                               u32 buffer);
+
+/* set tx buffer low threshold (per tc) */
+void hw_atl_tpb_tx_buff_lo_threshold_per_tc_set(struct aq_hw_s *aq_hw,
+                                               u32 tx_buff_lo_threshold_per_tc,
+                                               u32 buffer);
+
+/* set tx dma system loopback enable */
+void hw_atl_tpb_tx_dma_sys_lbk_en_set(struct aq_hw_s *aq_hw,
+                                     u32 tx_dma_sys_lbk_en);
+
+/* set tx packet buffer size (per tc) */
+void hw_atl_tpb_tx_pkt_buff_size_per_tc_set(struct aq_hw_s *aq_hw,
+                                           u32 tx_pkt_buff_size_per_tc,
+                                           u32 buffer);
+
+/* set tx path pad insert enable */
+void hw_atl_tpb_tx_path_scp_ins_en_set(struct aq_hw_s *aq_hw,
+                                      u32 tx_path_scp_ins_en);
+
+/* tpo */
+
+/* set ipv4 header checksum offload enable */
+void hw_atl_tpo_ipv4header_crc_offload_en_set(struct aq_hw_s *aq_hw,
+                                             u32 ipv4header_crc_offload_en);
+
+/* set tcp/udp checksum offload enable */
+void hw_atl_tpo_tcp_udp_crc_offload_en_set(struct aq_hw_s *aq_hw,
+                                          u32 tcp_udp_crc_offload_en);
+
+/* set tx pkt system loopback enable */
+void hw_atl_tpo_tx_pkt_sys_lbk_en_set(struct aq_hw_s *aq_hw,
+                                     u32 tx_pkt_sys_lbk_en);
+
+/* tps */
+
+/* set tx packet scheduler data arbitration mode */
+void hw_atl_tps_tx_pkt_shed_data_arb_mode_set(struct aq_hw_s *aq_hw,
+                                             u32 tx_pkt_shed_data_arb_mode);
+
+/* set tx packet scheduler descriptor rate current time reset */
+void hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(struct aq_hw_s *aq_hw,
+                                                       u32 curr_time_res);
+
+/* set tx packet scheduler descriptor rate limit */
+void hw_atl_tps_tx_pkt_shed_desc_rate_lim_set(struct aq_hw_s *aq_hw,
+                                             u32 tx_pkt_shed_desc_rate_lim);
+
+/* set tx packet scheduler descriptor tc arbitration mode */
+void hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(struct aq_hw_s *aq_hw,
+                                                u32 arb_mode);
+
+/* set tx packet scheduler descriptor tc max credit */
+void hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(struct aq_hw_s *aq_hw,
+                                                  u32 max_credit,
+                                                  u32 tc);
+
+/* set tx packet scheduler descriptor tc weight */
+void hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(struct aq_hw_s *aq_hw,
+                                              u32 tx_pkt_shed_desc_tc_weight,
+                                              u32 tc);
+
+/* set tx packet scheduler descriptor vm arbitration mode */
+void hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(struct aq_hw_s *aq_hw,
+                                                u32 arb_mode);
+
+/* set tx packet scheduler tc data max credit */
+void hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw,
+                                                  u32 max_credit,
+                                                  u32 tc);
+
+/* set tx packet scheduler tc data weight */
+void hw_atl_tps_tx_pkt_shed_tc_data_weight_set(struct aq_hw_s *aq_hw,
+                                              u32 tx_pkt_shed_tc_data_weight,
+                                              u32 tc);
+
+/* tx */
+
+/* set tx register reset disable */
+void hw_atl_tx_tx_reg_res_dis_set(struct aq_hw_s *aq_hw, u32 tx_reg_res_dis);
+
+/* msm */
+
+/* get register access status */
+u32 hw_atl_msm_reg_access_status_get(struct aq_hw_s *aq_hw);
+
+/* set register address for indirect address */
+void hw_atl_msm_reg_addr_for_indirect_addr_set(struct aq_hw_s *aq_hw,
+                                              u32 reg_addr_for_indirect_addr);
+
+/* set register read strobe */
+void hw_atl_msm_reg_rd_strobe_set(struct aq_hw_s *aq_hw, u32 reg_rd_strobe);
+
+/* get register read data */
+u32 hw_atl_msm_reg_rd_data_get(struct aq_hw_s *aq_hw);
+
+/* set register write data */
+void hw_atl_msm_reg_wr_data_set(struct aq_hw_s *aq_hw, u32 reg_wr_data);
+
+/* set register write strobe */
+void hw_atl_msm_reg_wr_strobe_set(struct aq_hw_s *aq_hw, u32 reg_wr_strobe);
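+
+/* Illustrative usage (a sketch, not part of this header): MSM registers
+ * are reached indirectly by loading the target address, pulsing a strobe
+ * and waiting on the access-status bit before fetching the data:
+ *
+ *   hw_atl_msm_reg_addr_for_indirect_addr_set(hw, msm_addr);
+ *   hw_atl_msm_reg_rd_strobe_set(hw, 1U);
+ *   while (hw_atl_msm_reg_access_status_get(hw) != 0U)
+ *           ;  poll here, with a timeout in real code
+ *   val = hw_atl_msm_reg_rd_data_get(hw);
+ *
+ * hw, msm_addr and val are placeholders, and the status polarity is an
+ * assumption; a write uses the wr_data/wr_strobe pair the same way.
+ */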
+
+/* pci */
+
+/* set pci register reset disable */
+void hw_atl_pci_pci_reg_res_dis_set(struct aq_hw_s *aq_hw, u32 pci_reg_res_dis);
+
+/* set uP Force Interrupt */
+void hw_atl_mcp_up_force_intr_set(struct aq_hw_s *aq_hw, u32 up_force_intr);
+
+#endif /* HW_ATL_LLH_H */
diff --git a/drivers/net/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/atlantic/hw_atl/hw_atl_llh_internal.h
new file mode 100644 (file)
index 0000000..27b9b9c
--- /dev/null
@@ -0,0 +1,2407 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0) */
+/* Copyright (C) 2014-2017 aQuantia Corporation. */
+
+/* File hw_atl_llh_internal.h: Preprocessor definitions
+ * for Atlantic registers.
+ */
+
+#ifndef HW_ATL_LLH_INTERNAL_H
+#define HW_ATL_LLH_INTERNAL_H
+
+/* global microprocessor semaphore definitions
+ * base address: 0x000003a0
+ * parameter: semaphore {s} | stride size 0x4 | range [0, 15]
+ */
+#define HW_ATL_GLB_CPU_SEM_ADR(semaphore)  (0x000003a0u + (semaphore) * 0x4)
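+/* For example, HW_ATL_GLB_CPU_SEM_ADR(4) expands to 0x000003b0u:
+ * base 0x3a0 plus semaphore index 4 times the 0x4 stride.
+ */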
+/* register address for bitfield rx dma good octet counter lsw [1f:0] */
+#define HW_ATL_STATS_RX_DMA_GOOD_OCTET_COUNTERLSW 0x00006808
+/* register address for bitfield rx dma good packet counter lsw [1f:0] */
+#define HW_ATL_STATS_RX_DMA_GOOD_PKT_COUNTERLSW 0x00006800
+/* register address for bitfield tx dma good octet counter lsw [1f:0] */
+#define HW_ATL_STATS_TX_DMA_GOOD_OCTET_COUNTERLSW 0x00008808
+/* register address for bitfield tx dma good packet counter lsw [1f:0] */
+#define HW_ATL_STATS_TX_DMA_GOOD_PKT_COUNTERLSW 0x00008800
+
+/* register address for bitfield rx dma good octet counter msw [3f:20] */
+#define HW_ATL_STATS_RX_DMA_GOOD_OCTET_COUNTERMSW 0x0000680c
+/* register address for bitfield rx dma good packet counter msw [3f:20] */
+#define HW_ATL_STATS_RX_DMA_GOOD_PKT_COUNTERMSW 0x00006804
+/* register address for bitfield tx dma good octet counter msw [3f:20] */
+#define HW_ATL_STATS_TX_DMA_GOOD_OCTET_COUNTERMSW 0x0000880c
+/* register address for bitfield tx dma good packet counter msw [3f:20] */
+#define HW_ATL_STATS_TX_DMA_GOOD_PKT_COUNTERMSW 0x00008804
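+
+/* Illustrative usage (a sketch, not part of this header): each DMA good
+ * counter is 64 bits wide, split across the lsw ([1f:0]) and msw ([3f:20])
+ * registers above, so a full read combines the two halves:
+ *
+ *   u64 rx_octets =
+ *       ((u64)aq_hw_read_reg(hw, HW_ATL_STATS_RX_DMA_GOOD_OCTET_COUNTERMSW)
+ *        << 32) |
+ *       aq_hw_read_reg(hw, HW_ATL_STATS_RX_DMA_GOOD_OCTET_COUNTERLSW);
+ *
+ * aq_hw_read_reg() is assumed to be the driver's 32-bit read helper; real
+ * code should re-read the msw half to guard against wrap between reads.
+ */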
+
+/* preprocessor definitions for msm rx errors counter register */
+#define HW_ATL_MAC_MSM_RX_ERRS_CNT_ADR 0x00000120u
+
+/* preprocessor definitions for msm rx unicast frames counter register */
+#define HW_ATL_MAC_MSM_RX_UCST_FRM_CNT_ADR 0x000000e0u
+
+/* preprocessor definitions for msm rx multicast frames counter register */
+#define HW_ATL_MAC_MSM_RX_MCST_FRM_CNT_ADR 0x000000e8u
+
+/* preprocessor definitions for msm rx broadcast frames counter register */
+#define HW_ATL_MAC_MSM_RX_BCST_FRM_CNT_ADR 0x000000f0u
+
+/* preprocessor definitions for msm rx broadcast octets counter register 1 */
+#define HW_ATL_MAC_MSM_RX_BCST_OCTETS_COUNTER1_ADR 0x000001b0u
+
+/* preprocessor definitions for msm rx broadcast octets counter register 2 */
+#define HW_ATL_MAC_MSM_RX_BCST_OCTETS_COUNTER2_ADR 0x000001b4u
+
+/* preprocessor definitions for msm rx unicast octets counter register 0 */
+#define HW_ATL_MAC_MSM_RX_UCST_OCTETS_COUNTER0_ADR 0x000001b8u
+
+/* preprocessor definitions for msm tx unicast frames counter register */
+#define HW_ATL_MAC_MSM_TX_UCST_FRM_CNT_ADR 0x00000108u
+
+/* preprocessor definitions for msm tx multicast frames counter register */
+#define HW_ATL_MAC_MSM_TX_MCST_FRM_CNT_ADR 0x00000110u
+
+/* preprocessor definitions for global mif identification */
+#define HW_ATL_GLB_MIF_ID_ADR 0x0000001cu
+
+/* register address for bitfield iamr_lsw[1f:0] */
+#define HW_ATL_ITR_IAMRLSW_ADR 0x00002090
+/* register address for bitfield rx dma drop packet counter [1f:0] */
+#define HW_ATL_RPB_RX_DMA_DROP_PKT_CNT_ADR 0x00006818
+
+/* register address for bitfield imcr_lsw[1f:0] */
+#define HW_ATL_ITR_IMCRLSW_ADR 0x00002070
+/* register address for bitfield imsr_lsw[1f:0] */
+#define HW_ATL_ITR_IMSRLSW_ADR 0x00002060
+/* register address for bitfield itr_reg_res_dsbl */
+#define HW_ATL_ITR_REG_RES_DSBL_ADR 0x00002300
+/* bitmask for bitfield itr_reg_res_dsbl */
+#define HW_ATL_ITR_REG_RES_DSBL_MSK 0x20000000
+/* lower bit position of bitfield itr_reg_res_dsbl */
+#define HW_ATL_ITR_REG_RES_DSBL_SHIFT 29
+/* register address for bitfield iscr_lsw[1f:0] */
+#define HW_ATL_ITR_ISCRLSW_ADR 0x00002050
+/* register address for bitfield isr_lsw[1f:0] */
+#define HW_ATL_ITR_ISRLSW_ADR 0x00002000
+/* register address for bitfield itr_reset */
+#define HW_ATL_ITR_RES_ADR 0x00002300
+/* bitmask for bitfield itr_reset */
+#define HW_ATL_ITR_RES_MSK 0x80000000
+/* lower bit position of bitfield itr_reset */
+#define HW_ATL_ITR_RES_SHIFT 31
+/* register address for bitfield dca{d}_cpuid[7:0] */
+#define HW_ATL_RDM_DCADCPUID_ADR(dca) (0x00006100 + (dca) * 0x4)
+/* bitmask for bitfield dca{d}_cpuid[7:0] */
+#define HW_ATL_RDM_DCADCPUID_MSK 0x000000ff
+/* lower bit position of bitfield dca{d}_cpuid[7:0] */
+#define HW_ATL_RDM_DCADCPUID_SHIFT 0
+/* register address for bitfield dca_en */
+#define HW_ATL_RDM_DCA_EN_ADR 0x00006180
+
+/* rx dca_en bitfield definitions
+ * preprocessor definitions for the bitfield "dca_en".
+ * port="pif_rdm_dca_en_i"
+ */
+
+/* register address for bitfield dca_en */
+#define HW_ATL_RDM_DCA_EN_ADR 0x00006180
+/* bitmask for bitfield dca_en */
+#define HW_ATL_RDM_DCA_EN_MSK 0x80000000
+/* inverted bitmask for bitfield dca_en */
+#define HW_ATL_RDM_DCA_EN_MSKN 0x7fffffff
+/* lower bit position of bitfield dca_en */
+#define HW_ATL_RDM_DCA_EN_SHIFT 31
+/* width of bitfield dca_en */
+#define HW_ATL_RDM_DCA_EN_WIDTH 1
+/* default value of bitfield dca_en */
+#define HW_ATL_RDM_DCA_EN_DEFAULT 0x1
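+
+/* Illustrative usage (a sketch, not part of this header): every bitfield
+ * in this file carries the same ADR/MSK/MSKN/SHIFT/WIDTH/DEFAULT set of
+ * definitions, and a field update is a read-modify-write built from them:
+ *
+ *   u32 v = aq_hw_read_reg(hw, HW_ATL_RDM_DCA_EN_ADR);
+ *   v &= HW_ATL_RDM_DCA_EN_MSKN;
+ *   v |= (en << HW_ATL_RDM_DCA_EN_SHIFT) & HW_ATL_RDM_DCA_EN_MSK;
+ *   aq_hw_write_reg(hw, HW_ATL_RDM_DCA_EN_ADR, v);
+ *
+ * aq_hw_read_reg()/aq_hw_write_reg() are assumed 32-bit access helpers;
+ * hw and en are placeholder names.
+ */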
+
+/* rx dca_mode[3:0] bitfield definitions
+ * preprocessor definitions for the bitfield "dca_mode[3:0]".
+ * port="pif_rdm_dca_mode_i[3:0]"
+ */
+
+/* register address for bitfield dca_mode[3:0] */
+#define HW_ATL_RDM_DCA_MODE_ADR 0x00006180
+/* bitmask for bitfield dca_mode[3:0] */
+#define HW_ATL_RDM_DCA_MODE_MSK 0x0000000f
+/* inverted bitmask for bitfield dca_mode[3:0] */
+#define HW_ATL_RDM_DCA_MODE_MSKN 0xfffffff0
+/* lower bit position of bitfield dca_mode[3:0] */
+#define HW_ATL_RDM_DCA_MODE_SHIFT 0
+/* width of bitfield dca_mode[3:0] */
+#define HW_ATL_RDM_DCA_MODE_WIDTH 4
+/* default value of bitfield dca_mode[3:0] */
+#define HW_ATL_RDM_DCA_MODE_DEFAULT 0x0
+
+/* rx desc{d}_data_size[4:0] bitfield definitions
+ * preprocessor definitions for the bitfield "desc{d}_data_size[4:0]".
+ * parameter: descriptor {d} | stride size 0x20 | range [0, 31]
+ * port="pif_rdm_desc0_data_size_i[4:0]"
+ */
+
+/* register address for bitfield desc{d}_data_size[4:0] */
+#define HW_ATL_RDM_DESCDDATA_SIZE_ADR(descriptor) \
+       (0x00005b18 + (descriptor) * 0x20)
+/* bitmask for bitfield desc{d}_data_size[4:0] */
+#define HW_ATL_RDM_DESCDDATA_SIZE_MSK 0x0000001f
+/* inverted bitmask for bitfield desc{d}_data_size[4:0] */
+#define HW_ATL_RDM_DESCDDATA_SIZE_MSKN 0xffffffe0
+/* lower bit position of bitfield desc{d}_data_size[4:0] */
+#define HW_ATL_RDM_DESCDDATA_SIZE_SHIFT 0
+/* width of bitfield desc{d}_data_size[4:0] */
+#define HW_ATL_RDM_DESCDDATA_SIZE_WIDTH 5
+/* default value of bitfield desc{d}_data_size[4:0] */
+#define HW_ATL_RDM_DESCDDATA_SIZE_DEFAULT 0x0
+
+/* rx dca{d}_desc_en bitfield definitions
+ * preprocessor definitions for the bitfield "dca{d}_desc_en".
+ * parameter: dca {d} | stride size 0x4 | range [0, 31]
+ * port="pif_rdm_dca_desc_en_i[0]"
+ */
+
+/* register address for bitfield dca{d}_desc_en */
+#define HW_ATL_RDM_DCADDESC_EN_ADR(dca) (0x00006100 + (dca) * 0x4)
+/* bitmask for bitfield dca{d}_desc_en */
+#define HW_ATL_RDM_DCADDESC_EN_MSK 0x80000000
+/* inverted bitmask for bitfield dca{d}_desc_en */
+#define HW_ATL_RDM_DCADDESC_EN_MSKN 0x7fffffff
+/* lower bit position of bitfield dca{d}_desc_en */
+#define HW_ATL_RDM_DCADDESC_EN_SHIFT 31
+/* width of bitfield dca{d}_desc_en */
+#define HW_ATL_RDM_DCADDESC_EN_WIDTH 1
+/* default value of bitfield dca{d}_desc_en */
+#define HW_ATL_RDM_DCADDESC_EN_DEFAULT 0x0
+
+/* rx desc{d}_en bitfield definitions
+ * preprocessor definitions for the bitfield "desc{d}_en".
+ * parameter: descriptor {d} | stride size 0x20 | range [0, 31]
+ * port="pif_rdm_desc_en_i[0]"
+ */
+
+/* register address for bitfield desc{d}_en */
+#define HW_ATL_RDM_DESCDEN_ADR(descriptor) (0x00005b08 + (descriptor) * 0x20)
+/* bitmask for bitfield desc{d}_en */
+#define HW_ATL_RDM_DESCDEN_MSK 0x80000000
+/* inverted bitmask for bitfield desc{d}_en */
+#define HW_ATL_RDM_DESCDEN_MSKN 0x7fffffff
+/* lower bit position of bitfield desc{d}_en */
+#define HW_ATL_RDM_DESCDEN_SHIFT 31
+/* width of bitfield desc{d}_en */
+#define HW_ATL_RDM_DESCDEN_WIDTH 1
+/* default value of bitfield desc{d}_en */
+#define HW_ATL_RDM_DESCDEN_DEFAULT 0x0
+
+/* rx desc{d}_hdr_size[4:0] bitfield definitions
+ * preprocessor definitions for the bitfield "desc{d}_hdr_size[4:0]".
+ * parameter: descriptor {d} | stride size 0x20 | range [0, 31]
+ * port="pif_rdm_desc0_hdr_size_i[4:0]"
+ */
+
+/* register address for bitfield desc{d}_hdr_size[4:0] */
+#define HW_ATL_RDM_DESCDHDR_SIZE_ADR(descriptor) \
+       (0x00005b18 + (descriptor) * 0x20)
+/* bitmask for bitfield desc{d}_hdr_size[4:0] */
+#define HW_ATL_RDM_DESCDHDR_SIZE_MSK 0x00001f00
+/* inverted bitmask for bitfield desc{d}_hdr_size[4:0] */
+#define HW_ATL_RDM_DESCDHDR_SIZE_MSKN 0xffffe0ff
+/* lower bit position of bitfield desc{d}_hdr_size[4:0] */
+#define HW_ATL_RDM_DESCDHDR_SIZE_SHIFT 8
+/* width of bitfield desc{d}_hdr_size[4:0] */
+#define HW_ATL_RDM_DESCDHDR_SIZE_WIDTH 5
+/* default value of bitfield desc{d}_hdr_size[4:0] */
+#define HW_ATL_RDM_DESCDHDR_SIZE_DEFAULT 0x0
+
+/* rx desc{d}_hdr_split bitfield definitions
+ * preprocessor definitions for the bitfield "desc{d}_hdr_split".
+ * parameter: descriptor {d} | stride size 0x20 | range [0, 31]
+ * port="pif_rdm_desc_hdr_split_i[0]"
+ */
+
+/* register address for bitfield desc{d}_hdr_split */
+#define HW_ATL_RDM_DESCDHDR_SPLIT_ADR(descriptor) \
+       (0x00005b08 + (descriptor) * 0x20)
+/* bitmask for bitfield desc{d}_hdr_split */
+#define HW_ATL_RDM_DESCDHDR_SPLIT_MSK 0x10000000
+/* inverted bitmask for bitfield desc{d}_hdr_split */
+#define HW_ATL_RDM_DESCDHDR_SPLIT_MSKN 0xefffffff
+/* lower bit position of bitfield desc{d}_hdr_split */
+#define HW_ATL_RDM_DESCDHDR_SPLIT_SHIFT 28
+/* width of bitfield desc{d}_hdr_split */
+#define HW_ATL_RDM_DESCDHDR_SPLIT_WIDTH 1
+/* default value of bitfield desc{d}_hdr_split */
+#define HW_ATL_RDM_DESCDHDR_SPLIT_DEFAULT 0x0
+
+/* rx desc{d}_hd[c:0] bitfield definitions
+ * preprocessor definitions for the bitfield "desc{d}_hd[c:0]".
+ * parameter: descriptor {d} | stride size 0x20 | range [0, 31]
+ * port="rdm_pif_desc0_hd_o[12:0]"
+ */
+
+/* register address for bitfield desc{d}_hd[c:0] */
+#define HW_ATL_RDM_DESCDHD_ADR(descriptor) (0x00005b0c + (descriptor) * 0x20)
+/* bitmask for bitfield desc{d}_hd[c:0] */
+#define HW_ATL_RDM_DESCDHD_MSK 0x00001fff
+/* inverted bitmask for bitfield desc{d}_hd[c:0] */
+#define HW_ATL_RDM_DESCDHD_MSKN 0xffffe000
+/* lower bit position of bitfield desc{d}_hd[c:0] */
+#define HW_ATL_RDM_DESCDHD_SHIFT 0
+/* width of bitfield desc{d}_hd[c:0] */
+#define HW_ATL_RDM_DESCDHD_WIDTH 13
+
+/* rx desc{d}_len[9:0] bitfield definitions
+ * preprocessor definitions for the bitfield "desc{d}_len[9:0]".
+ * parameter: descriptor {d} | stride size 0x20 | range [0, 31]
+ * port="pif_rdm_desc0_len_i[9:0]"
+ */
+
+/* register address for bitfield desc{d}_len[9:0] */
+#define HW_ATL_RDM_DESCDLEN_ADR(descriptor) (0x00005b08 + (descriptor) * 0x20)
+/* bitmask for bitfield desc{d}_len[9:0] */
+#define HW_ATL_RDM_DESCDLEN_MSK 0x00001ff8
+/* inverted bitmask for bitfield desc{d}_len[9:0] */
+#define HW_ATL_RDM_DESCDLEN_MSKN 0xffffe007
+/* lower bit position of bitfield desc{d}_len[9:0] */
+#define HW_ATL_RDM_DESCDLEN_SHIFT 3
+/* width of bitfield desc{d}_len[9:0] */
+#define HW_ATL_RDM_DESCDLEN_WIDTH 10
+/* default value of bitfield desc{d}_len[9:0] */
+#define HW_ATL_RDM_DESCDLEN_DEFAULT 0x0
+
+/* rx desc{d}_reset bitfield definitions
+ * preprocessor definitions for the bitfield "desc{d}_reset".
+ * parameter: descriptor {d} | stride size 0x20 | range [0, 31]
+ * port="pif_rdm_q_pf_res_i[0]"
+ */
+
+/* register address for bitfield desc{d}_reset */
+#define HW_ATL_RDM_DESCDRESET_ADR(descriptor) (0x00005b08 + (descriptor) * 0x20)
+/* bitmask for bitfield desc{d}_reset */
+#define HW_ATL_RDM_DESCDRESET_MSK 0x02000000
+/* inverted bitmask for bitfield desc{d}_reset */
+#define HW_ATL_RDM_DESCDRESET_MSKN 0xfdffffff
+/* lower bit position of bitfield desc{d}_reset */
+#define HW_ATL_RDM_DESCDRESET_SHIFT 25
+/* width of bitfield desc{d}_reset */
+#define HW_ATL_RDM_DESCDRESET_WIDTH 1
+/* default value of bitfield desc{d}_reset */
+#define HW_ATL_RDM_DESCDRESET_DEFAULT 0x0
+
+/* rx int_desc_wrb_en bitfield definitions
+ * preprocessor definitions for the bitfield "int_desc_wrb_en".
+ * port="pif_rdm_int_desc_wrb_en_i"
+ */
+
+/* register address for bitfield int_desc_wrb_en */
+#define HW_ATL_RDM_INT_DESC_WRB_EN_ADR 0x00005a30
+/* bitmask for bitfield int_desc_wrb_en */
+#define HW_ATL_RDM_INT_DESC_WRB_EN_MSK 0x00000004
+/* inverted bitmask for bitfield int_desc_wrb_en */
+#define HW_ATL_RDM_INT_DESC_WRB_EN_MSKN 0xfffffffb
+/* lower bit position of bitfield int_desc_wrb_en */
+#define HW_ATL_RDM_INT_DESC_WRB_EN_SHIFT 2
+/* width of bitfield int_desc_wrb_en */
+#define HW_ATL_RDM_INT_DESC_WRB_EN_WIDTH 1
+/* default value of bitfield int_desc_wrb_en */
+#define HW_ATL_RDM_INT_DESC_WRB_EN_DEFAULT 0x0
+
+/* rx dca{d}_hdr_en bitfield definitions
+ * preprocessor definitions for the bitfield "dca{d}_hdr_en".
+ * parameter: dca {d} | stride size 0x4 | range [0, 31]
+ * port="pif_rdm_dca_hdr_en_i[0]"
+ */
+
+/* register address for bitfield dca{d}_hdr_en */
+#define HW_ATL_RDM_DCADHDR_EN_ADR(dca) (0x00006100 + (dca) * 0x4)
+/* bitmask for bitfield dca{d}_hdr_en */
+#define HW_ATL_RDM_DCADHDR_EN_MSK 0x40000000
+/* inverted bitmask for bitfield dca{d}_hdr_en */
+#define HW_ATL_RDM_DCADHDR_EN_MSKN 0xbfffffff
+/* lower bit position of bitfield dca{d}_hdr_en */
+#define HW_ATL_RDM_DCADHDR_EN_SHIFT 30
+/* width of bitfield dca{d}_hdr_en */
+#define HW_ATL_RDM_DCADHDR_EN_WIDTH 1
+/* default value of bitfield dca{d}_hdr_en */
+#define HW_ATL_RDM_DCADHDR_EN_DEFAULT 0x0
+
+/* rx dca{d}_pay_en bitfield definitions
+ * preprocessor definitions for the bitfield "dca{d}_pay_en".
+ * parameter: dca {d} | stride size 0x4 | range [0, 31]
+ * port="pif_rdm_dca_pay_en_i[0]"
+ */
+
+/* register address for bitfield dca{d}_pay_en */
+#define HW_ATL_RDM_DCADPAY_EN_ADR(dca) (0x00006100 + (dca) * 0x4)
+/* bitmask for bitfield dca{d}_pay_en */
+#define HW_ATL_RDM_DCADPAY_EN_MSK 0x20000000
+/* inverted bitmask for bitfield dca{d}_pay_en */
+#define HW_ATL_RDM_DCADPAY_EN_MSKN 0xdfffffff
+/* lower bit position of bitfield dca{d}_pay_en */
+#define HW_ATL_RDM_DCADPAY_EN_SHIFT 29
+/* width of bitfield dca{d}_pay_en */
+#define HW_ATL_RDM_DCADPAY_EN_WIDTH 1
+/* default value of bitfield dca{d}_pay_en */
+#define HW_ATL_RDM_DCADPAY_EN_DEFAULT 0x0
+
+/* RX rdm_int_rim_en Bitfield Definitions
+ * Preprocessor definitions for the bitfield "rdm_int_rim_en".
+ * PORT="pif_rdm_int_rim_en_i"
+ */
+
+/* Register address for bitfield rdm_int_rim_en */
+#define HW_ATL_RDM_INT_RIM_EN_ADR 0x00005A30
+/* Bitmask for bitfield rdm_int_rim_en */
+#define HW_ATL_RDM_INT_RIM_EN_MSK 0x00000008
+/* Inverted bitmask for bitfield rdm_int_rim_en */
+#define HW_ATL_RDM_INT_RIM_EN_MSKN 0xFFFFFFF7
+/* Lower bit position of bitfield rdm_int_rim_en */
+#define HW_ATL_RDM_INT_RIM_EN_SHIFT 3
+/* Width of bitfield rdm_int_rim_en */
+#define HW_ATL_RDM_INT_RIM_EN_WIDTH 1
+/* Default value of bitfield rdm_int_rim_en */
+#define HW_ATL_RDM_INT_RIM_EN_DEFAULT 0x0
+
+/* general interrupt mapping register definitions
+ * preprocessor definitions for general interrupt mapping register
+ * base address: 0x00002180
+ * parameter: regidx {f} | stride size 0x4 | range [0, 3]
+ */
+#define HW_ATL_GEN_INTR_MAP_ADR(regidx) (0x00002180u + (regidx) * 0x4)
+
+/* general interrupt status register definitions
+ * preprocessor definitions for general interrupt status register
+ * address: 0x000021A4
+ */
+
+#define HW_ATL_GEN_INTR_STAT_ADR 0x000021A4U
+
+/* interrupt global control register definitions
+ * preprocessor definitions for interrupt global control register
+ * address: 0x00002300
+ */
+#define HW_ATL_INTR_GLB_CTL_ADR 0x00002300u
+
+/* interrupt throttle register definitions
+ * preprocessor definitions for interrupt throttle register
+ * base address: 0x00002800
+ * parameter: throttle {t} | stride size 0x4 | range [0, 31]
+ */
+#define HW_ATL_INTR_THR_ADR(throttle) (0x00002800u + (throttle) * 0x4)
+
+/* rx dma descriptor base address lsw definitions
+ * preprocessor definitions for rx dma descriptor base address lsw
+ * base address: 0x00005b00
+ * parameter: descriptor {d} | stride size 0x20 | range [0, 31]
+ */
+#define HW_ATL_RX_DMA_DESC_BASE_ADDRLSW_ADR(descriptor) \
+       (0x00005b00u + (descriptor) * 0x20)
+
+/* rx dma descriptor base address msw definitions
+ * preprocessor definitions for rx dma descriptor base address msw
+ * base address: 0x00005b04
+ * parameter: descriptor {d} | stride size 0x20 | range [0, 31]
+ */
+#define HW_ATL_RX_DMA_DESC_BASE_ADDRMSW_ADR(descriptor) \
+       (0x00005b04u + (descriptor) * 0x20)
+
+/* rx dma descriptor status register definitions
+ * preprocessor definitions for rx dma descriptor status register
+ * base address: 0x00005b14
+ * parameter: descriptor {d} | stride size 0x20 | range [0, 31]
+ */
+#define HW_ATL_RX_DMA_DESC_STAT_ADR(descriptor) \
+       (0x00005b14u + (descriptor) * 0x20)
+
+/* rx dma descriptor tail pointer register definitions
+ * preprocessor definitions for rx dma descriptor tail pointer register
+ * base address: 0x00005b10
+ * parameter: descriptor {d} | stride size 0x20 | range [0, 31]
+ */
+#define HW_ATL_RX_DMA_DESC_TAIL_PTR_ADR(descriptor) \
+       (0x00005b10u + (descriptor) * 0x20)
+
+/* rx interrupt moderation control register definitions
+ * Preprocessor definitions for RX Interrupt Moderation Control Register
+ * Base Address: 0x00005A40
+ * Parameter: RIM {R} | stride size 0x4 | range [0, 31]
+ */
+#define HW_ATL_RX_INTR_MODERATION_CTL_ADR(rim) (0x00005A40u + (rim) * 0x4)
+
+/* rx filter multicast filter mask register definitions
+ * preprocessor definitions for rx filter multicast filter mask register
+ * address: 0x00005270
+ */
+#define HW_ATL_RX_FLR_MCST_FLR_MSK_ADR 0x00005270u
+
+/* rx filter multicast filter register definitions
+ * preprocessor definitions for rx filter multicast filter register
+ * base address: 0x00005250
+ * parameter: filter {f} | stride size 0x4 | range [0, 7]
+ */
+#define HW_ATL_RX_FLR_MCST_FLR_ADR(filter) (0x00005250u + (filter) * 0x4)
+
+/* RX Filter RSS Control Register 1 Definitions
+ * Preprocessor definitions for RX Filter RSS Control Register 1
+ * Address: 0x000054C0
+ */
+#define HW_ATL_RX_FLR_RSS_CONTROL1_ADR 0x000054C0u
+
+/* RX Filter Control Register 2 Definitions
+ * Preprocessor definitions for RX Filter Control Register 2
+ * Address: 0x00005104
+ */
+#define HW_ATL_RX_FLR_CONTROL2_ADR 0x00005104u
+
+/* tx tx dma debug control [1f:0] bitfield definitions
+ * preprocessor definitions for the bitfield "tx dma debug control [1f:0]".
+ * port="pif_tdm_debug_cntl_i[31:0]"
+ */
+
+/* register address for bitfield tx dma debug control [1f:0] */
+#define HW_ATL_TDM_TX_DMA_DEBUG_CTL_ADR 0x00008920
+/* bitmask for bitfield tx dma debug control [1f:0] */
+#define HW_ATL_TDM_TX_DMA_DEBUG_CTL_MSK 0xffffffff
+/* inverted bitmask for bitfield tx dma debug control [1f:0] */
+#define HW_ATL_TDM_TX_DMA_DEBUG_CTL_MSKN 0x00000000
+/* lower bit position of bitfield tx dma debug control [1f:0] */
+#define HW_ATL_TDM_TX_DMA_DEBUG_CTL_SHIFT 0
+/* width of bitfield tx dma debug control [1f:0] */
+#define HW_ATL_TDM_TX_DMA_DEBUG_CTL_WIDTH 32
+/* default value of bitfield tx dma debug control [1f:0] */
+#define HW_ATL_TDM_TX_DMA_DEBUG_CTL_DEFAULT 0x0
+
+/* tx dma descriptor base address lsw definitions
+ * preprocessor definitions for tx dma descriptor base address lsw
+ * base address: 0x00007c00
+ * parameter: descriptor {d} | stride size 0x40 | range [0, 31]
+ */
+#define HW_ATL_TX_DMA_DESC_BASE_ADDRLSW_ADR(descriptor) \
+       (0x00007c00u + (descriptor) * 0x40)
+
+/* tx dma descriptor tail pointer register definitions
+ * preprocessor definitions for tx dma descriptor tail pointer register
+ * base address: 0x00007c10
+ * parameter: descriptor {d} | stride size 0x40 | range [0, 31]
+ */
+#define HW_ATL_TX_DMA_DESC_TAIL_PTR_ADR(descriptor) \
+       (0x00007c10u + (descriptor) * 0x40)
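+
+/* Illustrative usage (a sketch, not part of this header): the tail-pointer
+ * registers act as doorbells; once new descriptors are filled in host
+ * memory, the producer index is published with a single write:
+ *
+ *   aq_hw_write_reg(hw, HW_ATL_TX_DMA_DESC_TAIL_PTR_ADR(ring), tail_idx);
+ *
+ * aq_hw_write_reg() is assumed to be the driver's 32-bit write helper;
+ * hw, ring and tail_idx are placeholder names.
+ */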
+
+/* rx dma_sys_loopback bitfield definitions
+ * preprocessor definitions for the bitfield "dma_sys_loopback".
+ * port="pif_rpb_dma_sys_lbk_i"
+ */
+
+/* register address for bitfield dma_sys_loopback */
+#define HW_ATL_RPB_DMA_SYS_LBK_ADR 0x00005000
+/* bitmask for bitfield dma_sys_loopback */
+#define HW_ATL_RPB_DMA_SYS_LBK_MSK 0x00000040
+/* inverted bitmask for bitfield dma_sys_loopback */
+#define HW_ATL_RPB_DMA_SYS_LBK_MSKN 0xffffffbf
+/* lower bit position of bitfield dma_sys_loopback */
+#define HW_ATL_RPB_DMA_SYS_LBK_SHIFT 6
+/* width of bitfield dma_sys_loopback */
+#define HW_ATL_RPB_DMA_SYS_LBK_WIDTH 1
+/* default value of bitfield dma_sys_loopback */
+#define HW_ATL_RPB_DMA_SYS_LBK_DEFAULT 0x0
+
+/* rx rx_tc_mode bitfield definitions
+ * preprocessor definitions for the bitfield "rx_tc_mode".
+ * port="pif_rpb_rx_tc_mode_i,pif_rpf_rx_tc_mode_i"
+ */
+
+/* register address for bitfield rx_tc_mode */
+#define HW_ATL_RPB_RPF_RX_TC_MODE_ADR 0x00005700
+/* bitmask for bitfield rx_tc_mode */
+#define HW_ATL_RPB_RPF_RX_TC_MODE_MSK 0x00000100
+/* inverted bitmask for bitfield rx_tc_mode */
+#define HW_ATL_RPB_RPF_RX_TC_MODE_MSKN 0xfffffeff
+/* lower bit position of bitfield rx_tc_mode */
+#define HW_ATL_RPB_RPF_RX_TC_MODE_SHIFT 8
+/* width of bitfield rx_tc_mode */
+#define HW_ATL_RPB_RPF_RX_TC_MODE_WIDTH 1
+/* default value of bitfield rx_tc_mode */
+#define HW_ATL_RPB_RPF_RX_TC_MODE_DEFAULT 0x0
+
+/* rx rx_buf_en bitfield definitions
+ * preprocessor definitions for the bitfield "rx_buf_en".
+ * port="pif_rpb_rx_buf_en_i"
+ */
+
+/* register address for bitfield rx_buf_en */
+#define HW_ATL_RPB_RX_BUF_EN_ADR 0x00005700
+/* bitmask for bitfield rx_buf_en */
+#define HW_ATL_RPB_RX_BUF_EN_MSK 0x00000001
+/* inverted bitmask for bitfield rx_buf_en */
+#define HW_ATL_RPB_RX_BUF_EN_MSKN 0xfffffffe
+/* lower bit position of bitfield rx_buf_en */
+#define HW_ATL_RPB_RX_BUF_EN_SHIFT 0
+/* width of bitfield rx_buf_en */
+#define HW_ATL_RPB_RX_BUF_EN_WIDTH 1
+/* default value of bitfield rx_buf_en */
+#define HW_ATL_RPB_RX_BUF_EN_DEFAULT 0x0
+
+/* rx rx{b}_hi_thresh[d:0] bitfield definitions
+ * preprocessor definitions for the bitfield "rx{b}_hi_thresh[d:0]".
+ * parameter: buffer {b} | stride size 0x10 | range [0, 7]
+ * port="pif_rpb_rx0_hi_thresh_i[13:0]"
+ */
+
+/* register address for bitfield rx{b}_hi_thresh[d:0] */
+#define HW_ATL_RPB_RXBHI_THRESH_ADR(buffer) (0x00005714 + (buffer) * 0x10)
+/* bitmask for bitfield rx{b}_hi_thresh[d:0] */
+#define HW_ATL_RPB_RXBHI_THRESH_MSK 0x3fff0000
+/* inverted bitmask for bitfield rx{b}_hi_thresh[d:0] */
+#define HW_ATL_RPB_RXBHI_THRESH_MSKN 0xc000ffff
+/* lower bit position of bitfield rx{b}_hi_thresh[d:0] */
+#define HW_ATL_RPB_RXBHI_THRESH_SHIFT 16
+/* width of bitfield rx{b}_hi_thresh[d:0] */
+#define HW_ATL_RPB_RXBHI_THRESH_WIDTH 14
+/* default value of bitfield rx{b}_hi_thresh[d:0] */
+#define HW_ATL_RPB_RXBHI_THRESH_DEFAULT 0x0
+
+/* rx rx{b}_lo_thresh[d:0] bitfield definitions
+ * preprocessor definitions for the bitfield "rx{b}_lo_thresh[d:0]".
+ * parameter: buffer {b} | stride size 0x10 | range [0, 7]
+ * port="pif_rpb_rx0_lo_thresh_i[13:0]"
+ */
+
+/* register address for bitfield rx{b}_lo_thresh[d:0] */
+#define HW_ATL_RPB_RXBLO_THRESH_ADR(buffer) (0x00005714 + (buffer) * 0x10)
+/* bitmask for bitfield rx{b}_lo_thresh[d:0] */
+#define HW_ATL_RPB_RXBLO_THRESH_MSK 0x00003fff
+/* inverted bitmask for bitfield rx{b}_lo_thresh[d:0] */
+#define HW_ATL_RPB_RXBLO_THRESH_MSKN 0xffffc000
+/* lower bit position of bitfield rx{b}_lo_thresh[d:0] */
+#define HW_ATL_RPB_RXBLO_THRESH_SHIFT 0
+/* width of bitfield rx{b}_lo_thresh[d:0] */
+#define HW_ATL_RPB_RXBLO_THRESH_WIDTH 14
+/* default value of bitfield rx{b}_lo_thresh[d:0] */
+#define HW_ATL_RPB_RXBLO_THRESH_DEFAULT 0x0
+
+/* rx rx_fc_mode[1:0] bitfield definitions
+ * preprocessor definitions for the bitfield "rx_fc_mode[1:0]".
+ * port="pif_rpb_rx_fc_mode_i[1:0]"
+ */
+
+/* register address for bitfield rx_fc_mode[1:0] */
+#define HW_ATL_RPB_RX_FC_MODE_ADR 0x00005700
+/* bitmask for bitfield rx_fc_mode[1:0] */
+#define HW_ATL_RPB_RX_FC_MODE_MSK 0x00000030
+/* inverted bitmask for bitfield rx_fc_mode[1:0] */
+#define HW_ATL_RPB_RX_FC_MODE_MSKN 0xffffffcf
+/* lower bit position of bitfield rx_fc_mode[1:0] */
+#define HW_ATL_RPB_RX_FC_MODE_SHIFT 4
+/* width of bitfield rx_fc_mode[1:0] */
+#define HW_ATL_RPB_RX_FC_MODE_WIDTH 2
+/* default value of bitfield rx_fc_mode[1:0] */
+#define HW_ATL_RPB_RX_FC_MODE_DEFAULT 0x0
+
+/* rx rx{b}_buf_size[8:0] bitfield definitions
+ * preprocessor definitions for the bitfield "rx{b}_buf_size[8:0]".
+ * parameter: buffer {b} | stride size 0x10 | range [0, 7]
+ * port="pif_rpb_rx0_buf_size_i[8:0]"
+ */
+
+/* register address for bitfield rx{b}_buf_size[8:0] */
+#define HW_ATL_RPB_RXBBUF_SIZE_ADR(buffer) (0x00005710 + (buffer) * 0x10)
+/* bitmask for bitfield rx{b}_buf_size[8:0] */
+#define HW_ATL_RPB_RXBBUF_SIZE_MSK 0x000001ff
+/* inverted bitmask for bitfield rx{b}_buf_size[8:0] */
+#define HW_ATL_RPB_RXBBUF_SIZE_MSKN 0xfffffe00
+/* lower bit position of bitfield rx{b}_buf_size[8:0] */
+#define HW_ATL_RPB_RXBBUF_SIZE_SHIFT 0
+/* width of bitfield rx{b}_buf_size[8:0] */
+#define HW_ATL_RPB_RXBBUF_SIZE_WIDTH 9
+/* default value of bitfield rx{b}_buf_size[8:0] */
+#define HW_ATL_RPB_RXBBUF_SIZE_DEFAULT 0x0
+
+/* rx rx{b}_xoff_en bitfield definitions
+ * preprocessor definitions for the bitfield "rx{b}_xoff_en".
+ * parameter: buffer {b} | stride size 0x10 | range [0, 7]
+ * port="pif_rpb_rx_xoff_en_i[0]"
+ */
+
+/* register address for bitfield rx{b}_xoff_en */
+#define HW_ATL_RPB_RXBXOFF_EN_ADR(buffer) (0x00005714 + (buffer) * 0x10)
+/* bitmask for bitfield rx{b}_xoff_en */
+#define HW_ATL_RPB_RXBXOFF_EN_MSK 0x80000000
+/* inverted bitmask for bitfield rx{b}_xoff_en */
+#define HW_ATL_RPB_RXBXOFF_EN_MSKN 0x7fffffff
+/* lower bit position of bitfield rx{b}_xoff_en */
+#define HW_ATL_RPB_RXBXOFF_EN_SHIFT 31
+/* width of bitfield rx{b}_xoff_en */
+#define HW_ATL_RPB_RXBXOFF_EN_WIDTH 1
+/* default value of bitfield rx{b}_xoff_en */
+#define HW_ATL_RPB_RXBXOFF_EN_DEFAULT 0x0
+
+/* rx l2_bc_thresh[f:0] bitfield definitions
+ * preprocessor definitions for the bitfield "l2_bc_thresh[f:0]".
+ * port="pif_rpf_l2_bc_thresh_i[15:0]"
+ */
+
+/* register address for bitfield l2_bc_thresh[f:0] */
+#define HW_ATL_RPFL2BC_THRESH_ADR 0x00005100
+/* bitmask for bitfield l2_bc_thresh[f:0] */
+#define HW_ATL_RPFL2BC_THRESH_MSK 0xffff0000
+/* inverted bitmask for bitfield l2_bc_thresh[f:0] */
+#define HW_ATL_RPFL2BC_THRESH_MSKN 0x0000ffff
+/* lower bit position of bitfield l2_bc_thresh[f:0] */
+#define HW_ATL_RPFL2BC_THRESH_SHIFT 16
+/* width of bitfield l2_bc_thresh[f:0] */
+#define HW_ATL_RPFL2BC_THRESH_WIDTH 16
+/* default value of bitfield l2_bc_thresh[f:0] */
+#define HW_ATL_RPFL2BC_THRESH_DEFAULT 0x0
+
+/* rx l2_bc_en bitfield definitions
+ * preprocessor definitions for the bitfield "l2_bc_en".
+ * port="pif_rpf_l2_bc_en_i"
+ */
+
+/* register address for bitfield l2_bc_en */
+#define HW_ATL_RPFL2BC_EN_ADR 0x00005100
+/* bitmask for bitfield l2_bc_en */
+#define HW_ATL_RPFL2BC_EN_MSK 0x00000001
+/* inverted bitmask for bitfield l2_bc_en */
+#define HW_ATL_RPFL2BC_EN_MSKN 0xfffffffe
+/* lower bit position of bitfield l2_bc_en */
+#define HW_ATL_RPFL2BC_EN_SHIFT 0
+/* width of bitfield l2_bc_en */
+#define HW_ATL_RPFL2BC_EN_WIDTH 1
+/* default value of bitfield l2_bc_en */
+#define HW_ATL_RPFL2BC_EN_DEFAULT 0x0
+
+/* rx l2_bc_act[2:0] bitfield definitions
+ * preprocessor definitions for the bitfield "l2_bc_act[2:0]".
+ * port="pif_rpf_l2_bc_act_i[2:0]"
+ */
+
+/* register address for bitfield l2_bc_act[2:0] */
+#define HW_ATL_RPFL2BC_ACT_ADR 0x00005100
+/* bitmask for bitfield l2_bc_act[2:0] */
+#define HW_ATL_RPFL2BC_ACT_MSK 0x00007000
+/* inverted bitmask for bitfield l2_bc_act[2:0] */
+#define HW_ATL_RPFL2BC_ACT_MSKN 0xffff8fff
+/* lower bit position of bitfield l2_bc_act[2:0] */
+#define HW_ATL_RPFL2BC_ACT_SHIFT 12
+/* width of bitfield l2_bc_act[2:0] */
+#define HW_ATL_RPFL2BC_ACT_WIDTH 3
+/* default value of bitfield l2_bc_act[2:0] */
+#define HW_ATL_RPFL2BC_ACT_DEFAULT 0x0
+
+/* rx l2_mc_en{f} bitfield definitions
+ * preprocessor definitions for the bitfield "l2_mc_en{f}".
+ * parameter: filter {f} | stride size 0x4 | range [0, 7]
+ * port="pif_rpf_l2_mc_en_i[0]"
+ */
+
+/* register address for bitfield l2_mc_en{f} */
+#define HW_ATL_RPFL2MC_ENF_ADR(filter) (0x00005250 + (filter) * 0x4)
+/* bitmask for bitfield l2_mc_en{f} */
+#define HW_ATL_RPFL2MC_ENF_MSK 0x80000000
+/* inverted bitmask for bitfield l2_mc_en{f} */
+#define HW_ATL_RPFL2MC_ENF_MSKN 0x7fffffff
+/* lower bit position of bitfield l2_mc_en{f} */
+#define HW_ATL_RPFL2MC_ENF_SHIFT 31
+/* width of bitfield l2_mc_en{f} */
+#define HW_ATL_RPFL2MC_ENF_WIDTH 1
+/* default value of bitfield l2_mc_en{f} */
+#define HW_ATL_RPFL2MC_ENF_DEFAULT 0x0
+
+/* rx l2_promis_mode bitfield definitions
+ * preprocessor definitions for the bitfield "l2_promis_mode".
+ * port="pif_rpf_l2_promis_mode_i"
+ */
+
+/* register address for bitfield l2_promis_mode */
+#define HW_ATL_RPFL2PROMIS_MODE_ADR 0x00005100
+/* bitmask for bitfield l2_promis_mode */
+#define HW_ATL_RPFL2PROMIS_MODE_MSK 0x00000008
+/* inverted bitmask for bitfield l2_promis_mode */
+#define HW_ATL_RPFL2PROMIS_MODE_MSKN 0xfffffff7
+/* lower bit position of bitfield l2_promis_mode */
+#define HW_ATL_RPFL2PROMIS_MODE_SHIFT 3
+/* width of bitfield l2_promis_mode */
+#define HW_ATL_RPFL2PROMIS_MODE_WIDTH 1
+/* default value of bitfield l2_promis_mode */
+#define HW_ATL_RPFL2PROMIS_MODE_DEFAULT 0x0
+
+/* rx l2_uc_act{f}[2:0] bitfield definitions
+ * preprocessor definitions for the bitfield "l2_uc_act{f}[2:0]".
+ * parameter: filter {f} | stride size 0x8 | range [0, 37]
+ * port="pif_rpf_l2_uc_act0_i[2:0]"
+ */
+
+/* register address for bitfield l2_uc_act{f}[2:0] */
+#define HW_ATL_RPFL2UC_ACTF_ADR(filter) (0x00005114 + (filter) * 0x8)
+/* bitmask for bitfield l2_uc_act{f}[2:0] */
+#define HW_ATL_RPFL2UC_ACTF_MSK 0x00070000
+/* inverted bitmask for bitfield l2_uc_act{f}[2:0] */
+#define HW_ATL_RPFL2UC_ACTF_MSKN 0xfff8ffff
+/* lower bit position of bitfield l2_uc_act{f}[2:0] */
+#define HW_ATL_RPFL2UC_ACTF_SHIFT 16
+/* width of bitfield l2_uc_act{f}[2:0] */
+#define HW_ATL_RPFL2UC_ACTF_WIDTH 3
+/* default value of bitfield l2_uc_act{f}[2:0] */
+#define HW_ATL_RPFL2UC_ACTF_DEFAULT 0x0
+
+/* rx l2_uc_en{f} bitfield definitions
+ * preprocessor definitions for the bitfield "l2_uc_en{f}".
+ * parameter: filter {f} | stride size 0x8 | range [0, 37]
+ * port="pif_rpf_l2_uc_en_i[0]"
+ */
+
+/* register address for bitfield l2_uc_en{f} */
+#define HW_ATL_RPFL2UC_ENF_ADR(filter) (0x00005114 + (filter) * 0x8)
+/* bitmask for bitfield l2_uc_en{f} */
+#define HW_ATL_RPFL2UC_ENF_MSK 0x80000000
+/* inverted bitmask for bitfield l2_uc_en{f} */
+#define HW_ATL_RPFL2UC_ENF_MSKN 0x7fffffff
+/* lower bit position of bitfield l2_uc_en{f} */
+#define HW_ATL_RPFL2UC_ENF_SHIFT 31
+/* width of bitfield l2_uc_en{f} */
+#define HW_ATL_RPFL2UC_ENF_WIDTH 1
+/* default value of bitfield l2_uc_en{f} */
+#define HW_ATL_RPFL2UC_ENF_DEFAULT 0x0
+
+/* register address for bitfield l2_uc_da{f}_lsw[1f:0] */
+#define HW_ATL_RPFL2UC_DAFLSW_ADR(filter) (0x00005110 + (filter) * 0x8)
+/* register address for bitfield l2_uc_da{f}_msw[f:0] */
+#define HW_ATL_RPFL2UC_DAFMSW_ADR(filter) (0x00005114 + (filter) * 0x8)
+/* bitmask for bitfield l2_uc_da{f}_msw[f:0] */
+#define HW_ATL_RPFL2UC_DAFMSW_MSK 0x0000ffff
+/* lower bit position of bitfield l2_uc_da{f}_msw[f:0] */
+#define HW_ATL_RPFL2UC_DAFMSW_SHIFT 0
+
+/* rx l2_mc_accept_all bitfield definitions
+ * Preprocessor definitions for the bitfield "l2_mc_accept_all".
+ * PORT="pif_rpf_l2_mc_all_accept_i"
+ */
+
+/* Register address for bitfield l2_mc_accept_all */
+#define HW_ATL_RPFL2MC_ACCEPT_ALL_ADR 0x00005270
+/* Bitmask for bitfield l2_mc_accept_all */
+#define HW_ATL_RPFL2MC_ACCEPT_ALL_MSK 0x00004000
+/* Inverted bitmask for bitfield l2_mc_accept_all */
+#define HW_ATL_RPFL2MC_ACCEPT_ALL_MSKN 0xFFFFBFFF
+/* Lower bit position of bitfield l2_mc_accept_all */
+#define HW_ATL_RPFL2MC_ACCEPT_ALL_SHIFT 14
+/* Width of bitfield l2_mc_accept_all */
+#define HW_ATL_RPFL2MC_ACCEPT_ALL_WIDTH 1
+/* Default value of bitfield l2_mc_accept_all */
+#define HW_ATL_RPFL2MC_ACCEPT_ALL_DEFAULT 0x0
+
+/* width of bitfield rx_tc_up{t}[2:0] */
+#define HW_ATL_RPF_RPB_RX_TC_UPT_WIDTH 3
+/* default value of bitfield rx_tc_up{t}[2:0] */
+#define HW_ATL_RPF_RPB_RX_TC_UPT_DEFAULT 0x0
+
+/* rx rss_key_addr[4:0] bitfield definitions
+ * preprocessor definitions for the bitfield "rss_key_addr[4:0]".
+ * port="pif_rpf_rss_key_addr_i[4:0]"
+ */
+
+/* register address for bitfield rss_key_addr[4:0] */
+#define HW_ATL_RPF_RSS_KEY_ADDR_ADR 0x000054d0
+/* bitmask for bitfield rss_key_addr[4:0] */
+#define HW_ATL_RPF_RSS_KEY_ADDR_MSK 0x0000001f
+/* inverted bitmask for bitfield rss_key_addr[4:0] */
+#define HW_ATL_RPF_RSS_KEY_ADDR_MSKN 0xffffffe0
+/* lower bit position of bitfield rss_key_addr[4:0] */
+#define HW_ATL_RPF_RSS_KEY_ADDR_SHIFT 0
+/* width of bitfield rss_key_addr[4:0] */
+#define HW_ATL_RPF_RSS_KEY_ADDR_WIDTH 5
+/* default value of bitfield rss_key_addr[4:0] */
+#define HW_ATL_RPF_RSS_KEY_ADDR_DEFAULT 0x0
+
+/* rx rss_key_wr_data[1f:0] bitfield definitions
+ * preprocessor definitions for the bitfield "rss_key_wr_data[1f:0]".
+ * port="pif_rpf_rss_key_wr_data_i[31:0]"
+ */
+
+/* register address for bitfield rss_key_wr_data[1f:0] */
+#define HW_ATL_RPF_RSS_KEY_WR_DATA_ADR 0x000054d4
+/* bitmask for bitfield rss_key_wr_data[1f:0] */
+#define HW_ATL_RPF_RSS_KEY_WR_DATA_MSK 0xffffffff
+/* inverted bitmask for bitfield rss_key_wr_data[1f:0] */
+#define HW_ATL_RPF_RSS_KEY_WR_DATA_MSKN 0x00000000
+/* lower bit position of bitfield rss_key_wr_data[1f:0] */
+#define HW_ATL_RPF_RSS_KEY_WR_DATA_SHIFT 0
+/* width of bitfield rss_key_wr_data[1f:0] */
+#define HW_ATL_RPF_RSS_KEY_WR_DATA_WIDTH 32
+/* default value of bitfield rss_key_wr_data[1f:0] */
+#define HW_ATL_RPF_RSS_KEY_WR_DATA_DEFAULT 0x0
+
+/* rx rss_key_wr_en_i bitfield definitions
+ * preprocessor definitions for the bitfield "rss_key_wr_en_i".
+ * port="pif_rpf_rss_key_wr_en_i"
+ */
+
+/* register address for bitfield rss_key_wr_en_i */
+#define HW_ATL_RPF_RSS_KEY_WR_ENI_ADR 0x000054d0
+/* bitmask for bitfield rss_key_wr_en_i */
+#define HW_ATL_RPF_RSS_KEY_WR_ENI_MSK 0x00000020
+/* inverted bitmask for bitfield rss_key_wr_en_i */
+#define HW_ATL_RPF_RSS_KEY_WR_ENI_MSKN 0xffffffdf
+/* lower bit position of bitfield rss_key_wr_en_i */
+#define HW_ATL_RPF_RSS_KEY_WR_ENI_SHIFT 5
+/* width of bitfield rss_key_wr_en_i */
+#define HW_ATL_RPF_RSS_KEY_WR_ENI_WIDTH 1
+/* default value of bitfield rss_key_wr_en_i */
+#define HW_ATL_RPF_RSS_KEY_WR_ENI_DEFAULT 0x0
+
+/* rx rss_redir_addr[3:0] bitfield definitions
+ * preprocessor definitions for the bitfield "rss_redir_addr[3:0]".
+ * port="pif_rpf_rss_redir_addr_i[3:0]"
+ */
+
+/* register address for bitfield rss_redir_addr[3:0] */
+#define HW_ATL_RPF_RSS_REDIR_ADDR_ADR 0x000054e0
+/* bitmask for bitfield rss_redir_addr[3:0] */
+#define HW_ATL_RPF_RSS_REDIR_ADDR_MSK 0x0000000f
+/* inverted bitmask for bitfield rss_redir_addr[3:0] */
+#define HW_ATL_RPF_RSS_REDIR_ADDR_MSKN 0xfffffff0
+/* lower bit position of bitfield rss_redir_addr[3:0] */
+#define HW_ATL_RPF_RSS_REDIR_ADDR_SHIFT 0
+/* width of bitfield rss_redir_addr[3:0] */
+#define HW_ATL_RPF_RSS_REDIR_ADDR_WIDTH 4
+/* default value of bitfield rss_redir_addr[3:0] */
+#define HW_ATL_RPF_RSS_REDIR_ADDR_DEFAULT 0x0
+
+/* rx rss_redir_wr_data[f:0] bitfield definitions
+ * preprocessor definitions for the bitfield "rss_redir_wr_data[f:0]".
+ * port="pif_rpf_rss_redir_wr_data_i[15:0]"
+ */
+
+/* register address for bitfield rss_redir_wr_data[f:0] */
+#define HW_ATL_RPF_RSS_REDIR_WR_DATA_ADR 0x000054e4
+/* bitmask for bitfield rss_redir_wr_data[f:0] */
+#define HW_ATL_RPF_RSS_REDIR_WR_DATA_MSK 0x0000ffff
+/* inverted bitmask for bitfield rss_redir_wr_data[f:0] */
+#define HW_ATL_RPF_RSS_REDIR_WR_DATA_MSKN 0xffff0000
+/* lower bit position of bitfield rss_redir_wr_data[f:0] */
+#define HW_ATL_RPF_RSS_REDIR_WR_DATA_SHIFT 0
+/* width of bitfield rss_redir_wr_data[f:0] */
+#define HW_ATL_RPF_RSS_REDIR_WR_DATA_WIDTH 16
+/* default value of bitfield rss_redir_wr_data[f:0] */
+#define HW_ATL_RPF_RSS_REDIR_WR_DATA_DEFAULT 0x0
+
+/* rx rss_redir_wr_en_i bitfield definitions
+ * preprocessor definitions for the bitfield "rss_redir_wr_en_i".
+ * port="pif_rpf_rss_redir_wr_en_i"
+ */
+
+/* register address for bitfield rss_redir_wr_en_i */
+#define HW_ATL_RPF_RSS_REDIR_WR_ENI_ADR 0x000054e0
+/* bitmask for bitfield rss_redir_wr_en_i */
+#define HW_ATL_RPF_RSS_REDIR_WR_ENI_MSK 0x00000010
+/* inverted bitmask for bitfield rss_redir_wr_en_i */
+#define HW_ATL_RPF_RSS_REDIR_WR_ENI_MSKN 0xffffffef
+/* lower bit position of bitfield rss_redir_wr_en_i */
+#define HW_ATL_RPF_RSS_REDIR_WR_ENI_SHIFT 4
+/* width of bitfield rss_redir_wr_en_i */
+#define HW_ATL_RPF_RSS_REDIR_WR_ENI_WIDTH 1
+/* default value of bitfield rss_redir_wr_en_i */
+#define HW_ATL_RPF_RSS_REDIR_WR_ENI_DEFAULT 0x0
+
+/* rx tpo_rpf_sys_loopback bitfield definitions
+ * preprocessor definitions for the bitfield "tpo_rpf_sys_loopback".
+ * port="pif_rpf_tpo_pkt_sys_lbk_i"
+ */
+
+/* register address for bitfield tpo_rpf_sys_loopback */
+#define HW_ATL_RPF_TPO_RPF_SYS_LBK_ADR 0x00005000
+/* bitmask for bitfield tpo_rpf_sys_loopback */
+#define HW_ATL_RPF_TPO_RPF_SYS_LBK_MSK 0x00000100
+/* inverted bitmask for bitfield tpo_rpf_sys_loopback */
+#define HW_ATL_RPF_TPO_RPF_SYS_LBK_MSKN 0xfffffeff
+/* lower bit position of bitfield tpo_rpf_sys_loopback */
+#define HW_ATL_RPF_TPO_RPF_SYS_LBK_SHIFT 8
+/* width of bitfield tpo_rpf_sys_loopback */
+#define HW_ATL_RPF_TPO_RPF_SYS_LBK_WIDTH 1
+/* default value of bitfield tpo_rpf_sys_loopback */
+#define HW_ATL_RPF_TPO_RPF_SYS_LBK_DEFAULT 0x0
+
+/* rx vl_inner_tpid[f:0] bitfield definitions
+ * preprocessor definitions for the bitfield "vl_inner_tpid[f:0]".
+ * port="pif_rpf_vl_inner_tpid_i[15:0]"
+ */
+
+/* register address for bitfield vl_inner_tpid[f:0] */
+#define HW_ATL_RPF_VL_INNER_TPID_ADR 0x00005284
+/* bitmask for bitfield vl_inner_tpid[f:0] */
+#define HW_ATL_RPF_VL_INNER_TPID_MSK 0x0000ffff
+/* inverted bitmask for bitfield vl_inner_tpid[f:0] */
+#define HW_ATL_RPF_VL_INNER_TPID_MSKN 0xffff0000
+/* lower bit position of bitfield vl_inner_tpid[f:0] */
+#define HW_ATL_RPF_VL_INNER_TPID_SHIFT 0
+/* width of bitfield vl_inner_tpid[f:0] */
+#define HW_ATL_RPF_VL_INNER_TPID_WIDTH 16
+/* default value of bitfield vl_inner_tpid[f:0] */
+#define HW_ATL_RPF_VL_INNER_TPID_DEFAULT 0x8100
+
+/* rx vl_outer_tpid[f:0] bitfield definitions
+ * preprocessor definitions for the bitfield "vl_outer_tpid[f:0]".
+ * port="pif_rpf_vl_outer_tpid_i[15:0]"
+ */
+
+/* register address for bitfield vl_outer_tpid[f:0] */
+#define HW_ATL_RPF_VL_OUTER_TPID_ADR 0x00005284
+/* bitmask for bitfield vl_outer_tpid[f:0] */
+#define HW_ATL_RPF_VL_OUTER_TPID_MSK 0xffff0000
+/* inverted bitmask for bitfield vl_outer_tpid[f:0] */
+#define HW_ATL_RPF_VL_OUTER_TPID_MSKN 0x0000ffff
+/* lower bit position of bitfield vl_outer_tpid[f:0] */
+#define HW_ATL_RPF_VL_OUTER_TPID_SHIFT 16
+/* width of bitfield vl_outer_tpid[f:0] */
+#define HW_ATL_RPF_VL_OUTER_TPID_WIDTH 16
+/* default value of bitfield vl_outer_tpid[f:0] */
+#define HW_ATL_RPF_VL_OUTER_TPID_DEFAULT 0x88a8
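+
+/* (The inner/outer defaults above are the standard IEEE 802.1Q C-tag TPID
+ * 0x8100 and the IEEE 802.1ad S-tag TPID 0x88a8.)
+ */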
+
+/* rx vl_promis_mode bitfield definitions
+ * preprocessor definitions for the bitfield "vl_promis_mode".
+ * port="pif_rpf_vl_promis_mode_i"
+ */
+
+/* register address for bitfield vl_promis_mode */
+#define HW_ATL_RPF_VL_PROMIS_MODE_ADR 0x00005280
+/* bitmask for bitfield vl_promis_mode */
+#define HW_ATL_RPF_VL_PROMIS_MODE_MSK 0x00000002
+/* inverted bitmask for bitfield vl_promis_mode */
+#define HW_ATL_RPF_VL_PROMIS_MODE_MSKN 0xfffffffd
+/* lower bit position of bitfield vl_promis_mode */
+#define HW_ATL_RPF_VL_PROMIS_MODE_SHIFT 1
+/* width of bitfield vl_promis_mode */
+#define HW_ATL_RPF_VL_PROMIS_MODE_WIDTH 1
+/* default value of bitfield vl_promis_mode */
+#define HW_ATL_RPF_VL_PROMIS_MODE_DEFAULT 0x0
+
+/* RX vl_accept_untagged_mode Bitfield Definitions
+ * Preprocessor definitions for the bitfield "vl_accept_untagged_mode".
+ * PORT="pif_rpf_vl_accept_untagged_i"
+ */
+
+/* Register address for bitfield vl_accept_untagged_mode */
+#define HW_ATL_RPF_VL_ACCEPT_UNTAGGED_MODE_ADR 0x00005280
+/* Bitmask for bitfield vl_accept_untagged_mode */
+#define HW_ATL_RPF_VL_ACCEPT_UNTAGGED_MODE_MSK 0x00000004
+/* Inverted bitmask for bitfield vl_accept_untagged_mode */
+#define HW_ATL_RPF_VL_ACCEPT_UNTAGGED_MODE_MSKN 0xFFFFFFFB
+/* Lower bit position of bitfield vl_accept_untagged_mode */
+#define HW_ATL_RPF_VL_ACCEPT_UNTAGGED_MODE_SHIFT 2
+/* Width of bitfield vl_accept_untagged_mode */
+#define HW_ATL_RPF_VL_ACCEPT_UNTAGGED_MODE_WIDTH 1
+/* Default value of bitfield vl_accept_untagged_mode */
+#define HW_ATL_RPF_VL_ACCEPT_UNTAGGED_MODE_DEFAULT 0x0
+
+/* RX vl_untagged_act[2:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "vl_untagged_act[2:0]".
+ * PORT="pif_rpf_vl_untagged_act_i[2:0]"
+ */
+
+/* Register address for bitfield vl_untagged_act[2:0] */
+#define HW_ATL_RPF_VL_UNTAGGED_ACT_ADR 0x00005280
+/* Bitmask for bitfield vl_untagged_act[2:0] */
+#define HW_ATL_RPF_VL_UNTAGGED_ACT_MSK 0x00000038
+/* Inverted bitmask for bitfield vl_untagged_act[2:0] */
+#define HW_ATL_RPF_VL_UNTAGGED_ACT_MSKN 0xFFFFFFC7
+/* Lower bit position of bitfield vl_untagged_act[2:0] */
+#define HW_ATL_RPF_VL_UNTAGGED_ACT_SHIFT 3
+/* Width of bitfield vl_untagged_act[2:0] */
+#define HW_ATL_RPF_VL_UNTAGGED_ACT_WIDTH 3
+/* Default value of bitfield vl_untagged_act[2:0] */
+#define HW_ATL_RPF_VL_UNTAGGED_ACT_DEFAULT 0x0
+
+/* RX vl_en{F} Bitfield Definitions
+ * Preprocessor definitions for the bitfield "vl_en{F}".
+ * Parameter: filter {F} | stride size 0x4 | range [0, 15]
+ * PORT="pif_rpf_vl_en_i[0]"
+ */
+
+/* Register address for bitfield vl_en{F} */
+#define HW_ATL_RPF_VL_EN_F_ADR(filter) (0x00005290 + (filter) * 0x4)
+/* Bitmask for bitfield vl_en{F} */
+#define HW_ATL_RPF_VL_EN_F_MSK 0x80000000
+/* Inverted bitmask for bitfield vl_en{F} */
+#define HW_ATL_RPF_VL_EN_F_MSKN 0x7FFFFFFF
+/* Lower bit position of bitfield vl_en{F} */
+#define HW_ATL_RPF_VL_EN_F_SHIFT 31
+/* Width of bitfield vl_en{F} */
+#define HW_ATL_RPF_VL_EN_F_WIDTH 1
+/* Default value of bitfield vl_en{F} */
+#define HW_ATL_RPF_VL_EN_F_DEFAULT 0x0
+
+/* RX vl_act{F}[2:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "vl_act{F}[2:0]".
+ * Parameter: filter {F} | stride size 0x4 | range [0, 15]
+ * PORT="pif_rpf_vl_act0_i[2:0]"
+ */
+
+/* Register address for bitfield vl_act{F}[2:0] */
+#define HW_ATL_RPF_VL_ACT_F_ADR(filter) (0x00005290 + (filter) * 0x4)
+/* Bitmask for bitfield vl_act{F}[2:0] */
+#define HW_ATL_RPF_VL_ACT_F_MSK 0x00070000
+/* Inverted bitmask for bitfield vl_act{F}[2:0] */
+#define HW_ATL_RPF_VL_ACT_F_MSKN 0xFFF8FFFF
+/* Lower bit position of bitfield vl_act{F}[2:0] */
+#define HW_ATL_RPF_VL_ACT_F_SHIFT 16
+/* Width of bitfield vl_act{F}[2:0] */
+#define HW_ATL_RPF_VL_ACT_F_WIDTH 3
+/* Default value of bitfield vl_act{F}[2:0] */
+#define HW_ATL_RPF_VL_ACT_F_DEFAULT 0x0
+
+/* RX vl_id{F}[B:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "vl_id{F}[B:0]".
+ * Parameter: filter {F} | stride size 0x4 | range [0, 15]
+ * PORT="pif_rpf_vl_id0_i[11:0]"
+ */
+
+/* Register address for bitfield vl_id{F}[B:0] */
+#define HW_ATL_RPF_VL_ID_F_ADR(filter) (0x00005290 + (filter) * 0x4)
+/* Bitmask for bitfield vl_id{F}[B:0] */
+#define HW_ATL_RPF_VL_ID_F_MSK 0x00000FFF
+/* Inverted bitmask for bitfield vl_id{F}[B:0] */
+#define HW_ATL_RPF_VL_ID_F_MSKN 0xFFFFF000
+/* Lower bit position of bitfield vl_id{F}[B:0] */
+#define HW_ATL_RPF_VL_ID_F_SHIFT 0
+/* Width of bitfield vl_id{F}[B:0] */
+#define HW_ATL_RPF_VL_ID_F_WIDTH 12
+/* Default value of bitfield vl_id{F}[B:0] */
+#define HW_ATL_RPF_VL_ID_F_DEFAULT 0x0
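+
+/* Illustrative usage (a sketch, not part of this header): vl_en{F},
+ * vl_act{F} and vl_id{F} all live in the same per-filter register at
+ * 0x5290 + 4 * filter, so one entry can be composed as a single word:
+ *
+ *   u32 v = HW_ATL_RPF_VL_EN_F_MSK
+ *         | ((act << HW_ATL_RPF_VL_ACT_F_SHIFT) & HW_ATL_RPF_VL_ACT_F_MSK)
+ *         | ((vid << HW_ATL_RPF_VL_ID_F_SHIFT) & HW_ATL_RPF_VL_ID_F_MSK);
+ *   aq_hw_write_reg(hw, HW_ATL_RPF_VL_EN_F_ADR(filter), v);
+ *
+ * The per-field setters declared in hw_atl_llh.h would program these one
+ * at a time; this single-write form only shows how the masks compose.
+ * hw, act, vid and filter are placeholders, and aq_hw_write_reg() is an
+ * assumed 32-bit write helper.
+ */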
+
+/* RX et_en{F} Bitfield Definitions
+ * Preprocessor definitions for the bitfield "et_en{F}".
+ * Parameter: filter {F} | stride size 0x4 | range [0, 15]
+ * PORT="pif_rpf_et_en_i[0]"
+ */
+
+/* Register address for bitfield et_en{F} */
+#define HW_ATL_RPF_ET_EN_F_ADR(filter) (0x00005300 + (filter) * 0x4)
+/* Bitmask for bitfield et_en{F} */
+#define HW_ATL_RPF_ET_EN_F_MSK 0x80000000
+/* Inverted bitmask for bitfield et_en{F} */
+#define HW_ATL_RPF_ET_EN_F_MSKN 0x7FFFFFFF
+/* Lower bit position of bitfield et_en{F} */
+#define HW_ATL_RPF_ET_EN_F_SHIFT 31
+/* Width of bitfield et_en{F} */
+#define HW_ATL_RPF_ET_EN_F_WIDTH 1
+/* Default value of bitfield et_en{F} */
+#define HW_ATL_RPF_ET_EN_F_DEFAULT 0x0
+
+/* rx et_en{f} bitfield definitions
+ * preprocessor definitions for the bitfield "et_en{f}".
+ * parameter: filter {f} | stride size 0x4 | range [0, 15]
+ * port="pif_rpf_et_en_i[0]"
+ */
+
+/* register address for bitfield et_en{f} */
+#define HW_ATL_RPF_ET_ENF_ADR(filter) (0x00005300 + (filter) * 0x4)
+/* bitmask for bitfield et_en{f} */
+#define HW_ATL_RPF_ET_ENF_MSK 0x80000000
+/* inverted bitmask for bitfield et_en{f} */
+#define HW_ATL_RPF_ET_ENF_MSKN 0x7fffffff
+/* lower bit position of bitfield et_en{f} */
+#define HW_ATL_RPF_ET_ENF_SHIFT 31
+/* width of bitfield et_en{f} */
+#define HW_ATL_RPF_ET_ENF_WIDTH 1
+/* default value of bitfield et_en{f} */
+#define HW_ATL_RPF_ET_ENF_DEFAULT 0x0
+
+/* rx et_up{f}_en bitfield definitions
+ * preprocessor definitions for the bitfield "et_up{f}_en".
+ * parameter: filter {f} | stride size 0x4 | range [0, 15]
+ * port="pif_rpf_et_up_en_i[0]"
+ */
+
+/* register address for bitfield et_up{f}_en */
+#define HW_ATL_RPF_ET_UPFEN_ADR(filter) (0x00005300 + (filter) * 0x4)
+/* bitmask for bitfield et_up{f}_en */
+#define HW_ATL_RPF_ET_UPFEN_MSK 0x40000000
+/* inverted bitmask for bitfield et_up{f}_en */
+#define HW_ATL_RPF_ET_UPFEN_MSKN 0xbfffffff
+/* lower bit position of bitfield et_up{f}_en */
+#define HW_ATL_RPF_ET_UPFEN_SHIFT 30
+/* width of bitfield et_up{f}_en */
+#define HW_ATL_RPF_ET_UPFEN_WIDTH 1
+/* default value of bitfield et_up{f}_en */
+#define HW_ATL_RPF_ET_UPFEN_DEFAULT 0x0
+
+/* rx et_rxq{f}_en bitfield definitions
+ * preprocessor definitions for the bitfield "et_rxq{f}_en".
+ * parameter: filter {f} | stride size 0x4 | range [0, 15]
+ * port="pif_rpf_et_rxq_en_i[0]"
+ */
+
+/* register address for bitfield et_rxq{f}_en */
+#define HW_ATL_RPF_ET_RXQFEN_ADR(filter) (0x00005300 + (filter) * 0x4)
+/* bitmask for bitfield et_rxq{f}_en */
+#define HW_ATL_RPF_ET_RXQFEN_MSK 0x20000000
+/* inverted bitmask for bitfield et_rxq{f}_en */
+#define HW_ATL_RPF_ET_RXQFEN_MSKN 0xdfffffff
+/* lower bit position of bitfield et_rxq{f}_en */
+#define HW_ATL_RPF_ET_RXQFEN_SHIFT 29
+/* width of bitfield et_rxq{f}_en */
+#define HW_ATL_RPF_ET_RXQFEN_WIDTH 1
+/* default value of bitfield et_rxq{f}_en */
+#define HW_ATL_RPF_ET_RXQFEN_DEFAULT 0x0
+
+/* rx et_up{f}[2:0] bitfield definitions
+ * preprocessor definitions for the bitfield "et_up{f}[2:0]".
+ * parameter: filter {f} | stride size 0x4 | range [0, 15]
+ * port="pif_rpf_et_up0_i[2:0]"
+ */
+
+/* register address for bitfield et_up{f}[2:0] */
+#define HW_ATL_RPF_ET_UPF_ADR(filter) (0x00005300 + (filter) * 0x4)
+/* bitmask for bitfield et_up{f}[2:0] */
+#define HW_ATL_RPF_ET_UPF_MSK 0x1c000000
+/* inverted bitmask for bitfield et_up{f}[2:0] */
+#define HW_ATL_RPF_ET_UPF_MSKN 0xe3ffffff
+/* lower bit position of bitfield et_up{f}[2:0] */
+#define HW_ATL_RPF_ET_UPF_SHIFT 26
+/* width of bitfield et_up{f}[2:0] */
+#define HW_ATL_RPF_ET_UPF_WIDTH 3
+/* default value of bitfield et_up{f}[2:0] */
+#define HW_ATL_RPF_ET_UPF_DEFAULT 0x0
+
+/* rx et_rxq{f}[4:0] bitfield definitions
+ * preprocessor definitions for the bitfield "et_rxq{f}[4:0]".
+ * parameter: filter {f} | stride size 0x4 | range [0, 15]
+ * port="pif_rpf_et_rxq0_i[4:0]"
+ */
+
+/* register address for bitfield et_rxq{f}[4:0] */
+#define HW_ATL_RPF_ET_RXQF_ADR(filter) (0x00005300 + (filter) * 0x4)
+/* bitmask for bitfield et_rxq{f}[4:0] */
+#define HW_ATL_RPF_ET_RXQF_MSK 0x01f00000
+/* inverted bitmask for bitfield et_rxq{f}[4:0] */
+#define HW_ATL_RPF_ET_RXQF_MSKN 0xfe0fffff
+/* lower bit position of bitfield et_rxq{f}[4:0] */
+#define HW_ATL_RPF_ET_RXQF_SHIFT 20
+/* width of bitfield et_rxq{f}[4:0] */
+#define HW_ATL_RPF_ET_RXQF_WIDTH 5
+/* default value of bitfield et_rxq{f}[4:0] */
+#define HW_ATL_RPF_ET_RXQF_DEFAULT 0x0
+
+/* rx et_mng_rxq{f} bitfield definitions
+ * preprocessor definitions for the bitfield "et_mng_rxq{f}".
+ * parameter: filter {f} | stride size 0x4 | range [0, 15]
+ * port="pif_rpf_et_mng_rxq_i[0]"
+ */
+
+/* register address for bitfield et_mng_rxq{f} */
+#define HW_ATL_RPF_ET_MNG_RXQF_ADR(filter) (0x00005300 + (filter) * 0x4)
+/* bitmask for bitfield et_mng_rxq{f} */
+#define HW_ATL_RPF_ET_MNG_RXQF_MSK 0x00080000
+/* inverted bitmask for bitfield et_mng_rxq{f} */
+#define HW_ATL_RPF_ET_MNG_RXQF_MSKN 0xfff7ffff
+/* lower bit position of bitfield et_mng_rxq{f} */
+#define HW_ATL_RPF_ET_MNG_RXQF_SHIFT 19
+/* width of bitfield et_mng_rxq{f} */
+#define HW_ATL_RPF_ET_MNG_RXQF_WIDTH 1
+/* default value of bitfield et_mng_rxq{f} */
+#define HW_ATL_RPF_ET_MNG_RXQF_DEFAULT 0x0
+
+/* rx et_act{f}[2:0] bitfield definitions
+ * preprocessor definitions for the bitfield "et_act{f}[2:0]".
+ * parameter: filter {f} | stride size 0x4 | range [0, 15]
+ * port="pif_rpf_et_act0_i[2:0]"
+ */
+
+/* register address for bitfield et_act{f}[2:0] */
+#define HW_ATL_RPF_ET_ACTF_ADR(filter) (0x00005300 + (filter) * 0x4)
+/* bitmask for bitfield et_act{f}[2:0] */
+#define HW_ATL_RPF_ET_ACTF_MSK 0x00070000
+/* inverted bitmask for bitfield et_act{f}[2:0] */
+#define HW_ATL_RPF_ET_ACTF_MSKN 0xfff8ffff
+/* lower bit position of bitfield et_act{f}[2:0] */
+#define HW_ATL_RPF_ET_ACTF_SHIFT 16
+/* width of bitfield et_act{f}[2:0] */
+#define HW_ATL_RPF_ET_ACTF_WIDTH 3
+/* default value of bitfield et_act{f}[2:0] */
+#define HW_ATL_RPF_ET_ACTF_DEFAULT 0x0
+
+/* rx et_val{f}[f:0] bitfield definitions
+ * preprocessor definitions for the bitfield "et_val{f}[f:0]".
+ * parameter: filter {f} | stride size 0x4 | range [0, 15]
+ * port="pif_rpf_et_val0_i[15:0]"
+ */
+
+/* register address for bitfield et_val{f}[f:0] */
+#define HW_ATL_RPF_ET_VALF_ADR(filter) (0x00005300 + (filter) * 0x4)
+/* bitmask for bitfield et_val{f}[f:0] */
+#define HW_ATL_RPF_ET_VALF_MSK 0x0000ffff
+/* inverted bitmask for bitfield et_val{f}[f:0] */
+#define HW_ATL_RPF_ET_VALF_MSKN 0xffff0000
+/* lower bit position of bitfield et_val{f}[f:0] */
+#define HW_ATL_RPF_ET_VALF_SHIFT 0
+/* width of bitfield et_val{f}[f:0] */
+#define HW_ATL_RPF_ET_VALF_WIDTH 16
+/* default value of bitfield et_val{f}[f:0] */
+#define HW_ATL_RPF_ET_VALF_DEFAULT 0x0
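+
+/* Editorial sketch (hypothetical): all et_* fields of one EtherType
+ * filter share the register at 0x5300 + F * 0x4. A minimal filter that
+ * matches EtherType "etype" and steers it to queue "rxq" could be
+ * composed as:
+ *
+ *     u32 reg = HW_ATL_RPF_ET_ENF_MSK | HW_ATL_RPF_ET_RXQFEN_MSK |
+ *               ((rxq << HW_ATL_RPF_ET_RXQF_SHIFT) &
+ *                HW_ATL_RPF_ET_RXQF_MSK) |
+ *               ((etype << HW_ATL_RPF_ET_VALF_SHIFT) &
+ *                HW_ATL_RPF_ET_VALF_MSK);
+ *     aq_hw_write_reg(hw, HW_ATL_RPF_ET_ENF_ADR(filter), reg);
+ *
+ * A real filter may also need et_act{f} programmed, depending on the
+ * desired action.
+ */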
+
+/* rx ipv4_chk_en bitfield definitions
+ * preprocessor definitions for the bitfield "ipv4_chk_en".
+ * port="pif_rpo_ipv4_chk_en_i"
+ */
+
+/* register address for bitfield ipv4_chk_en */
+#define HW_ATL_RPO_IPV4CHK_EN_ADR 0x00005580
+/* bitmask for bitfield ipv4_chk_en */
+#define HW_ATL_RPO_IPV4CHK_EN_MSK 0x00000002
+/* inverted bitmask for bitfield ipv4_chk_en */
+#define HW_ATL_RPO_IPV4CHK_EN_MSKN 0xfffffffd
+/* lower bit position of bitfield ipv4_chk_en */
+#define HW_ATL_RPO_IPV4CHK_EN_SHIFT 1
+/* width of bitfield ipv4_chk_en */
+#define HW_ATL_RPO_IPV4CHK_EN_WIDTH 1
+/* default value of bitfield ipv4_chk_en */
+#define HW_ATL_RPO_IPV4CHK_EN_DEFAULT 0x0
+
+/* rx desc{d}_vl_strip bitfield definitions
+ * preprocessor definitions for the bitfield "desc{d}_vl_strip".
+ * parameter: descriptor {d} | stride size 0x20 | range [0, 31]
+ * port="pif_rpo_desc_vl_strip_i[0]"
+ */
+
+/* register address for bitfield desc{d}_vl_strip */
+#define HW_ATL_RPO_DESCDVL_STRIP_ADR(descriptor) \
+       (0x00005b08 + (descriptor) * 0x20)
+/* bitmask for bitfield desc{d}_vl_strip */
+#define HW_ATL_RPO_DESCDVL_STRIP_MSK 0x20000000
+/* inverted bitmask for bitfield desc{d}_vl_strip */
+#define HW_ATL_RPO_DESCDVL_STRIP_MSKN 0xdfffffff
+/* lower bit position of bitfield desc{d}_vl_strip */
+#define HW_ATL_RPO_DESCDVL_STRIP_SHIFT 29
+/* width of bitfield desc{d}_vl_strip */
+#define HW_ATL_RPO_DESCDVL_STRIP_WIDTH 1
+/* default value of bitfield desc{d}_vl_strip */
+#define HW_ATL_RPO_DESCDVL_STRIP_DEFAULT 0x0
+
+/* rx l4_chk_en bitfield definitions
+ * preprocessor definitions for the bitfield "l4_chk_en".
+ * port="pif_rpo_l4_chk_en_i"
+ */
+
+/* register address for bitfield l4_chk_en */
+#define HW_ATL_RPOL4CHK_EN_ADR 0x00005580
+/* bitmask for bitfield l4_chk_en */
+#define HW_ATL_RPOL4CHK_EN_MSK 0x00000001
+/* inverted bitmask for bitfield l4_chk_en */
+#define HW_ATL_RPOL4CHK_EN_MSKN 0xfffffffe
+/* lower bit position of bitfield l4_chk_en */
+#define HW_ATL_RPOL4CHK_EN_SHIFT 0
+/* width of bitfield l4_chk_en */
+#define HW_ATL_RPOL4CHK_EN_WIDTH 1
+/* default value of bitfield l4_chk_en */
+#define HW_ATL_RPOL4CHK_EN_DEFAULT 0x0
+
+/* rx reg_res_dsbl bitfield definitions
+ * preprocessor definitions for the bitfield "reg_res_dsbl".
+ * port="pif_rx_reg_res_dsbl_i"
+ */
+
+/* register address for bitfield reg_res_dsbl */
+#define HW_ATL_RX_REG_RES_DSBL_ADR 0x00005000
+/* bitmask for bitfield reg_res_dsbl */
+#define HW_ATL_RX_REG_RES_DSBL_MSK 0x20000000
+/* inverted bitmask for bitfield reg_res_dsbl */
+#define HW_ATL_RX_REG_RES_DSBL_MSKN 0xdfffffff
+/* lower bit position of bitfield reg_res_dsbl */
+#define HW_ATL_RX_REG_RES_DSBL_SHIFT 29
+/* width of bitfield reg_res_dsbl */
+#define HW_ATL_RX_REG_RES_DSBL_WIDTH 1
+/* default value of bitfield reg_res_dsbl */
+#define HW_ATL_RX_REG_RES_DSBL_DEFAULT 0x1
+
+/* tx dca{d}_cpuid[7:0] bitfield definitions
+ * preprocessor definitions for the bitfield "dca{d}_cpuid[7:0]".
+ * parameter: dca {d} | stride size 0x4 | range [0, 31]
+ * port="pif_tdm_dca0_cpuid_i[7:0]"
+ */
+
+/* register address for bitfield dca{d}_cpuid[7:0] */
+#define HW_ATL_TDM_DCADCPUID_ADR(dca) (0x00008400 + (dca) * 0x4)
+/* bitmask for bitfield dca{d}_cpuid[7:0] */
+#define HW_ATL_TDM_DCADCPUID_MSK 0x000000ff
+/* inverted bitmask for bitfield dca{d}_cpuid[7:0] */
+#define HW_ATL_TDM_DCADCPUID_MSKN 0xffffff00
+/* lower bit position of bitfield dca{d}_cpuid[7:0] */
+#define HW_ATL_TDM_DCADCPUID_SHIFT 0
+/* width of bitfield dca{d}_cpuid[7:0] */
+#define HW_ATL_TDM_DCADCPUID_WIDTH 8
+/* default value of bitfield dca{d}_cpuid[7:0] */
+#define HW_ATL_TDM_DCADCPUID_DEFAULT 0x0
+
+/* tx lso_en[1f:0] bitfield definitions
+ * preprocessor definitions for the bitfield "lso_en[1f:0]".
+ * port="pif_tdm_lso_en_i[31:0]"
+ */
+
+/* register address for bitfield lso_en[1f:0] */
+#define HW_ATL_TDM_LSO_EN_ADR 0x00007810
+/* bitmask for bitfield lso_en[1f:0] */
+#define HW_ATL_TDM_LSO_EN_MSK 0xffffffff
+/* inverted bitmask for bitfield lso_en[1f:0] */
+#define HW_ATL_TDM_LSO_EN_MSKN 0x00000000
+/* lower bit position of bitfield lso_en[1f:0] */
+#define HW_ATL_TDM_LSO_EN_SHIFT 0
+/* width of bitfield lso_en[1f:0] */
+#define HW_ATL_TDM_LSO_EN_WIDTH 32
+/* default value of bitfield lso_en[1f:0] */
+#define HW_ATL_TDM_LSO_EN_DEFAULT 0x0
+
+/* tx dca_en bitfield definitions
+ * preprocessor definitions for the bitfield "dca_en".
+ * port="pif_tdm_dca_en_i"
+ */
+
+/* register address for bitfield dca_en */
+#define HW_ATL_TDM_DCA_EN_ADR 0x00008480
+/* bitmask for bitfield dca_en */
+#define HW_ATL_TDM_DCA_EN_MSK 0x80000000
+/* inverted bitmask for bitfield dca_en */
+#define HW_ATL_TDM_DCA_EN_MSKN 0x7fffffff
+/* lower bit position of bitfield dca_en */
+#define HW_ATL_TDM_DCA_EN_SHIFT 31
+/* width of bitfield dca_en */
+#define HW_ATL_TDM_DCA_EN_WIDTH 1
+/* default value of bitfield dca_en */
+#define HW_ATL_TDM_DCA_EN_DEFAULT 0x1
+
+/* tx dca_mode[3:0] bitfield definitions
+ * preprocessor definitions for the bitfield "dca_mode[3:0]".
+ * port="pif_tdm_dca_mode_i[3:0]"
+ */
+
+/* register address for bitfield dca_mode[3:0] */
+#define HW_ATL_TDM_DCA_MODE_ADR 0x00008480
+/* bitmask for bitfield dca_mode[3:0] */
+#define HW_ATL_TDM_DCA_MODE_MSK 0x0000000f
+/* inverted bitmask for bitfield dca_mode[3:0] */
+#define HW_ATL_TDM_DCA_MODE_MSKN 0xfffffff0
+/* lower bit position of bitfield dca_mode[3:0] */
+#define HW_ATL_TDM_DCA_MODE_SHIFT 0
+/* width of bitfield dca_mode[3:0] */
+#define HW_ATL_TDM_DCA_MODE_WIDTH 4
+/* default value of bitfield dca_mode[3:0] */
+#define HW_ATL_TDM_DCA_MODE_DEFAULT 0x0
+
+/* tx dca{d}_desc_en bitfield definitions
+ * preprocessor definitions for the bitfield "dca{d}_desc_en".
+ * parameter: dca {d} | stride size 0x4 | range [0, 31]
+ * port="pif_tdm_dca_desc_en_i[0]"
+ */
+
+/* register address for bitfield dca{d}_desc_en */
+#define HW_ATL_TDM_DCADDESC_EN_ADR(dca) (0x00008400 + (dca) * 0x4)
+/* bitmask for bitfield dca{d}_desc_en */
+#define HW_ATL_TDM_DCADDESC_EN_MSK 0x80000000
+/* inverted bitmask for bitfield dca{d}_desc_en */
+#define HW_ATL_TDM_DCADDESC_EN_MSKN 0x7fffffff
+/* lower bit position of bitfield dca{d}_desc_en */
+#define HW_ATL_TDM_DCADDESC_EN_SHIFT 31
+/* width of bitfield dca{d}_desc_en */
+#define HW_ATL_TDM_DCADDESC_EN_WIDTH 1
+/* default value of bitfield dca{d}_desc_en */
+#define HW_ATL_TDM_DCADDESC_EN_DEFAULT 0x0
+
+/* tx desc{d}_en bitfield definitions
+ * preprocessor definitions for the bitfield "desc{d}_en".
+ * parameter: descriptor {d} | stride size 0x40 | range [0, 31]
+ * port="pif_tdm_desc_en_i[0]"
+ */
+
+/* register address for bitfield desc{d}_en */
+#define HW_ATL_TDM_DESCDEN_ADR(descriptor) (0x00007c08 + (descriptor) * 0x40)
+/* bitmask for bitfield desc{d}_en */
+#define HW_ATL_TDM_DESCDEN_MSK 0x80000000
+/* inverted bitmask for bitfield desc{d}_en */
+#define HW_ATL_TDM_DESCDEN_MSKN 0x7fffffff
+/* lower bit position of bitfield desc{d}_en */
+#define HW_ATL_TDM_DESCDEN_SHIFT 31
+/* width of bitfield desc{d}_en */
+#define HW_ATL_TDM_DESCDEN_WIDTH 1
+/* default value of bitfield desc{d}_en */
+#define HW_ATL_TDM_DESCDEN_DEFAULT 0x0
+
+/* tx desc{d}_hd[c:0] bitfield definitions
+ * preprocessor definitions for the bitfield "desc{d}_hd[c:0]".
+ * parameter: descriptor {d} | stride size 0x40 | range [0, 31]
+ * port="tdm_pif_desc0_hd_o[12:0]"
+ */
+
+/* register address for bitfield desc{d}_hd[c:0] */
+#define HW_ATL_TDM_DESCDHD_ADR(descriptor) (0x00007c0c + (descriptor) * 0x40)
+/* bitmask for bitfield desc{d}_hd[c:0] */
+#define HW_ATL_TDM_DESCDHD_MSK 0x00001fff
+/* inverted bitmask for bitfield desc{d}_hd[c:0] */
+#define HW_ATL_TDM_DESCDHD_MSKN 0xffffe000
+/* lower bit position of bitfield desc{d}_hd[c:0] */
+#define HW_ATL_TDM_DESCDHD_SHIFT 0
+/* width of bitfield desc{d}_hd[c:0] */
+#define HW_ATL_TDM_DESCDHD_WIDTH 13
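+
+/* Editorial note: no _DEFAULT macro is defined for desc{d}_hd because the
+ * head pointer is a hardware output (PORT suffix "_o"), read-only to
+ * software; it is typically read back to track how far the TX DMA engine
+ * has advanced through the ring.
+ */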
+
+/* tx desc{d}_len[9:0] bitfield definitions
+ * preprocessor definitions for the bitfield "desc{d}_len[9:0]".
+ * parameter: descriptor {d} | stride size 0x40 | range [0, 31]
+ * port="pif_tdm_desc0_len_i[9:0]"
+ */
+
+/* register address for bitfield desc{d}_len[9:0] */
+#define HW_ATL_TDM_DESCDLEN_ADR(descriptor) (0x00007c08 + (descriptor) * 0x40)
+/* bitmask for bitfield desc{d}_len[9:0] */
+#define HW_ATL_TDM_DESCDLEN_MSK 0x00001ff8
+/* inverted bitmask for bitfield desc{d}_len[9:0] */
+#define HW_ATL_TDM_DESCDLEN_MSKN 0xffffe007
+/* lower bit position of bitfield desc{d}_len[9:0] */
+#define HW_ATL_TDM_DESCDLEN_SHIFT 3
+/* width of bitfield desc{d}_len[9:0] */
+#define HW_ATL_TDM_DESCDLEN_WIDTH 10
+/* default value of bitfield desc{d}_len[9:0] */
+#define HW_ATL_TDM_DESCDLEN_DEFAULT 0x0
+
+/* tx int_desc_wrb_en bitfield definitions
+ * preprocessor definitions for the bitfield "int_desc_wrb_en".
+ * port="pif_tdm_int_desc_wrb_en_i"
+ */
+
+/* register address for bitfield int_desc_wrb_en */
+#define HW_ATL_TDM_INT_DESC_WRB_EN_ADR 0x00007b40
+/* bitmask for bitfield int_desc_wrb_en */
+#define HW_ATL_TDM_INT_DESC_WRB_EN_MSK 0x00000002
+/* inverted bitmask for bitfield int_desc_wrb_en */
+#define HW_ATL_TDM_INT_DESC_WRB_EN_MSKN 0xfffffffd
+/* lower bit position of bitfield int_desc_wrb_en */
+#define HW_ATL_TDM_INT_DESC_WRB_EN_SHIFT 1
+/* width of bitfield int_desc_wrb_en */
+#define HW_ATL_TDM_INT_DESC_WRB_EN_WIDTH 1
+/* default value of bitfield int_desc_wrb_en */
+#define HW_ATL_TDM_INT_DESC_WRB_EN_DEFAULT 0x0
+
+/* tx desc{d}_wrb_thresh[6:0] bitfield definitions
+ * preprocessor definitions for the bitfield "desc{d}_wrb_thresh[6:0]".
+ * parameter: descriptor {d} | stride size 0x40 | range [0, 31]
+ * port="pif_tdm_desc0_wrb_thresh_i[6:0]"
+ */
+
+/* register address for bitfield desc{d}_wrb_thresh[6:0] */
+#define HW_ATL_TDM_DESCDWRB_THRESH_ADR(descriptor) \
+       (0x00007c18 + (descriptor) * 0x40)
+/* bitmask for bitfield desc{d}_wrb_thresh[6:0] */
+#define HW_ATL_TDM_DESCDWRB_THRESH_MSK 0x00007f00
+/* inverted bitmask for bitfield desc{d}_wrb_thresh[6:0] */
+#define HW_ATL_TDM_DESCDWRB_THRESH_MSKN 0xffff80ff
+/* lower bit position of bitfield desc{d}_wrb_thresh[6:0] */
+#define HW_ATL_TDM_DESCDWRB_THRESH_SHIFT 8
+/* width of bitfield desc{d}_wrb_thresh[6:0] */
+#define HW_ATL_TDM_DESCDWRB_THRESH_WIDTH 7
+/* default value of bitfield desc{d}_wrb_thresh[6:0] */
+#define HW_ATL_TDM_DESCDWRB_THRESH_DEFAULT 0x0
+
+/* tx lso_tcp_flag_first[b:0] bitfield definitions
+ * preprocessor definitions for the bitfield "lso_tcp_flag_first[b:0]".
+ * port="pif_thm_lso_tcp_flag_first_i[11:0]"
+ */
+
+/* register address for bitfield lso_tcp_flag_first[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_FIRST_ADR 0x00007820
+/* bitmask for bitfield lso_tcp_flag_first[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_FIRST_MSK 0x00000fff
+/* inverted bitmask for bitfield lso_tcp_flag_first[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_FIRST_MSKN 0xfffff000
+/* lower bit position of bitfield lso_tcp_flag_first[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_FIRST_SHIFT 0
+/* width of bitfield lso_tcp_flag_first[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_FIRST_WIDTH 12
+/* default value of bitfield lso_tcp_flag_first[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_FIRST_DEFAULT 0x0
+
+/* tx lso_tcp_flag_last[b:0] bitfield definitions
+ * preprocessor definitions for the bitfield "lso_tcp_flag_last[b:0]".
+ * port="pif_thm_lso_tcp_flag_last_i[11:0]"
+ */
+
+/* register address for bitfield lso_tcp_flag_last[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_LAST_ADR 0x00007824
+/* bitmask for bitfield lso_tcp_flag_last[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_LAST_MSK 0x00000fff
+/* inverted bitmask for bitfield lso_tcp_flag_last[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_LAST_MSKN 0xfffff000
+/* lower bit position of bitfield lso_tcp_flag_last[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_LAST_SHIFT 0
+/* width of bitfield lso_tcp_flag_last[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_LAST_WIDTH 12
+/* default value of bitfield lso_tcp_flag_last[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_LAST_DEFAULT 0x0
+
+/* RX lro_rsc_max[1F:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "lro_rsc_max[1F:0]".
+ */
+
+/* Register address for bitfield lro_rsc_max[1F:0] */
+#define HW_ATL_RPO_LRO_RSC_MAX_ADR 0x00005598
+/* Bitmask for bitfield lro_rsc_max[1F:0] */
+#define HW_ATL_RPO_LRO_RSC_MAX_MSK 0xFFFFFFFF
+/* Inverted bitmask for bitfield lro_rsc_max[1F:0] */
+#define HW_ATL_RPO_LRO_RSC_MAX_MSKN 0x00000000
+/* Lower bit position of bitfield lro_rsc_max[1F:0] */
+#define HW_ATL_RPO_LRO_RSC_MAX_SHIFT 0
+/* Width of bitfield lro_rsc_max[1F:0] */
+#define HW_ATL_RPO_LRO_RSC_MAX_WIDTH 32
+/* Default value of bitfield lro_rsc_max[1F:0] */
+#define HW_ATL_RPO_LRO_RSC_MAX_DEFAULT 0x0
+
+/* RX lro_en[1F:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "lro_en[1F:0]".
+ * PORT="pif_rpo_lro_en_i[31:0]"
+ */
+
+/* Register address for bitfield lro_en[1F:0] */
+#define HW_ATL_RPO_LRO_EN_ADR 0x00005590
+/* Bitmask for bitfield lro_en[1F:0] */
+#define HW_ATL_RPO_LRO_EN_MSK 0xFFFFFFFF
+/* Inverted bitmask for bitfield lro_en[1F:0] */
+#define HW_ATL_RPO_LRO_EN_MSKN 0x00000000
+/* Lower bit position of bitfield lro_en[1F:0] */
+#define HW_ATL_RPO_LRO_EN_SHIFT 0
+/* Width of bitfield lro_en[1F:0] */
+#define HW_ATL_RPO_LRO_EN_WIDTH 32
+/* Default value of bitfield lro_en[1F:0] */
+#define HW_ATL_RPO_LRO_EN_DEFAULT 0x0
+
+/* RX lro_ptopt_en Bitfield Definitions
+ * Preprocessor definitions for the bitfield "lro_ptopt_en".
+ * PORT="pif_rpo_lro_ptopt_en_i"
+ */
+
+/* Register address for bitfield lro_ptopt_en */
+#define HW_ATL_RPO_LRO_PTOPT_EN_ADR 0x00005594
+/* Bitmask for bitfield lro_ptopt_en */
+#define HW_ATL_RPO_LRO_PTOPT_EN_MSK 0x00008000
+/* Inverted bitmask for bitfield lro_ptopt_en */
+#define HW_ATL_RPO_LRO_PTOPT_EN_MSKN 0xFFFF7FFF
+/* Lower bit position of bitfield lro_ptopt_en */
+#define HW_ATL_RPO_LRO_PTOPT_EN_SHIFT 15
+/* Width of bitfield lro_ptopt_en */
+#define HW_ATL_RPO_LRO_PTOPT_EN_WIDTH 1
+/* Default value of bitfield lro_ptopt_en */
+#define HW_ATL_RPO_LRO_PTOPT_EN_DEFAULT 0x1
+
+/* RX lro_q_ses_lmt Bitfield Definitions
+ * Preprocessor definitions for the bitfield "lro_q_ses_lmt".
+ * PORT="pif_rpo_lro_q_ses_lmt_i[1:0]"
+ */
+
+/* Register address for bitfield lro_q_ses_lmt */
+#define HW_ATL_RPO_LRO_QSES_LMT_ADR 0x00005594
+/* Bitmask for bitfield lro_q_ses_lmt */
+#define HW_ATL_RPO_LRO_QSES_LMT_MSK 0x00003000
+/* Inverted bitmask for bitfield lro_q_ses_lmt */
+#define HW_ATL_RPO_LRO_QSES_LMT_MSKN 0xFFFFCFFF
+/* Lower bit position of bitfield lro_q_ses_lmt */
+#define HW_ATL_RPO_LRO_QSES_LMT_SHIFT 12
+/* Width of bitfield lro_q_ses_lmt */
+#define HW_ATL_RPO_LRO_QSES_LMT_WIDTH 2
+/* Default value of bitfield lro_q_ses_lmt */
+#define HW_ATL_RPO_LRO_QSES_LMT_DEFAULT 0x1
+
+/* RX lro_tot_dsc_lmt[1:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "lro_tot_dsc_lmt[1:0]".
+ * PORT="pif_rpo_lro_tot_dsc_lmt_i[1:0]"
+ */
+
+/* Register address for bitfield lro_tot_dsc_lmt[1:0] */
+#define HW_ATL_RPO_LRO_TOT_DSC_LMT_ADR 0x00005594
+/* Bitmask for bitfield lro_tot_dsc_lmt[1:0] */
+#define HW_ATL_RPO_LRO_TOT_DSC_LMT_MSK 0x00000060
+/* Inverted bitmask for bitfield lro_tot_dsc_lmt[1:0] */
+#define HW_ATL_RPO_LRO_TOT_DSC_LMT_MSKN 0xFFFFFF9F
+/* Lower bit position of bitfield lro_tot_dsc_lmt[1:0] */
+#define HW_ATL_RPO_LRO_TOT_DSC_LMT_SHIFT 5
+/* Width of bitfield lro_tot_dsc_lmt[1:0] */
+#define HW_ATL_RPO_LRO_TOT_DSC_LMT_WIDTH 2
+/* Default value of bitfield lro_tot_dsc_lmt[1:0] */
+#define HW_ATL_RPO_LRO_TOT_DSC_LMT_DEFAULT 0x1
+
+/* RX lro_pkt_min[4:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "lro_pkt_min[4:0]".
+ * PORT="pif_rpo_lro_pkt_min_i[4:0]"
+ */
+
+/* Register address for bitfield lro_pkt_min[4:0] */
+#define HW_ATL_RPO_LRO_PKT_MIN_ADR 0x00005594
+/* Bitmask for bitfield lro_pkt_min[4:0] */
+#define HW_ATL_RPO_LRO_PKT_MIN_MSK 0x0000001F
+/* Inverted bitmask for bitfield lro_pkt_min[4:0] */
+#define HW_ATL_RPO_LRO_PKT_MIN_MSKN 0xFFFFFFE0
+/* Lower bit position of bitfield lro_pkt_min[4:0] */
+#define HW_ATL_RPO_LRO_PKT_MIN_SHIFT 0
+/* Width of bitfield lro_pkt_min[4:0] */
+#define HW_ATL_RPO_LRO_PKT_MIN_WIDTH 5
+/* Default value of bitfield lro_pkt_min[4:0] */
+#define HW_ATL_RPO_LRO_PKT_MIN_DEFAULT 0x8
+
+/* RX lro{L}_des_max[1:0] Bitfield Definitions (width and default only)
+ * Preprocessor definitions for the bitfield "lro{L}_des_max[1:0]".
+ */
+
+/* Width of bitfield lro{L}_des_max[1:0] */
+#define HW_ATL_RPO_LRO_LDES_MAX_WIDTH 2
+/* Default value of bitfield lro{L}_des_max[1:0] */
+#define HW_ATL_RPO_LRO_LDES_MAX_DEFAULT 0x0
+
+/* RX lro_tb_div[11:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "lro_tb_div[11:0]".
+ * PORT="pif_rpo_lro_tb_div_i[11:0]"
+ */
+
+/* Register address for bitfield lro_tb_div[11:0] */
+#define HW_ATL_RPO_LRO_TB_DIV_ADR 0x00005620
+/* Bitmask for bitfield lro_tb_div[11:0] */
+#define HW_ATL_RPO_LRO_TB_DIV_MSK 0xFFF00000
+/* Inverted bitmask for bitfield lro_tb_div[11:0] */
+#define HW_ATL_RPO_LRO_TB_DIV_MSKN 0x000FFFFF
+/* Lower bit position of bitfield lro_tb_div[11:0] */
+#define HW_ATL_RPO_LRO_TB_DIV_SHIFT 20
+/* Width of bitfield lro_tb_div[11:0] */
+#define HW_ATL_RPO_LRO_TB_DIV_WIDTH 12
+/* Default value of bitfield lro_tb_div[11:0] */
+#define HW_ATL_RPO_LRO_TB_DIV_DEFAULT 0xC35
+
+/* RX lro_ina_ival[9:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "lro_ina_ival[9:0]".
+ * PORT="pif_rpo_lro_ina_ival_i[9:0]"
+ */
+
+/* Register address for bitfield lro_ina_ival[9:0] */
+#define HW_ATL_RPO_LRO_INA_IVAL_ADR 0x00005620
+/* Bitmask for bitfield lro_ina_ival[9:0] */
+#define HW_ATL_RPO_LRO_INA_IVAL_MSK 0x000FFC00
+/* Inverted bitmask for bitfield lro_ina_ival[9:0] */
+#define HW_ATL_RPO_LRO_INA_IVAL_MSKN 0xFFF003FF
+/* Lower bit position of bitfield lro_ina_ival[9:0] */
+#define HW_ATL_RPO_LRO_INA_IVAL_SHIFT 10
+/* Width of bitfield lro_ina_ival[9:0] */
+#define HW_ATL_RPO_LRO_INA_IVAL_WIDTH 10
+/* Default value of bitfield lro_ina_ival[9:0] */
+#define HW_ATL_RPO_LRO_INA_IVAL_DEFAULT 0xA
+
+/* RX lro_max_ival[9:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "lro_max_ival[9:0]".
+ * PORT="pif_rpo_lro_max_ival_i[9:0]"
+ */
+
+/* Register address for bitfield lro_max_ival[9:0] */
+#define HW_ATL_RPO_LRO_MAX_IVAL_ADR 0x00005620
+/* Bitmask for bitfield lro_max_ival[9:0] */
+#define HW_ATL_RPO_LRO_MAX_IVAL_MSK 0x000003FF
+/* Inverted bitmask for bitfield lro_max_ival[9:0] */
+#define HW_ATL_RPO_LRO_MAX_IVAL_MSKN 0xFFFFFC00
+/* Lower bit position of bitfield lro_max_ival[9:0] */
+#define HW_ATL_RPO_LRO_MAX_IVAL_SHIFT 0
+/* Width of bitfield lro_max_ival[9:0] */
+#define HW_ATL_RPO_LRO_MAX_IVAL_WIDTH 10
+/* Default value of bitfield lro_max_ival[9:0] */
+#define HW_ATL_RPO_LRO_MAX_IVAL_DEFAULT 0x19
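+
+/* Editorial sketch (hypothetical): lro_en[1F:0] is a per-queue enable
+ * bitmap, and the time-base divider plus both interval fields share the
+ * register at 0x5620, so LRO could be configured roughly as:
+ *
+ *     aq_hw_write_reg(hw, HW_ATL_RPO_LRO_EN_ADR, queue_mask);
+ *     aq_hw_write_reg(hw, HW_ATL_RPO_LRO_TB_DIV_ADR,
+ *             ((HW_ATL_RPO_LRO_TB_DIV_DEFAULT <<
+ *               HW_ATL_RPO_LRO_TB_DIV_SHIFT) &
+ *              HW_ATL_RPO_LRO_TB_DIV_MSK) |
+ *             ((ina_ival << HW_ATL_RPO_LRO_INA_IVAL_SHIFT) &
+ *              HW_ATL_RPO_LRO_INA_IVAL_MSK) |
+ *             ((max_ival << HW_ATL_RPO_LRO_MAX_IVAL_SHIFT) &
+ *              HW_ATL_RPO_LRO_MAX_IVAL_MSK));
+ */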
+
+/* TX dca{D}_cpuid[7:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "dca{D}_cpuid[7:0]".
+ * Parameter: DCA {D} | stride size 0x4 | range [0, 31]
+ * PORT="pif_tdm_dca0_cpuid_i[7:0]"
+ */
+
+/* Register address for bitfield dca{D}_cpuid[7:0] */
+#define HW_ATL_TDM_DCA_DCPUID_ADR(dca) (0x00008400 + (dca) * 0x4)
+/* Bitmask for bitfield dca{D}_cpuid[7:0] */
+#define HW_ATL_TDM_DCA_DCPUID_MSK 0x000000FF
+/* Inverted bitmask for bitfield dca{D}_cpuid[7:0] */
+#define HW_ATL_TDM_DCA_DCPUID_MSKN 0xFFFFFF00
+/* Lower bit position of bitfield dca{D}_cpuid[7:0] */
+#define HW_ATL_TDM_DCA_DCPUID_SHIFT 0
+/* Width of bitfield dca{D}_cpuid[7:0] */
+#define HW_ATL_TDM_DCA_DCPUID_WIDTH 8
+/* Default value of bitfield dca{D}_cpuid[7:0] */
+#define HW_ATL_TDM_DCA_DCPUID_DEFAULT 0x0
+
+/* TX dca{D}_desc_en Bitfield Definitions
+ * Preprocessor definitions for the bitfield "dca{D}_desc_en".
+ * Parameter: DCA {D} | stride size 0x4 | range [0, 31]
+ * PORT="pif_tdm_dca_desc_en_i[0]"
+ */
+
+/* Register address for bitfield dca{D}_desc_en */
+#define HW_ATL_TDM_DCA_DDESC_EN_ADR(dca) (0x00008400 + (dca) * 0x4)
+/* Bitmask for bitfield dca{D}_desc_en */
+#define HW_ATL_TDM_DCA_DDESC_EN_MSK 0x80000000
+/* Inverted bitmask for bitfield dca{D}_desc_en */
+#define HW_ATL_TDM_DCA_DDESC_EN_MSKN 0x7FFFFFFF
+/* Lower bit position of bitfield dca{D}_desc_en */
+#define HW_ATL_TDM_DCA_DDESC_EN_SHIFT 31
+/* Width of bitfield dca{D}_desc_en */
+#define HW_ATL_TDM_DCA_DDESC_EN_WIDTH 1
+/* Default value of bitfield dca{D}_desc_en */
+#define HW_ATL_TDM_DCA_DDESC_EN_DEFAULT 0x0
+
+/* TX desc{D}_en Bitfield Definitions
+ * Preprocessor definitions for the bitfield "desc{D}_en".
+ * Parameter: descriptor {D} | stride size 0x40 | range [0, 31]
+ * PORT="pif_tdm_desc_en_i[0]"
+ */
+
+/* Register address for bitfield desc{D}_en */
+#define HW_ATL_TDM_DESC_DEN_ADR(descriptor) (0x00007C08 + (descriptor) * 0x40)
+/* Bitmask for bitfield desc{D}_en */
+#define HW_ATL_TDM_DESC_DEN_MSK 0x80000000
+/* Inverted bitmask for bitfield desc{D}_en */
+#define HW_ATL_TDM_DESC_DEN_MSKN 0x7FFFFFFF
+/* Lower bit position of bitfield desc{D}_en */
+#define HW_ATL_TDM_DESC_DEN_SHIFT 31
+/* Width of bitfield desc{D}_en */
+#define HW_ATL_TDM_DESC_DEN_WIDTH 1
+/* Default value of bitfield desc{D}_en */
+#define HW_ATL_TDM_DESC_DEN_DEFAULT 0x0
+
+/* TX desc{D}_hd[C:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "desc{D}_hd[C:0]".
+ * Parameter: descriptor {D} | stride size 0x40 | range [0, 31]
+ * PORT="tdm_pif_desc0_hd_o[12:0]"
+ */
+
+/* Register address for bitfield desc{D}_hd[C:0] */
+#define HW_ATL_TDM_DESC_DHD_ADR(descriptor) (0x00007C0C + (descriptor) * 0x40)
+/* Bitmask for bitfield desc{D}_hd[C:0] */
+#define HW_ATL_TDM_DESC_DHD_MSK 0x00001FFF
+/* Inverted bitmask for bitfield desc{D}_hd[C:0] */
+#define HW_ATL_TDM_DESC_DHD_MSKN 0xFFFFE000
+/* Lower bit position of bitfield desc{D}_hd[C:0] */
+#define HW_ATL_TDM_DESC_DHD_SHIFT 0
+/* Width of bitfield desc{D}_hd[C:0] */
+#define HW_ATL_TDM_DESC_DHD_WIDTH 13
+
+/* TX desc{D}_len[9:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "desc{D}_len[9:0]".
+ * Parameter: descriptor {D} | stride size 0x40 | range [0, 31]
+ * PORT="pif_tdm_desc0_len_i[9:0]"
+ */
+
+/* Register address for bitfield desc{D}_len[9:0] */
+#define HW_ATL_TDM_DESC_DLEN_ADR(descriptor) (0x00007C08 + (descriptor) * 0x40)
+/* Bitmask for bitfield desc{D}_len[9:0] */
+#define HW_ATL_TDM_DESC_DLEN_MSK 0x00001FF8
+/* Inverted bitmask for bitfield desc{D}_len[9:0] */
+#define HW_ATL_TDM_DESC_DLEN_MSKN 0xFFFFE007
+/* Lower bit position of bitfield desc{D}_len[9:0] */
+#define HW_ATL_TDM_DESC_DLEN_SHIFT 3
+/* Width of bitfield desc{D}_len[9:0] */
+#define HW_ATL_TDM_DESC_DLEN_WIDTH 10
+/* Default value of bitfield desc{D}_len[9:0] */
+#define HW_ATL_TDM_DESC_DLEN_DEFAULT 0x0
+
+/* TX desc{D}_wrb_thresh[6:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "desc{D}_wrb_thresh[6:0]".
+ * Parameter: descriptor {D} | stride size 0x40 | range [0, 31]
+ * PORT="pif_tdm_desc0_wrb_thresh_i[6:0]"
+ */
+
+/* Register address for bitfield desc{D}_wrb_thresh[6:0] */
+#define HW_ATL_TDM_DESC_DWRB_THRESH_ADR(descriptor) \
+       (0x00007C18 + (descriptor) * 0x40)
+/* Bitmask for bitfield desc{D}_wrb_thresh[6:0] */
+#define HW_ATL_TDM_DESC_DWRB_THRESH_MSK 0x00007F00
+/* Inverted bitmask for bitfield desc{D}_wrb_thresh[6:0] */
+#define HW_ATL_TDM_DESC_DWRB_THRESH_MSKN 0xFFFF80FF
+/* Lower bit position of bitfield desc{D}_wrb_thresh[6:0] */
+#define HW_ATL_TDM_DESC_DWRB_THRESH_SHIFT 8
+/* Width of bitfield desc{D}_wrb_thresh[6:0] */
+#define HW_ATL_TDM_DESC_DWRB_THRESH_WIDTH 7
+/* Default value of bitfield desc{D}_wrb_thresh[6:0] */
+#define HW_ATL_TDM_DESC_DWRB_THRESH_DEFAULT 0x0
+
+/* TX tdm_int_mod_en Bitfield Definitions
+ * Preprocessor definitions for the bitfield "tdm_int_mod_en".
+ * PORT="pif_tdm_int_mod_en_i"
+ */
+
+/* Register address for bitfield tdm_int_mod_en */
+#define HW_ATL_TDM_INT_MOD_EN_ADR 0x00007B40
+/* Bitmask for bitfield tdm_int_mod_en */
+#define HW_ATL_TDM_INT_MOD_EN_MSK 0x00000010
+/* Inverted bitmask for bitfield tdm_int_mod_en */
+#define HW_ATL_TDM_INT_MOD_EN_MSKN 0xFFFFFFEF
+/* Lower bit position of bitfield tdm_int_mod_en */
+#define HW_ATL_TDM_INT_MOD_EN_SHIFT 4
+/* Width of bitfield tdm_int_mod_en */
+#define HW_ATL_TDM_INT_MOD_EN_WIDTH 1
+/* Default value of bitfield tdm_int_mod_en */
+#define HW_ATL_TDM_INT_MOD_EN_DEFAULT 0x0
+
+/* TX lso_tcp_flag_mid[B:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "lso_tcp_flag_mid[B:0]".
+ * PORT="pif_thm_lso_tcp_flag_mid_i[11:0]"
+ */
+
+/* register address for bitfield lso_tcp_flag_mid[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_MID_ADR 0x00007820
+/* bitmask for bitfield lso_tcp_flag_mid[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_MID_MSK 0x0fff0000
+/* inverted bitmask for bitfield lso_tcp_flag_mid[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_MID_MSKN 0xf000ffff
+/* lower bit position of bitfield lso_tcp_flag_mid[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_MID_SHIFT 16
+/* width of bitfield lso_tcp_flag_mid[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_MID_WIDTH 12
+/* default value of bitfield lso_tcp_flag_mid[b:0] */
+#define HW_ATL_THM_LSO_TCP_FLAG_MID_DEFAULT 0x0
+
+/* tx tx_buf_en bitfield definitions
+ * preprocessor definitions for the bitfield "tx_buf_en".
+ * port="pif_tpb_tx_buf_en_i"
+ */
+
+/* register address for bitfield tx_buf_en */
+#define HW_ATL_TPB_TX_BUF_EN_ADR 0x00007900
+/* bitmask for bitfield tx_buf_en */
+#define HW_ATL_TPB_TX_BUF_EN_MSK 0x00000001
+/* inverted bitmask for bitfield tx_buf_en */
+#define HW_ATL_TPB_TX_BUF_EN_MSKN 0xfffffffe
+/* lower bit position of bitfield tx_buf_en */
+#define HW_ATL_TPB_TX_BUF_EN_SHIFT 0
+/* width of bitfield tx_buf_en */
+#define HW_ATL_TPB_TX_BUF_EN_WIDTH 1
+/* default value of bitfield tx_buf_en */
+#define HW_ATL_TPB_TX_BUF_EN_DEFAULT 0x0
+
+/* register address for bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_ADDR 0x00007900
+/* bitmask for bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_MSK 0x00000100
+/* inverted bitmask for bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_MSKN 0xFFFFFEFF
+/* lower bit position of bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_SHIFT 8
+/* width of bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_WIDTH 1
+/* default value of bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_DEFAULT 0x0
+
+/* tx tx{b}_hi_thresh[c:0] bitfield definitions
+ * preprocessor definitions for the bitfield "tx{b}_hi_thresh[c:0]".
+ * parameter: buffer {b} | stride size 0x10 | range [0, 7]
+ * port="pif_tpb_tx0_hi_thresh_i[12:0]"
+ */
+
+/* register address for bitfield tx{b}_hi_thresh[c:0] */
+#define HW_ATL_TPB_TXBHI_THRESH_ADR(buffer) (0x00007914 + (buffer) * 0x10)
+/* bitmask for bitfield tx{b}_hi_thresh[c:0] */
+#define HW_ATL_TPB_TXBHI_THRESH_MSK 0x1fff0000
+/* inverted bitmask for bitfield tx{b}_hi_thresh[c:0] */
+#define HW_ATL_TPB_TXBHI_THRESH_MSKN 0xe000ffff
+/* lower bit position of bitfield tx{b}_hi_thresh[c:0] */
+#define HW_ATL_TPB_TXBHI_THRESH_SHIFT 16
+/* width of bitfield tx{b}_hi_thresh[c:0] */
+#define HW_ATL_TPB_TXBHI_THRESH_WIDTH 13
+/* default value of bitfield tx{b}_hi_thresh[c:0] */
+#define HW_ATL_TPB_TXBHI_THRESH_DEFAULT 0x0
+
+/* tx tx{b}_lo_thresh[c:0] bitfield definitions
+ * preprocessor definitions for the bitfield "tx{b}_lo_thresh[c:0]".
+ * parameter: buffer {b} | stride size 0x10 | range [0, 7]
+ * port="pif_tpb_tx0_lo_thresh_i[12:0]"
+ */
+
+/* register address for bitfield tx{b}_lo_thresh[c:0] */
+#define HW_ATL_TPB_TXBLO_THRESH_ADR(buffer) (0x00007914 + (buffer) * 0x10)
+/* bitmask for bitfield tx{b}_lo_thresh[c:0] */
+#define HW_ATL_TPB_TXBLO_THRESH_MSK 0x00001fff
+/* inverted bitmask for bitfield tx{b}_lo_thresh[c:0] */
+#define HW_ATL_TPB_TXBLO_THRESH_MSKN 0xffffe000
+/* lower bit position of bitfield tx{b}_lo_thresh[c:0] */
+#define HW_ATL_TPB_TXBLO_THRESH_SHIFT 0
+/* width of bitfield tx{b}_lo_thresh[c:0] */
+#define HW_ATL_TPB_TXBLO_THRESH_WIDTH 13
+/* default value of bitfield tx{b}_lo_thresh[c:0] */
+#define HW_ATL_TPB_TXBLO_THRESH_DEFAULT 0x0
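+
+/* Editorial sketch (hypothetical): the high and low watermarks of one TX
+ * buffer share the register at 0x7914 + b * 0x10, so both can be set in
+ * a single write:
+ *
+ *     aq_hw_write_reg(hw, HW_ATL_TPB_TXBHI_THRESH_ADR(buf),
+ *             ((hi << HW_ATL_TPB_TXBHI_THRESH_SHIFT) &
+ *              HW_ATL_TPB_TXBHI_THRESH_MSK) |
+ *             ((lo << HW_ATL_TPB_TXBLO_THRESH_SHIFT) &
+ *              HW_ATL_TPB_TXBLO_THRESH_MSK));
+ */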
+
+/* tx dma_sys_loopback bitfield definitions
+ * preprocessor definitions for the bitfield "dma_sys_loopback".
+ * port="pif_tpb_dma_sys_lbk_i"
+ */
+
+/* register address for bitfield dma_sys_loopback */
+#define HW_ATL_TPB_DMA_SYS_LBK_ADR 0x00007000
+/* bitmask for bitfield dma_sys_loopback */
+#define HW_ATL_TPB_DMA_SYS_LBK_MSK 0x00000040
+/* inverted bitmask for bitfield dma_sys_loopback */
+#define HW_ATL_TPB_DMA_SYS_LBK_MSKN 0xffffffbf
+/* lower bit position of bitfield dma_sys_loopback */
+#define HW_ATL_TPB_DMA_SYS_LBK_SHIFT 6
+/* width of bitfield dma_sys_loopback */
+#define HW_ATL_TPB_DMA_SYS_LBK_WIDTH 1
+/* default value of bitfield dma_sys_loopback */
+#define HW_ATL_TPB_DMA_SYS_LBK_DEFAULT 0x0
+
+/* tx tx{b}_buf_size[7:0] bitfield definitions
+ * preprocessor definitions for the bitfield "tx{b}_buf_size[7:0]".
+ * parameter: buffer {b} | stride size 0x10 | range [0, 7]
+ * port="pif_tpb_tx0_buf_size_i[7:0]"
+ */
+
+/* register address for bitfield tx{b}_buf_size[7:0] */
+#define HW_ATL_TPB_TXBBUF_SIZE_ADR(buffer) (0x00007910 + (buffer) * 0x10)
+/* bitmask for bitfield tx{b}_buf_size[7:0] */
+#define HW_ATL_TPB_TXBBUF_SIZE_MSK 0x000000ff
+/* inverted bitmask for bitfield tx{b}_buf_size[7:0] */
+#define HW_ATL_TPB_TXBBUF_SIZE_MSKN 0xffffff00
+/* lower bit position of bitfield tx{b}_buf_size[7:0] */
+#define HW_ATL_TPB_TXBBUF_SIZE_SHIFT 0
+/* width of bitfield tx{b}_buf_size[7:0] */
+#define HW_ATL_TPB_TXBBUF_SIZE_WIDTH 8
+/* default value of bitfield tx{b}_buf_size[7:0] */
+#define HW_ATL_TPB_TXBBUF_SIZE_DEFAULT 0x0
+
+/* tx tx_scp_ins_en bitfield definitions
+ * preprocessor definitions for the bitfield "tx_scp_ins_en".
+ * port="pif_tpb_scp_ins_en_i"
+ */
+
+/* register address for bitfield tx_scp_ins_en */
+#define HW_ATL_TPB_TX_SCP_INS_EN_ADR 0x00007900
+/* bitmask for bitfield tx_scp_ins_en */
+#define HW_ATL_TPB_TX_SCP_INS_EN_MSK 0x00000004
+/* inverted bitmask for bitfield tx_scp_ins_en */
+#define HW_ATL_TPB_TX_SCP_INS_EN_MSKN 0xfffffffb
+/* lower bit position of bitfield tx_scp_ins_en */
+#define HW_ATL_TPB_TX_SCP_INS_EN_SHIFT 2
+/* width of bitfield tx_scp_ins_en */
+#define HW_ATL_TPB_TX_SCP_INS_EN_WIDTH 1
+/* default value of bitfield tx_scp_ins_en */
+#define HW_ATL_TPB_TX_SCP_INS_EN_DEFAULT 0x0
+
+/* tx ipv4_chk_en bitfield definitions
+ * preprocessor definitions for the bitfield "ipv4_chk_en".
+ * port="pif_tpo_ipv4_chk_en_i"
+ */
+
+/* register address for bitfield ipv4_chk_en */
+#define HW_ATL_TPO_IPV4CHK_EN_ADR 0x00007800
+/* bitmask for bitfield ipv4_chk_en */
+#define HW_ATL_TPO_IPV4CHK_EN_MSK 0x00000002
+/* inverted bitmask for bitfield ipv4_chk_en */
+#define HW_ATL_TPO_IPV4CHK_EN_MSKN 0xfffffffd
+/* lower bit position of bitfield ipv4_chk_en */
+#define HW_ATL_TPO_IPV4CHK_EN_SHIFT 1
+/* width of bitfield ipv4_chk_en */
+#define HW_ATL_TPO_IPV4CHK_EN_WIDTH 1
+/* default value of bitfield ipv4_chk_en */
+#define HW_ATL_TPO_IPV4CHK_EN_DEFAULT 0x0
+
+/* tx l4_chk_en bitfield definitions
+ * preprocessor definitions for the bitfield "l4_chk_en".
+ * port="pif_tpo_l4_chk_en_i"
+ */
+
+/* register address for bitfield l4_chk_en */
+#define HW_ATL_TPOL4CHK_EN_ADR 0x00007800
+/* bitmask for bitfield l4_chk_en */
+#define HW_ATL_TPOL4CHK_EN_MSK 0x00000001
+/* inverted bitmask for bitfield l4_chk_en */
+#define HW_ATL_TPOL4CHK_EN_MSKN 0xfffffffe
+/* lower bit position of bitfield l4_chk_en */
+#define HW_ATL_TPOL4CHK_EN_SHIFT 0
+/* width of bitfield l4_chk_en */
+#define HW_ATL_TPOL4CHK_EN_WIDTH 1
+/* default value of bitfield l4_chk_en */
+#define HW_ATL_TPOL4CHK_EN_DEFAULT 0x0
+
+/* tx pkt_sys_loopback bitfield definitions
+ * preprocessor definitions for the bitfield "pkt_sys_loopback".
+ * port="pif_tpo_pkt_sys_lbk_i"
+ */
+
+/* register address for bitfield pkt_sys_loopback */
+#define HW_ATL_TPO_PKT_SYS_LBK_ADR 0x00007000
+/* bitmask for bitfield pkt_sys_loopback */
+#define HW_ATL_TPO_PKT_SYS_LBK_MSK 0x00000080
+/* inverted bitmask for bitfield pkt_sys_loopback */
+#define HW_ATL_TPO_PKT_SYS_LBK_MSKN 0xffffff7f
+/* lower bit position of bitfield pkt_sys_loopback */
+#define HW_ATL_TPO_PKT_SYS_LBK_SHIFT 7
+/* width of bitfield pkt_sys_loopback */
+#define HW_ATL_TPO_PKT_SYS_LBK_WIDTH 1
+/* default value of bitfield pkt_sys_loopback */
+#define HW_ATL_TPO_PKT_SYS_LBK_DEFAULT 0x0
+
+/* tx data_tc_arb_mode bitfield definitions
+ * preprocessor definitions for the bitfield "data_tc_arb_mode".
+ * port="pif_tps_data_tc_arb_mode_i"
+ */
+
+/* register address for bitfield data_tc_arb_mode */
+#define HW_ATL_TPS_DATA_TC_ARB_MODE_ADR 0x00007100
+/* bitmask for bitfield data_tc_arb_mode */
+#define HW_ATL_TPS_DATA_TC_ARB_MODE_MSK 0x00000001
+/* inverted bitmask for bitfield data_tc_arb_mode */
+#define HW_ATL_TPS_DATA_TC_ARB_MODE_MSKN 0xfffffffe
+/* lower bit position of bitfield data_tc_arb_mode */
+#define HW_ATL_TPS_DATA_TC_ARB_MODE_SHIFT 0
+/* width of bitfield data_tc_arb_mode */
+#define HW_ATL_TPS_DATA_TC_ARB_MODE_WIDTH 1
+/* default value of bitfield data_tc_arb_mode */
+#define HW_ATL_TPS_DATA_TC_ARB_MODE_DEFAULT 0x0
+
+/* tx desc_rate_ta_rst bitfield definitions
+ * preprocessor definitions for the bitfield "desc_rate_ta_rst".
+ * port="pif_tps_desc_rate_ta_rst_i"
+ */
+
+/* register address for bitfield desc_rate_ta_rst */
+#define HW_ATL_TPS_DESC_RATE_TA_RST_ADR 0x00007310
+/* bitmask for bitfield desc_rate_ta_rst */
+#define HW_ATL_TPS_DESC_RATE_TA_RST_MSK 0x80000000
+/* inverted bitmask for bitfield desc_rate_ta_rst */
+#define HW_ATL_TPS_DESC_RATE_TA_RST_MSKN 0x7fffffff
+/* lower bit position of bitfield desc_rate_ta_rst */
+#define HW_ATL_TPS_DESC_RATE_TA_RST_SHIFT 31
+/* width of bitfield desc_rate_ta_rst */
+#define HW_ATL_TPS_DESC_RATE_TA_RST_WIDTH 1
+/* default value of bitfield desc_rate_ta_rst */
+#define HW_ATL_TPS_DESC_RATE_TA_RST_DEFAULT 0x0
+
+/* tx desc_rate_limit[a:0] bitfield definitions
+ * preprocessor definitions for the bitfield "desc_rate_limit[a:0]".
+ * port="pif_tps_desc_rate_lim_i[10:0]"
+ */
+
+/* register address for bitfield desc_rate_limit[a:0] */
+#define HW_ATL_TPS_DESC_RATE_LIM_ADR 0x00007310
+/* bitmask for bitfield desc_rate_limit[a:0] */
+#define HW_ATL_TPS_DESC_RATE_LIM_MSK 0x000007ff
+/* inverted bitmask for bitfield desc_rate_limit[a:0] */
+#define HW_ATL_TPS_DESC_RATE_LIM_MSKN 0xfffff800
+/* lower bit position of bitfield desc_rate_limit[a:0] */
+#define HW_ATL_TPS_DESC_RATE_LIM_SHIFT 0
+/* width of bitfield desc_rate_limit[a:0] */
+#define HW_ATL_TPS_DESC_RATE_LIM_WIDTH 11
+/* default value of bitfield desc_rate_limit[a:0] */
+#define HW_ATL_TPS_DESC_RATE_LIM_DEFAULT 0x0
+
+/* tx desc_tc_arb_mode[1:0] bitfield definitions
+ * preprocessor definitions for the bitfield "desc_tc_arb_mode[1:0]".
+ * port="pif_tps_desc_tc_arb_mode_i[1:0]"
+ */
+
+/* register address for bitfield desc_tc_arb_mode[1:0] */
+#define HW_ATL_TPS_DESC_TC_ARB_MODE_ADR 0x00007200
+/* bitmask for bitfield desc_tc_arb_mode[1:0] */
+#define HW_ATL_TPS_DESC_TC_ARB_MODE_MSK 0x00000003
+/* inverted bitmask for bitfield desc_tc_arb_mode[1:0] */
+#define HW_ATL_TPS_DESC_TC_ARB_MODE_MSKN 0xfffffffc
+/* lower bit position of bitfield desc_tc_arb_mode[1:0] */
+#define HW_ATL_TPS_DESC_TC_ARB_MODE_SHIFT 0
+/* width of bitfield desc_tc_arb_mode[1:0] */
+#define HW_ATL_TPS_DESC_TC_ARB_MODE_WIDTH 2
+/* default value of bitfield desc_tc_arb_mode[1:0] */
+#define HW_ATL_TPS_DESC_TC_ARB_MODE_DEFAULT 0x0
+
+/* tx desc_tc{t}_credit_max[b:0] bitfield definitions
+ * preprocessor definitions for the bitfield "desc_tc{t}_credit_max[b:0]".
+ * parameter: tc {t} | stride size 0x4 | range [0, 7]
+ * port="pif_tps_desc_tc0_credit_max_i[11:0]"
+ */
+
+/* register address for bitfield desc_tc{t}_credit_max[b:0] */
+#define HW_ATL_TPS_DESC_TCTCREDIT_MAX_ADR(tc) (0x00007210 + (tc) * 0x4)
+/* bitmask for bitfield desc_tc{t}_credit_max[b:0] */
+#define HW_ATL_TPS_DESC_TCTCREDIT_MAX_MSK 0x0fff0000
+/* inverted bitmask for bitfield desc_tc{t}_credit_max[b:0] */
+#define HW_ATL_TPS_DESC_TCTCREDIT_MAX_MSKN 0xf000ffff
+/* lower bit position of bitfield desc_tc{t}_credit_max[b:0] */
+#define HW_ATL_TPS_DESC_TCTCREDIT_MAX_SHIFT 16
+/* width of bitfield desc_tc{t}_credit_max[b:0] */
+#define HW_ATL_TPS_DESC_TCTCREDIT_MAX_WIDTH 12
+/* default value of bitfield desc_tc{t}_credit_max[b:0] */
+#define HW_ATL_TPS_DESC_TCTCREDIT_MAX_DEFAULT 0x0
+
+/* tx desc_tc{t}_weight[8:0] bitfield definitions
+ * preprocessor definitions for the bitfield "desc_tc{t}_weight[8:0]".
+ * parameter: tc {t} | stride size 0x4 | range [0, 7]
+ * port="pif_tps_desc_tc0_weight_i[8:0]"
+ */
+
+/* register address for bitfield desc_tc{t}_weight[8:0] */
+#define HW_ATL_TPS_DESC_TCTWEIGHT_ADR(tc) (0x00007210 + (tc) * 0x4)
+/* bitmask for bitfield desc_tc{t}_weight[8:0] */
+#define HW_ATL_TPS_DESC_TCTWEIGHT_MSK 0x000001ff
+/* inverted bitmask for bitfield desc_tc{t}_weight[8:0] */
+#define HW_ATL_TPS_DESC_TCTWEIGHT_MSKN 0xfffffe00
+/* lower bit position of bitfield desc_tc{t}_weight[8:0] */
+#define HW_ATL_TPS_DESC_TCTWEIGHT_SHIFT 0
+/* width of bitfield desc_tc{t}_weight[8:0] */
+#define HW_ATL_TPS_DESC_TCTWEIGHT_WIDTH 9
+/* default value of bitfield desc_tc{t}_weight[8:0] */
+#define HW_ATL_TPS_DESC_TCTWEIGHT_DEFAULT 0x0
+
+/* tx desc_vm_arb_mode bitfield definitions
+ * preprocessor definitions for the bitfield "desc_vm_arb_mode".
+ * port="pif_tps_desc_vm_arb_mode_i"
+ */
+
+/* register address for bitfield desc_vm_arb_mode */
+#define HW_ATL_TPS_DESC_VM_ARB_MODE_ADR 0x00007300
+/* bitmask for bitfield desc_vm_arb_mode */
+#define HW_ATL_TPS_DESC_VM_ARB_MODE_MSK 0x00000001
+/* inverted bitmask for bitfield desc_vm_arb_mode */
+#define HW_ATL_TPS_DESC_VM_ARB_MODE_MSKN 0xfffffffe
+/* lower bit position of bitfield desc_vm_arb_mode */
+#define HW_ATL_TPS_DESC_VM_ARB_MODE_SHIFT 0
+/* width of bitfield desc_vm_arb_mode */
+#define HW_ATL_TPS_DESC_VM_ARB_MODE_WIDTH 1
+/* default value of bitfield desc_vm_arb_mode */
+#define HW_ATL_TPS_DESC_VM_ARB_MODE_DEFAULT 0x0
+
+/* tx data_tc{t}_credit_max[b:0] bitfield definitions
+ * preprocessor definitions for the bitfield "data_tc{t}_credit_max[b:0]".
+ * parameter: tc {t} | stride size 0x4 | range [0, 7]
+ * port="pif_tps_data_tc0_credit_max_i[11:0]"
+ */
+
+/* register address for bitfield data_tc{t}_credit_max[b:0] */
+#define HW_ATL_TPS_DATA_TCTCREDIT_MAX_ADR(tc) (0x00007110 + (tc) * 0x4)
+/* bitmask for bitfield data_tc{t}_credit_max[b:0] */
+#define HW_ATL_TPS_DATA_TCTCREDIT_MAX_MSK 0x0fff0000
+/* inverted bitmask for bitfield data_tc{t}_credit_max[b:0] */
+#define HW_ATL_TPS_DATA_TCTCREDIT_MAX_MSKN 0xf000ffff
+/* lower bit position of bitfield data_tc{t}_credit_max[b:0] */
+#define HW_ATL_TPS_DATA_TCTCREDIT_MAX_SHIFT 16
+/* width of bitfield data_tc{t}_credit_max[b:0] */
+#define HW_ATL_TPS_DATA_TCTCREDIT_MAX_WIDTH 12
+/* default value of bitfield data_tc{t}_credit_max[b:0] */
+#define HW_ATL_TPS_DATA_TCTCREDIT_MAX_DEFAULT 0x0
+
+/* tx data_tc{t}_weight[8:0] bitfield definitions
+ * preprocessor definitions for the bitfield "data_tc{t}_weight[8:0]".
+ * parameter: tc {t} | stride size 0x4 | range [0, 7]
+ * port="pif_tps_data_tc0_weight_i[8:0]"
+ */
+
+/* register address for bitfield data_tc{t}_weight[8:0] */
+#define HW_ATL_TPS_DATA_TCTWEIGHT_ADR(tc) (0x00007110 + (tc) * 0x4)
+/* bitmask for bitfield data_tc{t}_weight[8:0] */
+#define HW_ATL_TPS_DATA_TCTWEIGHT_MSK 0x000001ff
+/* inverted bitmask for bitfield data_tc{t}_weight[8:0] */
+#define HW_ATL_TPS_DATA_TCTWEIGHT_MSKN 0xfffffe00
+/* lower bit position of bitfield data_tc{t}_weight[8:0] */
+#define HW_ATL_TPS_DATA_TCTWEIGHT_SHIFT 0
+/* width of bitfield data_tc{t}_weight[8:0] */
+#define HW_ATL_TPS_DATA_TCTWEIGHT_WIDTH 9
+/* default value of bitfield data_tc{t}_weight[8:0] */
+#define HW_ATL_TPS_DATA_TCTWEIGHT_DEFAULT 0x0
+
+/* tx reg_res_dsbl bitfield definitions
+ * preprocessor definitions for the bitfield "reg_res_dsbl".
+ * port="pif_tx_reg_res_dsbl_i"
+ */
+
+/* register address for bitfield reg_res_dsbl */
+#define HW_ATL_TX_REG_RES_DSBL_ADR 0x00007000
+/* bitmask for bitfield reg_res_dsbl */
+#define HW_ATL_TX_REG_RES_DSBL_MSK 0x20000000
+/* inverted bitmask for bitfield reg_res_dsbl */
+#define HW_ATL_TX_REG_RES_DSBL_MSKN 0xdfffffff
+/* lower bit position of bitfield reg_res_dsbl */
+#define HW_ATL_TX_REG_RES_DSBL_SHIFT 29
+/* width of bitfield reg_res_dsbl */
+#define HW_ATL_TX_REG_RES_DSBL_WIDTH 1
+/* default value of bitfield reg_res_dsbl */
+#define HW_ATL_TX_REG_RES_DSBL_DEFAULT 0x1
+
+/* mac_phy register access busy bitfield definitions
+ * preprocessor definitions for the bitfield "register access busy".
+ * port="msm_pif_reg_busy_o"
+ */
+
+/* register address for bitfield register access busy */
+#define HW_ATL_MSM_REG_ACCESS_BUSY_ADR 0x00004400
+/* bitmask for bitfield register access busy */
+#define HW_ATL_MSM_REG_ACCESS_BUSY_MSK 0x00001000
+/* inverted bitmask for bitfield register access busy */
+#define HW_ATL_MSM_REG_ACCESS_BUSY_MSKN 0xffffefff
+/* lower bit position of bitfield register access busy */
+#define HW_ATL_MSM_REG_ACCESS_BUSY_SHIFT 12
+/* width of bitfield register access busy */
+#define HW_ATL_MSM_REG_ACCESS_BUSY_WIDTH 1
+
+/* mac_phy msm register address[7:0] bitfield definitions
+ * preprocessor definitions for the bitfield "msm register address[7:0]".
+ * port="pif_msm_reg_addr_i[7:0]"
+ */
+
+/* register address for bitfield msm register address[7:0] */
+#define HW_ATL_MSM_REG_ADDR_ADR 0x00004400
+/* bitmask for bitfield msm register address[7:0] */
+#define HW_ATL_MSM_REG_ADDR_MSK 0x000000ff
+/* inverted bitmask for bitfield msm register address[7:0] */
+#define HW_ATL_MSM_REG_ADDR_MSKN 0xffffff00
+/* lower bit position of bitfield msm register address[7:0] */
+#define HW_ATL_MSM_REG_ADDR_SHIFT 0
+/* width of bitfield msm register address[7:0] */
+#define HW_ATL_MSM_REG_ADDR_WIDTH 8
+/* default value of bitfield msm register address[7:0] */
+#define HW_ATL_MSM_REG_ADDR_DEFAULT 0x0
+
+/* mac_phy register read strobe bitfield definitions
+ * preprocessor definitions for the bitfield "register read strobe".
+ * port="pif_msm_reg_rden_i"
+ */
+
+/* register address for bitfield register read strobe */
+#define HW_ATL_MSM_REG_RD_STROBE_ADR 0x00004400
+/* bitmask for bitfield register read strobe */
+#define HW_ATL_MSM_REG_RD_STROBE_MSK 0x00000200
+/* inverted bitmask for bitfield register read strobe */
+#define HW_ATL_MSM_REG_RD_STROBE_MSKN 0xfffffdff
+/* lower bit position of bitfield register read strobe */
+#define HW_ATL_MSM_REG_RD_STROBE_SHIFT 9
+/* width of bitfield register read strobe */
+#define HW_ATL_MSM_REG_RD_STROBE_WIDTH 1
+/* default value of bitfield register read strobe */
+#define HW_ATL_MSM_REG_RD_STROBE_DEFAULT 0x0
+
+/* mac_phy msm register read data[31:0] bitfield definitions
+ * preprocessor definitions for the bitfield "msm register read data[31:0]".
+ * port="msm_pif_reg_rd_data_o[31:0]"
+ */
+
+/* register address for bitfield msm register read data[31:0] */
+#define HW_ATL_MSM_REG_RD_DATA_ADR 0x00004408
+/* bitmask for bitfield msm register read data[31:0] */
+#define HW_ATL_MSM_REG_RD_DATA_MSK 0xffffffff
+/* inverted bitmask for bitfield msm register read data[31:0] */
+#define HW_ATL_MSM_REG_RD_DATA_MSKN 0x00000000
+/* lower bit position of bitfield msm register read data[31:0] */
+#define HW_ATL_MSM_REG_RD_DATA_SHIFT 0
+/* width of bitfield msm register read data[31:0] */
+#define HW_ATL_MSM_REG_RD_DATA_WIDTH 32
+
+/* mac_phy msm register write data[31:0] bitfield definitions
+ * preprocessor definitions for the bitfield "msm register write data[31:0]".
+ * port="pif_msm_reg_wr_data_i[31:0]"
+ */
+
+/* register address for bitfield msm register write data[31:0] */
+#define HW_ATL_MSM_REG_WR_DATA_ADR 0x00004404
+/* bitmask for bitfield msm register write data[31:0] */
+#define HW_ATL_MSM_REG_WR_DATA_MSK 0xffffffff
+/* inverted bitmask for bitfield msm register write data[31:0] */
+#define HW_ATL_MSM_REG_WR_DATA_MSKN 0x00000000
+/* lower bit position of bitfield msm register write data[31:0] */
+#define HW_ATL_MSM_REG_WR_DATA_SHIFT 0
+/* width of bitfield msm register write data[31:0] */
+#define HW_ATL_MSM_REG_WR_DATA_WIDTH 32
+/* default value of bitfield msm register write data[31:0] */
+#define HW_ATL_MSM_REG_WR_DATA_DEFAULT 0x0
+
+/* mac_phy register write strobe bitfield definitions
+ * preprocessor definitions for the bitfield "register write strobe".
+ * port="pif_msm_reg_wren_i"
+ */
+
+/* register address for bitfield register write strobe */
+#define HW_ATL_MSM_REG_WR_STROBE_ADR 0x00004400
+/* bitmask for bitfield register write strobe */
+#define HW_ATL_MSM_REG_WR_STROBE_MSK 0x00000100
+/* inverted bitmask for bitfield register write strobe */
+#define HW_ATL_MSM_REG_WR_STROBE_MSKN 0xfffffeff
+/* lower bit position of bitfield register write strobe */
+#define HW_ATL_MSM_REG_WR_STROBE_SHIFT 8
+/* width of bitfield register write strobe */
+#define HW_ATL_MSM_REG_WR_STROBE_WIDTH 1
+/* default value of bitfield register write strobe */
+#define HW_ATL_MSM_REG_WR_STROBE_DEFAULT 0x0
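+
+/* Editorial sketch (hypothetical): the address, strobes and busy flag all
+ * live in the control register at 0x4400, which implies an indirect
+ * access sequence for MSM (MAC/PHY) registers. A read would set the
+ * address together with the read strobe, spin until the busy bit clears,
+ * then fetch the data word:
+ *
+ *     aq_hw_write_reg(hw, HW_ATL_MSM_REG_ADDR_ADR,
+ *                     (msm_addr & HW_ATL_MSM_REG_ADDR_MSK) |
+ *                     HW_ATL_MSM_REG_RD_STROBE_MSK);
+ *     while (aq_hw_read_reg(hw, HW_ATL_MSM_REG_ACCESS_BUSY_ADR) &
+ *            HW_ATL_MSM_REG_ACCESS_BUSY_MSK)
+ *             ;
+ *     val = aq_hw_read_reg(hw, HW_ATL_MSM_REG_RD_DATA_ADR);
+ *
+ * The original driver implements this sequence elsewhere; a production
+ * version would also bound the busy-wait.
+ */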
+
+/* mif soft reset bitfield definitions
+ * preprocessor definitions for the bitfield "soft reset".
+ * port="pif_glb_res_i"
+ */
+
+/* register address for bitfield soft reset */
+#define HW_ATL_GLB_SOFT_RES_ADR 0x00000000
+/* bitmask for bitfield soft reset */
+#define HW_ATL_GLB_SOFT_RES_MSK 0x00008000
+/* inverted bitmask for bitfield soft reset */
+#define HW_ATL_GLB_SOFT_RES_MSKN 0xffff7fff
+/* lower bit position of bitfield soft reset */
+#define HW_ATL_GLB_SOFT_RES_SHIFT 15
+/* width of bitfield soft reset */
+#define HW_ATL_GLB_SOFT_RES_WIDTH 1
+/* default value of bitfield soft reset */
+#define HW_ATL_GLB_SOFT_RES_DEFAULT 0x0
+
+/* mif register reset disable bitfield definitions
+ * preprocessor definitions for the bitfield "register reset disable".
+ * port="pif_glb_reg_res_dsbl_i"
+ */
+
+/* register address for bitfield register reset disable */
+#define HW_ATL_GLB_REG_RES_DIS_ADR 0x00000000
+/* bitmask for bitfield register reset disable */
+#define HW_ATL_GLB_REG_RES_DIS_MSK 0x00004000
+/* inverted bitmask for bitfield register reset disable */
+#define HW_ATL_GLB_REG_RES_DIS_MSKN 0xffffbfff
+/* lower bit position of bitfield register reset disable */
+#define HW_ATL_GLB_REG_RES_DIS_SHIFT 14
+/* width of bitfield register reset disable */
+#define HW_ATL_GLB_REG_RES_DIS_WIDTH 1
+/* default value of bitfield register reset disable */
+#define HW_ATL_GLB_REG_RES_DIS_DEFAULT 0x1
+
+/* tx dma debug control definitions */
+#define HW_ATL_TX_DMA_DEBUG_CTL_ADR 0x00008920u
+
+/* tx dma descriptor base address msw definitions */
+#define HW_ATL_TX_DMA_DESC_BASE_ADDRMSW_ADR(descriptor) \
+                       (0x00007c04u + (descriptor) * 0x40)
+
+/* tx dma total request limit */
+#define HW_ATL_TX_DMA_TOTAL_REQ_LIMIT_ADR 0x00007b20u
+
+/* tx interrupt moderation control register definitions
+ * Preprocessor definitions for TX Interrupt Moderation Control Register
+ * Base Address: 0x00008980
+ * Parameter: queue {Q} | stride size 0x4 | range [0, 31]
+ */
+
+#define HW_ATL_TX_INTR_MODERATION_CTL_ADR(queue) (0x00008980u + (queue) * 0x4)
+
+/* pcie reg_res_dsbl bitfield definitions
+ * preprocessor definitions for the bitfield "reg_res_dsbl".
+ * port="pif_pci_reg_res_dsbl_i"
+ */
+
+/* register address for bitfield reg_res_dsbl */
+#define HW_ATL_PCI_REG_RES_DSBL_ADR 0x00001000
+/* bitmask for bitfield reg_res_dsbl */
+#define HW_ATL_PCI_REG_RES_DSBL_MSK 0x20000000
+/* inverted bitmask for bitfield reg_res_dsbl */
+#define HW_ATL_PCI_REG_RES_DSBL_MSKN 0xdfffffff
+/* lower bit position of bitfield reg_res_dsbl */
+#define HW_ATL_PCI_REG_RES_DSBL_SHIFT 29
+/* width of bitfield reg_res_dsbl */
+#define HW_ATL_PCI_REG_RES_DSBL_WIDTH 1
+/* default value of bitfield reg_res_dsbl */
+#define HW_ATL_PCI_REG_RES_DSBL_DEFAULT 0x1
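+
+/* Editorial note: the RX, TX, PCIe and global "register reset disable"
+ * bits all default to 1, i.e. register contents are protected across a
+ * soft reset unless the driver clears them first; see
+ * hw_atl_rx_rx_reg_res_dis_set()/hw_atl_tx_tx_reg_res_dis_set() in the
+ * soft-reset path of hw_atl_utils.c below.
+ */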
+
+/* PCI core control register */
+#define HW_ATL_PCI_REG_CONTROL6_ADR 0x1014u
+
+/* global microprocessor scratch pad definitions */
+#define HW_ATL_GLB_CPU_SCRATCH_SCP_ADR(scratch_scp) \
+       (0x00000300u + (scratch_scp) * 0x4)
+
+/* register address for bitfield uP Force Interrupt */
+#define HW_ATL_MCP_UP_FORCE_INTERRUPT_ADR 0x00000404
+/* bitmask for bitfield uP Force Interrupt */
+#define HW_ATL_MCP_UP_FORCE_INTERRUPT_MSK 0x00000002
+/* inverted bitmask for bitfield uP Force Interrupt */
+#define HW_ATL_MCP_UP_FORCE_INTERRUPT_MSKN 0xFFFFFFFD
+/* lower bit position of bitfield uP Force Interrupt */
+#define HW_ATL_MCP_UP_FORCE_INTERRUPT_SHIFT 1
+/* width of bitfield uP Force Interrupt */
+#define HW_ATL_MCP_UP_FORCE_INTERRUPT_WIDTH 1
+/* default value of bitfield uP Force Interrupt */
+#define HW_ATL_MCP_UP_FORCE_INTERRUPT_DEFAULT 0x0
+
+#endif /* HW_ATL_LLH_INTERNAL_H */
diff --git a/drivers/net/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/atlantic/hw_atl/hw_atl_utils.c
new file mode 100644 (file)
index 0000000..f11093a
--- /dev/null
+++ b/drivers/net/atlantic/hw_atl/hw_atl_utils.c
@@ -0,0 +1,942 @@
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+/* Copyright (C) 2014-2017 aQuantia Corporation. */
+
+/* File hw_atl_utils.c: Definition of common functions for Atlantic hardware
+ * abstraction layer.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <rte_ether.h>
+#include "../atl_hw_regs.h"
+
+#include "hw_atl_llh.h"
+#include "hw_atl_llh_internal.h"
+#include "../atl_logs.h"
+
+#define HW_ATL_UCP_0X370_REG    0x0370U
+
+#define HW_ATL_MIF_CMD          0x0200U
+#define HW_ATL_MIF_ADDR         0x0208U
+#define HW_ATL_MIF_VAL          0x020CU
+
+#define HW_ATL_FW_SM_RAM        0x2U
+#define HW_ATL_MPI_FW_VERSION  0x18
+#define HW_ATL_MPI_CONTROL_ADR  0x0368U
+#define HW_ATL_MPI_STATE_ADR    0x036CU
+
+#define HW_ATL_MPI_STATE_MSK      0x00FFU
+#define HW_ATL_MPI_STATE_SHIFT    0U
+#define HW_ATL_MPI_SPEED_MSK      0x00FF0000U
+#define HW_ATL_MPI_SPEED_SHIFT    16U
+#define HW_ATL_MPI_DIRTY_WAKE_MSK 0x02000000U
+
+#define HW_ATL_MPI_DAISY_CHAIN_STATUS  0x704
+#define HW_ATL_MPI_BOOT_EXIT_CODE      0x388
+
+#define HW_ATL_MAC_PHY_CONTROL 0x4000
+#define HW_ATL_MAC_PHY_MPI_RESET_BIT 0x1D
+
+#define HW_ATL_FW_VER_1X 0x01050006U
+#define HW_ATL_FW_VER_2X 0x02000000U
+#define HW_ATL_FW_VER_3X 0x03000000U
+
+#define FORCE_FLASHLESS 0
+
+static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual);
+static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
+                               enum hal_atl_utils_fw_state_e state);
+
+int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops)
+{
+       int err = 0;
+
+       err = hw_atl_utils_soft_reset(self);
+       if (err)
+               return err;
+
+       hw_atl_utils_hw_chip_features_init(self,
+                                          &self->chip_features);
+
+       hw_atl_utils_get_fw_version(self, &self->fw_ver_actual);
+
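+       /* Bind the FW ops table by major version; FW 3.x is driven
+        * through the 2.x ops.
+        */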
+       if (hw_atl_utils_ver_match(HW_ATL_FW_VER_1X,
+                                  self->fw_ver_actual) == 0) {
+               *fw_ops = &aq_fw_1x_ops;
+       } else if (hw_atl_utils_ver_match(HW_ATL_FW_VER_2X,
+                                       self->fw_ver_actual) == 0) {
+               *fw_ops = &aq_fw_2x_ops;
+       } else if (hw_atl_utils_ver_match(HW_ATL_FW_VER_3X,
+                                       self->fw_ver_actual) == 0) {
+               *fw_ops = &aq_fw_2x_ops;
+       } else {
+               PMD_DRV_LOG(ERR, "Bad FW version detected: %x\n",
+                         self->fw_ver_actual);
+               return -EOPNOTSUPP;
+       }
+       self->aq_fw_ops = *fw_ops;
+       err = self->aq_fw_ops->init(self);
+       return err;
+}
+
+static int hw_atl_utils_soft_reset_flb(struct aq_hw_s *self)
+{
+       u32 gsr, val;
+       int k = 0;
+
+       aq_hw_write_reg(self, 0x404, 0x40e1);
+       AQ_HW_SLEEP(50);
+
+       /* Cleanup SPI */
+       val = aq_hw_read_reg(self, 0x53C);
+       aq_hw_write_reg(self, 0x53C, val | 0x10);
+
+       gsr = aq_hw_read_reg(self, HW_ATL_GLB_SOFT_RES_ADR);
+       aq_hw_write_reg(self, HW_ATL_GLB_SOFT_RES_ADR, (gsr & 0xBFFF) | 0x8000);
+
+       /* Kickstart MAC */
+       aq_hw_write_reg(self, 0x404, 0x80e0);
+       aq_hw_write_reg(self, 0x32a8, 0x0);
+       aq_hw_write_reg(self, 0x520, 0x1);
+
+       /* Reset SPI again because of possible interrupted SPI burst */
+       val = aq_hw_read_reg(self, 0x53C);
+       aq_hw_write_reg(self, 0x53C, val | 0x10);
+       AQ_HW_SLEEP(10);
+       /* Clear SPI reset state */
+       aq_hw_write_reg(self, 0x53C, val & ~0x10);
+
+       aq_hw_write_reg(self, 0x404, 0x180e0);
+
+       for (k = 0; k < 1000; k++) {
+               u32 flb_status = aq_hw_read_reg(self,
+                                               HW_ATL_MPI_DAISY_CHAIN_STATUS);
+
+               flb_status = flb_status & 0x10;
+               if (flb_status)
+                       break;
+               AQ_HW_SLEEP(10);
+       }
+       if (k == 1000) {
+               PMD_DRV_LOG(ERR, "MAC kickstart failed\n");
+               return -EIO;
+       }
+
+       /* FW reset */
+       aq_hw_write_reg(self, 0x404, 0x80e0);
+       AQ_HW_SLEEP(50);
+       aq_hw_write_reg(self, 0x3a0, 0x1);
+
+       /* Kickstart PHY - skipped */
+
+       /* Global software reset*/
+       hw_atl_rx_rx_reg_res_dis_set(self, 0U);
+       hw_atl_tx_tx_reg_res_dis_set(self, 0U);
+       aq_hw_write_reg_bit(self, HW_ATL_MAC_PHY_CONTROL,
+                           BIT(HW_ATL_MAC_PHY_MPI_RESET_BIT),
+                           HW_ATL_MAC_PHY_MPI_RESET_BIT, 0x0);
+       gsr = aq_hw_read_reg(self, HW_ATL_GLB_SOFT_RES_ADR);
+       aq_hw_write_reg(self, HW_ATL_GLB_SOFT_RES_ADR, (gsr & 0xBFFF) | 0x8000);
+
+       for (k = 0; k < 1000; k++) {
+               u32 fw_state = aq_hw_read_reg(self, HW_ATL_MPI_FW_VERSION);
+
+               if (fw_state)
+                       break;
+               AQ_HW_SLEEP(10);
+       }
+       if (k == 1000) {
+               PMD_DRV_LOG(ERR, "FW kickstart failed\n");
+               return -EIO;
+       }
+       /* Old FW requires fixed delay after init */
+       AQ_HW_SLEEP(15);
+
+       return 0;
+}
+
+static int hw_atl_utils_soft_reset_rbl(struct aq_hw_s *self)
+{
+       u32 gsr, val, rbl_status;
+       int k;
+
+       aq_hw_write_reg(self, 0x404, 0x40e1);
+       aq_hw_write_reg(self, 0x3a0, 0x1);
+       aq_hw_write_reg(self, 0x32a8, 0x0);
+
+       /* Alter RBL status */
+       aq_hw_write_reg(self, 0x388, 0xDEAD);
+
+       /* Cleanup SPI */
+       val = aq_hw_read_reg(self, 0x53C);
+       aq_hw_write_reg(self, 0x53C, val | 0x10);
+
+       /* Global software reset*/
+       hw_atl_rx_rx_reg_res_dis_set(self, 0U);
+       hw_atl_tx_tx_reg_res_dis_set(self, 0U);
+       aq_hw_write_reg_bit(self, HW_ATL_MAC_PHY_CONTROL,
+                           BIT(HW_ATL_MAC_PHY_MPI_RESET_BIT),
+                           HW_ATL_MAC_PHY_MPI_RESET_BIT, 0x0);
+       gsr = aq_hw_read_reg(self, HW_ATL_GLB_SOFT_RES_ADR);
+       aq_hw_write_reg(self, HW_ATL_GLB_SOFT_RES_ADR,
+                       (gsr & 0xFFFFBFFF) | 0x8000);
+
+       if (FORCE_FLASHLESS)
+               aq_hw_write_reg(self, 0x534, 0x0);
+
+       aq_hw_write_reg(self, 0x404, 0x40e0);
+
+       /* Wait for RBL boot */
+       for (k = 0; k < 1000; k++) {
+               rbl_status = aq_hw_read_reg(self, 0x388) & 0xFFFF;
+               if (rbl_status && rbl_status != 0xDEAD)
+                       break;
+               AQ_HW_SLEEP(10);
+       }
+       if (!rbl_status || rbl_status == 0xDEAD) {
+               PMD_DRV_LOG(ERR, "RBL Restart failed");
+               return -EIO;
+       }
+
+       /* Restore NVR */
+       if (FORCE_FLASHLESS)
+               aq_hw_write_reg(self, 0x534, 0xA0);
+
+       if (rbl_status == 0xF1A7) {
+               PMD_DRV_LOG(ERR, "No FW detected. Dynamic FW load not implemented\n");
+               return -EOPNOTSUPP;
+       }
+
+       for (k = 0; k < 1000; k++) {
+               u32 fw_state = aq_hw_read_reg(self, HW_ATL_MPI_FW_VERSION);
+
+               if (fw_state)
+                       break;
+               AQ_HW_SLEEP(10);
+       }
+       if (k == 1000) {
+               PMD_DRV_LOG(ERR, "FW kickstart failed\n");
+               return -EIO;
+       }
+       /* Old FW requires fixed delay after init */
+       AQ_HW_SLEEP(15);
+
+       return 0;
+}
+
+int hw_atl_utils_soft_reset(struct aq_hw_s *self)
+{
+       int err = 0;
+       int k;
+       u32 boot_exit_code = 0;
+
+       for (k = 0; k < 1000; ++k) {
+               u32 flb_status = aq_hw_read_reg(self,
+                                               HW_ATL_MPI_DAISY_CHAIN_STATUS);
+               boot_exit_code = aq_hw_read_reg(self,
+                                               HW_ATL_MPI_BOOT_EXIT_CODE);
+               if (flb_status != 0x06000000 || boot_exit_code != 0)
+                       break;
+       }
+
+       if (k == 1000) {
+               PMD_DRV_LOG(ERR, "Neither RBL nor FLB firmware started\n");
+               return -EOPNOTSUPP;
+       }
+
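+       /* A nonzero boot exit code selects the RBL reset path below. */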
+       self->rbl_enabled = (boot_exit_code != 0);
+
+       /* FW 1.x may boot up in an invalid POWER state (WOL feature).
+        * Work around this by forcing its state back to DEINIT.
+        */
+       if (!hw_atl_utils_ver_match(HW_ATL_FW_VER_1X,
+                                   aq_hw_read_reg(self,
+                                                  HW_ATL_MPI_FW_VERSION))) {
+               hw_atl_utils_mpi_set_state(self, MPI_DEINIT);
+               AQ_HW_WAIT_FOR((aq_hw_read_reg(self, HW_ATL_MPI_STATE_ADR) &
+                              HW_ATL_MPI_STATE_MSK) == MPI_DEINIT,
+                              10, 1000U);
+       }
+
+       if (self->rbl_enabled)
+               err = hw_atl_utils_soft_reset_rbl(self);
+       else
+               err = hw_atl_utils_soft_reset_flb(self);
+
+       return err;
+}
+
+int hw_atl_utils_fw_downld_dwords(struct aq_hw_s *self, u32 a,
+                                 u32 *p, u32 cnt)
+{
+       int err = 0;
+
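+       /* Acquire the FW_SM_RAM semaphore; AQ_HW_WAIT_FOR sets err on timeout. */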
+       AQ_HW_WAIT_FOR(hw_atl_reg_glb_cpu_sem_get(self,
+                                                 HW_ATL_FW_SM_RAM) == 1U,
+                                                 1U, 10000U);
+
+       if (err < 0) {
+               bool is_locked;
+
+               hw_atl_reg_glb_cpu_sem_set(self, 1U, HW_ATL_FW_SM_RAM);
+               is_locked = hw_atl_reg_glb_cpu_sem_get(self, HW_ATL_FW_SM_RAM);
+               if (!is_locked) {
+                       err = -ETIMEDOUT;
+                       goto err_exit;
+               }
+       }
+
+       aq_hw_write_reg(self, HW_ATL_MIF_ADDR, a);
+
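+       /* Read cnt dwords; the ++/-- idiom runs the loop cnt times,
+        * stopping early on error.
+        */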
+       for (++cnt; --cnt && !err;) {
+               aq_hw_write_reg(self, HW_ATL_MIF_CMD, 0x00008000U);
+
+               if (IS_CHIP_FEATURE(REVISION_B1))
+                       AQ_HW_WAIT_FOR(a != aq_hw_read_reg(self,
+                                                          HW_ATL_MIF_ADDR),
+                                      1, 1000U);
+               else
+                       AQ_HW_WAIT_FOR(!(0x100 & aq_hw_read_reg(self,
+                                                          HW_ATL_MIF_CMD)),
+                                      1, 1000U);
+
+               *(p++) = aq_hw_read_reg(self, HW_ATL_MIF_VAL);
+               a += 4;
+       }
+
+       hw_atl_reg_glb_cpu_sem_set(self, 1U, HW_ATL_FW_SM_RAM);
+
+err_exit:
+       return err;
+}
+
+int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 a, u32 *p,
+                                        u32 cnt)
+{
+       int err = 0;
+       bool is_locked;
+
+       is_locked = hw_atl_reg_glb_cpu_sem_get(self, HW_ATL_FW_SM_RAM);
+       if (!is_locked) {
+               err = -ETIMEDOUT;
+               goto err_exit;
+       }
+       if (IS_CHIP_FEATURE(REVISION_B1)) {
+               u32 offset = 0;
+
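+               /* B1: push each dword and its offset via registers
+                * 0x328/0x32C, then force an MCP interrupt.
+                */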
+               for (; offset < cnt; ++offset) {
+                       aq_hw_write_reg(self, 0x328, p[offset]);
+                       aq_hw_write_reg(self, 0x32C,
+                               (0x80000000 | (0xFFFF & (offset * 4))));
+                       hw_atl_mcp_up_force_intr_set(self, 1);
+                       /* 1000 times by 10us = 10ms */
+                       AQ_HW_WAIT_FOR((aq_hw_read_reg(self,
+                                       0x32C) & 0xF0000000) != 0x80000000,
+                                       10, 1000);
+               }
+       } else {
+               u32 offset = 0;
+
+               aq_hw_write_reg(self, 0x208, a);
+
+               for (; offset < cnt; ++offset) {
+                       aq_hw_write_reg(self, 0x20C, p[offset]);
+                       aq_hw_write_reg(self, 0x200, 0xC000);
+
+                       AQ_HW_WAIT_FOR((aq_hw_read_reg(self, 0x200U)
+                                       & 0x100) == 0, 10, 1000);
+               }
+       }
+
+       hw_atl_reg_glb_cpu_sem_set(self, 1U, HW_ATL_FW_SM_RAM);
+
+err_exit:
+       return err;
+}
+
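+/* Returns 0 when the major version matches exactly and the actual
+ * minor version is not older than expected.
+ */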
+static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual)
+{
+       int err = 0;
+       const u32 dw_major_mask = 0xff000000U;
+       const u32 dw_minor_mask = 0x00ffffffU;
+
+       err = (dw_major_mask & (ver_expected ^ ver_actual)) ? -EOPNOTSUPP : 0;
+       if (err < 0)
+               goto err_exit;
+       err = ((dw_minor_mask & ver_expected) > (dw_minor_mask & ver_actual)) ?
+               -EOPNOTSUPP : 0;
+err_exit:
+       return err;
+}
+
+static int hw_atl_utils_init_ucp(struct aq_hw_s *self)
+{
+       int err = 0;
+
+       if (!aq_hw_read_reg(self, HW_ATL_UCP_0X370_REG)) {
+               unsigned int rnd = (uint32_t)rte_rand();
+               unsigned int ucp_0x370 = 0U;
+
+               ucp_0x370 = 0x02020202U | (0xFEFEFEFEU & rnd);
+               aq_hw_write_reg(self, HW_ATL_UCP_0X370_REG, ucp_0x370);
+       }
+
+       hw_atl_reg_glb_cpu_scratch_scp_set(self, 0x00000000U, 25U);
+
+       /* Poll the mailbox address 10 times by 1ms,
+        * then the RPC address 100 times by 1ms.
+        */
+       AQ_HW_WAIT_FOR(0U != (self->mbox_addr =
+                      aq_hw_read_reg(self, 0x360U)), 1000U, 10U);
+       AQ_HW_WAIT_FOR(0U != (self->rpc_addr =
+                      aq_hw_read_reg(self, 0x334U)), 1000U, 100U);
+
+       return err;
+}
+
+#define HW_ATL_RPC_CONTROL_ADR 0x0338U
+#define HW_ATL_RPC_STATE_ADR   0x033CU
+
+struct aq_hw_atl_utils_fw_rpc_tid_s {
+       union {
+               u32 val;
+               struct {
+                       u16 tid;
+                       u16 len;
+               };
+       };
+};
+
+#define hw_atl_utils_fw_rpc_init(_H_) hw_atl_utils_fw_rpc_wait(_H_, NULL)
+
+int hw_atl_utils_fw_rpc_call(struct aq_hw_s *self, unsigned int rpc_size)
+{
+       int err = 0;
+       struct aq_hw_atl_utils_fw_rpc_tid_s sw;
+
+       if (!IS_CHIP_FEATURE(MIPS)) {
+               err = -1;
+               goto err_exit;
+       }
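+       /* Upload the RPC body, rounding its byte size up to whole dwords. */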
+       err = hw_atl_utils_fw_upload_dwords(self, self->rpc_addr,
+                                           (u32 *)(void *)&self->rpc,
+                                           (rpc_size + sizeof(u32) -
+                                           sizeof(u8)) / sizeof(u32));
+       if (err < 0)
+               goto err_exit;
+
+       sw.tid = 0xFFFFU & (++self->rpc_tid);
+       sw.len = (u16)rpc_size;
+       aq_hw_write_reg(self, HW_ATL_RPC_CONTROL_ADR, sw.val);
+
+err_exit:
+       return err;
+}
+
+int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
+                                   struct hw_aq_atl_utils_fw_rpc **rpc)
+{
+       int err = 0;
+       struct aq_hw_atl_utils_fw_rpc_tid_s sw;
+       struct aq_hw_atl_utils_fw_rpc_tid_s fw;
+
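+       /* Wait until the FW echoes our transaction id; a length of 0xFFFF
+        * means the request must be resent.
+        */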
+       do {
+               sw.val = aq_hw_read_reg(self, HW_ATL_RPC_CONTROL_ADR);
+
+               self->rpc_tid = sw.tid;
+
+               AQ_HW_WAIT_FOR(sw.tid ==
+                               (fw.val =
+                               aq_hw_read_reg(self, HW_ATL_RPC_STATE_ADR),
+                               fw.tid), 1000U, 100U);
+               if (err < 0)
+                       goto err_exit;
+
+               if (fw.len == 0xFFFFU) {
+                       err = hw_atl_utils_fw_rpc_call(self, sw.len);
+                       if (err < 0)
+                               goto err_exit;
+               }
+       } while (sw.tid != fw.tid || 0xFFFFU == fw.len);
+       if (err < 0)
+               goto err_exit;
+
+       if (rpc) {
+               if (fw.len) {
+                       err =
+                       hw_atl_utils_fw_downld_dwords(self,
+                                                     self->rpc_addr,
+                                                     (u32 *)(void *)
+                                                     &self->rpc,
+                                                     (fw.len + sizeof(u32) -
+                                                     sizeof(u8)) /
+                                                     sizeof(u32));
+                       if (err < 0)
+                               goto err_exit;
+               }
+
+               *rpc = &self->rpc;
+       }
+
+err_exit:
+       return err;
+}
+
+static int hw_atl_utils_mpi_create(struct aq_hw_s *self)
+{
+       int err = 0;
+
+       err = hw_atl_utils_init_ucp(self);
+       if (err < 0)
+               goto err_exit;
+
+       err = hw_atl_utils_fw_rpc_init(self);
+       if (err < 0)
+               goto err_exit;
+
+err_exit:
+       return err;
+}
+
+int hw_atl_utils_mpi_read_mbox(struct aq_hw_s *self,
+                              struct hw_aq_atl_utils_mbox_header *pmbox)
+{
+       return hw_atl_utils_fw_downld_dwords(self,
+                                     self->mbox_addr,
+                                     (u32 *)(void *)pmbox,
+                                     sizeof(*pmbox) / sizeof(u32));
+}
+
+void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self,
+                                struct hw_aq_atl_utils_mbox *pmbox)
+{
+       int err = 0;
+
+       err = hw_atl_utils_fw_downld_dwords(self,
+                                           self->mbox_addr,
+                                           (u32 *)(void *)pmbox,
+                                           sizeof(*pmbox) / sizeof(u32));
+       if (err < 0)
+               goto err_exit;
+
+       if (IS_CHIP_FEATURE(REVISION_A0)) {
+               unsigned int mtu = 1514;
+               pmbox->stats.ubrc = pmbox->stats.uprc * mtu;
+               pmbox->stats.ubtc = pmbox->stats.uptc * mtu;
+       } else {
+               pmbox->stats.dpc = hw_atl_rpb_rx_dma_drop_pkt_cnt_get(self);
+       }
+
+err_exit:;
+}
+
+static
+int hw_atl_utils_mpi_set_speed(struct aq_hw_s *self, u32 speed)
+{
+       u32 val = aq_hw_read_reg(self, HW_ATL_MPI_CONTROL_ADR);
+
+       val = val & ~HW_ATL_MPI_SPEED_MSK;
+       val |= speed << HW_ATL_MPI_SPEED_SHIFT;
+       aq_hw_write_reg(self, HW_ATL_MPI_CONTROL_ADR, val);
+
+       return 0;
+}
+
+int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
+                               enum hal_atl_utils_fw_state_e state)
+{
+       int err = 0;
+       u32 transaction_id = 0;
+       struct hw_aq_atl_utils_mbox_header mbox;
+       u32 val = aq_hw_read_reg(self, HW_ATL_MPI_CONTROL_ADR);
+
+       if (state == MPI_RESET) {
+               hw_atl_utils_mpi_read_mbox(self, &mbox);
+
+               transaction_id = mbox.transaction_id;
+
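+               /* Wait for FW to advance transaction_id, confirming a
+                * mailbox update.
+                */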
+               AQ_HW_WAIT_FOR(transaction_id !=
+                               (hw_atl_utils_mpi_read_mbox(self, &mbox),
+                                mbox.transaction_id),
+                              1000U, 100U);
+               if (err < 0)
+                       goto err_exit;
+       }
+       /* On interface DEINIT we disable DW (raise bit)
+        * Otherwise enable DW (clear bit)
+        */
+       if (state == MPI_DEINIT || state == MPI_POWER)
+               val |= HW_ATL_MPI_DIRTY_WAKE_MSK;
+       else
+               val &= ~HW_ATL_MPI_DIRTY_WAKE_MSK;
+
+       /* Set new state bits */
+       val = val & ~HW_ATL_MPI_STATE_MSK;
+       val |= state & HW_ATL_MPI_STATE_MSK;
+
+       aq_hw_write_reg(self, HW_ATL_MPI_CONTROL_ADR, val);
+err_exit:
+       return err;
+}
+
+int hw_atl_utils_mpi_get_link_status(struct aq_hw_s *self)
+{
+       u32 cp0x036C = aq_hw_read_reg(self, HW_ATL_MPI_STATE_ADR);
+       u32 link_speed_mask = cp0x036C >> HW_ATL_MPI_SPEED_SHIFT;
+       struct aq_hw_link_status_s *link_status = &self->aq_link_status;
+
+       if (!link_speed_mask) {
+               link_status->mbps = 0U;
+       } else {
+               switch (link_speed_mask) {
+               case HAL_ATLANTIC_RATE_10G:
+                       link_status->mbps = 10000U;
+                       break;
+
+               case HAL_ATLANTIC_RATE_5G:
+               case HAL_ATLANTIC_RATE_5GSR:
+                       link_status->mbps = 5000U;
+                       break;
+
+               case HAL_ATLANTIC_RATE_2GS:
+                       link_status->mbps = 2500U;
+                       break;
+
+               case HAL_ATLANTIC_RATE_1G:
+                       link_status->mbps = 1000U;
+                       break;
+
+               case HAL_ATLANTIC_RATE_100M:
+                       link_status->mbps = 100U;
+                       break;
+
+               default:
+                       return -EBUSY;
+               }
+       }
+
+       return 0;
+}
+
+static int hw_atl_utils_get_mac_permanent(struct aq_hw_s *self,
+                                  u8 *mac)
+{
+       int err = 0;
+       u32 h = 0U;
+       u32 l = 0U;
+       u32 mac_addr[2];
+
+       if (!aq_hw_read_reg(self, HW_ATL_UCP_0X370_REG)) {
+               unsigned int rnd = (uint32_t)rte_rand();
+               unsigned int ucp_0x370 = 0;
+
+               ucp_0x370 = 0x02020202 | (0xFEFEFEFE & rnd);
+               aq_hw_write_reg(self, HW_ATL_UCP_0X370_REG, ucp_0x370);
+       }
+
+       err = hw_atl_utils_fw_downld_dwords(self,
+                                           aq_hw_read_reg(self, 0x00000374U) +
+                                           (40U * 4U),
+                                           mac_addr,
+                                           ARRAY_SIZE(mac_addr));
+       if (err < 0) {
+               mac_addr[0] = 0U;
+               mac_addr[1] = 0U;
+               err = 0;
+       } else {
+               mac_addr[0] = rte_constant_bswap32(mac_addr[0]);
+               mac_addr[1] = rte_constant_bswap32(mac_addr[1]);
+       }
+
+       ether_addr_copy((struct ether_addr *)mac_addr,
+                       (struct ether_addr *)mac);
+
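+       /* Invalid MAC (multicast bit or empty OUI): synthesize a fallback. */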
+       if ((mac[0] & 0x01U) || ((mac[0] | mac[1] | mac[2]) == 0x00U)) {
+               /* chip revision */
+               l = 0xE3000000U
+                       | (0xFFFFU & aq_hw_read_reg(self, HW_ATL_UCP_0X370_REG))
+                       | (0x00 << 16);
+               h = 0x8001300EU;
+
+               mac[5] = (u8)(0xFFU & l);
+               l >>= 8;
+               mac[4] = (u8)(0xFFU & l);
+               l >>= 8;
+               mac[3] = (u8)(0xFFU & l);
+               l >>= 8;
+               mac[2] = (u8)(0xFFU & l);
+               mac[1] = (u8)(0xFFU & h);
+               h >>= 8;
+               mac[0] = (u8)(0xFFU & h);
+       }
+
+       return err;
+}
+
+unsigned int hw_atl_utils_mbps_2_speed_index(unsigned int mbps)
+{
+       unsigned int ret = 0U;
+
+       switch (mbps) {
+       case 100U:
+               ret = 5U;
+               break;
+
+       case 1000U:
+               ret = 4U;
+               break;
+
+       case 2500U:
+               ret = 3U;
+               break;
+
+       case 5000U:
+               ret = 1U;
+               break;
+
+       case 10000U:
+               ret = 0U;
+               break;
+
+       default:
+               break;
+       }
+       return ret;
+}
+
+void hw_atl_utils_hw_chip_features_init(struct aq_hw_s *self, u32 *p)
+{
+       u32 chip_features = 0U;
+       u32 val = hw_atl_reg_glb_mif_id_get(self);
+       u32 mif_rev = val & 0xFFU;
+
+       if ((0xFU & mif_rev) == 1U) {
+               chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_A0 |
+                       HAL_ATLANTIC_UTILS_CHIP_MPI_AQ |
+                       HAL_ATLANTIC_UTILS_CHIP_MIPS;
+       } else if ((0xFU & mif_rev) == 2U) {
+               chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_B0 |
+                       HAL_ATLANTIC_UTILS_CHIP_MPI_AQ |
+                       HAL_ATLANTIC_UTILS_CHIP_MIPS |
+                       HAL_ATLANTIC_UTILS_CHIP_TPO2 |
+                       HAL_ATLANTIC_UTILS_CHIP_RPF2;
+       } else if ((0xFU & mif_rev) == 0xAU) {
+               chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_B1 |
+                       HAL_ATLANTIC_UTILS_CHIP_MPI_AQ |
+                       HAL_ATLANTIC_UTILS_CHIP_MIPS |
+                       HAL_ATLANTIC_UTILS_CHIP_TPO2 |
+                       HAL_ATLANTIC_UTILS_CHIP_RPF2;
+       }
+
+       *p = chip_features;
+}
+
+static int hw_atl_fw1x_deinit(struct aq_hw_s *self)
+{
+       hw_atl_utils_mpi_set_speed(self, 0);
+       hw_atl_utils_mpi_set_state(self, MPI_DEINIT);
+       return 0;
+}
+
+int hw_atl_utils_update_stats(struct aq_hw_s *self)
+{
+       struct hw_aq_atl_utils_mbox mbox;
+
+       hw_atl_utils_mpi_read_stats(self, &mbox);
+
+#define AQ_SDELTA(_N_) (self->curr_stats._N_ += \
+                       mbox.stats._N_ - self->last_stats._N_)
+
+       /* The link-status gate (self->aq_link_status.mbps) is disabled;
+        * deltas are always accumulated.
+        */
+       AQ_SDELTA(uprc);
+       AQ_SDELTA(mprc);
+       AQ_SDELTA(bprc);
+       AQ_SDELTA(erpt);
+
+       AQ_SDELTA(uptc);
+       AQ_SDELTA(mptc);
+       AQ_SDELTA(bptc);
+       AQ_SDELTA(erpr);
+       AQ_SDELTA(ubrc);
+       AQ_SDELTA(ubtc);
+       AQ_SDELTA(mbrc);
+       AQ_SDELTA(mbtc);
+       AQ_SDELTA(bbrc);
+       AQ_SDELTA(bbtc);
+       AQ_SDELTA(dpc);
+#undef AQ_SDELTA
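+       /* Assemble the 64-bit DMA packet/octet counters from 32-bit halves. */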
+       self->curr_stats.dma_pkt_rc =
+          hw_atl_stats_rx_dma_good_pkt_counterlsw_get(self) +
+          ((u64)hw_atl_stats_rx_dma_good_pkt_countermsw_get(self) << 32);
+       self->curr_stats.dma_pkt_tc =
+          hw_atl_stats_tx_dma_good_pkt_counterlsw_get(self) +
+          ((u64)hw_atl_stats_tx_dma_good_pkt_countermsw_get(self) << 32);
+       self->curr_stats.dma_oct_rc =
+          hw_atl_stats_rx_dma_good_octet_counterlsw_get(self) +
+          ((u64)hw_atl_stats_rx_dma_good_octet_countermsw_get(self) << 32);
+       self->curr_stats.dma_oct_tc =
+          hw_atl_stats_tx_dma_good_octet_counterlsw_get(self) +
+          ((u64)hw_atl_stats_tx_dma_good_octet_countermsw_get(self) << 32);
+
+       self->curr_stats.dpc = hw_atl_rpb_rx_dma_drop_pkt_cnt_get(self);
+
+       memcpy(&self->last_stats, &mbox.stats, sizeof(mbox.stats));
+
+       return 0;
+}
+
+struct aq_stats_s *hw_atl_utils_get_hw_stats(struct aq_hw_s *self)
+{
+       return &self->curr_stats;
+}
+
+static const u32 hw_atl_utils_hw_mac_regs[] = {
+       0x00005580U, 0x00005590U, 0x000055B0U, 0x000055B4U,
+       0x000055C0U, 0x00005B00U, 0x00005B04U, 0x00005B08U,
+       0x00005B0CU, 0x00005B10U, 0x00005B14U, 0x00005B18U,
+       0x00005B1CU, 0x00005B20U, 0x00005B24U, 0x00005B28U,
+       0x00005B2CU, 0x00005B30U, 0x00005B34U, 0x00005B38U,
+       0x00005B3CU, 0x00005B40U, 0x00005B44U, 0x00005B48U,
+       0x00005B4CU, 0x00005B50U, 0x00005B54U, 0x00005B58U,
+       0x00005B5CU, 0x00005B60U, 0x00005B64U, 0x00005B68U,
+       0x00005B6CU, 0x00005B70U, 0x00005B74U, 0x00005B78U,
+       0x00005B7CU, 0x00007C00U, 0x00007C04U, 0x00007C08U,
+       0x00007C0CU, 0x00007C10U, 0x00007C14U, 0x00007C18U,
+       0x00007C1CU, 0x00007C20U, 0x00007C40U, 0x00007C44U,
+       0x00007C48U, 0x00007C4CU, 0x00007C50U, 0x00007C54U,
+       0x00007C58U, 0x00007C5CU, 0x00007C60U, 0x00007C80U,
+       0x00007C84U, 0x00007C88U, 0x00007C8CU, 0x00007C90U,
+       0x00007C94U, 0x00007C98U, 0x00007C9CU, 0x00007CA0U,
+       0x00007CC0U, 0x00007CC4U, 0x00007CC8U, 0x00007CCCU,
+       0x00007CD0U, 0x00007CD4U, 0x00007CD8U, 0x00007CDCU,
+       0x00007CE0U, 0x00000300U, 0x00000304U, 0x00000308U,
+       0x0000030cU, 0x00000310U, 0x00000314U, 0x00000318U,
+       0x0000031cU, 0x00000360U, 0x00000364U, 0x00000368U,
+       0x0000036cU, 0x00000370U, 0x00000374U, 0x00006900U,
+};
+
+unsigned int hw_atl_utils_hw_get_reg_length(void)
+{
+       return ARRAY_SIZE(hw_atl_utils_hw_mac_regs);
+}
+
+int hw_atl_utils_hw_get_regs(struct aq_hw_s *self,
+                            u32 *regs_buff)
+{
+       unsigned int i = 0U;
+       unsigned int mac_regs_count = hw_atl_utils_hw_get_reg_length();
+
+       for (i = 0; i < mac_regs_count; i++)
+               regs_buff[i] = aq_hw_read_reg(self,
+                                             hw_atl_utils_hw_mac_regs[i]);
+       return 0;
+}
+
+int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version)
+{
+       *fw_version = aq_hw_read_reg(self, 0x18U);
+       return 0;
+}
+
+static int aq_fw1x_set_wol(struct aq_hw_s *self, bool wol_enabled, u8 *mac)
+{
+       struct hw_aq_atl_utils_fw_rpc *prpc = NULL;
+       unsigned int rpc_size = 0U;
+       int err = 0;
+
+       err = hw_atl_utils_fw_rpc_wait(self, &prpc);
+       if (err < 0)
+               goto err_exit;
+
+       memset(prpc, 0, sizeof(*prpc));
+
+       if (wol_enabled) {
+               rpc_size = sizeof(prpc->msg_id) + sizeof(prpc->msg_wol);
+
+               prpc->msg_id = HAL_ATLANTIC_UTILS_FW_MSG_WOL_ADD;
+               prpc->msg_wol.priority = 0x10000000; /* normal priority */
+               prpc->msg_wol.pattern_id = 1U;
+               prpc->msg_wol.wol_packet_type = 2U; /* Magic Packet */
+
+               ether_addr_copy((struct ether_addr *)mac,
+                       (struct ether_addr *)&prpc->msg_wol.wol_pattern);
+       } else {
+               rpc_size = sizeof(prpc->msg_id) + sizeof(prpc->msg_del_id);
+
+               prpc->msg_id = HAL_ATLANTIC_UTILS_FW_MSG_WOL_DEL;
+               prpc->msg_wol.pattern_id = 1U;
+       }
+
+       err = hw_atl_utils_fw_rpc_call(self, rpc_size);
+       if (err < 0)
+               goto err_exit;
+err_exit:
+       return err;
+}
+
+static
+int aq_fw1x_set_power(struct aq_hw_s *self,
+                     unsigned int power_state __rte_unused,
+                     u8 *mac)
+{
+       struct hw_aq_atl_utils_fw_rpc *prpc = NULL;
+       unsigned int rpc_size = 0U;
+       int err = 0;
+
+       if (self->aq_nic_cfg->wol & AQ_NIC_WOL_ENABLED) {
+               err = aq_fw1x_set_wol(self, 1, mac);
+
+               if (err < 0)
+                       goto err_exit;
+
+               rpc_size = sizeof(prpc->msg_id) +
+                               sizeof(prpc->msg_enable_wakeup);
+
+               err = hw_atl_utils_fw_rpc_wait(self, &prpc);
+
+               if (err < 0)
+                       goto err_exit;
+
+               memset(prpc, 0, rpc_size);
+
+               prpc->msg_id = HAL_ATLANTIC_UTILS_FW_MSG_ENABLE_WAKEUP;
+               prpc->msg_enable_wakeup.pattern_mask = 0x00000002;
+
+               err = hw_atl_utils_fw_rpc_call(self, rpc_size);
+               if (err < 0)
+                       goto err_exit;
+       }
+
+       hw_atl_utils_mpi_set_speed(self, 0);
+       hw_atl_utils_mpi_set_state(self, MPI_POWER);
+err_exit:
+       return err;
+}
+
+const struct aq_fw_ops aq_fw_1x_ops = {
+       .init = hw_atl_utils_mpi_create,
+       .deinit = hw_atl_fw1x_deinit,
+       .reset = NULL,
+       .get_mac_permanent = hw_atl_utils_get_mac_permanent,
+       .set_link_speed = hw_atl_utils_mpi_set_speed,
+       .set_state = hw_atl_utils_mpi_set_state,
+       .update_link_status = hw_atl_utils_mpi_get_link_status,
+       .update_stats = hw_atl_utils_update_stats,
+       .set_power = aq_fw1x_set_power,
+       .get_temp = NULL,
+       .get_cable_len = NULL,
+       .set_eee_rate = NULL,
+       .get_eee_rate = NULL,
+       .set_flow_control = NULL,
+       .led_control = NULL,
+       .get_eeprom = NULL,
+       .set_eeprom = NULL,
+};
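+
+/*
+ * Illustrative usage sketch (not part of the driver): a caller binds an
+ * ops table once via hw_atl_utils_initfw() and drives the FW through it:
+ *
+ *     const struct aq_fw_ops *fw_ops;
+ *     if (hw_atl_utils_initfw(hw, &fw_ops) == 0)
+ *             fw_ops->update_link_status(hw);
+ *
+ * where "hw" stands for the caller's struct aq_hw_s instance.
+ */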
diff --git a/drivers/net/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/atlantic/hw_atl/hw_atl_utils.h
new file mode 100644
index 0000000..5f3f708
--- /dev/null
+++ b/drivers/net/atlantic/hw_atl/hw_atl_utils.h
@@ -0,0 +1,510 @@
+/* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0) */
+/* Copyright (C) 2014-2017 aQuantia Corporation. */
+
+/* File hw_atl_utils.h: Declaration of common functions for Atlantic hardware
+ * abstraction layer.
+ */
+
+#ifndef HW_ATL_UTILS_H
+#define HW_ATL_UTILS_H
+
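+/* Read back a register to flush posted writes; expects "self" in scope. */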
+#define HW_ATL_FLUSH() { (void)aq_hw_read_reg(self, 0x10); }
+
+/* Hardware tx descriptor */
+struct hw_atl_txd_s {
+       u64 buf_addr;
+
+       union {
+               struct {
+                       u32 type:3;
+                       u32:1;
+                       u32 len:16;
+                       u32 dd:1;
+                       u32 eop:1;
+                       u32 cmd:8;
+                       u32:14;
+                       u32 ct_idx:1;
+                       u32 ct_en:1;
+                       u32 pay_len:18;
+               } __attribute__((__packed__));
+               u64 flags;
+       };
+} __attribute__((__packed__));
+
+/* Hardware tx context descriptor */
+union hw_atl_txc_s {
+       struct {
+               u64 flags1;
+               u64 flags2;
+       };
+
+       struct {
+               u64:40;
+               u32 tun_len:8;
+               u32 out_len:16;
+               u32 type:3;
+               u32 idx:1;
+               u32 vlan_tag:16;
+               u32 cmd:4;
+               u32 l2_len:7;
+               u32 l3_len:9;
+               u32 l4_len:8;
+               u32 mss_len:16;
+       } __attribute__((__packed__));
+} __attribute__((__packed__));
+
+enum aq_tx_desc_type {
+       tx_desc_type_desc = 1,
+       tx_desc_type_ctx = 2,
+};
+
+enum aq_tx_desc_cmd {
+       tx_desc_cmd_vlan = 1,
+       tx_desc_cmd_fcs = 2,
+       tx_desc_cmd_ipv4 = 4,
+       tx_desc_cmd_l4cs = 8,
+       tx_desc_cmd_lso = 0x10,
+       tx_desc_cmd_wb = 0x20,
+};
+
+
+/* Hardware rx descriptor */
+struct hw_atl_rxd_s {
+       u64 buf_addr;
+       u64 hdr_addr;
+} __attribute__((__packed__));
+
+/* Hardware rx descriptor writeback */
+struct hw_atl_rxd_wb_s {
+       u32 rss_type:4;
+       u32 pkt_type:8;
+       u32 type:20;
+       u32 rss_hash;
+       u16 dd:1;
+       u16 eop:1;
+       u16 rx_stat:4;
+       u16 rx_estat:6;
+       u16 rsc_cnt:4;
+       u16 pkt_len;
+       u16 next_desc_ptr;
+       u16 vlan;
+} __attribute__((__packed__));
+
+struct hw_atl_stats_s {
+       u32 uprc;
+       u32 mprc;
+       u32 bprc;
+       u32 erpt;
+       u32 uptc;
+       u32 mptc;
+       u32 bptc;
+       u32 erpr;
+       u32 mbtc;
+       u32 bbtc;
+       u32 mbrc;
+       u32 bbrc;
+       u32 ubrc;
+       u32 ubtc;
+       u32 dpc;
+} __attribute__((__packed__));
+
+union ip_addr {
+       struct {
+               u8 addr[16];
+       } v6;
+       struct {
+               u8 padding[12];
+               u8 addr[4];
+       } v4;
+} __attribute__((__packed__));
+
+struct hw_aq_atl_utils_fw_rpc {
+       u32 msg_id;
+
+       union {
+               struct {
+                       u32 pong;
+               } msg_ping;
+
+               struct {
+                       u8 mac_addr[6];
+                       u32 ip_addr_cnt;
+
+                       struct {
+                               union ip_addr addr;
+                               union ip_addr mask;
+                       } ip[1];
+               } msg_arp;
+
+               struct {
+                       u32 len;
+                       u8 packet[1514U];
+               } msg_inject;
+
+               struct {
+                       u32 priority;
+                       u32 wol_packet_type;
+                       u32 pattern_id;
+                       u32 next_wol_pattern_offset;
+                       union {
+                               struct {
+                                       u32 flags;
+                                       u8 ipv4_source_address[4];
+                                       u8 ipv4_dest_address[4];
+                                       u16 tcp_source_port_number;
+                                       u16 tcp_dest_port_number;
+                               } ipv4_tcp_syn_parameters;
+
+                               struct {
+                                       u32 flags;
+                                       u8 ipv6_source_address[16];
+                                       u8 ipv6_dest_address[16];
+                                       u16 tcp_source_port_number;
+                                       u16 tcp_dest_port_number;
+                               } ipv6_tcp_syn_parameters;
+
+                               struct {
+                                       u32 flags;
+                               } eapol_request_id_message_parameters;
+
+                               struct {
+                                       u32 flags;
+                                       u32 mask_offset;
+                                       u32 mask_size;
+                                       u32 pattern_offset;
+                                       u32 pattern_size;
+                               } wol_bit_map_pattern;
+                               struct {
+                                       u8 mac_addr[6];
+                               } wol_magic_packet_pattern;
+
+                       } wol_pattern;
+               } msg_wol;
+
+               struct {
+                       u16 tc_quanta[8];
+                       u16 tc_threshold[8];
+               } msg_msm_pfc_quantas;
+
+               struct {
+                       union {
+                               u32 pattern_mask;
+                               struct {
+                                       u32 aq_pm_wol_reason_arp_v4_pkt : 1;
+                                       u32 aq_pm_wol_reason_ipv4_ping_pkt : 1;
+                                       u32 aq_pm_wol_reason_ipv6_ns_pkt : 1;
+                                       u32 aq_pm_wol_reason_ipv6_ping_pkt : 1;
+                                       u32 aq_pm_wol_reason_link_up : 1;
+                                       u32 aq_pm_wol_reason_link_down : 1;
+                                       u32 aq_pm_wol_reason_maximum : 1;
+                               };
+                       };
+                       union {
+                               u32 offload_mask;
+                       };
+               } msg_enable_wakeup;
+
+               struct {
+                       u32 priority;
+                       u32 protocol_offload_type;
+                       u32 protocol_offload_id;
+                       u32 next_protocol_offload_offset;
+
+                       union {
+                               struct {
+                                       u32 flags;
+                                       u8 remote_ipv4_addr[4];
+                                       u8 host_ipv4_addr[4];
+                                       u8 mac_addr[6];
+                               } ipv4_arp_params;
+                       };
+               } msg_offload;
+
+               struct {
+                       u32 id;
+               } msg_del_id;
+
+       };
+} __attribute__((__packed__));
+
+struct hw_aq_atl_utils_mbox_header {
+       u32 version;
+       u32 transaction_id;
+       u32 error;
+} __attribute__((__packed__));
+
+struct hw_aq_info {
+       u8 reserved[6];
+       u16 phy_fault_code;
+       u16 phy_temperature;
+       u8 cable_len;
+       u8 reserved1;
+       u32 cable_diag_data[4];
+       u8 reserved2[32];
+       u32 caps_lo;
+       u32 caps_hi;
+} __attribute__((__packed__));
+
+struct hw_aq_atl_utils_mbox {
+       struct hw_aq_atl_utils_mbox_header header;
+       struct hw_atl_stats_s stats;
+       struct hw_aq_info info;
+} __attribute__((__packed__));
+
+/* fw2x */
+typedef u16    in_port_t;
+typedef u32    ip4_addr_t;
+typedef int    int32_t;
+typedef short  int16_t;
+typedef u32    fw_offset_t;
+
+struct ip6_addr {
+       u32 addr[4];
+} __attribute__((__packed__));
+
+struct offload_ka_v4 {
+       u32 timeout;
+       in_port_t local_port;
+       in_port_t remote_port;
+       u8 remote_mac_addr[6];
+       u16 win_size;
+       u32 seq_num;
+       u32 ack_num;
+       ip4_addr_t local_ip;
+       ip4_addr_t remote_ip;
+} __attribute__((__packed__));
+
+struct offload_ka_v6 {
+       u32 timeout;
+       in_port_t local_port;
+       in_port_t remote_port;
+       u8 remote_mac_addr[6];
+       u16 win_size;
+       u32 seq_num;
+       u32 ack_num;
+       struct ip6_addr local_ip;
+       struct ip6_addr remote_ip;
+} __attribute__((__packed__));
+
+struct offload_ip_info {
+       u8 v4_local_addr_count;
+       u8 v4_addr_count;
+       u8 v6_local_addr_count;
+       u8 v6_addr_count;
+       fw_offset_t v4_addr;
+       fw_offset_t v4_prefix;
+       fw_offset_t v6_addr;
+       fw_offset_t v6_prefix;
+} __attribute__((__packed__));
+
+struct offload_port_info {
+       u16 udp_port_count;
+       u16 tcp_port_count;
+       fw_offset_t udp_port;
+       fw_offset_t tcp_port;
+} __attribute__((__packed__));
+
+struct offload_ka_info {
+       u16 v4_ka_count;
+       u16 v6_ka_count;
+       u32 retry_count;
+       u32 retry_interval;
+       fw_offset_t v4_ka;
+       fw_offset_t v6_ka;
+} __attribute__((__packed__));
+
+struct offload_rr_info {
+       u32 rr_count;
+       u32 rr_buf_len;
+       fw_offset_t rr_id_x;
+       fw_offset_t rr_buf;
+} __attribute__((__packed__));
+
+struct offload_info {
+       u32 version;            // current version is 0x00000000
+       u32 len;                // The whole structure length
+                               // including the variable-size buf
+       u8 mac_addr[6];         // 8 bytes to keep alignment. Only
+                               // first 6 meaningful.
+
+       u8 reserved[2];
+
+       struct offload_ip_info ips;
+       struct offload_port_info ports;
+       struct offload_ka_info kas;
+       struct offload_rr_info rrs;
+       u8 buf[0];
+} __attribute__((__packed__));
+
+struct smbus_read_request {
+       u32 offset; /* not used */
+       u32 device_id;
+       u32 address;
+       u32 length;
+} __attribute__((__packed__));
+
+struct smbus_write_request {
+       u32 offset; /* not used */
+       u32 device_id;
+       u32 address;
+       u32 length;
+} __attribute__((__packed__));
+
+#define HAL_ATLANTIC_UTILS_CHIP_MIPS         0x00000001U
+#define HAL_ATLANTIC_UTILS_CHIP_TPO2         0x00000002U
+#define HAL_ATLANTIC_UTILS_CHIP_RPF2         0x00000004U
+#define HAL_ATLANTIC_UTILS_CHIP_MPI_AQ       0x00000010U
+#define HAL_ATLANTIC_UTILS_CHIP_REVISION_A0  0x01000000U
+#define HAL_ATLANTIC_UTILS_CHIP_REVISION_B0  0x02000000U
+#define HAL_ATLANTIC_UTILS_CHIP_REVISION_B1  0x04000000U
+
+
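+/* Tests the chip_features word of a local "self" (struct aq_hw_s *). */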
+#define IS_CHIP_FEATURE(_F_) (HAL_ATLANTIC_UTILS_CHIP_##_F_ & \
+       self->chip_features)
+
+enum hal_atl_utils_fw_state_e {
+       MPI_DEINIT = 0,
+       MPI_RESET = 1,
+       MPI_INIT = 2,
+       MPI_POWER = 4,
+};
+
+#define HAL_ATLANTIC_RATE_10G        BIT(0)
+#define HAL_ATLANTIC_RATE_5G         BIT(1)
+#define HAL_ATLANTIC_RATE_5GSR       BIT(2)
+#define HAL_ATLANTIC_RATE_2GS        BIT(3)
+#define HAL_ATLANTIC_RATE_1G         BIT(4)
+#define HAL_ATLANTIC_RATE_100M       BIT(5)
+#define HAL_ATLANTIC_RATE_INVALID    BIT(6)
+
+#define HAL_ATLANTIC_UTILS_FW_MSG_PING     1U
+#define HAL_ATLANTIC_UTILS_FW_MSG_ARP      2U
+#define HAL_ATLANTIC_UTILS_FW_MSG_INJECT   3U
+#define HAL_ATLANTIC_UTILS_FW_MSG_WOL_ADD  4U
+#define HAL_ATLANTIC_UTILS_FW_MSG_WOL_DEL  5U
+#define HAL_ATLANTIC_UTILS_FW_MSG_ENABLE_WAKEUP 6U
+#define HAL_ATLANTIC_UTILS_FW_MSG_MSM_PFC  7U
+#define HAL_ATLANTIC_UTILS_FW_MSG_PROVISIONING 8U
+#define HAL_ATLANTIC_UTILS_FW_MSG_OFFLOAD_ADD  9U
+#define HAL_ATLANTIC_UTILS_FW_MSG_OFFLOAD_DEL  10U
+#define HAL_ATLANTIC_UTILS_FW_MSG_CABLE_DIAG   13U // 0xd
+
+#define SMBUS_READ_REQUEST BIT(13)
+#define SMBUS_WRITE_REQUEST BIT(14)
+#define SMBUS_DEVICE_ID 0x50
+
+enum hw_atl_fw2x_rate {
+       FW2X_RATE_100M    = 0x20,
+       FW2X_RATE_1G      = 0x100,
+       FW2X_RATE_2G5     = 0x200,
+       FW2X_RATE_5G      = 0x400,
+       FW2X_RATE_10G     = 0x800,
+};
+
+enum hw_atl_fw2x_caps_lo {
+       CAPS_LO_10BASET_HD = 0x00,
+       CAPS_LO_10BASET_FD,
+       CAPS_LO_100BASETX_HD,
+       CAPS_LO_100BASET4_HD,
+       CAPS_LO_100BASET2_HD,
+       CAPS_LO_100BASETX_FD,
+       CAPS_LO_100BASET2_FD,
+       CAPS_LO_1000BASET_HD,
+       CAPS_LO_1000BASET_FD,
+       CAPS_LO_2P5GBASET_FD,
+       CAPS_LO_5GBASET_FD,
+       CAPS_LO_10GBASET_FD,
+};
+
+enum hw_atl_fw2x_caps_hi {
+       CAPS_HI_RESERVED1 = 0x00,
+       CAPS_HI_10BASET_EEE,
+       CAPS_HI_RESERVED2,
+       CAPS_HI_PAUSE,
+       CAPS_HI_ASYMMETRIC_PAUSE,
+       CAPS_HI_100BASETX_EEE,
+       CAPS_HI_RESERVED3,
+       CAPS_HI_RESERVED4,
+       CAPS_HI_1000BASET_FD_EEE,
+       CAPS_HI_2P5GBASET_FD_EEE,
+       CAPS_HI_5GBASET_FD_EEE,
+       CAPS_HI_10GBASET_FD_EEE,
+       CAPS_HI_RESERVED5,
+       CAPS_HI_RESERVED6,
+       CAPS_HI_RESERVED7,
+       CAPS_HI_RESERVED8,
+       CAPS_HI_RESERVED9,
+       CAPS_HI_CABLE_DIAG,
+       CAPS_HI_TEMPERATURE,
+       CAPS_HI_DOWNSHIFT,
+       CAPS_HI_PTP_AVB_EN,
+       CAPS_HI_MEDIA_DETECT,
+       CAPS_HI_LINK_DROP,
+       CAPS_HI_SLEEP_PROXY,
+       CAPS_HI_WOL,
+       CAPS_HI_MAC_STOP,
+       CAPS_HI_EXT_LOOPBACK,
+       CAPS_HI_INT_LOOPBACK,
+       CAPS_HI_EFUSE_AGENT,
+       CAPS_HI_WOL_TIMER,
+       CAPS_HI_STATISTICS,
+       CAPS_HI_TRANSACTION_ID,
+};
+
+struct aq_hw_s;
+struct aq_fw_ops;
+struct aq_hw_link_status_s;
+
+int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops);
+
+int hw_atl_utils_soft_reset(struct aq_hw_s *self);
+
+void hw_atl_utils_hw_chip_features_init(struct aq_hw_s *self, u32 *p);
+
+int hw_atl_utils_mpi_read_mbox(struct aq_hw_s *self,
+                              struct hw_aq_atl_utils_mbox_header *pmbox);
+
+void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self,
+                                struct hw_aq_atl_utils_mbox *pmbox);
+
+void hw_atl_utils_mpi_set(struct aq_hw_s *self,
+                         enum hal_atl_utils_fw_state_e state,
+                         u32 speed);
+
+int hw_atl_utils_mpi_get_link_status(struct aq_hw_s *self);
+
+unsigned int hw_atl_utils_mbps_2_speed_index(unsigned int mbps);
+
+unsigned int hw_atl_utils_hw_get_reg_length(void);
+
+int hw_atl_utils_hw_get_regs(struct aq_hw_s *self,
+                            u32 *regs_buff);
+
+int hw_atl_utils_hw_set_power(struct aq_hw_s *self,
+                             unsigned int power_state);
+
+int hw_atl_utils_hw_deinit(struct aq_hw_s *self);
+
+int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version);
+
+int hw_atl_utils_update_stats(struct aq_hw_s *self);
+
+struct aq_stats_s *hw_atl_utils_get_hw_stats(struct aq_hw_s *self);
+
+int hw_atl_utils_fw_downld_dwords(struct aq_hw_s *self, u32 a,
+                                 u32 *p, u32 cnt);
+
+int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 a, u32 *p,
+                               u32 cnt);
+
+int hw_atl_utils_fw_set_wol(struct aq_hw_s *self, bool wol_enabled, u8 *mac);
+
+int hw_atl_utils_fw_rpc_call(struct aq_hw_s *self, unsigned int rpc_size);
+
+int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
+                   struct hw_aq_atl_utils_fw_rpc **rpc);
+
+extern const struct aq_fw_ops aq_fw_1x_ops;
+extern const struct aq_fw_ops aq_fw_2x_ops;
+
+#endif /* HW_ATL_UTILS_H */
diff --git a/drivers/net/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/atlantic/hw_atl/hw_atl_utils_fw2x.c
new file mode 100644
index 0000000..6841d9b
--- /dev/null
+++ b/drivers/net/atlantic/hw_atl/hw_atl_utils_fw2x.c
@@ -0,0 +1,618 @@
+// SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0)
+/* Copyright (C) 2014-2017 aQuantia Corporation. */
+
+/* File hw_atl_utils_fw2x.c: Definition of firmware 2.x functions for
+ * Atlantic hardware abstraction layer.
+ */
+
+#include <rte_ether.h>
+#include "../atl_hw_regs.h"
+
+#include "../atl_types.h"
+#include "hw_atl_utils.h"
+#include "hw_atl_llh.h"
+
+#define HW_ATL_FW2X_MPI_EFUSE_ADDR     0x364
+#define HW_ATL_FW2X_MPI_MBOX_ADDR      0x360
+#define HW_ATL_FW2X_MPI_RPC_ADDR       0x334
+
+#define HW_ATL_FW2X_MPI_CONTROL_ADDR   0x368
+#define HW_ATL_FW2X_MPI_CONTROL2_ADDR  0x36C
+#define HW_ATL_FW2X_MPI_LED_ADDR       0x31c
+
+#define HW_ATL_FW2X_MPI_STATE_ADDR     0x370
+#define HW_ATL_FW2X_MPI_STATE2_ADDR    0x374
+
+#define HW_ATL_FW2X_CAP_SLEEP_PROXY BIT(CAPS_HI_SLEEP_PROXY)
+#define HW_ATL_FW2X_CAP_WOL BIT(CAPS_HI_WOL)
+
+#define HW_ATL_FW2X_CAP_EEE_1G_MASK   BIT(CAPS_HI_1000BASET_FD_EEE)
+#define HW_ATL_FW2X_CAP_EEE_2G5_MASK  BIT(CAPS_HI_2P5GBASET_FD_EEE)
+#define HW_ATL_FW2X_CAP_EEE_5G_MASK   BIT(CAPS_HI_5GBASET_FD_EEE)
+#define HW_ATL_FW2X_CAP_EEE_10G_MASK  BIT(CAPS_HI_10GBASET_FD_EEE)
+
+#define HAL_ATLANTIC_WOL_FILTERS_COUNT     8
+#define HAL_ATLANTIC_UTILS_FW2X_MSG_WOL    0x0E
+
+#define HW_ATL_FW_FEATURE_EEPROM 0x03010025
+#define HW_ATL_FW_FEATURE_LED 0x03010026
+
+struct fw2x_msg_wol_pattern {
+       u8 mask[16];
+       u32 crc;
+} __attribute__((__packed__));
+
+struct fw2x_msg_wol {
+       u32 msg_id;
+       u8 hw_addr[6];
+       u8 magic_packet_enabled;
+       u8 filter_count;
+       struct fw2x_msg_wol_pattern filter[HAL_ATLANTIC_WOL_FILTERS_COUNT];
+       u8 link_up_enabled;
+       u8 link_down_enabled;
+       u16 reserved;
+       u32 link_up_timeout;
+       u32 link_down_timeout;
+} __attribute__((__packed__));
+
+static int aq_fw2x_set_link_speed(struct aq_hw_s *self, u32 speed);
+static int aq_fw2x_set_state(struct aq_hw_s *self,
+                            enum hal_atl_utils_fw_state_e state);
+
+static int aq_fw2x_init(struct aq_hw_s *self)
+{
+       int err = 0;
+
+       /* Poll the mailbox address 10 times by 1ms,
+        * then the RPC address 100 times by 1ms.
+        */
+       AQ_HW_WAIT_FOR(0U != (self->mbox_addr =
+                      aq_hw_read_reg(self, HW_ATL_FW2X_MPI_MBOX_ADDR)),
+                      1000U, 10U);
+       AQ_HW_WAIT_FOR(0U != (self->rpc_addr =
+                      aq_hw_read_reg(self, HW_ATL_FW2X_MPI_RPC_ADDR)),
+                      1000U, 100U);
+       return err;
+}
+
+static int aq_fw2x_deinit(struct aq_hw_s *self)
+{
+       int err = aq_fw2x_set_link_speed(self, 0);
+
+       if (!err)
+               err = aq_fw2x_set_state(self, MPI_DEINIT);
+
+       return err;
+}
+
+static enum hw_atl_fw2x_rate link_speed_mask_2fw2x_ratemask(u32 speed)
+{
+       enum hw_atl_fw2x_rate rate = 0;
+
+       if (speed & AQ_NIC_RATE_10G)
+               rate |= FW2X_RATE_10G;
+
+       if (speed & AQ_NIC_RATE_5G)
+               rate |= FW2X_RATE_5G;
+
+       if (speed & AQ_NIC_RATE_5G5R)
+               rate |= FW2X_RATE_5G;
+
+       if (speed & AQ_NIC_RATE_2G5)
+               rate |= FW2X_RATE_2G5;
+
+       if (speed & AQ_NIC_RATE_1G)
+               rate |= FW2X_RATE_1G;
+
+       if (speed & AQ_NIC_RATE_100M)
+               rate |= FW2X_RATE_100M;
+
+       return rate;
+}
+
+static u32 fw2x_to_eee_mask(u32 speed)
+{
+       u32 rate = 0;
+
+       if (speed & HW_ATL_FW2X_CAP_EEE_10G_MASK)
+               rate |= AQ_NIC_RATE_EEE_10G;
+
+       if (speed & HW_ATL_FW2X_CAP_EEE_5G_MASK)
+               rate |= AQ_NIC_RATE_EEE_5G;
+
+       if (speed & HW_ATL_FW2X_CAP_EEE_2G5_MASK)
+               rate |= AQ_NIC_RATE_EEE_2G5;
+
+       if (speed & HW_ATL_FW2X_CAP_EEE_1G_MASK)
+               rate |= AQ_NIC_RATE_EEE_1G;
+
+       return rate;
+}
+
+static int aq_fw2x_set_link_speed(struct aq_hw_s *self, u32 speed)
+{
+       u32 val = link_speed_mask_2fw2x_ratemask(speed);
+
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL_ADDR, val);
+
+       return 0;
+}
+
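+/* Map RX flow control to PAUSE and TX to ASYMMETRIC_PAUSE in the MPI word. */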
+static void aq_fw2x_set_mpi_flow_control(struct aq_hw_s *self, u32 *mpi_state)
+{
+       if (self->aq_nic_cfg->flow_control & AQ_NIC_FC_RX)
+               *mpi_state |= BIT(CAPS_HI_PAUSE);
+       else
+               *mpi_state &= ~BIT(CAPS_HI_PAUSE);
+
+       if (self->aq_nic_cfg->flow_control & AQ_NIC_FC_TX)
+               *mpi_state |= BIT(CAPS_HI_ASYMMETRIC_PAUSE);
+       else
+               *mpi_state &= ~BIT(CAPS_HI_ASYMMETRIC_PAUSE);
+}
+
+static int aq_fw2x_set_state(struct aq_hw_s *self,
+                            enum hal_atl_utils_fw_state_e state)
+{
+       u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+
+       switch (state) {
+       case MPI_INIT:
+               mpi_state &= ~BIT(CAPS_HI_LINK_DROP);
+               aq_fw2x_set_mpi_flow_control(self, &mpi_state);
+               break;
+       case MPI_DEINIT:
+               mpi_state |= BIT(CAPS_HI_LINK_DROP);
+               break;
+       case MPI_RESET:
+       case MPI_POWER:
+               /* No actions */
+               break;
+       }
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_state);
+       return 0;
+}
+
+static int aq_fw2x_update_link_status(struct aq_hw_s *self)
+{
+       u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE_ADDR);
+       u32 speed = mpi_state & (FW2X_RATE_100M | FW2X_RATE_1G |
+                               FW2X_RATE_2G5 | FW2X_RATE_5G | FW2X_RATE_10G);
+       struct aq_hw_link_status_s *link_status = &self->aq_link_status;
+
+       if (speed) {
+               if (speed & FW2X_RATE_10G)
+                       link_status->mbps = 10000;
+               else if (speed & FW2X_RATE_5G)
+                       link_status->mbps = 5000;
+               else if (speed & FW2X_RATE_2G5)
+                       link_status->mbps = 2500;
+               else if (speed & FW2X_RATE_1G)
+                       link_status->mbps = 1000;
+               else if (speed & FW2X_RATE_100M)
+                       link_status->mbps = 100;
+               else
+                       link_status->mbps = 10000;
+       } else {
+               link_status->mbps = 0;
+       }
+
+       return 0;
+}
+
+static
+int aq_fw2x_get_mac_permanent(struct aq_hw_s *self, u8 *mac)
+{
+       int err = 0;
+       u32 h = 0U;
+       u32 l = 0U;
+       u32 mac_addr[2] = { 0 };
+       u32 efuse_addr = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_EFUSE_ADDR);
+
+       if (efuse_addr != 0) {
+               err = hw_atl_utils_fw_downld_dwords(self,
+                                                   efuse_addr + (40U * 4U),
+                                                   mac_addr,
+                                                   ARRAY_SIZE(mac_addr));
+               if (err)
+                       return err;
+               mac_addr[0] = rte_constant_bswap32(mac_addr[0]);
+               mac_addr[1] = rte_constant_bswap32(mac_addr[1]);
+       }
+
+       ether_addr_copy((struct ether_addr *)mac_addr,
+                       (struct ether_addr *)mac);
+
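+       /* Invalid MAC (multicast bit or empty OUI): synthesize a fallback. */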
+       if ((mac[0] & 0x01U) || ((mac[0] | mac[1] | mac[2]) == 0x00U)) {
+               unsigned int rnd = (uint32_t)rte_rand();
+
+               l = 0xE3000000U
+                       | (0xFFFFU & rnd)
+                       | (0x00 << 16);
+               h = 0x8001300EU;
+
+               mac[5] = (u8)(0xFFU & l);
+               l >>= 8;
+               mac[4] = (u8)(0xFFU & l);
+               l >>= 8;
+               mac[3] = (u8)(0xFFU & l);
+               l >>= 8;
+               mac[2] = (u8)(0xFFU & l);
+               mac[1] = (u8)(0xFFU & h);
+               h >>= 8;
+               mac[0] = (u8)(0xFFU & h);
+       }
+       return err;
+}
+
+static int aq_fw2x_update_stats(struct aq_hw_s *self)
+{
+       int err = 0;
+       u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+       u32 orig_stats_val = mpi_opts & BIT(CAPS_HI_STATISTICS);
+
+       /* Toggle statistics bit for FW to update */
+       mpi_opts = mpi_opts ^ BIT(CAPS_HI_STATISTICS);
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+
+       /* Wait FW to report back */
+       AQ_HW_WAIT_FOR(orig_stats_val !=
+                      (aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR) &
+                                      BIT(CAPS_HI_STATISTICS)),
+                      1U, 10000U);
+       if (err)
+               return err;
+
+       return hw_atl_utils_update_stats(self);
+}
+
+static int aq_fw2x_get_temp(struct aq_hw_s *self, int *temp)
+{
+       int err = 0;
+       u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+       u32 temp_val = mpi_opts & BIT(CAPS_HI_TEMPERATURE);
+       u32 temp_res;
+
+       /* Toggle temperature bit 0x36C.18 (CAPS_HI_TEMPERATURE) for FW to update */
+       mpi_opts = mpi_opts ^ BIT(CAPS_HI_TEMPERATURE);
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+
+       /* Wait FW to report back */
+       AQ_HW_WAIT_FOR(temp_val !=
+                       (aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR) &
+                                       BIT(CAPS_HI_TEMPERATURE)), 1U, 10000U);
+       err = hw_atl_utils_fw_downld_dwords(self,
+                               self->mbox_addr +
+                               offsetof(struct hw_aq_atl_utils_mbox, info) +
+                               offsetof(struct hw_aq_info, phy_temperature),
+                               &temp_res,
+                               sizeof(temp_res) / sizeof(u32));
+
+       if (err)
+               return err;
+
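+       /* Scale the raw reading (presumably 1/256 degC units) to centidegrees. */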
+       *temp = temp_res * 100 / 256;
+       return 0;
+}
+
+static int aq_fw2x_get_cable_len(struct aq_hw_s *self, int *cable_len)
+{
+       int err = 0;
+       u32 cable_len_res;
+
+       err = hw_atl_utils_fw_downld_dwords(self,
+                               self->mbox_addr +
+                               offsetof(struct hw_aq_atl_utils_mbox, info) +
+                               offsetof(struct hw_aq_info, phy_temperature),
+                               &cable_len_res,
+                               sizeof(cable_len_res) / sizeof(u32));
+
+       if (err)
+               return err;
+
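+       /* The length shares a dword with phy_temperature in hw_aq_info:
+        * byte 2 of that dword holds cable_len, hence the shift below.
+        */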
+       *cable_len = (cable_len_res >> 16) & 0xFF;
+       return 0;
+}
+
+#ifndef ETH_ALEN
+#define ETH_ALEN 6
+#endif
+
+static int aq_fw2x_set_sleep_proxy(struct aq_hw_s *self, u8 *mac)
+{
+       int err = 0;
+       struct hw_aq_atl_utils_fw_rpc *rpc = NULL;
+       struct offload_info *cfg = NULL;
+       unsigned int rpc_size = 0U;
+       u32 mpi_opts;
+
+       rpc_size = sizeof(rpc->msg_id) + sizeof(*cfg);
+
+       err = hw_atl_utils_fw_rpc_wait(self, &rpc);
+       if (err < 0)
+               goto err_exit;
+
+       memset(rpc, 0, rpc_size);
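+       /* The offload config is laid out immediately after msg_id in the RPC. */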
+       cfg = (struct offload_info *)(&rpc->msg_id + 1);
+
+       memcpy(cfg->mac_addr, mac, ETH_ALEN);
+       cfg->len = sizeof(*cfg);
+
+       /* Clear bit 0x36C.23 */
+       mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+       mpi_opts &= ~HW_ATL_FW2X_CAP_SLEEP_PROXY;
+
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+
+       err = hw_atl_utils_fw_rpc_call(self, rpc_size);
+       if (err < 0)
+               goto err_exit;
+
+       /* Set bit 0x36C.23 */
+       mpi_opts |= HW_ATL_FW2X_CAP_SLEEP_PROXY;
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+
+       AQ_HW_WAIT_FOR((aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR) &
+                       HW_ATL_FW2X_CAP_SLEEP_PROXY), 1U, 10000U);
+err_exit:
+       return err;
+}
+
+static int aq_fw2x_set_wol_params(struct aq_hw_s *self, u8 *mac)
+{
+       int err = 0;
+       struct fw2x_msg_wol *msg = NULL;
+       u32 mpi_opts;
+
+       struct hw_aq_atl_utils_fw_rpc *rpc = NULL;
+
+       err = hw_atl_utils_fw_rpc_wait(self, &rpc);
+       if (err < 0)
+               goto err_exit;
+
+       msg = (struct fw2x_msg_wol *)rpc;
+
+       msg->msg_id = HAL_ATLANTIC_UTILS_FW2X_MSG_WOL;
+       msg->magic_packet_enabled = true;
+       memcpy(msg->hw_addr, mac, ETH_ALEN);
+
+       mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+       mpi_opts &= ~(HW_ATL_FW2X_CAP_SLEEP_PROXY | HW_ATL_FW2X_CAP_WOL);
+
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+
+       err = hw_atl_utils_fw_rpc_call(self, sizeof(*msg));
+       if (err < 0)
+               goto err_exit;
+
+       /* Set bit 0x36C.24 */
+       mpi_opts |= HW_ATL_FW2X_CAP_WOL;
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+
+       AQ_HW_WAIT_FOR((aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR) &
+                       HW_ATL_FW2X_CAP_WOL), 1U, 10000U);
+err_exit:
+       return err;
+}
+
+static int aq_fw2x_set_power(struct aq_hw_s *self,
+                            unsigned int power_state __rte_unused,
+                            u8 *mac)
+{
+       int err = 0;
+
+       if (self->aq_nic_cfg->wol & AQ_NIC_WOL_ENABLED) {
+               err = aq_fw2x_set_sleep_proxy(self, mac);
+               if (err < 0)
+                       goto err_exit;
+               err = aq_fw2x_set_wol_params(self, mac);
+               if (err < 0)
+                       goto err_exit;
+       }
+err_exit:
+       return err;
+}
+
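+/* Translate AQ_NIC_RATE_EEE_* speed flags into the FW2X EEE capability bits. */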
+static int aq_fw2x_set_eee_rate(struct aq_hw_s *self, u32 speed)
+{
+       u32 mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+       mpi_opts &= ~(HW_ATL_FW2X_CAP_EEE_1G_MASK |
+               HW_ATL_FW2X_CAP_EEE_2G5_MASK | HW_ATL_FW2X_CAP_EEE_5G_MASK |
+               HW_ATL_FW2X_CAP_EEE_10G_MASK);
+
+       if (speed & AQ_NIC_RATE_EEE_10G)
+               mpi_opts |= HW_ATL_FW2X_CAP_EEE_10G_MASK;
+
+       if (speed & AQ_NIC_RATE_EEE_5G)
+               mpi_opts |= HW_ATL_FW2X_CAP_EEE_5G_MASK;
+
+       if (speed & AQ_NIC_RATE_EEE_2G5)
+               mpi_opts |= HW_ATL_FW2X_CAP_EEE_2G5_MASK;
+
+       if (speed & AQ_NIC_RATE_EEE_1G)
+               mpi_opts |= HW_ATL_FW2X_CAP_EEE_1G_MASK;
+
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_opts);
+
+       return 0;
+}
+
+static int aq_fw2x_get_eee_rate(struct aq_hw_s *self, u32 *rate,
+                                       u32 *supported_rates)
+{
+       int err = 0;
+       u32 caps_hi;
+       u32 mpi_state;
+
+       err = hw_atl_utils_fw_downld_dwords(self,
+                               self->mbox_addr +
+                               offsetof(struct hw_aq_atl_utils_mbox, info) +
+                               offsetof(struct hw_aq_info, caps_hi),
+                               &caps_hi,
+                               sizeof(caps_hi) / sizeof(u32));
+
+       if (err)
+               return err;
+
+       *supported_rates = fw2x_to_eee_mask(caps_hi);
+
+       mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR);
+       *rate = fw2x_to_eee_mask(mpi_state);
+
+       return err;
+}
+
+static int aq_fw2x_set_flow_control(struct aq_hw_s *self)
+{
+       u32 mpi_state = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);
+
+       aq_fw2x_set_mpi_flow_control(self, &mpi_state);
+
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, mpi_state);
+
+       return 0;
+}
+
+static int aq_fw2x_led_control(struct aq_hw_s *self, u32 mode)
+{
+       if (self->fw_ver_actual < HW_ATL_FW_FEATURE_LED)
+               return -EOPNOTSUPP;
+
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_LED_ADDR, mode);
+       return 0;
+}
+
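+/* EEPROM reads are SMBUS transactions proxied through the FW RPC area. */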
+static int aq_fw2x_get_eeprom(struct aq_hw_s *self, u32 *data, u32 len)
+{
+       int err = 0;
+       struct smbus_read_request request;
+       u32 mpi_opts;
+       u32 result = 0;
+
+       if (self->fw_ver_actual < HW_ATL_FW_FEATURE_EEPROM)
+               return -EOPNOTSUPP;
+
+       request.device_id = SMBUS_DEVICE_ID;
+       request.address = 0;
+       request.length = len;
+
+       /* Write SMBUS request to cfg memory */
+       err = hw_atl_utils_fw_upload_dwords(self, self->rpc_addr,
+                               (u32 *)(void *)&request,
+                               RTE_ALIGN(sizeof(request), sizeof(u32)));
+
+       if (err < 0)
+               return err;
+
+       /* Toggle 0x368.SMBUS_READ_REQUEST bit */
+       mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL_ADDR);
+       mpi_opts ^= SMBUS_READ_REQUEST;
+
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL_ADDR, mpi_opts);
+
+       /* Wait until the request bit is mirrored in 0x370 */
+       AQ_HW_WAIT_FOR((aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE_ADDR) &
+               SMBUS_READ_REQUEST) == (mpi_opts & SMBUS_READ_REQUEST),
+               10U, 10000U);
+
+       if (err < 0)
+               return err;
+
+       err = hw_atl_utils_fw_downld_dwords(self, self->rpc_addr + sizeof(u32),
+                       &result,
+                       RTE_ALIGN(sizeof(result), sizeof(u32)));
+
+       if (err < 0)
+               return err;
+
+       if (result == 0) {
+               err = hw_atl_utils_fw_downld_dwords(self,
+                               self->rpc_addr + sizeof(u32) * 2,
+                               data,
+                               RTE_ALIGN(len, sizeof(u32)));
+
+               if (err < 0)
+                       return err;
+       }
+
+       return 0;
+}
+
+static int aq_fw2x_set_eeprom(struct aq_hw_s *self, u32 *data, u32 len)
+{
+       struct smbus_write_request request;
+       u32 mpi_opts, result = 0;
+       int err = 0;
+
+       if (self->fw_ver_actual < HW_ATL_FW_FEATURE_EEPROM)
+               return -EOPNOTSUPP;
+
+       request.device_id = SMBUS_DEVICE_ID;
+       request.address = 0;
+       request.length = len;
+
+       /* Write SMBUS request to cfg memory */
+       err = hw_atl_utils_fw_upload_dwords(self, self->rpc_addr,
+                               (u32 *)(void *)&request,
+                               RTE_ALIGN(sizeof(request), sizeof(u32)));
+
+       if (err < 0)
+               return err;
+
+       /* Write SMBUS data to cfg memory */
+       err = hw_atl_utils_fw_upload_dwords(self,
+                               self->rpc_addr + sizeof(request),
+                               (u32 *)(void *)data,
+                               RTE_ALIGN(len, sizeof(u32)));
+
+       if (err < 0)
+               return err;
+
+       /* Toggle 0x368.SMBUS_WRITE_REQUEST bit */
+       mpi_opts = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL_ADDR);
+       mpi_opts ^= SMBUS_WRITE_REQUEST;
+
+       aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL_ADDR, mpi_opts);
+
+       /* Wait until the request bit is mirrored in 0x370 */
+       AQ_HW_WAIT_FOR((aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE_ADDR) &
+               SMBUS_WRITE_REQUEST) == (mpi_opts & SMBUS_WRITE_REQUEST),
+               10U, 10000U);
+
+       if (err < 0)
+               return err;
+
+       /* Read status of write operation */
+       err = hw_atl_utils_fw_downld_dwords(self, self->rpc_addr + sizeof(u32),
+                               &result,
+                               RTE_ALIGN(sizeof(result), sizeof(u32)));
+
+       if (err < 0)
+               return err;
+
+       return 0;
+}
+
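+/* FW 2.x operations table; .reset is left NULL (not implemented here). */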
+const struct aq_fw_ops aq_fw_2x_ops = {
+       .init = aq_fw2x_init,
+       .deinit = aq_fw2x_deinit,
+       .reset = NULL,
+       .get_mac_permanent = aq_fw2x_get_mac_permanent,
+       .set_link_speed = aq_fw2x_set_link_speed,
+       .set_state = aq_fw2x_set_state,
+       .update_link_status = aq_fw2x_update_link_status,
+       .update_stats = aq_fw2x_update_stats,
+       .set_power = aq_fw2x_set_power,
+       .get_temp = aq_fw2x_get_temp,
+       .get_cable_len = aq_fw2x_get_cable_len,
+       .set_eee_rate = aq_fw2x_set_eee_rate,
+       .get_eee_rate = aq_fw2x_get_eee_rate,
+       .set_flow_control = aq_fw2x_set_flow_control,
+       .led_control = aq_fw2x_led_control,
+       .get_eeprom = aq_fw2x_get_eeprom,
+       .set_eeprom = aq_fw2x_set_eeprom,
+};
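
Nearly every FW2X operation above uses the same mailbox handshake: toggle a request bit in the MPI CONTROL2 register, poll the STATE2 register until firmware mirrors the bit, then pull any payload out of the shared mailbox. A minimal sketch of the pattern, assuming the register helpers and the err-setting AQ_HW_WAIT_FOR() macro defined elsewhere in this driver:

static int fw2x_toggle_and_wait(struct aq_hw_s *self, u32 req_bit)
{
	int err = 0;
	u32 ctrl = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR);

	/* FW reacts to the bit transition, so toggle rather than set. */
	ctrl ^= req_bit;
	aq_hw_write_reg(self, HW_ATL_FW2X_MPI_CONTROL2_ADDR, ctrl);

	/* Poll until FW mirrors the new bit value; on timeout the macro
	 * sets err to a negative value.
	 */
	AQ_HW_WAIT_FOR((aq_hw_read_reg(self, HW_ATL_FW2X_MPI_STATE2_ADDR) &
			req_bit) == (ctrl & req_bit), 1U, 10000U);

	return err;
}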
diff --git a/drivers/net/atlantic/meson.build b/drivers/net/atlantic/meson.build
new file mode 100644 (file)
index 0000000..28fb97c
--- /dev/null
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Aquantia Corporation
+
+sources = files(
+       'atl_rxtx.c',
+       'atl_ethdev.c',
+       'atl_hw_regs.c',
+       'hw_atl/hw_atl_b0.c',
+       'hw_atl/hw_atl_llh.c',
+       'hw_atl/hw_atl_utils_fw2x.c',
+       'hw_atl/hw_atl_utils.c',
+)
diff --git a/drivers/net/atlantic/rte_pmd_atlantic_version.map b/drivers/net/atlantic/rte_pmd_atlantic_version.map
new file mode 100644 (file)
index 0000000..521e51f
--- /dev/null
@@ -0,0 +1,4 @@
+DPDK_18.11 {
+
+       local: *;
+};
index 3f815bb..aec6e4c 100644 (file)
@@ -8,7 +8,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 #
 LIB = librte_pmd_avf.a
 
-CFLAGS += -O3
+CFLAGS += -O3 $(WERROR_FLAGS) -Wno-strict-aliasing -DALLOW_EXPERIMENTAL_API
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
 LDLIBS += -lrte_bus_pci
index 3a2baaf..13eec1b 100644 (file)
@@ -154,7 +154,6 @@ static int
 avf_init_rss(struct avf_adapter *adapter)
 {
        struct avf_info *vf =  AVF_DEV_PRIVATE_TO_VF(adapter);
-       struct avf_hw *hw = AVF_DEV_PRIVATE_TO_HW(adapter);
        struct rte_eth_rss_conf *rss_conf;
        uint8_t i, j, nb_q;
        int ret;
@@ -259,11 +258,8 @@ avf_init_rxq(struct rte_eth_dev *dev, struct avf_rx_queue *rxq)
 static int
 avf_init_queues(struct rte_eth_dev *dev)
 {
-       struct avf_info *vf = AVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct avf_rx_queue **rxq =
                (struct avf_rx_queue **)dev->data->rx_queues;
-       struct avf_tx_queue **txq =
-               (struct avf_tx_queue **)dev->data->tx_queues;
        int i, ret = AVF_SUCCESS;
 
        for (i = 0; i < dev->data->nb_rx_queues; i++) {
@@ -415,7 +411,6 @@ avf_dev_start(struct rte_eth_dev *dev)
                AVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct avf_info *vf = AVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        struct avf_hw *hw = AVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = dev->intr_handle;
 
        PMD_INIT_FUNC_TRACE();
@@ -476,9 +471,7 @@ avf_dev_stop(struct rte_eth_dev *dev)
        struct avf_adapter *adapter =
                AVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct avf_hw *hw = AVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct rte_intr_handle *intr_handle = dev->intr_handle;
-       int ret, i;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -503,8 +496,6 @@ avf_dev_stop(struct rte_eth_dev *dev)
 static void
 avf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 {
-       struct avf_adapter *adapter =
-               AVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct avf_info *vf = AVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
 
        memset(dev_info, 0, sizeof(*dev_info));
@@ -523,8 +514,6 @@ avf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                DEV_RX_OFFLOAD_UDP_CKSUM |
                DEV_RX_OFFLOAD_TCP_CKSUM |
                DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
-               DEV_RX_OFFLOAD_CRC_STRIP |
-               DEV_RX_OFFLOAD_KEEP_CRC |
                DEV_RX_OFFLOAD_SCATTER |
                DEV_RX_OFFLOAD_JUMBO_FRAME |
                DEV_RX_OFFLOAD_VLAN_FILTER;
@@ -569,7 +558,7 @@ avf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 }
 
 static const uint32_t *
-avf_dev_supported_ptypes_get(struct rte_eth_dev *dev)
+avf_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused)
 {
        static const uint32_t ptypes[] = {
                RTE_PTYPE_L2_ETHER,
@@ -915,7 +904,6 @@ avf_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
 static int
 avf_dev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 {
-       struct avf_info *vf = AVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
        uint32_t frame_size = mtu + AVF_ETH_OVERHEAD;
        int ret = 0;
 
@@ -1045,8 +1033,6 @@ avf_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id)
 static int
 avf_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id)
 {
-       struct avf_adapter *adapter =
-               AVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(dev);
        struct avf_hw *hw = AVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        uint16_t msix_intr;
@@ -1089,7 +1075,7 @@ avf_check_vf_reset_done(struct avf_hw *hw)
 static int
 avf_init_vf(struct rte_eth_dev *dev)
 {
-       int i, err, bufsz;
+       int err, bufsz;
        struct avf_adapter *adapter =
                AVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct avf_hw *hw = AVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
@@ -1198,7 +1184,6 @@ avf_dev_interrupt_handler(void *param)
 
        avf_handle_virtchnl_msg(dev);
 
-done:
        avf_enable_irq0(hw);
 }
 
@@ -1318,9 +1303,6 @@ avf_dev_uninit(struct rte_eth_dev *dev)
        rte_free(vf->aq_resp);
        vf->aq_resp = NULL;
 
-       rte_free(dev->data->mac_addrs);
-       dev->data->mac_addrs = NULL;
-
        if (vf->rss_lut) {
                rte_free(vf->rss_lut);
                vf->rss_lut = NULL;
index e03a136..8c7a967 100644 (file)
@@ -247,7 +247,6 @@ alloc_rxq_mbufs(struct avf_rx_queue *rxq)
 static inline void
 release_rxq_mbufs(struct avf_rx_queue *rxq)
 {
-       struct rte_mbuf *mbuf;
        uint16_t i;
 
        if (!rxq->sw_ring)
@@ -310,9 +309,8 @@ avf_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
        struct avf_rx_queue *rxq;
        const struct rte_memzone *mz;
        uint32_t ring_size;
-       uint16_t len, i;
+       uint16_t len;
        uint16_t rx_free_thresh;
-       uint16_t base, bsf, tc_mapping;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -428,13 +426,10 @@ avf_dev_tx_queue_setup(struct rte_eth_dev *dev,
                       const struct rte_eth_txconf *tx_conf)
 {
        struct avf_hw *hw = AVF_DEV_PRIVATE_TO_HW(dev->data->dev_private);
-       struct avf_adapter *ad =
-               AVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
        struct avf_tx_queue *txq;
        const struct rte_memzone *mz;
        uint32_t ring_size;
        uint16_t tx_rs_thresh, tx_free_thresh;
-       uint16_t i, base, bsf, tc_mapping;
        uint64_t offloads;
 
        PMD_INIT_FUNC_TRACE();
@@ -515,8 +510,11 @@ avf_dev_tx_queue_setup(struct rte_eth_dev *dev,
        txq->ops = &def_txq_ops;
 
 #ifdef RTE_LIBRTE_AVF_INC_VECTOR
-       if (check_tx_vec_allow(txq) == FALSE)
+       if (check_tx_vec_allow(txq) == FALSE) {
+               struct avf_adapter *ad =
+                       AVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
                ad->tx_vec_allowed = false;
+       }
 #endif
 
        return 0;
@@ -1268,7 +1266,6 @@ static inline uint16_t
 rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
        struct avf_rx_queue *rxq = (struct avf_rx_queue *)rx_queue;
-       struct rte_eth_dev *dev;
        uint16_t nb_rx = 0;
 
        if (!nb_pkts)
@@ -1584,10 +1581,6 @@ avf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
                if (nb_ctx) {
                        /* Setup TX context descriptor if required */
-                       volatile struct avf_tx_context_desc *ctx_txd =
-                               (volatile struct avf_tx_context_desc *)
-                                       &txr[tx_id];
-                       uint16_t cd_l2tag2 = 0;
                        uint64_t cd_type_cmd_tso_mss =
                                AVF_TX_DESC_DTYPE_CONTEXT;
 
@@ -1603,7 +1596,7 @@ avf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                                cd_type_cmd_tso_mss |=
                                        avf_set_tso_ctx(tx_pkt, tx_offload);
 
-                       AVF_DUMP_TX_DESC(txq, ctx_txd, tx_id);
+                       AVF_DUMP_TX_DESC(txq, &txr[tx_id], tx_id);
                        txe->last_id = tx_last;
                        tx_id = txe->next_id;
                        txe = txn;
@@ -1925,7 +1918,7 @@ avf_dev_tx_desc_status(void *tx_queue, uint16_t offset)
        return RTE_ETH_TX_DESC_FULL;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 avf_recv_pkts_vec(__rte_unused void *rx_queue,
                  __rte_unused struct rte_mbuf **rx_pkts,
                  __rte_unused uint16_t nb_pkts)
@@ -1933,7 +1926,7 @@ avf_recv_pkts_vec(__rte_unused void *rx_queue,
        return 0;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 avf_recv_scattered_pkts_vec(__rte_unused void *rx_queue,
                            __rte_unused struct rte_mbuf **rx_pkts,
                            __rte_unused uint16_t nb_pkts)
@@ -1941,7 +1934,7 @@ avf_recv_scattered_pkts_vec(__rte_unused void *rx_queue,
        return 0;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 avf_xmit_fixed_burst_vec(__rte_unused void *tx_queue,
                         __rte_unused struct rte_mbuf **tx_pkts,
                         __rte_unused uint16_t nb_pkts)
@@ -1949,13 +1942,13 @@ avf_xmit_fixed_burst_vec(__rte_unused void *tx_queue,
        return 0;
 }
 
-int __attribute__((weak))
+__rte_weak int
 avf_rxq_vec_setup(__rte_unused struct avf_rx_queue *rxq)
 {
        return -1;
 }
 
-int __attribute__((weak))
+__rte_weak int
 avf_txq_vec_setup(__rte_unused struct avf_tx_queue *txq)
 {
        return -1;
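
The hunks above replace the GCC-specific __attribute__((weak)) spelling with DPDK's __rte_weak macro, new in 18.11. These stubs are weak scalar fallbacks: when the SSE implementation is compiled in, its strong definitions win at link time. A standalone sketch of the mechanism (function name hypothetical):

#include <rte_common.h>

/* Weak fallback: linked only when no strong definition is present. */
__rte_weak int
example_rxq_vec_setup(void)
{
	return -1; /* no vector path available */
}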
index 297d077..898d2f3 100644 (file)
@@ -201,17 +201,17 @@ int avf_txq_vec_setup(struct avf_tx_queue *txq);
 
 static inline
 void avf_dump_rx_descriptor(struct avf_rx_queue *rxq,
-                           const void *desc,
+                           const volatile void *desc,
                            uint16_t rx_id)
 {
 #ifdef RTE_LIBRTE_AVF_16BYTE_RX_DESC
-       const union avf_16byte_rx_desc *rx_desc = desc;
+       const volatile union avf_16byte_rx_desc *rx_desc = desc;
 
        printf("Queue %d Rx_desc %d: QW0: 0x%016"PRIx64" QW1: 0x%016"PRIx64"\n",
               rxq->queue_id, rx_id, rx_desc->read.pkt_addr,
               rx_desc->read.hdr_addr);
 #else
-       const union avf_32byte_rx_desc *rx_desc = desc;
+       const volatile union avf_32byte_rx_desc *rx_desc = desc;
 
        printf("Queue %d Rx_desc %d: QW0: 0x%016"PRIx64" QW1: 0x%016"PRIx64
               " QW2: 0x%016"PRIx64" QW3: 0x%016"PRIx64"\n", rxq->queue_id,
@@ -225,10 +225,10 @@ void avf_dump_rx_descriptor(struct avf_rx_queue *rxq,
  */
 static inline
 void avf_dump_tx_descriptor(const struct avf_tx_queue *txq,
-                           const void *desc, uint16_t tx_id)
+                           const volatile void *desc, uint16_t tx_id)
 {
-       char *name;
-       const struct avf_tx_desc *tx_desc = desc;
+       const char *name;
+       const volatile struct avf_tx_desc *tx_desc = desc;
        enum avf_tx_desc_dtype_value type;
 
        type = (enum avf_tx_desc_dtype_value)rte_le_to_cpu_64(
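
The volatile qualifiers added above matter because these descriptors live in DMA memory that the NIC updates behind the driver's back; the dump helpers must carry the qualifier through the pointer casts instead of silently discarding it. A freestanding illustration with a stand-in descriptor type (not the real avf layout):

#include <stdint.h>

struct fake_desc { uint64_t qw0; uint64_t qw1; };

static uint64_t read_qw1(const volatile void *desc)
{
	/* The qualifier survives the cast, so every access performs
	 * a fresh load from descriptor memory.
	 */
	const volatile struct fake_desc *d = desc;
	return d->qw1;
}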
index 8275100..343a6aa 100644 (file)
@@ -621,7 +621,7 @@ avf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
        return nb_pkts;
 }
 
-void __attribute__((cold))
+static void __attribute__((cold))
 avf_rx_queue_release_mbufs_sse(struct avf_rx_queue *rxq)
 {
        _avf_rx_queue_release_mbufs_vec(rxq);
index fa71014..fd90cc2 100644 (file)
@@ -69,7 +69,6 @@ avf_execute_vf_cmd(struct avf_adapter *adapter, struct avf_cmd_info *args)
 {
        struct avf_hw *hw = AVF_DEV_PRIVATE_TO_HW(adapter);
        struct avf_info *vf = AVF_DEV_PRIVATE_TO_VF(adapter);
-       struct avf_arq_event_info event_info;
        enum avf_status_code ret;
        int err = 0;
        int i = 0;
@@ -600,7 +599,6 @@ avf_config_irq_map(struct avf_adapter *adapter)
        struct virtchnl_irq_map_info *map_info;
        struct virtchnl_vector_map *vecmap;
        struct avf_cmd_info args;
-       uint32_t vector_id;
        int len, i, err;
 
        len = sizeof(struct virtchnl_irq_map_info) +
index 9ef4596..442a5ac 100644 (file)
@@ -93,8 +93,8 @@ typedef uint64_t        u64;
 #define avf_memset(a, b, c, d) memset((a), (b), (c))
 #define avf_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
 
-#define avf_usec_delay(x) rte_delay_us(x)
-#define avf_msec_delay(x) rte_delay_us(1000*(x))
+#define avf_usec_delay(x) rte_delay_us_sleep(x)
+#define avf_msec_delay(x) avf_usec_delay(1000 * (x))
 
 #define AVF_PCI_REG(reg)               rte_read32(reg)
 #define AVF_PCI_REG_ADDR(a, reg) \
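
Switching from rte_delay_us to rte_delay_us_sleep, introduced in 18.11, makes the base-code waits sleep instead of busy-polling, which is friendlier for the long admin-queue timeouts. A drop-in usage sketch:

#include <rte_cycles.h>

static void wait_one_ms(void)
{
	/* Blocks ~1000 us by sleeping rather than spinning on the TSC. */
	rte_delay_us_sleep(1000);
}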
diff --git a/drivers/net/avf/base/meson.build b/drivers/net/avf/base/meson.build
new file mode 100644 (file)
index 0000000..6f3d719
--- /dev/null
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+sources = [
+       'avf_adminq.c',
+       'avf_common.c',
+]
+
+error_cflags = ['-Wno-pointer-to-int-cast']
+c_args = cflags
+if allow_experimental_apis
+       c_args += '-DALLOW_EXPERIMENTAL_API'
+endif
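+# Only pass suppression flags that the active compiler actually accepts.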
+foreach flag: error_cflags
+       if cc.has_argument(flag)
+               c_args += flag
+       endif
+endforeach
+
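+# Build the base code as its own static library; the PMD links its objects.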
+base_lib = static_library('avf_base', sources,
+       dependencies: static_rte_eal,
+       c_args: c_args)
+base_objs = base_lib.extract_all_objects()
diff --git a/drivers/net/avf/meson.build b/drivers/net/avf/meson.build
new file mode 100644 (file)
index 0000000..2dfda9d
--- /dev/null
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+cflags += ['-Wno-strict-aliasing']
+
+allow_experimental_apis = true
+
+subdir('base')
+objs = [base_objs]
+
+sources = files(
+       'avf_ethdev.c',
+       'avf_rxtx.c',
+       'avf_vchnl.c',
+)
+
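+# The SSE vector Rx/Tx path is built on x86 only.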
+if arch_subdir == 'x86'
+       dpdk_conf.set('RTE_LIBRTE_AVF_INC_VECTOR', 1)
+       sources += files('avf_rxtx_vec_sse.c')
+endif
index 761f6c1..09388d0 100644 (file)
@@ -1036,11 +1036,6 @@ eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
                return ret;
        }
 
-       if (eth_dev->data->mac_addrs != NULL) {
-               rte_free(eth_dev->data->mac_addrs);
-               eth_dev->data->mac_addrs = NULL;
-       }
-
        return 0;
 }
 
@@ -2170,7 +2165,6 @@ avp_dev_info_get(struct rte_eth_dev *eth_dev,
                dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
                dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
        }
-       dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_CRC_STRIP;
 }
 
 static int
index 6076c31..b7ffdfc 100644 (file)
@@ -1,5 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2018 Intel Corporation
 
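+# AVP is supported on Linux only; skip the build on other systems.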
+if host_machine.system() != 'linux'
+        build = false
+endif
 sources = files('avp_ethdev.c')
 install_headers('rte_avp_common.h', 'rte_avp_fifo.h')
index 9ae9f06..e89c0ec 100644 (file)
@@ -364,7 +364,6 @@ axgbe_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                DEV_RX_OFFLOAD_IPV4_CKSUM |
                DEV_RX_OFFLOAD_UDP_CKSUM  |
                DEV_RX_OFFLOAD_TCP_CKSUM  |
-               DEV_RX_OFFLOAD_CRC_STRIP  |
                DEV_RX_OFFLOAD_KEEP_CRC;
 
        dev_info->tx_offload_capa =
@@ -719,9 +718,6 @@ eth_axgbe_dev_uninit(struct rte_eth_dev *eth_dev)
                return 0;
 
        pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
-       /*Free macaddres*/
-       rte_free(eth_dev->data->mac_addrs);
-       eth_dev->data->mac_addrs = NULL;
        eth_dev->dev_ops = NULL;
        eth_dev->rx_pkt_burst = NULL;
        eth_dev->tx_pkt_burst = NULL;
index c5fd5f4..b5a29a9 100644 (file)
@@ -74,7 +74,7 @@ int axgbe_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                (DMA_CH_INC * rxq->queue_id));
        rxq->dma_tail_reg = (volatile uint32_t *)((uint8_t *)rxq->dma_regs +
                                                  DMA_CH_RDTR_LO);
-       if (rte_eth_dev_must_keep_crc(dev->data->dev_conf.rxmode.offloads))
+       if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
                rxq->crc_len = ETHER_CRC_LEN;
        else
                rxq->crc_len = 0;
index 4904eaf..2797593 100644 (file)
@@ -112,6 +112,7 @@ static void bnx2x_pf_disable(struct bnx2x_softc *sc);
 static void bnx2x_update_rx_prod(struct bnx2x_softc *sc,
                                 struct bnx2x_fastpath *fp,
                                 uint16_t rx_bd_prod, uint16_t rx_cq_prod);
+static void bnx2x_link_report_locked(struct bnx2x_softc *sc);
 static void bnx2x_link_report(struct bnx2x_softc *sc);
 void bnx2x_link_status_update(struct bnx2x_softc *sc);
 static int bnx2x_alloc_mem(struct bnx2x_softc *sc);
@@ -178,13 +179,14 @@ bnx2x_dma_alloc(struct bnx2x_softc *sc, size_t size, struct bnx2x_dma *dma,
                                        SOCKET_ID_ANY,
                                        RTE_MEMZONE_IOVA_CONTIG, align);
        if (z == NULL) {
-               PMD_DRV_LOG(ERR, "DMA alloc failed for %s", msg);
+               PMD_DRV_LOG(ERR, sc, "DMA alloc failed for %s", msg);
                return -ENOMEM;
        }
        dma->paddr = (uint64_t) z->iova;
        dma->vaddr = z->addr;
 
-       PMD_DRV_LOG(DEBUG, "%s: virt=%p phys=%" PRIx64, msg, dma->vaddr, dma->paddr);
+       PMD_DRV_LOG(DEBUG, sc,
+                   "%s: virt=%p phys=%" PRIx64, msg, dma->vaddr, dma->paddr);
 
        return 0;
 }
@@ -197,11 +199,12 @@ static int bnx2x_acquire_hw_lock(struct bnx2x_softc *sc, uint32_t resource)
        uint32_t hw_lock_control_reg;
        int cnt;
 
-       PMD_INIT_FUNC_TRACE();
+       if (resource)
+               PMD_INIT_FUNC_TRACE(sc);
 
        /* validate the resource is within range */
        if (resource > HW_LOCK_MAX_RESOURCE_VALUE) {
-               PMD_DRV_LOG(NOTICE,
+               PMD_DRV_LOG(NOTICE, sc,
                            "resource 0x%x > HW_LOCK_MAX_RESOURCE_VALUE",
                            resource);
                return -1;
@@ -217,7 +220,7 @@ static int bnx2x_acquire_hw_lock(struct bnx2x_softc *sc, uint32_t resource)
        /* validate the resource is not already taken */
        lock_status = REG_RD(sc, hw_lock_control_reg);
        if (lock_status & resource_bit) {
-               PMD_DRV_LOG(NOTICE,
+               PMD_DRV_LOG(NOTICE, sc,
                            "resource in use (status 0x%x bit 0x%x)",
                            lock_status, resource_bit);
                return -1;
@@ -233,7 +236,8 @@ static int bnx2x_acquire_hw_lock(struct bnx2x_softc *sc, uint32_t resource)
                DELAY(5000);
        }
 
-       PMD_DRV_LOG(NOTICE, "Resource lock timeout!");
+       PMD_DRV_LOG(NOTICE, sc, "Resource 0x%x resource_bit 0x%x lock timeout!",
+                   resource, resource_bit);
        return -1;
 }
 
@@ -244,13 +248,14 @@ static int bnx2x_release_hw_lock(struct bnx2x_softc *sc, uint32_t resource)
        int func = SC_FUNC(sc);
        uint32_t hw_lock_control_reg;
 
-       PMD_INIT_FUNC_TRACE();
+       if (resource)
+               PMD_INIT_FUNC_TRACE(sc);
 
        /* validate the resource is within range */
        if (resource > HW_LOCK_MAX_RESOURCE_VALUE) {
-               PMD_DRV_LOG(NOTICE,
-                           "resource 0x%x > HW_LOCK_MAX_RESOURCE_VALUE",
-                           resource);
+               PMD_DRV_LOG(NOTICE, sc,
+                           "(resource 0x%x > HW_LOCK_MAX_RESOURCE_VALUE)"
+                           " resource_bit 0x%x", resource, resource_bit);
                return -1;
        }
 
@@ -264,7 +269,7 @@ static int bnx2x_release_hw_lock(struct bnx2x_softc *sc, uint32_t resource)
        /* validate the resource is currently taken */
        lock_status = REG_RD(sc, hw_lock_control_reg);
        if (!(lock_status & resource_bit)) {
-               PMD_DRV_LOG(NOTICE,
+               PMD_DRV_LOG(NOTICE, sc,
                            "resource not in use (status 0x%x bit 0x%x)",
                            lock_status, resource_bit);
                return -1;
@@ -274,6 +279,18 @@ static int bnx2x_release_hw_lock(struct bnx2x_softc *sc, uint32_t resource)
        return 0;
 }
 
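+/* Serialize PHY access: driver PHY lock first, then the HW MDIO lock. */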
+static void bnx2x_acquire_phy_lock(struct bnx2x_softc *sc)
+{
+       BNX2X_PHY_LOCK(sc);
+       bnx2x_acquire_hw_lock(sc, HW_LOCK_RESOURCE_MDIO);
+}
+
+static void bnx2x_release_phy_lock(struct bnx2x_softc *sc)
+{
+       bnx2x_release_hw_lock(sc, HW_LOCK_RESOURCE_MDIO);
+       BNX2X_PHY_UNLOCK(sc);
+}
+
 /* copy command into DMAE command memory and set DMAE command Go */
 void bnx2x_post_dmae(struct bnx2x_softc *sc, struct dmae_command *dmae, int idx)
 {
@@ -366,7 +383,7 @@ bnx2x_issue_dmae_with_comp(struct bnx2x_softc *sc, struct dmae_command *dmae)
                if (!timeout ||
                    (sc->recovery_state != BNX2X_RECOVERY_DONE &&
                     sc->recovery_state != BNX2X_RECOVERY_NIC_LOADING)) {
-                       PMD_DRV_LOG(INFO, "DMAE timeout!");
+                       PMD_DRV_LOG(INFO, sc, "DMAE timeout!");
                        return DMAE_TIMEOUT;
                }
 
@@ -375,7 +392,7 @@ bnx2x_issue_dmae_with_comp(struct bnx2x_softc *sc, struct dmae_command *dmae)
        }
 
        if (*wb_comp & DMAE_PCI_ERR_FLAG) {
-               PMD_DRV_LOG(INFO, "DMAE PCI error!");
+               PMD_DRV_LOG(INFO, sc, "DMAE PCI error!");
                return DMAE_PCI_ERROR;
        }
 
@@ -534,7 +551,7 @@ void
 elink_cb_event_log(__rte_unused struct bnx2x_softc *sc,
                   __rte_unused const elink_log_id_t elink_log_id, ...)
 {
-       PMD_DRV_LOG(DEBUG, "ELINK EVENT LOG (%d)", elink_log_id);
+       PMD_DRV_LOG(DEBUG, sc, "ELINK EVENT LOG (%d)", elink_log_id);
 }
 
 static int bnx2x_set_spio(struct bnx2x_softc *sc, int spio, uint32_t mode)
@@ -543,7 +560,7 @@ static int bnx2x_set_spio(struct bnx2x_softc *sc, int spio, uint32_t mode)
 
        /* Only 2 SPIOs are configurable */
        if ((spio != MISC_SPIO_SPIO4) && (spio != MISC_SPIO_SPIO5)) {
-               PMD_DRV_LOG(NOTICE, "Invalid SPIO 0x%x", spio);
+               PMD_DRV_LOG(NOTICE, sc, "Invalid SPIO 0x%x", spio);
                return -1;
        }
 
@@ -593,7 +610,7 @@ static int bnx2x_gpio_read(struct bnx2x_softc *sc, int gpio_num, uint8_t port)
        uint32_t gpio_reg;
 
        if (gpio_num > MISC_REGISTERS_GPIO_3) {
-               PMD_DRV_LOG(NOTICE, "Invalid GPIO %d", gpio_num);
+               PMD_DRV_LOG(NOTICE, sc, "Invalid GPIO %d", gpio_num);
                return -1;
        }
 
@@ -618,7 +635,7 @@ bnx2x_gpio_write(struct bnx2x_softc *sc, int gpio_num, uint32_t mode, uint8_t po
        uint32_t gpio_reg;
 
        if (gpio_num > MISC_REGISTERS_GPIO_3) {
-               PMD_DRV_LOG(NOTICE, "Invalid GPIO %d", gpio_num);
+               PMD_DRV_LOG(NOTICE, sc, "Invalid GPIO %d", gpio_num);
                return -1;
        }
 
@@ -687,7 +704,8 @@ bnx2x_gpio_mult_write(struct bnx2x_softc *sc, uint8_t pins, uint32_t mode)
                break;
 
        default:
-               PMD_DRV_LOG(NOTICE, "Invalid GPIO mode assignment %d", mode);
+               PMD_DRV_LOG(NOTICE, sc,
+                           "Invalid GPIO mode assignment %d", mode);
                bnx2x_release_hw_lock(sc, HW_LOCK_RESOURCE_GPIO);
                return -1;
        }
@@ -713,7 +731,7 @@ bnx2x_gpio_int_write(struct bnx2x_softc *sc, int gpio_num, uint32_t mode,
        uint32_t gpio_reg;
 
        if (gpio_num > MISC_REGISTERS_GPIO_3) {
-               PMD_DRV_LOG(NOTICE, "Invalid GPIO %d", gpio_num);
+               PMD_DRV_LOG(NOTICE, sc, "Invalid GPIO %d", gpio_num);
                return -1;
        }
 
@@ -790,7 +808,7 @@ elink_cb_fw_command(struct bnx2x_softc *sc, uint32_t command, uint32_t param)
        SHMEM_WR(sc, func_mb[mb_idx].drv_mb_param, param);
        SHMEM_WR(sc, func_mb[mb_idx].drv_mb_header, (command | seq));
 
-       PMD_DRV_LOG(DEBUG,
+       PMD_DRV_LOG(DEBUG, sc,
                    "wrote command 0x%08x to FW MB param 0x%08x",
                    (command | seq), param);
 
@@ -805,7 +823,7 @@ elink_cb_fw_command(struct bnx2x_softc *sc, uint32_t command, uint32_t param)
                rc &= FW_MSG_CODE_MASK;
        } else {
                /* Ruh-roh! */
-               PMD_DRV_LOG(NOTICE, "FW failed to respond!");
+               PMD_DRV_LOG(NOTICE, sc, "FW failed to respond!");
                rc = 0;
        }
 
@@ -1023,12 +1041,12 @@ bnx2x_sp_post(struct bnx2x_softc *sc, int command, int cid, uint32_t data_hi,
 
        if (common) {
                if (!atomic_load_acq_long(&sc->eq_spq_left)) {
-                       PMD_DRV_LOG(INFO, "EQ ring is full!");
+                       PMD_DRV_LOG(INFO, sc, "EQ ring is full!");
                        return -1;
                }
        } else {
                if (!atomic_load_acq_long(&sc->cq_spq_left)) {
-                       PMD_DRV_LOG(INFO, "SPQ ring is full!");
+                       PMD_DRV_LOG(INFO, sc, "SPQ ring is full!");
                        return -1;
                }
        }
@@ -1061,7 +1079,7 @@ bnx2x_sp_post(struct bnx2x_softc *sc, int command, int cid, uint32_t data_hi,
                atomic_subtract_acq_long(&sc->cq_spq_left, 1);
        }
 
-       PMD_DRV_LOG(DEBUG,
+       PMD_DRV_LOG(DEBUG, sc,
                    "SPQE[%x] (%x:%x) (cmd, common?) (%d,%d) hw_cid %x"
                    "data (%x:%x) type(0x%x) left (CQ, EQ) (%lx,%lx)",
                    sc->spq_prod_idx,
@@ -1132,44 +1150,45 @@ bnx2x_sp_event(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
        enum ecore_queue_cmd drv_cmd = ECORE_Q_CMD_MAX;
        struct ecore_queue_sp_obj *q_obj = &BNX2X_SP_OBJ(sc, fp).q_obj;
 
-       PMD_DRV_LOG(DEBUG,
+       PMD_DRV_LOG(DEBUG, sc,
                    "fp=%d cid=%d got ramrod #%d state is %x type is %d",
                    fp->index, cid, command, sc->state,
                    rr_cqe->ramrod_cqe.ramrod_type);
 
        switch (command) {
        case (RAMROD_CMD_ID_ETH_CLIENT_UPDATE):
-               PMD_DRV_LOG(DEBUG, "got UPDATE ramrod. CID %d", cid);
+               PMD_DRV_LOG(DEBUG, sc, "got UPDATE ramrod. CID %d", cid);
                drv_cmd = ECORE_Q_CMD_UPDATE;
                break;
 
        case (RAMROD_CMD_ID_ETH_CLIENT_SETUP):
-               PMD_DRV_LOG(DEBUG, "got MULTI[%d] setup ramrod", cid);
+               PMD_DRV_LOG(DEBUG, sc, "got MULTI[%d] setup ramrod", cid);
                drv_cmd = ECORE_Q_CMD_SETUP;
                break;
 
        case (RAMROD_CMD_ID_ETH_TX_QUEUE_SETUP):
-               PMD_DRV_LOG(DEBUG, "got MULTI[%d] tx-only setup ramrod", cid);
+               PMD_DRV_LOG(DEBUG, sc,
+                           "got MULTI[%d] tx-only setup ramrod", cid);
                drv_cmd = ECORE_Q_CMD_SETUP_TX_ONLY;
                break;
 
        case (RAMROD_CMD_ID_ETH_HALT):
-               PMD_DRV_LOG(DEBUG, "got MULTI[%d] halt ramrod", cid);
+               PMD_DRV_LOG(DEBUG, sc, "got MULTI[%d] halt ramrod", cid);
                drv_cmd = ECORE_Q_CMD_HALT;
                break;
 
        case (RAMROD_CMD_ID_ETH_TERMINATE):
-               PMD_DRV_LOG(DEBUG, "got MULTI[%d] teminate ramrod", cid);
+               PMD_DRV_LOG(DEBUG, sc, "got MULTI[%d] terminate ramrod", cid);
                drv_cmd = ECORE_Q_CMD_TERMINATE;
                break;
 
        case (RAMROD_CMD_ID_ETH_EMPTY):
-               PMD_DRV_LOG(DEBUG, "got MULTI[%d] empty ramrod", cid);
+               PMD_DRV_LOG(DEBUG, sc, "got MULTI[%d] empty ramrod", cid);
                drv_cmd = ECORE_Q_CMD_EMPTY;
                break;
 
        default:
-               PMD_DRV_LOG(DEBUG,
+               PMD_DRV_LOG(DEBUG, sc,
                            "ERROR: unexpected MC reply (%d)"
                            "on fp[%d]", command, fp->index);
                return;
@@ -1191,7 +1210,7 @@ bnx2x_sp_event(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 
        atomic_add_acq_long(&sc->cq_spq_left, 1);
 
-       PMD_DRV_LOG(DEBUG, "sc->cq_spq_left 0x%lx",
+       PMD_DRV_LOG(DEBUG, sc, "sc->cq_spq_left 0x%lx",
                    atomic_load_acq_long(&sc->cq_spq_left));
 }
 
@@ -1387,7 +1406,7 @@ bnx2x_del_all_macs(struct bnx2x_softc *sc, struct ecore_vlan_mac_obj *mac_obj,
 
        rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
        if (rc < 0)
-               PMD_DRV_LOG(ERR, "Failed to delete MACs (%d)", rc);
+               PMD_DRV_LOG(ERR, sc, "Failed to delete MACs (%d)", rc);
 
        return rc;
 }
@@ -1538,13 +1557,13 @@ static int bnx2x_nic_load_no_mcp(struct bnx2x_softc *sc)
        int path = SC_PATH(sc);
        int port = SC_PORT(sc);
 
-       PMD_DRV_LOG(INFO, "NO MCP - load counts[%d]      %d, %d, %d",
+       PMD_DRV_LOG(INFO, sc, "NO MCP - load counts[%d]      %d, %d, %d",
                    path, load_count[path][0], load_count[path][1],
                    load_count[path][2]);
 
        load_count[path][0]++;
        load_count[path][1 + port]++;
-       PMD_DRV_LOG(INFO, "NO MCP - new load counts[%d]  %d, %d, %d",
+       PMD_DRV_LOG(INFO, sc, "NO MCP - new load counts[%d]  %d, %d, %d",
                    path, load_count[path][0], load_count[path][1],
                    load_count[path][2]);
        if (load_count[path][0] == 1)
@@ -1561,12 +1580,12 @@ static int bnx2x_nic_unload_no_mcp(struct bnx2x_softc *sc)
        int port = SC_PORT(sc);
        int path = SC_PATH(sc);
 
-       PMD_DRV_LOG(INFO, "NO MCP - load counts[%d]      %d, %d, %d",
+       PMD_DRV_LOG(INFO, sc, "NO MCP - load counts[%d]      %d, %d, %d",
                    path, load_count[path][0], load_count[path][1],
                    load_count[path][2]);
        load_count[path][0]--;
        load_count[path][1 + port]--;
-       PMD_DRV_LOG(INFO, "NO MCP - new load counts[%d]  %d, %d, %d",
+       PMD_DRV_LOG(INFO, sc, "NO MCP - new load counts[%d]  %d, %d, %d",
                    path, load_count[path][0], load_count[path][1],
                    load_count[path][2]);
        if (load_count[path][0] == 0) {
@@ -1646,7 +1665,7 @@ static int bnx2x_func_wait_started(struct bnx2x_softc *sc)
                 */
                struct ecore_func_state_params func_params = { NULL };
 
-               PMD_DRV_LOG(NOTICE, "Unexpected function state! "
+               PMD_DRV_LOG(NOTICE, sc, "Unexpected function state! "
                            "Forcing STARTED-->TX_STOPPED-->STARTED");
 
                func_params.f_obj = &sc->func_obj;
@@ -1670,7 +1689,7 @@ static int bnx2x_stop_queue(struct bnx2x_softc *sc, int index)
        struct ecore_queue_state_params q_params = { NULL };
        int rc;
 
-       PMD_DRV_LOG(DEBUG, "stopping queue %d cid %d", index, fp->index);
+       PMD_DRV_LOG(DEBUG, sc, "stopping queue %d cid %d", index, fp->index);
 
        q_params.q_obj = &sc->sp_objs[fp->index].q_obj;
        /* We want to wait for completion in this context */
@@ -1721,7 +1740,7 @@ static uint8_t bnx2x_wait_sp_comp(struct bnx2x_softc *sc, unsigned long mask)
 
        tmp = atomic_load_acq_long(&sc->sp_state);
        if (tmp & mask) {
-               PMD_DRV_LOG(INFO, "Filtering completion timed out: "
+               PMD_DRV_LOG(INFO, sc, "Filtering completion timed out: "
                            "sp_state 0x%lx, mask 0x%lx", tmp, mask);
                return FALSE;
        }
@@ -1747,7 +1766,7 @@ static int bnx2x_func_stop(struct bnx2x_softc *sc)
         */
        rc = ecore_func_state_change(sc, &func_params);
        if (rc) {
-               PMD_DRV_LOG(NOTICE, "FUNC_STOP ramrod failed. "
+               PMD_DRV_LOG(NOTICE, sc, "FUNC_STOP ramrod failed. "
                            "Running a dry transaction");
                bnx2x_set_bit(RAMROD_DRV_CLR_ONLY, &func_params.ramrod_flags);
                return ecore_func_state_change(sc, &func_params);
@@ -1796,14 +1815,16 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
        rc = bnx2x_del_all_macs(sc, &sc->sp_objs[0].mac_obj, ECORE_ETH_MAC,
                              FALSE);
        if (rc < 0) {
-               PMD_DRV_LOG(NOTICE, "Failed to delete all ETH MACs (%d)", rc);
+               PMD_DRV_LOG(NOTICE, sc,
+                           "Failed to delete all ETH MACs (%d)", rc);
        }
 
        /* Clean up UC list  */
        rc = bnx2x_del_all_macs(sc, &sc->sp_objs[0].mac_obj, ECORE_UC_LIST_MAC,
                              TRUE);
        if (rc < 0) {
-               PMD_DRV_LOG(NOTICE, "Failed to delete UC MACs list (%d)", rc);
+               PMD_DRV_LOG(NOTICE, sc,
+                           "Failed to delete UC MACs list (%d)", rc);
        }
 
        /* Disable LLH */
@@ -1826,7 +1847,7 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
        rparam.mcast_obj = &sc->mcast_obj;
        rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
        if (rc < 0) {
-               PMD_DRV_LOG(NOTICE,
+               PMD_DRV_LOG(NOTICE, sc,
                            "Failed to send DEL MCAST command (%d)", rc);
        }
 
@@ -1843,7 +1864,7 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
         */
        rc = bnx2x_func_wait_started(sc);
        if (rc) {
-               PMD_DRV_LOG(NOTICE, "bnx2x_func_wait_started failed");
+               PMD_DRV_LOG(NOTICE, sc, "bnx2x_func_wait_started failed");
        }
 
        /*
@@ -1861,14 +1882,14 @@ bnx2x_chip_cleanup(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_li
         * very wrong has happen.
         */
        if (!bnx2x_wait_sp_comp(sc, ~0x0UL)) {
-               PMD_DRV_LOG(NOTICE, "Common slow path ramrods got stuck!");
+               PMD_DRV_LOG(NOTICE, sc, "Common slow path ramrods got stuck!");
        }
 
 unload_error:
 
        rc = bnx2x_func_stop(sc);
        if (rc) {
-               PMD_DRV_LOG(NOTICE, "Function stop failed!");
+               PMD_DRV_LOG(NOTICE, sc, "Function stop failed!");
        }
 
        /* disable HW interrupts */
@@ -1877,7 +1898,7 @@ unload_error:
        /* Reset the chip */
        rc = bnx2x_reset_hw(sc, reset_code);
        if (rc) {
-               PMD_DRV_LOG(NOTICE, "Hardware reset failed");
+               PMD_DRV_LOG(NOTICE, sc, "Hardware reset failed");
        }
 
        /* Report UNLOAD_DONE to MCP */
@@ -1888,7 +1909,7 @@ static void bnx2x_disable_close_the_gate(struct bnx2x_softc *sc)
 {
        uint32_t val;
 
-       PMD_DRV_LOG(DEBUG, "Disabling 'close the gates'");
+       PMD_DRV_LOG(DEBUG, sc, "Disabling 'close the gates'");
 
        val = REG_RD(sc, MISC_REG_AEU_GENERAL_MASK);
        val &= ~(MISC_AEU_GENERAL_MASK_REG_AEU_PXP_CLOSE_MASK |
@@ -1919,7 +1940,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
        rc = mac_obj->delete_all(sc, &sc->sp_objs->mac_obj, &vlan_mac_flags,
                                 &ramrod_flags);
        if (rc != 0) {
-               PMD_DRV_LOG(NOTICE, "Failed to clean ETH MACs (%d)", rc);
+               PMD_DRV_LOG(NOTICE, sc, "Failed to clean ETH MACs (%d)", rc);
        }
 
        /* Cleanup UC list */
@@ -1927,7 +1948,8 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
        bnx2x_set_bit(ECORE_UC_LIST_MAC, &vlan_mac_flags);
        rc = mac_obj->delete_all(sc, mac_obj, &vlan_mac_flags, &ramrod_flags);
        if (rc != 0) {
-               PMD_DRV_LOG(NOTICE, "Failed to clean UC list MACs (%d)", rc);
+               PMD_DRV_LOG(NOTICE, sc,
+                           "Failed to clean UC list MACs (%d)", rc);
        }
 
        /* Now clean mcast object... */
@@ -1938,7 +1960,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
        /* Add a DEL command... */
        rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_DEL);
        if (rc < 0) {
-               PMD_DRV_LOG(NOTICE,
+               PMD_DRV_LOG(NOTICE, sc,
                            "Failed to send DEL MCAST command (%d)", rc);
        }
 
@@ -1947,7 +1969,7 @@ static void bnx2x_squeeze_objects(struct bnx2x_softc *sc)
        rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_CONT);
        while (rc != 0) {
                if (rc < 0) {
-                       PMD_DRV_LOG(NOTICE,
+                       PMD_DRV_LOG(NOTICE, sc,
                                    "Failed to clean MCAST object (%d)", rc);
                        return;
                }
@@ -1964,7 +1986,7 @@ bnx2x_nic_unload(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_link
        uint8_t global = FALSE;
        uint32_t val;
 
-       PMD_DRV_LOG(DEBUG, "Starting NIC unload...");
+       PMD_DRV_LOG(DEBUG, sc, "Starting NIC unload...");
 
        /* mark driver as unloaded in shmem2 */
        if (IS_PF(sc) && SHMEM2_HAS(sc, drv_capabilities_flag)) {
@@ -1988,7 +2010,7 @@ bnx2x_nic_unload(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_link
                bnx2x_release_leader_lock(sc);
                mb();
 
-               PMD_DRV_LOG(NOTICE, "Can't unload in closed or error state");
+               PMD_DRV_LOG(NOTICE, sc, "Can't unload in closed or error state");
                return -1;
        }
 
@@ -2093,7 +2115,7 @@ bnx2x_nic_unload(struct bnx2x_softc *sc, uint32_t unload_mode, uint8_t keep_link
                bnx2x_disable_close_the_gate(sc);
        }
 
-       PMD_DRV_LOG(DEBUG, "Ended NIC unload");
+       PMD_DRV_LOG(DEBUG, sc, "Ended NIC unload");
 
        return 0;
 }
@@ -2241,7 +2263,7 @@ static void bnx2x_ilt_set_info(struct bnx2x_softc *sc)
        struct ecore_ilt *ilt = sc->ilt;
        uint16_t line = 0;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        ilt->start_line = FUNC_ILT_BASE(SC_FUNC(sc));
 
@@ -2395,7 +2417,7 @@ static int bnx2x_alloc_mem(struct bnx2x_softc *sc)
        bnx2x_alloc_ilt_lines_mem(sc);
 
        if (ecore_ilt_mem_op(sc, ILT_MEMOP_ALLOC)) {
-               PMD_DRV_LOG(NOTICE, "ecore_ilt_mem_op ILT_MEMOP_ALLOC failed");
+               PMD_DRV_LOG(NOTICE, sc, "ecore_ilt_mem_op ILT_MEMOP_ALLOC failed");
                bnx2x_free_mem(sc);
                return -1;
        }
@@ -2598,7 +2620,7 @@ static void bnx2x_set_pf_load(struct bnx2x_softc *sc)
 
        bnx2x_acquire_hw_lock(sc, HW_LOCK_RESOURCE_RECOVERY_REG);
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        val = REG_RD(sc, BNX2X_RECOVERY_GLOB_REG);
 
@@ -2651,14 +2673,14 @@ static uint8_t bnx2x_clear_pf_load(struct bnx2x_softc *sc)
 /* send load request to mcp and analyze response */
 static int bnx2x_nic_load_request(struct bnx2x_softc *sc, uint32_t * load_code)
 {
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        /* init fw_seq */
        sc->fw_seq =
            (SHMEM_RD(sc, func_mb[SC_FW_MB_IDX(sc)].drv_mb_header) &
             DRV_MSG_SEQ_NUMBER_MASK);
 
-       PMD_DRV_LOG(DEBUG, "initial fw_seq 0x%04x", sc->fw_seq);
+       PMD_DRV_LOG(DEBUG, sc, "initial fw_seq 0x%04x", sc->fw_seq);
 
 #ifdef BNX2X_PULSE
        /* get the current FW pulse sequence */
@@ -2677,13 +2699,13 @@ static int bnx2x_nic_load_request(struct bnx2x_softc *sc, uint32_t * load_code)
 
        /* if the MCP fails to respond we must abort */
        if (!(*load_code)) {
-               PMD_DRV_LOG(NOTICE, "MCP response failure!");
+               PMD_DRV_LOG(NOTICE, sc, "MCP response failure!");
                return -1;
        }
 
        /* if MCP refused then must abort */
        if ((*load_code) == FW_MSG_CODE_DRV_LOAD_REFUSED) {
-               PMD_DRV_LOG(NOTICE, "MCP refused load request");
+               PMD_DRV_LOG(NOTICE, sc, "MCP refused load request");
                return -1;
        }
 
@@ -2710,12 +2732,12 @@ static int bnx2x_nic_load_analyze_req(struct bnx2x_softc *sc, uint32_t load_code
 
                /* read loaded FW from chip */
                loaded_fw = REG_RD(sc, XSEM_REG_PRAM);
-               PMD_DRV_LOG(DEBUG, "loaded FW 0x%08x / my FW 0x%08x",
+               PMD_DRV_LOG(DEBUG, sc, "loaded FW 0x%08x / my FW 0x%08x",
                            loaded_fw, my_fw);
 
                /* abort nic load if version mismatch */
                if (my_fw != loaded_fw) {
-                       PMD_DRV_LOG(NOTICE,
+                       PMD_DRV_LOG(NOTICE, sc,
                                    "FW 0x%08x already loaded (mine is 0x%08x)",
                                    loaded_fw, my_fw);
                        return -1;
@@ -2730,7 +2752,7 @@ static void bnx2x_nic_load_pmf(struct bnx2x_softc *sc, uint32_t load_code)
 {
        uint32_t ncsi_oem_data_addr;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        if ((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
            (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) ||
@@ -2745,7 +2767,7 @@ static void bnx2x_nic_load_pmf(struct bnx2x_softc *sc, uint32_t load_code)
                sc->port.pmf = 0;
        }
 
-       PMD_DRV_LOG(DEBUG, "pmf %d", sc->port.pmf);
+       PMD_DRV_LOG(DEBUG, sc, "pmf %d", sc->port.pmf);
 
        if (load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) {
                if (SHMEM2_HAS(sc, ncsi_oem_data_addr)) {
@@ -2788,10 +2810,10 @@ static void bnx2x_read_mf_cfg(struct bnx2x_softc *sc)
 
        if (sc->devinfo.mf_info.mf_config[SC_VN(sc)] &
            FUNC_MF_CFG_FUNC_DISABLED) {
-               PMD_DRV_LOG(DEBUG, "mf_cfg function disabled");
+               PMD_DRV_LOG(DEBUG, sc, "mf_cfg function disabled");
                sc->flags |= BNX2X_MF_FUNC_DIS;
        } else {
-               PMD_DRV_LOG(DEBUG, "mf_cfg function enabled");
+               PMD_DRV_LOG(DEBUG, sc, "mf_cfg function enabled");
                sc->flags &= ~BNX2X_MF_FUNC_DIS;
        }
 }
@@ -2812,7 +2834,7 @@ static int bnx2x_acquire_alr(struct bnx2x_softc *sc)
        }
 
        if (!(val & (1L << 31))) {
-               PMD_DRV_LOG(NOTICE, "Cannot acquire MCP access lock register");
+               PMD_DRV_LOG(NOTICE, sc, "Cannot acquire MCP access lock register");
                return -1;
        }
 
@@ -2840,7 +2862,7 @@ static void bnx2x_fan_failure(struct bnx2x_softc *sc)
                 ext_phy_config);
 
        /* log the failure */
-       PMD_DRV_LOG(INFO,
+       PMD_DRV_LOG(INFO, sc,
                    "Fan Failure has caused the driver to shutdown "
                    "the card to prevent permanent damage. "
                    "Please contact OEM Support for assistance");
@@ -2897,7 +2919,7 @@ static void bnx2x_link_attn(struct bnx2x_softc *sc)
                }
        }
 
-       bnx2x_link_report(sc);
+       bnx2x_link_report_locked(sc);
 
        if (IS_MF(sc)) {
                bnx2x_link_sync_notify(sc);
@@ -2918,7 +2940,7 @@ static void bnx2x_attn_int_asserted(struct bnx2x_softc *sc, uint32_t asserted)
        uint32_t cnt;
 
        if (sc->attn_state & asserted) {
-               PMD_DRV_LOG(ERR, "IGU ERROR attn=0x%08x", asserted);
+               PMD_DRV_LOG(ERR, sc, "IGU ERROR attn=0x%08x", asserted);
        }
 
        bnx2x_acquire_hw_lock(sc, HW_LOCK_RESOURCE_PORT0_ATT_MASK + port);
@@ -2936,6 +2958,7 @@ static void bnx2x_attn_int_asserted(struct bnx2x_softc *sc, uint32_t asserted)
        if (asserted & ATTN_HARD_WIRED_MASK) {
                if (asserted & ATTN_NIG_FOR_FUNC) {
 
+                       bnx2x_acquire_phy_lock(sc);
                        /* save nig interrupt mask */
                        nig_mask = REG_RD(sc, nig_int_mask_addr);
 
@@ -2950,45 +2973,45 @@ static void bnx2x_attn_int_asserted(struct bnx2x_softc *sc, uint32_t asserted)
                }
 
                if (asserted & ATTN_SW_TIMER_4_FUNC) {
-                       PMD_DRV_LOG(DEBUG, "ATTN_SW_TIMER_4_FUNC!");
+                       PMD_DRV_LOG(DEBUG, sc, "ATTN_SW_TIMER_4_FUNC!");
                }
 
                if (asserted & GPIO_2_FUNC) {
-                       PMD_DRV_LOG(DEBUG, "GPIO_2_FUNC!");
+                       PMD_DRV_LOG(DEBUG, sc, "GPIO_2_FUNC!");
                }
 
                if (asserted & GPIO_3_FUNC) {
-                       PMD_DRV_LOG(DEBUG, "GPIO_3_FUNC!");
+                       PMD_DRV_LOG(DEBUG, sc, "GPIO_3_FUNC!");
                }
 
                if (asserted & GPIO_4_FUNC) {
-                       PMD_DRV_LOG(DEBUG, "GPIO_4_FUNC!");
+                       PMD_DRV_LOG(DEBUG, sc, "GPIO_4_FUNC!");
                }
 
                if (port == 0) {
                        if (asserted & ATTN_GENERAL_ATTN_1) {
-                               PMD_DRV_LOG(DEBUG, "ATTN_GENERAL_ATTN_1!");
+                               PMD_DRV_LOG(DEBUG, sc, "ATTN_GENERAL_ATTN_1!");
                                REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_1, 0x0);
                        }
                        if (asserted & ATTN_GENERAL_ATTN_2) {
-                               PMD_DRV_LOG(DEBUG, "ATTN_GENERAL_ATTN_2!");
+                               PMD_DRV_LOG(DEBUG, sc, "ATTN_GENERAL_ATTN_2!");
                                REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_2, 0x0);
                        }
                        if (asserted & ATTN_GENERAL_ATTN_3) {
-                               PMD_DRV_LOG(DEBUG, "ATTN_GENERAL_ATTN_3!");
+                               PMD_DRV_LOG(DEBUG, sc, "ATTN_GENERAL_ATTN_3!");
                                REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_3, 0x0);
                        }
                } else {
                        if (asserted & ATTN_GENERAL_ATTN_4) {
-                               PMD_DRV_LOG(DEBUG, "ATTN_GENERAL_ATTN_4!");
+                               PMD_DRV_LOG(DEBUG, sc, "ATTN_GENERAL_ATTN_4!");
                                REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_4, 0x0);
                        }
                        if (asserted & ATTN_GENERAL_ATTN_5) {
-                               PMD_DRV_LOG(DEBUG, "ATTN_GENERAL_ATTN_5!");
+                               PMD_DRV_LOG(DEBUG, sc, "ATTN_GENERAL_ATTN_5!");
                                REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_5, 0x0);
                        }
                        if (asserted & ATTN_GENERAL_ATTN_6) {
-                               PMD_DRV_LOG(DEBUG, "ATTN_GENERAL_ATTN_6!");
+                               PMD_DRV_LOG(DEBUG, sc, "ATTN_GENERAL_ATTN_6!");
                                REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_6, 0x0);
                        }
                }
@@ -3002,7 +3025,7 @@ static void bnx2x_attn_int_asserted(struct bnx2x_softc *sc, uint32_t asserted)
                reg_addr = (BAR_IGU_INTMEM + IGU_CMD_ATTN_BIT_SET_UPPER * 8);
        }
 
-       PMD_DRV_LOG(DEBUG, "about to mask 0x%08x at %s addr 0x%08x",
+       PMD_DRV_LOG(DEBUG, sc, "about to mask 0x%08x at %s addr 0x%08x",
                    asserted,
                    (sc->devinfo.int_block == INT_BLOCK_HC) ? "HC" : "IGU",
                    reg_addr);
@@ -3024,7 +3047,7 @@ static void bnx2x_attn_int_asserted(struct bnx2x_softc *sc, uint32_t asserted)
                                 && (++cnt < MAX_IGU_ATTN_ACK_TO));
 
                        if (!igu_acked) {
-                               PMD_DRV_LOG(ERR,
+                               PMD_DRV_LOG(ERR, sc,
                                            "Failed to verify IGU ack on time");
                        }
 
@@ -3033,6 +3056,7 @@ static void bnx2x_attn_int_asserted(struct bnx2x_softc *sc, uint32_t asserted)
 
                REG_WR(sc, nig_int_mask_addr, nig_mask);
 
+               bnx2x_release_phy_lock(sc);
        }
 }
 
@@ -3040,7 +3064,7 @@ static void
 bnx2x_print_next_block(__rte_unused struct bnx2x_softc *sc, __rte_unused int idx,
                     __rte_unused const char *blk)
 {
-       PMD_DRV_LOG(INFO, "%s%s", idx ? ", " : "", blk);
+       PMD_DRV_LOG(INFO, sc, "%s%s", idx ? ", " : "", blk);
 }
 
 static int
@@ -3348,7 +3372,7 @@ bnx2x_parity_attn(struct bnx2x_softc *sc, uint8_t * global, uint8_t print,
            (sig[2] & HW_PRTY_ASSERT_SET_2) ||
            (sig[3] & HW_PRTY_ASSERT_SET_3) ||
            (sig[4] & HW_PRTY_ASSERT_SET_4)) {
-               PMD_DRV_LOG(ERR,
+               PMD_DRV_LOG(ERR, sc,
                            "Parity error: HW block parity attention:"
                            "[0]:0x%08x [1]:0x%08x [2]:0x%08x [3]:0x%08x [4]:0x%08x",
                            (uint32_t) (sig[0] & HW_PRTY_ASSERT_SET_0),
@@ -3358,7 +3382,7 @@ bnx2x_parity_attn(struct bnx2x_softc *sc, uint8_t * global, uint8_t print,
                            (uint32_t) (sig[4] & HW_PRTY_ASSERT_SET_4));
 
                if (print)
-                       PMD_DRV_LOG(INFO, "Parity errors detected in blocks: ");
+                       PMD_DRV_LOG(INFO, sc, "Parity errors detected in blocks: ");
 
                par_num =
                    bnx2x_check_blocks_with_parity0(sc, sig[0] &
@@ -3382,7 +3406,7 @@ bnx2x_parity_attn(struct bnx2x_softc *sc, uint8_t * global, uint8_t print,
                                                  par_num, print);
 
                if (print)
-                       PMD_DRV_LOG(INFO, "");
+                       PMD_DRV_LOG(INFO, sc, "");
 
                return TRUE;
        }
@@ -3414,64 +3438,64 @@ static void bnx2x_attn_int_deasserted4(struct bnx2x_softc *sc, uint32_t attn)
 
        if (attn & AEU_INPUTS_ATTN_BITS_PGLUE_HW_INTERRUPT) {
                val = REG_RD(sc, PGLUE_B_REG_PGLUE_B_INT_STS_CLR);
-               PMD_DRV_LOG(INFO, "ERROR: PGLUE hw attention 0x%08x", val);
+               PMD_DRV_LOG(INFO, sc, "ERROR: PGLUE hw attention 0x%08x", val);
                if (val & PGLUE_B_PGLUE_B_INT_STS_REG_ADDRESS_ERROR)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: PGLUE_B_PGLUE_B_INT_STS_REG_ADDRESS_ERROR");
                if (val & PGLUE_B_PGLUE_B_INT_STS_REG_INCORRECT_RCV_BEHAVIOR)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: PGLUE_B_PGLUE_B_INT_STS_REG_INCORRECT_RCV_BEHAVIOR");
                if (val & PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN");
                if (val & PGLUE_B_PGLUE_B_INT_STS_REG_VF_LENGTH_VIOLATION_ATTN)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: PGLUE_B_PGLUE_B_INT_STS_REG_VF_LENGTH_VIOLATION_ATTN");
                if (val &
                    PGLUE_B_PGLUE_B_INT_STS_REG_VF_GRC_SPACE_VIOLATION_ATTN)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: PGLUE_B_PGLUE_B_INT_STS_REG_VF_GRC_SPACE_VIOLATION_ATTN");
                if (val &
                    PGLUE_B_PGLUE_B_INT_STS_REG_VF_MSIX_BAR_VIOLATION_ATTN)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: PGLUE_B_PGLUE_B_INT_STS_REG_VF_MSIX_BAR_VIOLATION_ATTN");
                if (val & PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_ERROR_ATTN)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_ERROR_ATTN");
                if (val & PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_IN_TWO_RCBS_ATTN)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: PGLUE_B_PGLUE_B_INT_STS_REG_TCPL_IN_TWO_RCBS_ATTN");
                if (val & PGLUE_B_PGLUE_B_INT_STS_REG_CSSNOOP_FIFO_OVERFLOW)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: PGLUE_B_PGLUE_B_INT_STS_REG_CSSNOOP_FIFO_OVERFLOW");
        }
 
        if (attn & AEU_INPUTS_ATTN_BITS_ATC_HW_INTERRUPT) {
                val = REG_RD(sc, ATC_REG_ATC_INT_STS_CLR);
-               PMD_DRV_LOG(INFO, "ERROR: ATC hw attention 0x%08x", val);
+               PMD_DRV_LOG(INFO, sc, "ERROR: ATC hw attention 0x%08x", val);
                if (val & ATC_ATC_INT_STS_REG_ADDRESS_ERROR)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: ATC_ATC_INT_STS_REG_ADDRESS_ERROR");
                if (val & ATC_ATC_INT_STS_REG_ATC_TCPL_TO_NOT_PEND)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: ATC_ATC_INT_STS_REG_ATC_TCPL_TO_NOT_PEND");
                if (val & ATC_ATC_INT_STS_REG_ATC_GPA_MULTIPLE_HITS)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: ATC_ATC_INT_STS_REG_ATC_GPA_MULTIPLE_HITS");
                if (val & ATC_ATC_INT_STS_REG_ATC_RCPL_TO_EMPTY_CNT)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: ATC_ATC_INT_STS_REG_ATC_RCPL_TO_EMPTY_CNT");
                if (val & ATC_ATC_INT_STS_REG_ATC_TCPL_ERROR)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: ATC_ATC_INT_STS_REG_ATC_TCPL_ERROR");
                if (val & ATC_ATC_INT_STS_REG_ATC_IREQ_LESS_THAN_STU)
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "ERROR: ATC_ATC_INT_STS_REG_ATC_IREQ_LESS_THAN_STU");
        }
 
        if (attn & (AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR |
                    AEU_INPUTS_ATTN_BITS_ATC_PARITY_ERROR)) {
-               PMD_DRV_LOG(INFO,
+               PMD_DRV_LOG(INFO, sc,
                            "ERROR: FATAL parity attention set4 0x%08x",
                            (uint32_t) (attn &
                                        (AEU_INPUTS_ATTN_BITS_PGLUE_PARITY_ERROR
@@ -3594,11 +3618,11 @@ static void bnx2x_dcc_event(struct bnx2x_softc *sc, uint32_t dcc_event)
  */
                if (sc->devinfo.
                    mf_info.mf_config[SC_VN(sc)] & FUNC_MF_CFG_FUNC_DISABLED) {
-                       PMD_DRV_LOG(DEBUG, "mf_cfg function disabled");
+                       PMD_DRV_LOG(DEBUG, sc, "mf_cfg function disabled");
                        sc->flags |= BNX2X_MF_FUNC_DIS;
                        bnx2x_e1h_disable(sc);
                } else {
-                       PMD_DRV_LOG(DEBUG, "mf_cfg function enabled");
+                       PMD_DRV_LOG(DEBUG, sc, "mf_cfg function enabled");
                        sc->flags &= ~BNX2X_MF_FUNC_DIS;
                        bnx2x_e1h_enable(sc);
                }
@@ -3653,7 +3677,7 @@ static int bnx2x_mc_assert(struct bnx2x_softc *sc)
        last_idx =
            REG_RD8(sc, BAR_XSTRORM_INTMEM + XSTORM_ASSERT_LIST_INDEX_OFFSET);
        if (last_idx)
-               PMD_DRV_LOG(ERR, "XSTORM_ASSERT_LIST_INDEX 0x%x", last_idx);
+               PMD_DRV_LOG(ERR, sc, "XSTORM_ASSERT_LIST_INDEX 0x%x", last_idx);
 
        /* print the asserts */
        for (i = 0; i < STORM_ASSERT_ARRAY_SIZE; i++) {
@@ -3675,7 +3699,7 @@ static int bnx2x_mc_assert(struct bnx2x_softc *sc)
                           12);
 
                if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) {
-                       PMD_DRV_LOG(ERR,
+                       PMD_DRV_LOG(ERR, sc,
                                    "XSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x",
                                    i, row3, row2, row1, row0);
                        rc++;
@@ -3688,7 +3712,7 @@ static int bnx2x_mc_assert(struct bnx2x_softc *sc)
        last_idx =
            REG_RD8(sc, BAR_TSTRORM_INTMEM + TSTORM_ASSERT_LIST_INDEX_OFFSET);
        if (last_idx) {
-               PMD_DRV_LOG(ERR, "TSTORM_ASSERT_LIST_INDEX 0x%x", last_idx);
+               PMD_DRV_LOG(ERR, sc, "TSTORM_ASSERT_LIST_INDEX 0x%x", last_idx);
        }
 
        /* print the asserts */
@@ -3711,7 +3735,7 @@ static int bnx2x_mc_assert(struct bnx2x_softc *sc)
                           12);
 
                if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) {
-                       PMD_DRV_LOG(ERR,
+                       PMD_DRV_LOG(ERR, sc,
                                    "TSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x",
                                    i, row3, row2, row1, row0);
                        rc++;
@@ -3724,7 +3748,7 @@ static int bnx2x_mc_assert(struct bnx2x_softc *sc)
        last_idx =
            REG_RD8(sc, BAR_CSTRORM_INTMEM + CSTORM_ASSERT_LIST_INDEX_OFFSET);
        if (last_idx) {
-               PMD_DRV_LOG(ERR, "CSTORM_ASSERT_LIST_INDEX 0x%x", last_idx);
+               PMD_DRV_LOG(ERR, sc, "CSTORM_ASSERT_LIST_INDEX 0x%x", last_idx);
        }
 
        /* print the asserts */
@@ -3747,7 +3771,7 @@ static int bnx2x_mc_assert(struct bnx2x_softc *sc)
                           12);
 
                if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) {
-                       PMD_DRV_LOG(ERR,
+                       PMD_DRV_LOG(ERR, sc,
                                    "CSTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x",
                                    i, row3, row2, row1, row0);
                        rc++;
@@ -3760,7 +3784,7 @@ static int bnx2x_mc_assert(struct bnx2x_softc *sc)
        last_idx =
            REG_RD8(sc, BAR_USTRORM_INTMEM + USTORM_ASSERT_LIST_INDEX_OFFSET);
        if (last_idx) {
-               PMD_DRV_LOG(ERR, "USTORM_ASSERT_LIST_INDEX 0x%x", last_idx);
+               PMD_DRV_LOG(ERR, sc, "USTORM_ASSERT_LIST_INDEX 0x%x", last_idx);
        }
 
        /* print the asserts */
@@ -3783,7 +3807,7 @@ static int bnx2x_mc_assert(struct bnx2x_softc *sc)
                           12);
 
                if (row0 != COMMON_ASM_INVALID_ASSERT_OPCODE) {
-                       PMD_DRV_LOG(ERR,
+                       PMD_DRV_LOG(ERR, sc,
                                    "USTORM_ASSERT_INDEX 0x%x = 0x%08x 0x%08x 0x%08x 0x%08x",
                                    i, row3, row2, row1, row0);
                        rc++;
@@ -3832,8 +3856,10 @@ static void bnx2x_attn_int_deasserted3(struct bnx2x_softc *sc, uint32_t attn)
                        if (sc->link_vars.periodic_flags &
                            ELINK_PERIODIC_FLAGS_LINK_EVENT) {
                                /* sync with link */
+                               bnx2x_acquire_phy_lock(sc);
                                sc->link_vars.periodic_flags &=
                                    ~ELINK_PERIODIC_FLAGS_LINK_EVENT;
+                               bnx2x_release_phy_lock(sc);
                                if (IS_MF(sc)) {
                                        bnx2x_link_sync_notify(sc);
                                }
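
This hunk, and several below, also bracket elink/PHY accesses with bnx2x_acquire_phy_lock()/bnx2x_release_phy_lock(), serializing link-state updates between the attention path and the periodic callout. The pairing pattern, sketched with a pthread mutex standing in for the driver's lock (names illustrative):

#include <pthread.h>

struct phy_sketch {
	pthread_mutex_t phy_mtx;
	unsigned long periodic_flags;
};

static void clear_link_event(struct phy_sketch *sc, unsigned long flag)
{
	pthread_mutex_lock(&sc->phy_mtx);	/* bnx2x_acquire_phy_lock() */
	sc->periodic_flags &= ~flag;		/* touch link state only under the lock */
	pthread_mutex_unlock(&sc->phy_mtx);	/* bnx2x_release_phy_lock() */
}
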
@@ -3848,7 +3874,7 @@ static void bnx2x_attn_int_deasserted3(struct bnx2x_softc *sc, uint32_t attn)
 
                } else if (attn & BNX2X_MC_ASSERT_BITS) {
 
-                       PMD_DRV_LOG(ERR, "MC assert!");
+                       PMD_DRV_LOG(ERR, sc, "MC assert!");
                        bnx2x_mc_assert(sc);
                        REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_10, 0);
                        REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_9, 0);
@@ -3858,24 +3884,24 @@ static void bnx2x_attn_int_deasserted3(struct bnx2x_softc *sc, uint32_t attn)
 
                } else if (attn & BNX2X_MCP_ASSERT) {
 
-                       PMD_DRV_LOG(ERR, "MCP assert!");
+                       PMD_DRV_LOG(ERR, sc, "MCP assert!");
                        REG_WR(sc, MISC_REG_AEU_GENERAL_ATTN_11, 0);
 
                } else {
-                       PMD_DRV_LOG(ERR,
+                       PMD_DRV_LOG(ERR, sc,
                                    "Unknown HW assert! (attn 0x%08x)", attn);
                }
        }
 
        if (attn & EVEREST_LATCHED_ATTN_IN_USE_MASK) {
-               PMD_DRV_LOG(ERR, "LATCHED attention 0x%08x (masked)", attn);
+               PMD_DRV_LOG(ERR, sc, "LATCHED attention 0x%08x (masked)", attn);
                if (attn & BNX2X_GRC_TIMEOUT) {
                        val = REG_RD(sc, MISC_REG_GRC_TIMEOUT_ATTN);
-                       PMD_DRV_LOG(ERR, "GRC time-out 0x%08x", val);
+                       PMD_DRV_LOG(ERR, sc, "GRC time-out 0x%08x", val);
                }
                if (attn & BNX2X_GRC_RSV) {
                        val = REG_RD(sc, MISC_REG_GRC_RSV_ATTN);
-                       PMD_DRV_LOG(ERR, "GRC reserved 0x%08x", val);
+                       PMD_DRV_LOG(ERR, sc, "GRC reserved 0x%08x", val);
                }
                REG_WR(sc, MISC_REG_AEU_CLR_LATCH_SIGNAL, 0x7ff);
        }
@@ -3890,24 +3916,24 @@ static void bnx2x_attn_int_deasserted2(struct bnx2x_softc *sc, uint32_t attn)
 
        if (attn & AEU_INPUTS_ATTN_BITS_CFC_HW_INTERRUPT) {
                val = REG_RD(sc, CFC_REG_CFC_INT_STS_CLR);
-               PMD_DRV_LOG(ERR, "CFC hw attention 0x%08x", val);
+               PMD_DRV_LOG(ERR, sc, "CFC hw attention 0x%08x", val);
 /* CFC error attention */
                if (val & 0x2) {
-                       PMD_DRV_LOG(ERR, "FATAL error from CFC");
+                       PMD_DRV_LOG(ERR, sc, "FATAL error from CFC");
                }
        }
 
        if (attn & AEU_INPUTS_ATTN_BITS_PXP_HW_INTERRUPT) {
                val = REG_RD(sc, PXP_REG_PXP_INT_STS_CLR_0);
-               PMD_DRV_LOG(ERR, "PXP hw attention-0 0x%08x", val);
+               PMD_DRV_LOG(ERR, sc, "PXP hw attention-0 0x%08x", val);
 /* RQ_USDMDP_FIFO_OVERFLOW */
                if (val & 0x18000) {
-                       PMD_DRV_LOG(ERR, "FATAL error from PXP");
+                       PMD_DRV_LOG(ERR, sc, "FATAL error from PXP");
                }
 
                if (!CHIP_IS_E1x(sc)) {
                        val = REG_RD(sc, PXP_REG_PXP_INT_STS_CLR_1);
-                       PMD_DRV_LOG(ERR, "PXP hw attention-1 0x%08x", val);
+                       PMD_DRV_LOG(ERR, sc, "PXP hw attention-1 0x%08x", val);
                }
        }
 #define PXP2_EOP_ERROR_BIT  PXP2_PXP2_INT_STS_CLR_0_REG_WR_PGLUE_EOP_ERROR
@@ -3935,7 +3961,7 @@ static void bnx2x_attn_int_deasserted2(struct bnx2x_softc *sc, uint32_t attn)
                                val0 = REG_RD(sc, PXP2_REG_PXP2_INT_STS_CLR_0);
 
                        /* print the register, since no one can restore it */
-                       PMD_DRV_LOG(ERR,
+                       PMD_DRV_LOG(ERR, sc,
                                    "PXP2_REG_PXP2_INT_STS_CLR_0 0x%08x", val0);
 
                        /*
@@ -3943,7 +3969,7 @@ static void bnx2x_attn_int_deasserted2(struct bnx2x_softc *sc, uint32_t attn)
                         * then notify
                         */
                        if (val0 & PXP2_EOP_ERROR_BIT) {
-                               PMD_DRV_LOG(ERR, "PXP2_WR_PGLUE_EOP_ERROR");
+                               PMD_DRV_LOG(ERR, sc, "PXP2_WR_PGLUE_EOP_ERROR");
 
                                /*
                                 * if only PXP2_PXP2_INT_STS_0_REG_WR_PGLUE_EOP_ERROR is
@@ -3964,7 +3990,7 @@ static void bnx2x_attn_int_deasserted2(struct bnx2x_softc *sc, uint32_t attn)
                val &= ~(attn & HW_INTERRUT_ASSERT_SET_2);
                REG_WR(sc, reg_offset, val);
 
-               PMD_DRV_LOG(ERR,
+               PMD_DRV_LOG(ERR, sc,
                            "FATAL HW block attention set2 0x%x",
                            (uint32_t) (attn & HW_INTERRUT_ASSERT_SET_2));
                rte_panic("HW block attention set2");
@@ -3979,10 +4005,10 @@ static void bnx2x_attn_int_deasserted1(struct bnx2x_softc *sc, uint32_t attn)
 
        if (attn & AEU_INPUTS_ATTN_BITS_DOORBELLQ_HW_INTERRUPT) {
                val = REG_RD(sc, DORQ_REG_DORQ_INT_STS_CLR);
-               PMD_DRV_LOG(ERR, "DB hw attention 0x%08x", val);
+               PMD_DRV_LOG(ERR, sc, "DB hw attention 0x%08x", val);
 /* DORQ discard attention */
                if (val & 0x2) {
-                       PMD_DRV_LOG(ERR, "FATAL error from DORQ");
+                       PMD_DRV_LOG(ERR, sc, "FATAL error from DORQ");
                }
        }
 
@@ -3994,7 +4020,7 @@ static void bnx2x_attn_int_deasserted1(struct bnx2x_softc *sc, uint32_t attn)
                val &= ~(attn & HW_INTERRUT_ASSERT_SET_1);
                REG_WR(sc, reg_offset, val);
 
-               PMD_DRV_LOG(ERR,
+               PMD_DRV_LOG(ERR, sc,
                            "FATAL HW block attention set1 0x%08x",
                            (uint32_t) (attn & HW_INTERRUT_ASSERT_SET_1));
                rte_panic("HW block attention set1");
@@ -4015,7 +4041,7 @@ static void bnx2x_attn_int_deasserted0(struct bnx2x_softc *sc, uint32_t attn)
                val &= ~AEU_INPUTS_ATTN_BITS_SPIO5;
                REG_WR(sc, reg_offset, val);
 
-               PMD_DRV_LOG(WARNING, "SPIO5 hw attention");
+               PMD_DRV_LOG(WARNING, sc, "SPIO5 hw attention");
 
 /* Fan failure attention */
                elink_hw_reset_phy(&sc->link_params);
@@ -4023,7 +4049,9 @@ static void bnx2x_attn_int_deasserted0(struct bnx2x_softc *sc, uint32_t attn)
        }
 
        if ((attn & sc->link_vars.aeu_int_mask) && sc->port.pmf) {
+               bnx2x_acquire_phy_lock(sc);
                elink_handle_module_detect_int(&sc->link_params);
+               bnx2x_release_phy_lock(sc);
        }
 
        if (attn & HW_INTERRUT_ASSERT_SET_0) {
@@ -4105,14 +4133,14 @@ static void bnx2x_attn_int_deasserted(struct bnx2x_softc *sc, uint32_t deasserte
        }
 
        val = ~deasserted;
-       PMD_DRV_LOG(DEBUG,
+       PMD_DRV_LOG(DEBUG, sc,
                    "about to mask 0x%08x at %s addr 0x%08x", val,
                    (sc->devinfo.int_block == INT_BLOCK_HC) ? "HC" : "IGU",
                    reg_addr);
        REG_WR(sc, reg_addr, val);
 
        if (~sc->attn_state & deasserted) {
-               PMD_DRV_LOG(ERR, "IGU error");
+               PMD_DRV_LOG(ERR, sc, "IGU error");
        }
 
        reg_addr = port ? MISC_REG_AEU_MASK_ATTN_FUNC_1 :
@@ -4142,12 +4170,12 @@ static void bnx2x_attn_int(struct bnx2x_softc *sc)
        uint32_t asserted = attn_bits & ~attn_ack & ~attn_state;
        uint32_t deasserted = ~attn_bits & attn_ack & attn_state;
 
-       PMD_DRV_LOG(DEBUG,
+       PMD_DRV_LOG(DEBUG, sc,
                    "attn_bits 0x%08x attn_ack 0x%08x asserted 0x%08x deasserted 0x%08x",
                    attn_bits, attn_ack, asserted, deasserted);
 
        if (~(attn_bits ^ attn_ack) & (attn_bits ^ attn_state)) {
-               PMD_DRV_LOG(ERR, "BAD attention state");
+               PMD_DRV_LOG(ERR, sc, "BAD attention state");
        }
 
        /* handle bits that were raised */
@@ -4204,7 +4232,7 @@ static void bnx2x_handle_mcast_eqe(struct bnx2x_softc *sc)
        if (sc->mcast_obj.check_pending(&sc->mcast_obj)) {
                rc = ecore_config_mcast(sc, &rparam, ECORE_MCAST_CMD_CONT);
                if (rc < 0) {
-                       PMD_DRV_LOG(INFO,
+                       PMD_DRV_LOG(INFO, sc,
                                    "Failed to send pending mcast commands (%d)",
                                    rc);
                }
@@ -4224,17 +4252,17 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
 
        switch (le32toh(elem->message.data.eth_event.echo) >> BNX2X_SWCID_SHIFT) {
        case ECORE_FILTER_MAC_PENDING:
-               PMD_DRV_LOG(DEBUG, "Got SETUP_MAC completions");
+               PMD_DRV_LOG(DEBUG, sc, "Got SETUP_MAC completions");
                vlan_mac_obj = &sc->sp_objs[cid].mac_obj;
                break;
 
        case ECORE_FILTER_MCAST_PENDING:
-               PMD_DRV_LOG(DEBUG, "Got SETUP_MCAST completions");
+               PMD_DRV_LOG(DEBUG, sc, "Got SETUP_MCAST completions");
                bnx2x_handle_mcast_eqe(sc);
                return;
 
        default:
-               PMD_DRV_LOG(NOTICE, "Unsupported classification command: %d",
+               PMD_DRV_LOG(NOTICE, sc, "Unsupported classification command: %d",
                            elem->message.data.eth_event.echo);
                return;
        }
@@ -4242,9 +4270,10 @@ bnx2x_handle_classification_eqe(struct bnx2x_softc *sc, union event_ring_elem *e
        rc = vlan_mac_obj->complete(sc, vlan_mac_obj, elem, &ramrod_flags);
 
        if (rc < 0) {
-               PMD_DRV_LOG(NOTICE, "Failed to schedule new commands (%d)", rc);
+               PMD_DRV_LOG(NOTICE, sc,
+                           "Failed to schedule new commands (%d)", rc);
        } else if (rc > 0) {
-               PMD_DRV_LOG(DEBUG, "Scheduled next pending commands...");
+               PMD_DRV_LOG(DEBUG, sc, "Scheduled next pending commands...");
        }
 }
 
@@ -4308,7 +4337,7 @@ static void bnx2x_eq_int(struct bnx2x_softc *sc)
 /* handle eq element */
                switch (opcode) {
                case EVENT_RING_OPCODE_STAT_QUERY:
-                       PMD_DEBUG_PERIODIC_LOG(DEBUG, "got statistics completion event %d",
+                       PMD_DEBUG_PERIODIC_LOG(DEBUG, sc, "got statistics completion event %d",
                                    sc->stats_comp++);
                        /* nothing to do with stats comp */
                        goto next_spqe;
@@ -4316,7 +4345,7 @@ static void bnx2x_eq_int(struct bnx2x_softc *sc)
                case EVENT_RING_OPCODE_CFC_DEL:
                        /* handle according to cid range */
                        /* we may want to verify here that the sc state is HALTING */
-                       PMD_DRV_LOG(DEBUG, "got delete ramrod for MULTI[%d]",
+                       PMD_DRV_LOG(DEBUG, sc, "got delete ramrod for MULTI[%d]",
                                    cid);
                        q_obj = bnx2x_cid_to_q_obj(sc, cid);
                        if (q_obj->complete_cmd(sc, q_obj, ECORE_Q_CMD_CFC_DEL)) {
@@ -4325,14 +4354,14 @@ static void bnx2x_eq_int(struct bnx2x_softc *sc)
                        goto next_spqe;
 
                case EVENT_RING_OPCODE_STOP_TRAFFIC:
-                       PMD_DRV_LOG(DEBUG, "got STOP TRAFFIC");
+                       PMD_DRV_LOG(DEBUG, sc, "got STOP TRAFFIC");
                        if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_TX_STOP)) {
                                break;
                        }
                        goto next_spqe;
 
                case EVENT_RING_OPCODE_START_TRAFFIC:
-                       PMD_DRV_LOG(DEBUG, "got START TRAFFIC");
+                       PMD_DRV_LOG(DEBUG, sc, "got START TRAFFIC");
                        if (f_obj->complete_cmd
                            (sc, f_obj, ECORE_F_CMD_TX_START)) {
                                break;
@@ -4342,7 +4371,7 @@ static void bnx2x_eq_int(struct bnx2x_softc *sc)
                case EVENT_RING_OPCODE_FUNCTION_UPDATE:
                        echo = elem->message.data.function_update_event.echo;
                        if (echo == SWITCH_UPDATE) {
-                               PMD_DRV_LOG(DEBUG,
+                               PMD_DRV_LOG(DEBUG, sc,
                                            "got FUNC_SWITCH_UPDATE ramrod");
                                if (f_obj->complete_cmd(sc, f_obj,
                                                        ECORE_F_CMD_SWITCH_UPDATE))
@@ -4350,7 +4379,7 @@ static void bnx2x_eq_int(struct bnx2x_softc *sc)
                                        break;
                                }
                        } else {
-                               PMD_DRV_LOG(DEBUG,
+                               PMD_DRV_LOG(DEBUG, sc,
                                            "AFEX: ramrod completed FUNCTION_UPDATE");
                                f_obj->complete_cmd(sc, f_obj,
                                                    ECORE_F_CMD_AFEX_UPDATE);
@@ -4366,14 +4395,14 @@ static void bnx2x_eq_int(struct bnx2x_softc *sc)
                        goto next_spqe;
 
                case EVENT_RING_OPCODE_FUNCTION_START:
-                       PMD_DRV_LOG(DEBUG, "got FUNC_START ramrod");
+                       PMD_DRV_LOG(DEBUG, sc, "got FUNC_START ramrod");
                        if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_START)) {
                                break;
                        }
                        goto next_spqe;
 
                case EVENT_RING_OPCODE_FUNCTION_STOP:
-                       PMD_DRV_LOG(DEBUG, "got FUNC_STOP ramrod");
+                       PMD_DRV_LOG(DEBUG, sc, "got FUNC_STOP ramrod");
                        if (f_obj->complete_cmd(sc, f_obj, ECORE_F_CMD_STOP)) {
                                break;
                        }
@@ -4385,7 +4414,7 @@ static void bnx2x_eq_int(struct bnx2x_softc *sc)
                case (EVENT_RING_OPCODE_RSS_UPDATE_RULES | BNX2X_STATE_OPENING_WAITING_PORT):
                        cid =
                            elem->message.data.eth_event.echo & BNX2X_SWCID_MASK;
-                       PMD_DRV_LOG(DEBUG, "got RSS_UPDATE ramrod. CID %d",
+                       PMD_DRV_LOG(DEBUG, sc, "got RSS_UPDATE ramrod. CID %d",
                                    cid);
                        rss_raw->clear_pending(rss_raw);
                        break;
@@ -4396,7 +4425,7 @@ static void bnx2x_eq_int(struct bnx2x_softc *sc)
                case (EVENT_RING_OPCODE_CLASSIFICATION_RULES | BNX2X_STATE_OPEN):
                case (EVENT_RING_OPCODE_CLASSIFICATION_RULES | BNX2X_STATE_DIAG):
                case (EVENT_RING_OPCODE_CLASSIFICATION_RULES | BNX2X_STATE_CLOSING_WAITING_HALT):
-                       PMD_DRV_LOG(DEBUG,
+                       PMD_DRV_LOG(DEBUG, sc,
                                    "got (un)set mac ramrod");
                        bnx2x_handle_classification_eqe(sc, elem);
                        break;
@@ -4404,7 +4433,7 @@ static void bnx2x_eq_int(struct bnx2x_softc *sc)
                case (EVENT_RING_OPCODE_MULTICAST_RULES | BNX2X_STATE_OPEN):
                case (EVENT_RING_OPCODE_MULTICAST_RULES | BNX2X_STATE_DIAG):
                case (EVENT_RING_OPCODE_MULTICAST_RULES | BNX2X_STATE_CLOSING_WAITING_HALT):
-                       PMD_DRV_LOG(DEBUG,
+                       PMD_DRV_LOG(DEBUG, sc,
                                    "got mcast ramrod");
                        bnx2x_handle_mcast_eqe(sc);
                        break;
@@ -4412,14 +4441,14 @@ static void bnx2x_eq_int(struct bnx2x_softc *sc)
                case (EVENT_RING_OPCODE_FILTERS_RULES | BNX2X_STATE_OPEN):
                case (EVENT_RING_OPCODE_FILTERS_RULES | BNX2X_STATE_DIAG):
                case (EVENT_RING_OPCODE_FILTERS_RULES | BNX2X_STATE_CLOSING_WAITING_HALT):
-                       PMD_DRV_LOG(DEBUG,
+                       PMD_DRV_LOG(DEBUG, sc,
                                    "got rx_mode ramrod");
                        bnx2x_handle_rx_mode_eqe(sc);
                        break;
 
                default:
                /* unknown event: log the error and continue */
-                       PMD_DRV_LOG(INFO, "Unknown EQ event %d, sc->state 0x%x",
+                       PMD_DRV_LOG(INFO, sc, "Unknown EQ event %d, sc->state 0x%x",
                                    elem->message.opcode, sc->state);
                }
 
@@ -4445,12 +4474,16 @@ static int bnx2x_handle_sp_tq(struct bnx2x_softc *sc)
        uint16_t status;
        int rc = 0;
 
+       PMD_DRV_LOG(DEBUG, sc, "---> SP TASK <---");
+
        /* what work needs to be performed? */
        status = bnx2x_update_dsb_idx(sc);
 
+       PMD_DRV_LOG(DEBUG, sc, "dsb status 0x%04x", status);
+
        /* HW attentions */
        if (status & BNX2X_DEF_SB_ATT_IDX) {
-               PMD_DRV_LOG(DEBUG, "---> ATTN INTR <---");
+               PMD_DRV_LOG(DEBUG, sc, "---> ATTN INTR <---");
                bnx2x_attn_int(sc);
                status &= ~BNX2X_DEF_SB_ATT_IDX;
                rc = 1;
@@ -4459,7 +4492,7 @@ static int bnx2x_handle_sp_tq(struct bnx2x_softc *sc)
        /* SP events: STAT_QUERY and others */
        if (status & BNX2X_DEF_SB_IDX) {
 /* handle EQ completions */
-               PMD_DEBUG_PERIODIC_LOG(DEBUG, "---> EQ INTR <---");
+               PMD_DRV_LOG(DEBUG, sc, "---> EQ INTR <---");
                bnx2x_eq_int(sc);
                bnx2x_ack_sb(sc, sc->igu_dsb_id, USTORM_ID,
                           le16toh(sc->def_idx), IGU_INT_NOP, 1);
@@ -4468,7 +4501,7 @@ static int bnx2x_handle_sp_tq(struct bnx2x_softc *sc)
 
        /* if status is non-zero then something went wrong */
        if (unlikely(status)) {
-               PMD_DRV_LOG(INFO,
+               PMD_DRV_LOG(INFO, sc,
                            "Got an unknown SP interrupt! (0x%04x)", status);
        }
 
@@ -4484,7 +4517,8 @@ static void bnx2x_handle_fp_tq(struct bnx2x_fastpath *fp, int scan_fp)
        struct bnx2x_softc *sc = fp->sc;
        uint8_t more_rx = FALSE;
 
-       PMD_DRV_LOG(DEBUG, "---> FP TASK QUEUE (%d) <--", fp->index);
+       PMD_DEBUG_PERIODIC_LOG(DEBUG, sc,
+                              "---> FP TASK QUEUE (%d) <---", fp->index);
 
        /* update the fastpath index */
        bnx2x_update_fp_sb_idx(fp);
@@ -4534,25 +4568,31 @@ int bnx2x_intr_legacy(struct bnx2x_softc *sc, int scan_fp)
                return 0;
        }
 
-       PMD_DEBUG_PERIODIC_LOG(DEBUG, "Interrupt status 0x%04x", status);
+       PMD_DEBUG_PERIODIC_LOG(DEBUG, sc, "Interrupt status 0x%04x", status);
        //bnx2x_dump_status_block(sc);
 
        FOR_EACH_ETH_QUEUE(sc, i) {
                fp = &sc->fp[i];
                mask = (0x2 << (fp->index + CNIC_SUPPORT(sc)));
                if (status & mask) {
+                       /* acknowledge and disable further fastpath interrupts */
+                       bnx2x_ack_sb(sc, fp->igu_sb_id, USTORM_ID,
+                                    0, IGU_INT_DISABLE, 0);
                        bnx2x_handle_fp_tq(fp, scan_fp);
                        status &= ~mask;
                }
        }
 
        if (unlikely(status & 0x1)) {
+               /* acknowledge and disable further slowpath interrupts */
+               bnx2x_ack_sb(sc, sc->igu_dsb_id, USTORM_ID,
+                            0, IGU_INT_DISABLE, 0);
                rc = bnx2x_handle_sp_tq(sc);
                status &= ~0x1;
        }
 
        if (unlikely(status)) {
-               PMD_DRV_LOG(WARNING,
+               PMD_DRV_LOG(WARNING, sc,
                            "Unexpected fastpath status (0x%08x)!", status);
        }
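
Note the new ordering in the legacy interrupt handler above: each status block is acked with IGU_INT_DISABLE before its queue is serviced, so the IGU cannot fire again for that block mid-drain, and the driver re-arms the block with a later ack. The shape of the pattern, with hypothetical helpers standing in for bnx2x_ack_sb():

struct sketch_dev;
enum sketch_int_mode { SKETCH_INT_DISABLE, SKETCH_INT_ENABLE };

void sketch_ack_sb(struct sketch_dev *dev, int sb_id, enum sketch_int_mode mode);
void sketch_drain_queue(struct sketch_dev *dev, int sb_id);

static void sketch_service_block(struct sketch_dev *dev, int sb_id)
{
	sketch_ack_sb(dev, sb_id, SKETCH_INT_DISABLE);	/* mask further interrupts */
	sketch_drain_queue(dev, sb_id);			/* process pending work */
	sketch_ack_sb(dev, sb_id, SKETCH_INT_ENABLE);	/* re-arm the block */
}
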
 
@@ -4588,7 +4628,7 @@ static void bnx2x_init_func_obj(struct bnx2x_softc *sc)
 {
        sc->dmae_ready = 0;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        ecore_init_func_obj(sc,
                            &sc->func_obj,
@@ -4604,7 +4644,7 @@ static int bnx2x_init_hw(struct bnx2x_softc *sc, uint32_t load_code)
        struct ecore_func_state_params func_params = { NULL };
        int rc;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        /* prepare the parameters for function state transitions */
        bnx2x_set_bit(RAMROD_COMP_WAIT, &func_params.ramrod_flags);
@@ -5193,7 +5233,7 @@ static void bnx2x_init_internal(struct bnx2x_softc *sc, uint32_t load_code)
                break;
 
        default:
-               PMD_DRV_LOG(NOTICE, "Unknown load_code (0x%x) from MCP",
+               PMD_DRV_LOG(NOTICE, sc, "Unknown load_code (0x%x) from MCP",
                            load_code);
                break;
        }
@@ -5284,7 +5324,7 @@ bnx2x_extract_max_cfg(__rte_unused struct bnx2x_softc *sc, uint32_t mf_cfg)
                            FUNC_MF_CFG_MAX_BW_SHIFT);
 
        if (!max_cfg) {
-               PMD_DRV_LOG(DEBUG,
+               PMD_DRV_LOG(DEBUG, sc,
                            "Max BW configured to 0 - using 100 instead");
                max_cfg = 100;
        }
@@ -5548,7 +5588,7 @@ static void bnx2x_igu_int_enable(struct bnx2x_softc *sc)
 
        val |= IGU_PF_CONF_FUNC_EN;
 
-       PMD_DRV_LOG(DEBUG, "write 0x%x to IGU mode %s",
+       PMD_DRV_LOG(DEBUG, sc, "write 0x%x to IGU mode %s",
                    val, ((msix) ? "MSI-X" : ((msi) ? "MSI" : "INTx")));
 
        REG_WR(sc, IGU_REG_PF_CONFIGURATION, val);
@@ -5596,7 +5636,7 @@ static void bnx2x_hc_int_disable(struct bnx2x_softc *sc)
 
        REG_WR(sc, addr, val);
        if (REG_RD(sc, addr) != val) {
-               PMD_DRV_LOG(ERR, "proper val not read from HC IGU!");
+               PMD_DRV_LOG(ERR, sc, "proper val not read from HC IGU!");
        }
 }
 
@@ -5607,14 +5647,14 @@ static void bnx2x_igu_int_disable(struct bnx2x_softc *sc)
        val &= ~(IGU_PF_CONF_MSI_MSIX_EN |
                 IGU_PF_CONF_INT_LINE_EN | IGU_PF_CONF_ATTN_BIT_EN);
 
-       PMD_DRV_LOG(DEBUG, "write %x to IGU", val);
+       PMD_DRV_LOG(DEBUG, sc, "write %x to IGU", val);
 
        /* flush all outstanding writes */
        mb();
 
        REG_WR(sc, IGU_REG_PF_CONFIGURATION, val);
        if (REG_RD(sc, IGU_REG_PF_CONFIGURATION) != val) {
-               PMD_DRV_LOG(ERR, "proper val not read from IGU!");
+               PMD_DRV_LOG(ERR, sc, "proper val not read from IGU!");
        }
 }
 
@@ -5631,7 +5671,7 @@ static void bnx2x_nic_init(struct bnx2x_softc *sc, int load_code)
 {
        int i;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        for (i = 0; i < sc->num_queues; i++) {
                bnx2x_init_eth_fp(sc, i);
@@ -5761,7 +5801,7 @@ static int bnx2x_set_power_state(struct bnx2x_softc *sc, uint8_t state)
 
        /* If there is no power capability, silently succeed */
        if (!(sc->devinfo.pcie_cap_flags & BNX2X_PM_CAPABLE_FLAG)) {
-               PMD_DRV_LOG(WARNING, "No power capability");
+               PMD_DRV_LOG(WARNING, sc, "No power capability");
                return 0;
        }
 
@@ -5806,7 +5846,7 @@ static int bnx2x_set_power_state(struct bnx2x_softc *sc, uint8_t state)
                break;
 
        default:
-               PMD_DRV_LOG(NOTICE, "Can't support PCI power state = %d",
+               PMD_DRV_LOG(NOTICE, sc, "Can't support PCI power state = %d",
                            state);
                return -1;
        }
@@ -5824,7 +5864,7 @@ static uint8_t bnx2x_trylock_hw_lock(struct bnx2x_softc *sc, uint32_t resource)
 
        /* Validating that the resource is within range */
        if (resource > HW_LOCK_MAX_RESOURCE_VALUE) {
-               PMD_DRV_LOG(INFO,
+               PMD_DRV_LOG(INFO, sc,
                            "resource(0x%x) > HW_LOCK_MAX_RESOURCE_VALUE(0x%x)",
                            resource, HW_LOCK_MAX_RESOURCE_VALUE);
                return FALSE;
@@ -5844,7 +5884,7 @@ static uint8_t bnx2x_trylock_hw_lock(struct bnx2x_softc *sc, uint32_t resource)
                return TRUE;
        }
 
-       PMD_DRV_LOG(NOTICE, "Failed to get a resource lock 0x%x", resource);
+       PMD_DRV_LOG(NOTICE, sc, "Failed to get a resource lock 0x%x", resource);
 
        return FALSE;
 }
@@ -5937,7 +5977,7 @@ static int bnx2x_er_poll_igu_vq(struct bnx2x_softc *sc)
        } while (cnt-- > 0);
 
        if (cnt <= 0) {
-               PMD_DRV_LOG(NOTICE, "Still pending IGU requests bits=0x%08x!",
+               PMD_DRV_LOG(NOTICE, sc, "Still pending IGU requests bits=0x%08x!",
                            pend_bits);
                return -1;
        }
@@ -6018,7 +6058,7 @@ static int bnx2x_init_shmem(struct bnx2x_softc *sc)
 
        } while (cnt++ < (MCP_TIMEOUT / MCP_ONE_TIMEOUT));
 
-       PMD_DRV_LOG(NOTICE, "BAD MCP validity signature");
+       PMD_DRV_LOG(NOTICE, sc, "BAD MCP validity signature");
 
        return -1;
 }
@@ -6173,7 +6213,7 @@ static int bnx2x_process_kill(struct bnx2x_softc *sc, uint8_t global)
        } while (cnt-- > 0);
 
        if (cnt <= 0) {
-               PMD_DRV_LOG(NOTICE,
+               PMD_DRV_LOG(NOTICE, sc,
                            "ERROR: Tetris buffer didn't get empty or there "
                            "are still outstanding read requests after 1s! "
                            "sr_cnt=0x%08x, blk_cnt=0x%08x, port_is_idle_0=0x%08x, "
@@ -6246,14 +6286,14 @@ static int bnx2x_leader_reset(struct bnx2x_softc *sc)
                load_code = bnx2x_fw_command(sc, DRV_MSG_CODE_LOAD_REQ,
                                           DRV_MSG_CODE_LOAD_REQ_WITH_LFA);
                if (!load_code) {
-                       PMD_DRV_LOG(NOTICE, "MCP response failure, aborting");
+                       PMD_DRV_LOG(NOTICE, sc, "MCP response failure, aborting");
                        rc = -1;
                        goto exit_leader_reset;
                }
 
                if ((load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP) &&
                    (load_code != FW_MSG_CODE_DRV_LOAD_COMMON)) {
-                       PMD_DRV_LOG(NOTICE,
+                       PMD_DRV_LOG(NOTICE, sc,
                                    "MCP unexpected response, aborting");
                        rc = -1;
                        goto exit_leader_reset2;
@@ -6261,7 +6301,7 @@ static int bnx2x_leader_reset(struct bnx2x_softc *sc)
 
                load_code = bnx2x_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0);
                if (!load_code) {
-                       PMD_DRV_LOG(NOTICE, "MCP response failure, aborting");
+                       PMD_DRV_LOG(NOTICE, sc, "MCP response failure, aborting");
                        rc = -1;
                        goto exit_leader_reset2;
                }
@@ -6269,7 +6309,7 @@ static int bnx2x_leader_reset(struct bnx2x_softc *sc)
 
        /* try to recover after the failure */
        if (bnx2x_process_kill(sc, global)) {
-               PMD_DRV_LOG(NOTICE, "Something bad occurred on engine %d!",
+               PMD_DRV_LOG(NOTICE, sc, "Something bad occurred on engine %d!",
                            SC_PATH(sc));
                rc = -1;
                goto exit_leader_reset2;
@@ -6428,12 +6468,12 @@ bnx2x_pf_rx_q_prep(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
        /* validate rings have enough entries to cross high thresholds */
        if (sc->dropless_fc &&
            pause->bd_th_hi + FW_PREFETCH_CNT > sc->rx_ring_size) {
-               PMD_DRV_LOG(WARNING, "rx bd ring threshold limit");
+               PMD_DRV_LOG(WARNING, sc, "rx bd ring threshold limit");
        }
 
        if (sc->dropless_fc &&
            pause->rcq_th_hi + FW_PREFETCH_CNT > USABLE_RCQ_ENTRIES(rxq)) {
-               PMD_DRV_LOG(WARNING, "rcq ring threshold limit");
+               PMD_DRV_LOG(WARNING, sc, "rcq ring threshold limit");
        }
 
        pause->pri_map = 1;
@@ -6504,7 +6544,7 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
        struct ecore_queue_setup_params *setup_params = &q_params.params.setup;
        int rc;
 
-       PMD_DRV_LOG(DEBUG, "setting up queue %d", fp->index);
+       PMD_DRV_LOG(DEBUG, sc, "setting up queue %d", fp->index);
 
        bnx2x_ack_sb(sc, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_ENABLE, 0);
 
@@ -6522,11 +6562,11 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
        /* Change the state to INIT */
        rc = ecore_queue_state_change(sc, &q_params);
        if (rc) {
-               PMD_DRV_LOG(NOTICE, "Queue(%d) INIT failed", fp->index);
+               PMD_DRV_LOG(NOTICE, sc, "Queue(%d) INIT failed", fp->index);
                return rc;
        }
 
-       PMD_DRV_LOG(DEBUG, "init complete");
+       PMD_DRV_LOG(DEBUG, sc, "init complete");
 
        /* now move the Queue to the SETUP state */
        memset(setup_params, 0, sizeof(*setup_params));
@@ -6550,7 +6590,7 @@ bnx2x_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, uint8_t lea
        /* change the state to SETUP */
        rc = ecore_queue_state_change(sc, &q_params);
        if (rc) {
-               PMD_DRV_LOG(NOTICE, "Queue(%d) SETUP failed", fp->index);
+               PMD_DRV_LOG(NOTICE, sc, "Queue(%d) SETUP failed", fp->index);
                return rc;
        }
 
@@ -6678,11 +6718,11 @@ bnx2x_set_mac_one(struct bnx2x_softc *sc, uint8_t * mac,
        rc = ecore_config_vlan_mac(sc, &ramrod_param);
 
        if (rc == ECORE_EXISTS) {
-               PMD_DRV_LOG(INFO, "Failed to schedule ADD operations (EEXIST)");
+               PMD_DRV_LOG(INFO, sc, "Failed to schedule ADD operations (EEXIST)");
 /* do not treat adding same MAC as error */
                rc = 0;
        } else if (rc < 0) {
-               PMD_DRV_LOG(ERR,
+               PMD_DRV_LOG(ERR, sc,
                            "%s MAC failed (%d)", (set ? "Set" : "Delete"), rc);
        }
 
@@ -6693,7 +6733,7 @@ static int bnx2x_set_eth_mac(struct bnx2x_softc *sc, uint8_t set)
 {
        unsigned long ramrod_flags = 0;
 
-       PMD_DRV_LOG(DEBUG, "Adding Ethernet MAC");
+       PMD_DRV_LOG(DEBUG, sc, "Adding Ethernet MAC");
 
        bnx2x_set_bit(RAMROD_COMP_WAIT, &ramrod_flags);
 
@@ -6849,7 +6889,7 @@ bnx2x_fill_report_data(struct bnx2x_softc *sc, struct bnx2x_link_report_data *da
 }
 
 /* report link status to OS, should be called under phy_lock */
-static void bnx2x_link_report(struct bnx2x_softc *sc)
+static void bnx2x_link_report_locked(struct bnx2x_softc *sc)
 {
        struct bnx2x_link_report_data cur_data;
 
@@ -6870,14 +6910,19 @@ static void bnx2x_link_report(struct bnx2x_softc *sc)
                return;
        }
 
+       PMD_DRV_LOG(INFO, sc, "Change in link status: cur_data = %lx, last_reported_link = %lx",
+                   cur_data.link_report_flags,
+                   sc->last_reported_link.link_report_flags);
+
        sc->link_cnt++;
 
+       PMD_DRV_LOG(INFO, sc, "link status change count = %x", sc->link_cnt);
        /* report new link params and remember the state for the next time */
        rte_memcpy(&sc->last_reported_link, &cur_data, sizeof(cur_data));
 
        if (bnx2x_test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
                         &cur_data.link_report_flags)) {
-               PMD_DRV_LOG(INFO, "NIC Link is Down");
+               PMD_DRV_LOG(INFO, sc, "NIC Link is Down");
        } else {
                __rte_unused const char *duplex;
                __rte_unused const char *flow;
@@ -6917,12 +6962,20 @@ static void bnx2x_link_report(struct bnx2x_softc *sc)
                        flow = "none";
                }
 
-               PMD_DRV_LOG(INFO,
+               PMD_DRV_LOG(INFO, sc,
                            "NIC Link is Up, %d Mbps %s duplex, Flow control: %s",
                            cur_data.line_speed, duplex, flow);
        }
 }
 
+static void
+bnx2x_link_report(struct bnx2x_softc *sc)
+{
+       bnx2x_acquire_phy_lock(sc);
+       bnx2x_link_report_locked(sc);
+       bnx2x_release_phy_lock(sc);
+}
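
The rename above follows the common _locked split: bnx2x_link_report_locked() assumes the caller already holds the PHY lock, while the new wrapper keeps the old entry point usable from unlocked contexts. Generic shape of the convention (illustrative names, pthread as a stand-in for the driver's lock):

#include <pthread.h>

struct link_sketch {
	pthread_mutex_t phy_mtx;
};

/* caller must already hold sc->phy_mtx */
static void sketch_report_locked(struct link_sketch *sc)
{
	(void)sc;	/* ... format and emit the current link state ... */
}

static void sketch_report(struct link_sketch *sc)
{
	pthread_mutex_lock(&sc->phy_mtx);
	sketch_report_locked(sc);
	pthread_mutex_unlock(&sc->phy_mtx);
}
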
+
 void bnx2x_link_status_update(struct bnx2x_softc *sc)
 {
        if (sc->state != BNX2X_STATE_OPEN) {
@@ -7001,6 +7054,8 @@ static int bnx2x_initial_phy_init(struct bnx2x_softc *sc, int load_mode)
 
        bnx2x_set_requested_fc(sc);
 
+       bnx2x_acquire_phy_lock(sc);
+
        if (load_mode == LOAD_DIAG) {
                lp->loopback_mode = ELINK_LOOPBACK_XGXS;
 /* Prefer doing PHY loopback at 10G speed, if possible */
@@ -7020,6 +7075,8 @@ static int bnx2x_initial_phy_init(struct bnx2x_softc *sc, int load_mode)
 
        rc = elink_phy_init(&sc->link_params, &sc->link_vars);
 
+       bnx2x_release_phy_lock(sc);
+
        bnx2x_calc_fc_adv(sc);
 
        if (sc->link_vars.link_up) {
@@ -7058,7 +7115,7 @@ void bnx2x_periodic_callout(struct bnx2x_softc *sc)
 {
        if ((sc->state != BNX2X_STATE_OPEN) ||
            (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_STOP)) {
-               PMD_DRV_LOG(WARNING, "periodic callout exit (state=0x%x)",
+               PMD_DRV_LOG(INFO, sc, "periodic callout exit (state=0x%x)",
                            sc->state);
                return;
        }
@@ -7070,7 +7127,9 @@ void bnx2x_periodic_callout(struct bnx2x_softc *sc)
  */
                mb();
                if (sc->port.pmf) {
+                       bnx2x_acquire_phy_lock(sc);
                        elink_period_func(&sc->link_params, &sc->link_vars);
+                       bnx2x_release_phy_lock(sc);
                }
        }
 #ifdef BNX2X_PULSE
@@ -7095,7 +7154,7 @@ void bnx2x_periodic_callout(struct bnx2x_softc *sc)
                if ((drv_pulse != mcp_pulse) &&
                    (drv_pulse != ((mcp_pulse + 1) & MCP_PULSE_SEQ_MASK))) {
                        /* someone lost a heartbeat... */
-                       PMD_DRV_LOG(ERR,
+                       PMD_DRV_LOG(ERR, sc,
                                    "drv_pulse (0x%x) != mcp_pulse (0x%x)",
                                    drv_pulse, mcp_pulse);
                }
@@ -7111,7 +7170,7 @@ int bnx2x_nic_load(struct bnx2x_softc *sc)
        uint32_t load_code = 0;
        int i, rc = 0;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        sc->state = BNX2X_STATE_OPENING_WAITING_LOAD;
 
@@ -7165,7 +7224,7 @@ int bnx2x_nic_load(struct bnx2x_softc *sc)
                                goto bnx2x_nic_load_error2;
                        }
                } else {
-                       PMD_DRV_LOG(INFO, "Device has no MCP!");
+                       PMD_DRV_LOG(INFO, sc, "Device has no MCP!");
                        load_code = bnx2x_nic_load_no_mcp(sc);
                }
 
@@ -7177,7 +7236,7 @@ int bnx2x_nic_load(struct bnx2x_softc *sc)
 
 /* Initialize HW */
                if (bnx2x_init_hw(sc, load_code) != 0) {
-                       PMD_DRV_LOG(NOTICE, "HW init failed");
+                       PMD_DRV_LOG(NOTICE, sc, "HW init failed");
                        bnx2x_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0);
                        sc->state = BNX2X_STATE_CLOSED;
                        rc = -ENXIO;
@@ -7197,7 +7256,7 @@ int bnx2x_nic_load(struct bnx2x_softc *sc)
                sc->state = BNX2X_STATE_OPENING_WAITING_PORT;
                rc = bnx2x_func_start(sc);
                if (rc) {
-                       PMD_DRV_LOG(NOTICE, "Function start failed!");
+                       PMD_DRV_LOG(NOTICE, sc, "Function start failed!");
                        bnx2x_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0);
                        sc->state = BNX2X_STATE_ERROR;
                        goto bnx2x_nic_load_error3;
@@ -7208,7 +7267,7 @@ int bnx2x_nic_load(struct bnx2x_softc *sc)
                        load_code =
                            bnx2x_fw_command(sc, DRV_MSG_CODE_LOAD_DONE, 0);
                        if (!load_code) {
-                               PMD_DRV_LOG(NOTICE,
+                               PMD_DRV_LOG(NOTICE, sc,
                                            "MCP response failure, aborting");
                                sc->state = BNX2X_STATE_ERROR;
                                rc = -ENXIO;
@@ -7219,7 +7278,7 @@ int bnx2x_nic_load(struct bnx2x_softc *sc)
 
        rc = bnx2x_setup_leading(sc);
        if (rc) {
-               PMD_DRV_LOG(NOTICE, "Setup leading failed!");
+               PMD_DRV_LOG(NOTICE, sc, "Setup leading failed!");
                sc->state = BNX2X_STATE_ERROR;
                goto bnx2x_nic_load_error3;
        }
@@ -7231,7 +7290,7 @@ int bnx2x_nic_load(struct bnx2x_softc *sc)
                        rc = bnx2x_vf_setup_queue(sc, &sc->fp[i], FALSE);
 
                if (rc) {
-                       PMD_DRV_LOG(NOTICE, "Queue(%d) setup failed", i);
+                       PMD_DRV_LOG(NOTICE, sc, "Queue(%d) setup failed", i);
                        sc->state = BNX2X_STATE_ERROR;
                        goto bnx2x_nic_load_error3;
                }
@@ -7239,7 +7298,7 @@ int bnx2x_nic_load(struct bnx2x_softc *sc)
 
        rc = bnx2x_init_rss_pf(sc);
        if (rc) {
-               PMD_DRV_LOG(NOTICE, "PF RSS init failed");
+               PMD_DRV_LOG(NOTICE, sc, "PF RSS init failed");
                sc->state = BNX2X_STATE_ERROR;
                goto bnx2x_nic_load_error3;
        }
@@ -7255,7 +7314,7 @@ int bnx2x_nic_load(struct bnx2x_softc *sc)
        }
 
        if (rc) {
-               PMD_DRV_LOG(NOTICE, "Setting Ethernet MAC failed");
+               PMD_DRV_LOG(NOTICE, sc, "Setting Ethernet MAC failed");
                sc->state = BNX2X_STATE_ERROR;
                goto bnx2x_nic_load_error3;
        }
@@ -7307,13 +7366,13 @@ int bnx2x_nic_load(struct bnx2x_softc *sc)
 
        /* wait for all pending SP commands to complete */
        if (IS_PF(sc) && !bnx2x_wait_sp_comp(sc, ~0x0UL)) {
-               PMD_DRV_LOG(NOTICE, "Timeout waiting for all SPs to complete!");
+               PMD_DRV_LOG(NOTICE, sc, "Timeout waiting for all SPs to complete!");
                bnx2x_periodic_stop(sc);
                bnx2x_nic_unload(sc, UNLOAD_CLOSE, FALSE);
                return -ENXIO;
        }
 
-       PMD_DRV_LOG(DEBUG, "NIC successfully loaded");
+       PMD_DRV_LOG(DEBUG, sc, "NIC successfully loaded");
 
        return 0;
 
@@ -7362,7 +7421,7 @@ int bnx2x_init(struct bnx2x_softc *sc)
 
        /* Check if the driver is still running and bail out if it is. */
        if (sc->state != BNX2X_STATE_CLOSED) {
-               PMD_DRV_LOG(DEBUG, "Init called while driver is running!");
+               PMD_DRV_LOG(DEBUG, sc, "Init called while driver is running!");
                rc = 0;
                goto bnx2x_init_done;
        }
@@ -7400,7 +7459,7 @@ int bnx2x_init(struct bnx2x_softc *sc)
                                     && (!global || !other_load_status))
                                    && bnx2x_trylock_leader_lock(sc)
                                    && !bnx2x_leader_reset(sc)) {
-                                       PMD_DRV_LOG(INFO,
+                                       PMD_DRV_LOG(INFO, sc,
                                                    "Recovered during init");
                                        break;
                                }
@@ -7410,7 +7469,7 @@ int bnx2x_init(struct bnx2x_softc *sc)
 
                                sc->recovery_state = BNX2X_RECOVERY_FAILED;
 
-                               PMD_DRV_LOG(NOTICE,
+                               PMD_DRV_LOG(NOTICE, sc,
                                            "Recovery flow hasn't properly "
                                            "completed yet, try again later. "
                                            "If you still see this message after a "
@@ -7429,7 +7488,7 @@ int bnx2x_init(struct bnx2x_softc *sc)
 bnx2x_init_done:
 
        if (rc) {
-               PMD_DRV_LOG(NOTICE, "Initialization failed, "
+               PMD_DRV_LOG(NOTICE, sc, "Initialization failed, "
                            "stack notified driver is NOT running!");
        }
 
@@ -7461,7 +7520,7 @@ static void bnx2x_get_function_num(struct bnx2x_softc *sc)
                sc->pfunc_abs = (sc->pfunc_rel | sc->path_id);
        }
 
-       PMD_DRV_LOG(DEBUG,
+       PMD_DRV_LOG(DEBUG, sc,
                    "Relative function %d, Absolute function %d, Path %d",
                    sc->pfunc_rel, sc->pfunc_abs, sc->path_id);
 }
@@ -7498,14 +7557,14 @@ static uint32_t bnx2x_pcie_capability_read(struct bnx2x_softc *sc, int reg)
        /* ensure PCIe capability is enabled */
        caps = pci_find_cap(sc, PCIY_EXPRESS, BNX2X_PCI_CAP);
        if (NULL != caps) {
-               PMD_DRV_LOG(DEBUG, "Found PCIe capability: "
+               PMD_DRV_LOG(DEBUG, sc, "Found PCIe capability: "
                            "id=0x%04X type=0x%04X addr=0x%08X",
                            caps->id, caps->type, caps->addr);
                pci_read(sc, (caps->addr + reg), &ret, 2);
                return ret;
        }
 
-       PMD_DRV_LOG(WARNING, "PCIe capability NOT FOUND!!!");
+       PMD_DRV_LOG(WARNING, sc, "PCIe capability NOT FOUND!!!");
 
        return 0;
 }
@@ -7523,7 +7582,7 @@ static uint8_t bnx2x_is_pcie_pending(struct bnx2x_softc *sc)
 */
 static void bnx2x_probe_pci_caps(struct bnx2x_softc *sc)
 {
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        struct bnx2x_pci_cap *caps;
        uint16_t link_status;
@@ -7532,7 +7591,7 @@ static void bnx2x_probe_pci_caps(struct bnx2x_softc *sc)
        /* check if PCI Power Management is enabled */
        caps = pci_find_cap(sc, PCIY_PMG, BNX2X_PCI_CAP);
        if (NULL != caps) {
-               PMD_DRV_LOG(DEBUG, "Found PM capability: "
+               PMD_DRV_LOG(DEBUG, sc, "Found PM capability: "
                            "id=0x%04X type=0x%04X addr=0x%08X",
                            caps->id, caps->type, caps->addr);
 
@@ -7546,7 +7605,7 @@ static void bnx2x_probe_pci_caps(struct bnx2x_softc *sc)
        sc->devinfo.pcie_link_width =
            ((link_status & PCIM_LINK_STA_WIDTH) >> 4);
 
-       PMD_DRV_LOG(DEBUG, "PCIe link speed=%d width=%d",
+       PMD_DRV_LOG(DEBUG, sc, "PCIe link speed=%d width=%d",
                    sc->devinfo.pcie_link_speed, sc->devinfo.pcie_link_width);
 
        sc->devinfo.pcie_cap_flags |= BNX2X_PCIE_CAPABLE_FLAG;
@@ -7554,7 +7613,7 @@ static void bnx2x_probe_pci_caps(struct bnx2x_softc *sc)
        /* check if MSI capability is enabled */
        caps = pci_find_cap(sc, PCIY_MSI, BNX2X_PCI_CAP);
        if (NULL != caps) {
-               PMD_DRV_LOG(DEBUG, "Found MSI capability at 0x%04x", reg);
+               PMD_DRV_LOG(DEBUG, sc, "Found MSI capability at 0x%04x", reg);
 
                sc->devinfo.pcie_cap_flags |= BNX2X_MSI_CAPABLE_FLAG;
                sc->devinfo.pcie_msi_cap_reg = caps->addr;
@@ -7563,7 +7622,7 @@ static void bnx2x_probe_pci_caps(struct bnx2x_softc *sc)
        /* check if MSI-X capability is enabled */
        caps = pci_find_cap(sc, PCIY_MSIX, BNX2X_PCI_CAP);
        if (NULL != caps) {
-               PMD_DRV_LOG(DEBUG, "Found MSI-X capability at 0x%04x", reg);
+               PMD_DRV_LOG(DEBUG, sc, "Found MSI-X capability at 0x%04x", reg);
 
                sc->devinfo.pcie_cap_flags |= BNX2X_MSIX_CAPABLE_FLAG;
                sc->devinfo.pcie_msix_cap_reg = caps->addr;
@@ -7583,7 +7642,7 @@ static int bnx2x_get_shmem_mf_cfg_info_sd(struct bnx2x_softc *sc)
        mf_info->multi_vnics_mode = 1;
 
        if (!VALID_OVLAN(mf_info->ext_id)) {
-               PMD_DRV_LOG(NOTICE, "Invalid VLAN (%d)", mf_info->ext_id);
+               PMD_DRV_LOG(NOTICE, sc, "Invalid VLAN (%d)", mf_info->ext_id);
                return 1;
        }
 
@@ -7707,14 +7766,14 @@ static int bnx2x_check_valid_mf_cfg(struct bnx2x_softc *sc)
        /* various MF mode sanity checks... */
 
        if (mf_info->mf_config[SC_VN(sc)] & FUNC_MF_CFG_FUNC_HIDE) {
-               PMD_DRV_LOG(NOTICE,
+               PMD_DRV_LOG(NOTICE, sc,
                            "Enumerated function %d is marked as hidden",
                            SC_PORT(sc));
                return 1;
        }
 
        if ((mf_info->vnics_per_port > 1) && !mf_info->multi_vnics_mode) {
-               PMD_DRV_LOG(NOTICE, "vnics_per_port=%d multi_vnics_mode=%d",
+               PMD_DRV_LOG(NOTICE, sc, "vnics_per_port=%d multi_vnics_mode=%d",
                            mf_info->vnics_per_port, mf_info->multi_vnics_mode);
                return 1;
        }
@@ -7722,13 +7781,13 @@ static int bnx2x_check_valid_mf_cfg(struct bnx2x_softc *sc)
        if (mf_info->mf_mode == MULTI_FUNCTION_SD) {
 /* vnic id > 0 must have valid ovlan in switch-dependent mode */
                if ((SC_VN(sc) > 0) && !VALID_OVLAN(OVLAN(sc))) {
-                       PMD_DRV_LOG(NOTICE, "mf_mode=SD vnic_id=%d ovlan=%d",
+                       PMD_DRV_LOG(NOTICE, sc, "mf_mode=SD vnic_id=%d ovlan=%d",
                                    SC_VN(sc), OVLAN(sc));
                        return 1;
                }
 
                if (!VALID_OVLAN(OVLAN(sc)) && mf_info->multi_vnics_mode) {
-                       PMD_DRV_LOG(NOTICE,
+                       PMD_DRV_LOG(NOTICE, sc,
                                    "mf_mode=SD multi_vnics_mode=%d ovlan=%d",
                                    mf_info->multi_vnics_mode, OVLAN(sc));
                        return 1;
@@ -7747,7 +7806,7 @@ static int bnx2x_check_valid_mf_cfg(struct bnx2x_softc *sc)
                              && !VALID_OVLAN(ovlan1))
                             || ((!mf_info->multi_vnics_mode)
                                 && VALID_OVLAN(ovlan1)))) {
-                               PMD_DRV_LOG(NOTICE,
+                               PMD_DRV_LOG(NOTICE, sc,
                                            "mf_mode=SD function %d MF config "
                                            "mismatch, multi_vnics_mode=%d ovlan=%d",
                                            i, mf_info->multi_vnics_mode,
@@ -7771,7 +7830,7 @@ static int bnx2x_check_valid_mf_cfg(struct bnx2x_softc *sc)
                                    && !(mf_cfg2 & FUNC_MF_CFG_FUNC_HIDE)
                                    && VALID_OVLAN(ovlan2)
                                    && (ovlan1 == ovlan2)) {
-                                       PMD_DRV_LOG(NOTICE,
+                                       PMD_DRV_LOG(NOTICE, sc,
                                                    "mf_mode=SD functions %d and %d "
                                                    "have the same ovlan (%d)",
                                                    i, j, ovlan1);
@@ -7801,7 +7860,7 @@ static int bnx2x_get_mf_cfg_info(struct bnx2x_softc *sc)
        }
 
        if (sc->devinfo.mf_cfg_base == SHMEM_MF_CFG_ADDR_NONE) {
-               PMD_DRV_LOG(NOTICE, "Invalid mf_cfg_base!");
+               PMD_DRV_LOG(NOTICE, sc, "Invalid mf_cfg_base!");
                return 1;
        }
 
@@ -7819,7 +7878,7 @@ static int bnx2x_get_mf_cfg_info(struct bnx2x_softc *sc)
                if (mac_upper != FUNC_MF_CFG_UPPERMAC_DEFAULT) {
                        mf_info->mf_mode = MULTI_FUNCTION_SI;
                } else {
-                       PMD_DRV_LOG(NOTICE,
+                       PMD_DRV_LOG(NOTICE, sc,
                                    "Invalid config for Switch Independent mode");
                }
 
@@ -7835,7 +7894,7 @@ static int bnx2x_get_mf_cfg_info(struct bnx2x_softc *sc)
                    FUNC_MF_CFG_E1HOV_TAG_DEFAULT) {
                        mf_info->mf_mode = MULTI_FUNCTION_SD;
                } else {
-                       PMD_DRV_LOG(NOTICE,
+                       PMD_DRV_LOG(NOTICE, sc,
                                    "Invalid config for Switch Dependent mode");
                }
 
@@ -7859,14 +7918,14 @@ static int bnx2x_get_mf_cfg_info(struct bnx2x_softc *sc)
                    (mac_upper != FUNC_MF_CFG_UPPERMAC_DEFAULT)) {
                        mf_info->mf_mode = MULTI_FUNCTION_AFEX;
                } else {
-                       PMD_DRV_LOG(NOTICE, "Invalid config for AFEX mode");
+                       PMD_DRV_LOG(NOTICE, sc, "Invalid config for AFEX mode");
                }
 
                break;
 
        default:
 
-               PMD_DRV_LOG(NOTICE, "Unknown MF mode (0x%08x)",
+               PMD_DRV_LOG(NOTICE, sc, "Unknown MF mode (0x%08x)",
                            (val & SHARED_FEAT_CFG_FORCE_SF_MODE_MASK));
 
                return 1;
@@ -7898,7 +7957,7 @@ static int bnx2x_get_mf_cfg_info(struct bnx2x_softc *sc)
        if (mf_info->mf_mode == SINGLE_FUNCTION) {
 /* invalid MF config */
                if (SC_VN(sc) >= 1) {
-                       PMD_DRV_LOG(NOTICE, "VNIC ID >= 1 in SF mode");
+                       PMD_DRV_LOG(NOTICE, sc, "VNIC ID >= 1 in SF mode");
                        return 1;
                }
 
@@ -7927,7 +7986,7 @@ static int bnx2x_get_mf_cfg_info(struct bnx2x_softc *sc)
 
        default:
 
-               PMD_DRV_LOG(NOTICE, "Get MF config failed (mf_mode=0x%08x)",
+               PMD_DRV_LOG(NOTICE, sc, "Get MF config failed (mf_mode=0x%08x)",
                            mf_info->mf_mode);
                return 1;
        }
@@ -7955,7 +8014,7 @@ static int bnx2x_get_shmem_info(struct bnx2x_softc *sc)
        int port;
        uint32_t mac_hi, mac_lo, val;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        port = SC_PORT(sc);
        mac_hi = mac_lo = 0;
@@ -8029,7 +8088,7 @@ static int bnx2x_get_shmem_info(struct bnx2x_softc *sc)
 
        if ((mac_lo == 0) && (mac_hi == 0)) {
                *sc->mac_addr_str = 0;
-               PMD_DRV_LOG(NOTICE, "No Ethernet address programmed!");
+               PMD_DRV_LOG(NOTICE, sc, "No Ethernet address programmed!");
        } else {
                sc->link_params.mac_addr[0] = (uint8_t) (mac_hi >> 8);
                sc->link_params.mac_addr[1] = (uint8_t) (mac_hi);
@@ -8045,7 +8104,8 @@ static int bnx2x_get_shmem_info(struct bnx2x_softc *sc)
                         sc->link_params.mac_addr[3],
                         sc->link_params.mac_addr[4],
                         sc->link_params.mac_addr[5]);
-               PMD_DRV_LOG(DEBUG, "Ethernet address: %s", sc->mac_addr_str);
+               PMD_DRV_LOG(DEBUG, sc,
+                           "Ethernet address: %s", sc->mac_addr_str);
        }
 
        return 0;
@@ -8060,24 +8120,24 @@ static void bnx2x_media_detect(struct bnx2x_softc *sc)
        case ELINK_ETH_PHY_XFP_FIBER:
        case ELINK_ETH_PHY_KR:
        case ELINK_ETH_PHY_CX4:
-               PMD_DRV_LOG(INFO, "Found 10GBase-CX4 media.");
+               PMD_DRV_LOG(INFO, sc, "Found 10GBase-CX4 media.");
                sc->media = IFM_10G_CX4;
                break;
        case ELINK_ETH_PHY_DA_TWINAX:
-               PMD_DRV_LOG(INFO, "Found 10Gb Twinax media.");
+               PMD_DRV_LOG(INFO, sc, "Found 10Gb Twinax media.");
                sc->media = IFM_10G_TWINAX;
                break;
        case ELINK_ETH_PHY_BASE_T:
-               PMD_DRV_LOG(INFO, "Found 10GBase-T media.");
+               PMD_DRV_LOG(INFO, sc, "Found 10GBase-T media.");
                sc->media = IFM_10G_T;
                break;
        case ELINK_ETH_PHY_NOT_PRESENT:
-               PMD_DRV_LOG(INFO, "Media not present.");
+               PMD_DRV_LOG(INFO, sc, "Media not present.");
                sc->media = 0;
                break;
        case ELINK_ETH_PHY_UNSPECIFIED:
        default:
-               PMD_DRV_LOG(INFO, "Unknown media!");
+               PMD_DRV_LOG(INFO, sc, "Unknown media!");
                sc->media = 0;
                break;
        }
@@ -8140,7 +8200,7 @@ static int bnx2x_get_igu_cam_info(struct bnx2x_softc *sc)
        sc->igu_sb_cnt = min(sc->igu_sb_cnt, igu_sb_cnt);
 
        if (igu_sb_cnt == 0) {
-               PMD_DRV_LOG(ERR, "CAM configuration error");
+               PMD_DRV_LOG(ERR, sc, "CAM configuration error");
                return -1;
        }
 
@@ -8177,7 +8237,7 @@ static int bnx2x_get_device_info(struct bnx2x_softc *sc)
                sc->devinfo.chip_id |= 0x1;
        }
 
-       PMD_DRV_LOG(DEBUG,
+       PMD_DRV_LOG(DEBUG, sc,
                    "chip_id=0x%08x (num=0x%04x rev=0x%01x metal=0x%02x bond=0x%01x)",
                    sc->devinfo.chip_id,
                    ((sc->devinfo.chip_id >> 16) & 0xffff),
@@ -8188,7 +8248,7 @@ static int bnx2x_get_device_info(struct bnx2x_softc *sc)
        val = (REG_RD(sc, 0x2874) & 0x55);
        if ((sc->devinfo.chip_id & 0x1) || (CHIP_IS_E1H(sc) && (val == 0x55))) {
                sc->flags |= BNX2X_ONE_PORT_FLAG;
-               PMD_DRV_LOG(DEBUG, "single port device");
+               PMD_DRV_LOG(DEBUG, sc, "single port device");
        }
 
        /* set the doorbell size */
@@ -8212,7 +8272,7 @@ static int bnx2x_get_device_info(struct bnx2x_softc *sc)
                sc->devinfo.chip_port_mode =
                    (val) ? CHIP_4_PORT_MODE : CHIP_2_PORT_MODE;
 
-               PMD_DRV_LOG(DEBUG, "Port mode = %s", (val) ? "4" : "2");
+               PMD_DRV_LOG(DEBUG, sc, "Port mode = %s", (val) ? "4" : "2");
        }
 
        /* get the function and path info for the device */
@@ -8227,7 +8287,7 @@ static int bnx2x_get_device_info(struct bnx2x_softc *sc)
 
        if (!sc->devinfo.shmem_base) {
 /* this should ONLY prevent upcoming shmem reads */
-               PMD_DRV_LOG(INFO, "MCP not active");
+               PMD_DRV_LOG(INFO, sc, "MCP not active");
                sc->flags |= BNX2X_NO_MCP_FLAG;
                return 0;
        }
@@ -8236,7 +8296,7 @@ static int bnx2x_get_device_info(struct bnx2x_softc *sc)
        val = SHMEM_RD(sc, validity_map[SC_PORT(sc)]);
        if ((val & (SHR_MEM_VALIDITY_DEV_INFO | SHR_MEM_VALIDITY_MB)) !=
            (SHR_MEM_VALIDITY_DEV_INFO | SHR_MEM_VALIDITY_MB)) {
-               PMD_DRV_LOG(NOTICE, "Invalid SHMEM validity signature: 0x%08x",
+               PMD_DRV_LOG(NOTICE, sc, "Invalid SHMEM validity signature: 0x%08x",
                            val);
                return 0;
        }
@@ -8249,7 +8309,7 @@ static int bnx2x_get_device_info(struct bnx2x_softc *sc)
                 ((sc->devinfo.bc_ver >> 24) & 0xff),
                 ((sc->devinfo.bc_ver >> 16) & 0xff),
                 ((sc->devinfo.bc_ver >> 8) & 0xff));
-       PMD_DRV_LOG(INFO, "Bootcode version: %s", sc->devinfo.bc_ver_str);
+       PMD_DRV_LOG(INFO, sc, "Bootcode version: %s", sc->devinfo.bc_ver_str);
 
        /* get the bootcode shmem address */
        sc->devinfo.mf_cfg_base = bnx2x_get_shmem_mf_cfg_base(sc);
@@ -8304,7 +8364,7 @@ static int bnx2x_get_device_info(struct bnx2x_softc *sc)
                        }
 
                        if (REG_RD(sc, IGU_REG_RESET_MEMORIES)) {
-                               PMD_DRV_LOG(NOTICE,
+                               PMD_DRV_LOG(NOTICE, sc,
                                            "FORCING IGU Normal Mode failed!!!");
                                bnx2x_release_hw_lock(sc, HW_LOCK_RESOURCE_RESET);
                                return -1;
@@ -8312,10 +8372,10 @@ static int bnx2x_get_device_info(struct bnx2x_softc *sc)
                }
 
                if (val & IGU_BLOCK_CONFIGURATION_REG_BACKWARD_COMP_EN) {
-                       PMD_DRV_LOG(DEBUG, "IGU Backward Compatible Mode");
+                       PMD_DRV_LOG(DEBUG, sc, "IGU Backward Compatible Mode");
                        sc->devinfo.int_block |= INT_BLOCK_MODE_BW_COMP;
                } else {
-                       PMD_DRV_LOG(DEBUG, "IGU Normal Mode");
+                       PMD_DRV_LOG(DEBUG, sc, "IGU Normal Mode");
                }
 
                rc = bnx2x_get_igu_cam_info(sc);
@@ -8389,7 +8449,7 @@ bnx2x_link_settings_supported(struct bnx2x_softc *sc, uint32_t switch_cfg)
        }
 
        if (!(sc->port.supported[0] || sc->port.supported[1])) {
-               PMD_DRV_LOG(ERR,
+               PMD_DRV_LOG(ERR, sc,
                            "Invalid phy config in NVRAM (PHY1=0x%08x PHY2=0x%08x)",
                            SHMEM_RD(sc,
                                     dev_info.port_hw_config
@@ -8415,7 +8475,7 @@ bnx2x_link_settings_supported(struct bnx2x_softc *sc, uint32_t switch_cfg)
                                   NIG_REG_XGXS0_CTRL_PHY_ADDR + port * 0x18);
                        break;
                default:
-                       PMD_DRV_LOG(ERR,
+                       PMD_DRV_LOG(ERR, sc,
                                    "Invalid switch config in"
                                    "link_config=0x%08x",
                                    sc->port.link_config[0]);
@@ -8423,7 +8483,7 @@ bnx2x_link_settings_supported(struct bnx2x_softc *sc, uint32_t switch_cfg)
                }
        }
 
-       PMD_DRV_LOG(INFO, "PHY addr 0x%08x", sc->port.phy_addr);
+       PMD_DRV_LOG(INFO, sc, "PHY addr 0x%08x", sc->port.phy_addr);
 
        /* mask what we support according to speed_cap_mask per configuration */
        for (idx = 0; idx < cfg_size; idx++) {
@@ -8476,7 +8536,7 @@ bnx2x_link_settings_supported(struct bnx2x_softc *sc, uint32_t switch_cfg)
                }
        }
 
-       PMD_DRV_LOG(INFO, "PHY supported 0=0x%08x 1=0x%08x",
+       PMD_DRV_LOG(INFO, sc, "PHY supported 0=0x%08x 1=0x%08x",
                    sc->port.supported[0], sc->port.supported[1]);
 }
 
@@ -8535,7 +8595,7 @@ static void bnx2x_link_settings_requested(struct bnx2x_softc *sc)
                                sc->port.advertising[idx] |=
                                    (ADVERTISED_10baseT_Full | ADVERTISED_TP);
                        } else {
-                               PMD_DRV_LOG(ERR,
+                               PMD_DRV_LOG(ERR, sc,
                                            "Invalid NVRAM config link_config=0x%08x "
                                            "speed_cap_mask=0x%08x",
                                            link_config,
@@ -8555,7 +8615,7 @@ static void bnx2x_link_settings_requested(struct bnx2x_softc *sc)
                                sc->port.advertising[idx] |=
                                    (ADVERTISED_10baseT_Half | ADVERTISED_TP);
                        } else {
-                               PMD_DRV_LOG(ERR,
+                               PMD_DRV_LOG(ERR, sc,
                                            "Invalid NVRAM config link_config=0x%08x "
                                            "speed_cap_mask=0x%08x",
                                            link_config,
@@ -8574,7 +8634,7 @@ static void bnx2x_link_settings_requested(struct bnx2x_softc *sc)
                                sc->port.advertising[idx] |=
                                    (ADVERTISED_100baseT_Full | ADVERTISED_TP);
                        } else {
-                               PMD_DRV_LOG(ERR,
+                               PMD_DRV_LOG(ERR, sc,
                                            "Invalid NVRAM config link_config=0x%08x "
                                            "speed_cap_mask=0x%08x",
                                            link_config,
@@ -8594,7 +8654,7 @@ static void bnx2x_link_settings_requested(struct bnx2x_softc *sc)
                                sc->port.advertising[idx] |=
                                    (ADVERTISED_100baseT_Half | ADVERTISED_TP);
                        } else {
-                               PMD_DRV_LOG(ERR,
+                               PMD_DRV_LOG(ERR, sc,
                                            "Invalid NVRAM config link_config=0x%08x "
                                            "speed_cap_mask=0x%08x",
                                            link_config,
@@ -8612,7 +8672,7 @@ static void bnx2x_link_settings_requested(struct bnx2x_softc *sc)
                                sc->port.advertising[idx] |=
                                    (ADVERTISED_1000baseT_Full | ADVERTISED_TP);
                        } else {
-                               PMD_DRV_LOG(ERR,
+                               PMD_DRV_LOG(ERR, sc,
                                            "Invalid NVRAM config link_config=0x%08x "
                                            "speed_cap_mask=0x%08x",
                                            link_config,
@@ -8630,7 +8690,7 @@ static void bnx2x_link_settings_requested(struct bnx2x_softc *sc)
                                sc->port.advertising[idx] |=
                                    (ADVERTISED_2500baseX_Full | ADVERTISED_TP);
                        } else {
-                               PMD_DRV_LOG(ERR,
+                               PMD_DRV_LOG(ERR, sc,
                                            "Invalid NVRAM config link_config=0x%08x "
                                            "speed_cap_mask=0x%08x",
                                            link_config,
@@ -8649,7 +8709,7 @@ static void bnx2x_link_settings_requested(struct bnx2x_softc *sc)
                                    (ADVERTISED_10000baseT_Full |
                                     ADVERTISED_FIBRE);
                        } else {
-                               PMD_DRV_LOG(ERR,
+                               PMD_DRV_LOG(ERR, sc,
                                            "Invalid NVRAM config link_config=0x%08x "
                                            "speed_cap_mask=0x%08x",
                                            link_config,
@@ -8664,7 +8724,7 @@ static void bnx2x_link_settings_requested(struct bnx2x_softc *sc)
                        break;
 
                default:
-                       PMD_DRV_LOG(ERR,
+                       PMD_DRV_LOG(ERR, sc,
                                    "Invalid NVRAM config link_config=0x%08x "
                                    "speed_cap_mask=0x%08x", link_config,
                                    sc->link_params.speed_cap_mask[idx]);
@@ -8695,7 +8755,7 @@ static void bnx2x_get_phy_info(struct bnx2x_softc *sc)
        uint8_t port = SC_PORT(sc);
        uint32_t eee_mode;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        /* shmem data already read in bnx2x_get_shmem_info() */
 
@@ -8855,7 +8915,7 @@ int bnx2x_alloc_hsi_mem(struct bnx2x_softc *sc)
                snprintf(buf, sizeof(buf), "fp_%d_sb", i);
                if (bnx2x_dma_alloc(sc, sizeof(union bnx2x_host_hc_status_block),
                                  &fp->sb_dma, buf, RTE_CACHE_LINE_SIZE) != 0) {
-                       PMD_DRV_LOG(NOTICE, "Failed to alloc %s", buf);
+                       PMD_DRV_LOG(NOTICE, sc, "Failed to alloc %s", buf);
                        return -1;
                } else {
                        if (CHIP_IS_E2E3(sc)) {
@@ -8945,7 +9005,7 @@ static int bnx2x_prev_mcp_done(struct bnx2x_softc *sc)
        uint32_t rc = bnx2x_fw_command(sc, DRV_MSG_CODE_UNLOAD_DONE,
                                     DRV_MSG_CODE_UNLOAD_SKIP_LINK_RESET);
        if (!rc) {
-               PMD_DRV_LOG(NOTICE, "MCP response failure, aborting");
+               PMD_DRV_LOG(NOTICE, sc, "MCP response failure, aborting");
                return -1;
        }
 
@@ -8977,12 +9037,12 @@ static uint8_t bnx2x_prev_is_path_marked(struct bnx2x_softc *sc)
        tmp = bnx2x_prev_path_get_entry(sc);
        if (tmp) {
                if (tmp->aer) {
-                       PMD_DRV_LOG(DEBUG,
+                       PMD_DRV_LOG(DEBUG, sc,
                                    "Path %d/%d/%d was marked by AER",
                                    sc->pcie_bus, sc->pcie_device, SC_PATH(sc));
                } else {
                        rc = TRUE;
-                       PMD_DRV_LOG(DEBUG,
+                       PMD_DRV_LOG(DEBUG, sc,
                                    "Path %d/%d/%d was already cleaned from previous drivers",
                                    sc->pcie_bus, sc->pcie_device, SC_PATH(sc));
                }
@@ -9003,11 +9063,11 @@ static int bnx2x_prev_mark_path(struct bnx2x_softc *sc, uint8_t after_undi)
        tmp = bnx2x_prev_path_get_entry(sc);
        if (tmp) {
                if (!tmp->aer) {
-                       PMD_DRV_LOG(DEBUG,
+                       PMD_DRV_LOG(DEBUG, sc,
                                    "Re-marking AER in path %d/%d/%d",
                                    sc->pcie_bus, sc->pcie_device, SC_PATH(sc));
                } else {
-                       PMD_DRV_LOG(DEBUG,
+                       PMD_DRV_LOG(DEBUG, sc,
                                    "Removing AER indication from path %d/%d/%d",
                                    sc->pcie_bus, sc->pcie_device, SC_PATH(sc));
                        tmp->aer = 0;
@@ -9023,7 +9083,7 @@ static int bnx2x_prev_mark_path(struct bnx2x_softc *sc, uint8_t after_undi)
        tmp = rte_malloc("", sizeof(struct bnx2x_prev_list_node),
                         RTE_CACHE_LINE_SIZE);
        if (!tmp) {
-               PMD_DRV_LOG(NOTICE, "Failed to allocate 'bnx2x_prev_list_node'");
+               PMD_DRV_LOG(NOTICE, sc, "Failed to allocate 'bnx2x_prev_list_node'");
                return -1;
        }
 
@@ -9048,13 +9108,13 @@ static int bnx2x_do_flr(struct bnx2x_softc *sc)
 
        /* only E2 and onwards support FLR */
        if (CHIP_IS_E1x(sc)) {
-               PMD_DRV_LOG(WARNING, "FLR not supported in E1H");
+               PMD_DRV_LOG(WARNING, sc, "FLR not supported in E1H");
                return -1;
        }
 
        /* only bootcode REQ_BC_VER_4_INITIATE_FLR and onwards support flr */
        if (sc->devinfo.bc_ver < REQ_BC_VER_4_INITIATE_FLR) {
-               PMD_DRV_LOG(WARNING,
+               PMD_DRV_LOG(WARNING, sc,
                            "FLR not supported by BC_VER: 0x%08x",
                            sc->devinfo.bc_ver);
                return -1;
@@ -9071,7 +9131,7 @@ static int bnx2x_do_flr(struct bnx2x_softc *sc)
                }
        }
 
-       PMD_DRV_LOG(NOTICE, "PCIE transaction is not cleared, "
+       PMD_DRV_LOG(NOTICE, sc, "PCIE transaction is not cleared, "
                    "proceeding with reset anyway");
 
 clear:
@@ -9219,7 +9279,7 @@ static int bnx2x_prev_unload_common(struct bnx2x_softc *sc)
                if (reset_reg & MISC_REGISTERS_RESET_REG_1_RST_DORQ) {
                        tmp_reg = REG_RD(sc, DORQ_REG_NORM_CID_OFST);
                        if (tmp_reg == 0x7) {
-                               PMD_DRV_LOG(DEBUG, "UNDI previously loaded");
+                               PMD_DRV_LOG(DEBUG, sc, "UNDI previously loaded");
                                prev_undi = TRUE;
                                /* clear the UNDI indication */
                                REG_WR(sc, DORQ_REG_NORM_CID_OFST, 0);
@@ -9238,7 +9298,7 @@ static int bnx2x_prev_unload_common(struct bnx2x_softc *sc)
                                break;
                        }
 
-                       PMD_DRV_LOG(DEBUG, "BRB still has 0x%08x", tmp_reg);
+                       PMD_DRV_LOG(DEBUG, sc, "BRB still has 0x%08x", tmp_reg);
 
                        /* reset timer as long as BRB actually gets emptied */
                        if (prev_brb > tmp_reg) {
@@ -9256,7 +9316,7 @@ static int bnx2x_prev_unload_common(struct bnx2x_softc *sc)
                }
 
                if (!timer_count) {
-                       PMD_DRV_LOG(NOTICE, "Failed to empty BRB");
+                       PMD_DRV_LOG(NOTICE, sc, "Failed to empty BRB");
                }
        }
 
@@ -9311,7 +9371,7 @@ static int bnx2x_prev_unload_uncommon(struct bnx2x_softc *sc)
                return 0;
        }
 
-       PMD_DRV_LOG(INFO, "Could not FLR");
+       PMD_DRV_LOG(INFO, sc, "Could not FLR");
 
        /* Close the MCP request, return failure */
        rc = bnx2x_prev_mcp_done(sc);
@@ -9358,7 +9418,7 @@ static int bnx2x_prev_unload(struct bnx2x_softc *sc)
                /* Lock MCP using an unload request */
                fw = bnx2x_fw_command(sc, DRV_MSG_CODE_UNLOAD_REQ_WOL_DIS, 0);
                if (!fw) {
-                       PMD_DRV_LOG(NOTICE, "MCP response failure, aborting");
+                       PMD_DRV_LOG(NOTICE, sc, "MCP response failure, aborting");
                        rc = -1;
                        break;
                }
@@ -9378,7 +9438,7 @@ static int bnx2x_prev_unload(struct bnx2x_softc *sc)
        } while (--time_counter);
 
        if (!time_counter || rc) {
-               PMD_DRV_LOG(NOTICE, "Failed to unload previous driver!");
+               PMD_DRV_LOG(NOTICE, sc, "Failed to unload previous driver!");
                rc = -1;
        }
 
@@ -9395,7 +9455,7 @@ bnx2x_dcbx_set_state(struct bnx2x_softc *sc, uint8_t dcb_on, uint32_t dcbx_enabl
                sc->dcb_state = FALSE;
                sc->dcbx_enabled = BNX2X_DCBX_ENABLED_INVALID;
        }
-       PMD_DRV_LOG(DEBUG,
+       PMD_DRV_LOG(DEBUG, sc,
                    "DCB state [%s:%s]",
                    dcb_on ? "ON" : "OFF",
                    (dcbx_enabled == BNX2X_DCBX_ENABLED_OFF) ? "user-mode" :
@@ -9428,7 +9488,7 @@ static void bnx2x_init_multi_cos(struct bnx2x_softc *sc)
                if (cos < sc->max_cos) {
                        sc->prio_to_cos[pri] = cos;
                } else {
-                       PMD_DRV_LOG(WARNING,
+                       PMD_DRV_LOG(WARNING, sc,
                                    "Invalid COS %d for priority %d "
                                    "(max COS is %d), setting to 0", cos, pri,
                                    (sc->max_cos - 1));
@@ -9449,7 +9509,7 @@ static int bnx2x_pci_get_caps(struct bnx2x_softc *sc)
        cap = sc->pci_caps = rte_zmalloc("caps", sizeof(struct bnx2x_pci_cap),
                                         RTE_CACHE_LINE_SIZE);
        if (!cap) {
-               PMD_DRV_LOG(NOTICE, "Failed to allocate memory");
+               PMD_DRV_LOG(NOTICE, sc, "Failed to allocate memory");
                return -ENOMEM;
        }
 
@@ -9460,7 +9520,7 @@ static int bnx2x_pci_get_caps(struct bnx2x_softc *sc)
        pci_read(sc, PCIR_STATUS, &status, 2);
        if (!(status & PCIM_STATUS_CAPPRESENT)) {
 #endif
-               PMD_DRV_LOG(NOTICE, "PCIe capability reading failed");
+               PMD_DRV_LOG(NOTICE, sc, "PCIe capability reading failed");
                return -1;
        }
 
@@ -9480,7 +9540,7 @@ static int bnx2x_pci_get_caps(struct bnx2x_softc *sc)
                                        sizeof(struct bnx2x_pci_cap),
                                        RTE_CACHE_LINE_SIZE);
                if (!cap->next) {
-                       PMD_DRV_LOG(NOTICE, "Failed to allocate memory");
+                       PMD_DRV_LOG(NOTICE, sc, "Failed to allocate memory");
                        return -ENOMEM;
                }
                cap = cap->next;
@@ -9516,25 +9576,25 @@ void bnx2x_load_firmware(struct bnx2x_softc *sc)
                ? FW_NAME_57711 : FW_NAME_57810;
        f = open(fwname, O_RDONLY);
        if (f < 0) {
-               PMD_DRV_LOG(NOTICE, "Can't open firmware file");
+               PMD_DRV_LOG(NOTICE, sc, "Can't open firmware file");
                return;
        }
 
        if (fstat(f, &st) < 0) {
-               PMD_DRV_LOG(NOTICE, "Can't stat firmware file");
+               PMD_DRV_LOG(NOTICE, sc, "Can't stat firmware file");
                close(f);
                return;
        }
 
        sc->firmware = rte_zmalloc("bnx2x_fw", st.st_size, RTE_CACHE_LINE_SIZE);
        if (!sc->firmware) {
-               PMD_DRV_LOG(NOTICE, "Can't allocate memory for firmware");
+               PMD_DRV_LOG(NOTICE, sc, "Can't allocate memory for firmware");
                close(f);
                return;
        }
 
        if (read(f, sc->firmware, st.st_size) != st.st_size) {
-               PMD_DRV_LOG(NOTICE, "Can't read firmware data");
+               PMD_DRV_LOG(NOTICE, sc, "Can't read firmware data");
                close(f);
                return;
        }
@@ -9542,10 +9602,11 @@ void bnx2x_load_firmware(struct bnx2x_softc *sc)
 
        sc->fw_len = st.st_size;
        if (sc->fw_len < FW_HEADER_LEN) {
-               PMD_DRV_LOG(NOTICE, "Invalid fw size: %" PRIu64, sc->fw_len);
+               PMD_DRV_LOG(NOTICE, sc,
+                           "Invalid fw size: %" PRIu64, sc->fw_len);
                return;
        }
-       PMD_DRV_LOG(DEBUG, "fw_len = %" PRIu64, sc->fw_len);
+       PMD_DRV_LOG(DEBUG, sc, "fw_len = %" PRIu64, sc->fw_len);
 }
 
 static void
@@ -9612,11 +9673,11 @@ int bnx2x_attach(struct bnx2x_softc *sc)
 {
        int rc;
 
-       PMD_DRV_LOG(DEBUG, "Starting attach...");
+       PMD_DRV_LOG(DEBUG, sc, "Starting attach...");
 
        rc = bnx2x_pci_get_caps(sc);
        if (rc) {
-               PMD_DRV_LOG(NOTICE, "PCIe caps reading was failed");
+               PMD_DRV_LOG(NOTICE, sc, "PCIe caps reading failed");
                return rc;
        }
 
@@ -9655,7 +9716,7 @@ int bnx2x_attach(struct bnx2x_softc *sc)
 
                /* get device info and set params */
                if (bnx2x_get_device_info(sc) != 0) {
-                       PMD_DRV_LOG(NOTICE, "getting device info");
+                       PMD_DRV_LOG(NOTICE, sc, "getting device info");
                        return -ENXIO;
                }
 
@@ -9754,7 +9815,7 @@ bnx2x_igu_clear_sb_gen(struct bnx2x_softc *sc, uint8_t func, uint8_t idu_sb_id,
 
        mb();
 
-       PMD_DRV_LOG(DEBUG, "write 0x%08x to IGU(via GRC) addr 0x%x",
+       PMD_DRV_LOG(DEBUG, sc, "write 0x%08x to IGU(via GRC) addr 0x%x",
                    ctl, igu_addr_ctl);
        REG_WR(sc, igu_addr_ctl, ctl);
 
@@ -9766,7 +9827,7 @@ bnx2x_igu_clear_sb_gen(struct bnx2x_softc *sc, uint8_t func, uint8_t idu_sb_id,
        }
 
        if (!(REG_RD(sc, igu_addr_ack) & sb_bit)) {
-               PMD_DRV_LOG(DEBUG,
+               PMD_DRV_LOG(DEBUG, sc,
                            "Unable to finish IGU cleanup: "
                            "idu_sb_id %d offset %d bit %d (cnt %d)",
                            idu_sb_id, idu_sb_id / 32, idu_sb_id % 32, cnt);
@@ -9786,7 +9847,7 @@ static void bnx2x_reset_common(struct bnx2x_softc *sc)
 {
        uint32_t val = 0x1400;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        /* reset_common */
        REG_WR(sc, (GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_CLEAR),
@@ -9820,8 +9881,10 @@ static void bnx2x_common_init_phy(struct bnx2x_softc *sc)
                shmem2_base[1] = SHMEM2_RD(sc, other_shmem2_base_addr);
        }
 
+       bnx2x_acquire_phy_lock(sc);
        elink_common_init_phy(sc, shmem_base, shmem2_base,
                              sc->devinfo.chip_id, 0);
+       bnx2x_release_phy_lock(sc);
 }
 
 static void bnx2x_pf_disable(struct bnx2x_softc *sc)
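
Note: bnx2x_common_init_phy() now brackets the elink call with the new PHY lock, so MDIO accesses from the control path cannot interleave with the alarm-driven periodic callout. The acquire/release helpers are not shown in this diff; a plausible sketch, assuming they wrap the BNX2X_PHY_LOCK()/BNX2X_PHY_UNLOCK() macros added to bnx2x.h further down:

    /* Sketch, not the driver's actual helpers. */
    static inline void bnx2x_acquire_phy_lock(struct bnx2x_softc *sc)
    {
            BNX2X_PHY_LOCK(sc);     /* rte_spinlock_lock(&sc->port.phy_mtx) */
    }

    static inline void bnx2x_release_phy_lock(struct bnx2x_softc *sc)
    {
            BNX2X_PHY_UNLOCK(sc);   /* rte_spinlock_unlock(&sc->port.phy_mtx) */
    }
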
@@ -9995,7 +10058,8 @@ static int bnx2x_init_hw_common(struct bnx2x_softc *sc)
        uint8_t abs_func_id;
        uint32_t val;
 
-       PMD_DRV_LOG(DEBUG, "starting common init for func %d", SC_ABS_FUNC(sc));
+       PMD_DRV_LOG(DEBUG, sc,
+                   "starting common init for func %d", SC_ABS_FUNC(sc));
 
        /*
         * take the RESET lock to protect undi_unload flow from accessing
@@ -10078,12 +10142,12 @@ static int bnx2x_init_hw_common(struct bnx2x_softc *sc)
 
        val = REG_RD(sc, PXP2_REG_RQ_CFG_DONE);
        if (val != 1) {
-               PMD_DRV_LOG(NOTICE, "PXP2 CFG failed");
+               PMD_DRV_LOG(NOTICE, sc, "PXP2 CFG failed");
                return -1;
        }
        val = REG_RD(sc, PXP2_REG_RD_INIT_DONE);
        if (val != 1) {
-               PMD_DRV_LOG(NOTICE, "PXP2 RD_INIT failed");
+               PMD_DRV_LOG(NOTICE, sc, "PXP2 RD_INIT failed");
                return -1;
        }
 
@@ -10205,7 +10269,7 @@ static int bnx2x_init_hw_common(struct bnx2x_softc *sc)
                } while (factor-- && (val != 1));
 
                if (val != 1) {
-                       PMD_DRV_LOG(NOTICE, "ATC_INIT failed");
+                       PMD_DRV_LOG(NOTICE, sc, "ATC_INIT failed");
                        return -1;
                }
        }
@@ -10343,7 +10407,7 @@ static int bnx2x_init_hw_common(struct bnx2x_softc *sc)
 
        if (sizeof(union cdu_context) != 1024) {
 /* we currently assume that a context is 1024 bytes */
-               PMD_DRV_LOG(NOTICE,
+               PMD_DRV_LOG(NOTICE, sc,
                            "please adjust the size of cdu_context(%ld)",
                            (long)sizeof(union cdu_context));
        }
@@ -10405,17 +10469,17 @@ static int bnx2x_init_hw_common(struct bnx2x_softc *sc)
        /* finish CFC init */
        val = reg_poll(sc, CFC_REG_LL_INIT_DONE, 1, 100, 10);
        if (val != 1) {
-               PMD_DRV_LOG(NOTICE, "CFC LL_INIT failed");
+               PMD_DRV_LOG(NOTICE, sc, "CFC LL_INIT failed");
                return -1;
        }
        val = reg_poll(sc, CFC_REG_AC_INIT_DONE, 1, 100, 10);
        if (val != 1) {
-               PMD_DRV_LOG(NOTICE, "CFC AC_INIT failed");
+               PMD_DRV_LOG(NOTICE, sc, "CFC AC_INIT failed");
                return -1;
        }
        val = reg_poll(sc, CFC_REG_CAM_INIT_DONE, 1, 100, 10);
        if (val != 1) {
-               PMD_DRV_LOG(NOTICE, "CFC CAM_INIT failed");
+               PMD_DRV_LOG(NOTICE, sc, "CFC CAM_INIT failed");
                return -1;
        }
        REG_WR(sc, CFC_REG_DEBUG0, 0);
@@ -10468,7 +10532,7 @@ static int bnx2x_init_hw_port(struct bnx2x_softc *sc)
        uint32_t low, high;
        uint32_t val;
 
-       PMD_DRV_LOG(DEBUG, "starting port init for port %d", port);
+       PMD_DRV_LOG(DEBUG, sc, "starting port init for port %d", port);
 
        REG_WR(sc, NIG_REG_MASK_INTERRUPT_PORT0 + port * 4, 0);
 
@@ -10695,7 +10759,7 @@ bnx2x_flr_clnup_poll_hw_counter(struct bnx2x_softc *sc, uint32_t reg,
        uint32_t val = bnx2x_flr_clnup_reg_poll(sc, reg, 0, poll_cnt);
 
        if (val != 0) {
-               PMD_DRV_LOG(NOTICE, "%s usage count=%d", msg, val);
+               PMD_DRV_LOG(NOTICE, sc, "%s usage count=%d", msg, val);
                return -1;
        }
 
@@ -10787,7 +10851,7 @@ bnx2x_send_final_clnup(struct bnx2x_softc *sc, uint8_t clnup_func,
        int ret = 0;
 
        if (REG_RD(sc, comp_addr)) {
-               PMD_DRV_LOG(NOTICE,
+               PMD_DRV_LOG(NOTICE, sc,
                            "Cleanup complete was not 0 before sending");
                return -1;
        }
@@ -10800,8 +10864,8 @@ bnx2x_send_final_clnup(struct bnx2x_softc *sc, uint8_t clnup_func,
        REG_WR(sc, XSDM_REG_OPERATION_GEN, op_gen_command);
 
        if (bnx2x_flr_clnup_reg_poll(sc, comp_addr, 1, poll_cnt) != 1) {
-               PMD_DRV_LOG(NOTICE, "FW final cleanup did not succeed");
-               PMD_DRV_LOG(DEBUG, "At timeout completion address contained %x",
+               PMD_DRV_LOG(NOTICE, sc, "FW final cleanup did not succeed");
+               PMD_DRV_LOG(DEBUG, sc, "At timeout completion address contained %x",
                            (REG_RD(sc, comp_addr)));
                rte_panic("FLR cleanup failed");
                return -1;
@@ -10917,28 +10981,30 @@ static void bnx2x_hw_enable_status(struct bnx2x_softc *sc)
        __rte_unused uint32_t val;
 
        val = REG_RD(sc, CFC_REG_WEAK_ENABLE_PF);
-       PMD_DRV_LOG(DEBUG, "CFC_REG_WEAK_ENABLE_PF is 0x%x", val);
+       PMD_DRV_LOG(DEBUG, sc, "CFC_REG_WEAK_ENABLE_PF is 0x%x", val);
 
        val = REG_RD(sc, PBF_REG_DISABLE_PF);
-       PMD_DRV_LOG(DEBUG, "PBF_REG_DISABLE_PF is 0x%x", val);
+       PMD_DRV_LOG(DEBUG, sc, "PBF_REG_DISABLE_PF is 0x%x", val);
 
        val = REG_RD(sc, IGU_REG_PCI_PF_MSI_EN);
-       PMD_DRV_LOG(DEBUG, "IGU_REG_PCI_PF_MSI_EN is 0x%x", val);
+       PMD_DRV_LOG(DEBUG, sc, "IGU_REG_PCI_PF_MSI_EN is 0x%x", val);
 
        val = REG_RD(sc, IGU_REG_PCI_PF_MSIX_EN);
-       PMD_DRV_LOG(DEBUG, "IGU_REG_PCI_PF_MSIX_EN is 0x%x", val);
+       PMD_DRV_LOG(DEBUG, sc, "IGU_REG_PCI_PF_MSIX_EN is 0x%x", val);
 
        val = REG_RD(sc, IGU_REG_PCI_PF_MSIX_FUNC_MASK);
-       PMD_DRV_LOG(DEBUG, "IGU_REG_PCI_PF_MSIX_FUNC_MASK is 0x%x", val);
+       PMD_DRV_LOG(DEBUG, sc, "IGU_REG_PCI_PF_MSIX_FUNC_MASK is 0x%x", val);
 
        val = REG_RD(sc, PGLUE_B_REG_SHADOW_BME_PF_7_0_CLR);
-       PMD_DRV_LOG(DEBUG, "PGLUE_B_REG_SHADOW_BME_PF_7_0_CLR is 0x%x", val);
+       PMD_DRV_LOG(DEBUG, sc,
+                   "PGLUE_B_REG_SHADOW_BME_PF_7_0_CLR is 0x%x", val);
 
        val = REG_RD(sc, PGLUE_B_REG_FLR_REQUEST_PF_7_0_CLR);
-       PMD_DRV_LOG(DEBUG, "PGLUE_B_REG_FLR_REQUEST_PF_7_0_CLR is 0x%x", val);
+       PMD_DRV_LOG(DEBUG, sc,
+                   "PGLUE_B_REG_FLR_REQUEST_PF_7_0_CLR is 0x%x", val);
 
        val = REG_RD(sc, PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER);
-       PMD_DRV_LOG(DEBUG, "PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER is 0x%x",
+       PMD_DRV_LOG(DEBUG, sc, "PGLUE_B_REG_INTERNAL_PFID_ENABLE_MASTER is 0x%x",
                    val);
 }
 
@@ -10982,7 +11048,7 @@ static int bnx2x_pf_flr_clnup(struct bnx2x_softc *sc)
 
        /* Verify no pending pci transactions */
        if (bnx2x_is_pcie_pending(sc)) {
-               PMD_DRV_LOG(NOTICE, "PCIE Transactions still pending");
+               PMD_DRV_LOG(NOTICE, sc, "PCIE Transactions still pending");
        }
 
        /* Debug */
@@ -11009,13 +11075,13 @@ static int bnx2x_init_hw_func(struct bnx2x_softc *sc)
        int main_mem_width, rc;
        uint32_t i;
 
-       PMD_DRV_LOG(DEBUG, "starting func init for func %d", func);
+       PMD_DRV_LOG(DEBUG, sc, "starting func init for func %d", func);
 
        /* FLR cleanup */
        if (!CHIP_IS_E1x(sc)) {
                rc = bnx2x_pf_flr_clnup(sc);
                if (rc) {
-                       PMD_DRV_LOG(NOTICE, "FLR cleanup failed!");
+                       PMD_DRV_LOG(NOTICE, sc, "FLR cleanup failed!");
                        return rc;
                }
        }
@@ -11262,7 +11328,7 @@ static int bnx2x_init_hw_func(struct bnx2x_softc *sc)
 
                val = REG_RD(sc, main_mem_prty_clr);
                if (val) {
-                       PMD_DRV_LOG(DEBUG,
+                       PMD_DRV_LOG(DEBUG, sc,
                                    "Parity errors in HC block during function init (0x%x)!",
                                    val);
                }
@@ -11297,10 +11363,12 @@ static int bnx2x_init_hw_func(struct bnx2x_softc *sc)
 static void bnx2x_link_reset(struct bnx2x_softc *sc)
 {
        if (!BNX2X_NOMCP(sc)) {
+               bnx2x_acquire_phy_lock(sc);
                elink_lfa_reset(&sc->link_params, &sc->link_vars);
+               bnx2x_release_phy_lock(sc);
        } else {
                if (!CHIP_REV_IS_SLOW(sc)) {
-                       PMD_DRV_LOG(WARNING,
+                       PMD_DRV_LOG(WARNING, sc,
                                    "Bootcode is missing - cannot reset link");
                }
        }
@@ -11330,7 +11398,7 @@ static void bnx2x_reset_port(struct bnx2x_softc *sc)
        /* Check for BRB port occupancy */
        val = REG_RD(sc, BRB1_REG_PORT_NUM_OCC_BLOCKS_0 + port * 4);
        if (val) {
-               PMD_DRV_LOG(DEBUG,
+               PMD_DRV_LOG(DEBUG, sc,
                            "BRB1 is not empty, %d blocks are occupied", val);
        }
 }
@@ -11524,10 +11592,10 @@ static int ecore_gunzip(struct bnx2x_softc *sc, const uint8_t * zbuf, int len)
        int ret;
        int data_begin = cut_gzip_prefix(zbuf, len);
 
-       PMD_DRV_LOG(DEBUG, "ecore_gunzip %d", len);
+       PMD_DRV_LOG(DEBUG, sc, "ecore_gunzip %d", len);
 
        if (data_begin <= 0) {
-               PMD_DRV_LOG(NOTICE, "bad gzip prefix");
+               PMD_DRV_LOG(NOTICE, sc, "bad gzip prefix");
                return -1;
        }
 
@@ -11539,19 +11607,19 @@ static int ecore_gunzip(struct bnx2x_softc *sc, const uint8_t * zbuf, int len)
 
        ret = inflateInit2(&zlib_stream, -MAX_WBITS);
        if (ret != Z_OK) {
-               PMD_DRV_LOG(NOTICE, "zlib inflateInit2 error");
+               PMD_DRV_LOG(NOTICE, sc, "zlib inflateInit2 error");
                return ret;
        }
 
        ret = inflate(&zlib_stream, Z_FINISH);
        if ((ret != Z_STREAM_END) && (ret != Z_OK)) {
-               PMD_DRV_LOG(NOTICE, "zlib inflate error: %d %s", ret,
+               PMD_DRV_LOG(NOTICE, sc, "zlib inflate error: %d %s", ret,
                            zlib_stream.msg);
        }
 
        sc->gz_outlen = zlib_stream.total_out;
        if (sc->gz_outlen & 0x3) {
-               PMD_DRV_LOG(NOTICE, "firmware is not aligned. gz_outlen == %d",
+               PMD_DRV_LOG(NOTICE, sc, "firmware is not aligned. gz_outlen == %d",
                            sc->gz_outlen);
        }
        sc->gz_outlen >>= 2;
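
Note: ecore_gunzip() strips the gzip header by hand (cut_gzip_prefix()) and feeds the remaining raw deflate stream to zlib; the negative windowBits value passed to inflateInit2() is what selects raw-deflate mode, i.e. no zlib or gzip wrapper is expected. A self-contained sketch of the same pattern:

    #include <stdint.h>
    #include <string.h>
    #include <zlib.h>

    /* Inflate a raw deflate stream (no zlib/gzip header), as the driver
     * does after trimming the gzip prefix itself. */
    static int raw_inflate(const uint8_t *in, uint32_t in_len,
                           uint8_t *out, uint32_t out_len)
    {
            z_stream zs;
            int ret;

            memset(&zs, 0, sizeof(zs));
            zs.next_in = (Bytef *)(uintptr_t)in;
            zs.avail_in = in_len;
            zs.next_out = out;
            zs.avail_out = out_len;

            ret = inflateInit2(&zs, -MAX_WBITS);  /* raw deflate, 32K window */
            if (ret != Z_OK)
                    return ret;
            ret = inflate(&zs, Z_FINISH);
            inflateEnd(&zs);
            return (ret == Z_STREAM_END || ret == Z_OK) ? 0 : ret;
    }
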
@@ -11670,7 +11738,7 @@ void bnx2x_print_adapter_info(struct bnx2x_softc *sc)
        int i = 0;
        __rte_unused uint32_t ext_phy_type;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
        if (sc->link_vars.phy_flags & PHY_XGXS_FLAG)
                ext_phy_type = ELINK_XGXS_EXT_PHY_TYPE(REG_RD(sc,
                                                              sc->
@@ -11689,97 +11757,102 @@ void bnx2x_print_adapter_info(struct bnx2x_softc *sc)
                                                                         dev_info.port_hw_config
                                                                         [0].external_phy_config)));
 
-       PMD_INIT_LOG(DEBUG, "\n\n===================================\n");
+       PMD_DRV_LOG(INFO, sc, "\n\n===================================\n");
        /* Hardware chip info. */
-       PMD_INIT_LOG(DEBUG, "%12s : %#08x", "ASIC", sc->devinfo.chip_id);
-       PMD_INIT_LOG(DEBUG, "%12s : %c%d", "Rev", (CHIP_REV(sc) >> 12) + 'A',
+       PMD_DRV_LOG(INFO, sc, "%12s : %#08x", "ASIC", sc->devinfo.chip_id);
+       PMD_DRV_LOG(INFO, sc, "%12s : %c%d", "Rev", (CHIP_REV(sc) >> 12) + 'A',
                     (CHIP_METAL(sc) >> 4));
 
        /* Bus info. */
-       PMD_INIT_LOG(DEBUG, "%12s : %d, ", "Bus PCIe", sc->devinfo.pcie_link_width);
+       PMD_DRV_LOG(INFO, sc,
+                   "%12s : %d, ", "Bus PCIe", sc->devinfo.pcie_link_width);
        switch (sc->devinfo.pcie_link_speed) {
        case 1:
-               PMD_INIT_LOG(DEBUG, "%23s", "2.5 Gbps");
+               PMD_DRV_LOG(INFO, sc, "%23s", "2.5 Gbps");
                break;
        case 2:
-               PMD_INIT_LOG(DEBUG, "%21s", "5 Gbps");
+               PMD_DRV_LOG(INFO, sc, "%21s", "5 Gbps");
                break;
        case 4:
-               PMD_INIT_LOG(DEBUG, "%21s", "8 Gbps");
+               PMD_DRV_LOG(INFO, sc, "%21s", "8 Gbps");
                break;
        default:
-               PMD_INIT_LOG(DEBUG, "%33s", "Unknown link speed");
+               PMD_DRV_LOG(INFO, sc, "%33s", "Unknown link speed");
        }
 
        /* Device features. */
-       PMD_INIT_LOG(DEBUG, "%12s : ", "Flags");
+       PMD_DRV_LOG(INFO, sc, "%12s : ", "Flags");
 
        /* Miscellaneous flags. */
        if (sc->devinfo.pcie_cap_flags & BNX2X_MSI_CAPABLE_FLAG) {
-               PMD_INIT_LOG(DEBUG, "%18s", "MSI");
+               PMD_DRV_LOG(INFO, sc, "%18s", "MSI");
                i++;
        }
 
        if (sc->devinfo.pcie_cap_flags & BNX2X_MSIX_CAPABLE_FLAG) {
                if (i > 0)
-                       PMD_INIT_LOG(DEBUG, "|");
-               PMD_INIT_LOG(DEBUG, "%20s", "MSI-X");
+                       PMD_DRV_LOG(INFO, sc, "|");
+               PMD_DRV_LOG(INFO, sc, "%20s", "MSI-X");
                i++;
        }
 
        if (IS_PF(sc)) {
-               PMD_INIT_LOG(DEBUG, "%12s : ", "Queues");
+               PMD_DRV_LOG(INFO, sc, "%12s : ", "Queues");
                switch (sc->sp->rss_rdata.rss_mode) {
                case ETH_RSS_MODE_DISABLED:
-                       PMD_INIT_LOG(DEBUG, "%19s", "None");
+                       PMD_DRV_LOG(INFO, sc, "%19s", "None");
                        break;
                case ETH_RSS_MODE_REGULAR:
-                       PMD_INIT_LOG(DEBUG, "%18s : %d", "RSS", sc->num_queues);
+                       PMD_DRV_LOG(INFO, sc,
+                                   "%18s : %d", "RSS", sc->num_queues);
                        break;
                default:
-                       PMD_INIT_LOG(DEBUG, "%22s", "Unknown");
+                       PMD_DRV_LOG(INFO, sc, "%22s", "Unknown");
                        break;
                }
        }
 
        /* RTE and Driver versions */
-       PMD_INIT_LOG(DEBUG, "%12s : %s", "DPDK",
-                    rte_version());
-       PMD_INIT_LOG(DEBUG, "%12s : %s", "Driver",
-                    bnx2x_pmd_version());
+       PMD_DRV_LOG(INFO, sc, "%12s : %s", "DPDK",
+                       rte_version());
+       PMD_DRV_LOG(INFO, sc, "%12s : %s", "Driver",
+                       bnx2x_pmd_version());
 
        /* Firmware versions and device features. */
-       PMD_INIT_LOG(DEBUG, "%12s : %d.%d.%d",
+       PMD_DRV_LOG(INFO, sc, "%12s : %d.%d.%d",
                     "Firmware",
                     BNX2X_5710_FW_MAJOR_VERSION,
                     BNX2X_5710_FW_MINOR_VERSION,
                     BNX2X_5710_FW_REVISION_VERSION);
-       PMD_INIT_LOG(DEBUG, "%12s : %s",
+       PMD_DRV_LOG(INFO, sc, "%12s : %s",
                     "Bootcode", sc->devinfo.bc_ver_str);
 
-       PMD_INIT_LOG(DEBUG, "\n\n===================================\n");
-       PMD_INIT_LOG(DEBUG, "%12s : %u", "Bnx2x Func", sc->pcie_func);
-       PMD_INIT_LOG(DEBUG, "%12s : %s", "Bnx2x Flags", get_bnx2x_flags(sc->flags));
-       PMD_INIT_LOG(DEBUG, "%12s : %s", "DMAE Is",
+       PMD_DRV_LOG(INFO, sc, "\n\n===================================\n");
+       PMD_DRV_LOG(INFO, sc, "%12s : %u", "Bnx2x Func", sc->pcie_func);
+       PMD_DRV_LOG(INFO, sc,
+                   "%12s : %s", "Bnx2x Flags", get_bnx2x_flags(sc->flags));
+       PMD_DRV_LOG(INFO, sc, "%12s : %s", "DMAE Is",
                     (sc->dmae_ready ? "Ready" : "Not Ready"));
-       PMD_INIT_LOG(DEBUG, "%12s : %s", "OVLAN", (OVLAN(sc) ? "YES" : "NO"));
-       PMD_INIT_LOG(DEBUG, "%12s : %s", "MF", (IS_MF(sc) ? "YES" : "NO"));
-       PMD_INIT_LOG(DEBUG, "%12s : %u", "MTU", sc->mtu);
-       PMD_INIT_LOG(DEBUG, "%12s : %s", "PHY Type", get_ext_phy_type(ext_phy_type));
-       PMD_INIT_LOG(DEBUG, "%12s : %x:%x:%x:%x:%x:%x", "MAC Addr",
+       PMD_DRV_LOG(INFO, sc, "%12s : %s", "OVLAN", (OVLAN(sc) ? "YES" : "NO"));
+       PMD_DRV_LOG(INFO, sc, "%12s : %s", "MF", (IS_MF(sc) ? "YES" : "NO"));
+       PMD_DRV_LOG(INFO, sc, "%12s : %u", "MTU", sc->mtu);
+       PMD_DRV_LOG(INFO, sc,
+                   "%12s : %s", "PHY Type", get_ext_phy_type(ext_phy_type));
+       PMD_DRV_LOG(INFO, sc, "%12s : %x:%x:%x:%x:%x:%x", "MAC Addr",
                        sc->link_params.mac_addr[0],
                        sc->link_params.mac_addr[1],
                        sc->link_params.mac_addr[2],
                        sc->link_params.mac_addr[3],
                        sc->link_params.mac_addr[4],
                        sc->link_params.mac_addr[5]);
-       PMD_INIT_LOG(DEBUG, "%12s : %s", "RX Mode", get_rx_mode(sc->rx_mode));
-       PMD_INIT_LOG(DEBUG, "%12s : %s", "State", get_state(sc->state));
+       PMD_DRV_LOG(INFO, sc, "%12s : %s", "RX Mode", get_rx_mode(sc->rx_mode));
+       PMD_DRV_LOG(INFO, sc, "%12s : %s", "State", get_state(sc->state));
        if (sc->recovery_state)
-               PMD_INIT_LOG(DEBUG, "%12s : %s", "Recovery",
+               PMD_DRV_LOG(INFO, sc, "%12s : %s", "Recovery",
                             get_recovery_state(sc->recovery_state));
-       PMD_INIT_LOG(DEBUG, "%12s : CQ = %lx,  EQ = %lx", "SPQ Left",
+       PMD_DRV_LOG(INFO, sc, "%12s : CQ = %lx,  EQ = %lx", "SPQ Left",
                     sc->cq_spq_left, sc->eq_spq_left);
-       PMD_INIT_LOG(DEBUG, "%12s : %x", "Switch", sc->link_params.switch_cfg);
-       PMD_INIT_LOG(DEBUG, "\n\n===================================\n");
+       PMD_DRV_LOG(INFO, sc,
+                   "%12s : %x", "Switch", sc->link_params.switch_cfg);
+       PMD_DRV_LOG(INFO, sc, "\n\n===================================\n");
 }
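
Note: bnx2x_print_adapter_info() switches from PMD_INIT_LOG(DEBUG, ...) to PMD_DRV_LOG(INFO, sc, ...), so the banner is emitted once per port at the default log level and is attributable to a specific adapter. The dynamic log level can be adjusted at run time to silence it; a sketch, where the logtype name "pmd.net.bnx2x.driver" is an assumption about how the driver registers itself:

    #include <rte_log.h>

    /* rte_log_register() returns the existing id when the type is
     * already registered, so this is safe to call from application
     * code after EAL init. */
    static void bnx2x_quiet_driver_logs(void)
    {
            int lt = rte_log_register("pmd.net.bnx2x.driver");

            if (lt >= 0)
                    rte_log_set_level(lt, RTE_LOG_NOTICE);
    }
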
index 0f6024f..7478072 100644
@@ -725,6 +725,13 @@ struct bnx2x_port {
 
     uint32_t    phy_addr;
 
+       /* Used to synchronize phy accesses. */
+       rte_spinlock_t  phy_mtx;
+       char            phy_mtx_name[32];
+
+#define BNX2X_PHY_LOCK(sc)          rte_spinlock_lock(&sc->port.phy_mtx)
+#define BNX2X_PHY_UNLOCK(sc)        rte_spinlock_unlock(&sc->port.phy_mtx)
+
     /*
      * MCP scratchpad address for port specific statistics.
       * The device is responsible for writing statistics
@@ -803,6 +810,10 @@ struct bnx2x_mf_info {
 
 /* Device information data structure. */
 struct bnx2x_devinfo {
+#if 1
+#define NAME_SIZE 128
+       char name[NAME_SIZE];
+#endif
        /* PCIe info */
        uint16_t vendor_id;
        uint16_t device_id;
@@ -820,6 +831,7 @@ struct bnx2x_devinfo {
 #define CHIP_ID(sc)           ((sc)->devinfo.chip_id & 0xffff0000)
 #define CHIP_NUM(sc)          ((sc)->devinfo.chip_id >> 16)
 /* device ids */
+#define CHIP_NUM_57710        0x164e
 #define CHIP_NUM_57711        0x164f
 #define CHIP_NUM_57711E       0x1650
 #define CHIP_NUM_57712        0x1662
@@ -861,6 +873,8 @@ struct bnx2x_devinfo {
 #define CHIP_METAL(sc)      ((sc->devinfo.chip_id) & 0x00000ff0)
 #define CHIP_BOND_ID(sc)    ((sc->devinfo.chip_id) & 0x0000000f)
 
+#define CHIP_IS_E1(sc)      (CHIP_NUM(sc) == CHIP_NUM_57710)
+#define CHIP_IS_57710(sc)   (CHIP_NUM(sc) == CHIP_NUM_57710)
 #define CHIP_IS_57711(sc)   (CHIP_NUM(sc) == CHIP_NUM_57711)
 #define CHIP_IS_57711E(sc)  (CHIP_NUM(sc) == CHIP_NUM_57711E)
 #define CHIP_IS_E1H(sc)     ((CHIP_IS_57711(sc)) || \
@@ -1418,7 +1432,7 @@ struct bnx2x_func_init_params {
 static inline void
 bnx2x_reg_write8(struct bnx2x_softc *sc, size_t offset, uint8_t val)
 {
-       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x",
+       PMD_DEBUG_PERIODIC_LOG(DEBUG, sc, "offset=0x%08lx val=0x%02x",
                               (unsigned long)offset, val);
        rte_write8(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset));
 }
@@ -1428,10 +1442,10 @@ bnx2x_reg_write16(struct bnx2x_softc *sc, size_t offset, uint16_t val)
 {
 #ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC
        if ((offset % 2) != 0)
-               PMD_DRV_LOG(NOTICE, "Unaligned 16-bit write to 0x%08lx",
+               PMD_DRV_LOG(NOTICE, sc, "Unaligned 16-bit write to 0x%08lx",
                            (unsigned long)offset);
 #endif
-       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%04x",
+       PMD_DEBUG_PERIODIC_LOG(DEBUG, sc, "offset=0x%08lx val=0x%04x",
                               (unsigned long)offset, val);
        rte_write16(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset));
 
@@ -1442,11 +1456,11 @@ bnx2x_reg_write32(struct bnx2x_softc *sc, size_t offset, uint32_t val)
 {
 #ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC
        if ((offset % 4) != 0)
-               PMD_DRV_LOG(NOTICE, "Unaligned 32-bit write to 0x%08lx",
+               PMD_DRV_LOG(NOTICE, sc, "Unaligned 32-bit write to 0x%08lx",
                            (unsigned long)offset);
 #endif
 
-       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x",
+       PMD_DEBUG_PERIODIC_LOG(DEBUG, sc, "offset=0x%08lx val=0x%08x",
                               (unsigned long)offset, val);
        rte_write32(val, ((uint8_t *)sc->bar[BAR0].base_addr + offset));
 }
@@ -1457,7 +1471,7 @@ bnx2x_reg_read8(struct bnx2x_softc *sc, size_t offset)
        uint8_t val;
 
        val = rte_read8((uint8_t *)sc->bar[BAR0].base_addr + offset);
-       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%02x",
+       PMD_DEBUG_PERIODIC_LOG(DEBUG, sc, "offset=0x%08lx val=0x%02x",
                               (unsigned long)offset, val);
 
        return val;
@@ -1470,12 +1484,12 @@ bnx2x_reg_read16(struct bnx2x_softc *sc, size_t offset)
 
 #ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC
        if ((offset % 2) != 0)
-               PMD_DRV_LOG(NOTICE, "Unaligned 16-bit read from 0x%08lx",
+               PMD_DRV_LOG(NOTICE, sc, "Unaligned 16-bit read from 0x%08lx",
                            (unsigned long)offset);
 #endif
 
        val = rte_read16(((uint8_t *)sc->bar[BAR0].base_addr + offset));
-       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x",
+       PMD_DEBUG_PERIODIC_LOG(DEBUG, sc, "offset=0x%08lx val=0x%08x",
                               (unsigned long)offset, val);
 
        return val;
@@ -1488,12 +1502,12 @@ bnx2x_reg_read32(struct bnx2x_softc *sc, size_t offset)
 
 #ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC
        if ((offset % 4) != 0)
-               PMD_DRV_LOG(NOTICE, "Unaligned 32-bit read from 0x%08lx",
+               PMD_DRV_LOG(NOTICE, sc, "Unaligned 32-bit read from 0x%08lx",
                            (unsigned long)offset);
 #endif
 
        val = rte_read32(((uint8_t *)sc->bar[BAR0].base_addr + offset));
-       PMD_DEBUG_PERIODIC_LOG(DEBUG, "offset=0x%08lx val=0x%08x",
+       PMD_DEBUG_PERIODIC_LOG(DEBUG, sc, "offset=0x%08lx val=0x%08x",
                               (unsigned long)offset, val);
 
        return val;
@@ -1970,7 +1984,7 @@ bnx2x_set_rx_mode(struct bnx2x_softc *sc)
                        bnx2x_vf_set_rx_mode(sc);
                }
        } else {
-               PMD_DRV_LOG(NOTICE, "Card is not ready to change mode");
+               PMD_DRV_LOG(NOTICE, sc, "Card is not ready to change mode");
        }
 }
 
@@ -1978,7 +1992,7 @@ static inline int pci_read(struct bnx2x_softc *sc, size_t addr,
                           void *val, uint8_t size)
 {
        if (rte_pci_read_config(sc->pci_dev, val, size, addr) <= 0) {
-               PMD_DRV_LOG(ERR, "Can't read from PCI config space");
+               PMD_DRV_LOG(ERR, sc, "Can't read from PCI config space");
                return ENXIO;
        }
 
@@ -1991,7 +2005,7 @@ static inline int pci_write_word(struct bnx2x_softc *sc, size_t addr, off_t val)
 
        if (rte_pci_write_config(sc->pci_dev, &val16,
                                     sizeof(val16), addr) <= 0) {
-               PMD_DRV_LOG(ERR, "Can't write to PCI config space");
+               PMD_DRV_LOG(ERR, sc, "Can't write to PCI config space");
                return ENXIO;
        }
 
@@ -2003,7 +2017,7 @@ static inline int pci_write_long(struct bnx2x_softc *sc, size_t addr, off_t val)
        uint32_t val32 = val;
        if (rte_pci_write_config(sc->pci_dev, &val32,
                                     sizeof(val32), addr) <= 0) {
-               PMD_DRV_LOG(ERR, "Can't write to PCI config space");
+               PMD_DRV_LOG(ERR, sc, "Can't write to PCI config space");
                return ENXIO;
        }
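
Note: the header now carries a spinlock plus lock/unlock macros for PHY access and a per-device name buffer used by the logging macros. The lock needs one-time initialization before the first BNX2X_PHY_LOCK() user; a minimal sketch with DPDK's spinlock API, assuming this runs from bnx2x_attach():

    #include <stdio.h>
    #include <rte_spinlock.h>

    /* Sketch: one-time init of the PHY lock declared above. */
    static void bnx2x_init_phy_lock(struct bnx2x_softc *sc)
    {
            rte_spinlock_init(&sc->port.phy_mtx);
            snprintf(sc->port.phy_mtx_name, sizeof(sc->port.phy_mtx_name),
                     "bnx2x_phy_lock");
    }
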
 
index 575271a..0057843 100644
@@ -85,7 +85,7 @@ bnx2x_link_update(struct rte_eth_dev *dev)
        struct bnx2x_softc *sc = dev->data->dev_private;
        struct rte_eth_link link;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        bnx2x_link_status_update(sc);
        memset(&link, 0, sizeof(link));
@@ -129,9 +129,11 @@ bnx2x_interrupt_handler(void *param)
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
        struct bnx2x_softc *sc = dev->data->dev_private;
 
-       PMD_DEBUG_PERIODIC_LOG(INFO, "Interrupt handled");
+       PMD_DEBUG_PERIODIC_LOG(INFO, sc, "Interrupt handled");
 
+       atomic_store_rel_long(&sc->periodic_flags, PERIODIC_STOP);
        bnx2x_interrupt_action(dev);
+       atomic_store_rel_long(&sc->periodic_flags, PERIODIC_GO);
        rte_intr_enable(&sc->pci_dev->intr_handle);
 }
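
Note: the interrupt handler now parks the alarm-driven periodic task (PERIODIC_STOP) before running the slow path and releases it (PERIODIC_GO) afterwards, so bnx2x_interrupt_action() cannot race the timer context. A sketch of the callout side of that handshake, assuming the flag is polled with the driver's FreeBSD-style atomic_load_acq_long() compat helper:

    #include <rte_alarm.h>
    #include "bnx2x.h"   /* struct bnx2x_softc, PERIODIC_* flags -- driver header */

    /* Sketch of the periodic callout gated by periodic_flags; the period
     * and flag values come from the surrounding hunks. */
    static void bnx2x_periodic_sketch(void *param)
    {
            struct rte_eth_dev *dev = param;
            struct bnx2x_softc *sc = dev->data->dev_private;

            if (atomic_load_acq_long(&sc->periodic_flags) == PERIODIC_GO)
                    bnx2x_periodic_callout(sc); /* skipped while an IRQ runs */

            rte_eal_alarm_set(BNX2X_SP_TIMER_PERIOD,
                              bnx2x_periodic_sketch, param);
    }
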
 
@@ -147,8 +149,8 @@ static void bnx2x_periodic_start(void *param)
                ret = rte_eal_alarm_set(BNX2X_SP_TIMER_PERIOD,
                                        bnx2x_periodic_start, (void *)dev);
                if (ret) {
-                       PMD_DRV_LOG(ERR, "Unable to start periodic"
-                                        " timer rc %d", ret);
+                       PMD_DRV_LOG(ERR, sc, "Unable to start periodic"
+                                            " timer rc %d", ret);
                        assert(false && "Unable to start periodic timer");
                }
        }
@@ -176,34 +178,34 @@ bnx2x_dev_configure(struct rte_eth_dev *dev)
 
        int mp_ncpus = sysconf(_SC_NPROCESSORS_CONF);
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        if (rxmode->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME)
                sc->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len;
 
        if (dev->data->nb_tx_queues > dev->data->nb_rx_queues) {
-               PMD_DRV_LOG(ERR, "The number of TX queues is greater than number of RX queues");
+               PMD_DRV_LOG(ERR, sc, "The number of TX queues is greater than number of RX queues");
                return -EINVAL;
        }
 
        sc->num_queues = MAX(dev->data->nb_rx_queues, dev->data->nb_tx_queues);
        if (sc->num_queues > mp_ncpus) {
-               PMD_DRV_LOG(ERR, "The number of queues is more than number of CPUs");
+               PMD_DRV_LOG(ERR, sc, "The number of queues is more than number of CPUs");
                return -EINVAL;
        }
 
-       PMD_DRV_LOG(DEBUG, "num_queues=%d, mtu=%d",
+       PMD_DRV_LOG(DEBUG, sc, "num_queues=%d, mtu=%d",
                       sc->num_queues, sc->mtu);
 
        /* allocate ilt */
        if (bnx2x_alloc_ilt_mem(sc) != 0) {
-               PMD_DRV_LOG(ERR, "bnx2x_alloc_ilt_mem was failed");
+               PMD_DRV_LOG(ERR, sc, "bnx2x_alloc_ilt_mem was failed");
                return -ENXIO;
        }
 
        /* allocate the host hardware/software hsi structures */
        if (bnx2x_alloc_hsi_mem(sc) != 0) {
-               PMD_DRV_LOG(ERR, "bnx2x_alloc_hsi_mem was failed");
+               PMD_DRV_LOG(ERR, sc, "bnx2x_alloc_hsi_mem was failed");
                bnx2x_free_ilt_mem(sc);
                return -ENXIO;
        }
@@ -217,7 +219,7 @@ bnx2x_dev_start(struct rte_eth_dev *dev)
        struct bnx2x_softc *sc = dev->data->dev_private;
        int ret = 0;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        /* start the periodic callout */
        if (sc->periodic_flags & PERIODIC_STOP)
@@ -225,7 +227,7 @@ bnx2x_dev_start(struct rte_eth_dev *dev)
 
        ret = bnx2x_init(sc);
        if (ret) {
-               PMD_DRV_LOG(DEBUG, "bnx2x_init failed (%d)", ret);
+               PMD_DRV_LOG(DEBUG, sc, "bnx2x_init failed (%d)", ret);
                return -1;
        }
 
@@ -234,12 +236,12 @@ bnx2x_dev_start(struct rte_eth_dev *dev)
                                bnx2x_interrupt_handler, (void *)dev);
 
                if (rte_intr_enable(&sc->pci_dev->intr_handle))
-                       PMD_DRV_LOG(ERR, "rte_intr_enable failed");
+                       PMD_DRV_LOG(ERR, sc, "rte_intr_enable failed");
        }
 
        ret = bnx2x_dev_rx_init(dev);
        if (ret != 0) {
-               PMD_DRV_LOG(DEBUG, "bnx2x_dev_rx_init returned error code");
+               PMD_DRV_LOG(DEBUG, sc, "bnx2x_dev_rx_init returned error code");
                return -3;
        }
 
@@ -255,7 +257,7 @@ bnx2x_dev_stop(struct rte_eth_dev *dev)
        struct bnx2x_softc *sc = dev->data->dev_private;
        int ret = 0;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        if (IS_PF(sc)) {
                rte_intr_disable(&sc->pci_dev->intr_handle);
@@ -268,7 +270,7 @@ bnx2x_dev_stop(struct rte_eth_dev *dev)
 
        ret = bnx2x_nic_unload(sc, UNLOAD_NORMAL, FALSE);
        if (ret) {
-               PMD_DRV_LOG(DEBUG, "bnx2x_nic_unload failed (%d)", ret);
+               PMD_DRV_LOG(DEBUG, sc, "bnx2x_nic_unload failed (%d)", ret);
                return;
        }
 
@@ -280,7 +282,7 @@ bnx2x_dev_close(struct rte_eth_dev *dev)
 {
        struct bnx2x_softc *sc = dev->data->dev_private;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        if (IS_VF(sc))
                bnx2x_vf_close(sc);
@@ -300,7 +302,7 @@ bnx2x_promisc_enable(struct rte_eth_dev *dev)
 {
        struct bnx2x_softc *sc = dev->data->dev_private;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
        sc->rx_mode = BNX2X_RX_MODE_PROMISC;
        if (rte_eth_allmulticast_get(dev->data->port_id) == 1)
                sc->rx_mode = BNX2X_RX_MODE_ALLMULTI_PROMISC;
@@ -312,7 +314,7 @@ bnx2x_promisc_disable(struct rte_eth_dev *dev)
 {
        struct bnx2x_softc *sc = dev->data->dev_private;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
        sc->rx_mode = BNX2X_RX_MODE_NORMAL;
        if (rte_eth_allmulticast_get(dev->data->port_id) == 1)
                sc->rx_mode = BNX2X_RX_MODE_ALLMULTI;
@@ -324,7 +326,7 @@ bnx2x_dev_allmulticast_enable(struct rte_eth_dev *dev)
 {
        struct bnx2x_softc *sc = dev->data->dev_private;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
        sc->rx_mode = BNX2X_RX_MODE_ALLMULTI;
        if (rte_eth_promiscuous_get(dev->data->port_id) == 1)
                sc->rx_mode = BNX2X_RX_MODE_ALLMULTI_PROMISC;
@@ -336,7 +338,7 @@ bnx2x_dev_allmulticast_disable(struct rte_eth_dev *dev)
 {
        struct bnx2x_softc *sc = dev->data->dev_private;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
        sc->rx_mode = BNX2X_RX_MODE_NORMAL;
        if (rte_eth_promiscuous_get(dev->data->port_id) == 1)
                sc->rx_mode = BNX2X_RX_MODE_PROMISC;
@@ -346,7 +348,9 @@ bnx2x_dev_allmulticast_disable(struct rte_eth_dev *dev)
 static int
 bnx2x_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete)
 {
-       PMD_INIT_FUNC_TRACE();
+       struct bnx2x_softc *sc = dev->data->dev_private;
+
+       PMD_INIT_FUNC_TRACE(sc);
 
        return bnx2x_link_update(dev);
 }
@@ -361,7 +365,7 @@ bnx2xvf_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_comple
 
        bnx2x_check_bull(sc);
        if (sc->old_bulletin.valid_bitmap & (1 << CHANNEL_DOWN)) {
-               PMD_DRV_LOG(ERR, "PF indicated channel is down."
+               PMD_DRV_LOG(ERR, sc, "PF indicated channel is down. "
                                "VF device is no longer operational");
                dev->data->dev_link.link_status = ETH_LINK_DOWN;
        }
@@ -377,7 +381,7 @@ bnx2x_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
        uint64_t brb_drops;
        uint64_t brb_truncates;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        bnx2x_stats_handle(sc, STATS_EVENT_UPDATE);
 
@@ -568,27 +572,35 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf)
 {
        int ret = 0;
        struct rte_pci_device *pci_dev;
+       struct rte_pci_addr pci_addr;
        struct bnx2x_softc *sc;
 
-       PMD_INIT_FUNC_TRACE();
+       /* Extract key data structures */
+       sc = eth_dev->data->dev_private;
+       pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
+       pci_addr = pci_dev->addr;
+
+       snprintf(sc->devinfo.name, NAME_SIZE, PCI_SHORT_PRI_FMT ":dpdk-port-%u",
+                pci_addr.bus, pci_addr.devid, pci_addr.function,
+                eth_dev->data->port_id);
+
+       PMD_INIT_FUNC_TRACE(sc);
 
        eth_dev->dev_ops = is_vf ? &bnx2xvf_eth_dev_ops : &bnx2x_eth_dev_ops;
-       pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 
        rte_eth_copy_pci_info(eth_dev, pci_dev);
 
-       sc = eth_dev->data->dev_private;
        sc->pcie_bus    = pci_dev->addr.bus;
        sc->pcie_device = pci_dev->addr.devid;
 
-       if (is_vf)
-               sc->flags = BNX2X_IS_VF_FLAG;
-
        sc->devinfo.vendor_id    = pci_dev->id.vendor_id;
        sc->devinfo.device_id    = pci_dev->id.device_id;
        sc->devinfo.subvendor_id = pci_dev->id.subsystem_vendor_id;
        sc->devinfo.subdevice_id = pci_dev->id.subsystem_device_id;
 
+       if (is_vf)
+               sc->flags = BNX2X_IS_VF_FLAG;
+
        sc->pcie_func = pci_dev->addr.function;
        sc->bar[BAR0].base_addr = (void *)pci_dev->mem_resource[0].addr;
        if (is_vf)
@@ -616,7 +628,7 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf)
        sc->pci_dev = pci_dev;
        ret = bnx2x_attach(sc);
        if (ret) {
-               PMD_DRV_LOG(ERR, "bnx2x_attach failed (%d)", ret);
+               PMD_DRV_LOG(ERR, sc, "bnx2x_attach failed (%d)", ret);
                return ret;
        }
 
@@ -625,21 +637,21 @@ bnx2x_common_dev_init(struct rte_eth_dev *eth_dev, int is_vf)
                ret = rte_eal_alarm_set(BNX2X_SP_TIMER_PERIOD,
                                        bnx2x_periodic_start, (void *)eth_dev);
                if (ret) {
-                       PMD_DRV_LOG(ERR, "Unable to start periodic"
-                                         " timer rc %d", ret);
+                       PMD_DRV_LOG(ERR, sc, "Unable to start periodic"
+                                            " timer rc %d", ret);
                        return -EINVAL;
                }
        }
 
        eth_dev->data->mac_addrs = (struct ether_addr *)sc->link_params.mac_addr;
 
-       PMD_DRV_LOG(INFO, "pcie_bus=%d, pcie_device=%d",
+       PMD_DRV_LOG(INFO, sc, "pcie_bus=%d, pcie_device=%d",
                        sc->pcie_bus, sc->pcie_device);
-       PMD_DRV_LOG(INFO, "bar0.addr=%p, bar1.addr=%p",
+       PMD_DRV_LOG(INFO, sc, "bar0.addr=%p, bar1.addr=%p",
                        sc->bar[BAR0].base_addr, sc->bar[BAR1].base_addr);
-       PMD_DRV_LOG(INFO, "port=%d, path=%d, vnic=%d, func=%d",
+       PMD_DRV_LOG(INFO, sc, "port=%d, path=%d, vnic=%d, func=%d",
                        PORT_ID(sc), PATH_ID(sc), VNIC_ID(sc), FUNC_ID(sc));
-       PMD_DRV_LOG(INFO, "portID=%d vendorID=0x%x deviceID=0x%x",
+       PMD_DRV_LOG(INFO, sc, "portID=%d vendorID=0x%x deviceID=0x%x",
                        eth_dev->data->port_id, pci_dev->id.vendor_id, pci_dev->id.device_id);
 
        if (IS_VF(sc)) {
@@ -679,14 +691,16 @@ out:
 static int
 eth_bnx2x_dev_init(struct rte_eth_dev *eth_dev)
 {
-       PMD_INIT_FUNC_TRACE();
+       struct bnx2x_softc *sc = eth_dev->data->dev_private;
+       PMD_INIT_FUNC_TRACE(sc);
        return bnx2x_common_dev_init(eth_dev, 0);
 }
 
 static int
 eth_bnx2xvf_dev_init(struct rte_eth_dev *eth_dev)
 {
-       PMD_INIT_FUNC_TRACE();
+       struct bnx2x_softc *sc = eth_dev->data->dev_private;
+       PMD_INIT_FUNC_TRACE(sc);
        return bnx2x_common_dev_init(eth_dev, 1);
 }
 
index 9e232a9..753bccd 100644 (file)
@@ -9,11 +9,11 @@
 #define _PMD_LOGS_H_
 
 extern int bnx2x_logtype_init;
-#define PMD_INIT_LOG(level, fmt, args...) \
-       rte_log(RTE_LOG_ ## level, bnx2x_logtype_init, \
-               "%s(): " fmt "\n", __func__, ##args)
+#define PMD_INIT_LOG(level, sc, fmt, args...) \
+       RTE_LOG(level, PMD, \
+       "[bnx2x_pmd: %s] %s() " fmt "\n", (sc)->devinfo.name, __func__, ##args)
 
-#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
+#define PMD_INIT_FUNC_TRACE(sc) PMD_INIT_LOG(DEBUG, sc, " >>")
 
 #ifdef RTE_LIBRTE_BNX2X_DEBUG_RX
 #define PMD_RX_LOG(level, fmt, args...) \
@@ -37,18 +37,19 @@ extern int bnx2x_logtype_init;
 #endif
 
 extern int bnx2x_logtype_driver;
-#define PMD_DRV_LOG_RAW(level, fmt, args...) \
-       rte_log(RTE_LOG_ ## level, bnx2x_logtype_driver, \
-               "%s(): " fmt, __func__, ## args)
+#define PMD_DRV_LOG_RAW(level, sc, fmt, args...) \
+       RTE_LOG(level, PMD, "[%s:%d(%s)] " fmt, __func__, __LINE__, \
+               (sc)->devinfo.name ? (sc)->devinfo.name : "", ## args)
 
-#define PMD_DRV_LOG(level, fmt, args...) \
-       PMD_DRV_LOG_RAW(level, fmt "\n", ## args)
+#define PMD_DRV_LOG(level, sc, fmt, args...) \
+       PMD_DRV_LOG_RAW(level, sc, fmt "\n", ## args)
 
 #ifdef RTE_LIBRTE_BNX2X_DEBUG_PERIODIC
-#define PMD_DEBUG_PERIODIC_LOG(level, fmt, args...) \
-       RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+#define PMD_DEBUG_PERIODIC_LOG(level, sc, fmt, args...) \
+       RTE_LOG(level, PMD, "%s(%s): " fmt "\n", __func__, \
+               (sc)->devinfo.name ? (sc)->devinfo.name : "", ## args)
 #else
-#define PMD_DEBUG_PERIODIC_LOG(level, fmt, args...) do { } while(0)
+#define PMD_DEBUG_PERIODIC_LOG(level, sc, fmt, args...) do { } while (0)
 #endif
 
 #endif /* _PMD_LOGS_H_ */
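
To show what the reworked driver-log macro emits, here is an illustrative,
compilable reduction: RTE_LOG() is replaced by a printf() stand-in, the
softc is cut down to the one field the macros touch, and the NULL-name
ternary from the real header is dropped for brevity.

#include <stdio.h>

struct bnx2x_softc {
	struct { char name[32]; } devinfo;
};

/* printf() stand-in; the real RTE_LOG() routes through DPDK's log levels */
#define RTE_LOG(level, type, fmt, args...) printf(fmt, ##args)

#define PMD_DRV_LOG_RAW(level, sc, fmt, args...) \
	RTE_LOG(level, PMD, "[%s:%d(%s)] " fmt, __func__, __LINE__, \
		(sc)->devinfo.name, ##args)

#define PMD_DRV_LOG(level, sc, fmt, args...) \
	PMD_DRV_LOG_RAW(level, sc, fmt "\n", ##args)

int main(void)
{
	struct bnx2x_softc sc = {
		.devinfo = { .name = "02:00.0:dpdk-port-0" }
	};

	/* emits: [main:NN(02:00.0:dpdk-port-0)] rte_intr_enable failed */
	PMD_DRV_LOG(ERR, &sc, "rte_intr_enable failed");
	return 0;
}
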
index d9a4127..ca28aac 100644 (file)
@@ -12,19 +12,8 @@ static const struct rte_memzone *
 ring_dma_zone_reserve(struct rte_eth_dev *dev, const char *ring_name,
                      uint16_t queue_id, uint32_t ring_size, int socket_id)
 {
-       char z_name[RTE_MEMZONE_NAMESIZE];
-       const struct rte_memzone *mz;
-
-       snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-                       dev->device->driver->name, ring_name,
-                       dev->data->port_id, queue_id);
-
-       mz = rte_memzone_lookup(z_name);
-       if (mz)
-               return mz;
-
-       return rte_memzone_reserve_aligned(z_name, ring_size, socket_id,
-                       RTE_MEMZONE_IOVA_CONTIG, BNX2X_PAGE_SIZE);
+       return rte_eth_dma_zone_reserve(dev, ring_name, queue_id,
+                       ring_size, BNX2X_PAGE_SIZE, socket_id);
 }
 
 static void
@@ -76,7 +65,7 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev,
        rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct bnx2x_rx_queue),
                                 RTE_CACHE_LINE_SIZE, socket_id);
        if (NULL == rxq) {
-               PMD_INIT_LOG(ERR, "rte_zmalloc for rxq failed!");
+               PMD_DRV_LOG(ERR, sc, "rte_zmalloc for rxq failed!");
                return -ENOMEM;
        }
        rxq->sc = sc;
@@ -92,7 +81,7 @@ bnx2x_dev_rx_queue_setup(struct rte_eth_dev *dev,
        sc->rx_ring_size = USABLE_RX_BD(rxq);
        rxq->nb_cq_pages = RCQ_BD_PAGES(rxq);
 
-       PMD_INIT_LOG(DEBUG, "fp[%02d] req_bd=%u, usable_bd=%lu, "
+       PMD_DRV_LOG(DEBUG, sc, "fp[%02d] req_bd=%u, usable_bd=%lu, "
                       "total_bd=%lu, rx_pages=%u, cq_pages=%u",
                       queue_idx, nb_desc, (unsigned long)USABLE_RX_BD(rxq),
                       (unsigned long)TOTAL_RX_BD(rxq), rxq->nb_rx_pages,
@@ -275,7 +264,7 @@ bnx2x_dev_tx_queue_setup(struct rte_eth_dev *dev,
        txq->tx_free_thresh = min(txq->tx_free_thresh,
                                  txq->nb_tx_desc - BDS_PER_TX_PKT);
 
-       PMD_INIT_LOG(DEBUG, "fp[%02d] req_bd=%u, thresh=%u, usable_bd=%lu, "
+       PMD_DRV_LOG(DEBUG, sc, "fp[%02d] req_bd=%u, thresh=%u, usable_bd=%lu, "
                     "total_bd=%lu, tx_pages=%u",
                     queue_idx, nb_desc, txq->tx_free_thresh,
                     (unsigned long)USABLE_TX_BD(txq),
@@ -301,7 +290,7 @@ bnx2x_dev_tx_queue_setup(struct rte_eth_dev *dev,
                return -ENOMEM;
        }
 
-       /* PMD_DRV_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
+       /* PMD_DRV_LOG(DEBUG, sc, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
           txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr); */
 
        /* Link TX pages */
@@ -310,7 +299,9 @@ bnx2x_dev_tx_queue_setup(struct rte_eth_dev *dev,
                busaddr = txq->tx_ring_phys_addr + BNX2X_PAGE_SIZE * (i % txq->nb_tx_pages);
                tx_n_bd->addr_hi = rte_cpu_to_le_32(U64_HI(busaddr));
                tx_n_bd->addr_lo = rte_cpu_to_le_32(U64_LO(busaddr));
-               /* PMD_DRV_LOG(DEBUG, "link tx page %lu", (TOTAL_TX_BD_PER_PAGE * i - 1)); */
+               /* PMD_DRV_LOG(DEBUG, sc, "link tx page %lu",
+                *          (TOTAL_TX_BD_PER_PAGE * i - 1));
+                */
        }
 
        txq->queue_id = queue_idx;
@@ -461,9 +452,10 @@ bnx2x_dev_rx_init(struct rte_eth_dev *dev)
 void
 bnx2x_dev_clear_queues(struct rte_eth_dev *dev)
 {
+       struct bnx2x_softc *sc = dev->data->dev_private;
        uint8_t i;
 
-       PMD_INIT_FUNC_TRACE();
+       PMD_INIT_FUNC_TRACE(sc);
 
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                struct bnx2x_tx_queue *txq = dev->data->tx_queues[i];
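
The ring_dma_zone_reserve() hunk above replaces the driver's open-coded
lookup-then-reserve with the ethdev helper. For reference, a sketch of the
equivalent logic, reconstructed from the lines this patch deletes (the
actual rte_eth_dma_zone_reserve() in librte_ethdev may differ in detail):

static const struct rte_memzone *
dma_zone_reserve_equiv(struct rte_eth_dev *dev, const char *ring_name,
		       uint16_t queue_id, uint32_t ring_size,
		       uint32_t align, int socket_id)
{
	char z_name[RTE_MEMZONE_NAMESIZE];
	const struct rte_memzone *mz;

	/* one zone per driver/ring/port/queue tuple */
	snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
		 dev->device->driver->name, ring_name,
		 dev->data->port_id, queue_id);

	/* reuse an existing zone, e.g. across a restart */
	mz = rte_memzone_lookup(z_name);
	if (mz)
		return mz;

	/* otherwise reserve a fresh aligned, IOVA-contiguous zone */
	return rte_memzone_reserve_aligned(z_name, ring_size, socket_id,
					   RTE_MEMZONE_IOVA_CONTIG, align);
}
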
index edc86cc..1cd9725 100644 (file)
@@ -82,7 +82,7 @@ bnx2x_storm_stats_post(struct bnx2x_softc *sc)
                sc->fw_stats_req->hdr.drv_stats_counter =
                        htole16(sc->stats_counter++);
 
-               PMD_DEBUG_PERIODIC_LOG(DEBUG,
+               PMD_DEBUG_PERIODIC_LOG(DEBUG, sc,
                                "sending statistics ramrod %d",
                                le16toh(sc->fw_stats_req->hdr.drv_stats_counter));
 
@@ -154,7 +154,7 @@ bnx2x_stats_comp(struct bnx2x_softc *sc)
 
        while (*stats_comp != DMAE_COMP_VAL) {
                if (!cnt) {
-                       PMD_DRV_LOG(ERR, "Timeout waiting for stats finished");
+                       PMD_DRV_LOG(ERR, sc, "Timeout waiting for stats finished");
                        break;
                }
 
@@ -189,7 +189,7 @@ bnx2x_stats_pmf_update(struct bnx2x_softc *sc)
        }
        /* sanity */
        if (!sc->port.pmf || !sc->port.port_stx) {
-               PMD_DRV_LOG(ERR, "BUG!");
+               PMD_DRV_LOG(ERR, sc, "BUG!");
                return;
        }
 
@@ -239,7 +239,7 @@ bnx2x_port_stats_init(struct bnx2x_softc *sc)
 
     /* sanity */
     if (!sc->link_vars.link_up || !sc->port.pmf) {
-       PMD_DRV_LOG(ERR, "BUG!");
+       PMD_DRV_LOG(ERR, sc, "BUG!");
        return;
     }
 
@@ -463,7 +463,7 @@ bnx2x_func_stats_init(struct bnx2x_softc *sc)
 
     /* sanity */
     if (!sc->func_stx) {
-       PMD_DRV_LOG(ERR, "BUG!");
+       PMD_DRV_LOG(ERR, sc, "BUG!");
        return;
     }
 
@@ -797,12 +797,12 @@ bnx2x_hw_stats_update(struct bnx2x_softc *sc)
        break;
 
     case ELINK_MAC_TYPE_NONE: /* unreached */
-       PMD_DRV_LOG(DEBUG,
+       PMD_DRV_LOG(DEBUG, sc,
              "stats updated by DMAE but no MAC active");
        return -1;
 
     default: /* unreached */
-       PMD_DRV_LOG(ERR, "stats update failed, unknown MAC type");
+       PMD_DRV_LOG(ERR, sc, "stats update failed, unknown MAC type");
     }
 
     ADD_EXTEND_64(pstats->brb_drop_hi, pstats->brb_drop_lo,
@@ -837,7 +837,7 @@ bnx2x_hw_stats_update(struct bnx2x_softc *sc)
        nig_timer_max = SHMEM_RD(sc, port_mb[SC_PORT(sc)].stat_nig_timer);
        if (nig_timer_max != estats->nig_timer_max) {
            estats->nig_timer_max = nig_timer_max;
-           PMD_DRV_LOG(ERR, "invalid NIG timer max (%u)",
+           PMD_DRV_LOG(ERR, sc, "invalid NIG timer max (%u)",
                  estats->nig_timer_max);
        }
     }
@@ -859,7 +859,7 @@ bnx2x_storm_stats_validate_counters(struct bnx2x_softc *sc)
 
     /* are storm stats valid? */
     if (le16toh(counters->xstats_counter) != cur_stats_counter) {
-       PMD_DRV_LOG(DEBUG,
+       PMD_DRV_LOG(DEBUG, sc,
              "stats not updated by xstorm, "
              "counter 0x%x != stats_counter 0x%x",
              le16toh(counters->xstats_counter), sc->stats_counter);
@@ -867,7 +867,7 @@ bnx2x_storm_stats_validate_counters(struct bnx2x_softc *sc)
     }
 
     if (le16toh(counters->ustats_counter) != cur_stats_counter) {
-       PMD_DRV_LOG(DEBUG,
+       PMD_DRV_LOG(DEBUG, sc,
              "stats not updated by ustorm, "
              "counter 0x%x != stats_counter 0x%x",
              le16toh(counters->ustats_counter), sc->stats_counter);
@@ -875,7 +875,7 @@ bnx2x_storm_stats_validate_counters(struct bnx2x_softc *sc)
     }
 
     if (le16toh(counters->cstats_counter) != cur_stats_counter) {
-       PMD_DRV_LOG(DEBUG,
+       PMD_DRV_LOG(DEBUG, sc,
              "stats not updated by cstorm, "
              "counter 0x%x != stats_counter 0x%x",
              le16toh(counters->cstats_counter), sc->stats_counter);
@@ -883,7 +883,7 @@ bnx2x_storm_stats_validate_counters(struct bnx2x_softc *sc)
     }
 
     if (le16toh(counters->tstats_counter) != cur_stats_counter) {
-       PMD_DRV_LOG(DEBUG,
+       PMD_DRV_LOG(DEBUG, sc,
              "stats not updated by tstorm, "
              "counter 0x%x != stats_counter 0x%x",
              le16toh(counters->tstats_counter), sc->stats_counter);
@@ -929,12 +929,13 @@ bnx2x_storm_stats_update(struct bnx2x_softc *sc)
 
                uint32_t diff;
 
-               /* PMD_DRV_LOG(DEBUG,
+               /* PMD_DRV_LOG(DEBUG, sc,
                                "queue[%d]: ucast_sent 0x%x bcast_sent 0x%x mcast_sent 0x%x",
                                i, xclient->ucast_pkts_sent, xclient->bcast_pkts_sent,
                                xclient->mcast_pkts_sent);
 
-               PMD_DRV_LOG(DEBUG, "---------------"); */
+               PMD_DRV_LOG(DEBUG, sc, "---------------");
+                */
 
                UPDATE_QSTAT(tclient->rcv_bcast_bytes,
                                total_broadcast_bytes_received);
@@ -1288,7 +1289,7 @@ void bnx2x_stats_handle(struct bnx2x_softc *sc, enum bnx2x_stats_event event)
        bnx2x_stats_stm[state][event].action(sc);
 
        if (event != STATS_EVENT_UPDATE) {
-               PMD_DRV_LOG(DEBUG,
+               PMD_DRV_LOG(DEBUG, sc,
                                "state %d -> event %d -> state %d",
                                state, event, sc->stats_state);
        }
@@ -1302,7 +1303,7 @@ bnx2x_port_stats_base_init(struct bnx2x_softc *sc)
 
     /* sanity */
     if (!sc->port.pmf || !sc->port.port_stx) {
-       PMD_DRV_LOG(ERR, "BUG!");
+       PMD_DRV_LOG(ERR, sc, "BUG!");
        return;
     }
 
@@ -1474,7 +1475,7 @@ bnx2x_stats_init(struct bnx2x_softc *sc)
                sc->func_stx = 0;
        }
 
-       PMD_DRV_LOG(DEBUG, "port_stx 0x%x func_stx 0x%x",
+       PMD_DRV_LOG(DEBUG, sc, "port_stx 0x%x func_stx 0x%x",
                        sc->port.port_stx, sc->func_stx);
 
        /* pmf should retrieve port statistics from SP on a non-init*/
index 50099d4..048bf12 100644 (file)
@@ -37,12 +37,12 @@ bnx2x_check_bull(struct bnx2x_softc *sc)
                        if (bull->crc == bnx2x_vf_crc(bull))
                                break;
 
-                       PMD_DRV_LOG(ERR, "bad crc on bulletin board. contained %x computed %x",
+                       PMD_DRV_LOG(ERR, sc, "Bad CRC on bulletin board: contained %x, computed %x",
                                        bull->crc, bnx2x_vf_crc(bull));
                        ++tries;
                }
                if (tries == BNX2X_VF_BULLETIN_TRIES) {
-                       PMD_DRV_LOG(ERR, "pf to vf bulletin board crc was wrong %d consecutive times. Aborting",
+                       PMD_DRV_LOG(ERR, sc, "PF-to-VF bulletin board CRC was wrong %d consecutive times. Aborting",
                                        tries);
                        return FALSE;
                }
@@ -82,7 +82,7 @@ bnx2x_vf_prep(struct bnx2x_softc *sc, struct vf_first_tlv *first_tlv,
 
        rte_spinlock_lock(&sc->vf2pf_lock);
 
-       PMD_DRV_LOG(DEBUG, "Preparing %d tlv for sending", type);
+       PMD_DRV_LOG(DEBUG, sc, "Preparing %d tlv for sending", type);
 
        memset(mbox, 0, sizeof(struct bnx2x_vf_mbx_msg));
 
@@ -97,7 +97,7 @@ static void
 bnx2x_vf_finalize(struct bnx2x_softc *sc,
                  __rte_unused struct vf_first_tlv *first_tlv)
 {
-       PMD_DRV_LOG(DEBUG, "done sending [%d] tlv over vf pf channel",
+       PMD_DRV_LOG(DEBUG, sc, "done sending [%d] tlv over vf pf channel",
                    first_tlv->tl.type);
 
        rte_spinlock_unlock(&sc->vf2pf_lock);
@@ -116,14 +116,14 @@ bnx2x_do_req4pf(struct bnx2x_softc *sc, rte_iova_t phys_addr)
        uint8_t i;
 
        if (*status) {
-               PMD_DRV_LOG(ERR, "status should be zero before message"
+               PMD_DRV_LOG(ERR, sc, "status should be zero before a message"
                                 " to pf was sent");
                return -EINVAL;
        }
 
        bnx2x_check_bull(sc);
        if (sc->old_bulletin.valid_bitmap & (1 << CHANNEL_DOWN)) {
-               PMD_DRV_LOG(ERR, "channel is down. Aborting message sending");
+               PMD_DRV_LOG(ERR, sc, "channel is down. Aborting message sending");
                return -EINVAL;
        }
 
@@ -143,11 +143,11 @@ bnx2x_do_req4pf(struct bnx2x_softc *sc, rte_iova_t phys_addr)
        }
 
        if (!*status) {
-               PMD_DRV_LOG(ERR, "Response from PF timed out");
+               PMD_DRV_LOG(ERR, sc, "Response from PF timed out");
                return -EAGAIN;
        }
 
-       PMD_DRV_LOG(DEBUG, "Response from PF was received");
+       PMD_DRV_LOG(DEBUG, sc, "Response from PF was received");
        return 0;
 }
 
@@ -195,7 +195,7 @@ int bnx2x_loop_obtain_resources(struct bnx2x_softc *sc)
        int rc;
 
        do {
-               PMD_DRV_LOG(DEBUG, "trying to get resources");
+               PMD_DRV_LOG(DEBUG, sc, "trying to get resources");
 
                rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
                if (rc)
@@ -207,11 +207,11 @@ int bnx2x_loop_obtain_resources(struct bnx2x_softc *sc)
 
                /* check PF to request acceptance */
                if (sc_resp->status == BNX2X_VF_STATUS_SUCCESS) {
-                       PMD_DRV_LOG(DEBUG, "resources obtained successfully");
+                       PMD_DRV_LOG(DEBUG, sc, "resources obtained successfully");
                        res_obtained = true;
                } else if (sc_resp->status == BNX2X_VF_STATUS_NO_RESOURCES &&
                           tries < BNX2X_VF_OBTAIN_MAX_TRIES) {
-                       PMD_DRV_LOG(DEBUG,
+                       PMD_DRV_LOG(DEBUG, sc,
                           "PF cannot allocate requested amount of resources");
 
                        res_query = &sc->vf2pf_mbox->query[0].acquire.res_query;
@@ -227,7 +227,7 @@ int bnx2x_loop_obtain_resources(struct bnx2x_softc *sc)
 
                        memset(&sc->vf2pf_mbox->resp, 0, sizeof(union resp_tlvs));
                } else {
-                       PMD_DRV_LOG(ERR, "Failed to get the requested "
+                       PMD_DRV_LOG(ERR, sc, "Failed to get the requested "
                                         "amount of resources: %d.",
                                         sc_resp->status);
                        return -EINVAL;
@@ -296,7 +296,7 @@ int bnx2x_vf_get_resources(struct bnx2x_softc *sc, uint8_t tx_count, uint8_t rx_
        sc->doorbell_size = sc_resp.db_size;
        sc->flags |= BNX2X_NO_WOL_FLAG | BNX2X_NO_ISCSI_OOO_FLAG | BNX2X_NO_ISCSI_FLAG | BNX2X_NO_FCOE_FLAG;
 
-       PMD_DRV_LOG(DEBUG, "status block count = %d, base status block = %x",
+       PMD_DRV_LOG(DEBUG, sc, "status block count = %d, base status block = %x",
                sc->igu_sb_cnt, sc->igu_base_sb);
        strncpy(sc->fw_ver, sc_resp.fw_ver, sizeof(sc->fw_ver));
 
@@ -333,7 +333,7 @@ bnx2x_vf_close(struct bnx2x_softc *sc)
 
                rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
                if (rc || reply->status != BNX2X_VF_STATUS_SUCCESS)
-                       PMD_DRV_LOG(ERR, "Failed to release VF");
+                       PMD_DRV_LOG(ERR, sc, "Failed to release VF");
 
                bnx2x_vf_finalize(sc, &query->first_tlv);
        }
@@ -367,12 +367,12 @@ bnx2x_vf_init(struct bnx2x_softc *sc)
        if (rc)
                goto out;
        if (reply->status != BNX2X_VF_STATUS_SUCCESS) {
-               PMD_DRV_LOG(ERR, "Failed to init VF");
+               PMD_DRV_LOG(ERR, sc, "Failed to init VF");
                rc = -EINVAL;
                goto out;
        }
 
-       PMD_DRV_LOG(DEBUG, "VF was initialized");
+       PMD_DRV_LOG(DEBUG, sc, "VF was initialized");
 out:
        bnx2x_vf_finalize(sc, &query->first_tlv);
        return rc;
@@ -403,7 +403,7 @@ bnx2x_vf_unload(struct bnx2x_softc *sc)
 
                        rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
                        if (rc || reply->status != BNX2X_VF_STATUS_SUCCESS)
-                               PMD_DRV_LOG(ERR,
+                               PMD_DRV_LOG(ERR, sc,
                                            "Bad reply for vf_q %d teardown", i);
 
                        bnx2x_vf_finalize(sc, &query_op->first_tlv);
@@ -423,7 +423,7 @@ bnx2x_vf_unload(struct bnx2x_softc *sc)
 
                rc = bnx2x_do_req4pf(sc, sc->vf2pf_mbox_mapping.paddr);
                if (rc || reply->status != BNX2X_VF_STATUS_SUCCESS)
-                       PMD_DRV_LOG(ERR,
+                       PMD_DRV_LOG(ERR, sc,
                                    "Bad reply from PF for close message");
 
                bnx2x_vf_finalize(sc, &query->first_tlv);
@@ -450,7 +450,7 @@ bnx2x_vf_rx_q_prep(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 
        rxq = sc->rx_queues[fp->index];
        if (!rxq) {
-               PMD_DRV_LOG(ERR, "RX queue %d is NULL", fp->index);
+               PMD_DRV_LOG(ERR, sc, "RX queue %d is NULL", fp->index);
                return;
        }
 
@@ -474,7 +474,7 @@ bnx2x_vf_tx_q_prep(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp,
 
        txq = sc->tx_queues[fp->index];
        if (!txq) {
-               PMD_DRV_LOG(ERR, "TX queue %d is NULL", fp->index);
+               PMD_DRV_LOG(ERR, sc, "TX queue %d is NULL", fp->index);
                return;
        }
 
@@ -511,7 +511,7 @@ bnx2x_vf_setup_queue(struct bnx2x_softc *sc, struct bnx2x_fastpath *fp, int lead
        if (rc)
                goto out;
        if (reply->status != BNX2X_VF_STATUS_SUCCESS) {
-               PMD_DRV_LOG(ERR, "Failed to setup VF queue[%d]",
+               PMD_DRV_LOG(ERR, sc, "Failed to setup VF queue[%d]",
                                 fp->index);
                rc = -EINVAL;
        }
@@ -566,7 +566,7 @@ bnx2x_vf_set_mac(struct bnx2x_softc *sc, int set)
        }
 
        if (BNX2X_VF_STATUS_SUCCESS != reply->status) {
-               PMD_DRV_LOG(ERR, "Bad reply from PF for SET MAC message: %d",
+               PMD_DRV_LOG(ERR, sc, "Bad reply from PF for SET MAC message: %d",
                                reply->status);
                rc = -EINVAL;
        }
@@ -608,7 +608,7 @@ bnx2x_vf_config_rss(struct bnx2x_softc *sc,
                goto out;
 
        if (reply->status != BNX2X_VF_STATUS_SUCCESS) {
-               PMD_DRV_LOG(ERR, "Failed to configure RSS");
+               PMD_DRV_LOG(ERR, sc, "Failed to configure RSS");
                rc = -EINVAL;
        }
 out:
@@ -652,7 +652,7 @@ bnx2x_vf_set_rx_mode(struct bnx2x_softc *sc)
                query->rx_mask |= VFPF_RX_MASK_ACCEPT_BROADCAST;
                break;
        default:
-               PMD_DRV_LOG(ERR, "BAD rx mode (%d)", sc->rx_mode);
+               PMD_DRV_LOG(ERR, sc, "BAD rx mode (%d)", sc->rx_mode);
                rc = -EINVAL;
                goto out;
        }
@@ -666,7 +666,7 @@ bnx2x_vf_set_rx_mode(struct bnx2x_softc *sc)
                goto out;
 
        if (reply->status != BNX2X_VF_STATUS_SUCCESS) {
-               PMD_DRV_LOG(ERR, "Failed to set RX mode");
+               PMD_DRV_LOG(ERR, sc, "Failed to set RX mode");
                rc = -EINVAL;
        }
 
index 57085eb..1192e5d 100644 (file)
@@ -500,6 +500,18 @@ struct port_hw_cfg {                   /* port 0: 0x12c  port 1: 0x2bc */
        #define PORT_HW_CFG_TX_DRV_BROADCAST_MASK                     0x000F0000
        #define PORT_HW_CFG_TX_DRV_BROADCAST_SHIFT                    16
 
+       /*  Set non-default values for TXFIR in SFP mode. */
+       #define PORT_HW_CFG_TX_DRV_IFIR_MASK                          0x00F00000
+       #define PORT_HW_CFG_TX_DRV_IFIR_SHIFT                         20
+
+       /*  Set non-default values for IPREDRIVER in SFP mode. */
+       #define PORT_HW_CFG_TX_DRV_IPREDRIVER_MASK                    0x0F000000
+       #define PORT_HW_CFG_TX_DRV_IPREDRIVER_SHIFT                   24
+
+       /*  Set non-default values for POST2 in SFP mode. */
+       #define PORT_HW_CFG_TX_DRV_POST2_MASK                         0xF0000000
+       #define PORT_HW_CFG_TX_DRV_POST2_SHIFT                        28
+
        uint32_t reserved0[5];                              /* 0x17c */
 
        uint32_t aeu_int_mask;                              /* 0x190 */
@@ -783,6 +795,7 @@ struct port_hw_cfg {                    /* port 0: 0x12c  port 1: 0x2bc */
                #define PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8722        0x00000f00
                #define PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X54616       0x00001000
                #define PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834       0x00001100
+               #define PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84858       0x00001200
                #define PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT_WC      0x0000fc00
                #define PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE        0x0000fd00
                #define PORT_HW_CFG_XGXS_EXT_PHY_TYPE_NOT_CONN       0x0000ff00
@@ -2532,7 +2545,12 @@ struct shmem2_region {
        uint32_t drv_func_info_addr;                    /* Offset 0x14C */
        uint32_t drv_func_info_size;                    /* Offset 0x150 */
        uint32_t link_attr_sync[PORT_MAX];              /* Offset 0x154 */
-       #define LINK_ATTR_SYNC_KR2_ENABLE       (1<<0)
+       #define LINK_ATTR_SYNC_KR2_ENABLE       0x00000001
+       #define LINK_ATTR_84858                 0x00000002
+       #define LINK_SFP_EEPROM_COMP_CODE_MASK  0x0000ff00
+       #define LINK_SFP_EEPROM_COMP_CODE_SHIFT          8
+
+       uint32_t link_change_count[PORT_MAX];           /* Offset 0x160-0x164 */
 };
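
The new LINK_SFP_EEPROM_COMP_CODE mask/shift pair follows the header's
usual bit-field convention. A hypothetical accessor (not part of this
patch) showing the intended extraction from a link_attr_sync word:

static inline uint8_t
link_sfp_comp_code(uint32_t link_attr)
{
	/* isolate bits 15:8, then shift them down to a plain byte */
	return (uint8_t)((link_attr & LINK_SFP_EEPROM_COMP_CODE_MASK) >>
			 LINK_SFP_EEPROM_COMP_CODE_SHIFT);
}
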
 
 
index f2de07e..97dfe69 100644 (file)
@@ -741,7 +741,7 @@ static inline void ecore_disable_blocks_parity(struct bnx2x_softc *sc)
                if (dis_mask) {
                        REG_WR(sc, ecore_blocks_parity_data[i].mask_addr,
                               dis_mask);
-                       ECORE_MSG("Setting parity mask "
+                       ECORE_MSG(sc, "Setting parity mask "
                                                 "for %s to\t\t0x%x",
                                    ecore_blocks_parity_data[i].name, dis_mask);
                }
@@ -776,7 +776,7 @@ static inline void ecore_clear_blocks_parity(struct bnx2x_softc *sc)
                        reg_val = REG_RD(sc, ecore_blocks_parity_data[i].
                                         sts_clr_addr);
                        if (reg_val & reg_mask)
-                               ECORE_MSG("Parity errors in %s: 0x%x",
+                               ECORE_MSG(sc, "Parity errors in %s: 0x%x",
                                           ecore_blocks_parity_data[i].name,
                                           reg_val & reg_mask);
                }
@@ -785,7 +785,7 @@ static inline void ecore_clear_blocks_parity(struct bnx2x_softc *sc)
        /* Check if there were parity attentions in MCP */
        reg_val = REG_RD(sc, MISC_REG_AEU_AFTER_INVERT_4_MCP);
        if (reg_val & mcp_aeu_bits)
-               ECORE_MSG("Parity error in MCP: 0x%x",
+               ECORE_MSG(sc, "Parity error in MCP: 0x%x",
                           reg_val & mcp_aeu_bits);
 
        /* Clear parity attentions in MCP:
index 2b003af..733ad1a 100644 (file)
@@ -424,20 +424,20 @@ static void ecore_init_pxp_arb(struct bnx2x_softc *sc, int r_order,
        uint32_t val, i;
 
        if (r_order > MAX_RD_ORD) {
-               ECORE_MSG("read order of %d  order adjusted to %d",
+               ECORE_MSG(sc, "read order of %d  order adjusted to %d",
                           r_order, MAX_RD_ORD);
                r_order = MAX_RD_ORD;
        }
        if (w_order > MAX_WR_ORD) {
-               ECORE_MSG("write order of %d  order adjusted to %d",
+               ECORE_MSG(sc, "write order of %d  order adjusted to %d",
                           w_order, MAX_WR_ORD);
                w_order = MAX_WR_ORD;
        }
        if (CHIP_REV_IS_FPGA(sc)) {
-               ECORE_MSG("write order adjusted to 1 for FPGA");
+               ECORE_MSG(sc, "write order adjusted to 1 for FPGA");
                w_order = 0;
        }
-       ECORE_MSG("read order %d  write order %d", r_order, w_order);
+       ECORE_MSG(sc, "read order %d  write order %d", r_order, w_order);
 
        for (i = 0; i < NUM_RD_Q-1; i++) {
                REG_WR(sc, read_arb_addr[i].l, read_arb_data[i][r_order].l);
index ae8a93b..d69e857 100644 (file)
 #define HW_LOCK_MAX_RESOURCE_VALUE              31
 #define HW_LOCK_RESOURCE_DRV_FLAGS              10
 #define HW_LOCK_RESOURCE_GPIO                   1
+#define HW_LOCK_RESOURCE_MDIO                   0
 #define HW_LOCK_RESOURCE_NVRAM                  12
 #define HW_LOCK_RESOURCE_PORT0_ATT_MASK                 3
 #define HW_LOCK_RESOURCE_RECOVERY_LEADER_0      8
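
HW_LOCK_RESOURCE_MDIO assigns the MDIO bus its own id in the shared HW
lock space. A hypothetical caller, assuming the driver's existing
bnx2x_acquire_hw_lock()/bnx2x_release_hw_lock() helpers, would bracket
PHY register access like this:

static void
bnx2x_mdio_access_sketch(struct bnx2x_softc *sc)
{
	bnx2x_acquire_hw_lock(sc, HW_LOCK_RESOURCE_MDIO);
	/* ... read/write PHY registers over MDIO ... */
	bnx2x_release_hw_lock(sc, HW_LOCK_RESOURCE_MDIO);
}
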
index 0c8685c..ab730ab 100644 (file)
@@ -53,14 +53,14 @@ ecore_exe_queue_init(struct bnx2x_softc *sc __rte_unused,
        o->execute = exec;
        o->get = get;
 
-       ECORE_MSG("Setup the execution queue with the chunk length of %d",
+       ECORE_MSG(sc, "Setup the execution queue with the chunk length of %d",
                  exe_len);
 }
 
 static void ecore_exe_queue_free_elem(struct bnx2x_softc *sc __rte_unused,
                                      struct ecore_exeq_elem *elem)
 {
-       ECORE_MSG("Deleting an exe_queue element");
+       ECORE_MSG(sc, "Deleting an exe_queue element");
        ECORE_FREE(sc, elem, sizeof(*elem));
 }
 
@@ -106,7 +106,7 @@ static int ecore_exe_queue_add(struct bnx2x_softc *sc,
                /* Check if this request is ok */
                rc = o->validate(sc, o->owner, elem);
                if (rc) {
-                       ECORE_MSG("Preamble failed: %d", rc);
+                       ECORE_MSG(sc, "Preamble failed: %d", rc);
                        goto free_and_exit;
                }
        }
@@ -176,8 +176,8 @@ static int ecore_exe_queue_step(struct bnx2x_softc *sc,
         */
        if (!ECORE_LIST_IS_EMPTY(&o->pending_comp)) {
                if (ECORE_TEST_BIT(RAMROD_DRV_CLR_ONLY, ramrod_flags)) {
-                       ECORE_MSG
-                           ("RAMROD_DRV_CLR_ONLY requested: resetting a pending_comp list");
+                       ECORE_MSG(sc,
+                                 "RAMROD_DRV_CLR_ONLY requested: resetting a pending_comp list");
                        __ecore_exe_queue_reset_pending(sc, o);
                } else {
                        return ECORE_PENDING;
@@ -240,7 +240,7 @@ static struct ecore_exeq_elem *ecore_exe_queue_alloc_elem(struct
                                                          bnx2x_softc *sc
                                                          __rte_unused)
 {
-       ECORE_MSG("Allocating a new exe_queue element");
+       ECORE_MSG(sc, "Allocating a new exe_queue element");
        return ECORE_ZALLOC(sizeof(struct ecore_exeq_elem), GFP_ATOMIC, sc);
 }
 
@@ -290,14 +290,14 @@ static int ecore_state_wait(struct bnx2x_softc *sc, int state,
        if (CHIP_REV_IS_EMUL(sc))
                cnt *= 20;
 
-       ECORE_MSG("waiting for state to become %d", state);
+       ECORE_MSG(sc, "waiting for state to become %d", state);
 
        ECORE_MIGHT_SLEEP();
        while (cnt--) {
                bnx2x_intr_legacy(sc, 1);
                if (!ECORE_TEST_BIT(state, pstate)) {
 #ifdef ECORE_STOP_ON_ERROR
-                       ECORE_MSG("exit  (cnt %d)", 5000 - cnt);
+                       ECORE_MSG(sc, "exit  (cnt %d)", 5000 - cnt);
 #endif
                        return ECORE_SUCCESS;
                }
@@ -309,7 +309,7 @@ static int ecore_state_wait(struct bnx2x_softc *sc, int state,
        }
 
        /* timeout! */
-       PMD_DRV_LOG(ERR, "timeout waiting for state %d", state);
+       PMD_DRV_LOG(ERR, sc, "timeout waiting for state %d", state);
 #ifdef ECORE_STOP_ON_ERROR
        ecore_panic();
 #endif
@@ -370,11 +370,11 @@ static int __ecore_vlan_mac_h_write_trylock(struct bnx2x_softc *sc __rte_unused,
                                            struct ecore_vlan_mac_obj *o)
 {
        if (o->head_reader) {
-               ECORE_MSG("vlan_mac_lock writer - There are readers; Busy");
+               ECORE_MSG(sc, "vlan_mac_lock writer - There are readers; Busy");
                return ECORE_BUSY;
        }
 
-       ECORE_MSG("vlan_mac_lock writer - Taken");
+       ECORE_MSG(sc, "vlan_mac_lock writer - Taken");
        return ECORE_SUCCESS;
 }
 
@@ -394,13 +394,13 @@ static void __ecore_vlan_mac_h_exec_pending(struct bnx2x_softc *sc,
        int rc;
        unsigned long ramrod_flags = o->saved_ramrod_flags;
 
-       ECORE_MSG("vlan_mac_lock execute pending command with ramrod flags %lu",
+       ECORE_MSG(sc, "vlan_mac_lock execute pending command with ramrod flags %lu",
                  ramrod_flags);
        o->head_exe_request = FALSE;
        o->saved_ramrod_flags = 0;
        rc = ecore_exe_queue_step(sc, &o->exe_queue, &ramrod_flags);
        if (rc != ECORE_SUCCESS) {
-               PMD_DRV_LOG(ERR,
+               PMD_DRV_LOG(ERR, sc,
                            "execution of pending commands failed with rc %d",
                            rc);
 #ifdef ECORE_STOP_ON_ERROR
@@ -425,7 +425,7 @@ static void __ecore_vlan_mac_h_pend(struct bnx2x_softc *sc __rte_unused,
 {
        o->head_exe_request = TRUE;
        o->saved_ramrod_flags = ramrod_flags;
-       ECORE_MSG("Placing pending execution with ramrod flags %lu",
+       ECORE_MSG(sc, "Placing pending execution with ramrod flags %lu",
                  ramrod_flags);
 }
 
@@ -446,8 +446,8 @@ static void __ecore_vlan_mac_h_write_unlock(struct bnx2x_softc *sc,
         * executed. If so, execute again. [Ad infinitum]
         */
        while (o->head_exe_request) {
-               ECORE_MSG
-                   ("vlan_mac_lock - writer release encountered a pending request");
+               ECORE_MSG(sc,
+                         "vlan_mac_lock - writer release encountered a pending request");
                __ecore_vlan_mac_h_exec_pending(sc, o);
        }
 }
@@ -483,7 +483,8 @@ static int __ecore_vlan_mac_h_read_lock(struct bnx2x_softc *sc __rte_unused,
 {
        /* If we got here, we're holding lock --> no WRITER exists */
        o->head_reader++;
-       ECORE_MSG("vlan_mac_lock - locked reader - number %d", o->head_reader);
+       ECORE_MSG(sc,
+                 "vlan_mac_lock - locked reader - number %d", o->head_reader);
 
        return ECORE_SUCCESS;
 }
@@ -522,14 +523,14 @@ static void __ecore_vlan_mac_h_read_unlock(struct bnx2x_softc *sc,
                                           struct ecore_vlan_mac_obj *o)
 {
        if (!o->head_reader) {
-               PMD_DRV_LOG(ERR,
+               PMD_DRV_LOG(ERR, sc,
                            "Need to release vlan mac reader lock, but lock isn't taken");
 #ifdef ECORE_STOP_ON_ERROR
                ecore_panic();
 #endif
        } else {
                o->head_reader--;
-               PMD_DRV_LOG(INFO,
+               PMD_DRV_LOG(INFO, sc,
                            "vlan_mac_lock - decreased readers to %d",
                            o->head_reader);
        }
@@ -538,7 +539,7 @@ static void __ecore_vlan_mac_h_read_unlock(struct bnx2x_softc *sc,
         * was last - if so we need to execute the command.
         */
        if (!o->head_reader && o->head_exe_request) {
-               PMD_DRV_LOG(INFO,
+               PMD_DRV_LOG(INFO, sc,
                            "vlan_mac_lock - reader release encountered a pending request");
 
                /* Writer release will do the trick */
@@ -581,10 +582,10 @@ static int ecore_get_n_elements(struct bnx2x_softc *sc,
        uint8_t *next = base;
        int counter = 0, read_lock;
 
-       ECORE_MSG("get_n_elements - taking vlan_mac_lock (reader)");
+       ECORE_MSG(sc, "get_n_elements - taking vlan_mac_lock (reader)");
        read_lock = ecore_vlan_mac_h_read_lock(sc, o);
        if (read_lock != ECORE_SUCCESS)
-               PMD_DRV_LOG(ERR,
+               PMD_DRV_LOG(ERR, sc,
                            "get_n_elements failed to get vlan mac reader lock; Access without lock");
 
        /* traverse list */
@@ -593,15 +594,15 @@ static int ecore_get_n_elements(struct bnx2x_softc *sc,
                if (counter < n) {
                        ECORE_MEMCPY(next, &pos->u, size);
                        counter++;
-                       ECORE_MSG
-                           ("copied element number %d to address %p element was:",
+                       ECORE_MSG(sc,
+                           "copied element number %d to address %p element was:",
                             counter, next);
                        next += stride + size;
                }
        }
 
        if (read_lock == ECORE_SUCCESS) {
-               ECORE_MSG("get_n_elements - releasing vlan_mac_lock (reader)");
+               ECORE_MSG(sc, "get_n_elements - releasing vlan_mac_lock (reader)");
                ecore_vlan_mac_h_read_unlock(sc, o);
        }
 
@@ -615,7 +616,7 @@ static int ecore_check_mac_add(struct bnx2x_softc *sc __rte_unused,
 {
        struct ecore_vlan_mac_registry_elem *pos;
 
-       ECORE_MSG("Checking MAC %02x:%02x:%02x:%02x:%02x:%02x for ADD command",
+       ECORE_MSG(sc, "Checking MAC %02x:%02x:%02x:%02x:%02x:%02x for ADD command",
                  data->mac.mac[0], data->mac.mac[1], data->mac.mac[2],
                  data->mac.mac[3], data->mac.mac[4], data->mac.mac[5]);
 
@@ -644,7 +645,7 @@ static struct ecore_vlan_mac_registry_elem *ecore_check_mac_del(struct bnx2x_sof
 {
        struct ecore_vlan_mac_registry_elem *pos;
 
-       ECORE_MSG("Checking MAC %02x:%02x:%02x:%02x:%02x:%02x for DEL command",
+       ECORE_MSG(sc, "Checking MAC %02x:%02x:%02x:%02x:%02x:%02x for DEL command",
                  data->mac.mac[0], data->mac.mac[1], data->mac.mac[2],
                  data->mac.mac[3], data->mac.mac[4], data->mac.mac[5]);
 
@@ -722,7 +723,7 @@ static void ecore_set_mac_in_nig(struct bnx2x_softc *sc,
        if (index > ECORE_LLH_CAM_MAX_PF_LINE)
                return;
 
-       ECORE_MSG("Going to %s LLH configuration at entry %d",
+       ECORE_MSG(sc, "Going to %s LLH configuration at entry %d",
                  (add ? "ADD" : "DELETE"), index);
 
        if (add) {
@@ -838,7 +839,7 @@ static void ecore_set_one_mac_e2(struct bnx2x_softc *sc,
        ecore_vlan_mac_set_cmd_hdr_e2(o, add, CLASSIFY_RULE_OPCODE_MAC,
                                      &rule_entry->mac.header);
 
-       ECORE_MSG("About to %s MAC %02x:%02x:%02x:%02x:%02x:%02x for Queue %d",
+       ECORE_MSG(sc, "About to %s MAC %02x:%02x:%02x:%02x:%02x:%02x for Queue %d",
                  (add ? "add" : "delete"), mac[0], mac[1], mac[2], mac[3],
                  mac[4], mac[5], raw->cl_id);
 
@@ -943,7 +944,7 @@ static void ecore_vlan_mac_set_rdata_e1x(struct bnx2x_softc *sc
        ecore_vlan_mac_set_cfg_entry_e1x(o, add, opcode, mac, vlan_id,
                                         cfg_entry);
 
-       ECORE_MSG("%s MAC %02x:%02x:%02x:%02x:%02x:%02x CLID %d CAM offset %d",
+       ECORE_MSG(sc, "%s MAC %02x:%02x:%02x:%02x:%02x:%02x CLID %d CAM offset %d",
                  (add ? "setting" : "clearing"),
                  mac[0], mac[1], mac[2], mac[3], mac[4], mac[5],
                  o->raw.cl_id, cam_offset);
@@ -1088,8 +1089,8 @@ static int ecore_validate_vlan_mac_add(struct bnx2x_softc *sc,
        /* Check the registry */
        rc = o->check_add(sc, o, &elem->cmd_data.vlan_mac.u);
        if (rc) {
-               ECORE_MSG
-                   ("ADD command is not allowed considering current registry state.");
+               ECORE_MSG(sc,
+                         "ADD command is not allowed considering current registry state.");
                return rc;
        }
 
@@ -1097,7 +1098,7 @@ static int ecore_validate_vlan_mac_add(struct bnx2x_softc *sc,
         * MAC/VLAN/VLAN-MAC. Return an error if there is.
         */
        if (exeq->get(exeq, elem)) {
-               ECORE_MSG("There is a pending ADD command already");
+               ECORE_MSG(sc, "There is a pending ADD command already");
                return ECORE_EXISTS;
        }
 
@@ -1136,8 +1137,8 @@ static int ecore_validate_vlan_mac_del(struct bnx2x_softc *sc,
         */
        pos = o->check_del(sc, o, &elem->cmd_data.vlan_mac.u);
        if (!pos) {
-               ECORE_MSG
-                   ("DEL command is not allowed considering current registry state");
+               ECORE_MSG(sc,
+                         "DEL command is not allowed considering current registry state");
                return ECORE_EXISTS;
        }
 
@@ -1149,13 +1150,13 @@ static int ecore_validate_vlan_mac_del(struct bnx2x_softc *sc,
        /* Check for MOVE commands */
        query_elem.cmd_data.vlan_mac.cmd = ECORE_VLAN_MAC_MOVE;
        if (exeq->get(exeq, &query_elem)) {
-               PMD_DRV_LOG(ERR, "There is a pending MOVE command already");
+               PMD_DRV_LOG(ERR, sc, "There is a pending MOVE command already");
                return ECORE_INVAL;
        }
 
        /* Check for DEL commands */
        if (exeq->get(exeq, elem)) {
-               ECORE_MSG("There is a pending DEL command already");
+               ECORE_MSG(sc, "There is a pending DEL command already");
                return ECORE_EXISTS;
        }
 
@@ -1163,7 +1164,7 @@ static int ecore_validate_vlan_mac_del(struct bnx2x_softc *sc,
        if (!(ECORE_TEST_BIT(ECORE_DONT_CONSUME_CAM_CREDIT,
                             &elem->cmd_data.vlan_mac.vlan_mac_flags) ||
              o->put_credit(o))) {
-               PMD_DRV_LOG(ERR, "Failed to return a credit");
+               PMD_DRV_LOG(ERR, sc, "Failed to return a credit");
                return ECORE_INVAL;
        }
 
@@ -1196,8 +1197,8 @@ static int ecore_validate_vlan_mac_move(struct bnx2x_softc *sc,
         * state.
         */
        if (!src_o->check_move(sc, src_o, dest_o, &elem->cmd_data.vlan_mac.u)) {
-               ECORE_MSG
-                   ("MOVE command is not allowed considering current registry state");
+               ECORE_MSG(sc,
+                         "MOVE command is not allowed considering current registry state");
                return ECORE_INVAL;
        }
 
@@ -1210,21 +1211,21 @@ static int ecore_validate_vlan_mac_move(struct bnx2x_softc *sc,
        /* Check DEL on source */
        query_elem.cmd_data.vlan_mac.cmd = ECORE_VLAN_MAC_DEL;
        if (src_exeq->get(src_exeq, &query_elem)) {
-               PMD_DRV_LOG(ERR,
+               PMD_DRV_LOG(ERR, sc,
                            "There is a pending DEL command on the source queue already");
                return ECORE_INVAL;
        }
 
        /* Check MOVE on source */
        if (src_exeq->get(src_exeq, elem)) {
-               ECORE_MSG("There is a pending MOVE command already");
+               ECORE_MSG(sc, "There is a pending MOVE command already");
                return ECORE_EXISTS;
        }
 
        /* Check ADD on destination */
        query_elem.cmd_data.vlan_mac.cmd = ECORE_VLAN_MAC_ADD;
        if (dest_exeq->get(dest_exeq, &query_elem)) {
-               PMD_DRV_LOG(ERR,
+               PMD_DRV_LOG(ERR, sc,
                            "There is a pending ADD command on the destination queue already");
                return ECORE_INVAL;
        }
@@ -1329,7 +1330,7 @@ static int __ecore_vlan_mac_execute_step(struct bnx2x_softc *sc,
 
        ECORE_SPIN_LOCK_BH(&o->exe_queue.lock);
 
-       ECORE_MSG("vlan_mac_execute_step - trying to take writer lock");
+       ECORE_MSG(sc, "vlan_mac_execute_step - trying to take writer lock");
        rc = __ecore_vlan_mac_h_write_trylock(sc, o);
 
        if (rc != ECORE_SUCCESS) {
@@ -1426,17 +1427,17 @@ static int ecore_optimize_vlan_mac(struct bnx2x_softc *sc,
                                    &pos->cmd_data.vlan_mac.vlan_mac_flags)) {
                        if ((query.cmd_data.vlan_mac.cmd ==
                             ECORE_VLAN_MAC_ADD) && !o->put_credit(o)) {
-                               PMD_DRV_LOG(ERR,
+                               PMD_DRV_LOG(ERR, sc,
                                            "Failed to return the credit for the optimized ADD command");
                                return ECORE_INVAL;
                        } else if (!o->get_credit(o)) { /* VLAN_MAC_DEL */
-                               PMD_DRV_LOG(ERR,
+                               PMD_DRV_LOG(ERR, sc,
                                            "Failed to recover the credit from the optimized DEL command");
                                return ECORE_INVAL;
                        }
                }
 
-               ECORE_MSG("Optimizing %s command",
+               ECORE_MSG(sc, "Optimizing %s command",
                          (elem->cmd_data.vlan_mac.cmd == ECORE_VLAN_MAC_ADD) ?
                          "ADD" : "DEL");
 
@@ -1486,7 +1487,7 @@ static int ecore_vlan_mac_get_registry_elem(struct bnx2x_softc *sc,
                        return ECORE_INVAL;
                }
 
-               ECORE_MSG("Got cam offset %d", reg_elem->cam_offset);
+               ECORE_MSG(sc, "Got cam offset %d", reg_elem->cam_offset);
 
                /* Set a VLAN-MAC data */
                ECORE_MEMCPY(&reg_elem->u, &elem->cmd_data.vlan_mac.u,
@@ -1695,8 +1696,8 @@ int ecore_config_vlan_mac(struct bnx2x_softc *sc,
                rc = ECORE_PENDING;
 
        if (ECORE_TEST_BIT(RAMROD_DRV_CLR_ONLY, ramrod_flags)) {
-               ECORE_MSG
-                   ("RAMROD_DRV_CLR_ONLY requested: clearing a pending bit.");
+               ECORE_MSG(sc,
+                         "RAMROD_DRV_CLR_ONLY requested: clearing a pending bit.");
                raw->clear_pending(raw);
        }
 
@@ -1775,7 +1776,7 @@ static int ecore_vlan_mac_del_all(struct bnx2x_softc *sc,
                    *vlan_mac_flags) {
                        rc = exeq->remove(sc, exeq->owner, exeq_pos);
                        if (rc) {
-                               PMD_DRV_LOG(ERR, "Failed to remove command");
+                               PMD_DRV_LOG(ERR, sc, "Failed to remove command");
                                ECORE_SPIN_UNLOCK_BH(&exeq->lock);
                                return rc;
                        }
@@ -1800,7 +1801,7 @@ static int ecore_vlan_mac_del_all(struct bnx2x_softc *sc,
        ECORE_CLEAR_BIT_NA(RAMROD_EXEC, &p.ramrod_flags);
        ECORE_CLEAR_BIT_NA(RAMROD_CONT, &p.ramrod_flags);
 
-       ECORE_MSG("vlan_mac_del_all -- taking vlan_mac_lock (reader)");
+       ECORE_MSG(sc, "vlan_mac_del_all -- taking vlan_mac_lock (reader)");
        read_lock = ecore_vlan_mac_h_read_lock(sc, o);
        if (read_lock != ECORE_SUCCESS)
                return read_lock;
@@ -1812,7 +1813,7 @@ static int ecore_vlan_mac_del_all(struct bnx2x_softc *sc,
                        ECORE_MEMCPY(&p.user_req.u, &pos->u, sizeof(pos->u));
                        rc = ecore_config_vlan_mac(sc, &p);
                        if (rc < 0) {
-                               PMD_DRV_LOG(ERR,
+                               PMD_DRV_LOG(ERR, sc,
                                            "Failed to add a new DEL command");
                                ecore_vlan_mac_h_read_unlock(sc, o);
                                return rc;
@@ -1820,7 +1821,7 @@ static int ecore_vlan_mac_del_all(struct bnx2x_softc *sc,
                }
        }
 
-       ECORE_MSG("vlan_mac_del_all -- releasing vlan_mac_lock (reader)");
+       ECORE_MSG(sc, "vlan_mac_del_all -- releasing vlan_mac_lock (reader)");
        ecore_vlan_mac_h_read_unlock(sc, o);
 
        p.ramrod_flags = *ramrod_flags;
@@ -2007,7 +2008,7 @@ static int ecore_set_rx_mode_e1x(struct bnx2x_softc *sc,
            mac_filters->unmatched_unicast | mask :
            mac_filters->unmatched_unicast & ~mask;
 
-       ECORE_MSG("drop_ucast 0x%xdrop_mcast 0x%x accp_ucast 0x%x"
+       ECORE_MSG(sc, "drop_ucast 0x%x drop_mcast 0x%x accp_ucast 0x%x "
                  "accp_mcast 0x%x accp_bcast 0x%x",
                  mac_filters->ucast_drop_all, mac_filters->mcast_drop_all,
                  mac_filters->ucast_accept_all, mac_filters->mcast_accept_all,
@@ -2153,8 +2154,8 @@ static int ecore_set_rx_mode_e2(struct bnx2x_softc *sc,
         */
        ecore_rx_mode_set_rdata_hdr_e2(p->cid, &data->header, rule_idx);
 
-       ECORE_MSG
-           ("About to configure %d rules, rx_accept_flags 0x%lx, tx_accept_flags 0x%lx",
+       ECORE_MSG(sc,
+           "About to configure %d rules, rx_accept_flags 0x%lx, tx_accept_flags 0x%lx",
             data->header.rule_cnt, p->rx_accept_flags, p->tx_accept_flags);
 
        /* No need for an explicit memory barrier here as long we would
@@ -2207,7 +2208,7 @@ int ecore_config_rx_mode(struct bnx2x_softc *sc,
                                return rc;
                }
        } else {
-               ECORE_MSG("ERROR: config_rx_mode is NULL");
+               ECORE_MSG(sc, "ERROR: config_rx_mode is NULL");
                return -1;
        }
 
@@ -2288,7 +2289,7 @@ static int ecore_mcast_enqueue_cmd(struct bnx2x_softc *sc __rte_unused,
        if (!new_cmd)
                return ECORE_NOMEM;
 
-       ECORE_MSG("About to enqueue a new %d command. macs_list_len=%d",
+       ECORE_MSG(sc, "About to enqueue a new %d command. macs_list_len=%d",
                  cmd, macs_list_len);
 
        ECORE_LIST_INIT(&new_cmd->data.macs_head);
@@ -2324,7 +2325,7 @@ static int ecore_mcast_enqueue_cmd(struct bnx2x_softc *sc __rte_unused,
 
        default:
                ECORE_FREE(sc, new_cmd, total_sz);
-               PMD_DRV_LOG(ERR, "Unknown command: %d", cmd);
+               PMD_DRV_LOG(ERR, sc, "Unknown command: %d", cmd);
                return ECORE_INVAL;
        }
 
@@ -2436,11 +2437,11 @@ static void ecore_mcast_set_one_rule_e2(struct bnx2x_softc *sc __rte_unused,
                break;
 
        default:
-               PMD_DRV_LOG(ERR, "Unknown command: %d", cmd);
+               PMD_DRV_LOG(ERR, sc, "Unknown command: %d", cmd);
                return;
        }
 
-       ECORE_MSG("%s bin %d",
+       ECORE_MSG(sc, "%s bin %d",
                  ((rx_tx_add_flag & ETH_MULTICAST_RULES_CMD_IS_ADD) ?
                   "Setting" : "Clearing"), bin);
 
@@ -2475,7 +2476,7 @@ static int ecore_mcast_handle_restore_cmd_e2(struct bnx2x_softc *sc,
 
                cnt++;
 
-               ECORE_MSG("About to configure a bin %d", cur_bin);
+               ECORE_MSG(sc, "About to configure a bin %d", cur_bin);
 
                /* Break if we reached the maximum number
                 * of rules.
@@ -2507,8 +2508,8 @@ static void ecore_mcast_hdl_pending_add_e2(struct bnx2x_softc *sc,
 
                cnt++;
 
-               ECORE_MSG
-                   ("About to configure %02x:%02x:%02x:%02x:%02x:%02x mcast MAC",
+               ECORE_MSG(sc,
+                   "About to configure %02x:%02x:%02x:%02x:%02x:%02x mcast MAC",
                     pmac_pos->mac[0], pmac_pos->mac[1], pmac_pos->mac[2],
                     pmac_pos->mac[3], pmac_pos->mac[4], pmac_pos->mac[5]);
 
@@ -2543,7 +2544,7 @@ static void ecore_mcast_hdl_pending_del_e2(struct bnx2x_softc *sc,
 
                cmd_pos->data.macs_num--;
 
-               ECORE_MSG("Deleting MAC. %d left,cnt is %d",
+               ECORE_MSG(sc, "Deleting MAC. %d left, cnt is %d",
                          cmd_pos->data.macs_num, cnt);
 
                /* Break if we reached the maximum
@@ -2602,7 +2603,8 @@ static int ecore_mcast_handle_pending_cmds_e2(struct bnx2x_softc *sc, struct
                        break;
 
                default:
-                       PMD_DRV_LOG(ERR, "Unknown command: %d", cmd_pos->type);
+                       PMD_DRV_LOG(ERR, sc,
+                                   "Unknown command: %d", cmd_pos->type);
                        return ECORE_INVAL;
                }
 
@@ -2639,8 +2641,8 @@ static void ecore_mcast_hdl_add(struct bnx2x_softc *sc,
 
                cnt++;
 
-               ECORE_MSG
-                   ("About to configure %02x:%02x:%02x:%02x:%02x:%02x mcast MAC",
+               ECORE_MSG(sc,
+                   "About to configure %02x:%02x:%02x:%02x:%02x:%02x mcast MAC",
                     mlist_pos->mac[0], mlist_pos->mac[1], mlist_pos->mac[2],
                     mlist_pos->mac[3], mlist_pos->mac[4], mlist_pos->mac[5]);
        }
@@ -2660,7 +2662,8 @@ static void ecore_mcast_hdl_del(struct bnx2x_softc *sc,
 
                cnt++;
 
-               ECORE_MSG("Deleting MAC. %d left", p->mcast_list_len - i - 1);
+               ECORE_MSG(sc,
+                         "Deleting MAC. %d left", p->mcast_list_len - i - 1);
        }
 
        *line_idx = cnt;
@@ -2686,7 +2689,7 @@ static int ecore_mcast_handle_current_cmd(struct bnx2x_softc *sc, struct
        struct ecore_mcast_obj *o = p->mcast_obj;
        int cnt = start_cnt;
 
-       ECORE_MSG("p->mcast_list_len=%d", p->mcast_list_len);
+       ECORE_MSG(sc, "p->mcast_list_len=%d", p->mcast_list_len);
 
        switch (cmd) {
        case ECORE_MCAST_CMD_ADD:
@@ -2702,7 +2705,7 @@ static int ecore_mcast_handle_current_cmd(struct bnx2x_softc *sc, struct
                break;
 
        default:
-               PMD_DRV_LOG(ERR, "Unknown command: %d", cmd);
+               PMD_DRV_LOG(ERR, sc, "Unknown command: %d", cmd);
                return ECORE_INVAL;
        }
 
@@ -2747,7 +2750,7 @@ static int ecore_mcast_validate_e2(__rte_unused struct bnx2x_softc *sc,
                break;
 
        default:
-               PMD_DRV_LOG(ERR, "Unknown command: %d", cmd);
+               PMD_DRV_LOG(ERR, sc, "Unknown command: %d", cmd);
                return ECORE_INVAL;
        }
 
@@ -2933,8 +2936,8 @@ static void ecore_mcast_hdl_add_e1h(struct bnx2x_softc *sc __rte_unused,
                bit = ecore_mcast_bin_from_mac(mlist_pos->mac);
                ECORE_57711_SET_MC_FILTER(mc_filter, bit);
 
-               ECORE_MSG
-                   ("About to configure %02x:%02x:%02x:%02x:%02x:%02x mcast MAC, bin %d",
+               ECORE_MSG
+                   (sc, "About to configure %02x:%02x:%02x:%02x:%02x:%02x mcast MAC, bin %d",
                     mlist_pos->mac[0], mlist_pos->mac[1], mlist_pos->mac[2],
                     mlist_pos->mac[3], mlist_pos->mac[4], mlist_pos->mac[5],
                     bit);
@@ -2954,7 +2957,7 @@ static void ecore_mcast_hdl_restore_e1h(struct bnx2x_softc *sc
        for (bit = ecore_mcast_get_next_bin(o, 0);
             bit >= 0; bit = ecore_mcast_get_next_bin(o, bit + 1)) {
                ECORE_57711_SET_MC_FILTER(mc_filter, bit);
-               ECORE_MSG("About to set bin %d", bit);
+               ECORE_MSG(sc, "About to set bin %d", bit);
        }
 }
 
@@ -2985,7 +2988,7 @@ static int ecore_mcast_setup_e1h(struct bnx2x_softc *sc,
                        break;
 
                case ECORE_MCAST_CMD_DEL:
-                       ECORE_MSG("Invalidating multicast MACs configuration");
+                       ECORE_MSG(sc, "Invalidating multicast MACs configuration");
 
                        /* clear the registry */
                        ECORE_MEMSET(o->registry.aprox_match.vec, 0,
@@ -2997,7 +3000,7 @@ static int ecore_mcast_setup_e1h(struct bnx2x_softc *sc,
                        break;
 
                default:
-                       PMD_DRV_LOG(ERR, "Unknown command: %d", cmd);
+                       PMD_DRV_LOG(ERR, sc, "Unknown command: %d", cmd);
                        return ECORE_INVAL;
                }
 
@@ -3048,8 +3051,8 @@ int ecore_config_mcast(struct bnx2x_softc *sc,
        if ((!p->mcast_list_len) && (!o->check_sched(o)))
                return ECORE_SUCCESS;
 
-       ECORE_MSG
-           ("o->total_pending_num=%d p->mcast_list_len=%d o->max_cmd_len=%d",
+       ECORE_MSG
+           (sc, "o->total_pending_num=%d p->mcast_list_len=%d o->max_cmd_len=%d",
             o->total_pending_num, p->mcast_list_len, o->max_cmd_len);
 
        /* Enqueue the current command to the pending list if we can't complete
@@ -3478,7 +3481,7 @@ static int ecore_setup_rss(struct bnx2x_softc *sc,
 
        ECORE_MEMSET(data, 0, sizeof(*data));
 
-       ECORE_MSG("Configuring RSS");
+       ECORE_MSG(sc, "Configuring RSS");
 
        /* Set an echo field */
        data->echo = ECORE_CPU_TO_LE32((r->cid & ECORE_SWCID_MASK) |
@@ -3492,7 +3495,7 @@ static int ecore_setup_rss(struct bnx2x_softc *sc,
 
        data->rss_mode = rss_mode;
 
-       ECORE_MSG("rss_mode=%d", rss_mode);
+       ECORE_MSG(sc, "rss_mode=%d", rss_mode);
 
        /* RSS capabilities */
        if (ECORE_TEST_BIT(ECORE_RSS_IPV4, &p->rss_flags))
@@ -3532,7 +3535,7 @@ static int ecore_setup_rss(struct bnx2x_softc *sc,
        /* RSS engine ID */
        data->rss_engine_id = o->engine_id;
 
-       ECORE_MSG("rss_engine_id=%d", data->rss_engine_id);
+       ECORE_MSG(sc, "rss_engine_id=%d", data->rss_engine_id);
 
        /* Indirection table */
        ECORE_MEMCPY(data->indirection_table, p->ind_table,
@@ -3627,15 +3630,15 @@ int ecore_queue_state_change(struct bnx2x_softc *sc,
        /* Check that the requested transition is legal */
        rc = o->check_transition(sc, o, params);
        if (rc) {
-               PMD_DRV_LOG(ERR, "check transition returned an error. rc %d",
+               PMD_DRV_LOG(ERR, sc, "check transition returned an error. rc %d",
                            rc);
                return ECORE_INVAL;
        }
 
        /* Set "pending" bit */
-       ECORE_MSG("pending bit was=%lx", o->pending);
+       ECORE_MSG(sc, "pending bit was=%lx", o->pending);
        pending_bit = o->set_pending(o, params);
-       ECORE_MSG("pending bit now=%lx", o->pending);
+       ECORE_MSG(sc, "pending bit now=%lx", o->pending);
 
        /* Don't send a command if only driver cleanup was requested */
        if (ECORE_TEST_BIT(RAMROD_DRV_CLR_ONLY, &params->ramrod_flags))
@@ -3702,7 +3705,7 @@ static int ecore_queue_comp_cmd(struct bnx2x_softc *sc __rte_unused,
        unsigned long cur_pending = o->pending;
 
        if (!ECORE_TEST_AND_CLEAR_BIT(cmd, &cur_pending)) {
-               PMD_DRV_LOG(ERR,
+               PMD_DRV_LOG(ERR, sc,
                            "Bad MC reply %d for queue %d in state %d pending 0x%lx, next_state %d",
                            cmd, o->cids[ECORE_PRIMARY_CID_INDEX], o->state,
                            cur_pending, o->next_state);
@@ -3713,15 +3716,15 @@ static int ecore_queue_comp_cmd(struct bnx2x_softc *sc __rte_unused,
                /* >= because tx only must always be smaller than cos since the
                 * primary connection supports COS 0
                 */
-               PMD_DRV_LOG(ERR,
+               PMD_DRV_LOG(ERR, sc,
                            "illegal value for next tx_only: %d. max cos was %d",
                            o->next_tx_only, o->max_cos);
 
-       ECORE_MSG("Completing command %d for queue %d, setting state to %d",
+       ECORE_MSG(sc, "Completing command %d for queue %d, setting state to %d",
                  cmd, o->cids[ECORE_PRIMARY_CID_INDEX], o->next_state);
 
        if (o->next_tx_only)    /* print num tx-only if any exist */
-               ECORE_MSG("primary cid %d: num tx-only cons %d",
+               ECORE_MSG(sc, "primary cid %d: num tx-only cons %d",
                          o->cids[ECORE_PRIMARY_CID_INDEX], o->next_tx_only);
 
        o->state = o->next_state;
@@ -3782,7 +3785,7 @@ static void ecore_q_fill_init_general_data(struct bnx2x_softc *sc __rte_unused,
            ECORE_TEST_BIT(ECORE_Q_FLG_FCOE, flags) ?
            LLFC_TRAFFIC_TYPE_FCOE : LLFC_TRAFFIC_TYPE_NW;
 
-       ECORE_MSG("flags: active %d, cos %d, stats en %d",
+       ECORE_MSG(sc, "flags: active %d, cos %d, stats en %d",
                  gen_data->activate_flg, gen_data->cos,
                  gen_data->statistics_en_flg);
 }
@@ -3923,7 +3926,7 @@ static void ecore_q_fill_setup_tx_only(struct bnx2x_softc *sc, struct ecore_queu
        ecore_q_fill_init_tx_data(&cmd_params->params.tx_only.txq_params,
                                  &data->tx, &cmd_params->params.tx_only.flags);
 
-       ECORE_MSG("cid %d, tx bd page lo %x hi %x",
+       ECORE_MSG(sc, "cid %d, tx bd page lo %x hi %x",
                  cmd_params->q_obj->cids[0],
                  data->tx.tx_bd_page_base.lo, data->tx.tx_bd_page_base.hi);
 }
@@ -3973,9 +3976,9 @@ static int ecore_q_init(struct bnx2x_softc *sc,
 
        /* Set CDU context validation values */
        for (cos = 0; cos < o->max_cos; cos++) {
-               ECORE_MSG("setting context validation. cid %d, cos %d",
+               ECORE_MSG(sc, "setting context validation. cid %d, cos %d",
                          o->cids[cos], cos);
-               ECORE_MSG("context pointer %p", init->cxts[cos]);
+               ECORE_MSG(sc, "context pointer %p", init->cxts[cos]);
                ECORE_SET_CTX_VALIDATION(sc, init->cxts[cos], o->cids[cos]);
        }
 
@@ -4059,15 +4062,15 @@ static int ecore_q_send_setup_tx_only(struct bnx2x_softc *sc, struct ecore_queue
 
        if (ECORE_TEST_BIT(ECORE_Q_TYPE_FWD, &o->type))
                ramrod = RAMROD_CMD_ID_ETH_FORWARD_SETUP;
-       ECORE_MSG("sending forward tx-only ramrod");
+       ECORE_MSG(sc, "sending forward tx-only ramrod");
 
        if (cid_index >= o->max_cos) {
-               PMD_DRV_LOG(ERR, "queue[%d]: cid_index (%d) is out of range",
+               PMD_DRV_LOG(ERR, sc, "queue[%d]: cid_index (%d) is out of range",
                            o->cl_id, cid_index);
                return ECORE_INVAL;
        }
 
-       ECORE_MSG("parameters received: cos: %d sp-id: %d",
+       ECORE_MSG(sc, "parameters received: cos: %d sp-id: %d",
                  tx_only_params->gen_params.cos,
                  tx_only_params->gen_params.spcl_id);
 
@@ -4077,8 +4080,8 @@ static int ecore_q_send_setup_tx_only(struct bnx2x_softc *sc, struct ecore_queue
        /* Fill the ramrod data */
        ecore_q_fill_setup_tx_only(sc, params, rdata);
 
-       ECORE_MSG
-           ("sending tx-only ramrod: cid %d, client-id %d, sp-client id %d, cos %d",
+       ECORE_MSG
+           (sc, "sending tx-only ramrod: cid %d, client-id %d, sp-client id %d, cos %d",
             o->cids[cid_index], rdata->general.client_id,
             rdata->general.sp_client_id, rdata->general.cos);
 
@@ -4173,7 +4176,7 @@ static int ecore_q_send_update(struct bnx2x_softc *sc,
        uint8_t cid_index = update_params->cid_index;
 
        if (cid_index >= o->max_cos) {
-               PMD_DRV_LOG(ERR, "queue[%d]: cid_index (%d) is out of range",
+               PMD_DRV_LOG(ERR, sc, "queue[%d]: cid_index (%d) is out of range",
                            o->cl_id, cid_index);
                return ECORE_INVAL;
        }
@@ -4267,7 +4270,7 @@ static int ecore_q_send_cfc_del(struct bnx2x_softc *sc,
        uint8_t cid_idx = params->params.cfc_del.cid_index;
 
        if (cid_idx >= o->max_cos) {
-               PMD_DRV_LOG(ERR, "queue[%d]: cid_index (%d) is out of range",
+               PMD_DRV_LOG(ERR, sc, "queue[%d]: cid_index (%d) is out of range",
                            o->cl_id, cid_idx);
                return ECORE_INVAL;
        }
@@ -4283,7 +4286,7 @@ static int ecore_q_send_terminate(struct bnx2x_softc *sc, struct ecore_queue_sta
        uint8_t cid_index = params->params.terminate.cid_index;
 
        if (cid_index >= o->max_cos) {
-               PMD_DRV_LOG(ERR, "queue[%d]: cid_index (%d) is out of range",
+               PMD_DRV_LOG(ERR, sc, "queue[%d]: cid_index (%d) is out of range",
                            o->cl_id, cid_index);
                return ECORE_INVAL;
        }
@@ -4327,7 +4330,7 @@ static int ecore_queue_send_cmd_cmn(struct bnx2x_softc *sc, struct ecore_queue_s
        case ECORE_Q_CMD_EMPTY:
                return ecore_q_send_empty(sc, params);
        default:
-               PMD_DRV_LOG(ERR, "Unknown command: %d", params->cmd);
+               PMD_DRV_LOG(ERR, sc, "Unknown command: %d", params->cmd);
                return ECORE_INVAL;
        }
 }
@@ -4350,7 +4353,7 @@ static int ecore_queue_send_cmd_e1x(struct bnx2x_softc *sc,
        case ECORE_Q_CMD_EMPTY:
                return ecore_queue_send_cmd_cmn(sc, params);
        default:
-               PMD_DRV_LOG(ERR, "Unknown command: %d", params->cmd);
+               PMD_DRV_LOG(ERR, sc, "Unknown command: %d", params->cmd);
                return ECORE_INVAL;
        }
 }
@@ -4373,7 +4376,7 @@ static int ecore_queue_send_cmd_e2(struct bnx2x_softc *sc,
        case ECORE_Q_CMD_EMPTY:
                return ecore_queue_send_cmd_cmn(sc, params);
        default:
-               PMD_DRV_LOG(ERR, "Unknown command: %d", params->cmd);
+               PMD_DRV_LOG(ERR, sc, "Unknown command: %d", params->cmd);
                return ECORE_INVAL;
        }
 }
@@ -4416,7 +4419,7 @@ static int ecore_queue_chk_transition(struct bnx2x_softc *sc __rte_unused,
         * the previous one.
         */
        if (o->pending) {
-               PMD_DRV_LOG(ERR, "Blocking transition since pending was %lx",
+               PMD_DRV_LOG(ERR, sc, "Blocking transition since pending was %lx",
                            o->pending);
                return ECORE_BUSY;
        }
@@ -4543,19 +4546,19 @@ static int ecore_queue_chk_transition(struct bnx2x_softc *sc __rte_unused,
 
                break;
        default:
-               PMD_DRV_LOG(ERR, "Illegal state: %d", state);
+               PMD_DRV_LOG(ERR, sc, "Illegal state: %d", state);
        }
 
        /* Transition is assured */
        if (next_state != ECORE_Q_STATE_MAX) {
-               ECORE_MSG("Good state transition: %d(%d)->%d",
+               ECORE_MSG(sc, "Good state transition: %d(%d)->%d",
                          state, cmd, next_state);
                o->next_state = next_state;
                o->next_tx_only = next_tx_only;
                return ECORE_SUCCESS;
        }
 
-       ECORE_MSG("Bad state transition request: %d %d", state, cmd);
+       ECORE_MSG(sc, "Bad state transition request: %d %d", state, cmd);
 
        return ECORE_INVAL;
 }
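Both transition checkers in this file follow the same scheme: refuse any new command while a previous ramrod is still pending, map the (state, command) pair to a next state, and record the result only when a legal transition was found. A compressed sketch of that scheme, using generic names in place of the ecore types:

#include <stdio.h>

enum q_state { Q_RESET, Q_ACTIVE, Q_STOPPED, Q_STATE_MAX };
enum q_cmd   { Q_CMD_SETUP, Q_CMD_HALT };

struct q_obj {
	unsigned long pending;           /* bitmask of in-flight commands */
	enum q_state state, next_state;
};

static int q_chk_transition(struct q_obj *o, enum q_cmd cmd)
{
	enum q_state next = Q_STATE_MAX;

	if (o->pending)                  /* previous ramrod not completed yet */
		return -16;              /* ECORE_BUSY-style result */

	switch (o->state) {
	case Q_RESET:
		if (cmd == Q_CMD_SETUP)
			next = Q_ACTIVE;
		break;
	case Q_ACTIVE:
		if (cmd == Q_CMD_HALT)
			next = Q_STOPPED;
		break;
	default:
		break;
	}

	if (next != Q_STATE_MAX) {       /* transition is assured */
		o->next_state = next;
		return 0;
	}
	return -22;                      /* ECORE_INVAL-style result */
}

int main(void)
{
	struct q_obj o = { 0, Q_RESET, Q_STATE_MAX };

	printf("setup: %d\n", q_chk_transition(&o, Q_CMD_SETUP)); /* prints 0 */
	return 0;
}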
@@ -4606,18 +4609,18 @@ static int ecore_queue_chk_fwd_transition(struct bnx2x_softc *sc __rte_unused,
 
                break;
        default:
-               PMD_DRV_LOG(ERR, "Illegal state: %d", state);
+               PMD_DRV_LOG(ERR, sc, "Illegal state: %d", state);
        }
 
        /* Transition is assured */
        if (next_state != ECORE_Q_STATE_MAX) {
-               ECORE_MSG("Good state transition: %d(%d)->%d",
+               ECORE_MSG(sc, "Good state transition: %d(%d)->%d",
                          state, cmd, next_state);
                o->next_state = next_state;
                return ECORE_SUCCESS;
        }
 
-       ECORE_MSG("Bad state transition request: %d %d", state, cmd);
+       ECORE_MSG(sc, "Bad state transition request: %d %d", state, cmd);
        return ECORE_INVAL;
 }
 
@@ -4697,14 +4700,14 @@ ecore_func_state_change_comp(struct bnx2x_softc *sc __rte_unused,
        unsigned long cur_pending = o->pending;
 
        if (!ECORE_TEST_AND_CLEAR_BIT(cmd, &cur_pending)) {
-               PMD_DRV_LOG(ERR,
+               PMD_DRV_LOG(ERR, sc,
                            "Bad MC reply %d for func %d in state %d pending 0x%lx, next_state %d",
                            cmd, ECORE_FUNC_ID(sc), o->state, cur_pending,
                            o->next_state);
                return ECORE_INVAL;
        }
 
-       ECORE_MSG("Completing command %d for func %d, setting state to %d",
+       ECORE_MSG(sc, "Completing command %d for func %d, setting state to %d",
                  cmd, ECORE_FUNC_ID(sc), o->next_state);
 
        o->state = o->next_state;
@@ -4827,18 +4830,19 @@ static int ecore_func_chk_transition(struct bnx2x_softc *sc __rte_unused,
 
                break;
        default:
-               PMD_DRV_LOG(ERR, "Unknown state: %d", state);
+               PMD_DRV_LOG(ERR, sc, "Unknown state: %d", state);
        }
 
        /* Transition is assured */
        if (next_state != ECORE_F_STATE_MAX) {
-               ECORE_MSG("Good function state transition: %d(%d)->%d",
+               ECORE_MSG(sc, "Good function state transition: %d(%d)->%d",
                          state, cmd, next_state);
                o->next_state = next_state;
                return ECORE_SUCCESS;
        }
 
-       ECORE_MSG("Bad function state transition request: %d %d", state, cmd);
+       ECORE_MSG(sc,
+                 "Bad function state transition request: %d %d", state, cmd);
 
        return ECORE_INVAL;
 }
@@ -4928,13 +4932,13 @@ static int ecore_func_hw_init(struct bnx2x_softc *sc,
        const struct ecore_func_sp_drv_ops *drv = o->drv;
        int rc = 0;
 
-       ECORE_MSG("function %d  load_code %x",
+       ECORE_MSG(sc, "function %d  load_code %x",
                  ECORE_ABS_FUNC_ID(sc), load_code);
 
        /* Prepare FW */
        rc = drv->init_fw(sc);
        if (rc) {
-               PMD_DRV_LOG(ERR, "Error loading firmware");
+               PMD_DRV_LOG(ERR, sc, "Error loading firmware");
                goto init_err;
        }
 
@@ -4965,7 +4969,7 @@ static int ecore_func_hw_init(struct bnx2x_softc *sc,
 
                break;
        default:
-               PMD_DRV_LOG(ERR, "Unknown load_code (0x%x) from MCP",
+               PMD_DRV_LOG(ERR, sc, "Unknown load_code (0x%x) from MCP",
                            load_code);
                rc = ECORE_INVAL;
        }
@@ -5041,7 +5045,7 @@ static int ecore_func_hw_reset(struct bnx2x_softc *sc,
        struct ecore_func_sp_obj *o = params->f_obj;
        const struct ecore_func_sp_drv_ops *drv = o->drv;
 
-       ECORE_MSG("function %d  reset_phase %x", ECORE_ABS_FUNC_ID(sc),
+       ECORE_MSG(sc, "function %d  reset_phase %x", ECORE_ABS_FUNC_ID(sc),
                  reset_phase);
 
        switch (reset_phase) {
@@ -5055,7 +5059,7 @@ static int ecore_func_hw_reset(struct bnx2x_softc *sc,
                ecore_func_reset_func(sc, drv);
                break;
        default:
-               PMD_DRV_LOG(ERR, "Unknown reset_phase (0x%x) from MCP",
+               PMD_DRV_LOG(ERR, sc, "Unknown reset_phase (0x%x) from MCP",
                            reset_phase);
                break;
        }
@@ -5146,7 +5150,7 @@ static int ecore_func_send_afex_update(struct bnx2x_softc *sc, struct ecore_func
         *  read and we will have to put a full memory barrier there
         *  (inside ecore_sp_post()).
         */
-       ECORE_MSG("afex: sending func_update vif_id 0x%x dvlan 0x%x prio 0x%x",
+       ECORE_MSG(sc, "afex: sending func_update vif_id 0x%x dvlan 0x%x prio 0x%x",
                  rdata->vif_id,
                  rdata->afex_default_vlan, rdata->allowed_priorities);
 
@@ -5184,8 +5188,8 @@ inline int ecore_func_send_afex_viflists(struct bnx2x_softc *sc,
         *  (inside ecore_sp_post()).
         */
 
-       ECORE_MSG
-           ("afex: ramrod lists, cmd 0x%x index 0x%x func_bit_map 0x%x func_to_clr 0x%x",
+       ECORE_MSG
+           (sc, "afex: ramrod lists, cmd 0x%x index 0x%x func_bit_map 0x%x func_to_clr 0x%x",
             rdata->afex_vif_list_command, rdata->vif_list_index,
             rdata->func_bit_map, rdata->func_to_clear);
 
@@ -5256,7 +5260,7 @@ static int ecore_func_send_cmd(struct bnx2x_softc *sc,
        case ECORE_F_CMD_SWITCH_UPDATE:
                return ecore_func_send_switch_update(sc, params);
        default:
-               PMD_DRV_LOG(ERR, "Unknown command: %d", params->cmd);
+               PMD_DRV_LOG(ERR, sc, "Unknown command: %d", params->cmd);
                return ECORE_INVAL;
        }
 }
@@ -5317,7 +5321,7 @@ int ecore_func_state_change(struct bnx2x_softc *sc,
                }
                if (rc == ECORE_BUSY) {
                        ECORE_MUTEX_UNLOCK(&o->one_pending_mutex);
-                       PMD_DRV_LOG(ERR,
+                       PMD_DRV_LOG(ERR, sc,
                                    "timeout waiting for previous ramrod completion");
                        return rc;
                }
index 6b65a49..f295bf5 100644
@@ -215,8 +215,8 @@ ECORE_CRC32_LE(uint32_t seed, uint8_t *mac, uint32_t len)
     } while (0)
 
 
-#define ECORE_MSG(m, ...) \
-       PMD_DRV_LOG(DEBUG, m, ##__VA_ARGS__)
+#define ECORE_MSG(sc, m, ...) \
+       PMD_DRV_LOG(DEBUG, sc, m, ##__VA_ARGS__)
 
 typedef struct _ecore_list_entry_t
 {
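This two-argument macro is what drives every ECORE_MSG call-site change in this patch: the softc is forwarded to PMD_DRV_LOG so each debug line can be tagged with the device it belongs to. A minimal, self-contained sketch of the same variadic-forwarding pattern, with a printf stand-in for the DPDK logger:

#include <stdio.h>

/* printf stand-in for the DPDK logger: the first argument carries the
 * device context so every message can be prefixed with its name. */
struct softc {
	const char *name;
};

#define PMD_DRV_LOG(level, sc, fmt, ...) \
	printf("%s: " #level ": " fmt "\n", (sc)->name, ##__VA_ARGS__)

/* Same shape as the two-argument ECORE_MSG defined above. */
#define ECORE_MSG(sc, m, ...) \
	PMD_DRV_LOG(DEBUG, sc, m, ##__VA_ARGS__)

int main(void)
{
	struct softc sc = { "0000:02:00.0" };

	ECORE_MSG(&sc, "rss_mode=%d", 1);  /* "0000:02:00.0: DEBUG: rss_mode=1" */
	ECORE_MSG(&sc, "Configuring RSS"); /* ##__VA_ARGS__ swallows the comma */
	return 0;
}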
index b63fd23..d569366 100644
 #include "ecore_hsi.h"
 #include "ecore_reg.h"
 
-static elink_status_t elink_link_reset(struct elink_params *params,
-                                      struct elink_vars *vars,
-                                      uint8_t reset_ext_phy);
-static elink_status_t elink_check_half_open_conn(struct elink_params *params,
-                                                struct elink_vars *vars,
-                                                uint8_t notify);
-static elink_status_t elink_sfp_module_detection(struct elink_phy *phy,
-                                                struct elink_params *params);
 
 #define MDIO_REG_BANK_CL73_IEEEB0                      0x0
-#define MDIO_CL73_IEEEB0_CL73_AN_CONTROL               0x0
-#define MDIO_CL73_IEEEB0_CL73_AN_CONTROL_RESTART_AN    0x0200
-#define MDIO_CL73_IEEEB0_CL73_AN_CONTROL_AN_EN         0x1000
-#define MDIO_CL73_IEEEB0_CL73_AN_CONTROL_MAIN_RST      0x8000
+       #define MDIO_CL73_IEEEB0_CL73_AN_CONTROL                0x0
+               #define MDIO_CL73_IEEEB0_CL73_AN_CONTROL_RESTART_AN     0x0200
+               #define MDIO_CL73_IEEEB0_CL73_AN_CONTROL_AN_EN          0x1000
+               #define MDIO_CL73_IEEEB0_CL73_AN_CONTROL_MAIN_RST       0x8000
 
 #define MDIO_REG_BANK_CL73_IEEEB1                      0x10
-#define MDIO_CL73_IEEEB1_AN_ADV1                       0x00
-#define        MDIO_CL73_IEEEB1_AN_ADV1_PAUSE                  0x0400
-#define        MDIO_CL73_IEEEB1_AN_ADV1_ASYMMETRIC             0x0800
-#define        MDIO_CL73_IEEEB1_AN_ADV1_PAUSE_BOTH             0x0C00
-#define        MDIO_CL73_IEEEB1_AN_ADV1_PAUSE_MASK             0x0C00
-#define MDIO_CL73_IEEEB1_AN_ADV2                               0x01
-#define MDIO_CL73_IEEEB1_AN_ADV2_ADVR_1000M            0x0000
-#define MDIO_CL73_IEEEB1_AN_ADV2_ADVR_1000M_KX         0x0020
-#define MDIO_CL73_IEEEB1_AN_ADV2_ADVR_10G_KX4          0x0040
-#define MDIO_CL73_IEEEB1_AN_ADV2_ADVR_10G_KR           0x0080
-#define        MDIO_CL73_IEEEB1_AN_LP_ADV1                     0x03
-#define        MDIO_CL73_IEEEB1_AN_LP_ADV1_PAUSE               0x0400
-#define        MDIO_CL73_IEEEB1_AN_LP_ADV1_ASYMMETRIC          0x0800
-#define        MDIO_CL73_IEEEB1_AN_LP_ADV1_PAUSE_BOTH          0x0C00
-#define        MDIO_CL73_IEEEB1_AN_LP_ADV1_PAUSE_MASK          0x0C00
-#define        MDIO_CL73_IEEEB1_AN_LP_ADV2                     0x04
+       #define MDIO_CL73_IEEEB1_AN_ADV1                        0x00
+               #define MDIO_CL73_IEEEB1_AN_ADV1_PAUSE                  0x0400
+               #define MDIO_CL73_IEEEB1_AN_ADV1_ASYMMETRIC             0x0800
+               #define MDIO_CL73_IEEEB1_AN_ADV1_PAUSE_BOTH             0x0C00
+               #define MDIO_CL73_IEEEB1_AN_ADV1_PAUSE_MASK             0x0C00
+       #define MDIO_CL73_IEEEB1_AN_ADV2                                0x01
+               #define MDIO_CL73_IEEEB1_AN_ADV2_ADVR_1000M             0x0000
+               #define MDIO_CL73_IEEEB1_AN_ADV2_ADVR_1000M_KX          0x0020
+               #define MDIO_CL73_IEEEB1_AN_ADV2_ADVR_10G_KX4           0x0040
+               #define MDIO_CL73_IEEEB1_AN_ADV2_ADVR_10G_KR            0x0080
+       #define MDIO_CL73_IEEEB1_AN_LP_ADV1                     0x03
+               #define MDIO_CL73_IEEEB1_AN_LP_ADV1_PAUSE               0x0400
+               #define MDIO_CL73_IEEEB1_AN_LP_ADV1_ASYMMETRIC          0x0800
+               #define MDIO_CL73_IEEEB1_AN_LP_ADV1_PAUSE_BOTH          0x0C00
+               #define MDIO_CL73_IEEEB1_AN_LP_ADV1_PAUSE_MASK          0x0C00
+       #define MDIO_CL73_IEEEB1_AN_LP_ADV2                     0x04
 
 #define        MDIO_REG_BANK_RX0                               0x80b0
-#define        MDIO_RX0_RX_STATUS                              0x10
-#define        MDIO_RX0_RX_STATUS_SIGDET                       0x8000
-#define        MDIO_RX0_RX_STATUS_RX_SEQ_DONE                  0x1000
-#define        MDIO_RX0_RX_EQ_BOOST                            0x1c
-#define        MDIO_RX0_RX_EQ_BOOST_EQUALIZER_CTRL_MASK        0x7
-#define        MDIO_RX0_RX_EQ_BOOST_OFFSET_CTRL                0x10
+       #define MDIO_RX0_RX_STATUS                              0x10
+               #define MDIO_RX0_RX_STATUS_SIGDET                       0x8000
+               #define MDIO_RX0_RX_STATUS_RX_SEQ_DONE                  0x1000
+       #define MDIO_RX0_RX_EQ_BOOST                            0x1c
+               #define MDIO_RX0_RX_EQ_BOOST_EQUALIZER_CTRL_MASK        0x7
+               #define MDIO_RX0_RX_EQ_BOOST_OFFSET_CTRL                0x10
 
 #define        MDIO_REG_BANK_RX1                               0x80c0
-#define        MDIO_RX1_RX_EQ_BOOST                            0x1c
-#define        MDIO_RX1_RX_EQ_BOOST_EQUALIZER_CTRL_MASK        0x7
-#define        MDIO_RX1_RX_EQ_BOOST_OFFSET_CTRL                0x10
+       #define MDIO_RX1_RX_EQ_BOOST                            0x1c
+               #define MDIO_RX1_RX_EQ_BOOST_EQUALIZER_CTRL_MASK        0x7
+               #define MDIO_RX1_RX_EQ_BOOST_OFFSET_CTRL                0x10
 
 #define        MDIO_REG_BANK_RX2                               0x80d0
-#define        MDIO_RX2_RX_EQ_BOOST                            0x1c
-#define        MDIO_RX2_RX_EQ_BOOST_EQUALIZER_CTRL_MASK        0x7
-#define        MDIO_RX2_RX_EQ_BOOST_OFFSET_CTRL                0x10
+       #define MDIO_RX2_RX_EQ_BOOST                            0x1c
+               #define MDIO_RX2_RX_EQ_BOOST_EQUALIZER_CTRL_MASK        0x7
+               #define MDIO_RX2_RX_EQ_BOOST_OFFSET_CTRL                0x10
 
 #define        MDIO_REG_BANK_RX3                               0x80e0
-#define        MDIO_RX3_RX_EQ_BOOST                            0x1c
-#define        MDIO_RX3_RX_EQ_BOOST_EQUALIZER_CTRL_MASK        0x7
-#define        MDIO_RX3_RX_EQ_BOOST_OFFSET_CTRL                0x10
+       #define MDIO_RX3_RX_EQ_BOOST                            0x1c
+               #define MDIO_RX3_RX_EQ_BOOST_EQUALIZER_CTRL_MASK        0x7
+               #define MDIO_RX3_RX_EQ_BOOST_OFFSET_CTRL                0x10
 
 #define        MDIO_REG_BANK_RX_ALL                            0x80f0
-#define        MDIO_RX_ALL_RX_EQ_BOOST                         0x1c
-#define        MDIO_RX_ALL_RX_EQ_BOOST_EQUALIZER_CTRL_MASK     0x7
-#define        MDIO_RX_ALL_RX_EQ_BOOST_OFFSET_CTRL     0x10
+       #define MDIO_RX_ALL_RX_EQ_BOOST                         0x1c
+               #define MDIO_RX_ALL_RX_EQ_BOOST_EQUALIZER_CTRL_MASK     0x7
+               #define MDIO_RX_ALL_RX_EQ_BOOST_OFFSET_CTRL     0x10
 
 #define        MDIO_REG_BANK_TX0                               0x8060
-#define        MDIO_TX0_TX_DRIVER                              0x17
-#define        MDIO_TX0_TX_DRIVER_PREEMPHASIS_MASK             0xf000
-#define        MDIO_TX0_TX_DRIVER_PREEMPHASIS_SHIFT            12
-#define        MDIO_TX0_TX_DRIVER_IDRIVER_MASK                 0x0f00
-#define        MDIO_TX0_TX_DRIVER_IDRIVER_SHIFT                8
-#define        MDIO_TX0_TX_DRIVER_IPREDRIVER_MASK              0x00f0
-#define        MDIO_TX0_TX_DRIVER_IPREDRIVER_SHIFT             4
-#define        MDIO_TX0_TX_DRIVER_IFULLSPD_MASK                0x000e
-#define        MDIO_TX0_TX_DRIVER_IFULLSPD_SHIFT               1
-#define        MDIO_TX0_TX_DRIVER_ICBUF1T                      1
+       #define MDIO_TX0_TX_DRIVER                              0x17
+               #define MDIO_TX0_TX_DRIVER_PREEMPHASIS_MASK             0xf000
+               #define MDIO_TX0_TX_DRIVER_PREEMPHASIS_SHIFT            12
+               #define MDIO_TX0_TX_DRIVER_IDRIVER_MASK                 0x0f00
+               #define MDIO_TX0_TX_DRIVER_IDRIVER_SHIFT                8
+               #define MDIO_TX0_TX_DRIVER_IPREDRIVER_MASK              0x00f0
+               #define MDIO_TX0_TX_DRIVER_IPREDRIVER_SHIFT             4
+               #define MDIO_TX0_TX_DRIVER_IFULLSPD_MASK                0x000e
+               #define MDIO_TX0_TX_DRIVER_IFULLSPD_SHIFT               1
+               #define MDIO_TX0_TX_DRIVER_ICBUF1T                      1
 
 #define        MDIO_REG_BANK_TX1                               0x8070
-#define        MDIO_TX1_TX_DRIVER                              0x17
-#define        MDIO_TX0_TX_DRIVER_PREEMPHASIS_MASK             0xf000
-#define        MDIO_TX0_TX_DRIVER_PREEMPHASIS_SHIFT            12
-#define        MDIO_TX0_TX_DRIVER_IDRIVER_MASK                 0x0f00
-#define        MDIO_TX0_TX_DRIVER_IDRIVER_SHIFT                8
-#define        MDIO_TX0_TX_DRIVER_IPREDRIVER_MASK              0x00f0
-#define        MDIO_TX0_TX_DRIVER_IPREDRIVER_SHIFT             4
-#define        MDIO_TX0_TX_DRIVER_IFULLSPD_MASK                0x000e
-#define        MDIO_TX0_TX_DRIVER_IFULLSPD_SHIFT               1
-#define        MDIO_TX0_TX_DRIVER_ICBUF1T                      1
+       #define MDIO_TX1_TX_DRIVER                              0x17
+               #define MDIO_TX0_TX_DRIVER_PREEMPHASIS_MASK             0xf000
+               #define MDIO_TX0_TX_DRIVER_PREEMPHASIS_SHIFT            12
+               #define MDIO_TX0_TX_DRIVER_IDRIVER_MASK                 0x0f00
+               #define MDIO_TX0_TX_DRIVER_IDRIVER_SHIFT                8
+               #define MDIO_TX0_TX_DRIVER_IPREDRIVER_MASK              0x00f0
+               #define MDIO_TX0_TX_DRIVER_IPREDRIVER_SHIFT             4
+               #define MDIO_TX0_TX_DRIVER_IFULLSPD_MASK                0x000e
+               #define MDIO_TX0_TX_DRIVER_IFULLSPD_SHIFT               1
+               #define MDIO_TX0_TX_DRIVER_ICBUF1T                      1
 
 #define        MDIO_REG_BANK_TX2                               0x8080
-#define        MDIO_TX2_TX_DRIVER                              0x17
-#define        MDIO_TX0_TX_DRIVER_PREEMPHASIS_MASK             0xf000
-#define        MDIO_TX0_TX_DRIVER_PREEMPHASIS_SHIFT            12
-#define        MDIO_TX0_TX_DRIVER_IDRIVER_MASK                 0x0f00
-#define        MDIO_TX0_TX_DRIVER_IDRIVER_SHIFT                8
-#define        MDIO_TX0_TX_DRIVER_IPREDRIVER_MASK              0x00f0
-#define        MDIO_TX0_TX_DRIVER_IPREDRIVER_SHIFT             4
-#define        MDIO_TX0_TX_DRIVER_IFULLSPD_MASK                0x000e
-#define        MDIO_TX0_TX_DRIVER_IFULLSPD_SHIFT               1
-#define        MDIO_TX0_TX_DRIVER_ICBUF1T                      1
+       #define MDIO_TX2_TX_DRIVER                              0x17
+               #define MDIO_TX0_TX_DRIVER_PREEMPHASIS_MASK             0xf000
+               #define MDIO_TX0_TX_DRIVER_PREEMPHASIS_SHIFT            12
+               #define MDIO_TX0_TX_DRIVER_IDRIVER_MASK                 0x0f00
+               #define MDIO_TX0_TX_DRIVER_IDRIVER_SHIFT                8
+               #define MDIO_TX0_TX_DRIVER_IPREDRIVER_MASK              0x00f0
+               #define MDIO_TX0_TX_DRIVER_IPREDRIVER_SHIFT             4
+               #define MDIO_TX0_TX_DRIVER_IFULLSPD_MASK                0x000e
+               #define MDIO_TX0_TX_DRIVER_IFULLSPD_SHIFT               1
+               #define MDIO_TX0_TX_DRIVER_ICBUF1T                      1
 
 #define        MDIO_REG_BANK_TX3                               0x8090
-#define        MDIO_TX3_TX_DRIVER                              0x17
-#define        MDIO_TX0_TX_DRIVER_PREEMPHASIS_MASK             0xf000
-#define        MDIO_TX0_TX_DRIVER_PREEMPHASIS_SHIFT            12
-#define        MDIO_TX0_TX_DRIVER_IDRIVER_MASK                 0x0f00
-#define        MDIO_TX0_TX_DRIVER_IDRIVER_SHIFT                8
-#define        MDIO_TX0_TX_DRIVER_IPREDRIVER_MASK              0x00f0
-#define        MDIO_TX0_TX_DRIVER_IPREDRIVER_SHIFT             4
-#define        MDIO_TX0_TX_DRIVER_IFULLSPD_MASK                0x000e
-#define        MDIO_TX0_TX_DRIVER_IFULLSPD_SHIFT               1
-#define        MDIO_TX0_TX_DRIVER_ICBUF1T                      1
+       #define MDIO_TX3_TX_DRIVER                              0x17
+               #define MDIO_TX0_TX_DRIVER_PREEMPHASIS_MASK             0xf000
+               #define MDIO_TX0_TX_DRIVER_PREEMPHASIS_SHIFT            12
+               #define MDIO_TX0_TX_DRIVER_IDRIVER_MASK                 0x0f00
+               #define MDIO_TX0_TX_DRIVER_IDRIVER_SHIFT                8
+               #define MDIO_TX0_TX_DRIVER_IPREDRIVER_MASK              0x00f0
+               #define MDIO_TX0_TX_DRIVER_IPREDRIVER_SHIFT             4
+               #define MDIO_TX0_TX_DRIVER_IFULLSPD_MASK                0x000e
+               #define MDIO_TX0_TX_DRIVER_IFULLSPD_SHIFT               1
+               #define MDIO_TX0_TX_DRIVER_ICBUF1T                      1
 
 #define        MDIO_REG_BANK_XGXS_BLOCK0                       0x8000
-#define        MDIO_BLOCK0_XGXS_CONTROL                        0x10
+       #define MDIO_BLOCK0_XGXS_CONTROL                        0x10
 
 #define        MDIO_REG_BANK_XGXS_BLOCK1                       0x8010
-#define        MDIO_BLOCK1_LANE_CTRL0                          0x15
-#define        MDIO_BLOCK1_LANE_CTRL1                          0x16
-#define        MDIO_BLOCK1_LANE_CTRL2                          0x17
-#define        MDIO_BLOCK1_LANE_PRBS                           0x19
+       #define MDIO_BLOCK1_LANE_CTRL0                          0x15
+       #define MDIO_BLOCK1_LANE_CTRL1                          0x16
+       #define MDIO_BLOCK1_LANE_CTRL2                          0x17
+       #define MDIO_BLOCK1_LANE_PRBS                           0x19
 
 #define        MDIO_REG_BANK_XGXS_BLOCK2                       0x8100
-#define        MDIO_XGXS_BLOCK2_RX_LN_SWAP                     0x10
-#define        MDIO_XGXS_BLOCK2_RX_LN_SWAP_ENABLE              0x8000
-#define        MDIO_XGXS_BLOCK2_RX_LN_SWAP_FORCE_ENABLE        0x4000
-#define        MDIO_XGXS_BLOCK2_TX_LN_SWAP             0x11
-#define        MDIO_XGXS_BLOCK2_TX_LN_SWAP_ENABLE              0x8000
-#define        MDIO_XGXS_BLOCK2_UNICORE_MODE_10G       0x14
-#define        MDIO_XGXS_BLOCK2_UNICORE_MODE_10G_CX4_XGXS      0x0001
-#define        MDIO_XGXS_BLOCK2_UNICORE_MODE_10G_HIGIG_XGXS    0x0010
-#define        MDIO_XGXS_BLOCK2_TEST_MODE_LANE         0x15
+       #define MDIO_XGXS_BLOCK2_RX_LN_SWAP                     0x10
+               #define MDIO_XGXS_BLOCK2_RX_LN_SWAP_ENABLE              0x8000
+               #define MDIO_XGXS_BLOCK2_RX_LN_SWAP_FORCE_ENABLE        0x4000
+               #define MDIO_XGXS_BLOCK2_TX_LN_SWAP             0x11
+               #define MDIO_XGXS_BLOCK2_TX_LN_SWAP_ENABLE              0x8000
+               #define MDIO_XGXS_BLOCK2_UNICORE_MODE_10G       0x14
+               #define MDIO_XGXS_BLOCK2_UNICORE_MODE_10G_CX4_XGXS      0x0001
+               #define MDIO_XGXS_BLOCK2_UNICORE_MODE_10G_HIGIG_XGXS    0x0010
+               #define MDIO_XGXS_BLOCK2_TEST_MODE_LANE         0x15
 
 #define        MDIO_REG_BANK_GP_STATUS                         0x8120
 #define        MDIO_GP_STATUS_TOP_AN_STATUS1                           0x1B
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_CL73_AUTONEG_COMPLETE     0x0001
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_CL37_AUTONEG_COMPLETE     0x0002
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_LINK_STATUS               0x0004
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_DUPLEX_STATUS             0x0008
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_CL73_MR_LP_NP_AN_ABLE     0x0010
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_CL73_LP_NP_BAM_ABLE       0x0020
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_PAUSE_RSOLUTION_TXSIDE    0x0040
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_PAUSE_RSOLUTION_RXSIDE    0x0080
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_MASK         0x3f00
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10M          0x0000
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_100M         0x0100
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_1G           0x0200
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_2_5G         0x0300
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_5G           0x0400
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_6G           0x0500
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_HIG      0x0600
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_CX4      0x0700
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_12G_HIG      0x0800
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_12_5G        0x0900
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_13G          0x0A00
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_15G          0x0B00
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_16G          0x0C00
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_1G_KX        0x0D00
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_KX4      0x0E00
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_KR       0x0F00
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_XFI      0x1B00
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_20G_DXGXS    0x1E00
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_SFI      0x1F00
-#define        MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_20G_KR2      0x3900
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_CL73_AUTONEG_COMPLETE     0x0001
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_CL37_AUTONEG_COMPLETE     0x0002
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_LINK_STATUS               0x0004
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_DUPLEX_STATUS             0x0008
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_CL73_MR_LP_NP_AN_ABLE     0x0010
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_CL73_LP_NP_BAM_ABLE       0x0020
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_PAUSE_RSOLUTION_TXSIDE    0x0040
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_PAUSE_RSOLUTION_RXSIDE    0x0080
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_MASK         0x3f00
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10M          0x0000
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_100M         0x0100
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_1G           0x0200
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_2_5G         0x0300
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_5G           0x0400
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_6G           0x0500
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_HIG      0x0600
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_CX4      0x0700
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_12G_HIG      0x0800
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_12_5G        0x0900
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_13G          0x0A00
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_15G          0x0B00
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_16G          0x0C00
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_1G_KX        0x0D00
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_KX4      0x0E00
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_KR       0x0F00
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_XFI      0x1B00
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_20G_DXGXS    0x1E00
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_10G_SFI      0x1F00
+       #define MDIO_GP_STATUS_TOP_AN_STATUS1_ACTUAL_SPEED_20G_KR2      0x3900
+
 
 #define        MDIO_REG_BANK_10G_PARALLEL_DETECT               0x8130
 #define        MDIO_10G_PARALLEL_DETECT_PAR_DET_10G_STATUS             0x10
@@ -320,6 +313,7 @@ bit15=link,bit12=duplex,bits11:10=speed,bit14=acknowledge.
 Theotherbitsarereservedandshouldbezero*/
 #define        MDIO_COMBO_IEEE0_AUTO_NEG_LINK_PARTNER_ABILITY1_SGMII_MODE      0x0001
 
+
 #define        MDIO_PMA_DEVAD                  0x1
 /*ieee*/
 #define        MDIO_PMA_REG_CTRL               0x0
@@ -328,7 +322,7 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_PMA_REG_TX_DISABLE                0x0009
 #define        MDIO_PMA_REG_RX_SD              0xa
 /*bnx2x*/
-#define        MDIO_PMA_REG_BNX2X_CTRL         0x0096
+#define        MDIO_PMA_REG_BCM_CTRL           0x0096
 #define MDIO_PMA_REG_FEC_CTRL          0x00ab
 #define        MDIO_PMA_LASI_RXCTRL            0x9000
 #define        MDIO_PMA_LASI_TXCTRL            0x9001
@@ -343,8 +337,8 @@ Theotherbitsarereservedandshouldbezero*/
 #define        MDIO_PMA_REG_CMU_PLL_BYPASS     0xca09
 #define        MDIO_PMA_REG_MISC_CTRL          0xca0a
 #define        MDIO_PMA_REG_GEN_CTRL           0xca10
-#define        MDIO_PMA_REG_GEN_CTRL_ROM_RESET_INTERNAL_MP     0x0188
-#define        MDIO_PMA_REG_GEN_CTRL_ROM_MICRO_RESET           0x018a
+       #define MDIO_PMA_REG_GEN_CTRL_ROM_RESET_INTERNAL_MP     0x0188
+       #define MDIO_PMA_REG_GEN_CTRL_ROM_MICRO_RESET           0x018a
 #define        MDIO_PMA_REG_M8051_MSGIN_REG    0xca12
 #define        MDIO_PMA_REG_M8051_MSGOUT_REG   0xca13
 #define        MDIO_PMA_REG_ROM_VER1           0xca19
@@ -358,21 +352,21 @@ Theotherbitsarereservedandshouldbezero*/
 #define        MDIO_PMA_REG_MISC_CTRL1         0xca85
 
 #define MDIO_PMA_REG_SFP_TWO_WIRE_CTRL         0x8000
-#define MDIO_PMA_REG_SFP_TWO_WIRE_CTRL_STATUS_MASK     0x000c
-#define MDIO_PMA_REG_SFP_TWO_WIRE_STATUS_IDLE          0x0000
-#define MDIO_PMA_REG_SFP_TWO_WIRE_STATUS_COMPLETE      0x0004
-#define MDIO_PMA_REG_SFP_TWO_WIRE_STATUS_IN_PROGRESS   0x0008
-#define MDIO_PMA_REG_SFP_TWO_WIRE_STATUS_FAILED        0x000c
+#define MDIO_PMA_REG_SFP_TWO_WIRE_CTRL_STATUS_MASK     0x000c
+#define MDIO_PMA_REG_SFP_TWO_WIRE_STATUS_IDLE          0x0000
+#define MDIO_PMA_REG_SFP_TWO_WIRE_STATUS_COMPLETE      0x0004
+#define MDIO_PMA_REG_SFP_TWO_WIRE_STATUS_IN_PROGRESS   0x0008
+#define MDIO_PMA_REG_SFP_TWO_WIRE_STATUS_FAILED                0x000c
 #define MDIO_PMA_REG_SFP_TWO_WIRE_BYTE_CNT     0x8002
 #define MDIO_PMA_REG_SFP_TWO_WIRE_MEM_ADDR     0x8003
 #define MDIO_PMA_REG_8726_TWO_WIRE_DATA_BUF    0xc820
-#define MDIO_PMA_REG_8726_TWO_WIRE_DATA_MASK 0xff
+       #define MDIO_PMA_REG_8726_TWO_WIRE_DATA_MASK 0xff
 #define MDIO_PMA_REG_8726_TX_CTRL1             0xca01
 #define MDIO_PMA_REG_8726_TX_CTRL2             0xca05
 
 #define MDIO_PMA_REG_8727_TWO_WIRE_SLAVE_ADDR  0x8005
 #define MDIO_PMA_REG_8727_TWO_WIRE_DATA_BUF    0x8007
-#define MDIO_PMA_REG_8727_TWO_WIRE_DATA_MASK 0xff
+       #define MDIO_PMA_REG_8727_TWO_WIRE_DATA_MASK 0xff
 #define MDIO_PMA_REG_8727_MISC_CTRL            0x8309
 #define MDIO_PMA_REG_8727_TX_CTRL1             0xca02
 #define MDIO_PMA_REG_8727_TX_CTRL2             0xca05
@@ -404,6 +398,8 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_PMA_REG_8481_LINK_SIGNAL_LED4_ENABLE_MASK 0x800
 #define MDIO_PMA_REG_8481_LINK_SIGNAL_LED4_ENABLE_SHIFT        11
 
+
+
 #define        MDIO_WIS_DEVAD                  0x2
 /*bnx2x*/
 #define        MDIO_WIS_REG_LASI_CNTL          0x9002
@@ -415,13 +411,15 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_PCS_REG_7101_DSP_ACCESS   0xD000
 #define MDIO_PCS_REG_7101_SPI_MUX      0xD008
 #define MDIO_PCS_REG_7101_SPI_CTRL_ADDR 0xE12A
-#define MDIO_PCS_REG_7101_SPI_RESET_BIT (5)
+       #define MDIO_PCS_REG_7101_SPI_RESET_BIT (5)
 #define MDIO_PCS_REG_7101_SPI_FIFO_ADDR 0xE02A
-#define MDIO_PCS_REG_7101_SPI_FIFO_ADDR_WRITE_ENABLE_CMD (6)
-#define MDIO_PCS_REG_7101_SPI_FIFO_ADDR_BULK_ERASE_CMD   (0xC7)
-#define MDIO_PCS_REG_7101_SPI_FIFO_ADDR_PAGE_PROGRAM_CMD (2)
+       #define MDIO_PCS_REG_7101_SPI_FIFO_ADDR_WRITE_ENABLE_CMD (6)
+       #define MDIO_PCS_REG_7101_SPI_FIFO_ADDR_BULK_ERASE_CMD   (0xC7)
+       #define MDIO_PCS_REG_7101_SPI_FIFO_ADDR_PAGE_PROGRAM_CMD (2)
 #define MDIO_PCS_REG_7101_SPI_BYTES_TO_TRANSFER_ADDR 0xE028
 
+
+
 #define        MDIO_XS_DEVAD                   0x4
 #define        MDIO_XS_REG_STATUS              0x0001
 #define MDIO_XS_PLL_SEQUENCER          0x8000
@@ -439,12 +437,12 @@ Theotherbitsarereservedandshouldbezero*/
 /*ieee*/
 #define        MDIO_AN_REG_CTRL                0x0000
 #define        MDIO_AN_REG_STATUS              0x0001
-#define        MDIO_AN_REG_STATUS_AN_COMPLETE          0x0020
+       #define MDIO_AN_REG_STATUS_AN_COMPLETE          0x0020
 #define        MDIO_AN_REG_ADV_PAUSE           0x0010
-#define        MDIO_AN_REG_ADV_PAUSE_PAUSE             0x0400
-#define        MDIO_AN_REG_ADV_PAUSE_ASYMMETRIC        0x0800
-#define        MDIO_AN_REG_ADV_PAUSE_BOTH              0x0C00
-#define        MDIO_AN_REG_ADV_PAUSE_MASK              0x0C00
+       #define MDIO_AN_REG_ADV_PAUSE_PAUSE             0x0400
+       #define MDIO_AN_REG_ADV_PAUSE_ASYMMETRIC        0x0800
+       #define MDIO_AN_REG_ADV_PAUSE_BOTH              0x0C00
+       #define MDIO_AN_REG_ADV_PAUSE_MASK              0x0C00
 #define        MDIO_AN_REG_ADV                 0x0011
 #define MDIO_AN_REG_ADV2               0x0012
 #define        MDIO_AN_REG_LP_AUTO_NEG         0x0013
@@ -465,13 +463,16 @@ Theotherbitsarereservedandshouldbezero*/
 
 #define MDIO_AN_REG_8481_10GBASE_T_AN_CTRL     0x0020
 #define MDIO_AN_REG_8481_LEGACY_MII_CTRL       0xffe0
-#define MDIO_AN_REG_8481_MII_CTRL_FORCE_1G     0x40
+       #define MDIO_AN_REG_8481_MII_CTRL_FORCE_1G      0x40
 #define MDIO_AN_REG_8481_LEGACY_MII_STATUS     0xffe1
+#define MDIO_AN_REG_848xx_ID_MSB               0xffe2
+       #define BNX2X84858_PHY_ID                                       0x600d
+#define MDIO_AN_REG_848xx_ID_LSB               0xffe3
 #define MDIO_AN_REG_8481_LEGACY_AN_ADV         0xffe4
 #define MDIO_AN_REG_8481_LEGACY_AN_EXPANSION   0xffe6
 #define MDIO_AN_REG_8481_1000T_CTRL            0xffe9
 #define MDIO_AN_REG_8481_1G_100T_EXT_CTRL      0xfff0
-#define MIDO_AN_REG_8481_EXT_CTRL_FORCE_LEDS_OFF       0x0008
+       #define MIDO_AN_REG_8481_EXT_CTRL_FORCE_LEDS_OFF        0x0008
 #define MDIO_AN_REG_8481_EXPANSION_REG_RD_RW   0xfff5
 #define MDIO_AN_REG_8481_EXPANSION_REG_ACCESS  0xfff7
 #define MDIO_AN_REG_8481_AUX_CTRL              0xfff8
@@ -480,62 +481,62 @@ Theotherbitsarereservedandshouldbezero*/
 /* BNX2X84823 only */
 #define        MDIO_CTL_DEVAD                  0x1e
 #define MDIO_CTL_REG_84823_MEDIA               0x401a
-#define MDIO_CTL_REG_84823_MEDIA_MAC_MASK              0x0018
+       #define MDIO_CTL_REG_84823_MEDIA_MAC_MASK               0x0018
        /* These pins configure the BNX2X84823 interface to MAC after reset. */
-#define MDIO_CTL_REG_84823_CTRL_MAC_XFI                        0x0008
-#define MDIO_CTL_REG_84823_MEDIA_MAC_XAUI_M            0x0010
+               #define MDIO_CTL_REG_84823_CTRL_MAC_XFI                 0x0008
+               #define MDIO_CTL_REG_84823_MEDIA_MAC_XAUI_M             0x0010
        /* These pins configure the BNX2X84823 interface to Line after reset. */
-#define MDIO_CTL_REG_84823_MEDIA_LINE_MASK             0x0060
-#define MDIO_CTL_REG_84823_MEDIA_LINE_XAUI_L           0x0020
-#define MDIO_CTL_REG_84823_MEDIA_LINE_XFI              0x0040
+       #define MDIO_CTL_REG_84823_MEDIA_LINE_MASK              0x0060
+               #define MDIO_CTL_REG_84823_MEDIA_LINE_XAUI_L            0x0020
+               #define MDIO_CTL_REG_84823_MEDIA_LINE_XFI               0x0040
        /* When this pin is active high during reset, 10GBASE-T core is power
         * down, When it is active low the 10GBASE-T is power up
         */
-#define MDIO_CTL_REG_84823_MEDIA_COPPER_CORE_DOWN      0x0080
-#define MDIO_CTL_REG_84823_MEDIA_PRIORITY_MASK         0x0100
-#define MDIO_CTL_REG_84823_MEDIA_PRIORITY_COPPER       0x0000
-#define MDIO_CTL_REG_84823_MEDIA_PRIORITY_FIBER                0x0100
-#define MDIO_CTL_REG_84823_MEDIA_FIBER_1G                      0x1000
+       #define MDIO_CTL_REG_84823_MEDIA_COPPER_CORE_DOWN       0x0080
+       #define MDIO_CTL_REG_84823_MEDIA_PRIORITY_MASK          0x0100
+               #define MDIO_CTL_REG_84823_MEDIA_PRIORITY_COPPER        0x0000
+               #define MDIO_CTL_REG_84823_MEDIA_PRIORITY_FIBER         0x0100
+       #define MDIO_CTL_REG_84823_MEDIA_FIBER_1G                       0x1000
 #define MDIO_CTL_REG_84823_USER_CTRL_REG                       0x4005
-#define MDIO_CTL_REG_84823_USER_CTRL_CMS                       0x0080
+       #define MDIO_CTL_REG_84823_USER_CTRL_CMS                        0x0080
 #define MDIO_PMA_REG_84823_CTL_SLOW_CLK_CNT_HIGH               0xa82b
-#define MDIO_PMA_REG_84823_BLINK_RATE_VAL_15P9HZ       0x2f
+       #define MDIO_PMA_REG_84823_BLINK_RATE_VAL_15P9HZ        0x2f
 #define MDIO_PMA_REG_84823_CTL_LED_CTL_1                       0xa8e3
 #define MDIO_PMA_REG_84833_CTL_LED_CTL_1                       0xa8ec
-#define MDIO_PMA_REG_84823_LED3_STRETCH_EN                     0x0080
+       #define MDIO_PMA_REG_84823_LED3_STRETCH_EN                      0x0080
 
 /* BNX2X84833 only */
 #define MDIO_84833_TOP_CFG_FW_REV                      0x400f
-#define MDIO_84833_TOP_CFG_FW_EEE              0x10b1
-#define MDIO_84833_TOP_CFG_FW_NO_EEE           0x1f81
+#define MDIO_84833_TOP_CFG_FW_EEE                      0x10b1
+#define MDIO_84833_TOP_CFG_FW_NO_EEE                   0x1f81
 #define MDIO_84833_TOP_CFG_XGPHY_STRAP1                0x401a
-#define MDIO_84833_SUPER_ISOLATE               0x8000
-/* These are mailbox register set used by 84833. */
-#define MDIO_84833_TOP_CFG_SCRATCH_REG0                        0x4005
-#define MDIO_84833_TOP_CFG_SCRATCH_REG1                0x4006
-#define MDIO_84833_TOP_CFG_SCRATCH_REG2                        0x4007
-#define MDIO_84833_TOP_CFG_SCRATCH_REG3                        0x4008
-#define MDIO_84833_TOP_CFG_SCRATCH_REG4                        0x4009
-#define MDIO_84833_TOP_CFG_SCRATCH_REG26               0x4037
-#define MDIO_84833_TOP_CFG_SCRATCH_REG27               0x4038
-#define MDIO_84833_TOP_CFG_SCRATCH_REG28               0x4039
-#define MDIO_84833_TOP_CFG_SCRATCH_REG29               0x403a
-#define MDIO_84833_TOP_CFG_SCRATCH_REG30               0x403b
-#define MDIO_84833_TOP_CFG_SCRATCH_REG31               0x403c
-#define MDIO_84833_CMD_HDLR_COMMAND    MDIO_84833_TOP_CFG_SCRATCH_REG0
-#define MDIO_84833_CMD_HDLR_STATUS     MDIO_84833_TOP_CFG_SCRATCH_REG26
-#define MDIO_84833_CMD_HDLR_DATA1      MDIO_84833_TOP_CFG_SCRATCH_REG27
-#define MDIO_84833_CMD_HDLR_DATA2      MDIO_84833_TOP_CFG_SCRATCH_REG28
-#define MDIO_84833_CMD_HDLR_DATA3      MDIO_84833_TOP_CFG_SCRATCH_REG29
-#define MDIO_84833_CMD_HDLR_DATA4      MDIO_84833_TOP_CFG_SCRATCH_REG30
-#define MDIO_84833_CMD_HDLR_DATA5      MDIO_84833_TOP_CFG_SCRATCH_REG31
-
-/* Mailbox command set used by 84833. */
-#define PHY84833_CMD_SET_PAIR_SWAP                     0x8001
-#define PHY84833_CMD_GET_EEE_MODE                      0x8008
-#define PHY84833_CMD_SET_EEE_MODE                      0x8009
-#define PHY84833_CMD_GET_CURRENT_TEMP                  0x8031
-/* Mailbox status set used by 84833. */
+#define MDIO_84833_SUPER_ISOLATE                       0x8000
+/* These are mailbox register set used by 84833/84858. */
+#define MDIO_848xx_TOP_CFG_SCRATCH_REG0                        0x4005
+#define MDIO_848xx_TOP_CFG_SCRATCH_REG1                        0x4006
+#define MDIO_848xx_TOP_CFG_SCRATCH_REG2                        0x4007
+#define MDIO_848xx_TOP_CFG_SCRATCH_REG3                        0x4008
+#define MDIO_848xx_TOP_CFG_SCRATCH_REG4                        0x4009
+#define MDIO_848xx_TOP_CFG_SCRATCH_REG26               0x4037
+#define MDIO_848xx_TOP_CFG_SCRATCH_REG27               0x4038
+#define MDIO_848xx_TOP_CFG_SCRATCH_REG28               0x4039
+#define MDIO_848xx_TOP_CFG_SCRATCH_REG29               0x403a
+#define MDIO_848xx_TOP_CFG_SCRATCH_REG30               0x403b
+#define MDIO_848xx_TOP_CFG_SCRATCH_REG31               0x403c
+#define MDIO_848xx_CMD_HDLR_COMMAND    (MDIO_848xx_TOP_CFG_SCRATCH_REG0)
+#define MDIO_848xx_CMD_HDLR_STATUS     (MDIO_848xx_TOP_CFG_SCRATCH_REG26)
+#define MDIO_848xx_CMD_HDLR_DATA1      (MDIO_848xx_TOP_CFG_SCRATCH_REG27)
+#define MDIO_848xx_CMD_HDLR_DATA2      (MDIO_848xx_TOP_CFG_SCRATCH_REG28)
+#define MDIO_848xx_CMD_HDLR_DATA3      (MDIO_848xx_TOP_CFG_SCRATCH_REG29)
+#define MDIO_848xx_CMD_HDLR_DATA4      (MDIO_848xx_TOP_CFG_SCRATCH_REG30)
+#define MDIO_848xx_CMD_HDLR_DATA5      (MDIO_848xx_TOP_CFG_SCRATCH_REG31)
+
+/* Mailbox command set used by 84833/84858 */
+#define PHY848xx_CMD_SET_PAIR_SWAP                     0x8001
+#define PHY848xx_CMD_GET_EEE_MODE                      0x8008
+#define PHY848xx_CMD_SET_EEE_MODE                      0x8009
+#define PHY848xx_CMD_GET_CURRENT_TEMP                  0x8031
+/* Mailbox status set used by 84833 only */
 #define PHY84833_STATUS_CMD_RECEIVED                   0x0001
 #define PHY84833_STATUS_CMD_IN_PROGRESS                        0x0002
 #define PHY84833_STATUS_CMD_COMPLETE_PASS              0x0004
@@ -545,6 +546,19 @@ Theotherbitsarereservedandshouldbezero*/
 #define PHY84833_STATUS_CMD_NOT_OPEN_FOR_CMDS          0x0040
 #define PHY84833_STATUS_CMD_CLEAR_COMPLETE             0x0080
 #define PHY84833_STATUS_CMD_OPEN_OVERRIDE              0xa5a5
+/* Mailbox Process */
+#define PHY84833_MB_PROCESS1                           1
+#define PHY84833_MB_PROCESS2                           2
+#define PHY84833_MB_PROCESS3                           3
+
+
+/* Mailbox status set used by 84858 only */
+#define PHY84858_STATUS_CMD_RECEIVED                   0x0001
+#define PHY84858_STATUS_CMD_IN_PROGRESS                        0x0002
+#define PHY84858_STATUS_CMD_COMPLETE_PASS              0x0004
+#define PHY84858_STATUS_CMD_COMPLETE_ERROR             0x0008
+#define PHY84858_STATUS_CMD_SYSTEM_BUSY                 0xbbbb
+
 
 /* Warpcore clause 45 addressing */
 #define MDIO_WC_DEVAD                                  0x3
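The 848xx scratch registers above double as a firmware mailbox: a command code goes into CMD_HDLR_COMMAND, arguments and results travel through DATA1..DATA5, and completion is polled via CMD_HDLR_STATUS. A hedged sketch of that handshake, backed here by a fake register map so it runs stand-alone; the accessors are hypothetical, and the use of MDIO_CTL_DEVAD for these registers is an assumption carried over from the Linux bnx2x driver:

#include <stdint.h>
#include <stdio.h>

/* Values copied from the defines added above. */
#define MDIO_CTL_DEVAD                    0x1e
#define MDIO_848xx_CMD_HDLR_COMMAND       0x4005  /* TOP_CFG_SCRATCH_REG0  */
#define MDIO_848xx_CMD_HDLR_STATUS        0x4037  /* TOP_CFG_SCRATCH_REG26 */
#define MDIO_848xx_CMD_HDLR_DATA1         0x4038  /* TOP_CFG_SCRATCH_REG27 */
#define PHY848xx_CMD_GET_CURRENT_TEMP     0x8031
#define PHY84833_STATUS_CMD_COMPLETE_PASS 0x0004

/* Hypothetical clause-45 accessors over a fake register map; the real
 * driver issues these through its own MDIO helpers. */
static uint16_t fake_regs[0x10000];

static uint16_t mdio_read45(uint8_t devad, uint16_t reg)
{
	(void)devad;
	return fake_regs[reg];
}

static void mdio_write45(uint8_t devad, uint16_t reg, uint16_t val)
{
	(void)devad;
	fake_regs[reg] = val;
}

/* Issue a mailbox command, then poll the status scratch register. */
static int phy848xx_get_temp(uint16_t *temp)
{
	int tries;

	mdio_write45(MDIO_CTL_DEVAD, MDIO_848xx_CMD_HDLR_COMMAND,
		     PHY848xx_CMD_GET_CURRENT_TEMP);

	for (tries = 0; tries < 1000; tries++) {
		uint16_t status = mdio_read45(MDIO_CTL_DEVAD,
					      MDIO_848xx_CMD_HDLR_STATUS);
		if (status == PHY84833_STATUS_CMD_COMPLETE_PASS) {
			*temp = mdio_read45(MDIO_CTL_DEVAD,
					    MDIO_848xx_CMD_HDLR_DATA1);
			return 0;
		}
	}
	return -1; /* timed out */
}

int main(void)
{
	uint16_t temp;

	/* Pretend the PHY firmware completed the command and left 42 C
	 * in DATA1 before we start polling. */
	fake_regs[MDIO_848xx_CMD_HDLR_STATUS] = PHY84833_STATUS_CMD_COMPLETE_PASS;
	fake_regs[MDIO_848xx_CMD_HDLR_DATA1] = 42;

	if (phy848xx_get_temp(&temp) == 0)
		printf("PHY temperature: %u C\n", temp);
	return 0;
}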
@@ -553,8 +567,8 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT0       0x10
 #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT1       0x11
 #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT2       0x12
-#define MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_ABILITY    0x4000
-#define MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_REQ                0x8000
+       #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_ABILITY     0x4000
+       #define MDIO_WC_REG_AN_IEEE1BLK_AN_ADV2_FEC_REQ         0x8000
 #define MDIO_WC_REG_PCS_STATUS2                                0x0021
 #define MDIO_WC_REG_PMD_KR_CONTROL                     0x0096
 #define MDIO_WC_REG_XGXSBLK0_XGXSCONTROL                0x8000
@@ -570,6 +584,8 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_WC_REG_TX2_ANA_CTRL0                      0x8081
 #define MDIO_WC_REG_TX3_ANA_CTRL0                      0x8091
 #define MDIO_WC_REG_TX0_TX_DRIVER                      0x8067
+#define MDIO_WC_REG_TX0_TX_DRIVER_IFIR_OFFSET                  0x01
+#define MDIO_WC_REG_TX0_TX_DRIVER_IFIR_MASK                            0x000e
 #define MDIO_WC_REG_TX0_TX_DRIVER_IPRE_DRIVER_OFFSET           0x04
 #define MDIO_WC_REG_TX0_TX_DRIVER_IPRE_DRIVER_MASK                     0x00f0
 #define MDIO_WC_REG_TX0_TX_DRIVER_IDRIVER_OFFSET               0x08
@@ -585,7 +601,9 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_WC_REG_RX1_PCI_CTRL                       0x80ca
 #define MDIO_WC_REG_RX2_PCI_CTRL                       0x80da
 #define MDIO_WC_REG_RX3_PCI_CTRL                       0x80ea
+#define MDIO_WC_REG_RXB_ANA_RX_CONTROL_PCI             0x80fa
 #define MDIO_WC_REG_XGXSBLK2_UNICORE_MODE_10G          0x8104
+#define MDIO_WC_REG_XGXSBLK2_LANE_RESET                        0x810a
 #define MDIO_WC_REG_XGXS_STATUS3                       0x8129
 #define MDIO_WC_REG_PAR_DET_10G_STATUS                 0x8130
 #define MDIO_WC_REG_PAR_DET_10G_CTRL                   0x8131
@@ -599,35 +617,35 @@ Theotherbitsarereservedandshouldbezero*/
 #define MDIO_WC_REG_GP2_STATUS_GP_2_2                  0x81d2
 #define MDIO_WC_REG_GP2_STATUS_GP_2_3                  0x81d3
 #define MDIO_WC_REG_GP2_STATUS_GP_2_4                  0x81d4
-#define MDIO_WC_REG_GP2_STATUS_GP_2_4_CL73_AN_CMPL 0x1000
-#define MDIO_WC_REG_GP2_STATUS_GP_2_4_CL37_AN_CMPL 0x0100
-#define MDIO_WC_REG_GP2_STATUS_GP_2_4_CL37_LP_AN_CAP 0x0010
-#define MDIO_WC_REG_GP2_STATUS_GP_2_4_CL37_AN_CAP 0x1
+       #define MDIO_WC_REG_GP2_STATUS_GP_2_4_CL73_AN_CMPL 0x1000
+       #define MDIO_WC_REG_GP2_STATUS_GP_2_4_CL37_AN_CMPL 0x0100
+       #define MDIO_WC_REG_GP2_STATUS_GP_2_4_CL37_LP_AN_CAP 0x0010
+       #define MDIO_WC_REG_GP2_STATUS_GP_2_4_CL37_AN_CAP 0x1
 #define MDIO_WC_REG_UC_INFO_B0_DEAD_TRAP                0x81EE
 #define MDIO_WC_REG_UC_INFO_B1_VERSION                  0x81F0
 #define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE           0x81F2
-#define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_LANE0_OFFSET   0x0
-#define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_DEFAULT        0x0
-#define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_SFP_OPT_LR     0x1
-#define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_SFP_DAC        0x2
-#define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_SFP_XLAUI      0x3
-#define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_LONG_CH_6G     0x4
-#define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_LANE1_OFFSET   0x4
-#define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_LANE2_OFFSET   0x8
-#define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_LANE3_OFFSET   0xc
+       #define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_LANE0_OFFSET    0x0
+               #define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_DEFAULT        0x0
+               #define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_SFP_OPT_LR     0x1
+               #define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_SFP_DAC        0x2
+               #define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_SFP_XLAUI      0x3
+               #define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_LONG_CH_6G     0x4
+       #define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_LANE1_OFFSET    0x4
+       #define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_LANE2_OFFSET    0x8
+       #define MDIO_WC_REG_UC_INFO_B1_FIRMWARE_LANE3_OFFSET    0xc
 #define MDIO_WC_REG_UC_INFO_B1_CRC                      0x81FE
 #define MDIO_WC_REG_DSC1B0_UC_CTRL                             0x820e
 #define MDIO_WC_REG_DSC1B0_UC_CTRL_RDY4CMD                     (1<<7)
 #define MDIO_WC_REG_DSC_SMC                            0x8213
 #define MDIO_WC_REG_DSC2B0_DSC_MISC_CTRL0              0x821e
 #define MDIO_WC_REG_TX_FIR_TAP                         0x82e2
-#define MDIO_WC_REG_TX_FIR_TAP_PRE_TAP_OFFSET          0x00
-#define MDIO_WC_REG_TX_FIR_TAP_PRE_TAP_MASK                    0x000f
-#define MDIO_WC_REG_TX_FIR_TAP_MAIN_TAP_OFFSET         0x04
-#define MDIO_WC_REG_TX_FIR_TAP_MAIN_TAP_MASK           0x03f0
-#define MDIO_WC_REG_TX_FIR_TAP_POST_TAP_OFFSET         0x0a
-#define MDIO_WC_REG_TX_FIR_TAP_POST_TAP_MASK           0x7c00
-#define MDIO_WC_REG_TX_FIR_TAP_ENABLE          0x8000
+       #define MDIO_WC_REG_TX_FIR_TAP_PRE_TAP_OFFSET           0x00
+       #define MDIO_WC_REG_TX_FIR_TAP_PRE_TAP_MASK                     0x000f
+       #define MDIO_WC_REG_TX_FIR_TAP_MAIN_TAP_OFFSET          0x04
+       #define MDIO_WC_REG_TX_FIR_TAP_MAIN_TAP_MASK            0x03f0
+       #define MDIO_WC_REG_TX_FIR_TAP_POST_TAP_OFFSET          0x0a
+       #define MDIO_WC_REG_TX_FIR_TAP_POST_TAP_MASK            0x7c00
+       #define MDIO_WC_REG_TX_FIR_TAP_ENABLE           0x8000
 #define MDIO_WC_REG_CL72_USERB0_CL72_TX_FIR_TAP                0x82e2
 #define MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL      0x82e3
 #define MDIO_WC_REG_CL72_USERB0_CL72_OS_DEF_CTRL       0x82e6
@@ -689,8 +707,8 @@ The other bits are reserved and should be zero */
 #define MDIO_WC_REG_COMBO_IEEE0_MIIISTAT                0xffe1
 
 #define MDIO_WC0_XGXS_BLK2_LANE_RESET                   0x810A
-#define MDIO_WC0_XGXS_BLK2_LANE_RESET_RX_BITSHIFT      0
-#define MDIO_WC0_XGXS_BLK2_LANE_RESET_TX_BITSHIFT      4
+#define MDIO_WC0_XGXS_BLK2_LANE_RESET_RX_BITSHIFT      0
+#define MDIO_WC0_XGXS_BLK2_LANE_RESET_TX_BITSHIFT      4
 
 #define MDIO_WC0_XGXS_BLK6_XGXS_X2_CONTROL2             0x8141
 
@@ -700,33 +718,31 @@ The other bits are reserved and should be zero */
 #define MDIO_REG_GPHY_MII_STATUS                       0x1
 #define MDIO_REG_GPHY_PHYID_LSB                                0x3
 #define MDIO_REG_GPHY_CL45_ADDR_REG                    0xd
-#define MDIO_REG_GPHY_CL45_REG_WRITE           0x4000
-#define MDIO_REG_GPHY_CL45_REG_READ            0xc000
+       #define MDIO_REG_GPHY_CL45_REG_WRITE            0x4000
+       #define MDIO_REG_GPHY_CL45_REG_READ             0xc000
 #define MDIO_REG_GPHY_CL45_DATA_REG                    0xe
-#define MDIO_REG_GPHY_EEE_RESOLVED             0x803e
+       #define MDIO_REG_GPHY_EEE_RESOLVED              0x803e
 #define MDIO_REG_GPHY_EXP_ACCESS_GATE                  0x15
 #define MDIO_REG_GPHY_EXP_ACCESS                       0x17
-#define MDIO_REG_GPHY_EXP_ACCESS_TOP           0xd00
-#define MDIO_REG_GPHY_EXP_TOP_2K_BUF           0x40
+       #define MDIO_REG_GPHY_EXP_ACCESS_TOP            0xd00
+       #define MDIO_REG_GPHY_EXP_TOP_2K_BUF            0x40
 #define MDIO_REG_GPHY_AUX_STATUS                       0x19
 #define MDIO_REG_INTR_STATUS                           0x1a
 #define MDIO_REG_INTR_MASK                             0x1b
-#define MDIO_REG_INTR_MASK_LINK_STATUS                 (0x1 << 1)
+       #define MDIO_REG_INTR_MASK_LINK_STATUS                  (0x1 << 1)
 #define MDIO_REG_GPHY_SHADOW                           0x1c
-#define MDIO_REG_GPHY_SHADOW_LED_SEL1                  (0x0d << 10)
-#define MDIO_REG_GPHY_SHADOW_LED_SEL2                  (0x0e << 10)
-#define MDIO_REG_GPHY_SHADOW_WR_ENA                    (0x1 << 15)
-#define MDIO_REG_GPHY_SHADOW_AUTO_DET_MED              (0x1e << 10)
-#define MDIO_REG_GPHY_SHADOW_INVERT_FIB_SD             (0x1 << 8)
-
-typedef elink_status_t(*read_sfp_module_eeprom_func_p) (struct elink_phy * phy,
-                                                       struct elink_params *
-                                                       params,
-                                                       uint8_t dev_addr,
-                                                       uint16_t addr,
-                                                       uint8_t byte_cnt,
-                                                       uint8_t * o_buf,
-                                                       uint8_t);
+       #define MDIO_REG_GPHY_SHADOW_LED_SEL1                   (0x0d << 10)
+       #define MDIO_REG_GPHY_SHADOW_LED_SEL2                   (0x0e << 10)
+       #define MDIO_REG_GPHY_SHADOW_WR_ENA                     (0x1 << 15)
+       #define MDIO_REG_GPHY_SHADOW_AUTO_DET_MED               (0x1e << 10)
+       #define MDIO_REG_GPHY_SHADOW_INVERT_FIB_SD              (0x1 << 8)
+
+
+typedef elink_status_t (*read_sfp_module_eeprom_func_p)(struct elink_phy *phy,
+                                            struct elink_params *params,
+                                            uint8_t dev_addr, uint16_t addr,
+                                            uint8_t byte_cnt,
+                                            uint8_t *o_buf, uint8_t);
 /********************************************************/
 #define ELINK_ETH_HLEN                 14
 /* L2 header size + 2*VLANs (8 bytes) + LLC SNAP (8 bytes) */
@@ -850,21 +866,29 @@ typedef elink_status_t(*read_sfp_module_eeprom_func_p) (struct elink_phy * phy,
                         LINK_STATUS_LINK_PARTNER_ASYMMETRIC_PAUSE)
 
 #define ELINK_SFP_EEPROM_CON_TYPE_ADDR         0x2
-#define ELINK_SFP_EEPROM_CON_TYPE_VAL_LC       0x7
-#define ELINK_SFP_EEPROM_CON_TYPE_VAL_COPPER   0x21
-#define ELINK_SFP_EEPROM_CON_TYPE_VAL_RJ45     0x22
+       #define ELINK_SFP_EEPROM_CON_TYPE_VAL_UNKNOWN   0x0
+       #define ELINK_SFP_EEPROM_CON_TYPE_VAL_LC        0x7
+       #define ELINK_SFP_EEPROM_CON_TYPE_VAL_COPPER    0x21
+       #define ELINK_SFP_EEPROM_CON_TYPE_VAL_RJ45      0x22
+
+
+#define ELINK_SFP_EEPROM_10G_COMP_CODE_ADDR            0x3
+       #define ELINK_SFP_EEPROM_10G_COMP_CODE_SR_MASK  (1 << 4)
+       #define ELINK_SFP_EEPROM_10G_COMP_CODE_LR_MASK  (1 << 5)
+       #define ELINK_SFP_EEPROM_10G_COMP_CODE_LRM_MASK (1 << 6)
 
-#define ELINK_SFP_EEPROM_COMP_CODE_ADDR                0x3
-#define ELINK_SFP_EEPROM_COMP_CODE_SR_MASK     (1<<4)
-#define ELINK_SFP_EEPROM_COMP_CODE_LR_MASK     (1<<5)
-#define ELINK_SFP_EEPROM_COMP_CODE_LRM_MASK    (1<<6)
+#define ELINK_SFP_EEPROM_1G_COMP_CODE_ADDR             0x6
+       #define ELINK_SFP_EEPROM_1G_COMP_CODE_SX        (1 << 0)
+       #define ELINK_SFP_EEPROM_1G_COMP_CODE_LX        (1 << 1)
+       #define ELINK_SFP_EEPROM_1G_COMP_CODE_CX        (1 << 2)
+       #define ELINK_SFP_EEPROM_1G_COMP_CODE_BASE_T    (1 << 3)
 
 #define ELINK_SFP_EEPROM_FC_TX_TECH_ADDR               0x8
-#define ELINK_SFP_EEPROM_FC_TX_TECH_BITMASK_COPPER_PASSIVE 0x4
-#define ELINK_SFP_EEPROM_FC_TX_TECH_BITMASK_COPPER_ACTIVE  0x8
+       #define ELINK_SFP_EEPROM_FC_TX_TECH_BITMASK_COPPER_PASSIVE 0x4
+       #define ELINK_SFP_EEPROM_FC_TX_TECH_BITMASK_COPPER_ACTIVE  0x8
 
 #define ELINK_SFP_EEPROM_OPTIONS_ADDR                  0x40
-#define ELINK_SFP_EEPROM_OPTIONS_LINEAR_RX_OUT_MASK 0x1
+       #define ELINK_SFP_EEPROM_OPTIONS_LINEAR_RX_OUT_MASK 0x1
 #define ELINK_SFP_EEPROM_OPTIONS_SIZE                  2
 
 #define ELINK_EDC_MODE_LINEAR                          0x0022
@@ -883,6 +907,10 @@ typedef elink_status_t(*read_sfp_module_eeprom_func_p) (struct elink_phy * phy,
 
 #define ELINK_MAX_PACKET_SIZE                                  (9700)
 #define MAX_KR_LINK_RETRY                              4
+#define DEFAULT_TX_DRV_BRDCT           2
+#define DEFAULT_TX_DRV_IFIR            0
+#define DEFAULT_TX_DRV_POST2           3
+#define DEFAULT_TX_DRV_IPRE_DRIVER     6
 
 /**********************************************************/
 /*                     INTERFACE                          */
@@ -900,6 +928,11 @@ typedef elink_status_t(*read_sfp_module_eeprom_func_p) (struct elink_phy * phy,
                (_bank + (_addr & 0xf)), \
                _val)
 
+static elink_status_t elink_check_half_open_conn(struct elink_params *params,
+                                     struct elink_vars *vars, uint8_t notify);
+static elink_status_t elink_sfp_module_detection(struct elink_phy *phy,
+                                     struct elink_params *params);
+
 static uint32_t elink_bits_en(struct bnx2x_softc *sc, uint32_t reg, uint32_t bits)
 {
        uint32_t val = REG_RD(sc, reg);
@@ -935,16 +968,16 @@ static int elink_check_lfa(struct elink_params *params)
        struct bnx2x_softc *sc = params->sc;
 
        additional_config =
-           REG_RD(sc, params->lfa_base +
-                  offsetof(struct shmem_lfa, additional_config));
+               REG_RD(sc, params->lfa_base +
+                          offsetof(struct shmem_lfa, additional_config));
 
        /* NOTE: must be first condition checked -
 	 * to verify DCC bit is cleared in any case!
 	 */
        if (additional_config & NO_LFA_DUE_TO_DCC_MASK) {
-               PMD_DRV_LOG(DEBUG, "No LFA due to DCC flap after clp exit");
+               ELINK_DEBUG_P0(sc, "No LFA due to DCC flap after clp exit");
                REG_WR(sc, params->lfa_base +
-                      offsetof(struct shmem_lfa, additional_config),
+                          offsetof(struct shmem_lfa, additional_config),
                       additional_config & ~NO_LFA_DUE_TO_DCC_MASK);
                return LFA_DCC_LFA_DISABLED;
        }
@@ -983,8 +1016,8 @@ static int elink_check_lfa(struct elink_params *params)
                           offsetof(struct shmem_lfa, req_duplex));
        req_val = params->req_duplex[0] | (params->req_duplex[1] << 16);
        if ((saved_val & lfa_mask) != (req_val & lfa_mask)) {
-               PMD_DRV_LOG(INFO, "Duplex mismatch %x vs. %x",
-                           (saved_val & lfa_mask), (req_val & lfa_mask));
+               ELINK_DEBUG_P2(sc, "Duplex mismatch %x vs. %x",
+                              (saved_val & lfa_mask), (req_val & lfa_mask));
                return LFA_DUPLEX_MISMATCH;
        }
        /* Compare Flow Control */
@@ -992,8 +1025,8 @@ static int elink_check_lfa(struct elink_params *params)
                           offsetof(struct shmem_lfa, req_flow_ctrl));
        req_val = params->req_flow_ctrl[0] | (params->req_flow_ctrl[1] << 16);
        if ((saved_val & lfa_mask) != (req_val & lfa_mask)) {
-               PMD_DRV_LOG(DEBUG, "Flow control mismatch %x vs. %x",
-                           (saved_val & lfa_mask), (req_val & lfa_mask));
+               ELINK_DEBUG_P2(sc, "Flow control mismatch %x vs. %x",
+                              (saved_val & lfa_mask), (req_val & lfa_mask));
                return LFA_FLOW_CTRL_MISMATCH;
        }
        /* Compare Link Speed */
@@ -1001,8 +1034,8 @@ static int elink_check_lfa(struct elink_params *params)
                           offsetof(struct shmem_lfa, req_line_speed));
        req_val = params->req_line_speed[0] | (params->req_line_speed[1] << 16);
        if ((saved_val & lfa_mask) != (req_val & lfa_mask)) {
-               PMD_DRV_LOG(DEBUG, "Link speed mismatch %x vs. %x",
-                           (saved_val & lfa_mask), (req_val & lfa_mask));
+               ELINK_DEBUG_P2(sc, "Link speed mismatch %x vs. %x",
+                              (saved_val & lfa_mask), (req_val & lfa_mask));
                return LFA_LINK_SPEED_MISMATCH;
        }
 
@@ -1012,21 +1045,21 @@ static int elink_check_lfa(struct elink_params *params)
                                                     speed_cap_mask[cfg_idx]));
 
                if (cur_speed_cap_mask != params->speed_cap_mask[cfg_idx]) {
-                       PMD_DRV_LOG(DEBUG, "Speed Cap mismatch %x vs. %x",
-                                   cur_speed_cap_mask,
-                                   params->speed_cap_mask[cfg_idx]);
+                       ELINK_DEBUG_P2(sc, "Speed Cap mismatch %x vs. %x",
+                                      cur_speed_cap_mask,
+                                      params->speed_cap_mask[cfg_idx]);
                        return LFA_SPEED_CAP_MISMATCH;
                }
        }
 
        cur_req_fc_auto_adv =
-           REG_RD(sc, params->lfa_base +
-                  offsetof(struct shmem_lfa, additional_config)) &
-           REQ_FC_AUTO_ADV_MASK;
+               REG_RD(sc, params->lfa_base +
+                      offsetof(struct shmem_lfa, additional_config)) &
+               REQ_FC_AUTO_ADV_MASK;
 
-       if ((uint16_t) cur_req_fc_auto_adv != params->req_fc_auto_adv) {
-               PMD_DRV_LOG(DEBUG, "Flow Ctrl AN mismatch %x vs. %x",
-                           cur_req_fc_auto_adv, params->req_fc_auto_adv);
+       if ((uint16_t)cur_req_fc_auto_adv != params->req_fc_auto_adv) {
+               ELINK_DEBUG_P2(sc, "Flow Ctrl AN mismatch %x vs. %x",
+                              cur_req_fc_auto_adv, params->req_fc_auto_adv);
                return LFA_FLOW_CTRL_MISMATCH;
        }
 
@@ -1038,26 +1071,25 @@ static int elink_check_lfa(struct elink_params *params)
             (params->eee_mode & ELINK_EEE_MODE_ENABLE_LPI)) ||
            ((eee_status & SHMEM_EEE_REQUESTED_BIT) ^
             (params->eee_mode & ELINK_EEE_MODE_ADV_LPI))) {
-               PMD_DRV_LOG(DEBUG, "EEE mismatch %x vs. %x", params->eee_mode,
-                           eee_status);
+               ELINK_DEBUG_P2(sc, "EEE mismatch %x vs. %x", params->eee_mode,
+                              eee_status);
                return LFA_EEE_MISMATCH;
        }
 
        /* LFA conditions are met */
        return 0;
 }
-
 /******************************************************************/
 /*                     EPIO/GPIO section                         */
 /******************************************************************/
 static void elink_get_epio(struct bnx2x_softc *sc, uint32_t epio_pin,
-                          uint32_t * en)
+                          uint32_t *en)
 {
        uint32_t epio_mask, gp_oenable;
        *en = 0;
        /* Sanity check */
        if (epio_pin > 31) {
-               PMD_DRV_LOG(DEBUG, "Invalid EPIO pin %d to get", epio_pin);
+               ELINK_DEBUG_P1(sc, "Invalid EPIO pin %d to get", epio_pin);
                return;
        }
 
@@ -1068,17 +1100,16 @@ static void elink_get_epio(struct bnx2x_softc *sc, uint32_t epio_pin,
 
        *en = (REG_RD(sc, MCP_REG_MCPR_GP_INPUTS) & epio_mask) >> epio_pin;
 }
-
 static void elink_set_epio(struct bnx2x_softc *sc, uint32_t epio_pin, uint32_t en)
 {
        uint32_t epio_mask, gp_output, gp_oenable;
 
        /* Sanity check */
        if (epio_pin > 31) {
-               PMD_DRV_LOG(DEBUG, "Invalid EPIO pin %d to set", epio_pin);
+               ELINK_DEBUG_P1(sc, "Invalid EPIO pin %d to set", epio_pin);
                return;
        }
-       PMD_DRV_LOG(DEBUG, "Setting EPIO pin %d to %d", epio_pin, en);
+       ELINK_DEBUG_P2(sc, "Setting EPIO pin %d to %d", epio_pin, en);
        epio_mask = 1 << epio_pin;
        /* Set this EPIO to output */
        gp_output = REG_RD(sc, MCP_REG_MCPR_GP_OUTPUTS);
@@ -1104,12 +1135,12 @@ static void elink_set_cfg_pin(struct bnx2x_softc *sc, uint32_t pin_cfg,
        } else {
                uint8_t gpio_num = (pin_cfg - PIN_CFG_GPIO0_P0) & 0x3;
                uint8_t gpio_port = (pin_cfg - PIN_CFG_GPIO0_P0) >> 2;
-               elink_cb_gpio_write(sc, gpio_num, (uint8_t) val, gpio_port);
+               elink_cb_gpio_write(sc, gpio_num, (uint8_t)val, gpio_port);
        }
 }
 
 static uint32_t elink_get_cfg_pin(struct bnx2x_softc *sc, uint32_t pin_cfg,
-                                 uint32_t * val)
+                                 uint32_t *val)
 {
        if (pin_cfg == PIN_CFG_NA)
                return ELINK_STATUS_ERROR;
@@ -1121,14 +1152,939 @@ static uint32_t elink_get_cfg_pin(struct bnx2x_softc *sc, uint32_t pin_cfg,
                *val = elink_cb_gpio_read(sc, gpio_num, gpio_port);
        }
        return ELINK_STATUS_OK;
+}
+
+/******************************************************************/
+/*                             ETS section                       */
+/******************************************************************/
+static void elink_ets_e2e3a0_disabled(struct elink_params *params)
+{
+       /* ETS disabled configuration*/
+       struct bnx2x_softc *sc = params->sc;
+
+       ELINK_DEBUG_P0(sc, "ETS E2E3 disabled configuration");
+
+	/* mapping between entry priority and client number (0, 1, 2 - debug
+	 * and management clients, 3 - COS0 client, 4 - COS1 client) (HIGHEST)
+	 * 3-bit client numbers.
+        *   PRI4    |    PRI3    |    PRI2    |    PRI1    |    PRI0
+        * cos1-100     cos0-011     dbg1-010     dbg0-001     MCP-000
+        */
+
+       REG_WR(sc, NIG_REG_P0_TX_ARB_PRIORITY_CLIENT, 0x4688);
+	/* Bitmap of 5 bits length. Each bit specifies whether the entry
+	 * behaves as strict.  Bits 0, 1, 2 - debug and management entries, 3 -
+        * COS0 entry, 4 - COS1 entry.
+        * COS1 | COS0 | DEBUG1 | DEBUG0 | MGMT
+        * bit4   bit3    bit2   bit1     bit0
+        * MCP and debug are strict
+        */
+
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CLIENT_IS_STRICT, 0x7);
+       /* defines which entries (clients) are subjected to WFQ arbitration */
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CLIENT_IS_SUBJECT2WFQ, 0);
+       /* For strict priority entries defines the number of consecutive
+        * slots for the highest priority.
+        */
+       REG_WR(sc, NIG_REG_P0_TX_ARB_NUM_STRICT_ARB_SLOTS, 0x100);
+       /* mapping between the CREDIT_WEIGHT registers and actual client
+        * numbers
+        */
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CLIENT_CREDIT_MAP, 0);
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_0, 0);
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_1, 0);
+
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CREDIT_UPPER_BOUND_0, 0);
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CREDIT_UPPER_BOUND_1, 0);
+       REG_WR(sc, PBF_REG_HIGH_PRIORITY_COS_NUM, 0);
+       /* ETS mode disable */
+       REG_WR(sc, PBF_REG_ETS_ENABLED, 0);
+       /* If ETS mode is enabled (there is no strict priority) defines a WFQ
+        * weight for COS0/COS1.
+        */
+       REG_WR(sc, PBF_REG_COS0_WEIGHT, 0x2710);
+       REG_WR(sc, PBF_REG_COS1_WEIGHT, 0x2710);
+       /* Upper bound that COS0_WEIGHT can reach in the WFQ arbiter */
+       REG_WR(sc, PBF_REG_COS0_UPPER_BOUND, 0x989680);
+       REG_WR(sc, PBF_REG_COS1_UPPER_BOUND, 0x989680);
+       /* Defines the number of consecutive slots for the strict priority */
+       REG_WR(sc, PBF_REG_NUM_STRICT_ARB_SLOTS, 0);
+}
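
The 0x4688 written to NIG_REG_P0_TX_ARB_PRIORITY_CLIENT above is five 3-bit
client numbers packed one per priority slot, exactly as the mapping comment in
the function describes. A minimal standalone sketch of that encoding (plain C,
independent of the driver; the client numbering is taken from the comment
above):

    #include <stdio.h>

    int main(void)
    {
            /* Client numbers per the comment: MCP=0, dbg0=1, dbg1=2,
             * COS0=3, COS1=4; each priority slot is a 3-bit field,
             * PRI0 in the least-significant bits.
             */
            unsigned int clients[5] = { 0, 1, 2, 3, 4 };
            unsigned int val = 0;
            int pri;

            for (pri = 0; pri < 5; pri++)
                    val |= clients[pri] << (3 * pri);

            printf("priority/client map = 0x%x\n", val); /* 0x4688 */
            return 0;
    }
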
+/******************************************************************************
+ * Description:
+ *	Return min_w_val, which is chosen according to the line speed.
+ *
+ ******************************************************************************/
+static uint32_t elink_ets_get_min_w_val_nig(const struct elink_vars *vars)
+{
+       uint32_t min_w_val = 0;
+       /* Calculate min_w_val.*/
+       if (vars->link_up) {
+               if (vars->line_speed == ELINK_SPEED_20000)
+                       min_w_val = ELINK_ETS_E3B0_NIG_MIN_W_VAL_20GBPS;
+               else
+                       min_w_val = ELINK_ETS_E3B0_NIG_MIN_W_VAL_UP_TO_10GBPS;
+       } else {
+               min_w_val = ELINK_ETS_E3B0_NIG_MIN_W_VAL_20GBPS;
+       }
+	/* If the link isn't up (e.g. a static configuration), the
+	 * 20 Gbps value is used.
+	 */
+       return min_w_val;
+}
+/******************************************************************************
+ * Description:
+ *	Compute the credit upper bound from min_w_val.
+ *
+ ******************************************************************************/
+static uint32_t elink_ets_get_credit_upper_bound(const uint32_t min_w_val)
+{
+       const uint32_t credit_upper_bound = (uint32_t)
+                                               ELINK_MAXVAL((150 * min_w_val),
+                                                       ELINK_MAX_PACKET_SIZE);
+       return credit_upper_bound;
+}
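
A quick standalone sketch of why the ELINK_MAXVAL() clamp above matters. The
min_w_val figures (51 for 20G, 1025 for up to 10G) are the values used by the
Linux bnx2x driver and are assumed here for illustration; ELINK_MAX_PACKET_SIZE
(9700) is defined earlier in this file:

    #include <stdio.h>

    #define MAXVAL(a, b)            (((a) > (b)) ? (a) : (b))
    #define MAX_PACKET_SIZE         9700    /* ELINK_MAX_PACKET_SIZE */
    /* Assumed min_w_val constants, as in the Linux bnx2x driver */
    #define MIN_W_VAL_20GBPS        51
    #define MIN_W_VAL_UP_TO_10GBPS  1025

    int main(void)
    {
            /* At 20G, 150 * 51 = 7650 is smaller than one jumbo frame,
             * so the bound is clamped to 9700; at <= 10G it is not.
             */
            printf("20G bound = %u\n",
                   MAXVAL(150 * MIN_W_VAL_20GBPS, MAX_PACKET_SIZE));
            printf("<=10G bound = %u\n",
                   MAXVAL(150 * MIN_W_VAL_UP_TO_10GBPS, MAX_PACKET_SIZE));
            return 0;
    }
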
+/******************************************************************************
+ * Description:
+ *     Set credit upper bound for NIG.
+ *
+ ******************************************************************************/
+static void elink_ets_e3b0_set_credit_upper_bound_nig(
+       const struct elink_params *params,
+       const uint32_t min_w_val)
+{
+       struct bnx2x_softc *sc = params->sc;
+       const uint8_t port = params->port;
+       const uint32_t credit_upper_bound =
+           elink_ets_get_credit_upper_bound(min_w_val);
+
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CREDIT_UPPER_BOUND_0 :
+               NIG_REG_P0_TX_ARB_CREDIT_UPPER_BOUND_0, credit_upper_bound);
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CREDIT_UPPER_BOUND_1 :
+                  NIG_REG_P0_TX_ARB_CREDIT_UPPER_BOUND_1, credit_upper_bound);
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CREDIT_UPPER_BOUND_2 :
+                  NIG_REG_P0_TX_ARB_CREDIT_UPPER_BOUND_2, credit_upper_bound);
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CREDIT_UPPER_BOUND_3 :
+                  NIG_REG_P0_TX_ARB_CREDIT_UPPER_BOUND_3, credit_upper_bound);
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CREDIT_UPPER_BOUND_4 :
+                  NIG_REG_P0_TX_ARB_CREDIT_UPPER_BOUND_4, credit_upper_bound);
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CREDIT_UPPER_BOUND_5 :
+                  NIG_REG_P0_TX_ARB_CREDIT_UPPER_BOUND_5, credit_upper_bound);
+
+       if (!port) {
+               REG_WR(sc, NIG_REG_P0_TX_ARB_CREDIT_UPPER_BOUND_6,
+                       credit_upper_bound);
+               REG_WR(sc, NIG_REG_P0_TX_ARB_CREDIT_UPPER_BOUND_7,
+                       credit_upper_bound);
+               REG_WR(sc, NIG_REG_P0_TX_ARB_CREDIT_UPPER_BOUND_8,
+                       credit_upper_bound);
+       }
+}
+/******************************************************************************
+ * Description:
+ *	Return the NIG ETS registers to their init values, except
+ *	credit_upper_bound, which isn't used in this configuration
+ *	(no WFQ is enabled) and is configured according to the spec.
+ *
+ ******************************************************************************/
+static void elink_ets_e3b0_nig_disabled(const struct elink_params *params,
+                                       const struct elink_vars *vars)
+{
+       struct bnx2x_softc *sc = params->sc;
+       const uint8_t port = params->port;
+       const uint32_t min_w_val = elink_ets_get_min_w_val_nig(vars);
+	/* Mapping between entry priority and client number (0, 1, 2 - debug
+	 * and management clients, 3 - COS0 client, 4 - COS1, ... 8 -
+	 * COS5) (HIGHEST), 4-bit client numbers. TODO_ETS - should be done
+	 * by reset value or init tool
+        */
+       if (port) {
+               REG_WR(sc, NIG_REG_P1_TX_ARB_PRIORITY_CLIENT2_LSB, 0x543210);
+               REG_WR(sc, NIG_REG_P1_TX_ARB_PRIORITY_CLIENT2_MSB, 0x0);
+       } else {
+               REG_WR(sc, NIG_REG_P0_TX_ARB_PRIORITY_CLIENT2_LSB, 0x76543210);
+               REG_WR(sc, NIG_REG_P0_TX_ARB_PRIORITY_CLIENT2_MSB, 0x8);
+       }
+       /* For strict priority entries defines the number of consecutive
+        * slots for the highest priority.
+        */
+	REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_NUM_STRICT_ARB_SLOTS :
+		   NIG_REG_P0_TX_ARB_NUM_STRICT_ARB_SLOTS, 0x100);
+       /* Mapping between the CREDIT_WEIGHT registers and actual client
+        * numbers
+        */
+       if (port) {
+               /*Port 1 has 6 COS*/
+               REG_WR(sc, NIG_REG_P1_TX_ARB_CLIENT_CREDIT_MAP2_LSB, 0x210543);
+               REG_WR(sc, NIG_REG_P1_TX_ARB_CLIENT_CREDIT_MAP2_MSB, 0x0);
+       } else {
+               /*Port 0 has 9 COS*/
+               REG_WR(sc, NIG_REG_P0_TX_ARB_CLIENT_CREDIT_MAP2_LSB,
+                      0x43210876);
+               REG_WR(sc, NIG_REG_P0_TX_ARB_CLIENT_CREDIT_MAP2_MSB, 0x5);
+       }
+
+	/* Bitmap of 5 bits length. Each bit specifies whether the entry
+	 * behaves as strict.  Bits 0, 1, 2 - debug and management entries, 3 -
+        * COS0 entry, 4 - COS1 entry.
+        * COS1 | COS0 | DEBUG1 | DEBUG0 | MGMT
+        * bit4   bit3    bit2   bit1     bit0
+        * MCP and debug are strict
+        */
+       if (port)
+               REG_WR(sc, NIG_REG_P1_TX_ARB_CLIENT_IS_STRICT, 0x3f);
+       else
+               REG_WR(sc, NIG_REG_P0_TX_ARB_CLIENT_IS_STRICT, 0x1ff);
+       /* defines which entries (clients) are subjected to WFQ arbitration */
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CLIENT_IS_SUBJECT2WFQ :
+                  NIG_REG_P0_TX_ARB_CLIENT_IS_SUBJECT2WFQ, 0);
+
+	/* Note that the register addresses are not contiguous, so a for
+	 * loop here is not appropriate. In 2-port mode only COS0-5 of
+	 * port0 can be used; in 4-port mode only COS0-2 of port1 can be
+	 * used. DEBUG0, DEBUG1 and MGMT are never used for WFQ.
+	 */
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_0 :
+                  NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_0, 0x0);
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_1 :
+                  NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_1, 0x0);
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_2 :
+                  NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_2, 0x0);
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_3 :
+                  NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_3, 0x0);
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_4 :
+                  NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_4, 0x0);
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_5 :
+                  NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_5, 0x0);
+       if (!port) {
+               REG_WR(sc, NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_6, 0x0);
+               REG_WR(sc, NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_7, 0x0);
+               REG_WR(sc, NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_8, 0x0);
+       }
+
+       elink_ets_e3b0_set_credit_upper_bound_nig(params, min_w_val);
+}
+/******************************************************************************
+ * Description:
+ *     Set credit upper bound for PBF.
+ *
+ ******************************************************************************/
+static void elink_ets_e3b0_set_credit_upper_bound_pbf(
+       const struct elink_params *params,
+       const uint32_t min_w_val)
+{
+       struct bnx2x_softc *sc = params->sc;
+       const uint32_t credit_upper_bound =
+           elink_ets_get_credit_upper_bound(min_w_val);
+       const uint8_t port = params->port;
+       uint32_t base_upper_bound = 0;
+       uint8_t max_cos = 0;
+       uint8_t i = 0;
+	/* In 2-port mode port0 has COS0-5 available for WFQ; in 4-port
+	 * mode port1 has COS0-2 available for WFQ.
+        */
+       if (!port) {
+               base_upper_bound = PBF_REG_COS0_UPPER_BOUND_P0;
+               max_cos = ELINK_DCBX_E3B0_MAX_NUM_COS_PORT0;
+       } else {
+               base_upper_bound = PBF_REG_COS0_UPPER_BOUND_P1;
+               max_cos = ELINK_DCBX_E3B0_MAX_NUM_COS_PORT1;
+       }
+
+       for (i = 0; i < max_cos; i++)
+               REG_WR(sc, base_upper_bound + (i << 2), credit_upper_bound);
+}
+
+/******************************************************************************
+ * Description:
+ *	Return the PBF ETS registers to their init values, except
+ *	credit_upper_bound, which isn't used in this configuration
+ *	(no WFQ is enabled) and is configured according to the spec.
+ *
+ ******************************************************************************/
+static void elink_ets_e3b0_pbf_disabled(const struct elink_params *params)
+{
+       struct bnx2x_softc *sc = params->sc;
+       const uint8_t port = params->port;
+       const uint32_t min_w_val_pbf = ELINK_ETS_E3B0_PBF_MIN_W_VAL;
+       uint8_t i = 0;
+       uint32_t base_weight = 0;
+       uint8_t max_cos = 0;
+
+	/* Mapping between entry priority and client number (0 - COS0
+	 * client, 1 - COS1, ... 5 - COS5) (HIGHEST), 4-bit client numbers.
+        * TODO_ETS - Should be done by reset value or init tool
+        */
+       if (port)
+               /*  0x688 (|011|0 10|00 1|000) */
+               REG_WR(sc, PBF_REG_ETS_ARB_PRIORITY_CLIENT_P1, 0x688);
+       else
+               /*  (10 1|100 |011|0 10|00 1|000) */
+               REG_WR(sc, PBF_REG_ETS_ARB_PRIORITY_CLIENT_P0, 0x2C688);
+
+       /* TODO_ETS - Should be done by reset value or init tool */
+       if (port)
+               /* 0x688 (|011|0 10|00 1|000)*/
+               REG_WR(sc, PBF_REG_ETS_ARB_CLIENT_CREDIT_MAP_P1, 0x688);
+       else
+		/* 0x2C688 (10 1|100 |011|0 10|00 1|000) */
+		REG_WR(sc, PBF_REG_ETS_ARB_CLIENT_CREDIT_MAP_P0, 0x2C688);
+
+       REG_WR(sc, (port) ? PBF_REG_ETS_ARB_NUM_STRICT_ARB_SLOTS_P1 :
+                  PBF_REG_ETS_ARB_NUM_STRICT_ARB_SLOTS_P0, 0x100);
+
+
+       REG_WR(sc, (port) ? PBF_REG_ETS_ARB_CLIENT_IS_STRICT_P1 :
+                  PBF_REG_ETS_ARB_CLIENT_IS_STRICT_P0, 0);
+
+       REG_WR(sc, (port) ? PBF_REG_ETS_ARB_CLIENT_IS_SUBJECT2WFQ_P1 :
+                  PBF_REG_ETS_ARB_CLIENT_IS_SUBJECT2WFQ_P0, 0);
+       /* In 2 port mode port0 has COS0-5 that can be used for WFQ.
+        * In 4 port mode port1 has COS0-2 that can be used for WFQ.
+        */
+       if (!port) {
+               base_weight = PBF_REG_COS0_WEIGHT_P0;
+               max_cos = ELINK_DCBX_E3B0_MAX_NUM_COS_PORT0;
+       } else {
+               base_weight = PBF_REG_COS0_WEIGHT_P1;
+               max_cos = ELINK_DCBX_E3B0_MAX_NUM_COS_PORT1;
+       }
+
+       for (i = 0; i < max_cos; i++)
+               REG_WR(sc, base_weight + (0x4 * i), 0);
+
+       elink_ets_e3b0_set_credit_upper_bound_pbf(params, min_w_val_pbf);
+}
+/******************************************************************************
+ * Description:
+ *	E3B0 disable basically returns the registers to their init values.
+ *
+ ******************************************************************************/
+static elink_status_t elink_ets_e3b0_disabled(const struct elink_params *params,
+                                  const struct elink_vars *vars)
+{
+       struct bnx2x_softc *sc = params->sc;
+
+       if (!CHIP_IS_E3B0(sc)) {
+               ELINK_DEBUG_P0(sc,
+                  "elink_ets_e3b0_disabled the chip isn't E3B0");
+               return ELINK_STATUS_ERROR;
+       }
+
+       elink_ets_e3b0_nig_disabled(params, vars);
+
+       elink_ets_e3b0_pbf_disabled(params);
+
+       return ELINK_STATUS_OK;
+}
+
+/******************************************************************************
+ * Description:
+ *	Disable basically returns the registers to their init values.
+ *
+ ******************************************************************************/
+elink_status_t elink_ets_disabled(struct elink_params *params,
+                     struct elink_vars *vars)
+{
+       struct bnx2x_softc *sc = params->sc;
+       elink_status_t elink_status = ELINK_STATUS_OK;
+
+       if ((CHIP_IS_E2(sc)) || (CHIP_IS_E3A0(sc))) {
+               elink_ets_e2e3a0_disabled(params);
+       } else if (CHIP_IS_E3B0(sc)) {
+               elink_status = elink_ets_e3b0_disabled(params, vars);
+       } else {
+               ELINK_DEBUG_P0(sc, "elink_ets_disabled - chip not supported");
+               return ELINK_STATUS_ERROR;
+       }
+
+       return elink_status;
+}
+
+/******************************************************************************
+ * Description:
+ *	Set the COS mapping to SP and BW; up to this point none of the COS
+ *	entries are set as SP or BW.
+ ******************************************************************************/
+static elink_status_t elink_ets_e3b0_cli_map(const struct elink_params *params,
+                 __rte_unused const struct elink_ets_params *ets_params,
+                 const uint8_t cos_sp_bitmap,
+                 const uint8_t cos_bw_bitmap)
+{
+       struct bnx2x_softc *sc = params->sc;
+       const uint8_t port = params->port;
+       const uint8_t nig_cli_sp_bitmap = 0x7 | (cos_sp_bitmap << 3);
+       const uint8_t pbf_cli_sp_bitmap = cos_sp_bitmap;
+       const uint8_t nig_cli_subject2wfq_bitmap = cos_bw_bitmap << 3;
+       const uint8_t pbf_cli_subject2wfq_bitmap = cos_bw_bitmap;
+
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CLIENT_IS_STRICT :
+              NIG_REG_P0_TX_ARB_CLIENT_IS_STRICT, nig_cli_sp_bitmap);
+
+       REG_WR(sc, (port) ? PBF_REG_ETS_ARB_CLIENT_IS_STRICT_P1 :
+              PBF_REG_ETS_ARB_CLIENT_IS_STRICT_P0, pbf_cli_sp_bitmap);
+
+       REG_WR(sc, (port) ? NIG_REG_P1_TX_ARB_CLIENT_IS_SUBJECT2WFQ :
+              NIG_REG_P0_TX_ARB_CLIENT_IS_SUBJECT2WFQ,
+              nig_cli_subject2wfq_bitmap);
+
+       REG_WR(sc, (port) ? PBF_REG_ETS_ARB_CLIENT_IS_SUBJECT2WFQ_P1 :
+              PBF_REG_ETS_ARB_CLIENT_IS_SUBJECT2WFQ_P0,
+              pbf_cli_subject2wfq_bitmap);
+
+       return ELINK_STATUS_OK;
+}
+
+/******************************************************************************
+ * Description:
+ *	This function is needed because the NIG ARB_CREDIT_WEIGHT_X registers
+ *	are not contiguous, so ARB_CREDIT_WEIGHT_0 + offset is not suitable.
+ ******************************************************************************/
+static elink_status_t elink_ets_e3b0_set_cos_bw(struct bnx2x_softc *sc,
+                                    const uint8_t cos_entry,
+                                    const uint32_t min_w_val_nig,
+                                    const uint32_t min_w_val_pbf,
+                                    const uint16_t total_bw,
+                                    const uint8_t bw,
+                                    const uint8_t port)
+{
+       uint32_t nig_reg_address_crd_weight = 0;
+       uint32_t pbf_reg_address_crd_weight = 0;
+       /* Calculate and set BW for this COS - use 1 instead of 0 for BW */
+       const uint32_t cos_bw_nig = ((bw ? bw : 1) * min_w_val_nig) / total_bw;
+       const uint32_t cos_bw_pbf = ((bw ? bw : 1) * min_w_val_pbf) / total_bw;
+
+	switch (cos_entry) {
+	case 0:
+		nig_reg_address_crd_weight =
+			(port) ? NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_0 :
+				 NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_0;
+		pbf_reg_address_crd_weight = (port) ?
+			PBF_REG_COS0_WEIGHT_P1 : PBF_REG_COS0_WEIGHT_P0;
+		break;
+	case 1:
+		nig_reg_address_crd_weight = (port) ?
+			NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_1 :
+			NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_1;
+		pbf_reg_address_crd_weight = (port) ?
+			PBF_REG_COS1_WEIGHT_P1 : PBF_REG_COS1_WEIGHT_P0;
+		break;
+	case 2:
+		nig_reg_address_crd_weight = (port) ?
+			NIG_REG_P1_TX_ARB_CREDIT_WEIGHT_2 :
+			NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_2;
+		pbf_reg_address_crd_weight = (port) ?
+			PBF_REG_COS2_WEIGHT_P1 : PBF_REG_COS2_WEIGHT_P0;
+		break;
+	case 3:
+		if (port)
+			return ELINK_STATUS_ERROR;
+		nig_reg_address_crd_weight = NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_3;
+		pbf_reg_address_crd_weight = PBF_REG_COS3_WEIGHT_P0;
+		break;
+	case 4:
+		if (port)
+			return ELINK_STATUS_ERROR;
+		nig_reg_address_crd_weight = NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_4;
+		pbf_reg_address_crd_weight = PBF_REG_COS4_WEIGHT_P0;
+		break;
+	case 5:
+		if (port)
+			return ELINK_STATUS_ERROR;
+		nig_reg_address_crd_weight = NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_5;
+		pbf_reg_address_crd_weight = PBF_REG_COS5_WEIGHT_P0;
+		break;
+	}
+
+       REG_WR(sc, nig_reg_address_crd_weight, cos_bw_nig);
+
+       REG_WR(sc, pbf_reg_address_crd_weight, cos_bw_pbf);
+
+       return ELINK_STATUS_OK;
+}
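
The credit weight computed above is a plain proportional share,
(bw ? bw : 1) * min_w_val / total_bw, with a zero BW bumped to 1 so the COS
still gets a nonzero weight. A short sketch of the arithmetic with made-up
bandwidth percentages (the 1025 min_w_val is assumed, as noted earlier):

    #include <stdio.h>

    static unsigned int cos_credit_weight(unsigned int bw,
                                          unsigned int min_w_val,
                                          unsigned int total_bw)
    {
            /* Same formula as elink_ets_e3b0_set_cos_bw() */
            return ((bw ? bw : 1) * min_w_val) / total_bw;
    }

    int main(void)
    {
            unsigned int bw[3] = { 50, 30, 20 };    /* 50/30/20 split */
            int i;

            for (i = 0; i < 3; i++)
                    printf("COS%d weight = %u\n", i,
                           cos_credit_weight(bw[i], 1025, 100));
            return 0;
    }
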
+/******************************************************************************
+ * Description:
+ *	Calculate the total BW. A value of 0 isn't legal.
+ *
+ ******************************************************************************/
+static elink_status_t elink_ets_e3b0_get_total_bw(
+       const struct elink_params *params,
+       struct elink_ets_params *ets_params,
+       uint16_t *total_bw)
+{
+       struct bnx2x_softc *sc = params->sc;
+       uint8_t cos_idx = 0;
+       uint8_t is_bw_cos_exist = 0;
+
+       *total_bw = 0;
+       /* Calculate total BW requested */
+       for (cos_idx = 0; cos_idx < ets_params->num_of_cos; cos_idx++) {
+               if (ets_params->cos[cos_idx].state == elink_cos_state_bw) {
+                       is_bw_cos_exist = 1;
+                       if (!ets_params->cos[cos_idx].params.bw_params.bw) {
+                               ELINK_DEBUG_P0(sc, "elink_ets_E3B0_config BW"
+                                                  " was set to 0");
+                               /* This is to prevent a state when ramrods
+                                * can't be sent
+                                */
+                               ets_params->cos[cos_idx].params.bw_params.bw
+                                        = 1;
+                       }
+                       *total_bw +=
+                               ets_params->cos[cos_idx].params.bw_params.bw;
+               }
+       }
+
+       /* Check total BW is valid */
+       if ((is_bw_cos_exist == 1) && (*total_bw != 100)) {
+               if (*total_bw == 0) {
+                       ELINK_DEBUG_P0(sc,
+                          "elink_ets_E3B0_config total BW shouldn't be 0");
+                       return ELINK_STATUS_ERROR;
+               }
+               ELINK_DEBUG_P0(sc,
+                  "elink_ets_E3B0_config total BW should be 100");
+		/* We can handle a case where the BW isn't 100; this can
+		 * happen if the TCs are joined.
+                */
+       }
+       return ELINK_STATUS_OK;
+}
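
A hedged standalone restatement of the validation rule above: a per-COS BW of
0 is clamped to 1 (so ramrods can still be sent), a zero total is an error,
and a nonzero total other than 100 is tolerated but flagged. Names here are
illustrative, not the driver's:

    #include <stdio.h>

    /* All entries are treated as BW entries for simplicity; the driver
     * only counts entries whose state is elink_cos_state_bw.
     */
    static int total_bw_check(unsigned int *bw, int n, unsigned int *total)
    {
            int i;

            *total = 0;
            for (i = 0; i < n; i++) {
                    if (bw[i] == 0)
                            bw[i] = 1;      /* clamp, as the driver does */
                    *total += bw[i];
            }
            if (n && *total == 0)
                    return -1;
            if (n && *total != 100)
                    printf("warning: total BW %u != 100\n", *total);
            return 0;
    }

    int main(void)
    {
            unsigned int bw[3] = { 60, 40, 0 };     /* last clamped to 1 */
            unsigned int total;

            if (total_bw_check(bw, 3, &total) == 0)
                    printf("total = %u\n", total);  /* 101, with warning */
            return 0;
    }
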
+
+/******************************************************************************
+ * Description:
+ *     Invalidate all the sp_pri_to_cos.
+ *
+ ******************************************************************************/
+static void elink_ets_e3b0_sp_pri_to_cos_init(uint8_t *sp_pri_to_cos)
+{
+       uint8_t pri = 0;
+       for (pri = 0; pri < ELINK_DCBX_MAX_NUM_COS; pri++)
+               sp_pri_to_cos[pri] = DCBX_INVALID_COS;
+}
+/******************************************************************************
+ * Description:
+ *     Calculate and set the SP (ARB_PRIORITY_CLIENT) NIG and PBF registers
+ *     according to sp_pri_to_cos.
+ *
+ ******************************************************************************/
+static elink_status_t elink_ets_e3b0_sp_pri_to_cos_set(
+                                           const struct elink_params *params,
+                                           uint8_t *sp_pri_to_cos,
+                                           const uint8_t pri,
+                                           const uint8_t cos_entry)
+{
+       struct bnx2x_softc *sc = params->sc;
+       const uint8_t port = params->port;
+       const uint8_t max_num_of_cos = (port) ?
+               ELINK_DCBX_E3B0_MAX_NUM_COS_PORT1 :
+               ELINK_DCBX_E3B0_MAX_NUM_COS_PORT0;
+
+       if (pri >= max_num_of_cos) {
+               ELINK_DEBUG_P0(sc, "elink_ets_e3b0_sp_pri_to_cos_set invalid "
+		   "parameter: illegal strict priority");
+               return ELINK_STATUS_ERROR;
+       }
+
+       if (sp_pri_to_cos[pri] != DCBX_INVALID_COS) {
+               ELINK_DEBUG_P0(sc, "elink_ets_e3b0_sp_pri_to_cos_set invalid "
+				   "parameter: there can't be two COSes with "
+                                  "the same strict pri");
+               return ELINK_STATUS_ERROR;
+       }
+
+       sp_pri_to_cos[pri] = cos_entry;
+       return ELINK_STATUS_OK;
+}
+
+/******************************************************************************
+ * Description:
+ *     Returns the correct value according to COS and priority in
+ *     the sp_pri_cli register.
+ *
+ ******************************************************************************/
+static uint64_t elink_e3b0_sp_get_pri_cli_reg(const uint8_t cos,
+                                        const uint8_t cos_offset,
+                                        const uint8_t pri_set,
+                                        const uint8_t pri_offset,
+                                        const uint8_t entry_size)
+{
+       uint64_t pri_cli_nig = 0;
+       pri_cli_nig = ((uint64_t)(cos + cos_offset)) << (entry_size *
+                                                   (pri_set + pri_offset));
+
+       return pri_cli_nig;
+}
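
elink_e3b0_sp_get_pri_cli_reg() simply drops (cos + cos_offset) into the
entry_size-bit field for slot (pri_set + pri_offset). A standalone sketch of
the NIG variant (4-bit entries, cos/pri offsets of 3) on top of the 0x210 seed
that elink_ets_e3b0_sp_set_pri_cli_reg() starts from below; the COS choices
are made up for illustration:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t pri_cli_field(uint8_t cos, uint8_t cos_off,
                                  uint8_t pri_set, uint8_t pri_off,
                                  uint8_t entry_size)
    {
            return (uint64_t)(cos + cos_off) <<
                   (entry_size * (pri_set + pri_off));
    }

    int main(void)
    {
            /* MCP/dbg0/dbg1 occupy the three lowest 4-bit slots */
            uint64_t pri_cli_nig = 0x210;

            /* COS2 at strict priority 0, COS0 at strict priority 1 */
            pri_cli_nig |= pri_cli_field(2, 3, 0, 3, 4);
            pri_cli_nig |= pri_cli_field(0, 3, 1, 3, 4);

            printf("NIG sp_pri_cli = 0x%llx\n",     /* 0x35210 */
                   (unsigned long long)pri_cli_nig);
            return 0;
    }
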
+/******************************************************************************
+ * Description:
+ *     Returns the correct value according to COS and priority in the
+ *     sp_pri_cli register for NIG.
+ *
+ ******************************************************************************/
+static uint64_t elink_e3b0_sp_get_pri_cli_reg_nig(const uint8_t cos,
+                                                 const uint8_t pri_set)
+{
+       /* MCP Dbg0 and dbg1 are always with higher strict pri*/
+       const uint8_t nig_cos_offset = 3;
+       const uint8_t nig_pri_offset = 3;
+
+       return elink_e3b0_sp_get_pri_cli_reg(cos, nig_cos_offset, pri_set,
+               nig_pri_offset, 4);
+}
+
+/******************************************************************************
+ * Description:
+ *     Returns the correct value according to COS and priority in the
+ *     sp_pri_cli register for PBF.
+ *
+ ******************************************************************************/
+static uint64_t elink_e3b0_sp_get_pri_cli_reg_pbf(const uint8_t cos,
+                                                 const uint8_t pri_set)
+{
+       const uint8_t pbf_cos_offset = 0;
+       const uint8_t pbf_pri_offset = 0;
+
+       return elink_e3b0_sp_get_pri_cli_reg(cos, pbf_cos_offset, pri_set,
+               pbf_pri_offset, 3);
+}
+
+/******************************************************************************
+ * Description:
+ *     Calculate and set the SP (ARB_PRIORITY_CLIENT) NIG and PBF registers
+ *     according to sp_pri_to_cos.(which COS has higher priority)
+ *
+ ******************************************************************************/
+static elink_status_t elink_ets_e3b0_sp_set_pri_cli_reg(
+                                            const struct elink_params *params,
+                                            uint8_t *sp_pri_to_cos)
+{
+       struct bnx2x_softc *sc = params->sc;
+       uint8_t i = 0;
+       const uint8_t port = params->port;
+       /* MCP Dbg0 and dbg1 are always with higher strict pri*/
+       uint64_t pri_cli_nig = 0x210;
+       uint32_t pri_cli_pbf = 0x0;
+       uint8_t pri_set = 0;
+       uint8_t pri_bitmask = 0;
+       const uint8_t max_num_of_cos = (port) ?
+               ELINK_DCBX_E3B0_MAX_NUM_COS_PORT1 :
+               ELINK_DCBX_E3B0_MAX_NUM_COS_PORT0;
+
+       uint8_t cos_bit_to_set = (1 << max_num_of_cos) - 1;
+
+       /* Set all the strict priority first */
+       for (i = 0; i < max_num_of_cos; i++) {
+               if (sp_pri_to_cos[i] != DCBX_INVALID_COS) {
+                       if (sp_pri_to_cos[i] >= ELINK_DCBX_MAX_NUM_COS) {
+                               ELINK_DEBUG_P0(sc,
+                                          "elink_ets_e3b0_sp_set_pri_cli_reg "
+                                          "invalid cos entry");
+                               return ELINK_STATUS_ERROR;
+                       }
+
+                       pri_cli_nig |= elink_e3b0_sp_get_pri_cli_reg_nig(
+                           sp_pri_to_cos[i], pri_set);
+
+                       pri_cli_pbf |= elink_e3b0_sp_get_pri_cli_reg_pbf(
+                           sp_pri_to_cos[i], pri_set);
+                       pri_bitmask = 1 << sp_pri_to_cos[i];
+			/* COS is used; remove it from the bitmap. */
+                       if (!(pri_bitmask & cos_bit_to_set)) {
+                               ELINK_DEBUG_P0(sc,
+                                       "elink_ets_e3b0_sp_set_pri_cli_reg "
+					"invalid: there can't be two COSes with"
+                                       " the same strict pri");
+                               return ELINK_STATUS_ERROR;
+                       }
+                       cos_bit_to_set &= ~pri_bitmask;
+                       pri_set++;
+               }
+       }
+
+	/* Set all the non-strict priorities; i = COS */
+       for (i = 0; i < max_num_of_cos; i++) {
+               pri_bitmask = 1 << i;
+               /* Check if COS was already used for SP */
+               if (pri_bitmask & cos_bit_to_set) {
+                       /* COS wasn't used for SP */
+                       pri_cli_nig |= elink_e3b0_sp_get_pri_cli_reg_nig(
+                           i, pri_set);
+
+                       pri_cli_pbf |= elink_e3b0_sp_get_pri_cli_reg_pbf(
+                           i, pri_set);
+			/* COS is used; remove it from the bitmap. */
+                       cos_bit_to_set &= ~pri_bitmask;
+                       pri_set++;
+               }
+       }
+
+       if (pri_set != max_num_of_cos) {
+               ELINK_DEBUG_P0(sc, "elink_ets_e3b0_sp_set_pri_cli_reg not all "
+                                  "entries were set");
+               return ELINK_STATUS_ERROR;
+       }
+
+       if (port) {
+               /* Only 6 usable clients*/
+               REG_WR(sc, NIG_REG_P1_TX_ARB_PRIORITY_CLIENT2_LSB,
+                      (uint32_t)pri_cli_nig);
+
+               REG_WR(sc, PBF_REG_ETS_ARB_PRIORITY_CLIENT_P1, pri_cli_pbf);
+       } else {
+               /* Only 9 usable clients*/
+               const uint32_t pri_cli_nig_lsb = (uint32_t)(pri_cli_nig);
+               const uint32_t pri_cli_nig_msb = (uint32_t)
+                                               ((pri_cli_nig >> 32) & 0xF);
+
+               REG_WR(sc, NIG_REG_P0_TX_ARB_PRIORITY_CLIENT2_LSB,
+                      pri_cli_nig_lsb);
+               REG_WR(sc, NIG_REG_P0_TX_ARB_PRIORITY_CLIENT2_MSB,
+                      pri_cli_nig_msb);
+
+               REG_WR(sc, PBF_REG_ETS_ARB_PRIORITY_CLIENT_P0, pri_cli_pbf);
+       }
+       return ELINK_STATUS_OK;
+}
+
+/******************************************************************************
+ * Description:
+ *     Configure the COS to ETS according to BW and SP settings.
+ ******************************************************************************/
+elink_status_t elink_ets_e3b0_config(const struct elink_params *params,
+                        const struct elink_vars *vars,
+                        struct elink_ets_params *ets_params)
+{
+       struct bnx2x_softc *sc = params->sc;
+       elink_status_t elink_status = ELINK_STATUS_OK;
+       const uint8_t port = params->port;
+       uint16_t total_bw = 0;
+       const uint32_t min_w_val_nig = elink_ets_get_min_w_val_nig(vars);
+       const uint32_t min_w_val_pbf = ELINK_ETS_E3B0_PBF_MIN_W_VAL;
+       uint8_t cos_bw_bitmap = 0;
+       uint8_t cos_sp_bitmap = 0;
+       uint8_t sp_pri_to_cos[ELINK_DCBX_MAX_NUM_COS] = {0};
+       const uint8_t max_num_of_cos = (port) ?
+               ELINK_DCBX_E3B0_MAX_NUM_COS_PORT1 :
+               ELINK_DCBX_E3B0_MAX_NUM_COS_PORT0;
+       uint8_t cos_entry = 0;
+
+       if (!CHIP_IS_E3B0(sc)) {
+               ELINK_DEBUG_P0(sc,
+		   "elink_ets_e3b0_config the chip isn't E3B0");
+               return ELINK_STATUS_ERROR;
+       }
+
+       if (ets_params->num_of_cos > max_num_of_cos) {
+               ELINK_DEBUG_P0(sc, "elink_ets_E3B0_config the number of COS "
+                                  "isn't supported");
+               return ELINK_STATUS_ERROR;
+       }
+
+       /* Prepare sp strict priority parameters*/
+       elink_ets_e3b0_sp_pri_to_cos_init(sp_pri_to_cos);
+
+       /* Prepare BW parameters*/
+       elink_status = elink_ets_e3b0_get_total_bw(params, ets_params,
+                                                  &total_bw);
+       if (elink_status != ELINK_STATUS_OK) {
+               ELINK_DEBUG_P0(sc,
+                  "elink_ets_E3B0_config get_total_bw failed");
+               return ELINK_STATUS_ERROR;
+       }
+
+       /* Upper bound is set according to current link speed (min_w_val
+        * should be the same for upper bound and COS credit val).
+        */
+       elink_ets_e3b0_set_credit_upper_bound_nig(params, min_w_val_nig);
+       elink_ets_e3b0_set_credit_upper_bound_pbf(params, min_w_val_pbf);
+
+
+       for (cos_entry = 0; cos_entry < ets_params->num_of_cos; cos_entry++) {
+               if (elink_cos_state_bw == ets_params->cos[cos_entry].state) {
+                       cos_bw_bitmap |= (1 << cos_entry);
+			/* The function also sets the BW in HW (not the
+			 * mapping yet)
+                        */
+                       elink_status = elink_ets_e3b0_set_cos_bw(
+                               sc, cos_entry, min_w_val_nig, min_w_val_pbf,
+                               total_bw,
+                               ets_params->cos[cos_entry].params.bw_params.bw,
+                                port);
+               } else if (elink_cos_state_strict ==
+                       ets_params->cos[cos_entry].state){
+                       cos_sp_bitmap |= (1 << cos_entry);
+
+                       elink_status = elink_ets_e3b0_sp_pri_to_cos_set(
+                               params,
+                               sp_pri_to_cos,
+                               ets_params->cos[cos_entry].params.sp_params.pri,
+                               cos_entry);
+
+               } else {
+                       ELINK_DEBUG_P0(sc,
+                          "elink_ets_e3b0_config cos state not valid");
+                       return ELINK_STATUS_ERROR;
+               }
+               if (elink_status != ELINK_STATUS_OK) {
+                       ELINK_DEBUG_P0(sc,
+                          "elink_ets_e3b0_config set cos bw failed");
+                       return elink_status;
+               }
+       }
+
+       /* Set SP register (which COS has higher priority) */
+       elink_status = elink_ets_e3b0_sp_set_pri_cli_reg(params,
+                                                        sp_pri_to_cos);
+
+       if (elink_status != ELINK_STATUS_OK) {
+               ELINK_DEBUG_P0(sc,
+                  "elink_ets_E3B0_config set_pri_cli_reg failed");
+               return elink_status;
+       }
+
+       /* Set client mapping of BW and strict */
+       elink_status = elink_ets_e3b0_cli_map(params, ets_params,
+                                             cos_sp_bitmap,
+                                             cos_bw_bitmap);
+
+       if (elink_status != ELINK_STATUS_OK) {
+               ELINK_DEBUG_P0(sc, "elink_ets_E3B0_config SP failed");
+               return elink_status;
+       }
+       return ELINK_STATUS_OK;
+}
+static void elink_ets_bw_limit_common(const struct elink_params *params)
+{
+	/* ETS BW limit configuration */
+       struct bnx2x_softc *sc = params->sc;
+       ELINK_DEBUG_P0(sc, "ETS enabled BW limit configuration");
+       /* Defines which entries (clients) are subjected to WFQ arbitration
+        * COS0 0x8
+        * COS1 0x10
+        */
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CLIENT_IS_SUBJECT2WFQ, 0x18);
+       /* Mapping between the ARB_CREDIT_WEIGHT registers and actual
+        * client numbers (WEIGHT_0 does not actually have to represent
+        * client 0)
+        *    PRI4    |    PRI3    |    PRI2    |    PRI1    |    PRI0
+        *  cos1-001     cos0-000     dbg1-100     dbg0-011     MCP-010
+        */
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CLIENT_CREDIT_MAP, 0x111A);
+
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CREDIT_UPPER_BOUND_0,
+              ELINK_ETS_BW_LIMIT_CREDIT_UPPER_BOUND);
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CREDIT_UPPER_BOUND_1,
+              ELINK_ETS_BW_LIMIT_CREDIT_UPPER_BOUND);
+
+       /* ETS mode enabled*/
+       REG_WR(sc, PBF_REG_ETS_ENABLED, 1);
+
+       /* Defines the number of consecutive slots for the strict priority */
+       REG_WR(sc, PBF_REG_NUM_STRICT_ARB_SLOTS, 0);
+	/* Bitmap of 5 bits length. Each bit specifies whether the entry
+	 * behaves as strict.  Bits 0, 1, 2 - debug and management entries,
+	 * 3 - COS0 entry, 4 - COS1 entry.
+	 * COS1 | COS0 | DEBUG1 | DEBUG0 | MGMT
+        * bit4   bit3    bit2     bit1    bit0
+        * MCP and debug are strict
+        */
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CLIENT_IS_STRICT, 0x7);
 
+       /* Upper bound that COS0_WEIGHT can reach in the WFQ arbiter.*/
+       REG_WR(sc, PBF_REG_COS0_UPPER_BOUND,
+              ELINK_ETS_BW_LIMIT_CREDIT_UPPER_BOUND);
+       REG_WR(sc, PBF_REG_COS1_UPPER_BOUND,
+              ELINK_ETS_BW_LIMIT_CREDIT_UPPER_BOUND);
+}
+
+void elink_ets_bw_limit(const struct elink_params *params,
+                       const uint32_t cos0_bw,
+                       const uint32_t cos1_bw)
+{
+	/* ETS BW limit configuration */
+       struct bnx2x_softc *sc = params->sc;
+       const uint32_t total_bw = cos0_bw + cos1_bw;
+       uint32_t cos0_credit_weight = 0;
+       uint32_t cos1_credit_weight = 0;
+
+       ELINK_DEBUG_P0(sc, "ETS enabled BW limit configuration");
+
+       if ((!total_bw) ||
+           (!cos0_bw) ||
+           (!cos1_bw)) {
+		ELINK_DEBUG_P0(sc, "Per-COS and total BW can't be zero");
+               return;
+       }
+
+       cos0_credit_weight = (cos0_bw * ELINK_ETS_BW_LIMIT_CREDIT_WEIGHT) /
+               total_bw;
+       cos1_credit_weight = (cos1_bw * ELINK_ETS_BW_LIMIT_CREDIT_WEIGHT) /
+               total_bw;
+
+       elink_ets_bw_limit_common(params);
+
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_0, cos0_credit_weight);
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CREDIT_WEIGHT_1, cos1_credit_weight);
+
+       REG_WR(sc, PBF_REG_COS0_WEIGHT, cos0_credit_weight);
+       REG_WR(sc, PBF_REG_COS1_WEIGHT, cos1_credit_weight);
+}
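
For the two-COS path the weights are again proportional shares, this time of
the ELINK_ETS_BW_LIMIT_CREDIT_WEIGHT constant. A short sketch of the
arithmetic for a hypothetical 30/70 split; the 0x5000 weight value is the one
used by the Linux bnx2x driver and is assumed here:

    #include <stdio.h>

    #define CREDIT_WEIGHT 0x5000 /* assumed ELINK_ETS_BW_LIMIT_CREDIT_WEIGHT */

    int main(void)
    {
            unsigned int cos0_bw = 30, cos1_bw = 70;
            unsigned int total = cos0_bw + cos1_bw;

            /* Same computation as elink_ets_bw_limit(); zero inputs are
             * rejected there before this point.
             */
            printf("cos0 weight = 0x%x\n", cos0_bw * CREDIT_WEIGHT / total);
            printf("cos1 weight = 0x%x\n", cos1_bw * CREDIT_WEIGHT / total);
            return 0;
    }
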
+
+elink_status_t elink_ets_strict(const struct elink_params *params,
+                               const uint8_t strict_cos)
+{
+	/* ETS strict priority configuration */
+       struct bnx2x_softc *sc = params->sc;
+       uint32_t val    = 0;
+
+       ELINK_DEBUG_P0(sc, "ETS enabled strict configuration");
+	/* Bitmap of 5 bits length. Each bit specifies whether the entry
+	 * behaves as strict.  Bits 0, 1, 2 - debug and management entries,
+	 * 3 - COS0 entry, 4 - COS1 entry.
+	 *  COS1 | COS0 | DEBUG1 | DEBUG0 | MGMT
+        *  bit4   bit3   bit2      bit1     bit0
+        * MCP and debug are strict
+        */
+       REG_WR(sc, NIG_REG_P0_TX_ARB_CLIENT_IS_STRICT, 0x1F);
+       /* For strict priority entries defines the number of consecutive slots
+        * for the highest priority.
+        */
+       REG_WR(sc, NIG_REG_P0_TX_ARB_NUM_STRICT_ARB_SLOTS, 0x100);
+       /* ETS mode disable */
+       REG_WR(sc, PBF_REG_ETS_ENABLED, 0);
+       /* Defines the number of consecutive slots for the strict priority */
+       REG_WR(sc, PBF_REG_NUM_STRICT_ARB_SLOTS, 0x100);
+
+       /* Defines the number of consecutive slots for the strict priority */
+       REG_WR(sc, PBF_REG_HIGH_PRIORITY_COS_NUM, strict_cos);
+
+	/* Mapping between entry priority and client number (0, 1, 2 - debug
+	 * and management clients, 3 - COS0 client, 4 - COS1 client) (HIGHEST)
+	 * 3-bit client numbers.
+        *   PRI4    |    PRI3    |    PRI2    |    PRI1    |    PRI0
+	 * dbg0-010     dbg1-001     cos1-100     cos0-011     MCP-000 (0x2318)
+	 * dbg0-010     dbg1-001     cos0-011     cos1-100     MCP-000 (0x22E0)
+        */
+       val = (!strict_cos) ? 0x2318 : 0x22E0;
+       REG_WR(sc, NIG_REG_P0_TX_ARB_PRIORITY_CLIENT, val);
+
+       return ELINK_STATUS_OK;
 }
 
 /******************************************************************/
 /*                     PFC section                               */
 /******************************************************************/
 static void elink_update_pfc_xmac(struct elink_params *params,
-                                 struct elink_vars *vars)
+                                 struct elink_vars *vars,
+                                 __rte_unused uint8_t is_lb)
 {
        struct bnx2x_softc *sc = params->sc;
        uint32_t xmac_base;
@@ -1143,7 +2099,8 @@ static void elink_update_pfc_xmac(struct elink_params *params,
        pfc1_val = 0x2;
 
        /* No PFC support */
-       if (!(params->feature_config_flags & ELINK_FEATURE_CONFIG_PFC_ENABLED)) {
+       if (!(params->feature_config_flags &
+             ELINK_FEATURE_CONFIG_PFC_ENABLED)) {
 
                /* RX flow control - Process pause frame in receive direction
                 */
@@ -1153,12 +2110,12 @@ static void elink_update_pfc_xmac(struct elink_params *params,
                /* TX flow control - Send pause packet when buffer is full */
                if (vars->flow_ctrl & ELINK_FLOW_CTRL_TX)
                        pause_val |= XMAC_PAUSE_CTRL_REG_TX_PAUSE_EN;
-       } else {                /* PFC support */
+       } else { /* PFC support */
                pfc1_val |= XMAC_PFC_CTRL_HI_REG_PFC_REFRESH_EN |
-                   XMAC_PFC_CTRL_HI_REG_PFC_STATS_EN |
-                   XMAC_PFC_CTRL_HI_REG_RX_PFC_EN |
-                   XMAC_PFC_CTRL_HI_REG_TX_PFC_EN |
-                   XMAC_PFC_CTRL_HI_REG_FORCE_PFC_XON;
+                       XMAC_PFC_CTRL_HI_REG_PFC_STATS_EN |
+                       XMAC_PFC_CTRL_HI_REG_RX_PFC_EN |
+                       XMAC_PFC_CTRL_HI_REG_TX_PFC_EN |
+                       XMAC_PFC_CTRL_HI_REG_FORCE_PFC_XON;
                /* Write pause and PFC registers */
                REG_WR(sc, xmac_base + XMAC_REG_PAUSE_CTRL, pause_val);
                REG_WR(sc, xmac_base + XMAC_REG_PFC_CTRL, pfc0_val);
@@ -1172,21 +2129,76 @@ static void elink_update_pfc_xmac(struct elink_params *params,
        REG_WR(sc, xmac_base + XMAC_REG_PFC_CTRL, pfc0_val);
        REG_WR(sc, xmac_base + XMAC_REG_PFC_CTRL_HI, pfc1_val);
 
+
        /* Set MAC address for source TX Pause/PFC frames */
        REG_WR(sc, xmac_base + XMAC_REG_CTRL_SA_LO,
               ((params->mac_addr[2] << 24) |
                (params->mac_addr[3] << 16) |
-               (params->mac_addr[4] << 8) | (params->mac_addr[5])));
+               (params->mac_addr[4] << 8) |
+               (params->mac_addr[5])));
        REG_WR(sc, xmac_base + XMAC_REG_CTRL_SA_HI,
-              ((params->mac_addr[0] << 8) | (params->mac_addr[1])));
+              ((params->mac_addr[0] << 8) |
+               (params->mac_addr[1])));
 
        DELAY(30);
 }
 
+static void elink_emac_get_pfc_stat(struct elink_params *params,
+                                   uint32_t pfc_frames_sent[2],
+                                   uint32_t pfc_frames_received[2])
+{
+       /* Read PFC statistics */
+       struct bnx2x_softc *sc = params->sc;
+       uint32_t emac_base = params->port ? GRCBASE_EMAC1 : GRCBASE_EMAC0;
+       uint32_t val_xon = 0;
+       uint32_t val_xoff = 0;
+
+       ELINK_DEBUG_P0(sc, "pfc statistic read from EMAC");
+
+       /* PFC received frames */
+       val_xoff = REG_RD(sc, emac_base +
+                               EMAC_REG_RX_PFC_STATS_XOFF_RCVD);
+       val_xoff &= EMAC_REG_RX_PFC_STATS_XOFF_RCVD_COUNT;
+       val_xon = REG_RD(sc, emac_base + EMAC_REG_RX_PFC_STATS_XON_RCVD);
+       val_xon &= EMAC_REG_RX_PFC_STATS_XON_RCVD_COUNT;
+
+       pfc_frames_received[0] = val_xon + val_xoff;
+
+       /* PFC frames sent */
+       val_xoff = REG_RD(sc, emac_base +
+                               EMAC_REG_RX_PFC_STATS_XOFF_SENT);
+       val_xoff &= EMAC_REG_RX_PFC_STATS_XOFF_SENT_COUNT;
+       val_xon = REG_RD(sc, emac_base + EMAC_REG_RX_PFC_STATS_XON_SENT);
+       val_xon &= EMAC_REG_RX_PFC_STATS_XON_SENT_COUNT;
+
+       pfc_frames_sent[0] = val_xon + val_xoff;
+}
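
Each direction's counter is the sum of masked XON and XOFF register reads. A tiny sketch of that reduction; COUNT_MASK is an illustrative stand-in for the EMAC_REG_RX_PFC_STATS_*_COUNT field masks, whose real widths this patch does not show:

#include <stdint.h>
#include <stdio.h>

#define COUNT_MASK 0xffffu /* illustrative field mask */

static uint32_t pfc_total(uint32_t xon_raw, uint32_t xoff_raw)
{
	return (xon_raw & COUNT_MASK) + (xoff_raw & COUNT_MASK);
}

int main(void)
{
	/* e.g. raw reads 0x0001000a and 0x00010003 reduce to 13 */
	printf("total: %u\n", pfc_total(0x0001000a, 0x00010003));
	return 0;
}
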
+
+/* Read PFC statistics */
+void elink_pfc_statistic(struct elink_params *params, struct elink_vars *vars,
+                        uint32_t pfc_frames_sent[2],
+                        uint32_t pfc_frames_received[2])
+{
+       /* Read pfc statistic */
+       struct bnx2x_softc *sc = params->sc;
+
+       ELINK_DEBUG_P0(sc, "pfc statistic");
+
+       if (!vars->link_up)
+               return;
+
+       if (vars->mac_type == ELINK_MAC_TYPE_EMAC) {
+               ELINK_DEBUG_P0(sc, "About to read PFC stats from EMAC");
+               elink_emac_get_pfc_stat(params, pfc_frames_sent,
+                                       pfc_frames_received);
+       }
+}
+
 /******************************************************************/
 /*                     MAC/PBF section                           */
 /******************************************************************/
-static void elink_set_mdio_clk(struct bnx2x_softc *sc, uint32_t emac_base)
+static void elink_set_mdio_clk(struct bnx2x_softc *sc,
+                              __rte_unused uint32_t chip_id,
+                              uint32_t emac_base)
 {
        uint32_t new_mode, cur_mode;
        uint32_t clc_cnt;
@@ -1205,26 +2217,16 @@ static void elink_set_mdio_clk(struct bnx2x_softc *sc, uint32_t emac_base)
                return;
 
        new_mode = cur_mode &
-           ~(EMAC_MDIO_MODE_AUTO_POLL | EMAC_MDIO_MODE_CLOCK_CNT);
+               ~(EMAC_MDIO_MODE_AUTO_POLL | EMAC_MDIO_MODE_CLOCK_CNT);
        new_mode |= clc_cnt;
        new_mode |= (EMAC_MDIO_MODE_CLAUSE_45);
 
-       PMD_DRV_LOG(DEBUG, "Changing emac_mode from 0x%x to 0x%x",
-                   cur_mode, new_mode);
+       ELINK_DEBUG_P2(sc, "Changing emac_mode from 0x%x to 0x%x",
+          cur_mode, new_mode);
        REG_WR(sc, emac_base + EMAC_REG_EMAC_MDIO_MODE, new_mode);
        DELAY(40);
 }
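
elink_set_mdio_clk() follows the usual read-modify-write pattern: clear the auto-poll and clock-count fields, then OR in the new divider and the clause-45 select. A self-contained sketch with illustrative mask values (the real EMAC_MDIO_MODE_* definitions live in the register headers):

#include <stdint.h>

#define AUTO_POLL (1u << 4)     /* illustrative mask values */
#define CLOCK_CNT (0x3fu << 16)
#define CLAUSE_45 (1u << 31)

uint32_t mdio_mode_update(uint32_t cur_mode, uint32_t clc_cnt)
{
	uint32_t new_mode = cur_mode & ~(AUTO_POLL | CLOCK_CNT);

	new_mode |= clc_cnt;   /* install the new clock-divider field */
	new_mode |= CLAUSE_45; /* keep clause-45 framing selected */
	return new_mode;
}
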
 
-static void elink_set_mdio_emac_per_phy(struct bnx2x_softc *sc,
-                                       struct elink_params *params)
-{
-       uint8_t phy_index;
-       /* Set mdio clock per phy */
-       for (phy_index = ELINK_INT_PHY; phy_index < params->num_phys;
-            phy_index++)
-               elink_set_mdio_clk(sc, params->phy[phy_index].mdio_ctrl);
-}
-
 static uint8_t elink_is_4_port_mode(struct bnx2x_softc *sc)
 {
        uint32_t port4mode_ovwr_val;
@@ -1232,13 +2234,26 @@ static uint8_t elink_is_4_port_mode(struct bnx2x_softc *sc)
        port4mode_ovwr_val = REG_RD(sc, MISC_REG_PORT4MODE_EN_OVWR);
        if (port4mode_ovwr_val & (1 << 0)) {
                /* Return 4-port mode override value */
-               return (port4mode_ovwr_val & (1 << 1)) == (1 << 1);
+               return ((port4mode_ovwr_val & (1 << 1)) == (1 << 1));
        }
        /* Return 4-port mode from input pin */
-       return (uint8_t) REG_RD(sc, MISC_REG_PORT4MODE_EN);
+       return (uint8_t)REG_RD(sc, MISC_REG_PORT4MODE_EN);
+}
+
+static void elink_set_mdio_emac_per_phy(struct bnx2x_softc *sc,
+                                       struct elink_params *params)
+{
+       uint8_t phy_index;
+
+       /* Set mdio clock per phy */
+       for (phy_index = ELINK_INT_PHY; phy_index < params->num_phys;
+             phy_index++)
+               elink_set_mdio_clk(sc, params->chip_id,
+                                  params->phy[phy_index].mdio_ctrl);
 }
 
-static void elink_emac_init(struct elink_params *params)
+static void elink_emac_init(struct elink_params *params,
+                           __rte_unused struct elink_vars *vars)
 {
        /* reset and unreset the emac core */
        struct bnx2x_softc *sc = params->sc;
@@ -1262,9 +2277,9 @@ static void elink_emac_init(struct elink_params *params)
        timeout = 200;
        do {
                val = REG_RD(sc, emac_base + EMAC_REG_EMAC_MODE);
-               PMD_DRV_LOG(DEBUG, "EMAC reset reg is %u", val);
+               ELINK_DEBUG_P1(sc, "EMAC reset reg is %u", val);
                if (!timeout) {
-                       PMD_DRV_LOG(DEBUG, "EMAC timeout!");
+                       ELINK_DEBUG_P0(sc, "EMAC timeout!");
                        return;
                }
                timeout--;
@@ -1272,17 +2287,20 @@ static void elink_emac_init(struct elink_params *params)
 
        elink_set_mdio_emac_per_phy(sc, params);
        /* Set mac address */
-       val = ((params->mac_addr[0] << 8) | params->mac_addr[1]);
+       val = ((params->mac_addr[0] << 8) |
+               params->mac_addr[1]);
        elink_cb_reg_write(sc, emac_base + EMAC_REG_EMAC_MAC_MATCH, val);
 
        val = ((params->mac_addr[2] << 24) |
               (params->mac_addr[3] << 16) |
-              (params->mac_addr[4] << 8) | params->mac_addr[5]);
+              (params->mac_addr[4] << 8) |
+               params->mac_addr[5]);
        elink_cb_reg_write(sc, emac_base + EMAC_REG_EMAC_MAC_MATCH + 4, val);
 }
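
The MAC address is split across two registers: bytes 0-1 form the high word and bytes 2-5 the low word, each packed big-endian. A standalone sketch of the packing used for EMAC_REG_EMAC_MAC_MATCH (the same layout recurs for the XMAC/UMAC/BMAC source-address registers in this file):

#include <stdint.h>
#include <stdio.h>

static void mac_to_regs(const uint8_t mac[6], uint32_t *hi, uint32_t *lo)
{
	*hi = ((uint32_t)mac[0] << 8) | mac[1];
	*lo = ((uint32_t)mac[2] << 24) | ((uint32_t)mac[3] << 16) |
	      ((uint32_t)mac[4] << 8) | mac[5];
}

int main(void)
{
	const uint8_t mac[6] = { 0x00, 0x10, 0x18, 0xab, 0xcd, 0xef };
	uint32_t hi, lo;

	mac_to_regs(mac, &hi, &lo);
	printf("hi=0x%04x lo=0x%08x\n", hi, lo); /* hi=0x0010 lo=0x18abcdef */
	return 0;
}
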
 
 static void elink_set_xumac_nig(struct elink_params *params,
-                               uint16_t tx_pause_en, uint8_t enable)
+                               uint16_t tx_pause_en,
+                               uint8_t enable)
 {
        struct bnx2x_softc *sc = params->sc;
 
@@ -1300,7 +2318,7 @@ static void elink_set_umac_rxtx(struct elink_params *params, uint8_t en)
        uint32_t val;
        struct bnx2x_softc *sc = params->sc;
        if (!(REG_RD(sc, MISC_REG_RESET_REG_2) &
-             (MISC_REGISTERS_RESET_REG_2_UMAC0 << params->port)))
+                  (MISC_REGISTERS_RESET_REG_2_UMAC0 << params->port)))
                return;
        val = REG_RD(sc, umac_base + UMAC_REG_COMMAND_CONFIG);
        if (en)
@@ -1314,7 +2332,7 @@ static void elink_set_umac_rxtx(struct elink_params *params, uint8_t en)
 }
 
 static void elink_umac_enable(struct elink_params *params,
-                             struct elink_vars *vars, uint8_t lb)
+                           struct elink_vars *vars, uint8_t lb)
 {
        uint32_t val;
        uint32_t umac_base = params->port ? GRCBASE_UMAC1 : GRCBASE_UMAC0;
@@ -1327,15 +2345,15 @@ static void elink_umac_enable(struct elink_params *params,
        REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET,
               (MISC_REGISTERS_RESET_REG_2_UMAC0 << params->port));
 
-       PMD_DRV_LOG(DEBUG, "enabling UMAC");
+       ELINK_DEBUG_P0(sc, "enabling UMAC");
 
        /* This register opens the gate for the UMAC despite its name */
        REG_WR(sc, NIG_REG_EGRESS_EMAC0_PORT + params->port * 4, 1);
 
        val = UMAC_COMMAND_CONFIG_REG_PROMIS_EN |
-           UMAC_COMMAND_CONFIG_REG_PAD_EN |
-           UMAC_COMMAND_CONFIG_REG_SW_RESET |
-           UMAC_COMMAND_CONFIG_REG_NO_LGTH_CHECK;
+               UMAC_COMMAND_CONFIG_REG_PAD_EN |
+               UMAC_COMMAND_CONFIG_REG_SW_RESET |
+               UMAC_COMMAND_CONFIG_REG_NO_LGTH_CHECK;
        switch (vars->line_speed) {
        case ELINK_SPEED_10:
                val |= (0 << 2);
@@ -1350,8 +2368,8 @@ static void elink_umac_enable(struct elink_params *params,
                val |= (3 << 2);
                break;
        default:
-               PMD_DRV_LOG(DEBUG, "Invalid speed for UMAC %d",
-                           vars->line_speed);
+               ELINK_DEBUG_P1(sc, "Invalid speed for UMAC %d",
+                              vars->line_speed);
                break;
        }
        if (!(vars->flow_ctrl & ELINK_FLOW_CTRL_TX))
@@ -1368,7 +2386,7 @@ static void elink_umac_enable(struct elink_params *params,
 
        /* Configure UMAC for EEE */
        if (vars->eee_status & SHMEM_EEE_ADV_STATUS_MASK) {
-               PMD_DRV_LOG(DEBUG, "configured UMAC for EEE");
+               ELINK_DEBUG_P0(sc, "configured UMAC for EEE");
                REG_WR(sc, umac_base + UMAC_REG_UMAC_EEE_CTRL,
                       UMAC_UMAC_EEE_CTRL_REG_EEE_EN);
                REG_WR(sc, umac_base + UMAC_REG_EEE_WAKE_TIMER, 0x11);
@@ -1380,13 +2398,16 @@ static void elink_umac_enable(struct elink_params *params,
        REG_WR(sc, umac_base + UMAC_REG_MAC_ADDR0,
               ((params->mac_addr[2] << 24) |
                (params->mac_addr[3] << 16) |
-               (params->mac_addr[4] << 8) | (params->mac_addr[5])));
+               (params->mac_addr[4] << 8) |
+               (params->mac_addr[5])));
        REG_WR(sc, umac_base + UMAC_REG_MAC_ADDR1,
-              ((params->mac_addr[0] << 8) | (params->mac_addr[1])));
+              ((params->mac_addr[0] << 8) |
+               (params->mac_addr[1])));
 
        /* Enable RX and TX */
        val &= ~UMAC_COMMAND_CONFIG_REG_PAD_EN;
-       val |= UMAC_COMMAND_CONFIG_REG_TX_ENA | UMAC_COMMAND_CONFIG_REG_RX_ENA;
+       val |= UMAC_COMMAND_CONFIG_REG_TX_ENA |
+               UMAC_COMMAND_CONFIG_REG_RX_ENA;
        REG_WR(sc, umac_base + UMAC_REG_COMMAND_CONFIG, val);
        DELAY(50);
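
The switch above encodes the line speed into a 2-bit field at bits 3:2 of UMAC_REG_COMMAND_CONFIG. A compact restatement of just that mapping (the speed constants stand in for the ELINK_SPEED_* values):

#include <stdint.h>

enum { SPEED_10 = 10, SPEED_100 = 100, SPEED_1000 = 1000, SPEED_2500 = 2500 };

uint32_t umac_speed_bits(uint32_t line_speed)
{
	switch (line_speed) {
	case SPEED_10:   return 0u << 2;
	case SPEED_100:  return 1u << 2;
	case SPEED_1000: return 2u << 2;
	case SPEED_2500: return 3u << 2;
	default:         return 0; /* invalid speed: leave the field clear */
	}
}
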
 
@@ -1426,7 +2447,8 @@ static void elink_xmac_init(struct elink_params *params, uint32_t max_speed)
            is_port4mode &&
            (REG_RD(sc, MISC_REG_RESET_REG_2) &
             MISC_REGISTERS_RESET_REG_2_XMAC)) {
-               PMD_DRV_LOG(DEBUG, "XMAC already out of reset in 4-port mode");
+               ELINK_DEBUG_P0(sc,
+                  "XMAC already out of reset in 4-port mode");
                return;
        }
 
@@ -1438,7 +2460,7 @@ static void elink_xmac_init(struct elink_params *params, uint32_t max_speed)
        REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET,
               MISC_REGISTERS_RESET_REG_2_XMAC);
        if (is_port4mode) {
-               PMD_DRV_LOG(DEBUG, "Init XMAC to 2 ports x 10G per path");
+               ELINK_DEBUG_P0(sc, "Init XMAC to 2 ports x 10G per path");
 
                /* Set the number of ports on the system side to up to 2 */
                REG_WR(sc, MISC_REG_XMAC_CORE_PORT_MODE, 1);
@@ -1449,13 +2471,13 @@ static void elink_xmac_init(struct elink_params *params, uint32_t max_speed)
                /* Set the number of ports on the system side to 1 */
                REG_WR(sc, MISC_REG_XMAC_CORE_PORT_MODE, 0);
                if (max_speed == ELINK_SPEED_10000) {
-                       PMD_DRV_LOG(DEBUG,
-                                   "Init XMAC to 10G x 1 port per path");
+                       ELINK_DEBUG_P0(sc,
+                          "Init XMAC to 10G x 1 port per path");
                        /* Set the number of ports on the Warp Core to 10G */
                        REG_WR(sc, MISC_REG_XMAC_PHY_PORT_MODE, 3);
                } else {
-                       PMD_DRV_LOG(DEBUG,
-                                   "Init XMAC to 20G x 2 ports per path");
+                       ELINK_DEBUG_P0(sc,
+                          "Init XMAC to 20G x 2 ports per path");
                        /* Set the number of ports on the Warp Core to 20G */
                        REG_WR(sc, MISC_REG_XMAC_PHY_PORT_MODE, 1);
                }
@@ -1477,7 +2499,8 @@ static void elink_set_xmac_rxtx(struct elink_params *params, uint8_t en)
        uint32_t pfc_ctrl, xmac_base = (port) ? GRCBASE_XMAC1 : GRCBASE_XMAC0;
        uint32_t val;
 
-       if (REG_RD(sc, MISC_REG_RESET_REG_2) & MISC_REGISTERS_RESET_REG_2_XMAC) {
+       if (REG_RD(sc, MISC_REG_RESET_REG_2) &
+           MISC_REGISTERS_RESET_REG_2_XMAC) {
                /* Send an indication to change the state in the NIG back to XON
                 * Clearing this bit enables the next set of this bit to get
                 * rising edge
@@ -1487,7 +2510,7 @@ static void elink_set_xmac_rxtx(struct elink_params *params, uint8_t en)
                       (pfc_ctrl & ~(1 << 1)));
                REG_WR(sc, xmac_base + XMAC_REG_PFC_CTRL_HI,
                       (pfc_ctrl | (1 << 1)));
-               PMD_DRV_LOG(DEBUG, "Disable XMAC on port %x", port);
+               ELINK_DEBUG_P1(sc, "Disable XMAC on port %x", port);
                val = REG_RD(sc, xmac_base + XMAC_REG_CTRL);
                if (en)
                        val |= (XMAC_CTRL_REG_TX_EN | XMAC_CTRL_REG_RX_EN);
@@ -1498,11 +2521,11 @@ static void elink_set_xmac_rxtx(struct elink_params *params, uint8_t en)
 }
 
 static elink_status_t elink_xmac_enable(struct elink_params *params,
-                                       struct elink_vars *vars, uint8_t lb)
+                            struct elink_vars *vars, uint8_t lb)
 {
        uint32_t val, xmac_base;
        struct bnx2x_softc *sc = params->sc;
-       PMD_DRV_LOG(DEBUG, "enabling XMAC");
+       ELINK_DEBUG_P0(sc, "enabling XMAC");
 
        xmac_base = (params->port) ? GRCBASE_XMAC1 : GRCBASE_XMAC0;
 
@@ -1536,10 +2559,10 @@ static elink_status_t elink_xmac_enable(struct elink_params *params,
        REG_WR(sc, xmac_base + XMAC_REG_TX_CTRL, 0xC800);
 
        /* update PFC */
-       elink_update_pfc_xmac(params, vars);
+       elink_update_pfc_xmac(params, vars, 0);
 
        if (vars->eee_status & SHMEM_EEE_ADV_STATUS_MASK) {
-               PMD_DRV_LOG(DEBUG, "Setting XMAC for EEE");
+               ELINK_DEBUG_P0(sc, "Setting XMAC for EEE");
                REG_WR(sc, xmac_base + XMAC_REG_EEE_TIMERS_HI, 0x1380008);
                REG_WR(sc, xmac_base + XMAC_REG_EEE_CTRL, 0x1);
        } else {
@@ -1568,14 +2591,14 @@ static elink_status_t elink_xmac_enable(struct elink_params *params,
 }
 
 static elink_status_t elink_emac_enable(struct elink_params *params,
-                                       struct elink_vars *vars, uint8_t lb)
+                            struct elink_vars *vars, uint8_t lb)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t port = params->port;
        uint32_t emac_base = port ? GRCBASE_EMAC1 : GRCBASE_EMAC0;
        uint32_t val;
 
-       PMD_DRV_LOG(DEBUG, "enabling EMAC");
+       ELINK_DEBUG_P0(sc, "enabling EMAC");
 
        /* Disable BMAC */
        REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_CLEAR,
@@ -1584,19 +2607,39 @@ static elink_status_t elink_emac_enable(struct elink_params *params,
        /* enable emac and not bmac */
        REG_WR(sc, NIG_REG_EGRESS_EMAC0_PORT + port * 4, 1);
 
+#ifdef ELINK_INCLUDE_EMUL
+       /* for paladium */
+       /* for Palladium */
+               /* Use lane 1 (of lanes 0-3) */
+               REG_WR(sc, NIG_REG_XGXS_LANE_SEL_P0 + port * 4, 1);
+               REG_WR(sc, NIG_REG_XGXS_SERDES0_MODE_SEL + port * 4, 1);
+       }
+       /* for fpga */
+       else
+#endif
+#ifdef ELINK_INCLUDE_FPGA
+       if (CHIP_REV_IS_FPGA(sc)) {
+               /* Use lane 1 (of lanes 0-3) */
+               ELINK_DEBUG_P0(sc, "elink_emac_enable: Setting FPGA");
+
+               REG_WR(sc, NIG_REG_XGXS_LANE_SEL_P0 + port * 4, 1);
+               REG_WR(sc, NIG_REG_XGXS_SERDES0_MODE_SEL + port * 4, 0);
+       } else
+#endif
+       /* ASIC */
        if (vars->phy_flags & PHY_XGXS_FLAG) {
                uint32_t ser_lane = ((params->lane_config &
-                                     PORT_HW_CFG_LANE_SWAP_CFG_MASTER_MASK) >>
-                                    PORT_HW_CFG_LANE_SWAP_CFG_MASTER_SHIFT);
+                                PORT_HW_CFG_LANE_SWAP_CFG_MASTER_MASK) >>
+                               PORT_HW_CFG_LANE_SWAP_CFG_MASTER_SHIFT);
 
-               PMD_DRV_LOG(DEBUG, "XGXS");
+               ELINK_DEBUG_P0(sc, "XGXS");
                /* select the master lanes (out of 0-3) */
                REG_WR(sc, NIG_REG_XGXS_LANE_SEL_P0 + port * 4, ser_lane);
                /* select XGXS */
                REG_WR(sc, NIG_REG_XGXS_SERDES0_MODE_SEL + port * 4, 1);
 
-       } else {                /* SerDes */
-               PMD_DRV_LOG(DEBUG, "SerDes");
+       } else { /* SerDes */
+               ELINK_DEBUG_P0(sc, "SerDes");
                /* select SerDes */
                REG_WR(sc, NIG_REG_XGXS_SERDES0_MODE_SEL + port * 4, 0);
        }
@@ -1606,28 +2649,39 @@ static elink_status_t elink_emac_enable(struct elink_params *params,
        elink_bits_en(sc, emac_base + EMAC_REG_EMAC_TX_MODE,
                      EMAC_TX_MODE_RESET);
 
-       /* pause enable/disable */
-       elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_RX_MODE,
-                      EMAC_RX_MODE_FLOW_EN);
-
-       elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_TX_MODE,
-                      (EMAC_TX_MODE_EXT_PAUSE_EN |
-                       EMAC_TX_MODE_FLOW_EN));
-       if (!(params->feature_config_flags &
-             ELINK_FEATURE_CONFIG_PFC_ENABLED)) {
-               if (vars->flow_ctrl & ELINK_FLOW_CTRL_RX)
-                       elink_bits_en(sc, emac_base +
-                                     EMAC_REG_EMAC_RX_MODE,
-                                     EMAC_RX_MODE_FLOW_EN);
+#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA)
+       if (CHIP_REV_IS_SLOW(sc)) {
+               /* config GMII mode */
+               val = REG_RD(sc, emac_base + EMAC_REG_EMAC_MODE);
+               elink_cb_reg_write(sc, emac_base + EMAC_REG_EMAC_MODE,
+                                  (val | EMAC_MODE_PORT_GMII));
+       } else { /* ASIC */
+#endif
+               /* pause enable/disable */
+               elink_bits_dis(sc, emac_base + EMAC_REG_EMAC_RX_MODE,
+                              EMAC_RX_MODE_FLOW_EN);
 
-               if (vars->flow_ctrl & ELINK_FLOW_CTRL_TX)
-                       elink_bits_en(sc, emac_base +
-                                     EMAC_REG_EMAC_TX_MODE,
-                                     (EMAC_TX_MODE_EXT_PAUSE_EN |
-                                      EMAC_TX_MODE_FLOW_EN));
-       } else
-               elink_bits_en(sc, emac_base + EMAC_REG_EMAC_TX_MODE,
-                             EMAC_TX_MODE_FLOW_EN);
+               elink_bits_dis(sc,  emac_base + EMAC_REG_EMAC_TX_MODE,
+                              (EMAC_TX_MODE_EXT_PAUSE_EN |
+                               EMAC_TX_MODE_FLOW_EN));
+               if (!(params->feature_config_flags &
+                     ELINK_FEATURE_CONFIG_PFC_ENABLED)) {
+                       if (vars->flow_ctrl & ELINK_FLOW_CTRL_RX)
+                               elink_bits_en(sc, emac_base +
+                                             EMAC_REG_EMAC_RX_MODE,
+                                             EMAC_RX_MODE_FLOW_EN);
+
+                       if (vars->flow_ctrl & ELINK_FLOW_CTRL_TX)
+                               elink_bits_en(sc, emac_base +
+                                             EMAC_REG_EMAC_TX_MODE,
+                                             (EMAC_TX_MODE_EXT_PAUSE_EN |
+                                              EMAC_TX_MODE_FLOW_EN));
+               } else
+                       elink_bits_en(sc, emac_base + EMAC_REG_EMAC_TX_MODE,
+                                     EMAC_TX_MODE_FLOW_EN);
+#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA)
+       }
+#endif
 
        /* KEEP_VLAN_TAG, promiscuous */
        val = REG_RD(sc, emac_base + EMAC_REG_EMAC_RX_MODE);
@@ -1642,18 +2696,18 @@ static elink_status_t elink_emac_enable(struct elink_params *params,
         */
        elink_cb_reg_write(sc, emac_base + EMAC_REG_RX_PFC_MODE, 0);
        if (params->feature_config_flags & ELINK_FEATURE_CONFIG_PFC_ENABLED) {
-               PMD_DRV_LOG(DEBUG, "PFC is enabled");
+               ELINK_DEBUG_P0(sc, "PFC is enabled");
                /* Enable PFC again */
                elink_cb_reg_write(sc, emac_base + EMAC_REG_RX_PFC_MODE,
-                                  EMAC_REG_RX_PFC_MODE_RX_EN |
-                                  EMAC_REG_RX_PFC_MODE_TX_EN |
-                                  EMAC_REG_RX_PFC_MODE_PRIORITIES);
+                       EMAC_REG_RX_PFC_MODE_RX_EN |
+                       EMAC_REG_RX_PFC_MODE_TX_EN |
+                       EMAC_REG_RX_PFC_MODE_PRIORITIES);
 
                elink_cb_reg_write(sc, emac_base + EMAC_REG_RX_PFC_PARAM,
-                                  ((0x0101 <<
-                                    EMAC_REG_RX_PFC_PARAM_OPCODE_BITSHIFT) |
-                                   (0x00ff <<
-                                    EMAC_REG_RX_PFC_PARAM_PRIORITY_EN_BITSHIFT)));
+                       ((0x0101 <<
+                         EMAC_REG_RX_PFC_PARAM_OPCODE_BITSHIFT) |
+                        (0x00ff <<
+                         EMAC_REG_RX_PFC_PARAM_PRIORITY_EN_BITSHIFT)));
                val |= EMAC_RX_MODE_KEEP_MAC_CONTROL;
        }
        elink_cb_reg_write(sc, emac_base + EMAC_REG_EMAC_RX_MODE, val);
@@ -1671,9 +2725,8 @@ static elink_status_t elink_emac_enable(struct elink_params *params,
 
        /* Enable emac for jumbo packets */
        elink_cb_reg_write(sc, emac_base + EMAC_REG_EMAC_RX_MTU_SIZE,
-                          (EMAC_RX_MTU_SIZE_JUMBO_ENA |
-                           (ELINK_ETH_MAX_JUMBO_PACKET_SIZE +
-                            ELINK_ETH_OVREHEAD)));
+               (EMAC_RX_MTU_SIZE_JUMBO_ENA |
+                (ELINK_ETH_MAX_JUMBO_PACKET_SIZE + ELINK_ETH_OVREHEAD)));
 
        /* Strip CRC */
        REG_WR(sc, NIG_REG_NIG_INGRESS_EMAC0_NO_CRC + port * 4, 0x1);
@@ -1687,13 +2740,23 @@ static elink_status_t elink_emac_enable(struct elink_params *params,
        REG_WR(sc, NIG_REG_EMAC0_IN_EN + port * 4, 0x1);
        val = 0;
        if ((params->feature_config_flags &
-            ELINK_FEATURE_CONFIG_PFC_ENABLED) ||
+             ELINK_FEATURE_CONFIG_PFC_ENABLED) ||
            (vars->flow_ctrl & ELINK_FLOW_CTRL_TX))
                val = 1;
 
        REG_WR(sc, NIG_REG_EMAC0_PAUSE_OUT_EN + port * 4, val);
        REG_WR(sc, NIG_REG_EGRESS_EMAC0_OUT_EN + port * 4, 0x1);
 
+#ifdef ELINK_INCLUDE_EMUL
+       if (CHIP_REV_IS_EMUL(sc)) {
+               /* Take the BigMac out of reset */
+               REG_WR(sc, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET,
+                      (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port));
+
+               /* Enable access for bmac registers */
+               REG_WR(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4, 0x1);
+       } else
+#endif
        REG_WR(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4, 0x0);
 
        vars->mac_type = ELINK_MAC_TYPE_EMAC;
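
NIG_REG_EMAC0_PAUSE_OUT_EN is raised whenever either PFC is configured or plain TX pause was negotiated; the same predicate recurs for the BMAC path later in this file. As a one-function sketch (the flag values are illustrative, not the real ELINK_* encodings):

#include <stdint.h>

#define FEATURE_PFC_ENABLED (1u << 1) /* illustrative flag values */
#define FLOW_CTRL_TX        (1u << 0)

uint32_t pause_out_en(uint32_t feature_flags, uint32_t flow_ctrl)
{
	return ((feature_flags & FEATURE_PFC_ENABLED) ||
		(flow_ctrl & FLOW_CTRL_TX)) ? 1u : 0u;
}
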
@@ -1705,13 +2768,13 @@ static void elink_update_pfc_bmac1(struct elink_params *params,
 {
        uint32_t wb_data[2];
        struct bnx2x_softc *sc = params->sc;
-       uint32_t bmac_addr = params->port ? NIG_REG_INGRESS_BMAC1_MEM :
-           NIG_REG_INGRESS_BMAC0_MEM;
+       uint32_t bmac_addr = params->port ? NIG_REG_INGRESS_BMAC1_MEM :
+               NIG_REG_INGRESS_BMAC0_MEM;
 
        uint32_t val = 0x14;
        if ((!(params->feature_config_flags &
-              ELINK_FEATURE_CONFIG_PFC_ENABLED)) &&
-           (vars->flow_ctrl & ELINK_FLOW_CTRL_RX))
+             ELINK_FEATURE_CONFIG_PFC_ENABLED)) &&
+               (vars->flow_ctrl & ELINK_FLOW_CTRL_RX))
                /* Enable BigMAC to react on received Pause packets */
                val |= (1 << 5);
        wb_data[0] = val;
@@ -1722,7 +2785,7 @@ static void elink_update_pfc_bmac1(struct elink_params *params,
        val = 0xc0;
        if (!(params->feature_config_flags &
              ELINK_FEATURE_CONFIG_PFC_ENABLED) &&
-           (vars->flow_ctrl & ELINK_FLOW_CTRL_TX))
+               (vars->flow_ctrl & ELINK_FLOW_CTRL_TX))
                val |= 0x800000;
        wb_data[0] = val;
        wb_data[1] = 0;
@@ -1730,7 +2793,8 @@ static void elink_update_pfc_bmac1(struct elink_params *params,
 }
 
 static void elink_update_pfc_bmac2(struct elink_params *params,
-                                  struct elink_vars *vars, uint8_t is_lb)
+                                  struct elink_vars *vars,
+                                  uint8_t is_lb)
 {
        /* Set rx control: Strip CRC and enable BigMAC to relay
         * control packets to the system as well
@@ -1738,12 +2802,12 @@ static void elink_update_pfc_bmac2(struct elink_params *params,
        uint32_t wb_data[2];
        struct bnx2x_softc *sc = params->sc;
        uint32_t bmac_addr = params->port ? NIG_REG_INGRESS_BMAC1_MEM :
-           NIG_REG_INGRESS_BMAC0_MEM;
+               NIG_REG_INGRESS_BMAC0_MEM;
        uint32_t val = 0x14;
 
        if ((!(params->feature_config_flags &
-              ELINK_FEATURE_CONFIG_PFC_ENABLED)) &&
-           (vars->flow_ctrl & ELINK_FLOW_CTRL_RX))
+             ELINK_FEATURE_CONFIG_PFC_ENABLED)) &&
+               (vars->flow_ctrl & ELINK_FLOW_CTRL_RX))
                /* Enable BigMAC to react on received Pause packets */
                val |= (1 << 5);
        wb_data[0] = val;
@@ -1754,7 +2818,7 @@ static void elink_update_pfc_bmac2(struct elink_params *params,
        /* Tx control */
        val = 0xc0;
        if (!(params->feature_config_flags &
-             ELINK_FEATURE_CONFIG_PFC_ENABLED) &&
+                               ELINK_FEATURE_CONFIG_PFC_ENABLED) &&
            (vars->flow_ctrl & ELINK_FLOW_CTRL_TX))
                val |= 0x800000;
        wb_data[0] = val;
@@ -1762,21 +2826,21 @@ static void elink_update_pfc_bmac2(struct elink_params *params,
        REG_WR_DMAE(sc, bmac_addr + BIGMAC2_REGISTER_TX_CONTROL, wb_data, 2);
 
        if (params->feature_config_flags & ELINK_FEATURE_CONFIG_PFC_ENABLED) {
-               PMD_DRV_LOG(DEBUG, "PFC is enabled");
+               ELINK_DEBUG_P0(sc, "PFC is enabled");
                /* Enable PFC RX & TX & STATS and set 8 COS  */
                wb_data[0] = 0x0;
-               wb_data[0] |= (1 << 0); /* RX */
-               wb_data[0] |= (1 << 1); /* TX */
-               wb_data[0] |= (1 << 2); /* Force initial Xon */
-               wb_data[0] |= (1 << 3); /* 8 cos */
-               wb_data[0] |= (1 << 5); /* STATS */
+               wb_data[0] |= (1 << 0);  /* RX */
+               wb_data[0] |= (1 << 1);  /* TX */
+               wb_data[0] |= (1 << 2);  /* Force initial Xon */
+               wb_data[0] |= (1 << 3);  /* 8 cos */
+               wb_data[0] |= (1 << 5);  /* STATS */
                wb_data[1] = 0;
                REG_WR_DMAE(sc, bmac_addr + BIGMAC2_REGISTER_PFC_CONTROL,
                            wb_data, 2);
                /* Clear the force Xon */
                wb_data[0] &= ~(1 << 2);
        } else {
-               PMD_DRV_LOG(DEBUG, "PFC is disabled");
+               ELINK_DEBUG_P0(sc, "PFC is disabled");
                /* Disable PFC RX & TX & STATS and set 8 COS */
                wb_data[0] = 0x8;
                wb_data[1] = 0;
@@ -1791,7 +2855,7 @@ static void elink_update_pfc_bmac2(struct elink_params *params,
         */
        val = 0x8000;
        if (params->feature_config_flags & ELINK_FEATURE_CONFIG_PFC_ENABLED)
-               val |= (1 << 16);       /* enable automatic re-send */
+               val |= (1 << 16); /* enable automatic re-send */
 
        wb_data[0] = val;
        wb_data[1] = 0;
@@ -1799,10 +2863,10 @@ static void elink_update_pfc_bmac2(struct elink_params *params,
                    wb_data, 2);
 
        /* mac control */
-       val = 0x3;              /* Enable RX and TX */
+       val = 0x3; /* Enable RX and TX */
        if (is_lb) {
-               val |= 0x4;     /* Local loopback */
-               PMD_DRV_LOG(DEBUG, "enable bmac loopback");
+               val |= 0x4; /* Local loopback */
+               ELINK_DEBUG_P0(sc, "enable bmac loopback");
        }
        /* When PFC enabled, Pass pause frames towards the NIG. */
        if (params->feature_config_flags & ELINK_FEATURE_CONFIG_PFC_ENABLED)
@@ -1814,47 +2878,46 @@ static void elink_update_pfc_bmac2(struct elink_params *params,
 }
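
The BIGMAC2 PFC control word assembled above is a small bit set: RX, TX, force-XON, 8-COS and stats when PFC is on, or just the 8-COS bit (0x8) when it is off; the driver then clears the force-XON bit after the first write. A sketch of that word, with the bit positions taken from the inline comments:

#include <stdint.h>

#define PFC_RX        (1u << 0)
#define PFC_TX        (1u << 1)
#define PFC_FORCE_XON (1u << 2)
#define PFC_8COS      (1u << 3)
#define PFC_STATS     (1u << 5)

uint32_t bmac2_pfc_ctrl(int pfc_enabled)
{
	if (!pfc_enabled)
		return PFC_8COS; /* 0x8: PFC off, keep 8 COS */
	/* 0x2f; the driver drops PFC_FORCE_XON again after this write */
	return PFC_RX | PFC_TX | PFC_FORCE_XON | PFC_8COS | PFC_STATS;
}
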
 
 /******************************************************************************
-* Description:
-*  This function is needed because NIG ARB_CREDIT_WEIGHT_X are
-*  not continues and ARB_CREDIT_WEIGHT_0 + offset is suitable.
-******************************************************************************/
+ * Description:
+ *  This function is needed because the NIG ARB_CREDIT_WEIGHT_X registers
+ *  are not contiguous, so ARB_CREDIT_WEIGHT_0 + offset is not suitable.
+ ******************************************************************************/
 static elink_status_t elink_pfc_nig_rx_priority_mask(struct bnx2x_softc *sc,
-                                                    uint8_t cos_entry,
-                                                    uint32_t priority_mask,
-                                                    uint8_t port)
+                                          uint8_t cos_entry,
+                                          uint32_t priority_mask, uint8_t port)
 {
        uint32_t nig_reg_rx_priority_mask_add = 0;
 
        switch (cos_entry) {
        case 0:
-               nig_reg_rx_priority_mask_add = (port) ?
-                   NIG_REG_P1_RX_COS0_PRIORITY_MASK :
-                   NIG_REG_P0_RX_COS0_PRIORITY_MASK;
+               nig_reg_rx_priority_mask_add = (port) ?
+                       NIG_REG_P1_RX_COS0_PRIORITY_MASK :
+                       NIG_REG_P0_RX_COS0_PRIORITY_MASK;
                break;
        case 1:
-               nig_reg_rx_priority_mask_add = (port) ?
-                   NIG_REG_P1_RX_COS1_PRIORITY_MASK :
-                   NIG_REG_P0_RX_COS1_PRIORITY_MASK;
+               nig_reg_rx_priority_mask_add = (port) ?
+                       NIG_REG_P1_RX_COS1_PRIORITY_MASK :
+                       NIG_REG_P0_RX_COS1_PRIORITY_MASK;
                break;
        case 2:
-               nig_reg_rx_priority_mask_add = (port) ?
-                   NIG_REG_P1_RX_COS2_PRIORITY_MASK :
-                   NIG_REG_P0_RX_COS2_PRIORITY_MASK;
+               nig_reg_rx_priority_mask_add = (port) ?
+                       NIG_REG_P1_RX_COS2_PRIORITY_MASK :
+                       NIG_REG_P0_RX_COS2_PRIORITY_MASK;
                break;
        case 3:
                if (port)
-                       return ELINK_STATUS_ERROR;
-               nig_reg_rx_priority_mask_add = NIG_REG_P0_RX_COS3_PRIORITY_MASK;
+                       return ELINK_STATUS_ERROR;
+               nig_reg_rx_priority_mask_add = NIG_REG_P0_RX_COS3_PRIORITY_MASK;
                break;
        case 4:
                if (port)
-                       return ELINK_STATUS_ERROR;
-               nig_reg_rx_priority_mask_add = NIG_REG_P0_RX_COS4_PRIORITY_MASK;
+                       return ELINK_STATUS_ERROR;
+               nig_reg_rx_priority_mask_add = NIG_REG_P0_RX_COS4_PRIORITY_MASK;
                break;
        case 5:
                if (port)
-                       return ELINK_STATUS_ERROR;
-               nig_reg_rx_priority_mask_add = NIG_REG_P0_RX_COS5_PRIORITY_MASK;
+                       return ELINK_STATUS_ERROR;
+               nig_reg_rx_priority_mask_add = NIG_REG_P0_RX_COS5_PRIORITY_MASK;
                break;
        }
 
@@ -1862,7 +2925,6 @@ static elink_status_t elink_pfc_nig_rx_priority_mask(struct bnx2x_softc *sc,
 
        return ELINK_STATUS_OK;
 }
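
The switch above exists because the per-COS RX priority-mask registers do not sit at evenly spaced addresses, and the COS3-COS5 masks exist only on port 0. A table-driven sketch of the same lookup; the addresses below are stand-ins, not the real NIG_REG_* values:

#include <stdint.h>

/* Stand-in addresses: the real registers are not contiguous, which is
 * exactly why a base + cos * stride computation cannot be used. */
static const uint32_t p0_mask_reg[6] = {
	0x601c, 0x6020, 0x6024, 0x6028, 0x602c, 0x6030
};
static const uint32_t p1_mask_reg[3] = {
	0x6034, 0x6038, 0x603c /* port 1 exposes COS0..COS2 only */
};

int rx_priority_mask_reg(uint8_t port, uint8_t cos, uint32_t *reg)
{
	if (port ? (cos >= 3) : (cos >= 6))
		return -1; /* no such COS entry on this port */
	*reg = port ? p1_mask_reg[cos] : p0_mask_reg[cos];
	return 0;
}
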
-
 static void elink_update_mng(struct elink_params *params, uint32_t link_status)
 {
        struct bnx2x_softc *sc = params->sc;
@@ -1872,31 +2934,20 @@ static void elink_update_mng(struct elink_params *params, uint32_t link_status)
                        port_mb[params->port].link_status), link_status);
 }
 
-static void elink_update_link_attr(struct elink_params *params,
-                                  uint32_t link_attr)
-{
-       struct bnx2x_softc *sc = params->sc;
-
-       if (SHMEM2_HAS(sc, link_attr_sync))
-               REG_WR(sc, params->shmem2_base +
-                      offsetof(struct shmem2_region,
-                               link_attr_sync[params->port]), link_attr);
-}
-
 static void elink_update_pfc_nig(struct elink_params *params,
-                                struct elink_nig_brb_pfc_port_params
-                                *nig_params)
+               __rte_unused struct elink_vars *vars,
+               struct elink_nig_brb_pfc_port_params *nig_params)
 {
-       uint32_t xcm_mask = 0, ppp_enable = 0, pause_enable = 0, llfc_out_en =
-           0;
+       uint32_t xcm_mask = 0, ppp_enable = 0, pause_enable = 0;
+       uint32_t llfc_out_en = 0;
        uint32_t llfc_enable = 0, xcm_out_en = 0, hwpfc_enable = 0;
        uint32_t pkt_priority_to_cos = 0;
        struct bnx2x_softc *sc = params->sc;
        uint8_t port = params->port;
 
        int set_pfc = params->feature_config_flags &
-           ELINK_FEATURE_CONFIG_PFC_ENABLED;
-       PMD_DRV_LOG(DEBUG, "updating pfc nig parameters");
+               ELINK_FEATURE_CONFIG_PFC_ENABLED;
+       ELINK_DEBUG_P0(sc, "updating pfc nig parameters");
 
        /* When NIG_LLH0_XCM_MASK_REG_LLHX_XCM_MASK_BCN bit is set
         * MAC control frames (that are not pause packets)
@@ -1916,19 +2967,19 @@ static void elink_update_pfc_nig(struct elink_params *params,
                else
                        ppp_enable = 1;
                xcm_mask &= ~(port ? NIG_LLH1_XCM_MASK_REG_LLH1_XCM_MASK_BCN :
-                             NIG_LLH0_XCM_MASK_REG_LLH0_XCM_MASK_BCN);
+                                    NIG_LLH0_XCM_MASK_REG_LLH0_XCM_MASK_BCN);
                xcm_out_en = 0;
                hwpfc_enable = 1;
-       } else {
+       } else {
                if (nig_params) {
                        llfc_out_en = nig_params->llfc_out_en;
                        llfc_enable = nig_params->llfc_enable;
                        pause_enable = nig_params->pause_enable;
-               } else          /* Default non PFC mode - PAUSE */
+               } else /* Default non PFC mode - PAUSE */
                        pause_enable = 1;
 
                xcm_mask |= (port ? NIG_LLH1_XCM_MASK_REG_LLH1_XCM_MASK_BCN :
-                            NIG_LLH0_XCM_MASK_REG_LLH0_XCM_MASK_BCN);
+                       NIG_LLH0_XCM_MASK_REG_LLH0_XCM_MASK_BCN);
                xcm_out_en = 1;
        }
 
@@ -1965,9 +3016,7 @@ static void elink_update_pfc_nig(struct elink_params *params,
 
                for (i = 0; i < nig_params->num_of_rx_cos_priority_mask; i++)
                        elink_pfc_nig_rx_priority_mask(sc, i,
-                                                      nig_params->
-                                                      rx_cos_priority_mask[i],
-                                                      port);
+                               nig_params->rx_cos_priority_mask[i], port);
 
                REG_WR(sc, port ? NIG_REG_LLFC_HIGH_PRIORITY_CLASSES_1 :
                       NIG_REG_LLFC_HIGH_PRIORITY_CLASSES_0,
@@ -1978,13 +3027,13 @@ static void elink_update_pfc_nig(struct elink_params *params,
                       nig_params->llfc_low_priority_classes);
        }
        REG_WR(sc, port ? NIG_REG_P1_PKT_PRIORITY_TO_COS :
-              NIG_REG_P0_PKT_PRIORITY_TO_COS, pkt_priority_to_cos);
+              NIG_REG_P0_PKT_PRIORITY_TO_COS,
+              pkt_priority_to_cos);
 }
 
 elink_status_t elink_update_pfc(struct elink_params *params,
-                               struct elink_vars *vars,
-                               struct elink_nig_brb_pfc_port_params
-                               *pfc_params)
+                     struct elink_vars *vars,
+                     struct elink_nig_brb_pfc_port_params *pfc_params)
 {
        /* The PFC and pause are orthogonal to one another, meaning when
         * PFC is enabled, the pause are disabled, and when PFC is
@@ -1992,7 +3041,6 @@ elink_status_t elink_update_pfc(struct elink_params *params,
         */
        uint32_t val;
        struct bnx2x_softc *sc = params->sc;
-       elink_status_t elink_status = ELINK_STATUS_OK;
        uint8_t bmac_loopback = (params->loopback_mode == ELINK_LOOPBACK_BMAC);
 
        if (params->feature_config_flags & ELINK_FEATURE_CONFIG_PFC_ENABLED)
@@ -2003,24 +3051,24 @@ elink_status_t elink_update_pfc(struct elink_params *params,
        elink_update_mng(params, vars->link_status);
 
        /* Update NIG params */
-       elink_update_pfc_nig(params, pfc_params);
+       elink_update_pfc_nig(params, vars, pfc_params);
 
        if (!vars->link_up)
-               return elink_status;
+               return ELINK_STATUS_OK;
 
-       PMD_DRV_LOG(DEBUG, "About to update PFC in BMAC");
+       ELINK_DEBUG_P0(sc, "About to update PFC in BMAC");
 
        if (CHIP_IS_E3(sc)) {
                if (vars->mac_type == ELINK_MAC_TYPE_XMAC)
-                       elink_update_pfc_xmac(params, vars);
+                       elink_update_pfc_xmac(params, vars, 0);
        } else {
                val = REG_RD(sc, MISC_REG_RESET_REG_2);
                if ((val &
                     (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << params->port))
                    == 0) {
-                       PMD_DRV_LOG(DEBUG, "About to update PFC in EMAC");
+                       ELINK_DEBUG_P0(sc, "About to update PFC in EMAC");
                        elink_emac_enable(params, vars, 0);
-                       return elink_status;
+                       return ELINK_STATUS_OK;
                }
                if (CHIP_IS_E2(sc))
                        elink_update_pfc_bmac2(params, vars, bmac_loopback);
@@ -2034,20 +3082,21 @@ elink_status_t elink_update_pfc(struct elink_params *params,
                        val = 1;
                REG_WR(sc, NIG_REG_BMAC0_PAUSE_OUT_EN + params->port * 4, val);
        }
-       return elink_status;
+       return ELINK_STATUS_OK;
 }
 
 static elink_status_t elink_bmac1_enable(struct elink_params *params,
-                                        struct elink_vars *vars, uint8_t is_lb)
+                             struct elink_vars *vars,
+                             uint8_t is_lb)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t port = params->port;
        uint32_t bmac_addr = port ? NIG_REG_INGRESS_BMAC1_MEM :
-           NIG_REG_INGRESS_BMAC0_MEM;
+                              NIG_REG_INGRESS_BMAC0_MEM;
        uint32_t wb_data[2];
        uint32_t val;
 
-       PMD_DRV_LOG(DEBUG, "Enabling BigMAC1");
+       ELINK_DEBUG_P0(sc, "Enabling BigMAC1");
 
        /* XGXS control */
        wb_data[0] = 0x3c;
@@ -2057,16 +3106,18 @@ static elink_status_t elink_bmac1_enable(struct elink_params *params,
 
        /* TX MAC SA */
        wb_data[0] = ((params->mac_addr[2] << 24) |
-                     (params->mac_addr[3] << 16) |
-                     (params->mac_addr[4] << 8) | params->mac_addr[5]);
-       wb_data[1] = ((params->mac_addr[0] << 8) | params->mac_addr[1]);
+                      (params->mac_addr[3] << 16) |
+                      (params->mac_addr[4] << 8) |
+                       params->mac_addr[5]);
+       wb_data[1] = ((params->mac_addr[0] << 8) |
+                       params->mac_addr[1]);
        REG_WR_DMAE(sc, bmac_addr + BIGMAC_REGISTER_TX_SOURCE_ADDR, wb_data, 2);
 
        /* MAC control */
        val = 0x3;
        if (is_lb) {
                val |= 0x4;
-               PMD_DRV_LOG(DEBUG, "enable bmac loopback");
+               ELINK_DEBUG_P0(sc, "enable bmac loopback");
        }
        wb_data[0] = val;
        wb_data[1] = 0;
@@ -2094,20 +3145,30 @@ static elink_status_t elink_bmac1_enable(struct elink_params *params,
        wb_data[1] = 0;
        REG_WR_DMAE(sc, bmac_addr + BIGMAC_REGISTER_RX_LLFC_MSG_FLDS,
                    wb_data, 2);
+#ifdef ELINK_INCLUDE_EMUL
+       /* Fix for emulation */
+       if (CHIP_REV_IS_EMUL(sc)) {
+               wb_data[0] = 0xf000;
+               wb_data[1] = 0;
+               REG_WR_DMAE(sc, bmac_addr + BIGMAC_REGISTER_TX_PAUSE_THRESHOLD,
+                           wb_data, 2);
+       }
+#endif
 
        return ELINK_STATUS_OK;
 }
 
 static elink_status_t elink_bmac2_enable(struct elink_params *params,
-                                        struct elink_vars *vars, uint8_t is_lb)
+                             struct elink_vars *vars,
+                             uint8_t is_lb)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t port = params->port;
        uint32_t bmac_addr = port ? NIG_REG_INGRESS_BMAC1_MEM :
-           NIG_REG_INGRESS_BMAC0_MEM;
+                              NIG_REG_INGRESS_BMAC0_MEM;
        uint32_t wb_data[2];
 
-       PMD_DRV_LOG(DEBUG, "Enabling BigMAC2");
+       ELINK_DEBUG_P0(sc, "Enabling BigMAC2");
 
        wb_data[0] = 0;
        wb_data[1] = 0;
@@ -2124,9 +3185,11 @@ static elink_status_t elink_bmac2_enable(struct elink_params *params,
 
        /* TX MAC SA */
        wb_data[0] = ((params->mac_addr[2] << 24) |
-                     (params->mac_addr[3] << 16) |
-                     (params->mac_addr[4] << 8) | params->mac_addr[5]);
-       wb_data[1] = ((params->mac_addr[0] << 8) | params->mac_addr[1]);
+                      (params->mac_addr[3] << 16) |
+                      (params->mac_addr[4] << 8) |
+                       params->mac_addr[5]);
+       wb_data[1] = ((params->mac_addr[0] << 8) |
+                       params->mac_addr[1]);
        REG_WR_DMAE(sc, bmac_addr + BIGMAC2_REGISTER_TX_SOURCE_ADDR,
                    wb_data, 2);
 
@@ -2161,8 +3224,8 @@ static elink_status_t elink_bmac2_enable(struct elink_params *params,
 }
 
 static elink_status_t elink_bmac_enable(struct elink_params *params,
-                                       struct elink_vars *vars,
-                                       uint8_t is_lb, uint8_t reset_bmac)
+                            struct elink_vars *vars,
+                            uint8_t is_lb, uint8_t reset_bmac)
 {
        elink_status_t rc = ELINK_STATUS_OK;
        uint8_t port = params->port;
@@ -2181,7 +3244,7 @@ static elink_status_t elink_bmac_enable(struct elink_params *params,
        /* Enable access for bmac registers */
        REG_WR(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4, 0x1);
 
-       /* Enable BMAC according to BMAC type */
+       /* Enable BMAC according to BMAC type */
        if (CHIP_IS_E2(sc))
                rc = elink_bmac2_enable(params, vars, is_lb);
        else
@@ -2191,7 +3254,7 @@ static elink_status_t elink_bmac_enable(struct elink_params *params,
        REG_WR(sc, NIG_REG_EGRESS_EMAC0_PORT + port * 4, 0x0);
        val = 0;
        if ((params->feature_config_flags &
-            ELINK_FEATURE_CONFIG_PFC_ENABLED) ||
+             ELINK_FEATURE_CONFIG_PFC_ENABLED) ||
            (vars->flow_ctrl & ELINK_FLOW_CTRL_TX))
                val = 1;
        REG_WR(sc, NIG_REG_BMAC0_PAUSE_OUT_EN + port * 4, val);
@@ -2205,13 +3268,15 @@ static elink_status_t elink_bmac_enable(struct elink_params *params,
        return rc;
 }
 
-static void elink_set_bmac_rx(struct bnx2x_softc *sc, uint8_t port, uint8_t en)
+static void elink_set_bmac_rx(struct bnx2x_softc *sc,
+                             __rte_unused uint32_t chip_id,
+                             uint8_t port, uint8_t en)
 {
        uint32_t bmac_addr = port ? NIG_REG_INGRESS_BMAC1_MEM :
-           NIG_REG_INGRESS_BMAC0_MEM;
+                       NIG_REG_INGRESS_BMAC0_MEM;
        uint32_t wb_data[2];
-       uint32_t nig_bmac_enable =
-           REG_RD(sc, NIG_REG_BMAC0_REGS_OUT_EN + port * 4);
+       uint32_t nig_bmac_enable = REG_RD(sc, NIG_REG_BMAC0_REGS_OUT_EN +
+                                         port * 4);
 
        if (CHIP_IS_E2(sc))
                bmac_addr += BIGMAC2_REGISTER_BMAC_CONTROL;
@@ -2219,7 +3284,8 @@ static void elink_set_bmac_rx(struct bnx2x_softc *sc, uint8_t port, uint8_t en)
                bmac_addr += BIGMAC_REGISTER_BMAC_CONTROL;
        /* Only if the bmac is out of reset */
        if (REG_RD(sc, MISC_REG_RESET_REG_2) &
-           (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port) && nig_bmac_enable) {
+                       (MISC_REGISTERS_RESET_REG_2_RST_BMAC0 << port) &&
+           nig_bmac_enable) {
                /* Clear Rx Enable bit in BMAC_CONTROL register */
                REG_RD_DMAE(sc, bmac_addr, wb_data, 2);
                if (en)
@@ -2232,7 +3298,8 @@ static void elink_set_bmac_rx(struct bnx2x_softc *sc, uint8_t port, uint8_t en)
 }
 
 static elink_status_t elink_pbf_update(struct elink_params *params,
-                                      uint32_t flow_ctrl, uint32_t line_speed)
+                           uint32_t flow_ctrl,
+                           uint32_t line_speed)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t port = params->port;
@@ -2245,7 +3312,7 @@ static elink_status_t elink_pbf_update(struct elink_params *params,
        /* Wait for init credit */
        init_crd = REG_RD(sc, PBF_REG_P0_INIT_CRD + port * 4);
        crd = REG_RD(sc, PBF_REG_P0_CREDIT + port * 8);
-       PMD_DRV_LOG(DEBUG, "init_crd 0x%x  crd 0x%x", init_crd, crd);
+       ELINK_DEBUG_P2(sc, "init_crd 0x%x  crd 0x%x", init_crd, crd);
 
        while ((init_crd != crd) && count) {
                DELAY(1000 * 5);
@@ -2254,24 +3321,25 @@ static elink_status_t elink_pbf_update(struct elink_params *params,
        }
        crd = REG_RD(sc, PBF_REG_P0_CREDIT + port * 8);
        if (init_crd != crd) {
-               PMD_DRV_LOG(DEBUG, "BUG! init_crd 0x%x != crd 0x%x",
-                           init_crd, crd);
+               ELINK_DEBUG_P2(sc, "BUG! init_crd 0x%x != crd 0x%x",
+                         init_crd, crd);
                return ELINK_STATUS_ERROR;
        }
 
        if (flow_ctrl & ELINK_FLOW_CTRL_RX ||
            line_speed == ELINK_SPEED_10 ||
            line_speed == ELINK_SPEED_100 ||
-           line_speed == ELINK_SPEED_1000 || line_speed == ELINK_SPEED_2500) {
+           line_speed == ELINK_SPEED_1000 ||
+           line_speed == ELINK_SPEED_2500) {
                REG_WR(sc, PBF_REG_P0_PAUSE_ENABLE + port * 4, 1);
                /* Update threshold */
                REG_WR(sc, PBF_REG_P0_ARB_THRSH + port * 4, 0);
                /* Update init credit */
-               init_crd = 778; /* (800-18-4) */
+               init_crd = 778;         /* (800-18-4) */
 
        } else {
                uint32_t thresh = (ELINK_ETH_MAX_JUMBO_PACKET_SIZE +
-                                  ELINK_ETH_OVREHEAD) / 16;
+                             ELINK_ETH_OVREHEAD) / 16;
                REG_WR(sc, PBF_REG_P0_PAUSE_ENABLE + port * 4, 0);
                /* Update threshold */
                REG_WR(sc, PBF_REG_P0_ARB_THRSH + port * 4, thresh);
@@ -2281,14 +3349,14 @@ static elink_status_t elink_pbf_update(struct elink_params *params,
                        init_crd = thresh + 553 - 22;
                        break;
                default:
-                       PMD_DRV_LOG(DEBUG, "Invalid line_speed 0x%x",
-                                   line_speed);
+                       ELINK_DEBUG_P1(sc, "Invalid line_speed 0x%x",
+                                 line_speed);
                        return ELINK_STATUS_ERROR;
                }
        }
        REG_WR(sc, PBF_REG_P0_INIT_CRD + port * 4, init_crd);
-       PMD_DRV_LOG(DEBUG, "PBF updated to speed %d credit %d",
-                   line_speed, init_crd);
+       ELINK_DEBUG_P2(sc, "PBF updated to speed %d credit %d",
+                line_speed, init_crd);
 
        /* Probe the credit changes */
        REG_WR(sc, PBF_REG_INIT_P0 + port * 4, 0x1);
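
The init-credit arithmetic above has two branches: a fixed 778 (the commented 800-18-4) for low speeds or RX flow control, and a threshold-derived value for 10G. A worked sketch; both packet-size constants below are assumptions, not values shown in this patch:

#include <stdint.h>
#include <stdio.h>

#define MAX_JUMBO 9600u /* assumed ELINK_ETH_MAX_JUMBO_PACKET_SIZE */
#define OVERHEAD  24u   /* assumed ELINK_ETH_OVREHEAD */

int main(void)
{
	uint32_t thresh = (MAX_JUMBO + OVERHEAD) / 16; /* 16-byte units */
	uint32_t init_crd_10g = thresh + 553 - 22;     /* 10G branch */

	printf("thresh=%u init_crd=%u (fixed low-speed credit: 778)\n",
	       thresh, init_crd_10g);
	return 0;
}
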
@@ -2316,7 +3384,7 @@ static elink_status_t elink_pbf_update(struct elink_params *params,
  * the emac_base for the CL45 read/writes operations
  */
 static uint32_t elink_get_emac_base(struct bnx2x_softc *sc,
-                                   uint32_t mdc_mdio_access, uint8_t port)
+                              uint32_t mdc_mdio_access, uint8_t port)
 {
        uint32_t emac_base = 0;
        switch (mdc_mdio_access) {
@@ -2364,7 +3432,8 @@ static elink_status_t elink_cl22_write(struct bnx2x_softc *sc,
 
        /* Address */
        tmp = ((phy->addr << 21) | (reg << 16) | val |
-              EMAC_MDIO_COMM_COMMAND_WRITE_22 | EMAC_MDIO_COMM_START_BUSY);
+              EMAC_MDIO_COMM_COMMAND_WRITE_22 |
+              EMAC_MDIO_COMM_START_BUSY);
        REG_WR(sc, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_COMM, tmp);
 
        for (i = 0; i < 50; i++) {
@@ -2377,7 +3446,7 @@ static elink_status_t elink_cl22_write(struct bnx2x_softc *sc,
                }
        }
        if (tmp & EMAC_MDIO_COMM_START_BUSY) {
-               PMD_DRV_LOG(DEBUG, "write phy register failed");
+               ELINK_DEBUG_P0(sc, "write phy register failed");
                rc = ELINK_STATUS_TIMEOUT;
        }
        REG_WR(sc, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_MODE, mode);
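
The CL22 command word packs the PHY address at bit 21, the register number at bit 16 and the data in the low half, plus opcode and start/busy flags. A sketch of the write-command layout (the two flag values are illustrative stand-ins for the EMAC_MDIO_COMM_* macros):

#include <stdint.h>

#define MDIO_OP_WRITE_22 (1u << 26) /* illustrative opcode/flag values */
#define MDIO_START_BUSY  (1u << 29)

uint32_t cl22_write_cmd(uint8_t phy_addr, uint8_t reg, uint16_t val)
{
	return ((uint32_t)phy_addr << 21) | ((uint32_t)reg << 16) | val |
	       MDIO_OP_WRITE_22 | MDIO_START_BUSY;
}
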
@@ -2386,7 +3455,7 @@ static elink_status_t elink_cl22_write(struct bnx2x_softc *sc,
 
 static elink_status_t elink_cl22_read(struct bnx2x_softc *sc,
                                      struct elink_phy *phy,
-                                     uint16_t reg, uint16_t * ret_val)
+                                     uint16_t reg, uint16_t *ret_val)
 {
        uint32_t val, mode;
        uint16_t i;
@@ -2399,7 +3468,8 @@ static elink_status_t elink_cl22_read(struct bnx2x_softc *sc,
 
        /* Address */
        val = ((phy->addr << 21) | (reg << 16) |
-              EMAC_MDIO_COMM_COMMAND_READ_22 | EMAC_MDIO_COMM_START_BUSY);
+              EMAC_MDIO_COMM_COMMAND_READ_22 |
+              EMAC_MDIO_COMM_START_BUSY);
        REG_WR(sc, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_COMM, val);
 
        for (i = 0; i < 50; i++) {
@@ -2407,13 +3477,13 @@ static elink_status_t elink_cl22_read(struct bnx2x_softc *sc,
 
                val = REG_RD(sc, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_COMM);
                if (!(val & EMAC_MDIO_COMM_START_BUSY)) {
-                       *ret_val = (uint16_t) (val & EMAC_MDIO_COMM_DATA);
+                       *ret_val = (uint16_t)(val & EMAC_MDIO_COMM_DATA);
                        DELAY(5);
                        break;
                }
        }
        if (val & EMAC_MDIO_COMM_START_BUSY) {
-               PMD_DRV_LOG(DEBUG, "read phy register failed");
+               ELINK_DEBUG_P0(sc, "read phy register failed");
 
                *ret_val = 0;
                rc = ELINK_STATUS_TIMEOUT;
@@ -2426,14 +3496,17 @@ static elink_status_t elink_cl22_read(struct bnx2x_softc *sc,
 /*                     CL45 access functions                     */
 /******************************************************************/
 static elink_status_t elink_cl45_read(struct bnx2x_softc *sc,
-                                     struct elink_phy *phy, uint8_t devad,
-                                     uint16_t reg, uint16_t * ret_val)
+                          struct elink_phy *phy,
+                          uint8_t devad, uint16_t reg, uint16_t *ret_val)
 {
        uint32_t val;
        uint16_t i;
        elink_status_t rc = ELINK_STATUS_OK;
+       uint32_t chip_id;
        if (phy->flags & ELINK_FLAGS_MDC_MDIO_WA_G) {
-               elink_set_mdio_clk(sc, phy->mdio_ctrl);
+               chip_id = (REG_RD(sc, MISC_REG_CHIP_NUM) << 16) |
+                         ((REG_RD(sc, MISC_REG_CHIP_REV) & 0xf) << 12);
+               elink_set_mdio_clk(sc, chip_id, phy->mdio_ctrl);
        }
 
        if (phy->flags & ELINK_FLAGS_MDC_MDIO_WA_B0)
@@ -2441,7 +3514,8 @@ static elink_status_t elink_cl45_read(struct bnx2x_softc *sc,
                              EMAC_MDIO_STATUS_10MB);
        /* Address */
        val = ((phy->addr << 21) | (devad << 16) | reg |
-              EMAC_MDIO_COMM_COMMAND_ADDRESS | EMAC_MDIO_COMM_START_BUSY);
+              EMAC_MDIO_COMM_COMMAND_ADDRESS |
+              EMAC_MDIO_COMM_START_BUSY);
        REG_WR(sc, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_COMM, val);
 
        for (i = 0; i < 50; i++) {
@@ -2454,8 +3528,9 @@ static elink_status_t elink_cl45_read(struct bnx2x_softc *sc,
                }
        }
        if (val & EMAC_MDIO_COMM_START_BUSY) {
-               PMD_DRV_LOG(DEBUG, "read phy register failed");
-               elink_cb_event_log(sc, ELINK_LOG_ID_MDIO_ACCESS_TIMEOUT);       // "MDC/MDIO access timeout"
+               ELINK_DEBUG_P0(sc, "read phy register failed");
+               elink_cb_event_log(sc, ELINK_LOG_ID_MDIO_ACCESS_TIMEOUT);
+               /* "MDC/MDIO access timeout" */
 
                *ret_val = 0;
                rc = ELINK_STATUS_TIMEOUT;
@@ -2472,14 +3547,16 @@ static elink_status_t elink_cl45_read(struct bnx2x_softc *sc,
                        val = REG_RD(sc, phy->mdio_ctrl +
                                     EMAC_REG_EMAC_MDIO_COMM);
                        if (!(val & EMAC_MDIO_COMM_START_BUSY)) {
-                               *ret_val =
-                                   (uint16_t) (val & EMAC_MDIO_COMM_DATA);
+                               *ret_val = (uint16_t)
+                                               (val & EMAC_MDIO_COMM_DATA);
                                break;
                        }
                }
                if (val & EMAC_MDIO_COMM_START_BUSY) {
-                       PMD_DRV_LOG(DEBUG, "read phy register failed");
-                       elink_cb_event_log(sc, ELINK_LOG_ID_MDIO_ACCESS_TIMEOUT);       // "MDC/MDIO access timeout"
+                       ELINK_DEBUG_P0(sc, "read phy register failed");
+                       elink_cb_event_log(sc,
+                                          ELINK_LOG_ID_MDIO_ACCESS_TIMEOUT);
+                       /* "MDC/MDIO access timeout" */
 
                        *ret_val = 0;
                        rc = ELINK_STATUS_TIMEOUT;
@@ -2501,14 +3578,17 @@ static elink_status_t elink_cl45_read(struct bnx2x_softc *sc,
 }
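
Clause-45 access is two-phased, which is why elink_cl45_read issues two commands: an ADDRESS cycle that latches (devad, reg), then a READ cycle that returns the data. A sketch of both command words (opcode and busy-flag values illustrative):

#include <stdint.h>

#define MDIO_OP_ADDRESS (0u << 26) /* illustrative opcode/flag values */
#define MDIO_OP_READ    (2u << 26)
#define MDIO_START_BUSY (1u << 29)

uint32_t cl45_addr_cmd(uint8_t phy_addr, uint8_t devad, uint16_t reg)
{
	return ((uint32_t)phy_addr << 21) | ((uint32_t)devad << 16) | reg |
	       MDIO_OP_ADDRESS | MDIO_START_BUSY;
}

uint32_t cl45_read_cmd(uint8_t phy_addr, uint8_t devad)
{
	return ((uint32_t)phy_addr << 21) | ((uint32_t)devad << 16) |
	       MDIO_OP_READ | MDIO_START_BUSY;
}
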
 
 static elink_status_t elink_cl45_write(struct bnx2x_softc *sc,
-                                      struct elink_phy *phy, uint8_t devad,
-                                      uint16_t reg, uint16_t val)
+                           struct elink_phy *phy,
+                           uint8_t devad, uint16_t reg, uint16_t val)
 {
        uint32_t tmp;
        uint8_t i;
        elink_status_t rc = ELINK_STATUS_OK;
+       uint32_t chip_id;
+
        if (phy->flags & ELINK_FLAGS_MDC_MDIO_WA_G) {
-               elink_set_mdio_clk(sc, phy->mdio_ctrl);
+               chip_id = (REG_RD(sc, MISC_REG_CHIP_NUM) << 16) |
+                         ((REG_RD(sc, MISC_REG_CHIP_REV) & 0xf) << 12);
+               elink_set_mdio_clk(sc, chip_id, phy->mdio_ctrl);
        }
 
        if (phy->flags & ELINK_FLAGS_MDC_MDIO_WA_B0)
@@ -2517,7 +3597,8 @@ static elink_status_t elink_cl45_write(struct bnx2x_softc *sc,
 
        /* Address */
        tmp = ((phy->addr << 21) | (devad << 16) | reg |
-              EMAC_MDIO_COMM_COMMAND_ADDRESS | EMAC_MDIO_COMM_START_BUSY);
+              EMAC_MDIO_COMM_COMMAND_ADDRESS |
+              EMAC_MDIO_COMM_START_BUSY);
        REG_WR(sc, phy->mdio_ctrl + EMAC_REG_EMAC_MDIO_COMM, tmp);
 
        for (i = 0; i < 50; i++) {
@@ -2530,8 +3611,9 @@ static elink_status_t elink_cl45_write(struct bnx2x_softc *sc,
                }
        }
        if (tmp & EMAC_MDIO_COMM_START_BUSY) {
-               PMD_DRV_LOG(DEBUG, "write phy register failed");
-               elink_cb_event_log(sc, ELINK_LOG_ID_MDIO_ACCESS_TIMEOUT);       // "MDC/MDIO access timeout"
+               ELINK_DEBUG_P0(sc, "write phy register failed");
+               elink_cb_event_log(sc, ELINK_LOG_ID_MDIO_ACCESS_TIMEOUT);
+               /* "MDC/MDIO access timeout" */
 
                rc = ELINK_STATUS_TIMEOUT;
        } else {
@@ -2552,8 +3634,10 @@ static elink_status_t elink_cl45_write(struct bnx2x_softc *sc,
                        }
                }
                if (tmp & EMAC_MDIO_COMM_START_BUSY) {
-                       PMD_DRV_LOG(DEBUG, "write phy register failed");
-                       elink_cb_event_log(sc, ELINK_LOG_ID_MDIO_ACCESS_TIMEOUT);       // "MDC/MDIO access timeout"
+                       ELINK_DEBUG_P0(sc, "write phy register failed");
+                       elink_cb_event_log(sc,
+                                          ELINK_LOG_ID_MDIO_ACCESS_TIMEOUT);
+                       /* "MDC/MDIO access timeout" */
 
                        rc = ELINK_STATUS_TIMEOUT;
                }
@@ -2580,14 +3664,14 @@ static uint8_t elink_eee_has_cap(struct elink_params *params)
        struct bnx2x_softc *sc = params->sc;
 
        if (REG_RD(sc, params->shmem2_base) <=
-           offsetof(struct shmem2_region, eee_status[params->port]))
-                return 0;
+                  offsetof(struct shmem2_region, eee_status[params->port]))
+               return 0;
 
        return 1;
 }
 
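
elink_eee_has_cap() above gates on the offset of eee_status relative to
the first dword of shmem2. Assuming that first dword holds the size of
the region (an assumption; the layout itself is not shown in this hunk),
the probe reduces to a bounds check:

    #include <stddef.h>
    #include <stdint.h>

    /* shmem2_size: assumed to be the first dword of the shmem2 region */
    static int shmem2_has_field(uint32_t shmem2_size, size_t field_offset)
    {
            /* the field exists only if the region extends past it */
            return shmem2_size > field_offset;
    }
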
 static elink_status_t elink_eee_nvram_to_time(uint32_t nvram_mode,
-                                             uint32_t * idle_timer)
+                                             uint32_t *idle_timer)
 {
        switch (nvram_mode) {
        case PORT_FEAT_CFG_EEE_POWER_MODE_BALANCED:
@@ -2608,7 +3692,7 @@ static elink_status_t elink_eee_nvram_to_time(uint32_t nvram_mode,
 }
 
 static elink_status_t elink_eee_time_to_nvram(uint32_t idle_timer,
-                                             uint32_t * nvram_mode)
+                                             uint32_t *nvram_mode)
 {
        switch (idle_timer) {
        case ELINK_EEE_MODE_NVRAM_BALANCED_TIME:
@@ -2635,7 +3719,7 @@ static uint32_t elink_eee_calc_timer(struct elink_params *params)
 
        if (params->eee_mode & ELINK_EEE_MODE_OVERRIDE_NVRAM) {
                if (params->eee_mode & ELINK_EEE_MODE_OUTPUT_TIME) {
                        /* time value in eee_mode --> used directly */
                        eee_idle = params->eee_mode & ELINK_EEE_MODE_TIMER_MASK;
                } else {
                        /* hsi value in eee_mode --> time */
@@ -2645,12 +3729,11 @@ static uint32_t elink_eee_calc_timer(struct elink_params *params)
                                return 0;
                }
        } else {
                /* hsi values in nvram --> time */
                eee_mode = ((REG_RD(sc, params->shmem_base +
-                                   offsetof(struct shmem_region,
-                                            dev_info.port_feature_config
-                                            [params->
-                                             port].eee_power_mode)) &
+                                   offsetof(struct shmem_region, dev_info.
+                                   port_feature_config[params->port].
+                                   eee_power_mode)) &
                             PORT_FEAT_CFG_EEE_POWER_MODE_MASK) >>
                            PORT_FEAT_CFG_EEE_POWER_MODE_SHIFT);
 
@@ -2662,7 +3745,7 @@ static uint32_t elink_eee_calc_timer(struct elink_params *params)
 }
 
 static elink_status_t elink_eee_set_timers(struct elink_params *params,
-                                          struct elink_vars *vars)
+                                  struct elink_vars *vars)
 {
        uint32_t eee_idle = 0, eee_mode;
        struct bnx2x_softc *sc = params->sc;
@@ -2675,7 +3758,7 @@ static elink_status_t elink_eee_set_timers(struct elink_params *params,
        } else if ((params->eee_mode & ELINK_EEE_MODE_ENABLE_LPI) &&
                   (params->eee_mode & ELINK_EEE_MODE_OVERRIDE_NVRAM) &&
                   (params->eee_mode & ELINK_EEE_MODE_OUTPUT_TIME)) {
-               PMD_DRV_LOG(DEBUG, "Error: Tx LPI is enabled with timer 0");
+               ELINK_DEBUG_P0(sc, "Error: Tx LPI is enabled with timer 0");
                return ELINK_STATUS_ERROR;
        }
 
@@ -2684,7 +3767,7 @@ static elink_status_t elink_eee_set_timers(struct elink_params *params,
                /* eee_idle in 1u --> eee_status in 16u */
                eee_idle >>= 4;
                vars->eee_status |= (eee_idle & SHMEM_EEE_TIMER_MASK) |
-                   SHMEM_EEE_TIME_OUTPUT_BIT;
+                                   SHMEM_EEE_TIME_OUTPUT_BIT;
        } else {
                if (elink_eee_time_to_nvram(eee_idle, &eee_mode))
                        return ELINK_STATUS_ERROR;
@@ -2695,8 +3778,7 @@ static elink_status_t elink_eee_set_timers(struct elink_params *params,
 }
 
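
The eee_idle >>= 4 step in the elink_eee_set_timers() fragment above is
a unit conversion: the timer is computed in 1 us units (per the comment)
but shmem stores it in 16 us units. A sketch of just that step, with a
placeholder mask standing in for the real SHMEM_EEE_TIMER_MASK:

    #include <stdint.h>

    #define EEE_TIMER_MASK 0x00ffffff   /* placeholder; the real mask is
                                         * SHMEM_EEE_TIMER_MASK from the
                                         * shmem headers
                                         */

    /* 1 us units -> 16 us units, clipped to the shmem timer field */
    static uint32_t eee_status_timer(uint32_t eee_idle_1us)
    {
            return (eee_idle_1us >> 4) & EEE_TIMER_MASK;
    }
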
 static elink_status_t elink_eee_initial_config(struct elink_params *params,
-                                              struct elink_vars *vars,
-                                              uint8_t mode)
+                                    struct elink_vars *vars, uint8_t mode)
 {
        vars->eee_status |= ((uint32_t) mode) << SHMEM_EEE_SUPPORTED_SHIFT;
 
@@ -2715,8 +3797,8 @@ static elink_status_t elink_eee_initial_config(struct elink_params *params,
 }
 
 static elink_status_t elink_eee_disable(struct elink_phy *phy,
-                                       struct elink_params *params,
-                                       struct elink_vars *vars)
+                               struct elink_params *params,
+                               struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
 
@@ -2731,9 +3813,8 @@ static elink_status_t elink_eee_disable(struct elink_phy *phy,
 }
 
 static elink_status_t elink_eee_advertise(struct elink_phy *phy,
-                                         struct elink_params *params,
-                                         struct elink_vars *vars,
-                                         uint8_t modes)
+                                 struct elink_params *params,
+                                 struct elink_vars *vars, uint8_t modes)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t val = 0;
@@ -2742,11 +3823,11 @@ static elink_status_t elink_eee_advertise(struct elink_phy *phy,
        REG_WR(sc, MISC_REG_CPMU_LP_MASK_EXT_P0 + (params->port << 2), 0xfc20);
 
        if (modes & SHMEM_EEE_10G_ADV) {
-               PMD_DRV_LOG(DEBUG, "Advertise 10GBase-T EEE");
+               ELINK_DEBUG_P0(sc, "Advertise 10GBase-T EEE");
                val |= 0x8;
        }
        if (modes & SHMEM_EEE_1G_ADV) {
-               PMD_DRV_LOG(DEBUG, "Advertise 1GBase-T EEE");
+               ELINK_DEBUG_P0(sc, "Advertise 1GBase-T EEE");
                val |= 0x4;
        }
 
@@ -2770,8 +3851,8 @@ static void elink_update_mng_eee(struct elink_params *params,
 }
 
 static void elink_eee_an_resolve(struct elink_phy *phy,
-                                struct elink_params *params,
-                                struct elink_vars *vars)
+                                 struct elink_params *params,
+                                 struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t adv = 0, lp = 0;
@@ -2786,7 +3867,7 @@ static void elink_eee_an_resolve(struct elink_phy *phy,
                if (adv & 0x2) {
                        if (vars->line_speed == ELINK_SPEED_100)
                                neg = 1;
-                       PMD_DRV_LOG(DEBUG, "EEE negotiated - 100M");
+                       ELINK_DEBUG_P0(sc, "EEE negotiated - 100M");
                }
        }
        if (lp & 0x14) {
@@ -2794,7 +3875,7 @@ static void elink_eee_an_resolve(struct elink_phy *phy,
                if (adv & 0x14) {
                        if (vars->line_speed == ELINK_SPEED_1000)
                                neg = 1;
-                       PMD_DRV_LOG(DEBUG, "EEE negotiated - 1G");
+                       ELINK_DEBUG_P0(sc, "EEE negotiated - 1G");
                }
        }
        if (lp & 0x68) {
@@ -2802,7 +3883,7 @@ static void elink_eee_an_resolve(struct elink_phy *phy,
                if (adv & 0x68) {
                        if (vars->line_speed == ELINK_SPEED_10000)
                                neg = 1;
-                       PMD_DRV_LOG(DEBUG, "EEE negotiated - 10G");
+                       ELINK_DEBUG_P0(sc, "EEE negotiated - 10G");
                }
        }
 
@@ -2810,7 +3891,7 @@ static void elink_eee_an_resolve(struct elink_phy *phy,
        vars->eee_status |= (lp_adv << SHMEM_EEE_LP_ADV_STATUS_SHIFT);
 
        if (neg) {
-               PMD_DRV_LOG(DEBUG, "EEE is active");
+               ELINK_DEBUG_P0(sc, "EEE is active");
                vars->eee_status |= SHMEM_EEE_ACTIVE_BIT;
        }
 }
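
The resolve logic above applies one rule per speed: EEE is considered
negotiated only when the link partner bit and the local advertisement
bit agree and the current line speed matches. Condensed into a
standalone predicate (bit masks 0x2/0x14/0x68 copied from the hunk;
plain Mb/s speeds used for illustration):

    #include <stdint.h>

    static int eee_negotiated(uint16_t adv, uint16_t lp, uint32_t speed_mbps)
    {
            if ((lp & 0x2) && (adv & 0x2) && speed_mbps == 100)
                    return 1;       /* 100M EEE */
            if ((lp & 0x14) && (adv & 0x14) && speed_mbps == 1000)
                    return 1;       /* 1G EEE */
            if ((lp & 0x68) && (adv & 0x68) && speed_mbps == 10000)
                    return 1;       /* 10G EEE */
            return 0;
    }
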
@@ -2831,37 +3912,34 @@ static void elink_bsc_module_sel(struct elink_params *params)
                                    dev_info.shared_hw_config.board));
        i2c_pins[I2C_BSC0] = board_cfg & SHARED_HW_CFG_E3_I2C_MUX0_MASK;
        i2c_pins[I2C_BSC1] = (board_cfg & SHARED_HW_CFG_E3_I2C_MUX1_MASK) >>
-           SHARED_HW_CFG_E3_I2C_MUX1_SHIFT;
+                       SHARED_HW_CFG_E3_I2C_MUX1_SHIFT;
 
        /* Read I2C output value */
        sfp_ctrl = REG_RD(sc, params->shmem_base +
                          offsetof(struct shmem_region,
-                                  dev_info.port_hw_config[port].
-                                  e3_cmn_pin_cfg));
+                                dev_info.port_hw_config[port].e3_cmn_pin_cfg));
        i2c_val[I2C_BSC0] = (sfp_ctrl & PORT_HW_CFG_E3_I2C_MUX0_MASK) > 0;
        i2c_val[I2C_BSC1] = (sfp_ctrl & PORT_HW_CFG_E3_I2C_MUX1_MASK) > 0;
-       PMD_DRV_LOG(DEBUG, "Setting BSC switch");
+       ELINK_DEBUG_P0(sc, "Setting BSC switch");
        for (idx = 0; idx < I2C_SWITCH_WIDTH; idx++)
                elink_set_cfg_pin(sc, i2c_pins[idx], i2c_val[idx]);
 }
 
-static elink_status_t elink_bsc_read(struct elink_params *params,
-                                    struct bnx2x_softc *sc,
-                                    uint8_t sl_devid,
-                                    uint16_t sl_addr,
-                                    uint8_t lc_addr,
-                                    uint8_t xfer_cnt, uint32_t * data_array)
+static elink_status_t elink_bsc_read(struct bnx2x_softc *sc,
+                         uint8_t sl_devid,
+                         uint16_t sl_addr,
+                         uint8_t lc_addr,
+                         uint8_t xfer_cnt,
+                         uint32_t *data_array)
 {
        uint32_t val, i;
        elink_status_t rc = ELINK_STATUS_OK;
 
        if (xfer_cnt > 16) {
-               PMD_DRV_LOG(DEBUG, "invalid xfer_cnt %d. Max is 16 bytes",
-                           xfer_cnt);
+               ELINK_DEBUG_P1(sc, "invalid xfer_cnt %d. Max is 16 bytes",
+                              xfer_cnt);
                return ELINK_STATUS_ERROR;
        }
-       if (params)
-               elink_bsc_module_sel(params);
 
        xfer_cnt = 16 - lc_addr;
 
@@ -2874,11 +3952,11 @@ static elink_status_t elink_bsc_read(struct elink_params *params,
        val = (sl_devid << 16) | sl_addr;
        REG_WR(sc, MCP_REG_MCPR_IMC_SLAVE_CONTROL, val);
 
        /* Start xfer with 0 byte to update the address pointer ??? */
        val = (MCPR_IMC_COMMAND_ENABLE) |
-           (MCPR_IMC_COMMAND_WRITE_OP <<
-            MCPR_IMC_COMMAND_OPERATION_BITSHIFT) |
-           (lc_addr << MCPR_IMC_COMMAND_TRANSFER_ADDRESS_BITSHIFT) | (0);
+             (MCPR_IMC_COMMAND_WRITE_OP <<
+               MCPR_IMC_COMMAND_OPERATION_BITSHIFT) |
+               (lc_addr << MCPR_IMC_COMMAND_TRANSFER_ADDRESS_BITSHIFT) | (0);
        REG_WR(sc, MCP_REG_MCPR_IMC_COMMAND, val);
 
        /* Poll for completion */
@@ -2888,8 +3966,8 @@ static elink_status_t elink_bsc_read(struct elink_params *params,
                DELAY(10);
                val = REG_RD(sc, MCP_REG_MCPR_IMC_COMMAND);
                if (i++ > 1000) {
-                       PMD_DRV_LOG(DEBUG, "wr 0 byte timed out after %d try",
-                                   i);
+                       ELINK_DEBUG_P1(sc, "wr 0 byte timed out after %d try",
+                                      i);
                        rc = ELINK_STATUS_TIMEOUT;
                        break;
                }
@@ -2899,10 +3977,10 @@ static elink_status_t elink_bsc_read(struct elink_params *params,
 
        /* Start xfer with read op */
        val = (MCPR_IMC_COMMAND_ENABLE) |
-           (MCPR_IMC_COMMAND_READ_OP <<
-            MCPR_IMC_COMMAND_OPERATION_BITSHIFT) |
-           (lc_addr << MCPR_IMC_COMMAND_TRANSFER_ADDRESS_BITSHIFT) |
-           (xfer_cnt);
+               (MCPR_IMC_COMMAND_READ_OP <<
+               MCPR_IMC_COMMAND_OPERATION_BITSHIFT) |
+               (lc_addr << MCPR_IMC_COMMAND_TRANSFER_ADDRESS_BITSHIFT) |
+                 (xfer_cnt);
        REG_WR(sc, MCP_REG_MCPR_IMC_COMMAND, val);
 
        /* Poll for completion */
@@ -2912,7 +3990,7 @@ static elink_status_t elink_bsc_read(struct elink_params *params,
                DELAY(10);
                val = REG_RD(sc, MCP_REG_MCPR_IMC_COMMAND);
                if (i++ > 1000) {
-                       PMD_DRV_LOG(DEBUG, "rd op timed out after %d try", i);
+                       ELINK_DEBUG_P1(sc, "rd op timed out after %d try", i);
                        rc = ELINK_STATUS_TIMEOUT;
                        break;
                }
@@ -2924,17 +4002,18 @@ static elink_status_t elink_bsc_read(struct elink_params *params,
                data_array[i] = REG_RD(sc, (MCP_REG_MCPR_IMC_DATAREG0 + i * 4));
 #ifdef __BIG_ENDIAN
                data_array[i] = ((data_array[i] & 0x000000ff) << 24) |
-                   ((data_array[i] & 0x0000ff00) << 8) |
-                   ((data_array[i] & 0x00ff0000) >> 8) |
-                   ((data_array[i] & 0xff000000) >> 24);
+                               ((data_array[i] & 0x0000ff00) << 8) |
+                               ((data_array[i] & 0x00ff0000) >> 8) |
+                               ((data_array[i] & 0xff000000) >> 24);
 #endif
        }
        return rc;
 }
 
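
The #ifdef __BIG_ENDIAN block above is an open-coded 32-bit byte swap of
each IMC data word. The equivalent free-standing form, shown only to
make the intent obvious (the driver keeps it inline):

    #include <stdint.h>

    static uint32_t swap32(uint32_t v)
    {
            return ((v & 0x000000ffu) << 24) |
                   ((v & 0x0000ff00u) << 8)  |
                   ((v & 0x00ff0000u) >> 8)  |
                   ((v & 0xff000000u) >> 24);
    }
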
 static void elink_cl45_read_or_write(struct bnx2x_softc *sc,
-                                    struct elink_phy *phy, uint8_t devad,
-                                    uint16_t reg, uint16_t or_val)
+                                    struct elink_phy *phy,
+                                    uint8_t devad, uint16_t reg,
+                                    uint16_t or_val)
 {
        uint16_t val;
        elink_cl45_read(sc, phy, devad, reg, &val);
@@ -2951,7 +4030,42 @@ static void elink_cl45_read_and_write(struct bnx2x_softc *sc,
        elink_cl45_write(sc, phy, devad, reg, val & and_val);
 }
 
-static uint8_t elink_get_warpcore_lane(struct elink_params *params)
+elink_status_t elink_phy_read(struct elink_params *params, uint8_t phy_addr,
+                  uint8_t devad, uint16_t reg, uint16_t *ret_val)
+{
+       uint8_t phy_index;
+       /* Probe for the phy according to the given phy_addr, and execute
+        * the read request on it
+        */
+       for (phy_index = 0; phy_index < params->num_phys; phy_index++) {
+               if (params->phy[phy_index].addr == phy_addr) {
+                       return elink_cl45_read(params->sc,
+                                              &params->phy[phy_index], devad,
+                                              reg, ret_val);
+               }
+       }
+       return ELINK_STATUS_ERROR;
+}
+
+elink_status_t elink_phy_write(struct elink_params *params, uint8_t phy_addr,
+                   uint8_t devad, uint16_t reg, uint16_t val)
+{
+       uint8_t phy_index;
+       /* Probe for the phy according to the given phy_addr, and execute
+        * the write request on it
+        */
+       for (phy_index = 0; phy_index < params->num_phys; phy_index++) {
+               if (params->phy[phy_index].addr == phy_addr) {
+                       return elink_cl45_write(params->sc,
+                                               &params->phy[phy_index], devad,
+                                               reg, val);
+               }
+       }
+       return ELINK_STATUS_ERROR;
+}
+
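
elink_phy_read() and elink_phy_write() above are new exported wrappers
that look a PHY up by its address before issuing the clause-45 access.
Caller-side sketch, assuming the elink headers are in scope; the devad
and register number here are purely illustrative:

    /* round-trip one register through the new helpers */
    static void phy_roundtrip(struct elink_params *params, uint8_t phy_addr)
    {
            uint16_t v;

            if (elink_phy_read(params, phy_addr, MDIO_AN_DEVAD,
                               0x1, &v) != ELINK_STATUS_OK)
                    return;         /* no probed PHY at that address */
            elink_phy_write(params, phy_addr, MDIO_AN_DEVAD, 0x1, v);
    }
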
+static uint8_t elink_get_warpcore_lane(__rte_unused struct elink_phy *phy,
+                                 struct elink_params *params)
 {
        uint8_t lane = 0;
        struct bnx2x_softc *sc = params->sc;
@@ -2985,14 +4099,16 @@ static uint8_t elink_get_warpcore_lane(struct elink_params *params)
                        port = port ^ 1;
 
                lane = (port << 1) + path;
-       } else {                /* Two port mode - no port swap */
+       } else { /* Two port mode - no port swap */
 
                /* Figure out path swap value */
-               path_swap_ovr = REG_RD(sc, MISC_REG_TWO_PORT_PATH_SWAP_OVWR);
+               path_swap_ovr =
+                       REG_RD(sc, MISC_REG_TWO_PORT_PATH_SWAP_OVWR);
                if (path_swap_ovr & 0x1) {
                        path_swap = (path_swap_ovr & 0x2);
                } else {
-                       path_swap = REG_RD(sc, MISC_REG_TWO_PORT_PATH_SWAP);
+                       path_swap =
+                               REG_RD(sc, MISC_REG_TWO_PORT_PATH_SWAP);
                }
                if (path_swap)
                        path = path ^ 1;
@@ -3002,6 +4118,7 @@ static uint8_t elink_get_warpcore_lane(struct elink_params *params)
        return lane;
 }
 
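
The two-port branch above uses a recurring override idiom: bit 0 of the
*_SWAP_OVWR register says whether the override is valid, bit 1 carries
the override value, and otherwise the hardware strap register decides.
As a pure function (register names abstracted away):

    #include <stdint.h>

    /* ovr: the *_SWAP_OVWR register, strap: the *_SWAP register */
    static uint32_t swap_setting(uint32_t ovr, uint32_t strap)
    {
            if (ovr & 0x1)                  /* override-enable bit */
                    return ovr & 0x2;       /* override value bit */
            return strap;           /* fall back to the hardware strap */
    }
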
+
 static void elink_set_aer_mmd(struct elink_params *params,
                              struct elink_phy *phy)
 {
@@ -3010,13 +4127,13 @@ static void elink_set_aer_mmd(struct elink_params *params,
        struct bnx2x_softc *sc = params->sc;
        ser_lane = ((params->lane_config &
                     PORT_HW_CFG_LANE_SWAP_CFG_MASTER_MASK) >>
-                   PORT_HW_CFG_LANE_SWAP_CFG_MASTER_SHIFT);
+                    PORT_HW_CFG_LANE_SWAP_CFG_MASTER_SHIFT);
 
        offset = (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT) ?
-           (phy->addr + ser_lane) : 0;
+               (phy->addr + ser_lane) : 0;
 
        if (USES_WARPCORE(sc)) {
-               aer_val = elink_get_warpcore_lane(params);
+               aer_val = elink_get_warpcore_lane(phy, params);
                /* In Dual-lane mode, two lanes are joined together,
                 * so in order to configure them, the AER broadcast method is
                 * used here.
@@ -3049,7 +4166,7 @@ static void elink_set_serdes_access(struct bnx2x_softc *sc, uint8_t port)
        DELAY(500);
        REG_WR(sc, emac_base + EMAC_REG_EMAC_MDIO_COMM, 0x245d000f);
        DELAY(500);
        /* Set Clause 45 */
        REG_WR(sc, NIG_REG_SERDES0_CTRL_MD_ST + port * 0x10, 0);
 }
 
@@ -3057,7 +4174,7 @@ static void elink_serdes_deassert(struct bnx2x_softc *sc, uint8_t port)
 {
        uint32_t val;
 
-       PMD_DRV_LOG(DEBUG, "elink_serdes_deassert");
+       ELINK_DEBUG_P0(sc, "elink_serdes_deassert");
 
        val = ELINK_SERDES_RESET_BITS << (port * 16);
 
@@ -3092,7 +4209,7 @@ static void elink_xgxs_deassert(struct elink_params *params)
        struct bnx2x_softc *sc = params->sc;
        uint8_t port;
        uint32_t val;
-       PMD_DRV_LOG(DEBUG, "elink_xgxs_deassert");
+       ELINK_DEBUG_P0(sc, "elink_xgxs_deassert");
        port = params->port;
 
        val = ELINK_XGXS_RESET_BITS << (port * 16);
@@ -3107,8 +4224,9 @@ static void elink_xgxs_deassert(struct elink_params *params)
 
 static void elink_calc_ieee_aneg_adv(struct elink_phy *phy,
                                     struct elink_params *params,
-                                    uint16_t * ieee_fc)
+                                    uint16_t *ieee_fc)
 {
+       struct bnx2x_softc *sc = params->sc;
        *ieee_fc = MDIO_COMBO_IEEE0_AUTO_NEG_ADV_FULL_DUPLEX;
        /* Resolve pause mode and advertisement. Please refer to Table
         * 28B-3 of the 802.3ab-1999 spec.
@@ -3118,12 +4236,12 @@ static void elink_calc_ieee_aneg_adv(struct elink_phy *phy,
        case ELINK_FLOW_CTRL_AUTO:
                switch (params->req_fc_auto_adv) {
                case ELINK_FLOW_CTRL_BOTH:
+               case ELINK_FLOW_CTRL_RX:
                        *ieee_fc |= MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH;
                        break;
-               case ELINK_FLOW_CTRL_RX:
                case ELINK_FLOW_CTRL_TX:
                        *ieee_fc |=
-                           MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC;
+                               MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC;
                        break;
                default:
                        break;
@@ -3143,16 +4261,18 @@ static void elink_calc_ieee_aneg_adv(struct elink_phy *phy,
                *ieee_fc |= MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_NONE;
                break;
        }
-       PMD_DRV_LOG(DEBUG, "ieee_fc = 0x%x", *ieee_fc);
+       ELINK_DEBUG_P1(sc, "ieee_fc = 0x%x", *ieee_fc);
 }
 
-static void set_phy_vars(struct elink_params *params, struct elink_vars *vars)
+static void set_phy_vars(struct elink_params *params,
+                        struct elink_vars *vars)
 {
+       struct bnx2x_softc *sc = params->sc;
        uint8_t actual_phy_idx, phy_index, link_cfg_idx;
        uint8_t phy_config_swapped = params->multi_phy_config &
-           PORT_HW_CFG_PHY_SWAPPED_ENABLED;
+                       PORT_HW_CFG_PHY_SWAPPED_ENABLED;
        for (phy_index = ELINK_INT_PHY; phy_index < params->num_phys;
-            phy_index++) {
+             phy_index++) {
                link_cfg_idx = ELINK_LINK_CONFIG_IDX(phy_index);
                actual_phy_idx = phy_index;
                if (phy_config_swapped) {
@@ -3162,26 +4282,26 @@ static void set_phy_vars(struct elink_params *params, struct elink_vars *vars)
                                actual_phy_idx = ELINK_EXT_PHY1;
                }
                params->phy[actual_phy_idx].req_flow_ctrl =
-                   params->req_flow_ctrl[link_cfg_idx];
+                       params->req_flow_ctrl[link_cfg_idx];
 
                params->phy[actual_phy_idx].req_line_speed =
-                   params->req_line_speed[link_cfg_idx];
+                       params->req_line_speed[link_cfg_idx];
 
                params->phy[actual_phy_idx].speed_cap_mask =
-                   params->speed_cap_mask[link_cfg_idx];
+                       params->speed_cap_mask[link_cfg_idx];
 
                params->phy[actual_phy_idx].req_duplex =
-                   params->req_duplex[link_cfg_idx];
+                       params->req_duplex[link_cfg_idx];
 
                if (params->req_line_speed[link_cfg_idx] ==
                    ELINK_SPEED_AUTO_NEG)
                        vars->link_status |= LINK_STATUS_AUTO_NEGOTIATE_ENABLED;
 
-               PMD_DRV_LOG(DEBUG, "req_flow_ctrl %x, req_line_speed %x,"
-                           " speed_cap_mask %x",
-                           params->phy[actual_phy_idx].req_flow_ctrl,
-                           params->phy[actual_phy_idx].req_line_speed,
-                           params->phy[actual_phy_idx].speed_cap_mask);
+               ELINK_DEBUG_P3(sc, "req_flow_ctrl %x, req_line_speed %x,"
+                          " speed_cap_mask %x",
+                          params->phy[actual_phy_idx].req_flow_ctrl,
+                          params->phy[actual_phy_idx].req_line_speed,
+                          params->phy[actual_phy_idx].speed_cap_mask);
        }
 }
 
@@ -3199,38 +4319,57 @@ static void elink_ext_phy_set_pause(struct elink_params *params,
        /* Please refer to Table 28B-3 of 802.3ab-1999 spec. */
        elink_calc_ieee_aneg_adv(phy, params, &vars->ieee_fc);
        if ((vars->ieee_fc &
-            MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC) ==
+           MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC) ==
            MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC) {
                val |= MDIO_AN_REG_ADV_PAUSE_ASYMMETRIC;
        }
        if ((vars->ieee_fc &
-            MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH) ==
+           MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH) ==
            MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH) {
                val |= MDIO_AN_REG_ADV_PAUSE_PAUSE;
        }
-       PMD_DRV_LOG(DEBUG, "Ext phy AN advertize 0x%x", val);
+       ELINK_DEBUG_P1(sc, "Ext phy AN advertise 0x%x", val);
        elink_cl45_write(sc, phy, MDIO_AN_DEVAD, MDIO_AN_REG_ADV_PAUSE, val);
 }
 
-static void elink_pause_resolve(struct elink_vars *vars, uint32_t pause_result)
-{                              /*  LD      LP   */
-       switch (pause_result) { /* ASYM P ASYM P */
-       case 0xb:               /*   1  0   1  1 */
+static void elink_pause_resolve(__rte_unused struct elink_phy *phy,
+                               struct elink_params *params,
+                               struct elink_vars *vars,
+                               uint32_t pause_result)
+{
+       struct bnx2x_softc *sc = params->sc;
+                                               /*  LD      LP   */
+       switch (pause_result) {                 /* ASYM P ASYM P */
+       case 0xb:                               /*   1  0   1  1 */
+               ELINK_DEBUG_P0(sc, "Flow Control: TX only");
                vars->flow_ctrl = ELINK_FLOW_CTRL_TX;
                break;
 
-       case 0xe:               /*   1  1   1  0 */
+       case 0xe:                               /*   1  1   1  0 */
+               ELINK_DEBUG_P0(sc, "Flow Control: RX only");
                vars->flow_ctrl = ELINK_FLOW_CTRL_RX;
                break;
 
-       case 0x5:               /*   0  1   0  1 */
-       case 0x7:               /*   0  1   1  1 */
-       case 0xd:               /*   1  1   0  1 */
-       case 0xf:               /*   1  1   1  1 */
+       case 0x5:                               /*   0  1   0  1 */
+       case 0x7:                               /*   0  1   1  1 */
+       case 0xd:                               /*   1  1   0  1 */
+       case 0xf:                               /*   1  1   1  1 */
+               /* If the user selected RX-only flow control even
+                * though both directions were advertised, enable
+                * RX only.
+                */
+
+               if (params->req_fc_auto_adv == ELINK_FLOW_CTRL_BOTH) {
+                       ELINK_DEBUG_P0(sc, "Flow Control: RX & TX");
                        vars->flow_ctrl = ELINK_FLOW_CTRL_BOTH;
+               } else {
+                       ELINK_DEBUG_P0(sc, "Flow Control: RX only");
+                       vars->flow_ctrl = ELINK_FLOW_CTRL_RX;
+               }
                break;
-
        default:
+               ELINK_DEBUG_P0(sc, "Flow Control: None");
+               vars->flow_ctrl = ELINK_FLOW_CTRL_NONE;
                break;
        }
        if (pause_result & (1 << 0))
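
The switch above encodes Table 28B-3: pause_result is a four-bit word
with local ASYM/PAUSE in bits 3:2 and partner ASYM/PAUSE in bits 1:0
(assembled in elink_ext_phy_update_adv_fc() below). Worked example: 0xb
is LD = 10 (asym only) against LP = 11 (both), which resolves to TX-only
pause. The same table as a free-standing sketch:

    enum fc_mode { FC_NONE, FC_RX, FC_TX, FC_BOTH };

    static enum fc_mode resolve_pause(unsigned int pause_result,
                                      int user_wants_both)
    {
            switch (pause_result) { /* LD:ASYM,P  LP:ASYM,P */
            case 0xb:               /*    1 0        1 1    */
                    return FC_TX;
            case 0xe:               /*    1 1        1 0    */
                    return FC_RX;
            case 0x5: case 0x7: case 0xd: case 0xf:
                    /* symmetric pause agreed; honour an RX-only request */
                    return user_wants_both ? FC_BOTH : FC_RX;
            default:
                    return FC_NONE;
            }
    }
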
@@ -3244,22 +4383,23 @@ static void elink_ext_phy_update_adv_fc(struct elink_phy *phy,
                                        struct elink_params *params,
                                        struct elink_vars *vars)
 {
-       uint16_t ld_pause;      /* local */
-       uint16_t lp_pause;      /* link partner */
+       uint16_t ld_pause;              /* local */
+       uint16_t lp_pause;              /* link partner */
        uint16_t pause_result;
        struct bnx2x_softc *sc = params->sc;
-       if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X54618SE) {
+       if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY2_TYPE_BNX2X54618SE) {
                elink_cl22_read(sc, phy, 0x4, &ld_pause);
                elink_cl22_read(sc, phy, 0x5, &lp_pause);
-       } else if (CHIP_IS_E3(sc) && ELINK_SINGLE_MEDIA_DIRECT(params)) {
-               uint8_t lane = elink_get_warpcore_lane(params);
+       } else if (CHIP_IS_E3(sc) &&
+               ELINK_SINGLE_MEDIA_DIRECT(params)) {
+               uint8_t lane = elink_get_warpcore_lane(phy, params);
                uint16_t gp_status, gp_mask;
                elink_cl45_read(sc, phy,
                                MDIO_AN_DEVAD, MDIO_WC_REG_GP2_STATUS_GP_2_4,
                                &gp_status);
                gp_mask = (MDIO_WC_REG_GP2_STATUS_GP_2_4_CL73_AN_CMPL |
                           MDIO_WC_REG_GP2_STATUS_GP_2_4_CL37_LP_AN_CAP) <<
-                   lane;
+                       lane;
                if ((gp_status & gp_mask) == gp_mask) {
                        elink_cl45_read(sc, phy, MDIO_AN_DEVAD,
                                        MDIO_AN_REG_ADV_PAUSE, &ld_pause);
@@ -3285,16 +4425,18 @@ static void elink_ext_phy_update_adv_fc(struct elink_phy *phy,
                                MDIO_AN_DEVAD,
                                MDIO_AN_REG_LP_AUTO_NEG, &lp_pause);
        }
-       pause_result = (ld_pause & MDIO_AN_REG_ADV_PAUSE_MASK) >> 8;
-       pause_result |= (lp_pause & MDIO_AN_REG_ADV_PAUSE_MASK) >> 10;
-       PMD_DRV_LOG(DEBUG, "Ext PHY pause result 0x%x", pause_result);
-       elink_pause_resolve(vars, pause_result);
+       pause_result = (ld_pause &
+                       MDIO_AN_REG_ADV_PAUSE_MASK) >> 8;
+       pause_result |= (lp_pause &
+                        MDIO_AN_REG_ADV_PAUSE_MASK) >> 10;
+       ELINK_DEBUG_P1(sc, "Ext PHY pause result 0x%x", pause_result);
+       elink_pause_resolve(phy, params, vars, pause_result);
 
 }
 
 static uint8_t elink_ext_phy_resolve_fc(struct elink_phy *phy,
-                                       struct elink_params *params,
-                                       struct elink_vars *vars)
+                                  struct elink_params *params,
+                                  struct elink_vars *vars)
 {
        uint8_t ret = 0;
        vars->flow_ctrl = ELINK_FLOW_CTRL_NONE;
@@ -3312,7 +4454,6 @@ static uint8_t elink_ext_phy_resolve_fc(struct elink_phy *phy,
        }
        return ret;
 }
-
 /******************************************************************/
 /*                     Warpcore section                          */
 /******************************************************************/
@@ -3321,19 +4462,31 @@ static uint8_t elink_ext_phy_resolve_fc(struct elink_phy *phy,
  * init configuration, and set/clear SGMII flag. Internal
  * phy init is done purely in phy_init stage.
  */
-#define WC_TX_DRIVER(post2, idriver, ipre) \
+#define WC_TX_DRIVER(post2, idriver, ipre, ifir) \
        ((post2 << MDIO_WC_REG_TX0_TX_DRIVER_POST2_COEFF_OFFSET) | \
         (idriver << MDIO_WC_REG_TX0_TX_DRIVER_IDRIVER_OFFSET) | \
-        (ipre << MDIO_WC_REG_TX0_TX_DRIVER_IPRE_DRIVER_OFFSET))
+        (ipre << MDIO_WC_REG_TX0_TX_DRIVER_IPRE_DRIVER_OFFSET) | \
+        (ifir << MDIO_WC_REG_TX0_TX_DRIVER_IFIR_OFFSET))
 
 #define WC_TX_FIR(post, main, pre) \
        ((post << MDIO_WC_REG_TX_FIR_TAP_POST_TAP_OFFSET) | \
         (main << MDIO_WC_REG_TX_FIR_TAP_MAIN_TAP_OFFSET) | \
         (pre << MDIO_WC_REG_TX_FIR_TAP_PRE_TAP_OFFSET))
 
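
WC_TX_DRIVER() grows a fourth field, ifir, packed alongside post2,
idriver and ipre at its own register offset; the fixed-value call sites
below pass 0 for it (leaving their encoded values unchanged), while the
SFI path feeds it from nvram. Packing sketch with placeholder offsets
(the real ones are the MDIO_WC_REG_TX0_TX_DRIVER_*_OFFSET constants):

    #include <stdint.h>

    /* placeholder offsets; real values come from the register headers */
    #define POST2_OFF   12
    #define IDRIVER_OFF  8
    #define IPRE_OFF     4
    #define IFIR_OFF     0

    static uint16_t wc_tx_driver(uint16_t post2, uint16_t idriver,
                                 uint16_t ipre, uint16_t ifir)
    {
            return (post2 << POST2_OFF) | (idriver << IDRIVER_OFF) |
                   (ipre << IPRE_OFF) | (ifir << IFIR_OFF);
    }
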
+static void elink_update_link_attr(struct elink_params *params,
+                                  uint32_t link_attr)
+{
+       struct bnx2x_softc *sc = params->sc;
+
+       if (SHMEM2_HAS(sc, link_attr_sync))
+               REG_WR(sc, params->shmem2_base +
+                      offsetof(struct shmem2_region,
+                               link_attr_sync[params->port]), link_attr);
+}
+
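
elink_update_link_attr() is the new single funnel through which the
per-port link attribute word reaches shmem2, guarded by SHMEM2_HAS() so
firmware layouts without link_attr_sync are left untouched. The KR2
enable/disable hunks below use it exactly like this:

    /* enable: set the KR2 flag and mirror it to shmem2 */
    params->link_attr_sync |= LINK_ATTR_SYNC_KR2_ENABLE;
    elink_update_link_attr(params, params->link_attr_sync);

    /* disable: clear the flag and mirror it again */
    params->link_attr_sync &= ~LINK_ATTR_SYNC_KR2_ENABLE;
    elink_update_link_attr(params, params->link_attr_sync);
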
 static void elink_warpcore_enable_AN_KR2(struct elink_phy *phy,
                                         struct elink_params *params,
-                                        struct elink_vars *vars)
+                                        __rte_unused struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t i;
@@ -3356,7 +4509,7 @@ static void elink_warpcore_enable_AN_KR2(struct elink_phy *phy,
                {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_BAM_CODE, 0x0157},
                {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_UD_CODE, 0x0620}
        };
-       PMD_DRV_LOG(DEBUG, "Enabling 20G-KR2");
+       ELINK_DEBUG_P0(sc, "Enabling 20G-KR2");
 
        elink_cl45_read_or_write(sc, phy, MDIO_WC_DEVAD,
                                 MDIO_WC_REG_CL49_USERB0_CTRL, (3 << 6));
@@ -3366,15 +4519,16 @@ static void elink_warpcore_enable_AN_KR2(struct elink_phy *phy,
                                 reg_set[i].val);
 
        /* Start KR2 work-around timer which handles BNX2X8073 link-partner */
-       vars->link_attr_sync |= LINK_ATTR_SYNC_KR2_ENABLE;
-       elink_update_link_attr(params, vars->link_attr_sync);
+       params->link_attr_sync |= LINK_ATTR_SYNC_KR2_ENABLE;
+       elink_update_link_attr(params, params->link_attr_sync);
 }
 
 static void elink_disable_kr2(struct elink_params *params,
-                             struct elink_vars *vars, struct elink_phy *phy)
+                             struct elink_vars *vars,
+                             struct elink_phy *phy)
 {
        struct bnx2x_softc *sc = params->sc;
-       uint32_t i;
+       int i;
        static struct elink_reg_set reg_set[] = {
                /* Step 1 - Program the TX/RX alignment markers */
                {MDIO_WC_DEVAD, MDIO_WC_REG_CL82_USERB1_TX_CTRL5, 0x7690},
@@ -3393,13 +4547,13 @@ static void elink_disable_kr2(struct elink_params *params,
                {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_BAM_CODE, 0x0002},
                {MDIO_WC_DEVAD, MDIO_WC_REG_ETA_CL73_LD_UD_CODE, 0x0000}
        };
-       PMD_DRV_LOG(DEBUG, "Disabling 20G-KR2");
+       ELINK_DEBUG_P0(sc, "Disabling 20G-KR2");
 
-       for (i = 0; i < ARRAY_SIZE(reg_set); i++)
+       for (i = 0; i < (int)ARRAY_SIZE(reg_set); i++)
                elink_cl45_write(sc, phy, reg_set[i].devad, reg_set[i].reg,
                                 reg_set[i].val);
-       vars->link_attr_sync &= ~LINK_ATTR_SYNC_KR2_ENABLE;
-       elink_update_link_attr(params, vars->link_attr_sync);
+       params->link_attr_sync &= ~LINK_ATTR_SYNC_KR2_ENABLE;
+       elink_update_link_attr(params, params->link_attr_sync);
 
        vars->check_kr2_recovery_cnt = ELINK_CHECK_KR2_RECOVERY_CNT;
 }
@@ -3409,7 +4563,7 @@ static void elink_warpcore_set_lpi_passthrough(struct elink_phy *phy,
 {
        struct bnx2x_softc *sc = params->sc;
 
-       PMD_DRV_LOG(DEBUG, "Configure WC for LPI pass through");
+       ELINK_DEBUG_P0(sc, "Configure WC for LPI pass through");
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
                         MDIO_WC_REG_EEE_COMBO_CONTROL0, 0x7c);
        elink_cl45_read_or_write(sc, phy, MDIO_WC_DEVAD,
@@ -3421,7 +4575,7 @@ static void elink_warpcore_restart_AN_KR(struct elink_phy *phy,
 {
        /* Restart autoneg on the leading lane only */
        struct bnx2x_softc *sc = params->sc;
-       uint16_t lane = elink_get_warpcore_lane(params);
+       uint16_t lane = elink_get_warpcore_lane(phy, params);
        CL22_WR_OVER_CL45(sc, phy, MDIO_REG_BANK_AER_BLOCK,
                          MDIO_AER_BLOCK_AER_REG, lane);
        elink_cl45_write(sc, phy, MDIO_AN_DEVAD,
@@ -3433,9 +4587,9 @@ static void elink_warpcore_restart_AN_KR(struct elink_phy *phy,
 
 static void elink_warpcore_enable_AN_KR(struct elink_phy *phy,
                                        struct elink_params *params,
-                                       struct elink_vars *vars)
-{
-       uint16_t lane, i, cl72_ctrl, an_adv = 0;
+                                       struct elink_vars *vars)
+{
+       uint16_t lane, i, cl72_ctrl, an_adv = 0, val;
+       uint32_t wc_lane_config;
        struct bnx2x_softc *sc = params->sc;
        static struct elink_reg_set reg_set[] = {
                {MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2, 0x7},
@@ -3447,7 +4601,7 @@ static void elink_warpcore_enable_AN_KR(struct elink_phy *phy,
                {MDIO_PMA_DEVAD, MDIO_WC_REG_PMD_KR_CONTROL, 0x2},
                {MDIO_WC_DEVAD, MDIO_WC_REG_CL72_USERB0_CL72_TX_FIR_TAP, 0},
        };
-       PMD_DRV_LOG(DEBUG, "Enable Auto Negotiation for KR");
+       ELINK_DEBUG_P0(sc, "Enable Auto Negotiation for KR");
        /* Set to default registers that may be overridden by 10G force */
        for (i = 0; i < ARRAY_SIZE(reg_set); i++)
                elink_cl45_write(sc, phy, reg_set[i].devad, reg_set[i].reg,
@@ -3469,11 +4623,11 @@ static void elink_warpcore_enable_AN_KR(struct elink_phy *phy,
 
                /* Enable CL37 1G Parallel Detect */
                elink_cl45_read_or_write(sc, phy, MDIO_WC_DEVAD, addr, 0x1);
-               PMD_DRV_LOG(DEBUG, "Advertize 1G");
+               ELINK_DEBUG_P0(sc, "Advertise 1G");
        }
        if (((vars->line_speed == ELINK_SPEED_AUTO_NEG) &&
             (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)) ||
            (vars->line_speed == ELINK_SPEED_10000)) {
                /* Check adding advertisement for 10G KR */
                an_adv |= (1 << 7);
                /* Enable 10G Parallel Detect */
@@ -3483,23 +4637,25 @@ static void elink_warpcore_enable_AN_KR(struct elink_phy *phy,
                elink_cl45_write(sc, phy, MDIO_AN_DEVAD,
                                 MDIO_WC_REG_PAR_DET_10G_CTRL, 1);
                elink_set_aer_mmd(params, phy);
-               PMD_DRV_LOG(DEBUG, "Advertize 10G");
+               ELINK_DEBUG_P0(sc, "Advertise 10G");
        }
 
        /* Set Transmit PMD settings */
-       lane = elink_get_warpcore_lane(params);
+       lane = elink_get_warpcore_lane(phy, params);
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
                         MDIO_WC_REG_TX0_TX_DRIVER + 0x10 * lane,
-                        WC_TX_DRIVER(0x02, 0x06, 0x09));
+                        WC_TX_DRIVER(0x02, 0x06, 0x09, 0));
        /* Configure the next lane if dual mode */
        if (phy->flags & ELINK_FLAGS_WC_DUAL_MODE)
                elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
                                 MDIO_WC_REG_TX0_TX_DRIVER + 0x10 * (lane + 1),
-                                WC_TX_DRIVER(0x02, 0x06, 0x09));
+                                WC_TX_DRIVER(0x02, 0x06, 0x09, 0));
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
-                        MDIO_WC_REG_CL72_USERB0_CL72_OS_DEF_CTRL, 0x03f0);
+                        MDIO_WC_REG_CL72_USERB0_CL72_OS_DEF_CTRL,
+                        0x03f0);
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
-                        MDIO_WC_REG_CL72_USERB0_CL72_2P5_DEF_CTRL, 0x03f0);
+                        MDIO_WC_REG_CL72_USERB0_CL72_2P5_DEF_CTRL,
+                        0x03f0);
 
        /* Advertised speeds */
        elink_cl45_write(sc, phy, MDIO_AN_DEVAD,
@@ -3513,14 +4669,13 @@ static void elink_warpcore_enable_AN_KR(struct elink_phy *phy,
 
        /* Enable CL37 BAM */
        if (REG_RD(sc, params->shmem_base +
-                  offsetof(struct shmem_region,
-                           dev_info.port_hw_config[params->port].
-                           default_cfg)) &
+                  offsetof(struct shmem_region, dev_info.
+                           port_hw_config[params->port].default_cfg)) &
            PORT_HW_CFG_ENABLE_BAM_ON_KR_ENABLED) {
                elink_cl45_read_or_write(sc, phy, MDIO_WC_DEVAD,
                                         MDIO_WC_REG_DIGITAL6_MP5_NEXTPAGECTRL,
                                         1);
-               PMD_DRV_LOG(DEBUG, "Enable CL37 BAM on KR");
+               ELINK_DEBUG_P0(sc, "Enable CL37 BAM on KR");
        }
 
        /* Advertise pause */
@@ -3531,7 +4686,7 @@ static void elink_warpcore_enable_AN_KR(struct elink_phy *phy,
 
        /* Over 1G - AN local device user page 1 */
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
-                        MDIO_WC_REG_DIGITAL3_UP1, 0x1f);
+                       MDIO_WC_REG_DIGITAL3_UP1, 0x1f);
 
        if (((phy->req_line_speed == ELINK_SPEED_AUTO_NEG) &&
             (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)) ||
@@ -3542,7 +4697,8 @@ static void elink_warpcore_enable_AN_KR(struct elink_phy *phy,
 
                elink_cl45_read_or_write(sc, phy, MDIO_WC_DEVAD,
                                         MDIO_WC_REG_RX1_PCI_CTRL +
-                                        (0x10 * lane), (1 << 11));
+                                        (0x10 * lane),
+                                        (1 << 11));
 
                elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
                                 MDIO_WC_REG_XGXS_X2_CONTROL3, 0x7);
@@ -3550,6 +4706,31 @@ static void elink_warpcore_enable_AN_KR(struct elink_phy *phy,
 
                elink_warpcore_enable_AN_KR2(phy, params, vars);
        } else {
+               /* Enable Auto-Detect to support 1G over CL37 as well */
+               elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
+                                MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X1, 0x10);
+               wc_lane_config = REG_RD(sc, params->shmem_base +
+                                       offsetof(struct shmem_region, dev_info.
+                                       shared_hw_config.wc_lane_config));
+               elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
+                               MDIO_WC_REG_RX0_PCI_CTRL + (lane << 4), &val);
+               /* Force cl48 sync_status LOW to avoid getting stuck in CL73
+                * parallel-detect loop when CL73 and CL37 are enabled.
+                */
+               val |= 1 << 11;
+
+               /* Restore Polarity settings in case it was run over by
+                * previous link owner
+                */
+               if (wc_lane_config &
+                   (SHARED_HW_CFG_RX_LANE0_POL_FLIP_ENABLED << lane))
+                       val |= 3 << 2;
+               else
+                       val &= ~(3 << 2);
+               elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
+                                MDIO_WC_REG_RX0_PCI_CTRL + (lane << 4),
+                                val);
+
                elink_disable_kr2(params, vars, phy);
        }
 
@@ -3558,7 +4739,8 @@ static void elink_warpcore_enable_AN_KR(struct elink_phy *phy,
 }
 
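
The new else branch above touches the lane's RX control register twice
over: it forces cl48 sync_status low (bit 11) so CL73 parallel detect
cannot deadlock while CL37 is also enabled, and it re-applies the nvram
RX polarity flip (bits 3:2) in case a previous link owner changed it.
Isolated as a helper; pol_flip_bit0 is a stand-in for
SHARED_HW_CFG_RX_LANE0_POL_FLIP_ENABLED:

    #include <stdint.h>

    static uint16_t fixup_rx_pci_ctrl(uint16_t val, uint32_t wc_lane_config,
                                      uint8_t lane, uint32_t pol_flip_bit0)
    {
            /* force cl48 sync_status low (CL73 + CL37 workaround) */
            val |= 1 << 11;

            /* restore this lane's RX polarity from nvram */
            if (wc_lane_config & (pol_flip_bit0 << lane))
                    val |= 3 << 2;
            else
                    val &= ~(3 << 2);
            return val;
    }
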
 static void elink_warpcore_set_10G_KR(struct elink_phy *phy,
-                                     struct elink_params *params)
+                                     struct elink_params *params,
+                                     __rte_unused struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t val16, i, lane;
@@ -3566,7 +4748,7 @@ static void elink_warpcore_set_10G_KR(struct elink_phy *phy,
                /* Disable Autoneg */
                {MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2, 0x7},
                {MDIO_WC_DEVAD, MDIO_WC_REG_CL72_USERB0_CL72_MISC1_CONTROL,
-                0x3f00},
+                       0x3f00},
                {MDIO_AN_DEVAD, MDIO_WC_REG_AN_IEEE1BLK_AN_ADVERTISEMENT1, 0},
                {MDIO_AN_DEVAD, MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x0},
                {MDIO_WC_DEVAD, MDIO_WC_REG_DIGITAL3_UP1, 0x1},
@@ -3579,7 +4761,7 @@ static void elink_warpcore_set_10G_KR(struct elink_phy *phy,
                elink_cl45_write(sc, phy, reg_set[i].devad, reg_set[i].reg,
                                 reg_set[i].val);
 
-       lane = elink_get_warpcore_lane(params);
+       lane = elink_get_warpcore_lane(phy, params);
        /* Global registers */
        CL22_WR_OVER_CL45(sc, phy, MDIO_REG_BANK_AER_BLOCK,
                          MDIO_AER_BLOCK_AER_REG, 0);
@@ -3609,7 +4791,8 @@ static void elink_warpcore_set_10G_KR(struct elink_phy *phy,
                         MDIO_WC_REG_SERDESDIGITAL_MISC2, 0x30);
 
        /* Turn TX scramble payload only the 64/66 scrambler */
-       elink_cl45_write(sc, phy, MDIO_WC_DEVAD, MDIO_WC_REG_TX66_CONTROL, 0x9);
+       elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
+                        MDIO_WC_REG_TX66_CONTROL, 0x9);
 
        /* Turn RX scramble payload only the 64/66 scrambler */
        elink_cl45_read_or_write(sc, phy, MDIO_WC_DEVAD,
@@ -3630,7 +4813,7 @@ static void elink_warpcore_set_10G_XFI(struct elink_phy *phy,
        struct bnx2x_softc *sc = params->sc;
        uint16_t misc1_val, tap_val, tx_driver_val, lane, val;
        uint32_t cfg_tap_val, tx_drv_brdct, tx_equal;
-
+       uint32_t ifir_val, ipost2_val, ipre_driver_val;
        /* Hold rxSeqStart */
        elink_cl45_read_or_write(sc, phy, MDIO_WC_DEVAD,
                                 MDIO_WC_REG_DSC2B0_DSC_MISC_CTRL0, 0x8000);
@@ -3675,38 +4858,59 @@ static void elink_warpcore_set_10G_XFI(struct elink_phy *phy,
        if (is_xfi) {
                misc1_val |= 0x5;
                tap_val = WC_TX_FIR(0x08, 0x37, 0x00);
-               tx_driver_val = WC_TX_DRIVER(0x00, 0x02, 0x03);
+               tx_driver_val = WC_TX_DRIVER(0x00, 0x02, 0x03, 0);
        } else {
                cfg_tap_val = REG_RD(sc, params->shmem_base +
-                                    offsetof(struct shmem_region,
-                                             dev_info.port_hw_config[params->
-                                                                     port].sfi_tap_values));
+                                    offsetof(struct shmem_region, dev_info.
+                                             port_hw_config[params->port].
+                                             sfi_tap_values));
 
                tx_equal = cfg_tap_val & PORT_HW_CFG_TX_EQUALIZATION_MASK;
 
-               tx_drv_brdct = (cfg_tap_val &
-                               PORT_HW_CFG_TX_DRV_BROADCAST_MASK) >>
-                   PORT_HW_CFG_TX_DRV_BROADCAST_SHIFT;
-
                misc1_val |= 0x9;
 
                /* TAP values are controlled by nvram, if value there isn't 0 */
                if (tx_equal)
-                       tap_val = (uint16_t) tx_equal;
+                       tap_val = (uint16_t)tx_equal;
                else
                        tap_val = WC_TX_FIR(0x0f, 0x2b, 0x02);
 
-               if (tx_drv_brdct)
-                       tx_driver_val =
-                           WC_TX_DRIVER(0x03, (uint16_t) tx_drv_brdct, 0x06);
-               else
-                       tx_driver_val = WC_TX_DRIVER(0x03, 0x02, 0x06);
+               ifir_val = DEFAULT_TX_DRV_IFIR;
+               ipost2_val = DEFAULT_TX_DRV_POST2;
+               ipre_driver_val = DEFAULT_TX_DRV_IPRE_DRIVER;
+               tx_drv_brdct = DEFAULT_TX_DRV_BRDCT;
+
+                * If any of the IFIR/IPRE_DRIVER/POST2 fields is set, apply all
+                * configuration.
+                */
+               if (cfg_tap_val & (PORT_HW_CFG_TX_DRV_IFIR_MASK |
+                                  PORT_HW_CFG_TX_DRV_IPREDRIVER_MASK |
+                                  PORT_HW_CFG_TX_DRV_POST2_MASK)) {
+                       ifir_val = (cfg_tap_val &
+                                   PORT_HW_CFG_TX_DRV_IFIR_MASK) >>
+                               PORT_HW_CFG_TX_DRV_IFIR_SHIFT;
+                       ipre_driver_val = (cfg_tap_val &
+                                       PORT_HW_CFG_TX_DRV_IPREDRIVER_MASK) >>
+                                       PORT_HW_CFG_TX_DRV_IPREDRIVER_SHIFT;
+                       ipost2_val = (cfg_tap_val &
+                                     PORT_HW_CFG_TX_DRV_POST2_MASK) >>
+                               PORT_HW_CFG_TX_DRV_POST2_SHIFT;
+               }
+
+               if (cfg_tap_val & PORT_HW_CFG_TX_DRV_BROADCAST_MASK) {
+                       tx_drv_brdct = (cfg_tap_val &
+                                       PORT_HW_CFG_TX_DRV_BROADCAST_MASK) >>
+                               PORT_HW_CFG_TX_DRV_BROADCAST_SHIFT;
+               }
+
+               tx_driver_val = WC_TX_DRIVER(ipost2_val, tx_drv_brdct,
+                                            ipre_driver_val, ifir_val);
        }
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
                         MDIO_WC_REG_SERDESDIGITAL_MISC1, misc1_val);
 
        /* Set Transmit PMD settings */
-       lane = elink_get_warpcore_lane(params);
+       lane = elink_get_warpcore_lane(phy, params);
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
                         MDIO_WC_REG_TX_FIR_TAP,
                         tap_val | MDIO_WC_REG_TX_FIR_TAP_ENABLE);
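
The SFI TAP handling above is the same mask-and-shift field extraction
applied three times, with compiled-in defaults kept when the nvram block
is zero. The pattern, generically (helper name hypothetical):

    #include <stdint.h>

    static uint32_t cfg_field(uint32_t cfg, uint32_t mask, uint32_t shift)
    {
            return (cfg & mask) >> shift;
    }

    /* e.g., with the constants used above:
     * ifir_val = cfg_field(cfg_tap_val, PORT_HW_CFG_TX_DRV_IFIR_MASK,
     *                      PORT_HW_CFG_TX_DRV_IFIR_SHIFT);
     */
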
@@ -3754,7 +4958,8 @@ static void elink_warpcore_set_20G_force_KR2(struct elink_phy *phy,
 
        elink_cl45_read_and_write(sc, phy, MDIO_PMA_DEVAD,
                                  MDIO_WC_REG_PMD_KR_CONTROL, ~(1 << 1));
-       elink_cl45_write(sc, phy, MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, 0);
+       elink_cl45_write(sc, phy, MDIO_AN_DEVAD,
+                        MDIO_AN_REG_CTRL, 0);
        /* Turn off CL73 */
        elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
                        MDIO_WC_REG_CL73_USERB0_CTRL, &val);
@@ -3790,7 +4995,8 @@ static void elink_warpcore_set_20G_force_KR2(struct elink_phy *phy,
 }
 
 static void elink_warpcore_set_20G_DXGXS(struct bnx2x_softc *sc,
-                                        struct elink_phy *phy, uint16_t lane)
+                                        struct elink_phy *phy,
+                                        uint16_t lane)
 {
        /* Rx0 anaRxControl1G */
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
@@ -3800,13 +5006,17 @@ static void elink_warpcore_set_20G_DXGXS(struct bnx2x_softc *sc,
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
                         MDIO_WC_REG_RX2_ANARXCONTROL1G, 0x90);
 
-       elink_cl45_write(sc, phy, MDIO_WC_DEVAD, MDIO_WC_REG_RX66_SCW0, 0xE070);
+       elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
+                        MDIO_WC_REG_RX66_SCW0, 0xE070);
 
-       elink_cl45_write(sc, phy, MDIO_WC_DEVAD, MDIO_WC_REG_RX66_SCW1, 0xC0D0);
+       elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
+                        MDIO_WC_REG_RX66_SCW1, 0xC0D0);
 
-       elink_cl45_write(sc, phy, MDIO_WC_DEVAD, MDIO_WC_REG_RX66_SCW2, 0xA0B0);
+       elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
+                        MDIO_WC_REG_RX66_SCW2, 0xA0B0);
 
-       elink_cl45_write(sc, phy, MDIO_WC_DEVAD, MDIO_WC_REG_RX66_SCW3, 0x8090);
+       elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
+                        MDIO_WC_REG_RX66_SCW3, 0x8090);
 
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
                         MDIO_WC_REG_RX66_SCW0_MASK, 0xF0F0);
@@ -3835,7 +5045,7 @@ static void elink_warpcore_set_20G_DXGXS(struct bnx2x_softc *sc,
                          MDIO_WC_REG_TX_FIR_TAP_ENABLE));
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
                         MDIO_WC_REG_TX0_TX_DRIVER + 0x10 * lane,
-                        WC_TX_DRIVER(0x02, 0x02, 0x02));
+                        WC_TX_DRIVER(0x02, 0x02, 0x02, 0));
 }
 
 static void elink_warpcore_set_sgmii_speed(struct elink_phy *phy,
@@ -3857,7 +5067,7 @@ static void elink_warpcore_set_sgmii_speed(struct elink_phy *phy,
                elink_cl45_read_or_write(sc, phy, MDIO_WC_DEVAD,
                                         MDIO_WC_REG_COMBO_IEEE0_MIICTRL,
                                         0x1000);
-               PMD_DRV_LOG(DEBUG, "set SGMII AUTONEG");
+               ELINK_DEBUG_P0(sc, "set SGMII AUTONEG");
        } else {
                elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
                                MDIO_WC_REG_COMBO_IEEE0_MIICTRL, &val16);
@@ -3872,9 +5082,8 @@ static void elink_warpcore_set_sgmii_speed(struct elink_phy *phy,
                        val16 |= 0x0040;
                        break;
                default:
-                       PMD_DRV_LOG(DEBUG,
-                                   "Speed not supported: 0x%x",
-                                   phy->req_line_speed);
+                       ELINK_DEBUG_P1(sc,
+                          "Speed not supported: 0x%x", phy->req_line_speed);
                        return;
                }
 
@@ -3882,13 +5091,13 @@ static void elink_warpcore_set_sgmii_speed(struct elink_phy *phy,
                        val16 |= 0x0100;
 
                elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
-                                MDIO_WC_REG_COMBO_IEEE0_MIICTRL, val16);
+                               MDIO_WC_REG_COMBO_IEEE0_MIICTRL, val16);
 
-               PMD_DRV_LOG(DEBUG, "set SGMII force speed %d",
-                           phy->req_line_speed);
+               ELINK_DEBUG_P1(sc, "set SGMII force speed %d",
+                              phy->req_line_speed);
                elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
                                MDIO_WC_REG_COMBO_IEEE0_MIICTRL, &val16);
-               PMD_DRV_LOG(DEBUG, "  (readback) %x", val16);
+               ELINK_DEBUG_P1(sc, "  (readback) %x", val16);
        }
 
        /* SGMII Slave mode and disable signal detect */
@@ -3900,28 +5109,31 @@ static void elink_warpcore_set_sgmii_speed(struct elink_phy *phy,
                digctrl_kx1 &= 0xff4a;
 
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
-                        MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X1, digctrl_kx1);
+                       MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X1,
+                       digctrl_kx1);
 
        /* Turn off parallel detect */
        elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
                        MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2, &digctrl_kx2);
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
-                        MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2,
-                        (digctrl_kx2 & ~(1 << 2)));
+                       MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2,
+                       (digctrl_kx2 & ~(1 << 2)));
 
        /* Re-enable parallel detect */
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
-                        MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2,
-                        (digctrl_kx2 | (1 << 2)));
+                       MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2,
+                       (digctrl_kx2 | (1 << 2)));
 
        /* Enable autodet */
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
-                        MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X1,
-                        (digctrl_kx1 | 0x10));
+                       MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X1,
+                       (digctrl_kx1 | 0x10));
 }
 
+
 static void elink_warpcore_reset_lane(struct bnx2x_softc *sc,
-                                     struct elink_phy *phy, uint8_t reset)
+                                     struct elink_phy *phy,
+                                     uint8_t reset)
 {
        uint16_t val;
        /* Take lane out of reset after configuration is finished */
@@ -3934,7 +5146,7 @@ static void elink_warpcore_reset_lane(struct bnx2x_softc *sc,
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
                         MDIO_WC_REG_DIGITAL5_MISC6, val);
        elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
-                       MDIO_WC_REG_DIGITAL5_MISC6, &val);
+                        MDIO_WC_REG_DIGITAL5_MISC6, &val);
 }
 
 /* Clear SFI/XFI link settings registers */
@@ -3950,11 +5162,11 @@ static void elink_warpcore_clear_regs(struct elink_phy *phy,
                {MDIO_WC_DEVAD, MDIO_WC_REG_FX100_CTRL3, 0x0800},
                {MDIO_WC_DEVAD, MDIO_WC_REG_DIGITAL4_MISC3, 0x8008},
                {MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X1,
-                0x0195},
+                       0x0195},
                {MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X2,
-                0x0007},
+                       0x0007},
                {MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_CONTROL1000X3,
-                0x0002},
+                       0x0002},
                {MDIO_WC_DEVAD, MDIO_WC_REG_SERDESDIGITAL_MISC1, 0x6000},
                {MDIO_WC_DEVAD, MDIO_WC_REG_TX_FIR_TAP, 0x0000},
                {MDIO_WC_DEVAD, MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x2040},
@@ -3968,40 +5180,41 @@ static void elink_warpcore_clear_regs(struct elink_phy *phy,
                elink_cl45_write(sc, phy, wc_regs[i].devad, wc_regs[i].reg,
                                 wc_regs[i].val);
 
-       lane = elink_get_warpcore_lane(params);
+       lane = elink_get_warpcore_lane(phy, params);
        elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
                         MDIO_WC_REG_TX0_TX_DRIVER + 0x10 * lane, 0x0990);
 
 }
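elink_warpcore_clear_regs() is table-driven: the SFI/XFI defaults live in one wc_regs[] array of {devad, reg, value} triples, and a single loop writes them all back. The same shape, reduced to a compilable sketch; mdio_write() and the register numbers are illustrative stand-ins for the CL45 write and the MDIO_WC_* constants:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the driver's CL45 MDIO write. */
static void mdio_write(uint8_t devad, uint16_t reg, uint16_t val)
{
        printf("devad %u reg 0x%04x <- 0x%04x\n", devad, reg, val);
}

int main(void)
{
        /* One {devad, reg, value} entry per register to restore;
         * the numbers are illustrative, not the real MDIO_WC_* map. */
        static const struct {
                uint8_t devad;
                uint16_t reg, val;
        } wc_regs[] = {
                { 3, 0x8300, 0x0195 },
                { 3, 0x8301, 0x0007 },
                { 3, 0x8302, 0x0002 },
        };
        size_t i;

        /* A single loop replaces a run of hand-written writes. */
        for (i = 0; i < sizeof(wc_regs) / sizeof(wc_regs[0]); i++)
                mdio_write(wc_regs[i].devad, wc_regs[i].reg,
                           wc_regs[i].val);
        return 0;
}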
 
 static elink_status_t elink_get_mod_abs_int_cfg(struct bnx2x_softc *sc,
+                                               __rte_unused uint32_t chip_id,
                                                uint32_t shmem_base,
                                                uint8_t port,
-                                               uint8_t * gpio_num,
-                                               uint8_t * gpio_port)
+                                               uint8_t *gpio_num,
+                                               uint8_t *gpio_port)
 {
        uint32_t cfg_pin;
        *gpio_num = 0;
        *gpio_port = 0;
        if (CHIP_IS_E3(sc)) {
                cfg_pin = (REG_RD(sc, shmem_base +
-                                 offsetof(struct shmem_region,
-                                          dev_info.port_hw_config[port].
-                                          e3_sfp_ctrl)) &
-                          PORT_HW_CFG_E3_MOD_ABS_MASK) >>
-                   PORT_HW_CFG_E3_MOD_ABS_SHIFT;
-
-               /* Should not happen. This function called upon interrupt
-                * triggered by GPIO ( since EPIO can only generate interrupts
-                * to MCP).
+                               offsetof(struct shmem_region,
+                               dev_info.port_hw_config[port].e3_sfp_ctrl)) &
+                               PORT_HW_CFG_E3_MOD_ABS_MASK) >>
+                               PORT_HW_CFG_E3_MOD_ABS_SHIFT;
+
+               /*
+                * This should not happen since this function is called
+                * from interrupt triggered by GPIO (since EPIO can only
+                * generate interrupts to MCP).
                 * So if this function was called and none of the GPIOs was set,
-                * it means the shit hit the fan.
+                * it means something disastrous has already happened.
                 */
                if ((cfg_pin < PIN_CFG_GPIO0_P0) ||
                    (cfg_pin > PIN_CFG_GPIO3_P1)) {
-                       PMD_DRV_LOG(DEBUG,
-                                   "No cfg pin %x for module detect indication",
-                                   cfg_pin);
+                       ELINK_DEBUG_P1(sc,
+                          "No cfg pin %x for module detect indication",
+                          cfg_pin);
                        return ELINK_STATUS_ERROR;
                }
 
@@ -4015,12 +5228,13 @@ static elink_status_t elink_get_mod_abs_int_cfg(struct bnx2x_softc *sc,
        return ELINK_STATUS_OK;
 }
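The cfg_pin lookup above is the usual shmem field decode: read the 32-bit port config word, mask and shift the field out, and range-check it before use. A self-contained sketch; the mask, shift, and pin range are invented values standing in for PORT_HW_CFG_E3_MOD_ABS_MASK/SHIFT and the PIN_CFG_GPIO* bounds:

#include <stdint.h>
#include <stdio.h>

/* Invented mask/shift/range; the real values come from the shmem layout. */
#define MOD_ABS_MASK      0x0000ff00
#define MOD_ABS_SHIFT     8
#define PIN_CFG_GPIO_MIN  1
#define PIN_CFG_GPIO_MAX  8

/* Decode the module-detect pin; nonzero return means bad NVRAM config. */
static int decode_mod_abs_pin(uint32_t e3_sfp_ctrl, uint32_t *pin)
{
        *pin = (e3_sfp_ctrl & MOD_ABS_MASK) >> MOD_ABS_SHIFT;
        if (*pin < PIN_CFG_GPIO_MIN || *pin > PIN_CFG_GPIO_MAX)
                return -1;
        return 0;
}

int main(void)
{
        uint32_t pin;

        if (decode_mod_abs_pin(0x00000300, &pin) == 0)
                printf("cfg_pin = %u\n", (unsigned int)pin);
        else
                printf("no cfg pin for module detect indication\n");
        return 0;
}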
 
-static int elink_is_sfp_module_plugged(struct elink_params *params)
+static int elink_is_sfp_module_plugged(__rte_unused struct elink_phy *phy,
+                                      struct elink_params *params)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t gpio_num, gpio_port;
        uint32_t gpio_val;
-       if (elink_get_mod_abs_int_cfg(sc,
+       if (elink_get_mod_abs_int_cfg(sc, params->chip_id,
                                      params->shmem_base, params->port,
                                      &gpio_num, &gpio_port) != ELINK_STATUS_OK)
                return 0;
@@ -4032,17 +5246,16 @@ static int elink_is_sfp_module_plugged(struct elink_params *params)
        else
                return 0;
 }
-
 static int elink_warpcore_get_sigdet(struct elink_phy *phy,
                                     struct elink_params *params)
 {
        uint16_t gp2_status_reg0, lane;
        struct bnx2x_softc *sc = params->sc;
 
-       lane = elink_get_warpcore_lane(params);
+       lane = elink_get_warpcore_lane(phy, params);
 
        elink_cl45_read(sc, phy, MDIO_WC_DEVAD, MDIO_WC_REG_GP2_STATUS_GP_2_0,
-                       &gp2_status_reg0);
+                                &gp2_status_reg0);
 
        return (gp2_status_reg0 >> (8 + lane)) & 0x1;
 }
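elink_warpcore_get_sigdet() uses the per-lane status idiom seen throughout this file: one status word carries a bit per lane at a fixed base position, so extraction is a shift by (base + lane) and a mask. A sketch, with the bit-8 base taken from the code above but an invented register value:

#include <stdint.h>
#include <stdio.h>

/* Signal-detect bits assumed to start at bit 8, one bit per lane. */
static int lane_sigdet(uint16_t gp2_status_reg0, unsigned int lane)
{
        return (gp2_status_reg0 >> (8 + lane)) & 0x1;
}

int main(void)
{
        uint16_t reg = 0x0400;  /* invented readback: lane 2 has signal */
        unsigned int lane;

        for (lane = 0; lane < 4; lane++)
                printf("lane %u sigdet %d\n", lane, lane_sigdet(reg, lane));
        return 0;
}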
@@ -4061,38 +5274,35 @@ static void elink_warpcore_config_runtime(struct elink_phy *phy,
                return;
 
        if (vars->rx_tx_asic_rst) {
-               uint16_t lane = elink_get_warpcore_lane(params);
+               uint16_t lane = elink_get_warpcore_lane(phy, params);
                serdes_net_if = (REG_RD(sc, params->shmem_base +
-                                       offsetof(struct shmem_region,
-                                                dev_info.port_hw_config
-                                                [params->port].
-                                                default_cfg)) &
-                                PORT_HW_CFG_NET_SERDES_IF_MASK);
+                               offsetof(struct shmem_region, dev_info.
+                               port_hw_config[params->port].default_cfg)) &
+                               PORT_HW_CFG_NET_SERDES_IF_MASK);
 
                switch (serdes_net_if) {
                case PORT_HW_CFG_NET_SERDES_IF_KR:
                        /* Do we get link yet? */
                        elink_cl45_read(sc, phy, MDIO_WC_DEVAD, 0x81d1,
                                        &gp_status1);
-                       lnkup = (gp_status1 >> (8 + lane)) & 0x1;       /* 1G */
-                       /*10G KR */
+                       lnkup = (gp_status1 >> (8 + lane)) & 0x1; /* 1G */
+                       /* 10G KR */
                        lnkup_kr = (gp_status1 >> (12 + lane)) & 0x1;
 
                        if (lnkup_kr || lnkup) {
                                vars->rx_tx_asic_rst = 0;
                        } else {
                                /* Reset the lane to see if link comes up. */
                                elink_warpcore_reset_lane(sc, phy, 1);
                                elink_warpcore_reset_lane(sc, phy, 0);
 
                                /* Restart Autoneg */
                                elink_cl45_write(sc, phy, MDIO_AN_DEVAD,
-                                                MDIO_WC_REG_IEEE0BLK_MIICNTL,
-                                                0x1200);
+                                       MDIO_WC_REG_IEEE0BLK_MIICNTL, 0x1200);
 
                                vars->rx_tx_asic_rst--;
-                               PMD_DRV_LOG(DEBUG, "0x%x retry left",
-                                           vars->rx_tx_asic_rst);
+                               ELINK_DEBUG_P1(sc, "0x%x retry left",
+                                              vars->rx_tx_asic_rst);
                        }
                        break;
 
@@ -4100,29 +5310,29 @@ static void elink_warpcore_config_runtime(struct elink_phy *phy,
                        break;
                }
 
-       }
-       /*params->rx_tx_asic_rst */
+       } /* params->rx_tx_asic_rst */
 }
 
 static void elink_warpcore_config_sfi(struct elink_phy *phy,
                                      struct elink_params *params)
 {
-       uint16_t lane = elink_get_warpcore_lane(params);
-
+       uint16_t lane = elink_get_warpcore_lane(phy, params);
+       struct bnx2x_softc *sc = params->sc;
        elink_warpcore_clear_regs(phy, params, lane);
        if ((params->req_line_speed[ELINK_LINK_CONFIG_IDX(ELINK_INT_PHY)] ==
             ELINK_SPEED_10000) &&
            (phy->media_type != ELINK_ETH_PHY_SFP_1G_FIBER)) {
-               PMD_DRV_LOG(DEBUG, "Setting 10G SFI");
+               ELINK_DEBUG_P0(sc, "Setting 10G SFI");
                elink_warpcore_set_10G_XFI(phy, params, 0);
        } else {
-               PMD_DRV_LOG(DEBUG, "Setting 1G Fiber");
+               ELINK_DEBUG_P0(sc, "Setting 1G Fiber");
                elink_warpcore_set_sgmii_speed(phy, params, 1, 0);
        }
 }
 
 static void elink_sfp_e3_set_transmitter(struct elink_params *params,
-                                        struct elink_phy *phy, uint8_t tx_en)
+                                        struct elink_phy *phy,
+                                        uint8_t tx_en)
 {
        struct bnx2x_softc *sc = params->sc;
        uint32_t cfg_pin;
@@ -4131,9 +5341,9 @@ static void elink_sfp_e3_set_transmitter(struct elink_params *params,
        cfg_pin = REG_RD(sc, params->shmem_base +
                         offsetof(struct shmem_region,
                                  dev_info.port_hw_config[port].e3_sfp_ctrl)) &
-           PORT_HW_CFG_E3_TX_LASER_MASK;
+               PORT_HW_CFG_E3_TX_LASER_MASK;
        /* Set the !tx_en since this pin is DISABLE_TX_LASER */
-       PMD_DRV_LOG(DEBUG, "Setting WC TX to %d", tx_en);
+       ELINK_DEBUG_P1(sc, "Setting WC TX to %d", tx_en);
 
        /* For 20G, the expected pin to be used is 3 pins after the current */
        elink_set_cfg_pin(sc, cfg_pin, tx_en ^ 1);
@@ -4142,21 +5352,20 @@ static void elink_sfp_e3_set_transmitter(struct elink_params *params,
 }
 
 static uint8_t elink_warpcore_config_init(struct elink_phy *phy,
-                                         struct elink_params *params,
-                                         struct elink_vars *vars)
+                                      struct elink_params *params,
+                                      struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint32_t serdes_net_if;
        uint8_t fiber_mode;
-       uint16_t lane = elink_get_warpcore_lane(params);
+       uint16_t lane = elink_get_warpcore_lane(phy, params);
        serdes_net_if = (REG_RD(sc, params->shmem_base +
-                               offsetof(struct shmem_region,
-                                        dev_info.port_hw_config[params->port].
-                                        default_cfg)) &
+                        offsetof(struct shmem_region, dev_info.
+                                 port_hw_config[params->port].default_cfg)) &
                         PORT_HW_CFG_NET_SERDES_IF_MASK);
-       PMD_DRV_LOG(DEBUG,
-                   "Begin Warpcore init, link_speed %d, "
-                   "serdes_net_if = 0x%x", vars->line_speed, serdes_net_if);
+       ELINK_DEBUG_P2(sc, "Begin Warpcore init, link_speed %d, "
+                          "serdes_net_if = 0x%x",
+                      vars->line_speed, serdes_net_if);
        elink_set_aer_mmd(params, phy);
        elink_warpcore_reset_lane(sc, phy, 1);
        vars->phy_flags |= PHY_XGXS_FLAG;
@@ -4165,7 +5374,7 @@ static uint8_t elink_warpcore_config_init(struct elink_phy *phy,
             ((phy->req_line_speed == ELINK_SPEED_100) ||
              (phy->req_line_speed == ELINK_SPEED_10)))) {
                vars->phy_flags |= PHY_SGMII_FLAG;
-               PMD_DRV_LOG(DEBUG, "Setting SGMII mode");
+               ELINK_DEBUG_P0(sc, "Setting SGMII mode");
                elink_warpcore_clear_regs(phy, params, lane);
                elink_warpcore_set_sgmii_speed(phy, params, 0, 1);
        } else {
@@ -4175,27 +5384,28 @@ static uint8_t elink_warpcore_config_init(struct elink_phy *phy,
                        if (params->loopback_mode != ELINK_LOOPBACK_EXT)
                                elink_warpcore_enable_AN_KR(phy, params, vars);
                        else {
-                               PMD_DRV_LOG(DEBUG, "Setting KR 10G-Force");
-                               elink_warpcore_set_10G_KR(phy, params);
+                               ELINK_DEBUG_P0(sc, "Setting KR 10G-Force");
+                               elink_warpcore_set_10G_KR(phy, params, vars);
                        }
                        break;
 
                case PORT_HW_CFG_NET_SERDES_IF_XFI:
                        elink_warpcore_clear_regs(phy, params, lane);
                        if (vars->line_speed == ELINK_SPEED_10000) {
-                               PMD_DRV_LOG(DEBUG, "Setting 10G XFI");
+                               ELINK_DEBUG_P0(sc, "Setting 10G XFI");
                                elink_warpcore_set_10G_XFI(phy, params, 1);
                        } else {
                                if (ELINK_SINGLE_MEDIA_DIRECT(params)) {
-                                       PMD_DRV_LOG(DEBUG, "1G Fiber");
+                                       ELINK_DEBUG_P0(sc, "1G Fiber");
                                        fiber_mode = 1;
                                } else {
-                                       PMD_DRV_LOG(DEBUG, "10/100/1G SGMII");
+                                       ELINK_DEBUG_P0(sc, "10/100/1G SGMII");
                                        fiber_mode = 0;
                                }
                                elink_warpcore_set_sgmii_speed(phy,
-                                                              params,
-                                                              fiber_mode, 0);
+                                                               params,
+                                                               fiber_mode,
+                                                               0);
                        }
 
                        break;
@@ -4207,7 +5417,7 @@ static uint8_t elink_warpcore_config_init(struct elink_phy *phy,
                         */
                        if ((params->loopback_mode == ELINK_LOOPBACK_NONE) ||
                            (params->loopback_mode == ELINK_LOOPBACK_EXT)) {
-                               if (elink_is_sfp_module_plugged(params))
+                               if (elink_is_sfp_module_plugged(phy, params))
                                        elink_sfp_module_detection(phy, params);
                                else
                                        elink_sfp_e3_set_transmitter(params,
@@ -4219,10 +5429,10 @@ static uint8_t elink_warpcore_config_init(struct elink_phy *phy,
 
                case PORT_HW_CFG_NET_SERDES_IF_DXGXS:
                        if (vars->line_speed != ELINK_SPEED_20000) {
-                               PMD_DRV_LOG(DEBUG, "Speed not supported yet");
+                               ELINK_DEBUG_P0(sc, "Speed not supported yet");
                                return 0;
                        }
-                       PMD_DRV_LOG(DEBUG, "Setting 20G DXGXS");
+                       ELINK_DEBUG_P0(sc, "Setting 20G DXGXS");
                        elink_warpcore_set_20G_DXGXS(sc, phy, lane);
                        /* Issue Module detection */
 
@@ -4232,21 +5442,21 @@ static uint8_t elink_warpcore_config_init(struct elink_phy *phy,
                        if (!params->loopback_mode) {
                                elink_warpcore_enable_AN_KR(phy, params, vars);
                        } else {
-                               PMD_DRV_LOG(DEBUG, "Setting KR 20G-Force");
+                               ELINK_DEBUG_P0(sc, "Setting KR 20G-Force");
                                elink_warpcore_set_20G_force_KR2(phy, params);
                        }
                        break;
                default:
-                       PMD_DRV_LOG(DEBUG,
-                                   "Unsupported Serdes Net Interface 0x%x",
-                                   serdes_net_if);
+                       ELINK_DEBUG_P1(sc,
+                          "Unsupported Serdes Net Interface 0x%x",
+                          serdes_net_if);
                        return 0;
                }
        }
 
        /* Take lane out of reset after configuration is finished */
        elink_warpcore_reset_lane(sc, phy, 0);
-       PMD_DRV_LOG(DEBUG, "Exit config init");
+       ELINK_DEBUG_P0(sc, "Exit config init");
 
        return 0;
 }
@@ -4275,11 +5485,12 @@ static void elink_warpcore_link_reset(struct elink_phy *phy,
                          MDIO_AER_BLOCK_AER_REG, 0);
        /* Enable 1G MDIO (1-copy) */
        elink_cl45_read_and_write(sc, phy, MDIO_WC_DEVAD,
-                                 MDIO_WC_REG_XGXSBLK0_XGXSCONTROL, ~0x10);
+                                 MDIO_WC_REG_XGXSBLK0_XGXSCONTROL,
+                                 ~0x10);
 
        elink_cl45_read_and_write(sc, phy, MDIO_WC_DEVAD,
                                  MDIO_WC_REG_XGXSBLK1_LANECTRL2, 0xff00);
-       lane = elink_get_warpcore_lane(params);
+       lane = elink_get_warpcore_lane(phy, params);
        /* Disable CL36 PCS Tx */
        elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
                        MDIO_WC_REG_XGXSBLK1_LANECTRL0, &val16);
@@ -4311,8 +5522,8 @@ static void elink_set_warpcore_loopback(struct elink_phy *phy,
        struct bnx2x_softc *sc = params->sc;
        uint16_t val16;
        uint32_t lane;
-       PMD_DRV_LOG(DEBUG, "Setting Warpcore loopback type %x, speed %d",
-                   params->loopback_mode, phy->req_line_speed);
+       ELINK_DEBUG_P2(sc, "Setting Warpcore loopback type %x, speed %d",
+                      params->loopback_mode, phy->req_line_speed);
 
        if (phy->req_line_speed < ELINK_SPEED_10000 ||
            phy->supported & ELINK_SUPPORTED_20000baseKR2_Full) {
@@ -4326,14 +5537,15 @@ static void elink_set_warpcore_loopback(struct elink_phy *phy,
                                         MDIO_WC_REG_XGXSBLK0_XGXSCONTROL,
                                         0x10);
                /* Set 1G loopback based on lane (1-copy) */
-               lane = elink_get_warpcore_lane(params);
+               lane = elink_get_warpcore_lane(phy, params);
                elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
                                MDIO_WC_REG_XGXSBLK1_LANECTRL2, &val16);
                val16 |= (1 << lane);
                if (phy->flags & ELINK_FLAGS_WC_DUAL_MODE)
                        val16 |= (2 << lane);
                elink_cl45_write(sc, phy, MDIO_WC_DEVAD,
-                                MDIO_WC_REG_XGXSBLK1_LANECTRL2, val16);
+                                MDIO_WC_REG_XGXSBLK1_LANECTRL2,
+                                val16);
 
                /* Switch back to 4-copy registers */
                elink_set_aer_mmd(params, phy);
@@ -4347,8 +5559,10 @@ static void elink_set_warpcore_loopback(struct elink_phy *phy,
        }
 }
 
 static void elink_sync_link(struct elink_params *params,
-                           struct elink_vars *vars)
+                            struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t link_10g_plus;
@@ -4356,21 +5570,23 @@ static void elink_sync_link(struct elink_params *params,
                vars->phy_flags |= PHY_PHYSICAL_LINK_FLAG;
        vars->link_up = (vars->link_status & LINK_STATUS_LINK_UP);
        if (vars->link_up) {
-               PMD_DRV_LOG(DEBUG, "phy link up");
+               ELINK_DEBUG_P0(sc, "phy link up");
+               ELINK_DEBUG_P1(sc, "link status = %x", vars->link_status);
 
                vars->phy_link_up = 1;
                vars->duplex = DUPLEX_FULL;
-               switch (vars->link_status & LINK_STATUS_SPEED_AND_DUPLEX_MASK) {
+               switch (vars->link_status &
+                       LINK_STATUS_SPEED_AND_DUPLEX_MASK) {
                case ELINK_LINK_10THD:
                        vars->duplex = DUPLEX_HALF;
                        /* Fall through */
                case ELINK_LINK_10TFD:
                        vars->line_speed = ELINK_SPEED_10;
                        break;
 
                case ELINK_LINK_100TXHD:
                        vars->duplex = DUPLEX_HALF;
                        /* Fall through */
                case ELINK_LINK_100T4:
                case ELINK_LINK_100TXFD:
                        vars->line_speed = ELINK_SPEED_100;
@@ -4378,14 +5594,14 @@ static void elink_sync_link(struct elink_params *params,
 
                case ELINK_LINK_1000THD:
                        vars->duplex = DUPLEX_HALF;
                        /* Fall through */
                case ELINK_LINK_1000TFD:
                        vars->line_speed = ELINK_SPEED_1000;
                        break;
 
                case ELINK_LINK_2500THD:
                        vars->duplex = DUPLEX_HALF;
                        /* Fall through */
                case ELINK_LINK_2500TFD:
                        vars->line_speed = ELINK_SPEED_2500;
                        break;
@@ -4417,7 +5633,8 @@ static void elink_sync_link(struct elink_params *params,
                        vars->phy_flags &= ~PHY_SGMII_FLAG;
                }
                if (vars->line_speed &&
-                   USES_WARPCORE(sc) && (vars->line_speed == ELINK_SPEED_1000))
+                   USES_WARPCORE(sc) &&
+                   (vars->line_speed == ELINK_SPEED_1000))
                        vars->phy_flags |= PHY_SGMII_FLAG;
                /* Anything 10 and over uses the bmac */
                link_10g_plus = (vars->line_speed >= ELINK_SPEED_10000);
@@ -4433,8 +5650,8 @@ static void elink_sync_link(struct elink_params *params,
                        else
                                vars->mac_type = ELINK_MAC_TYPE_EMAC;
                }
-       } else {                /* Link down */
-               PMD_DRV_LOG(DEBUG, "phy link down");
+       } else { /* Link down */
+               ELINK_DEBUG_P0(sc, "phy link down");
 
                vars->phy_link_up = 0;
 
@@ -4478,44 +5695,44 @@ void elink_link_status_update(struct elink_params *params,
        elink_sync_link(params, vars);
        /* Sync media type */
        sync_offset = params->shmem_base +
-           offsetof(struct shmem_region,
-                    dev_info.port_hw_config[port].media_type);
+                       offsetof(struct shmem_region,
+                                dev_info.port_hw_config[port].media_type);
        media_types = REG_RD(sc, sync_offset);
 
        params->phy[ELINK_INT_PHY].media_type =
-           (media_types & PORT_HW_CFG_MEDIA_TYPE_PHY0_MASK) >>
-           PORT_HW_CFG_MEDIA_TYPE_PHY0_SHIFT;
+               (media_types & PORT_HW_CFG_MEDIA_TYPE_PHY0_MASK) >>
+               PORT_HW_CFG_MEDIA_TYPE_PHY0_SHIFT;
        params->phy[ELINK_EXT_PHY1].media_type =
-           (media_types & PORT_HW_CFG_MEDIA_TYPE_PHY1_MASK) >>
-           PORT_HW_CFG_MEDIA_TYPE_PHY1_SHIFT;
+               (media_types & PORT_HW_CFG_MEDIA_TYPE_PHY1_MASK) >>
+               PORT_HW_CFG_MEDIA_TYPE_PHY1_SHIFT;
        params->phy[ELINK_EXT_PHY2].media_type =
-           (media_types & PORT_HW_CFG_MEDIA_TYPE_PHY2_MASK) >>
-           PORT_HW_CFG_MEDIA_TYPE_PHY2_SHIFT;
-       PMD_DRV_LOG(DEBUG, "media_types = 0x%x", media_types);
+               (media_types & PORT_HW_CFG_MEDIA_TYPE_PHY2_MASK) >>
+               PORT_HW_CFG_MEDIA_TYPE_PHY2_SHIFT;
+       ELINK_DEBUG_P1(sc, "media_types = 0x%x", media_types);
 
        /* Sync AEU offset */
        sync_offset = params->shmem_base +
-           offsetof(struct shmem_region,
-                    dev_info.port_hw_config[port].aeu_int_mask);
+                       offsetof(struct shmem_region,
+                                dev_info.port_hw_config[port].aeu_int_mask);
 
        vars->aeu_int_mask = REG_RD(sc, sync_offset);
 
        /* Sync PFC status */
        if (vars->link_status & LINK_STATUS_PFC_ENABLED)
                params->feature_config_flags |=
-                   ELINK_FEATURE_CONFIG_PFC_ENABLED;
+                                       ELINK_FEATURE_CONFIG_PFC_ENABLED;
        else
                params->feature_config_flags &=
-                   ~ELINK_FEATURE_CONFIG_PFC_ENABLED;
+                                       ~ELINK_FEATURE_CONFIG_PFC_ENABLED;
 
        if (SHMEM2_HAS(sc, link_attr_sync))
-               vars->link_attr_sync = SHMEM2_RD(sc,
+               params->link_attr_sync = SHMEM2_RD(sc,
                                                 link_attr_sync[params->port]);
 
-       PMD_DRV_LOG(DEBUG, "link_status 0x%x  phy_link_up %x int_mask 0x%x",
-                   vars->link_status, vars->phy_link_up, vars->aeu_int_mask);
-       PMD_DRV_LOG(DEBUG, "line_speed %x  duplex %x  flow_ctrl 0x%x",
-                   vars->line_speed, vars->duplex, vars->flow_ctrl);
+       ELINK_DEBUG_P3(sc, "link_status 0x%x  phy_link_up %x int_mask 0x%x",
+                vars->link_status, vars->phy_link_up, vars->aeu_int_mask);
+       ELINK_DEBUG_P3(sc, "line_speed %x  duplex %x  flow_ctrl 0x%x",
+                vars->line_speed, vars->duplex, vars->flow_ctrl);
 }
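elink_link_status_update() unpacks three per-PHY media types from a single shmem word, one mask/shift pair per PHY slot. A sketch of the same unpacking, assuming, purely for illustration, an 8-bit field per slot rather than the exact PORT_HW_CFG_MEDIA_TYPE_PHYn_* layout:

#include <stdint.h>
#include <stdio.h>

/* Assumed packing, for illustration only: one 8-bit media-type field
 * per PHY slot in a single 32-bit shmem word. */
#define MEDIA_TYPE_MASK(n)   (0xffu << ((n) * 8))
#define MEDIA_TYPE_SHIFT(n)  ((n) * 8)

int main(void)
{
        uint32_t media_types = 0x00020106;      /* invented shmem word */
        int n;

        for (n = 0; n < 3; n++)
                printf("phy%d media_type = 0x%x\n", n,
                       (unsigned int)((media_types & MEDIA_TYPE_MASK(n)) >>
                                      MEDIA_TYPE_SHIFT(n)));
        return 0;
}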
 
 static void elink_set_master_ln(struct elink_params *params,
@@ -4530,7 +5747,8 @@ static void elink_set_master_ln(struct elink_params *params,
        /* Set the master_ln for AN */
        CL22_RD_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_XGXS_BLOCK2,
-                         MDIO_XGXS_BLOCK2_TEST_MODE_LANE, &new_master_ln);
+                         MDIO_XGXS_BLOCK2_TEST_MODE_LANE,
+                         &new_master_ln);
 
        CL22_WR_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_XGXS_BLOCK2,
@@ -4539,8 +5757,8 @@ static void elink_set_master_ln(struct elink_params *params,
 }
 
 static elink_status_t elink_reset_unicore(struct elink_params *params,
-                                         struct elink_phy *phy,
-                                         uint8_t set_serdes)
+                              struct elink_phy *phy,
+                              uint8_t set_serdes)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t mii_control;
@@ -4553,7 +5771,8 @@ static elink_status_t elink_reset_unicore(struct elink_params *params,
        CL22_WR_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_COMBO_IEEE0,
                          MDIO_COMBO_IEEE0_MII_CONTROL,
-                         (mii_control | MDIO_COMBO_IEEO_MII_CONTROL_RESET));
+                         (mii_control |
+                          MDIO_COMBO_IEEO_MII_CONTROL_RESET));
        if (set_serdes)
                elink_set_serdes_access(sc, params->port);
 
@@ -4564,7 +5783,8 @@ static elink_status_t elink_reset_unicore(struct elink_params *params,
                /* The reset erased the previous bank value */
                CL22_RD_OVER_CL45(sc, phy,
                                  MDIO_REG_BANK_COMBO_IEEE0,
-                                 MDIO_COMBO_IEEE0_MII_CONTROL, &mii_control);
+                                 MDIO_COMBO_IEEE0_MII_CONTROL,
+                                 &mii_control);
 
                if (!(mii_control & MDIO_COMBO_IEEO_MII_CONTROL_RESET)) {
                        DELAY(5);
@@ -4572,10 +5792,12 @@ static elink_status_t elink_reset_unicore(struct elink_params *params,
                }
        }
 
-       elink_cb_event_log(sc, ELINK_LOG_ID_PHY_UNINITIALIZED, params->port);   // "Warning: PHY was not initialized,"
-       // " Port %d",
+       elink_cb_event_log(sc, ELINK_LOG_ID_PHY_UNINITIALIZED, params->port);
+                            /* "Warning: PHY was not initialized,"
+                             * " Port %d",
+                             */
 
-       PMD_DRV_LOG(DEBUG, "BUG! XGXS is still in reset!");
+       ELINK_DEBUG_P0(sc, "BUG! XGXS is still in reset!");
        return ELINK_STATUS_ERROR;
 
 }
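elink_reset_unicore() is a classic self-clearing-reset wait: assert the RESET bit, then poll the MII control register until the hardware drops it, and log the "XGXS is still in reset" warning if a bounded number of polls expires. The same control flow as a standalone sketch; the register stub clears its reset bit after the first read:

#include <stdint.h>
#include <stdio.h>

#define MII_CONTROL_RESET 0x8000

/* Fake register: the self-clearing RESET bit drops after one read. */
static uint16_t read_mii_control(void)
{
        static uint16_t val = MII_CONTROL_RESET;
        uint16_t cur = val;

        val = 0;
        return cur;
}

int main(void)
{
        int i;

        for (i = 0; i < 10; i++) {
                if (!(read_mii_control() & MII_CONTROL_RESET)) {
                        printf("reset complete after %d poll(s)\n", i + 1);
                        return 0;
                }
                /* The driver sleeps here (DELAY(5)); nothing to wait on. */
        }
        printf("BUG! XGXS is still in reset!\n");
        return 1;
}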
@@ -4629,31 +5851,35 @@ static void elink_set_parallel_detection(struct elink_phy *phy,
        uint16_t control2;
        CL22_RD_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_SERDES_DIGITAL,
-                         MDIO_SERDES_DIGITAL_A_1000X_CONTROL2, &control2);
+                         MDIO_SERDES_DIGITAL_A_1000X_CONTROL2,
+                         &control2);
        if (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)
                control2 |= MDIO_SERDES_DIGITAL_A_1000X_CONTROL2_PRL_DT_EN;
        else
                control2 &= ~MDIO_SERDES_DIGITAL_A_1000X_CONTROL2_PRL_DT_EN;
-       PMD_DRV_LOG(DEBUG, "phy->speed_cap_mask = 0x%x, control2 = 0x%x",
-                   phy->speed_cap_mask, control2);
+       ELINK_DEBUG_P2(sc, "phy->speed_cap_mask = 0x%x, control2 = 0x%x",
+               phy->speed_cap_mask, control2);
        CL22_WR_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_SERDES_DIGITAL,
-                         MDIO_SERDES_DIGITAL_A_1000X_CONTROL2, control2);
+                         MDIO_SERDES_DIGITAL_A_1000X_CONTROL2,
+                         control2);
 
        if ((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT) &&
-           (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)) {
-               PMD_DRV_LOG(DEBUG, "XGXS");
+            (phy->speed_cap_mask &
+                   PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)) {
+               ELINK_DEBUG_P0(sc, "XGXS");
 
                CL22_WR_OVER_CL45(sc, phy,
-                                 MDIO_REG_BANK_10G_PARALLEL_DETECT,
-                                 MDIO_10G_PARALLEL_DETECT_PAR_DET_10G_LINK,
-                                 MDIO_10G_PARALLEL_DETECT_PAR_DET_10G_LINK_CNT);
+                                MDIO_REG_BANK_10G_PARALLEL_DETECT,
+                                MDIO_10G_PARALLEL_DETECT_PAR_DET_10G_LINK,
+                                MDIO_10G_PARALLEL_DETECT_PAR_DET_10G_LINK_CNT);
 
                CL22_RD_OVER_CL45(sc, phy,
                                  MDIO_REG_BANK_10G_PARALLEL_DETECT,
                                  MDIO_10G_PARALLEL_DETECT_PAR_DET_10G_CONTROL,
                                  &control2);
 
+
                control2 |=
                    MDIO_10G_PARALLEL_DETECT_PAR_DET_10G_CONTROL_PARDET10G_EN;
 
@@ -4673,7 +5899,8 @@ static void elink_set_parallel_detection(struct elink_phy *phy,
 
 static void elink_set_autoneg(struct elink_phy *phy,
                              struct elink_params *params,
-                             struct elink_vars *vars, uint8_t enable_cl73)
+                             struct elink_vars *vars,
+                             uint8_t enable_cl73)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t reg_val;
@@ -4686,7 +5913,7 @@ static void elink_set_autoneg(struct elink_phy *phy,
        /* CL37 Autoneg Enabled */
        if (vars->line_speed == ELINK_SPEED_AUTO_NEG)
                reg_val |= MDIO_COMBO_IEEO_MII_CONTROL_AN_EN;
-       else                    /* CL37 Autoneg Disabled */
+       else /* CL37 Autoneg Disabled */
                reg_val &= ~(MDIO_COMBO_IEEO_MII_CONTROL_AN_EN |
                             MDIO_COMBO_IEEO_MII_CONTROL_RESTART_AN);
 
@@ -4700,7 +5927,7 @@ static void elink_set_autoneg(struct elink_phy *phy,
                          MDIO_REG_BANK_SERDES_DIGITAL,
                          MDIO_SERDES_DIGITAL_A_1000X_CONTROL1, &reg_val);
        reg_val &= ~(MDIO_SERDES_DIGITAL_A_1000X_CONTROL1_SIGNAL_DETECT_EN |
-                    MDIO_SERDES_DIGITAL_A_1000X_CONTROL1_INVERT_SIGNAL_DETECT);
+                   MDIO_SERDES_DIGITAL_A_1000X_CONTROL1_INVERT_SIGNAL_DETECT);
        reg_val |= MDIO_SERDES_DIGITAL_A_1000X_CONTROL1_FIBER_MODE;
        if (vars->line_speed == ELINK_SPEED_AUTO_NEG)
                reg_val |= MDIO_SERDES_DIGITAL_A_1000X_CONTROL1_AUTODET;
@@ -4714,7 +5941,8 @@ static void elink_set_autoneg(struct elink_phy *phy,
        /* Enable TetonII and BAM autoneg */
        CL22_RD_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_BAM_NEXT_PAGE,
-                         MDIO_BAM_NEXT_PAGE_MP5_NEXT_PAGE_CTRL, &reg_val);
+                         MDIO_BAM_NEXT_PAGE_MP5_NEXT_PAGE_CTRL,
+                         &reg_val);
        if (vars->line_speed == ELINK_SPEED_AUTO_NEG) {
                /* Enable BAM aneg Mode and TetonII aneg Mode */
                reg_val |= (MDIO_BAM_NEXT_PAGE_MP5_NEXT_PAGE_CTRL_BAM_MODE |
@@ -4726,40 +5954,45 @@ static void elink_set_autoneg(struct elink_phy *phy,
        }
        CL22_WR_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_BAM_NEXT_PAGE,
-                         MDIO_BAM_NEXT_PAGE_MP5_NEXT_PAGE_CTRL, reg_val);
+                         MDIO_BAM_NEXT_PAGE_MP5_NEXT_PAGE_CTRL,
+                         reg_val);
 
        if (enable_cl73) {
                /* Enable Cl73 FSM status bits */
                CL22_WR_OVER_CL45(sc, phy,
                                  MDIO_REG_BANK_CL73_USERB0,
-                                 MDIO_CL73_USERB0_CL73_UCTRL, 0xe);
+                                 MDIO_CL73_USERB0_CL73_UCTRL,
+                                 0xe);
 
                /* Enable BAM Station Manager */
                CL22_WR_OVER_CL45(sc, phy,
-                                 MDIO_REG_BANK_CL73_USERB0,
-                                 MDIO_CL73_USERB0_CL73_BAM_CTRL1,
-                                 MDIO_CL73_USERB0_CL73_BAM_CTRL1_BAM_EN |
-                                 MDIO_CL73_USERB0_CL73_BAM_CTRL1_BAM_STATION_MNGR_EN
-                                 |
-                                 MDIO_CL73_USERB0_CL73_BAM_CTRL1_BAM_NP_AFTER_BP_EN);
+                       MDIO_REG_BANK_CL73_USERB0,
+                       MDIO_CL73_USERB0_CL73_BAM_CTRL1,
+                       MDIO_CL73_USERB0_CL73_BAM_CTRL1_BAM_EN |
+                       MDIO_CL73_USERB0_CL73_BAM_CTRL1_BAM_STATION_MNGR_EN |
+                       MDIO_CL73_USERB0_CL73_BAM_CTRL1_BAM_NP_AFTER_BP_EN);
 
                /* Advertise CL73 link speeds */
                CL22_RD_OVER_CL45(sc, phy,
                                  MDIO_REG_BANK_CL73_IEEEB1,
-                                 MDIO_CL73_IEEEB1_AN_ADV2, &reg_val);
-               if (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)
+                                 MDIO_CL73_IEEEB1_AN_ADV2,
+                                 &reg_val);
+               if (phy->speed_cap_mask &
+                   PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)
                        reg_val |= MDIO_CL73_IEEEB1_AN_ADV2_ADVR_10G_KX4;
-               if (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)
+               if (phy->speed_cap_mask &
+                   PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)
                        reg_val |= MDIO_CL73_IEEEB1_AN_ADV2_ADVR_1000M_KX;
 
                CL22_WR_OVER_CL45(sc, phy,
                                  MDIO_REG_BANK_CL73_IEEEB1,
-                                 MDIO_CL73_IEEEB1_AN_ADV2, reg_val);
+                                 MDIO_CL73_IEEEB1_AN_ADV2,
+                                 reg_val);
 
                /* CL73 Autoneg Enabled */
                reg_val = MDIO_CL73_IEEEB0_CL73_AN_CONTROL_AN_EN;
 
-       } else                  /* CL73 Autoneg Disabled */
+       } else /* CL73 Autoneg Disabled */
                reg_val = 0;
 
        CL22_WR_OVER_CL45(sc, phy,
@@ -4775,7 +6008,7 @@ static void elink_program_serdes(struct elink_phy *phy,
        struct bnx2x_softc *sc = params->sc;
        uint16_t reg_val;
 
        /* Program duplex, disable autoneg and sgmii */
        CL22_RD_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_COMBO_IEEE0,
                          MDIO_COMBO_IEEE0_MII_CONTROL, &reg_val);
@@ -4795,7 +6028,7 @@ static void elink_program_serdes(struct elink_phy *phy,
                          MDIO_REG_BANK_SERDES_DIGITAL,
                          MDIO_SERDES_DIGITAL_MISC1, &reg_val);
        /* Clearing the speed value before setting the right speed */
-       PMD_DRV_LOG(DEBUG, "MDIO_REG_BANK_SERDES_DIGITAL = 0x%x", reg_val);
+       ELINK_DEBUG_P1(sc, "MDIO_REG_BANK_SERDES_DIGITAL = 0x%x", reg_val);
 
        reg_val &= ~(MDIO_SERDES_DIGITAL_MISC1_FORCE_SPEED_MASK |
                     MDIO_SERDES_DIGITAL_MISC1_FORCE_SPEED_SEL);
@@ -4808,7 +6041,7 @@ static void elink_program_serdes(struct elink_phy *phy,
                            MDIO_SERDES_DIGITAL_MISC1_FORCE_SPEED_SEL);
                if (vars->line_speed == ELINK_SPEED_10000)
                        reg_val |=
-                           MDIO_SERDES_DIGITAL_MISC1_FORCE_SPEED_10G_CX4;
+                               MDIO_SERDES_DIGITAL_MISC1_FORCE_SPEED_10G_CX4;
        }
 
        CL22_WR_OVER_CL45(sc, phy,
@@ -4829,10 +6062,12 @@ static void elink_set_brcm_cl37_advertisement(struct elink_phy *phy,
        if (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)
                val |= MDIO_OVER_1G_UP1_10G;
        CL22_WR_OVER_CL45(sc, phy,
-                         MDIO_REG_BANK_OVER_1G, MDIO_OVER_1G_UP1, val);
+                         MDIO_REG_BANK_OVER_1G,
+                         MDIO_OVER_1G_UP1, val);
 
        CL22_WR_OVER_CL45(sc, phy,
-                         MDIO_REG_BANK_OVER_1G, MDIO_OVER_1G_UP3, 0x400);
+                         MDIO_REG_BANK_OVER_1G,
+                         MDIO_OVER_1G_UP3, 0x400);
 }
 
 static void elink_set_ieee_aneg_advertisement(struct elink_phy *phy,
@@ -4863,7 +6098,7 @@ static void elink_restart_autoneg(struct elink_phy *phy,
        struct bnx2x_softc *sc = params->sc;
        uint16_t mii_control;
 
-       PMD_DRV_LOG(DEBUG, "elink_restart_autoneg");
+       ELINK_DEBUG_P0(sc, "elink_restart_autoneg");
        /* Enable and restart BAM/CL37 aneg */
 
        if (enable_cl73) {
@@ -4876,16 +6111,17 @@ static void elink_restart_autoneg(struct elink_phy *phy,
                                  MDIO_REG_BANK_CL73_IEEEB0,
                                  MDIO_CL73_IEEEB0_CL73_AN_CONTROL,
                                  (mii_control |
-                                  MDIO_CL73_IEEEB0_CL73_AN_CONTROL_AN_EN |
-                                  MDIO_CL73_IEEEB0_CL73_AN_CONTROL_RESTART_AN));
+                                 MDIO_CL73_IEEEB0_CL73_AN_CONTROL_AN_EN |
+                                 MDIO_CL73_IEEEB0_CL73_AN_CONTROL_RESTART_AN));
        } else {
 
                CL22_RD_OVER_CL45(sc, phy,
                                  MDIO_REG_BANK_COMBO_IEEE0,
-                                 MDIO_COMBO_IEEE0_MII_CONTROL, &mii_control);
-               PMD_DRV_LOG(DEBUG,
-                           "elink_restart_autoneg mii_control before = 0x%x",
-                           mii_control);
+                                 MDIO_COMBO_IEEE0_MII_CONTROL,
+                                 &mii_control);
+               ELINK_DEBUG_P1(sc,
+                        "elink_restart_autoneg mii_control before = 0x%x",
+                        mii_control);
                CL22_WR_OVER_CL45(sc, phy,
                                  MDIO_REG_BANK_COMBO_IEEE0,
                                  MDIO_COMBO_IEEE0_MII_CONTROL,
@@ -4906,7 +6142,8 @@ static void elink_initialize_sgmii_process(struct elink_phy *phy,
 
        CL22_RD_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_SERDES_DIGITAL,
-                         MDIO_SERDES_DIGITAL_A_1000X_CONTROL1, &control1);
+                         MDIO_SERDES_DIGITAL_A_1000X_CONTROL1,
+                         &control1);
        control1 |= MDIO_SERDES_DIGITAL_A_1000X_CONTROL1_INVERT_SIGNAL_DETECT;
        /* Set sgmii mode (and not fiber) */
        control1 &= ~(MDIO_SERDES_DIGITAL_A_1000X_CONTROL1_FIBER_MODE |
@@ -4914,7 +6151,8 @@ static void elink_initialize_sgmii_process(struct elink_phy *phy,
                      MDIO_SERDES_DIGITAL_A_1000X_CONTROL1_MSTR_MODE);
        CL22_WR_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_SERDES_DIGITAL,
-                         MDIO_SERDES_DIGITAL_A_1000X_CONTROL1, control1);
+                         MDIO_SERDES_DIGITAL_A_1000X_CONTROL1,
+                         control1);
 
        /* If forced speed */
        if (!(vars->line_speed == ELINK_SPEED_AUTO_NEG)) {
@@ -4923,7 +6161,8 @@ static void elink_initialize_sgmii_process(struct elink_phy *phy,
 
                CL22_RD_OVER_CL45(sc, phy,
                                  MDIO_REG_BANK_COMBO_IEEE0,
-                                 MDIO_COMBO_IEEE0_MII_CONTROL, &mii_control);
+                                 MDIO_COMBO_IEEE0_MII_CONTROL,
+                                 &mii_control);
                mii_control &= ~(MDIO_COMBO_IEEO_MII_CONTROL_AN_EN |
                                 MDIO_COMBO_IEEO_MII_CONTROL_MAN_SGMII_SP_MASK |
                                 MDIO_COMBO_IEEO_MII_CONTROL_FULL_DUPLEX);
@@ -4931,30 +6170,32 @@ static void elink_initialize_sgmii_process(struct elink_phy *phy,
                switch (vars->line_speed) {
                case ELINK_SPEED_100:
                        mii_control |=
-                           MDIO_COMBO_IEEO_MII_CONTROL_MAN_SGMII_SP_100;
+                               MDIO_COMBO_IEEO_MII_CONTROL_MAN_SGMII_SP_100;
                        break;
                case ELINK_SPEED_1000:
                        mii_control |=
-                           MDIO_COMBO_IEEO_MII_CONTROL_MAN_SGMII_SP_1000;
+                               MDIO_COMBO_IEEO_MII_CONTROL_MAN_SGMII_SP_1000;
                        break;
                case ELINK_SPEED_10:
                        /* There is nothing to set for 10M */
                        break;
                default:
                        /* Invalid speed for SGMII */
-                       PMD_DRV_LOG(DEBUG, "Invalid line_speed 0x%x",
-                                   vars->line_speed);
+                       ELINK_DEBUG_P1(sc, "Invalid line_speed 0x%x",
+                                 vars->line_speed);
                        break;
                }
 
                /* Setting the full duplex */
                if (phy->req_duplex == DUPLEX_FULL)
-                       mii_control |= MDIO_COMBO_IEEO_MII_CONTROL_FULL_DUPLEX;
+                       mii_control |=
+                               MDIO_COMBO_IEEO_MII_CONTROL_FULL_DUPLEX;
                CL22_WR_OVER_CL45(sc, phy,
                                  MDIO_REG_BANK_COMBO_IEEE0,
-                                 MDIO_COMBO_IEEE0_MII_CONTROL, mii_control);
+                                 MDIO_COMBO_IEEE0_MII_CONTROL,
+                                 mii_control);
 
-       } else {                /* AN mode */
+       } else { /* AN mode */
                /* Enable and restart AN */
                elink_restart_autoneg(phy, params, 0);
        }
@@ -4963,8 +6204,7 @@ static void elink_initialize_sgmii_process(struct elink_phy *phy,
 /* Link management
  */
 static elink_status_t elink_direct_parallel_detect_used(struct elink_phy *phy,
-                                                       struct elink_params
-                                                       *params)
+                                            struct elink_params *params)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t pd_10g, status2_1000x;
@@ -4972,34 +6212,38 @@ static elink_status_t elink_direct_parallel_detect_used(struct elink_phy *phy,
                return ELINK_STATUS_OK;
        CL22_RD_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_SERDES_DIGITAL,
-                         MDIO_SERDES_DIGITAL_A_1000X_STATUS2, &status2_1000x);
+                         MDIO_SERDES_DIGITAL_A_1000X_STATUS2,
+                         &status2_1000x);
        CL22_RD_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_SERDES_DIGITAL,
-                         MDIO_SERDES_DIGITAL_A_1000X_STATUS2, &status2_1000x);
+                         MDIO_SERDES_DIGITAL_A_1000X_STATUS2,
+                         &status2_1000x);
        if (status2_1000x & MDIO_SERDES_DIGITAL_A_1000X_STATUS2_AN_DISABLED) {
-               PMD_DRV_LOG(DEBUG, "1G parallel detect link on port %d",
-                           params->port);
-               return ELINK_STATUS_ERROR;
+               ELINK_DEBUG_P1(sc, "1G parallel detect link on port %d",
+                        params->port);
+               return 1;
        }
 
        CL22_RD_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_10G_PARALLEL_DETECT,
-                         MDIO_10G_PARALLEL_DETECT_PAR_DET_10G_STATUS, &pd_10g);
+                         MDIO_10G_PARALLEL_DETECT_PAR_DET_10G_STATUS,
+                         &pd_10g);
 
        if (pd_10g & MDIO_10G_PARALLEL_DETECT_PAR_DET_10G_STATUS_PD_LINK) {
-               PMD_DRV_LOG(DEBUG, "10G parallel detect link on port %d",
-                           params->port);
-               return ELINK_STATUS_ERROR;
+               ELINK_DEBUG_P1(sc, "10G parallel detect link on port %d",
+                        params->port);
+               return 1;
        }
        return ELINK_STATUS_OK;
 }
 
 static void elink_update_adv_fc(struct elink_phy *phy,
                                struct elink_params *params,
-                               struct elink_vars *vars, uint32_t gp_status)
+                               struct elink_vars *vars,
+                               uint32_t gp_status)
 {
-       uint16_t ld_pause;      /* local driver */
-       uint16_t lp_pause;      /* link partner */
+       uint16_t ld_pause;   /* local driver */
+       uint16_t lp_pause;   /* link partner */
        uint16_t pause_result;
        struct bnx2x_softc *sc = params->sc;
        if ((gp_status &
@@ -5010,37 +6254,42 @@ static void elink_update_adv_fc(struct elink_phy *phy,
 
                CL22_RD_OVER_CL45(sc, phy,
                                  MDIO_REG_BANK_CL73_IEEEB1,
-                                 MDIO_CL73_IEEEB1_AN_ADV1, &ld_pause);
+                                 MDIO_CL73_IEEEB1_AN_ADV1,
+                                 &ld_pause);
                CL22_RD_OVER_CL45(sc, phy,
                                  MDIO_REG_BANK_CL73_IEEEB1,
-                                 MDIO_CL73_IEEEB1_AN_LP_ADV1, &lp_pause);
+                                 MDIO_CL73_IEEEB1_AN_LP_ADV1,
+                                 &lp_pause);
                pause_result = (ld_pause &
                                MDIO_CL73_IEEEB1_AN_ADV1_PAUSE_MASK) >> 8;
                pause_result |= (lp_pause &
                                 MDIO_CL73_IEEEB1_AN_LP_ADV1_PAUSE_MASK) >> 10;
-               PMD_DRV_LOG(DEBUG, "pause_result CL73 0x%x", pause_result);
+               ELINK_DEBUG_P1(sc, "pause_result CL73 0x%x", pause_result);
        } else {
                CL22_RD_OVER_CL45(sc, phy,
                                  MDIO_REG_BANK_COMBO_IEEE0,
-                                 MDIO_COMBO_IEEE0_AUTO_NEG_ADV, &ld_pause);
+                                 MDIO_COMBO_IEEE0_AUTO_NEG_ADV,
+                                 &ld_pause);
                CL22_RD_OVER_CL45(sc, phy,
-                                 MDIO_REG_BANK_COMBO_IEEE0,
-                                 MDIO_COMBO_IEEE0_AUTO_NEG_LINK_PARTNER_ABILITY1,
-                                 &lp_pause);
+                       MDIO_REG_BANK_COMBO_IEEE0,
+                       MDIO_COMBO_IEEE0_AUTO_NEG_LINK_PARTNER_ABILITY1,
+                       &lp_pause);
                pause_result = (ld_pause &
                                MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK) >> 5;
                pause_result |= (lp_pause &
                                 MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK) >> 7;
-               PMD_DRV_LOG(DEBUG, "pause_result CL37 0x%x", pause_result);
+               ELINK_DEBUG_P1(sc, "pause_result CL37 0x%x", pause_result);
        }
-       elink_pause_resolve(vars, pause_result);
+       elink_pause_resolve(phy, params, vars, pause_result);
 
 }
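elink_update_adv_fc() packs the local-driver and link-partner pause advertisement bits into a single pause_result nibble and defers to elink_pause_resolve(). The resolution itself follows IEEE 802.3 Annex 28B; a hedged sketch with the four bits unpacked into booleans (the driver operates on the packed nibble, and its exact table may differ):

#include <stdbool.h>
#include <stdio.h>

/* IEEE 802.3 Annex 28B resolution over unpacked advertisement bits. */
static void resolve_pause(bool ld_sym, bool ld_asym,
                          bool lp_sym, bool lp_asym,
                          bool *tx_en, bool *rx_en)
{
        *tx_en = false;
        *rx_en = false;
        if (ld_sym && lp_sym) {
                *tx_en = *rx_en = true;         /* symmetric pause */
        } else if (ld_asym && lp_asym) {
                if (ld_sym)
                        *rx_en = true;          /* partner pauses us */
                else if (lp_sym)
                        *tx_en = true;          /* we pause the partner */
        }
}

int main(void)
{
        bool tx, rx;

        /* Local: asym only; partner: sym + asym => TX pause only. */
        resolve_pause(false, true, true, true, &tx, &rx);
        printf("tx_pause=%d rx_pause=%d\n", tx, rx);
        return 0;
}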
 
 static void elink_flow_ctrl_resolve(struct elink_phy *phy,
                                    struct elink_params *params,
-                                   struct elink_vars *vars, uint32_t gp_status)
+                                   struct elink_vars *vars,
+                                   uint32_t gp_status)
 {
+       struct bnx2x_softc *sc = params->sc;
        vars->flow_ctrl = ELINK_FLOW_CTRL_NONE;
 
        /* Resolve from gp_status in case of AN complete and not sgmii */
@@ -5060,7 +6309,7 @@ static void elink_flow_ctrl_resolve(struct elink_phy *phy,
                }
                elink_update_adv_fc(phy, params, vars, gp_status);
        }
-       PMD_DRV_LOG(DEBUG, "flow_ctrl 0x%x", vars->flow_ctrl);
+       ELINK_DEBUG_P1(sc, "flow_ctrl 0x%x", vars->flow_ctrl);
 }
 
 static void elink_check_fallback_to_cl37(struct elink_phy *phy,
@@ -5068,14 +6317,16 @@ static void elink_check_fallback_to_cl37(struct elink_phy *phy,
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t rx_status, ustat_val, cl37_fsm_received;
-       PMD_DRV_LOG(DEBUG, "elink_check_fallback_to_cl37");
+       ELINK_DEBUG_P0(sc, "elink_check_fallback_to_cl37");
        /* Step 1: Make sure signal is detected */
        CL22_RD_OVER_CL45(sc, phy,
-                         MDIO_REG_BANK_RX0, MDIO_RX0_RX_STATUS, &rx_status);
+                         MDIO_REG_BANK_RX0,
+                         MDIO_RX0_RX_STATUS,
+                         &rx_status);
        if ((rx_status & MDIO_RX0_RX_STATUS_SIGDET) !=
            (MDIO_RX0_RX_STATUS_SIGDET)) {
-               PMD_DRV_LOG(DEBUG, "Signal is not detected. Restoring CL73."
-                           "rx_status(0x80b0) = 0x%x", rx_status);
+               ELINK_DEBUG_P1(sc, "Signal is not detected. Restoring CL73."
+                            "rx_status(0x80b0) = 0x%x", rx_status);
                CL22_WR_OVER_CL45(sc, phy,
                                  MDIO_REG_BANK_CL73_IEEEB0,
                                  MDIO_CL73_IEEEB0_CL73_AN_CONTROL,
@@ -5085,14 +6336,15 @@ static void elink_check_fallback_to_cl37(struct elink_phy *phy,
        /* Step 2: Check CL73 state machine */
        CL22_RD_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_CL73_USERB0,
-                         MDIO_CL73_USERB0_CL73_USTAT1, &ustat_val);
+                         MDIO_CL73_USERB0_CL73_USTAT1,
+                         &ustat_val);
        if ((ustat_val &
             (MDIO_CL73_USERB0_CL73_USTAT1_LINK_STATUS_CHECK |
              MDIO_CL73_USERB0_CL73_USTAT1_AN_GOOD_CHECK_BAM37)) !=
            (MDIO_CL73_USERB0_CL73_USTAT1_LINK_STATUS_CHECK |
-            MDIO_CL73_USERB0_CL73_USTAT1_AN_GOOD_CHECK_BAM37)) {
-               PMD_DRV_LOG(DEBUG, "CL73 state-machine is not stable. "
-                           "ustat_val(0x8371) = 0x%x", ustat_val);
+             MDIO_CL73_USERB0_CL73_USTAT1_AN_GOOD_CHECK_BAM37)) {
+               ELINK_DEBUG_P1(sc, "CL73 state-machine is not stable. "
+                            "ustat_val(0x8371) = 0x%x", ustat_val);
                return;
        }
        /* Step 3: Check CL37 Message Pages received to indicate LP
@@ -5100,14 +6352,16 @@ static void elink_check_fallback_to_cl37(struct elink_phy *phy,
         */
        CL22_RD_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_REMOTE_PHY,
-                         MDIO_REMOTE_PHY_MISC_RX_STATUS, &cl37_fsm_received);
+                         MDIO_REMOTE_PHY_MISC_RX_STATUS,
+                         &cl37_fsm_received);
        if ((cl37_fsm_received &
             (MDIO_REMOTE_PHY_MISC_RX_STATUS_CL37_FSM_RECEIVED_OVER1G_MSG |
-             MDIO_REMOTE_PHY_MISC_RX_STATUS_CL37_FSM_RECEIVED_BRCM_OUI_MSG)) !=
+            MDIO_REMOTE_PHY_MISC_RX_STATUS_CL37_FSM_RECEIVED_BRCM_OUI_MSG)) !=
            (MDIO_REMOTE_PHY_MISC_RX_STATUS_CL37_FSM_RECEIVED_OVER1G_MSG |
-            MDIO_REMOTE_PHY_MISC_RX_STATUS_CL37_FSM_RECEIVED_BRCM_OUI_MSG)) {
-               PMD_DRV_LOG(DEBUG, "No CL37 FSM were received. "
-                           "misc_rx_status(0x8330) = 0x%x", cl37_fsm_received);
+             MDIO_REMOTE_PHY_MISC_RX_STATUS_CL37_FSM_RECEIVED_BRCM_OUI_MSG)) {
+               ELINK_DEBUG_P1(sc, "No CL37 FSM were received. "
+                            "misc_rx_status(0x8330) = 0x%x",
+                        cl37_fsm_received);
                return;
        }
        /* The combined cl37/cl73 fsm state information indicating that
@@ -5119,34 +6373,38 @@ static void elink_check_fallback_to_cl37(struct elink_phy *phy,
        /* Disable CL73 */
        CL22_WR_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_CL73_IEEEB0,
-                         MDIO_CL73_IEEEB0_CL73_AN_CONTROL, 0);
+                         MDIO_CL73_IEEEB0_CL73_AN_CONTROL,
+                         0);
        /* Restart CL37 autoneg */
        elink_restart_autoneg(phy, params, 0);
-       PMD_DRV_LOG(DEBUG, "Disabling CL73, and restarting CL37 autoneg");
+       ELINK_DEBUG_P0(sc, "Disabling CL73, and restarting CL37 autoneg");
 }
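elink_check_fallback_to_cl37() is three guarded steps, signal detect, CL73 FSM stability, then CL37 message pages, each one an exact all-bits-set test with an early return when it fails. A sketch of that recurring exact-match guard; the register value and bit pair are invented:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* The fallback checks all use the same exact-match test: every bit in
 * 'want' must read back set, otherwise bail out early. */
static bool bits_all_set(uint16_t reg, uint16_t want)
{
        return (reg & want) == want;
}

int main(void)
{
        uint16_t ustat_val = 0x0180;            /* invented readback */
        uint16_t want = 0x0100 | 0x0080;        /* invented bit pair */

        if (!bits_all_set(ustat_val, want)) {
                printf("CL73 state-machine is not stable\n");
                return 0;
        }
        printf("LP supports only CL37: fall back\n");
        return 0;
}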
 
 static void elink_xgxs_an_resolve(struct elink_phy *phy,
                                  struct elink_params *params,
-                                 struct elink_vars *vars, uint32_t gp_status)
+                                 struct elink_vars *vars,
+                                 uint32_t gp_status)
 {
        if (gp_status & ELINK_MDIO_AN_CL73_OR_37_COMPLETE)
-               vars->link_status |= LINK_STATUS_AUTO_NEGOTIATE_COMPLETE;
+               vars->link_status |=
+                       LINK_STATUS_AUTO_NEGOTIATE_COMPLETE;
 
        if (elink_direct_parallel_detect_used(phy, params))
-               vars->link_status |= LINK_STATUS_PARALLEL_DETECTION_USED;
+               vars->link_status |=
+                       LINK_STATUS_PARALLEL_DETECTION_USED;
 }
-
 static elink_status_t elink_get_link_speed_duplex(struct elink_phy *phy,
-                                                 struct elink_params *params __rte_unused,
-                                                 struct elink_vars *vars,
-                                                 uint16_t is_link_up,
-                                                 uint16_t speed_mask,
-                                                 uint16_t is_duplex)
+                                    struct elink_params *params,
+                                     struct elink_vars *vars,
+                                     uint16_t is_link_up,
+                                     uint16_t speed_mask,
+                                     uint16_t is_duplex)
 {
+       struct bnx2x_softc *sc = params->sc;
        if (phy->req_line_speed == ELINK_SPEED_AUTO_NEG)
                vars->link_status |= LINK_STATUS_AUTO_NEGOTIATE_ENABLED;
        if (is_link_up) {
-               PMD_DRV_LOG(DEBUG, "phy link up");
+               ELINK_DEBUG_P0(sc, "phy link up");
 
                vars->phy_link_up = 1;
                vars->link_status |= LINK_STATUS_LINK_UP;
@@ -5187,9 +6445,9 @@ static elink_status_t elink_get_link_speed_duplex(struct elink_phy *phy,
 
                case ELINK_GP_STATUS_5G:
                case ELINK_GP_STATUS_6G:
-                       PMD_DRV_LOG(DEBUG,
-                                   "link speed unsupported  gp_status 0x%x",
-                                   speed_mask);
+                       ELINK_DEBUG_P1(sc,
+                                "link speed unsupported  gp_status 0x%x",
+                                 speed_mask);
                        return ELINK_STATUS_ERROR;
 
                case ELINK_GP_STATUS_10G_KX4:
@@ -5207,13 +6465,13 @@ static elink_status_t elink_get_link_speed_duplex(struct elink_phy *phy,
                        vars->link_status |= ELINK_LINK_20GTFD;
                        break;
                default:
-                       PMD_DRV_LOG(DEBUG,
-                                   "link speed unsupported gp_status 0x%x",
-                                   speed_mask);
+                       ELINK_DEBUG_P1(sc,
+                                 "link speed unsupported gp_status 0x%x",
+                                 speed_mask);
                        return ELINK_STATUS_ERROR;
                }
-       } else {                /* link_down */
-               PMD_DRV_LOG(DEBUG, "phy link down");
+       } else { /* link_down */
+               ELINK_DEBUG_P0(sc, "phy link down");
 
                vars->phy_link_up = 0;
 
@@ -5221,14 +6479,16 @@ static elink_status_t elink_get_link_speed_duplex(struct elink_phy *phy,
                vars->flow_ctrl = ELINK_FLOW_CTRL_NONE;
                vars->mac_type = ELINK_MAC_TYPE_NONE;
        }
-       PMD_DRV_LOG(DEBUG, " phy_link_up %x line_speed %d",
+       ELINK_DEBUG_P2(sc, "elink_get_link_speed_duplex: link_status = %x, duplex = %x",
+                      vars->link_status, vars->duplex);
+       ELINK_DEBUG_P2(sc, " phy_link_up %x line_speed %d",
                    vars->phy_link_up, vars->line_speed);
        return ELINK_STATUS_OK;
 }
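Both the gp_status switch above and elink_sync_link() earlier decode speed and duplex together: each half-duplex status code sets DUPLEX_HALF and then falls through into the case that assigns the shared line speed, which is what the "Fall through" comments annotate. The shape in isolation, with invented status codes in place of the ELINK_LINK_* values:

#include <stdio.h>

enum { DUPLEX_FULL, DUPLEX_HALF };
/* Invented status codes standing in for the ELINK_LINK_* values. */
enum { LINK_10THD, LINK_10TFD, LINK_100TXHD, LINK_100TXFD };

int main(void)
{
        int status = LINK_100TXHD;
        int duplex = DUPLEX_FULL, speed = 0;

        switch (status) {
        case LINK_10THD:
                duplex = DUPLEX_HALF;
                /* Fall through */
        case LINK_10TFD:
                speed = 10;
                break;
        case LINK_100TXHD:
                duplex = DUPLEX_HALF;
                /* Fall through */
        case LINK_100TXFD:
                speed = 100;
                break;
        }
        printf("speed %d duplex %s\n", speed,
               duplex == DUPLEX_HALF ? "half" : "full");
        return 0;
}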
 
 static uint8_t elink_link_settings_status(struct elink_phy *phy,
-                                         struct elink_params *params,
-                                         struct elink_vars *vars)
+                                     struct elink_params *params,
+                                     struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
 
@@ -5238,14 +6498,23 @@ static uint8_t elink_link_settings_status(struct elink_phy *phy,
        /* Read gp_status */
        CL22_RD_OVER_CL45(sc, phy,
                          MDIO_REG_BANK_GP_STATUS,
-                         MDIO_GP_STATUS_TOP_AN_STATUS1, &gp_status);
-       if (gp_status & MDIO_GP_STATUS_TOP_AN_STATUS1_DUPLEX_STATUS)
+                         MDIO_GP_STATUS_TOP_AN_STATUS1,
+                         &gp_status);
+       if (gp_status & MDIO_GP_STATUS_TOP_AN_STATUS1_DUPLEX_STATUS) {
                duplex = DUPLEX_FULL;
+               ELINK_DEBUG_P1(sc, "duplex status read from phy = %x",
+                               duplex);
+       } else {
+               ELINK_DEBUG_P1(sc, "phy status does not allow FULL_DUPLEX, gp_status = %x",
+                       gp_status);
+       }
+
        if (gp_status & MDIO_GP_STATUS_TOP_AN_STATUS1_LINK_STATUS)
                link_up = 1;
        speed_mask = gp_status & ELINK_GP_STATUS_SPEED_MASK;
-       PMD_DRV_LOG(DEBUG, "gp_status 0x%x, is_link_up %d, speed_mask 0x%x",
-                   gp_status, link_up, speed_mask);
+       ELINK_DEBUG_P3(sc, "gp_status 0x%x, is_link_up %d, speed_mask 0x%x",
+                      gp_status, link_up, speed_mask);
        rc = elink_get_link_speed_duplex(phy, params, vars, link_up, speed_mask,
                                         duplex);
        if (rc == ELINK_STATUS_ERROR)
@@ -5259,7 +6528,7 @@ static uint8_t elink_link_settings_status(struct elink_phy *phy,
                                elink_xgxs_an_resolve(phy, params, vars,
                                                      gp_status);
                }
-       } else {                /* Link_down */
+       } else { /* Link_down */
                if ((phy->req_line_speed == ELINK_SPEED_AUTO_NEG) &&
                    ELINK_SINGLE_MEDIA_DIRECT(params)) {
                        /* Check signal is detected */
@@ -5267,7 +6536,7 @@ static uint8_t elink_link_settings_status(struct elink_phy *phy,
                }
        }
 
        /* Read LP advertised speeds */
        if (ELINK_SINGLE_MEDIA_DIRECT(params) &&
            (vars->link_status & LINK_STATUS_AUTO_NEGOTIATE_COMPLETE)) {
                uint16_t val;
@@ -5277,61 +6546,69 @@ static uint8_t elink_link_settings_status(struct elink_phy *phy,
 
                if (val & MDIO_CL73_IEEEB1_AN_ADV2_ADVR_1000M_KX)
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE;
                if (val & (MDIO_CL73_IEEEB1_AN_ADV2_ADVR_10G_KX4 |
                           MDIO_CL73_IEEEB1_AN_ADV2_ADVR_10G_KR))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
 
                CL22_RD_OVER_CL45(sc, phy, MDIO_REG_BANK_OVER_1G,
                                  MDIO_OVER_1G_LP_UP1, &val);
 
                if (val & MDIO_OVER_1G_UP1_2_5G)
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_2500XFD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_2500XFD_CAPABLE;
                if (val & (MDIO_OVER_1G_UP1_10G | MDIO_OVER_1G_UP1_10GH))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
        }
 
-       PMD_DRV_LOG(DEBUG, "duplex %x  flow_ctrl 0x%x link_status 0x%x",
-                   vars->duplex, vars->flow_ctrl, vars->link_status);
+       ELINK_DEBUG_P3(sc, "duplex %x  flow_ctrl 0x%x link_status 0x%x",
+                  vars->duplex, vars->flow_ctrl, vars->link_status);
        return rc;
 }
 
 static uint8_t elink_warpcore_read_status(struct elink_phy *phy,
-                                         struct elink_params *params,
-                                         struct elink_vars *vars)
+                                    struct elink_params *params,
+                                    struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t lane;
        uint16_t gp_status1, gp_speed, link_up, duplex = DUPLEX_FULL;
        elink_status_t rc = ELINK_STATUS_OK;
-       lane = elink_get_warpcore_lane(params);
+       lane = elink_get_warpcore_lane(phy, params);
        /* Read gp_status */
-       if ((params->loopback_mode) && (phy->flags & ELINK_FLAGS_WC_DUAL_MODE)) {
+       if ((params->loopback_mode) &&
+           (phy->flags & ELINK_FLAGS_WC_DUAL_MODE)) {
                elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
                                MDIO_WC_REG_DIGITAL5_LINK_STATUS, &link_up);
                elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
                                MDIO_WC_REG_DIGITAL5_LINK_STATUS, &link_up);
                link_up &= 0x1;
+               ELINK_DEBUG_P1(sc, "params->loopback_mode link_up read = %x",
+                               link_up);
        } else if ((phy->req_line_speed > ELINK_SPEED_10000) &&
-                  (phy->supported & ELINK_SUPPORTED_20000baseMLD2_Full)) {
+               (phy->supported & ELINK_SUPPORTED_20000baseMLD2_Full)) {
                uint16_t temp_link_up;
-               elink_cl45_read(sc, phy, MDIO_WC_DEVAD, 1, &temp_link_up);
-               elink_cl45_read(sc, phy, MDIO_WC_DEVAD, 1, &link_up);
-               PMD_DRV_LOG(DEBUG, "PCS RX link status = 0x%x-->0x%x",
-                           temp_link_up, link_up);
+               elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
+                               1, &temp_link_up);
+               elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
+                               1, &link_up);
+               ELINK_DEBUG_P2(sc, "PCS RX link status = 0x%x-->0x%x",
+                              temp_link_up, link_up);
                link_up &= (1 << 2);
                if (link_up)
                        elink_ext_phy_resolve_fc(phy, params, vars);
        } else {
                elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
-                               MDIO_WC_REG_GP2_STATUS_GP_2_1, &gp_status1);
-               PMD_DRV_LOG(DEBUG, "0x81d1 = 0x%x", gp_status1);
+                               MDIO_WC_REG_GP2_STATUS_GP_2_1,
+                               &gp_status1);
+               ELINK_DEBUG_P1(sc, "0x81d1 = 0x%x", gp_status1);
                /* Check for either KR, 1G, or AN up. */
                link_up = ((gp_status1 >> 8) |
-                          (gp_status1 >> 12) | (gp_status1)) & (1 << lane);
+                          (gp_status1 >> 12) |
+                          (gp_status1)) & (1 << lane);
                if (phy->supported & ELINK_SUPPORTED_20000baseKR2_Full) {
                        uint16_t an_link;
                        elink_cl45_read(sc, phy, MDIO_AN_DEVAD,
@@ -5339,6 +6616,8 @@ static uint8_t elink_warpcore_read_status(struct elink_phy *phy,
                        elink_cl45_read(sc, phy, MDIO_AN_DEVAD,
                                        MDIO_AN_REG_STATUS, &an_link);
                        link_up |= (an_link & (1 << 2));
+                       ELINK_DEBUG_P2(sc, "an_link = %x, link_up = %x",
+                                       an_link, link_up);
                }
                if (link_up && ELINK_SINGLE_MEDIA_DIRECT(params)) {
                        uint16_t pd, gp_status4;
@@ -5349,7 +6628,7 @@ static uint8_t elink_warpcore_read_status(struct elink_phy *phy,
                                                &gp_status4);
                                if (gp_status4 & ((1 << 12) << lane))
                                        vars->link_status |=
-                                           LINK_STATUS_AUTO_NEGOTIATE_COMPLETE;
+                                       LINK_STATUS_AUTO_NEGOTIATE_COMPLETE;
 
                                /* Check parallel detect used */
                                elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
@@ -5357,13 +6636,19 @@ static uint8_t elink_warpcore_read_status(struct elink_phy *phy,
                                                &pd);
                                if (pd & (1 << 15))
                                        vars->link_status |=
-                                           LINK_STATUS_PARALLEL_DETECTION_USED;
+                                       LINK_STATUS_PARALLEL_DETECTION_USED;
+                               ELINK_DEBUG_P2(sc, "pd = %x, link_status = %x",
+                                               pd, vars->link_status);
                        }
                        elink_ext_phy_resolve_fc(phy, params, vars);
                        vars->duplex = duplex;
+                       ELINK_DEBUG_P3(sc, " ELINK_SINGLE_MEDIA_DIRECT duplex %x  flow_ctrl 0x%x link_status 0x%x",
+                                       vars->duplex, vars->flow_ctrl,
+                                       vars->link_status);
                }
        }
-
+       ELINK_DEBUG_P3(sc, "duplex %x  flow_ctrl 0x%x link_status 0x%x",
+                       vars->duplex, vars->flow_ctrl, vars->link_status);
        if ((vars->link_status & LINK_STATUS_AUTO_NEGOTIATE_COMPLETE) &&
            ELINK_SINGLE_MEDIA_DIRECT(params)) {
                uint16_t val;
@@ -5373,24 +6658,28 @@ static uint8_t elink_warpcore_read_status(struct elink_phy *phy,
 
                if (val & MDIO_CL73_IEEEB1_AN_ADV2_ADVR_1000M_KX)
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE;
                if (val & (MDIO_CL73_IEEEB1_AN_ADV2_ADVR_10G_KX4 |
                           MDIO_CL73_IEEEB1_AN_ADV2_ADVR_10G_KR))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
-
+                               LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
+               ELINK_DEBUG_P2(sc, "val = %x, link_status = %x",
+                               val, vars->link_status);
                elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
                                MDIO_WC_REG_DIGITAL3_LP_UP1, &val);
 
                if (val & MDIO_OVER_1G_UP1_2_5G)
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_2500XFD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_2500XFD_CAPABLE;
                if (val & (MDIO_OVER_1G_UP1_10G | MDIO_OVER_1G_UP1_10GH))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
+               ELINK_DEBUG_P2(sc, "val = %x, link_status = %x",
+                               val, vars->link_status);
 
        }
 
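+       /* Lanes 0-1 report link speed in GP_2_2; lanes 2-3 in GP_2_3 */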
        if (lane < 2) {
                elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
                                MDIO_WC_REG_GP2_STATUS_GP_2_2, &gp_speed);
@@ -5398,12 +6687,12 @@ static uint8_t elink_warpcore_read_status(struct elink_phy *phy,
                elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
                                MDIO_WC_REG_GP2_STATUS_GP_2_3, &gp_speed);
        }
-       PMD_DRV_LOG(DEBUG, "lane %d gp_speed 0x%x", lane, gp_speed);
+       ELINK_DEBUG_P2(sc, "lane %d gp_speed 0x%x", lane, gp_speed);
 
        if ((lane & 1) == 0)
                gp_speed <<= 8;
        gp_speed &= 0x3f00;
-       link_up = ! !link_up;
+       link_up = !!link_up;
 
        /* Reset the TX FIFO to fix SGMII issue */
        rc = elink_get_link_speed_duplex(phy, params, vars, link_up, gp_speed,
@@ -5414,11 +6703,10 @@ static uint8_t elink_warpcore_read_status(struct elink_phy *phy,
            (!(phy->flags & ELINK_FLAGS_WC_DUAL_MODE)))
                vars->rx_tx_asic_rst = MAX_KR_LINK_RETRY;
 
-       PMD_DRV_LOG(DEBUG, "duplex %x  flow_ctrl 0x%x link_status 0x%x",
-                   vars->duplex, vars->flow_ctrl, vars->link_status);
+       ELINK_DEBUG_P3(sc, "duplex %x  flow_ctrl 0x%x link_status 0x%x",
+                  vars->duplex, vars->flow_ctrl, vars->link_status);
        return rc;
 }
 
 static void elink_set_gmii_tx_driver(struct elink_params *params)
 {
        struct bnx2x_softc *sc = params->sc;
@@ -5429,7 +6717,8 @@ static void elink_set_gmii_tx_driver(struct elink_params *params)
 
        /* Read precomp */
        CL22_RD_OVER_CL45(sc, phy,
-                         MDIO_REG_BANK_OVER_1G, MDIO_OVER_1G_LP_UP2, &lp_up2);
+                         MDIO_REG_BANK_OVER_1G,
+                         MDIO_OVER_1G_LP_UP2, &lp_up2);
 
        /* Bits [10:7] at lp_up2, positioned at [15:12] */
        lp_up2 = (((lp_up2 & MDIO_OVER_1G_LP_UP2_PREEMPHASIS_MASK) >>
@@ -5440,32 +6729,36 @@ static void elink_set_gmii_tx_driver(struct elink_params *params)
                return;
 
        for (bank = MDIO_REG_BANK_TX0; bank <= MDIO_REG_BANK_TX3;
-            bank += (MDIO_REG_BANK_TX1 - MDIO_REG_BANK_TX0)) {
+             bank += (MDIO_REG_BANK_TX1 - MDIO_REG_BANK_TX0)) {
                CL22_RD_OVER_CL45(sc, phy,
-                                 bank, MDIO_TX0_TX_DRIVER, &tx_driver);
+                                 bank,
+                                 MDIO_TX0_TX_DRIVER, &tx_driver);
 
                /* Replace tx_driver bits [15:12] */
-               if (lp_up2 != (tx_driver & MDIO_TX0_TX_DRIVER_PREEMPHASIS_MASK)) {
+               if (lp_up2 !=
+                   (tx_driver & MDIO_TX0_TX_DRIVER_PREEMPHASIS_MASK)) {
                        tx_driver &= ~MDIO_TX0_TX_DRIVER_PREEMPHASIS_MASK;
                        tx_driver |= lp_up2;
                        CL22_WR_OVER_CL45(sc, phy,
-                                         bank, MDIO_TX0_TX_DRIVER, tx_driver);
+                                         bank,
+                                         MDIO_TX0_TX_DRIVER, tx_driver);
                }
        }
 }
 
 static elink_status_t elink_emac_program(struct elink_params *params,
-                                        struct elink_vars *vars)
+                             struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t port = params->port;
        uint16_t mode = 0;
 
-       PMD_DRV_LOG(DEBUG, "setting link speed & duplex");
+       ELINK_DEBUG_P0(sc, "setting link speed & duplex");
        elink_bits_dis(sc, GRCBASE_EMAC0 + port * 0x400 +
                       EMAC_REG_EMAC_MODE,
                       (EMAC_MODE_25G_MODE |
-                       EMAC_MODE_PORT_MII_10M | EMAC_MODE_HALF_DUPLEX));
+                       EMAC_MODE_PORT_MII_10M |
+                       EMAC_MODE_HALF_DUPLEX));
        switch (vars->line_speed) {
        case ELINK_SPEED_10:
                mode |= EMAC_MODE_PORT_MII_10M;
@@ -5485,14 +6778,16 @@ static elink_status_t elink_emac_program(struct elink_params *params,
 
        default:
                /* 10G not valid for EMAC */
-               PMD_DRV_LOG(DEBUG, "Invalid line_speed 0x%x", vars->line_speed);
+               ELINK_DEBUG_P1(sc, "Invalid line_speed 0x%x",
+                          vars->line_speed);
                return ELINK_STATUS_ERROR;
        }
 
        if (vars->duplex == DUPLEX_HALF)
                mode |= EMAC_MODE_HALF_DUPLEX;
        elink_bits_en(sc,
-                     GRCBASE_EMAC0 + port * 0x400 + EMAC_REG_EMAC_MODE, mode);
+                     GRCBASE_EMAC0 + port * 0x400 + EMAC_REG_EMAC_MODE,
+                     mode);
 
        elink_set_led(params, vars, ELINK_LED_MODE_OPER, vars->line_speed);
        return ELINK_STATUS_OK;
@@ -5509,24 +6804,26 @@ static void elink_set_preemphasis(struct elink_phy *phy,
             bank += (MDIO_REG_BANK_RX1 - MDIO_REG_BANK_RX0), i++) {
                CL22_WR_OVER_CL45(sc, phy,
                                  bank,
-                                 MDIO_RX0_RX_EQ_BOOST, phy->rx_preemphasis[i]);
+                                 MDIO_RX0_RX_EQ_BOOST,
+                                 phy->rx_preemphasis[i]);
        }
 
        for (bank = MDIO_REG_BANK_TX0, i = 0; bank <= MDIO_REG_BANK_TX3;
             bank += (MDIO_REG_BANK_TX1 - MDIO_REG_BANK_TX0), i++) {
                CL22_WR_OVER_CL45(sc, phy,
                                  bank,
-                                 MDIO_TX0_TX_DRIVER, phy->tx_preemphasis[i]);
+                                 MDIO_TX0_TX_DRIVER,
+                                 phy->tx_preemphasis[i]);
        }
 }
 
 static uint8_t elink_xgxs_config_init(struct elink_phy *phy,
-                                     struct elink_params *params,
-                                     struct elink_vars *vars)
+                                  struct elink_params *params,
+                                  struct elink_vars *vars)
 {
+       struct bnx2x_softc *sc = params->sc;
        uint8_t enable_cl73 = (ELINK_SINGLE_MEDIA_DIRECT(params) ||
-                              (params->loopback_mode == ELINK_LOOPBACK_XGXS));
-
+                         (params->loopback_mode == ELINK_LOOPBACK_XGXS));
+
        if (!(vars->phy_flags & PHY_SGMII_FLAG)) {
                if (ELINK_SINGLE_MEDIA_DIRECT(params) &&
                    (params->feature_config_flags &
@@ -5537,7 +6834,7 @@ static uint8_t elink_xgxs_config_init(struct elink_phy *phy,
                if (vars->line_speed != ELINK_SPEED_AUTO_NEG ||
                    (ELINK_SINGLE_MEDIA_DIRECT(params) &&
                     params->loopback_mode == ELINK_LOOPBACK_EXT)) {
-                       PMD_DRV_LOG(DEBUG, "not SGMII, no AN");
+                       ELINK_DEBUG_P0(sc, "not SGMII, no AN");
 
                        /* Disable autoneg */
                        elink_set_autoneg(phy, params, vars, 0);
@@ -5545,8 +6842,8 @@ static uint8_t elink_xgxs_config_init(struct elink_phy *phy,
                        /* Program speed and duplex */
                        elink_program_serdes(phy, params, vars);
 
-               } else {        /* AN_mode */
-                       PMD_DRV_LOG(DEBUG, "not SGMII, AN");
+               } else { /* AN_mode */
+                       ELINK_DEBUG_P0(sc, "not SGMII, AN");
 
                        /* AN enabled */
                        elink_set_brcm_cl37_advertisement(phy, params);
@@ -5562,8 +6859,8 @@ static uint8_t elink_xgxs_config_init(struct elink_phy *phy,
                        elink_restart_autoneg(phy, params, enable_cl73);
                }
 
-       } else {                /* SGMII mode */
-               PMD_DRV_LOG(DEBUG, "SGMII");
+       } else { /* SGMII mode */
+               ELINK_DEBUG_P0(sc, "SGMII");
 
                elink_initialize_sgmii_process(phy, params, vars);
        }
@@ -5572,8 +6869,8 @@ static uint8_t elink_xgxs_config_init(struct elink_phy *phy,
 }
 
 static elink_status_t elink_prepare_xgxs(struct elink_phy *phy,
-                                        struct elink_params *params,
-                                        struct elink_vars *vars)
+                         struct elink_params *params,
+                         struct elink_vars *vars)
 {
        elink_status_t rc;
        vars->phy_flags |= PHY_XGXS_FLAG;
@@ -5611,28 +6908,32 @@ static elink_status_t elink_prepare_xgxs(struct elink_phy *phy,
 }
 
 static uint16_t elink_wait_reset_complete(struct bnx2x_softc *sc,
-                                         struct elink_phy *phy,
-                                         struct elink_params *params)
+                                    struct elink_phy *phy,
+                                    struct elink_params *params)
 {
        uint16_t cnt, ctrl;
        /* Wait for soft reset to get cleared up to 1 sec */
        for (cnt = 0; cnt < 1000; cnt++) {
-               if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X54618SE)
-                       elink_cl22_read(sc, phy, MDIO_PMA_REG_CTRL, &ctrl);
+               if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY2_TYPE_BNX2X54618SE)
+                       elink_cl22_read(sc, phy,
+                               MDIO_PMA_REG_CTRL, &ctrl);
                else
                        elink_cl45_read(sc, phy,
-                                       MDIO_PMA_DEVAD,
-                                       MDIO_PMA_REG_CTRL, &ctrl);
+                               MDIO_PMA_DEVAD,
+                               MDIO_PMA_REG_CTRL, &ctrl);
                if (!(ctrl & (1 << 15)))
                        break;
                DELAY(1000 * 1);
        }
 
        if (cnt == 1000)
-               elink_cb_event_log(sc, ELINK_LOG_ID_PHY_UNINITIALIZED, params->port);   // "Warning: PHY was not initialized,"
-       // " Port %d",
+               elink_cb_event_log(sc, ELINK_LOG_ID_PHY_UNINITIALIZED,
+                                  params->port);
+               /* "Warning: PHY was not initialized, Port %d" */
 
-       PMD_DRV_LOG(DEBUG, "control reg 0x%x (after %d ms)", ctrl, cnt);
+       ELINK_DEBUG_P2(sc, "control reg 0x%x (after %d ms)", ctrl, cnt);
        return cnt;
 }
 
@@ -5650,37 +6951,38 @@ static void elink_link_int_enable(struct elink_params *params)
        } else if (params->switch_cfg == ELINK_SWITCH_CFG_10G) {
                mask = (ELINK_NIG_MASK_XGXS0_LINK10G |
                        ELINK_NIG_MASK_XGXS0_LINK_STATUS);
-               PMD_DRV_LOG(DEBUG, "enabled XGXS interrupt");
+               ELINK_DEBUG_P0(sc, "enabled XGXS interrupt");
                if (!(ELINK_SINGLE_MEDIA_DIRECT(params)) &&
-                   params->phy[ELINK_INT_PHY].type !=
-                   PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE) {
+                       params->phy[ELINK_INT_PHY].type !=
+                               PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE) {
                        mask |= ELINK_NIG_MASK_MI_INT;
-                       PMD_DRV_LOG(DEBUG, "enabled external phy int");
+                       ELINK_DEBUG_P0(sc, "enabled external phy int");
                }
 
-       } else {                /* SerDes */
+       } else { /* SerDes */
                mask = ELINK_NIG_MASK_SERDES0_LINK_STATUS;
-               PMD_DRV_LOG(DEBUG, "enabled SerDes interrupt");
+               ELINK_DEBUG_P0(sc, "enabled SerDes interrupt");
                if (!(ELINK_SINGLE_MEDIA_DIRECT(params)) &&
-                   params->phy[ELINK_INT_PHY].type !=
-                   PORT_HW_CFG_SERDES_EXT_PHY_TYPE_NOT_CONN) {
+                       params->phy[ELINK_INT_PHY].type !=
+                               PORT_HW_CFG_SERDES_EXT_PHY_TYPE_NOT_CONN) {
                        mask |= ELINK_NIG_MASK_MI_INT;
-                       PMD_DRV_LOG(DEBUG, "enabled external phy int");
+                       ELINK_DEBUG_P0(sc, "enabled external phy int");
                }
        }
-       elink_bits_en(sc, NIG_REG_MASK_INTERRUPT_PORT0 + port * 4, mask);
+       elink_bits_en(sc,
+                     NIG_REG_MASK_INTERRUPT_PORT0 + port * 4,
+                     mask);
 
-       PMD_DRV_LOG(DEBUG, "port %x, is_xgxs %x, int_status 0x%x", port,
-                   (params->switch_cfg == ELINK_SWITCH_CFG_10G),
-                   REG_RD(sc, NIG_REG_STATUS_INTERRUPT_PORT0 + port * 4));
-       PMD_DRV_LOG(DEBUG, " int_mask 0x%x, MI_INT %x, SERDES_LINK %x",
-                   REG_RD(sc, NIG_REG_MASK_INTERRUPT_PORT0 + port * 4),
-                   REG_RD(sc, NIG_REG_EMAC0_STATUS_MISC_MI_INT + port * 0x18),
-                   REG_RD(sc,
-                          NIG_REG_SERDES0_STATUS_LINK_STATUS + port * 0x3c));
-       PMD_DRV_LOG(DEBUG, " 10G %x, XGXS_LINK %x",
-                   REG_RD(sc, NIG_REG_XGXS0_STATUS_LINK10G + port * 0x68),
-                   REG_RD(sc, NIG_REG_XGXS0_STATUS_LINK_STATUS + port * 0x68));
+       ELINK_DEBUG_P3(sc, "port %x, is_xgxs %x, int_status 0x%x", port,
+                (params->switch_cfg == ELINK_SWITCH_CFG_10G),
+                REG_RD(sc, NIG_REG_STATUS_INTERRUPT_PORT0 + port * 4));
+       ELINK_DEBUG_P3(sc, " int_mask 0x%x, MI_INT %x, SERDES_LINK %x",
+                REG_RD(sc, NIG_REG_MASK_INTERRUPT_PORT0 + port * 4),
+                REG_RD(sc, NIG_REG_EMAC0_STATUS_MISC_MI_INT + port * 0x18),
+                REG_RD(sc, NIG_REG_SERDES0_STATUS_LINK_STATUS + port * 0x3c));
+       ELINK_DEBUG_P2(sc, " 10G %x, XGXS_LINK %x",
+                REG_RD(sc, NIG_REG_XGXS0_STATUS_LINK10G + port * 0x68),
+                REG_RD(sc, NIG_REG_XGXS0_STATUS_LINK_STATUS + port * 0x68));
 }
 
 static void elink_rearm_latch_signal(struct bnx2x_softc *sc, uint8_t port,
@@ -5693,17 +6995,20 @@ static void elink_rearm_latch_signal(struct bnx2x_softc *sc, uint8_t port,
         * so in this case we need to write the status to clear the XOR
         */
        /* Read Latched signals */
-       latch_status = REG_RD(sc, NIG_REG_LATCH_STATUS_0 + port * 8);
-       PMD_DRV_LOG(DEBUG, "latch_status = 0x%x", latch_status);
-       /* Handle only those with latched-signal=up. */
+       latch_status = REG_RD(sc, NIG_REG_LATCH_STATUS_0 + port * 8);
+       ELINK_DEBUG_P1(sc, "latch_status = 0x%x", latch_status);
+       /* Handle only those with latched-signal=up. */
        if (exp_mi_int)
                elink_bits_en(sc,
                              NIG_REG_STATUS_INTERRUPT_PORT0
-                             + port * 4, ELINK_NIG_STATUS_EMAC0_MI_INT);
+                             + port * 4,
+                             ELINK_NIG_STATUS_EMAC0_MI_INT);
        else
                elink_bits_dis(sc,
                               NIG_REG_STATUS_INTERRUPT_PORT0
-                              + port * 4, ELINK_NIG_STATUS_EMAC0_MI_INT);
+                              + port * 4,
+                              ELINK_NIG_STATUS_EMAC0_MI_INT);
 
        if (latch_status & 1) {
 
@@ -5738,23 +7043,24 @@ static void elink_link_int_ack(struct elink_params *params,
                                 * the relevant lane in the status register
                                 */
                                uint32_t ser_lane =
-                                   ((params->lane_config &
-                                     PORT_HW_CFG_LANE_SWAP_CFG_MASTER_MASK) >>
-                                    PORT_HW_CFG_LANE_SWAP_CFG_MASTER_SHIFT);
+                                       ((params->lane_config &
+                                   PORT_HW_CFG_LANE_SWAP_CFG_MASTER_MASK) >>
+                                   PORT_HW_CFG_LANE_SWAP_CFG_MASTER_SHIFT);
                                mask = ((1 << ser_lane) <<
-                                       ELINK_NIG_STATUS_XGXS0_LINK_STATUS_SIZE);
+                                      ELINK_NIG_STATUS_XGXS0_LINK_STATUS_SIZE);
                        } else
                                mask = ELINK_NIG_STATUS_SERDES0_LINK_STATUS;
                }
-               PMD_DRV_LOG(DEBUG, "Ack link up interrupt with mask 0x%x",
-                           mask);
+               ELINK_DEBUG_P1(sc, "Ack link up interrupt with mask 0x%x",
+                              mask);
                elink_bits_en(sc,
-                             NIG_REG_STATUS_INTERRUPT_PORT0 + port * 4, mask);
+                             NIG_REG_STATUS_INTERRUPT_PORT0 + port * 4,
+                             mask);
        }
 }
 
-static uint8_t elink_format_ver(uint32_t num, uint8_t * str,
-                               uint16_t * len)
+static elink_status_t elink_format_ver(uint32_t num, uint8_t *str,
+                                      uint16_t *len)
 {
        uint8_t *str_ptr = str;
        uint32_t mask = 0xf0000000;
@@ -5792,14 +7098,57 @@ static uint8_t elink_format_ver(uint32_t num, uint8_t * str,
        return ELINK_STATUS_OK;
 }
 
-static uint8_t elink_null_format_ver(__rte_unused uint32_t spirom_ver,
-                                    uint8_t * str, uint16_t * len)
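+/* Fallback formatter that just emits an empty version string */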
+static elink_status_t elink_null_format_ver(__rte_unused uint32_t spirom_ver,
+                                uint8_t *str,
+                                uint16_t *len)
 {
        str[0] = '\0';
        (*len)--;
        return ELINK_STATUS_OK;
 }
 
+elink_status_t elink_get_ext_phy_fw_version(struct elink_params *params,
+                                uint8_t *version,
+                                uint16_t len)
+{
+       struct bnx2x_softc *sc;
+       uint32_t spirom_ver = 0;
+       elink_status_t status = ELINK_STATUS_OK;
+       uint8_t *ver_p = version;
+       uint16_t remain_len = len;
+
+       if (version == NULL || params == NULL)
+               return ELINK_STATUS_ERROR;
+       sc = params->sc;
+
+       /* Extract first external phy */
+       version[0] = '\0';
+       spirom_ver = REG_RD(sc, params->phy[ELINK_EXT_PHY1].ver_addr);
+
+       if (params->phy[ELINK_EXT_PHY1].format_fw_ver) {
+               status |= params->phy[ELINK_EXT_PHY1].format_fw_ver(spirom_ver,
+                                                             ver_p,
+                                                             &remain_len);
+               ver_p += (len - remain_len);
+       }
+       if ((params->num_phys == ELINK_MAX_PHYS) &&
+           (params->phy[ELINK_EXT_PHY2].ver_addr != 0)) {
+               spirom_ver = REG_RD(sc, params->phy[ELINK_EXT_PHY2].ver_addr);
+               if (params->phy[ELINK_EXT_PHY2].format_fw_ver) {
+                       *ver_p = '/';
+                       ver_p++;
+                       remain_len--;
+                       status |= params->phy[ELINK_EXT_PHY2].format_fw_ver(
+                               spirom_ver,
+                               ver_p,
+                               &remain_len);
+                       ver_p = version + (len - remain_len);
+               }
+       }
+       *ver_p = '\0';
+       return status;
+}
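+/* Usage sketch (hypothetical caller; buffer size chosen arbitrarily):
+ *     uint8_t buf[64];
+ *     if (elink_get_ext_phy_fw_version(params, buf, sizeof(buf)) ==
+ *         ELINK_STATUS_OK)
+ *             PMD_DRV_LOG(DEBUG, "ext PHY fw version: %s", buf);
+ * buf receives "ver1", or "ver1/ver2" when a second external PHY reports one.
+ */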
+
 static void elink_set_xgxs_loopback(struct elink_phy *phy,
                                    struct elink_params *params)
 {
@@ -5809,7 +7158,7 @@ static void elink_set_xgxs_loopback(struct elink_phy *phy,
        if (phy->req_line_speed != ELINK_SPEED_1000) {
                uint32_t md_devad = 0;
 
-               PMD_DRV_LOG(DEBUG, "XGXS 10G loopback enable");
+               ELINK_DEBUG_P0(sc, "XGXS 10G loopback enable");
 
                if (!CHIP_IS_E3(sc)) {
                        /* Change the uni_phy_addr in the nig */
@@ -5823,7 +7172,8 @@ static void elink_set_xgxs_loopback(struct elink_phy *phy,
                elink_cl45_write(sc, phy,
                                 5,
                                 (MDIO_REG_BANK_AER_BLOCK +
-                                 (MDIO_AER_BLOCK_AER_REG & 0xf)), 0x2800);
+                                 (MDIO_AER_BLOCK_AER_REG & 0xf)),
+                                0x2800);
 
                elink_cl45_write(sc, phy,
                                 5,
@@ -5841,22 +7191,21 @@ static void elink_set_xgxs_loopback(struct elink_phy *phy,
                }
        } else {
                uint16_t mii_ctrl;
-               PMD_DRV_LOG(DEBUG, "XGXS 1G loopback enable");
+               ELINK_DEBUG_P0(sc, "XGXS 1G loopback enable");
                elink_cl45_read(sc, phy, 5,
                                (MDIO_REG_BANK_COMBO_IEEE0 +
-                                (MDIO_COMBO_IEEE0_MII_CONTROL & 0xf)),
+                               (MDIO_COMBO_IEEE0_MII_CONTROL & 0xf)),
                                &mii_ctrl);
                elink_cl45_write(sc, phy, 5,
                                 (MDIO_REG_BANK_COMBO_IEEE0 +
-                                 (MDIO_COMBO_IEEE0_MII_CONTROL & 0xf)),
+                                (MDIO_COMBO_IEEE0_MII_CONTROL & 0xf)),
                                 mii_ctrl |
                                 MDIO_COMBO_IEEO_MII_CONTROL_LOOPBACK);
        }
 }
 
 elink_status_t elink_set_led(struct elink_params *params,
-                            struct elink_vars *vars, uint8_t mode,
-                            uint32_t speed)
+                 struct elink_vars *vars, uint8_t mode, uint32_t speed)
 {
        uint8_t port = params->port;
        uint16_t hw_led_mode = params->hw_led_mode;
@@ -5865,15 +7214,21 @@ elink_status_t elink_set_led(struct elink_params *params,
        uint32_t tmp;
        uint32_t emac_base = port ? GRCBASE_EMAC1 : GRCBASE_EMAC0;
        struct bnx2x_softc *sc = params->sc;
-       PMD_DRV_LOG(DEBUG, "elink_set_led: port %x, mode %d", port, mode);
-       PMD_DRV_LOG(DEBUG, "speed 0x%x, hw_led_mode 0x%x", speed, hw_led_mode);
+       ELINK_DEBUG_P2(sc, "elink_set_led: port %x, mode %d", port, mode);
+       ELINK_DEBUG_P2(sc, "speed 0x%x, hw_led_mode 0x%x",
+                speed, hw_led_mode);
        /* In case an external PHY implements its own LED control, invoke it */
        for (phy_idx = ELINK_EXT_PHY1; phy_idx < ELINK_MAX_PHYS; phy_idx++) {
                if (params->phy[phy_idx].set_link_led) {
-                       params->phy[phy_idx].set_link_led(&params->phy[phy_idx],
-                                                         params, mode);
+                       params->phy[phy_idx].set_link_led(
+                               &params->phy[phy_idx], params, mode);
                }
        }
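+       /* Nothing more to do if the EMAC is disabled on emulation platforms */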
+#ifdef ELINK_INCLUDE_EMUL
+       if (params->feature_config_flags &
+           ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC)
+               return rc;
+#endif
 
        switch (mode) {
        case ELINK_LED_MODE_FRONT_PANEL_OFF:
@@ -5884,10 +7239,10 @@ elink_status_t elink_set_led(struct elink_params *params,
 
                tmp = elink_cb_reg_read(sc, emac_base + EMAC_REG_EMAC_LED);
                if (params->phy[ELINK_EXT_PHY1].type ==
-                   PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X54618SE)
+                       PORT_HW_CFG_XGXS_EXT_PHY2_TYPE_BNX2X54618SE)
                        tmp &= ~(EMAC_LED_1000MB_OVERRIDE |
-                                EMAC_LED_100MB_OVERRIDE |
-                                EMAC_LED_10MB_OVERRIDE);
+                               EMAC_LED_100MB_OVERRIDE |
+                               EMAC_LED_10MB_OVERRIDE);
                else
                        tmp |= EMAC_LED_OVERRIDE;
 
@@ -5900,25 +7255,22 @@ elink_status_t elink_set_led(struct elink_params *params,
                 */
                if (!vars->link_up)
                        break;
-               /* fall-through */
+               /* fallthrough */
        case ELINK_LED_MODE_ON:
                if (((params->phy[ELINK_EXT_PHY1].type ==
-                     PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8727) ||
-                    (params->phy[ELINK_EXT_PHY1].type ==
-                     PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8722)) &&
+                         PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8727) ||
+                        (params->phy[ELINK_EXT_PHY1].type ==
+                         PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8722)) &&
                    CHIP_IS_E2(sc) && params->num_phys == 2) {
-                       /* This is a work-around for E2+8727 Configurations */
+                       /* This is a work-around for E2 + 8727 Configurations */
                        if (mode == ELINK_LED_MODE_ON ||
-                           speed == ELINK_SPEED_10000) {
+                               speed == ELINK_SPEED_10000) {
                                REG_WR(sc, NIG_REG_LED_MODE_P0 + port * 4, 0);
                                REG_WR(sc, NIG_REG_LED_10G_P0 + port * 4, 1);
 
-                               tmp =
-                                   elink_cb_reg_read(sc,
-                                                     emac_base +
-                                                     EMAC_REG_EMAC_LED);
-                               elink_cb_reg_write(sc,
-                                                  emac_base +
+                               tmp = elink_cb_reg_read(sc, emac_base +
+                                                       EMAC_REG_EMAC_LED);
+                               elink_cb_reg_write(sc, emac_base +
                                                   EMAC_REG_EMAC_LED,
                                                   (tmp | EMAC_LED_OVERRIDE));
                                /* Return here without enabling traffic
@@ -5934,22 +7286,23 @@ elink_status_t elink_set_led(struct elink_params *params,
                         * is up in CL73
                         */
                        if ((!CHIP_IS_E3(sc)) ||
-                           (CHIP_IS_E3(sc) && mode == ELINK_LED_MODE_ON))
+                           (CHIP_IS_E3(sc) &&
+                            mode == ELINK_LED_MODE_ON))
                                REG_WR(sc, NIG_REG_LED_10G_P0 + port * 4, 1);
 
                        if (CHIP_IS_E1x(sc) ||
-                           CHIP_IS_E2(sc) || (mode == ELINK_LED_MODE_ON))
+                           CHIP_IS_E2(sc) ||
+                           (mode == ELINK_LED_MODE_ON))
                                REG_WR(sc, NIG_REG_LED_MODE_P0 + port * 4, 0);
                        else
                                REG_WR(sc, NIG_REG_LED_MODE_P0 + port * 4,
                                       hw_led_mode);
                } else if ((params->phy[ELINK_EXT_PHY1].type ==
-                           PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X54618SE) &&
+                           PORT_HW_CFG_XGXS_EXT_PHY2_TYPE_BNX2X54618SE) &&
                           (mode == ELINK_LED_MODE_ON)) {
                        REG_WR(sc, NIG_REG_LED_MODE_P0 + port * 4, 0);
-                       tmp =
-                           elink_cb_reg_read(sc,
-                                             emac_base + EMAC_REG_EMAC_LED);
+                       tmp = elink_cb_reg_read(sc, emac_base +
+                                               EMAC_REG_EMAC_LED);
                        elink_cb_reg_write(sc, emac_base + EMAC_REG_EMAC_LED,
                                           tmp | EMAC_LED_OVERRIDE |
                                           EMAC_LED_1000MB_OVERRIDE);
@@ -5959,11 +7312,10 @@ elink_status_t elink_set_led(struct elink_params *params,
                        break;
                } else {
                        uint32_t nig_led_mode = ((params->hw_led_mode <<
-                                                 SHARED_HW_CFG_LED_MODE_SHIFT)
-                                                ==
-                                                SHARED_HW_CFG_LED_EXTPHY2)
-                           ? (SHARED_HW_CFG_LED_PHY1 >>
-                              SHARED_HW_CFG_LED_MODE_SHIFT) : hw_led_mode;
+                                            SHARED_HW_CFG_LED_MODE_SHIFT) ==
+                                           SHARED_HW_CFG_LED_EXTPHY2) ?
+                               (SHARED_HW_CFG_LED_PHY1 >>
+                                SHARED_HW_CFG_LED_MODE_SHIFT) : hw_led_mode;
                        REG_WR(sc, NIG_REG_LED_MODE_P0 + port * 4,
                               nig_led_mode);
                }
@@ -5977,26 +7329,133 @@ elink_status_t elink_set_led(struct elink_params *params,
                else
                        REG_WR(sc, NIG_REG_LED_CONTROL_BLINK_RATE_P0 + port * 4,
                               LED_BLINK_RATE_VAL_E1X_E2);
-               REG_WR(sc, NIG_REG_LED_CONTROL_BLINK_RATE_ENA_P0 + port * 4, 1);
+               REG_WR(sc, NIG_REG_LED_CONTROL_BLINK_RATE_ENA_P0 +
+                      port * 4, 1);
                tmp = elink_cb_reg_read(sc, emac_base + EMAC_REG_EMAC_LED);
                elink_cb_reg_write(sc, emac_base + EMAC_REG_EMAC_LED,
-                                  (tmp & (~EMAC_LED_OVERRIDE)));
+                       (tmp & (~EMAC_LED_OVERRIDE)));
 
+               if (CHIP_IS_E1(sc) &&
+                   ((speed == ELINK_SPEED_2500) ||
+                    (speed == ELINK_SPEED_1000) ||
+                    (speed == ELINK_SPEED_100) ||
+                    (speed == ELINK_SPEED_10))) {
+                       /* For speeds less than 10G LED scheme is different */
+                       REG_WR(sc, NIG_REG_LED_CONTROL_OVERRIDE_TRAFFIC_P0
+                              + port * 4, 1);
+                       REG_WR(sc, NIG_REG_LED_CONTROL_TRAFFIC_P0 +
+                              port * 4, 0);
+                       REG_WR(sc, NIG_REG_LED_CONTROL_BLINK_TRAFFIC_P0 +
+                              port * 4, 1);
+               }
                break;
 
        default:
                rc = ELINK_STATUS_ERROR;
-               PMD_DRV_LOG(DEBUG, "elink_set_led: Invalid led mode %d", mode);
+               ELINK_DEBUG_P1(sc, "elink_set_led: Invalid led mode %d",
+                        mode);
                break;
        }
        return rc;
 
 }
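+/* A minimal usage sketch: the driver itself switches the LEDs to operational
+ * mode once a speed is programmed, as elink_emac_program() does above:
+ *     elink_set_led(params, vars, ELINK_LED_MODE_OPER, vars->line_speed);
+ */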
 
+/* This function reflects the actual link state, read DIRECTLY from the
+ * HW.
+ */
+elink_status_t elink_test_link(struct elink_params *params,
+                              __rte_unused struct elink_vars *vars,
+                   uint8_t is_serdes)
+{
+       struct bnx2x_softc *sc = params->sc;
+       uint16_t gp_status = 0, phy_index = 0;
+       uint8_t ext_phy_link_up = 0, serdes_phy_type;
+       struct elink_vars temp_vars;
+       struct elink_phy *int_phy = &params->phy[ELINK_INT_PHY];
+#ifdef ELINK_INCLUDE_FPGA
+       if (CHIP_REV_IS_FPGA(sc))
+               return ELINK_STATUS_OK;
+#endif
+#ifdef ELINK_INCLUDE_EMUL
+       if (CHIP_REV_IS_EMUL(sc))
+               return ELINK_STATUS_OK;
+#endif
+
+       if (CHIP_IS_E3(sc)) {
+               uint16_t link_up;
+               if (params->req_line_speed[ELINK_LINK_CONFIG_IDX(ELINK_INT_PHY)]
+                   > ELINK_SPEED_10000) {
+                       /* Check 20G link */
+                       elink_cl45_read(sc, int_phy, MDIO_WC_DEVAD,
+                                       1, &link_up);
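+                       /* Read twice; the first read can return stale latched status */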
+                       elink_cl45_read(sc, int_phy, MDIO_WC_DEVAD,
+                                       1, &link_up);
+                       link_up &= (1 << 2);
+               } else {
+                       /* Check 10G link and below */
+                       uint8_t lane = elink_get_warpcore_lane(int_phy, params);
+                       elink_cl45_read(sc, int_phy, MDIO_WC_DEVAD,
+                                       MDIO_WC_REG_GP2_STATUS_GP_2_1,
+                                       &gp_status);
+                       gp_status = ((gp_status >> 8) & 0xf) |
+                               ((gp_status >> 12) & 0xf);
+                       link_up = gp_status & (1 << lane);
+               }
+               if (!link_up)
+                       return ELINK_STATUS_NO_LINK;
+       } else {
+               CL22_RD_OVER_CL45(sc, int_phy,
+                                 MDIO_REG_BANK_GP_STATUS,
+                                 MDIO_GP_STATUS_TOP_AN_STATUS1,
+                                 &gp_status);
+               /* Link is up only if both local phy and external phy are up */
+               if (!(gp_status & MDIO_GP_STATUS_TOP_AN_STATUS1_LINK_STATUS))
+                       return ELINK_STATUS_NO_LINK;
+       }
+       /* In XGXS loopback mode, do not check external PHY */
+       if (params->loopback_mode == ELINK_LOOPBACK_XGXS)
+               return ELINK_STATUS_OK;
+
+       switch (params->num_phys) {
+       case 1:
+               /* No external PHY */
+               return ELINK_STATUS_OK;
+       case 2:
+               ext_phy_link_up = params->phy[ELINK_EXT_PHY1].read_status(
+                       &params->phy[ELINK_EXT_PHY1],
+                       params, &temp_vars);
+               break;
+       case 3: /* Dual Media */
+               for (phy_index = ELINK_EXT_PHY1; phy_index < params->num_phys;
+                     phy_index++) {
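+                       /* Fiber and twinax media count as SerDes PHYs here */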
+                       serdes_phy_type = ((params->phy[phy_index].media_type ==
+                                           ELINK_ETH_PHY_SFPP_10G_FIBER) ||
+                                          (params->phy[phy_index].media_type ==
+                                           ELINK_ETH_PHY_SFP_1G_FIBER) ||
+                                          (params->phy[phy_index].media_type ==
+                                           ELINK_ETH_PHY_XFP_FIBER) ||
+                                          (params->phy[phy_index].media_type ==
+                                           ELINK_ETH_PHY_DA_TWINAX));
+
+                       if (is_serdes != serdes_phy_type)
+                               continue;
+                       if (params->phy[phy_index].read_status) {
+                               ext_phy_link_up |=
+                                       params->phy[phy_index].read_status(
+                                               &params->phy[phy_index],
+                                               params, &temp_vars);
+                       }
+               }
+               break;
+       }
+       if (ext_phy_link_up)
+               return ELINK_STATUS_OK;
+       return ELINK_STATUS_NO_LINK;
+}
+
 static elink_status_t elink_link_initialize(struct elink_params *params,
-                                           struct elink_vars *vars)
+                                struct elink_vars *vars)
 {
-       elink_status_t rc = ELINK_STATUS_OK;
        uint8_t phy_index, non_ext_phy;
        struct bnx2x_softc *sc = params->sc;
        /* In case of external phy existence, the line speed would be the
@@ -6021,11 +7480,12 @@ static elink_status_t elink_link_initialize(struct elink_params *params,
            (params->loopback_mode == ELINK_LOOPBACK_EXT_PHY)) {
                struct elink_phy *phy = &params->phy[ELINK_INT_PHY];
                if (vars->line_speed == ELINK_SPEED_AUTO_NEG &&
-                   (CHIP_IS_E1x(sc) || CHIP_IS_E2(sc)))
+                   (CHIP_IS_E1x(sc) ||
+                    CHIP_IS_E2(sc)))
                        elink_set_parallel_detection(phy, params);
                if (params->phy[ELINK_INT_PHY].config_init)
-                       params->phy[ELINK_INT_PHY].config_init(phy,
-                                                              params, vars);
+                       params->phy[ELINK_INT_PHY].config_init(phy, params,
+                                                              vars);
        }
 
        /* Re-read this value in case it was changed inside config_init due to
@@ -6033,14 +7493,14 @@ static elink_status_t elink_link_initialize(struct elink_params *params,
         */
        vars->line_speed = params->phy[ELINK_INT_PHY].req_line_speed;
 
        /* Init external phy */
        if (non_ext_phy) {
                if (params->phy[ELINK_INT_PHY].supported &
                    ELINK_SUPPORTED_FIBRE)
                        vars->link_status |= LINK_STATUS_SERDES_LINK;
        } else {
                for (phy_index = ELINK_EXT_PHY1; phy_index < params->num_phys;
-                    phy_index++) {
+                     phy_index++) {
                        /* No need to initialize second phy in case of first
                         * phy only selection. In case of second phy, we do
                         * need to initialize the first phy, since they are
@@ -6053,13 +7513,13 @@ static elink_status_t elink_link_initialize(struct elink_params *params,
                        if (phy_index == ELINK_EXT_PHY2 &&
                            (elink_phy_selection(params) ==
                             PORT_HW_CFG_PHY_SELECTION_FIRST_PHY)) {
-                               PMD_DRV_LOG(DEBUG,
-                                           "Not initializing second phy");
+                               ELINK_DEBUG_P0(sc,
+                                  "Not initializing second phy");
                                continue;
                        }
-                       params->phy[phy_index].config_init(&params->
-                                                          phy[phy_index],
-                                                          params, vars);
+                       params->phy[phy_index].config_init(
+                               &params->phy[phy_index],
+                               params, vars);
                }
        }
        /* Reset the interrupt indication after phy was initialized */
@@ -6069,7 +7529,7 @@ static elink_status_t elink_link_initialize(struct elink_params *params,
                        ELINK_NIG_STATUS_XGXS0_LINK_STATUS |
                        ELINK_NIG_STATUS_SERDES0_LINK_STATUS |
                        ELINK_NIG_MASK_MI_INT));
-       return rc;
+       return ELINK_STATUS_OK;
 }
 
 static void elink_int_link_reset(__rte_unused struct elink_phy *phy,
@@ -6091,19 +7551,21 @@ static void elink_common_ext_link_reset(__rte_unused struct elink_phy *phy,
        else
                gpio_port = params->port;
        elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_1,
-                           MISC_REGISTERS_GPIO_OUTPUT_LOW, gpio_port);
+                      MISC_REGISTERS_GPIO_OUTPUT_LOW,
+                      gpio_port);
        elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_2,
-                           MISC_REGISTERS_GPIO_OUTPUT_LOW, gpio_port);
-       PMD_DRV_LOG(DEBUG, "reset external PHY");
+                      MISC_REGISTERS_GPIO_OUTPUT_LOW,
+                      gpio_port);
+       ELINK_DEBUG_P0(sc, "reset external PHY");
 }
 
 static elink_status_t elink_update_link_down(struct elink_params *params,
-                                            struct elink_vars *vars)
+                                 struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t port = params->port;
 
-       PMD_DRV_LOG(DEBUG, "Port %x: Link is down", port);
+       ELINK_DEBUG_P1(sc, "Port %x: Link is down", port);
        elink_set_led(params, vars, ELINK_LED_MODE_OFF, 0);
        vars->phy_flags &= ~PHY_PHYSICAL_LINK_FLAG;
        /* Indicate no mac active */
@@ -6123,8 +7585,9 @@ static elink_status_t elink_update_link_down(struct elink_params *params,
 
        DELAY(1000 * 10);
        /* Reset BigMac/Xmac */
-       if (CHIP_IS_E1x(sc) || CHIP_IS_E2(sc))
-               elink_set_bmac_rx(sc, params->port, 0);
+       if (CHIP_IS_E1x(sc) ||
+           CHIP_IS_E2(sc))
+               elink_set_bmac_rx(sc, params->chip_id, params->port, 0);
 
        if (CHIP_IS_E3(sc)) {
                /* Prevent LPI Generation by chip */
@@ -6144,8 +7607,8 @@ static elink_status_t elink_update_link_down(struct elink_params *params,
 }
 
 static elink_status_t elink_update_link_up(struct elink_params *params,
-                                          struct elink_vars *vars,
-                                          uint8_t link_10g)
+                               struct elink_vars *vars,
+                               uint8_t link_10g)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t phy_idx, port = params->port;
@@ -6156,15 +7619,17 @@ static elink_status_t elink_update_link_up(struct elink_params *params,
        vars->phy_flags |= PHY_PHYSICAL_LINK_FLAG;
 
        if (vars->flow_ctrl & ELINK_FLOW_CTRL_TX)
-               vars->link_status |= LINK_STATUS_TX_FLOW_CONTROL_ENABLED;
+               vars->link_status |=
+                       LINK_STATUS_TX_FLOW_CONTROL_ENABLED;
 
        if (vars->flow_ctrl & ELINK_FLOW_CTRL_RX)
-               vars->link_status |= LINK_STATUS_RX_FLOW_CONTROL_ENABLED;
+               vars->link_status |=
+                       LINK_STATUS_RX_FLOW_CONTROL_ENABLED;
        if (USES_WARPCORE(sc)) {
                if (link_10g) {
                        if (elink_xmac_enable(params, vars, 0) ==
                            ELINK_STATUS_NO_LINK) {
-                               PMD_DRV_LOG(DEBUG, "Found errors on XMAC");
+                               ELINK_DEBUG_P0(sc, "Found errors on XMAC");
                                vars->link_up = 0;
                                vars->phy_flags |= PHY_HALF_OPEN_CONN_FLAG;
                                vars->link_status &= ~LINK_STATUS_LINK_UP;
@@ -6176,7 +7641,7 @@ static elink_status_t elink_update_link_up(struct elink_params *params,
 
                if ((vars->eee_status & SHMEM_EEE_ACTIVE_BIT) &&
                    (vars->eee_status & SHMEM_EEE_LPI_REQUESTED_BIT)) {
-                       PMD_DRV_LOG(DEBUG, "Enabling LPI assertion");
+                       ELINK_DEBUG_P0(sc, "Enabling LPI assertion");
                        REG_WR(sc, MISC_REG_CPMU_LP_FW_ENABLE_P0 +
                               (params->port << 2), 1);
                        REG_WR(sc, MISC_REG_CPMU_LP_DR_ENABLE, 1);
@@ -6184,11 +7649,12 @@ static elink_status_t elink_update_link_up(struct elink_params *params,
                               (params->port << 2), 0xfc20);
                }
        }
-       if ((CHIP_IS_E1x(sc) || CHIP_IS_E2(sc))) {
+       if ((CHIP_IS_E1x(sc) ||
+            CHIP_IS_E2(sc))) {
                if (link_10g) {
                        if (elink_bmac_enable(params, vars, 0, 1) ==
                            ELINK_STATUS_NO_LINK) {
-                               PMD_DRV_LOG(DEBUG, "Found errors on BMAC");
+                               ELINK_DEBUG_P0(sc, "Found errors on BMAC");
                                vars->link_up = 0;
                                vars->phy_flags |= PHY_HALF_OPEN_CONN_FLAG;
                                vars->link_status &= ~LINK_STATUS_LINK_UP;
@@ -6231,6 +7697,24 @@ static elink_status_t elink_update_link_up(struct elink_params *params,
        return rc;
 }
 
+static void elink_chng_link_count(struct elink_params *params, uint8_t clear)
+{
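+       /* Bump or clear the MFW link flap counter (shmem2 link_change_count) */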
+       struct bnx2x_softc *sc = params->sc;
+       uint32_t addr, val;
+
+       /* Verify the link_change_count is supported by the MFW */
+       if (!(SHMEM2_HAS(sc, link_change_count)))
+               return;
+
+       addr = params->shmem2_base +
+               offsetof(struct shmem2_region, link_change_count[params->port]);
+       if (clear)
+               val = 0;
+       else
+               val = REG_RD(sc, addr) + 1;
+       REG_WR(sc, addr, val);
+}
+
 /* The elink_link_update function should be called upon link
  * interrupt.
  * Link is considered up as follows:
@@ -6243,24 +7727,24 @@ static elink_status_t elink_update_link_up(struct elink_params *params,
  *   external phy needs to be up, and at least one of the 2
  *   external phy link must be up.
  */
-elink_status_t elink_link_update(struct elink_params * params,
-                                struct elink_vars * vars)
+elink_status_t elink_link_update(struct elink_params *params,
+                                struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        struct elink_vars phy_vars[ELINK_MAX_PHYS];
        uint8_t port = params->port;
        uint8_t link_10g_plus, phy_index;
+       uint32_t prev_link_status = vars->link_status;
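+       /* Snapshot of link_status so a state change can be detected afterwards */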
        uint8_t ext_phy_link_up = 0, cur_link_up;
        elink_status_t rc = ELINK_STATUS_OK;
-       __rte_unused uint8_t is_mi_int = 0;
        uint16_t ext_phy_line_speed = 0, prev_line_speed = vars->line_speed;
        uint8_t active_external_phy = ELINK_INT_PHY;
        vars->phy_flags &= ~PHY_HALF_OPEN_CONN_FLAG;
        vars->link_status &= ~ELINK_LINK_UPDATE_MASK;
        for (phy_index = ELINK_INT_PHY; phy_index < params->num_phys;
-            phy_index++) {
+             phy_index++) {
                phy_vars[phy_index].flow_ctrl = 0;
-               phy_vars[phy_index].link_status = ETH_LINK_DOWN;
+               phy_vars[phy_index].link_status = 0;
                phy_vars[phy_index].line_speed = 0;
                phy_vars[phy_index].duplex = DUPLEX_FULL;
                phy_vars[phy_index].phy_link_up = 0;
@@ -6273,21 +7757,18 @@ elink_status_t elink_link_update(struct elink_params * params,
        if (USES_WARPCORE(sc))
                elink_set_aer_mmd(params, &params->phy[ELINK_INT_PHY]);
 
-       PMD_DRV_LOG(DEBUG, "port %x, XGXS?%x, int_status 0x%x",
-                   port, (vars->phy_flags & PHY_XGXS_FLAG),
-                   REG_RD(sc, NIG_REG_STATUS_INTERRUPT_PORT0 + port * 4));
+       ELINK_DEBUG_P3(sc, "port %x, XGXS?%x, int_status 0x%x",
+                port, (vars->phy_flags & PHY_XGXS_FLAG),
+                REG_RD(sc, NIG_REG_STATUS_INTERRUPT_PORT0 + port * 4));
 
-       is_mi_int = (uint8_t) (REG_RD(sc, NIG_REG_EMAC0_STATUS_MISC_MI_INT +
-                                     port * 0x18) > 0);
-       PMD_DRV_LOG(DEBUG, "int_mask 0x%x MI_INT %x, SERDES_LINK %x",
-                   REG_RD(sc, NIG_REG_MASK_INTERRUPT_PORT0 + port * 4),
-                   is_mi_int,
-                   REG_RD(sc,
-                          NIG_REG_SERDES0_STATUS_LINK_STATUS + port * 0x3c));
+       ELINK_DEBUG_P3(sc, "int_mask 0x%x MI_INT %x, SERDES_LINK %x",
+                REG_RD(sc, NIG_REG_MASK_INTERRUPT_PORT0 + port * 4),
+                REG_RD(sc, NIG_REG_EMAC0_STATUS_MISC_MI_INT + port * 0x18) > 0,
+                REG_RD(sc, NIG_REG_SERDES0_STATUS_LINK_STATUS + port * 0x3c));
 
-       PMD_DRV_LOG(DEBUG, " 10G %x, XGXS_LINK %x",
-                   REG_RD(sc, NIG_REG_XGXS0_STATUS_LINK10G + port * 0x68),
-                   REG_RD(sc, NIG_REG_XGXS0_STATUS_LINK_STATUS + port * 0x68));
+       ELINK_DEBUG_P2(sc, " 10G %x, XGXS_LINK %x",
+         REG_RD(sc, NIG_REG_XGXS0_STATUS_LINK10G + port * 0x68),
+         REG_RD(sc, NIG_REG_XGXS0_STATUS_LINK_STATUS + port * 0x68));
 
        /* Disable emac */
        if (!CHIP_IS_E3(sc))
@@ -6301,7 +7782,7 @@ elink_status_t elink_link_update(struct elink_params * params,
         * speed/duplex result
         */
        for (phy_index = ELINK_EXT_PHY1; phy_index < params->num_phys;
-            phy_index++) {
+             phy_index++) {
                struct elink_phy *phy = &params->phy[phy_index];
                if (!phy->read_status)
                        continue;
@@ -6309,11 +7790,11 @@ elink_status_t elink_link_update(struct elink_params * params,
                cur_link_up = phy->read_status(phy, params,
                                               &phy_vars[phy_index]);
                if (cur_link_up) {
-                       PMD_DRV_LOG(DEBUG, "phy in index %d link is up",
-                                   phy_index);
+                       ELINK_DEBUG_P1(sc, "phy in index %d link is up",
+                                  phy_index);
                } else {
-                       PMD_DRV_LOG(DEBUG, "phy in index %d link is down",
-                                   phy_index);
+                       ELINK_DEBUG_P1(sc, "phy in index %d link is down",
+                                  phy_index);
                        continue;
                }
 
@@ -6324,30 +7805,30 @@ elink_status_t elink_link_update(struct elink_params * params,
                        switch (elink_phy_selection(params)) {
                        case PORT_HW_CFG_PHY_SELECTION_HARDWARE_DEFAULT:
                        case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY:
-                               /* In this option, the first PHY makes sure to pass the
-                                * traffic through itself only.
-                                * Its not clear how to reset the link on the second phy
-                                */
+                       /* In this option, the first PHY makes sure to pass the
+                        * traffic through itself only.
+                        * It's not clear how to reset the link on the second phy
+                        */
                                active_external_phy = ELINK_EXT_PHY1;
                                break;
                        case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY_PRIORITY:
-                               /* In this option, the first PHY makes sure to pass the
-                                * traffic through the second PHY.
-                                */
+                       /* In this option, the first PHY makes sure to pass the
+                        * traffic through the second PHY.
+                        */
                                active_external_phy = ELINK_EXT_PHY2;
                                break;
                        default:
-                               /* Link indication on both PHYs with the following cases
-                                * is invalid:
-                                * - FIRST_PHY means that second phy wasn't initialized,
-                                * hence its link is expected to be down
-                                * - SECOND_PHY means that first phy should not be able
-                                * to link up by itself (using configuration)
-                                * - DEFAULT should be overridden during initialization
-                                */
-                               PMD_DRV_LOG(DEBUG, "Invalid link indication"
-                                           "mpc=0x%x. DISABLING LINK !!!",
-                                           params->multi_phy_config);
+                       /* Link indication on both PHYs with the following cases
+                        * is invalid:
+                        * - FIRST_PHY means that second phy wasn't initialized,
+                        * hence its link is expected to be down
+                        * - SECOND_PHY means that first phy should not be able
+                        * to link up by itself (using configuration)
+                        * - DEFAULT should be overridden during initialization
+                        */
+                               ELINK_DEBUG_P1(sc, "Invalid link indication"
+                                              " mpc=0x%x. DISABLING LINK !!!",
+                                          params->multi_phy_config);
                                ext_phy_link_up = 0;
                                break;
                        }
@@ -6361,9 +7842,9 @@ elink_status_t elink_link_update(struct elink_params * params,
         * external phy
         */
        if (params->phy[ELINK_INT_PHY].read_status)
-               params->phy[ELINK_INT_PHY].read_status(&params->
-                                                      phy[ELINK_INT_PHY],
-                                                      params, vars);
+               params->phy[ELINK_INT_PHY].read_status(
+                       &params->phy[ELINK_INT_PHY],
+                       params, vars);
        /* The INT_PHY flow control reside in the vars. This include the
         * case where the speed or flow control are not set to AUTO.
         * Otherwise, the active external phy flow control result is set
@@ -6383,11 +7864,11 @@ elink_status_t elink_link_update(struct elink_params * params,
                 */
                if (active_external_phy == ELINK_EXT_PHY1) {
                        if (params->phy[ELINK_EXT_PHY2].phy_specific_func) {
-                               PMD_DRV_LOG(DEBUG, "Disabling TX on EXT_PHY2");
-                               params->phy[ELINK_EXT_PHY2].
-                                   phy_specific_func(&params->
-                                                     phy[ELINK_EXT_PHY2],
-                                                     params, ELINK_DISABLE_TX);
+                               ELINK_DEBUG_P0(sc,
+                                  "Disabling TX on EXT_PHY2");
+                               params->phy[ELINK_EXT_PHY2].phy_specific_func(
+                                       &params->phy[ELINK_EXT_PHY2],
+                                       params, ELINK_DISABLE_TX);
                        }
                }
 
@@ -6401,12 +7882,27 @@ elink_status_t elink_link_update(struct elink_params * params,
 
                vars->eee_status = phy_vars[active_external_phy].eee_status;
 
-               PMD_DRV_LOG(DEBUG, "Active external phy selected: %x",
-                           active_external_phy);
-       }
+               ELINK_DEBUG_P1(sc, "Active external phy selected: %x",
+                          active_external_phy);
+       }
+
+       ELINK_DEBUG_P3(sc, "vars : phy_flags = %x, mac_type = %x, phy_link_up = %x",
+                      vars->phy_flags, vars->mac_type, vars->phy_link_up);
+       ELINK_DEBUG_P3(sc, "vars : link_up = %x, line_speed = %x, duplex = %x",
+                      vars->link_up, vars->line_speed, vars->duplex);
+       ELINK_DEBUG_P3(sc, "vars : flow_ctrl = %x, ieee_fc = %x, link_status = %x",
+                      vars->flow_ctrl, vars->ieee_fc, vars->link_status);
+       ELINK_DEBUG_P3(sc, "vars : eee_status = %x, fault_detected = %x, check_kr2_recovery_cnt = %x",
+                      vars->eee_status, vars->fault_detected,
+                      vars->check_kr2_recovery_cnt);
+       ELINK_DEBUG_P3(sc, "vars : periodic_flags = %x, aeu_int_mask = %x, rx_tx_asic_rst = %x",
+                      vars->periodic_flags, vars->aeu_int_mask,
+                      vars->rx_tx_asic_rst);
+       ELINK_DEBUG_P2(sc, "vars : turn_to_run_wc_rt = %x, rsrv2 = %x",
+                      vars->turn_to_run_wc_rt, vars->rsrv2);
 
        for (phy_index = ELINK_EXT_PHY1; phy_index < params->num_phys;
-            phy_index++) {
+             phy_index++) {
                if (params->phy[phy_index].flags &
                    ELINK_FLAGS_REARM_LATCH_SIGNAL) {
                        elink_rearm_latch_signal(sc, port,
@@ -6415,9 +7911,9 @@ elink_status_t elink_link_update(struct elink_params * params,
                        break;
                }
        }
-       PMD_DRV_LOG(DEBUG, "vars->flow_ctrl = 0x%x, vars->link_status = 0x%x,"
-                   " ext_phy_line_speed = %d", vars->flow_ctrl,
-                   vars->link_status, ext_phy_line_speed);
+       ELINK_DEBUG_P3(sc, "vars->flow_ctrl = 0x%x, vars->link_status = 0x%x,"
+                  " ext_phy_line_speed = %d", vars->flow_ctrl,
+                  vars->link_status, ext_phy_line_speed);
        /* Upon link speed change set the NIG into drain mode. Comes to
         * deals with possible FIFO glitch due to clk change when speed
         * is decreased without link down indicator
@@ -6426,15 +7922,15 @@ elink_status_t elink_link_update(struct elink_params * params,
        if (vars->phy_link_up) {
                if (!(ELINK_SINGLE_MEDIA_DIRECT(params)) && ext_phy_link_up &&
                    (ext_phy_line_speed != vars->line_speed)) {
-                       PMD_DRV_LOG(DEBUG, "Internal link speed %d is"
-                                   " different than the external"
-                                   " link speed %d", vars->line_speed,
-                                   ext_phy_line_speed);
+                       ELINK_DEBUG_P2(sc, "Internal link speed %d is"
+                                  " different than the external"
+                                  " link speed %d", vars->line_speed,
+                                  ext_phy_line_speed);
                        vars->phy_link_up = 0;
+                       ELINK_DEBUG_P0(sc, "phy_link_up set to 0");
                } else if (prev_line_speed != vars->line_speed) {
-                       REG_WR(sc,
-                              NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4,
-                              0);
+                       REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE +
+                              params->port * 4, 0);
                        DELAY(1000 * 1);
                }
        }
@@ -6452,11 +7948,11 @@ elink_status_t elink_link_update(struct elink_params * params,
         * initialize it
         */
        if (!(ELINK_SINGLE_MEDIA_DIRECT(params))) {
-               PMD_DRV_LOG(DEBUG, "ext_phy_link_up = %d, int_link_up = %d,"
-                           " init_preceding = %d", ext_phy_link_up,
-                           vars->phy_link_up,
-                           params->phy[ELINK_EXT_PHY1].flags &
-                           ELINK_FLAGS_INIT_XGXS_FIRST);
+               ELINK_DEBUG_P3(sc, "ext_phy_link_up = %d, int_link_up = %d,"
+                          " init_preceding = %d", ext_phy_link_up,
+                          vars->phy_link_up,
+                          params->phy[ELINK_EXT_PHY1].flags &
+                          ELINK_FLAGS_INIT_XGXS_FIRST);
                if (!(params->phy[ELINK_EXT_PHY1].flags &
                      ELINK_FLAGS_INIT_XGXS_FIRST)
                    && ext_phy_link_up && !vars->phy_link_up) {
@@ -6467,11 +7963,9 @@ elink_status_t elink_link_update(struct elink_params * params,
                                vars->phy_flags &= ~PHY_SGMII_FLAG;
 
                        if (params->phy[ELINK_INT_PHY].config_init)
-                               params->phy[ELINK_INT_PHY].config_init(&params->
-                                                                      phy
-                                                                      [ELINK_INT_PHY],
-                                                                      params,
-                                                                      vars);
+                               params->phy[ELINK_INT_PHY].config_init(
+                                       &params->phy[ELINK_INT_PHY], params,
+                                               vars);
                }
        }
        /* Link is up only if both local phy and external phy (in case of
@@ -6482,6 +7976,11 @@ elink_status_t elink_link_update(struct elink_params * params,
                          ELINK_SINGLE_MEDIA_DIRECT(params)) &&
                         (phy_vars[active_external_phy].fault_detected == 0));
 
+       if (vars->link_up)
+               ELINK_DEBUG_P0(sc, "local phy and external phy are up");
+       else
+               ELINK_DEBUG_P0(sc, "either local phy or external phy or both are down");
+
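
A condensed restatement of the link-up rule computed just above; the
helper and its parameter names are illustrative, not part of the driver:

	/* Link is up iff the internal PHY is up, AND either the external
	 * PHY is up or the board drives the media directly (no external
	 * PHY in the path), AND the active external PHY reports no fault.
	 */
	static int example_link_is_up(int int_phy_up, int ext_phy_up,
				      int direct_media, int fault_detected)
	{
		return int_phy_up && (ext_phy_up || direct_media) &&
		       !fault_detected;
	}
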
        /* Update the PFC configuration in case it was changed */
        if (params->feature_config_flags & ELINK_FEATURE_CONFIG_PFC_ENABLED)
                vars->link_status |= LINK_STATUS_PFC_ENABLED;
@@ -6493,9 +7992,12 @@ elink_status_t elink_link_update(struct elink_params * params,
        else
                rc = elink_update_link_down(params, vars);
 
+       if ((prev_link_status ^ vars->link_status) & LINK_STATUS_LINK_UP)
+               elink_chng_link_count(params, 0);
+
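
The XOR here is a compact edge detector: a bit differs between the
previous and current status words exactly when it toggled, so masking
with LINK_STATUS_LINK_UP catches transitions in either direction. A
worked truth table (illustrative):

	/*  prev_up  cur_up  (prev ^ cur) & LINK_UP  counter bumped?
	 *    0        1              set            yes (link came up)
	 *    1        0              set            yes (link went down)
	 *    0        0            clear            no
	 *    1        1            clear            no
	 */
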
        /* Update MCP link status was changed */
-       if (params->
-           feature_config_flags & ELINK_FEATURE_CONFIG_BC_SUPPORTS_AFEX)
+       if (params->feature_config_flags &
+           ELINK_FEATURE_CONFIG_BC_SUPPORTS_AFEX)
                elink_cb_fw_command(sc, DRV_MSG_CODE_LINK_STATUS_CHANGED, 0);
 
        return rc;
@@ -6504,28 +8006,28 @@ elink_status_t elink_link_update(struct elink_params * params,
 /*****************************************************************************/
 /*                         External Phy section                             */
 /*****************************************************************************/
-static void elink_ext_phy_hw_reset(struct bnx2x_softc *sc, uint8_t port)
+void elink_ext_phy_hw_reset(struct bnx2x_softc *sc, uint8_t port)
 {
        elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_1,
-                           MISC_REGISTERS_GPIO_OUTPUT_LOW, port);
+                      MISC_REGISTERS_GPIO_OUTPUT_LOW, port);
        DELAY(1000 * 1);
        elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_1,
-                           MISC_REGISTERS_GPIO_OUTPUT_HIGH, port);
+                      MISC_REGISTERS_GPIO_OUTPUT_HIGH, port);
 }
 
-static void elink_save_spirom_version(struct bnx2x_softc *sc,
-                                     __rte_unused uint8_t port,
+static void elink_save_spirom_version(struct bnx2x_softc *sc, uint8_t port,
                                      uint32_t spirom_ver, uint32_t ver_addr)
 {
-       PMD_DRV_LOG(DEBUG, "FW version 0x%x:0x%x for port %d",
-                   (uint16_t) (spirom_ver >> 16), (uint16_t) spirom_ver, port);
+       ELINK_DEBUG_P3(sc, "FW version 0x%x:0x%x for port %d",
+                (uint16_t)(spirom_ver >> 16), (uint16_t)spirom_ver, port);
 
        if (ver_addr)
                REG_WR(sc, ver_addr, spirom_ver);
 }
 
 static void elink_save_bnx2x_spirom_ver(struct bnx2x_softc *sc,
-                                     struct elink_phy *phy, uint8_t port)
+                                     struct elink_phy *phy,
+                                     uint8_t port)
 {
        uint16_t fw_ver1, fw_ver2;
 
@@ -6533,18 +8035,21 @@ static void elink_save_bnx2x_spirom_ver(struct bnx2x_softc *sc,
                        MDIO_PMA_REG_ROM_VER1, &fw_ver1);
        elink_cl45_read(sc, phy, MDIO_PMA_DEVAD,
                        MDIO_PMA_REG_ROM_VER2, &fw_ver2);
-       elink_save_spirom_version(sc, port,
-                                 (uint32_t) (fw_ver1 << 16 | fw_ver2),
+       elink_save_spirom_version(sc, port, (uint32_t)(fw_ver1 << 16 | fw_ver2),
                                  phy->ver_addr);
 }
 
 static void elink_ext_phy_10G_an_resolve(struct bnx2x_softc *sc,
-                                        struct elink_phy *phy,
-                                        struct elink_vars *vars)
+                                      struct elink_phy *phy,
+                                      struct elink_vars *vars)
 {
        uint16_t val;
-       elink_cl45_read(sc, phy, MDIO_AN_DEVAD, MDIO_AN_REG_STATUS, &val);
-       elink_cl45_read(sc, phy, MDIO_AN_DEVAD, MDIO_AN_REG_STATUS, &val);
+       elink_cl45_read(sc, phy,
+                       MDIO_AN_DEVAD,
+                       MDIO_AN_REG_STATUS, &val);
+       elink_cl45_read(sc, phy,
+                       MDIO_AN_DEVAD,
+                       MDIO_AN_REG_STATUS, &val);
        if (val & (1 << 5))
                vars->link_status |= LINK_STATUS_AUTO_NEGOTIATE_COMPLETE;
        if ((val & (1 << 0)) == 0)
@@ -6568,8 +8073,8 @@ static void elink_8073_resolve_fc(struct elink_phy *phy,
        if (elink_ext_phy_resolve_fc(phy, params, vars) &&
            (vars->flow_ctrl == ELINK_FLOW_CTRL_NONE)) {
                uint16_t pause_result;
-               uint16_t ld_pause;      /* local */
-               uint16_t lp_pause;      /* link partner */
+               uint16_t ld_pause;              /* local */
+               uint16_t lp_pause;              /* link partner */
                elink_cl45_read(sc, phy,
                                MDIO_AN_DEVAD,
                                MDIO_AN_REG_CL37_FC_LD, &ld_pause);
@@ -6582,31 +8087,35 @@ static void elink_8073_resolve_fc(struct elink_phy *phy,
                pause_result |= (lp_pause &
                                 MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH) >> 7;
 
-               elink_pause_resolve(vars, pause_result);
-               PMD_DRV_LOG(DEBUG, "Ext PHY CL37 pause result 0x%x",
-                           pause_result);
+               elink_pause_resolve(phy, params, vars, pause_result);
+               ELINK_DEBUG_P1(sc, "Ext PHY CL37 pause result 0x%x",
+                          pause_result);
        }
 }
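
For orientation, the shift above packs the link partner's CL37 pause
advertisement bits into the low half of a nibble; the local bits are
shifted into the upper half earlier in the function (outside this
hunk). The layout below is inferred from the shifts and is illustrative:

	/* Assumed pause_result nibble fed to elink_pause_resolve():
	 *   bit 3: local PAUSE      bit 2: local ASYM_PAUSE
	 *   bit 1: partner PAUSE    bit 0: partner ASYM_PAUSE
	 * elink_pause_resolve() maps this nibble to RX/TX flow control.
	 */
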
-
 static elink_status_t elink_8073_8727_external_rom_boot(struct bnx2x_softc *sc,
-                                                       struct elink_phy *phy,
-                                                       uint8_t port)
+                                            struct elink_phy *phy,
+                                            uint8_t port)
 {
        uint32_t count = 0;
-       uint16_t fw_ver1 = 0, fw_msgout;
+       uint16_t fw_ver1, fw_msgout;
        elink_status_t rc = ELINK_STATUS_OK;
 
        /* Boot port from external ROM  */
        /* EDC grst */
        elink_cl45_write(sc, phy,
-                        MDIO_PMA_DEVAD, MDIO_PMA_REG_GEN_CTRL, 0x0001);
+                        MDIO_PMA_DEVAD,
+                        MDIO_PMA_REG_GEN_CTRL,
+                        0x0001);
 
        /* Ucode reboot and rst */
        elink_cl45_write(sc, phy,
-                        MDIO_PMA_DEVAD, MDIO_PMA_REG_GEN_CTRL, 0x008c);
+                        MDIO_PMA_DEVAD,
+                        MDIO_PMA_REG_GEN_CTRL,
+                        0x008c);
 
        elink_cl45_write(sc, phy,
-                        MDIO_PMA_DEVAD, MDIO_PMA_REG_MISC_CTRL1, 0x0001);
+                        MDIO_PMA_DEVAD,
+                        MDIO_PMA_REG_MISC_CTRL1, 0x0001);
 
        /* Reset internal microprocessor */
        elink_cl45_write(sc, phy,
@@ -6627,10 +8136,10 @@ static elink_status_t elink_8073_8727_external_rom_boot(struct bnx2x_softc *sc,
        do {
                count++;
                if (count > 300) {
-                       PMD_DRV_LOG(DEBUG,
-                                   "elink_8073_8727_external_rom_boot port %x:"
-                                   "Download failed. fw version = 0x%x",
-                                   port, fw_ver1);
+                       ELINK_DEBUG_P2(sc,
+                                "elink_8073_8727_external_rom_boot port %x:"
+                                "Download failed. fw version = 0x%x",
+                                port, fw_ver1);
                        rc = ELINK_STATUS_ERROR;
                        break;
                }
@@ -6644,17 +8153,19 @@ static elink_status_t elink_8073_8727_external_rom_boot(struct bnx2x_softc *sc,
 
                DELAY(1000 * 1);
        } while (fw_ver1 == 0 || fw_ver1 == 0x4321 ||
-                ((fw_msgout & 0xff) != 0x03 && (phy->type ==
-                                                PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8073)));
+                       ((fw_msgout & 0xff) != 0x03 && (phy->type ==
+                       PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8073)));
 
        /* Clear ser_boot_ctl bit */
        elink_cl45_write(sc, phy,
-                        MDIO_PMA_DEVAD, MDIO_PMA_REG_MISC_CTRL1, 0x0000);
+                        MDIO_PMA_DEVAD,
+                        MDIO_PMA_REG_MISC_CTRL1, 0x0000);
        elink_save_bnx2x_spirom_ver(sc, phy, port);
 
-       PMD_DRV_LOG(DEBUG,
-                   "elink_8073_8727_external_rom_boot port %x:"
-                   "Download complete. fw version = 0x%x", port, fw_ver1);
+       ELINK_DEBUG_P2(sc,
+                "elink_8073_8727_external_rom_boot port %x:"
+                "Download complete. fw version = 0x%x",
+                port, fw_ver1);
 
        return rc;
 }
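
The download wait above is a standard bounded poll; a distilled sketch,
where read_fw_state() and fw_ready() are hypothetical stand-ins for the
two CL45 reads and the loop's exit condition:

	uint32_t count = 0;
	elink_status_t rc = ELINK_STATUS_OK;

	do {
		if (++count > 300) {	/* ~300 ms at one poll per ms */
			rc = ELINK_STATUS_ERROR;
			break;
		}
		read_fw_state(&fw_ver1, &fw_msgout); /* hypothetical */
		DELAY(1000);
	} while (!fw_ready(fw_ver1, fw_msgout));     /* hypothetical */
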
@@ -6668,22 +8179,25 @@ static elink_status_t elink_8073_is_snr_needed(struct bnx2x_softc *sc,
        /* This is only required for 8073A1, version 102 only */
        uint16_t val;
 
-       /* Read 8073 HW revision */
+       /* Read 8073 HW revision*/
        elink_cl45_read(sc, phy,
-                       MDIO_PMA_DEVAD, MDIO_PMA_REG_8073_CHIP_REV, &val);
+                       MDIO_PMA_DEVAD,
+                       MDIO_PMA_REG_8073_CHIP_REV, &val);
 
        if (val != 1) {
                /* No need to workaround in 8073 A1 */
                return ELINK_STATUS_OK;
        }
 
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_ROM_VER2, &val);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD,
+                       MDIO_PMA_REG_ROM_VER2, &val);
 
        /* SNR should be applied only for version 0x102 */
        if (val != 0x102)
                return ELINK_STATUS_OK;
 
-       return ELINK_STATUS_ERROR;
+       return 1;
 }
 
 static elink_status_t elink_8073_xaui_wa(struct bnx2x_softc *sc,
@@ -6692,7 +8206,8 @@ static elink_status_t elink_8073_xaui_wa(struct bnx2x_softc *sc,
        uint16_t val, cnt, cnt1;
 
        elink_cl45_read(sc, phy,
-                       MDIO_PMA_DEVAD, MDIO_PMA_REG_8073_CHIP_REV, &val);
+                       MDIO_PMA_DEVAD,
+                       MDIO_PMA_REG_8073_CHIP_REV, &val);
 
        if (val > 0) {
                /* No need to workaround in 8073 A1 */
@@ -6707,16 +8222,17 @@ static elink_status_t elink_8073_xaui_wa(struct bnx2x_softc *sc,
        for (cnt = 0; cnt < 1000; cnt++) {
                elink_cl45_read(sc, phy,
                                MDIO_PMA_DEVAD,
-                               MDIO_PMA_REG_8073_SPEED_LINK_STATUS, &val);
-               /* If bit [14] = 0 or bit [13] = 0, continue on with
-                * system initialization (XAUI work-around not required, as
-                * these bits indicate 2.5G or 1G link up).
-                */
+                               MDIO_PMA_REG_8073_SPEED_LINK_STATUS,
+                               &val);
+                 /* If bit [14] = 0 or bit [13] = 0, continue on with
+                  * system initialization (XAUI work-around not required, as
+                  * these bits indicate 2.5G or 1G link up).
+                  */
                if (!(val & (1 << 14)) || !(val & (1 << 13))) {
-                       PMD_DRV_LOG(DEBUG, "XAUI work-around not required");
+                       ELINK_DEBUG_P0(sc, "XAUI work-around not required");
                        return ELINK_STATUS_OK;
                } else if (!(val & (1 << 15))) {
-                       PMD_DRV_LOG(DEBUG, "bit 15 went off");
+                       ELINK_DEBUG_P0(sc, "bit 15 went off");
                        /* If bit 15 is 0, then poll Dev1, Reg $C841 until it's
                         * MSB (bit15) goes to 1 (indicating that the XAUI
                         * workaround has completed), then continue on with
@@ -6724,12 +8240,11 @@ static elink_status_t elink_8073_xaui_wa(struct bnx2x_softc *sc,
                         */
                        for (cnt1 = 0; cnt1 < 1000; cnt1++) {
                                elink_cl45_read(sc, phy,
-                                               MDIO_PMA_DEVAD,
-                                               MDIO_PMA_REG_8073_XAUI_WA,
-                                               &val);
+                                       MDIO_PMA_DEVAD,
+                                       MDIO_PMA_REG_8073_XAUI_WA, &val);
                                if (val & (1 << 15)) {
-                                       PMD_DRV_LOG(DEBUG,
-                                                   "XAUI workaround has completed");
+                                       ELINK_DEBUG_P0(sc,
+                                         "XAUI workaround has completed");
                                        return ELINK_STATUS_OK;
                                }
                                DELAY(1000 * 3);
@@ -6738,19 +8253,21 @@ static elink_status_t elink_8073_xaui_wa(struct bnx2x_softc *sc,
                }
                DELAY(1000 * 3);
        }
-       PMD_DRV_LOG(DEBUG, "Warning: XAUI work-around timeout !!!");
+       ELINK_DEBUG_P0(sc, "Warning: XAUI work-around timeout !!!");
        return ELINK_STATUS_ERROR;
 }
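
A condensed shape of the work-around poll above; read_speed_status()
and read_xaui_wa() are hypothetical stand-ins for the CL45 reads:

	for (cnt = 0; cnt < 1000; cnt++) {
		val = read_speed_status();
		if (!(val & (1 << 14)) || !(val & (1 << 13)))
			return ELINK_STATUS_OK;	/* 2.5G/1G up: WA not needed */
		if (!(val & (1 << 15))) {	/* 10G path in recovery */
			for (cnt1 = 0; cnt1 < 1000; cnt1++) {
				if (read_xaui_wa() & (1 << 15))
					return ELINK_STATUS_OK;	/* WA done */
				DELAY(1000 * 3);
			}
			break;			/* inner poll timed out */
		}
		DELAY(1000 * 3);
	}
	return ELINK_STATUS_ERROR;		/* timeout */
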
 
 static void elink_807x_force_10G(struct bnx2x_softc *sc, struct elink_phy *phy)
 {
        /* Force KR or KX */
-       elink_cl45_write(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x2040);
+       elink_cl45_write(sc, phy,
+                        MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x2040);
        elink_cl45_write(sc, phy,
                         MDIO_PMA_DEVAD, MDIO_PMA_REG_10G_CTRL2, 0x000b);
        elink_cl45_write(sc, phy,
-                        MDIO_PMA_DEVAD, MDIO_PMA_REG_BNX2X_CTRL, 0x0000);
-       elink_cl45_write(sc, phy, MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, 0x0000);
+                        MDIO_PMA_DEVAD, MDIO_PMA_REG_BCM_CTRL, 0x0000);
+       elink_cl45_write(sc, phy,
+                        MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, 0x0000);
 }
 
 static void elink_8073_set_pause_cl37(struct elink_params *params,
@@ -6766,21 +8283,22 @@ static void elink_8073_set_pause_cl37(struct elink_params *params,
        /* Please refer to Table 28B-3 of 802.3ab-1999 spec. */
        elink_calc_ieee_aneg_adv(phy, params, &vars->ieee_fc);
        if ((vars->ieee_fc &
-            MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_SYMMETRIC) ==
+           MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_SYMMETRIC) ==
            MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_SYMMETRIC) {
-               cl37_val |= MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_SYMMETRIC;
+               cl37_val |=  MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_SYMMETRIC;
        }
        if ((vars->ieee_fc &
-            MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC) ==
+           MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC) ==
            MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC) {
-               cl37_val |= MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC;
+               cl37_val |=  MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC;
        }
        if ((vars->ieee_fc &
-            MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH) ==
+           MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH) ==
            MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH) {
                cl37_val |= MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH;
        }
-       PMD_DRV_LOG(DEBUG, "Ext phy AN advertize cl37 0x%x", cl37_val);
+       ELINK_DEBUG_P1(sc,
+                "Ext phy AN advertize cl37 0x%x", cl37_val);
 
        elink_cl45_write(sc, phy,
                         MDIO_AN_DEVAD, MDIO_AN_REG_CL37_FC_LD, cl37_val);
@@ -6798,31 +8316,31 @@ static void elink_8073_specific_func(struct elink_phy *phy,
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL,
                                 (1 << 2));
-               elink_cl45_write(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL,
-                                0x0004);
+               elink_cl45_write(sc, phy,
+                                MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL,  0x0004);
                break;
        }
 }
 
 static uint8_t elink_8073_config_init(struct elink_phy *phy,
-                                     struct elink_params *params,
-                                     struct elink_vars *vars)
+                                 struct elink_params *params,
+                                 struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t val = 0, tmp1;
        uint8_t gpio_port;
-       PMD_DRV_LOG(DEBUG, "Init 8073");
+       ELINK_DEBUG_P0(sc, "Init 8073");
 
        if (CHIP_IS_E2(sc))
                gpio_port = SC_PATH(sc);
        else
                gpio_port = params->port;
-       /* Restore normal power mode */
+       /* Restore normal power mode*/
        elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_2,
-                           MISC_REGISTERS_GPIO_OUTPUT_HIGH, gpio_port);
+                      MISC_REGISTERS_GPIO_OUTPUT_HIGH, gpio_port);
 
        elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_1,
-                           MISC_REGISTERS_GPIO_OUTPUT_HIGH, gpio_port);
+                      MISC_REGISTERS_GPIO_OUTPUT_HIGH, gpio_port);
 
        elink_8073_specific_func(phy, params, ELINK_PHY_INIT);
        elink_8073_set_pause_cl37(params, phy, vars);
@@ -6830,14 +8348,15 @@ static uint8_t elink_8073_config_init(struct elink_phy *phy,
        elink_cl45_read(sc, phy,
                        MDIO_PMA_DEVAD, MDIO_PMA_REG_M8051_MSGOUT_REG, &tmp1);
 
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXSTAT, &tmp1);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXSTAT, &tmp1);
 
-       PMD_DRV_LOG(DEBUG, "Before rom RX_ALARM(port1): 0x%x", tmp1);
+       ELINK_DEBUG_P1(sc, "Before rom RX_ALARM(port1): 0x%x", tmp1);
 
        /* Swap polarity if required - Must be done only in non-1G mode */
        if (params->lane_config & PORT_HW_CFG_SWAP_PHY_POLARITY_ENABLED) {
                /* Configure the 8073 to swap _P and _N of the KR lines */
-               PMD_DRV_LOG(DEBUG, "Swapping polarity for the 8073");
+               ELINK_DEBUG_P0(sc, "Swapping polarity for the 8073");
                /* 10G Rx/Tx and 1G Tx signal polarity swap */
                elink_cl45_read(sc, phy,
                                MDIO_PMA_DEVAD,
@@ -6848,31 +8367,33 @@ static uint8_t elink_8073_config_init(struct elink_phy *phy,
                                 (val | (3 << 9)));
        }
 
+
        /* Enable CL37 BAM */
        if (REG_RD(sc, params->shmem_base +
-                  offsetof(struct shmem_region,
-                           dev_info.port_hw_config[params->port].
-                           default_cfg)) &
+                        offsetof(struct shmem_region, dev_info.
+                                 port_hw_config[params->port].default_cfg)) &
            PORT_HW_CFG_ENABLE_BAM_ON_KR_ENABLED) {
 
                elink_cl45_read(sc, phy,
-                               MDIO_AN_DEVAD, MDIO_AN_REG_8073_BAM, &val);
+                               MDIO_AN_DEVAD,
+                               MDIO_AN_REG_8073_BAM, &val);
                elink_cl45_write(sc, phy,
-                                MDIO_AN_DEVAD, MDIO_AN_REG_8073_BAM, val | 1);
-               PMD_DRV_LOG(DEBUG, "Enable CL37 BAM on KR");
+                                MDIO_AN_DEVAD,
+                                MDIO_AN_REG_8073_BAM, val | 1);
+               ELINK_DEBUG_P0(sc, "Enable CL37 BAM on KR");
        }
        if (params->loopback_mode == ELINK_LOOPBACK_EXT) {
                elink_807x_force_10G(sc, phy);
-               PMD_DRV_LOG(DEBUG, "Forced speed 10G on 807X");
+               ELINK_DEBUG_P0(sc, "Forced speed 10G on 807X");
                return ELINK_STATUS_OK;
        } else {
                elink_cl45_write(sc, phy,
-                                MDIO_PMA_DEVAD, MDIO_PMA_REG_BNX2X_CTRL, 0x0002);
+                                MDIO_PMA_DEVAD, MDIO_PMA_REG_BCM_CTRL, 0x0002);
        }
        if (phy->req_line_speed != ELINK_SPEED_AUTO_NEG) {
                if (phy->req_line_speed == ELINK_SPEED_10000) {
                        val = (1 << 7);
-               } else if (phy->req_line_speed == ELINK_SPEED_2500) {
+               } else if (phy->req_line_speed ==  ELINK_SPEED_2500) {
                        val = (1 << 5);
                        /* Note that 2.5G works only when used with 1G
                         * advertisement
@@ -6881,15 +8402,16 @@ static uint8_t elink_8073_config_init(struct elink_phy *phy,
                        val = (1 << 5);
        } else {
                val = 0;
-               if (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)
+               if (phy->speed_cap_mask &
+                       PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)
                        val |= (1 << 7);
 
                /* Note that 2.5G works only when used with 1G advertisement */
                if (phy->speed_cap_mask &
-                   (PORT_HW_CFG_SPEED_CAPABILITY_D0_1G |
-                    PORT_HW_CFG_SPEED_CAPABILITY_D0_2_5G))
+                       (PORT_HW_CFG_SPEED_CAPABILITY_D0_1G |
+                        PORT_HW_CFG_SPEED_CAPABILITY_D0_2_5G))
                        val |= (1 << 5);
-               PMD_DRV_LOG(DEBUG, "807x autoneg val = 0x%x", val);
+               ELINK_DEBUG_P1(sc, "807x autoneg val = 0x%x", val);
        }
 
        elink_cl45_write(sc, phy, MDIO_AN_DEVAD, MDIO_AN_REG_ADV, val);
@@ -6903,13 +8425,13 @@ static uint8_t elink_8073_config_init(struct elink_phy *phy,
                elink_cl45_read(sc, phy,
                                MDIO_PMA_DEVAD, MDIO_PMA_REG_8073_CHIP_REV,
                                &phy_ver);
-               PMD_DRV_LOG(DEBUG, "Add 2.5G");
+               ELINK_DEBUG_P0(sc, "Add 2.5G");
                if (phy_ver > 0)
                        tmp1 |= 1;
                else
                        tmp1 &= 0xfffe;
        } else {
-               PMD_DRV_LOG(DEBUG, "Disable 2.5G");
+               ELINK_DEBUG_P0(sc, "Disable 2.5G");
                tmp1 &= 0xfffe;
        }
 
@@ -6943,14 +8465,14 @@ static uint8_t elink_8073_config_init(struct elink_phy *phy,
        /* Restart autoneg */
        DELAY(1000 * 500);
        elink_cl45_write(sc, phy, MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, 0x1200);
-       PMD_DRV_LOG(DEBUG, "807x Autoneg Restart: Advertise 1G=%x, 10G=%x",
-                   ((val & (1 << 5)) > 0), ((val & (1 << 7)) > 0));
+       ELINK_DEBUG_P2(sc, "807x Autoneg Restart: Advertise 1G=%x, 10G=%x",
+                  ((val & (1 << 5)) > 0), ((val & (1 << 7)) > 0));
        return ELINK_STATUS_OK;
 }
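
The advertisement value assembled above maps speed capabilities to two
register bits; a helper mirroring that mapping (illustrative only):

	/* Per the code above: bit 7 advertises 10G, bit 5 advertises 1G,
	 * and 2.5G rides on the 1G bit (it only works when 1G is also
	 * advertised).
	 */
	static uint16_t example_adv_bits(uint32_t speed_cap_mask)
	{
		uint16_t val = 0;

		if (speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)
			val |= (1 << 7);
		if (speed_cap_mask & (PORT_HW_CFG_SPEED_CAPABILITY_D0_1G |
				      PORT_HW_CFG_SPEED_CAPABILITY_D0_2_5G))
			val |= (1 << 5);
		return val;
	}
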
 
 static uint8_t elink_8073_read_status(struct elink_phy *phy,
-                                     struct elink_params *params,
-                                     struct elink_vars *vars)
+                                struct elink_params *params,
+                                struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t link_up = 0;
@@ -6958,33 +8480,41 @@ static uint8_t elink_8073_read_status(struct elink_phy *phy,
        uint16_t link_status = 0;
        uint16_t an1000_status = 0;
 
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_LASI_STAT, &val1);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_LASI_STAT, &val1);
 
-       PMD_DRV_LOG(DEBUG, "8703 LASI status 0x%x", val1);
+       ELINK_DEBUG_P1(sc, "8703 LASI status 0x%x", val1);
 
        /* Clear the interrupt LASI status register */
-       elink_cl45_read(sc, phy, MDIO_PCS_DEVAD, MDIO_PCS_REG_STATUS, &val2);
-       elink_cl45_read(sc, phy, MDIO_PCS_DEVAD, MDIO_PCS_REG_STATUS, &val1);
-       PMD_DRV_LOG(DEBUG, "807x PCS status 0x%x->0x%x", val2, val1);
+       elink_cl45_read(sc, phy,
+                       MDIO_PCS_DEVAD, MDIO_PCS_REG_STATUS, &val2);
+       elink_cl45_read(sc, phy,
+                       MDIO_PCS_DEVAD, MDIO_PCS_REG_STATUS, &val1);
+       ELINK_DEBUG_P2(sc, "807x PCS status 0x%x->0x%x", val2, val1);
        /* Clear MSG-OUT */
        elink_cl45_read(sc, phy,
                        MDIO_PMA_DEVAD, MDIO_PMA_REG_M8051_MSGOUT_REG, &val1);
 
        /* Check the LASI */
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXSTAT, &val2);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXSTAT, &val2);
 
-       PMD_DRV_LOG(DEBUG, "KR 0x9003 0x%x", val2);
+       ELINK_DEBUG_P1(sc, "KR 0x9003 0x%x", val2);
 
        /* Check the link status */
-       elink_cl45_read(sc, phy, MDIO_PCS_DEVAD, MDIO_PCS_REG_STATUS, &val2);
-       PMD_DRV_LOG(DEBUG, "KR PCS status 0x%x", val2);
+       elink_cl45_read(sc, phy,
+                       MDIO_PCS_DEVAD, MDIO_PCS_REG_STATUS, &val2);
+       ELINK_DEBUG_P1(sc, "KR PCS status 0x%x", val2);
 
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_STATUS, &val2);
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_STATUS, &val1);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_REG_STATUS, &val2);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_REG_STATUS, &val1);
        link_up = ((val1 & 4) == 4);
-       PMD_DRV_LOG(DEBUG, "PMA_REG_STATUS=0x%x", val1);
+       ELINK_DEBUG_P1(sc, "PMA_REG_STATUS=0x%x", val1);
 
-       if (link_up && ((phy->req_line_speed != ELINK_SPEED_10000))) {
+       if (link_up &&
+            ((phy->req_line_speed != ELINK_SPEED_10000))) {
                if (elink_8073_xaui_wa(sc, phy) != 0)
                        return 0;
        }
@@ -6994,10 +8524,12 @@ static uint8_t elink_8073_read_status(struct elink_phy *phy,
                        MDIO_AN_DEVAD, MDIO_AN_REG_LINK_STATUS, &an1000_status);
 
        /* Check the link status on 1.1.2 */
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_STATUS, &val2);
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_STATUS, &val1);
-       PMD_DRV_LOG(DEBUG, "KR PMA status 0x%x->0x%x,"
-                   "an_link_status=0x%x", val2, val1, an1000_status);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_REG_STATUS, &val2);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_REG_STATUS, &val1);
+       ELINK_DEBUG_P3(sc, "KR PMA status 0x%x->0x%x,"
+                  " an_link_status=0x%x", val2, val1, an1000_status);
 
        link_up = (((val1 & 4) == 4) || (an1000_status & (1 << 1)));
        if (link_up && elink_8073_is_snr_needed(sc, phy)) {
@@ -7022,27 +8554,28 @@ static uint8_t elink_8073_read_status(struct elink_phy *phy,
        if ((link_status & (1 << 2)) && (!(link_status & (1 << 15)))) {
                link_up = 1;
                vars->line_speed = ELINK_SPEED_10000;
-               PMD_DRV_LOG(DEBUG, "port %x: External link up in 10G",
-                           params->port);
+               ELINK_DEBUG_P1(sc, "port %x: External link up in 10G",
+                          params->port);
        } else if ((link_status & (1 << 1)) && (!(link_status & (1 << 14)))) {
                link_up = 1;
                vars->line_speed = ELINK_SPEED_2500;
-               PMD_DRV_LOG(DEBUG, "port %x: External link up in 2.5G",
-                           params->port);
+               ELINK_DEBUG_P1(sc, "port %x: External link up in 2.5G",
+                          params->port);
        } else if ((link_status & (1 << 0)) && (!(link_status & (1 << 13)))) {
                link_up = 1;
                vars->line_speed = ELINK_SPEED_1000;
-               PMD_DRV_LOG(DEBUG, "port %x: External link up in 1G",
-                           params->port);
+               ELINK_DEBUG_P1(sc, "port %x: External link up in 1G",
+                          params->port);
        } else {
                link_up = 0;
-               PMD_DRV_LOG(DEBUG, "port %x: External link is down",
-                           params->port);
+               ELINK_DEBUG_P1(sc, "port %x: External link is down",
+                          params->port);
        }
 
        if (link_up) {
                /* Swap polarity if required */
-               if (params->lane_config & PORT_HW_CFG_SWAP_PHY_POLARITY_ENABLED) {
+               if (params->lane_config &
+                   PORT_HW_CFG_SWAP_PHY_POLARITY_ENABLED) {
                        /* Configure the 8073 to swap P and N of the KR lines */
                        elink_cl45_read(sc, phy,
                                        MDIO_XS_DEVAD,
@@ -7051,15 +8584,16 @@ static uint8_t elink_8073_read_status(struct elink_phy *phy,
                         * when it`s in 10G mode.
                         */
                        if (vars->line_speed == ELINK_SPEED_1000) {
-                               PMD_DRV_LOG(DEBUG, "Swapping 1G polarity for"
-                                           "the 8073");
+                               ELINK_DEBUG_P0(sc, "Swapping 1G polarity for"
+                                              " the 8073");
                                val1 |= (1 << 3);
                        } else
                                val1 &= ~(1 << 3);
 
                        elink_cl45_write(sc, phy,
                                         MDIO_XS_DEVAD,
-                                        MDIO_XS_REG_8073_RX_CTRL_PCIE, val1);
+                                        MDIO_XS_REG_8073_RX_CTRL_PCIE,
+                                        val1);
                }
                elink_ext_phy_10G_an_resolve(sc, phy, vars);
                elink_8073_resolve_fc(phy, params, vars);
@@ -7072,10 +8606,10 @@ static uint8_t elink_8073_read_status(struct elink_phy *phy,
 
                if (val1 & (1 << 5))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE;
                if (val1 & (1 << 7))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
        }
 
        return link_up;
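
The speed decode above pairs each "link up" bit with a companion bit 13
positions higher, and only reports a speed when the up bit is set and
the companion bit is clear. Summary of the mapping as coded:

	/* link_status decode in elink_8073_read_status():
	 *   bit 2 set && bit 15 clear  -> 10G
	 *   bit 1 set && bit 14 clear  -> 2.5G
	 *   bit 0 set && bit 13 clear  -> 1G
	 *   otherwise                  -> external link down
	 */
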
@@ -7090,25 +8624,25 @@ static void elink_8073_link_reset(__rte_unused struct elink_phy *phy,
                gpio_port = SC_PATH(sc);
        else
                gpio_port = params->port;
-       PMD_DRV_LOG(DEBUG, "Setting 8073 port %d into low power mode",
-                   gpio_port);
+       ELINK_DEBUG_P1(sc, "Setting 8073 port %d into low power mode",
+          gpio_port);
        elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_2,
-                           MISC_REGISTERS_GPIO_OUTPUT_LOW, gpio_port);
+                      MISC_REGISTERS_GPIO_OUTPUT_LOW,
+                      gpio_port);
 }
 
 /******************************************************************/
 /*                     BNX2X8705 PHY SECTION                     */
 /******************************************************************/
 static uint8_t elink_8705_config_init(struct elink_phy *phy,
-                                     struct elink_params *params,
-                                     __rte_unused struct elink_vars
-                                            *vars)
+                                 struct elink_params *params,
+                                 __rte_unused struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
-       PMD_DRV_LOG(DEBUG, "init 8705");
-       /* Restore normal power mode */
+       ELINK_DEBUG_P0(sc, "init 8705");
+       /* Restore normal power mode*/
        elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_2,
-                           MISC_REGISTERS_GPIO_OUTPUT_HIGH, params->port);
+                      MISC_REGISTERS_GPIO_OUTPUT_HIGH, params->port);
        /* HW reset */
        elink_ext_phy_hw_reset(sc, params->port);
        elink_cl45_write(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0xa040);
@@ -7120,36 +8654,40 @@ static uint8_t elink_8705_config_init(struct elink_phy *phy,
                         MDIO_PMA_DEVAD, MDIO_PMA_REG_PHY_IDENTIFIER, 0x7fbf);
        elink_cl45_write(sc, phy,
                         MDIO_PMA_DEVAD, MDIO_PMA_REG_CMU_PLL_BYPASS, 0x0100);
-       elink_cl45_write(sc, phy, MDIO_WIS_DEVAD, MDIO_WIS_REG_LASI_CNTL, 0x1);
+       elink_cl45_write(sc, phy,
+                        MDIO_WIS_DEVAD, MDIO_WIS_REG_LASI_CNTL, 0x1);
        /* BNX2X8705 doesn't have microcode, hence the 0 */
        elink_save_spirom_version(sc, params->port, params->shmem_base, 0);
        return ELINK_STATUS_OK;
 }
 
 static uint8_t elink_8705_read_status(struct elink_phy *phy,
-                                     struct elink_params *params,
-                                     struct elink_vars *vars)
+                                struct elink_params *params,
+                                struct elink_vars *vars)
 {
        uint8_t link_up = 0;
        uint16_t val1, rx_sd;
        struct bnx2x_softc *sc = params->sc;
-       PMD_DRV_LOG(DEBUG, "read status 8705");
+       ELINK_DEBUG_P0(sc, "read status 8705");
        elink_cl45_read(sc, phy,
-                       MDIO_WIS_DEVAD, MDIO_WIS_REG_LASI_STATUS, &val1);
-       PMD_DRV_LOG(DEBUG, "8705 LASI status 0x%x", val1);
+                     MDIO_WIS_DEVAD, MDIO_WIS_REG_LASI_STATUS, &val1);
+       ELINK_DEBUG_P1(sc, "8705 LASI status 0x%x", val1);
 
        elink_cl45_read(sc, phy,
-                       MDIO_WIS_DEVAD, MDIO_WIS_REG_LASI_STATUS, &val1);
-       PMD_DRV_LOG(DEBUG, "8705 LASI status 0x%x", val1);
+                     MDIO_WIS_DEVAD, MDIO_WIS_REG_LASI_STATUS, &val1);
+       ELINK_DEBUG_P1(sc, "8705 LASI status 0x%x", val1);
 
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_RX_SD, &rx_sd);
+       elink_cl45_read(sc, phy,
+                     MDIO_PMA_DEVAD, MDIO_PMA_REG_RX_SD, &rx_sd);
 
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, 0xc809, &val1);
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, 0xc809, &val1);
+       elink_cl45_read(sc, phy,
+                     MDIO_PMA_DEVAD, 0xc809, &val1);
+       elink_cl45_read(sc, phy,
+                     MDIO_PMA_DEVAD, 0xc809, &val1);
 
-       PMD_DRV_LOG(DEBUG, "8705 1.c809 val=0x%x", val1);
-       link_up = ((rx_sd & 0x1) && (val1 & (1 << 9))
-                  && ((val1 & (1 << 8)) == 0));
+       ELINK_DEBUG_P1(sc, "8705 1.c809 val=0x%x", val1);
+       link_up = ((rx_sd & 0x1) && (val1 & (1 << 9)) &&
+                  ((val1 & (1 << 8)) == 0));
        if (link_up) {
                vars->line_speed = ELINK_SPEED_10000;
                elink_ext_phy_resolve_fc(phy, params, vars);
@@ -7170,17 +8708,17 @@ static void elink_set_disable_pmd_transmit(struct elink_params *params,
         */
        if (pmd_dis) {
                if (params->feature_config_flags &
-                   ELINK_FEATURE_CONFIG_BC_SUPPORTS_SFP_TX_DISABLED) {
-                       PMD_DRV_LOG(DEBUG, "Disabling PMD transmitter");
+                    ELINK_FEATURE_CONFIG_BC_SUPPORTS_SFP_TX_DISABLED) {
+                       ELINK_DEBUG_P0(sc, "Disabling PMD transmitter");
                } else {
-                       PMD_DRV_LOG(DEBUG, "NOT disabling PMD transmitter");
+                       ELINK_DEBUG_P0(sc, "NOT disabling PMD transmitter");
                        return;
                }
-       } else {
-               PMD_DRV_LOG(DEBUG, "Enabling PMD transmitter");
-       }
+       } else
+               ELINK_DEBUG_P0(sc, "Enabling PMD transmitter");
        elink_cl45_write(sc, phy,
-                        MDIO_PMA_DEVAD, MDIO_PMA_REG_TX_DISABLE, pmd_dis);
+                        MDIO_PMA_DEVAD,
+                        MDIO_PMA_REG_TX_DISABLE, pmd_dis);
 }
 
 static uint8_t elink_get_gpio_port(struct elink_params *params)
@@ -7188,37 +8726,38 @@ static uint8_t elink_get_gpio_port(struct elink_params *params)
        uint8_t gpio_port;
        uint32_t swap_val, swap_override;
        struct bnx2x_softc *sc = params->sc;
-       if (CHIP_IS_E2(sc)) {
+       if (CHIP_IS_E2(sc))
                gpio_port = SC_PATH(sc);
-       } else {
+       else
                gpio_port = params->port;
-       }
        swap_val = REG_RD(sc, NIG_REG_PORT_SWAP);
        swap_override = REG_RD(sc, NIG_REG_STRAP_OVERRIDE);
        return gpio_port ^ (swap_val && swap_override);
 }
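
Note the logical (not bitwise) AND in the return above: swap_val and
swap_override are whole register values treated as booleans, so the XOR
flips the port index only when a port swap is both configured and the
strap override is active. Truth table (illustrative):

	/*  swap_val  swap_override   returned port
	 *     0          any         gpio_port
	 *    any          0          gpio_port
	 *    !=0         !=0         gpio_port ^ 1
	 */
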
 
 static void elink_sfp_e1e2_set_transmitter(struct elink_params *params,
-                                          struct elink_phy *phy, uint8_t tx_en)
+                                          struct elink_phy *phy,
+                                          uint8_t tx_en)
 {
        uint16_t val;
        uint8_t port = params->port;
        struct bnx2x_softc *sc = params->sc;
        uint32_t tx_en_mode;
 
-       /* Disable/Enable transmitter ( TX laser of the SFP+ module.) */
+       /* Disable/Enable transmitter ( TX laser of the SFP+ module.)*/
        tx_en_mode = REG_RD(sc, params->shmem_base +
                            offsetof(struct shmem_region,
                                     dev_info.port_hw_config[port].sfp_ctrl)) &
-           PORT_HW_CFG_TX_LASER_MASK;
-       PMD_DRV_LOG(DEBUG, "Setting transmitter tx_en=%x for port %x "
-                   "mode = %x", tx_en, port, tx_en_mode);
+               PORT_HW_CFG_TX_LASER_MASK;
+       ELINK_DEBUG_P3(sc, "Setting transmitter tx_en=%x for port %x "
+                          "mode = %x", tx_en, port, tx_en_mode);
        switch (tx_en_mode) {
        case PORT_HW_CFG_TX_LASER_MDIO:
 
                elink_cl45_read(sc, phy,
                                MDIO_PMA_DEVAD,
-                               MDIO_PMA_REG_PHY_IDENTIFIER, &val);
+                               MDIO_PMA_REG_PHY_IDENTIFIER,
+                               &val);
 
                if (tx_en)
                        val &= ~(1 << 15);
@@ -7227,36 +8766,38 @@ static void elink_sfp_e1e2_set_transmitter(struct elink_params *params,
 
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD,
-                                MDIO_PMA_REG_PHY_IDENTIFIER, val);
-               break;
+                                MDIO_PMA_REG_PHY_IDENTIFIER,
+                                val);
+       break;
        case PORT_HW_CFG_TX_LASER_GPIO0:
        case PORT_HW_CFG_TX_LASER_GPIO1:
        case PORT_HW_CFG_TX_LASER_GPIO2:
        case PORT_HW_CFG_TX_LASER_GPIO3:
-               {
-                       uint16_t gpio_pin;
-                       uint8_t gpio_port, gpio_mode;
-                       if (tx_en)
-                               gpio_mode = MISC_REGISTERS_GPIO_OUTPUT_HIGH;
-                       else
-                               gpio_mode = MISC_REGISTERS_GPIO_OUTPUT_LOW;
+       {
+               uint16_t gpio_pin;
+               uint8_t gpio_port, gpio_mode;
+               if (tx_en)
+                       gpio_mode = MISC_REGISTERS_GPIO_OUTPUT_HIGH;
+               else
+                       gpio_mode = MISC_REGISTERS_GPIO_OUTPUT_LOW;
 
-                       gpio_pin = tx_en_mode - PORT_HW_CFG_TX_LASER_GPIO0;
-                       gpio_port = elink_get_gpio_port(params);
-                       elink_cb_gpio_write(sc, gpio_pin, gpio_mode, gpio_port);
-                       break;
-               }
+               gpio_pin = tx_en_mode - PORT_HW_CFG_TX_LASER_GPIO0;
+               gpio_port = elink_get_gpio_port(params);
+               elink_cb_gpio_write(sc, gpio_pin, gpio_mode, gpio_port);
+               break;
+       }
        default:
-               PMD_DRV_LOG(DEBUG, "Invalid TX_LASER_MDIO 0x%x", tx_en_mode);
+               ELINK_DEBUG_P1(sc, "Invalid TX_LASER_MDIO 0x%x", tx_en_mode);
                break;
        }
 }
 
 static void elink_sfp_set_transmitter(struct elink_params *params,
-                                     struct elink_phy *phy, uint8_t tx_en)
+                                     struct elink_phy *phy,
+                                     uint8_t tx_en)
 {
        struct bnx2x_softc *sc = params->sc;
-       PMD_DRV_LOG(DEBUG, "Setting SFP+ transmitter to %d", tx_en);
+       ELINK_DEBUG_P1(sc, "Setting SFP+ transmitter to %d", tx_en);
        if (CHIP_IS_E3(sc))
                elink_sfp_e3_set_transmitter(params, phy, tx_en);
        else
@@ -7264,20 +8805,17 @@ static void elink_sfp_set_transmitter(struct elink_params *params,
 }
 
 static elink_status_t elink_8726_read_sfp_module_eeprom(struct elink_phy *phy,
-                                                       struct elink_params
-                                                       *params,
-                                                       uint8_t dev_addr,
-                                                       uint16_t addr,
-                                                       uint8_t byte_cnt,
-                                                       uint8_t * o_buf,
-                                                       __rte_unused uint8_t
-                                                       is_init)
+                            struct elink_params *params,
+                            uint8_t dev_addr, uint16_t addr,
+                            uint8_t byte_cnt,
+                            uint8_t *o_buf, __rte_unused uint8_t is_init)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t val = 0;
        uint16_t i;
        if (byte_cnt > ELINK_SFP_EEPROM_PAGE_SIZE) {
-               PMD_DRV_LOG(DEBUG, "Reading from eeprom is limited to 0xf");
+               ELINK_DEBUG_P0(sc,
+                  "Reading from eeprom is limited to 0xf");
                return ELINK_STATUS_ERROR;
        }
        /* Set the read command byte count */
@@ -7307,10 +8845,10 @@ static elink_status_t elink_8726_read_sfp_module_eeprom(struct elink_phy *phy,
        }
 
        if ((val & MDIO_PMA_REG_SFP_TWO_WIRE_CTRL_STATUS_MASK) !=
-           MDIO_PMA_REG_SFP_TWO_WIRE_STATUS_COMPLETE) {
-               PMD_DRV_LOG(DEBUG,
-                           "Got bad status 0x%x when reading from SFP+ EEPROM",
-                           (val & MDIO_PMA_REG_SFP_TWO_WIRE_CTRL_STATUS_MASK));
+                   MDIO_PMA_REG_SFP_TWO_WIRE_STATUS_COMPLETE) {
+               ELINK_DEBUG_P1(sc,
+                        "Got bad status 0x%x when reading from SFP+ EEPROM",
+                        (val & MDIO_PMA_REG_SFP_TWO_WIRE_CTRL_STATUS_MASK));
                return ELINK_STATUS_ERROR;
        }
 
@@ -7319,8 +8857,8 @@ static elink_status_t elink_8726_read_sfp_module_eeprom(struct elink_phy *phy,
                elink_cl45_read(sc, phy,
                                MDIO_PMA_DEVAD,
                                MDIO_PMA_REG_8726_TWO_WIRE_DATA_BUF + i, &val);
-               o_buf[i] =
-                   (uint8_t) (val & MDIO_PMA_REG_8726_TWO_WIRE_DATA_MASK);
+               o_buf[i] = (uint8_t)
+                               (val & MDIO_PMA_REG_8726_TWO_WIRE_DATA_MASK);
        }
 
        for (i = 0; i < 100; i++) {
@@ -7343,29 +8881,27 @@ static void elink_warpcore_power_module(struct elink_params *params,
 
        pin_cfg = (REG_RD(sc, params->shmem_base +
                          offsetof(struct shmem_region,
-                                  dev_info.port_hw_config[params->port].
-                                  e3_sfp_ctrl)) & PORT_HW_CFG_E3_PWR_DIS_MASK)
-           >> PORT_HW_CFG_E3_PWR_DIS_SHIFT;
+                       dev_info.port_hw_config[params->port].e3_sfp_ctrl)) &
+                       PORT_HW_CFG_E3_PWR_DIS_MASK) >>
+                       PORT_HW_CFG_E3_PWR_DIS_SHIFT;
 
        if (pin_cfg == PIN_CFG_NA)
                return;
-       PMD_DRV_LOG(DEBUG, "Setting SFP+ module power to %d using pin cfg %d",
-                   power, pin_cfg);
+       ELINK_DEBUG_P2(sc, "Setting SFP+ module power to %d using pin cfg %d",
+                      power, pin_cfg);
        /* Low ==> corresponding SFP+ module is powered
         * high ==> the SFP+ module is powered down
         */
        elink_set_cfg_pin(sc, pin_cfg, power ^ 1);
 }
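
As the comment above notes, the config pin is active-low, which is why
the value is inverted with power ^ 1 before being driven (illustrative
mapping):

	/*  power = 1 (module on)  -> pin driven 0 (low  = powered)
	 *  power = 0 (module off) -> pin driven 1 (high = powered down)
	 */
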
-
-static elink_status_t elink_warpcore_read_sfp_module_eeprom(__rte_unused struct
-                                                           elink_phy *phy,
-                                                           struct elink_params
-                                                           *params,
-                                                           uint8_t dev_addr,
-                                                           uint16_t addr,
-                                                           uint8_t byte_cnt,
-                                                           uint8_t * o_buf,
-                                                           uint8_t is_init)
+static elink_status_t elink_warpcore_read_sfp_module_eeprom(
+                                        __rte_unused struct elink_phy *phy,
+                                        struct elink_params *params,
+                                        uint8_t dev_addr,
+                                        uint16_t addr,
+                                        uint8_t byte_cnt,
+                                        uint8_t *o_buf,
+                                        uint8_t is_init)
 {
        elink_status_t rc = ELINK_STATUS_OK;
        uint8_t i, j = 0, cnt = 0;
@@ -7374,8 +8910,8 @@ static elink_status_t elink_warpcore_read_sfp_module_eeprom(__rte_unused struct
        struct bnx2x_softc *sc = params->sc;
 
        if (byte_cnt > ELINK_SFP_EEPROM_PAGE_SIZE) {
-               PMD_DRV_LOG(DEBUG,
-                           "Reading from eeprom is limited to 16 bytes");
+               ELINK_DEBUG_P0(sc,
+                  "Reading from eeprom is limited to 16 bytes");
                return ELINK_STATUS_ERROR;
        }
 
@@ -7388,13 +8924,15 @@ static elink_status_t elink_warpcore_read_sfp_module_eeprom(__rte_unused struct
                        DELAY(1000 * 1);
                        elink_warpcore_power_module(params, 1);
                }
-               rc = elink_bsc_read(params, sc, dev_addr, addr32, 0, byte_cnt,
+
+               elink_bsc_module_sel(params);
+               rc = elink_bsc_read(sc, dev_addr, addr32, 0, byte_cnt,
                                    data_array);
        } while ((rc != ELINK_STATUS_OK) && (++cnt < I2C_WA_RETRY_CNT));
 
        if (rc == ELINK_STATUS_OK) {
                for (i = (addr - addr32); i < byte_cnt + (addr - addr32); i++) {
-                       o_buf[j] = *((uint8_t *) data_array + i);
+                       o_buf[j] = *((uint8_t *)data_array + i);
                        j++;
                }
        }
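The do/while above works around stuck I2C slaves: on a retry the SFP+ module is power-cycled before the BSC read is reissued, up to I2C_WA_RETRY_CNT attempts. A standalone sketch of that pattern; power_module(), bsc_read() and delay_ms() are hypothetical stand-ins for the elink helpers:

#include <stdint.h>

#define I2C_WA_RETRY_CNT 3

static void power_module(int on) { (void)on; }
static void delay_ms(unsigned int ms) { (void)ms; }
static int bsc_read(uint8_t dev_addr, uint32_t addr, uint8_t cnt,
		    uint32_t *buf)
{
	(void)dev_addr; (void)addr; (void)cnt; (void)buf;
	return 0;	/* pretend the transaction succeeded */
}

static int read_with_retry(uint8_t dev_addr, uint32_t addr,
			   uint8_t cnt, uint32_t *buf)
{
	int rc, tries = 0;

	do {
		if (tries) {		/* power-cycle only on a retry */
			power_module(0);
			delay_ms(1);
			power_module(1);
		}
		rc = bsc_read(dev_addr, addr, cnt, buf);
	} while (rc != 0 && ++tries < I2C_WA_RETRY_CNT);

	return rc;
}

int main(void)
{
	uint32_t buf[4];

	return read_with_retry(0xa0, 0, sizeof(buf), buf);
}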
@@ -7403,20 +8941,18 @@ static elink_status_t elink_warpcore_read_sfp_module_eeprom(__rte_unused struct
 }
 
 static elink_status_t elink_8727_read_sfp_module_eeprom(struct elink_phy *phy,
-                                                       struct elink_params
-                                                       *params,
-                                                       uint8_t dev_addr,
-                                                       uint16_t addr,
-                                                       uint8_t byte_cnt,
-                                                       uint8_t * o_buf,
-                                                       __rte_unused uint8_t
-                                                       is_init)
+                                            struct elink_params *params,
+                                            uint8_t dev_addr, uint16_t addr,
+                                            uint8_t byte_cnt,
+                                            uint8_t *o_buf,
+                                            __rte_unused uint8_t is_init)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t val, i;
 
        if (byte_cnt > ELINK_SFP_EEPROM_PAGE_SIZE) {
-               PMD_DRV_LOG(DEBUG, "Reading from eeprom is limited to 0xf");
+               ELINK_DEBUG_P0(sc,
+                  "Reading from eeprom is limited to 0xf");
                return ELINK_STATUS_ERROR;
        }
 
@@ -7431,7 +8967,9 @@ static elink_status_t elink_8727_read_sfp_module_eeprom(struct elink_phy *phy,
 
        /* Need to read from 1.8000 to clear it */
        elink_cl45_read(sc, phy,
-                       MDIO_PMA_DEVAD, MDIO_PMA_REG_SFP_TWO_WIRE_CTRL, &val);
+                       MDIO_PMA_DEVAD,
+                       MDIO_PMA_REG_SFP_TWO_WIRE_CTRL,
+                       &val);
 
        /* Set the read command byte count */
        elink_cl45_write(sc, phy,
@@ -7442,16 +8980,19 @@ static elink_status_t elink_8727_read_sfp_module_eeprom(struct elink_phy *phy,
        /* Set the read command address */
        elink_cl45_write(sc, phy,
                         MDIO_PMA_DEVAD,
-                        MDIO_PMA_REG_SFP_TWO_WIRE_MEM_ADDR, addr);
+                        MDIO_PMA_REG_SFP_TWO_WIRE_MEM_ADDR,
+                        addr);
        /* Set the destination address */
        elink_cl45_write(sc, phy,
                         MDIO_PMA_DEVAD,
-                        0x8004, MDIO_PMA_REG_8727_TWO_WIRE_DATA_BUF);
+                        0x8004,
+                        MDIO_PMA_REG_8727_TWO_WIRE_DATA_BUF);
 
        /* Activate read command */
        elink_cl45_write(sc, phy,
                         MDIO_PMA_DEVAD,
-                        MDIO_PMA_REG_SFP_TWO_WIRE_CTRL, 0x8002);
+                        MDIO_PMA_REG_SFP_TWO_WIRE_CTRL,
+                        0x8002);
        /* Wait appropriate time for two-wire command to finish before
         * polling the status register
         */
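The 8727 sequence above is a classic mailbox protocol: program the byte count, the EEPROM address and the destination buffer, kick the command (0x8002), wait a fixed settle time, then poll the control/status register for the COMPLETE encoding. The polling half as a standalone sketch; register access is stubbed and the mask/status values are illustrative stand-ins for the MDIO_PMA_REG_SFP_TWO_WIRE_* constants:

#include <stdint.h>

#define TWO_WIRE_CTRL_STATUS_MASK 0x000c	/* illustrative encodings */
#define TWO_WIRE_STATUS_COMPLETE  0x0004

static void delay_ms(unsigned int ms) { (void)ms; }
static uint16_t read_two_wire_ctrl(void) { return TWO_WIRE_STATUS_COMPLETE; }

/* Poll until the two-wire engine reports COMPLETE or the budget runs out */
static int wait_two_wire_complete(void)
{
	uint16_t val = 0;
	int i;

	for (i = 0; i < 100; i++) {
		delay_ms(1);
		val = read_two_wire_ctrl();
		if ((val & TWO_WIRE_CTRL_STATUS_MASK) ==
		    TWO_WIRE_STATUS_COMPLETE)
			return 0;
	}
	return -1;	/* bad status / timeout */
}

int main(void)
{
	return wait_two_wire_complete();
}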
@@ -7469,10 +9010,10 @@ static elink_status_t elink_8727_read_sfp_module_eeprom(struct elink_phy *phy,
        }
 
        if ((val & MDIO_PMA_REG_SFP_TWO_WIRE_CTRL_STATUS_MASK) !=
-           MDIO_PMA_REG_SFP_TWO_WIRE_STATUS_COMPLETE) {
-               PMD_DRV_LOG(DEBUG,
-                           "Got bad status 0x%x when reading from SFP+ EEPROM",
-                           (val & MDIO_PMA_REG_SFP_TWO_WIRE_CTRL_STATUS_MASK));
+                   MDIO_PMA_REG_SFP_TWO_WIRE_STATUS_COMPLETE) {
+               ELINK_DEBUG_P1(sc,
+                        "Got bad status 0x%x when reading from SFP+ EEPROM",
+                        (val & MDIO_PMA_REG_SFP_TWO_WIRE_CTRL_STATUS_MASK));
                return ELINK_STATUS_TIMEOUT;
        }
 
@@ -7481,8 +9022,8 @@ static elink_status_t elink_8727_read_sfp_module_eeprom(struct elink_phy *phy,
                elink_cl45_read(sc, phy,
                                MDIO_PMA_DEVAD,
                                MDIO_PMA_REG_8727_TWO_WIRE_DATA_BUF + i, &val);
-               o_buf[i] =
-                   (uint8_t) (val & MDIO_PMA_REG_8727_TWO_WIRE_DATA_MASK);
+               o_buf[i] = (uint8_t)
+                               (val & MDIO_PMA_REG_8727_TWO_WIRE_DATA_MASK);
        }
 
        for (i = 0; i < 100; i++) {
@@ -7497,21 +9038,18 @@ static elink_status_t elink_8727_read_sfp_module_eeprom(struct elink_phy *phy,
 
        return ELINK_STATUS_ERROR;
 }
-
-static elink_status_t elink_read_sfp_module_eeprom(struct elink_phy *phy,
-                                                  struct elink_params *params,
-                                                  uint8_t dev_addr,
-                                                  uint16_t addr,
-                                                  uint16_t byte_cnt,
-                                                  uint8_t * o_buf)
+elink_status_t elink_read_sfp_module_eeprom(struct elink_phy *phy,
+                                struct elink_params *params, uint8_t dev_addr,
+                                uint16_t addr, uint16_t byte_cnt,
+                                uint8_t *o_buf)
 {
-       elink_status_t rc = ELINK_STATUS_OK;
+       elink_status_t rc = 0;
+       struct bnx2x_softc *sc = params->sc;
        uint8_t xfer_size;
        uint8_t *user_data = o_buf;
        read_sfp_module_eeprom_func_p read_func;
-
        if ((dev_addr != 0xa0) && (dev_addr != 0xa2)) {
-               PMD_DRV_LOG(DEBUG, "invalid dev_addr 0x%x", dev_addr);
+               ELINK_DEBUG_P1(sc, "invalid dev_addr 0x%x", dev_addr);
                return ELINK_STATUS_ERROR;
        }
 
@@ -7532,7 +9070,7 @@ static elink_status_t elink_read_sfp_module_eeprom(struct elink_phy *phy,
 
        while (!rc && (byte_cnt > 0)) {
                xfer_size = (byte_cnt > ELINK_SFP_EEPROM_PAGE_SIZE) ?
-                   ELINK_SFP_EEPROM_PAGE_SIZE : byte_cnt;
+                       ELINK_SFP_EEPROM_PAGE_SIZE : byte_cnt;
                rc = read_func(phy, params, dev_addr, addr, xfer_size,
                               user_data, 0);
                byte_cnt -= xfer_size;
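elink_read_sfp_module_eeprom() is now exported (the static qualifier is dropped) and, as before, splits a request into page-sized transfers because each two-wire command can move at most ELINK_SFP_EEPROM_PAGE_SIZE (16) bytes. The chunking loop in isolation, with a toy backing store standing in for the per-PHY read_func callback:

#include <stdint.h>
#include <string.h>

#define SFP_EEPROM_PAGE_SIZE 16

static int read_page(uint16_t addr, uint8_t len, uint8_t *buf)
{
	/* Toy backing store instead of a real two-wire transaction */
	static const uint8_t eeprom[256] = { [0] = 0x03 /* SFP identifier */ };

	if ((size_t)addr + len > sizeof(eeprom))
		return -1;
	memcpy(buf, &eeprom[addr], len);
	return 0;
}

static int read_eeprom(uint16_t addr, uint16_t byte_cnt, uint8_t *o_buf)
{
	int rc = 0;

	while (!rc && byte_cnt > 0) {
		uint8_t xfer = (byte_cnt > SFP_EEPROM_PAGE_SIZE) ?
				SFP_EEPROM_PAGE_SIZE : (uint8_t)byte_cnt;

		rc = read_page(addr, xfer, o_buf);
		byte_cnt -= xfer;
		addr += xfer;
		o_buf += xfer;
	}
	return rc;
}

int main(void)
{
	uint8_t buf[40];

	return read_eeprom(0, sizeof(buf), buf);   /* 3 transfers: 16+16+8 */
}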
@@ -7543,91 +9081,105 @@ static elink_status_t elink_read_sfp_module_eeprom(struct elink_phy *phy,
 }
 
 static elink_status_t elink_get_edc_mode(struct elink_phy *phy,
-                                        struct elink_params *params,
-                                        uint16_t * edc_mode)
+                             struct elink_params *params,
+                             uint16_t *edc_mode)
 {
        struct bnx2x_softc *sc = params->sc;
        uint32_t sync_offset = 0, phy_idx, media_types;
-       uint8_t gport, val[2], check_limiting_mode = 0;
+       uint8_t val[ELINK_SFP_EEPROM_FC_TX_TECH_ADDR + 1];
+       uint8_t check_limiting_mode = 0;
        *edc_mode = ELINK_EDC_MODE_LIMITING;
        phy->media_type = ELINK_ETH_PHY_UNSPECIFIED;
        /* First check for copper cable */
        if (elink_read_sfp_module_eeprom(phy,
                                         params,
                                         ELINK_I2C_DEV_ADDR_A0,
-                                        ELINK_SFP_EEPROM_CON_TYPE_ADDR,
-                                        2, (uint8_t *) val) != 0) {
-               PMD_DRV_LOG(DEBUG, "Failed to read from SFP+ module EEPROM");
+                                        0,
+                                        ELINK_SFP_EEPROM_FC_TX_TECH_ADDR + 1,
+                                        (uint8_t *)val) != 0) {
+               ELINK_DEBUG_P0(sc, "Failed to read from SFP+ module EEPROM");
                return ELINK_STATUS_ERROR;
        }
-
-       switch (val[0]) {
+       params->link_attr_sync &= ~LINK_SFP_EEPROM_COMP_CODE_MASK;
+       params->link_attr_sync |= val[ELINK_SFP_EEPROM_10G_COMP_CODE_ADDR] <<
+               LINK_SFP_EEPROM_COMP_CODE_SHIFT;
+       elink_update_link_attr(params, params->link_attr_sync);
+       switch (val[ELINK_SFP_EEPROM_CON_TYPE_ADDR]) {
        case ELINK_SFP_EEPROM_CON_TYPE_VAL_COPPER:
-               {
-                       uint8_t copper_module_type;
-                       phy->media_type = ELINK_ETH_PHY_DA_TWINAX;
-                       /* Check if its active cable (includes SFP+ module)
-                        * of passive cable
+       {
+               uint8_t copper_module_type;
+               phy->media_type = ELINK_ETH_PHY_DA_TWINAX;
+               /* Check if it is an active cable (includes SFP+ module)
+                * or a passive cable

+                */
+               copper_module_type = val[ELINK_SFP_EEPROM_FC_TX_TECH_ADDR];
+               if (copper_module_type &
+                   ELINK_SFP_EEPROM_FC_TX_TECH_BITMASK_COPPER_ACTIVE) {
+                       ELINK_DEBUG_P0(sc, "Active Copper cable detected");
+                       if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT)
+                               *edc_mode = ELINK_EDC_MODE_ACTIVE_DAC;
+                       else
+                               check_limiting_mode = 1;
+               } else {
+                       *edc_mode = ELINK_EDC_MODE_PASSIVE_DAC;
+                       /* Even in case PASSIVE_DAC indication is not set,
+                        * treat it as a passive DAC cable, since some cables
+                        * don't have this indication.
                         */
-                       if (elink_read_sfp_module_eeprom(phy,
-                                                        params,
-                                                        ELINK_I2C_DEV_ADDR_A0,
-                                                        ELINK_SFP_EEPROM_FC_TX_TECH_ADDR,
-                                                        1,
-                                                        &copper_module_type) !=
-                           0) {
-                               PMD_DRV_LOG(DEBUG,
-                                           "Failed to read copper-cable-type"
-                                           " from SFP+ EEPROM");
-                               return ELINK_STATUS_ERROR;
-                       }
-
                        if (copper_module_type &
-                           ELINK_SFP_EEPROM_FC_TX_TECH_BITMASK_COPPER_ACTIVE) {
-                               PMD_DRV_LOG(DEBUG,
-                                           "Active Copper cable detected");
-                               if (phy->type ==
-                                   PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT)
-                                       *edc_mode = ELINK_EDC_MODE_ACTIVE_DAC;
-                               else
-                                       check_limiting_mode = 1;
-                       } else if (copper_module_type &
-                                  ELINK_SFP_EEPROM_FC_TX_TECH_BITMASK_COPPER_PASSIVE)
-                       {
-                               PMD_DRV_LOG(DEBUG,
-                                           "Passive Copper cable detected");
-                               *edc_mode = ELINK_EDC_MODE_PASSIVE_DAC;
+                          ELINK_SFP_EEPROM_FC_TX_TECH_BITMASK_COPPER_PASSIVE) {
+                               ELINK_DEBUG_P0(sc,
+                                              "Passive Copper cable detected");
                        } else {
-                               PMD_DRV_LOG(DEBUG,
-                                           "Unknown copper-cable-type 0x%x !!!",
-                                           copper_module_type);
-                               return ELINK_STATUS_ERROR;
+                               ELINK_DEBUG_P0(sc,
+                                              "Unknown copper-cable-type");
                        }
-                       break;
                }
+               break;
+       }
+       case ELINK_SFP_EEPROM_CON_TYPE_VAL_UNKNOWN:
        case ELINK_SFP_EEPROM_CON_TYPE_VAL_LC:
        case ELINK_SFP_EEPROM_CON_TYPE_VAL_RJ45:
                check_limiting_mode = 1;
-               if ((val[1] & (ELINK_SFP_EEPROM_COMP_CODE_SR_MASK |
-                              ELINK_SFP_EEPROM_COMP_CODE_LR_MASK |
-                              ELINK_SFP_EEPROM_COMP_CODE_LRM_MASK)) == 0) {
-                       PMD_DRV_LOG(DEBUG, "1G SFP module detected");
-                       gport = params->port;
+               /* The module is treated as 1G when it is NOT compliant with
+                * any 10G Ethernet protocol but is 1G Ethernet compliant.
+                */
+               if (((val[ELINK_SFP_EEPROM_10G_COMP_CODE_ADDR] &
+                     (ELINK_SFP_EEPROM_10G_COMP_CODE_SR_MASK |
+                      ELINK_SFP_EEPROM_10G_COMP_CODE_LR_MASK |
+                      ELINK_SFP_EEPROM_10G_COMP_CODE_LRM_MASK)) == 0) &&
+                   (val[ELINK_SFP_EEPROM_1G_COMP_CODE_ADDR] != 0)) {
+                       ELINK_DEBUG_P0(sc, "1G SFP module detected");
                        phy->media_type = ELINK_ETH_PHY_SFP_1G_FIBER;
                        if (phy->req_line_speed != ELINK_SPEED_1000) {
+                               uint8_t gport = params->port;
                                phy->req_line_speed = ELINK_SPEED_1000;
                                if (!CHIP_IS_E1x(sc)) {
                                        gport = SC_PATH(sc) +
-                                           (params->port << 1);
+                                       (params->port << 1);
                                }
-                               elink_cb_event_log(sc, ELINK_LOG_ID_NON_10G_MODULE, gport);     //"Warning: Link speed was forced to 1000Mbps."
-                               // " Current SFP module in port %d is not"
-                               // " compliant with 10G Ethernet",
+                               elink_cb_event_log(sc,
+                                                  ELINK_LOG_ID_NON_10G_MODULE,
+                                                  gport);
+                                /*"Warning: Link speed was forced to 1000Mbps."
+                                 *" Current SFP module in port %d is not"
+                                 *" compliant with 10G Ethernet",
+                                 */
+                       }
 
+                       if (val[ELINK_SFP_EEPROM_1G_COMP_CODE_ADDR] &
+                           ELINK_SFP_EEPROM_1G_COMP_CODE_BASE_T) {
+                               /* Some 1G-baseT modules will not link up
+                                * unless TX_EN is toggled with a long delay
+                                * in between.
+                                */
+                               elink_sfp_set_transmitter(params, phy, 0);
+                               DELAY(1000 * 40);
+                               elink_sfp_set_transmitter(params, phy, 1);
                        }
                } else {
                        int idx, cfg_idx = 0;
-                       PMD_DRV_LOG(DEBUG, "10G Optic module detected");
+                       ELINK_DEBUG_P0(sc, "10G Optic module detected");
                        for (idx = ELINK_INT_PHY; idx < ELINK_MAX_PHYS; idx++) {
                                if (params->phy[idx].type == phy->type) {
                                        cfg_idx = ELINK_LINK_CONFIG_IDX(idx);
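The rewritten elink_get_edc_mode() now reads the first ELINK_SFP_EEPROM_FC_TX_TECH_ADDR + 1 bytes in one shot and branches on the connector-type byte plus the 10G and 1G compliance-code bytes. A distilled standalone version of that decision; the offsets and bit positions follow SFF-8472, but the constants and enum below are local illustrations, not the driver's names:

#include <stdint.h>
#include <stdio.h>

#define EEPROM_CON_TYPE_ADDR      2	/* connector type (SFF-8472)   */
#define EEPROM_10G_COMP_CODE_ADDR 3	/* 10G Ethernet compliance     */
#define EEPROM_1G_COMP_CODE_ADDR  6	/* 1G Ethernet compliance      */
#define CON_TYPE_LC               0x07
#define CON_TYPE_COPPER           0x21	/* copper pigtail              */
#define COMP_10G_SR_LR_LRM_MASK   0x70	/* any 10GBASE-SR/LR/LRM bit   */

enum module_kind { MOD_DAC, MOD_10G_OPTIC, MOD_1G_OPTIC, MOD_UNKNOWN };

static enum module_kind classify(const uint8_t *eeprom)
{
	switch (eeprom[EEPROM_CON_TYPE_ADDR]) {
	case CON_TYPE_COPPER:
		return MOD_DAC;
	case CON_TYPE_LC:
		if (eeprom[EEPROM_10G_COMP_CODE_ADDR] &
		    COMP_10G_SR_LR_LRM_MASK)
			return MOD_10G_OPTIC;
		if (eeprom[EEPROM_1G_COMP_CODE_ADDR] != 0)
			return MOD_1G_OPTIC;
		/* fall through */
	default:
		return MOD_UNKNOWN;
	}
}

int main(void)
{
	uint8_t eeprom[8] = { [EEPROM_CON_TYPE_ADDR] = CON_TYPE_LC,
			      [EEPROM_10G_COMP_CODE_ADDR] = 0x10 /* 10G-SR */ };

	printf("module kind: %d\n", classify(eeprom));
	return 0;
}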
@@ -7639,24 +9191,22 @@ static elink_status_t elink_get_edc_mode(struct elink_phy *phy,
                }
                break;
        default:
-               PMD_DRV_LOG(DEBUG, "Unable to determine module type 0x%x !!!",
-                           val[0]);
+               ELINK_DEBUG_P1(sc, "Unable to determine module type 0x%x !!!",
+                        val[ELINK_SFP_EEPROM_CON_TYPE_ADDR]);
                return ELINK_STATUS_ERROR;
        }
        sync_offset = params->shmem_base +
-           offsetof(struct shmem_region,
-                    dev_info.port_hw_config[params->port].media_type);
+               offsetof(struct shmem_region,
+                        dev_info.port_hw_config[params->port].media_type);
        media_types = REG_RD(sc, sync_offset);
        /* Update media type for non-PMF sync */
        for (phy_idx = ELINK_INT_PHY; phy_idx < ELINK_MAX_PHYS; phy_idx++) {
                if (&(params->phy[phy_idx]) == phy) {
                        media_types &= ~(PORT_HW_CFG_MEDIA_TYPE_PHY0_MASK <<
-                                        (PORT_HW_CFG_MEDIA_TYPE_PHY1_SHIFT *
-                                         phy_idx));
-                       media_types |=
-                           ((phy->
-                             media_type & PORT_HW_CFG_MEDIA_TYPE_PHY0_MASK) <<
-                            (PORT_HW_CFG_MEDIA_TYPE_PHY1_SHIFT * phy_idx));
+                               (PORT_HW_CFG_MEDIA_TYPE_PHY1_SHIFT * phy_idx));
+                       media_types |= ((phy->media_type &
+                                       PORT_HW_CFG_MEDIA_TYPE_PHY0_MASK) <<
+                               (PORT_HW_CFG_MEDIA_TYPE_PHY1_SHIFT * phy_idx));
                        break;
                }
        }
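The loop above updates one per-PHY media-type field inside a shared 32-bit shmem word: clear the field at phy_idx times the field shift, then OR in the new value. The same read-modify-write as a standalone helper; the field width and mask are invented here, the real ones come from PORT_HW_CFG_MEDIA_TYPE_PHY0_MASK and PORT_HW_CFG_MEDIA_TYPE_PHY1_SHIFT:

#include <stdint.h>
#include <stdio.h>

#define MEDIA_TYPE_MASK 0xffu	/* per-PHY field mask (illustrative)  */
#define MEDIA_TYPE_BITS 8u	/* per-PHY field width (illustrative) */

/* Clear the media-type field of one PHY slot and OR in the new value */
static uint32_t set_media_type(uint32_t word, unsigned int phy_idx,
			       uint8_t type)
{
	unsigned int shift = MEDIA_TYPE_BITS * phy_idx;

	word &= ~(MEDIA_TYPE_MASK << shift);
	word |= ((uint32_t)type & MEDIA_TYPE_MASK) << shift;
	return word;
}

int main(void)
{
	uint32_t media_types = 0;

	media_types = set_media_type(media_types, 1, 0x05);
	printf("media_types = 0x%08x\n", (unsigned int)media_types); /* 0x500 */
	return 0;
}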
@@ -7669,8 +9219,8 @@ static elink_status_t elink_get_edc_mode(struct elink_phy *phy,
                                                 ELINK_SFP_EEPROM_OPTIONS_ADDR,
                                                 ELINK_SFP_EEPROM_OPTIONS_SIZE,
                                                 options) != 0) {
-                       PMD_DRV_LOG(DEBUG,
-                                   "Failed to read Option field from module EEPROM");
+                       ELINK_DEBUG_P0(sc,
+                          "Failed to read Option field from module EEPROM");
                        return ELINK_STATUS_ERROR;
                }
                if ((options[0] & ELINK_SFP_EEPROM_OPTIONS_LINEAR_RX_OUT_MASK))
@@ -7678,15 +9228,14 @@ static elink_status_t elink_get_edc_mode(struct elink_phy *phy,
                else
                        *edc_mode = ELINK_EDC_MODE_LIMITING;
        }
-       PMD_DRV_LOG(DEBUG, "EDC mode is set to 0x%x", *edc_mode);
+       ELINK_DEBUG_P1(sc, "EDC mode is set to 0x%x", *edc_mode);
        return ELINK_STATUS_OK;
 }
-
 /* This function reads the relevant field from the module (SFP+) and verifies it
  * is compliant with this board
  */
 static elink_status_t elink_verify_sfp_module(struct elink_phy *phy,
-                                             struct elink_params *params)
+                                  struct elink_params *params)
 {
        struct bnx2x_softc *sc = params->sc;
        uint32_t val, cmd;
@@ -7695,12 +9244,11 @@ static elink_status_t elink_verify_sfp_module(struct elink_phy *phy,
        char vendor_pn[ELINK_SFP_EEPROM_PART_NO_SIZE + 1];
        phy->flags &= ~ELINK_FLAGS_SFP_NOT_APPROVED;
        val = REG_RD(sc, params->shmem_base +
-                    offsetof(struct shmem_region,
-                             dev_info.port_feature_config[params->port].
-                             config));
+                        offsetof(struct shmem_region, dev_info.
+                                 port_feature_config[params->port].config));
        if ((val & PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_MASK) ==
            PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_NO_ENFORCEMENT) {
-               PMD_DRV_LOG(DEBUG, "NOT enforcing module verification");
+               ELINK_DEBUG_P0(sc, "NOT enforcing module verification");
                return ELINK_STATUS_OK;
        }
 
@@ -7710,23 +9258,24 @@ static elink_status_t elink_verify_sfp_module(struct elink_phy *phy,
                cmd = DRV_MSG_CODE_VRFY_SPECIFIC_PHY_OPT_MDL;
        } else if (params->feature_config_flags &
                   ELINK_FEATURE_CONFIG_BC_SUPPORTS_OPT_MDL_VRFY) {
-               /* Use first phy request only in case of non-dual media */
+               /* Use the first PHY request only in case of non-dual media */
                if (ELINK_DUAL_MEDIA(params)) {
-                       PMD_DRV_LOG(DEBUG,
-                                   "FW does not support OPT MDL verification");
+                       ELINK_DEBUG_P0(sc,
+                          "FW does not support OPT MDL verification");
                        return ELINK_STATUS_ERROR;
                }
                cmd = DRV_MSG_CODE_VRFY_FIRST_PHY_OPT_MDL;
        } else {
                /* No support in OPT MDL detection */
-               PMD_DRV_LOG(DEBUG, "FW does not support OPT MDL verification");
+               ELINK_DEBUG_P0(sc,
+                  "FW does not support OPT MDL verification");
                return ELINK_STATUS_ERROR;
        }
 
        fw_cmd_param = ELINK_FW_PARAM_SET(phy->addr, phy->type, phy->mdio_ctrl);
        fw_resp = elink_cb_fw_command(sc, cmd, fw_cmd_param);
        if (fw_resp == FW_MSG_CODE_VRFY_OPT_MDL_SUCCESS) {
-               PMD_DRV_LOG(DEBUG, "Approved module");
+               ELINK_DEBUG_P0(sc, "Approved module");
                return ELINK_STATUS_OK;
        }
 
@@ -7736,7 +9285,7 @@ static elink_status_t elink_verify_sfp_module(struct elink_phy *phy,
                                         ELINK_I2C_DEV_ADDR_A0,
                                         ELINK_SFP_EEPROM_VENDOR_NAME_ADDR,
                                         ELINK_SFP_EEPROM_VENDOR_NAME_SIZE,
-                                        (uint8_t *) vendor_name))
+                                        (uint8_t *)vendor_name))
                vendor_name[0] = '\0';
        else
                vendor_name[ELINK_SFP_EEPROM_VENDOR_NAME_SIZE] = '\0';
@@ -7745,13 +9294,16 @@ static elink_status_t elink_verify_sfp_module(struct elink_phy *phy,
                                         ELINK_I2C_DEV_ADDR_A0,
                                         ELINK_SFP_EEPROM_PART_NO_ADDR,
                                         ELINK_SFP_EEPROM_PART_NO_SIZE,
-                                        (uint8_t *) vendor_pn))
+                                        (uint8_t *)vendor_pn))
                vendor_pn[0] = '\0';
        else
                vendor_pn[ELINK_SFP_EEPROM_PART_NO_SIZE] = '\0';
 
-       elink_cb_event_log(sc, ELINK_LOG_ID_UNQUAL_IO_MODULE, params->port, vendor_name, vendor_pn);    // "Warning: Unqualified SFP+ module detected,"
-       // " Port %d from %s part number %s",
+       elink_cb_event_log(sc, ELINK_LOG_ID_UNQUAL_IO_MODULE, params->port,
+                          vendor_name, vendor_pn);
+                            /* "Warning: Unqualified SFP+ module detected,"
+                             * " Port %d from %s part number %s",
+                             */
 
        if ((val & PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_MASK) !=
            PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_WARNING_MSG)
@@ -7759,13 +9311,14 @@ static elink_status_t elink_verify_sfp_module(struct elink_phy *phy,
        return ELINK_STATUS_ERROR;
 }
 
-static elink_status_t elink_wait_for_sfp_module_initialized(struct elink_phy
-                                                           *phy,
-                                                           struct elink_params
-                                                           *params)
+static elink_status_t elink_wait_for_sfp_module_initialized(
+                                                struct elink_phy *phy,
+                                                struct elink_params *params)
 {
        uint8_t val;
        elink_status_t rc;
+       struct bnx2x_softc *sc = params->sc;
        uint16_t timeout;
        /* Initialization time after hot-plug may take up to 300ms for
         * some PHY types (e.g. JDSU)
@@ -7773,18 +9326,17 @@ static elink_status_t elink_wait_for_sfp_module_initialized(struct elink_phy
 
        for (timeout = 0; timeout < 60; timeout++) {
                if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT)
-                       rc = elink_warpcore_read_sfp_module_eeprom(phy, params,
-                                                                  ELINK_I2C_DEV_ADDR_A0,
-                                                                  1, 1, &val,
-                                                                  1);
+                       rc = elink_warpcore_read_sfp_module_eeprom(
+                               phy, params, ELINK_I2C_DEV_ADDR_A0, 1, 1, &val,
+                               1);
                else
                        rc = elink_read_sfp_module_eeprom(phy, params,
                                                          ELINK_I2C_DEV_ADDR_A0,
                                                          1, 1, &val);
                if (rc == 0) {
-                       PMD_DRV_LOG(DEBUG,
-                                   "SFP+ module initialization took %d ms",
-                                   timeout * 5);
+                       ELINK_DEBUG_P1(sc,
+                          "SFP+ module initialization took %d ms",
+                          timeout * 5);
                        return ELINK_STATUS_OK;
                }
                DELAY(1000 * 5);
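The wait loop above probes the module with a cheap 1-byte EEPROM read every 5 ms for up to 60 iterations, i.e. roughly the 300 ms that slow modules (e.g. JDSU) may need after hot-plug. The generic shape of such a bounded wait, with the probe stubbed out:

#include <stdio.h>

static int attempts;
static void delay_ms(unsigned int ms) { (void)ms; }
static int probe_once(void)	/* stand-in for the 1-byte EEPROM read */
{
	return (++attempts >= 4) ? 0 : -1;	/* "ready" on the 4th try */
}

/* Returns the elapsed wait in ms, or -1 on timeout */
static int wait_ready(unsigned int tries, unsigned int step_ms)
{
	unsigned int t;

	for (t = 0; t < tries; t++) {
		if (probe_once() == 0)
			return (int)(t * step_ms);
		delay_ms(step_ms);
	}
	return -1;
}

int main(void)
{
	printf("module ready after %d ms\n", wait_ready(60, 5));
	return 0;
}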
@@ -7795,8 +9347,8 @@ static elink_status_t elink_wait_for_sfp_module_initialized(struct elink_phy
 }
 
 static void elink_8727_power_module(struct bnx2x_softc *sc,
-                                   struct elink_phy *phy, uint8_t is_power_up)
-{
+                                   struct elink_phy *phy,
+                                   uint8_t is_power_up)
+{
        /* Make sure GPIOs are not being used for LED mode */
        uint16_t val;
        /* In the GPIO register, bit 4 is used to determine if the GPIOs are
@@ -7821,29 +9373,33 @@ static void elink_8727_power_module(struct bnx2x_softc *sc,
                val = (1 << 1);
 
        elink_cl45_write(sc, phy,
-                        MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_GPIO_CTRL, val);
+                        MDIO_PMA_DEVAD,
+                        MDIO_PMA_REG_8727_GPIO_CTRL,
+                        val);
 }
 
 static elink_status_t elink_8726_set_limiting_mode(struct bnx2x_softc *sc,
-                                                  struct elink_phy *phy,
-                                                  uint16_t edc_mode)
+                                       struct elink_phy *phy,
+                                       uint16_t edc_mode)
 {
        uint16_t cur_limiting_mode;
 
        elink_cl45_read(sc, phy,
                        MDIO_PMA_DEVAD,
-                       MDIO_PMA_REG_ROM_VER2, &cur_limiting_mode);
-       PMD_DRV_LOG(DEBUG, "Current Limiting mode is 0x%x", cur_limiting_mode);
+                       MDIO_PMA_REG_ROM_VER2,
+                       &cur_limiting_mode);
+       ELINK_DEBUG_P1(sc, "Current Limiting mode is 0x%x",
+                cur_limiting_mode);
 
        if (edc_mode == ELINK_EDC_MODE_LIMITING) {
-               PMD_DRV_LOG(DEBUG, "Setting LIMITING MODE");
+               ELINK_DEBUG_P0(sc, "Setting LIMITING MODE");
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD,
                                 MDIO_PMA_REG_ROM_VER2,
                                 ELINK_EDC_MODE_LIMITING);
-       } else {                /* LRM mode ( default ) */
+       } else { /* LRM mode (default) */
 
-               PMD_DRV_LOG(DEBUG, "Setting LRM MODE");
+               ELINK_DEBUG_P0(sc, "Setting LRM MODE");
 
                 * Changing to LRM mode takes quite a few seconds, so do it only
                 * if current mode is limiting (default is LRM)
@@ -7852,27 +9408,35 @@ static elink_status_t elink_8726_set_limiting_mode(struct bnx2x_softc *sc,
                        return ELINK_STATUS_OK;
 
                elink_cl45_write(sc, phy,
-                                MDIO_PMA_DEVAD, MDIO_PMA_REG_LRM_MODE, 0);
+                                MDIO_PMA_DEVAD,
+                                MDIO_PMA_REG_LRM_MODE,
+                                0);
                elink_cl45_write(sc, phy,
-                                MDIO_PMA_DEVAD, MDIO_PMA_REG_ROM_VER2, 0x128);
+                                MDIO_PMA_DEVAD,
+                                MDIO_PMA_REG_ROM_VER2,
+                                0x128);
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD,
-                                MDIO_PMA_REG_MISC_CTRL0, 0x4008);
+                                MDIO_PMA_REG_MISC_CTRL0,
+                                0x4008);
                elink_cl45_write(sc, phy,
-                                MDIO_PMA_DEVAD, MDIO_PMA_REG_LRM_MODE, 0xaaaa);
+                                MDIO_PMA_DEVAD,
+                                MDIO_PMA_REG_LRM_MODE,
+                                0xaaaa);
        }
        return ELINK_STATUS_OK;
 }
 
 static elink_status_t elink_8727_set_limiting_mode(struct bnx2x_softc *sc,
-                                                  struct elink_phy *phy,
-                                                  uint16_t edc_mode)
+                                       struct elink_phy *phy,
+                                       uint16_t edc_mode)
 {
        uint16_t phy_identifier;
        uint16_t rom_ver2_val;
        elink_cl45_read(sc, phy,
                        MDIO_PMA_DEVAD,
-                       MDIO_PMA_REG_PHY_IDENTIFIER, &phy_identifier);
+                       MDIO_PMA_REG_PHY_IDENTIFIER,
+                       &phy_identifier);
 
        elink_cl45_write(sc, phy,
                         MDIO_PMA_DEVAD,
@@ -7880,7 +9444,9 @@ static elink_status_t elink_8727_set_limiting_mode(struct bnx2x_softc *sc,
                         (phy_identifier & ~(1 << 9)));
 
        elink_cl45_read(sc, phy,
-                       MDIO_PMA_DEVAD, MDIO_PMA_REG_ROM_VER2, &rom_ver2_val);
+                       MDIO_PMA_DEVAD,
+                       MDIO_PMA_REG_ROM_VER2,
+                       &rom_ver2_val);
        /* Keep the MSB 8-bits, and set the LSB 8-bits with the edc_mode */
        elink_cl45_write(sc, phy,
                         MDIO_PMA_DEVAD,
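Per the comment above, the ROM_VER2 update is a read-modify-write that preserves the register's upper byte and substitutes the lower byte with the EDC mode. As a pure bit operation:

#include <stdint.h>

/* Keep the MSB 8 bits of the register, take the LSB 8 bits from edc_mode */
static uint16_t merge_edc_mode(uint16_t rom_ver2, uint16_t edc_mode)
{
	return (uint16_t)((rom_ver2 & 0xff00) | (edc_mode & 0x00ff));
}

int main(void)
{
	return merge_edc_mode(0xab12, 0x0055) == 0xab55 ? 0 : 1;
}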
@@ -7914,12 +9480,14 @@ static void elink_8727_specific_func(struct elink_phy *phy,
                                 MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL,
                                 (1 << 2) | (1 << 5));
                elink_cl45_write(sc, phy,
-                                MDIO_PMA_DEVAD, MDIO_PMA_LASI_TXCTRL, 0);
+                                MDIO_PMA_DEVAD, MDIO_PMA_LASI_TXCTRL,
+                                0);
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, 0x0006);
                /* Make MOD_ABS give interrupt on change */
                elink_cl45_read(sc, phy, MDIO_PMA_DEVAD,
-                               MDIO_PMA_REG_8727_PCS_OPT_CTRL, &val);
+                               MDIO_PMA_REG_8727_PCS_OPT_CTRL,
+                               &val);
                val |= (1 << 12);
                if (phy->flags & ELINK_FLAGS_NOC)
                        val |= (3 << 5);
@@ -7927,29 +9495,27 @@ static void elink_8727_specific_func(struct elink_phy *phy,
                 * status which reflect SFP+ module over-current
                 */
                if (!(phy->flags & ELINK_FLAGS_NOC))
-                       val &= 0xff8f;  /* Reset bits 4-6 */
+                       val &= 0xff8f; /* Reset bits 4-6 */
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_PCS_OPT_CTRL,
                                 val);
                break;
        default:
-               PMD_DRV_LOG(DEBUG, "Function 0x%x not supported by 8727",
-                           action);
+               ELINK_DEBUG_P1(sc, "Function 0x%x not supported by 8727",
+                  action);
                return;
        }
 }
 
 static void elink_set_e1e2_module_fault_led(struct elink_params *params,
-                                           uint8_t gpio_mode)
+                                          uint8_t gpio_mode)
 {
        struct bnx2x_softc *sc = params->sc;
 
        uint32_t fault_led_gpio = REG_RD(sc, params->shmem_base +
-                                        offsetof(struct shmem_region,
-                                                 dev_info.
-                                                 port_hw_config[params->port].
-                                                 sfp_ctrl)) &
-           PORT_HW_CFG_FAULT_MODULE_LED_MASK;
+                           offsetof(struct shmem_region,
+                       dev_info.port_hw_config[params->port].sfp_ctrl)) &
+               PORT_HW_CFG_FAULT_MODULE_LED_MASK;
        switch (fault_led_gpio) {
        case PORT_HW_CFG_FAULT_MODULE_LED_DISABLED:
                return;
@@ -7957,19 +9523,19 @@ static void elink_set_e1e2_module_fault_led(struct elink_params *params,
        case PORT_HW_CFG_FAULT_MODULE_LED_GPIO1:
        case PORT_HW_CFG_FAULT_MODULE_LED_GPIO2:
        case PORT_HW_CFG_FAULT_MODULE_LED_GPIO3:
-               {
-                       uint8_t gpio_port = elink_get_gpio_port(params);
-                       uint16_t gpio_pin = fault_led_gpio -
-                           PORT_HW_CFG_FAULT_MODULE_LED_GPIO0;
-                       PMD_DRV_LOG(DEBUG, "Set fault module-detected led "
-                                   "pin %x port %x mode %x",
-                                   gpio_pin, gpio_port, gpio_mode);
-                       elink_cb_gpio_write(sc, gpio_pin, gpio_mode, gpio_port);
-               }
-               break;
+       {
+               uint8_t gpio_port = elink_get_gpio_port(params);
+               uint16_t gpio_pin = fault_led_gpio -
+                       PORT_HW_CFG_FAULT_MODULE_LED_GPIO0;
+               ELINK_DEBUG_P3(sc, "Set fault module-detected led "
+                                  "pin %x port %x mode %x",
+                              gpio_pin, gpio_port, gpio_mode);
+               elink_cb_gpio_write(sc, gpio_pin, gpio_mode, gpio_port);
+       }
+       break;
        default:
-               PMD_DRV_LOG(DEBUG, "Error: Invalid fault led mode 0x%x",
-                           fault_led_gpio);
+               ELINK_DEBUG_P1(sc, "Error: Invalid fault led mode 0x%x",
+                              fault_led_gpio);
        }
 }
 
@@ -7980,12 +9546,12 @@ static void elink_set_e3_module_fault_led(struct elink_params *params,
        uint8_t port = params->port;
        struct bnx2x_softc *sc = params->sc;
        pin_cfg = (REG_RD(sc, params->shmem_base +
-                         offsetof(struct shmem_region,
-                                  dev_info.port_hw_config[port].e3_sfp_ctrl)) &
-                  PORT_HW_CFG_E3_FAULT_MDL_LED_MASK) >>
-           PORT_HW_CFG_E3_FAULT_MDL_LED_SHIFT;
-       PMD_DRV_LOG(DEBUG, "Setting Fault LED to %d using pin cfg %d",
-                   gpio_mode, pin_cfg);
+                        offsetof(struct shmem_region,
+                                 dev_info.port_hw_config[port].e3_sfp_ctrl)) &
+               PORT_HW_CFG_E3_FAULT_MDL_LED_MASK) >>
+               PORT_HW_CFG_E3_FAULT_MDL_LED_SHIFT;
+       ELINK_DEBUG_P2(sc, "Setting Fault LED to %d using pin cfg %d",
+                      gpio_mode, pin_cfg);
        elink_set_cfg_pin(sc, pin_cfg, gpio_mode);
 }
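Both fault-LED helpers follow the same access idiom: compute a device-memory offset into the firmware's shmem_region layout with offsetof(), read the word (REG_RD() in the driver), then mask and shift out the sub-field. A host-side sketch of the idiom; the struct layout, mask and shift below are invented for illustration:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* Invented layout standing in for the firmware's shmem_region */
struct port_hw_config { uint32_t e3_sfp_ctrl; };
struct shmem_region   { struct port_hw_config port_hw_config[4]; };

#define FAULT_MDL_LED_MASK  0x0000ff00u	/* illustrative mask/shift */
#define FAULT_MDL_LED_SHIFT 8

int main(void)
{
	size_t off = offsetof(struct shmem_region,
			      port_hw_config[1].e3_sfp_ctrl);
	uint32_t reg = 0x00002a00;	/* pretend register read result */
	uint32_t pin_cfg = (reg & FAULT_MDL_LED_MASK) >> FAULT_MDL_LED_SHIFT;

	printf("offset=%zu pin_cfg=%u\n", off, (unsigned int)pin_cfg);
	return 0;
}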
 
@@ -7993,7 +9559,7 @@ static void elink_set_sfp_module_fault_led(struct elink_params *params,
                                           uint8_t gpio_mode)
 {
        struct bnx2x_softc *sc = params->sc;
-       PMD_DRV_LOG(DEBUG, "Setting SFP+ module fault LED to %d", gpio_mode);
+       ELINK_DEBUG_P1(sc, "Setting SFP+ module fault LED to %d", gpio_mode);
        if (CHIP_IS_E3(sc)) {
                /* Low ==> SFP+ module is supported,
                 * High ==> SFP+ module is not on the approved vendor list
@@ -8018,9 +9584,11 @@ static void elink_warpcore_hw_reset(__rte_unused struct elink_phy *phy,
 }
 
 static void elink_power_sfp_module(struct elink_params *params,
-                                  struct elink_phy *phy, uint8_t power)
+                                  struct elink_phy *phy,
+                                  uint8_t power)
 {
-       PMD_DRV_LOG(DEBUG, "Setting SFP+ power to %x", power);
+       struct bnx2x_softc *sc = params->sc;
+       ELINK_DEBUG_P1(sc, "Setting SFP+ power to %x", power);
 
        switch (phy->type) {
        case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8727:
@@ -8034,7 +9602,6 @@ static void elink_power_sfp_module(struct elink_params *params,
                break;
        }
 }
-
 static void elink_warpcore_set_limiting_mode(struct elink_params *params,
                                             struct elink_phy *phy,
                                             uint16_t edc_mode)
@@ -8043,7 +9610,7 @@ static void elink_warpcore_set_limiting_mode(struct elink_params *params,
        uint16_t mode = MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE_DEFAULT;
        struct bnx2x_softc *sc = params->sc;
 
-       uint8_t lane = elink_get_warpcore_lane(params);
+       uint8_t lane = elink_get_warpcore_lane(phy, params);
        /* This is a global register which controls all lanes */
        elink_cl45_read(sc, phy, MDIO_WC_DEVAD,
                        MDIO_WC_REG_UC_INFO_B1_FIRMWARE_MODE, &val);
@@ -8076,7 +9643,8 @@ static void elink_warpcore_set_limiting_mode(struct elink_params *params,
 }
 
 static void elink_set_limiting_mode(struct elink_params *params,
-                                   struct elink_phy *phy, uint16_t edc_mode)
+                                   struct elink_phy *phy,
+                                   uint16_t edc_mode)
 {
        switch (phy->type) {
        case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8726:
@@ -8092,30 +9660,28 @@ static void elink_set_limiting_mode(struct elink_params *params,
        }
 }
 
-static elink_status_t elink_sfp_module_detection(struct elink_phy *phy,
-                                                struct elink_params *params)
+elink_status_t elink_sfp_module_detection(struct elink_phy *phy,
+                              struct elink_params *params)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t edc_mode;
        elink_status_t rc = ELINK_STATUS_OK;
 
        uint32_t val = REG_RD(sc, params->shmem_base +
-                             offsetof(struct shmem_region,
-                                      dev_info.port_feature_config[params->
-                                                                   port].
-                                      config));
+                            offsetof(struct shmem_region, dev_info.
+                                    port_feature_config[params->port].config));
        /* Enabled transmitter by default */
        elink_sfp_set_transmitter(params, phy, 1);
-       PMD_DRV_LOG(DEBUG, "SFP+ module plugged in/out detected on port %d",
-                   params->port);
+       ELINK_DEBUG_P1(sc, "SFP+ module plugged in/out detected on port %d",
+                params->port);
        /* Power up module */
        elink_power_sfp_module(params, phy, 1);
        if (elink_get_edc_mode(phy, params, &edc_mode) != 0) {
-               PMD_DRV_LOG(DEBUG, "Failed to get valid module type");
+               ELINK_DEBUG_P0(sc, "Failed to get valid module type");
                return ELINK_STATUS_ERROR;
        } else if (elink_verify_sfp_module(phy, params) != 0) {
                /* Check SFP+ module compatibility */
-               PMD_DRV_LOG(DEBUG, "Module verification failed!!");
+               ELINK_DEBUG_P0(sc, "Module verification failed!!");
                rc = ELINK_STATUS_ERROR;
                /* Turn on fault module-detected led */
                elink_set_sfp_module_fault_led(params,
@@ -8123,8 +9689,8 @@ static elink_status_t elink_sfp_module_detection(struct elink_phy *phy,
 
                /* Check if need to power down the SFP+ module */
                if ((val & PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_MASK) ==
-                   PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_POWER_DOWN) {
-                       PMD_DRV_LOG(DEBUG, "Shutdown SFP+ module!!");
+                    PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_POWER_DOWN) {
+                       ELINK_DEBUG_P0(sc, "Shutdown SFP+ module!!");
                        elink_power_sfp_module(params, phy, 0);
                        return rc;
                }
@@ -8157,22 +9723,22 @@ void elink_handle_module_detect_int(struct elink_params *params)
        uint8_t gpio_num, gpio_port;
        if (CHIP_IS_E3(sc)) {
                phy = &params->phy[ELINK_INT_PHY];
-               /* Always enable TX laser,will be disabled in case of fault */
+               /* Always enable TX laser, will be disabled in case of fault */
                elink_sfp_set_transmitter(params, phy, 1);
        } else {
                phy = &params->phy[ELINK_EXT_PHY1];
        }
-       if (elink_get_mod_abs_int_cfg(sc, params->shmem_base,
+       if (elink_get_mod_abs_int_cfg(sc, params->chip_id, params->shmem_base,
                                      params->port, &gpio_num, &gpio_port) ==
            ELINK_STATUS_ERROR) {
-               PMD_DRV_LOG(DEBUG, "Failed to get MOD_ABS interrupt config");
+               ELINK_DEBUG_P0(sc, "Failed to get MOD_ABS interrupt config");
                return;
        }
 
        /* Set valid module led off */
        elink_set_sfp_module_fault_led(params, MISC_REGISTERS_GPIO_HIGH);
 
-       /* Get current gpio val reflecting module plugged in / out */
+       /* Get current gpio val reflecting module plugged in / out */
        gpio_val = elink_cb_gpio_read(sc, gpio_num, gpio_port);
 
        /* Call the handling function in case module is detected */
@@ -8182,8 +9748,8 @@ void elink_handle_module_detect_int(struct elink_params *params)
 
                elink_power_sfp_module(params, phy, 1);
                elink_cb_gpio_int_write(sc, gpio_num,
-                                       MISC_REGISTERS_GPIO_INT_OUTPUT_CLR,
-                                       gpio_port);
+                                  MISC_REGISTERS_GPIO_INT_OUTPUT_CLR,
+                                  gpio_port);
                if (elink_wait_for_sfp_module_initialized(phy, params) == 0) {
                        elink_sfp_module_detection(phy, params);
                        if (CHIP_IS_E3(sc)) {
@@ -8205,12 +9771,12 @@ void elink_handle_module_detect_int(struct elink_params *params)
                                }
                        }
                } else {
-                       PMD_DRV_LOG(DEBUG, "SFP+ module is not initialized");
+                       ELINK_DEBUG_P0(sc, "SFP+ module is not initialized");
                }
        } else {
                elink_cb_gpio_int_write(sc, gpio_num,
-                                       MISC_REGISTERS_GPIO_INT_OUTPUT_SET,
-                                       gpio_port);
+                                  MISC_REGISTERS_GPIO_INT_OUTPUT_SET,
+                                  gpio_port);
                /* Module was plugged out.
                 * Disable transmit for this module
                 */
@@ -8228,9 +9794,11 @@ static void elink_sfp_mask_fault(struct bnx2x_softc *sc,
 {
        uint16_t alarm_status, val;
        elink_cl45_read(sc, phy,
-                       MDIO_PMA_DEVAD, alarm_status_offset, &alarm_status);
+                       MDIO_PMA_DEVAD, alarm_status_offset,
+                       &alarm_status);
        elink_cl45_read(sc, phy,
-                       MDIO_PMA_DEVAD, alarm_status_offset, &alarm_status);
+                       MDIO_PMA_DEVAD, alarm_status_offset,
+                       &alarm_status);
        /* Mask or enable the fault event. */
        elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, alarm_ctrl_offset, &val);
        if (alarm_status & (1 << 0))
@@ -8239,37 +9807,42 @@ static void elink_sfp_mask_fault(struct bnx2x_softc *sc,
                val |= (1 << 0);
        elink_cl45_write(sc, phy, MDIO_PMA_DEVAD, alarm_ctrl_offset, val);
 }
-
 /******************************************************************/
 /*             common BNX2X8706/BNX2X8726 PHY SECTION            */
 /******************************************************************/
 static uint8_t elink_8706_8726_read_status(struct elink_phy *phy,
-                                          struct elink_params *params,
-                                          struct elink_vars *vars)
+                                     struct elink_params *params,
+                                     struct elink_vars *vars)
 {
        uint8_t link_up = 0;
        uint16_t val1, val2, rx_sd, pcs_status;
        struct bnx2x_softc *sc = params->sc;
-       PMD_DRV_LOG(DEBUG, "XGXS 8706/8726");
-       /* Clear RX Alarm */
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXSTAT, &val2);
+       ELINK_DEBUG_P0(sc, "XGXS 8706/8726");
+       /* Clear RX Alarm */
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXSTAT, &val2);
 
        elink_sfp_mask_fault(sc, phy, MDIO_PMA_LASI_TXSTAT,
                             MDIO_PMA_LASI_TXCTRL);
 
-       /* Clear LASI indication */
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_LASI_STAT, &val1);
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_LASI_STAT, &val2);
-       PMD_DRV_LOG(DEBUG, "8706/8726 LASI status 0x%x--> 0x%x", val1, val2);
+       /* Clear LASI indication */
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_LASI_STAT, &val1);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_LASI_STAT, &val2);
+       ELINK_DEBUG_P2(sc, "8706/8726 LASI status 0x%x--> 0x%x", val1, val2);
 
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_RX_SD, &rx_sd);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_REG_RX_SD, &rx_sd);
        elink_cl45_read(sc, phy,
                        MDIO_PCS_DEVAD, MDIO_PCS_REG_STATUS, &pcs_status);
-       elink_cl45_read(sc, phy, MDIO_AN_DEVAD, MDIO_AN_REG_LINK_STATUS, &val2);
-       elink_cl45_read(sc, phy, MDIO_AN_DEVAD, MDIO_AN_REG_LINK_STATUS, &val2);
+       elink_cl45_read(sc, phy,
+                       MDIO_AN_DEVAD, MDIO_AN_REG_LINK_STATUS, &val2);
+       elink_cl45_read(sc, phy,
+                       MDIO_AN_DEVAD, MDIO_AN_REG_LINK_STATUS, &val2);
 
-       PMD_DRV_LOG(DEBUG, "8706/8726 rx_sd 0x%x pcs_status 0x%x 1Gbps"
-                   " link_status 0x%x", rx_sd, pcs_status, val2);
+       ELINK_DEBUG_P3(sc, "8706/8726 rx_sd 0x%x pcs_status 0x%x 1Gbps"
+                       " link_status 0x%x", rx_sd, pcs_status, val2);
        /* Link is up if both bit 0 of pmd_rx_sd and bit 0 of pcs_status
         * are set, or if the autoneg bit 1 is set
         */
@@ -8286,9 +9859,9 @@ static uint8_t elink_8706_8726_read_status(struct elink_phy *phy,
        /* Capture 10G link fault. Read twice to clear stale value. */
        if (vars->line_speed == ELINK_SPEED_10000) {
                elink_cl45_read(sc, phy, MDIO_PMA_DEVAD,
-                               MDIO_PMA_LASI_TXSTAT, &val1);
+                           MDIO_PMA_LASI_TXSTAT, &val1);
                elink_cl45_read(sc, phy, MDIO_PMA_DEVAD,
-                               MDIO_PMA_LASI_TXSTAT, &val1);
+                           MDIO_PMA_LASI_TXSTAT, &val1);
                if (val1 & (1 << 0))
                        vars->fault_detected = 1;
        }
@@ -8300,15 +9873,15 @@ static uint8_t elink_8706_8726_read_status(struct elink_phy *phy,
 /*                     BNX2X8706 PHY SECTION                     */
 /******************************************************************/
 static uint8_t elink_8706_config_init(struct elink_phy *phy,
-                                     struct elink_params *params,
-                                     __rte_unused struct elink_vars *vars)
+                                struct elink_params *params,
+                                __rte_unused struct elink_vars *vars)
 {
        uint32_t tx_en_mode;
        uint16_t cnt, val, tmp1;
        struct bnx2x_softc *sc = params->sc;
 
        elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_2,
-                           MISC_REGISTERS_GPIO_OUTPUT_HIGH, params->port);
+                      MISC_REGISTERS_GPIO_OUTPUT_HIGH, params->port);
        /* HW reset */
        elink_ext_phy_hw_reset(sc, params->port);
        elink_cl45_write(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0xa040);
@@ -8322,34 +9895,35 @@ static uint8_t elink_8706_config_init(struct elink_phy *phy,
                        break;
                DELAY(1000 * 10);
        }
-       PMD_DRV_LOG(DEBUG, "XGXS 8706 is initialized after %d ms", cnt);
+       ELINK_DEBUG_P1(sc, "XGXS 8706 is initialized after %d ms", cnt);
        if ((params->feature_config_flags &
             ELINK_FEATURE_CONFIG_OVERRIDE_PREEMPHASIS_ENABLED)) {
                uint8_t i;
                uint16_t reg;
                for (i = 0; i < 4; i++) {
                        reg = MDIO_XS_8706_REG_BANK_RX0 +
-                           i * (MDIO_XS_8706_REG_BANK_RX1 -
-                                MDIO_XS_8706_REG_BANK_RX0);
+                               i * (MDIO_XS_8706_REG_BANK_RX1 -
+                                    MDIO_XS_8706_REG_BANK_RX0);
                        elink_cl45_read(sc, phy, MDIO_XS_DEVAD, reg, &val);
                        /* Clear first 3 bits of the control */
                        val &= ~0x7;
                        /* Set control bits according to configuration */
                        val |= (phy->rx_preemphasis[i] & 0x7);
-                       PMD_DRV_LOG(DEBUG, "Setting RX Equalizer to BNX2X8706"
-                                   " reg 0x%x <-- val 0x%x", reg, val);
+                       ELINK_DEBUG_P2(sc, "Setting RX Equalizer to BNX2X8706"
+                                  " reg 0x%x <-- val 0x%x", reg, val);
                        elink_cl45_write(sc, phy, MDIO_XS_DEVAD, reg, val);
                }
        }
        /* Force speed */
        if (phy->req_line_speed == ELINK_SPEED_10000) {
-               PMD_DRV_LOG(DEBUG, "XGXS 8706 force 10Gbps");
+               ELINK_DEBUG_P0(sc, "XGXS 8706 force 10Gbps");
 
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD,
                                 MDIO_PMA_REG_DIGITAL_CTRL, 0x400);
                elink_cl45_write(sc, phy,
-                                MDIO_PMA_DEVAD, MDIO_PMA_LASI_TXCTRL, 0);
+                                MDIO_PMA_DEVAD, MDIO_PMA_LASI_TXCTRL,
+                                0);
                /* Arm LASI for link and Tx fault. */
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, 3);
@@ -8357,7 +9931,7 @@ static uint8_t elink_8706_config_init(struct elink_phy *phy,
                /* Force 1Gbps using autoneg with 1G advertisement */
 
                /* Allow CL37 through CL73 */
-               PMD_DRV_LOG(DEBUG, "XGXS 8706 AutoNeg");
+               ELINK_DEBUG_P0(sc, "XGXS 8706 AutoNeg");
                elink_cl45_write(sc, phy,
                                 MDIO_AN_DEVAD, MDIO_AN_REG_CL37_CL73, 0x040c);
 
@@ -8375,9 +9949,11 @@ static uint8_t elink_8706_config_init(struct elink_phy *phy,
                elink_cl45_write(sc, phy,
                                 MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, 0x1200);
                elink_cl45_write(sc, phy,
-                                MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL, 0x0400);
+                                MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL,
+                                0x0400);
                elink_cl45_write(sc, phy,
-                                MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, 0x0004);
+                                MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL,
+                                0x0004);
        }
        elink_save_bnx2x_spirom_ver(sc, phy, params->port);
 
@@ -8387,27 +9963,24 @@ static uint8_t elink_8706_config_init(struct elink_phy *phy,
 
        tx_en_mode = REG_RD(sc, params->shmem_base +
                            offsetof(struct shmem_region,
-                                    dev_info.port_hw_config[params->port].
-                                    sfp_ctrl))
-       & PORT_HW_CFG_TX_LASER_MASK;
+                               dev_info.port_hw_config[params->port].sfp_ctrl))
+                       & PORT_HW_CFG_TX_LASER_MASK;
 
        if (tx_en_mode == PORT_HW_CFG_TX_LASER_GPIO0) {
-               PMD_DRV_LOG(DEBUG, "Enabling TXONOFF_PWRDN_DIS");
+               ELINK_DEBUG_P0(sc, "Enabling TXONOFF_PWRDN_DIS");
                elink_cl45_read(sc, phy,
-                               MDIO_PMA_DEVAD, MDIO_PMA_REG_DIGITAL_CTRL,
-                               &tmp1);
+                       MDIO_PMA_DEVAD, MDIO_PMA_REG_DIGITAL_CTRL, &tmp1);
                tmp1 |= 0x1;
                elink_cl45_write(sc, phy,
-                                MDIO_PMA_DEVAD, MDIO_PMA_REG_DIGITAL_CTRL,
-                                tmp1);
+                       MDIO_PMA_DEVAD, MDIO_PMA_REG_DIGITAL_CTRL, tmp1);
        }
 
        return ELINK_STATUS_OK;
 }
 
 static uint8_t elink_8706_read_status(struct elink_phy *phy,
-                                     struct elink_params *params,
-                                     struct elink_vars *vars)
+                                 struct elink_params *params,
+                                 struct elink_vars *vars)
 {
        return elink_8706_8726_read_status(phy, params, vars);
 }
@@ -8419,7 +9992,7 @@ static void elink_8726_config_loopback(struct elink_phy *phy,
                                       struct elink_params *params)
 {
        struct bnx2x_softc *sc = params->sc;
-       PMD_DRV_LOG(DEBUG, "PMA/PMD ext_phy_loopback: 8726");
+       ELINK_DEBUG_P0(sc, "PMA/PMD ext_phy_loopback: 8726");
        elink_cl45_write(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x0001);
 }
 
@@ -8441,7 +10014,8 @@ static void elink_8726_external_rom_boot(struct elink_phy *phy,
                         MDIO_PMA_REG_GEN_CTRL_ROM_MICRO_RESET);
 
        elink_cl45_write(sc, phy,
-                        MDIO_PMA_DEVAD, MDIO_PMA_REG_MISC_CTRL1, 0x0001);
+                        MDIO_PMA_DEVAD,
+                        MDIO_PMA_REG_MISC_CTRL1, 0x0001);
 
        elink_cl45_write(sc, phy,
                         MDIO_PMA_DEVAD,
@@ -8453,15 +10027,16 @@ static void elink_8726_external_rom_boot(struct elink_phy *phy,
 
        /* Disable serial boot control, tristates pins SS_N, SCK, MOSI, MISO */
        elink_cl45_write(sc, phy,
-                        MDIO_PMA_DEVAD, MDIO_PMA_REG_MISC_CTRL1, 0x0000);
+                        MDIO_PMA_DEVAD,
+                        MDIO_PMA_REG_MISC_CTRL1, 0x0000);
 
        DELAY(1000 * 200);
        elink_save_bnx2x_spirom_ver(sc, phy, params->port);
 }
 
 static uint8_t elink_8726_read_status(struct elink_phy *phy,
-                                     struct elink_params *params,
-                                     struct elink_vars *vars)
+                                struct elink_params *params,
+                                struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t val1;
@@ -8471,7 +10046,7 @@ static uint8_t elink_8726_read_status(struct elink_phy *phy,
                                MDIO_PMA_DEVAD, MDIO_PMA_REG_PHY_IDENTIFIER,
                                &val1);
                if (val1 & (1 << 15)) {
-                       PMD_DRV_LOG(DEBUG, "Tx is disabled");
+                       ELINK_DEBUG_P0(sc, "Tx is disabled");
                        link_up = 0;
                        vars->line_speed = 0;
                }
@@ -8479,12 +10054,13 @@ static uint8_t elink_8726_read_status(struct elink_phy *phy,
        return link_up;
 }
 
 static uint8_t elink_8726_config_init(struct elink_phy *phy,
-                                     struct elink_params *params,
-                                     struct elink_vars *vars)
+                                 struct elink_params *params,
+                                 struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
-       PMD_DRV_LOG(DEBUG, "Initializing BNX2X8726");
+       ELINK_DEBUG_P0(sc, "Initializing BNX2X8726");
 
        elink_cl45_write(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 1 << 15);
        elink_wait_reset_complete(sc, phy, params);
@@ -8499,7 +10075,7 @@ static uint8_t elink_8726_config_init(struct elink_phy *phy,
        elink_sfp_module_detection(phy, params);
 
        if (phy->req_line_speed == ELINK_SPEED_1000) {
-               PMD_DRV_LOG(DEBUG, "Setting 1G force");
+               ELINK_DEBUG_P0(sc, "Setting 1G force");
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x40);
                elink_cl45_write(sc, phy,
@@ -8507,17 +10083,19 @@ static uint8_t elink_8726_config_init(struct elink_phy *phy,
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, 0x5);
                elink_cl45_write(sc, phy,
-                                MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL, 0x400);
+                                MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL,
+                                0x400);
        } else if ((phy->req_line_speed == ELINK_SPEED_AUTO_NEG) &&
                   (phy->speed_cap_mask &
-                   PORT_HW_CFG_SPEED_CAPABILITY_D0_1G) &&
+                     PORT_HW_CFG_SPEED_CAPABILITY_D0_1G) &&
                   ((phy->speed_cap_mask &
-                    PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) !=
+                     PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) !=
                    PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)) {
-               PMD_DRV_LOG(DEBUG, "Setting 1G clause37");
+               ELINK_DEBUG_P0(sc, "Setting 1G clause37");
                /* Set Flow control */
                elink_ext_phy_set_pause(params, phy, vars);
-               elink_cl45_write(sc, phy, MDIO_AN_DEVAD, MDIO_AN_REG_ADV, 0x20);
+               elink_cl45_write(sc, phy,
+                                MDIO_AN_DEVAD, MDIO_AN_REG_ADV, 0x20);
                elink_cl45_write(sc, phy,
                                 MDIO_AN_DEVAD, MDIO_AN_REG_CL37_CL73, 0x040c);
                elink_cl45_write(sc, phy,
@@ -8525,16 +10103,17 @@ static uint8_t elink_8726_config_init(struct elink_phy *phy,
                elink_cl45_write(sc, phy,
                                 MDIO_AN_DEVAD, MDIO_AN_REG_CL37_AN, 0x1000);
                elink_cl45_write(sc, phy,
-                                MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, 0x1200);
+                               MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, 0x1200);
                /* Enable RX-ALARM control to receive interrupt for 1G speed
                 * change
                 */
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, 0x4);
                elink_cl45_write(sc, phy,
-                                MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL, 0x400);
+                                MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXCTRL,
+                                0x400);
 
-       } else {                /* Default 10G. Set only LASI control */
+       } else { /* Default 10G. Set only LASI control */
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, 1);
        }
@@ -8542,9 +10121,10 @@ static uint8_t elink_8726_config_init(struct elink_phy *phy,
        /* Set TX PreEmphasis if needed */
        if ((params->feature_config_flags &
             ELINK_FEATURE_CONFIG_OVERRIDE_PREEMPHASIS_ENABLED)) {
-               PMD_DRV_LOG(DEBUG,
-                           "Setting TX_CTRL1 0x%x, TX_CTRL2 0x%x",
-                           phy->tx_preemphasis[0], phy->tx_preemphasis[1]);
+               ELINK_DEBUG_P2(sc,
+                  "Setting TX_CTRL1 0x%x, TX_CTRL2 0x%x",
+                        phy->tx_preemphasis[0],
+                        phy->tx_preemphasis[1]);
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD,
                                 MDIO_PMA_REG_8726_TX_CTRL1,
@@ -8564,10 +10144,11 @@ static void elink_8726_link_reset(struct elink_phy *phy,
                                  struct elink_params *params)
 {
        struct bnx2x_softc *sc = params->sc;
-       PMD_DRV_LOG(DEBUG, "elink_8726_link_reset port %d", params->port);
+       ELINK_DEBUG_P1(sc, "elink_8726_link_reset port %d", params->port);
        /* Set serial boot control for external load */
        elink_cl45_write(sc, phy,
-                        MDIO_PMA_DEVAD, MDIO_PMA_REG_GEN_CTRL, 0x0001);
+                        MDIO_PMA_DEVAD,
+                        MDIO_PMA_REG_GEN_CTRL, 0x0001);
 }
 
 /******************************************************************/
@@ -8600,22 +10181,28 @@ static void elink_8727_set_link_led(struct elink_phy *phy,
                break;
        }
        elink_cl45_read(sc, phy,
-                       MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_PCS_OPT_CTRL, &val);
+                       MDIO_PMA_DEVAD,
+                       MDIO_PMA_REG_8727_PCS_OPT_CTRL,
+                       &val);
        val &= 0xff8f;
        val |= led_mode_bitmask;
        elink_cl45_write(sc, phy,
-                        MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_PCS_OPT_CTRL, val);
+                        MDIO_PMA_DEVAD,
+                        MDIO_PMA_REG_8727_PCS_OPT_CTRL,
+                        val);
        elink_cl45_read(sc, phy,
-                       MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_GPIO_CTRL, &val);
+                       MDIO_PMA_DEVAD,
+                       MDIO_PMA_REG_8727_GPIO_CTRL,
+                       &val);
        val &= 0xffe0;
        val |= gpio_pins_bitmask;
        elink_cl45_write(sc, phy,
-                        MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_GPIO_CTRL, val);
+                        MDIO_PMA_DEVAD,
+                        MDIO_PMA_REG_8727_GPIO_CTRL,
+                        val);
 }
 
 static void elink_8727_hw_reset(__rte_unused struct elink_phy *phy,
                                struct elink_params *params)
 {
        uint32_t swap_val, swap_override;
        uint8_t port;
        /* The PHY reset is controlled by GPIO 1. Fake the port number
@@ -8626,7 +10213,7 @@ static void elink_8727_hw_reset(__rte_unused struct elink_phy *phy,
        swap_override = REG_RD(sc, NIG_REG_STRAP_OVERRIDE);
        port = (swap_val && swap_override) ^ 1;
        elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_1,
-                           MISC_REGISTERS_GPIO_OUTPUT_LOW, port);
+                      MISC_REGISTERS_GPIO_OUTPUT_LOW, port);
 }
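/*
 * Editor's sketch, not part of the patch: the "fake the port number" trick
 * in elink_8727_hw_reset() above. The GPIO write helper applies the port
 * swap internally, so the caller hands it a pre-flipped port number and the
 * two swaps cancel, landing on the physical pin that controls the reset.
 */
#include <stdint.h>

static uint8_t swap_adjusted_port(uint32_t swap_val, uint32_t swap_override)
{
	/* Yields port 0 when the swap is active, port 1 otherwise; the
	 * helper's own swap then maps it back to the right physical port.
	 */
	return ((swap_val && swap_override) ? 1 : 0) ^ 1;
}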
 
 static void elink_8727_config_speed(struct elink_phy *phy,
@@ -8637,14 +10224,14 @@ static void elink_8727_config_speed(struct elink_phy *phy,
        /* Set option 1G speed */
        if ((phy->req_line_speed == ELINK_SPEED_1000) ||
            (phy->media_type == ELINK_ETH_PHY_SFP_1G_FIBER)) {
-               PMD_DRV_LOG(DEBUG, "Setting 1G force");
+               ELINK_DEBUG_P0(sc, "Setting 1G force");
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x40);
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD, MDIO_PMA_REG_10G_CTRL2, 0xD);
                elink_cl45_read(sc, phy,
                                MDIO_PMA_DEVAD, MDIO_PMA_REG_10G_CTRL2, &tmp1);
-               PMD_DRV_LOG(DEBUG, "1.7 = 0x%x", tmp1);
+               ELINK_DEBUG_P1(sc, "1.7 = 0x%x", tmp1);
                /* Power down the XAUI until link is up in case of dual-media
                 * and 1G
                 */
@@ -8661,10 +10248,10 @@ static void elink_8727_config_speed(struct elink_phy *phy,
                   ((phy->speed_cap_mask &
                     PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) &&
                   ((phy->speed_cap_mask &
-                    PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) !=
-                   PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)) {
+                     PORT_HW_CFG_SPEED_CAPABILITY_D0_10G) !=
+                  PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)) {
 
-               PMD_DRV_LOG(DEBUG, "Setting 1G clause37");
+               ELINK_DEBUG_P0(sc, "Setting 1G clause37");
                elink_cl45_write(sc, phy,
                                 MDIO_AN_DEVAD, MDIO_AN_REG_8727_MISC_CTRL, 0);
                elink_cl45_write(sc, phy,
@@ -8687,9 +10274,8 @@ static void elink_8727_config_speed(struct elink_phy *phy,
 }
 
 static uint8_t elink_8727_config_init(struct elink_phy *phy,
-                                     struct elink_params *params,
-                                     __rte_unused struct elink_vars
-                                            *vars)
+                                 struct elink_params *params,
+                                 __rte_unused struct elink_vars *vars)
 {
        uint32_t tx_en_mode;
        uint16_t tmp1, mod_abs, tmp2;
@@ -8698,7 +10284,7 @@ static uint8_t elink_8727_config_init(struct elink_phy *phy,
 
        elink_wait_reset_complete(sc, phy, params);
 
-       PMD_DRV_LOG(DEBUG, "Initializing BNX2X8727");
+       ELINK_DEBUG_P0(sc, "Initializing BNX2X8727");
 
        elink_8727_specific_func(phy, params, ELINK_PHY_INIT);
        /* Initially configure MOD_ABS to interrupt when module is
@@ -8724,15 +10310,18 @@ static uint8_t elink_8727_config_init(struct elink_phy *phy,
        elink_cl45_read(sc, phy,
                        MDIO_PMA_DEVAD, MDIO_PMA_REG_M8051_MSGOUT_REG, &tmp1);
 
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXSTAT, &tmp1);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXSTAT, &tmp1);
 
        elink_8727_config_speed(phy, params);
 
        /* Set TX PreEmphasis if needed */
        if ((params->feature_config_flags &
             ELINK_FEATURE_CONFIG_OVERRIDE_PREEMPHASIS_ENABLED)) {
-               PMD_DRV_LOG(DEBUG, "Setting TX_CTRL1 0x%x, TX_CTRL2 0x%x",
-                           phy->tx_preemphasis[0], phy->tx_preemphasis[1]);
+               ELINK_DEBUG_P2(sc, "Setting TX_CTRL1 0x%x, TX_CTRL2 0x%x",
+                          phy->tx_preemphasis[0],
+                          phy->tx_preemphasis[1]);
                elink_cl45_write(sc, phy,
                                 MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_TX_CTRL1,
                                 phy->tx_preemphasis[0]);
@@ -8747,25 +10336,24 @@ static uint8_t elink_8727_config_init(struct elink_phy *phy,
         */
        tx_en_mode = REG_RD(sc, params->shmem_base +
                            offsetof(struct shmem_region,
-                                    dev_info.port_hw_config[params->port].
-                                    sfp_ctrl))
-       & PORT_HW_CFG_TX_LASER_MASK;
+                               dev_info.port_hw_config[params->port].sfp_ctrl))
+                       & PORT_HW_CFG_TX_LASER_MASK;
 
        if (tx_en_mode == PORT_HW_CFG_TX_LASER_GPIO0) {
 
-               PMD_DRV_LOG(DEBUG, "Enabling TXONOFF_PWRDN_DIS");
+               ELINK_DEBUG_P0(sc, "Enabling TXONOFF_PWRDN_DIS");
                elink_cl45_read(sc, phy,
-                               MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_OPT_CFG_REG,
-                               &tmp2);
+                       MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_OPT_CFG_REG, &tmp2);
                tmp2 |= 0x1000;
                tmp2 &= 0xFFEF;
                elink_cl45_write(sc, phy,
-                                MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_OPT_CFG_REG,
-                                tmp2);
-               elink_cl45_read(sc, phy, MDIO_PMA_DEVAD,
-                               MDIO_PMA_REG_PHY_IDENTIFIER, &tmp2);
-               elink_cl45_write(sc, phy, MDIO_PMA_DEVAD,
-                                MDIO_PMA_REG_PHY_IDENTIFIER, (tmp2 & 0x7fff));
+                       MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_OPT_CFG_REG, tmp2);
+               elink_cl45_read(sc, phy,
+                               MDIO_PMA_DEVAD, MDIO_PMA_REG_PHY_IDENTIFIER,
+                               &tmp2);
+               elink_cl45_write(sc, phy,
+                                MDIO_PMA_DEVAD, MDIO_PMA_REG_PHY_IDENTIFIER,
+                                (tmp2 & 0x7fff));
        }
 
        return ELINK_STATUS_OK;
@@ -8777,15 +10365,17 @@ static void elink_8727_handle_mod_abs(struct elink_phy *phy,
        struct bnx2x_softc *sc = params->sc;
        uint16_t mod_abs, rx_alarm_status;
        uint32_t val = REG_RD(sc, params->shmem_base +
-                             offsetof(struct shmem_region,
-                                      dev_info.port_feature_config[params->
-                                                                   port].config));
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_PHY_IDENTIFIER,
-                       &mod_abs);
+                            offsetof(struct shmem_region, dev_info.
+                                     port_feature_config[params->port].
+                                     config));
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD,
+                       MDIO_PMA_REG_PHY_IDENTIFIER, &mod_abs);
        if (mod_abs & (1 << 8)) {
 
                /* Module is absent */
-               PMD_DRV_LOG(DEBUG, "MOD_ABS indication show module is absent");
+               ELINK_DEBUG_P0(sc,
+                  "MOD_ABS indication shows module is absent");
                phy->media_type = ELINK_ETH_PHY_NOT_PRESENT;
                /* 1. Set mod_abs to detect next module
                 *    presence event
@@ -8810,7 +10400,8 @@ static void elink_8727_handle_mod_abs(struct elink_phy *phy,
 
        } else {
                /* Module is present */
-               PMD_DRV_LOG(DEBUG, "MOD_ABS indication show module is present");
+               ELINK_DEBUG_P0(sc,
+                  "MOD_ABS indication shows module is present");
                /* First disable transmitter, and if the module is ok, the
                 * module_detection will enable it
                 * 1. Set mod_abs to detect next module absent event ( bit 8)
@@ -8834,51 +10425,56 @@ static void elink_8727_handle_mod_abs(struct elink_phy *phy,
                                MDIO_PMA_DEVAD,
                                MDIO_PMA_LASI_RXSTAT, &rx_alarm_status);
 
                if ((val & PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_MASK) ==
                    PORT_FEAT_CFG_OPT_MDL_ENFRCMNT_DISABLE_TX_LASER)
                        elink_sfp_set_transmitter(params, phy, 0);
 
-               if (elink_wait_for_sfp_module_initialized(phy, params) == 0) {
+               if (elink_wait_for_sfp_module_initialized(phy, params) == 0)
                        elink_sfp_module_detection(phy, params);
-               } else {
-                       PMD_DRV_LOG(DEBUG, "SFP+ module is not initialized");
-               }
+               else
+                       ELINK_DEBUG_P0(sc, "SFP+ module is not initialized");
 
                /* Reconfigure link speed based on module type limitations */
                elink_8727_config_speed(phy, params);
        }
 
-       PMD_DRV_LOG(DEBUG, "8727 RX_ALARM_STATUS 0x%x", rx_alarm_status);
+       ELINK_DEBUG_P1(sc, "8727 RX_ALARM_STATUS 0x%x",
+                  rx_alarm_status);
        /* No need to check link status in case of module plugged in/out */
 }
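/*
 * Editor's sketch, not part of the patch: the MOD_ABS rearm pattern used by
 * elink_8727_handle_mod_abs() above. After servicing a module present/absent
 * event, the detection bit is rewritten so the next interrupt fires on the
 * opposite transition. The bit polarity here is illustrative; only the
 * alternation matters.
 */
#include <stdint.h>
#include <stdbool.h>

#define MOD_ABS_EDGE_BIT (1u << 8) /* assumed edge-select bit */

extern uint16_t phy_read(uint16_t reg);          /* assumed MDIO read */
extern void phy_write(uint16_t reg, uint16_t v); /* assumed MDIO write */

static void rearm_mod_abs(uint16_t reg, bool module_present)
{
	uint16_t v = phy_read(reg);

	if (module_present)
		v |= MOD_ABS_EDGE_BIT;  /* watch for the next removal */
	else
		v &= ~MOD_ABS_EDGE_BIT; /* watch for the next insertion */
	phy_write(reg, v);
}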
 
 static uint8_t elink_8727_read_status(struct elink_phy *phy,
-                                     struct elink_params *params,
-                                     struct elink_vars *vars)
+                                struct elink_params *params,
+                                struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
-       uint8_t link_up = 0, oc_port = params->port;
+       uint8_t link_up = 0;
        uint16_t link_status = 0;
        uint16_t rx_alarm_status, lasi_ctrl, val1;
 
        /* If PHY is not initialized, do not check link status */
        elink_cl45_read(sc, phy,
-                       MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, &lasi_ctrl);
+                       MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL,
+                       &lasi_ctrl);
        if (!lasi_ctrl)
                return 0;
 
        /* Check the LASI on Rx */
        elink_cl45_read(sc, phy,
-                       MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXSTAT, &rx_alarm_status);
+                       MDIO_PMA_DEVAD, MDIO_PMA_LASI_RXSTAT,
+                       &rx_alarm_status);
        vars->line_speed = 0;
-       PMD_DRV_LOG(DEBUG, "8727 RX_ALARM_STATUS  0x%x", rx_alarm_status);
+       ELINK_DEBUG_P1(sc, "8727 RX_ALARM_STATUS  0x%x", rx_alarm_status);
 
        elink_sfp_mask_fault(sc, phy, MDIO_PMA_LASI_TXSTAT,
                             MDIO_PMA_LASI_TXCTRL);
 
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_LASI_STAT, &val1);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_LASI_STAT, &val1);
 
-       PMD_DRV_LOG(DEBUG, "8727 LASI status 0x%x", val1);
+       ELINK_DEBUG_P1(sc, "8727 LASI status 0x%x", val1);
 
        /* Clear MSG-OUT */
        elink_cl45_read(sc, phy,
@@ -8888,24 +10484,28 @@ static uint8_t elink_8727_read_status(struct elink_phy *phy,
         * for over current
         */
        if (!(phy->flags & ELINK_FLAGS_NOC) && !(rx_alarm_status & (1 << 5))) {
                /* Check over-current using 8727 GPIO0 input */
                elink_cl45_read(sc, phy,
                                MDIO_PMA_DEVAD, MDIO_PMA_REG_8727_GPIO_CTRL,
                                &val1);
 
                if ((val1 & (1 << 8)) == 0) {
+                       uint8_t oc_port = params->port;
                        if (!CHIP_IS_E1x(sc))
                                oc_port = SC_PATH(sc) + (params->port << 1);
-                       PMD_DRV_LOG(DEBUG,
-                                   "8727 Power fault has been detected on port %d",
-                                   oc_port);
-                       elink_cb_event_log(sc, ELINK_LOG_ID_OVER_CURRENT, oc_port);     //"Error: Power fault on Port %d has "
-                       //  "been detected and the power to "
-                       //  "that SFP+ module has been removed "
-                       //  "to prevent failure of the card. "
-                       //  "Please remove the SFP+ module and "
-                       //  "restart the system to clear this "
-                       //  "error.",
+                       ELINK_DEBUG_P1(sc,
+                          "8727 Power fault has been detected on port %d",
+                          oc_port);
+                       elink_cb_event_log(sc, ELINK_LOG_ID_OVER_CURRENT,
+                                          oc_port);
+                                       /* "Error: Power fault on Port %d has "
+                                        *  "been detected and the power to "
+                                        *  "that SFP+ module has been removed "
+                                        *  "to prevent failure of the card. "
+                                        *  "Please remove the SFP+ module and "
+                                        *  "restart the system to clear this "
+                                        *  "error.",
+                                        */
                        /* Disable all RX_ALARMs except for mod_abs */
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
@@ -8921,14 +10521,13 @@ static uint8_t elink_8727_read_status(struct elink_phy *phy,
                                         MDIO_PMA_REG_PHY_IDENTIFIER, val1);
                        /* Clear RX alarm */
                        elink_cl45_read(sc, phy,
-                                       MDIO_PMA_DEVAD,
-                                       MDIO_PMA_LASI_RXSTAT, &rx_alarm_status);
+                               MDIO_PMA_DEVAD,
+                               MDIO_PMA_LASI_RXSTAT, &rx_alarm_status);
                        elink_8727_power_module(params->sc, phy, 0);
                        return 0;
                }
-       }
+       } /* Over current check */
 
-       /* Over current check */
        /* When module absent bit is set, check module */
        if (rx_alarm_status & (1 << 5)) {
                elink_8727_handle_mod_abs(phy, params);
@@ -8939,10 +10538,10 @@ static uint8_t elink_8727_read_status(struct elink_phy *phy,
        }
 
        if (!(phy->flags & ELINK_FLAGS_SFP_NOT_APPROVED)) {
-               PMD_DRV_LOG(DEBUG, "Enabling 8727 TX laser");
+               ELINK_DEBUG_P0(sc, "Enabling 8727 TX laser");
                elink_sfp_set_transmitter(params, phy, 1);
        } else {
-               PMD_DRV_LOG(DEBUG, "Tx is disabled");
+               ELINK_DEBUG_P0(sc, "Tx is disabled");
                return 0;
        }
 
@@ -8956,26 +10555,26 @@ static uint8_t elink_8727_read_status(struct elink_phy *phy,
        if ((link_status & (1 << 2)) && (!(link_status & (1 << 15)))) {
                link_up = 1;
                vars->line_speed = ELINK_SPEED_10000;
-               PMD_DRV_LOG(DEBUG, "port %x: External link up in 10G",
-                           params->port);
+               ELINK_DEBUG_P1(sc, "port %x: External link up in 10G",
+                          params->port);
        } else if ((link_status & (1 << 0)) && (!(link_status & (1 << 13)))) {
                link_up = 1;
                vars->line_speed = ELINK_SPEED_1000;
-               PMD_DRV_LOG(DEBUG, "port %x: External link up in 1G",
-                           params->port);
+               ELINK_DEBUG_P1(sc, "port %x: External link up in 1G",
+                          params->port);
        } else {
                link_up = 0;
-               PMD_DRV_LOG(DEBUG, "port %x: External link is down",
-                           params->port);
+               ELINK_DEBUG_P1(sc, "port %x: External link is down",
+                          params->port);
        }
 
        /* Capture 10G link fault. */
        if (vars->line_speed == ELINK_SPEED_10000) {
                elink_cl45_read(sc, phy, MDIO_PMA_DEVAD,
-                               MDIO_PMA_LASI_TXSTAT, &val1);
+                           MDIO_PMA_LASI_TXSTAT, &val1);
 
                elink_cl45_read(sc, phy, MDIO_PMA_DEVAD,
-                               MDIO_PMA_LASI_TXSTAT, &val1);
+                           MDIO_PMA_LASI_TXSTAT, &val1);
 
                if (val1 & (1 << 0)) {
                        vars->fault_detected = 1;
@@ -8985,7 +10584,7 @@ static uint8_t elink_8727_read_status(struct elink_phy *phy,
        if (link_up) {
                elink_ext_phy_resolve_fc(phy, params, vars);
                vars->duplex = DUPLEX_FULL;
-               PMD_DRV_LOG(DEBUG, "duplex = 0x%x", vars->duplex);
+               ELINK_DEBUG_P1(sc, "duplex = 0x%x", vars->duplex);
        }
 
        if ((ELINK_DUAL_MEDIA(params)) &&
@@ -9025,8 +10624,16 @@ static void elink_8727_link_reset(struct elink_phy *phy,
 /******************************************************************/
 /*             BNX2X8481/BNX2X84823/BNX2X84833 PHY SECTION               */
 /******************************************************************/
+static int elink_is_8483x_8485x(struct elink_phy *phy)
+{
+       return ((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84833) ||
+               (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834) ||
+               (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84858));
+}
+
 static void elink_save_848xx_spirom_version(struct elink_phy *phy,
-                                           struct bnx2x_softc *sc, uint8_t port)
+                                           struct bnx2x_softc *sc,
+                                           uint8_t port)
 {
        uint16_t val, fw_ver2, cnt, i;
        static struct elink_reg_set reg_set[] = {
@@ -9038,11 +10645,10 @@ static void elink_save_848xx_spirom_version(struct elink_phy *phy,
        };
        uint16_t fw_ver1;
 
-       if ((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84833) ||
-           (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834)) {
+       if (elink_is_8483x_8485x(phy)) {
                elink_cl45_read(sc, phy, MDIO_CTL_DEVAD, 0x400f, &fw_ver1);
                elink_save_spirom_version(sc, port, fw_ver1 & 0xfff,
-                                         phy->ver_addr);
+                               phy->ver_addr);
        } else {
                /* For 32-bit registers in 848xx, access via MDIO2ARM i/f. */
                /* (1) set reg 0xc200_0014(SPI_BRIDGE_CTRL_2) to 0x03000000 */
@@ -9057,12 +10663,14 @@ static void elink_save_848xx_spirom_version(struct elink_phy *phy,
                        DELAY(5);
                }
                if (cnt == 100) {
-                       PMD_DRV_LOG(DEBUG, "Unable to read 848xx "
-                                   "phy fw version(1)");
-                       elink_save_spirom_version(sc, port, 0, phy->ver_addr);
+                       ELINK_DEBUG_P0(sc, "Unable to read 848xx "
+                                       "phy fw version(1)");
+                       elink_save_spirom_version(sc, port, 0,
+                                                 phy->ver_addr);
                        return;
                }
 
                /* 2) read register 0xc200_0000 (SPI_FW_STATUS) */
                elink_cl45_write(sc, phy, MDIO_PMA_DEVAD, 0xA819, 0x0000);
                elink_cl45_write(sc, phy, MDIO_PMA_DEVAD, 0xA81A, 0xc200);
@@ -9074,9 +10682,10 @@ static void elink_save_848xx_spirom_version(struct elink_phy *phy,
                        DELAY(5);
                }
                if (cnt == 100) {
-                       PMD_DRV_LOG(DEBUG, "Unable to read 848xx phy fw "
-                                   "version(2)");
-                       elink_save_spirom_version(sc, port, 0, phy->ver_addr);
+                       ELINK_DEBUG_P0(sc, "Unable to read 848xx phy fw "
+                                       "version(2)");
+                       elink_save_spirom_version(sc, port, 0,
+                                                 phy->ver_addr);
                        return;
                }
 
@@ -9090,8 +10699,8 @@ static void elink_save_848xx_spirom_version(struct elink_phy *phy,
        }
 
 }
 
-static void elink_848xx_set_led(struct bnx2x_softc *sc, struct elink_phy *phy)
+static void elink_848xx_set_led(struct bnx2x_softc *sc,
+                               struct elink_phy *phy)
 {
        uint16_t val, offset, i;
        static struct elink_reg_set reg_set[] = {
@@ -9100,29 +10709,30 @@ static void elink_848xx_set_led(struct bnx2x_softc *sc, struct elink_phy *phy)
                {MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED3_MASK, 0x0006},
                {MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LED3_BLINK, 0x0000},
                {MDIO_PMA_DEVAD, MDIO_PMA_REG_84823_CTL_SLOW_CLK_CNT_HIGH,
-                MDIO_PMA_REG_84823_BLINK_RATE_VAL_15P9HZ},
+                       MDIO_PMA_REG_84823_BLINK_RATE_VAL_15P9HZ},
                {MDIO_AN_DEVAD, 0xFFFB, 0xFFFD}
        };
        /* PHYC_CTL_LED_CTL */
        elink_cl45_read(sc, phy,
-                       MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LINK_SIGNAL, &val);
+                       MDIO_PMA_DEVAD,
+                       MDIO_PMA_REG_8481_LINK_SIGNAL, &val);
        val &= 0xFE00;
        val |= 0x0092;
 
        elink_cl45_write(sc, phy,
-                        MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_LINK_SIGNAL, val);
+                        MDIO_PMA_DEVAD,
+                        MDIO_PMA_REG_8481_LINK_SIGNAL, val);
 
        for (i = 0; i < ARRAY_SIZE(reg_set); i++)
                elink_cl45_write(sc, phy, reg_set[i].devad, reg_set[i].reg,
                                 reg_set[i].val);
 
-       if ((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84833) ||
-           (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834))
+       if (elink_is_8483x_8485x(phy))
                offset = MDIO_PMA_REG_84833_CTL_LED_CTL_1;
        else
                offset = MDIO_PMA_REG_84823_CTL_LED_CTL_1;
 
        /* stretch_en for LED3 */
        elink_cl45_read_or_write(sc, phy,
                                 MDIO_PMA_DEVAD, offset,
                                 MDIO_PMA_REG_84823_LED3_STRETCH_EN);
@@ -9135,8 +10745,7 @@ static void elink_848xx_specific_func(struct elink_phy *phy,
        struct bnx2x_softc *sc = params->sc;
        switch (action) {
        case ELINK_PHY_INIT:
-               if ((phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84833) &&
-                   (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834)) {
+               if (!elink_is_8483x_8485x(phy)) {
                        /* Save spirom version */
                        elink_save_848xx_spirom_version(phy, sc, params->port);
                }
@@ -9153,14 +10762,15 @@ static void elink_848xx_specific_func(struct elink_phy *phy,
 }
 
 static elink_status_t elink_848xx_cmn_config_init(struct elink_phy *phy,
-                                                 struct elink_params *params,
-                                                 struct elink_vars *vars)
+                                      struct elink_params *params,
+                                      struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t autoneg_val, an_1000_val, an_10_100_val;
 
        elink_848xx_specific_func(phy, params, ELINK_PHY_INIT);
-       elink_cl45_write(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x0000);
+       elink_cl45_write(sc, phy,
+                        MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x0000);
 
        /* set 1000 speed advertisement */
        elink_cl45_read(sc, phy,
@@ -9170,24 +10780,25 @@ static elink_status_t elink_848xx_cmn_config_init(struct elink_phy *phy,
        elink_ext_phy_set_pause(params, phy, vars);
        elink_cl45_read(sc, phy,
                        MDIO_AN_DEVAD,
-                       MDIO_AN_REG_8481_LEGACY_AN_ADV, &an_10_100_val);
+                       MDIO_AN_REG_8481_LEGACY_AN_ADV,
+                       &an_10_100_val);
        elink_cl45_read(sc, phy,
                        MDIO_AN_DEVAD, MDIO_AN_REG_8481_LEGACY_MII_CTRL,
                        &autoneg_val);
        /* Disable forced speed */
-       autoneg_val &=
-           ~((1 << 6) | (1 << 8) | (1 << 9) | (1 << 12) | (1 << 13));
+       autoneg_val &= ~((1 << 6) | (1 << 8) | (1 << 9) | (1 << 12) |
+                        (1 << 13));
        an_10_100_val &= ~((1 << 5) | (1 << 6) | (1 << 7) | (1 << 8));
 
        if (((phy->req_line_speed == ELINK_SPEED_AUTO_NEG) &&
             (phy->speed_cap_mask &
-             PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) ||
+            PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) ||
            (phy->req_line_speed == ELINK_SPEED_1000)) {
                an_1000_val |= (1 << 8);
                autoneg_val |= (1 << 9 | 1 << 12);
                if (phy->req_duplex == DUPLEX_FULL)
                        an_1000_val |= (1 << 9);
-               PMD_DRV_LOG(DEBUG, "Advertising 1G");
+               ELINK_DEBUG_P0(sc, "Advertising 1G");
        } else
                an_1000_val &= ~((1 << 8) | (1 << 9));
 
@@ -9203,7 +10814,7 @@ static elink_status_t elink_848xx_cmn_config_init(struct elink_phy *phy,
                         */
                        autoneg_val |= (1 << 9 | 1 << 12);
                        an_10_100_val |= (1 << 8);
-                       PMD_DRV_LOG(DEBUG, "Advertising 100M-FD");
+                       ELINK_DEBUG_P0(sc, "Advertising 100M-FD");
                }
 
                if (phy->speed_cap_mask &
@@ -9212,7 +10823,7 @@ static elink_status_t elink_848xx_cmn_config_init(struct elink_phy *phy,
                         */
                        autoneg_val |= (1 << 9 | 1 << 12);
                        an_10_100_val |= (1 << 7);
-                       PMD_DRV_LOG(DEBUG, "Advertising 100M-HD");
+                       ELINK_DEBUG_P0(sc, "Advertising 100M-HD");
                }
 
                if ((phy->speed_cap_mask &
@@ -9220,7 +10831,7 @@ static elink_status_t elink_848xx_cmn_config_init(struct elink_phy *phy,
                    (phy->supported & ELINK_SUPPORTED_10baseT_Full)) {
                        an_10_100_val |= (1 << 6);
                        autoneg_val |= (1 << 9 | 1 << 12);
-                       PMD_DRV_LOG(DEBUG, "Advertising 10M-FD");
+                       ELINK_DEBUG_P0(sc, "Advertising 10M-FD");
                }
 
                if ((phy->speed_cap_mask &
@@ -9228,14 +10839,15 @@ static elink_status_t elink_848xx_cmn_config_init(struct elink_phy *phy,
                    (phy->supported & ELINK_SUPPORTED_10baseT_Half)) {
                        an_10_100_val |= (1 << 5);
                        autoneg_val |= (1 << 9 | 1 << 12);
-                       PMD_DRV_LOG(DEBUG, "Advertising 10M-HD");
+                       ELINK_DEBUG_P0(sc, "Advertising 10M-HD");
                }
        }
 
        /* Only 10/100 are allowed to work in FORCE mode */
        if ((phy->req_line_speed == ELINK_SPEED_100) &&
            (phy->supported &
-            (ELINK_SUPPORTED_100baseT_Half | ELINK_SUPPORTED_100baseT_Full))) {
+            (ELINK_SUPPORTED_100baseT_Half |
+             ELINK_SUPPORTED_100baseT_Full))) {
                autoneg_val |= (1 << 13);
                /* Enabled AUTO-MDIX when autoneg is disabled */
                elink_cl45_write(sc, phy,
@@ -9243,16 +10855,17 @@ static elink_status_t elink_848xx_cmn_config_init(struct elink_phy *phy,
                                 (1 << 15 | 1 << 9 | 7 << 0));
                /* The PHY needs this set even for forced link. */
                an_10_100_val |= (1 << 8) | (1 << 7);
-               PMD_DRV_LOG(DEBUG, "Setting 100M force");
+               ELINK_DEBUG_P0(sc, "Setting 100M force");
        }
        if ((phy->req_line_speed == ELINK_SPEED_10) &&
            (phy->supported &
-            (ELINK_SUPPORTED_10baseT_Half | ELINK_SUPPORTED_10baseT_Full))) {
+            (ELINK_SUPPORTED_10baseT_Half |
+             ELINK_SUPPORTED_10baseT_Full))) {
                /* Enabled AUTO-MDIX when autoneg is disabled */
                elink_cl45_write(sc, phy,
                                 MDIO_AN_DEVAD, MDIO_AN_REG_8481_AUX_CTRL,
                                 (1 << 15 | 1 << 9 | 7 << 0));
-               PMD_DRV_LOG(DEBUG, "Setting 10M force");
+               ELINK_DEBUG_P0(sc, "Setting 10M force");
        }
 
        elink_cl45_write(sc, phy,
@@ -9265,42 +10878,44 @@ static elink_status_t elink_848xx_cmn_config_init(struct elink_phy *phy,
        /* Always write this if this is not 84833/4.
         * For 84833/4, write it only when it's a forced speed.
         */
-       if (((phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84833) &&
-            (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834)) ||
+       if (!elink_is_8483x_8485x(phy) ||
            ((autoneg_val & (1 << 12)) == 0))
                elink_cl45_write(sc, phy,
-                                MDIO_AN_DEVAD,
-                                MDIO_AN_REG_8481_LEGACY_MII_CTRL, autoneg_val);
+                        MDIO_AN_DEVAD,
+                        MDIO_AN_REG_8481_LEGACY_MII_CTRL, autoneg_val);
 
        if (((phy->req_line_speed == ELINK_SPEED_AUTO_NEG) &&
             (phy->speed_cap_mask &
              PORT_HW_CFG_SPEED_CAPABILITY_D0_10G)) ||
            (phy->req_line_speed == ELINK_SPEED_10000)) {
-               PMD_DRV_LOG(DEBUG, "Advertising 10G");
+               ELINK_DEBUG_P0(sc, "Advertising 10G");
                /* Restart autoneg for 10G */
 
-               elink_cl45_read_or_write(sc, phy,
-                                        MDIO_AN_DEVAD,
-                                        MDIO_AN_REG_8481_10GBASE_T_AN_CTRL,
-                                        0x1000);
+               elink_cl45_read_or_write(
+                       sc, phy,
+                       MDIO_AN_DEVAD,
+                       MDIO_AN_REG_8481_10GBASE_T_AN_CTRL,
+                       0x1000);
                elink_cl45_write(sc, phy,
-                                MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, 0x3200);
+                                MDIO_AN_DEVAD, MDIO_AN_REG_CTRL,
+                                0x3200);
        } else
                elink_cl45_write(sc, phy,
                                 MDIO_AN_DEVAD,
-                                MDIO_AN_REG_8481_10GBASE_T_AN_CTRL, 1);
+                                MDIO_AN_REG_8481_10GBASE_T_AN_CTRL,
+                                1);
 
        return ELINK_STATUS_OK;
 }
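/*
 * Editor's sketch, not part of the patch: building the 10/100 advertisement
 * word the way the code above does, one bit per capability. The bit
 * positions (5..8) follow the an_10_100_val usage above; the CAP_* flag
 * values are illustrative, not the real PORT_HW_CFG_/ELINK_SUPPORTED_
 * constants.
 */
#include <stdint.h>

#define CAP_10_HALF  (1u << 0)
#define CAP_10_FULL  (1u << 1)
#define CAP_100_HALF (1u << 2)
#define CAP_100_FULL (1u << 3)

static uint16_t build_an_10_100(uint32_t caps)
{
	uint16_t adv = 0;

	if (caps & CAP_100_FULL)
		adv |= 1u << 8; /* advertise 100M full duplex */
	if (caps & CAP_100_HALF)
		adv |= 1u << 7; /* advertise 100M half duplex */
	if (caps & CAP_10_FULL)
		adv |= 1u << 6; /* advertise 10M full duplex */
	if (caps & CAP_10_HALF)
		adv |= 1u << 5; /* advertise 10M half duplex */
	return adv;
}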
 
 static uint8_t elink_8481_config_init(struct elink_phy *phy,
-                                            struct elink_params *params,
-                                            struct elink_vars *vars)
+                                 struct elink_params *params,
+                                 struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        /* Restore normal power mode */
        elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_2,
-                           MISC_REGISTERS_GPIO_OUTPUT_HIGH, params->port);
+                      MISC_REGISTERS_GPIO_OUTPUT_HIGH, params->port);
 
        /* HW reset */
        elink_ext_phy_hw_reset(sc, params->port);
@@ -9310,101 +10925,219 @@ static uint8_t elink_8481_config_init(struct elink_phy *phy,
        return elink_848xx_cmn_config_init(phy, params, vars);
 }
 
-#define PHY84833_CMDHDLR_WAIT 300
-#define PHY84833_CMDHDLR_MAX_ARGS 5
-static elink_status_t elink_84833_cmd_hdlr(struct elink_phy *phy,
+#define PHY848xx_CMDHDLR_WAIT 300
+#define PHY848xx_CMDHDLR_MAX_ARGS 5
+
+static elink_status_t elink_84858_cmd_hdlr(struct elink_phy *phy,
                                           struct elink_params *params,
-                                          uint16_t fw_cmd, uint16_t cmd_args[],
-                                          int argc)
+                                          uint16_t fw_cmd,
+                                          uint16_t cmd_args[], int argc)
+{
+       int idx;
+       uint16_t val;
+       struct bnx2x_softc *sc = params->sc;
+
+       /* Step 1: Poll the STATUS register to see whether the previous command
+        * is in progress or the system is busy (CMD_IN_PROGRESS or
+        * SYSTEM_BUSY). If a previous command is in progress or the system is
+        * busy, poll again until it finishes and the system can accept a new
+        * command.
+        */
+
+       for (idx = 0; idx < PHY848xx_CMDHDLR_WAIT; idx++) {
+               elink_cl45_read(sc, phy, MDIO_CTL_DEVAD,
+                               MDIO_848xx_CMD_HDLR_STATUS, &val);
+               if ((val != PHY84858_STATUS_CMD_IN_PROGRESS) &&
+                   (val != PHY84858_STATUS_CMD_SYSTEM_BUSY))
+                       break;
+               DELAY(1000 * 1);
+       }
+       if (idx >= PHY848xx_CMDHDLR_WAIT) {
+               ELINK_DEBUG_P0(sc, "FW cmd: FW not ready.");
+               return ELINK_STATUS_ERROR;
+       }
+
+       /* Step 2: If any parameters are required for the function, write them
+        * to the required DATA registers.
+        */
+
+       for (idx = 0; idx < argc; idx++) {
+               elink_cl45_write(sc, phy, MDIO_CTL_DEVAD,
+                                MDIO_848xx_CMD_HDLR_DATA1 + idx,
+                                cmd_args[idx]);
+       }
+
+       /* Step 3: When the firmware is ready for commands, write the 'Command
+        * code' to the CMD register.
+        */
+       elink_cl45_write(sc, phy, MDIO_CTL_DEVAD,
+                        MDIO_848xx_CMD_HDLR_COMMAND, fw_cmd);
+
+       /* Step 4: Once the command has been written, poll the STATUS register
+        * to check whether the command has completed (CMD_COMPLETE_PASS or
+        * CMD_COMPLETE_ERROR).
+        */
+
+       for (idx = 0; idx < PHY848xx_CMDHDLR_WAIT; idx++) {
+               elink_cl45_read(sc, phy, MDIO_CTL_DEVAD,
+                               MDIO_848xx_CMD_HDLR_STATUS, &val);
+               if ((val == PHY84858_STATUS_CMD_COMPLETE_PASS) ||
+                   (val == PHY84858_STATUS_CMD_COMPLETE_ERROR))
+                       break;
+               DELAY(1000 * 1);
+       }
+       if ((idx >= PHY848xx_CMDHDLR_WAIT) ||
+           (val == PHY84858_STATUS_CMD_COMPLETE_ERROR)) {
+               ELINK_DEBUG_P0(sc, "FW cmd failed.");
+               return ELINK_STATUS_ERROR;
+       }
+       /* Step 5: Once the command has completed, read the specified DATA
+        * registers for any results the command saved, if applicable.
+        */
+
+       /* Gather returning data */
+       for (idx = 0; idx < argc; idx++) {
+               elink_cl45_read(sc, phy, MDIO_CTL_DEVAD,
+                               MDIO_848xx_CMD_HDLR_DATA1 + idx,
+                               &cmd_args[idx]);
+       }
+
+       return ELINK_STATUS_OK;
+}
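/*
 * Editor's sketch, not part of the patch: the five-step mailbox handshake
 * that the Step 1..5 comments above describe, condensed into a standalone
 * helper. reg_read()/reg_write()/sleep_ms() and the MB_* register and
 * status codes are hypothetical stand-ins for the CL45 accessors and the
 * MDIO_848xx_CMD_HDLR_* registers; the single MB_BUSY code stands in for
 * both CMD_IN_PROGRESS and SYSTEM_BUSY.
 */
#include <stdint.h>

#define MB_STATUS_REG 0x00
#define MB_CMD_REG    0x01
#define MB_DATA_BASE  0x02
#define MB_BUSY       0x0002
#define MB_PASS       0x0004
#define MB_FAIL       0x0008
#define MB_WAIT_ITERS 300

extern uint16_t reg_read(uint16_t reg);          /* assumed accessor */
extern void reg_write(uint16_t reg, uint16_t v); /* assumed accessor */
extern void sleep_ms(int ms);                    /* assumed delay */

static int mailbox_cmd(uint16_t cmd, uint16_t args[], int argc)
{
	uint16_t st = 0;
	int i;

	/* Step 1: wait until no previous command is in flight. */
	for (i = 0; i < MB_WAIT_ITERS; i++) {
		st = reg_read(MB_STATUS_REG);
		if (st != MB_BUSY)
			break;
		sleep_ms(1);
	}
	if (i >= MB_WAIT_ITERS)
		return -1;

	/* Step 2: stage the arguments in the DATA registers. */
	for (i = 0; i < argc; i++)
		reg_write(MB_DATA_BASE + i, args[i]);

	/* Step 3: issue the command code. */
	reg_write(MB_CMD_REG, cmd);

	/* Step 4: poll until the command passes or fails. */
	for (i = 0; i < MB_WAIT_ITERS; i++) {
		st = reg_read(MB_STATUS_REG);
		if (st == MB_PASS || st == MB_FAIL)
			break;
		sleep_ms(1);
	}
	if (i >= MB_WAIT_ITERS || st == MB_FAIL)
		return -1;

	/* Step 5: read back any results from the DATA registers. */
	for (i = 0; i < argc; i++)
		args[i] = reg_read(MB_DATA_BASE + i);

	return 0;
}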
+
+static elink_status_t elink_84833_cmd_hdlr(struct elink_phy *phy,
+                               struct elink_params *params, uint16_t fw_cmd,
+                               uint16_t cmd_args[], int argc, int process)
 {
        int idx;
        uint16_t val;
        struct bnx2x_softc *sc = params->sc;
+       elink_status_t rc = ELINK_STATUS_OK;
+
-       /* Write CMD_OPEN_OVERRIDE to STATUS reg */
-       elink_cl45_write(sc, phy, MDIO_CTL_DEVAD,
-                        MDIO_84833_CMD_HDLR_STATUS,
-                        PHY84833_STATUS_CMD_OPEN_OVERRIDE);
-       for (idx = 0; idx < PHY84833_CMDHDLR_WAIT; idx++) {
+       if (process == PHY84833_MB_PROCESS2) {
+               /* Write CMD_OPEN_OVERRIDE to STATUS reg */
+               elink_cl45_write(sc, phy, MDIO_CTL_DEVAD,
+                                MDIO_848xx_CMD_HDLR_STATUS,
+                                PHY84833_STATUS_CMD_OPEN_OVERRIDE);
+       }
+
+       for (idx = 0; idx < PHY848xx_CMDHDLR_WAIT; idx++) {
                elink_cl45_read(sc, phy, MDIO_CTL_DEVAD,
-                               MDIO_84833_CMD_HDLR_STATUS, &val);
+                              MDIO_848xx_CMD_HDLR_STATUS, &val);
                if (val == PHY84833_STATUS_CMD_OPEN_FOR_CMDS)
                        break;
                DELAY(1000 * 1);
        }
-       if (idx >= PHY84833_CMDHDLR_WAIT) {
-               PMD_DRV_LOG(DEBUG, "FW cmd: FW not ready.");
+       if (idx >= PHY848xx_CMDHDLR_WAIT) {
+               ELINK_DEBUG_P0(sc, "FW cmd: FW not ready.");
+               /* If the status is CMD_COMPLETE_PASS or CMD_COMPLETE_ERROR,
+                * clear the status to CMD_CLEAR_COMPLETE.
+                */
+               if (val == PHY84833_STATUS_CMD_COMPLETE_PASS ||
+                   val == PHY84833_STATUS_CMD_COMPLETE_ERROR) {
+                       elink_cl45_write(sc, phy, MDIO_CTL_DEVAD,
+                                        MDIO_848xx_CMD_HDLR_STATUS,
+                                        PHY84833_STATUS_CMD_CLEAR_COMPLETE);
+               }
                return ELINK_STATUS_ERROR;
        }
-
-       /* Prepare argument(s) and issue command */
-       for (idx = 0; idx < argc; idx++) {
-               elink_cl45_write(sc, phy, MDIO_CTL_DEVAD,
-                                MDIO_84833_CMD_HDLR_DATA1 + idx,
-                                cmd_args[idx]);
-       }
+       if (process == PHY84833_MB_PROCESS1 ||
+           process == PHY84833_MB_PROCESS2) {
+               /* Prepare argument(s) */
+               for (idx = 0; idx < argc; idx++) {
+                       elink_cl45_write(sc, phy, MDIO_CTL_DEVAD,
+                                        MDIO_848xx_CMD_HDLR_DATA1 + idx,
+                                        cmd_args[idx]);
+               }
+       }
+
+       /* Issue command */
        elink_cl45_write(sc, phy, MDIO_CTL_DEVAD,
-                        MDIO_84833_CMD_HDLR_COMMAND, fw_cmd);
-       for (idx = 0; idx < PHY84833_CMDHDLR_WAIT; idx++) {
+                       MDIO_848xx_CMD_HDLR_COMMAND, fw_cmd);
+       for (idx = 0; idx < PHY848xx_CMDHDLR_WAIT; idx++) {
                elink_cl45_read(sc, phy, MDIO_CTL_DEVAD,
-                               MDIO_84833_CMD_HDLR_STATUS, &val);
+                              MDIO_848xx_CMD_HDLR_STATUS, &val);
                if ((val == PHY84833_STATUS_CMD_COMPLETE_PASS) ||
-                   (val == PHY84833_STATUS_CMD_COMPLETE_ERROR))
+                       (val == PHY84833_STATUS_CMD_COMPLETE_ERROR))
                        break;
                DELAY(1000 * 1);
        }
-       if ((idx >= PHY84833_CMDHDLR_WAIT) ||
-           (val == PHY84833_STATUS_CMD_COMPLETE_ERROR)) {
-               PMD_DRV_LOG(DEBUG, "FW cmd failed.");
-               return ELINK_STATUS_ERROR;
+       if ((idx >= PHY848xx_CMDHDLR_WAIT) ||
+               (val == PHY84833_STATUS_CMD_COMPLETE_ERROR)) {
+               ELINK_DEBUG_P0(sc, "FW cmd failed.");
+               rc = ELINK_STATUS_ERROR;
        }
-       /* Gather returning data */
-       for (idx = 0; idx < argc; idx++) {
-               elink_cl45_read(sc, phy, MDIO_CTL_DEVAD,
-                               MDIO_84833_CMD_HDLR_DATA1 + idx,
-                               &cmd_args[idx]);
-       }
+       if (process == PHY84833_MB_PROCESS3 && rc == ELINK_STATUS_OK) {
+               /* Gather returning data */
+               for (idx = 0; idx < argc; idx++) {
+                       elink_cl45_read(sc, phy, MDIO_CTL_DEVAD,
+                                       MDIO_848xx_CMD_HDLR_DATA1 + idx,
+                                       &cmd_args[idx]);
+               }
+       }
-       elink_cl45_write(sc, phy, MDIO_CTL_DEVAD,
-                        MDIO_84833_CMD_HDLR_STATUS,
-                        PHY84833_STATUS_CMD_CLEAR_COMPLETE);
-       return ELINK_STATUS_OK;
+       if (val == PHY84833_STATUS_CMD_COMPLETE_ERROR ||
+           val == PHY84833_STATUS_CMD_COMPLETE_PASS) {
+               elink_cl45_write(sc, phy, MDIO_CTL_DEVAD,
+                                MDIO_848xx_CMD_HDLR_STATUS,
+                                PHY84833_STATUS_CMD_CLEAR_COMPLETE);
+       }
+       return rc;
+}
+
+static elink_status_t elink_848xx_cmd_hdlr(struct elink_phy *phy,
+                                          struct elink_params *params,
+                                          uint16_t fw_cmd,
+                                          uint16_t cmd_args[], int argc,
+                                          int process)
+{
+       struct bnx2x_softc *sc = params->sc;
+
+       if ((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84858) ||
+           (REG_RD(sc, params->shmem2_base +
+                   offsetof(struct shmem2_region,
+                            link_attr_sync[params->port])) &
+                            LINK_ATTR_84858)) {
+               return elink_84858_cmd_hdlr(phy, params, fw_cmd, cmd_args,
+                                           argc);
+       } else {
+               return elink_84833_cmd_hdlr(phy, params, fw_cmd, cmd_args,
+                                           argc, process);
+       }
 }
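/*
 * Editor's sketch, not part of the patch: the dispatch rule implemented
 * above. The newer handler is chosen either because the PHY's static type
 * says so, or because a runtime attribute flag (recorded once the hardware
 * revision has been probed, see the BNX2X84858 check further down) is set
 * in shmem. Names and values here are illustrative.
 */
#include <stdbool.h>
#include <stdint.h>

#define PHY_TYPE_NEW_VARIANT  0x24       /* assumed type code */
#define LINK_ATTR_NEW_VARIANT (1u << 0)  /* assumed attribute flag */

static bool use_new_cmd_handler(uint32_t phy_type, uint32_t link_attr_sync)
{
	return phy_type == PHY_TYPE_NEW_VARIANT ||
	       (link_attr_sync & LINK_ATTR_NEW_VARIANT);
}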
 
-static elink_status_t elink_84833_pair_swap_cfg(struct elink_phy *phy,
-                                               struct elink_params *params,
-                                               __rte_unused struct elink_vars
-                                               *vars)
+static elink_status_t elink_848xx_pair_swap_cfg(struct elink_phy *phy,
+                                  struct elink_params *params,
+                                  __rte_unused struct elink_vars *vars)
 {
        uint32_t pair_swap;
-       uint16_t data[PHY84833_CMDHDLR_MAX_ARGS];
+       uint16_t data[PHY848xx_CMDHDLR_MAX_ARGS];
        elink_status_t status;
        struct bnx2x_softc *sc = params->sc;
 
        /* Check for configuration. */
        pair_swap = REG_RD(sc, params->shmem_base +
                           offsetof(struct shmem_region,
-                                   dev_info.port_hw_config[params->port].
-                                   xgbt_phy_cfg)) &
-           PORT_HW_CFG_RJ45_PAIR_SWAP_MASK;
+                       dev_info.port_hw_config[params->port].xgbt_phy_cfg)) &
+               PORT_HW_CFG_RJ45_PAIR_SWAP_MASK;
 
        if (pair_swap == 0)
                return ELINK_STATUS_OK;
 
        /* Only the second argument is used for this command */
-       data[1] = (uint16_t) pair_swap;
+       data[1] = (uint16_t)pair_swap;
 
-       status = elink_84833_cmd_hdlr(phy, params,
-                                     PHY84833_CMD_SET_PAIR_SWAP, data,
-                                     PHY84833_CMDHDLR_MAX_ARGS);
-       if (status == ELINK_STATUS_OK) {
-               PMD_DRV_LOG(DEBUG, "Pairswap OK, val=0x%x", data[1]);
-       }
+       status = elink_848xx_cmd_hdlr(phy, params,
+                                     PHY848xx_CMD_SET_PAIR_SWAP, data,
+                                     2, PHY84833_MB_PROCESS2);
+       if (status == ELINK_STATUS_OK)
+               ELINK_DEBUG_P1(sc, "Pairswap OK, val=0x%x", data[1]);
 
        return status;
 }
 
 static uint8_t elink_84833_get_reset_gpios(struct bnx2x_softc *sc,
-                                          uint32_t shmem_base_path[],
-                                          __rte_unused uint32_t chip_id)
+                                     uint32_t shmem_base_path[],
+                                     __rte_unused uint32_t chip_id)
 {
        uint32_t reset_pin[2];
        uint32_t idx;
@@ -9414,54 +11147,50 @@ static uint8_t elink_84833_get_reset_gpios(struct bnx2x_softc *sc,
                for (idx = 0; idx < 2; idx++) {
                        /* Map config param to register bit. */
                        reset_pin[idx] = REG_RD(sc, shmem_base_path[idx] +
-                                               offsetof(struct shmem_region,
-                                                        dev_info.
-                                                        port_hw_config[0].
-                                                        e3_cmn_pin_cfg));
-                       reset_pin[idx] =
-                           (reset_pin[idx] & PORT_HW_CFG_E3_PHY_RESET_MASK) >>
-                           PORT_HW_CFG_E3_PHY_RESET_SHIFT;
+                               offsetof(struct shmem_region,
+                               dev_info.port_hw_config[0].e3_cmn_pin_cfg));
+                       reset_pin[idx] = (reset_pin[idx] &
+                               PORT_HW_CFG_E3_PHY_RESET_MASK) >>
+                               PORT_HW_CFG_E3_PHY_RESET_SHIFT;
                        reset_pin[idx] -= PIN_CFG_GPIO0_P0;
                        reset_pin[idx] = (1 << reset_pin[idx]);
                }
-               reset_gpios = (uint8_t) (reset_pin[0] | reset_pin[1]);
+               reset_gpios = (uint8_t)(reset_pin[0] | reset_pin[1]);
        } else {
                /* E2, look from diff place of shmem. */
                for (idx = 0; idx < 2; idx++) {
                        reset_pin[idx] = REG_RD(sc, shmem_base_path[idx] +
-                                               offsetof(struct shmem_region,
-                                                        dev_info.
-                                                        port_hw_config[0].
-                                                        default_cfg));
+                               offsetof(struct shmem_region,
+                               dev_info.port_hw_config[0].default_cfg));
                        reset_pin[idx] &= PORT_HW_CFG_EXT_PHY_GPIO_RST_MASK;
                        reset_pin[idx] -= PORT_HW_CFG_EXT_PHY_GPIO_RST_GPIO0_P0;
                        reset_pin[idx] >>= PORT_HW_CFG_EXT_PHY_GPIO_RST_SHIFT;
                        reset_pin[idx] = (1 << reset_pin[idx]);
                }
-               reset_gpios = (uint8_t) (reset_pin[0] | reset_pin[1]);
+               reset_gpios = (uint8_t)(reset_pin[0] | reset_pin[1]);
        }
 
        return reset_gpios;
 }
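/*
 * Editor's sketch, not part of the patch: the mask/shift/rebase pattern the
 * function above applies to each path's pin-config word. The MASK, SHIFT
 * and GPIO0 base values are illustrative, not the real PORT_HW_CFG_*
 * constants.
 */
#include <stdint.h>

#define PIN_CFG_MASK  0x000000F8u
#define PIN_CFG_SHIFT 3
#define PIN_CFG_GPIO0 0 /* encoding that maps to GPIO0 */

static uint8_t pin_cfg_to_gpio_mask(uint32_t cfg)
{
	uint32_t pin = (cfg & PIN_CFG_MASK) >> PIN_CFG_SHIFT;

	pin -= PIN_CFG_GPIO0;        /* rebase so GPIO0 becomes bit 0 */
	return (uint8_t)(1u << pin); /* one-hot mask for that GPIO pin */
}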
 
 static void elink_84833_hw_reset_phy(struct elink_phy *phy,
-                                       struct elink_params *params)
+                               struct elink_params *params)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t reset_gpios;
        uint32_t other_shmem_base_addr = REG_RD(sc, params->shmem2_base +
-                                               offsetof(struct shmem2_region,
-                                                        other_shmem_base_addr));
+                               offsetof(struct shmem2_region,
+                               other_shmem_base_addr));
 
        uint32_t shmem_base_path[2];
 
        /* Work around for 84833 LED failure inside RESET status */
        elink_cl45_write(sc, phy, MDIO_AN_DEVAD,
-                        MDIO_AN_REG_8481_LEGACY_MII_CTRL,
-                        MDIO_AN_REG_8481_MII_CTRL_FORCE_1G);
+               MDIO_AN_REG_8481_LEGACY_MII_CTRL,
+               MDIO_AN_REG_8481_MII_CTRL_FORCE_1G);
        elink_cl45_write(sc, phy, MDIO_AN_DEVAD,
-                        MDIO_AN_REG_8481_1G_100T_EXT_CTRL,
-                        MIDO_AN_REG_8481_EXT_CTRL_FORCE_LEDS_OFF);
+               MDIO_AN_REG_8481_1G_100T_EXT_CTRL,
+               MIDO_AN_REG_8481_EXT_CTRL_FORCE_LEDS_OFF);
 
        shmem_base_path[0] = params->shmem_base;
        shmem_base_path[1] = other_shmem_base_addr;
@@ -9472,23 +11201,25 @@ static void elink_84833_hw_reset_phy(struct elink_phy *phy,
        elink_cb_gpio_mult_write(sc, reset_gpios,
                                 MISC_REGISTERS_GPIO_OUTPUT_LOW);
        DELAY(10);
-       PMD_DRV_LOG(DEBUG, "84833 hw reset on pin values 0x%x", reset_gpios);
+       ELINK_DEBUG_P1(sc, "84833 hw reset on pin values 0x%x",
+               reset_gpios);
 }
 
 static elink_status_t elink_8483x_disable_eee(struct elink_phy *phy,
-                                             struct elink_params *params,
-                                             struct elink_vars *vars)
+                                  struct elink_params *params,
+                                  struct elink_vars *vars)
 {
        elink_status_t rc;
+       struct bnx2x_softc *sc = params->sc;
        uint16_t cmd_args = 0;
 
-       PMD_DRV_LOG(DEBUG, "Don't Advertise 10GBase-T EEE");
+       ELINK_DEBUG_P0(sc, "Don't Advertise 10GBase-T EEE");
 
        /* Prevent Phy from working in EEE and advertising it */
-       rc = elink_84833_cmd_hdlr(phy, params,
-                                 PHY84833_CMD_SET_EEE_MODE, &cmd_args, 1);
+       rc = elink_848xx_cmd_hdlr(phy, params, PHY848xx_CMD_SET_EEE_MODE,
+                                 &cmd_args, 1, PHY84833_MB_PROCESS1);
        if (rc != ELINK_STATUS_OK) {
-               PMD_DRV_LOG(DEBUG, "EEE disable failed.");
+               ELINK_DEBUG_P0(sc, "EEE disable failed.");
                return rc;
        }
 
@@ -9496,16 +11227,17 @@ static elink_status_t elink_8483x_disable_eee(struct elink_phy *phy,
 }
 
 static elink_status_t elink_8483x_enable_eee(struct elink_phy *phy,
-                                            struct elink_params *params,
-                                            struct elink_vars *vars)
+                                  struct elink_params *params,
+                                  struct elink_vars *vars)
 {
        elink_status_t rc;
+       struct bnx2x_softc *sc = params->sc;
        uint16_t cmd_args = 1;
 
-       rc = elink_84833_cmd_hdlr(phy, params,
-                                 PHY84833_CMD_SET_EEE_MODE, &cmd_args, 1);
+       rc = elink_848xx_cmd_hdlr(phy, params, PHY848xx_CMD_SET_EEE_MODE,
+                                 &cmd_args, 1, PHY84833_MB_PROCESS1);
        if (rc != ELINK_STATUS_OK) {
-               PMD_DRV_LOG(DEBUG, "EEE enable failed.");
+               ELINK_DEBUG_P0(sc, "EEE enable failed.");
                return rc;
        }
 
@@ -9514,14 +11246,14 @@ static elink_status_t elink_8483x_enable_eee(struct elink_phy *phy,
 
 #define PHY84833_CONSTANT_LATENCY 1193
 static uint8_t elink_848x3_config_init(struct elink_phy *phy,
-                                      struct elink_params *params,
-                                      struct elink_vars *vars)
+                                  struct elink_params *params,
+                                  struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t port, initialize = 1;
        uint16_t val;
        uint32_t actual_phy_selection;
-       uint16_t cmd_args[PHY84833_CMDHDLR_MAX_ARGS];
+       uint16_t cmd_args[PHY848xx_CMDHDLR_MAX_ARGS];
        elink_status_t rc = ELINK_STATUS_OK;
 
        DELAY(1000 * 1);
@@ -9533,19 +11265,20 @@ static uint8_t elink_848x3_config_init(struct elink_phy *phy,
 
        if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84823) {
                elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_3,
-                                   MISC_REGISTERS_GPIO_OUTPUT_HIGH, port);
+                              MISC_REGISTERS_GPIO_OUTPUT_HIGH,
+                              port);
        } else {
                /* MDIO reset */
                elink_cl45_write(sc, phy,
-                                MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x8000);
+                               MDIO_PMA_DEVAD,
+                               MDIO_PMA_REG_CTRL, 0x8000);
        }
 
        elink_wait_reset_complete(sc, phy, params);
 
        /* Wait for GPHY to come out of reset */
        DELAY(1000 * 50);
-       if ((phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84833) &&
-           (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834)) {
+       if (!elink_is_8483x_8485x(phy)) {
                /* BNX2X84823 requires that XGXS links up first @ 10G for normal
                 * behavior.
                 */
@@ -9556,7 +11289,19 @@ static uint8_t elink_848x3_config_init(struct elink_phy *phy,
                elink_program_serdes(&params->phy[ELINK_INT_PHY], params, vars);
                vars->line_speed = temp;
        }
+       /* Check if this is actually BNX2X84858 */
+       if (phy->type != PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84858) {
+               uint16_t hw_rev;
+
+               elink_cl45_read(sc, phy, MDIO_AN_DEVAD,
+                               MDIO_AN_REG_848xx_ID_MSB, &hw_rev);
+               if (hw_rev == BNX2X84858_PHY_ID) {
+                       params->link_attr_sync |= LINK_ATTR_84858;
+                       elink_update_link_attr(params, params->link_attr_sync);
+               }
+       }
 
+       /* Set dual-media configuration according to configuration */
        elink_cl45_read(sc, phy, MDIO_CTL_DEVAD,
                        MDIO_CTL_REG_84823_MEDIA, &val);
        val &= ~(MDIO_CTL_REG_84823_MEDIA_MAC_MASK |
@@ -9598,39 +11343,33 @@ static uint8_t elink_848x3_config_init(struct elink_phy *phy,
 
        elink_cl45_write(sc, phy, MDIO_CTL_DEVAD,
                         MDIO_CTL_REG_84823_MEDIA, val);
-       PMD_DRV_LOG(DEBUG, "Multi_phy config = 0x%x, Media control = 0x%x",
-                   params->multi_phy_config, val);
+       ELINK_DEBUG_P2(sc, "Multi_phy config = 0x%x, Media control = 0x%x",
+                  params->multi_phy_config, val);
 
-       if ((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84833) ||
-           (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834)) {
-               elink_84833_pair_swap_cfg(phy, params, vars);
+       if (elink_is_8483x_8485x(phy)) {
+               elink_848xx_pair_swap_cfg(phy, params, vars);
 
                /* Keep AutogrEEEn disabled. */
                cmd_args[0] = 0x0;
                cmd_args[1] = 0x0;
                cmd_args[2] = PHY84833_CONSTANT_LATENCY + 1;
                cmd_args[3] = PHY84833_CONSTANT_LATENCY;
-               rc = elink_84833_cmd_hdlr(phy, params,
-                                         PHY84833_CMD_SET_EEE_MODE, cmd_args,
-                                         PHY84833_CMDHDLR_MAX_ARGS);
-               if (rc != ELINK_STATUS_OK) {
-                       PMD_DRV_LOG(DEBUG, "Cfg AutogrEEEn failed.");
-               }
+               rc = elink_848xx_cmd_hdlr(phy, params,
+                                         PHY848xx_CMD_SET_EEE_MODE, cmd_args,
+                                         4, PHY84833_MB_PROCESS1);
+               if (rc != ELINK_STATUS_OK)
+                       ELINK_DEBUG_P0(sc, "Cfg AutogrEEEn failed.");
        }
-       if (initialize) {
+       if (initialize)
                rc = elink_848xx_cmn_config_init(phy, params, vars);
-       } else {
+       else
                elink_save_848xx_spirom_version(phy, sc, params->port);
-       }
        /* 84833 PHY has a better feature and doesn't need to support this. */
        if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84823) {
                uint32_t cms_enable = REG_RD(sc, params->shmem_base +
-                                            offsetof(struct shmem_region,
-                                                     dev_info.
-                                                     port_hw_config[params->
-                                                                    port].
-                                                     default_cfg)) &
-                   PORT_HW_CFG_ENABLE_CMS_MASK;
+                       offsetof(struct shmem_region,
+                       dev_info.port_hw_config[params->port].default_cfg)) &
+                       PORT_HW_CFG_ENABLE_CMS_MASK;
 
                elink_cl45_read(sc, phy, MDIO_CTL_DEVAD,
                                MDIO_CTL_REG_84823_USER_CTRL_REG, &val);
@@ -9651,7 +11390,7 @@ static uint8_t elink_848x3_config_init(struct elink_phy *phy,
            elink_eee_has_cap(params)) {
                rc = elink_eee_initial_config(params, vars, SHMEM_EEE_10G_ADV);
                if (rc != ELINK_STATUS_OK) {
-                       PMD_DRV_LOG(DEBUG, "Failed to configure EEE timers");
+                       ELINK_DEBUG_P0(sc, "Failed to configure EEE timers");
                        elink_8483x_disable_eee(phy, params, vars);
                        return rc;
                }
@@ -9664,39 +11403,40 @@ static uint8_t elink_848x3_config_init(struct elink_phy *phy,
                else
                        rc = elink_8483x_disable_eee(phy, params, vars);
                if (rc != ELINK_STATUS_OK) {
-                       PMD_DRV_LOG(DEBUG, "Failed to set EEE advertisement");
+                       ELINK_DEBUG_P0(sc, "Failed to set EEE advertisement");
                        return rc;
                }
        } else {
                vars->eee_status &= ~SHMEM_EEE_SUPPORTED_MASK;
        }
 
-       if ((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84833) ||
-           (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834)) {
+       if (elink_is_8483x_8485x(phy)) {
                /* Bring PHY out of super isolate mode as the final step. */
                elink_cl45_read_and_write(sc, phy,
                                          MDIO_CTL_DEVAD,
                                          MDIO_84833_TOP_CFG_XGPHY_STRAP1,
-                                         (uint16_t) ~
-                                         MDIO_84833_SUPER_ISOLATE);
+                                         (uint16_t)~MDIO_84833_SUPER_ISOLATE);
        }
        return rc;
 }
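
The new block near the top of elink_848x3_config_init() distinguishes the BNX2X84858 at runtime: it reads the AN-device ID MSB register and, on a match with BNX2X84858_PHY_ID, latches LINK_ATTR_84858 into link_attr_sync, with elink_update_link_attr() mirroring the flag so both ports agree on the variant. A sketch of that detect-and-latch pattern with a stubbed register read (all constant values here are stand-ins, not the real register contents):

    #include <stdint.h>
    #include <stdio.h>

    #define HW_REV_84858 0x4858     /* stand-in for BNX2X84858_PHY_ID */
    #define ATTR_84858   (1u << 0)  /* stand-in for LINK_ATTR_84858 */

    /* Stubbed clause-45 read; the driver reads MDIO_AN_REG_848xx_ID_MSB. */
    static void cl45_read_id_msb(uint16_t *hw_rev)
    {
            *hw_rev = HW_REV_84858;
    }

    int main(void)
    {
            uint32_t link_attr_sync = 0;
            uint16_t hw_rev;

            cl45_read_id_msb(&hw_rev);
            if (hw_rev == HW_REV_84858) {
                    /* elink_update_link_attr() would then mirror this into
                     * shmem so the other port sees the same setting. */
                    link_attr_sync |= ATTR_84858;
            }
            printf("link_attr_sync = 0x%x\n", link_attr_sync);
            return 0;
    }
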
 
 static uint8_t elink_848xx_read_status(struct elink_phy *phy,
-                                      struct elink_params *params,
-                                      struct elink_vars *vars)
+                                 struct elink_params *params,
+                                 struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t val, val1, val2;
        uint8_t link_up = 0;
 
+
        /* Check 10G-BaseT link status */
        /* Check PMD signal ok */
-       elink_cl45_read(sc, phy, MDIO_AN_DEVAD, 0xFFFA, &val1);
        elink_cl45_read(sc, phy,
-                       MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_PMD_SIGNAL, &val2);
-       PMD_DRV_LOG(DEBUG, "BNX2X848xx: PMD_SIGNAL 1.a811 = 0x%x", val2);
+                       MDIO_AN_DEVAD, 0xFFFA, &val1);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_REG_8481_PMD_SIGNAL,
+                       &val2);
+       ELINK_DEBUG_P1(sc, "BNX2X848xx: PMD_SIGNAL 1.a811 = 0x%x", val2);
 
        /* Check link 10G */
        if (val2 & (1 << 11)) {
@@ -9704,8 +11444,8 @@ static uint8_t elink_848xx_read_status(struct elink_phy *phy,
                vars->duplex = DUPLEX_FULL;
                link_up = 1;
                elink_ext_phy_10G_an_resolve(sc, phy, vars);
-       } else {                /* Check Legacy speed link */
-               uint16_t legacy_status, legacy_speed, mii_ctrl;
+       } else { /* Check Legacy speed link */
+               uint16_t legacy_status, legacy_speed;
 
                /* Enable expansion register 0x42 (Operation mode status) */
                elink_cl45_write(sc, phy,
@@ -9718,7 +11458,8 @@ static uint8_t elink_848xx_read_status(struct elink_phy *phy,
                                MDIO_AN_REG_8481_EXPANSION_REG_RD_RW,
                                &legacy_status);
 
-               PMD_DRV_LOG(DEBUG, "Legacy speed status = 0x%x", legacy_status);
+               ELINK_DEBUG_P1(sc, "Legacy speed status = 0x%x",
+                  legacy_status);
                link_up = ((legacy_status & (1 << 11)) == (1 << 11));
                legacy_speed = (legacy_status & (3 << 9));
                if (legacy_speed == (0 << 9))
@@ -9727,13 +11468,15 @@ static uint8_t elink_848xx_read_status(struct elink_phy *phy,
                        vars->line_speed = ELINK_SPEED_100;
                else if (legacy_speed == (2 << 9))
                        vars->line_speed = ELINK_SPEED_1000;
-               else {          /* Should not happen: Treat as link down */
+               else { /* Should not happen: Treat as link down */
                        vars->line_speed = 0;
                        link_up = 0;
                }
 
                if (params->feature_config_flags &
-                   ELINK_FEATURE_CONFIG_IEEE_PHY_TEST) {
+                       ELINK_FEATURE_CONFIG_IEEE_PHY_TEST) {
+                       uint16_t mii_ctrl;
+
                        elink_cl45_read(sc, phy,
                                        MDIO_AN_DEVAD,
                                        MDIO_AN_REG_8481_LEGACY_MII_CTRL,
@@ -9748,10 +11491,10 @@ static uint8_t elink_848xx_read_status(struct elink_phy *phy,
                        else
                                vars->duplex = DUPLEX_HALF;
 
-                       PMD_DRV_LOG(DEBUG,
-                                   "Link is up in %dMbps, is_duplex_full= %d",
-                                   vars->line_speed,
-                                   (vars->duplex == DUPLEX_FULL));
+                       ELINK_DEBUG_P2(sc,
+                          "Link is up in %dMbps, is_duplex_full= %d",
+                          vars->line_speed,
+                          (vars->duplex == DUPLEX_FULL));
                        /* Check legacy speed AN resolution */
                        elink_cl45_read(sc, phy,
                                        MDIO_AN_DEVAD,
@@ -9759,19 +11502,19 @@ static uint8_t elink_848xx_read_status(struct elink_phy *phy,
                                        &val);
                        if (val & (1 << 5))
                                vars->link_status |=
-                                   LINK_STATUS_AUTO_NEGOTIATE_COMPLETE;
+                                       LINK_STATUS_AUTO_NEGOTIATE_COMPLETE;
                        elink_cl45_read(sc, phy,
                                        MDIO_AN_DEVAD,
                                        MDIO_AN_REG_8481_LEGACY_AN_EXPANSION,
                                        &val);
                        if ((val & (1 << 0)) == 0)
                                vars->link_status |=
-                                   LINK_STATUS_PARALLEL_DETECTION_USED;
+                                       LINK_STATUS_PARALLEL_DETECTION_USED;
                }
        }
        if (link_up) {
-               PMD_DRV_LOG(DEBUG, "BNX2X848x3: link speed is %d",
-                           vars->line_speed);
+               ELINK_DEBUG_P1(sc, "BNX2X848x3: link speed is %d",
+                          vars->line_speed);
                elink_ext_phy_resolve_fc(phy, params, vars);
 
                /* Read LP advertised speeds */
@@ -9779,48 +11522,47 @@ static uint8_t elink_848xx_read_status(struct elink_phy *phy,
                                MDIO_AN_REG_CL37_FC_LP, &val);
                if (val & (1 << 5))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_10THD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_10THD_CAPABLE;
                if (val & (1 << 6))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_10TFD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_10TFD_CAPABLE;
                if (val & (1 << 7))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_100TXHD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_100TXHD_CAPABLE;
                if (val & (1 << 8))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_100TXFD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_100TXFD_CAPABLE;
                if (val & (1 << 9))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_100T4_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_100T4_CAPABLE;
 
                elink_cl45_read(sc, phy, MDIO_AN_DEVAD,
                                MDIO_AN_REG_1000T_STATUS, &val);
 
                if (val & (1 << 10))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_1000THD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_1000THD_CAPABLE;
                if (val & (1 << 11))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE;
 
                elink_cl45_read(sc, phy, MDIO_AN_DEVAD,
                                MDIO_AN_REG_MASTER_STATUS, &val);
 
                if (val & (1 << 11))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
 
                /* Determine if EEE was negotiated */
-               if ((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84833) ||
-                   (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834))
+               if (elink_is_8483x_8485x(phy))
                        elink_eee_an_resolve(phy, params, vars);
        }
 
        return link_up;
 }
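
In the legacy (non-10G) branch above, expansion register 0x42 ("operation mode status") packs everything the driver needs: bit 11 is link-up and bits [10:9] select 10/100/1000 Mb/s, with the remaining encoding treated as link-down. A self-contained decoder mirroring exactly that logic:

    #include <stdint.h>
    #include <stdio.h>

    /* Decode the 848xx "operation mode status" word as the driver does:
     * bit 11 = link up, bits [10:9] = 0:10M, 1:100M, 2:1000M (3 invalid). */
    static int decode_legacy_status(uint16_t legacy_status, int *speed_mbps)
    {
            int link_up = (legacy_status & (1 << 11)) != 0;
            uint16_t legacy_speed = legacy_status & (3 << 9);

            if (legacy_speed == (0 << 9))
                    *speed_mbps = 10;
            else if (legacy_speed == (1 << 9))
                    *speed_mbps = 100;
            else if (legacy_speed == (2 << 9))
                    *speed_mbps = 1000;
            else {                  /* should not happen: treat as link down */
                    *speed_mbps = 0;
                    link_up = 0;
            }
            return link_up;
    }

    int main(void)
    {
            int speed;
            int up = decode_legacy_status((1 << 11) | (2 << 9), &speed);

            printf("link %s at %d Mb/s\n", up ? "up" : "down", speed);
            return 0;
    }
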
 
-static uint8_t elink_848xx_format_ver(uint32_t raw_ver, uint8_t * str,
-                                            uint16_t * len)
+static elink_status_t elink_848xx_format_ver(uint32_t raw_ver, uint8_t *str,
+                                            uint16_t *len)
 {
        elink_status_t status = ELINK_STATUS_OK;
        uint32_t spirom_ver;
@@ -9833,17 +11575,18 @@ static void elink_8481_hw_reset(__rte_unused struct elink_phy *phy,
                                struct elink_params *params)
 {
        elink_cb_gpio_write(params->sc, MISC_REGISTERS_GPIO_1,
-                           MISC_REGISTERS_GPIO_OUTPUT_LOW, 0);
+                      MISC_REGISTERS_GPIO_OUTPUT_LOW, 0);
        elink_cb_gpio_write(params->sc, MISC_REGISTERS_GPIO_1,
-                           MISC_REGISTERS_GPIO_OUTPUT_LOW, 1);
+                      MISC_REGISTERS_GPIO_OUTPUT_LOW, 1);
 }
 
 static void elink_8481_link_reset(struct elink_phy *phy,
-                                 struct elink_params *params)
+                                       struct elink_params *params)
 {
        elink_cl45_write(params->sc, phy,
                         MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, 0x0000);
-       elink_cl45_write(params->sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 1);
+       elink_cl45_write(params->sc, phy,
+                        MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 1);
 }
 
 static void elink_848x3_link_reset(struct elink_phy *phy,
@@ -9860,7 +11603,8 @@ static void elink_848x3_link_reset(struct elink_phy *phy,
 
        if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84823) {
                elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_3,
-                                   MISC_REGISTERS_GPIO_OUTPUT_LOW, port);
+                              MISC_REGISTERS_GPIO_OUTPUT_LOW,
+                              port);
        } else {
                elink_cl45_read(sc, phy,
                                MDIO_CTL_DEVAD,
@@ -9877,47 +11621,52 @@ static void elink_848xx_set_link_led(struct elink_phy *phy,
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t val;
-       __rte_unused uint8_t port;
+       uint8_t port;
 
        if (!(CHIP_IS_E1x(sc)))
                port = SC_PATH(sc);
        else
                port = params->port;
-
        switch (mode) {
        case ELINK_LED_MODE_OFF:
 
-               PMD_DRV_LOG(DEBUG, "Port 0x%x: LED MODE OFF", port);
+               ELINK_DEBUG_P1(sc, "Port 0x%x: LED MODE OFF", port);
 
                if ((params->hw_led_mode << SHARED_HW_CFG_LED_MODE_SHIFT) ==
                    SHARED_HW_CFG_LED_EXTPHY1) {
 
                        /* Set LED masks */
                        elink_cl45_write(sc, phy,
-                                        MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED1_MASK, 0x0);
+                                       MDIO_PMA_DEVAD,
+                                       MDIO_PMA_REG_8481_LED1_MASK,
+                                       0x0);
 
                        elink_cl45_write(sc, phy,
-                                        MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED2_MASK, 0x0);
+                                       MDIO_PMA_DEVAD,
+                                       MDIO_PMA_REG_8481_LED2_MASK,
+                                       0x0);
 
                        elink_cl45_write(sc, phy,
-                                        MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED3_MASK, 0x0);
+                                       MDIO_PMA_DEVAD,
+                                       MDIO_PMA_REG_8481_LED3_MASK,
+                                       0x0);
 
                        elink_cl45_write(sc, phy,
-                                        MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED5_MASK, 0x0);
+                                       MDIO_PMA_DEVAD,
+                                       MDIO_PMA_REG_8481_LED5_MASK,
+                                       0x0);
 
                } else {
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED1_MASK, 0x0);
+                                        MDIO_PMA_REG_8481_LED1_MASK,
+                                        0x0);
                }
                break;
        case ELINK_LED_MODE_FRONT_PANEL_OFF:
 
-               PMD_DRV_LOG(DEBUG, "Port 0x%x: LED MODE FRONT PANEL OFF", port);
+               ELINK_DEBUG_P1(sc, "Port 0x%x: LED MODE FRONT PANEL OFF",
+                  port);
 
                if ((params->hw_led_mode << SHARED_HW_CFG_LED_MODE_SHIFT) ==
                    SHARED_HW_CFG_LED_EXTPHY1) {
@@ -9925,25 +11674,31 @@ static void elink_848xx_set_link_led(struct elink_phy *phy,
                        /* Set LED masks */
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED1_MASK, 0x0);
+                                        MDIO_PMA_REG_8481_LED1_MASK,
+                                        0x0);
 
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED2_MASK, 0x0);
+                                        MDIO_PMA_REG_8481_LED2_MASK,
+                                        0x0);
 
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED3_MASK, 0x0);
+                                        MDIO_PMA_REG_8481_LED3_MASK,
+                                        0x0);
 
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED5_MASK, 0x20);
+                                        MDIO_PMA_REG_8481_LED5_MASK,
+                                        0x20);
 
                } else {
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED1_MASK, 0x0);
-                       if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834) {
+                                        MDIO_PMA_REG_8481_LED1_MASK,
+                                        0x0);
+                       if (phy->type ==
+                           PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834) {
                                /* Disable MI_INT interrupt before setting LED4
                                 * source to constant off.
                                 */
@@ -9951,12 +11706,13 @@ static void elink_848xx_set_link_led(struct elink_phy *phy,
                                           params->port * 4) &
                                    ELINK_NIG_MASK_MI_INT) {
                                        params->link_flags |=
-                                           ELINK_LINK_FLAGS_INT_DISABLED;
+                                       ELINK_LINK_FLAGS_INT_DISABLED;
 
-                                       elink_bits_dis(sc,
-                                                      NIG_REG_MASK_INTERRUPT_PORT0
-                                                      + params->port * 4,
-                                                      ELINK_NIG_MASK_MI_INT);
+                                       elink_bits_dis(
+                                               sc,
+                                               NIG_REG_MASK_INTERRUPT_PORT0 +
+                                               params->port * 4,
+                                               ELINK_NIG_MASK_MI_INT);
                                }
                                elink_cl45_write(sc, phy,
                                                 MDIO_PMA_DEVAD,
@@ -9967,42 +11723,50 @@ static void elink_848xx_set_link_led(struct elink_phy *phy,
                break;
        case ELINK_LED_MODE_ON:
 
-               PMD_DRV_LOG(DEBUG, "Port 0x%x: LED MODE ON", port);
+               ELINK_DEBUG_P1(sc, "Port 0x%x: LED MODE ON", port);
 
                if ((params->hw_led_mode << SHARED_HW_CFG_LED_MODE_SHIFT) ==
                    SHARED_HW_CFG_LED_EXTPHY1) {
                        /* Set control reg */
                        elink_cl45_read(sc, phy,
                                        MDIO_PMA_DEVAD,
-                                       MDIO_PMA_REG_8481_LINK_SIGNAL, &val);
+                                       MDIO_PMA_REG_8481_LINK_SIGNAL,
+                                       &val);
                        val &= 0x8000;
                        val |= 0x2492;
 
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LINK_SIGNAL, val);
+                                        MDIO_PMA_REG_8481_LINK_SIGNAL,
+                                        val);
 
                        /* Set LED masks */
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED1_MASK, 0x0);
+                                        MDIO_PMA_REG_8481_LED1_MASK,
+                                        0x0);
 
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED2_MASK, 0x20);
+                                        MDIO_PMA_REG_8481_LED2_MASK,
+                                        0x20);
 
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED3_MASK, 0x20);
+                                        MDIO_PMA_REG_8481_LED3_MASK,
+                                        0x20);
 
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED5_MASK, 0x0);
+                                        MDIO_PMA_REG_8481_LED5_MASK,
+                                        0x0);
                } else {
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED1_MASK, 0x20);
-                       if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834) {
+                                        MDIO_PMA_REG_8481_LED1_MASK,
+                                        0x20);
+                       if (phy->type ==
+                           PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834) {
                                /* Disable MI_INT interrupt before setting LED4
                                 * source to constant on.
                                 */
@@ -10010,12 +11774,13 @@ static void elink_848xx_set_link_led(struct elink_phy *phy,
                                           params->port * 4) &
                                    ELINK_NIG_MASK_MI_INT) {
                                        params->link_flags |=
-                                           ELINK_LINK_FLAGS_INT_DISABLED;
+                                       ELINK_LINK_FLAGS_INT_DISABLED;
 
-                                       elink_bits_dis(sc,
-                                                      NIG_REG_MASK_INTERRUPT_PORT0
-                                                      + params->port * 4,
-                                                      ELINK_NIG_MASK_MI_INT);
+                                       elink_bits_dis(
+                                               sc,
+                                               NIG_REG_MASK_INTERRUPT_PORT0 +
+                                               params->port * 4,
+                                               ELINK_NIG_MASK_MI_INT);
                                }
                                elink_cl45_write(sc, phy,
                                                 MDIO_PMA_DEVAD,
@@ -10027,7 +11792,7 @@ static void elink_848xx_set_link_led(struct elink_phy *phy,
 
        case ELINK_LED_MODE_OPER:
 
-               PMD_DRV_LOG(DEBUG, "Port 0x%x: LED MODE OPER", port);
+               ELINK_DEBUG_P1(sc, "Port 0x%x: LED MODE OPER", port);
 
                if ((params->hw_led_mode << SHARED_HW_CFG_LED_MODE_SHIFT) ==
                    SHARED_HW_CFG_LED_EXTPHY1) {
@@ -10035,14 +11800,13 @@ static void elink_848xx_set_link_led(struct elink_phy *phy,
                        /* Set control reg */
                        elink_cl45_read(sc, phy,
                                        MDIO_PMA_DEVAD,
-                                       MDIO_PMA_REG_8481_LINK_SIGNAL, &val);
+                                       MDIO_PMA_REG_8481_LINK_SIGNAL,
+                                       &val);
 
                        if (!((val &
                               MDIO_PMA_REG_8481_LINK_SIGNAL_LED4_ENABLE_MASK)
-                             >>
-                             MDIO_PMA_REG_8481_LINK_SIGNAL_LED4_ENABLE_SHIFT))
-                       {
-                               PMD_DRV_LOG(DEBUG, "Setting LINK_SIGNAL");
+                         >> MDIO_PMA_REG_8481_LINK_SIGNAL_LED4_ENABLE_SHIFT)) {
+                               ELINK_DEBUG_P0(sc, "Setting LINK_SIGNAL");
                                elink_cl45_write(sc, phy,
                                                 MDIO_PMA_DEVAD,
                                                 MDIO_PMA_REG_8481_LINK_SIGNAL,
@@ -10052,19 +11816,23 @@ static void elink_848xx_set_link_led(struct elink_phy *phy,
                        /* Set LED masks */
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED1_MASK, 0x10);
+                                        MDIO_PMA_REG_8481_LED1_MASK,
+                                        0x10);
 
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED2_MASK, 0x80);
+                                        MDIO_PMA_REG_8481_LED2_MASK,
+                                        0x80);
 
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED3_MASK, 0x98);
+                                        MDIO_PMA_REG_8481_LED3_MASK,
+                                        0x98);
 
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED5_MASK, 0x40);
+                                        MDIO_PMA_REG_8481_LED5_MASK,
+                                        0x40);
 
                } else {
                        /* EXTPHY2 LED mode indicate that the 100M/1G/10G LED
@@ -10077,18 +11845,22 @@ static void elink_848xx_set_link_led(struct elink_phy *phy,
 
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LED1_MASK, val);
+                                        MDIO_PMA_REG_8481_LED1_MASK,
+                                        val);
 
                        /* Tell LED3 to blink on source */
                        elink_cl45_read(sc, phy,
                                        MDIO_PMA_DEVAD,
-                                       MDIO_PMA_REG_8481_LINK_SIGNAL, &val);
+                                       MDIO_PMA_REG_8481_LINK_SIGNAL,
+                                       &val);
                        val &= ~(7 << 6);
-                       val |= (1 << 6);        /* A83B[8:6]= 1 */
+                       val |= (1 << 6); /* A83B[8:6]= 1 */
                        elink_cl45_write(sc, phy,
                                         MDIO_PMA_DEVAD,
-                                        MDIO_PMA_REG_8481_LINK_SIGNAL, val);
-                       if (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834) {
+                                        MDIO_PMA_REG_8481_LINK_SIGNAL,
+                                        val);
+                       if (phy->type ==
+                           PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834) {
                                /* Restore LED4 source to external link,
                                 * and re-enable interrupts.
                                 */
@@ -10100,14 +11872,14 @@ static void elink_848xx_set_link_led(struct elink_phy *phy,
                                    ELINK_LINK_FLAGS_INT_DISABLED) {
                                        elink_link_int_enable(params);
                                        params->link_flags &=
-                                           ~ELINK_LINK_FLAGS_INT_DISABLED;
+                                               ~ELINK_LINK_FLAGS_INT_DISABLED;
                                }
                        }
                }
                break;
        }
 
-       /* This is a workaround for E3+84833 until autoneg
+       /* This is a workaround for E3 + 84833 until autoneg
         * restart is fixed in f/w
         */
        if (CHIP_IS_E3(sc)) {
@@ -10132,7 +11904,9 @@ static void elink_54618se_specific_func(struct elink_phy *phy,
                elink_cl22_write(sc, phy,
                                 MDIO_REG_GPHY_SHADOW,
                                 MDIO_REG_GPHY_SHADOW_LED_SEL2);
-               elink_cl22_read(sc, phy, MDIO_REG_GPHY_SHADOW, &temp);
+               elink_cl22_read(sc, phy,
+                               MDIO_REG_GPHY_SHADOW,
+                               &temp);
                temp &= ~(0xf << 4);
                temp |= (0x6 << 4);
                elink_cl22_write(sc, phy,
@@ -10147,15 +11921,15 @@ static void elink_54618se_specific_func(struct elink_phy *phy,
 }
 
 static uint8_t elink_54618se_config_init(struct elink_phy *phy,
-                                        struct elink_params *params,
-                                        struct elink_vars *vars)
+                                              struct elink_params *params,
+                                              struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t port;
        uint16_t autoneg_val, an_1000_val, an_10_100_val, fc_val, temp;
        uint32_t cfg_pin;
 
-       PMD_DRV_LOG(DEBUG, "54618SE cfg init");
+       ELINK_DEBUG_P0(sc, "54618SE cfg init");
        DELAY(1000 * 1);
 
        /* This works with E3 only, no need to check the chip
@@ -10164,11 +11938,10 @@ static uint8_t elink_54618se_config_init(struct elink_phy *phy,
        port = params->port;
 
        cfg_pin = (REG_RD(sc, params->shmem_base +
-                         offsetof(struct shmem_region,
-                                  dev_info.port_hw_config[port].
-                                  e3_cmn_pin_cfg)) &
-                  PORT_HW_CFG_E3_PHY_RESET_MASK) >>
-           PORT_HW_CFG_E3_PHY_RESET_SHIFT;
+                       offsetof(struct shmem_region,
+                       dev_info.port_hw_config[port].e3_cmn_pin_cfg)) &
+                       PORT_HW_CFG_E3_PHY_RESET_MASK) >>
+                       PORT_HW_CFG_E3_PHY_RESET_SHIFT;
 
        /* Drive pin high to bring the GPHY out of reset. */
        elink_set_cfg_pin(sc, cfg_pin, 1);
@@ -10177,63 +11950,76 @@ static uint8_t elink_54618se_config_init(struct elink_phy *phy,
        DELAY(1000 * 50);
 
        /* reset phy */
-       elink_cl22_write(sc, phy, MDIO_PMA_REG_CTRL, 0x8000);
+       elink_cl22_write(sc, phy,
+                        MDIO_PMA_REG_CTRL, 0x8000);
        elink_wait_reset_complete(sc, phy, params);
 
        /* Wait for GPHY to reset */
        DELAY(1000 * 50);
 
+
        elink_54618se_specific_func(phy, params, ELINK_PHY_INIT);
        /* Flip the signal detect polarity (set 0x1c.0x1e[8]). */
        elink_cl22_write(sc, phy,
-                        MDIO_REG_GPHY_SHADOW,
-                        MDIO_REG_GPHY_SHADOW_AUTO_DET_MED);
-       elink_cl22_read(sc, phy, MDIO_REG_GPHY_SHADOW, &temp);
+                       MDIO_REG_GPHY_SHADOW,
+                       MDIO_REG_GPHY_SHADOW_AUTO_DET_MED);
+       elink_cl22_read(sc, phy,
+                       MDIO_REG_GPHY_SHADOW,
+                       &temp);
        temp |= MDIO_REG_GPHY_SHADOW_INVERT_FIB_SD;
        elink_cl22_write(sc, phy,
-                        MDIO_REG_GPHY_SHADOW,
-                        MDIO_REG_GPHY_SHADOW_WR_ENA | temp);
+                       MDIO_REG_GPHY_SHADOW,
+                       MDIO_REG_GPHY_SHADOW_WR_ENA | temp);
 
        /* Set up fc */
        /* Please refer to Table 28B-3 of 802.3ab-1999 spec. */
        elink_calc_ieee_aneg_adv(phy, params, &vars->ieee_fc);
        fc_val = 0;
        if ((vars->ieee_fc & MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC) ==
-           MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC)
+                       MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_ASYMMETRIC)
                fc_val |= MDIO_AN_REG_ADV_PAUSE_ASYMMETRIC;
 
        if ((vars->ieee_fc & MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH) ==
-           MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH)
+                       MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_BOTH)
                fc_val |= MDIO_AN_REG_ADV_PAUSE_PAUSE;
 
        /* Read all advertisement */
-       elink_cl22_read(sc, phy, 0x09, &an_1000_val);
+       elink_cl22_read(sc, phy,
+                       0x09,
+                       &an_1000_val);
 
-       elink_cl22_read(sc, phy, 0x04, &an_10_100_val);
+       elink_cl22_read(sc, phy,
+                       0x04,
+                       &an_10_100_val);
 
-       elink_cl22_read(sc, phy, MDIO_PMA_REG_CTRL, &autoneg_val);
+       elink_cl22_read(sc, phy,
+                       MDIO_PMA_REG_CTRL,
+                       &autoneg_val);
 
        /* Disable forced speed */
-       autoneg_val &=
-           ~((1 << 6) | (1 << 8) | (1 << 9) | (1 << 12) | (1 << 13));
-       an_10_100_val &=
-           ~((1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) | (1 << 10) |
-             (1 << 11));
+       autoneg_val &= ~((1 << 6) | (1 << 8) | (1 << 9) | (1 << 12) |
+                        (1 << 13));
+       an_10_100_val &= ~((1 << 5) | (1 << 6) | (1 << 7) | (1 << 8) |
+                          (1 << 10) | (1 << 11));
 
        if (((phy->req_line_speed == ELINK_SPEED_AUTO_NEG) &&
-            (phy->speed_cap_mask &
-             PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) ||
-           (phy->req_line_speed == ELINK_SPEED_1000)) {
+                       (phy->speed_cap_mask &
+                       PORT_HW_CFG_SPEED_CAPABILITY_D0_1G)) ||
+                       (phy->req_line_speed == ELINK_SPEED_1000)) {
                an_1000_val |= (1 << 8);
                autoneg_val |= (1 << 9 | 1 << 12);
                if (phy->req_duplex == DUPLEX_FULL)
                        an_1000_val |= (1 << 9);
-               PMD_DRV_LOG(DEBUG, "Advertising 1G");
+               ELINK_DEBUG_P0(sc, "Advertising 1G");
        } else
                an_1000_val &= ~((1 << 8) | (1 << 9));
 
-       elink_cl22_write(sc, phy, 0x09, an_1000_val);
-       elink_cl22_read(sc, phy, 0x09, &an_1000_val);
+       elink_cl22_write(sc, phy,
+                       0x09,
+                       an_1000_val);
+       elink_cl22_read(sc, phy,
+                       0x09,
+                       &an_1000_val);
 
        /* Advertise 10/100 link speed */
        if (phy->req_line_speed == ELINK_SPEED_AUTO_NEG) {
@@ -10241,25 +12027,25 @@ static uint8_t elink_54618se_config_init(struct elink_phy *phy,
                    PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF) {
                        an_10_100_val |= (1 << 5);
                        autoneg_val |= (1 << 9 | 1 << 12);
-                       PMD_DRV_LOG(DEBUG, "Advertising 10M-HD");
+                       ELINK_DEBUG_P0(sc, "Advertising 10M-HD");
                }
                if (phy->speed_cap_mask &
-                   PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF) {
+                   PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_FULL) {
                        an_10_100_val |= (1 << 6);
                        autoneg_val |= (1 << 9 | 1 << 12);
-                       PMD_DRV_LOG(DEBUG, "Advertising 10M-FD");
+                       ELINK_DEBUG_P0(sc, "Advertising 10M-FD");
                }
                if (phy->speed_cap_mask &
                    PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_HALF) {
                        an_10_100_val |= (1 << 7);
                        autoneg_val |= (1 << 9 | 1 << 12);
-                       PMD_DRV_LOG(DEBUG, "Advertising 100M-HD");
+                       ELINK_DEBUG_P0(sc, "Advertising 100M-HD");
                }
                if (phy->speed_cap_mask &
                    PORT_HW_CFG_SPEED_CAPABILITY_D0_100M_FULL) {
                        an_10_100_val |= (1 << 8);
                        autoneg_val |= (1 << 9 | 1 << 12);
-                       PMD_DRV_LOG(DEBUG, "Advertising 100M-FD");
+                       ELINK_DEBUG_P0(sc, "Advertising 100M-FD");
                }
        }
 
@@ -10267,13 +12053,17 @@ static uint8_t elink_54618se_config_init(struct elink_phy *phy,
        if (phy->req_line_speed == ELINK_SPEED_100) {
                autoneg_val |= (1 << 13);
                /* Enabled AUTO-MDIX when autoneg is disabled */
-               elink_cl22_write(sc, phy, 0x18, (1 << 15 | 1 << 9 | 7 << 0));
-               PMD_DRV_LOG(DEBUG, "Setting 100M force");
+               elink_cl22_write(sc, phy,
+                               0x18,
+                               (1 << 15 | 1 << 9 | 7 << 0));
+               ELINK_DEBUG_P0(sc, "Setting 100M force");
        }
        if (phy->req_line_speed == ELINK_SPEED_10) {
                /* Enabled AUTO-MDIX when autoneg is disabled */
-               elink_cl22_write(sc, phy, 0x18, (1 << 15 | 1 << 9 | 7 << 0));
-               PMD_DRV_LOG(DEBUG, "Setting 10M force");
+               elink_cl22_write(sc, phy,
+                               0x18,
+                               (1 << 15 | 1 << 9 | 7 << 0));
+               ELINK_DEBUG_P0(sc, "Setting 10M force");
        }
 
        if ((phy->flags & ELINK_FLAGS_EEE) && elink_eee_has_cap(params)) {
@@ -10288,7 +12078,7 @@ static uint8_t elink_54618se_config_init(struct elink_phy *phy,
 
                rc = elink_eee_initial_config(params, vars, SHMEM_EEE_1G_ADV);
                if (rc != ELINK_STATUS_OK) {
-                       PMD_DRV_LOG(DEBUG, "Failed to configure EEE timers");
+                       ELINK_DEBUG_P0(sc, "Failed to configure EEE timers");
                        elink_eee_disable(phy, params, vars);
                } else if ((params->eee_mode & ELINK_EEE_MODE_ADV_LPI) &&
                           (phy->req_duplex == DUPLEX_FULL) &&
@@ -10302,38 +12092,42 @@ static uint8_t elink_54618se_config_init(struct elink_phy *phy,
                        elink_eee_advertise(phy, params, vars,
                                            SHMEM_EEE_1G_ADV);
                } else {
-                       PMD_DRV_LOG(DEBUG, "Don't Advertise 1GBase-T EEE");
+                       ELINK_DEBUG_P0(sc, "Don't Advertise 1GBase-T EEE");
                        elink_eee_disable(phy, params, vars);
                }
        } else {
-               vars->eee_status &= ~SHMEM_EEE_1G_ADV <<
-                   SHMEM_EEE_SUPPORTED_SHIFT;
+               vars->eee_status &= ((uint32_t)(~SHMEM_EEE_1G_ADV) <<
+                                   SHMEM_EEE_SUPPORTED_SHIFT);
 
                if (phy->flags & ELINK_FLAGS_EEE) {
                        /* Handle legacy auto-grEEEn */
                        if (params->feature_config_flags &
                            ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED) {
                                temp = 6;
-                               PMD_DRV_LOG(DEBUG, "Enabling Auto-GrEEEn");
+                               ELINK_DEBUG_P0(sc, "Enabling Auto-GrEEEn");
                        } else {
                                temp = 0;
-                               PMD_DRV_LOG(DEBUG, "Don't Adv. EEE");
+                               ELINK_DEBUG_P0(sc, "Don't Adv. EEE");
                        }
                        elink_cl45_write(sc, phy, MDIO_AN_DEVAD,
                                         MDIO_AN_REG_EEE_ADV, temp);
                }
        }
 
-       elink_cl22_write(sc, phy, 0x04, an_10_100_val | fc_val);
+       elink_cl22_write(sc, phy,
+                       0x04,
+                       an_10_100_val | fc_val);
 
        if (phy->req_duplex == DUPLEX_FULL)
                autoneg_val |= (1 << 8);
 
-       elink_cl22_write(sc, phy, MDIO_PMA_REG_CTRL, autoneg_val);
+       elink_cl22_write(sc, phy,
+                       MDIO_PMA_REG_CTRL, autoneg_val);
 
        return ELINK_STATUS_OK;
 }
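
Beyond the log-macro conversion, this function picks up a real fix: the 10M-FD branch previously re-tested PORT_HW_CFG_SPEED_CAPABILITY_D0_10M_HALF instead of _10M_FULL, so 10M full duplex was advertised off the wrong capability bit. The advertisement words themselves are standard Clause 22: register 0x04 carries the 10/100 ability bits (5..8) plus the pause bits merged in via fc_val, register 0x09 carries the 1000BASE-T bits (8..9), and the MII control word gets autoneg enable/restart (bits 12 and 9). A sketch of building those words from a capability mask (the CAP_* names are stand-ins for the PORT_HW_CFG_SPEED_CAPABILITY_D0_* flags):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in capability flags. */
    #define CAP_10M_HALF  (1u << 0)
    #define CAP_10M_FULL  (1u << 1)
    #define CAP_100M_HALF (1u << 2)
    #define CAP_100M_FULL (1u << 3)
    #define CAP_1G        (1u << 4)

    int main(void)
    {
            uint32_t cap = CAP_100M_FULL | CAP_1G;
            uint16_t an_10_100_val = 0, an_1000_val = 0, mii_ctrl = 0;

            if (cap & CAP_10M_HALF)  an_10_100_val |= (1 << 5);
            if (cap & CAP_10M_FULL)  an_10_100_val |= (1 << 6);
            if (cap & CAP_100M_HALF) an_10_100_val |= (1 << 7);
            if (cap & CAP_100M_FULL) an_10_100_val |= (1 << 8);
            if (cap & CAP_1G)        /* 1000BASE-T half + full */
                    an_1000_val |= (1 << 8) | (1 << 9);
            if (an_10_100_val || an_1000_val)
                    mii_ctrl |= (1 << 12) | (1 << 9); /* AN enable+restart */

            printf("0x04=0x%04x 0x09=0x%04x ctrl=0x%04x\n",
                   an_10_100_val, an_1000_val, mii_ctrl);
            return 0;
    }
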
 
+
 static void elink_5461x_set_link_led(struct elink_phy *phy,
                                     struct elink_params *params, uint8_t mode)
 {
@@ -10341,11 +12135,14 @@ static void elink_5461x_set_link_led(struct elink_phy *phy,
        uint16_t temp;
 
        elink_cl22_write(sc, phy,
-                        MDIO_REG_GPHY_SHADOW, MDIO_REG_GPHY_SHADOW_LED_SEL1);
-       elink_cl22_read(sc, phy, MDIO_REG_GPHY_SHADOW, &temp);
+               MDIO_REG_GPHY_SHADOW,
+               MDIO_REG_GPHY_SHADOW_LED_SEL1);
+       elink_cl22_read(sc, phy,
+               MDIO_REG_GPHY_SHADOW,
+               &temp);
        temp &= 0xff00;
 
-       PMD_DRV_LOG(DEBUG, "54618x set link led (mode=%x)", mode);
+       ELINK_DEBUG_P1(sc, "54618x set link led (mode=%x)", mode);
        switch (mode) {
        case ELINK_LED_MODE_FRONT_PANEL_OFF:
        case ELINK_LED_MODE_OFF:
@@ -10361,11 +12158,12 @@ static void elink_5461x_set_link_led(struct elink_phy *phy,
                break;
        }
        elink_cl22_write(sc, phy,
-                        MDIO_REG_GPHY_SHADOW,
-                        MDIO_REG_GPHY_SHADOW_WR_ENA | temp);
+               MDIO_REG_GPHY_SHADOW,
+               MDIO_REG_GPHY_SHADOW_WR_ENA | temp);
        return;
 }
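
Both 54618SE LED paths use the GPHY shadow-register convention: write a selector to MDIO_REG_GPHY_SHADOW, read the selected entry back through the same register, modify the field, then write it again with MDIO_REG_GPHY_SHADOW_WR_ENA set to commit. A sketch of that read-modify-write idiom over a fake register file (the single-slot emulation is a simplification; a real PHY banks one entry per selector, and the selector values below are hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    #define GPHY_SHADOW_REG    0x1c         /* CL22 shadow-access register */
    #define GPHY_SHADOW_WR_ENA (1u << 15)   /* commit bit */

    static uint16_t reg_file[0x20];         /* fake CL22 register file */

    static void cl22_write(uint16_t reg, uint16_t val) { reg_file[reg] = val; }
    static void cl22_read(uint16_t reg, uint16_t *val) { *val = reg_file[reg]; }

    /* Select one shadow entry, read it, replace the low byte, and rewrite
     * it with WR_ENA set so the PHY latches the change. */
    static void shadow_rmw(uint16_t selector, uint16_t low_byte)
    {
            uint16_t temp;

            cl22_write(GPHY_SHADOW_REG, selector);      /* select entry  */
            cl22_read(GPHY_SHADOW_REG, &temp);          /* read it back  */
            temp &= 0xff00;                             /* keep selector */
            temp |= (low_byte & 0x00ff);
            cl22_write(GPHY_SHADOW_REG, GPHY_SHADOW_WR_ENA | temp);
    }

    int main(void)
    {
            shadow_rmw(0x0d00 /* hypothetical LED_SEL1 selector */, 0xa0);
            printf("reg 0x1c = 0x%04x\n", reg_file[GPHY_SHADOW_REG]);
            return 0;
    }
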
 
+
 static void elink_54618se_link_reset(struct elink_phy *phy,
                                     struct elink_params *params)
 {
@@ -10382,19 +12180,18 @@ static void elink_54618se_link_reset(struct elink_phy *phy,
         */
        port = params->port;
        cfg_pin = (REG_RD(sc, params->shmem_base +
-                         offsetof(struct shmem_region,
-                                  dev_info.port_hw_config[port].
-                                  e3_cmn_pin_cfg)) &
-                  PORT_HW_CFG_E3_PHY_RESET_MASK) >>
-           PORT_HW_CFG_E3_PHY_RESET_SHIFT;
+                       offsetof(struct shmem_region,
+                       dev_info.port_hw_config[port].e3_cmn_pin_cfg)) &
+                       PORT_HW_CFG_E3_PHY_RESET_MASK) >>
+                       PORT_HW_CFG_E3_PHY_RESET_SHIFT;
 
        /* Drive pin low to put GPHY in reset. */
        elink_set_cfg_pin(sc, cfg_pin, 0);
 }
 
 static uint8_t elink_54618se_read_status(struct elink_phy *phy,
-                                        struct elink_params *params,
-                                        struct elink_vars *vars)
+                                   struct elink_params *params,
+                                   struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t val;
@@ -10402,11 +12199,15 @@ static uint8_t elink_54618se_read_status(struct elink_phy *phy,
        uint16_t legacy_status, legacy_speed;
 
        /* Get speed operation status */
-       elink_cl22_read(sc, phy, MDIO_REG_GPHY_AUX_STATUS, &legacy_status);
-       PMD_DRV_LOG(DEBUG, "54618SE read_status: 0x%x", legacy_status);
+       elink_cl22_read(sc, phy,
+                       MDIO_REG_GPHY_AUX_STATUS,
+                       &legacy_status);
+       ELINK_DEBUG_P1(sc, "54618SE read_status: 0x%x", legacy_status);
 
        /* Read status to clear the PHY interrupt. */
-       elink_cl22_read(sc, phy, MDIO_REG_INTR_STATUS, &val);
+       elink_cl22_read(sc, phy,
+                       MDIO_REG_INTR_STATUS,
+                       &val);
 
        link_up = ((legacy_status & (1 << 2)) == (1 << 2));
 
@@ -10432,25 +12233,30 @@ static uint8_t elink_54618se_read_status(struct elink_phy *phy,
                } else if (legacy_speed == (1 << 8)) {
                        vars->line_speed = ELINK_SPEED_10;
                        vars->duplex = DUPLEX_HALF;
-               } else          /* Should not happen */
+               } else /* Should not happen */
                        vars->line_speed = 0;
 
-               PMD_DRV_LOG(DEBUG,
-                           "Link is up in %dMbps, is_duplex_full= %d",
-                           vars->line_speed, (vars->duplex == DUPLEX_FULL));
+               ELINK_DEBUG_P2(sc,
+                  "Link is up in %dMbps, is_duplex_full= %d",
+                  vars->line_speed,
+                  (vars->duplex == DUPLEX_FULL));
 
                /* Check legacy speed AN resolution */
-               elink_cl22_read(sc, phy, 0x01, &val);
+               elink_cl22_read(sc, phy,
+                               0x01,
+                               &val);
                if (val & (1 << 5))
                        vars->link_status |=
-                           LINK_STATUS_AUTO_NEGOTIATE_COMPLETE;
-               elink_cl22_read(sc, phy, 0x06, &val);
+                               LINK_STATUS_AUTO_NEGOTIATE_COMPLETE;
+               elink_cl22_read(sc, phy,
+                               0x06,
+                               &val);
                if ((val & (1 << 0)) == 0)
                        vars->link_status |=
-                           LINK_STATUS_PARALLEL_DETECTION_USED;
+                               LINK_STATUS_PARALLEL_DETECTION_USED;
 
-               PMD_DRV_LOG(DEBUG, "BNX2X54618SE: link speed is %d",
-                           vars->line_speed);
+               ELINK_DEBUG_P1(sc, "BNX2X54618SE: link speed is %d",
+                          vars->line_speed);
 
                elink_ext_phy_resolve_fc(phy, params, vars);
 
@@ -10460,27 +12266,27 @@ static uint8_t elink_54618se_read_status(struct elink_phy *phy,
 
                        if (val & (1 << 5))
                                vars->link_status |=
-                                   LINK_STATUS_LINK_PARTNER_10THD_CAPABLE;
+                                 LINK_STATUS_LINK_PARTNER_10THD_CAPABLE;
                        if (val & (1 << 6))
                                vars->link_status |=
-                                   LINK_STATUS_LINK_PARTNER_10TFD_CAPABLE;
+                                 LINK_STATUS_LINK_PARTNER_10TFD_CAPABLE;
                        if (val & (1 << 7))
                                vars->link_status |=
-                                   LINK_STATUS_LINK_PARTNER_100TXHD_CAPABLE;
+                                 LINK_STATUS_LINK_PARTNER_100TXHD_CAPABLE;
                        if (val & (1 << 8))
                                vars->link_status |=
-                                   LINK_STATUS_LINK_PARTNER_100TXFD_CAPABLE;
+                                 LINK_STATUS_LINK_PARTNER_100TXFD_CAPABLE;
                        if (val & (1 << 9))
                                vars->link_status |=
-                                   LINK_STATUS_LINK_PARTNER_100T4_CAPABLE;
+                                 LINK_STATUS_LINK_PARTNER_100T4_CAPABLE;
 
                        elink_cl22_read(sc, phy, 0xa, &val);
                        if (val & (1 << 10))
                                vars->link_status |=
-                                   LINK_STATUS_LINK_PARTNER_1000THD_CAPABLE;
+                                 LINK_STATUS_LINK_PARTNER_1000THD_CAPABLE;
                        if (val & (1 << 11))
                                vars->link_status |=
-                                   LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE;
+                                 LINK_STATUS_LINK_PARTNER_1000TFD_CAPABLE;
 
                        if ((phy->flags & ELINK_FLAGS_EEE) &&
                            elink_eee_has_cap(params))
@@ -10497,7 +12303,7 @@ static void elink_54618se_config_loopback(struct elink_phy *phy,
        uint16_t val;
        uint32_t umac_base = params->port ? GRCBASE_UMAC1 : GRCBASE_UMAC0;
 
-       PMD_DRV_LOG(DEBUG, "2PMA/PMD ext_phy_loopback: 54618se");
+       ELINK_DEBUG_P0(sc, "2PMA/PMD ext_phy_loopback: 54618se");
 
        /* Enable master/slave manual mmode and set to master */
        /* mii write 9 [bits set 11 12] */
@@ -10543,30 +12349,33 @@ static void elink_7101_config_loopback(struct elink_phy *phy,
 }
 
 static uint8_t elink_7101_config_init(struct elink_phy *phy,
-                                     struct elink_params *params,
-                                     struct elink_vars *vars)
+                                 struct elink_params *params,
+                                 struct elink_vars *vars)
 {
        uint16_t fw_ver1, fw_ver2, val;
        struct bnx2x_softc *sc = params->sc;
-       PMD_DRV_LOG(DEBUG, "Setting the SFX7101 LASI indication");
+       ELINK_DEBUG_P0(sc, "Setting the SFX7101 LASI indication");
 
-       /* Restore normal power mode */
+       /* Restore normal power mode*/
        elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_2,
-                           MISC_REGISTERS_GPIO_OUTPUT_HIGH, params->port);
+                      MISC_REGISTERS_GPIO_OUTPUT_HIGH, params->port);
        /* HW reset */
        elink_ext_phy_hw_reset(sc, params->port);
        elink_wait_reset_complete(sc, phy, params);
 
-       elink_cl45_write(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, 0x1);
-       PMD_DRV_LOG(DEBUG, "Setting the SFX7101 LED to blink on traffic");
+       elink_cl45_write(sc, phy,
+                        MDIO_PMA_DEVAD, MDIO_PMA_LASI_CTRL, 0x1);
+       ELINK_DEBUG_P0(sc, "Setting the SFX7101 LED to blink on traffic");
        elink_cl45_write(sc, phy,
                         MDIO_PMA_DEVAD, MDIO_PMA_REG_7107_LED_CNTL, (1 << 3));
 
        elink_ext_phy_set_pause(params, phy, vars);
        /* Restart autoneg */
-       elink_cl45_read(sc, phy, MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, &val);
+       elink_cl45_read(sc, phy,
+                       MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, &val);
        val |= 0x200;
-       elink_cl45_write(sc, phy, MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, val);
+       elink_cl45_write(sc, phy,
+                        MDIO_AN_DEVAD, MDIO_AN_REG_CTRL, val);
 
        /* Save spirom version */
        elink_cl45_read(sc, phy,
@@ -10575,24 +12384,30 @@ static uint8_t elink_7101_config_init(struct elink_phy *phy,
        elink_cl45_read(sc, phy,
                        MDIO_PMA_DEVAD, MDIO_PMA_REG_7101_VER2, &fw_ver2);
        elink_save_spirom_version(sc, params->port,
-                                 (uint32_t) (fw_ver1 << 16 | fw_ver2),
+                                 (uint32_t)(fw_ver1 << 16 | fw_ver2),
                                  phy->ver_addr);
        return ELINK_STATUS_OK;
 }
 
 static uint8_t elink_7101_read_status(struct elink_phy *phy,
-                                     struct elink_params *params,
-                                     struct elink_vars *vars)
+                                struct elink_params *params,
+                                struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t link_up;
        uint16_t val1, val2;
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_LASI_STAT, &val2);
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_LASI_STAT, &val1);
-       PMD_DRV_LOG(DEBUG, "10G-base-T LASI status 0x%x->0x%x", val2, val1);
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_STATUS, &val2);
-       elink_cl45_read(sc, phy, MDIO_PMA_DEVAD, MDIO_PMA_REG_STATUS, &val1);
-       PMD_DRV_LOG(DEBUG, "10G-base-T PMA status 0x%x->0x%x", val2, val1);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_LASI_STAT, &val2);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_LASI_STAT, &val1);
+       ELINK_DEBUG_P2(sc, "10G-base-T LASI status 0x%x->0x%x",
+                  val2, val1);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_REG_STATUS, &val2);
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD, MDIO_PMA_REG_STATUS, &val1);
+       ELINK_DEBUG_P2(sc, "10G-base-T PMA status 0x%x->0x%x",
+                  val2, val1);
        link_up = ((val1 & 4) == 4);
        /* If link is up print the AN outcome of the SFX7101 PHY */
        if (link_up) {
@@ -10601,21 +12416,21 @@ static uint8_t elink_7101_read_status(struct elink_phy *phy,
                                &val2);
                vars->line_speed = ELINK_SPEED_10000;
                vars->duplex = DUPLEX_FULL;
-               PMD_DRV_LOG(DEBUG, "SFX7101 AN status 0x%x->Master=%x",
-                           val2, (val2 & (1 << 14)));
+               ELINK_DEBUG_P2(sc, "SFX7101 AN status 0x%x->Master=%x",
+                          val2, (val2 & (1 << 14)));
                elink_ext_phy_10G_an_resolve(sc, phy, vars);
                elink_ext_phy_resolve_fc(phy, params, vars);
 
                /* Read LP advertised speeds */
                if (val2 & (1 << 11))
                        vars->link_status |=
-                           LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
+                               LINK_STATUS_LINK_PARTNER_10GXFD_CAPABLE;
        }
        return link_up;
 }
 
-static uint8_t elink_7101_format_ver(uint32_t spirom_ver, uint8_t * str,
-                                    uint16_t * len)
+static elink_status_t elink_7101_format_ver(uint32_t spirom_ver, uint8_t *str,
+                                           uint16_t *len)
 {
        if (*len < 5)
                return ELINK_STATUS_ERROR;
@@ -10628,15 +12443,39 @@ static uint8_t elink_7101_format_ver(uint32_t spirom_ver, uint8_t * str,
        return ELINK_STATUS_OK;
 }
 
-static void elink_7101_hw_reset(__rte_unused struct elink_phy *phy,
-                               struct elink_params *params)
+void elink_sfx7101_sp_sw_reset(struct bnx2x_softc *sc, struct elink_phy *phy)
 {
+       uint16_t val, cnt;
+
+       elink_cl45_read(sc, phy,
+                       MDIO_PMA_DEVAD,
+                       MDIO_PMA_REG_7101_RESET, &val);
+
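+       /* Pulse the self-clearing reset (bit 15) up to 10 times, waiting
+        * 50 ms before each attempt, until the PHY reports it cleared.
+        */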
+       for (cnt = 0; cnt < 10; cnt++) {
+               DELAY(1000 * 50);
+               /* Writes a self-clearing reset */
+               elink_cl45_write(sc, phy,
+                                MDIO_PMA_DEVAD,
+                                MDIO_PMA_REG_7101_RESET,
+                                (val | (1 << 15)));
+               /* Wait for clear */
+               elink_cl45_read(sc, phy,
+                               MDIO_PMA_DEVAD,
+                               MDIO_PMA_REG_7101_RESET, &val);
+
+               if ((val & (1 << 15)) == 0)
+                       break;
+       }
+}
+
+static void elink_7101_hw_reset(__rte_unused struct elink_phy *phy,
+                               struct elink_params *params)
+{
        /* Low power mode is controlled by GPIO 2 */
        elink_cb_gpio_write(params->sc, MISC_REGISTERS_GPIO_2,
-                           MISC_REGISTERS_GPIO_OUTPUT_LOW, params->port);
+                      MISC_REGISTERS_GPIO_OUTPUT_LOW, params->port);
        /* The PHY reset is controlled by GPIO 1 */
        elink_cb_gpio_write(params->sc, MISC_REGISTERS_GPIO_1,
-                           MISC_REGISTERS_GPIO_OUTPUT_LOW, params->port);
+                      MISC_REGISTERS_GPIO_OUTPUT_LOW, params->port);
 }
 
 static void elink_7101_set_link_led(struct elink_phy *phy,
@@ -10657,7 +12496,9 @@ static void elink_7101_set_link_led(struct elink_phy *phy,
                break;
        }
        elink_cl45_write(sc, phy,
-                        MDIO_PMA_DEVAD, MDIO_PMA_REG_7107_LINK_LED_CNTL, val);
+                        MDIO_PMA_DEVAD,
+                        MDIO_PMA_REG_7107_LINK_LED_CNTL,
+                        val);
 }
 
 /******************************************************************/
@@ -10665,482 +12506,532 @@ static void elink_7101_set_link_led(struct elink_phy *phy,
 /******************************************************************/
 
 static const struct elink_phy phy_null = {
-       .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_NOT_CONN,
-       .addr = 0,
-       .def_md_devad = 0,
-       .flags = ELINK_FLAGS_INIT_XGXS_FIRST,
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = 0,
-       .media_type = ELINK_ETH_PHY_NOT_PRESENT,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       .req_duplex = 0,
-       .rsrv = 0,
-       .config_init = NULL,
-       .read_status = NULL,
-       .link_reset = NULL,
-       .config_loopback = NULL,
-       .format_fw_ver = NULL,
-       .hw_reset = NULL,
-       .set_link_led = NULL,
-       .phy_specific_func = NULL
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_NOT_CONN,
+       .addr           = 0,
+       .def_md_devad   = 0,
+       .flags          = ELINK_FLAGS_INIT_XGXS_FIRST,
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = 0,
+       .media_type     = ELINK_ETH_PHY_NOT_PRESENT,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       .req_duplex     = 0,
+       .rsrv           = 0,
+       .config_init    = (config_init_t)NULL,
+       .read_status    = (read_status_t)NULL,
+       .link_reset     = (link_reset_t)NULL,
+       .config_loopback = (config_loopback_t)NULL,
+       .format_fw_ver  = (format_fw_ver_t)NULL,
+       .hw_reset       = (hw_reset_t)NULL,
+       .set_link_led   = (set_link_led_t)NULL,
+       .phy_specific_func = (phy_specific_func_t)NULL
 };
 
 static const struct elink_phy phy_serdes = {
-       .type = PORT_HW_CFG_SERDES_EXT_PHY_TYPE_DIRECT,
-       .addr = 0xff,
-       .def_md_devad = 0,
-       .flags = 0,
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = (ELINK_SUPPORTED_10baseT_Half |
-                     ELINK_SUPPORTED_10baseT_Full |
-                     ELINK_SUPPORTED_100baseT_Half |
-                     ELINK_SUPPORTED_100baseT_Full |
-                     ELINK_SUPPORTED_1000baseT_Full |
-                     ELINK_SUPPORTED_2500baseX_Full |
-                     ELINK_SUPPORTED_TP |
-                     ELINK_SUPPORTED_Autoneg |
-                     ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause),
-       .media_type = ELINK_ETH_PHY_BASE_T,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       .req_duplex = 0,
-       .rsrv = 0,
-       .config_init = elink_xgxs_config_init,
-       .read_status = elink_link_settings_status,
-       .link_reset = elink_int_link_reset,
-       .config_loopback = NULL,
-       .format_fw_ver = NULL,
-       .hw_reset = NULL,
-       .set_link_led = NULL,
-       .phy_specific_func = NULL
+       .type           = PORT_HW_CFG_SERDES_EXT_PHY_TYPE_DIRECT,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = 0,
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_10baseT_Half |
+                          ELINK_SUPPORTED_10baseT_Full |
+                          ELINK_SUPPORTED_100baseT_Half |
+                          ELINK_SUPPORTED_100baseT_Full |
+                          ELINK_SUPPORTED_1000baseT_Full |
+                          ELINK_SUPPORTED_2500baseX_Full |
+                          ELINK_SUPPORTED_TP |
+                          ELINK_SUPPORTED_Autoneg |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_BASE_T,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       .req_duplex     = 0,
+       .rsrv           = 0,
+       .config_init    = (config_init_t)elink_xgxs_config_init,
+       .read_status    = (read_status_t)elink_link_settings_status,
+       .link_reset     = (link_reset_t)elink_int_link_reset,
+       .config_loopback = (config_loopback_t)NULL,
+       .format_fw_ver  = (format_fw_ver_t)NULL,
+       .hw_reset       = (hw_reset_t)NULL,
+       .set_link_led   = (set_link_led_t)NULL,
+       .phy_specific_func = (phy_specific_func_t)NULL
 };
 
 static const struct elink_phy phy_xgxs = {
-       .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT,
-       .addr = 0xff,
-       .def_md_devad = 0,
-       .flags = 0,
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = (ELINK_SUPPORTED_10baseT_Half |
-                     ELINK_SUPPORTED_10baseT_Full |
-                     ELINK_SUPPORTED_100baseT_Half |
-                     ELINK_SUPPORTED_100baseT_Full |
-                     ELINK_SUPPORTED_1000baseT_Full |
-                     ELINK_SUPPORTED_2500baseX_Full |
-                     ELINK_SUPPORTED_10000baseT_Full |
-                     ELINK_SUPPORTED_FIBRE |
-                     ELINK_SUPPORTED_Autoneg |
-                     ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause),
-       .media_type = ELINK_ETH_PHY_CX4,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       .req_duplex = 0,
-       .rsrv = 0,
-       .config_init = elink_xgxs_config_init,
-       .read_status = elink_link_settings_status,
-       .link_reset = elink_int_link_reset,
-       .config_loopback = elink_set_xgxs_loopback,
-       .format_fw_ver = NULL,
-       .hw_reset = NULL,
-       .set_link_led = NULL,
-       .phy_specific_func = elink_xgxs_specific_func
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = 0,
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_10baseT_Half |
+                          ELINK_SUPPORTED_10baseT_Full |
+                          ELINK_SUPPORTED_100baseT_Half |
+                          ELINK_SUPPORTED_100baseT_Full |
+                          ELINK_SUPPORTED_1000baseT_Full |
+                          ELINK_SUPPORTED_2500baseX_Full |
+                          ELINK_SUPPORTED_10000baseT_Full |
+                          ELINK_SUPPORTED_FIBRE |
+                          ELINK_SUPPORTED_Autoneg |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_CX4,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       .req_duplex     = 0,
+       .rsrv           = 0,
+       .config_init    = (config_init_t)elink_xgxs_config_init,
+       .read_status    = (read_status_t)elink_link_settings_status,
+       .link_reset     = (link_reset_t)elink_int_link_reset,
+       .config_loopback = (config_loopback_t)elink_set_xgxs_loopback,
+       .format_fw_ver  = (format_fw_ver_t)NULL,
+       .hw_reset       = (hw_reset_t)NULL,
+       .set_link_led   = (set_link_led_t)NULL,
+       .phy_specific_func = (phy_specific_func_t)elink_xgxs_specific_func
 };
 static const struct elink_phy phy_warpcore = {
-       .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT,
-       .addr = 0xff,
-       .def_md_devad = 0,
-       .flags = ELINK_FLAGS_TX_ERROR_CHECK,
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = (ELINK_SUPPORTED_10baseT_Half |
-                     ELINK_SUPPORTED_10baseT_Full |
-                     ELINK_SUPPORTED_100baseT_Half |
-                     ELINK_SUPPORTED_100baseT_Full |
-                     ELINK_SUPPORTED_1000baseT_Full |
-                     ELINK_SUPPORTED_10000baseT_Full |
-                     ELINK_SUPPORTED_20000baseKR2_Full |
-                     ELINK_SUPPORTED_20000baseMLD2_Full |
-                     ELINK_SUPPORTED_FIBRE |
-                     ELINK_SUPPORTED_Autoneg |
-                     ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause),
-       .media_type = ELINK_ETH_PHY_UNSPECIFIED,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       /* req_duplex = */ 0,
-       /* rsrv = */ 0,
-       .config_init = elink_warpcore_config_init,
-       .read_status = elink_warpcore_read_status,
-       .link_reset = elink_warpcore_link_reset,
-       .config_loopback = elink_set_warpcore_loopback,
-       .format_fw_ver = NULL,
-       .hw_reset = elink_warpcore_hw_reset,
-       .set_link_led = NULL,
-       .phy_specific_func = NULL
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_DIRECT,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = ELINK_FLAGS_TX_ERROR_CHECK,
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_10baseT_Half |
+                          ELINK_SUPPORTED_10baseT_Full |
+                          ELINK_SUPPORTED_100baseT_Half |
+                          ELINK_SUPPORTED_100baseT_Full |
+                          ELINK_SUPPORTED_1000baseT_Full |
+                          ELINK_SUPPORTED_1000baseKX_Full |
+                          ELINK_SUPPORTED_10000baseT_Full |
+                          ELINK_SUPPORTED_10000baseKR_Full |
+                          ELINK_SUPPORTED_20000baseKR2_Full |
+                          ELINK_SUPPORTED_20000baseMLD2_Full |
+                          ELINK_SUPPORTED_FIBRE |
+                          ELINK_SUPPORTED_Autoneg |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_UNSPECIFIED,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       /* req_duplex = */ 0,
+       /* rsrv = */ 0,
+       .config_init    = (config_init_t)elink_warpcore_config_init,
+       .read_status    = (read_status_t)elink_warpcore_read_status,
+       .link_reset     = (link_reset_t)elink_warpcore_link_reset,
+       .config_loopback = (config_loopback_t)elink_set_warpcore_loopback,
+       .format_fw_ver  = (format_fw_ver_t)NULL,
+       .hw_reset       = (hw_reset_t)elink_warpcore_hw_reset,
+       .set_link_led   = (set_link_led_t)NULL,
+       .phy_specific_func = (phy_specific_func_t)NULL
 };
 
 static const struct elink_phy phy_7101 = {
-       .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_SFX7101,
-       .addr = 0xff,
-       .def_md_devad = 0,
-       .flags = ELINK_FLAGS_FAN_FAILURE_DET_REQ,
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = (ELINK_SUPPORTED_10000baseT_Full |
-                     ELINK_SUPPORTED_TP |
-                     ELINK_SUPPORTED_Autoneg |
-                     ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause),
-       .media_type = ELINK_ETH_PHY_BASE_T,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       .req_duplex = 0,
-       .rsrv = 0,
-       .config_init = elink_7101_config_init,
-       .read_status = elink_7101_read_status,
-       .link_reset = elink_common_ext_link_reset,
-       .config_loopback = elink_7101_config_loopback,
-       .format_fw_ver = elink_7101_format_ver,
-       .hw_reset = elink_7101_hw_reset,
-       .set_link_led = elink_7101_set_link_led,
-       .phy_specific_func = NULL
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_SFX7101,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = ELINK_FLAGS_FAN_FAILURE_DET_REQ,
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_10000baseT_Full |
+                          ELINK_SUPPORTED_TP |
+                          ELINK_SUPPORTED_Autoneg |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_BASE_T,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       .req_duplex     = 0,
+       .rsrv           = 0,
+       .config_init    = (config_init_t)elink_7101_config_init,
+       .read_status    = (read_status_t)elink_7101_read_status,
+       .link_reset     = (link_reset_t)elink_common_ext_link_reset,
+       .config_loopback = (config_loopback_t)elink_7101_config_loopback,
+       .format_fw_ver  = (format_fw_ver_t)elink_7101_format_ver,
+       .hw_reset       = (hw_reset_t)elink_7101_hw_reset,
+       .set_link_led   = (set_link_led_t)elink_7101_set_link_led,
+       .phy_specific_func = (phy_specific_func_t)NULL
 };
 static const struct elink_phy phy_8073 = {
-       .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8073,
-       .addr = 0xff,
-       .def_md_devad = 0,
-       .flags = 0,
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = (ELINK_SUPPORTED_10000baseT_Full |
-                     ELINK_SUPPORTED_2500baseX_Full |
-                     ELINK_SUPPORTED_1000baseT_Full |
-                     ELINK_SUPPORTED_FIBRE |
-                     ELINK_SUPPORTED_Autoneg |
-                     ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause),
-       .media_type = ELINK_ETH_PHY_KR,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       .req_duplex = 0,
-       .rsrv = 0,
-       .config_init = elink_8073_config_init,
-       .read_status = elink_8073_read_status,
-       .link_reset = elink_8073_link_reset,
-       .config_loopback = NULL,
-       .format_fw_ver = elink_format_ver,
-       .hw_reset = NULL,
-       .set_link_led = NULL,
-       .phy_specific_func = elink_8073_specific_func
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8073,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = 0,
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_10000baseT_Full |
+                          ELINK_SUPPORTED_2500baseX_Full |
+                          ELINK_SUPPORTED_1000baseT_Full |
+                          ELINK_SUPPORTED_FIBRE |
+                          ELINK_SUPPORTED_Autoneg |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_KR,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       .req_duplex     = 0,
+       .rsrv           = 0,
+       .config_init    = (config_init_t)elink_8073_config_init,
+       .read_status    = (read_status_t)elink_8073_read_status,
+       .link_reset     = (link_reset_t)elink_8073_link_reset,
+       .config_loopback = (config_loopback_t)NULL,
+       .format_fw_ver  = (format_fw_ver_t)elink_format_ver,
+       .hw_reset       = (hw_reset_t)NULL,
+       .set_link_led   = (set_link_led_t)NULL,
+       .phy_specific_func = (phy_specific_func_t)elink_8073_specific_func
 };
 static const struct elink_phy phy_8705 = {
-       .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8705,
-       .addr = 0xff,
-       .def_md_devad = 0,
-       .flags = ELINK_FLAGS_INIT_XGXS_FIRST,
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = (ELINK_SUPPORTED_10000baseT_Full |
-                     ELINK_SUPPORTED_FIBRE |
-                     ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause),
-       .media_type = ELINK_ETH_PHY_XFP_FIBER,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       .req_duplex = 0,
-       .rsrv = 0,
-       .config_init = elink_8705_config_init,
-       .read_status = elink_8705_read_status,
-       .link_reset = elink_common_ext_link_reset,
-       .config_loopback = NULL,
-       .format_fw_ver = elink_null_format_ver,
-       .hw_reset = NULL,
-       .set_link_led = NULL,
-       .phy_specific_func = NULL
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8705,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = ELINK_FLAGS_INIT_XGXS_FIRST,
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_10000baseT_Full |
+                          ELINK_SUPPORTED_FIBRE |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_XFP_FIBER,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       .req_duplex     = 0,
+       .rsrv           = 0,
+       .config_init    = (config_init_t)elink_8705_config_init,
+       .read_status    = (read_status_t)elink_8705_read_status,
+       .link_reset     = (link_reset_t)elink_common_ext_link_reset,
+       .config_loopback = (config_loopback_t)NULL,
+       .format_fw_ver  = (format_fw_ver_t)elink_null_format_ver,
+       .hw_reset       = (hw_reset_t)NULL,
+       .set_link_led   = (set_link_led_t)NULL,
+       .phy_specific_func = (phy_specific_func_t)NULL
 };
 static const struct elink_phy phy_8706 = {
-       .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8706,
-       .addr = 0xff,
-       .def_md_devad = 0,
-       .flags = ELINK_FLAGS_INIT_XGXS_FIRST,
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = (ELINK_SUPPORTED_10000baseT_Full |
-                     ELINK_SUPPORTED_1000baseT_Full |
-                     ELINK_SUPPORTED_FIBRE |
-                     ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause),
-       .media_type = ELINK_ETH_PHY_SFPP_10G_FIBER,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       .req_duplex = 0,
-       .rsrv = 0,
-       .config_init = elink_8706_config_init,
-       .read_status = elink_8706_read_status,
-       .link_reset = elink_common_ext_link_reset,
-       .config_loopback = NULL,
-       .format_fw_ver = elink_format_ver,
-       .hw_reset = NULL,
-       .set_link_led = NULL,
-       .phy_specific_func = NULL
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8706,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = ELINK_FLAGS_INIT_XGXS_FIRST,
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_10000baseT_Full |
+                          ELINK_SUPPORTED_1000baseT_Full |
+                          ELINK_SUPPORTED_FIBRE |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_SFPP_10G_FIBER,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       .req_duplex     = 0,
+       .rsrv           = 0,
+       .config_init    = (config_init_t)elink_8706_config_init,
+       .read_status    = (read_status_t)elink_8706_read_status,
+       .link_reset     = (link_reset_t)elink_common_ext_link_reset,
+       .config_loopback = (config_loopback_t)NULL,
+       .format_fw_ver  = (format_fw_ver_t)elink_format_ver,
+       .hw_reset       = (hw_reset_t)NULL,
+       .set_link_led   = (set_link_led_t)NULL,
+       .phy_specific_func = (phy_specific_func_t)NULL
 };
 
 static const struct elink_phy phy_8726 = {
-       .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8726,
-       .addr = 0xff,
-       .def_md_devad = 0,
-       .flags = (ELINK_FLAGS_INIT_XGXS_FIRST | ELINK_FLAGS_TX_ERROR_CHECK),
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = (ELINK_SUPPORTED_10000baseT_Full |
-                     ELINK_SUPPORTED_1000baseT_Full |
-                     ELINK_SUPPORTED_Autoneg |
-                     ELINK_SUPPORTED_FIBRE |
-                     ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause),
-       .media_type = ELINK_ETH_PHY_NOT_PRESENT,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       .req_duplex = 0,
-       .rsrv = 0,
-       .config_init = elink_8726_config_init,
-       .read_status = elink_8726_read_status,
-       .link_reset = elink_8726_link_reset,
-       .config_loopback = elink_8726_config_loopback,
-       .format_fw_ver = elink_format_ver,
-       .hw_reset = NULL,
-       .set_link_led = NULL,
-       .phy_specific_func = NULL
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8726,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = (ELINK_FLAGS_INIT_XGXS_FIRST |
+                          ELINK_FLAGS_TX_ERROR_CHECK),
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_10000baseT_Full |
+                          ELINK_SUPPORTED_1000baseT_Full |
+                          ELINK_SUPPORTED_Autoneg |
+                          ELINK_SUPPORTED_FIBRE |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_NOT_PRESENT,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       .req_duplex     = 0,
+       .rsrv           = 0,
+       .config_init    = (config_init_t)elink_8726_config_init,
+       .read_status    = (read_status_t)elink_8726_read_status,
+       .link_reset     = (link_reset_t)elink_8726_link_reset,
+       .config_loopback = (config_loopback_t)elink_8726_config_loopback,
+       .format_fw_ver  = (format_fw_ver_t)elink_format_ver,
+       .hw_reset       = (hw_reset_t)NULL,
+       .set_link_led   = (set_link_led_t)NULL,
+       .phy_specific_func = (phy_specific_func_t)NULL
 };
 
 static const struct elink_phy phy_8727 = {
-       .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8727,
-       .addr = 0xff,
-       .def_md_devad = 0,
-       .flags = (ELINK_FLAGS_FAN_FAILURE_DET_REQ | ELINK_FLAGS_TX_ERROR_CHECK),
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = (ELINK_SUPPORTED_10000baseT_Full |
-                     ELINK_SUPPORTED_1000baseT_Full |
-                     ELINK_SUPPORTED_FIBRE |
-                     ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause),
-       .media_type = ELINK_ETH_PHY_NOT_PRESENT,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       .req_duplex = 0,
-       .rsrv = 0,
-       .config_init = elink_8727_config_init,
-       .read_status = elink_8727_read_status,
-       .link_reset = elink_8727_link_reset,
-       .config_loopback = NULL,
-       .format_fw_ver = elink_format_ver,
-       .hw_reset = elink_8727_hw_reset,
-       .set_link_led = elink_8727_set_link_led,
-       .phy_specific_func = elink_8727_specific_func
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8727,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = (ELINK_FLAGS_FAN_FAILURE_DET_REQ |
+                          ELINK_FLAGS_TX_ERROR_CHECK),
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_10000baseT_Full |
+                          ELINK_SUPPORTED_1000baseT_Full |
+                          ELINK_SUPPORTED_FIBRE |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_NOT_PRESENT,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       .req_duplex     = 0,
+       .rsrv           = 0,
+       .config_init    = (config_init_t)elink_8727_config_init,
+       .read_status    = (read_status_t)elink_8727_read_status,
+       .link_reset     = (link_reset_t)elink_8727_link_reset,
+       .config_loopback = (config_loopback_t)NULL,
+       .format_fw_ver  = (format_fw_ver_t)elink_format_ver,
+       .hw_reset       = (hw_reset_t)elink_8727_hw_reset,
+       .set_link_led   = (set_link_led_t)elink_8727_set_link_led,
+       .phy_specific_func = (phy_specific_func_t)elink_8727_specific_func
 };
 static const struct elink_phy phy_8481 = {
-       .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8481,
-       .addr = 0xff,
-       .def_md_devad = 0,
-       .flags = ELINK_FLAGS_FAN_FAILURE_DET_REQ |
-           ELINK_FLAGS_REARM_LATCH_SIGNAL,
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = (ELINK_SUPPORTED_10baseT_Half |
-                     ELINK_SUPPORTED_10baseT_Full |
-                     ELINK_SUPPORTED_100baseT_Half |
-                     ELINK_SUPPORTED_100baseT_Full |
-                     ELINK_SUPPORTED_1000baseT_Full |
-                     ELINK_SUPPORTED_10000baseT_Full |
-                     ELINK_SUPPORTED_TP |
-                     ELINK_SUPPORTED_Autoneg |
-                     ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause),
-       .media_type = ELINK_ETH_PHY_BASE_T,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       .req_duplex = 0,
-       .rsrv = 0,
-       .config_init = elink_8481_config_init,
-       .read_status = elink_848xx_read_status,
-       .link_reset = elink_8481_link_reset,
-       .config_loopback = NULL,
-       .format_fw_ver = elink_848xx_format_ver,
-       .hw_reset = elink_8481_hw_reset,
-       .set_link_led = elink_848xx_set_link_led,
-       .phy_specific_func = NULL
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8481,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = ELINK_FLAGS_FAN_FAILURE_DET_REQ |
+                         ELINK_FLAGS_REARM_LATCH_SIGNAL,
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_10baseT_Half |
+                          ELINK_SUPPORTED_10baseT_Full |
+                          ELINK_SUPPORTED_100baseT_Half |
+                          ELINK_SUPPORTED_100baseT_Full |
+                          ELINK_SUPPORTED_1000baseT_Full |
+                          ELINK_SUPPORTED_10000baseT_Full |
+                          ELINK_SUPPORTED_TP |
+                          ELINK_SUPPORTED_Autoneg |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_BASE_T,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       .req_duplex     = 0,
+       .rsrv           = 0,
+       .config_init    = (config_init_t)elink_8481_config_init,
+       .read_status    = (read_status_t)elink_848xx_read_status,
+       .link_reset     = (link_reset_t)elink_8481_link_reset,
+       .config_loopback = (config_loopback_t)NULL,
+       .format_fw_ver  = (format_fw_ver_t)elink_848xx_format_ver,
+       .hw_reset       = (hw_reset_t)elink_8481_hw_reset,
+       .set_link_led   = (set_link_led_t)elink_848xx_set_link_led,
+       .phy_specific_func = (phy_specific_func_t)NULL
 };
 
 static const struct elink_phy phy_84823 = {
-       .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84823,
-       .addr = 0xff,
-       .def_md_devad = 0,
-       .flags = (ELINK_FLAGS_FAN_FAILURE_DET_REQ |
-                 ELINK_FLAGS_REARM_LATCH_SIGNAL | ELINK_FLAGS_TX_ERROR_CHECK),
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = (ELINK_SUPPORTED_10baseT_Half |
-                     ELINK_SUPPORTED_10baseT_Full |
-                     ELINK_SUPPORTED_100baseT_Half |
-                     ELINK_SUPPORTED_100baseT_Full |
-                     ELINK_SUPPORTED_1000baseT_Full |
-                     ELINK_SUPPORTED_10000baseT_Full |
-                     ELINK_SUPPORTED_TP |
-                     ELINK_SUPPORTED_Autoneg |
-                     ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause),
-       .media_type = ELINK_ETH_PHY_BASE_T,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       .req_duplex = 0,
-       .rsrv = 0,
-       .config_init = elink_848x3_config_init,
-       .read_status = elink_848xx_read_status,
-       .link_reset = elink_848x3_link_reset,
-       .config_loopback = NULL,
-       .format_fw_ver = elink_848xx_format_ver,
-       .hw_reset = NULL,
-       .set_link_led = elink_848xx_set_link_led,
-       .phy_specific_func = elink_848xx_specific_func
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84823,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = (ELINK_FLAGS_FAN_FAILURE_DET_REQ |
+                          ELINK_FLAGS_REARM_LATCH_SIGNAL |
+                          ELINK_FLAGS_TX_ERROR_CHECK),
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_10baseT_Half |
+                          ELINK_SUPPORTED_10baseT_Full |
+                          ELINK_SUPPORTED_100baseT_Half |
+                          ELINK_SUPPORTED_100baseT_Full |
+                          ELINK_SUPPORTED_1000baseT_Full |
+                          ELINK_SUPPORTED_10000baseT_Full |
+                          ELINK_SUPPORTED_TP |
+                          ELINK_SUPPORTED_Autoneg |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_BASE_T,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       .req_duplex     = 0,
+       .rsrv           = 0,
+       .config_init    = (config_init_t)elink_848x3_config_init,
+       .read_status    = (read_status_t)elink_848xx_read_status,
+       .link_reset     = (link_reset_t)elink_848x3_link_reset,
+       .config_loopback = (config_loopback_t)NULL,
+       .format_fw_ver  = (format_fw_ver_t)elink_848xx_format_ver,
+       .hw_reset       = (hw_reset_t)NULL,
+       .set_link_led   = (set_link_led_t)elink_848xx_set_link_led,
+       .phy_specific_func = (phy_specific_func_t)elink_848xx_specific_func
 };
 
 static const struct elink_phy phy_84833 = {
-       .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84833,
-       .addr = 0xff,
-       .def_md_devad = 0,
-       .flags = (ELINK_FLAGS_FAN_FAILURE_DET_REQ |
-                 ELINK_FLAGS_REARM_LATCH_SIGNAL |
-                 ELINK_FLAGS_TX_ERROR_CHECK | ELINK_FLAGS_TEMPERATURE),
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = (ELINK_SUPPORTED_100baseT_Half |
-                     ELINK_SUPPORTED_100baseT_Full |
-                     ELINK_SUPPORTED_1000baseT_Full |
-                     ELINK_SUPPORTED_10000baseT_Full |
-                     ELINK_SUPPORTED_TP |
-                     ELINK_SUPPORTED_Autoneg |
-                     ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause),
-       .media_type = ELINK_ETH_PHY_BASE_T,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       .req_duplex = 0,
-       .rsrv = 0,
-       .config_init = elink_848x3_config_init,
-       .read_status = elink_848xx_read_status,
-       .link_reset = elink_848x3_link_reset,
-       .config_loopback = NULL,
-       .format_fw_ver = elink_848xx_format_ver,
-       .hw_reset = elink_84833_hw_reset_phy,
-       .set_link_led = elink_848xx_set_link_led,
-       .phy_specific_func = elink_848xx_specific_func
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84833,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = (ELINK_FLAGS_FAN_FAILURE_DET_REQ |
+                          ELINK_FLAGS_REARM_LATCH_SIGNAL |
+                          ELINK_FLAGS_TX_ERROR_CHECK |
+                          ELINK_FLAGS_TEMPERATURE),
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_100baseT_Half |
+                          ELINK_SUPPORTED_100baseT_Full |
+                          ELINK_SUPPORTED_1000baseT_Full |
+                          ELINK_SUPPORTED_10000baseT_Full |
+                          ELINK_SUPPORTED_TP |
+                          ELINK_SUPPORTED_Autoneg |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_BASE_T,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       .req_duplex     = 0,
+       .rsrv           = 0,
+       .config_init    = (config_init_t)elink_848x3_config_init,
+       .read_status    = (read_status_t)elink_848xx_read_status,
+       .link_reset     = (link_reset_t)elink_848x3_link_reset,
+       .config_loopback = (config_loopback_t)NULL,
+       .format_fw_ver  = (format_fw_ver_t)elink_848xx_format_ver,
+       .hw_reset       = (hw_reset_t)elink_84833_hw_reset_phy,
+       .set_link_led   = (set_link_led_t)elink_848xx_set_link_led,
+       .phy_specific_func = (phy_specific_func_t)elink_848xx_specific_func
 };
 
 static const struct elink_phy phy_84834 = {
-       .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834,
-       .addr = 0xff,
-       .def_md_devad = 0,
-       .flags = ELINK_FLAGS_FAN_FAILURE_DET_REQ |
-           ELINK_FLAGS_REARM_LATCH_SIGNAL,
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = (ELINK_SUPPORTED_100baseT_Half |
-                     ELINK_SUPPORTED_100baseT_Full |
-                     ELINK_SUPPORTED_1000baseT_Full |
-                     ELINK_SUPPORTED_10000baseT_Full |
-                     ELINK_SUPPORTED_TP |
-                     ELINK_SUPPORTED_Autoneg |
-                     ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause),
-       .media_type = ELINK_ETH_PHY_BASE_T,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       .req_duplex = 0,
-       .rsrv = 0,
-       .config_init = elink_848x3_config_init,
-       .read_status = elink_848xx_read_status,
-       .link_reset = elink_848x3_link_reset,
-       .config_loopback = NULL,
-       .format_fw_ver = elink_848xx_format_ver,
-       .hw_reset = elink_84833_hw_reset_phy,
-       .set_link_led = elink_848xx_set_link_led,
-       .phy_specific_func = elink_848xx_specific_func
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = ELINK_FLAGS_FAN_FAILURE_DET_REQ |
+                         ELINK_FLAGS_REARM_LATCH_SIGNAL,
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_100baseT_Half |
+                          ELINK_SUPPORTED_100baseT_Full |
+                          ELINK_SUPPORTED_1000baseT_Full |
+                          ELINK_SUPPORTED_10000baseT_Full |
+                          ELINK_SUPPORTED_TP |
+                          ELINK_SUPPORTED_Autoneg |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_BASE_T,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       .req_duplex     = 0,
+       .rsrv           = 0,
+       .config_init    = (config_init_t)elink_848x3_config_init,
+       .read_status    = (read_status_t)elink_848xx_read_status,
+       .link_reset     = (link_reset_t)elink_848x3_link_reset,
+       .config_loopback = (config_loopback_t)NULL,
+       .format_fw_ver  = (format_fw_ver_t)elink_848xx_format_ver,
+       .hw_reset       = (hw_reset_t)elink_84833_hw_reset_phy,
+       .set_link_led   = (set_link_led_t)elink_848xx_set_link_led,
+       .phy_specific_func = (phy_specific_func_t)elink_848xx_specific_func
 };
 
-static const struct elink_phy phy_54618se = {
-       .type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X54618SE,
-       .addr = 0xff,
-       .def_md_devad = 0,
-       .flags = ELINK_FLAGS_INIT_XGXS_FIRST,
-       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
-       .mdio_ctrl = 0,
-       .supported = (ELINK_SUPPORTED_10baseT_Half |
-                     ELINK_SUPPORTED_10baseT_Full |
-                     ELINK_SUPPORTED_100baseT_Half |
-                     ELINK_SUPPORTED_100baseT_Full |
-                     ELINK_SUPPORTED_1000baseT_Full |
-                     ELINK_SUPPORTED_TP |
-                     ELINK_SUPPORTED_Autoneg |
-                     ELINK_SUPPORTED_Pause | ELINK_SUPPORTED_Asym_Pause),
-       .media_type = ELINK_ETH_PHY_BASE_T,
-       .ver_addr = 0,
-       .req_flow_ctrl = 0,
-       .req_line_speed = 0,
-       .speed_cap_mask = 0,
-       /* req_duplex = */ 0,
-       /* rsrv = */ 0,
-       .config_init = elink_54618se_config_init,
-       .read_status = elink_54618se_read_status,
-       .link_reset = elink_54618se_link_reset,
-       .config_loopback = elink_54618se_config_loopback,
-       .format_fw_ver = NULL,
-       .hw_reset = NULL,
-       .set_link_led = elink_5461x_set_link_led,
-       .phy_specific_func = elink_54618se_specific_func
+static const struct elink_phy phy_84858 = {
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84858,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = ELINK_FLAGS_FAN_FAILURE_DET_REQ |
+                         ELINK_FLAGS_REARM_LATCH_SIGNAL,
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_100baseT_Half |
+                          ELINK_SUPPORTED_100baseT_Full |
+                          ELINK_SUPPORTED_1000baseT_Full |
+                          ELINK_SUPPORTED_10000baseT_Full |
+                          ELINK_SUPPORTED_TP |
+                          ELINK_SUPPORTED_Autoneg |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_BASE_T,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       .req_duplex     = 0,
+       .rsrv           = 0,
+       .config_init    = (config_init_t)elink_848x3_config_init,
+       .read_status    = (read_status_t)elink_848xx_read_status,
+       .link_reset     = (link_reset_t)elink_848x3_link_reset,
+       .config_loopback = (config_loopback_t)NULL,
+       .format_fw_ver  = (format_fw_ver_t)elink_848xx_format_ver,
+       .hw_reset       = (hw_reset_t)elink_84833_hw_reset_phy,
+       .set_link_led   = (set_link_led_t)elink_848xx_set_link_led,
+       .phy_specific_func = (phy_specific_func_t)elink_848xx_specific_func
 };
 
+static const struct elink_phy phy_54618se = {
+       .type           = PORT_HW_CFG_XGXS_EXT_PHY2_TYPE_BNX2X54618SE,
+       .addr           = 0xff,
+       .def_md_devad   = 0,
+       .flags          = ELINK_FLAGS_INIT_XGXS_FIRST,
+       .rx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff},
+       .mdio_ctrl      = 0,
+       .supported      = (ELINK_SUPPORTED_10baseT_Half |
+                          ELINK_SUPPORTED_10baseT_Full |
+                          ELINK_SUPPORTED_100baseT_Half |
+                          ELINK_SUPPORTED_100baseT_Full |
+                          ELINK_SUPPORTED_1000baseT_Full |
+                          ELINK_SUPPORTED_TP |
+                          ELINK_SUPPORTED_Autoneg |
+                          ELINK_SUPPORTED_Pause |
+                          ELINK_SUPPORTED_Asym_Pause),
+       .media_type     = ELINK_ETH_PHY_BASE_T,
+       .ver_addr       = 0,
+       .req_flow_ctrl  = 0,
+       .req_line_speed = 0,
+       .speed_cap_mask = 0,
+       /* req_duplex = */ 0,
+       /* rsrv = */ 0,
+       .config_init    = (config_init_t)elink_54618se_config_init,
+       .read_status    = (read_status_t)elink_54618se_read_status,
+       .link_reset     = (link_reset_t)elink_54618se_link_reset,
+       .config_loopback = (config_loopback_t)elink_54618se_config_loopback,
+       .format_fw_ver  = (format_fw_ver_t)NULL,
+       .hw_reset       = (hw_reset_t)NULL,
+       .set_link_led   = (set_link_led_t)elink_5461x_set_link_led,
+       .phy_specific_func = (phy_specific_func_t)elink_54618se_specific_func
+};
 /*****************************************************************/
 /*                                                               */
 /* Populate the phy according to the phy index.                 */
 /* Main function: elink_populate_phy                            */
@@ -11148,9 +13039,9 @@ static const struct elink_phy phy_54618se = {
 /*****************************************************************/
 
 static void elink_populate_preemphasis(struct bnx2x_softc *sc,
-                                      uint32_t shmem_base,
-                                      struct elink_phy *phy, uint8_t port,
-                                      uint8_t phy_index)
+                                    uint32_t shmem_base,
+                                    struct elink_phy *phy, uint8_t port,
+                                    uint8_t phy_index)
 {
        /* Get the 4 lanes xgxs config rx and tx */
        uint32_t rx = 0, tx = 0, i;
@@ -11162,23 +13053,19 @@ static void elink_populate_preemphasis(struct bnx2x_softc *sc,
                if (phy_index == ELINK_INT_PHY || phy_index == ELINK_EXT_PHY1) {
                        rx = REG_RD(sc, shmem_base +
                                    offsetof(struct shmem_region,
-                                            dev_info.port_hw_config[port].
-                                            xgxs_config_rx[i << 1]));
+                       dev_info.port_hw_config[port].xgxs_config_rx[i << 1]));
 
                        tx = REG_RD(sc, shmem_base +
                                    offsetof(struct shmem_region,
-                                            dev_info.port_hw_config[port].
-                                            xgxs_config_tx[i << 1]));
+                       dev_info.port_hw_config[port].xgxs_config_tx[i << 1]));
                } else {
                        rx = REG_RD(sc, shmem_base +
                                    offsetof(struct shmem_region,
-                                            dev_info.port_hw_config[port].
-                                            xgxs_config2_rx[i << 1]));
+                       dev_info.port_hw_config[port].xgxs_config2_rx[i << 1]));
 
                        tx = REG_RD(sc, shmem_base +
                                    offsetof(struct shmem_region,
-                                            dev_info.port_hw_config[port].
-                                            xgxs_config2_rx[i << 1]));
+                       dev_info.port_hw_config[port].xgxs_config2_rx[i << 1]));
                }
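        /* Each 32-bit shmem word packs two 16-bit lane values: the high
         * half is lane (i << 1), the low half is lane (i << 1) + 1.
         */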
 
                phy->rx_preemphasis[i << 1] = ((rx >> 16) & 0xffff);
@@ -11186,65 +13073,62 @@ static void elink_populate_preemphasis(struct bnx2x_softc *sc,
 
                phy->tx_preemphasis[i << 1] = ((tx >> 16) & 0xffff);
                phy->tx_preemphasis[(i << 1) + 1] = (tx & 0xffff);
+               ELINK_DEBUG_P2(sc, "phy->rx_preemphasis = %x, phy->tx_preemphasis = %x",
+                       phy->rx_preemphasis[i << 1],
+                       phy->tx_preemphasis[i << 1]);
        }
 }
 
 static uint32_t elink_get_ext_phy_config(struct bnx2x_softc *sc,
-                                        uint32_t shmem_base, uint8_t phy_index,
-                                        uint8_t port)
+                                   uint32_t shmem_base,
+                                   uint8_t phy_index, uint8_t port)
 {
        uint32_t ext_phy_config = 0;
        switch (phy_index) {
        case ELINK_EXT_PHY1:
                ext_phy_config = REG_RD(sc, shmem_base +
-                                       offsetof(struct shmem_region,
-                                                dev_info.port_hw_config[port].
-                                                external_phy_config));
+                                             offsetof(struct shmem_region,
+                       dev_info.port_hw_config[port].external_phy_config));
                break;
        case ELINK_EXT_PHY2:
                ext_phy_config = REG_RD(sc, shmem_base +
-                                       offsetof(struct shmem_region,
-                                                dev_info.port_hw_config[port].
-                                                external_phy_config2));
+                                             offsetof(struct shmem_region,
+                       dev_info.port_hw_config[port].external_phy_config2));
                break;
        default:
-               PMD_DRV_LOG(DEBUG, "Invalid phy_index %d", phy_index);
+               ELINK_DEBUG_P1(sc, "Invalid phy_index %d", phy_index);
                return ELINK_STATUS_ERROR;
        }
 
        return ext_phy_config;
 }
 static elink_status_t elink_populate_int_phy(struct bnx2x_softc *sc,
-                                            uint32_t shmem_base, uint8_t port,
-                                            struct elink_phy *phy)
+                                 uint32_t shmem_base, uint8_t port,
+                                 struct elink_phy *phy)
 {
        uint32_t phy_addr;
-       __rte_unused uint32_t chip_id;
+       uint32_t chip_id;
        uint32_t switch_cfg = (REG_RD(sc, shmem_base +
-                                     offsetof(struct shmem_region,
-                                              dev_info.
-                                              port_feature_config[port].
-                                              link_config)) &
-                              PORT_FEATURE_CONNECTED_SWITCH_MASK);
-       chip_id =
-           (REG_RD(sc, MISC_REG_CHIP_NUM) << 16) |
-           ((REG_RD(sc, MISC_REG_CHIP_REV) & 0xf) << 12);
-
-       PMD_DRV_LOG(DEBUG, ":chip_id = 0x%x", chip_id);
+                                      offsetof(struct shmem_region,
+                       dev_info.port_feature_config[port].link_config)) &
+                         PORT_FEATURE_CONNECTED_SWITCH_MASK);
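+       /* Chip id: chip number in the high 16 bits, revision in bits 15:12 */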
+       chip_id = (REG_RD(sc, MISC_REG_CHIP_NUM) << 16) |
+               ((REG_RD(sc, MISC_REG_CHIP_REV) & 0xf) << 12);
+
+       ELINK_DEBUG_P1(sc, ":chip_id = 0x%x", chip_id);
        if (USES_WARPCORE(sc)) {
                uint32_t serdes_net_if;
-               phy_addr = REG_RD(sc, MISC_REG_WC0_CTRL_PHY_ADDR);
+               phy_addr = REG_RD(sc,
+                                 MISC_REG_WC0_CTRL_PHY_ADDR);
                *phy = phy_warpcore;
                if (REG_RD(sc, MISC_REG_PORT4MODE_EN_OVWR) == 0x3)
                        phy->flags |= ELINK_FLAGS_4_PORT_MODE;
                else
                        phy->flags &= ~ELINK_FLAGS_4_PORT_MODE;
                /* Check Dual mode */
                serdes_net_if = (REG_RD(sc, shmem_base +
-                                       offsetof(struct shmem_region,
-                                                dev_info.port_hw_config[port].
-                                                default_cfg)) &
+                                       offsetof(struct shmem_region, dev_info.
+                                       port_hw_config[port].default_cfg)) &
                                 PORT_HW_CFG_NET_SERDES_IF_MASK);
                /* Set the appropriate supported and flags indications per
                 * interface type of the chip
@@ -11280,8 +13164,8 @@ static elink_status_t elink_populate_int_phy(struct bnx2x_softc *sc,
                        break;
                case PORT_HW_CFG_NET_SERDES_IF_KR:
                        phy->media_type = ELINK_ETH_PHY_KR;
-                       phy->supported &= (ELINK_SUPPORTED_1000baseT_Full |
-                                          ELINK_SUPPORTED_10000baseT_Full |
+                       phy->supported &= (ELINK_SUPPORTED_1000baseKX_Full |
+                                          ELINK_SUPPORTED_10000baseKR_Full |
                                           ELINK_SUPPORTED_FIBRE |
                                           ELINK_SUPPORTED_Autoneg |
                                           ELINK_SUPPORTED_Pause |
@@ -11299,8 +13183,8 @@ static elink_status_t elink_populate_int_phy(struct bnx2x_softc *sc,
                        phy->media_type = ELINK_ETH_PHY_KR;
                        phy->flags |= ELINK_FLAGS_WC_DUAL_MODE;
                        phy->supported &= (ELINK_SUPPORTED_20000baseKR2_Full |
-                                          ELINK_SUPPORTED_10000baseT_Full |
-                                          ELINK_SUPPORTED_1000baseT_Full |
+                                          ELINK_SUPPORTED_10000baseKR_Full |
+                                          ELINK_SUPPORTED_1000baseKX_Full |
                                           ELINK_SUPPORTED_Autoneg |
                                           ELINK_SUPPORTED_FIBRE |
                                           ELINK_SUPPORTED_Pause |
@@ -11308,8 +13192,8 @@ static elink_status_t elink_populate_int_phy(struct bnx2x_softc *sc,
                        phy->flags &= ~ELINK_FLAGS_TX_ERROR_CHECK;
                        break;
                default:
-                       PMD_DRV_LOG(DEBUG, "Unknown WC interface type 0x%x",
-                                   serdes_net_if);
+                       ELINK_DEBUG_P1(sc, "Unknown WC interface type 0x%x",
+                                      serdes_net_if);
                        break;
                }
 
@@ -11321,6 +13205,8 @@ static elink_status_t elink_populate_int_phy(struct bnx2x_softc *sc,
                        phy->flags |= ELINK_FLAGS_MDC_MDIO_WA;
                else
                        phy->flags |= ELINK_FLAGS_MDC_MDIO_WA_B0;
+               ELINK_DEBUG_P3(sc, "media_type = %x, flags = %x, supported = %x",
+                               phy->media_type, phy->flags, phy->supported);
        } else {
                switch (switch_cfg) {
                case ELINK_SWITCH_CFG_1G:
@@ -11336,32 +13222,32 @@ static elink_status_t elink_populate_int_phy(struct bnx2x_softc *sc,
                        *phy = phy_xgxs;
                        break;
                default:
-                       PMD_DRV_LOG(DEBUG, "Invalid switch_cfg");
+                       ELINK_DEBUG_P0(sc, "Invalid switch_cfg");
                        return ELINK_STATUS_ERROR;
                }
        }
-       phy->addr = (uint8_t) phy_addr;
+       phy->addr = (uint8_t)phy_addr;
        phy->mdio_ctrl = elink_get_emac_base(sc,
-                                            SHARED_HW_CFG_MDC_MDIO_ACCESS1_BOTH,
-                                            port);
+                                           SHARED_HW_CFG_MDC_MDIO_ACCESS1_BOTH,
+                                           port);
        if (CHIP_IS_E2(sc))
                phy->def_md_devad = ELINK_E2_DEFAULT_PHY_DEV_ADDR;
        else
                phy->def_md_devad = ELINK_DEFAULT_PHY_DEV_ADDR;
 
-       PMD_DRV_LOG(DEBUG, "Internal phy port=%d, addr=0x%x, mdio_ctl=0x%x",
-                   port, phy->addr, phy->mdio_ctrl);
+       ELINK_DEBUG_P3(sc, "Internal phy port=%d, addr=0x%x, mdio_ctl=0x%x",
+                  port, phy->addr, phy->mdio_ctrl);
 
        elink_populate_preemphasis(sc, shmem_base, phy, port, ELINK_INT_PHY);
        return ELINK_STATUS_OK;
 }
 
 static elink_status_t elink_populate_ext_phy(struct bnx2x_softc *sc,
-                                            uint8_t phy_index,
-                                            uint32_t shmem_base,
-                                            uint32_t shmem2_base,
-                                            uint8_t port,
-                                            struct elink_phy *phy)
+                                 uint8_t phy_index,
+                                 uint32_t shmem_base,
+                                 uint32_t shmem2_base,
+                                 uint8_t port,
+                                 struct elink_phy *phy)
 {
        uint32_t ext_phy_config, phy_type, config2;
        uint32_t mdc_mdio_access = SHARED_HW_CFG_MDC_MDIO_ACCESS1_BOTH;
@@ -11407,10 +13293,13 @@ static elink_status_t elink_populate_ext_phy(struct bnx2x_softc *sc,
        case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834:
                *phy = phy_84834;
                break;
+       case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84858:
+               *phy = phy_84858;
+               break;
        case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X54616:
-       case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X54618SE:
+       case PORT_HW_CFG_XGXS_EXT_PHY2_TYPE_BNX2X54618SE:
                *phy = phy_54618se;
-               if (phy_type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X54618SE)
+               if (phy_type == PORT_HW_CFG_XGXS_EXT_PHY2_TYPE_BNX2X54618SE)
                        phy->flags |= ELINK_FLAGS_EEE;
                break;
        case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_SFX7101:
@@ -11436,21 +13325,20 @@ static elink_status_t elink_populate_ext_phy(struct bnx2x_softc *sc,
         * the address
         */
        config2 = REG_RD(sc, shmem_base + offsetof(struct shmem_region,
-                                                  dev_info.shared_hw_config.
-                                                  config2));
+                                       dev_info.shared_hw_config.config2));
        if (phy_index == ELINK_EXT_PHY1) {
                phy->ver_addr = shmem_base + offsetof(struct shmem_region,
-                                                     port_mb[port].
-                                                     ext_phy_fw_version);
+                               port_mb[port].ext_phy_fw_version);
 
                /* Check specific mdc mdio settings */
                if (config2 & SHARED_HW_CFG_MDC_MDIO_ACCESS1_MASK)
                        mdc_mdio_access = config2 &
-                           SHARED_HW_CFG_MDC_MDIO_ACCESS1_MASK;
+                       SHARED_HW_CFG_MDC_MDIO_ACCESS1_MASK;
        } else {
                uint32_t size = REG_RD(sc, shmem2_base);
 
-               if (size > offsetof(struct shmem2_region, ext_phy_fw_version2)) {
+               if (size >
+                   offsetof(struct shmem2_region, ext_phy_fw_version2)) {
                        phy->ver_addr = shmem2_base +
                            offsetof(struct shmem2_region,
                                     ext_phy_fw_version2[port]);
@@ -11458,35 +13346,34 @@ static elink_status_t elink_populate_ext_phy(struct bnx2x_softc *sc,
                /* Check specific mdc mdio settings */
                if (config2 & SHARED_HW_CFG_MDC_MDIO_ACCESS2_MASK)
                        mdc_mdio_access = (config2 &
-                                          SHARED_HW_CFG_MDC_MDIO_ACCESS2_MASK)
-                           >> (SHARED_HW_CFG_MDC_MDIO_ACCESS2_SHIFT -
-                               SHARED_HW_CFG_MDC_MDIO_ACCESS1_SHIFT);
+                       SHARED_HW_CFG_MDC_MDIO_ACCESS2_MASK) >>
+                       (SHARED_HW_CFG_MDC_MDIO_ACCESS2_SHIFT -
+                        SHARED_HW_CFG_MDC_MDIO_ACCESS1_SHIFT);
        }
        phy->mdio_ctrl = elink_get_emac_base(sc, mdc_mdio_access, port);
 
-       if (((phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84833) ||
-            (phy->type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834)) &&
-           (phy->ver_addr)) {
+       if (elink_is_8483x_8485x(phy) && (phy->ver_addr)) {
                /* Remove 100Mb link supported for BNX2X84833/4 when phy fw
                 * version lower than or equal to 1.39
                 */
                uint32_t raw_ver = REG_RD(sc, phy->ver_addr);
-               if (((raw_ver & 0x7F) <= 39) && (((raw_ver & 0xF80) >> 7) <= 1))
+               if (((raw_ver & 0x7F) <= 39) &&
+                   (((raw_ver & 0xF80) >> 7) <= 1))
                        phy->supported &= ~(ELINK_SUPPORTED_100baseT_Half |
                                            ELINK_SUPPORTED_100baseT_Full);
        }
 
-       PMD_DRV_LOG(DEBUG, "phy_type 0x%x port %d found in index %d",
-                   phy_type, port, phy_index);
-       PMD_DRV_LOG(DEBUG, "             addr=0x%x, mdio_ctl=0x%x",
-                   phy->addr, phy->mdio_ctrl);
+       ELINK_DEBUG_P3(sc, "phy_type 0x%x port %d found in index %d",
+                  phy_type, port, phy_index);
+       ELINK_DEBUG_P2(sc, "             addr=0x%x, mdio_ctl=0x%x",
+                  phy->addr, phy->mdio_ctrl);
        return ELINK_STATUS_OK;
 }
 
 static elink_status_t elink_populate_phy(struct bnx2x_softc *sc,
-                                        uint8_t phy_index, uint32_t shmem_base,
-                                        uint32_t shmem2_base, uint8_t port,
-                                        struct elink_phy *phy)
+                             uint8_t phy_index, uint32_t shmem_base,
+                             uint32_t shmem2_base, uint8_t port,
+                             struct elink_phy *phy)
 {
        elink_status_t status = ELINK_STATUS_OK;
        phy->type = PORT_HW_CFG_XGXS_EXT_PHY_TYPE_NOT_CONN;
@@ -11498,50 +13385,44 @@ static elink_status_t elink_populate_phy(struct bnx2x_softc *sc,
 }
 
 static void elink_phy_def_cfg(struct elink_params *params,
-                             struct elink_phy *phy, uint8_t phy_index)
+                             struct elink_phy *phy,
+                             uint8_t phy_index)
 {
        struct bnx2x_softc *sc = params->sc;
        uint32_t link_config;
        /* Populate the default phy configuration for MF mode */
        if (phy_index == ELINK_EXT_PHY2) {
                link_config = REG_RD(sc, params->shmem_base +
-                                    offsetof(struct shmem_region,
-                                             dev_info.port_feature_config
-                                             [params->port].link_config2));
-               phy->speed_cap_mask =
-                   REG_RD(sc,
-                          params->shmem_base + offsetof(struct shmem_region,
-                                                        dev_info.port_hw_config
-                                                        [params->port].
-                                                        speed_capability_mask2));
+                                    offsetof(struct shmem_region, dev_info.
+                       port_feature_config[params->port].link_config2));
+               phy->speed_cap_mask = REG_RD(sc, params->shmem_base +
+                                            offsetof(struct shmem_region,
+                                                     dev_info.
+                       port_hw_config[params->port].speed_capability_mask2));
        } else {
                link_config = REG_RD(sc, params->shmem_base +
-                                    offsetof(struct shmem_region,
-                                             dev_info.port_feature_config
-                                             [params->port].link_config));
-               phy->speed_cap_mask =
-                   REG_RD(sc,
-                          params->shmem_base + offsetof(struct shmem_region,
-                                                        dev_info.port_hw_config
-                                                        [params->port].
-                                                        speed_capability_mask));
+                                    offsetof(struct shmem_region, dev_info.
+                               port_feature_config[params->port].link_config));
+               phy->speed_cap_mask = REG_RD(sc, params->shmem_base +
+                                            offsetof(struct shmem_region,
+                                                     dev_info.
+                       port_hw_config[params->port].speed_capability_mask));
        }
-
-       PMD_DRV_LOG(DEBUG,
-                   "Default config phy idx %x cfg 0x%x speed_cap_mask 0x%x",
-                   phy_index, link_config, phy->speed_cap_mask);
+       ELINK_DEBUG_P3(sc,
+          "Default config phy idx %x cfg 0x%x speed_cap_mask 0x%x",
+          phy_index, link_config, phy->speed_cap_mask);
 
        phy->req_duplex = DUPLEX_FULL;
        switch (link_config & PORT_FEATURE_LINK_SPEED_MASK) {
        case PORT_FEATURE_LINK_SPEED_10M_HALF:
                phy->req_duplex = DUPLEX_HALF;
-               /* fall-through */
+               /* fallthrough */
        case PORT_FEATURE_LINK_SPEED_10M_FULL:
                phy->req_line_speed = ELINK_SPEED_10;
                break;
        case PORT_FEATURE_LINK_SPEED_100M_HALF:
                phy->req_duplex = DUPLEX_HALF;
-               /* fall-through */
+               /* fallthrough */
        case PORT_FEATURE_LINK_SPEED_100M_FULL:
                phy->req_line_speed = ELINK_SPEED_100;
                break;
@@ -11559,7 +13440,10 @@ static void elink_phy_def_cfg(struct elink_params *params,
                break;
        }
 
-       switch (link_config & PORT_FEATURE_FLOW_CONTROL_MASK) {
+       ELINK_DEBUG_P2(sc, "Default config phy idx %x, req_duplex config %x",
+                       phy_index, phy->req_duplex);
+
+       switch (link_config & PORT_FEATURE_FLOW_CONTROL_MASK) {
        case PORT_FEATURE_FLOW_CONTROL_AUTO:
                phy->req_flow_ctrl = ELINK_FLOW_CTRL_AUTO;
                break;
@@ -11576,6 +13460,9 @@ static void elink_phy_def_cfg(struct elink_params *params,
                phy->req_flow_ctrl = ELINK_FLOW_CTRL_NONE;
                break;
        }
+       ELINK_DEBUG_P3(sc, "Requested Duplex = %x, line_speed = %x, flow_ctrl = %x",
+                      phy->req_duplex, phy->req_line_speed,
+                      phy->req_flow_ctrl);
 }
 
 uint32_t elink_phy_selection(struct elink_params *params)
@@ -11584,25 +13471,24 @@ uint32_t elink_phy_selection(struct elink_params *params)
        uint32_t return_cfg = PORT_HW_CFG_PHY_SELECTION_HARDWARE_DEFAULT;
 
        phy_config_swapped = params->multi_phy_config &
-           PORT_HW_CFG_PHY_SWAPPED_ENABLED;
+               PORT_HW_CFG_PHY_SWAPPED_ENABLED;
 
-       prio_cfg = params->multi_phy_config & PORT_HW_CFG_PHY_SELECTION_MASK;
+       prio_cfg = params->multi_phy_config &
+                       PORT_HW_CFG_PHY_SELECTION_MASK;
 
        if (phy_config_swapped) {
                switch (prio_cfg) {
                case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY:
-                       return_cfg =
-                           PORT_HW_CFG_PHY_SELECTION_SECOND_PHY_PRIORITY;
+                    return_cfg = PORT_HW_CFG_PHY_SELECTION_SECOND_PHY_PRIORITY;
                        break;
                case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY_PRIORITY:
-                       return_cfg =
-                           PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY;
+                    return_cfg = PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY;
                        break;
                case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY:
-                       return_cfg = PORT_HW_CFG_PHY_SELECTION_FIRST_PHY;
+                    return_cfg = PORT_HW_CFG_PHY_SELECTION_FIRST_PHY;
                        break;
                case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY:
-                       return_cfg = PORT_HW_CFG_PHY_SELECTION_SECOND_PHY;
+                    return_cfg = PORT_HW_CFG_PHY_SELECTION_SECOND_PHY;
                        break;
                }
        } else
@@ -11611,19 +13497,23 @@ uint32_t elink_phy_selection(struct elink_params *params)
        return return_cfg;
 }
 
-elink_status_t elink_phy_probe(struct elink_params * params)
+elink_status_t elink_phy_probe(struct elink_params *params)
 {
        uint8_t phy_index, actual_phy_idx;
        uint32_t phy_config_swapped, sync_offset, media_types;
        struct bnx2x_softc *sc = params->sc;
        struct elink_phy *phy;
        params->num_phys = 0;
-       PMD_DRV_LOG(DEBUG, "Begin phy probe");
-
+       ELINK_DEBUG_P0(sc, "Begin phy probe");
+#ifdef ELINK_INCLUDE_EMUL
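+       /* Emulation targets have no physical PHYs to probe */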
+       if (CHIP_REV_IS_EMUL(sc))
+               return ELINK_STATUS_OK;
+#endif
        phy_config_swapped = params->multi_phy_config &
-           PORT_HW_CFG_PHY_SWAPPED_ENABLED;
+               PORT_HW_CFG_PHY_SWAPPED_ENABLED;
 
-       for (phy_index = ELINK_INT_PHY; phy_index < ELINK_MAX_PHYS; phy_index++) {
+       for (phy_index = ELINK_INT_PHY; phy_index < ELINK_MAX_PHYS;
+             phy_index++) {
                actual_phy_idx = phy_index;
                if (phy_config_swapped) {
                        if (phy_index == ELINK_EXT_PHY1)
@@ -11631,18 +13521,19 @@ elink_status_t elink_phy_probe(struct elink_params * params)
                        else if (phy_index == ELINK_EXT_PHY2)
                                actual_phy_idx = ELINK_EXT_PHY1;
                }
-               PMD_DRV_LOG(DEBUG, "phy_config_swapped %x, phy_index %x,"
-                           " actual_phy_idx %x", phy_config_swapped,
-                           phy_index, actual_phy_idx);
+               ELINK_DEBUG_P3(sc, "phy_config_swapped %x, phy_index %x,"
+                              " actual_phy_idx %x", phy_config_swapped,
+                          phy_index, actual_phy_idx);
                phy = &params->phy[actual_phy_idx];
                if (elink_populate_phy(sc, phy_index, params->shmem_base,
                                       params->shmem2_base, params->port,
                                       phy) != ELINK_STATUS_OK) {
                        params->num_phys = 0;
-                       PMD_DRV_LOG(DEBUG, "phy probe failed in phy index %d",
-                                   phy_index);
+                       ELINK_DEBUG_P1(sc, "phy probe failed in phy index %d",
+                                  phy_index);
                        for (phy_index = ELINK_INT_PHY;
-                            phy_index < ELINK_MAX_PHYS; phy_index++)
+                             phy_index < ELINK_MAX_PHYS;
+                             phy_index++)
                                *phy = phy_null;
                        return ELINK_STATUS_ERROR;
                }
@@ -11658,8 +13549,8 @@ elink_status_t elink_phy_probe(struct elink_params * params)
                        phy->flags |= ELINK_FLAGS_MDC_MDIO_WA_G;
 
                sync_offset = params->shmem_base +
-                   offsetof(struct shmem_region,
-                            dev_info.port_hw_config[params->port].media_type);
+                       offsetof(struct shmem_region,
+                       dev_info.port_hw_config[params->port].media_type);
                media_types = REG_RD(sc, sync_offset);
 
                /* Update media type for non-PMF sync only for the first time
@@ -11670,9 +13561,9 @@ elink_status_t elink_phy_probe(struct elink_params * params)
                                    (PORT_HW_CFG_MEDIA_TYPE_PHY1_SHIFT *
                                     actual_phy_idx))) == 0) {
                        media_types |= ((phy->media_type &
-                                        PORT_HW_CFG_MEDIA_TYPE_PHY0_MASK) <<
-                                       (PORT_HW_CFG_MEDIA_TYPE_PHY1_SHIFT *
-                                        actual_phy_idx));
+                                       PORT_HW_CFG_MEDIA_TYPE_PHY0_MASK) <<
+                               (PORT_HW_CFG_MEDIA_TYPE_PHY1_SHIFT *
+                                actual_phy_idx));
                }
                REG_WR(sc, sync_offset, media_types);
 
@@ -11680,47 +13571,231 @@ elink_status_t elink_phy_probe(struct elink_params * params)
                params->num_phys++;
        }
 
-       PMD_DRV_LOG(DEBUG, "End phy probe. #phys found %x", params->num_phys);
+       ELINK_DEBUG_P1(sc, "End phy probe. #phys found %x", params->num_phys);
        return ELINK_STATUS_OK;
 }
 
-static void elink_init_bmac_loopback(struct elink_params *params,
-                                    struct elink_vars *vars)
+#ifdef ELINK_INCLUDE_EMUL
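+/* Emulation-only E3 MAC bring-up: UMAC below 10G, XMAC at 10G and 20G */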
+static elink_status_t elink_init_e3_emul_mac(struct elink_params *params,
+                                            struct elink_vars *vars)
+{
+       struct bnx2x_softc *sc = params->sc;
+       vars->line_speed = params->req_line_speed[0];
+       /* In case link speed is auto, set speed the highest as possible */
+       if (params->req_line_speed[0] == ELINK_SPEED_AUTO_NEG) {
+               if (params->feature_config_flags &
+                   ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC)
+                       vars->line_speed = ELINK_SPEED_2500;
+               else if (elink_is_4_port_mode(sc))
+                       vars->line_speed = ELINK_SPEED_10000;
+               else
+                       vars->line_speed = ELINK_SPEED_20000;
+       }
+       if (vars->line_speed < ELINK_SPEED_10000) {
+               if ((params->feature_config_flags &
+                    ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC)) {
+                       ELINK_DEBUG_P1(sc, "Invalid line speed %d while UMAC is"
+                                  " disabled!", params->req_line_speed[0]);
+                       return ELINK_STATUS_ERROR;
+               }
+               switch (vars->line_speed) {
+               case ELINK_SPEED_10:
+                       vars->link_status = ELINK_LINK_10TFD;
+                       break;
+               case ELINK_SPEED_100:
+                       vars->link_status = ELINK_LINK_100TXFD;
+                       break;
+               case ELINK_SPEED_1000:
+                       vars->link_status = ELINK_LINK_1000TFD;
+                       break;
+               case ELINK_SPEED_2500:
+                       vars->link_status = ELINK_LINK_2500TFD;
+                       break;
+               default:
+                       ELINK_DEBUG_P1(sc, "Invalid line speed %d for UMAC",
+                                  vars->line_speed);
+                       return ELINK_STATUS_ERROR;
+               }
+               vars->link_status |= LINK_STATUS_LINK_UP;
+
+               if (params->loopback_mode == ELINK_LOOPBACK_UMAC)
+                       elink_umac_enable(params, vars, 1);
+               else
+                       elink_umac_enable(params, vars, 0);
+       } else {
+               /* Link speed >= 10000 requires XMAC enabled */
+               if (params->feature_config_flags &
+                   ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC) {
+                       ELINK_DEBUG_P1(sc, "Invalid line speed %d while XMAC is"
+                                  " disabled!", params->req_line_speed[0]);
+                       return ELINK_STATUS_ERROR;
+               }
+               /* Check link speed */
+               switch (vars->line_speed) {
+               case ELINK_SPEED_10000:
+                       vars->link_status = ELINK_LINK_10GTFD;
+                       break;
+               case ELINK_SPEED_20000:
+                       vars->link_status = ELINK_LINK_20GTFD;
+                       break;
+               default:
+                       ELINK_DEBUG_P1(sc, "Invalid line speed %d for XMAC",
+                                  vars->line_speed);
+                       return ELINK_STATUS_ERROR;
+               }
+               vars->link_status |= LINK_STATUS_LINK_UP;
+               if (params->loopback_mode == ELINK_LOOPBACK_XMAC)
+                       elink_xmac_enable(params, vars, 1);
+               else
+                       elink_xmac_enable(params, vars, 0);
+       }
+       return ELINK_STATUS_OK;
+}
+
+static elink_status_t elink_init_emul(struct elink_params *params,
+                           struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
+       if (CHIP_IS_E3(sc)) {
+               if (elink_init_e3_emul_mac(params, vars) !=
+                   ELINK_STATUS_OK)
+                       return ELINK_STATUS_ERROR;
+       } else {
+               if (params->feature_config_flags &
+                   ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC) {
+                       vars->line_speed = ELINK_SPEED_1000;
+                       vars->link_status = (LINK_STATUS_LINK_UP |
+                                            ELINK_LINK_1000XFD);
+                       if (params->loopback_mode ==
+                           ELINK_LOOPBACK_EMAC)
+                               elink_emac_enable(params, vars, 1);
+                       else
+                               elink_emac_enable(params, vars, 0);
+               } else {
+                       vars->line_speed = ELINK_SPEED_10000;
+                       vars->link_status = (LINK_STATUS_LINK_UP |
+                                            ELINK_LINK_10GTFD);
+                       if (params->loopback_mode ==
+                           ELINK_LOOPBACK_BMAC)
+                               elink_bmac_enable(params, vars, 1, 1);
+                       else
+                               elink_bmac_enable(params, vars, 0, 1);
+               }
+       }
        vars->link_up = 1;
-       vars->line_speed = ELINK_SPEED_10000;
        vars->duplex = DUPLEX_FULL;
        vars->flow_ctrl = ELINK_FLOW_CTRL_NONE;
-       vars->mac_type = ELINK_MAC_TYPE_BMAC;
-
-       vars->phy_flags = PHY_XGXS_FLAG;
 
-       elink_xgxs_deassert(params);
+       if (CHIP_IS_E1x(sc))
+               elink_pbf_update(params, vars->flow_ctrl,
+                                vars->line_speed);
+       /* Disable drain */
+       REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0);
 
-       /* Set bmac loopback */
-       elink_bmac_enable(params, vars, 1, 1);
+       /* Update shared memory */
+       elink_update_mng(params, vars->link_status);
+       return ELINK_STATUS_OK;
+}
+#endif
+#ifdef ELINK_INCLUDE_FPGA
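+/* FPGA-only bring-up: force the requested speed, then enable UMAC (E3) or EMAC */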
+static elink_status_t elink_init_fpga(struct elink_params *params,
+                           struct elink_vars *vars)
+{
+       /* Enable on E1.5 FPGA */
+       struct bnx2x_softc *sc = params->sc;
+       vars->duplex = DUPLEX_FULL;
+       vars->flow_ctrl = ELINK_FLOW_CTRL_NONE;
+       if (!(CHIP_IS_E1(sc))) {
+               vars->flow_ctrl = (ELINK_FLOW_CTRL_TX |
+                                  ELINK_FLOW_CTRL_RX);
+               vars->link_status |= (LINK_STATUS_TX_FLOW_CONTROL_ENABLED |
+                                     LINK_STATUS_RX_FLOW_CONTROL_ENABLED);
+       }
+       if (CHIP_IS_E3(sc)) {
+               vars->line_speed = params->req_line_speed[0];
+               switch (vars->line_speed) {
+               case ELINK_SPEED_AUTO_NEG:
+                       vars->line_speed = ELINK_SPEED_2500;
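+                       /* fallthrough */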
+               case ELINK_SPEED_2500:
+                       vars->link_status = ELINK_LINK_2500TFD;
+                       break;
+               case ELINK_SPEED_1000:
+                       vars->link_status = ELINK_LINK_1000XFD;
+                       break;
+               case ELINK_SPEED_100:
+                       vars->link_status = ELINK_LINK_100TXFD;
+                       break;
+               case ELINK_SPEED_10:
+                       vars->link_status = ELINK_LINK_10TFD;
+                       break;
+               default:
+                       ELINK_DEBUG_P1(sc, "Invalid link speed %d",
+                                  params->req_line_speed[0]);
+                       return ELINK_STATUS_ERROR;
+               }
+               vars->link_status |= LINK_STATUS_LINK_UP;
+               if (params->loopback_mode == ELINK_LOOPBACK_UMAC)
+                       elink_umac_enable(params, vars, 1);
+               else
+                       elink_umac_enable(params, vars, 0);
+       } else {
+               vars->line_speed = ELINK_SPEED_10000;
+               vars->link_status = (LINK_STATUS_LINK_UP | ELINK_LINK_10GTFD);
+               if (params->loopback_mode == ELINK_LOOPBACK_EMAC)
+                       elink_emac_enable(params, vars, 1);
+               else
+                       elink_emac_enable(params, vars, 0);
+       }
+       vars->link_up = 1;
 
+       if (CHIP_IS_E1x(sc))
+               elink_pbf_update(params, vars->flow_ctrl,
+                                vars->line_speed);
+       /* Disable drain */
        REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0);
+
+       /* Update shared memory */
+       elink_update_mng(params, vars->link_status);
+       return ELINK_STATUS_OK;
+}
+#endif
+static void elink_init_bmac_loopback(struct elink_params *params,
+                                    struct elink_vars *vars)
+{
+       struct bnx2x_softc *sc = params->sc;
+       vars->link_up = 1;
+       vars->line_speed = ELINK_SPEED_10000;
+       vars->duplex = DUPLEX_FULL;
+       vars->flow_ctrl = ELINK_FLOW_CTRL_NONE;
+       vars->mac_type = ELINK_MAC_TYPE_BMAC;
+
+       vars->phy_flags = PHY_XGXS_FLAG;
+
+       elink_xgxs_deassert(params);
+
+       /* Set bmac loopback */
+       elink_bmac_enable(params, vars, 1, 1);
+
+       REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0);
 }
 
 static void elink_init_emac_loopback(struct elink_params *params,
                                     struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        vars->link_up = 1;
        vars->line_speed = ELINK_SPEED_1000;
        vars->duplex = DUPLEX_FULL;
        vars->flow_ctrl = ELINK_FLOW_CTRL_NONE;
        vars->mac_type = ELINK_MAC_TYPE_EMAC;
 
        vars->phy_flags = PHY_XGXS_FLAG;
 
        elink_xgxs_deassert(params);
-       /* Set bmac loopback */
+       /* Set emac loopback */
        elink_emac_enable(params, vars, 1);
        elink_emac_program(params, vars);
        REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0);
 }
 
 static void elink_init_xmac_loopback(struct elink_params *params,
@@ -11741,8 +13816,9 @@ static void elink_init_xmac_loopback(struct elink_params *params,
         */
        elink_set_aer_mmd(params, &params->phy[0]);
        elink_warpcore_reset_lane(sc, &params->phy[0], 0);
-       params->phy[ELINK_INT_PHY].config_loopback(&params->phy[ELINK_INT_PHY],
-                                                  params);
+       params->phy[ELINK_INT_PHY].config_loopback(
+                       &params->phy[ELINK_INT_PHY],
+                       params);
 
        elink_xmac_enable(params, vars, 1);
        REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0);
@@ -11804,12 +13880,11 @@ static void elink_init_xgxs_loopback(struct elink_params *params,
                /* Set external phy loopback */
                uint8_t phy_index;
                for (phy_index = ELINK_EXT_PHY1;
-                    phy_index < params->num_phys; phy_index++)
+                     phy_index < params->num_phys; phy_index++)
                        if (params->phy[phy_index].config_loopback)
-                               params->phy[phy_index].config_loopback(&params->
-                                                                      phy
-                                                                      [phy_index],
-                                                                      params);
+                               params->phy[phy_index].config_loopback(
+                                       &params->phy[phy_index],
+                                       params);
        }
        REG_WR(sc, NIG_REG_EGRESS_DRAIN0_MODE + params->port * 4, 0);
 
@@ -11826,12 +13901,14 @@ void elink_set_rx_filter(struct elink_params *params, uint8_t en)
                val |= en * 0x20;
        REG_WR(sc, NIG_REG_LLH0_BRB1_DRV_MASK + params->port * 4, val);
 
-       REG_WR(sc, NIG_REG_LLH0_BRB1_DRV_MASK_MF + params->port * 4, en * 0x3);
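+       /* Only write the MF drv mask on non-E1 devices */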
+       if (!CHIP_IS_E1(sc)) {
+               REG_WR(sc, NIG_REG_LLH0_BRB1_DRV_MASK_MF + params->port * 4,
+                      en * 0x3);
+       }
 
        REG_WR(sc, (params->port ? NIG_REG_LLH1_BRB1_NOT_MCP :
                    NIG_REG_LLH0_BRB1_NOT_MCP), en);
 }
 
 static elink_status_t elink_avoid_link_flap(struct elink_params *params,
                                            struct elink_vars *vars)
 {
@@ -11839,6 +13916,7 @@ static elink_status_t elink_avoid_link_flap(struct elink_params *params,
        uint32_t dont_clear_stat, lfa_sts;
        struct bnx2x_softc *sc = params->sc;
 
+       elink_set_mdio_emac_per_phy(sc, params);
        /* Sync the link parameters */
        elink_link_status_update(params, vars);
 
@@ -11850,7 +13928,7 @@ static elink_status_t elink_avoid_link_flap(struct elink_params *params,
        for (phy_idx = ELINK_INT_PHY; phy_idx < params->num_phys; phy_idx++) {
                struct elink_phy *phy = &params->phy[phy_idx];
                if (phy->phy_specific_func) {
-                       PMD_DRV_LOG(DEBUG, "Calling PHY specific func");
+                       ELINK_DEBUG_P0(sc, "Calling PHY specific func");
                        phy->phy_specific_func(phy, params, ELINK_PHY_INIT);
                }
                if ((phy->media_type == ELINK_ETH_PHY_SFPP_10G_FIBER) ||
@@ -11859,7 +13937,8 @@ static elink_status_t elink_avoid_link_flap(struct elink_params *params,
                        elink_verify_sfp_module(phy, params);
        }
        lfa_sts = REG_RD(sc, params->lfa_base +
-                        offsetof(struct shmem_lfa, lfa_sts));
+                        offsetof(struct shmem_lfa,
+                                 lfa_sts));
 
        dont_clear_stat = lfa_sts & SHMEM_LFA_DONT_CLEAR_STAT;
 
@@ -11970,12 +14049,12 @@ elink_status_t elink_phy_init(struct elink_params *params,
 {
        int lfa_status;
        struct bnx2x_softc *sc = params->sc;
-       PMD_DRV_LOG(DEBUG, "Phy Initialization started");
-       PMD_DRV_LOG(DEBUG, "(1) req_speed %d, req_flowctrl %d",
-                   params->req_line_speed[0], params->req_flow_ctrl[0]);
-       PMD_DRV_LOG(DEBUG, "(2) req_speed %d, req_flowctrl %d",
-                   params->req_line_speed[1], params->req_flow_ctrl[1]);
-       PMD_DRV_LOG(DEBUG, "req_adv_flow_ctrl 0x%x", params->req_fc_auto_adv);
+       ELINK_DEBUG_P0(sc, "Phy Initialization started");
+       ELINK_DEBUG_P2(sc, "(1) req_speed %d, req_flowctrl %d",
+                  params->req_line_speed[0], params->req_flow_ctrl[0]);
+       ELINK_DEBUG_P2(sc, "(2) req_speed %d, req_flowctrl %d",
+                  params->req_line_speed[1], params->req_flow_ctrl[1]);
+       ELINK_DEBUG_P1(sc, "req_adv_flow_ctrl 0x%x", params->req_fc_auto_adv);
        vars->link_status = 0;
        vars->phy_link_up = 0;
        vars->link_up = 0;
@@ -11988,15 +14067,33 @@ elink_status_t elink_phy_init(struct elink_params *params,
        params->link_flags = ELINK_PHY_INITIALIZED;
        /* Driver opens NIG-BRB filters */
        elink_set_rx_filter(params, 1);
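+       /* Clear the link-change counter kept in shared memory */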
+       elink_chng_link_count(params, 1);
        /* Check if link flap can be avoided */
        lfa_status = elink_check_lfa(params);
 
+       ELINK_DEBUG_P3(sc, " params : port = %x, loopback_mode = %x req_duplex = %x",
+                      params->port, params->loopback_mode,
+                      params->req_duplex[0]);
+       ELINK_DEBUG_P3(sc, " params : switch_cfg = %x, lane_config = %x req_duplex[1] = %x",
+                      params->switch_cfg, params->lane_config,
+                      params->req_duplex[1]);
+       ELINK_DEBUG_P3(sc, " params : chip_id = %x, feature_config_flags = %x, num_phys = %x",
+                      params->chip_id, params->feature_config_flags,
+                      params->num_phys);
+       ELINK_DEBUG_P3(sc, " params : rsrv = %x, eee_mode = %x, hw_led_mode = %x",
+                      params->rsrv, params->eee_mode, params->hw_led_mode);
+       ELINK_DEBUG_P3(sc, " params : multi_phy = %x, req_fc_auto_adv = %x, link_flags = %x",
+                      params->multi_phy_config, params->req_fc_auto_adv,
+                      params->link_flags);
+       ELINK_DEBUG_P2(sc, " params : lfa_base = %x, link_attr = %x",
+                      params->lfa_base, params->link_attr_sync);
        if (lfa_status == 0) {
-               PMD_DRV_LOG(DEBUG, "Link Flap Avoidance in progress");
+               ELINK_DEBUG_P0(sc, "Link Flap Avoidance in progress");
                return elink_avoid_link_flap(params, vars);
        }
 
-       PMD_DRV_LOG(DEBUG, "Cannot avoid link flap lfa_sta=0x%x", lfa_status);
+       ELINK_DEBUG_P1(sc, "Cannot avoid link flap lfa_sta=0x%x",
+                      lfa_status);
        elink_cannot_avoid_link_flap(params, vars, lfa_status);
 
        /* Disable attentions */
@@ -12005,20 +14102,34 @@ elink_status_t elink_phy_init(struct elink_params *params,
                        ELINK_NIG_MASK_XGXS0_LINK10G |
                        ELINK_NIG_MASK_SERDES0_LINK_STATUS |
                        ELINK_NIG_MASK_MI_INT));
+#ifdef ELINK_INCLUDE_EMUL
+       if (!(params->feature_config_flags &
+             ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC))
+#endif
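+       /* In normal builds the guard compiles away and EMAC init is unconditional */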
 
-       elink_emac_init(params);
+       elink_emac_init(params, vars);
 
        if (params->feature_config_flags & ELINK_FEATURE_CONFIG_PFC_ENABLED)
                vars->link_status |= LINK_STATUS_PFC_ENABLED;
 
-       if ((params->num_phys == 0) && !CHIP_REV_IS_SLOW(sc)) {
-               PMD_DRV_LOG(DEBUG, "No phy found for initialization !!");
+       if ((params->num_phys == 0) &&
+           !CHIP_REV_IS_SLOW(sc)) {
+               ELINK_DEBUG_P0(sc, "No phy found for initialization !!");
                return ELINK_STATUS_ERROR;
        }
        set_phy_vars(params, vars);
 
-       PMD_DRV_LOG(DEBUG, "Num of phys on board: %d", params->num_phys);
-
+       ELINK_DEBUG_P1(sc, "Num of phys on board: %d", params->num_phys);
+#ifdef ELINK_INCLUDE_FPGA
+       if (CHIP_REV_IS_FPGA(sc)) {
+               return elink_init_fpga(params, vars);
+       } else
+#endif
+#ifdef ELINK_INCLUDE_EMUL
+       if (CHIP_REV_IS_EMUL(sc)) {
+               return elink_init_emul(params, vars);
+       } else
+#endif
        switch (params->loopback_mode) {
        case ELINK_LOOPBACK_BMAC:
                elink_init_bmac_loopback(params, vars);
@@ -12054,15 +14165,16 @@ elink_status_t elink_phy_init(struct elink_params *params,
        return ELINK_STATUS_OK;
 }
 
-static elink_status_t elink_link_reset(struct elink_params *params,
-                                      struct elink_vars *vars,
-                                      uint8_t reset_ext_phy)
+elink_status_t elink_link_reset(struct elink_params *params,
+                    struct elink_vars *vars,
+                    uint8_t reset_ext_phy)
 {
        struct bnx2x_softc *sc = params->sc;
        uint8_t phy_index, port = params->port, clear_latch_ind = 0;
-       PMD_DRV_LOG(DEBUG, "Resetting the link of port %d", port);
+       ELINK_DEBUG_P1(sc, "Resetting the link of port %d", port);
        /* Disable attentions */
        vars->link_status = 0;
+       elink_chng_link_count(params, 1);
        elink_update_mng(params, vars->link_status);
        vars->eee_status &= ~(SHMEM_EEE_LP_ADV_STATUS_MASK |
                              SHMEM_EEE_ACTIVE_BIT);
@@ -12081,12 +14193,24 @@ static elink_status_t elink_link_reset(struct elink_params *params,
                REG_WR(sc, NIG_REG_BMAC0_OUT_EN + port * 4, 0);
                REG_WR(sc, NIG_REG_EGRESS_EMAC0_OUT_EN + port * 4, 0);
        }
-       if (!CHIP_IS_E3(sc))
-               elink_set_bmac_rx(sc, port, 0);
-       if (CHIP_IS_E3(sc) && !CHIP_REV_IS_FPGA(sc)) {
-               elink_set_xmac_rxtx(params, 0);
-               elink_set_umac_rxtx(params, 0);
-       }
+
+#ifdef ELINK_INCLUDE_EMUL
+       /* Stop BigMac rx */
+       if (!(params->feature_config_flags &
+             ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC))
+#endif
+               if (!CHIP_IS_E3(sc))
+                       elink_set_bmac_rx(sc, params->chip_id, port, 0);
+#ifdef ELINK_INCLUDE_EMUL
+       /* Stop XMAC/UMAC rx */
+       if (!(params->feature_config_flags &
+             ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC))
+#endif
+               if (CHIP_IS_E3(sc) &&
+                   !CHIP_REV_IS_FPGA(sc)) {
+                       elink_set_xmac_rxtx(params, 0);
+                       elink_set_umac_rxtx(params, 0);
+               }
        /* Disable emac */
        if (!CHIP_IS_E3(sc))
                REG_WR(sc, NIG_REG_NIG_EMAC0_EN + port * 4, 0);
@@ -12095,20 +14219,19 @@ static elink_status_t elink_link_reset(struct elink_params *params,
        /* The PHY reset is controlled by GPIO 1
         * Hold it as vars low
         */
        /* Clear link led */
        elink_set_mdio_emac_per_phy(sc, params);
        elink_set_led(params, vars, ELINK_LED_MODE_OFF, 0);
 
        if (reset_ext_phy && (!CHIP_REV_IS_SLOW(sc))) {
                for (phy_index = ELINK_EXT_PHY1; phy_index < params->num_phys;
-                    phy_index++) {
+                     phy_index++) {
                        if (params->phy[phy_index].link_reset) {
                                elink_set_aer_mmd(params,
                                                  &params->phy[phy_index]);
-                               params->phy[phy_index].link_reset(&params->
-                                                                 phy
-                                                                 [phy_index],
-                                                                 params);
+                               params->phy[phy_index].link_reset(
+                                       &params->phy[phy_index],
+                                       params);
                        }
                        if (params->phy[phy_index].flags &
                            ELINK_FLAGS_REARM_LATCH_SIGNAL)
@@ -12122,11 +14245,12 @@ static elink_status_t elink_link_reset(struct elink_params *params,
                elink_bits_dis(sc, NIG_REG_LATCH_BC_0 + port * 4,
                               1 << ELINK_NIG_LATCH_BC_ENABLE_MI_INT);
        }
+#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA)
+       if (!CHIP_REV_IS_SLOW(sc))
+#endif
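+       /* On emulation/FPGA (slow) chips, skip the internal PHY link reset */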
        if (params->phy[ELINK_INT_PHY].link_reset)
-               params->phy[ELINK_INT_PHY].link_reset(&params->
-                                                     phy
-                                                     [ELINK_INT_PHY],
-                                                     params);
+               params->phy[ELINK_INT_PHY].link_reset(
+                       &params->phy[ELINK_INT_PHY], params);
 
        /* Disable nig ingress interface */
        if (!CHIP_IS_E3(sc)) {
@@ -12136,8 +14260,8 @@ static elink_status_t elink_link_reset(struct elink_params *params,
                REG_WR(sc, NIG_REG_BMAC0_IN_EN + port * 4, 0);
                REG_WR(sc, NIG_REG_EMAC0_IN_EN + port * 4, 0);
        } else {
-               uint32_t xmac_base =
-                   (params->port) ? GRCBASE_XMAC1 : GRCBASE_XMAC0;
+               uint32_t xmac_base = (params->port) ? GRCBASE_XMAC1 :
+                                                     GRCBASE_XMAC0;
                elink_set_xumac_nig(params, 0, 0);
                if (REG_RD(sc, MISC_REG_RESET_REG_2) &
                    MISC_REGISTERS_RESET_REG_2_XMAC)
@@ -12148,9 +14272,8 @@ static elink_status_t elink_link_reset(struct elink_params *params,
        vars->phy_flags = 0;
        return ELINK_STATUS_OK;
 }
 
-elink_status_t elink_lfa_reset(struct elink_params * params,
-                              struct elink_vars * vars)
+elink_status_t elink_lfa_reset(struct elink_params *params,
+                              struct elink_vars *vars)
 {
        struct bnx2x_softc *sc = params->sc;
        vars->link_up = 0;
@@ -12169,13 +14292,13 @@ elink_status_t elink_lfa_reset(struct elink_params * params,
         * are passed.
         */
        if (!CHIP_IS_E3(sc))
-               elink_set_bmac_rx(sc, params->port, 0);
+               elink_set_bmac_rx(sc, params->chip_id, params->port, 0);
 
        if (CHIP_IS_E3(sc)) {
                elink_set_xmac_rxtx(params, 0);
                elink_set_umac_rxtx(params, 0);
        }
        /* Wait 10ms for the pipe to clean up */
        DELAY(1000 * 10);
 
        /* Clean the NIG-BRB using the network filters in a way that will
@@ -12190,7 +14313,7 @@ elink_status_t elink_lfa_reset(struct elink_params * params,
         * minimum management protocol down time.
         */
        if (!CHIP_IS_E3(sc))
-               elink_set_bmac_rx(sc, params->port, 1);
+               elink_set_bmac_rx(sc, params->chip_id, params->port, 1);
 
        if (CHIP_IS_E3(sc)) {
                elink_set_xmac_rxtx(params, 1);
@@ -12205,10 +14328,10 @@ elink_status_t elink_lfa_reset(struct elink_params * params,
 /*                             Common function                             */
 /****************************************************************************/
 static elink_status_t elink_8073_common_init_phy(struct bnx2x_softc *sc,
-                                                uint32_t shmem_base_path[],
-                                                uint32_t shmem2_base_path[],
-                                                uint8_t phy_index,
-                                                __rte_unused uint32_t chip_id)
+                                     uint32_t shmem_base_path[],
+                                     uint32_t shmem2_base_path[],
+                                     uint8_t phy_index,
+                                     __rte_unused uint32_t chip_id)
 {
        struct elink_phy phy[PORT_MAX];
        struct elink_phy *phy_blk[PORT_MAX];
@@ -12216,8 +14339,8 @@ static elink_status_t elink_8073_common_init_phy(struct bnx2x_softc *sc,
        int8_t port = 0;
        int8_t port_of_path = 0;
        uint32_t swap_val, swap_override;
        swap_val = REG_RD(sc, NIG_REG_PORT_SWAP);
        swap_override = REG_RD(sc, NIG_REG_STRAP_OVERRIDE);
        port ^= (swap_val && swap_override);
        elink_ext_phy_hw_reset(sc, port);
        /* PART1 - Reset both phys */
@@ -12238,7 +14361,7 @@ static elink_status_t elink_8073_common_init_phy(struct bnx2x_softc *sc,
                if (elink_populate_phy(sc, phy_index, shmem_base, shmem2_base,
                                       port_of_path, &phy[port]) !=
                    ELINK_STATUS_OK) {
-                       PMD_DRV_LOG(DEBUG, "populate_phy failed");
+                       ELINK_DEBUG_P0(sc, "populate_phy failed");
                        return ELINK_STATUS_ERROR;
                }
                /* Disable attentions */
@@ -12253,11 +14376,14 @@ static elink_status_t elink_8073_common_init_phy(struct bnx2x_softc *sc,
                 * to write to access its registers
                 */
                elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_2,
                                    MISC_REGISTERS_GPIO_OUTPUT_HIGH, port);
 
                /* Reset the phy */
                elink_cl45_write(sc, &phy[port],
-                                MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 1 << 15);
+                                MDIO_PMA_DEVAD,
+                                MDIO_PMA_REG_CTRL,
+                                1 << 15);
        }
 
        /* Add delay of 150ms after reset */
@@ -12278,8 +14404,8 @@ static elink_status_t elink_8073_common_init_phy(struct bnx2x_softc *sc,
                else
                        port_of_path = 0;
 
-               PMD_DRV_LOG(DEBUG, "Loading spirom for phy address 0x%x",
-                           phy_blk[port]->addr);
+               ELINK_DEBUG_P1(sc, "Loading spirom for phy address 0x%x",
+                          phy_blk[port]->addr);
                if (elink_8073_8727_external_rom_boot(sc, phy_blk[port],
                                                      port_of_path))
                        return ELINK_STATUS_ERROR;
@@ -12292,7 +14418,8 @@ static elink_status_t elink_8073_common_init_phy(struct bnx2x_softc *sc,
                /* Phase1 of TX_POWER_DOWN reset */
                elink_cl45_write(sc, phy_blk[port],
                                 MDIO_PMA_DEVAD,
-                                MDIO_PMA_REG_TX_POWER_DOWN, (val | 1 << 10));
+                                MDIO_PMA_REG_TX_POWER_DOWN,
+                                (val | 1 << 10));
        }
 
        /* Toggle Transmitter: Power down and then up with 600ms delay
@@ -12309,9 +14436,9 @@ static elink_status_t elink_8073_common_init_phy(struct bnx2x_softc *sc,
                                MDIO_PMA_REG_TX_POWER_DOWN, &val);
 
                elink_cl45_write(sc, phy_blk[port],
                                 MDIO_PMA_DEVAD,
                                 MDIO_PMA_REG_TX_POWER_DOWN,
                                 (val & (~(1 << 10))));
                DELAY(1000 * 15);
 
                /* Read modify write the SPI-ROM version select register */
@@ -12324,16 +14451,15 @@ static elink_status_t elink_8073_common_init_phy(struct bnx2x_softc *sc,
 
                /* set GPIO2 back to LOW */
                elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_2,
                                    MISC_REGISTERS_GPIO_OUTPUT_LOW, port);
        }
        return ELINK_STATUS_OK;
 }
 
 static elink_status_t elink_8726_common_init_phy(struct bnx2x_softc *sc,
-                                                uint32_t shmem_base_path[],
-                                                uint32_t shmem2_base_path[],
-                                                uint8_t phy_index,
-                                                __rte_unused uint32_t chip_id)
+                                     uint32_t shmem_base_path[],
+                                     uint32_t shmem2_base_path[],
+                                     uint8_t phy_index,
+                                     __rte_unused uint32_t chip_id)
 {
        uint32_t val;
        int8_t port;
@@ -12342,8 +14468,8 @@ static elink_status_t elink_8726_common_init_phy(struct bnx2x_softc *sc,
        /* Enable the module detection interrupt */
        val = REG_RD(sc, MISC_REG_GPIO_EVENT_EN);
        val |= ((1 << MISC_REGISTERS_GPIO_3) |
-               (1 <<
-                (MISC_REGISTERS_GPIO_3 + MISC_REGISTERS_GPIO_PORT_SHIFT)));
+               (1 << (MISC_REGISTERS_GPIO_3 +
+                MISC_REGISTERS_GPIO_PORT_SHIFT)));
        REG_WR(sc, MISC_REG_GPIO_EVENT_EN, val);
 
        elink_ext_phy_hw_reset(sc, 0);
@@ -12361,33 +14487,33 @@ static elink_status_t elink_8726_common_init_phy(struct bnx2x_softc *sc,
                }
                /* Extract the ext phy address for the port */
                if (elink_populate_phy(sc, phy_index, shmem_base, shmem2_base,
-                                      port, &phy) != ELINK_STATUS_OK) {
-                       PMD_DRV_LOG(DEBUG, "populate phy failed");
+                                      port, &phy) != ELINK_STATUS_OK) {
+                       ELINK_DEBUG_P0(sc, "populate phy failed");
                        return ELINK_STATUS_ERROR;
                }
 
                /* Reset phy */
                elink_cl45_write(sc, &phy,
                                 MDIO_PMA_DEVAD, MDIO_PMA_REG_GEN_CTRL, 0x0001);
 
                /* Set fault module detected LED on */
                elink_cb_gpio_write(sc, MISC_REGISTERS_GPIO_0,
                                    MISC_REGISTERS_GPIO_HIGH, port);
        }
 
        return ELINK_STATUS_OK;
 }
 
 static void elink_get_ext_phy_reset_gpio(struct bnx2x_softc *sc,
-                                        uint32_t shmem_base, uint8_t * io_gpio,
-                                        uint8_t * io_port)
+                                        uint32_t shmem_base,
+                                        uint8_t *io_gpio, uint8_t *io_port)
 {
 
        uint32_t phy_gpio_reset = REG_RD(sc, shmem_base +
-                                        offsetof(struct shmem_region,
-                                                 dev_info.
-                                                 port_hw_config[PORT_0].
-                                                 default_cfg));
+                                         offsetof(struct shmem_region,
+                               dev_info.port_hw_config[PORT_0].default_cfg));
        switch (phy_gpio_reset) {
        case PORT_HW_CFG_EXT_PHY_GPIO_RST_GPIO0_P0:
                *io_gpio = 0;
@@ -12428,10 +14554,10 @@ static void elink_get_ext_phy_reset_gpio(struct bnx2x_softc *sc,
 }
 
 static elink_status_t elink_8727_common_init_phy(struct bnx2x_softc *sc,
-                                                uint32_t shmem_base_path[],
-                                                uint32_t shmem2_base_path[],
-                                                uint8_t phy_index,
-                                                __rte_unused uint32_t chip_id)
+                                     uint32_t shmem_base_path[],
+                                     uint32_t shmem2_base_path[],
+                                     uint8_t phy_index,
+                                     __rte_unused uint32_t chip_id)
 {
        int8_t port, reset_gpio;
        uint32_t swap_val, swap_override;
@@ -12448,18 +14574,17 @@ static elink_status_t elink_8727_common_init_phy(struct bnx2x_softc *sc,
         * Default is GPIO1, PORT1
         */
        elink_get_ext_phy_reset_gpio(sc, shmem_base_path[0],
-                                    (uint8_t *) & reset_gpio,
-                                    (uint8_t *) & port);
+                                    (uint8_t *)&reset_gpio, (uint8_t *)&port);
 
        /* Calculate the port based on port swap */
        port ^= (swap_val && swap_override);
 
        /* Initiate PHY reset */
        elink_cb_gpio_write(sc, reset_gpio, MISC_REGISTERS_GPIO_OUTPUT_LOW,
                            port);
        DELAY(1000 * 1);
        elink_cb_gpio_write(sc, reset_gpio, MISC_REGISTERS_GPIO_OUTPUT_HIGH,
                            port);
 
        DELAY(1000 * 5);
 
@@ -12481,8 +14606,8 @@ static elink_status_t elink_8727_common_init_phy(struct bnx2x_softc *sc,
                /* Extract the ext phy address for the port */
                if (elink_populate_phy(sc, phy_index, shmem_base, shmem2_base,
                                       port_of_path, &phy[port]) !=
-                   ELINK_STATUS_OK) {
-                       PMD_DRV_LOG(DEBUG, "populate phy failed");
+                                      ELINK_STATUS_OK) {
+                       ELINK_DEBUG_P0(sc, "populate phy failed");
                        return ELINK_STATUS_ERROR;
                }
                /* disable attentions */
@@ -12493,6 +14618,7 @@ static elink_status_t elink_8727_common_init_phy(struct bnx2x_softc *sc,
                                ELINK_NIG_MASK_SERDES0_LINK_STATUS |
                                ELINK_NIG_MASK_MI_INT));
 
                /* Reset the phy */
                elink_cl45_write(sc, &phy[port],
                                 MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 1 << 15);
@@ -12513,25 +14639,25 @@ static elink_status_t elink_8727_common_init_phy(struct bnx2x_softc *sc,
                        port_of_path = port;
                else
                        port_of_path = 0;
-               PMD_DRV_LOG(DEBUG, "Loading spirom for phy address 0x%x",
-                           phy_blk[port]->addr);
+               ELINK_DEBUG_P1(sc, "Loading spirom for phy address 0x%x",
+                          phy_blk[port]->addr);
                if (elink_8073_8727_external_rom_boot(sc, phy_blk[port],
                                                      port_of_path))
                        return ELINK_STATUS_ERROR;
                /* Disable PHY transmitter output */
                elink_cl45_write(sc, phy_blk[port],
-                                MDIO_PMA_DEVAD, MDIO_PMA_REG_TX_DISABLE, 1);
+                                MDIO_PMA_DEVAD,
+                                MDIO_PMA_REG_TX_DISABLE, 1);
 
        }
        return ELINK_STATUS_OK;
 }
 
 static elink_status_t elink_84833_common_init_phy(struct bnx2x_softc *sc,
-                                                 uint32_t shmem_base_path[],
-                                                 __rte_unused uint32_t
-                                                 shmem2_base_path[],
-                                                 __rte_unused uint8_t
-                                                 phy_index, uint32_t chip_id)
+                               uint32_t shmem_base_path[],
+                               __rte_unused uint32_t shmem2_base_path[],
+                               __rte_unused uint8_t phy_index,
+                               uint32_t chip_id)
 {
        uint8_t reset_gpios;
        reset_gpios = elink_84833_get_reset_gpios(sc, shmem_base_path, chip_id);
@@ -12540,16 +14666,15 @@ static elink_status_t elink_84833_common_init_phy(struct bnx2x_softc *sc,
        DELAY(10);
        elink_cb_gpio_mult_write(sc, reset_gpios,
                                 MISC_REGISTERS_GPIO_OUTPUT_HIGH);
-       PMD_DRV_LOG(DEBUG, "84833 reset pulse on pin values 0x%x", reset_gpios);
+       ELINK_DEBUG_P1(sc, "84833 reset pulse on pin values 0x%x",
+               reset_gpios);
        return ELINK_STATUS_OK;
 }
 
 static elink_status_t elink_ext_phy_common_init(struct bnx2x_softc *sc,
-                                               uint32_t shmem_base_path[],
-                                               uint32_t shmem2_base_path[],
-                                               uint8_t phy_index,
-                                               uint32_t ext_phy_type,
-                                               uint32_t chip_id)
+                                    uint32_t shmem_base_path[],
+                                    uint32_t shmem2_base_path[],
+                                    uint8_t phy_index,
+                                    uint32_t ext_phy_type, uint32_t chip_id)
 {
        elink_status_t rc = ELINK_STATUS_OK;
 
@@ -12577,44 +14702,50 @@ static elink_status_t elink_ext_phy_common_init(struct bnx2x_softc *sc,
                break;
        case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84833:
        case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84834:
+       case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X84858:
                /* GPIO3's are linked, and so both need to be toggled
                 * to obtain required 2us pulse.
                 */
                rc = elink_84833_common_init_phy(sc, shmem_base_path,
                                                 shmem2_base_path,
                                                 phy_index, chip_id);
                break;
        case PORT_HW_CFG_XGXS_EXT_PHY_TYPE_FAILURE:
                rc = ELINK_STATUS_ERROR;
                break;
        default:
-               PMD_DRV_LOG(DEBUG,
-                           "ext_phy 0x%x common init not required",
-                           ext_phy_type);
+               ELINK_DEBUG_P1(sc,
+                          "ext_phy 0x%x common init not required",
+                          ext_phy_type);
                break;
        }
 
        if (rc != ELINK_STATUS_OK)
-               elink_cb_event_log(sc, ELINK_LOG_ID_PHY_UNINITIALIZED, 0);      // "Warning: PHY was not initialized,"
-       // " Port %d",
+               elink_cb_event_log(sc, ELINK_LOG_ID_PHY_UNINITIALIZED, 0);
+                                    /* "Warning: PHY was not initialized,"
+                                     * " Port %d",
+                                     */
 
        return rc;
 }
 
-elink_status_t elink_common_init_phy(struct bnx2x_softc * sc,
-                                    uint32_t shmem_base_path[],
-                                    uint32_t shmem2_base_path[],
-                                    uint32_t chip_id,
-                                    __rte_unused uint8_t one_port_enabled)
+elink_status_t elink_common_init_phy(struct bnx2x_softc *sc,
+                         uint32_t shmem_base_path[],
+                         uint32_t shmem2_base_path[], uint32_t chip_id,
+                         __rte_unused uint8_t one_port_enabled)
 {
        elink_status_t rc = ELINK_STATUS_OK;
        uint32_t phy_ver, val;
        uint8_t phy_index = 0;
        uint32_t ext_phy_type, ext_phy_config;
+#if defined(ELINK_INCLUDE_EMUL) || defined(ELINK_INCLUDE_FPGA)
+       if (CHIP_REV_IS_EMUL(sc) || CHIP_REV_IS_FPGA(sc))
+               return ELINK_STATUS_OK;
+#endif
 
-       elink_set_mdio_clk(sc, GRCBASE_EMAC0);
-       elink_set_mdio_clk(sc, GRCBASE_EMAC1);
-       PMD_DRV_LOG(DEBUG, "Begin common phy init");
+       elink_set_mdio_clk(sc, chip_id, GRCBASE_EMAC0);
+       elink_set_mdio_clk(sc, chip_id, GRCBASE_EMAC1);
+       ELINK_DEBUG_P0(sc, "Begin common phy init");
        if (CHIP_IS_E3(sc)) {
                /* Enable EPIO */
                val = REG_RD(sc, MISC_REG_GEN_PURP_HWG);
@@ -12625,14 +14756,14 @@ elink_status_t elink_common_init_phy(struct bnx2x_softc * sc,
                         offsetof(struct shmem_region,
                                  port_mb[PORT_0].ext_phy_fw_version));
        if (phy_ver) {
-               PMD_DRV_LOG(DEBUG, "Not doing common init; phy ver is 0x%x",
-                           phy_ver);
+               ELINK_DEBUG_P1(sc, "Not doing common init; phy ver is 0x%x",
+                              phy_ver);
                return ELINK_STATUS_OK;
        }
 
        /* Read the ext_phy_type for arbitrary port(0) */
        for (phy_index = ELINK_EXT_PHY1; phy_index < ELINK_MAX_PHYS;
-            phy_index++) {
+             phy_index++) {
                ext_phy_config = elink_get_ext_phy_config(sc,
                                                          shmem_base_path[0],
                                                          phy_index, 0);
@@ -12655,10 +14786,9 @@ static void elink_check_over_curr(struct elink_params *params,
 
        cfg_pin = (REG_RD(sc, params->shmem_base +
                          offsetof(struct shmem_region,
-                                  dev_info.port_hw_config[port].
-                                  e3_cmn_pin_cfg1)) &
+                              dev_info.port_hw_config[port].e3_cmn_pin_cfg1)) &
                   PORT_HW_CFG_E3_OVER_CURRENT_MASK) >>
-           PORT_HW_CFG_E3_OVER_CURRENT_SHIFT;
+               PORT_HW_CFG_E3_OVER_CURRENT_SHIFT;
 
        /* Ignore check if no external input PIN available */
        if (elink_get_cfg_pin(sc, cfg_pin, &pin_val) != ELINK_STATUS_OK)
@@ -12666,13 +14796,16 @@ static void elink_check_over_curr(struct elink_params *params,
 
        if (!pin_val) {
                if ((vars->phy_flags & PHY_OVER_CURRENT_FLAG) == 0) {
-                       elink_cb_event_log(sc, ELINK_LOG_ID_OVER_CURRENT, params->port);        //"Error:  Power fault on Port %d has"
-                       //  " been detected and the power to "
-                       //  "that SFP+ module has been removed"
-                       //  " to prevent failure of the card."
-                       //  " Please remove the SFP+ module and"
-                       //  " restart the system to clear this"
-                       //  " error.",
+                       elink_cb_event_log(sc, ELINK_LOG_ID_OVER_CURRENT,
+                                          params->port);
+                                       /* "Error:  Power fault on Port %d has"
+                                        *  " been detected and the power to "
+                                        *  "that SFP+ module has been removed"
+                                        *  " to prevent failure of the card."
+                                        *  " Please remove the SFP+ module and"
+                                        *  " restart the system to clear this"
+                                        *  " error.",
+                                        */
                        vars->phy_flags |= PHY_OVER_CURRENT_FLAG;
                        elink_warpcore_power_module(params, 0);
                }
@@ -12682,9 +14815,9 @@ static void elink_check_over_curr(struct elink_params *params,
 
 /* Returns 0 if no change occurred since last check; 1 otherwise. */
 static uint8_t elink_analyze_link_error(struct elink_params *params,
-                                       struct elink_vars *vars,
-                                       uint32_t status, uint32_t phy_flag,
-                                       uint32_t link_flag, uint8_t notify)
+                                   struct elink_vars *vars, uint32_t status,
+                                   uint32_t phy_flag, uint32_t link_flag,
+                                   uint8_t notify)
 {
        struct bnx2x_softc *sc = params->sc;
        /* Compare new value with previous value */
@@ -12697,16 +14830,20 @@ static uint8_t elink_analyze_link_error(struct elink_params *params,
        /* If values differ */
        switch (phy_flag) {
        case PHY_HALF_OPEN_CONN_FLAG:
-               PMD_DRV_LOG(DEBUG, "Analyze Remote Fault");
+               ELINK_DEBUG_P0(sc, "Analyze Remote Fault");
                break;
        case PHY_SFP_TX_FAULT_FLAG:
-               PMD_DRV_LOG(DEBUG, "Analyze TX Fault");
+               ELINK_DEBUG_P0(sc, "Analyze TX Fault");
                break;
        default:
-               PMD_DRV_LOG(DEBUG, "Analyze UNKNOWN");
+               ELINK_DEBUG_P0(sc, "Analyze UNKNOWN");
        }
-       PMD_DRV_LOG(DEBUG, "Link changed:[%x %x]->%x", vars->link_up,
-                   old_status, status);
+       ELINK_DEBUG_P3(sc, "Link changed:[%x %x]->%x", vars->link_up,
+          old_status, status);
+
+       /* Do not touch the link in case physical link down */
+       if ((vars->phy_flags & PHY_PHYSICAL_LINK_FLAG) == 0)
+               return 1;
 
        /* a. Update shmem->link_status accordingly
         * b. Update elink_vars->link_up
@@ -12749,17 +14886,18 @@ static uint8_t elink_analyze_link_error(struct elink_params *params,
 }
 
 /******************************************************************************
-* Description:
-*      This function checks for half opened connection change indication.
-*      When such change occurs, it calls the elink_analyze_link_error
-*      to check if Remote Fault is set or cleared. Reception of remote fault
-*      status message in the MAC indicates that the peer's MAC has detected
-*      a fault, for example, due to break in the TX side of fiber.
-*
-******************************************************************************/
-static elink_status_t elink_check_half_open_conn(struct elink_params *params,
-                                                struct elink_vars *vars,
-                                                uint8_t notify)
+ * Description:
+ *     This function checks for a half-opened connection change indication.
+ *     When such a change occurs, it calls elink_analyze_link_error
+ *     to check whether Remote Fault is set or cleared. Reception of a
+ *     remote fault status message in the MAC indicates that the peer's MAC
+ *     has detected a fault, for example, due to a break in the TX side of
+ *     the fiber.
+ *
+ ******************************************************************************/
+static
+elink_status_t elink_check_half_open_conn(struct elink_params *params,
+                               struct elink_vars *vars,
+                               uint8_t notify)
 {
        struct bnx2x_softc *sc = params->sc;
        uint32_t lss_status = 0;
@@ -12771,7 +14909,7 @@ static elink_status_t elink_check_half_open_conn(struct elink_params *params,
 
        if (CHIP_IS_E3(sc) &&
            (REG_RD(sc, MISC_REG_RESET_REG_2) &
-            (MISC_REGISTERS_RESET_REG_2_XMAC))) {
+             (MISC_REGISTERS_RESET_REG_2_XMAC))) {
                /* Check E3 XMAC */
                /* Note that link speed cannot be queried here, since it may be
                 * zero while link is down. In case UMAC is active, LSS will
@@ -12796,7 +14934,7 @@ static elink_status_t elink_check_half_open_conn(struct elink_params *params,
                uint32_t lss_status_reg;
                uint32_t wb_data[2];
                mac_base = params->port ? NIG_REG_INGRESS_BMAC1_MEM :
-                   NIG_REG_INGRESS_BMAC0_MEM;
+                       NIG_REG_INGRESS_BMAC0_MEM;
                /*  Read BIGMAC_REGISTER_RX_LSS_STATUS */
                if (CHIP_IS_E2(sc))
                        lss_status_reg = BIGMAC2_REGISTER_RX_LSS_STAT;
@@ -12812,7 +14950,6 @@ static elink_status_t elink_check_half_open_conn(struct elink_params *params,
        }
        return ELINK_STATUS_OK;
 }
-
 static void elink_sfp_tx_fault_detection(struct elink_phy *phy,
                                         struct elink_params *params,
                                         struct elink_vars *vars)
@@ -12823,15 +14960,12 @@ static void elink_sfp_tx_fault_detection(struct elink_phy *phy,
 
        /* Get The SFP+ TX_Fault controlling pin ([eg]pio) */
        cfg_pin = (REG_RD(sc, params->shmem_base + offsetof(struct shmem_region,
-                                                           dev_info.
-                                                           port_hw_config
-                                                           [port].
-                                                           e3_cmn_pin_cfg)) &
+                         dev_info.port_hw_config[port].e3_cmn_pin_cfg)) &
                   PORT_HW_CFG_E3_TX_FAULT_MASK) >>
-           PORT_HW_CFG_E3_TX_FAULT_SHIFT;
+                 PORT_HW_CFG_E3_TX_FAULT_SHIFT;
 
        if (elink_get_cfg_pin(sc, cfg_pin, &value)) {
-               PMD_DRV_LOG(DEBUG, "Failed to read pin 0x%02x", cfg_pin);
+               ELINK_DEBUG_P1(sc, "Failed to read pin 0x%02x", cfg_pin);
                return;
        }
 
@@ -12853,24 +14987,25 @@ static void elink_sfp_tx_fault_detection(struct elink_phy *phy,
 
                /* If module is unapproved, led should be on regardless */
                if (!(phy->flags & ELINK_FLAGS_SFP_NOT_APPROVED)) {
-                       PMD_DRV_LOG(DEBUG, "Change TX_Fault LED: ->%x",
-                                   led_mode);
+                       ELINK_DEBUG_P1(sc, "Change TX_Fault LED: ->%x",
+                          led_mode);
                        elink_set_e3_module_fault_led(params, led_mode);
                }
        }
 }
-
 static void elink_kr2_recovery(struct elink_params *params,
-                              struct elink_vars *vars, struct elink_phy *phy)
+                              struct elink_vars *vars,
+                              struct elink_phy *phy)
 {
-       PMD_DRV_LOG(DEBUG, "KR2 recovery");
-
+       struct bnx2x_softc *sc = params->sc;
+       ELINK_DEBUG_P0(sc, "KR2 recovery");
        elink_warpcore_enable_AN_KR2(phy, params, vars);
        elink_warpcore_restart_AN_KR(phy, params);
 }
 
 static void elink_check_kr2_wa(struct elink_params *params,
-                              struct elink_vars *vars, struct elink_phy *phy)
+                              struct elink_vars *vars,
+                              struct elink_phy *phy)
 {
        struct bnx2x_softc *sc = params->sc;
        uint16_t base_page, next_page, not_kr2_device, lane;
@@ -12888,14 +15023,14 @@ static void elink_check_kr2_wa(struct elink_params *params,
 
        sigdet = elink_warpcore_get_sigdet(phy, params);
        if (!sigdet) {
-               if (!(vars->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE)) {
+               if (!(params->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE)) {
                        elink_kr2_recovery(params, vars, phy);
-                       PMD_DRV_LOG(DEBUG, "No sigdet");
+                       ELINK_DEBUG_P0(sc, "No sigdet");
                }
                return;
        }
 
-       lane = elink_get_warpcore_lane(params);
+       lane = elink_get_warpcore_lane(phy, params);
        CL22_WR_OVER_CL45(sc, phy, MDIO_REG_BANK_AER_BLOCK,
                          MDIO_AER_BLOCK_AER_REG, lane);
        elink_cl45_read(sc, phy, MDIO_AN_DEVAD,
@@ -12906,9 +15041,9 @@ static void elink_check_kr2_wa(struct elink_params *params,
 
        /* CL73 has not begun yet */
        if (base_page == 0) {
-               if (!(vars->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE)) {
+               if (!(params->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE)) {
                        elink_kr2_recovery(params, vars, phy);
-                       PMD_DRV_LOG(DEBUG, "No BP");
+                       ELINK_DEBUG_P0(sc, "No BP");
                }
                return;
        }
@@ -12922,10 +15057,10 @@ static void elink_check_kr2_wa(struct elink_params *params,
                            ((next_page & 0xe0) == 0x20))));
 
        /* In case KR2 is already disabled, check if we need to re-enable it */
-       if (!(vars->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE)) {
+       if (!(params->link_attr_sync & LINK_ATTR_SYNC_KR2_ENABLE)) {
                if (!not_kr2_device) {
-                       PMD_DRV_LOG(DEBUG, "BP=0x%x, NP=0x%x", base_page,
-                                   next_page);
+                       ELINK_DEBUG_P2(sc, "BP=0x%x, NP=0x%x", base_page,
+                          next_page);
                        elink_kr2_recovery(params, vars, phy);
                }
                return;
@@ -12933,7 +15068,7 @@ static void elink_check_kr2_wa(struct elink_params *params,
        /* KR2 is enabled, but not KR2 device */
        if (not_kr2_device) {
                /* Disable KR2 on both lanes */
-               PMD_DRV_LOG(DEBUG, "BP=0x%x, NP=0x%x", base_page, next_page);
+               ELINK_DEBUG_P2(sc, "BP=0x%x, NP=0x%x", base_page, next_page);
                elink_disable_kr2(params, vars, phy);
                /* Restart AN on leading lane */
                elink_warpcore_restart_AN_KR(phy, params);
@@ -12949,9 +15084,8 @@ void elink_period_func(struct elink_params *params, struct elink_vars *vars)
                if (params->phy[phy_idx].flags & ELINK_FLAGS_TX_ERROR_CHECK) {
                        elink_set_aer_mmd(params, &params->phy[phy_idx]);
                        if (elink_check_half_open_conn(params, vars, 1) !=
-                           ELINK_STATUS_OK) {
-                               PMD_DRV_LOG(DEBUG, "Fault detection failed");
-                       }
+                           ELINK_STATUS_OK)
+                               ELINK_DEBUG_P0(sc, "Fault detection failed");
                        break;
                }
        }
@@ -12959,22 +15093,24 @@ void elink_period_func(struct elink_params *params, struct elink_vars *vars)
        if (CHIP_IS_E3(sc)) {
                struct elink_phy *phy = &params->phy[ELINK_INT_PHY];
                elink_set_aer_mmd(params, phy);
-               if ((phy->supported & ELINK_SUPPORTED_20000baseKR2_Full) &&
-                   (phy->speed_cap_mask & PORT_HW_CFG_SPEED_CAPABILITY_D0_20G))
+               if (((phy->req_line_speed == ELINK_SPEED_AUTO_NEG) &&
+                    (phy->speed_cap_mask &
+                     PORT_HW_CFG_SPEED_CAPABILITY_D0_20G)) ||
+                   (phy->req_line_speed == ELINK_SPEED_20000))
                        elink_check_kr2_wa(params, vars, phy);
                elink_check_over_curr(params, vars);
                if (vars->rx_tx_asic_rst)
                        elink_warpcore_config_runtime(phy, params, vars);
 
                if ((REG_RD(sc, params->shmem_base +
-                           offsetof(struct shmem_region,
-                                    dev_info.port_hw_config[params->port].
-                                    default_cfg))
-                    & PORT_HW_CFG_NET_SERDES_IF_MASK) ==
+                           offsetof(struct shmem_region, dev_info.
+                               port_hw_config[params->port].default_cfg))
+                   & PORT_HW_CFG_NET_SERDES_IF_MASK) ==
                    PORT_HW_CFG_NET_SERDES_IF_SFI) {
-                       if (elink_is_sfp_module_plugged(params)) {
+                       if (elink_is_sfp_module_plugged(phy, params)) {
                                elink_sfp_tx_fault_detection(phy, params, vars);
-                       } else if (vars->link_status & LINK_STATUS_SFP_TX_FAULT) {
+                       } else if (vars->link_status &
+                               LINK_STATUS_SFP_TX_FAULT) {
                                /* Clean trail, interrupt corrects the leds */
                                vars->link_status &= ~LINK_STATUS_SFP_TX_FAULT;
                                vars->phy_flags &= ~PHY_SFP_TX_FAULT_FLAG;
@@ -12986,17 +15122,18 @@ void elink_period_func(struct elink_params *params, struct elink_vars *vars)
 }
 
 uint8_t elink_fan_failure_det_req(struct bnx2x_softc *sc,
-                                 uint32_t shmem_base,
-                                 uint32_t shmem2_base, uint8_t port)
+                            uint32_t shmem_base,
+                            uint32_t shmem2_base,
+                            uint8_t port)
 {
        uint8_t phy_index, fan_failure_det_req = 0;
        struct elink_phy phy;
        for (phy_index = ELINK_EXT_PHY1; phy_index < ELINK_MAX_PHYS;
-            phy_index++) {
+             phy_index++) {
                if (elink_populate_phy(sc, phy_index, shmem_base, shmem2_base,
                                       port, &phy)
                    != ELINK_STATUS_OK) {
-                       PMD_DRV_LOG(DEBUG, "populate phy failed");
+                       ELINK_DEBUG_P0(sc, "populate phy failed");
                        return 0;
                }
                fan_failure_det_req |= (phy.flags &
@@ -13016,24 +15153,27 @@ void elink_hw_reset_phy(struct elink_params *params)
                        ELINK_NIG_MASK_SERDES0_LINK_STATUS |
                        ELINK_NIG_MASK_MI_INT));
 
-       for (phy_index = ELINK_INT_PHY; phy_index < ELINK_MAX_PHYS; phy_index++) {
+       for (phy_index = ELINK_INT_PHY; phy_index < ELINK_MAX_PHYS;
+             phy_index++) {
                if (params->phy[phy_index].hw_reset) {
-                       params->phy[phy_index].hw_reset(&params->phy[phy_index],
-                                                       params);
+                       params->phy[phy_index].hw_reset(
+                               &params->phy[phy_index],
+                               params);
                        params->phy[phy_index] = phy_null;
                }
        }
 }
 
 void elink_init_mod_abs_int(struct bnx2x_softc *sc, struct elink_vars *vars,
-                           __rte_unused uint32_t chip_id, uint32_t shmem_base,
-                           uint32_t shmem2_base, uint8_t port)
+                           uint32_t chip_id, uint32_t shmem_base,
+                           uint32_t shmem2_base,
+                           uint8_t port)
 {
        uint8_t gpio_num = 0xff, gpio_port = 0xff, phy_index;
        uint32_t val;
        uint32_t offset, aeu_mask, swap_val, swap_override, sync_offset;
        if (CHIP_IS_E3(sc)) {
-               if (elink_get_mod_abs_int_cfg(sc,
+               if (elink_get_mod_abs_int_cfg(sc, chip_id,
                                              shmem_base,
                                              port,
                                              &gpio_num,
@@ -13042,11 +15182,11 @@ void elink_init_mod_abs_int(struct bnx2x_softc *sc, struct elink_vars *vars,
        } else {
                struct elink_phy phy;
                for (phy_index = ELINK_EXT_PHY1; phy_index < ELINK_MAX_PHYS;
-                    phy_index++) {
+                     phy_index++) {
                        if (elink_populate_phy(sc, phy_index, shmem_base,
                                               shmem2_base, port, &phy)
                            != ELINK_STATUS_OK) {
-                               PMD_DRV_LOG(DEBUG, "populate phy failed");
+                               ELINK_DEBUG_P0(sc, "populate phy failed");
                                return;
                        }
                        if (phy.type == PORT_HW_CFG_XGXS_EXT_PHY_TYPE_BNX2X8726) {
@@ -13069,15 +15209,15 @@ void elink_init_mod_abs_int(struct bnx2x_softc *sc, struct elink_vars *vars,
        gpio_port ^= (swap_val && swap_override);
 
        vars->aeu_int_mask = AEU_INPUTS_ATTN_BITS_GPIO0_FUNCTION_0 <<
-           (gpio_num + (gpio_port << 2));
+               (gpio_num + (gpio_port << 2));
 
        sync_offset = shmem_base +
-           offsetof(struct shmem_region,
-                    dev_info.port_hw_config[port].aeu_int_mask);
+               offsetof(struct shmem_region,
+                        dev_info.port_hw_config[port].aeu_int_mask);
        REG_WR(sc, sync_offset, vars->aeu_int_mask);
 
-       PMD_DRV_LOG(DEBUG, "Setting MOD_ABS (GPIO%d_P%d) AEU to 0x%x",
-                   gpio_num, gpio_port, vars->aeu_int_mask);
+       ELINK_DEBUG_P3(sc, "Setting MOD_ABS (GPIO%d_P%d) AEU to 0x%x",
+                      gpio_num, gpio_port, vars->aeu_int_mask);
 
        if (port == 0)
                offset = MISC_REG_AEU_ENABLE1_FUNC_0_OUT_0;
index 40000c2..c8b08bc 100644
@@ -14,7 +14,7 @@
 #ifndef ELINK_H
 #define ELINK_H
 
-#define ELINK_DEBUG
+#include "bnx2x_logs.h"
 
 
 
@@ -29,6 +29,11 @@ struct bnx2x_softc;
 
 extern uint32_t elink_cb_reg_read(struct bnx2x_softc *sc, uint32_t reg_addr);
 extern void elink_cb_reg_write(struct bnx2x_softc *sc, uint32_t reg_addr, uint32_t val);
+/* wb_write - pointer to two 32-bit words to be passed to the DMAE */
+extern void elink_cb_reg_wb_write(struct bnx2x_softc *sc, uint32_t offset,
+                               uint32_t *wb_write, uint16_t len);
+extern void elink_cb_reg_wb_read(struct bnx2x_softc *sc, uint32_t offset,
+                              uint32_t *wb_write, uint16_t len);
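+/* Illustrative usage (a sketch, not part of this patch): a 64-bit value is
+ * passed to the DMAE as two 32-bit words, e.g.
+ *
+ *     uint32_t wb_data[2] = { lo32, hi32 };
+ *     elink_cb_reg_wb_write(sc, offset, wb_data, 2);
+ *
+ * where "lo32"/"hi32" are hypothetical names for the low/high words.
+ */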
 
 /* mode - 0 (LOW) / 1 (HIGH) */
 extern uint8_t elink_cb_gpio_write(struct bnx2x_softc *sc,
@@ -45,6 +50,9 @@ extern uint8_t elink_cb_gpio_int_write(struct bnx2x_softc *sc,
 
 extern uint32_t elink_cb_fw_command(struct bnx2x_softc *sc, uint32_t command, uint32_t param);
 
+/* Delay */
+extern void elink_cb_udelay(struct bnx2x_softc *sc, uint32_t microsecond);
+
 /* This function is called for every 1024 bytes of phy firmware downloaded.
 The driver can use it to print a download progress indication to the screen */
 extern void elink_cb_download_progress(struct bnx2x_softc *sc, uint32_t cur, uint32_t total);
@@ -69,6 +77,8 @@ typedef enum elink_status {
 extern void elink_cb_event_log(struct bnx2x_softc *sc, const elink_log_id_t log_id, ...);
 extern void elink_cb_load_warpcore_microcode(void);
 
+extern uint8_t elink_cb_path_id(struct bnx2x_softc *sc);
+
 extern void elink_cb_notify_link_changed(struct bnx2x_softc *sc);
 
 #define ELINK_EVENT_LOG_LEVEL_ERROR    1
@@ -78,6 +88,32 @@ extern void elink_cb_notify_link_changed(struct bnx2x_softc *sc);
 
 #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
 /* Debug prints */
+#ifdef ELINK_DEBUG
+
+extern void elink_cb_dbg(struct bnx2x_softc *sc,  const char *fmt);
+extern void elink_cb_dbg1(struct bnx2x_softc *sc,  const char *fmt,
+                         uint32_t arg1);
+extern void elink_cb_dbg2(struct bnx2x_softc *sc,  const char *fmt,
+                         uint32_t arg1, uint32_t arg2);
+extern void elink_cb_dbg3(struct bnx2x_softc *sc,  const char *fmt,
+                         uint32_t arg1, uint32_t arg2,
+                         uint32_t arg3);
+
+#define ELINK_DEBUG_P0(sc, fmt)                        elink_cb_dbg(sc, fmt)
+#define ELINK_DEBUG_P1(sc, fmt, arg1)          elink_cb_dbg1(sc, fmt, arg1)
+#define ELINK_DEBUG_P2(sc, fmt, arg1, arg2)    \
+       elink_cb_dbg2(sc, fmt, arg1, arg2)
+#define ELINK_DEBUG_P3(sc, fmt, arg1, arg2, arg3) \
+       elink_cb_dbg3(sc, fmt, arg1, arg2, arg3)
+#else
+#define ELINK_DEBUG_P0(sc, fmt)                   PMD_DRV_LOG(DEBUG, sc, fmt)
+#define ELINK_DEBUG_P1(sc, fmt, arg1)             \
+       PMD_DRV_LOG(DEBUG, sc, fmt, arg1)
+#define ELINK_DEBUG_P2(sc, fmt, arg1, arg2)       \
+       PMD_DRV_LOG(DEBUG, sc, fmt, arg1, arg2)
+#define ELINK_DEBUG_P3(sc, fmt, arg1, arg2, arg3) \
+       PMD_DRV_LOG(DEBUG, sc, fmt, arg1, arg2, arg3)
+#endif
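+/* Illustrative usage (a sketch, not part of this patch): the arguments after
+ * the format string are uint32_t values, e.g.
+ *
+ *     ELINK_DEBUG_P2(sc, "BP=0x%x, NP=0x%x", base_page, next_page);
+ *
+ * which resolves to elink_cb_dbg2() when ELINK_DEBUG is defined and to
+ * PMD_DRV_LOG(DEBUG, ...) otherwise.
+ */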
 
 /***********************************************************/
 /*                         Defines                         */
@@ -126,9 +162,12 @@ extern void elink_cb_notify_link_changed(struct bnx2x_softc *sc);
 #define ELINK_SFP_EEPROM_DATE_SIZE                     6
 #define ELINK_SFP_EEPROM_DIAG_TYPE_ADDR                        0x5c
 #define ELINK_SFP_EEPROM_DIAG_TYPE_SIZE                        1
-#define ELINK_SFP_EEPROM_DIAG_ADDR_CHANGE_REQ          (1<<2)
+#define ELINK_SFP_EEPROM_DIAG_ADDR_CHANGE_REQ          (1 << 2)
 #define ELINK_SFP_EEPROM_SFF_8472_COMP_ADDR            0x5e
 #define ELINK_SFP_EEPROM_SFF_8472_COMP_SIZE            1
+#define ELINK_SFP_EEPROM_VENDOR_SPECIFIC_ADDR  0x60
+#define ELINK_SFP_EEPROM_VENDOR_SPECIFIC_SIZE  16
+
 
 #define ELINK_SFP_EEPROM_A2_CHECKSUM_RANGE             0x5e
 #define ELINK_SFP_EEPROM_A2_CC_DMI_ADDR                        0x5f
@@ -199,7 +238,7 @@ typedef void (*link_reset_t)(struct elink_phy *phy,
                             struct elink_params *params);
 typedef void (*config_loopback_t)(struct elink_phy *phy,
                                  struct elink_params *params);
-typedef uint8_t (*format_fw_ver_t)(uint32_t raw, uint8_t *str, uint16_t *len);
+typedef elink_status_t (*format_fw_ver_t)(uint32_t raw, uint8_t *str, uint16_t *len);
 typedef void (*hw_reset_t)(struct elink_phy *phy, struct elink_params *params);
 typedef void (*set_link_led_t)(struct elink_phy *phy,
                               struct elink_params *params, uint8_t mode);
@@ -219,23 +258,23 @@ struct elink_phy {
        uint8_t def_md_devad;
        uint16_t flags;
        /* No Over-Current detection */
-#define ELINK_FLAGS_NOC                        (1<<1)
+#define ELINK_FLAGS_NOC                        (1 << 1)
        /* Fan failure detection required */
-#define ELINK_FLAGS_FAN_FAILURE_DET_REQ        (1<<2)
+#define ELINK_FLAGS_FAN_FAILURE_DET_REQ        (1 << 2)
        /* Initialize first the XGXS and only then the phy itself */
-#define ELINK_FLAGS_INIT_XGXS_FIRST            (1<<3)
-#define ELINK_FLAGS_WC_DUAL_MODE               (1<<4)
-#define ELINK_FLAGS_4_PORT_MODE                (1<<5)
-#define ELINK_FLAGS_REARM_LATCH_SIGNAL         (1<<6)
-#define ELINK_FLAGS_SFP_NOT_APPROVED           (1<<7)
-#define ELINK_FLAGS_MDC_MDIO_WA                (1<<8)
-#define ELINK_FLAGS_DUMMY_READ                 (1<<9)
-#define ELINK_FLAGS_MDC_MDIO_WA_B0             (1<<10)
-#define ELINK_FLAGS_SFP_MODULE_PLUGGED_IN_WC   (1<<11)
-#define ELINK_FLAGS_TX_ERROR_CHECK             (1<<12)
-#define ELINK_FLAGS_EEE                        (1<<13)
-#define ELINK_FLAGS_TEMPERATURE                (1<<14)
-#define ELINK_FLAGS_MDC_MDIO_WA_G              (1<<15)
+#define ELINK_FLAGS_INIT_XGXS_FIRST            (1 << 3)
+#define ELINK_FLAGS_WC_DUAL_MODE               (1 << 4)
+#define ELINK_FLAGS_4_PORT_MODE                (1 << 5)
+#define ELINK_FLAGS_REARM_LATCH_SIGNAL         (1 << 6)
+#define ELINK_FLAGS_SFP_NOT_APPROVED           (1 << 7)
+#define ELINK_FLAGS_MDC_MDIO_WA                (1 << 8)
+#define ELINK_FLAGS_DUMMY_READ                 (1 << 9)
+#define ELINK_FLAGS_MDC_MDIO_WA_B0             (1 << 10)
+#define ELINK_FLAGS_SFP_MODULE_PLUGGED_IN_WC   (1 << 11)
+#define ELINK_FLAGS_TX_ERROR_CHECK             (1 << 12)
+#define ELINK_FLAGS_EEE                        (1 << 13)
+#define ELINK_FLAGS_TEMPERATURE                (1 << 14)
+#define ELINK_FLAGS_MDC_MDIO_WA_G              (1 << 15)
 
        /* preemphasis values for the rx side */
        uint16_t rx_preemphasis[4];
@@ -247,20 +286,22 @@ struct elink_phy {
        uint32_t mdio_ctrl;
 
        uint32_t supported;
-#define ELINK_SUPPORTED_10baseT_Half           (1<<0)
-#define ELINK_SUPPORTED_10baseT_Full           (1<<1)
-#define ELINK_SUPPORTED_100baseT_Half          (1<<2)
-#define ELINK_SUPPORTED_100baseT_Full          (1<<3)
-#define ELINK_SUPPORTED_1000baseT_Full         (1<<4)
-#define ELINK_SUPPORTED_2500baseX_Full         (1<<5)
-#define ELINK_SUPPORTED_10000baseT_Full        (1<<6)
-#define ELINK_SUPPORTED_TP                     (1<<7)
-#define ELINK_SUPPORTED_FIBRE                  (1<<8)
-#define ELINK_SUPPORTED_Autoneg                (1<<9)
-#define ELINK_SUPPORTED_Pause                  (1<<10)
-#define ELINK_SUPPORTED_Asym_Pause             (1<<11)
-#define ELINK_SUPPORTED_20000baseMLD2_Full     (1<<21)
-#define ELINK_SUPPORTED_20000baseKR2_Full      (1<<22)
+#define ELINK_SUPPORTED_10baseT_Half           (1 << 0)
+#define ELINK_SUPPORTED_10baseT_Full           (1 << 1)
+#define ELINK_SUPPORTED_100baseT_Half          (1 << 2)
+#define ELINK_SUPPORTED_100baseT_Full          (1 << 3)
+#define ELINK_SUPPORTED_1000baseT_Full         (1 << 4)
+#define ELINK_SUPPORTED_2500baseX_Full         (1 << 5)
+#define ELINK_SUPPORTED_10000baseT_Full                (1 << 6)
+#define ELINK_SUPPORTED_TP                     (1 << 7)
+#define ELINK_SUPPORTED_FIBRE                  (1 << 8)
+#define ELINK_SUPPORTED_Autoneg                        (1 << 9)
+#define ELINK_SUPPORTED_Pause                  (1 << 10)
+#define ELINK_SUPPORTED_Asym_Pause             (1 << 11)
+#define ELINK_SUPPORTED_1000baseKX_Full                (1 << 17)
+#define ELINK_SUPPORTED_10000baseKR_Full       (1 << 19)
+#define ELINK_SUPPORTED_20000baseMLD2_Full     (1 << 21)
+#define ELINK_SUPPORTED_20000baseKR2_Full      (1 << 22)
 
        uint32_t media_type;
 #define        ELINK_ETH_PHY_UNSPECIFIED       0x0
@@ -353,17 +394,22 @@ struct elink_params {
 
        /* features */
        uint32_t feature_config_flags;
-#define ELINK_FEATURE_CONFIG_OVERRIDE_PREEMPHASIS_ENABLED      (1<<0)
-#define ELINK_FEATURE_CONFIG_PFC_ENABLED                       (1<<1)
-#define ELINK_FEATURE_CONFIG_BC_SUPPORTS_OPT_MDL_VRFY          (1<<2)
-#define ELINK_FEATURE_CONFIG_BC_SUPPORTS_DUAL_PHY_OPT_MDL_VRFY (1<<3)
-#define ELINK_FEATURE_CONFIG_BC_SUPPORTS_AFEX                  (1<<8)
-#define ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED                (1<<9)
-#define ELINK_FEATURE_CONFIG_BC_SUPPORTS_SFP_TX_DISABLED       (1<<10)
-#define ELINK_FEATURE_CONFIG_DISABLE_REMOTE_FAULT_DET          (1<<11)
-#define ELINK_FEATURE_CONFIG_IEEE_PHY_TEST                     (1<<12)
-#define ELINK_FEATURE_CONFIG_MT_SUPPORT                        (1<<13)
-#define ELINK_FEATURE_CONFIG_BOOT_FROM_SAN                     (1<<14)
+#define ELINK_FEATURE_CONFIG_OVERRIDE_PREEMPHASIS_ENABLED      (1 << 0)
+#define ELINK_FEATURE_CONFIG_PFC_ENABLED                       (1 << 1)
+#define ELINK_FEATURE_CONFIG_BC_SUPPORTS_OPT_MDL_VRFY          (1 << 2)
+#define ELINK_FEATURE_CONFIG_BC_SUPPORTS_DUAL_PHY_OPT_MDL_VRFY (1 << 3)
+#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_EMAC                 (1 << 4)
+#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_BMAC                 (1 << 5)
+#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_UMAC                 (1 << 6)
+#define ELINK_FEATURE_CONFIG_EMUL_DISABLE_XMAC                 (1 << 7)
+#define ELINK_FEATURE_CONFIG_BC_SUPPORTS_AFEX                  (1 << 8)
+#define ELINK_FEATURE_CONFIG_AUTOGREEEN_ENABLED                (1 << 9)
+#define ELINK_FEATURE_CONFIG_BC_SUPPORTS_SFP_TX_DISABLED       (1 << 10)
+#define ELINK_FEATURE_CONFIG_DISABLE_REMOTE_FAULT_DET          (1 << 11)
+#define ELINK_FEATURE_CONFIG_IEEE_PHY_TEST                     (1 << 12)
+#define ELINK_FEATURE_CONFIG_MT_SUPPORT                        (1 << 13)
+#define ELINK_FEATURE_CONFIG_BOOT_FROM_SAN                     (1 << 14)
+#define ELINK_FEATURE_CONFIG_DISABLE_PD                                (1 << 15)
 
        /* Will be populated during common init */
        struct elink_phy phy[ELINK_MAX_PHYS];
@@ -391,10 +437,10 @@ struct elink_params {
 #define ELINK_EEE_MODE_NVRAM_LATENCY_TIME              (0x6000)
 #define ELINK_EEE_MODE_NVRAM_MASK              (0x3)
 #define ELINK_EEE_MODE_TIMER_MASK              (0xfffff)
-#define ELINK_EEE_MODE_OUTPUT_TIME             (1<<28)
-#define ELINK_EEE_MODE_OVERRIDE_NVRAM          (1<<29)
-#define ELINK_EEE_MODE_ENABLE_LPI              (1<<30)
-#define ELINK_EEE_MODE_ADV_LPI                 (1<<31)
+#define ELINK_EEE_MODE_OUTPUT_TIME             (1 << 28)
+#define ELINK_EEE_MODE_OVERRIDE_NVRAM          (1 << 29)
+#define ELINK_EEE_MODE_ENABLE_LPI              (1 << 30)
+#define ELINK_EEE_MODE_ADV_LPI                 (1 << 31)
 
        uint16_t hw_led_mode; /* part of the hw_config read from the shmem */
        uint32_t multi_phy_config;
@@ -404,20 +450,23 @@ struct elink_params {
        uint16_t req_fc_auto_adv; /* Should be set to TX / BOTH when
                                req_flow_ctrl is set to AUTO */
        uint16_t link_flags;
-#define ELINK_LINK_FLAGS_INT_DISABLED          (1<<0)
-#define ELINK_PHY_INITIALIZED          (1<<1)
+#define ELINK_LINK_FLAGS_INT_DISABLED          (1 << 0)
+#define ELINK_PHY_INITIALIZED          (1 << 1)
        uint32_t lfa_base;
+
+       /* The same definitions as the shmem2 parameter */
+       uint32_t link_attr_sync;
 };
 
 /* Output parameters */
 struct elink_vars {
        uint8_t phy_flags;
-#define PHY_XGXS_FLAG                  (1<<0)
-#define PHY_SGMII_FLAG                 (1<<1)
-#define PHY_PHYSICAL_LINK_FLAG         (1<<2)
-#define PHY_HALF_OPEN_CONN_FLAG                (1<<3)
-#define PHY_OVER_CURRENT_FLAG          (1<<4)
-#define PHY_SFP_TX_FAULT_FLAG          (1<<5)
+#define PHY_XGXS_FLAG                  (1 << 0)
+#define PHY_SGMII_FLAG                 (1 << 1)
+#define PHY_PHYSICAL_LINK_FLAG         (1 << 2)
+#define PHY_HALF_OPEN_CONN_FLAG                (1 << 3)
+#define PHY_OVER_CURRENT_FLAG          (1 << 4)
+#define PHY_SFP_TX_FAULT_FLAG          (1 << 5)
 
        uint8_t mac_type;
 #define ELINK_MAC_TYPE_NONE            0
@@ -448,8 +497,7 @@ struct elink_vars {
        uint8_t rx_tx_asic_rst;
        uint8_t turn_to_run_wc_rt;
        uint16_t rsrv2;
-       /* The same definitions as the shmem2 parameter */
-       uint32_t link_attr_sync;
+
 };
 
 /***********************************************************/
@@ -460,14 +508,32 @@ elink_status_t elink_phy_init(struct elink_params *params, struct elink_vars *va
 /* Reset the link. Should be called when the driver or interface goes down.
    Before calling a phy firmware upgrade, reset_ext_phy should be set
    to 0 */
+elink_status_t elink_link_reset(struct elink_params *params,
+                    struct elink_vars *vars,
+                    uint8_t reset_ext_phy);
 elink_status_t elink_lfa_reset(struct elink_params *params, struct elink_vars *vars);
 /* elink_link_update should be called upon link interrupt */
 elink_status_t elink_link_update(struct elink_params *params, struct elink_vars *vars);
 
+/* use the following phy functions to read/write from external_phy
+ * In order to use it to read/write internal phy registers, use
+ * ELINK_DEFAULT_PHY_DEV_ADDR as devad, and (_bank + (_addr & 0xf)) as
+ * the register
+ */
+elink_status_t elink_phy_read(struct elink_params *params, uint8_t phy_addr,
+                  uint8_t devad, uint16_t reg, uint16_t *ret_val);
+
+elink_status_t elink_phy_write(struct elink_params *params, uint8_t phy_addr,
+                   uint8_t devad, uint16_t reg, uint16_t val);
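+/* Illustrative usage (a sketch, not part of this patch): read a register
+ * from the external phy at MDIO address "phy_addr", e.g.
+ *
+ *     uint16_t val;
+ *     elink_phy_read(params, phy_addr, MDIO_PMA_DEVAD, reg, &val);
+ *
+ * For internal phy registers, per the note above, pass
+ * ELINK_DEFAULT_PHY_DEV_ADDR as devad and (_bank + (_addr & 0xf)) as reg.
+ */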
+
 /* Reads the link_status from the shmem
    and updates the link vars accordingly */
 void elink_link_status_update(struct elink_params *input,
                            struct elink_vars *output);
+/* Fills "version" with a string representing the fw version of the
+ * external phy
+ */
+elink_status_t elink_get_ext_phy_fw_version(struct elink_params *params,
+                                uint8_t *version,
+                                uint16_t len);
 
 /* Set/Unset the led
    Basically, the CLC takes care of the led for the link, but in case one needs
@@ -481,12 +547,34 @@ elink_status_t elink_set_led(struct elink_params *params,
 #define ELINK_LED_MODE_FRONT_PANEL_OFF 3
 
 /* elink_handle_module_detect_int should be called upon module detection
-   interrupt */
+ * interrupt
+ */
 void elink_handle_module_detect_int(struct elink_params *params);
 
+/* Get the actual link status. If it returns ELINK_STATUS_OK, the link is up;
+ * otherwise the link is down.
+ */
+elink_status_t elink_test_link(struct elink_params *params,
+                   struct elink_vars *vars,
+                   uint8_t is_serdes);
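+/* Illustrative usage (a sketch, not part of this patch; the meaning of
+ * is_serdes is an assumption here):
+ *
+ *     if (elink_test_link(params, vars, 0) == ELINK_STATUS_OK)
+ *             ELINK_DEBUG_P0(params->sc, "link is up");
+ */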
+
+
 /* One-time initialization for external phy after power up */
 elink_status_t elink_common_init_phy(struct bnx2x_softc *sc, uint32_t shmem_base_path[],
-                         uint32_t shmem2_base_path[], uint32_t chip_id, uint8_t one_port_enabled);
+                         uint32_t shmem2_base_path[], uint32_t chip_id,
+                         uint8_t one_port_enabled);
+
+/* Reset the external PHY using GPIO */
+void elink_ext_phy_hw_reset(struct bnx2x_softc *sc, uint8_t port);
+
+/* Reset the external of SFX7101 */
+void elink_sfx7101_sp_sw_reset(struct bnx2x_softc *sc, struct elink_phy *phy);
+
+/* Read "byte_cnt" bytes from address "addr" from the SFP+ EEPROM */
+elink_status_t elink_read_sfp_module_eeprom(struct elink_phy *phy,
+                                struct elink_params *params, uint8_t dev_addr,
+                                uint16_t addr, uint16_t byte_cnt,
+                                uint8_t *o_buf);
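+/* Illustrative usage (a sketch, not part of this patch): fetch the 16-byte
+ * vendor-specific area using the ELINK_SFP_EEPROM_VENDOR_SPECIFIC_*
+ * constants defined above; "dev_addr" (the module's I2C address) is left
+ * as an assumption here, e.g.
+ *
+ *     uint8_t buf[ELINK_SFP_EEPROM_VENDOR_SPECIFIC_SIZE];
+ *     elink_read_sfp_module_eeprom(phy, params, dev_addr,
+ *                                  ELINK_SFP_EEPROM_VENDOR_SPECIFIC_ADDR,
+ *                                  ELINK_SFP_EEPROM_VENDOR_SPECIFIC_SIZE,
+ *                                  buf);
+ */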
 
 void elink_hw_reset_phy(struct elink_params *params);
 
@@ -569,12 +657,42 @@ elink_status_t elink_update_pfc(struct elink_params *params,
                      struct elink_vars *vars,
                      struct elink_nig_brb_pfc_port_params *pfc_params);
 
+
+/* Used to configure the ETS to disable */
+elink_status_t elink_ets_disabled(struct elink_params *params,
+                      struct elink_vars *vars);
+
+/* Used to configure the ETS to BW limited */
+void elink_ets_bw_limit(const struct elink_params *params,
+                       const uint32_t cos0_bw,
+                       const uint32_t cos1_bw);
+
+/* Used to configure the ETS to strict */
+elink_status_t elink_ets_strict(const struct elink_params *params,
+                               const uint8_t strict_cos);
+
+
+/* Configure the COS to ETS according to BW and SP settings. */
+elink_status_t elink_ets_e3b0_config(const struct elink_params *params,
+                        const struct elink_vars *vars,
+                        struct elink_ets_params *ets_params);
+/* Read PFC statistics */
+void elink_pfc_statistic(struct elink_params *params, struct elink_vars *vars,
+                        uint32_t pfc_frames_sent[2],
+                        uint32_t pfc_frames_received[2]);
 void elink_init_mod_abs_int(struct bnx2x_softc *sc, struct elink_vars *vars,
                            uint32_t chip_id, uint32_t shmem_base, uint32_t shmem2_base,
                            uint8_t port);
+/* elink_status_t elink_sfp_module_detection(struct elink_phy *phy,
+ *                            struct elink_params *params);
+ */
 
 void elink_period_func(struct elink_params *params, struct elink_vars *vars);
 
+/*elink_status_t elink_check_half_open_conn(struct elink_params *params,
+ *                                 struct elink_vars *vars, uint8_t notify);
+ */
+
 void elink_enable_pmd_tx(struct elink_params *params);
 
 
index db5c4eb..f75b0ad 100644
@@ -20,7 +20,7 @@
 
 #include "bnxt_cpr.h"
 
-#define BNXT_MAX_MTU           9500
+#define BNXT_MAX_MTU           9574
 #define VLAN_TAG_SIZE          4
 #define BNXT_VF_RSV_NUM_RSS_CTX        1
 #define BNXT_VF_RSV_NUM_L2_CTX 4
 #define BNXT_MAX_RX_RING_DESC  8192
 #define BNXT_DB_SIZE           0x80
 
+/* Chimp Communication Channel */
+#define GRCPF_REG_CHIMP_CHANNEL_OFFSET         0x0
+#define GRCPF_REG_CHIMP_COMM_TRIGGER           0x100
+/* Kong Communication Channel */
+#define GRCPF_REG_KONG_CHANNEL_OFFSET          0xA00
+#define GRCPF_REG_KONG_COMM_TRIGGER            0xB00
+
 #define BNXT_INT_LAT_TMR_MIN                   75
 #define BNXT_INT_LAT_TMR_MAX                   150
 #define BNXT_NUM_CMPL_AGGR_INT                 36
@@ -250,6 +257,11 @@ struct bnxt {
 #define BNXT_FLAG_UPDATE_HASH  (1 << 5)
 #define BNXT_FLAG_PTP_SUPPORTED        (1 << 6)
 #define BNXT_FLAG_MULTI_HOST    (1 << 7)
+#define BNXT_FLAG_EXT_RX_PORT_STATS    (1 << 8)
+#define BNXT_FLAG_EXT_TX_PORT_STATS    (1 << 9)
+#define BNXT_FLAG_KONG_MB_EN   (1 << 10)
+#define BNXT_FLAG_TRUSTED_VF_EN        (1 << 11)
+#define BNXT_FLAG_DFLT_VNIC_SET        (1 << 12)
 #define BNXT_FLAG_NEW_RM       (1 << 30)
 #define BNXT_FLAG_INIT_DONE    (1 << 31)
 #define BNXT_PF(bp)            (!((bp)->flags & BNXT_FLAG_VF))
@@ -257,6 +269,9 @@ struct bnxt {
 #define BNXT_NPAR(bp)          ((bp)->port_partition_type)
 #define BNXT_MH(bp)             ((bp)->flags & BNXT_FLAG_MULTI_HOST)
 #define BNXT_SINGLE_PF(bp)      (BNXT_PF(bp) && !BNXT_NPAR(bp) && !BNXT_MH(bp))
+/* For non-CFA commands, everything uses Chimp. */
+#define BNXT_USE_CHIMP_MB      0
+#define BNXT_USE_KONG(bp)      ((bp)->flags & BNXT_FLAG_KONG_MB_EN)
+#define BNXT_VF_IS_TRUSTED(bp) ((bp)->flags & BNXT_FLAG_TRUSTED_VF_EN)
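+/* Illustrative only (hypothetical helper, not part of this patch): the
+ * Chimp/Kong split implied by these flags could be applied when picking
+ * the HWRM doorbell offset, e.g.
+ *
+ *     static uint32_t bnxt_mb_trigger_offset(struct bnxt *bp, bool kong)
+ *     {
+ *             if (kong && BNXT_USE_KONG(bp))
+ *                     return GRCPF_REG_KONG_COMM_TRIGGER;
+ *             return GRCPF_REG_CHIMP_COMM_TRIGGER;
+ *     }
+ */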
 
        unsigned int            rx_nr_rings;
        unsigned int            rx_cp_nr_rings;
@@ -264,6 +279,9 @@ struct bnxt {
        const void              *rx_mem_zone;
        struct rx_port_stats    *hw_rx_port_stats;
        rte_iova_t              hw_rx_port_stats_map;
+       struct rx_port_stats_ext    *hw_rx_port_stats_ext;
+       rte_iova_t              hw_rx_port_stats_ext_map;
+       uint16_t                fw_rx_port_stats_ext_size;
 
        unsigned int            tx_nr_rings;
        unsigned int            tx_cp_nr_rings;
@@ -271,6 +289,9 @@ struct bnxt {
        const void              *tx_mem_zone;
        struct tx_port_stats    *hw_tx_port_stats;
        rte_iova_t              hw_tx_port_stats_map;
+       struct tx_port_stats_ext    *hw_tx_port_stats_ext;
+       rte_iova_t              hw_tx_port_stats_ext_map;
+       uint16_t                fw_tx_port_stats_ext_size;
 
        /* Default completion ring */
        struct bnxt_cp_ring_info        *def_cp_ring;
@@ -285,16 +306,13 @@ struct bnxt {
        struct bnxt_filter_info *filter_info;
        STAILQ_HEAD(, bnxt_filter_info) free_filter_list;
 
-       /* VNIC pointer for flow filter (VMDq) pools */
-#define MAX_FF_POOLS   256
-       STAILQ_HEAD(, bnxt_vnic_info)   ff_pool[MAX_FF_POOLS];
-
        struct bnxt_irq         *irq_tbl;
 
 #define MAX_NUM_MAC_ADDR       32
        uint8_t                 mac_addr[ETHER_ADDR_LEN];
 
        uint16_t                        hwrm_cmd_seq;
+       uint16_t                        kong_cmd_seq;
        void                            *hwrm_cmd_resp_addr;
        rte_iova_t                      hwrm_cmd_resp_dma_addr;
        void                            *hwrm_short_cmd_req_addr;
index ff20b6f..0fd6e51 100644
@@ -35,6 +35,7 @@ void bnxt_handle_async_event(struct bnxt *bp,
                break;
        case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE:
                PMD_DRV_LOG(INFO, "Async event: VF config changed\n");
+               bnxt_hwrm_func_qcfg(bp);
                break;
        case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED:
                PMD_DRV_LOG(INFO, "Port conn async event\n");
index cc7e439..801c6ff 100644
@@ -149,7 +149,6 @@ static const struct rte_pci_id bnxt_pci_id_map[] = {
                                     DEV_RX_OFFLOAD_TCP_CKSUM | \
                                     DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM | \
                                     DEV_RX_OFFLOAD_JUMBO_FRAME | \
-                                    DEV_RX_OFFLOAD_CRC_STRIP | \
                                     DEV_RX_OFFLOAD_KEEP_CRC | \
                                     DEV_RX_OFFLOAD_TCP_LRO)
 
@@ -203,7 +202,9 @@ static int bnxt_init_chip(struct bnxt *bp)
        struct bnxt_rx_queue *rxq;
        struct rte_eth_link new;
        struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(bp->eth_dev);
+       struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf;
        struct rte_intr_handle *intr_handle = &pci_dev->intr_handle;
+       uint64_t rx_offloads = dev_conf->rxmode.offloads;
        uint32_t intr_vector = 0;
        uint32_t queue_id, base = BNXT_MISC_VEC_ID;
        uint32_t vec = BNXT_MISC_VEC_ID;
@@ -263,6 +264,9 @@ static int bnxt_init_chip(struct bnxt *bp)
                }
                memset(vnic->fw_grp_ids, -1, size);
 
+               PMD_DRV_LOG(DEBUG, "vnic[%d] = %p vnic->fw_grp_ids = %p\n",
+                           i, vnic, vnic->fw_grp_ids);
+
                rc = bnxt_hwrm_vnic_alloc(bp, vnic);
                if (rc) {
                        PMD_DRV_LOG(ERR, "HWRM vnic %d alloc failure rc: %x\n",
@@ -281,6 +285,16 @@ static int bnxt_init_chip(struct bnxt *bp)
                        }
                }
 
+               /*
+                * Firmware sets pf pair in default vnic cfg. If the VLAN strip
+                * setting is not available at this time, it will not be
+                * configured correctly in the CFA.
+                */
+               if (rx_offloads & DEV_RX_OFFLOAD_VLAN_STRIP)
+                       vnic->vlan_strip = true;
+               else
+                       vnic->vlan_strip = false;
+
                rc = bnxt_hwrm_vnic_cfg(bp, vnic);
                if (rc) {
                        PMD_DRV_LOG(ERR, "HWRM vnic %d cfg failure rc: %x\n",
@@ -299,6 +313,10 @@ static int bnxt_init_chip(struct bnxt *bp)
                for (j = 0; j < bp->rx_nr_rings; j++) {
                        rxq = bp->eth_dev->data->rx_queues[j];
 
+                       PMD_DRV_LOG(DEBUG,
+                                   "rxq[%d]->vnic=%p vnic->fw_grp_ids=%p\n",
+                                   j, rxq->vnic, rxq->vnic->fw_grp_ids);
+
                        if (rxq->rx_deferred_start)
                                rxq->vnic->fw_grp_ids[j] = INVALID_HW_RING_ID;
                }
@@ -445,7 +463,7 @@ static void bnxt_dev_info_get_op(struct rte_eth_dev *eth_dev,
        /* Fast path specifics */
        dev_info->min_rx_bufsize = 1;
        dev_info->max_rx_pktlen = BNXT_MAX_MTU + ETHER_HDR_LEN + ETHER_CRC_LEN
-                                 + VLAN_TAG_SIZE;
+                                 + VLAN_TAG_SIZE * 2;
 
        dev_info->rx_offload_capa = BNXT_DEV_RX_OFFLOAD_SUPPORT;
        if (bp->flags & BNXT_FLAG_PTP_SUPPORTED)
@@ -694,7 +712,6 @@ static void bnxt_dev_close_op(struct rte_eth_dev *eth_dev)
        if (bp->dev_stopped == 0)
                bnxt_dev_stop_op(eth_dev);
 
-       bnxt_free_mem(bp);
        if (eth_dev->data->mac_addrs != NULL) {
                rte_free(eth_dev->data->mac_addrs);
                eth_dev->data->mac_addrs = NULL;
@@ -714,34 +731,30 @@ static void bnxt_mac_addr_remove_op(struct rte_eth_dev *eth_dev,
        uint64_t pool_mask = eth_dev->data->mac_pool_sel[index];
        struct bnxt_vnic_info *vnic;
        struct bnxt_filter_info *filter, *temp_filter;
-       uint32_t pool = RTE_MIN(MAX_FF_POOLS, ETH_64_POOLS);
        uint32_t i;
 
        /*
         * Loop through all VNICs from the specified filter flow pools to
         * remove the corresponding MAC addr filter
         */
-       for (i = 0; i < pool; i++) {
+       for (i = 0; i < bp->nr_vnics; i++) {
                if (!(pool_mask & (1ULL << i)))
                        continue;
 
-               STAILQ_FOREACH(vnic, &bp->ff_pool[i], next) {
-                       filter = STAILQ_FIRST(&vnic->filter);
-                       while (filter) {
-                               temp_filter = STAILQ_NEXT(filter, next);
-                               if (filter->mac_index == index) {
-                                       STAILQ_REMOVE(&vnic->filter, filter,
-                                                     bnxt_filter_info, next);
-                                       bnxt_hwrm_clear_l2_filter(bp, filter);
-                                       filter->mac_index = INVALID_MAC_INDEX;
-                                       memset(&filter->l2_addr, 0,
-                                              ETHER_ADDR_LEN);
-                                       STAILQ_INSERT_TAIL(
-                                                       &bp->free_filter_list,
-                                                       filter, next);
-                               }
-                               filter = temp_filter;
+               vnic = &bp->vnic_info[i];
+               filter = STAILQ_FIRST(&vnic->filter);
+               while (filter) {
+                       temp_filter = STAILQ_NEXT(filter, next);
+                       if (filter->mac_index == index) {
+                               STAILQ_REMOVE(&vnic->filter, filter,
+                                               bnxt_filter_info, next);
+                               bnxt_hwrm_clear_l2_filter(bp, filter);
+                               filter->mac_index = INVALID_MAC_INDEX;
+                               memset(&filter->l2_addr, 0, ETHER_ADDR_LEN);
+                               STAILQ_INSERT_TAIL(&bp->free_filter_list,
+                                                  filter, next);
                        }
+                       filter = temp_filter;
                }
        }
 }
@@ -751,10 +764,10 @@ static int bnxt_mac_addr_add_op(struct rte_eth_dev *eth_dev,
                                uint32_t index, uint32_t pool)
 {
        struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
-       struct bnxt_vnic_info *vnic = STAILQ_FIRST(&bp->ff_pool[pool]);
+       struct bnxt_vnic_info *vnic = &bp->vnic_info[pool];
        struct bnxt_filter_info *filter;
 
-       if (BNXT_VF(bp)) {
+       if (BNXT_VF(bp) && !BNXT_VF_IS_TRUSTED(bp)) {
                PMD_DRV_LOG(ERR, "Cannot add MAC address to a VF interface\n");
                return -ENOTSUP;
        }
@@ -898,12 +911,10 @@ static int bnxt_reta_update_op(struct rte_eth_dev *eth_dev,
                return -EINVAL;
        }
        /* Update the RSS VNIC(s) */
-       for (i = 0; i < MAX_FF_POOLS; i++) {
-               STAILQ_FOREACH(vnic, &bp->ff_pool[i], next) {
-                       memcpy(vnic->rss_table, reta_conf, reta_size);
-
-                       bnxt_hwrm_vnic_rss_cfg(bp, vnic);
-               }
+       for (i = 0; i < bp->max_vnics; i++) {
+               vnic = &bp->vnic_info[i];
+               memcpy(vnic->rss_table, reta_conf, reta_size);
+               bnxt_hwrm_vnic_rss_cfg(bp, vnic);
        }
        return 0;
 }
@@ -947,7 +958,7 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev,
        struct rte_eth_conf *dev_conf = &bp->eth_dev->data->dev_conf;
        struct bnxt_vnic_info *vnic;
        uint16_t hash_type = 0;
-       int i;
+       unsigned int i;
 
        /*
         * If RSS enablement were different than dev_configure,
@@ -978,21 +989,20 @@ static int bnxt_rss_hash_update_op(struct rte_eth_dev *eth_dev,
                hash_type |= HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV6;
 
        /* Update the RSS VNIC(s) */
-       for (i = 0; i < MAX_FF_POOLS; i++) {
-               STAILQ_FOREACH(vnic, &bp->ff_pool[i], next) {
-                       vnic->hash_type = hash_type;
-
-                       /*
-                        * Use the supplied key if the key length is
-                        * acceptable and the rss_key is not NULL
-                        */
-                       if (rss_conf->rss_key &&
-                           rss_conf->rss_key_len <= HW_HASH_KEY_SIZE)
-                               memcpy(vnic->rss_hash_key, rss_conf->rss_key,
-                                      rss_conf->rss_key_len);
-
-                       bnxt_hwrm_vnic_rss_cfg(bp, vnic);
-               }
+       for (i = 0; i < bp->nr_vnics; i++) {
+               vnic = &bp->vnic_info[i];
+               vnic->hash_type = hash_type;
+
+               /*
+                * Use the supplied key if the key length is
+                * acceptable and the rss_key is not NULL
+                */
+               if (rss_conf->rss_key &&
+                   rss_conf->rss_key_len <= HW_HASH_KEY_SIZE)
+                       memcpy(vnic->rss_hash_key, rss_conf->rss_key,
+                              rss_conf->rss_key_len);
+
+               bnxt_hwrm_vnic_rss_cfg(bp, vnic);
        }
        return 0;
 }
@@ -1269,53 +1279,51 @@ static int bnxt_del_vlan_filter(struct bnxt *bp, uint16_t vlan_id)
                 * else
                 *      VLAN filter doesn't exist, just skip and continue
                 */
-               STAILQ_FOREACH(vnic, &bp->ff_pool[i], next) {
-                       filter = STAILQ_FIRST(&vnic->filter);
-                       while (filter) {
-                               temp_filter = STAILQ_NEXT(filter, next);
-
-                               if (filter->enables & chk &&
-                                   filter->l2_ovlan == vlan_id) {
-                                       /* Must delete the filter */
-                                       STAILQ_REMOVE(&vnic->filter, filter,
-                                                     bnxt_filter_info, next);
-                                       bnxt_hwrm_clear_l2_filter(bp, filter);
-                                       STAILQ_INSERT_TAIL(
-                                                       &bp->free_filter_list,
-                                                       filter, next);
-
-                                       /*
-                                        * Need to examine to see if the MAC
-                                        * filter already existed or not before
-                                        * allocating a new one
-                                        */
-
-                                       new_filter = bnxt_alloc_filter(bp);
-                                       if (!new_filter) {
-                                               PMD_DRV_LOG(ERR,
+               vnic = &bp->vnic_info[i];
+               filter = STAILQ_FIRST(&vnic->filter);
+               while (filter) {
+                       temp_filter = STAILQ_NEXT(filter, next);
+
+                       if (filter->enables & chk &&
+                           filter->l2_ovlan == vlan_id) {
+                               /* Must delete the filter */
+                               STAILQ_REMOVE(&vnic->filter, filter,
+                                             bnxt_filter_info, next);
+                               bnxt_hwrm_clear_l2_filter(bp, filter);
+                               STAILQ_INSERT_TAIL(&bp->free_filter_list,
+                                                  filter, next);
+
+                               /*
+                                * Need to examine to see if the MAC
+                                * filter already existed or not before
+                                * allocating a new one
+                                */
+
+                               new_filter = bnxt_alloc_filter(bp);
+                               if (!new_filter) {
+                                       PMD_DRV_LOG(ERR,
                                                        "MAC/VLAN filter alloc failed\n");
-                                               rc = -ENOMEM;
-                                               goto exit;
-                                       }
-                                       STAILQ_INSERT_TAIL(&vnic->filter,
-                                                          new_filter, next);
-                                       /* Inherit MAC from previous filter */
-                                       new_filter->mac_index =
-                                                       filter->mac_index;
-                                       memcpy(new_filter->l2_addr,
-                                              filter->l2_addr, ETHER_ADDR_LEN);
-                                       /* MAC only filter */
-                                       rc = bnxt_hwrm_set_l2_filter(bp,
-                                                       vnic->fw_vnic_id,
-                                                       new_filter);
-                                       if (rc)
-                                               goto exit;
-                                       PMD_DRV_LOG(INFO,
-                                               "Del Vlan filter for %d\n",
-                                               vlan_id);
+                                       rc = -ENOMEM;
+                                       goto exit;
                                }
-                               filter = temp_filter;
+                               STAILQ_INSERT_TAIL(&vnic->filter,
+                                               new_filter, next);
+                               /* Inherit MAC from previous filter */
+                               new_filter->mac_index =
+                                       filter->mac_index;
+                               memcpy(new_filter->l2_addr, filter->l2_addr,
+                                      ETHER_ADDR_LEN);
+                               /* MAC only filter */
+                               rc = bnxt_hwrm_set_l2_filter(bp,
+                                                            vnic->fw_vnic_id,
+                                                            new_filter);
+                               if (rc)
+                                       goto exit;
+                               PMD_DRV_LOG(INFO,
+                                           "Del Vlan filter for %d\n",
+                                           vlan_id);
                        }
+                       filter = temp_filter;
                }
        }
 exit:
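
The walk above deliberately saves STAILQ_NEXT() into temp_filter before the
current node may be unlinked, because STAILQ_REMOVE() invalidates the removed
element's linkage. A minimal, self-contained illustration of that removal-safe
traversal, with a hypothetical item type rather than the driver's:

    #include <stdlib.h>
    #include <sys/queue.h>

    struct item {
            int val;
            STAILQ_ENTRY(item) next;
    };
    STAILQ_HEAD(item_list, item);

    /* Remove every element matching val; fetch the successor first. */
    static void remove_matching(struct item_list *list, int val)
    {
            struct item *it = STAILQ_FIRST(list), *tmp;

            while (it != NULL) {
                    tmp = STAILQ_NEXT(it, next); /* taken before removal */
                    if (it->val == val) {
                            STAILQ_REMOVE(list, it, item, next);
                            free(it);
                    }
                    it = tmp;
            }
    }
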
@@ -1345,51 +1353,48 @@ static int bnxt_add_vlan_filter(struct bnxt *bp, uint16_t vlan_id)
                 *   Remove the old MAC only filter
                 *    Add a new MAC+VLAN filter
                 */
-               STAILQ_FOREACH(vnic, &bp->ff_pool[i], next) {
-                       filter = STAILQ_FIRST(&vnic->filter);
-                       while (filter) {
-                               temp_filter = STAILQ_NEXT(filter, next);
-
-                               if (filter->enables & chk) {
-                                       if (filter->l2_ovlan == vlan_id)
-                                               goto cont;
-                               } else {
-                                       /* Must delete the MAC filter */
-                                       STAILQ_REMOVE(&vnic->filter, filter,
-                                                     bnxt_filter_info, next);
-                                       bnxt_hwrm_clear_l2_filter(bp, filter);
-                                       filter->l2_ovlan = 0;
-                                       STAILQ_INSERT_TAIL(
-                                                       &bp->free_filter_list,
-                                                       filter, next);
-                               }
-                               new_filter = bnxt_alloc_filter(bp);
-                               if (!new_filter) {
-                                       PMD_DRV_LOG(ERR,
+               vnic = &bp->vnic_info[i];
+               filter = STAILQ_FIRST(&vnic->filter);
+               while (filter) {
+                       temp_filter = STAILQ_NEXT(filter, next);
+
+                       if (filter->enables & chk) {
+                               if (filter->l2_ivlan == vlan_id)
+                                       goto cont;
+                       } else {
+                               /* Must delete the MAC filter */
+                               STAILQ_REMOVE(&vnic->filter, filter,
+                                               bnxt_filter_info, next);
+                               bnxt_hwrm_clear_l2_filter(bp, filter);
+                               filter->l2_ovlan = 0;
+                               STAILQ_INSERT_TAIL(&bp->free_filter_list,
+                                                  filter, next);
+                       }
+                       new_filter = bnxt_alloc_filter(bp);
+                       if (!new_filter) {
+                               PMD_DRV_LOG(ERR,
                                                "MAC/VLAN filter alloc failed\n");
-                                       rc = -ENOMEM;
-                                       goto exit;
-                               }
-                               STAILQ_INSERT_TAIL(&vnic->filter, new_filter,
-                                                  next);
-                               /* Inherit MAC from the previous filter */
-                               new_filter->mac_index = filter->mac_index;
-                               memcpy(new_filter->l2_addr, filter->l2_addr,
-                                      ETHER_ADDR_LEN);
-                               /* MAC + VLAN ID filter */
-                               new_filter->l2_ivlan = vlan_id;
-                               new_filter->l2_ivlan_mask = 0xF000;
-                               new_filter->enables |= en;
-                               rc = bnxt_hwrm_set_l2_filter(bp,
-                                                            vnic->fw_vnic_id,
-                                                            new_filter);
-                               if (rc)
-                                       goto exit;
-                               PMD_DRV_LOG(INFO,
-                                       "Added Vlan filter for %d\n", vlan_id);
-cont:
-                               filter = temp_filter;
+                               rc = -ENOMEM;
+                               goto exit;
                        }
+                       STAILQ_INSERT_TAIL(&vnic->filter, new_filter, next);
+                       /* Inherit MAC from the previous filter */
+                       new_filter->mac_index = filter->mac_index;
+                       memcpy(new_filter->l2_addr, filter->l2_addr,
+                              ETHER_ADDR_LEN);
+                       /* MAC + VLAN ID filter */
+                       new_filter->l2_ivlan = vlan_id;
+                       new_filter->l2_ivlan_mask = 0xF000;
+                       new_filter->enables |= en;
+                       rc = bnxt_hwrm_set_l2_filter(bp,
+                                       vnic->fw_vnic_id,
+                                       new_filter);
+                       if (rc)
+                               goto exit;
+                       PMD_DRV_LOG(INFO,
+                                   "Added Vlan filter for %d\n", vlan_id);
+cont:
+                       filter = temp_filter;
                }
        }
 exit:
@@ -1397,7 +1402,7 @@ exit:
 }
 
 static int bnxt_vlan_filter_set_op(struct rte_eth_dev *eth_dev,
-                                  uint16_t vlan_id, int on)
+               uint16_t vlan_id, int on)
 {
        struct bnxt *bp = (struct bnxt *)eth_dev->data->dev_private;
 
@@ -1454,7 +1459,7 @@ bnxt_set_default_mac_addr_op(struct rte_eth_dev *dev, struct ether_addr *addr)
        struct bnxt_filter_info *filter;
        int rc;
 
-       if (BNXT_VF(bp))
+       if (BNXT_VF(bp) && !BNXT_VF_IS_TRUSTED(bp))
                return -EPERM;
 
        memcpy(bp->mac_addr, addr, sizeof(bp->mac_addr));
@@ -1571,21 +1576,17 @@ static int bnxt_mtu_set_op(struct rte_eth_dev *eth_dev, uint16_t new_mtu)
 {
        struct bnxt *bp = eth_dev->data->dev_private;
        struct rte_eth_dev_info dev_info;
-       uint32_t max_dev_mtu;
        uint32_t rc = 0;
        uint32_t i;
 
        bnxt_dev_info_get_op(eth_dev, &dev_info);
-       max_dev_mtu = dev_info.max_rx_pktlen -
-                     ETHER_HDR_LEN - ETHER_CRC_LEN - VLAN_TAG_SIZE * 2;
 
-       if (new_mtu < ETHER_MIN_MTU || new_mtu > max_dev_mtu) {
+       if (new_mtu < ETHER_MIN_MTU || new_mtu > BNXT_MAX_MTU) {
                PMD_DRV_LOG(ERR, "MTU requested must be within (%d, %d)\n",
-                       ETHER_MIN_MTU, max_dev_mtu);
+                       ETHER_MIN_MTU, BNXT_MAX_MTU);
                return -EINVAL;
        }
 
-
        if (new_mtu > ETHER_MTU) {
                bp->flags |= BNXT_FLAG_JUMBO;
                bp->eth_dev->data->dev_conf.rxmode.offloads |=
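
The reworked check clamps the MTU to a fixed device ceiling (BNXT_MAX_MTU)
instead of deriving the ceiling from max_rx_pktlen, and any MTU above the
standard Ethernet MTU flips the jumbo path on. Reduced to its control flow,
with assumed constants standing in for the driver's:

    #include <errno.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define ETHER_MIN_MTU 68      /* minimum IPv4 MTU */
    #define ETHER_MTU     1500    /* standard Ethernet MTU */
    #define DEV_MAX_MTU   9574    /* assumed ceiling, stands in for BNXT_MAX_MTU */

    /* Validate new_mtu and report whether jumbo handling is needed. */
    static int check_mtu(uint16_t new_mtu, bool *jumbo)
    {
            if (new_mtu < ETHER_MIN_MTU || new_mtu > DEV_MAX_MTU)
                    return -EINVAL;           /* outside the supported range */
            *jumbo = new_mtu > ETHER_MTU;     /* jumbo frames need scattered Rx */
            return 0;
    }
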
@@ -1805,8 +1806,8 @@ bnxt_match_and_validate_ether_filter(struct bnxt *bp,
                goto exit;
        }
 
-       vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
-       vnic = STAILQ_FIRST(&bp->ff_pool[efilter->queue]);
+       vnic0 = &bp->vnic_info[0];
+       vnic = &bp->vnic_info[efilter->queue];
        if (vnic == NULL) {
                PMD_DRV_LOG(ERR, "Invalid queue %d\n", efilter->queue);
                *ret = -EINVAL;
@@ -1864,8 +1865,8 @@ bnxt_ethertype_filter(struct rte_eth_dev *dev,
                return -EINVAL;
        }
 
-       vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
-       vnic = STAILQ_FIRST(&bp->ff_pool[efilter->queue]);
+       vnic0 = &bp->vnic_info[0];
+       vnic = &bp->vnic_info[efilter->queue];
 
        switch (filter_op) {
        case RTE_ETH_FILTER_ADD:
@@ -2081,8 +2082,8 @@ bnxt_cfg_ntuple_filter(struct bnxt *bp,
        if (ret < 0)
                goto free_filter;
 
-       vnic = STAILQ_FIRST(&bp->ff_pool[nfilter->queue]);
-       vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
+       vnic = &bp->vnic_info[nfilter->queue];
+       vnic0 = &bp->vnic_info[0];
        filter1 = STAILQ_FIRST(&vnic0->filter);
        if (filter1 == NULL) {
                ret = -1;
@@ -2375,8 +2376,8 @@ bnxt_parse_fdir_filter(struct bnxt *bp,
                return -EINVAL;
        }
 
-       vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
-       vnic = STAILQ_FIRST(&bp->ff_pool[fdir->action.rx_queue]);
+       vnic0 = &bp->vnic_info[0];
+       vnic = &bp->vnic_info[fdir->action.rx_queue];
        if (vnic == NULL) {
                PMD_DRV_LOG(ERR, "Invalid queue %d\n", fdir->action.rx_queue);
                return -EINVAL;
@@ -2497,9 +2498,9 @@ bnxt_fdir_filter(struct rte_eth_dev *dev,
                filter->filter_type = HWRM_CFA_NTUPLE_FILTER;
 
                if (fdir->action.behavior == RTE_ETH_FDIR_REJECT)
-                       vnic = STAILQ_FIRST(&bp->ff_pool[0]);
+                       vnic = &bp->vnic_info[0];
                else
-                       vnic = STAILQ_FIRST(&bp->ff_pool[fdir->action.rx_queue]);
+                       vnic = &bp->vnic_info[fdir->action.rx_queue];
 
                match = bnxt_match_fdir(bp, filter, &mvnic);
                if (match != NULL && filter_op == RTE_ETH_FILTER_ADD) {
@@ -3226,7 +3227,9 @@ skip_init:
                mz_name[RTE_MEMZONE_NAMESIZE - 1] = 0;
                mz = rte_memzone_lookup(mz_name);
                total_alloc_len = RTE_CACHE_LINE_ROUNDUP(
-                               sizeof(struct rx_port_stats) + 512);
+                                       sizeof(struct rx_port_stats) +
+                                       sizeof(struct rx_port_stats_ext) +
+                                       512);
                if (!mz) {
                        mz = rte_memzone_reserve(mz_name, total_alloc_len,
                                        SOCKET_ID_ANY,
@@ -3262,7 +3265,9 @@ skip_init:
                mz_name[RTE_MEMZONE_NAMESIZE - 1] = 0;
                mz = rte_memzone_lookup(mz_name);
                total_alloc_len = RTE_CACHE_LINE_ROUNDUP(
-                               sizeof(struct tx_port_stats) + 512);
+                                       sizeof(struct tx_port_stats) +
+                                       sizeof(struct tx_port_stats_ext) +
+                                       512);
                if (!mz) {
                        mz = rte_memzone_reserve(mz_name,
                                        total_alloc_len,
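
Both stats hunks use the usual DPDK lookup-then-reserve idiom: look the
memzone up by name first and reserve it only when absent, so a driver restart
(or a secondary process) reuses the existing allocation instead of
double-allocating. A minimal sketch with the real rte_memzone API:

    #include <rte_common.h>
    #include <rte_memzone.h>

    /* Return the existing zone by name, or reserve a rounded-up new one. */
    static const struct rte_memzone *
    get_or_reserve(const char *name, size_t len)
    {
            const struct rte_memzone *mz = rte_memzone_lookup(name);

            if (mz == NULL)
                    mz = rte_memzone_reserve(name, RTE_CACHE_LINE_ROUNDUP(len),
                                             SOCKET_ID_ANY, 0);
            return mz; /* NULL if reservation failed */
    }
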
@@ -3293,8 +3298,30 @@ skip_init:
                bp->hw_tx_port_stats_map = mz_phys_addr;
 
                bp->flags |= BNXT_FLAG_PORT_STATS;
+
+               /* Display extended statistics if FW supports it */
+               if (bp->hwrm_spec_code < HWRM_SPEC_CODE_1_8_4 ||
+                   bp->hwrm_spec_code == HWRM_SPEC_CODE_1_9_0)
+                       goto skip_ext_stats;
+
+               bp->hw_rx_port_stats_ext = (void *)
+                       ((uint8_t *)bp->hw_rx_port_stats +
+                        sizeof(struct rx_port_stats));
+               bp->hw_rx_port_stats_ext_map = bp->hw_rx_port_stats_map +
+                       sizeof(struct rx_port_stats);
+               bp->flags |= BNXT_FLAG_EXT_RX_PORT_STATS;
+
+               if (bp->hwrm_spec_code >= HWRM_SPEC_CODE_1_9_2) {
+                       bp->hw_tx_port_stats_ext = (void *)
+                               ((uint8_t *)bp->hw_tx_port_stats +
+                                sizeof(struct tx_port_stats));
+                       bp->hw_tx_port_stats_ext_map =
+                               bp->hw_tx_port_stats_map +
+                               sizeof(struct tx_port_stats);
+                       bp->flags |= BNXT_FLAG_EXT_TX_PORT_STATS;
+               }
        }
 
+skip_ext_stats:
        rc = bnxt_alloc_hwrm_resources(bp);
        if (rc) {
                PMD_DRV_LOG(ERR,
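
The gate above keys off the HWRM spec version, which firmware reports packed
as (major << 16) | (minor << 8) | update, so 1.9.2 is 0x10902: extended port
counters are available from 1.8.4 onward except in 1.9.0. The same predicate
in isolation, with assumed constants:

    #include <stdbool.h>
    #include <stdint.h>

    #define SPEC_CODE_1_8_4 0x10804
    #define SPEC_CODE_1_9_0 0x10900

    /* True when the firmware spec level exposes extended port counters. */
    static bool ext_stats_supported(uint32_t spec_code)
    {
            return spec_code >= SPEC_CODE_1_8_4 &&
                   spec_code != SPEC_CODE_1_9_0;
    }
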
@@ -3474,10 +3501,6 @@ bnxt_dev_uninit(struct rte_eth_dev *eth_dev)
        bnxt_disable_int(bp);
        bnxt_free_int(bp);
        bnxt_free_mem(bp);
-       if (eth_dev->data->mac_addrs != NULL) {
-               rte_free(eth_dev->data->mac_addrs);
-               eth_dev->data->mac_addrs = NULL;
-       }
        if (bp->grp_info != NULL) {
                rte_free(bp->grp_info);
                bp->grp_info = NULL;
@@ -3515,7 +3538,11 @@ static int bnxt_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
 
 static int bnxt_pci_remove(struct rte_pci_device *pci_dev)
 {
-       return rte_eth_dev_pci_generic_remove(pci_dev, bnxt_dev_uninit);
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+               return rte_eth_dev_pci_generic_remove(pci_dev,
+                               bnxt_dev_uninit);
+       else
+               return rte_eth_dev_pci_generic_remove(pci_dev, NULL);
 }
 
 static struct rte_pci_driver bnxt_rte_pmd = {
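
The remove hook now splits by process role: only the primary process, which
owns the shared device state, runs the full bnxt_dev_uninit() teardown, while
a secondary process passes a NULL uninit callback so the generic helper merely
releases its local ethdev copy. A sketch of the same gating around the real
EAL call, with placeholder callbacks:

    #include <rte_eal.h>

    /* Heavyweight teardown belongs to the primary process only. */
    static int remove_dev(int (*full_uninit)(void), int (*release_only)(void))
    {
            if (rte_eal_process_type() == RTE_PROC_PRIMARY)
                    return full_uninit();
            return release_only(); /* secondary: leave shared state alone */
    }
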
@@ -3542,7 +3569,7 @@ bool is_bnxt_supported(struct rte_eth_dev *dev)
 
 RTE_INIT(bnxt_init_log)
 {
-       bnxt_logtype_driver = rte_log_register("pmd.bnxt.driver");
+       bnxt_logtype_driver = rte_log_register("pmd.net.bnxt.driver");
        if (bnxt_logtype_driver >= 0)
                rte_log_set_level(bnxt_logtype_driver, RTE_LOG_INFO);
 }
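
The rename moves the log type under the standardized pmd.<class>.<driver>
namespace used by the other network PMDs, so per-driver log-level matching on
the pmd.net prefix also covers it. Registering a dynamic log type at
constructor time follows this shape, shown with a hypothetical component name:

    #include <rte_common.h>
    #include <rte_log.h>

    static int mydrv_logtype;

    RTE_INIT(mydrv_init_log)
    {
            /* Register under the pmd.<class>.<driver> convention. */
            mydrv_logtype = rte_log_register("pmd.net.mydrv.driver");
            if (mydrv_logtype >= 0)
                    rte_log_set_level(mydrv_logtype, RTE_LOG_INFO);
    }
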
index 1038941..f43fe0d 100644 (file)
@@ -80,21 +80,21 @@ void bnxt_free_all_filters(struct bnxt *bp)
 {
        struct bnxt_vnic_info *vnic;
        struct bnxt_filter_info *filter, *temp_filter;
-       int i;
-
-       for (i = 0; i < MAX_FF_POOLS; i++) {
-               STAILQ_FOREACH(vnic, &bp->ff_pool[i], next) {
-                       filter = STAILQ_FIRST(&vnic->filter);
-                       while (filter) {
-                               temp_filter = STAILQ_NEXT(filter, next);
-                               STAILQ_REMOVE(&vnic->filter, filter,
-                                             bnxt_filter_info, next);
-                               STAILQ_INSERT_TAIL(&bp->free_filter_list,
-                                                  filter, next);
-                               filter = temp_filter;
-                       }
-                       STAILQ_INIT(&vnic->filter);
+       unsigned int i;
+
+       for (i = 0; i < bp->nr_vnics; i++) {
+               vnic = &bp->vnic_info[i];
+               filter = STAILQ_FIRST(&vnic->filter);
+               while (filter) {
+                       temp_filter = STAILQ_NEXT(filter, next);
+                       STAILQ_REMOVE(&vnic->filter, filter,
+                                       bnxt_filter_info, next);
+                       STAILQ_INSERT_TAIL(&bp->free_filter_list,
+                                       filter, next);
+                       filter = temp_filter;
                }
+               STAILQ_INIT(&vnic->filter);
        }
 
        for (i = 0; i < bp->pf.max_vfs; i++) {
index ac76567..1afe674 100644 (file)
@@ -678,7 +678,7 @@ bnxt_get_l2_filter(struct bnxt *bp, struct bnxt_filter_info *nf,
        struct bnxt_vnic_info *vnic0;
        int rc;
 
-       vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
+       vnic0 = &bp->vnic_info[0];
        f0 = STAILQ_FIRST(&vnic0->filter);
 
        /* This flow has same DST MAC as the port/l2 filter. */
@@ -763,8 +763,8 @@ bnxt_validate_and_parse_flow(struct rte_eth_dev *dev,
                }
                PMD_DRV_LOG(DEBUG, "Queue index %d\n", act_q->index);
 
-               vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
-               vnic = STAILQ_FIRST(&bp->ff_pool[act_q->index]);
+               vnic0 = &bp->vnic_info[0];
+               vnic = &bp->vnic_info[act_q->index];
                if (vnic == NULL) {
                        rte_flow_error_set(error,
                                           EINVAL,
@@ -786,7 +786,7 @@ bnxt_validate_and_parse_flow(struct rte_eth_dev *dev,
                PMD_DRV_LOG(DEBUG, "VNIC found\n");
                break;
        case RTE_FLOW_ACTION_TYPE_DROP:
-               vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
+               vnic0 = &bp->vnic_info[0];
                filter1 = bnxt_get_l2_filter(bp, filter, vnic0);
                if (filter1 == NULL) {
                        rc = -ENOSPC;
@@ -802,7 +802,7 @@ bnxt_validate_and_parse_flow(struct rte_eth_dev *dev,
                                HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_DROP;
                break;
        case RTE_FLOW_ACTION_TYPE_COUNT:
-               vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
+               vnic0 = &bp->vnic_info[0];
                filter1 = bnxt_get_l2_filter(bp, filter, vnic0);
                if (filter1 == NULL) {
                        rc = -ENOSPC;
@@ -854,7 +854,7 @@ bnxt_validate_and_parse_flow(struct rte_eth_dev *dev,
                filter->mirror_vnic_id = dflt_vnic;
                filter->enables |= NTUPLE_FLTR_ALLOC_INPUT_EN_MIRROR_VNIC_ID;
 
-               vnic0 = STAILQ_FIRST(&bp->ff_pool[0]);
+               vnic0 = &bp->vnic_info[0];
                filter1 = bnxt_get_l2_filter(bp, filter, vnic0);
                if (filter1 == NULL) {
                        rc = -ENOSPC;
index c682488..9999760 100644 (file)
@@ -26,7 +26,7 @@
 
 #include <rte_io.h>
 
-#define HWRM_CMD_TIMEOUT               10000
+#define HWRM_CMD_TIMEOUT               6000000
 #define HWRM_SPEC_CODE_1_8_3           0x10803
 #define HWRM_VERSION_1_9_1             0x10901
 
@@ -70,7 +70,7 @@ static int page_roundup(size_t size)
  */
 
 static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
-                                       uint32_t msg_len)
+                                 uint32_t msg_len, bool use_kong_mb)
 {
        unsigned int i;
        struct input *req = msg;
@@ -80,6 +80,10 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
        uint8_t *valid;
        uint16_t max_req_len = bp->max_req_len;
        struct hwrm_short_input short_input = { 0 };
+       uint16_t bar_offset = use_kong_mb ?
+               GRCPF_REG_KONG_CHANNEL_OFFSET : GRCPF_REG_CHIMP_CHANNEL_OFFSET;
+       uint16_t mb_trigger_offset = use_kong_mb ?
+               GRCPF_REG_KONG_COMM_TRIGGER : GRCPF_REG_CHIMP_COMM_TRIGGER;
 
        if (bp->flags & BNXT_FLAG_SHORT_CMD) {
                void *short_cmd_req = bp->hwrm_short_cmd_req_addr;
@@ -105,19 +109,19 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
 
        /* Write request msg to hwrm channel */
        for (i = 0; i < msg_len; i += 4) {
-               bar = (uint8_t *)bp->bar0 + i;
+               bar = (uint8_t *)bp->bar0 + bar_offset + i;
                rte_write32(*data, bar);
                data++;
        }
 
        /* Zero the rest of the request space */
        for (; i < max_req_len; i += 4) {
-               bar = (uint8_t *)bp->bar0 + i;
+               bar = (uint8_t *)bp->bar0 + bar_offset + i;
                rte_write32(0, bar);
        }
 
        /* Ring channel doorbell */
-       bar = (uint8_t *)bp->bar0 + 0x100;
+       bar = (uint8_t *)bp->bar0 + mb_trigger_offset;
        rte_write32(1, bar);
 
        /* Poll for the valid bit */
@@ -131,7 +135,7 @@ static int bnxt_hwrm_send_message(struct bnxt *bp, void *msg,
                        if (*valid == HWRM_RESP_VALID_KEY)
                                break;
                }
-               rte_delay_us(600);
+               rte_delay_us(1);
        }
 
        if (i >= HWRM_CMD_TIMEOUT) {
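
Taken together with the HWRM_CMD_TIMEOUT bump above (10000 to 6000000), the
total wait budget stays at the same six seconds, but the valid byte is now
polled every microsecond instead of every 600, so short commands complete
with far less added latency. The shape of such a bounded poll, with assumed
names:

    #include <stdint.h>
    #include <rte_cycles.h>

    #define CMD_TIMEOUT 6000000 /* 1 us polling iterations, ~6 s budget */

    /* Return 0 once *valid holds the key, -1 on timeout. */
    static int poll_valid(volatile uint8_t *valid, uint8_t key)
    {
            uint32_t i;

            for (i = 0; i < CMD_TIMEOUT; i++) {
                    if (*valid == key)
                            return 0;
                    rte_delay_us(1); /* one-microsecond polling interval */
            }
            return -1;
    }
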
@@ -156,12 +160,13 @@ err_ret:
  *
  * HWRM_UNLOCK() must be called after all response processing is completed.
  */
-#define HWRM_PREP(req, type) do { \
+#define HWRM_PREP(req, type, kong) do { \
        rte_spinlock_lock(&bp->hwrm_lock); \
        memset(bp->hwrm_cmd_resp_addr, 0, bp->max_resp_len); \
        req.req_type = rte_cpu_to_le_16(HWRM_##type); \
        req.cmpl_ring = rte_cpu_to_le_16(-1); \
-       req.seq_id = rte_cpu_to_le_16(bp->hwrm_cmd_seq++); \
+       req.seq_id = kong ? rte_cpu_to_le_16(bp->kong_cmd_seq++) :\
+               rte_cpu_to_le_16(bp->hwrm_cmd_seq++); \
        req.target_id = rte_cpu_to_le_16(0xffff); \
        req.resp_addr = rte_cpu_to_le_64(bp->hwrm_cmd_resp_dma_addr); \
 } while (0)
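
With two mailboxes, each channel keeps an independent sequence counter so
responses can be matched to outstanding requests per channel; the extra
HWRM_PREP() argument selects which counter to advance. The selection reduced
to a helper, with a hypothetical state struct rather than the driver's:

    #include <stdbool.h>
    #include <stdint.h>

    struct mb_state {
            uint16_t chimp_seq; /* default (ChiMP) mailbox sequence */
            uint16_t kong_seq;  /* auxiliary (Kong) mailbox sequence */
    };

    /* Pick and post-increment the sequence for the chosen channel. */
    static uint16_t next_seq(struct mb_state *s, bool use_kong)
    {
            return use_kong ? s->kong_seq++ : s->chimp_seq++;
    }
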
@@ -220,11 +225,11 @@ int bnxt_hwrm_cfa_l2_clear_rx_mask(struct bnxt *bp, struct bnxt_vnic_info *vnic)
        struct hwrm_cfa_l2_set_rx_mask_input req = {.req_type = 0 };
        struct hwrm_cfa_l2_set_rx_mask_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, CFA_L2_SET_RX_MASK);
+       HWRM_PREP(req, CFA_L2_SET_RX_MASK, BNXT_USE_CHIMP_MB);
        req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
        req.mask = 0;
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -245,7 +250,7 @@ int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp,
        if (vnic->fw_vnic_id == INVALID_HW_RING_ID)
                return rc;
 
-       HWRM_PREP(req, CFA_L2_SET_RX_MASK);
+       HWRM_PREP(req, CFA_L2_SET_RX_MASK, BNXT_USE_CHIMP_MB);
        req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
 
        /* FIXME add multicast flag, when multicast adding options is supported
@@ -275,7 +280,7 @@ int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp,
        }
        req.mask = rte_cpu_to_le_32(mask);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -307,14 +312,14 @@ int bnxt_hwrm_cfa_vlan_antispoof_cfg(struct bnxt *bp, uint16_t fid,
                                return 0;
                }
        }
-       HWRM_PREP(req, CFA_VLAN_ANTISPOOF_CFG);
+       HWRM_PREP(req, CFA_VLAN_ANTISPOOF_CFG, BNXT_USE_CHIMP_MB);
        req.fid = rte_cpu_to_le_16(fid);
 
        req.vlan_tag_mask_tbl_addr =
                rte_cpu_to_le_64(rte_mem_virt2iova(vlan_table));
        req.num_vlan_entries = rte_cpu_to_le_32((uint32_t)vlan_count);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -332,11 +337,11 @@ int bnxt_hwrm_clear_l2_filter(struct bnxt *bp,
        if (filter->fw_l2_filter_id == UINT64_MAX)
                return 0;
 
-       HWRM_PREP(req, CFA_L2_FILTER_FREE);
+       HWRM_PREP(req, CFA_L2_FILTER_FREE, BNXT_USE_CHIMP_MB);
 
        req.l2_filter_id = rte_cpu_to_le_64(filter->fw_l2_filter_id);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -375,9 +380,11 @@ int bnxt_hwrm_set_l2_filter(struct bnxt *bp,
        if (filter->fw_l2_filter_id != UINT64_MAX)
                bnxt_hwrm_clear_l2_filter(bp, filter);
 
-       HWRM_PREP(req, CFA_L2_FILTER_ALLOC);
+       HWRM_PREP(req, CFA_L2_FILTER_ALLOC, BNXT_USE_CHIMP_MB);
 
        req.flags = rte_cpu_to_le_32(filter->flags);
+       req.flags |= rte_cpu_to_le_32(
+               HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_OUTERMOST);
 
        enables = filter->enables |
              HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_DST_ID;
@@ -410,7 +417,7 @@ int bnxt_hwrm_set_l2_filter(struct bnxt *bp,
 
        req.enables = rte_cpu_to_le_32(enables);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -430,7 +437,7 @@ int bnxt_hwrm_ptp_cfg(struct bnxt *bp)
        if (!ptp)
                return 0;
 
-       HWRM_PREP(req, PORT_MAC_CFG);
+       HWRM_PREP(req, PORT_MAC_CFG, BNXT_USE_CHIMP_MB);
 
        if (ptp->rx_filter)
                flags |= HWRM_PORT_MAC_CFG_INPUT_FLAGS_PTP_RX_TS_CAPTURE_ENABLE;
@@ -447,7 +454,7 @@ int bnxt_hwrm_ptp_cfg(struct bnxt *bp)
                (HWRM_PORT_MAC_CFG_INPUT_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE);
        req.rx_ts_capture_ptp_msg_type = rte_cpu_to_le_16(ptp->rxctl);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
        HWRM_UNLOCK();
 
        return rc;
@@ -464,11 +471,11 @@ static int bnxt_hwrm_ptp_qcfg(struct bnxt *bp)
        if (ptp)
                return 0;
 
-       HWRM_PREP(req, PORT_MAC_PTP_QCFG);
+       HWRM_PREP(req, PORT_MAC_PTP_QCFG, BNXT_USE_CHIMP_MB);
 
        req.port_id = rte_cpu_to_le_16(bp->pf.port_id);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -513,11 +520,11 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp)
        uint32_t flags;
        int i;
 
-       HWRM_PREP(req, FUNC_QCAPS);
+       HWRM_PREP(req, FUNC_QCAPS, BNXT_USE_CHIMP_MB);
 
        req.fid = rte_cpu_to_le_16(0xffff);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -615,11 +622,11 @@ int bnxt_hwrm_func_reset(struct bnxt *bp)
        struct hwrm_func_reset_input req = {.req_type = 0 };
        struct hwrm_func_reset_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, FUNC_RESET);
+       HWRM_PREP(req, FUNC_RESET, BNXT_USE_CHIMP_MB);
 
        req.enables = rte_cpu_to_le_32(0);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -636,7 +643,7 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
        if (bp->flags & BNXT_FLAG_REGISTERED)
                return 0;
 
-       HWRM_PREP(req, FUNC_DRV_RGTR);
+       HWRM_PREP(req, FUNC_DRV_RGTR, BNXT_USE_CHIMP_MB);
        req.enables = rte_cpu_to_le_32(HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_VER |
                        HWRM_FUNC_DRV_RGTR_INPUT_ENABLES_ASYNC_EVENT_FWD);
        req.ver_maj = RTE_VER_YEAR;
@@ -668,7 +675,7 @@ int bnxt_hwrm_func_driver_register(struct bnxt *bp)
                rte_cpu_to_le_32(ASYNC_CMPL_EVENT_ID_PF_DRVR_UNLOAD |
                                 ASYNC_CMPL_EVENT_ID_VF_CFG_CHANGE);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -694,7 +701,7 @@ int bnxt_hwrm_func_reserve_vf_resc(struct bnxt *bp, bool test)
        struct hwrm_func_vf_cfg_output *resp = bp->hwrm_cmd_resp_addr;
        struct hwrm_func_vf_cfg_input req = {0};
 
-       HWRM_PREP(req, FUNC_VF_CFG);
+       HWRM_PREP(req, FUNC_VF_CFG, BNXT_USE_CHIMP_MB);
 
        req.enables = rte_cpu_to_le_32
                        (HWRM_FUNC_VF_CFG_INPUT_ENABLES_NUM_RX_RINGS  |
@@ -733,7 +740,7 @@ int bnxt_hwrm_func_reserve_vf_resc(struct bnxt *bp, bool test)
 
        req.flags = rte_cpu_to_le_32(flags);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        if (test)
                HWRM_CHECK_RESULT_SILENT();
@@ -750,10 +757,10 @@ int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp)
        struct hwrm_func_resource_qcaps_output *resp = bp->hwrm_cmd_resp_addr;
        struct hwrm_func_resource_qcaps_input req = {0};
 
-       HWRM_PREP(req, FUNC_RESOURCE_QCAPS);
+       HWRM_PREP(req, FUNC_RESOURCE_QCAPS, BNXT_USE_CHIMP_MB);
        req.fid = rte_cpu_to_le_16(0xffff);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -782,20 +789,19 @@ int bnxt_hwrm_ver_get(struct bnxt *bp)
        int rc = 0;
        struct hwrm_ver_get_input req = {.req_type = 0 };
        struct hwrm_ver_get_output *resp = bp->hwrm_cmd_resp_addr;
-       uint32_t my_version;
        uint32_t fw_version;
        uint16_t max_resp_len;
        char type[RTE_MEMZONE_NAMESIZE];
        uint32_t dev_caps_cfg;
 
        bp->max_req_len = HWRM_MAX_REQ_LEN;
-       HWRM_PREP(req, VER_GET);
+       HWRM_PREP(req, VER_GET, BNXT_USE_CHIMP_MB);
 
        req.hwrm_intf_maj = HWRM_VERSION_MAJOR;
        req.hwrm_intf_min = HWRM_VERSION_MINOR;
        req.hwrm_intf_upd = HWRM_VERSION_UPDATE;
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -810,10 +816,6 @@ int bnxt_hwrm_ver_get(struct bnxt *bp)
        PMD_DRV_LOG(INFO, "Driver HWRM version: %d.%d.%d\n",
                HWRM_VERSION_MAJOR, HWRM_VERSION_MINOR, HWRM_VERSION_UPDATE);
 
-       my_version = HWRM_VERSION_MAJOR << 16;
-       my_version |= HWRM_VERSION_MINOR << 8;
-       my_version |= HWRM_VERSION_UPDATE;
-
        fw_version = resp->hwrm_intf_maj_8b << 16;
        fw_version |= resp->hwrm_intf_min_8b << 8;
        fw_version |= resp->hwrm_intf_upd_8b;
@@ -825,21 +827,6 @@ int bnxt_hwrm_ver_get(struct bnxt *bp)
                goto error;
        }
 
-       if (my_version != fw_version) {
-               PMD_DRV_LOG(INFO, "BNXT Driver/HWRM API mismatch.\n");
-               if (my_version < fw_version) {
-                       PMD_DRV_LOG(INFO,
-                               "Firmware API version is newer than driver.\n");
-                       PMD_DRV_LOG(INFO,
-                               "The driver may be missing features.\n");
-               } else {
-                       PMD_DRV_LOG(INFO,
-                               "Firmware API version is older than driver.\n");
-                       PMD_DRV_LOG(INFO,
-                               "Not all driver features may be functional.\n");
-               }
-       }
-
        if (bp->max_req_len > resp->max_req_win_len) {
                PMD_DRV_LOG(ERR, "Unsupported request length\n");
                rc = -EINVAL;
@@ -899,6 +886,14 @@ int bnxt_hwrm_ver_get(struct bnxt *bp)
 
                bp->flags |= BNXT_FLAG_SHORT_CMD;
        }
+       if (dev_caps_cfg &
+           HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_KONG_MB_CHNL_SUPPORTED) {
+               bp->flags |= BNXT_FLAG_KONG_MB_EN;
+               PMD_DRV_LOG(DEBUG, "Kong mailbox channel enabled\n");
+       }
+       if (dev_caps_cfg &
+           HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_TRUSTED_VF_SUPPORTED)
+               PMD_DRV_LOG(DEBUG, "FW supports Trusted VFs\n");
 
 error:
        HWRM_UNLOCK();
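
dev_caps_cfg is a capability bitmask returned in the VER_GET response; the
driver latches each bit it acts on into its own flags word and logs the rest
for visibility (as with the trusted-VF bit, which is only logged here). The
mapping pattern in isolation, with assumed bit values:

    #include <stdint.h>

    #define FW_CAP_KONG_MB   (1u << 4) /* assumed advertised-capability bit */
    #define DRV_FLAG_KONG_MB (1u << 0) /* assumed driver-side flag */

    /* Translate advertised firmware capabilities into driver flags. */
    static uint32_t latch_caps(uint32_t dev_caps_cfg)
    {
            uint32_t flags = 0;

            if (dev_caps_cfg & FW_CAP_KONG_MB)
                    flags |= DRV_FLAG_KONG_MB;
            return flags;
    }
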
@@ -914,10 +909,10 @@ int bnxt_hwrm_func_driver_unregister(struct bnxt *bp, uint32_t flags)
        if (!(bp->flags & BNXT_FLAG_REGISTERED))
                return 0;
 
-       HWRM_PREP(req, FUNC_DRV_UNRGTR);
+       HWRM_PREP(req, FUNC_DRV_UNRGTR, BNXT_USE_CHIMP_MB);
        req.flags = flags;
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -934,7 +929,7 @@ static int bnxt_hwrm_port_phy_cfg(struct bnxt *bp, struct bnxt_link_info *conf)
        struct hwrm_port_phy_cfg_output *resp = bp->hwrm_cmd_resp_addr;
        uint32_t enables = 0;
 
-       HWRM_PREP(req, PORT_PHY_CFG);
+       HWRM_PREP(req, PORT_PHY_CFG, BNXT_USE_CHIMP_MB);
 
        if (conf->link_up) {
                /* Setting Fixed Speed. But AutoNeg is ON, So disable it */
@@ -983,7 +978,7 @@ static int bnxt_hwrm_port_phy_cfg(struct bnxt *bp, struct bnxt_link_info *conf)
                PMD_DRV_LOG(INFO, "Force Link Down\n");
        }
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -998,9 +993,9 @@ static int bnxt_hwrm_port_phy_qcfg(struct bnxt *bp,
        struct hwrm_port_phy_qcfg_input req = {0};
        struct hwrm_port_phy_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, PORT_PHY_QCFG);
+       HWRM_PREP(req, PORT_PHY_QCFG, BNXT_USE_CHIMP_MB);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -1046,14 +1041,14 @@ int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
        struct hwrm_queue_qportcfg_output *resp = bp->hwrm_cmd_resp_addr;
        int i;
 
-       HWRM_PREP(req, QUEUE_QPORTCFG);
+       HWRM_PREP(req, QUEUE_QPORTCFG, BNXT_USE_CHIMP_MB);
 
        req.flags = HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_TX;
        /* HWRM Version >= 1.9.1 */
        if (bp->hwrm_spec_code >= HWRM_VERSION_1_9_1)
                req.drv_qmap_cap =
                        HWRM_QUEUE_QPORTCFG_INPUT_DRV_QMAP_CAP_ENABLED;
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -1099,7 +1094,7 @@ int bnxt_hwrm_ring_alloc(struct bnxt *bp,
        struct hwrm_ring_alloc_input req = {.req_type = 0 };
        struct hwrm_ring_alloc_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, RING_ALLOC);
+       HWRM_PREP(req, RING_ALLOC, BNXT_USE_CHIMP_MB);
 
        req.page_tbl_addr = rte_cpu_to_le_64(ring->bd_dma);
        req.fbo = rte_cpu_to_le_32(0);
@@ -1135,7 +1130,7 @@ int bnxt_hwrm_ring_alloc(struct bnxt *bp,
        }
        req.enables = rte_cpu_to_le_32(enables);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        if (rc || resp->error_code) {
                if (rc == 0 && resp->error_code)
@@ -1175,12 +1170,12 @@ int bnxt_hwrm_ring_free(struct bnxt *bp,
        struct hwrm_ring_free_input req = {.req_type = 0 };
        struct hwrm_ring_free_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, RING_FREE);
+       HWRM_PREP(req, RING_FREE, BNXT_USE_CHIMP_MB);
 
        req.ring_type = ring_type;
        req.ring_id = rte_cpu_to_le_16(ring->fw_ring_id);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        if (rc || resp->error_code) {
                if (rc == 0 && resp->error_code)
@@ -1215,14 +1210,14 @@ int bnxt_hwrm_ring_grp_alloc(struct bnxt *bp, unsigned int idx)
        struct hwrm_ring_grp_alloc_input req = {.req_type = 0 };
        struct hwrm_ring_grp_alloc_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, RING_GRP_ALLOC);
+       HWRM_PREP(req, RING_GRP_ALLOC, BNXT_USE_CHIMP_MB);
 
        req.cr = rte_cpu_to_le_16(bp->grp_info[idx].cp_fw_ring_id);
        req.rr = rte_cpu_to_le_16(bp->grp_info[idx].rx_fw_ring_id);
        req.ar = rte_cpu_to_le_16(bp->grp_info[idx].ag_fw_ring_id);
        req.sc = rte_cpu_to_le_16(bp->grp_info[idx].fw_stats_ctx);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -1240,11 +1235,11 @@ int bnxt_hwrm_ring_grp_free(struct bnxt *bp, unsigned int idx)
        struct hwrm_ring_grp_free_input req = {.req_type = 0 };
        struct hwrm_ring_grp_free_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, RING_GRP_FREE);
+       HWRM_PREP(req, RING_GRP_FREE, BNXT_USE_CHIMP_MB);
 
        req.ring_group_id = rte_cpu_to_le_16(bp->grp_info[idx].fw_grp_id);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -1262,11 +1257,11 @@ int bnxt_hwrm_stat_clear(struct bnxt *bp, struct bnxt_cp_ring_info *cpr)
        if (cpr->hw_stats_ctx_id == (uint32_t)HWRM_NA_SIGNATURE)
                return rc;
 
-       HWRM_PREP(req, STAT_CTX_CLR_STATS);
+       HWRM_PREP(req, STAT_CTX_CLR_STATS, BNXT_USE_CHIMP_MB);
 
        req.stat_ctx_id = rte_cpu_to_le_16(cpr->hw_stats_ctx_id);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -1281,14 +1276,14 @@ int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
        struct hwrm_stat_ctx_alloc_input req = {.req_type = 0 };
        struct hwrm_stat_ctx_alloc_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, STAT_CTX_ALLOC);
+       HWRM_PREP(req, STAT_CTX_ALLOC, BNXT_USE_CHIMP_MB);
 
        req.update_period_ms = rte_cpu_to_le_32(0);
 
        req.stats_dma_addr =
            rte_cpu_to_le_64(cpr->hw_stats_map);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -1306,11 +1301,11 @@ int bnxt_hwrm_stat_ctx_free(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
        struct hwrm_stat_ctx_free_input req = {.req_type = 0 };
        struct hwrm_stat_ctx_free_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, STAT_CTX_FREE);
+       HWRM_PREP(req, STAT_CTX_FREE, BNXT_USE_CHIMP_MB);
 
        req.stat_ctx_id = rte_cpu_to_le_16(cpr->hw_stats_ctx_id);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -1336,12 +1331,12 @@ int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic)
        vnic->lb_rule = (uint16_t)HWRM_NA_SIGNATURE;
        vnic->mru = bp->eth_dev->data->mtu + ETHER_HDR_LEN +
                                ETHER_CRC_LEN + VLAN_TAG_SIZE;
-       HWRM_PREP(req, VNIC_ALLOC);
+       HWRM_PREP(req, VNIC_ALLOC, BNXT_USE_CHIMP_MB);
 
        if (vnic->func_default)
                req.flags =
                        rte_cpu_to_le_32(HWRM_VNIC_ALLOC_INPUT_FLAGS_DEFAULT);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -1359,11 +1354,11 @@ static int bnxt_hwrm_vnic_plcmodes_qcfg(struct bnxt *bp,
        struct hwrm_vnic_plcmodes_qcfg_input req = {.req_type = 0 };
        struct hwrm_vnic_plcmodes_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, VNIC_PLCMODES_QCFG);
+       HWRM_PREP(req, VNIC_PLCMODES_QCFG, BNXT_USE_CHIMP_MB);
 
        req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -1387,7 +1382,7 @@ static int bnxt_hwrm_vnic_plcmodes_cfg(struct bnxt *bp,
        struct hwrm_vnic_plcmodes_cfg_input req = {.req_type = 0 };
        struct hwrm_vnic_plcmodes_cfg_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, VNIC_PLCMODES_CFG);
+       HWRM_PREP(req, VNIC_PLCMODES_CFG, BNXT_USE_CHIMP_MB);
 
        req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
        req.flags = rte_cpu_to_le_32(pmode->flags);
@@ -1400,7 +1395,7 @@ static int bnxt_hwrm_vnic_plcmodes_cfg(struct bnxt *bp,
            HWRM_VNIC_PLCMODES_CFG_INPUT_ENABLES_JUMBO_THRESH_VALID
        );
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -1425,7 +1420,7 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic)
        if (rc)
                return rc;
 
-       HWRM_PREP(req, VNIC_CFG);
+       HWRM_PREP(req, VNIC_CFG, BNXT_USE_CHIMP_MB);
 
        /* Only RSS support for now TBD: COS & LB */
        req.enables =
@@ -1445,9 +1440,12 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic)
        req.cos_rule = rte_cpu_to_le_16(vnic->cos_rule);
        req.lb_rule = rte_cpu_to_le_16(vnic->lb_rule);
        req.mru = rte_cpu_to_le_16(vnic->mru);
-       if (vnic->func_default)
+       /* Configure default VNIC only once. */
+       if (vnic->func_default && !(bp->flags & BNXT_FLAG_DFLT_VNIC_SET)) {
                req.flags |=
                    rte_cpu_to_le_32(HWRM_VNIC_CFG_INPUT_FLAGS_DEFAULT);
+               bp->flags |= BNXT_FLAG_DFLT_VNIC_SET;
+       }
        if (vnic->vlan_strip)
                req.flags |=
                    rte_cpu_to_le_32(HWRM_VNIC_CFG_INPUT_FLAGS_VLAN_STRIP_MODE);
@@ -1464,7 +1462,7 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic)
                req.flags |= rte_cpu_to_le_32(
                        HWRM_VNIC_QCFG_OUTPUT_FLAGS_RSS_DFLT_CR_MODE);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -1485,14 +1483,14 @@ int bnxt_hwrm_vnic_qcfg(struct bnxt *bp, struct bnxt_vnic_info *vnic,
                PMD_DRV_LOG(DEBUG, "VNIC QCFG ID %d\n", vnic->fw_vnic_id);
                return rc;
        }
-       HWRM_PREP(req, VNIC_QCFG);
+       HWRM_PREP(req, VNIC_QCFG, BNXT_USE_CHIMP_MB);
 
        req.enables =
                rte_cpu_to_le_32(HWRM_VNIC_QCFG_INPUT_ENABLES_VF_ID_VALID);
        req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
        req.vf_id = rte_cpu_to_le_16(fw_vf_id);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -1526,9 +1524,9 @@ int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic)
        struct hwrm_vnic_rss_cos_lb_ctx_alloc_output *resp =
                                                bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, VNIC_RSS_COS_LB_CTX_ALLOC);
+       HWRM_PREP(req, VNIC_RSS_COS_LB_CTX_ALLOC, BNXT_USE_CHIMP_MB);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -1550,11 +1548,11 @@ int bnxt_hwrm_vnic_ctx_free(struct bnxt *bp, struct bnxt_vnic_info *vnic)
                PMD_DRV_LOG(DEBUG, "VNIC RSS Rule %x\n", vnic->rss_rule);
                return rc;
        }
-       HWRM_PREP(req, VNIC_RSS_COS_LB_CTX_FREE);
+       HWRM_PREP(req, VNIC_RSS_COS_LB_CTX_FREE, BNXT_USE_CHIMP_MB);
 
        req.rss_cos_lb_ctx_id = rte_cpu_to_le_16(vnic->rss_rule);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -1575,16 +1573,20 @@ int bnxt_hwrm_vnic_free(struct bnxt *bp, struct bnxt_vnic_info *vnic)
                return rc;
        }
 
-       HWRM_PREP(req, VNIC_FREE);
+       HWRM_PREP(req, VNIC_FREE, BNXT_USE_CHIMP_MB);
 
        req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
 
        vnic->fw_vnic_id = INVALID_HW_RING_ID;
+       /* Clear the latch so the default VNIC can be reconfigured. */
+       if (vnic->func_default && (bp->flags & BNXT_FLAG_DFLT_VNIC_SET))
+               bp->flags &= ~BNXT_FLAG_DFLT_VNIC_SET;
+
        return rc;
 }
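
These two hunks form a configure-once latch: bnxt_hwrm_vnic_cfg() sends the
"default" flag to firmware only the first time the default VNIC is
programmed, and freeing that VNIC clears the latch so a re-created default
VNIC gets programmed again. The generic pattern, with an assumed flag bit:

    #include <stdbool.h>
    #include <stdint.h>

    #define FLAG_DFLT_SET (1u << 0) /* assumed latch bit */

    /* True exactly once per latch cycle: caller should send the config. */
    static bool claim_default_cfg(uint32_t *flags)
    {
            if (*flags & FLAG_DFLT_SET)
                    return false; /* already programmed */
            *flags |= FLAG_DFLT_SET;
            return true;
    }

    /* Called when the default object is freed: re-arm the latch. */
    static void release_default_cfg(uint32_t *flags)
    {
            *flags &= ~FLAG_DFLT_SET;
    }
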
 
@@ -1595,7 +1597,7 @@ int bnxt_hwrm_vnic_rss_cfg(struct bnxt *bp,
        struct hwrm_vnic_rss_cfg_input req = {.req_type = 0 };
        struct hwrm_vnic_rss_cfg_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, VNIC_RSS_CFG);
+       HWRM_PREP(req, VNIC_RSS_CFG, BNXT_USE_CHIMP_MB);
 
        req.hash_type = rte_cpu_to_le_32(vnic->hash_type);
        req.hash_mode_flags = vnic->hash_mode;
@@ -1606,7 +1608,7 @@ int bnxt_hwrm_vnic_rss_cfg(struct bnxt *bp,
            rte_cpu_to_le_64(vnic->rss_hash_key_dma_addr);
        req.rss_ctx_idx = rte_cpu_to_le_16(vnic->rss_rule);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -1627,7 +1629,7 @@ int bnxt_hwrm_vnic_plcmode_cfg(struct bnxt *bp,
                return rc;
        }
 
-       HWRM_PREP(req, VNIC_PLCMODES_CFG);
+       HWRM_PREP(req, VNIC_PLCMODES_CFG, BNXT_USE_CHIMP_MB);
 
        req.flags = rte_cpu_to_le_32(
                        HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_JUMBO_PLACEMENT);
@@ -1641,7 +1643,7 @@ int bnxt_hwrm_vnic_plcmode_cfg(struct bnxt *bp,
        req.jumbo_thresh = rte_cpu_to_le_16(size);
        req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -1656,7 +1658,7 @@ int bnxt_hwrm_vnic_tpa_cfg(struct bnxt *bp,
        struct hwrm_vnic_tpa_cfg_input req = {.req_type = 0 };
        struct hwrm_vnic_tpa_cfg_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, VNIC_TPA_CFG);
+       HWRM_PREP(req, VNIC_TPA_CFG, BNXT_USE_CHIMP_MB);
 
        if (enable) {
                req.enables = rte_cpu_to_le_32(
@@ -1677,7 +1679,7 @@ int bnxt_hwrm_vnic_tpa_cfg(struct bnxt *bp,
        }
        req.vnic_id = rte_cpu_to_le_16(vnic->fw_vnic_id);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -1697,9 +1699,9 @@ int bnxt_hwrm_func_vf_mac(struct bnxt *bp, uint16_t vf, const uint8_t *mac_addr)
        memcpy(req.dflt_mac_addr, mac_addr, sizeof(req.dflt_mac_addr));
        req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
 
-       HWRM_PREP(req, FUNC_CFG);
+       HWRM_PREP(req, FUNC_CFG, BNXT_USE_CHIMP_MB);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
 
@@ -1715,11 +1717,11 @@ int bnxt_hwrm_func_qstats_tx_drop(struct bnxt *bp, uint16_t fid,
        struct hwrm_func_qstats_input req = {.req_type = 0};
        struct hwrm_func_qstats_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, FUNC_QSTATS);
+       HWRM_PREP(req, FUNC_QSTATS, BNXT_USE_CHIMP_MB);
 
        req.fid = rte_cpu_to_le_16(fid);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -1738,11 +1740,11 @@ int bnxt_hwrm_func_qstats(struct bnxt *bp, uint16_t fid,
        struct hwrm_func_qstats_input req = {.req_type = 0};
        struct hwrm_func_qstats_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, FUNC_QSTATS);
+       HWRM_PREP(req, FUNC_QSTATS, BNXT_USE_CHIMP_MB);
 
        req.fid = rte_cpu_to_le_16(fid);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -1775,11 +1777,11 @@ int bnxt_hwrm_func_clr_stats(struct bnxt *bp, uint16_t fid)
        struct hwrm_func_clr_stats_input req = {.req_type = 0};
        struct hwrm_func_clr_stats_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, FUNC_CLR_STATS);
+       HWRM_PREP(req, FUNC_CLR_STATS, BNXT_USE_CHIMP_MB);
 
        req.fid = rte_cpu_to_le_16(fid);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -2435,10 +2437,10 @@ int bnxt_hwrm_func_qcfg(struct bnxt *bp)
        uint16_t flags;
        int rc = 0;
 
-       HWRM_PREP(req, FUNC_QCFG);
+       HWRM_PREP(req, FUNC_QCFG, BNXT_USE_CHIMP_MB);
        req.fid = rte_cpu_to_le_16(0xffff);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -2448,6 +2450,11 @@ int bnxt_hwrm_func_qcfg(struct bnxt *bp)
        if (BNXT_PF(bp) && (flags & HWRM_FUNC_QCFG_OUTPUT_FLAGS_MULTI_HOST))
                bp->flags |= BNXT_FLAG_MULTI_HOST;
 
+       if (BNXT_VF(bp) && (flags & HWRM_FUNC_QCFG_OUTPUT_FLAGS_TRUSTED_VF)) {
+               bp->flags |= BNXT_FLAG_TRUSTED_VF_EN;
+               PMD_DRV_LOG(INFO, "Trusted VF cap enabled\n");
+       }
+
        switch (resp->port_partition_type) {
        case HWRM_FUNC_QCFG_OUTPUT_PORT_PARTITION_TYPE_NPAR1_0:
        case HWRM_FUNC_QCFG_OUTPUT_PORT_PARTITION_TYPE_NPAR1_5:
@@ -2522,9 +2529,9 @@ static int bnxt_hwrm_pf_func_cfg(struct bnxt *bp, int tx_rings)
        req.num_hw_ring_grps = rte_cpu_to_le_16(bp->max_ring_grps);
        req.fid = rte_cpu_to_le_16(0xffff);
 
-       HWRM_PREP(req, FUNC_CFG);
+       HWRM_PREP(req, FUNC_CFG, BNXT_USE_CHIMP_MB);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -2595,9 +2602,9 @@ static void reserve_resources_from_vf(struct bnxt *bp,
        int rc;
 
        /* Get the actual allocated values now */
-       HWRM_PREP(req, FUNC_QCAPS);
+       HWRM_PREP(req, FUNC_QCAPS, BNXT_USE_CHIMP_MB);
        req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        if (rc) {
                PMD_DRV_LOG(ERR, "hwrm_func_qcaps failed rc:%d\n", rc);
@@ -2631,9 +2638,9 @@ int bnxt_hwrm_func_qcfg_current_vf_vlan(struct bnxt *bp, int vf)
        int rc;
 
        /* Check for zero MAC address */
-       HWRM_PREP(req, FUNC_QCFG);
+       HWRM_PREP(req, FUNC_QCFG, BNXT_USE_CHIMP_MB);
        req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
        if (rc) {
                PMD_DRV_LOG(ERR, "hwrm_func_qcfg failed rc:%d\n", rc);
                return -1;
@@ -2656,9 +2663,9 @@ static int update_pf_resource_max(struct bnxt *bp)
        int rc;
 
        /* And copy the allocated numbers into the pf struct */
-       HWRM_PREP(req, FUNC_QCFG);
+       HWRM_PREP(req, FUNC_QCFG, BNXT_USE_CHIMP_MB);
        req.fid = rte_cpu_to_le_16(0xffff);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
        HWRM_CHECK_RESULT();
 
        /* Only TX ring value reflects actual allocation? TODO */
@@ -2758,10 +2765,13 @@ int bnxt_hwrm_allocate_vfs(struct bnxt *bp, int num_vfs)
        for (i = 0; i < num_vfs; i++) {
                add_random_mac_if_needed(bp, &req, i);
 
-               HWRM_PREP(req, FUNC_CFG);
+               HWRM_PREP(req, FUNC_CFG, BNXT_USE_CHIMP_MB);
                req.flags = rte_cpu_to_le_32(bp->pf.vf_info[i].func_cfg_flags);
                req.fid = rte_cpu_to_le_16(bp->pf.vf_info[i].fid);
-               rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+               rc = bnxt_hwrm_send_message(bp, &req, sizeof(req),
+                                           BNXT_USE_CHIMP_MB);
 
                /* Clear enable flag for next pass */
                req.enables &= ~rte_cpu_to_le_32(
@@ -2811,13 +2821,13 @@ int bnxt_hwrm_pf_evb_mode(struct bnxt *bp)
        struct hwrm_func_cfg_output *resp = bp->hwrm_cmd_resp_addr;
        int rc;
 
-       HWRM_PREP(req, FUNC_CFG);
+       HWRM_PREP(req, FUNC_CFG, BNXT_USE_CHIMP_MB);
 
        req.fid = rte_cpu_to_le_16(0xffff);
        req.enables = rte_cpu_to_le_32(HWRM_FUNC_CFG_INPUT_ENABLES_EVB_MODE);
        req.evb_mode = bp->pf.evb_mode;
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
 
@@ -2831,10 +2841,10 @@ int bnxt_hwrm_tunnel_dst_port_alloc(struct bnxt *bp, uint16_t port,
        struct hwrm_tunnel_dst_port_alloc_output *resp = bp->hwrm_cmd_resp_addr;
        int rc = 0;
 
-       HWRM_PREP(req, TUNNEL_DST_PORT_ALLOC);
+       HWRM_PREP(req, TUNNEL_DST_PORT_ALLOC, BNXT_USE_CHIMP_MB);
        req.tunnel_type = tunnel_type;
        req.tunnel_dst_port_val = port;
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
        HWRM_CHECK_RESULT();
 
        switch (tunnel_type) {
@@ -2862,11 +2872,11 @@ int bnxt_hwrm_tunnel_dst_port_free(struct bnxt *bp, uint16_t port,
        struct hwrm_tunnel_dst_port_free_output *resp = bp->hwrm_cmd_resp_addr;
        int rc = 0;
 
-       HWRM_PREP(req, TUNNEL_DST_PORT_FREE);
+       HWRM_PREP(req, TUNNEL_DST_PORT_FREE, BNXT_USE_CHIMP_MB);
 
        req.tunnel_type = tunnel_type;
        req.tunnel_dst_port_id = rte_cpu_to_be_16(port);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -2881,11 +2891,11 @@ int bnxt_hwrm_func_cfg_vf_set_flags(struct bnxt *bp, uint16_t vf,
        struct hwrm_func_cfg_input req = {0};
        int rc;
 
-       HWRM_PREP(req, FUNC_CFG);
+       HWRM_PREP(req, FUNC_CFG, BNXT_USE_CHIMP_MB);
 
        req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
        req.flags = rte_cpu_to_le_32(flags);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -2911,7 +2921,7 @@ int bnxt_hwrm_func_buf_rgtr(struct bnxt *bp)
        struct hwrm_func_buf_rgtr_input req = {.req_type = 0 };
        struct hwrm_func_buf_rgtr_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, FUNC_BUF_RGTR);
+       HWRM_PREP(req, FUNC_BUF_RGTR, BNXT_USE_CHIMP_MB);
 
        req.req_buf_num_pages = rte_cpu_to_le_16(1);
        req.req_buf_page_size = rte_cpu_to_le_16(
@@ -2925,7 +2935,7 @@ int bnxt_hwrm_func_buf_rgtr(struct bnxt *bp)
                return -ENOMEM;
        }
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -2939,9 +2949,9 @@ int bnxt_hwrm_func_buf_unrgtr(struct bnxt *bp)
        struct hwrm_func_buf_unrgtr_input req = {.req_type = 0 };
        struct hwrm_func_buf_unrgtr_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, FUNC_BUF_UNRGTR);
+       HWRM_PREP(req, FUNC_BUF_UNRGTR, BNXT_USE_CHIMP_MB);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -2955,7 +2965,7 @@ int bnxt_hwrm_func_cfg_def_cp(struct bnxt *bp)
        struct hwrm_func_cfg_input req = {0};
        int rc;
 
-       HWRM_PREP(req, FUNC_CFG);
+       HWRM_PREP(req, FUNC_CFG, BNXT_USE_CHIMP_MB);
 
        req.fid = rte_cpu_to_le_16(0xffff);
        req.flags = rte_cpu_to_le_32(bp->pf.func_cfg_flags);
@@ -2963,7 +2973,7 @@ int bnxt_hwrm_func_cfg_def_cp(struct bnxt *bp)
                        HWRM_FUNC_CFG_INPUT_ENABLES_ASYNC_EVENT_CR);
        req.async_event_cr = rte_cpu_to_le_16(
                        bp->def_cp_ring->cp_ring_struct->fw_ring_id);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -2977,13 +2987,13 @@ int bnxt_hwrm_vf_func_cfg_def_cp(struct bnxt *bp)
        struct hwrm_func_vf_cfg_input req = {0};
        int rc;
 
-       HWRM_PREP(req, FUNC_VF_CFG);
+       HWRM_PREP(req, FUNC_VF_CFG, BNXT_USE_CHIMP_MB);
 
        req.enables = rte_cpu_to_le_32(
-                       HWRM_FUNC_CFG_INPUT_ENABLES_ASYNC_EVENT_CR);
+                       HWRM_FUNC_VF_CFG_INPUT_ENABLES_ASYNC_EVENT_CR);
        req.async_event_cr = rte_cpu_to_le_16(
                        bp->def_cp_ring->cp_ring_struct->fw_ring_id);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -2999,7 +3009,7 @@ int bnxt_hwrm_set_default_vlan(struct bnxt *bp, int vf, uint8_t is_vf)
        uint32_t func_cfg_flags;
        int rc = 0;
 
-       HWRM_PREP(req, FUNC_CFG);
+       HWRM_PREP(req, FUNC_CFG, BNXT_USE_CHIMP_MB);
 
        if (is_vf) {
                dflt_vlan = bp->pf.vf_info[vf].dflt_vlan;
@@ -3016,7 +3026,7 @@ int bnxt_hwrm_set_default_vlan(struct bnxt *bp, int vf, uint8_t is_vf)
        req.enables |= rte_cpu_to_le_32(HWRM_FUNC_CFG_INPUT_ENABLES_DFLT_VLAN);
        req.dflt_vlan = rte_cpu_to_le_16(dflt_vlan);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -3031,13 +3041,13 @@ int bnxt_hwrm_func_bw_cfg(struct bnxt *bp, uint16_t vf,
        struct hwrm_func_cfg_input req = {0};
        int rc;
 
-       HWRM_PREP(req, FUNC_CFG);
+       HWRM_PREP(req, FUNC_CFG, BNXT_USE_CHIMP_MB);
 
        req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
        req.enables |= rte_cpu_to_le_32(enables);
        req.flags = rte_cpu_to_le_32(bp->pf.vf_info[vf].func_cfg_flags);
        req.max_bw = rte_cpu_to_le_32(max_bw);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -3051,14 +3061,14 @@ int bnxt_hwrm_set_vf_vlan(struct bnxt *bp, int vf)
        struct hwrm_func_cfg_output *resp = bp->hwrm_cmd_resp_addr;
        int rc = 0;
 
-       HWRM_PREP(req, FUNC_CFG);
+       HWRM_PREP(req, FUNC_CFG, BNXT_USE_CHIMP_MB);
 
        req.flags = rte_cpu_to_le_32(bp->pf.vf_info[vf].func_cfg_flags);
        req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
        req.enables |= rte_cpu_to_le_32(HWRM_FUNC_CFG_INPUT_ENABLES_DFLT_VLAN);
        req.dflt_vlan = rte_cpu_to_le_16(bp->pf.vf_info[vf].dflt_vlan);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -3088,12 +3098,12 @@ int bnxt_hwrm_reject_fwd_resp(struct bnxt *bp, uint16_t target_id,
        if (ec_size > sizeof(req.encap_request))
                return -1;
 
-       HWRM_PREP(req, REJECT_FWD_RESP);
+       HWRM_PREP(req, REJECT_FWD_RESP, BNXT_USE_CHIMP_MB);
 
        req.encap_resp_target_id = rte_cpu_to_le_16(target_id);
        memcpy(req.encap_request, encaped, ec_size);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -3108,10 +3118,10 @@ int bnxt_hwrm_func_qcfg_vf_default_mac(struct bnxt *bp, uint16_t vf,
        struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
        int rc;
 
-       HWRM_PREP(req, FUNC_QCFG);
+       HWRM_PREP(req, FUNC_QCFG, BNXT_USE_CHIMP_MB);
 
        req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -3132,12 +3142,12 @@ int bnxt_hwrm_exec_fwd_resp(struct bnxt *bp, uint16_t target_id,
        if (ec_size > sizeof(req.encap_request))
                return -1;
 
-       HWRM_PREP(req, EXEC_FWD_RESP);
+       HWRM_PREP(req, EXEC_FWD_RESP, BNXT_USE_CHIMP_MB);
 
        req.encap_resp_target_id = rte_cpu_to_le_16(target_id);
        memcpy(req.encap_request, encaped, ec_size);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -3152,11 +3162,11 @@ int bnxt_hwrm_ctx_qstats(struct bnxt *bp, uint32_t cid, int idx,
        struct hwrm_stat_ctx_query_input req = {.req_type = 0};
        struct hwrm_stat_ctx_query_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, STAT_CTX_QUERY);
+       HWRM_PREP(req, STAT_CTX_QUERY, BNXT_USE_CHIMP_MB);
 
        req.stat_ctx_id = rte_cpu_to_le_32(cid);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -3192,12 +3202,12 @@ int bnxt_hwrm_port_qstats(struct bnxt *bp)
        struct bnxt_pf_info *pf = &bp->pf;
        int rc;
 
-       HWRM_PREP(req, PORT_QSTATS);
+       HWRM_PREP(req, PORT_QSTATS, BNXT_USE_CHIMP_MB);
 
        req.port_id = rte_cpu_to_le_16(pf->port_id);
        req.tx_stat_host_addr = rte_cpu_to_le_64(bp->hw_tx_port_stats_map);
        req.rx_stat_host_addr = rte_cpu_to_le_64(bp->hw_rx_port_stats_map);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -3217,10 +3227,10 @@ int bnxt_hwrm_port_clr_stats(struct bnxt *bp)
            BNXT_NPAR(bp) || BNXT_MH(bp) || BNXT_TOTAL_VFS(bp))
                return 0;
 
-       HWRM_PREP(req, PORT_CLR_STATS);
+       HWRM_PREP(req, PORT_CLR_STATS, BNXT_USE_CHIMP_MB);
 
        req.port_id = rte_cpu_to_le_16(pf->port_id);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -3237,9 +3247,9 @@ int bnxt_hwrm_port_led_qcaps(struct bnxt *bp)
        if (BNXT_VF(bp))
                return 0;
 
-       HWRM_PREP(req, PORT_LED_QCAPS);
+       HWRM_PREP(req, PORT_LED_QCAPS, BNXT_USE_CHIMP_MB);
        req.port_id = bp->pf.port_id;
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -3279,7 +3289,7 @@ int bnxt_hwrm_port_led_cfg(struct bnxt *bp, bool led_on)
        if (!bp->num_leds || BNXT_VF(bp))
                return -EOPNOTSUPP;
 
-       HWRM_PREP(req, PORT_LED_CFG);
+       HWRM_PREP(req, PORT_LED_CFG, BNXT_USE_CHIMP_MB);
 
        if (led_on) {
                led_state = HWRM_PORT_LED_CFG_INPUT_LED0_STATE_BLINKALT;
@@ -3297,7 +3307,7 @@ int bnxt_hwrm_port_led_cfg(struct bnxt *bp, bool led_on)
                led_cfg->led_group_id = bp->leds[i].led_group_id;
        }
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -3312,9 +3322,9 @@ int bnxt_hwrm_nvm_get_dir_info(struct bnxt *bp, uint32_t *entries,
        struct hwrm_nvm_get_dir_info_input req = {0};
        struct hwrm_nvm_get_dir_info_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, NVM_GET_DIR_INFO);
+       HWRM_PREP(req, NVM_GET_DIR_INFO, BNXT_USE_CHIMP_MB);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -3357,9 +3367,9 @@ int bnxt_get_nvram_directory(struct bnxt *bp, uint32_t len, uint8_t *data)
                        "unable to map response address to physical memory\n");
                return -ENOMEM;
        }
-       HWRM_PREP(req, NVM_GET_DIR_ENTRIES);
+       HWRM_PREP(req, NVM_GET_DIR_ENTRIES, BNXT_USE_CHIMP_MB);
        req.host_dest_addr = rte_cpu_to_le_64(dma_handle);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        if (rc == 0)
                memcpy(data, buf, len > buflen ? buflen : len);
@@ -3392,12 +3402,12 @@ int bnxt_hwrm_get_nvram_item(struct bnxt *bp, uint32_t index,
                        "unable to map response address to physical memory\n");
                return -ENOMEM;
        }
-       HWRM_PREP(req, NVM_READ);
+       HWRM_PREP(req, NVM_READ, BNXT_USE_CHIMP_MB);
        req.host_dest_addr = rte_cpu_to_le_64(dma_handle);
        req.dir_idx = rte_cpu_to_le_16(index);
        req.offset = rte_cpu_to_le_32(offset);
        req.len = rte_cpu_to_le_32(length);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
        if (rc == 0)
                memcpy(data, buf, length);
 
@@ -3414,9 +3424,9 @@ int bnxt_hwrm_erase_nvram_directory(struct bnxt *bp, uint8_t index)
        struct hwrm_nvm_erase_dir_entry_input req = {0};
        struct hwrm_nvm_erase_dir_entry_output *resp = bp->hwrm_cmd_resp_addr;
 
-       HWRM_PREP(req, NVM_ERASE_DIR_ENTRY);
+       HWRM_PREP(req, NVM_ERASE_DIR_ENTRY, BNXT_USE_CHIMP_MB);
        req.dir_idx = rte_cpu_to_le_16(index);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
 
@@ -3448,7 +3458,7 @@ int bnxt_hwrm_flash_nvram(struct bnxt *bp, uint16_t dir_type,
        }
        memcpy(buf, data, data_len);
 
-       HWRM_PREP(req, NVM_WRITE);
+       HWRM_PREP(req, NVM_WRITE, BNXT_USE_CHIMP_MB);
 
        req.dir_type = rte_cpu_to_le_16(dir_type);
        req.dir_ordinal = rte_cpu_to_le_16(dir_ordinal);
@@ -3457,7 +3467,7 @@ int bnxt_hwrm_flash_nvram(struct bnxt *bp, uint16_t dir_type,
        req.dir_data_length = rte_cpu_to_le_32(data_len);
        req.host_src_addr = rte_cpu_to_le_64(dma_handle);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        rte_free(buf);
        HWRM_CHECK_RESULT();
@@ -3499,7 +3509,7 @@ static int bnxt_hwrm_func_vf_vnic_query(struct bnxt *bp, uint16_t vf,
        int rc;
 
        /* First query all VNIC ids */
-       HWRM_PREP(req, FUNC_VF_VNIC_IDS_QUERY);
+       HWRM_PREP(req, FUNC_VF_VNIC_IDS_QUERY, BNXT_USE_CHIMP_MB);
 
        req.vf_id = rte_cpu_to_le_16(bp->pf.first_vf_id + vf);
        req.max_vnic_id_cnt = rte_cpu_to_le_32(bp->pf.total_vnics);
@@ -3511,7 +3521,7 @@ static int bnxt_hwrm_func_vf_vnic_query(struct bnxt *bp, uint16_t vf,
                "unable to map VNIC ID table address to physical memory\n");
                return -ENOMEM;
        }
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
        if (rc) {
                HWRM_UNLOCK();
                PMD_DRV_LOG(ERR, "hwrm_func_vf_vnic_query failed rc:%d\n", rc);
@@ -3591,7 +3601,7 @@ int bnxt_hwrm_func_cfg_vf_set_vlan_anti_spoof(struct bnxt *bp, uint16_t vf,
        struct hwrm_func_cfg_input req = {0};
        int rc;
 
-       HWRM_PREP(req, FUNC_CFG);
+       HWRM_PREP(req, FUNC_CFG, BNXT_USE_CHIMP_MB);
 
        req.fid = rte_cpu_to_le_16(bp->pf.vf_info[vf].fid);
        req.enables |= rte_cpu_to_le_32(
@@ -3599,7 +3609,7 @@ int bnxt_hwrm_func_cfg_vf_set_vlan_anti_spoof(struct bnxt *bp, uint16_t vf,
        req.vlan_antispoof_mode = on ?
                HWRM_FUNC_CFG_INPUT_VLAN_ANTISPOOF_MODE_VALIDATE_VLAN :
                HWRM_FUNC_CFG_INPUT_VLAN_ANTISPOOF_MODE_NOCHECK;
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -3668,7 +3678,7 @@ int bnxt_hwrm_set_em_filter(struct bnxt *bp,
        if (filter->fw_em_filter_id != UINT64_MAX)
                bnxt_hwrm_clear_em_filter(bp, filter);
 
-       HWRM_PREP(req, CFA_EM_FLOW_ALLOC);
+       HWRM_PREP(req, CFA_EM_FLOW_ALLOC, BNXT_USE_KONG(bp));
 
        req.flags = rte_cpu_to_le_32(filter->flags);
 
@@ -3721,7 +3731,7 @@ int bnxt_hwrm_set_em_filter(struct bnxt *bp,
 
        req.enables = rte_cpu_to_le_32(enables);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
 
        HWRM_CHECK_RESULT();
 
@@ -3741,11 +3751,11 @@ int bnxt_hwrm_clear_em_filter(struct bnxt *bp, struct bnxt_filter_info *filter)
                return 0;
 
        PMD_DRV_LOG(ERR, "Clear EM filter\n");
-       HWRM_PREP(req, CFA_EM_FLOW_FREE);
+       HWRM_PREP(req, CFA_EM_FLOW_FREE, BNXT_USE_KONG(bp));
 
        req.em_filter_id = rte_cpu_to_le_64(filter->fw_em_filter_id);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_KONG(bp));
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -3769,7 +3779,7 @@ int bnxt_hwrm_set_ntuple_filter(struct bnxt *bp,
        if (filter->fw_ntuple_filter_id != UINT64_MAX)
                bnxt_hwrm_clear_ntuple_filter(bp, filter);
 
-       HWRM_PREP(req, CFA_NTUPLE_FILTER_ALLOC);
+       HWRM_PREP(req, CFA_NTUPLE_FILTER_ALLOC, BNXT_USE_CHIMP_MB);
 
        req.flags = rte_cpu_to_le_32(filter->flags);
 
@@ -3832,7 +3842,7 @@ int bnxt_hwrm_set_ntuple_filter(struct bnxt *bp,
 
        req.enables = rte_cpu_to_le_32(enables);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
 
@@ -3853,11 +3863,11 @@ int bnxt_hwrm_clear_ntuple_filter(struct bnxt *bp,
        if (filter->fw_ntuple_filter_id == UINT64_MAX)
                return 0;
 
-       HWRM_PREP(req, CFA_NTUPLE_FILTER_FREE);
+       HWRM_PREP(req, CFA_NTUPLE_FILTER_FREE, BNXT_USE_CHIMP_MB);
 
        req.ntuple_filter_id = rte_cpu_to_le_64(filter->fw_ntuple_filter_id);
 
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
 
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
@@ -3937,11 +3947,55 @@ int bnxt_hwrm_set_ring_coal(struct bnxt *bp,
        if (!bnxt_stratus_device(bp))
                return 0;
 
-       HWRM_PREP(req, RING_CMPL_RING_CFG_AGGINT_PARAMS);
+       HWRM_PREP(req, RING_CMPL_RING_CFG_AGGINT_PARAMS, BNXT_USE_CHIMP_MB);
        bnxt_hwrm_set_coal_params(coal, &req);
        req.ring_id = rte_cpu_to_le_16(ring_id);
-       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
        HWRM_CHECK_RESULT();
        HWRM_UNLOCK();
        return 0;
 }
+
+int bnxt_hwrm_ext_port_qstats(struct bnxt *bp)
+{
+       struct hwrm_port_qstats_ext_input req = {0};
+       struct hwrm_port_qstats_ext_output *resp = bp->hwrm_cmd_resp_addr;
+       struct bnxt_pf_info *pf = &bp->pf;
+       int rc;
+
+       if (!(bp->flags & BNXT_FLAG_EXT_RX_PORT_STATS ||
+             bp->flags & BNXT_FLAG_EXT_TX_PORT_STATS))
+               return 0;
+
+       HWRM_PREP(req, PORT_QSTATS_EXT, BNXT_USE_CHIMP_MB);
+
+       req.port_id = rte_cpu_to_le_16(pf->port_id);
+       if (bp->flags & BNXT_FLAG_EXT_TX_PORT_STATS) {
+               req.tx_stat_host_addr =
+                       rte_cpu_to_le_64(bp->hw_tx_port_stats_map);
+               req.tx_stat_size =
+                       rte_cpu_to_le_16(sizeof(struct tx_port_stats_ext));
+       }
+       if (bp->flags & BNXT_FLAG_EXT_RX_PORT_STATS) {
+               req.rx_stat_host_addr =
+                       rte_cpu_to_le_64(bp->hw_rx_port_stats_map);
+               req.rx_stat_size =
+                       rte_cpu_to_le_16(sizeof(struct rx_port_stats_ext));
+       }
+       rc = bnxt_hwrm_send_message(bp, &req, sizeof(req), BNXT_USE_CHIMP_MB);
+
+       if (rc) {
+               bp->fw_rx_port_stats_ext_size = 0;
+               bp->fw_tx_port_stats_ext_size = 0;
+       } else {
+               bp->fw_rx_port_stats_ext_size =
+                       rte_le_to_cpu_16(resp->rx_stat_size);
+               bp->fw_tx_port_stats_ext_size =
+                       rte_le_to_cpu_16(resp->tx_stat_size);
+       }
+
+       HWRM_CHECK_RESULT();
+       HWRM_UNLOCK();
+
+       return rc;
+}
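
Every hunk in this file threads the same new argument through HWRM_PREP() and bnxt_hwrm_send_message(): ordinary configuration commands go to the ChiMP mailbox (BNXT_USE_CHIMP_MB), while the CFA exact-match flow commands switch to the KONG mailbox when BNXT_USE_KONG(bp) indicates the firmware advertises that channel. A minimal self-contained sketch of the selection pattern; pick_mailbox() and send_message() are invented names, not the driver's:

/* Illustrative sketch only -- none of these names are the bnxt driver's. */
#include <stddef.h>
#include <stdio.h>

enum hwrm_mailbox { MB_CHIMP, MB_KONG };

/* Route flow commands to KONG when firmware advertises the channel;
 * everything else stays on the default ChiMP mailbox. */
static enum hwrm_mailbox pick_mailbox(int fw_has_kong, int is_flow_cmd)
{
	return (fw_has_kong && is_flow_cmd) ? MB_KONG : MB_CHIMP;
}

static int send_message(const void *req, size_t len, enum hwrm_mailbox mb)
{
	/* A real implementation would copy 'req' into the window registers
	 * of the selected mailbox, ring its doorbell, and poll for a reply. */
	(void)req;
	printf("sending %zu-byte request via %s\n", len,
	       mb == MB_KONG ? "KONG" : "CHIMP");
	return 0;
}

int main(void)
{
	char req[32] = {0};

	send_message(req, sizeof(req), pick_mailbox(1, 1)); /* EM flow cmd */
	send_message(req, sizeof(req), pick_mailbox(1, 0)); /* config cmd */
	return 0;
}

Passing the mailbox per call, rather than storing it in the device, keeps slow-path configuration traffic and flow-offload traffic independent of each other.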
index 379aac6..ec9b3e0 100644 (file)
@@ -32,6 +32,10 @@ struct bnxt_cp_ring_info;
 #define HWRM_FUNC_RESOURCE_QCAPS_OUTPUT_VF_RESV_STRATEGY_MINIMAL_STATIC \
        HWRM_FUNC_RESOURCE_QCAPS_OUTPUT_VF_RESERVATION_STRATEGY_MINIMAL_STATIC
 
+#define HWRM_SPEC_CODE_1_8_4           0x10804
+#define HWRM_SPEC_CODE_1_9_0           0x10900
+#define HWRM_SPEC_CODE_1_9_2           0x10902
+
 int bnxt_hwrm_cfa_l2_clear_rx_mask(struct bnxt *bp,
                                   struct bnxt_vnic_info *vnic);
 int bnxt_hwrm_cfa_l2_set_rx_mask(struct bnxt *bp, struct bnxt_vnic_info *vnic,
@@ -174,4 +178,5 @@ int bnxt_vnic_rss_configure(struct bnxt *bp,
 int bnxt_hwrm_set_ring_coal(struct bnxt *bp,
                        struct bnxt_coal *coal, uint16_t ring_id);
 int bnxt_hwrm_check_vf_rings(struct bnxt *bp);
+int bnxt_hwrm_ext_port_qstats(struct bnxt *bp);
 #endif
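
The new HWRM_SPEC_CODE_1_x_y constants pack a spec version as 0x00MMmmuu, one byte per component, so plain integer comparisons such as bp->hwrm_spec_code >= HWRM_SPEC_CODE_1_9_2 order the same way version numbers do. A small standalone check of that encoding; spec_code() is a hypothetical helper:

#include <assert.h>
#include <stdint.h>

/* Pack major.minor.update into the 0x00MMmmuu layout used above. */
static uint32_t spec_code(uint8_t major, uint8_t minor, uint8_t update)
{
	return ((uint32_t)major << 16) | ((uint32_t)minor << 8) | update;
}

int main(void)
{
	assert(spec_code(1, 8, 4) == 0x10804);
	assert(spec_code(1, 9, 0) == 0x10900);
	assert(spec_code(1, 9, 2) == 0x10902);
	/* One byte per field keeps integer order == version order. */
	assert(spec_code(1, 8, 4) < spec_code(1, 9, 0));
	return 0;
}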
index 832fc9e..5345d39 100644 (file)
@@ -43,21 +43,19 @@ int bnxt_mq_rx_configure(struct bnxt *bp)
 
        /* Single queue mode */
        if (bp->rx_cp_nr_rings < 2) {
-               vnic = bnxt_alloc_vnic(bp);
+               vnic = &bp->vnic_info[0];
                if (!vnic) {
                        PMD_DRV_LOG(ERR, "VNIC alloc failed\n");
                        rc = -ENOMEM;
                        goto err_out;
                }
                vnic->flags |= BNXT_VNIC_INFO_BCAST;
-               STAILQ_INSERT_TAIL(&bp->ff_pool[0], vnic, next);
                bp->nr_vnics++;
 
                rxq = bp->eth_dev->data->rx_queues[0];
                rxq->vnic = vnic;
 
                vnic->func_default = true;
-               vnic->ff_pool_idx = 0;
                vnic->start_grp_id = 0;
                vnic->end_grp_id = vnic->start_grp_id;
                filter = bnxt_alloc_filter(bp);
@@ -85,6 +83,9 @@ int bnxt_mq_rx_configure(struct bnxt *bp)
                                            RTE_MIN(bp->max_l2_ctx,
                                            RTE_MIN(bp->max_rsscos_ctx,
                                                    ETH_64_POOLS)));
+                       PMD_DRV_LOG(DEBUG,
+                                   "pools = %u max_pools = %u\n",
+                                   pools, max_pools);
                        if (pools > max_pools)
                                pools = max_pools;
                        break;
@@ -98,25 +99,27 @@ int bnxt_mq_rx_configure(struct bnxt *bp)
                        goto err_out;
                }
        }
-
        nb_q_per_grp = bp->rx_cp_nr_rings / pools;
+       PMD_DRV_LOG(ERR, "pools = %u nb_q_per_grp = %u\n", pools, nb_q_per_grp);
        start_grp_id = 0;
        end_grp_id = nb_q_per_grp;
 
        for (i = 0; i < pools; i++) {
-               vnic = bnxt_alloc_vnic(bp);
+               vnic = &bp->vnic_info[i];
                if (!vnic) {
                        PMD_DRV_LOG(ERR, "VNIC alloc failed\n");
                        rc = -ENOMEM;
                        goto err_out;
                }
                vnic->flags |= BNXT_VNIC_INFO_BCAST;
-               STAILQ_INSERT_TAIL(&bp->ff_pool[i], vnic, next);
                bp->nr_vnics++;
 
                for (j = 0; j < nb_q_per_grp; j++, ring_idx++) {
                        rxq = bp->eth_dev->data->rx_queues[ring_idx];
                        rxq->vnic = vnic;
+                       PMD_DRV_LOG(DEBUG,
+                                   "rxq[%d] = %p vnic[%d] = %p\n",
+                                   ring_idx, rxq, i, vnic);
                }
                if (i == 0) {
                        if (dev_conf->rxmode.mq_mode & ETH_MQ_RX_VMDQ_DCB) {
@@ -125,7 +128,6 @@ int bnxt_mq_rx_configure(struct bnxt *bp)
                        }
                        vnic->func_default = true;
                }
-               vnic->ff_pool_idx = i;
                vnic->start_grp_id = start_grp_id;
                vnic->end_grp_id = end_grp_id;
 
@@ -176,7 +178,7 @@ out:
                        hash_type |= HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV6;
 
                for (i = 0; i < bp->nr_vnics; i++) {
-                       STAILQ_FOREACH(vnic, &bp->ff_pool[i], next) {
+                       vnic = &bp->vnic_info[i];
                        vnic->hash_type = hash_type;
 
                        /*
@@ -187,7 +189,6 @@ out:
                            rss->rss_key_len <= HW_HASH_KEY_SIZE)
                                memcpy(vnic->rss_hash_key,
                                       rss->rss_key, rss->rss_key_len);
-                       }
                }
        }
 
@@ -331,8 +332,10 @@ int bnxt_rx_queue_setup_op(struct rte_eth_dev *eth_dev,
 
        rxq->queue_id = queue_idx;
        rxq->port_id = eth_dev->data->port_id;
-       rxq->crc_len = rte_eth_dev_must_keep_crc(rx_offloads) ?
-               ETHER_CRC_LEN : 0;
+       if (rx_offloads & DEV_RX_OFFLOAD_KEEP_CRC)
+               rxq->crc_len = ETHER_CRC_LEN;
+       else
+               rxq->crc_len = 0;
 
        eth_dev->data->rx_queues[queue_idx] = rxq;
        /* Allocate RX ring hardware descriptors */
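
The rewritten branch spells the policy out: with DEV_RX_OFFLOAD_KEEP_CRC the 4-byte Ethernet FCS stays attached to received data, so the queue records ETHER_CRC_LEN and the RX path can subtract it when reporting payload length. A generic sketch of how such a crc_len is typically consumed; rx_payload_len() is invented for illustration and ETHER_CRC_LEN is redefined locally, this is not the driver's actual RX routine:

#include <stdint.h>
#include <stdio.h>

#define ETHER_CRC_LEN 4	/* local stand-in for the DPDK constant */

/* Hypothetical helper: payload length of a received frame given the
 * queue's crc_len (0 when hardware has already stripped the FCS). */
static uint16_t rx_payload_len(uint16_t wire_len, uint16_t crc_len)
{
	return wire_len - crc_len;
}

int main(void)
{
	/* 64-byte minimum frame: 60 bytes of payload when the FCS is kept. */
	printf("%u\n", rx_payload_len(64, ETHER_CRC_LEN)); /* 60 */
	printf("%u\n", rx_payload_len(64, 0));             /* 64 */
	return 0;
}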
index a5d3c86..c16bf99 100644 (file)
@@ -26,8 +26,8 @@ static const struct bnxt_xstats_name_off bnxt_rx_stats_strings[] = {
                                rx_256b_511b_frames)},
        {"rx_512b_1023b_frames", offsetof(struct rx_port_stats,
                                rx_512b_1023b_frames)},
-       {"rx_1024b_1518_frames", offsetof(struct rx_port_stats,
-                               rx_1024b_1518_frames)},
+       {"rx_1024b_1518b_frames", offsetof(struct rx_port_stats,
+                               rx_1024b_1518b_frames)},
        {"rx_good_vlan_frames", offsetof(struct rx_port_stats,
                                rx_good_vlan_frames)},
        {"rx_1519b_2047b_frames", offsetof(struct rx_port_stats,
@@ -93,12 +93,12 @@ static const struct bnxt_xstats_name_off bnxt_tx_stats_strings[] = {
                                tx_256b_511b_frames)},
        {"tx_512b_1023b_frames", offsetof(struct tx_port_stats,
                                tx_512b_1023b_frames)},
-       {"tx_1024b_1518_frames", offsetof(struct tx_port_stats,
-                               tx_1024b_1518_frames)},
+       {"tx_1024b_1518b_frames", offsetof(struct tx_port_stats,
+                               tx_1024b_1518b_frames)},
        {"tx_good_vlan_frames", offsetof(struct tx_port_stats,
                                tx_good_vlan_frames)},
-       {"tx_1519b_2047_frames", offsetof(struct tx_port_stats,
-                               tx_1519b_2047_frames)},
+       {"tx_1519b_2047b_frames", offsetof(struct tx_port_stats,
+                               tx_1519b_2047b_frames)},
        {"tx_2048b_4095b_frames", offsetof(struct tx_port_stats,
                                tx_2048b_4095b_frames)},
        {"tx_4096b_9216b_frames", offsetof(struct tx_port_stats,
@@ -180,6 +180,150 @@ static const struct bnxt_xstats_name_off bnxt_func_stats_strings[] = {
                                rx_agg_aborts)},
 };
 
+static const struct bnxt_xstats_name_off bnxt_rx_ext_stats_strings[] = {
+       {"link_down_events", offsetof(struct rx_port_stats_ext,
+                               link_down_events)},
+       {"continuous_pause_events", offsetof(struct rx_port_stats_ext,
+                               continuous_pause_events)},
+       {"resume_pause_events", offsetof(struct rx_port_stats_ext,
+                               resume_pause_events)},
+       {"continuous_roce_pause_events", offsetof(struct rx_port_stats_ext,
+                               continuous_roce_pause_events)},
+       {"resume_roce_pause_events", offsetof(struct rx_port_stats_ext,
+                               resume_roce_pause_events)},
+       {"rx_bytes_cos0", offsetof(struct rx_port_stats_ext,
+                               rx_bytes_cos0)},
+       {"rx_bytes_cos1", offsetof(struct rx_port_stats_ext,
+                               rx_bytes_cos1)},
+       {"rx_bytes_cos2", offsetof(struct rx_port_stats_ext,
+                               rx_bytes_cos2)},
+       {"rx_bytes_cos3", offsetof(struct rx_port_stats_ext,
+                               rx_bytes_cos3)},
+       {"rx_bytes_cos4", offsetof(struct rx_port_stats_ext,
+                               rx_bytes_cos4)},
+       {"rx_bytes_cos5", offsetof(struct rx_port_stats_ext,
+                               rx_bytes_cos5)},
+       {"rx_bytes_cos6", offsetof(struct rx_port_stats_ext,
+                               rx_bytes_cos6)},
+       {"rx_bytes_cos7", offsetof(struct rx_port_stats_ext,
+                               rx_bytes_cos7)},
+       {"rx_packets_cos0", offsetof(struct rx_port_stats_ext,
+                               rx_packets_cos0)},
+       {"rx_packets_cos1", offsetof(struct rx_port_stats_ext,
+                               rx_packets_cos1)},
+       {"rx_packets_cos2", offsetof(struct rx_port_stats_ext,
+                               rx_packets_cos2)},
+       {"rx_packets_cos3", offsetof(struct rx_port_stats_ext,
+                               rx_packets_cos3)},
+       {"rx_packets_cos4", offsetof(struct rx_port_stats_ext,
+                               rx_packets_cos4)},
+       {"rx_packets_cos5", offsetof(struct rx_port_stats_ext,
+                               rx_packets_cos5)},
+       {"rx_packets_cos6", offsetof(struct rx_port_stats_ext,
+                               rx_packets_cos6)},
+       {"rx_packets_cos7", offsetof(struct rx_port_stats_ext,
+                               rx_packets_cos7)},
+       {"pfc_pri0_rx_duration_us", offsetof(struct rx_port_stats_ext,
+                               pfc_pri0_rx_duration_us)},
+       {"pfc_pri0_rx_transitions", offsetof(struct rx_port_stats_ext,
+                               pfc_pri0_rx_transitions)},
+       {"pfc_pri1_rx_duration_us", offsetof(struct rx_port_stats_ext,
+                               pfc_pri1_rx_duration_us)},
+       {"pfc_pri1_rx_transitions", offsetof(struct rx_port_stats_ext,
+                               pfc_pri1_rx_transitions)},
+       {"pfc_pri2_rx_duration_us", offsetof(struct rx_port_stats_ext,
+                               pfc_pri2_rx_duration_us)},
+       {"pfc_pri2_rx_transitions", offsetof(struct rx_port_stats_ext,
+                               pfc_pri2_rx_transitions)},
+       {"pfc_pri3_rx_duration_us", offsetof(struct rx_port_stats_ext,
+                               pfc_pri3_rx_duration_us)},
+       {"pfc_pri3_rx_transitions", offsetof(struct rx_port_stats_ext,
+                               pfc_pri3_rx_transitions)},
+       {"pfc_pri4_rx_duration_us", offsetof(struct rx_port_stats_ext,
+                               pfc_pri4_rx_duration_us)},
+       {"pfc_pri4_rx_transitions", offsetof(struct rx_port_stats_ext,
+                               pfc_pri4_rx_transitions)},
+       {"pfc_pri5_rx_duration_us", offsetof(struct rx_port_stats_ext,
+                               pfc_pri5_rx_duration_us)},
+       {"pfc_pri5_rx_transitions", offsetof(struct rx_port_stats_ext,
+                               pfc_pri5_rx_transitions)},
+       {"pfc_pri6_rx_duration_us", offsetof(struct rx_port_stats_ext,
+                               pfc_pri6_rx_duration_us)},
+       {"pfc_pri6_rx_transitions", offsetof(struct rx_port_stats_ext,
+                               pfc_pri6_rx_transitions)},
+       {"pfc_pri7_rx_duration_us", offsetof(struct rx_port_stats_ext,
+                               pfc_pri7_rx_duration_us)},
+       {"pfc_pri7_rx_transitions", offsetof(struct rx_port_stats_ext,
+                               pfc_pri7_rx_transitions)},
+};
+
+static const struct bnxt_xstats_name_off bnxt_tx_ext_stats_strings[] = {
+       {"tx_bytes_cos0", offsetof(struct tx_port_stats_ext,
+                               tx_bytes_cos0)},
+       {"tx_bytes_cos1", offsetof(struct tx_port_stats_ext,
+                               tx_bytes_cos1)},
+       {"tx_bytes_cos2", offsetof(struct tx_port_stats_ext,
+                               tx_bytes_cos2)},
+       {"tx_bytes_cos3", offsetof(struct tx_port_stats_ext,
+                               tx_bytes_cos3)},
+       {"tx_bytes_cos4", offsetof(struct tx_port_stats_ext,
+                               tx_bytes_cos4)},
+       {"tx_bytes_cos5", offsetof(struct tx_port_stats_ext,
+                               tx_bytes_cos5)},
+       {"tx_bytes_cos6", offsetof(struct tx_port_stats_ext,
+                               tx_bytes_cos6)},
+       {"tx_bytes_cos7", offsetof(struct tx_port_stats_ext,
+                               tx_bytes_cos7)},
+       {"tx_packets_cos0", offsetof(struct tx_port_stats_ext,
+                               tx_packets_cos0)},
+       {"tx_packets_cos1", offsetof(struct tx_port_stats_ext,
+                               tx_packets_cos1)},
+       {"tx_packets_cos2", offsetof(struct tx_port_stats_ext,
+                               tx_packets_cos2)},
+       {"tx_packets_cos3", offsetof(struct tx_port_stats_ext,
+                               tx_packets_cos3)},
+       {"tx_packets_cos4", offsetof(struct tx_port_stats_ext,
+                               tx_packets_cos4)},
+       {"tx_packets_cos5", offsetof(struct tx_port_stats_ext,
+                               tx_packets_cos5)},
+       {"tx_packets_cos6", offsetof(struct tx_port_stats_ext,
+                               tx_packets_cos6)},
+       {"tx_packets_cos7", offsetof(struct tx_port_stats_ext,
+                               tx_packets_cos7)},
+       {"pfc_pri0_tx_duration_us", offsetof(struct tx_port_stats_ext,
+                               pfc_pri0_tx_duration_us)},
+       {"pfc_pri0_tx_transitions", offsetof(struct tx_port_stats_ext,
+                               pfc_pri0_tx_transitions)},
+       {"pfc_pri1_tx_duration_us", offsetof(struct tx_port_stats_ext,
+                               pfc_pri1_tx_duration_us)},
+       {"pfc_pri1_tx_transitions", offsetof(struct tx_port_stats_ext,
+                               pfc_pri1_tx_transitions)},
+       {"pfc_pri2_tx_duration_us", offsetof(struct tx_port_stats_ext,
+                               pfc_pri2_tx_duration_us)},
+       {"pfc_pri2_tx_transitions", offsetof(struct tx_port_stats_ext,
+                               pfc_pri2_tx_transitions)},
+       {"pfc_pri3_tx_duration_us", offsetof(struct tx_port_stats_ext,
+                               pfc_pri3_tx_duration_us)},
+       {"pfc_pri3_tx_transitions", offsetof(struct tx_port_stats_ext,
+                               pfc_pri3_tx_transitions)},
+       {"pfc_pri4_tx_duration_us", offsetof(struct tx_port_stats_ext,
+                               pfc_pri4_tx_duration_us)},
+       {"pfc_pri4_tx_transitions", offsetof(struct tx_port_stats_ext,
+                               pfc_pri4_tx_transitions)},
+       {"pfc_pri5_tx_duration_us", offsetof(struct tx_port_stats_ext,
+                               pfc_pri5_tx_duration_us)},
+       {"pfc_pri5_tx_transitions", offsetof(struct tx_port_stats_ext,
+                               pfc_pri5_tx_transitions)},
+       {"pfc_pri6_tx_duration_us", offsetof(struct tx_port_stats_ext,
+                               pfc_pri6_tx_duration_us)},
+       {"pfc_pri6_tx_transitions", offsetof(struct tx_port_stats_ext,
+                               pfc_pri6_tx_transitions)},
+       {"pfc_pri7_tx_duration_us", offsetof(struct tx_port_stats_ext,
+                               pfc_pri7_tx_duration_us)},
+       {"pfc_pri7_tx_transitions", offsetof(struct tx_port_stats_ext,
+                               pfc_pri7_tx_transitions)},
+};
+
 /*
  * Statistics functions
  */
@@ -265,12 +409,22 @@ int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev,
 
        unsigned int count, i;
        uint64_t tx_drop_pkts;
+       unsigned int rx_port_stats_ext_cnt;
+       unsigned int tx_port_stats_ext_cnt;
+       unsigned int stat_size = sizeof(uint64_t);
+       unsigned int stat_count;
 
        bnxt_hwrm_port_qstats(bp);
        bnxt_hwrm_func_qstats_tx_drop(bp, 0xffff, &tx_drop_pkts);
+       bnxt_hwrm_ext_port_qstats(bp);
+       rx_port_stats_ext_cnt = bp->fw_rx_port_stats_ext_size / stat_size;
+       tx_port_stats_ext_cnt = bp->fw_tx_port_stats_ext_size / stat_size;
 
        count = RTE_DIM(bnxt_rx_stats_strings) +
-               RTE_DIM(bnxt_tx_stats_strings) + 1; /* For tx_drop_pkts */
+               RTE_DIM(bnxt_tx_stats_strings) + 1 /* For tx_drop_pkts */ +
+               RTE_DIM(bnxt_rx_ext_stats_strings) +
+               RTE_DIM(bnxt_tx_ext_stats_strings);
+       stat_count = count;
 
        if (n < count)
                return count;
@@ -299,7 +453,27 @@ int bnxt_dev_xstats_get_op(struct rte_eth_dev *eth_dev,
        xstats[count].value = rte_le_to_cpu_64(tx_drop_pkts);
        count++;
 
-       return count;
+       for (i = 0; i < tx_port_stats_ext_cnt; i++) {
+               uint64_t *tx_stats_ext = (uint64_t *)bp->hw_tx_port_stats_ext;
+
+               xstats[count].value = rte_le_to_cpu_64
+                                       (*(uint64_t *)((char *)tx_stats_ext +
+                                        bnxt_tx_ext_stats_strings[i].offset));
+
+               count++;
+       }
+
+       for (i = 0; i < rx_port_stats_ext_cnt; i++) {
+               uint64_t *rx_stats_ext = (uint64_t *)bp->hw_rx_port_stats_ext;
+
+               xstats[count].value = rte_le_to_cpu_64
+                                       (*(uint64_t *)((char *)rx_stats_ext +
+                                        bnxt_rx_ext_stats_strings[i].offset));
+
+               count++;
+       }
+
+       return stat_count;
 }
 
 int bnxt_dev_xstats_get_names_op(__rte_unused struct rte_eth_dev *eth_dev,
@@ -308,7 +482,9 @@ int bnxt_dev_xstats_get_names_op(__rte_unused struct rte_eth_dev *eth_dev,
 {
        /* Account for the Tx drop pkts aka the Anti spoof counter */
        const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) +
-                               RTE_DIM(bnxt_tx_stats_strings) + 1;
+                               RTE_DIM(bnxt_tx_stats_strings) + 1 +
+                               RTE_DIM(bnxt_rx_ext_stats_strings) +
+                               RTE_DIM(bnxt_tx_ext_stats_strings);
        unsigned int i, count;
 
        if (xstats_names != NULL) {
@@ -335,6 +511,25 @@ int bnxt_dev_xstats_get_names_op(__rte_unused struct rte_eth_dev *eth_dev,
                                "%s",
                                bnxt_func_stats_strings[4].name);
                count++;
+
+               for (i = 0; i < RTE_DIM(bnxt_rx_ext_stats_strings); i++) {
+                       snprintf(xstats_names[count].name,
+                                sizeof(xstats_names[count].name),
+                                "%s",
+                                bnxt_rx_ext_stats_strings[i].name);
+
+                       count++;
+               }
+
+               for (i = 0; i < RTE_DIM(bnxt_tx_ext_stats_strings); i++) {
+                       snprintf(xstats_names[count].name,
+                                sizeof(xstats_names[count].name),
+                                "%s",
+                                bnxt_tx_ext_stats_strings[i].name);
+
+                       count++;
+               }
+
        }
        return stat_cnt;
 }
@@ -359,7 +554,9 @@ int bnxt_dev_xstats_get_by_id_op(struct rte_eth_dev *dev, const uint64_t *ids,
 {
        /* Account for the Tx drop pkts aka the Anti spoof counter */
        const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) +
-                               RTE_DIM(bnxt_tx_stats_strings) + 1;
+                               RTE_DIM(bnxt_tx_stats_strings) + 1 +
+                               RTE_DIM(bnxt_rx_ext_stats_strings) +
+                               RTE_DIM(bnxt_tx_ext_stats_strings);
        struct rte_eth_xstat xstats[stat_cnt];
        uint64_t values_copy[stat_cnt];
        uint16_t i;
@@ -384,7 +581,9 @@ int bnxt_dev_xstats_get_names_by_id_op(struct rte_eth_dev *dev,
 {
        /* Account for the Tx drop pkts aka the Anti spoof counter */
        const unsigned int stat_cnt = RTE_DIM(bnxt_rx_stats_strings) +
-                               RTE_DIM(bnxt_tx_stats_strings) + 1;
+                               RTE_DIM(bnxt_tx_stats_strings) + 1 +
+                               RTE_DIM(bnxt_rx_ext_stats_strings) +
+                               RTE_DIM(bnxt_tx_ext_stats_strings);
        struct rte_eth_xstat_name xstats_names_copy[stat_cnt];
        uint16_t i;
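
All of these tables pair a counter name with an offsetof() into the little-endian statistics block that firmware DMAs to host memory; bnxt_dev_xstats_get_op() and bnxt_dev_xstats_get_names_op() walk the same tables, which is what keeps values and names in lockstep as the extended counters are appended. The technique in miniature, with an invented struct and table:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Invented stand-ins for the firmware stats block and its name table. */
struct demo_stats {
	uint64_t link_down_events;
	uint64_t rx_bytes_cos0;
};

struct name_off {
	const char *name;
	size_t offset;
};

static const struct name_off demo_strings[] = {
	{"link_down_events", offsetof(struct demo_stats, link_down_events)},
	{"rx_bytes_cos0",    offsetof(struct demo_stats, rx_bytes_cos0)},
};

int main(void)
{
	struct demo_stats hw = { 3, 1500 };	/* as if DMA'd by firmware */
	size_t i, n = sizeof(demo_strings) / sizeof(demo_strings[0]);

	/* The get and get_names callbacks iterate the same table, so
	 * names and values can never fall out of order. */
	for (i = 0; i < n; i++) {
		uint64_t v = *(const uint64_t *)
			((const char *)&hw + demo_strings[i].offset);
		printf("%s = %llu\n", demo_strings[i].name,
		       (unsigned long long)v);
	}
	return 0;
}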
 
index 67bb35e..39be7bd 100644 (file)
@@ -120,7 +120,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 {
        struct bnxt_tx_ring_info *txr = txq->tx_ring;
        struct tx_bd_long *txbd;
-       struct tx_bd_long_hi *txbd1;
+       struct tx_bd_long_hi *txbd1 = NULL;
        uint32_t vlan_tag_flags, cfa_action;
        bool long_bd = false;
        uint16_t last_prod = 0;
@@ -295,7 +295,8 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
        }
 
        txbd->flags_type |= TX_BD_LONG_FLAGS_PACKET_END;
-       txbd1->lflags = rte_cpu_to_le_32(txbd1->lflags);
+       if (txbd1)
+               txbd1->lflags = rte_cpu_to_le_32(txbd1->lflags);
 
        txr->tx_prod = RING_NEXT(txr->tx_ring_struct, txr->tx_prod);
 
index c0577cd..aebfb1f 100644 (file)
@@ -57,29 +57,6 @@ void bnxt_init_vnics(struct bnxt *bp)
                STAILQ_INIT(&vnic->flow_list);
                STAILQ_INSERT_TAIL(&bp->free_vnic_list, vnic, next);
        }
-       for (i = 0; i < MAX_FF_POOLS; i++)
-               STAILQ_INIT(&bp->ff_pool[i]);
-}
-
-int bnxt_free_vnic(struct bnxt *bp, struct bnxt_vnic_info *vnic,
-                         int pool)
-{
-       struct bnxt_vnic_info *temp;
-
-       temp = STAILQ_FIRST(&bp->ff_pool[pool]);
-       while (temp) {
-               if (temp == vnic) {
-                       STAILQ_REMOVE(&bp->ff_pool[pool], vnic,
-                                     bnxt_vnic_info, next);
-                       vnic->fw_vnic_id = (uint16_t)HWRM_NA_SIGNATURE;
-                       STAILQ_INSERT_TAIL(&bp->free_vnic_list, vnic,
-                                          next);
-                       return 0;
-               }
-               temp = STAILQ_NEXT(temp, next);
-       }
-       PMD_DRV_LOG(ERR, "VNIC %p is not found in pool[%d]\n", vnic, pool);
-       return -EINVAL;
 }
 
 struct bnxt_vnic_info *bnxt_alloc_vnic(struct bnxt *bp)
@@ -98,26 +75,22 @@ struct bnxt_vnic_info *bnxt_alloc_vnic(struct bnxt *bp)
 
 void bnxt_free_all_vnics(struct bnxt *bp)
 {
-       struct bnxt_vnic_info *temp, *next;
-       int i;
+       struct bnxt_vnic_info *temp;
+       unsigned int i;
 
-       for (i = 0; i < MAX_FF_POOLS; i++) {
-               temp = STAILQ_FIRST(&bp->ff_pool[i]);
-               while (temp) {
-                       next = STAILQ_NEXT(temp, next);
-                       STAILQ_REMOVE(&bp->ff_pool[i], temp, bnxt_vnic_info,
-                                     next);
-                       STAILQ_INSERT_TAIL(&bp->free_vnic_list, temp, next);
-                       temp = next;
-               }
+       for (i = 0; i < bp->nr_vnics; i++) {
+               temp = &bp->vnic_info[i];
+               STAILQ_INSERT_TAIL(&bp->free_vnic_list, temp, next);
        }
 }
 
 void bnxt_free_vnic_attributes(struct bnxt *bp)
 {
        struct bnxt_vnic_info *vnic;
+       unsigned int i;
 
-       STAILQ_FOREACH(vnic, &bp->free_vnic_list, next) {
+       for (i = 0; i < bp->max_vnics; i++) {
+               vnic = &bp->vnic_info[i];
                if (vnic->rss_table) {
                        /* 'Unreserve' the rss_table */
                        /* N/A */
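
Together with the rxq changes above, this completes the removal of the per-pool STAILQ bookkeeping: VNICs now live only in the flat bp->vnic_info[] array, and every former ff_pool walk becomes an index loop bounded by nr_vnics or max_vnics. The simplification in miniature, with invented types:

#include <stdio.h>

struct vnic { int id; int in_use; };

#define MAX_VNICS 8

struct adapter {
	struct vnic vnic_info[MAX_VNICS];	/* flat array, as in the new code */
	unsigned int nr_vnics;
};

int main(void)
{
	struct adapter bp = { .nr_vnics = 3 };
	unsigned int i;

	for (i = 0; i < bp.nr_vnics; i++) {	/* was: STAILQ_FOREACH per pool */
		bp.vnic_info[i].id = (int)i;
		bp.vnic_info[i].in_use = 1;
	}
	for (i = 0; i < bp.nr_vnics; i++)
		printf("vnic %d in_use=%d\n", bp.vnic_info[i].id,
		       bp.vnic_info[i].in_use);
	return 0;
}

Array indexing also removes the failure mode the deleted bnxt_free_vnic() had to handle: a VNIC can no longer be "not found in pool".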
index f5c7b42..e800579 100644 (file)
@@ -67,6 +67,10 @@ struct hwrm_resp_hdr {
 #define TLV_TYPE_HWRM_RESPONSE                   UINT32_C(0x2)
 /* RoCE slow path command */
 #define TLV_TYPE_ROCE_SP_COMMAND                 UINT32_C(0x3)
+/* RoCE slow path command to query CC Gen1 support. */
+#define TLV_TYPE_QUERY_ROCE_CC_GEN1              UINT32_C(0x4)
+/* RoCE slow path command to modify CC Gen1 support. */
+#define TLV_TYPE_MODIFY_ROCE_CC_GEN1             UINT32_C(0x5)
 /* Engine CKV - The device's serial number. */
 #define TLV_TYPE_ENGINE_CKV_DEVICE_SERIAL_NUMBER UINT32_C(0x8001)
 /* Engine CKV - Per-function random nonce data. */
@@ -256,6 +260,7 @@ struct cmd_nums {
         */
        uint16_t        req_type;
        #define HWRM_VER_GET                              UINT32_C(0x0)
+       #define HWRM_FUNC_DRV_IF_CHANGE                   UINT32_C(0xd)
        #define HWRM_FUNC_BUF_UNRGTR                      UINT32_C(0xe)
        #define HWRM_FUNC_VF_CFG                          UINT32_C(0xf)
        /* Reserved for future use. */
@@ -328,6 +333,7 @@ struct cmd_nums {
        #define HWRM_RING_FREE                            UINT32_C(0x51)
        #define HWRM_RING_CMPL_RING_QAGGINT_PARAMS        UINT32_C(0x52)
        #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS     UINT32_C(0x53)
+       #define HWRM_RING_AGGINT_QCAPS                    UINT32_C(0x54)
        #define HWRM_RING_RESET                           UINT32_C(0x5e)
        #define HWRM_RING_GRP_ALLOC                       UINT32_C(0x60)
        #define HWRM_RING_GRP_FREE                        UINT32_C(0x61)
@@ -367,6 +373,8 @@ struct cmd_nums {
        #define HWRM_PORT_QSTATS_EXT                      UINT32_C(0xb4)
        #define HWRM_FW_RESET                             UINT32_C(0xc0)
        #define HWRM_FW_QSTATUS                           UINT32_C(0xc1)
+       #define HWRM_FW_HEALTH_CHECK                      UINT32_C(0xc2)
+       #define HWRM_FW_SYNC                              UINT32_C(0xc3)
        /* Experimental */
        #define HWRM_FW_SET_TIME                          UINT32_C(0xc8)
        /* Experimental */
@@ -433,6 +441,7 @@ struct cmd_nums {
        /* Experimental */
        #define HWRM_FW_IPC_MSG                           UINT32_C(0x110)
        #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO        UINT32_C(0x111)
+       #define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE       UINT32_C(0x112)
        /* Engine CKV - Ping the device and SRT firmware to get the public key. */
        #define HWRM_ENGINE_CKV_HELLO                     UINT32_C(0x12d)
        /* Engine CKV - Get the current allocation status of keys provisioned in the key vault. */
@@ -515,6 +524,10 @@ struct cmd_nums {
        #define HWRM_FUNC_BACKING_STORE_CFG               UINT32_C(0x193)
        /* Experimental */
        #define HWRM_FUNC_BACKING_STORE_QCFG              UINT32_C(0x194)
+       /* Configures the BW of any VF */
+       #define HWRM_FUNC_VF_BW_CFG                       UINT32_C(0x195)
+       /* Queries the BW of any VF */
+       #define HWRM_FUNC_VF_BW_QCFG                      UINT32_C(0x196)
        /* Experimental */
        #define HWRM_SELFTEST_QLIST                       UINT32_C(0x200)
        /* Experimental */
@@ -544,8 +557,12 @@ struct cmd_nums {
        #define HWRM_DBG_COREDUMP_INITIATE                UINT32_C(0xff18)
        /* Experimental */
        #define HWRM_DBG_COREDUMP_RETRIEVE                UINT32_C(0xff19)
+       /* Experimental */
+       #define HWRM_DBG_FW_CLI                           UINT32_C(0xff1a)
        /*  */
        #define HWRM_DBG_I2C_CMD                          UINT32_C(0xff1b)
+       /*  */
+       #define HWRM_DBG_RING_INFO_GET                    UINT32_C(0xff1c)
        /* Experimental */
        #define HWRM_NVM_FACTORY_DEFAULTS                 UINT32_C(0xffee)
        #define HWRM_NVM_VALIDATE_OPTION                  UINT32_C(0xffef)
@@ -615,6 +632,11 @@ struct ret_codes {
         * should retry the request.
         */
        #define HWRM_ERR_CODE_NO_BUFFER              UINT32_C(0x8)
+       /*
+        * This error code is only reported by firmware when some
+        * sub-option of a supported HWRM command is unsupported.
+        */
+       #define HWRM_ERR_CODE_UNSUPPORTED_OPTION_ERR UINT32_C(0x9)
        /*
         * Generic HWRM execution error that represents an
         * internal error.
@@ -686,8 +708,8 @@ struct hwrm_err_output {
 #define HWRM_VERSION_MINOR 9
 #define HWRM_VERSION_UPDATE 2
 /* non-zero means beta version */
-#define HWRM_VERSION_RSVD 9
-#define HWRM_VERSION_STR "1.9.2.9"
+#define HWRM_VERSION_RSVD 53
+#define HWRM_VERSION_STR "1.9.2.53"
 
 /****************
  * hwrm_ver_get *
@@ -901,6 +923,42 @@ struct hwrm_ver_get_output {
         */
        #define HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_SHORT_CMD_REQUIRED \
                UINT32_C(0x8)
+       /*
+        * If set to 1, then the KONG host mailbox channel is supported.
+        * If set to 0, then the KONG host mailbox channel is not supported.
+        * By default, this flag should be 0 for older versions of core firmware.
+        */
+       #define HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_KONG_MB_CHNL_SUPPORTED \
+               UINT32_C(0x10)
+       /*
+        * If set to 1, then the 64bit flow handle is supported in addition to the
+        * legacy 16bit flow handle. If set to 0, then the 64bit flow handle is not
+        * supported. By default, this flag should be 0 for older versions of core firmware.
+        */
+       #define HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_FLOW_HANDLE_64BIT_SUPPORTED \
+               UINT32_C(0x20)
+       /*
+        * If set to 1, then a filter type can be provided in filter_alloc or
+        * filter_cfg: e.g. L2 for L2 traffic and ROCE for RoCE and L2 traffic.
+        * If set to 0, then filter types are not supported.
+        * By default, this flag should be 0 for older versions of core firmware.
+        */
+       #define HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_L2_FILTER_TYPES_ROCE_OR_L2_SUPPORTED \
+               UINT32_C(0x40)
+       /*
+        * If set to 1, firmware is capable of supporting the virtio vSwitch
+        * offload model. If set to 0, firmware cannot support that model.
+        * By default, this flag should be 0 for older versions of core firmware.
+        */
+       #define HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_VIRTIO_VSWITCH_OFFLOAD_SUPPORTED \
+               UINT32_C(0x80)
+       /*
+        * If set to 1, firmware is capable of supporting trusted VFs.
+        * If set to 0, firmware is not capable of supporting trusted VFs.
+        * By default, this flag should be 0 for older versions of core firmware.
+        */
+       #define HWRM_VER_GET_OUTPUT_DEV_CAPS_CFG_TRUSTED_VF_SUPPORTED \
+               UINT32_C(0x100)
        /*
         * This field represents the major version of RoCE firmware.
         * A change in major version represents a major release.
@@ -1154,39 +1212,45 @@ struct hwrm_ver_get_output {
 struct bd_base {
        uint8_t type;
        /* This value identifies the type of buffer descriptor. */
-       #define BD_BASE_TYPE_MASK       UINT32_C(0x3f)
-       #define BD_BASE_TYPE_SFT        0
+       #define BD_BASE_TYPE_MASK             UINT32_C(0x3f)
+       #define BD_BASE_TYPE_SFT              0
        /*
         * Indicates that this BD is 16B long and is used for
         * normal L2 packet transmission.
         */
-       #define BD_BASE_TYPE_TX_BD_SHORT  UINT32_C(0x0)
+       #define BD_BASE_TYPE_TX_BD_SHORT        UINT32_C(0x0)
        /*
         * Indicates that this BD is 16B long and is an empty
         * TX BD.  Not valid for use by the driver.
         */
-       #define BD_BASE_TYPE_TX_BD_EMPTY  UINT32_C(0x1)
+       #define BD_BASE_TYPE_TX_BD_EMPTY        UINT32_C(0x1)
        /*
         * Indicates that this BD is 16B long and is an RX Producer
         * (ie. empty) buffer descriptor.
         */
-       #define BD_BASE_TYPE_RX_PROD_PKT  UINT32_C(0x4)
+       #define BD_BASE_TYPE_RX_PROD_PKT        UINT32_C(0x4)
        /*
         * Indicates that this BD is 16B long and is an RX
         * Producer Buffer BD.
         */
-       #define BD_BASE_TYPE_RX_PROD_BFR  UINT32_C(0x5)
+       #define BD_BASE_TYPE_RX_PROD_BFR        UINT32_C(0x5)
        /*
         * Indicates that this BD is 16B long and is an
         * RX Producer Assembly Buffer Descriptor.
         */
-       #define BD_BASE_TYPE_RX_PROD_AGG  UINT32_C(0x6)
+       #define BD_BASE_TYPE_RX_PROD_AGG        UINT32_C(0x6)
        /*
         * Indicates that this BD is 32B long and is used for
         * normal L2 packet transmission.
         */
-       #define BD_BASE_TYPE_TX_BD_LONG   UINT32_C(0x10)
-       #define BD_BASE_TYPE_LAST        BD_BASE_TYPE_TX_BD_LONG
+       #define BD_BASE_TYPE_TX_BD_LONG         UINT32_C(0x10)
+       /*
+        * Indicates that this BD is 32B long and is used for
+        * L2 packet transmission for small packets that require
+        * low latency.
+        */
+       #define BD_BASE_TYPE_TX_BD_LONG_INLINE  UINT32_C(0x11)
+       #define BD_BASE_TYPE_LAST              BD_BASE_TYPE_TX_BD_LONG_INLINE
        uint8_t unused_1[7];
 } __attribute__((packed));
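
Extending bd_base with TX_BD_LONG_INLINE works because every descriptor keeps its type in the low 6 bits of the first word; consumers mask and compare, so new types slot in without changing the classification code. A tiny standalone decoder using the constants from this hunk; bd_type() is an invented helper:

#include <assert.h>
#include <stdint.h>

#define BD_BASE_TYPE_MASK               UINT32_C(0x3f)
#define BD_BASE_TYPE_SFT                0
#define BD_BASE_TYPE_TX_BD_LONG         UINT32_C(0x10)
#define BD_BASE_TYPE_TX_BD_LONG_INLINE  UINT32_C(0x11)

/* Extract the BD type from the first word of any descriptor. */
static uint32_t bd_type(uint32_t flags_type)
{
	return (flags_type & BD_BASE_TYPE_MASK) >> BD_BASE_TYPE_SFT;
}

int main(void)
{
	/* Upper flag bits do not disturb the type classification. */
	assert(bd_type(0xffc0 | 0x10) == BD_BASE_TYPE_TX_BD_LONG);
	assert(bd_type(0x8000 | 0x11) == BD_BASE_TYPE_TX_BD_LONG_INLINE);
	return 0;
}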
 
@@ -1406,6 +1470,7 @@ struct tx_bd_long {
        uint64_t        address;
 } __attribute__((packed));
 
+/* Last 16 bytes of tx_bd_long. */
 /* tx_bd_long_hi (size:128b/16B) */
 struct tx_bd_long_hi {
        /*
@@ -1595,6 +1660,219 @@ struct tx_bd_long_hi {
                TX_BD_LONG_CFA_META_KEY_VLAN_TAG
 } __attribute__((packed));
 
+/*
+ * This structure is used to inform the NIC of packet data that needs to be
+ * transmitted with additional processing that requires extra data such as
+ * VLAN insertion plus attached inline data. This BD type may be used to
+ * improve latency for small packets needing the additional extended features
+ * supported by long BDs.
+ */
+/* tx_bd_long_inline (size:256b/32B) */
+struct tx_bd_long_inline {
+       uint16_t        flags_type;
+       /* This value identifies the type of buffer descriptor. */
+       #define TX_BD_LONG_INLINE_TYPE_MASK             UINT32_C(0x3f)
+       #define TX_BD_LONG_INLINE_TYPE_SFT              0
+       /*
+        * This type of BD is 32B long and is used for inline L2 packet
+        * transmission.
+        */
+       #define TX_BD_LONG_INLINE_TYPE_TX_BD_LONG_INLINE  UINT32_C(0x11)
+       #define TX_BD_LONG_INLINE_TYPE_LAST \
+               TX_BD_LONG_INLINE_TYPE_TX_BD_LONG_INLINE
+       /*
+        * All bits in this field may be set on the first BD of a packet.
+        * Only the packet_end bit may be set in non-first BDs.
+        */
+       #define TX_BD_LONG_INLINE_FLAGS_MASK            UINT32_C(0xffc0)
+       #define TX_BD_LONG_INLINE_FLAGS_SFT             6
+       /*
+        * If set to 1, the packet ends with the data in the buffer
+        * pointed to by this descriptor.  This flag must be
+        * valid on every BD.
+        */
+       #define TX_BD_LONG_INLINE_FLAGS_PACKET_END       UINT32_C(0x40)
+       /*
+        * If set to 1, the device will not generate a completion for
+        * this transmit packet unless there is an error in its processing.
+        * If this bit is set to 0, then the packet will be completed
+        * normally.
+        *
+        * This bit may be set only on the first BD of a packet.
+        */
+       #define TX_BD_LONG_INLINE_FLAGS_NO_CMPL          UINT32_C(0x80)
+       /*
+        * This value indicates how many 16B BD locations are consumed
+        * in the ring by this packet, including the BD and inline
+        * data.
+        */
+       #define TX_BD_LONG_INLINE_FLAGS_BD_CNT_MASK      UINT32_C(0x1f00)
+       #define TX_BD_LONG_INLINE_FLAGS_BD_CNT_SFT       8
+       /* This field is deprecated. */
+       #define TX_BD_LONG_INLINE_FLAGS_LHINT_MASK       UINT32_C(0x6000)
+       #define TX_BD_LONG_INLINE_FLAGS_LHINT_SFT        13
+       /*
+        * If set to 1, the device immediately updates the Send Consumer
+        * Index after the buffer associated with this descriptor has
+        * been transferred via DMA to NIC memory from host memory. An
+        * interrupt may or may not be generated according to the state
+        * of the interrupt avoidance mechanisms. If this bit
+        * is set to 0, then the Consumer Index is only updated as soon
+        * as one of the host interrupt coalescing conditions has been met.
+        *
+        * This bit must be valid on the first BD of a packet.
+        */
+       #define TX_BD_LONG_INLINE_FLAGS_COAL_NOW         UINT32_C(0x8000)
+       /*
+        * This is the length of the inline data, not including BD length, in
+        * bytes.
+        * The maximum value is 480.
+        *
+        * This field must be valid on all BDs of a packet.
+        */
+       uint16_t        len;
+       /*
+        * The opaque data field is passed through to the completion and can be
+        * used for any data that the driver wants to associate with the transmit
+        * BD.
+        *
+        * This field must be valid on the first BD of a packet.
+        */
+       uint32_t        opaque;
+       uint64_t        unused1;
+       /*
+        * All bits in this field must be valid on the first BD of a packet.
+        * Their value on other BDs of the packet is ignored.
+        */
+       uint16_t        lflags;
+       /*
+        * If set to 1, the controller replaces the TCP/UDP checksum
+        * fields of normal TCP/UDP packets, or the inner TCP/UDP
+        * checksum field of the encapsulated TCP/UDP packets with the
+        * hardware calculated TCP/UDP checksum for the packet associated
+        * with this descriptor. The flag is ignored if the LSO flag is set.
+        */
+       #define TX_BD_LONG_INLINE_LFLAGS_TCP_UDP_CHKSUM     UINT32_C(0x1)
+       /*
+        * If set to 1, the controller replaces the IP checksum of the
+        * normal packets, or the inner IP checksum of the encapsulated
+        * packets with the hardware calculated IP checksum for the
+        * packet associated with this descriptor.
+        */
+       #define TX_BD_LONG_INLINE_LFLAGS_IP_CHKSUM          UINT32_C(0x2)
+       /*
+        * If set to 1, the controller will not append an Ethernet CRC
+        * to the end of the frame.
+        *
+        * Packet must be 64B or longer when this flag is set. It is not
+        * useful to use this bit with any form of TX offload such as
+        * CSO or LSO. The intent is that the packet from the host already
+        * has a valid Ethernet CRC on the packet.
+        */
+       #define TX_BD_LONG_INLINE_LFLAGS_NOCRC              UINT32_C(0x4)
+       /*
+        * If set to 1, the device will record the time at which the packet
+        * was actually transmitted at the TX MAC.
+        */
+       #define TX_BD_LONG_INLINE_LFLAGS_STAMP              UINT32_C(0x8)
+       /*
+        * If set to 1, the controller replaces the tunnel IP checksum
+        * field with hardware calculated IP checksum for the IP header
+        * of the packet associated with this descriptor. The hardware
+        * updates an outer UDP checksum if it is non-zero.
+        */
+       #define TX_BD_LONG_INLINE_LFLAGS_T_IP_CHKSUM        UINT32_C(0x10)
+       /*
+        * This bit must be 0 for BDs of this type. LSO is not supported with
+        * inline BDs.
+        */
+       #define TX_BD_LONG_INLINE_LFLAGS_LSO                UINT32_C(0x20)
+       /* Since LSO is not supported with inline BDs, this bit is not used. */
+       #define TX_BD_LONG_INLINE_LFLAGS_IPID_FMT           UINT32_C(0x40)
+       /* Since LSO is not supported with inline BDs, this bit is not used. */
+       #define TX_BD_LONG_INLINE_LFLAGS_T_IPID             UINT32_C(0x80)
+       /*
+        * If set to '1', then the RoCE ICRC will be appended to the
+        * packet.  Packet must be a valid RoCE format packet.
+        */
+       #define TX_BD_LONG_INLINE_LFLAGS_ROCE_CRC           UINT32_C(0x100)
+       /*
+        * If set to '1', then the FCoE CRC will be appended to the
+        * packet.  Packet must be a valid FCoE format packet.
+        */
+       #define TX_BD_LONG_INLINE_LFLAGS_FCOE_CRC           UINT32_C(0x200)
+       uint16_t        unused2;
+       uint32_t        unused3;
+       uint16_t        unused4;
+       /*
+        * This value selects a CFA action to perform on the packet.
+        * Set this value to zero if no CFA action is desired.
+        *
+        * This value must be valid on the first BD of a packet.
+        */
+       uint16_t        cfa_action;
+       /*
+        * This value is action meta-data that defines CFA edit operations
+        * that are done in addition to any action editing.
+        */
+       uint32_t        cfa_meta;
+       /* When key = 1, this is the VLAN tag VID value. */
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_VID_MASK     UINT32_C(0xfff)
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_VID_SFT      0
+       /* When key = 1, this is the VLAN tag DE value. */
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_DE           UINT32_C(0x1000)
+       /* When key = 1, this is the VLAN tag PRI value. */
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_PRI_MASK     UINT32_C(0xe000)
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_PRI_SFT      13
+       /* When key = 1, this is the VLAN tag TPID select value. */
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_TPID_MASK    UINT32_C(0x70000)
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_TPID_SFT     16
+       /* 0x88a8 */
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_TPID_TPID88A8 \
+               (UINT32_C(0x0) << 16)
+       /* 0x8100 */
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_TPID_TPID8100 \
+               (UINT32_C(0x1) << 16)
+       /* 0x9100 */
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_TPID_TPID9100 \
+               (UINT32_C(0x2) << 16)
+       /* 0x9200 */
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_TPID_TPID9200 \
+               (UINT32_C(0x3) << 16)
+       /* 0x9300 */
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_TPID_TPID9300 \
+               (UINT32_C(0x4) << 16)
+       /* Value programmed in CFA VLANTPID register. */
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_TPID_TPIDCFG \
+               (UINT32_C(0x5) << 16)
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_TPID_LAST \
+               TX_BD_LONG_INLINE_CFA_META_VLAN_TPID_TPIDCFG
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_RESERVED_MASK \
+               UINT32_C(0xff80000)
+       #define TX_BD_LONG_INLINE_CFA_META_VLAN_RESERVED_SFT 19
+       /*
+        * This field identifies the type of edit to be performed
+        * on the packet.
+        *
+        * This value must be valid on the first BD of a packet.
+        */
+       #define TX_BD_LONG_INLINE_CFA_META_KEY_MASK \
+               UINT32_C(0xf0000000)
+       #define TX_BD_LONG_INLINE_CFA_META_KEY_SFT           28
+       /* No editing */
+       #define TX_BD_LONG_INLINE_CFA_META_KEY_NONE \
+               (UINT32_C(0x0) << 28)
+       /*
+        * - meta[17:16] - TPID select value (0 = 0x8100).
+        * - meta[15:12] - PRI/DE value.
+        * - meta[11:0] - VID value.
+        */
+       #define TX_BD_LONG_INLINE_CFA_META_KEY_VLAN_TAG \
+               (UINT32_C(0x1) << 28)
+       #define TX_BD_LONG_INLINE_CFA_META_KEY_LAST \
+               TX_BD_LONG_INLINE_CFA_META_KEY_VLAN_TAG
+} __attribute__((packed));
+
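/*
 * Illustrative sketch only, not part of the patch: composing the
 * cfa_meta word of the inline TX BD for a VLAN tag edit using the
 * masks and shifts defined above. The enclosing struct name
 * tx_bd_long_inline is inferred from the define prefixes, and the
 * byte-order conversion a real driver would apply is omitted.
 */
static inline void
example_inline_bd_vlan_edit(struct tx_bd_long_inline *txbd,
			    uint16_t vid, uint8_t pri, uint8_t de)
{
	uint32_t meta = TX_BD_LONG_INLINE_CFA_META_KEY_VLAN_TAG |
			TX_BD_LONG_INLINE_CFA_META_VLAN_TPID_TPID8100;

	/* PRI lives in meta[15:13], DE in meta[12], VID in meta[11:0]. */
	meta |= ((uint32_t)pri << TX_BD_LONG_INLINE_CFA_META_VLAN_PRI_SFT) &
		TX_BD_LONG_INLINE_CFA_META_VLAN_PRI_MASK;
	if (de)
		meta |= TX_BD_LONG_INLINE_CFA_META_VLAN_DE;
	meta |= vid & TX_BD_LONG_INLINE_CFA_META_VLAN_VID_MASK;

	txbd->cfa_meta = meta;
}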
 /* tx_bd_empty (size:128b/16B) */
 struct tx_bd_empty {
        /* This value identifies the type of buffer descriptor. */
@@ -2121,6 +2399,7 @@ struct rx_pkt_cmpl {
        uint32_t        rss_hash;
 } __attribute__((packed));
 
+/* Last 16 bytes of rx_pkt_cmpl. */
 /* rx_pkt_cmpl_hi (size:128b/16B) */
 struct rx_pkt_cmpl_hi {
        uint32_t        flags2;
@@ -2566,6 +2845,7 @@ struct rx_tpa_start_cmpl {
        uint32_t        rss_hash;
 } __attribute__((packed));
 
+/* Last 16 bytes of rx_tpa_start_cmpl. */
 /* rx_tpa_start_cmpl_hi (size:128b/16B) */
 struct rx_tpa_start_cmpl_hi {
        uint32_t        flags2;
@@ -2830,6 +3110,7 @@ struct rx_tpa_end_cmpl {
        uint32_t        tsdelta;
 } __attribute__((packed));
 
+/* Last 16 bytes of rx_tpa_end_cmpl. */
 /* rx_tpa_end_cmpl_hi (size:128b/16B) */
 struct rx_tpa_end_cmpl_hi {
        /*
@@ -3153,6 +3434,9 @@ struct hwrm_async_event_cmpl {
        /* Port PHY configuration change */
        #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE \
                UINT32_C(0x7)
+       /* Reset notification to clients */
+       #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY \
+               UINT32_C(0x8)
        /* Function driver unloaded */
        #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD \
                UINT32_C(0x10)
@@ -3790,6 +4074,96 @@ struct hwrm_async_event_cmpl_port_phy_cfg_change {
                UINT32_C(0x40000)
 } __attribute__((packed));
 
+/* hwrm_async_event_cmpl_reset_notify (size:128b/16B) */
+struct hwrm_async_event_cmpl_reset_notify {
+       uint16_t        type;
+       /*
+        * This field indicates the exact type of the completion.
+        * By convention, the LSB identifies the length of the
+        * record in 16B units.  Even values indicate 16B
+        * records.  Odd values indicate 32B
+        * records.
+        */
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_MASK \
+               UINT32_C(0x3f)
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_SFT             0
+       /* HWRM Asynchronous Event Information */
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_HWRM_ASYNC_EVENT \
+               UINT32_C(0x2e)
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_LAST \
+               HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_TYPE_HWRM_ASYNC_EVENT
+       /* Identifiers of events. */
+       uint16_t        event_id;
+       /* Notify clients of imminent reset. */
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_RESET_NOTIFY \
+               UINT32_C(0x8)
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_LAST \
+               HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_ID_RESET_NOTIFY
+       /* Event specific data */
+       uint32_t        event_data2;
+       uint8_t opaque_v;
+       /*
+        * This value is written by the NIC such that it will be different
+        * for each pass through the completion queue.   The even passes
+        * will write 1.  The odd passes will write 0.
+        */
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_V          UINT32_C(0x1)
+       /* opaque is 7 b */
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_OPAQUE_MASK UINT32_C(0xfe)
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_OPAQUE_SFT 1
+       /* 8-lsb timestamp from POR (100-msec resolution) */
+       uint8_t timestamp_lo;
+       /* 16-lsb timestamp from POR (100-msec resolution) */
+       uint16_t        timestamp_hi;
+       /* Event specific data */
+       uint32_t        event_data1;
+       /* Indicates driver action requested */
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_MASK \
+               UINT32_C(0xff)
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_SFT \
+               0
+       /*
+        * If set to 1, it indicates that the L2 client should
+        * stop sending in-band traffic to Nitro.
+        * If set to 0, then there is no change in L2 client behavior.
+        */
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_DRIVER_STOP_TX_QUEUE \
+               UINT32_C(0x1)
+       /*
+        * If set to 1, it indicates that the L2 client should
+        * bring down the interface.
+        * If set to 0, then there is no change in L2 client behavior.
+        */
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_DRIVER_IFDOWN \
+               UINT32_C(0x2)
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_LAST \
+               HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_DRIVER_IFDOWN
+       /* Indicates reason for reset. */
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MASK \
+               UINT32_C(0xff00)
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_SFT \
+               8
+       /* A management client has requested reset. */
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_MANAGEMENT_RESET_REQUEST \
+               (UINT32_C(0x1) << 8)
+       /* A fatal firmware exception has occurred. */
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_FATAL \
+               (UINT32_C(0x2) << 8)
+       /* A non-fatal firmware exception has occurred. */
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL \
+               (UINT32_C(0x3) << 8)
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_LAST \
+               HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_REASON_CODE_FW_EXCEPTION_NON_FATAL
+       /*
+        * Minimum time before driver should attempt access - units 100ms ticks.
+        * Range 0-65535
+        */
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_MASK \
+               UINT32_C(0xffff0000)
+       #define HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_SFT \
+               16
+} __attribute__((packed));
+
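/*
 * Illustrative sketch only, not part of the patch: decoding
 * event_data1 of the reset-notify completion above. The caller is
 * assumed to have already validated the completion (the V bit in
 * opaque_v matching the current ring phase) and handled byte order;
 * the two driver hooks are hypothetical.
 */
extern void example_stop_tx_queues(void);	/* hypothetical hook */
extern void example_bring_if_down(void);	/* hypothetical hook */

static inline uint32_t
example_handle_reset_notify(const struct hwrm_async_event_cmpl_reset_notify *c)
{
	uint32_t d1 = c->event_data1;
	/* Minimum wait before re-accessing the device, in milliseconds. */
	uint32_t delay_ms = 100 * ((d1 &
	    HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_MASK) >>
	    HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DELAY_IN_100MS_TICKS_SFT);

	if (d1 & HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_DRIVER_STOP_TX_QUEUE)
		example_stop_tx_queues();
	if (d1 & HWRM_ASYNC_EVENT_CMPL_RESET_NOTIFY_EVENT_DATA1_DRIVER_ACTION_DRIVER_IFDOWN)
		example_bring_if_down();

	return delay_ms;
}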
 /* hwrm_async_event_cmpl_func_drvr_unload (size:128b/16B) */
 struct hwrm_async_event_cmpl_func_drvr_unload {
        uint16_t        type;
@@ -4285,6 +4659,13 @@ struct hwrm_async_event_cmpl_vf_cfg_change {
         */
        #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_DFLT_VLAN_CHANGE \
                UINT32_C(0x8)
+       /*
+        * If this bit is set to 1, then the value of trusted VF enable
+        * was changed on this VF.
+        * If set to 0, then this bit should be ignored.
+        */
+       #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_TRUSTED_VF_CFG_CHANGE \
+               UINT32_C(0x10)
 } __attribute__((packed));
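/*
 * Illustrative sketch only, not part of the patch: reacting to the
 * new trusted-VF change bit in a VF_CFG_CHANGE async event by
 * re-reading the trusted state from a fresh hwrm_func_qcfg_output.
 * The flags member name and byte-order handling are assumptions.
 */
static inline int
example_trusted_vf_changed(uint32_t event_data1,
			   const struct hwrm_func_qcfg_output *qcfg)
{
	if (!(event_data1 &
	      HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_TRUSTED_VF_CFG_CHANGE))
		return -1;	/* bit clear: trusted state did not change */

	/* Re-queried state: 1 if the VF is now trusted. */
	return !!(qcfg->flags & HWRM_FUNC_QCFG_OUTPUT_FLAGS_TRUSTED_VF);
}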
 
 /* hwrm_async_event_cmpl_llfc_pfc_change (size:128b/16B) */
@@ -5305,6 +5686,20 @@ struct hwrm_func_qcaps_output {
         */
        #define HWRM_FUNC_QCAPS_OUTPUT_FLAGS_ADMIN_PF_SUPPORTED \
                UINT32_C(0x40000)
+       /*
+        * If the query is for a VF, then this flag shall be ignored.
+        * If this query is for a PF and this flag is set to 1, then
+        * the PF will know that the firmware has the capability to track
+        * the virtual link status.
+        */
+       #define HWRM_FUNC_QCAPS_OUTPUT_FLAGS_LINK_ADMIN_STATUS_SUPPORTED \
+               UINT32_C(0x80000)
+       /*
+        * If 1, then this function supports the push mode that uses
+        * write combine buffers and the long inline tx buffer descriptor.
+        */
+       #define HWRM_FUNC_QCAPS_OUTPUT_FLAGS_WCB_PUSH_MODE \
+               UINT32_C(0x100000)
        /*
         * This value is current MAC address configured for this
         * function. A value of 00-00-00-00-00-00 indicates no
@@ -5547,6 +5942,15 @@ struct hwrm_func_qcfg_output {
         */
        #define HWRM_FUNC_QCFG_OUTPUT_FLAGS_MULTI_HOST \
                UINT32_C(0x20)
+       /*
+        * If the function that is being queried is a PF, then the HWRM shall
+        * set this field to 0 and the HWRM client shall ignore this field.
+        * If the function that is being queried is a VF, then the HWRM shall
+        * set this field to 1 if the queried VF is trusted, otherwise the HWRM
+        * shall set this field to 0.
+        */
+       #define HWRM_FUNC_QCFG_OUTPUT_FLAGS_TRUSTED_VF \
+               UINT32_C(0x40)
        /*
         * This value is current MAC address configured for this
         * function. A value of 00-00-00-00-00-00 indicates no
@@ -5755,7 +6159,7 @@ struct hwrm_func_qcfg_output {
         */
        #define HWRM_FUNC_QCFG_OUTPUT_OPTIONS_CACHE_LINESIZE_MASK \
                UINT32_C(0x3)
-       #define HWRM_FUNC_QCFG_OUTPUT_OPTIONS_CACHE_LINESIZE_SFT     0
+       #define HWRM_FUNC_QCFG_OUTPUT_OPTIONS_CACHE_LINESIZE_SFT          0
        /* Cache Line Size 64 bytes */
        #define HWRM_FUNC_QCFG_OUTPUT_OPTIONS_CACHE_LINESIZE_SIZE_64 \
                UINT32_C(0x0)
@@ -5764,10 +6168,25 @@ struct hwrm_func_qcfg_output {
                UINT32_C(0x1)
        #define HWRM_FUNC_QCFG_OUTPUT_OPTIONS_CACHE_LINESIZE_LAST \
                HWRM_FUNC_QCFG_OUTPUT_OPTIONS_CACHE_LINESIZE_SIZE_128
+       /* This value is the virtual link admin state setting. */
+       #define HWRM_FUNC_QCFG_OUTPUT_OPTIONS_LINK_ADMIN_STATE_MASK \
+               UINT32_C(0xc)
+       #define HWRM_FUNC_QCFG_OUTPUT_OPTIONS_LINK_ADMIN_STATE_SFT        2
+       /* Admin link state is in forced down mode. */
+       #define HWRM_FUNC_QCFG_OUTPUT_OPTIONS_LINK_ADMIN_STATE_FORCED_DOWN \
+               (UINT32_C(0x0) << 2)
+       /* Admin link state is in forced up mode. */
+       #define HWRM_FUNC_QCFG_OUTPUT_OPTIONS_LINK_ADMIN_STATE_FORCED_UP \
+               (UINT32_C(0x1) << 2)
+       /* Admin link state is in auto mode - follows the physical link state. */
+       #define HWRM_FUNC_QCFG_OUTPUT_OPTIONS_LINK_ADMIN_STATE_AUTO \
+               (UINT32_C(0x2) << 2)
+       #define HWRM_FUNC_QCFG_OUTPUT_OPTIONS_LINK_ADMIN_STATE_LAST \
+               HWRM_FUNC_QCFG_OUTPUT_OPTIONS_LINK_ADMIN_STATE_AUTO
        /* Reserved for future. */
        #define HWRM_FUNC_QCFG_OUTPUT_OPTIONS_RSVD_MASK \
-               UINT32_C(0xfc)
-       #define HWRM_FUNC_QCFG_OUTPUT_OPTIONS_RSVD_SFT               2
+               UINT32_C(0xf0)
+       #define HWRM_FUNC_QCFG_OUTPUT_OPTIONS_RSVD_SFT                    4
        /*
         * The number of VFs that are allocated to the function.
         * This is valid only on the PF with SR-IOV enabled.
@@ -5814,13 +6233,13 @@ struct hwrm_func_qcfg_output {
        uint8_t valid;
 } __attribute__((packed));
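/*
 * Illustrative sketch only, not part of the patch: reading the new
 * 2-bit link admin state from the options field of
 * hwrm_func_qcfg_output. The member name options is inferred from
 * the define prefixes; byte-order handling is omitted.
 */
static inline int
example_link_admin_forced_down(const struct hwrm_func_qcfg_output *resp)
{
	/*
	 * The *_LINK_ADMIN_STATE_* values are pre-shifted, so the
	 * masked field can be compared against them directly.
	 */
	return (resp->options &
		HWRM_FUNC_QCFG_OUTPUT_OPTIONS_LINK_ADMIN_STATE_MASK) ==
	       HWRM_FUNC_QCFG_OUTPUT_OPTIONS_LINK_ADMIN_STATE_FORCED_DOWN;
}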
 
-/***********************
- * hwrm_func_vlan_qcfg *
- ***********************/
+/*****************
+ * hwrm_func_cfg *
+ *****************/
 
 
-/* hwrm_func_vlan_qcfg_input (size:192b/24B) */
-struct hwrm_func_vlan_qcfg_input {
+/* hwrm_func_cfg_input (size:704b/88B) */
+struct hwrm_func_cfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -5851,236 +6270,25 @@ struct hwrm_func_vlan_qcfg_input {
        /*
         * Function ID of the function that is being
         * configured.
-        * If set to 0xFF... (All Fs), then the configuration is
+        * If set to 0xFF... (All Fs), then the configuration is
         * for the requesting function.
         */
        uint16_t        fid;
-       uint8_t unused_0[6];
-} __attribute__((packed));
-
-/* hwrm_func_vlan_qcfg_output (size:320b/40B) */
-struct hwrm_func_vlan_qcfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t unused_0[7];
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * This field specifies how many NQs will be reserved for the PF.
+        * Remaining NQs that belong to the PF become available for VFs.
+        * Once a PF has created VFs, it cannot change how many NQs are
+        * reserved for itself (since the NQs must be contiguous in HW).
         */
-       uint8_t valid;
-       /* S-TAG VLAN identifier configured for the function. */
-       uint16_t        stag_vid;
-       /* S-TAG PCP value configured for the function. */
-       uint8_t stag_pcp;
-       uint8_t unused_1;
+       uint16_t        num_msix;
+       uint32_t        flags;
        /*
-        * S-TAG TPID value configured for the function. This field is specified in
-        * network byte order.
-        */
-       uint16_t        stag_tpid;
-       /* C-TAG VLAN identifier configured for the function. */
-       uint16_t        ctag_vid;
-       /* C-TAG PCP value configured for the function. */
-       uint8_t ctag_pcp;
-       uint8_t unused_2;
-       /*
-        * C-TAG TPID value configured for the function. This field is specified in
-        * network byte order.
-        */
-       uint16_t        ctag_tpid;
-       /* Future use. */
-       uint32_t        rsvd2;
-       /* Future use. */
-       uint32_t        rsvd3;
-       uint32_t        unused_3;
-} __attribute__((packed));
-
-/**********************
- * hwrm_func_vlan_cfg *
- **********************/
-
-
-/* hwrm_func_vlan_cfg_input (size:384b/48B) */
-struct hwrm_func_vlan_cfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
-       /*
-        * Function ID of the function that is being
-        * configured.
-        * If set to 0xFF... (All Fs), then the configuration is
-        * for the requesting function.
-        */
-       uint16_t        fid;
-       uint8_t unused_0[2];
-       uint32_t        enables;
-       /*
-        * This bit must be '1' for the stag_vid field to be
-        * configured.
-        */
-       #define HWRM_FUNC_VLAN_CFG_INPUT_ENABLES_STAG_VID      UINT32_C(0x1)
-       /*
-        * This bit must be '1' for the ctag_vid field to be
-        * configured.
-        */
-       #define HWRM_FUNC_VLAN_CFG_INPUT_ENABLES_CTAG_VID      UINT32_C(0x2)
-       /*
-        * This bit must be '1' for the stag_pcp field to be
-        * configured.
-        */
-       #define HWRM_FUNC_VLAN_CFG_INPUT_ENABLES_STAG_PCP      UINT32_C(0x4)
-       /*
-        * This bit must be '1' for the ctag_pcp field to be
-        * configured.
-        */
-       #define HWRM_FUNC_VLAN_CFG_INPUT_ENABLES_CTAG_PCP      UINT32_C(0x8)
-       /*
-        * This bit must be '1' for the stag_tpid field to be
-        * configured.
-        */
-       #define HWRM_FUNC_VLAN_CFG_INPUT_ENABLES_STAG_TPID     UINT32_C(0x10)
-       /*
-        * This bit must be '1' for the ctag_tpid field to be
-        * configured.
-        */
-       #define HWRM_FUNC_VLAN_CFG_INPUT_ENABLES_CTAG_TPID     UINT32_C(0x20)
-       /* S-TAG VLAN identifier configured for the function. */
-       uint16_t        stag_vid;
-       /* S-TAG PCP value configured for the function. */
-       uint8_t stag_pcp;
-       uint8_t unused_1;
-       /*
-        * S-TAG TPID value configured for the function. This field is specified in
-        * network byte order.
-        */
-       uint16_t        stag_tpid;
-       /* C-TAG VLAN identifier configured for the function. */
-       uint16_t        ctag_vid;
-       /* C-TAG PCP value configured for the function. */
-       uint8_t ctag_pcp;
-       uint8_t unused_2;
-       /*
-        * C-TAG TPID value configured for the function. This field is specified in
-        * network byte order.
-        */
-       uint16_t        ctag_tpid;
-       /* Future use. */
-       uint32_t        rsvd1;
-       /* Future use. */
-       uint32_t        rsvd2;
-       uint8_t unused_3[4];
-} __attribute__((packed));
-
-/* hwrm_func_vlan_cfg_output (size:128b/16B) */
-struct hwrm_func_vlan_cfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t unused_0[7];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
-
-/*****************
- * hwrm_func_cfg *
- *****************/
-
-
-/* hwrm_func_cfg_input (size:704b/88B) */
-struct hwrm_func_cfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
-       /*
-        * Function ID of the function that is being
-        * configured.
-        * If set to 0xFF... (All Fs), then the the configuration is
-        * for the requesting function.
-        */
-       uint16_t        fid;
-       /*
-        * This field specifies how many NQs will be reserved for the PF.
-        * Remaining NQs that belong to the PF become available for VFs.
-        * Once a PF has created VFs, it cannot change how many NQs are
-        * reserved for itself (since the NQs must be contiguous in HW).
-        */
-       uint16_t        num_msix;
-       uint32_t        flags;
-       /*
-        * When this bit is '1', the function is disabled with
-        * source MAC address check.
-        * This is an anti-spoofing check. If this flag is set,
-        * then the function shall be configured to disallow
-        * transmission of frames with the source MAC address that
-        * is configured for this function.
+        * When this bit is '1', the function is disabled with
+        * source MAC address check.
+        * This is an anti-spoofing check. If this flag is set,
+        * then the function shall be configured to disallow
+        * transmission of frames with the source MAC address that
+        * is configured for this function.
         */
        #define HWRM_FUNC_CFG_INPUT_FLAGS_SRC_MAC_ADDR_CHECK_DISABLE \
                UINT32_C(0x1)
@@ -6205,6 +6413,17 @@ struct hwrm_func_cfg_input {
         */
        #define HWRM_FUNC_CFG_INPUT_FLAGS_L2_CTX_ASSETS_TEST \
                UINT32_C(0x100000)
+       /*
+        * This configuration change can be initiated by a PF driver. This
+        * configuration request shall be targeted to a VF. From local host
+        * resident HWRM clients, only the parent PF driver shall be allowed
+        * to initiate this change on one of its children VFs. If this bit is
+        * set to 1, then the VF that is being configured is requested to be
+        * trusted. If this bit is set to 0, then the VF that is being configured
+        * is requested not to be trusted.
+        */
+       #define HWRM_FUNC_CFG_INPUT_FLAGS_TRUSTED_VF_ENABLE \
+               UINT32_C(0x200000)
        uint32_t        enables;
        /*
         * This bit must be '1' for the mtu field to be
@@ -6338,6 +6557,12 @@ struct hwrm_func_cfg_input {
         */
        #define HWRM_FUNC_CFG_INPUT_ENABLES_NUM_MSIX \
                UINT32_C(0x200000)
+       /*
+        * This bit must be '1' for the link admin state field to be
+        * configured.
+        */
+       #define HWRM_FUNC_CFG_INPUT_ENABLES_ADMIN_LINK_STATE \
+               UINT32_C(0x400000)
        /*
         * The maximum transmission unit of the function.
         * The HWRM should make sure that the mtu of
@@ -6569,7 +6794,7 @@ struct hwrm_func_cfg_input {
         */
        #define HWRM_FUNC_CFG_INPUT_OPTIONS_CACHE_LINESIZE_MASK \
                UINT32_C(0x3)
-       #define HWRM_FUNC_CFG_INPUT_OPTIONS_CACHE_LINESIZE_SFT     0
+       #define HWRM_FUNC_CFG_INPUT_OPTIONS_CACHE_LINESIZE_SFT          0
        /* Cache Line Size 64 bytes */
        #define HWRM_FUNC_CFG_INPUT_OPTIONS_CACHE_LINESIZE_SIZE_64 \
                UINT32_C(0x0)
@@ -6578,10 +6803,25 @@ struct hwrm_func_cfg_input {
                UINT32_C(0x1)
        #define HWRM_FUNC_CFG_INPUT_OPTIONS_CACHE_LINESIZE_LAST \
                HWRM_FUNC_CFG_INPUT_OPTIONS_CACHE_LINESIZE_SIZE_128
+       /* This value is the virtual link admin state setting. */
+       #define HWRM_FUNC_CFG_INPUT_OPTIONS_LINK_ADMIN_STATE_MASK \
+               UINT32_C(0xc)
+       #define HWRM_FUNC_CFG_INPUT_OPTIONS_LINK_ADMIN_STATE_SFT        2
+       /* Admin state is forced down. */
+       #define HWRM_FUNC_CFG_INPUT_OPTIONS_LINK_ADMIN_STATE_FORCED_DOWN \
+               (UINT32_C(0x0) << 2)
+       /* Admin state is forced up. */
+       #define HWRM_FUNC_CFG_INPUT_OPTIONS_LINK_ADMIN_STATE_FORCED_UP \
+               (UINT32_C(0x1) << 2)
+       /* Admin state is in auto mode - follows the physical link state. */
+       #define HWRM_FUNC_CFG_INPUT_OPTIONS_LINK_ADMIN_STATE_AUTO \
+               (UINT32_C(0x2) << 2)
+       #define HWRM_FUNC_CFG_INPUT_OPTIONS_LINK_ADMIN_STATE_LAST \
+               HWRM_FUNC_CFG_INPUT_OPTIONS_LINK_ADMIN_STATE_AUTO
        /* Reserved for future. */
        #define HWRM_FUNC_CFG_INPUT_OPTIONS_RSVD_MASK \
-               UINT32_C(0xfc)
-       #define HWRM_FUNC_CFG_INPUT_OPTIONS_RSVD_SFT               2
+               UINT32_C(0xf0)
+       #define HWRM_FUNC_CFG_INPUT_OPTIONS_RSVD_SFT                    4
        /*
         * The number of multicast filters that should
         * be reserved for this function on the RX side.
@@ -6862,13 +7102,13 @@ struct hwrm_func_vf_resc_free_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/*******************************
- * hwrm_func_vf_vnic_ids_query *
- *******************************/
+/**********************
+ * hwrm_func_drv_rgtr *
+ **********************/
 
 
-/* hwrm_func_vf_vnic_ids_query_input (size:256b/32B) */
-struct hwrm_func_vf_vnic_ids_query_input {
+/* hwrm_func_drv_rgtr_input (size:896b/112B) */
+struct hwrm_func_drv_rgtr_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -6896,98 +7136,27 @@ struct hwrm_func_vf_vnic_ids_query_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
+       uint32_t        flags;
        /*
-        * This value is used to identify a Virtual Function (VF).
-        * The scope of VF ID is local within a PF.
+        * When this bit is '1', the function driver is requesting
+        * all requests from its children VF drivers to be
+        * forwarded to itself.
+        * This flag can only be set by the PF driver.
+        * If a VF driver sets this flag, it should be ignored
+        * by the HWRM.
         */
-       uint16_t        vf_id;
-       uint8_t unused_0[2];
-       /* Max number of vnic ids in vnic id table */
-       uint32_t        max_vnic_id_cnt;
-       /* This is the address for VF VNIC ID table */
-       uint64_t        vnic_id_tbl_addr;
-} __attribute__((packed));
-
-/* hwrm_func_vf_vnic_ids_query_output (size:128b/16B) */
-struct hwrm_func_vf_vnic_ids_query_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
+       #define HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_FWD_ALL_MODE \
+               UINT32_C(0x1)
        /*
-        * Actual number of vnic ids
-        *
-        * Each VNIC ID is written as a 32-bit number.
+        * When this bit is '1', the function is requesting none of
+        * the requests from its children VF drivers to be
+        * forwarded to itself.
+        * This flag can only be set by the PF driver.
+        * If a VF driver sets this flag, it should be ignored
+        * by the HWRM.
         */
-       uint32_t        vnic_id_cnt;
-       uint8_t unused_0[3];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
-
-/**********************
- * hwrm_func_drv_rgtr *
- **********************/
-
-
-/* hwrm_func_drv_rgtr_input (size:896b/112B) */
-struct hwrm_func_drv_rgtr_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
-       uint32_t        flags;
-       /*
-        * When this bit is '1', the function driver is requesting
-        * all requests from its children VF drivers to be
-        * forwarded to itself.
-        * This flag can only be set by the PF driver.
-        * If a VF driver sets this flag, it should be ignored
-        * by the HWRM.
-        */
-       #define HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_FWD_ALL_MODE       UINT32_C(0x1)
-       /*
-        * When this bit is '1', the function is requesting none of
-        * the requests from its children VF drivers to be
-        * forwarded to itself.
-        * This flag can only be set by the PF driver.
-        * If a VF driver sets this flag, it should be ignored
-        * by the HWRM.
-        */
-       #define HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_FWD_NONE_MODE      UINT32_C(0x2)
+       #define HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_FWD_NONE_MODE \
+               UINT32_C(0x2)
        /*
         * When this bit is '1', then ver_maj_8b, ver_min_8b, ver_upd_8b
         * fields shall be ignored and ver_maj, ver_min, ver_upd
@@ -6996,7 +7165,22 @@ struct hwrm_func_drv_rgtr_input {
         * fields shall be used for the driver version information and
         * ver_maj, ver_min, ver_upd and ver_patch shall be ignored.
         */
-       #define HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_16BIT_VER_MODE     UINT32_C(0x4)
+       #define HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_16BIT_VER_MODE \
+               UINT32_C(0x4)
+       /*
+        * When this bit is '1', the function is indicating support of
+        * the 64bit flow handle. Firmware that only supports the 64bit
+        * flow handle should check this bit before allowing processing
+        * of HWRM_CFA_FLOW_XXX commands from the requesting function,
+        * since such firmware is only compatible with drivers that
+        * support the 64bit flow handle. Legacy drivers that do not
+        * support the 64bit flow handle cannot use HWRM_CFA_FLOW_XXX
+        * commands when running with new firmware that only supports
+        * the 64bit flow handle; that firmware returns
+        * HWRM_ERR_CODE_CMD_NOT_SUPPORTED status to the legacy driver
+        * when it encounters these commands.
+        */
+       #define HWRM_FUNC_DRV_RGTR_INPUT_FLAGS_FLOW_HANDLE_64BIT_MODE \
+               UINT32_C(0x8)
        uint32_t        enables;
        /*
         * This bit must be '1' for the os_type field to be
@@ -7117,7 +7301,14 @@ struct hwrm_func_drv_rgtr_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       uint8_t unused_0[7];
+       uint32_t        flags;
+       /*
+        * When this bit is '1', it indicates that the
+        * HWRM_FUNC_DRV_IF_CHANGE call is supported.
+        */
+       #define HWRM_FUNC_DRV_RGTR_OUTPUT_FLAGS_IF_CHANGE_SUPPORTED \
+               UINT32_C(0x1)
+       uint8_t unused_0[3];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -7441,7 +7632,7 @@ struct hwrm_func_drv_qver_input {
        uint8_t unused_0[2];
 } __attribute__((packed));
 
-/* hwrm_func_drv_qver_output (size:192b/24B) */
+/* hwrm_func_drv_qver_output (size:256b/32B) */
 struct hwrm_func_drv_qver_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
@@ -7483,15 +7674,7 @@ struct hwrm_func_drv_qver_output {
        uint8_t ver_min_8b;
        /* This is the 8bit update version of the driver. */
        uint8_t ver_upd_8b;
-       uint8_t unused_0[2];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
+       uint8_t unused_0[3];
        /* This is the 16bit major version of the driver. */
        uint16_t        ver_maj;
        /* This is the 16bit minor version of the driver. */
@@ -7500,6 +7683,15 @@ struct hwrm_func_drv_qver_output {
        uint16_t        ver_upd;
        /* This is the 16bit patch version of the driver. */
        uint16_t        ver_patch;
+       uint8_t unused_1[7];
+       /*
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
+        */
+       uint8_t valid;
 } __attribute__((packed));
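/*
 * Illustrative sketch only, not part of the patch: because firmware
 * writes the valid byte last, a client polls it before trusting the
 * rest of the (now 32B) response. A real implementation would add a
 * timeout and the required memory barriers.
 */
static inline void
example_read_drv_ver(const volatile struct hwrm_func_drv_qver_output *resp,
		     uint16_t *maj, uint16_t *min, uint16_t *upd,
		     uint16_t *patch)
{
	while (resp->valid != 1)
		;	/* spin until the output is completely written */

	*maj = resp->ver_maj;
	*min = resp->ver_min;
	*upd = resp->ver_upd;
	*patch = resp->ver_patch;
}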
 
 /****************************
@@ -7612,117 +7804,15 @@ struct hwrm_func_resource_qcaps_output {
         * The number of TX rings assigned to the function cannot exceed this value.
         */
        uint16_t        max_tx_scheduler_inputs;
-       uint8_t unused_0[7];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
-
-/*****************************
- * hwrm_func_vf_resource_cfg *
- *****************************/
-
-
-/* hwrm_func_vf_resource_cfg_input (size:448b/56B) */
-struct hwrm_func_vf_resource_cfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
+       uint16_t        flags;
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * When this bit is '1', it indicates that VF_RESOURCE_CFG
+        * supports the feature of reserving all minimum resources when
+        * minimum >= 1; otherwise it returns an error.
         */
-       uint64_t        resp_addr;
-       /* VF ID that is being configured by PF */
-       uint16_t        vf_id;
-       /* Maximum guaranteed number of MSI-X vectors for the function */
-       uint16_t        max_msix;
-       /* Minimum guaranteed number of RSS/COS contexts */
-       uint16_t        min_rsscos_ctx;
-       /* Maximum non-guaranteed number of RSS/COS contexts */
-       uint16_t        max_rsscos_ctx;
-       /* Minimum guaranteed number of completion rings */
-       uint16_t        min_cmpl_rings;
-       /* Maximum non-guaranteed number of completion rings */
-       uint16_t        max_cmpl_rings;
-       /* Minimum guaranteed number of transmit rings */
-       uint16_t        min_tx_rings;
-       /* Maximum non-guaranteed number of transmit rings */
-       uint16_t        max_tx_rings;
-       /* Minimum guaranteed number of receive rings */
-       uint16_t        min_rx_rings;
-       /* Maximum non-guaranteed number of receive rings */
-       uint16_t        max_rx_rings;
-       /* Minimum guaranteed number of L2 contexts */
-       uint16_t        min_l2_ctxs;
-       /* Maximum non-guaranteed number of L2 contexts */
-       uint16_t        max_l2_ctxs;
-       /* Minimum guaranteed number of VNICs */
-       uint16_t        min_vnics;
-       /* Maximum non-guaranteed number of VNICs */
-       uint16_t        max_vnics;
-       /* Minimum guaranteed number of statistic contexts */
-       uint16_t        min_stat_ctx;
-       /* Maximum non-guaranteed number of statistic contexts */
-       uint16_t        max_stat_ctx;
-       /* Minimum guaranteed number of ring groups */
-       uint16_t        min_hw_ring_grps;
-       /* Maximum non-guaranteed number of ring groups */
-       uint16_t        max_hw_ring_grps;
-       uint8_t unused_0[4];
-} __attribute__((packed));
-
-/* hwrm_func_vf_resource_cfg_output (size:256b/32B) */
-struct hwrm_func_vf_resource_cfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* Reserved number of RSS/COS contexts */
-       uint16_t        reserved_rsscos_ctx;
-       /* Reserved number of completion rings */
-       uint16_t        reserved_cmpl_rings;
-       /* Reserved number of transmit rings */
-       uint16_t        reserved_tx_rings;
-       /* Reserved number of receive rings */
-       uint16_t        reserved_rx_rings;
-       /* Reserved number of L2 contexts */
-       uint16_t        reserved_l2_ctxs;
-       /* Reserved number of VNICs */
-       uint16_t        reserved_vnics;
-       /* Reserved number of statistic contexts */
-       uint16_t        reserved_stat_ctx;
-       /* Reserved number of ring groups */
-       uint16_t        reserved_hw_ring_grps;
-       uint8_t unused_0[7];
+       #define HWRM_FUNC_RESOURCE_QCAPS_OUTPUT_FLAGS_MIN_GUARANTEED \
+               UINT32_C(0x1)
+       uint8_t unused_0[5];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -7769,7 +7859,7 @@ struct hwrm_func_backing_store_qcaps_input {
        uint64_t        resp_addr;
 } __attribute__((packed));
 
-/* hwrm_func_backing_store_qcaps_output (size:512b/64B) */
+/* hwrm_func_backing_store_qcaps_output (size:576b/72B) */
 struct hwrm_func_backing_store_qcaps_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
@@ -7813,19 +7903,51 @@ struct hwrm_func_backing_store_qcaps_output {
        uint32_t        stat_max_entries;
        /* Number of bytes that must be allocated for each context entry. */
        uint16_t        stat_entry_size;
-       /* Maximum number of TQM context entries supported per ring. */
-       uint16_t        tqm_max_entries_per_ring;
        /* Number of bytes that must be allocated for each context entry. */
        uint16_t        tqm_entry_size;
-       /* Number of bytes that must be allocated for each context entry. */
-       uint16_t        mrav_entry_size;
+       /* Minimum number of TQM context entries required per ring. */
+       uint32_t        tqm_min_entries_per_ring;
+       /*
+        * Maximum number of TQM context entries supported per ring. This is
+        * actually a recommended TQM queue size based on worst case usage of
+        * the TQM queue.
+        *
+        * TQM fastpath rings should be sized large enough to accommodate the
+        * maximum number of QPs (either L2 or RoCE, or both if shared)
+        * that can be enqueued to the TQM ring.
+        *
+        * TQM slowpath rings should be sized as follows:
+        *
+        * num_entries = num_vnics + num_l2_tx_rings + num_roce_qps + tqm_min_size
+        *
+        * Where:
+        *   num_vnics is the number of VNICs allocated in the VNIC backing store
+        *   num_l2_tx_rings is the number of L2 rings in the QP backing store
+        *   num_roce_qps is the number of RoCE QPs in the QP backing store
+        *   tqm_min_size is tqm_min_entries_per_ring reported by
+        *     HWRM_FUNC_BACKING_STORE_QCAPS
+        *
+        * Note that TQM ring sizes cannot be extended while the system is
+        * operational. If a PF driver needs to extend a TQM ring, it needs
+        * to reset the function (e.g. HWRM_FUNC_RESET) and then reallocate
+        * the backing store.
+        */
+       uint32_t        tqm_max_entries_per_ring;
        /* Maximum number of MR/AV context entries supported for this function. */
        uint32_t        mrav_max_entries;
-       /* Maximum number of Timer context entries supported for this function. */
-       uint32_t        tim_max_entries;
+       /* Number of bytes that must be allocated for each context entry. */
+       uint16_t        mrav_entry_size;
        /* Number of bytes that must be allocated for each context entry. */
        uint16_t        tim_entry_size;
-       uint8_t unused_0;
+       /* Maximum number of Timer context entries supported for this function. */
+       uint32_t        tim_max_entries;
+       uint8_t unused_0[2];
+       /*
+        * The number of entries specified for any TQM ring must be a
+        * multiple of this value to prevent any resource allocation
+        * limitations.
+        */
+       uint8_t tqm_entries_multiple;
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -8672,23 +8794,129 @@ struct hwrm_func_backing_store_cfg_input {
        uint32_t        cq_num_entries;
        /* Number of Stats. */
        uint32_t        stat_num_entries;
-       /* Number of TQM slowpath entries. */
+       /*
+        * Number of TQM slowpath entries.
+        *
+        * TQM slowpath rings should be sized as follows:
+        *
+        * num_entries = num_vnics + num_l2_tx_rings + num_roce_qps + tqm_min_size
+        *
+        * Where:
+        *   num_vnics is the number of VNICs allocated in the VNIC backing store
+        *   num_l2_tx_rings is the number of L2 rings in the QP backing store
+        *   num_roce_qps is the number of RoCE QPs in the QP backing store
+        *   tqm_min_size is tqm_min_entries_per_ring reported by
+        *     HWRM_FUNC_BACKING_STORE_QCAPS
+        *
+        * Note that TQM ring sizes cannot be extended while the system is
+        * operational. If a PF driver needs to extend a TQM ring, it needs
+        * to reset the function (e.g. HWRM_FUNC_RESET) and then reallocate
+        * the backing store.
+        */
        uint32_t        tqm_sp_num_entries;
-       /* Number of TQM ring 0 entries. */
+       /*
+        * Number of TQM ring 0 entries.
+        *
+        * TQM fastpath rings should be sized large enough to accommodate the
+        * maximum number of QPs (either L2 or RoCE, or both if shared)
+        * that can be enqueued to the TQM ring.
+        *
+        * Note that TQM ring sizes cannot be extended while the system is
+        * operational. If a PF driver needs to extend a TQM ring, it needs
+        * to reset the function (e.g. HWRM_FUNC_RESET) and then reallocate
+        * the backing store.
+        */
        uint32_t        tqm_ring0_num_entries;
-       /* Number of TQM ring 1 entries. */
+       /*
+        * Number of TQM ring 1 entries.
+        *
+        * TQM fastpath rings should be sized large enough to accommodate the
+        * maximum number of QPs (either L2 or RoCE, or both if shared)
+        * that can be enqueued to the TQM ring.
+        *
+        * Note that TQM ring sizes cannot be extended while the system is
+        * operational. If a PF driver needs to extend a TQM ring, it needs
+        * to reset the function (e.g. HWRM_FUNC_RESET) and then reallocate
+        * the backing store.
+        */
        uint32_t        tqm_ring1_num_entries;
-       /* Number of TQM ring 2 entries. */
+       /*
+        * Number of TQM ring 2 entries.
+        *
+        * TQM fastpath rings should be sized large enough to accommodate the
+        * maximum number of QPs (either L2 or RoCE, or both if shared)
+        * that can be enqueued to the TQM ring.
+        *
+        * Note that TQM ring sizes cannot be extended while the system is
+        * operational. If a PF driver needs to extend a TQM ring, it needs
+        * to reset the function (e.g. HWRM_FUNC_RESET) and then reallocate
+        * the backing store.
+        */
        uint32_t        tqm_ring2_num_entries;
-       /* Number of TQM ring 3 entries. */
+       /*
+        * Number of TQM ring 3 entries.
+        *
+        * TQM fastpath rings should be sized large enough to accommodate the
+        * maximum number of QPs (either L2 or RoCE, or both if shared)
+        * that can be enqueued to the TQM ring.
+        *
+        * Note that TQM ring sizes cannot be extended while the system is
+        * operational. If a PF driver needs to extend a TQM ring, it needs
+        * to reset the function (e.g. HWRM_FUNC_RESET) and then reallocate
+        * the backing store.
+        */
        uint32_t        tqm_ring3_num_entries;
-       /* Number of TQM ring 4 entries. */
+       /*
+        * Number of TQM ring 4 entries.
+        *
+        * TQM fastpath rings should be sized large enough to accommodate the
+        * maximum number of QPs (either L2 or RoCE, or both if shared)
+        * that can be enqueued to the TQM ring.
+        *
+        * Note that TQM ring sizes cannot be extended while the system is
+        * operational. If a PF driver needs to extend a TQM ring, it needs
+        * to reset the function (e.g. HWRM_FUNC_RESET) and then reallocate
+        * the backing store.
+        */
        uint32_t        tqm_ring4_num_entries;
-       /* Number of TQM ring 5 entries. */
+       /*
+        * Number of TQM ring 5 entries.
+        *
+        * TQM fastpath rings should be sized large enough to accommodate the
+        * maximum number of QPs (either L2 or RoCE, or both if shared)
+        * that can be enqueued to the TQM ring.
+        *
+        * Note that TQM ring sizes cannot be extended while the system is
+        * operational. If a PF driver needs to extend a TQM ring, it needs
+        * to reset the function (e.g. HWRM_FUNC_RESET) and then reallocate
+        * the backing store.
+        */
        uint32_t        tqm_ring5_num_entries;
-       /* Number of TQM ring 6 entries. */
+       /*
+        * Number of TQM ring 6 entries.
+        *
+        * TQM fastpath rings should be sized large enough to accommodate the
+        * maximum number of QPs (either L2 or RoCE, or both if shared)
+        * that can be enqueued to the TQM ring.
+        *
+        * Note that TQM ring sizes cannot be extended while the system is
+        * operational. If a PF driver needs to extend a TQM ring, it needs
+        * to reset the function (e.g. HWRM_FUNC_RESET) and then reallocate
+        * the backing store.
+        */
        uint32_t        tqm_ring6_num_entries;
-       /* Number of TQM ring 7 entries. */
+       /*
+        * Number of TQM ring 7 entries.
+        *
+        * TQM fastpath rings should be sized large enough to accommodate the
+        * maximum number of QPs (either L2 or RoCE, or both if shared)
+        * that can be enqueued to the TQM ring.
+        *
+        * Note that TQM ring sizes cannot be extended while the system is
+        * operational. If a PF driver needs to extend a TQM ring, it needs
+        * to reset the function (e.g. HWRM_FUNC_RESET) and then reallocate
+        * the backing store.
+        */
        uint32_t        tqm_ring7_num_entries;
        /* Number of MR/AV entries. */
        uint32_t        mrav_num_entries;
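/*
 * Illustrative sketch only, not part of the patch: sizing a TQM
 * slowpath ring per the formula repeated above, then rounding up to
 * the tqm_entries_multiple reported by HWRM_FUNC_BACKING_STORE_QCAPS
 * and clamping to the recommended maximum. The three count arguments
 * are hypothetical driver-side values.
 */
static inline uint32_t
example_tqm_sp_entries(uint32_t num_vnics, uint32_t num_l2_tx_rings,
		       uint32_t num_roce_qps,
		       const struct hwrm_func_backing_store_qcaps_output *qcaps)
{
	uint32_t entries = num_vnics + num_l2_tx_rings + num_roce_qps +
			   qcaps->tqm_min_entries_per_ring;
	uint32_t mult = qcaps->tqm_entries_multiple ?
			qcaps->tqm_entries_multiple : 1;

	/* Round up to a multiple of tqm_entries_multiple, then clamp. */
	entries = ((entries + mult - 1) / mult) * mult;
	if (entries > qcaps->tqm_max_entries_per_ring)
		entries = qcaps->tqm_max_entries_per_ring;
	return entries;
}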
@@ -9638,13 +9866,13 @@ struct hwrm_func_backing_store_qcfg_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/*********************
- * hwrm_port_phy_cfg *
- *********************/
+/***********************
+ * hwrm_func_vlan_qcfg *
+ ***********************/
 
 
-/* hwrm_port_phy_cfg_input (size:448b/56B) */
-struct hwrm_port_phy_cfg_input {
+/* hwrm_func_vlan_qcfg_input (size:192b/24B) */
+struct hwrm_func_vlan_qcfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -9672,508 +9900,430 @@ struct hwrm_port_phy_cfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint32_t        flags;
        /*
-        * When this bit is set to '1', the PHY for the port shall
-        * be reset.
-        *
-        * # If this bit is set to 1, then the HWRM shall reset the
-        * PHY after applying PHY configuration changes specified
-        * in this command.
-        * # In order to guarantee that PHY configuration changes
-        * specified in this command take effect, the HWRM
-        * client should set this flag to 1.
-        * # If this bit is not set to 1, then the HWRM may reset
-        * the PHY depending on the current PHY configuration and
-        * settings specified in this command.
-        */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_RESET_PHY \
-               UINT32_C(0x1)
-       /* deprecated bit.  Do not use!!! */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_DEPRECATED \
-               UINT32_C(0x2)
-       /*
-        * When this bit is set to '1', the link shall be forced to
-        * the force_link_speed value.
-        *
-        * When this bit is set to '1', the HWRM client should
-        * not enable any of the auto negotiation related
-        * fields represented by auto_XXX fields in this command.
-        * When this bit is set to '1' and the HWRM client has
-        * enabled a auto_XXX field in this command, then the
-        * HWRM shall ignore the enabled auto_XXX field.
-        *
-        * When this bit is set to zero, the link
-        * shall be allowed to autoneg.
-        */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FORCE \
-               UINT32_C(0x4)
-       /*
-        * When this bit is set to '1', the auto-negotiation process
-        * shall be restarted on the link.
-        */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_RESTART_AUTONEG \
-               UINT32_C(0x8)
-       /*
-        * When this bit is set to '1', Energy Efficient Ethernet
-        * (EEE) is requested to be enabled on this link.
-        * If EEE is not supported on this port, then this flag
-        * shall be ignored by the HWRM.
-        */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_EEE_ENABLE \
-               UINT32_C(0x10)
-       /*
-        * When this bit is set to '1', Energy Efficient Ethernet
-        * (EEE) is requested to be disabled on this link.
-        * If EEE is not supported on this port, then this flag
-        * shall be ignored by the HWRM.
-        */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_EEE_DISABLE \
-               UINT32_C(0x20)
-       /*
-        * When this bit is set to '1' and EEE is enabled on this
-        * link, then TX LPI is requested to be enabled on the link.
-        * If EEE is not supported on this port, then this flag
-        * shall be ignored by the HWRM.
-        * If EEE is disabled on this port, then this flag shall be
-        * ignored by the HWRM.
-        */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_EEE_TX_LPI_ENABLE \
-               UINT32_C(0x40)
-       /*
-        * When this bit is set to '1' and EEE is enabled on this
-        * link, then TX LPI is requested to be disabled on the link.
-        * If EEE is not supported on this port, then this flag
-        * shall be ignored by the HWRM.
-        * If EEE is disabled on this port, then this flag shall be
-        * ignored by the HWRM.
+        * Function ID of the function that is being
+        * configured.
+        * If set to 0xFF... (All Fs), then the configuration is
+        * for the requesting function.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_EEE_TX_LPI_DISABLE \
-               UINT32_C(0x80)
+       uint16_t        fid;
+       uint8_t unused_0[6];
+} __attribute__((packed));
+
+/* hwrm_func_vlan_qcfg_output (size:320b/40B) */
+struct hwrm_func_vlan_qcfg_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       uint64_t        unused_0;
+       /* S-TAG VLAN identifier configured for the function. */
+       uint16_t        stag_vid;
+       /* S-TAG PCP value configured for the function. */
+       uint8_t stag_pcp;
+       uint8_t unused_1;
        /*
-        * When set to 1, then the HWRM shall enable FEC autonegotitation
-        * on this port if supported.
-        * When set to 0, then this flag shall be ignored.
-        * If FEC autonegotiation is not supported, then the HWRM shall ignore this
-        * flag.
+        * S-TAG TPID value configured for the function. This field is specified in
+        * network byte order.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FEC_AUTONEG_ENABLE \
-               UINT32_C(0x100)
+       uint16_t        stag_tpid;
+       /* C-TAG VLAN identifier configured for the function. */
+       uint16_t        ctag_vid;
+       /* C-TAG PCP value configured for the function. */
+       uint8_t ctag_pcp;
+       uint8_t unused_2;
        /*
-        * When set to 1, then the HWRM shall disable FEC autonegotiation
-        * on this port if supported.
-        * When set to 0, then this flag shall be ignored.
-        * If FEC autonegotiation is not supported, then the HWRM shall ignore this
-        * flag.
+        * C-TAG TPID value configured for the function. This field is specified in
+        * network byte order.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FEC_AUTONEG_DISABLE \
-               UINT32_C(0x200)
+       uint16_t        ctag_tpid;
+       /* Future use. */
+       uint32_t        rsvd2;
+       /* Future use. */
+       uint32_t        rsvd3;
+       uint8_t unused_3[3];
        /*
-        * When set to 1, then the HWRM shall enable FEC CLAUSE 74 (Fire Code)
-        * on this port if supported.
-        * When set to 0, then this flag shall be ignored.
-        * If FEC CLAUSE 74 is not supported, then the HWRM shall ignore this
-        * flag.
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FEC_CLAUSE74_ENABLE \
-               UINT32_C(0x400)
+       uint8_t valid;
+} __attribute__((packed));
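
The trailing `valid` byte above is the HWRM completion handshake: firmware
writes the response body first and `valid` last, so the host polls `valid`
and then issues a read barrier before trusting the body.  A minimal sketch
under that assumption (not part of the generated header); the buffer pointer
and the microsecond timeout handling are hypothetical:

#include <stdint.h>
#include <rte_atomic.h>         /* rte_io_rmb() */
#include <rte_cycles.h>         /* rte_delay_us() */

/* Sketch: wait for the last byte of an HWRM response to read as 1.
 * `resp` points into DMA-coherent memory; `len` is the response size. */
static int
hwrm_wait_valid(volatile uint8_t *resp, uint16_t len, unsigned int timeout_us)
{
        volatile uint8_t *valid = resp + len - 1; /* written last */
        unsigned int waited;

        for (waited = 0; *valid != 1; waited++) {
                if (waited > timeout_us)
                        return -1; /* firmware did not complete in time */
                rte_delay_us(1);
        }
        rte_io_rmb(); /* order the valid read before reading the body */
        return 0;
}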
+
+/**********************
+ * hwrm_func_vlan_cfg *
+ **********************/
+
+
+/* hwrm_func_vlan_cfg_input (size:384b/48B) */
+struct hwrm_func_vlan_cfg_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * When set to 1, then the HWRM shall disable FEC CLAUSE 74 (Fire Code)
-        * on this port if supported.
-        * When set to 0, then this flag shall be ignored.
-        * If FEC CLAUSE 74 is not supported, then the HWRM shall ignore this
-        * flag.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FEC_CLAUSE74_DISABLE \
-               UINT32_C(0x800)
+       uint16_t        cmpl_ring;
        /*
-        * When set to 1, then the HWRM shall enable FEC CLAUSE 91 (Reed Solomon)
-        * on this port if supported.
-        * When set to 0, then this flag shall be ignored.
-        * If FEC CLAUSE 91 is not supported, then the HWRM shall ignore this
-        * flag.
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FEC_CLAUSE91_ENABLE \
-               UINT32_C(0x1000)
+       uint16_t        seq_id;
        /*
-        * When set to 1, then the HWRM shall disable FEC CLAUSE 91 (Reed Solomon)
-        * on this port if supported.
-        * When set to 0, then this flag shall be ignored.
-        * If FEC CLAUSE 91 is not supported, then the HWRM shall ignore this
-        * flag.
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FEC_CLAUSE91_DISABLE \
-               UINT32_C(0x2000)
+       uint16_t        target_id;
        /*
-        * When this bit is set to '1', the link shall be forced to
-        * be taken down.
-        *
-        * # When this bit is set to '1", all other
-        * command input settings related to the link speed shall
-        * be ignored.
-        * Once the link state is forced down, it can be
-        * explicitly cleared from that state by setting this flag
-        * to '0'.
-        * # If this flag is set to '0', then the link shall be
-        * cleared from forced down state if the link is in forced
-        * down state.
-        * There may be conditions (e.g. out-of-band or sideband
-        * configuration changes for the link) outside the scope
-        * of the HWRM implementation that may clear forced down
-        * link state.
+        * A physical address pointer to a host buffer into which the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FORCE_LINK_DWN \
-               UINT32_C(0x4000)
-       uint32_t        enables;
+       uint64_t        resp_addr;
        /*
-        * This bit must be '1' for the auto_mode field to be
+        * Function ID of the function that is being
         * configured.
+        * If set to 0xFF... (All Fs), then the configuration is
+        * for the requesting function.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_MODE \
-               UINT32_C(0x1)
+       uint16_t        fid;
+       uint8_t unused_0[2];
+       uint32_t        enables;
        /*
-        * This bit must be '1' for the auto_duplex field to be
+        * This bit must be '1' for the stag_vid field to be
         * configured.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_DUPLEX \
-               UINT32_C(0x2)
+       #define HWRM_FUNC_VLAN_CFG_INPUT_ENABLES_STAG_VID      UINT32_C(0x1)
        /*
-        * This bit must be '1' for the auto_pause field to be
+        * This bit must be '1' for the ctag_vid field to be
         * configured.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_PAUSE \
-               UINT32_C(0x4)
+       #define HWRM_FUNC_VLAN_CFG_INPUT_ENABLES_CTAG_VID      UINT32_C(0x2)
        /*
-        * This bit must be '1' for the auto_link_speed field to be
+        * This bit must be '1' for the stag_pcp field to be
         * configured.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_LINK_SPEED \
-               UINT32_C(0x8)
+       #define HWRM_FUNC_VLAN_CFG_INPUT_ENABLES_STAG_PCP      UINT32_C(0x4)
        /*
-        * This bit must be '1' for the auto_link_speed_mask field to be
+        * This bit must be '1' for the ctag_pcp field to be
         * configured.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_LINK_SPEED_MASK \
-               UINT32_C(0x10)
+       #define HWRM_FUNC_VLAN_CFG_INPUT_ENABLES_CTAG_PCP      UINT32_C(0x8)
        /*
-        * This bit must be '1' for the wirespeed field to be
+        * This bit must be '1' for the stag_tpid field to be
         * configured.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_WIRESPEED \
-               UINT32_C(0x20)
+       #define HWRM_FUNC_VLAN_CFG_INPUT_ENABLES_STAG_TPID     UINT32_C(0x10)
        /*
-        * This bit must be '1' for the lpbk field to be
+        * This bit must be '1' for the ctag_tpid field to be
         * configured.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_LPBK \
-               UINT32_C(0x40)
+       #define HWRM_FUNC_VLAN_CFG_INPUT_ENABLES_CTAG_TPID     UINT32_C(0x20)
+       /* S-TAG VLAN identifier configured for the function. */
+       uint16_t        stag_vid;
+       /* S-TAG PCP value configured for the function. */
+       uint8_t stag_pcp;
+       uint8_t unused_1;
        /*
-        * This bit must be '1' for the preemphasis field to be
-        * configured.
+        * S-TAG TPID value configured for the function. This field is specified in
+        * network byte order.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_PREEMPHASIS \
-               UINT32_C(0x80)
-       /*
-        * This bit must be '1' for the force_pause field to be
-        * configured.
-        */
-       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_FORCE_PAUSE \
-               UINT32_C(0x100)
+       uint16_t        stag_tpid;
+       /* C-TAG VLAN identifier configured for the function. */
+       uint16_t        ctag_vid;
+       /* C-TAG PCP value configured for the function. */
+       uint8_t ctag_pcp;
+       uint8_t unused_2;
        /*
-        * This bit must be '1' for the eee_link_speed_mask field to be
-        * configured.
+        * C-TAG TPID value configured for the function. This field is specified in
+        * network byte order.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_EEE_LINK_SPEED_MASK \
-               UINT32_C(0x200)
+       uint16_t        ctag_tpid;
+       /* Future use. */
+       uint32_t        rsvd1;
+       /* Future use. */
+       uint32_t        rsvd2;
+       uint8_t unused_3[4];
+} __attribute__((packed));
+
+/* hwrm_func_vlan_cfg_output (size:128b/16B) */
+struct hwrm_func_vlan_cfg_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       uint8_t unused_0[7];
        /*
-        * This bit must be '1' for the tx_lpi_timer field to be
-        * configured.
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_TX_LPI_TIMER \
-               UINT32_C(0x400)
-       /* Port ID of port that is to be configured. */
-       uint16_t        port_id;
+       uint8_t valid;
+} __attribute__((packed));
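
The `enables` word above gates which request fields firmware consumes, and
the TPID fields are explicitly big-endian while the rest of the message is
little-endian.  A hedged sketch of building the request (not part of the
generated header), assuming the HWRM_FUNC_VLAN_CFG request-type constant
defined earlier in this header; hwrm_send_message() is a hypothetical
transport helper:

#include <string.h>
#include <stdint.h>
#include <rte_byteorder.h>

/* Sketch: program only the S-TAG VID and TPID of function `fid`; the
 * other VLAN fields are untouched because their ENABLES bits stay 0. */
static int
set_func_stag(uint16_t fid, uint16_t vid)
{
        struct hwrm_func_vlan_cfg_input req;

        memset(&req, 0, sizeof(req));
        req.req_type = rte_cpu_to_le_16(HWRM_FUNC_VLAN_CFG);
        req.fid = rte_cpu_to_le_16(fid);
        req.enables = rte_cpu_to_le_32(
                HWRM_FUNC_VLAN_CFG_INPUT_ENABLES_STAG_VID |
                HWRM_FUNC_VLAN_CFG_INPUT_ENABLES_STAG_TPID);
        req.stag_vid = rte_cpu_to_le_16(vid);
        /* TPID is specified in network byte order, per the comment above. */
        req.stag_tpid = rte_cpu_to_be_16(0x88a8); /* 802.1ad S-TAG */
        return hwrm_send_message(&req, sizeof(req)); /* hypothetical */
}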
+
+/*******************************
+ * hwrm_func_vf_vnic_ids_query *
+ *******************************/
+
+
+/* hwrm_func_vf_vnic_ids_query_input (size:256b/32B) */
+struct hwrm_func_vf_vnic_ids_query_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * This is the speed that will be used if the force
-        * bit is '1'.  If unsupported speed is selected, an error
-        * will be generated.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       uint16_t        force_link_speed;
-       /* 100Mb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_100MB UINT32_C(0x1)
-       /* 1Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_1GB   UINT32_C(0xa)
-       /* 2Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_2GB   UINT32_C(0x14)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_2_5GB UINT32_C(0x19)
-       /* 10Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_10GB  UINT32_C(0x64)
-       /* 20Mb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_20GB  UINT32_C(0xc8)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_25GB  UINT32_C(0xfa)
-       /* 40Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_40GB  UINT32_C(0x190)
-       /* 50Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_50GB  UINT32_C(0x1f4)
-       /* 100Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_100GB UINT32_C(0x3e8)
-       /* 10Mb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_10MB  UINT32_C(0xffff)
-       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_LAST \
-               HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_10MB
+       uint16_t        cmpl_ring;
        /*
-        * This value is used to identify what autoneg mode is
-        * used when the link speed is not being forced.
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       uint8_t auto_mode;
-       /* Disable autoneg or autoneg disabled. No speeds are selected. */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_NONE         UINT32_C(0x0)
-       /* Select all possible speeds for autoneg mode. */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_ALL_SPEEDS   UINT32_C(0x1)
+       uint16_t        seq_id;
        /*
-        * Select only the auto_link_speed speed for autoneg mode. This mode has
-        * been DEPRECATED. An HWRM client should not use this mode.
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_ONE_SPEED    UINT32_C(0x2)
+       uint16_t        target_id;
        /*
-        * Select the auto_link_speed or any speed below that speed for autoneg.
-        * This mode has been DEPRECATED. An HWRM client should not use this mode.
+        * A physical address pointer to a host buffer into which the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_ONE_OR_BELOW UINT32_C(0x3)
+       uint64_t        resp_addr;
        /*
-        * Select the speeds based on the corresponding link speed mask value
-        * that is provided.
+        * This value is used to identify a Virtual Function (VF).
+        * The scope of VF ID is local within a PF.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_SPEED_MASK   UINT32_C(0x4)
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_LAST \
-               HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_SPEED_MASK
+       uint16_t        vf_id;
+       uint8_t unused_0[2];
+       /* Maximum number of VNIC IDs that the VNIC ID table can hold. */
+       uint32_t        max_vnic_id_cnt;
+       /* This is the address of the VF VNIC ID table. */
+       uint64_t        vnic_id_tbl_addr;
+} __attribute__((packed));
+
+/* hwrm_func_vf_vnic_ids_query_output (size:128b/16B) */
+struct hwrm_func_vf_vnic_ids_query_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
        /*
-        * This is the duplex setting that will be used if the autoneg_mode
-        * is "one_speed" or "one_or_below".
+        * Actual number of VNIC IDs returned in the VNIC ID table.
+        *
+        * Each VNIC ID is written as a 32-bit number.
         */
-       uint8_t auto_duplex;
-       /* Half Duplex will be requested. */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_DUPLEX_HALF UINT32_C(0x0)
-       /* Full duplex will be requested. */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_DUPLEX_FULL UINT32_C(0x1)
-       /* Both Half and Full dupex will be requested. */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_DUPLEX_BOTH UINT32_C(0x2)
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_DUPLEX_LAST \
-               HWRM_PORT_PHY_CFG_INPUT_AUTO_DUPLEX_BOTH
+       uint32_t        vnic_id_cnt;
+       uint8_t unused_0[3];
        /*
-        * This value is used to configure the pause that will be
-        * used for autonegotiation.
-        * Add text on the usage of auto_pause and force_pause.
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       uint8_t auto_pause;
+       uint8_t valid;
+} __attribute__((packed));
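
This command is a DMA-table query: the host supplies a physically contiguous
table of max_vnic_id_cnt 32-bit slots via vnic_id_tbl_addr, and the firmware
reports in vnic_id_cnt how many slots it filled.  A hedged sketch (not part
of the generated header); issue_hwrm() is a hypothetical send-and-wait
helper:

#include <stdio.h>
#include <stdint.h>
#include <rte_malloc.h>
#include <rte_byteorder.h>

/* Sketch: list the VNIC IDs of VF `vf_id`.  rte_zmalloc() memory is
 * physically contiguous, as the table must be. */
static int
query_vf_vnic_ids(uint16_t vf_id, uint32_t max_ids)
{
        struct hwrm_func_vf_vnic_ids_query_input req = { 0 };
        struct hwrm_func_vf_vnic_ids_query_output resp = { 0 };
        uint32_t *tbl, i, cnt;

        tbl = rte_zmalloc("vnic_id_tbl", max_ids * sizeof(*tbl), 0);
        if (tbl == NULL)
                return -1;
        req.vf_id = rte_cpu_to_le_16(vf_id);
        req.max_vnic_id_cnt = rte_cpu_to_le_32(max_ids);
        req.vnic_id_tbl_addr = rte_cpu_to_le_64(rte_malloc_virt2iova(tbl));
        if (issue_hwrm(&req, sizeof(req), &resp, sizeof(resp)) == 0) {
                cnt = rte_le_to_cpu_32(resp.vnic_id_cnt); /* slots filled */
                for (i = 0; i < cnt; i++)
                        printf("VF %u VNIC ID %u\n",
                               vf_id, rte_le_to_cpu_32(tbl[i]));
        }
        rte_free(tbl);
        return 0;
}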
+
+/***********************
+ * hwrm_func_vf_bw_cfg *
+ ***********************/
+
+
+/* hwrm_func_vf_bw_cfg_input (size:960b/120B) */
+struct hwrm_func_vf_bw_cfg_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * When this bit is '1', Generation of tx pause messages
-        * has been requested. Disabled otherwise.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_PAUSE_TX \
-               UINT32_C(0x1)
+       uint16_t        cmpl_ring;
        /*
-        * When this bit is '1', Reception of rx pause messages
-        * has been requested. Disabled otherwise.
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_PAUSE_RX \
-               UINT32_C(0x2)
+       uint16_t        seq_id;
        /*
-        * When set to 1, the advertisement of pause is enabled.
-        *
-        * # When the auto_mode is not set to none and this flag is
-        * set to 1, then the auto_pause bits on this port are being
-        * advertised and autoneg pause results are being interpreted.
-        * # When the auto_mode is not set to none and this
-        * flag is set to 0, the pause is forced as indicated in
-        * force_pause, and also advertised as auto_pause bits, but
-        * the autoneg results are not interpreted since the pause
-        * configuration is being forced.
-        * # When the auto_mode is set to none and this flag is set to
-        * 1, auto_pause bits should be ignored and should be set to 0.
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_PAUSE_AUTONEG_PAUSE \
-               UINT32_C(0x4)
-       uint8_t unused_0;
+       uint16_t        target_id;
        /*
-        * This is the speed that will be used if the autoneg_mode
-        * is "one_speed" or "one_or_below".  If an unsupported speed
-        * is selected, an error will be generated.
+        * A physical address pointer to a host buffer into which the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       uint16_t        auto_link_speed;
-       /* 100Mb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_100MB UINT32_C(0x1)
-       /* 1Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_1GB   UINT32_C(0xa)
-       /* 2Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_2GB   UINT32_C(0x14)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_2_5GB UINT32_C(0x19)
-       /* 10Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_10GB  UINT32_C(0x64)
-       /* 20Mb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_20GB  UINT32_C(0xc8)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_25GB  UINT32_C(0xfa)
-       /* 40Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_40GB  UINT32_C(0x190)
-       /* 50Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_50GB  UINT32_C(0x1f4)
-       /* 100Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_100GB UINT32_C(0x3e8)
-       /* 10Mb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_10MB  UINT32_C(0xffff)
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_LAST \
-               HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_10MB
+       uint64_t        resp_addr;
        /*
-        * This is a mask of link speeds that will be used if
-        * autoneg_mode is "mask".  If unsupported speed is enabled
-        * an error will be generated.
+        * The number of VF functions that are being configured.
+        * The command space allows up to 50 VFs' bandwidth to be configured
+        * with one command.
         */
-       uint16_t        auto_link_speed_mask;
-       /* 100Mb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_100MBHD \
-               UINT32_C(0x1)
-       /* 100Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_100MB \
-               UINT32_C(0x2)
-       /* 1Gb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_1GBHD \
-               UINT32_C(0x4)
-       /* 1Gb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_1GB \
-               UINT32_C(0x8)
-       /* 2Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_2GB \
-               UINT32_C(0x10)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_2_5GB \
-               UINT32_C(0x20)
-       /* 10Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_10GB \
-               UINT32_C(0x40)
-       /* 20Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_20GB \
-               UINT32_C(0x80)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_25GB \
-               UINT32_C(0x100)
-       /* 40Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_40GB \
-               UINT32_C(0x200)
-       /* 50Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_50GB \
-               UINT32_C(0x400)
-       /* 100Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_100GB \
-               UINT32_C(0x800)
-       /* 10Mb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_10MBHD \
-               UINT32_C(0x1000)
-       /* 10Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_10MB \
-               UINT32_C(0x2000)
-       /* This value controls the wirespeed feature. */
-       uint8_t wirespeed;
-       /* Wirespeed feature is disabled. */
-       #define HWRM_PORT_PHY_CFG_INPUT_WIRESPEED_OFF UINT32_C(0x0)
-       /* Wirespeed feature is enabled. */
-       #define HWRM_PORT_PHY_CFG_INPUT_WIRESPEED_ON  UINT32_C(0x1)
-       #define HWRM_PORT_PHY_CFG_INPUT_WIRESPEED_LAST \
-               HWRM_PORT_PHY_CFG_INPUT_WIRESPEED_ON
-       /* This value controls the loopback setting for the PHY. */
-       uint8_t lpbk;
-       /* No loopback is selected.  Normal operation. */
-       #define HWRM_PORT_PHY_CFG_INPUT_LPBK_NONE     UINT32_C(0x0)
-       /*
-        * The HW will be configured with local loopback such that
-        * host data is sent back to the host without modification.
-        */
-       #define HWRM_PORT_PHY_CFG_INPUT_LPBK_LOCAL    UINT32_C(0x1)
-       /*
-        * The HW will be configured with remote loopback such that
-        * port logic will send packets back out the transmitter that
-        * are received.
-        */
-       #define HWRM_PORT_PHY_CFG_INPUT_LPBK_REMOTE   UINT32_C(0x2)
-       /*
-        * The HW will be configured with external loopback such that
-        * host data is sent on the trasmitter and based on the external
-        * loopback connection the data will be received without modification.
-        */
-       #define HWRM_PORT_PHY_CFG_INPUT_LPBK_EXTERNAL UINT32_C(0x3)
-       #define HWRM_PORT_PHY_CFG_INPUT_LPBK_LAST \
-               HWRM_PORT_PHY_CFG_INPUT_LPBK_EXTERNAL
+       uint16_t        num_vfs;
+       uint16_t        unused[3];
+       /* These 16-bit fields contain the VF fid and the rate scale percentage. */
+       uint16_t        vfn[48];
+       /* The physical VF ID to which the adjustment will be made. */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_VFID_MASK     UINT32_C(0xfff)
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_VFID_SFT      0
+       /*
+        * This field configures the rate scale percentage of the VF as specified
+        * by the physical VF id.
+        */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_MASK     UINT32_C(0xf000)
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_SFT      12
+       /* 0% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_0 \
+               (UINT32_C(0x0) << 12)
+       /* 6.66% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_6_66 \
+               (UINT32_C(0x1) << 12)
+       /* 13.33% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_13_33 \
+               (UINT32_C(0x2) << 12)
+       /* 20% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_20 \
+               (UINT32_C(0x3) << 12)
+       /* 26.66% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_26_66 \
+               (UINT32_C(0x4) << 12)
+       /* 33.33% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_33_33 \
+               (UINT32_C(0x5) << 12)
+       /* 40% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_40 \
+               (UINT32_C(0x6) << 12)
+       /* 46.66% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_46_66 \
+               (UINT32_C(0x7) << 12)
+       /* 53.33% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_53_33 \
+               (UINT32_C(0x8) << 12)
+       /* 60% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_60 \
+               (UINT32_C(0x9) << 12)
+       /* 66.66% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_66_66 \
+               (UINT32_C(0xa) << 12)
+       /* 73.33% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_73_33 \
+               (UINT32_C(0xb) << 12)
+       /* 80% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_80 \
+               (UINT32_C(0xc) << 12)
+       /* 86.66% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_86_66 \
+               (UINT32_C(0xd) << 12)
+       /* 93.33% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_93_33 \
+               (UINT32_C(0xe) << 12)
+       /* 100% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_100 \
+               (UINT32_C(0xf) << 12)
+       #define HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_LAST \
+               HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_100
+} __attribute__((packed));
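
Each vfn[] entry above is a packed 16-bit field: the VF ID occupies bits
0-11 and the rate code bits 12-15, with the ..._RATE_PCT_* values already
pre-shifted.  A minimal sketch of encoding one entry (not part of the
generated header):

#include <stdint.h>
#include <rte_byteorder.h>

/* Sketch: pack one vfn[] entry.  `rate_code` must be one of the
 * pre-shifted ..._VFN_RATE_PCT_* values defined above. */
static uint16_t
vf_bw_entry(uint16_t vfid, uint16_t rate_code)
{
        uint16_t v;

        v  = vfid & HWRM_FUNC_VF_BW_CFG_INPUT_VFN_VFID_MASK;
        v |= rate_code & HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_MASK;
        return rte_cpu_to_le_16(v);
}

/* Usage: cap VF 3 at 40% of the max tx rate.
 * req.vfn[0] = vf_bw_entry(3, HWRM_FUNC_VF_BW_CFG_INPUT_VFN_RATE_PCT_40);
 * req.num_vfs = rte_cpu_to_le_16(1);
 */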
+
+/* hwrm_func_vf_bw_cfg_output (size:128b/16B) */
+struct hwrm_func_vf_bw_cfg_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       uint8_t unused_0[7];
        /*
-        * This value is used to configure the pause that will be
-        * used for force mode.
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       uint8_t force_pause;
+       uint8_t valid;
+} __attribute__((packed));
+
+/************************
+ * hwrm_func_vf_bw_qcfg *
+ ************************/
+
+
+/* hwrm_func_vf_bw_qcfg_input (size:960b/120B) */
+struct hwrm_func_vf_bw_qcfg_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * When this bit is '1', Generation of tx pause messages
-        * is supported. Disabled otherwise.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_PAUSE_TX     UINT32_C(0x1)
+       uint16_t        cmpl_ring;
        /*
-        * When this bit is '1', Reception of rx pause messages
-        * is supported. Disabled otherwise.
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_PAUSE_RX     UINT32_C(0x2)
-       uint8_t unused_1;
+       uint16_t        seq_id;
        /*
-        * This value controls the pre-emphasis to be used for the
-        * link.  Driver should not set this value (use
-        * enable.preemphasis = 0) unless driver is sure of setting.
-        * Normally HWRM FW will determine proper pre-emphasis.
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       uint32_t        preemphasis;
+       uint16_t        target_id;
        /*
-        * Setting for link speed mask that is used to
-        * advertise speeds during autonegotiation when EEE is enabled.
-        * This field is valid only when EEE is enabled.
-        * The speeds specified in this field shall be a subset of
-        * speeds specified in auto_link_speed_mask.
-        * If EEE is enabled,then at least one speed shall be provided
-        * in this mask.
+        * A physical address pointer to a host buffer into which the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       uint16_t        eee_link_speed_mask;
-       /* Reserved */
-       #define HWRM_PORT_PHY_CFG_INPUT_EEE_LINK_SPEED_MASK_RSVD1 \
-               UINT32_C(0x1)
-       /* 100Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_CFG_INPUT_EEE_LINK_SPEED_MASK_100MB \
-               UINT32_C(0x2)
-       /* Reserved */
-       #define HWRM_PORT_PHY_CFG_INPUT_EEE_LINK_SPEED_MASK_RSVD2 \
-               UINT32_C(0x4)
-       /* 1Gb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_CFG_INPUT_EEE_LINK_SPEED_MASK_1GB \
-               UINT32_C(0x8)
-       /* Reserved */
-       #define HWRM_PORT_PHY_CFG_INPUT_EEE_LINK_SPEED_MASK_RSVD3 \
-               UINT32_C(0x10)
-       /* Reserved */
-       #define HWRM_PORT_PHY_CFG_INPUT_EEE_LINK_SPEED_MASK_RSVD4 \
-               UINT32_C(0x20)
-       /* 10Gb link speed */
-       #define HWRM_PORT_PHY_CFG_INPUT_EEE_LINK_SPEED_MASK_10GB \
-               UINT32_C(0x40)
-       uint8_t unused_2[2];
+       uint64_t        resp_addr;
        /*
-        * Reuested setting of TX LPI timer in microseconds.
-        * This field is valid only when EEE is enabled and TX LPI is
-        * enabled.
+        * The number of VF functions that are being queried.
+        * The inline response space allows the host to query up to 50 VFs'
+        * rate scale percentages.
         */
-       uint32_t        tx_lpi_timer;
-       #define HWRM_PORT_PHY_CFG_INPUT_TX_LPI_TIMER_MASK UINT32_C(0xffffff)
-       #define HWRM_PORT_PHY_CFG_INPUT_TX_LPI_TIMER_SFT 0
-       uint32_t        unused_3;
+       uint16_t        num_vfs;
+       uint16_t        unused[3];
+       /* These 16-bit fields contain the VF fid. */
+       uint16_t        vfn[48];
+       /* The physical VF ID of interest. */
+       #define HWRM_FUNC_VF_BW_QCFG_INPUT_VFN_VFID_MASK UINT32_C(0xfff)
+       #define HWRM_FUNC_VF_BW_QCFG_INPUT_VFN_VFID_SFT 0
 } __attribute__((packed));
 
-/* hwrm_port_phy_cfg_output (size:128b/16B) */
-struct hwrm_port_phy_cfg_output {
+/* hwrm_func_vf_bw_qcfg_output (size:960b/120B) */
+struct hwrm_func_vf_bw_qcfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -10182,6 +10332,74 @@ struct hwrm_port_phy_cfg_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
+       /*
+        * The number of VF functions that are being queried.
+        * The inline response space allows the host to query up to 50 VFs'
+        * rate scale percentages.
+        */
+       uint16_t        num_vfs;
+       uint16_t        unused[3];
+       /* These 16-bit fields contain the VF fid and the rate scale percentage. */
+       uint16_t        vfn[48];
+       /* The physical VF ID to which the reported rate applies. */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_VFID_MASK     UINT32_C(0xfff)
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_VFID_SFT      0
+       /*
+        * This field reports the rate scale percentage of the VF specified
+        * by the physical VF ID.
+        */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_MASK     UINT32_C(0xf000)
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_SFT      12
+       /* 0% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_0 \
+               (UINT32_C(0x0) << 12)
+       /* 6.66% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_6_66 \
+               (UINT32_C(0x1) << 12)
+       /* 13.33% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_13_33 \
+               (UINT32_C(0x2) << 12)
+       /* 20% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_20 \
+               (UINT32_C(0x3) << 12)
+       /* 26.66% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_26_66 \
+               (UINT32_C(0x4) << 12)
+       /* 33.33% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_33_33 \
+               (UINT32_C(0x5) << 12)
+       /* 40% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_40 \
+               (UINT32_C(0x6) << 12)
+       /* 46.66% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_46_66 \
+               (UINT32_C(0x7) << 12)
+       /* 53.33% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_53_33 \
+               (UINT32_C(0x8) << 12)
+       /* 60% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_60 \
+               (UINT32_C(0x9) << 12)
+       /* 66.66% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_66_66 \
+               (UINT32_C(0xa) << 12)
+       /* 73.33% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_73_33 \
+               (UINT32_C(0xb) << 12)
+       /* 80% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_80 \
+               (UINT32_C(0xc) << 12)
+       /* 86.66% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_86_66 \
+               (UINT32_C(0xd) << 12)
+       /* 93.33% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_93_33 \
+               (UINT32_C(0xe) << 12)
+       /* 100% of the max tx rate */
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_100 \
+               (UINT32_C(0xf) << 12)
+       #define HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_LAST \
+               HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_PCT_100
        uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
@@ -10193,42 +10411,13 @@ struct hwrm_port_phy_cfg_output {
        uint8_t valid;
 } __attribute__((packed));
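
Decoding a returned vfn[] entry is the inverse of the packing sketched
earlier; the masks above recover the VF ID and the pre-shifted rate code
(not part of the generated header):

#include <stdint.h>
#include <rte_byteorder.h>

/* Sketch: split one vfn[] entry from the query response into the VF ID
 * and a rate code comparable against ..._VFN_RATE_PCT_* above. */
static void
vf_bw_decode(uint16_t entry, uint16_t *vfid, uint16_t *rate_code)
{
        uint16_t v = rte_le_to_cpu_16(entry);

        *vfid = v & HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_VFID_MASK;
        *rate_code = v & HWRM_FUNC_VF_BW_QCFG_OUTPUT_VFN_RATE_MASK;
}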
 
-/* hwrm_port_phy_cfg_cmd_err (size:64b/8B) */
-struct hwrm_port_phy_cfg_cmd_err {
-       /*
-        * command specific error codes that goes to
-        * the cmd_err field in Common HWRM Error Response.
-        */
-       uint8_t code;
-       /* Unknown error */
-       #define HWRM_PORT_PHY_CFG_CMD_ERR_CODE_UNKNOWN       UINT32_C(0x0)
-       /* Unable to complete operation due to invalid speed */
-       #define HWRM_PORT_PHY_CFG_CMD_ERR_CODE_ILLEGAL_SPEED UINT32_C(0x1)
-       /*
-        * retry the command since the phy is not ready.
-        * retry count is returned in opaque_0.
-        * This is only valid for the first command and
-        * this value will not change for successive calls.
-        * but if a 0 is returned at any time then this should
-        * be treated as an un recoverable failure,
-        *
-        * retry interval in milli seconds is returned in opaque_1.
-        * This specifies the time that user should wait before
-        * issuing the next port_phy_cfg command.
-        */
-       #define HWRM_PORT_PHY_CFG_CMD_ERR_CODE_RETRY         UINT32_C(0x2)
-       #define HWRM_PORT_PHY_CFG_CMD_ERR_CODE_LAST \
-               HWRM_PORT_PHY_CFG_CMD_ERR_CODE_RETRY
-       uint8_t unused_0[7];
-} __attribute__((packed));
-
-/**********************
- * hwrm_port_phy_qcfg *
- **********************/
+/***************************
+ * hwrm_func_drv_if_change *
+ ***************************/
 
 
-/* hwrm_port_phy_qcfg_input (size:192b/24B) */
-struct hwrm_port_phy_qcfg_input {
+/* hwrm_func_drv_if_change_input (size:192b/24B) */
+struct hwrm_func_drv_if_change_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -10256,13 +10445,26 @@ struct hwrm_port_phy_qcfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* Port ID of port that is to be queried. */
-       uint16_t        port_id;
-       uint8_t unused_0[6];
+       uint32_t        flags;
+       /*
+        * When this bit is '1', the function driver is indicating
+        * that the IF state is changing to the UP state.  The call should
+        * be made at the beginning of the driver's open call before
+        * resources are allocated.  After making the call, the driver
+        * should check the response to see if any resources may have
+        * changed (see the response below).  If the driver fails
+        * the open call, the driver should make this call again with
+        * this bit cleared to indicate that the IF state is not UP.
+        * During the driver's close call, when the IF state is changing
+        * to the DOWN state, the driver should make this call with the bit
+        * cleared after all resources have been freed.
+        */
+       #define HWRM_FUNC_DRV_IF_CHANGE_INPUT_FLAGS_UP     UINT32_C(0x1)
+       uint32_t        unused;
 } __attribute__((packed));
 
-/* hwrm_port_phy_qcfg_output (size:768b/96B) */
-struct hwrm_port_phy_qcfg_output {
+/* hwrm_func_drv_if_change_output (size:128b/16B) */
+struct hwrm_func_drv_if_change_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -10271,826 +10473,617 @@ struct hwrm_port_phy_qcfg_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* This value indicates the current link status. */
-       uint8_t link;
-       /* There is no link or cable detected. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_NO_LINK UINT32_C(0x0)
-       /* There is no link, but a cable has been detected. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SIGNAL  UINT32_C(0x1)
-       /* There is a link. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_LINK    UINT32_C(0x2)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_LINK_LINK
-       uint8_t unused_0;
-       /* This value indicates the current link speed of the connection. */
-       uint16_t        link_speed;
-       /* 100Mb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_100MB UINT32_C(0x1)
-       /* 1Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_1GB   UINT32_C(0xa)
-       /* 2Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_2GB   UINT32_C(0x14)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_2_5GB UINT32_C(0x19)
-       /* 10Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10GB  UINT32_C(0x64)
-       /* 20Mb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_20GB  UINT32_C(0xc8)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_25GB  UINT32_C(0xfa)
-       /* 40Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_40GB  UINT32_C(0x190)
-       /* 50Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_50GB  UINT32_C(0x1f4)
-       /* 100Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_100GB UINT32_C(0x3e8)
-       /* 10Mb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10MB  UINT32_C(0xffff)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10MB
+       uint32_t        flags;
        /*
-        * This value is indicates the duplex of the current
-        * configuration.
+        * When this bit is '1', it indicates that the resources reserved
+        * for this function may have changed.  The driver should re-check
+        * resource capabilities and reserve resources again before
+        * allocating them.
         */
-       uint8_t duplex_cfg;
-       /* Half Duplex connection. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_HALF UINT32_C(0x0)
-       /* Full duplex connection. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_FULL UINT32_C(0x1)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_FULL
+       #define HWRM_FUNC_DRV_IF_CHANGE_OUTPUT_FLAGS_RESC_CHANGE \
+               UINT32_C(0x1)
+       uint8_t unused_0[3];
        /*
-        * This value is used to indicate the current
-        * pause configuration. When autoneg is enabled, this value
-        * represents the autoneg results of pause configuration.
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       uint8_t pause;
+       uint8_t valid;
+} __attribute__((packed));
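
The input/output flags above form a small protocol: the driver raises
FLAGS_UP at the start of its open path, and if the response carries
RESC_CHANGE it must re-query and re-reserve resources before allocating
any.  A hedged sketch of the open-path half (not part of the generated
header); send_if_change() is a hypothetical transport helper:

#include <stdint.h>
#include <rte_byteorder.h>

/* Sketch: notify firmware the interface is going UP, before any
 * resources are allocated, and honor the resource-change hint. */
static int
notify_if_up(void)
{
        struct hwrm_func_drv_if_change_input req = { 0 };
        struct hwrm_func_drv_if_change_output resp = { 0 };

        req.flags = rte_cpu_to_le_32(HWRM_FUNC_DRV_IF_CHANGE_INPUT_FLAGS_UP);
        if (send_if_change(&req, &resp) != 0) /* hypothetical */
                return -1; /* on failure, repeat with flags cleared */
        if (rte_le_to_cpu_32(resp.flags) &
            HWRM_FUNC_DRV_IF_CHANGE_OUTPUT_FLAGS_RESC_CHANGE) {
                /* Reservations may have changed: re-query capabilities
                 * and reserve again before allocating. */
        }
        return 0;
}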
+
+/*********************
+ * hwrm_port_phy_cfg *
+ *********************/
+
+
+/* hwrm_port_phy_cfg_input (size:448b/56B) */
+struct hwrm_port_phy_cfg_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * When this bit is '1', Generation of tx pause messages
-        * is supported. Disabled otherwise.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_TX     UINT32_C(0x1)
+       uint16_t        cmpl_ring;
        /*
-        * When this bit is '1', Reception of rx pause messages
-        * is supported. Disabled otherwise.
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_RX     UINT32_C(0x2)
+       uint16_t        seq_id;
        /*
-        * The supported speeds for the port. This is a bit mask.
-        * For each speed that is supported, the corrresponding
-        * bit will be set to '1'.
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       uint16_t        support_speeds;
-       /* 100Mb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_100MBHD \
+       uint16_t        target_id;
+       /*
+        * A physical address pointer to a host buffer into which the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
+        */
+       uint64_t        resp_addr;
+       uint32_t        flags;
+       /*
+        * When this bit is set to '1', the PHY for the port shall
+        * be reset.
+        *
+        * # If this bit is set to 1, then the HWRM shall reset the
+        * PHY after applying PHY configuration changes specified
+        * in this command.
+        * # In order to guarantee that PHY configuration changes
+        * specified in this command take effect, the HWRM
+        * client should set this flag to 1.
+        * # If this bit is not set to 1, then the HWRM may reset
+        * the PHY depending on the current PHY configuration and
+        * settings specified in this command.
+        */
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_RESET_PHY \
                UINT32_C(0x1)
-       /* 100Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_100MB \
+       /* deprecated bit.  Do not use!!! */
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_DEPRECATED \
                UINT32_C(0x2)
-       /* 1Gb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_1GBHD \
+       /*
+        * When this bit is set to '1', the link shall be forced to
+        * the force_link_speed value.
+        *
+        * When this bit is set to '1', the HWRM client should
+        * not enable any of the auto negotiation related
+        * fields represented by auto_XXX fields in this command.
+        * When this bit is set to '1' and the HWRM client has
+        * enabled an auto_XXX field in this command, then the
+        * HWRM shall ignore the enabled auto_XXX field.
+        *
+        * When this bit is set to zero, the link
+        * shall be allowed to autoneg.
+        */
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FORCE \
                UINT32_C(0x4)
-       /* 1Gb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_1GB \
-               UINT32_C(0x8)
-       /* 2Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_2GB \
-               UINT32_C(0x10)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_2_5GB \
-               UINT32_C(0x20)
-       /* 10Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_10GB \
-               UINT32_C(0x40)
-       /* 20Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_20GB \
-               UINT32_C(0x80)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_25GB \
-               UINT32_C(0x100)
-       /* 40Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_40GB \
-               UINT32_C(0x200)
-       /* 50Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_50GB \
-               UINT32_C(0x400)
-       /* 100Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_100GB \
-               UINT32_C(0x800)
-       /* 10Mb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_10MBHD \
-               UINT32_C(0x1000)
-       /* 10Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_10MB \
-               UINT32_C(0x2000)
        /*
-        * Current setting of forced link speed.
-        * When the link speed is not being forced, this
-        * value shall be set to 0.
+        * When this bit is set to '1', the auto-negotiation process
+        * shall be restarted on the link.
         */
-       uint16_t        force_link_speed;
-       /* 100Mb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_100MB UINT32_C(0x1)
-       /* 1Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_1GB   UINT32_C(0xa)
-       /* 2Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_2GB   UINT32_C(0x14)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_2_5GB UINT32_C(0x19)
-       /* 10Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_10GB  UINT32_C(0x64)
-       /* 20Mb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_20GB  UINT32_C(0xc8)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_25GB  UINT32_C(0xfa)
-       /* 40Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_40GB \
-               UINT32_C(0x190)
-       /* 50Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_50GB \
-               UINT32_C(0x1f4)
-       /* 100Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_100GB \
-               UINT32_C(0x3e8)
-       /* 10Mb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_10MB \
-               UINT32_C(0xffff)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_10MB
-       /* Current setting of auto negotiation mode. */
-       uint8_t auto_mode;
-       /* Disable autoneg or autoneg disabled. No speeds are selected. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_NONE         UINT32_C(0x0)
-       /* Select all possible speeds for autoneg mode. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_ALL_SPEEDS   UINT32_C(0x1)
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_RESTART_AUTONEG \
+               UINT32_C(0x8)
        /*
-        * Select only the auto_link_speed speed for autoneg mode. This mode has
-        * been DEPRECATED. An HWRM client should not use this mode.
+        * When this bit is set to '1', Energy Efficient Ethernet
+        * (EEE) is requested to be enabled on this link.
+        * If EEE is not supported on this port, then this flag
+        * shall be ignored by the HWRM.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_ONE_SPEED    UINT32_C(0x2)
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_EEE_ENABLE \
+               UINT32_C(0x10)
        /*
-        * Select the auto_link_speed or any speed below that speed for autoneg.
-        * This mode has been DEPRECATED. An HWRM client should not use this mode.
+        * When this bit is set to '1', Energy Efficient Ethernet
+        * (EEE) is requested to be disabled on this link.
+        * If EEE is not supported on this port, then this flag
+        * shall be ignored by the HWRM.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_ONE_OR_BELOW UINT32_C(0x3)
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_EEE_DISABLE \
+               UINT32_C(0x20)
        /*
-        * Select the speeds based on the corresponding link speed mask value
-        * that is provided.
+        * When this bit is set to '1' and EEE is enabled on this
+        * link, then TX LPI is requested to be enabled on the link.
+        * If EEE is not supported on this port, then this flag
+        * shall be ignored by the HWRM.
+        * If EEE is disabled on this port, then this flag shall be
+        * ignored by the HWRM.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_SPEED_MASK   UINT32_C(0x4)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_SPEED_MASK
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_EEE_TX_LPI_ENABLE \
+               UINT32_C(0x40)
        /*
-        * Current setting of pause autonegotiation.
-        * Move autoneg_pause flag here.
+        * When this bit is set to '1' and EEE is enabled on this
+        * link, then TX LPI is requested to be disabled on the link.
+        * If EEE is not supported on this port, then this flag
+        * shall be ignored by the HWRM.
+        * If EEE is disabled on this port, then this flag shall be
+        * ignored by the HWRM.
         */
-       uint8_t auto_pause;
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_EEE_TX_LPI_DISABLE \
+               UINT32_C(0x80)
        /*
-        * When this bit is '1', Generation of tx pause messages
-        * has been requested. Disabled otherwise.
+        * When set to 1, then the HWRM shall enable FEC autonegotiation
+        * on this port if supported.
+        * When set to 0, then this flag shall be ignored.
+        * If FEC autonegotiation is not supported, then the HWRM shall ignore this
+        * flag.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_PAUSE_TX \
-               UINT32_C(0x1)
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FEC_AUTONEG_ENABLE \
+               UINT32_C(0x100)
        /*
-        * When this bit is '1', Reception of rx pause messages
-        * has been requested. Disabled otherwise.
+        * When set to 1, then the HWRM shall disable FEC autonegotiation
+        * on this port if supported.
+        * When set to 0, then this flag shall be ignored.
+        * If FEC autonegotiation is not supported, then the HWRM shall ignore this
+        * flag.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_PAUSE_RX \
-               UINT32_C(0x2)
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FEC_AUTONEG_DISABLE \
+               UINT32_C(0x200)
        /*
-        * When set to 1, the advertisement of pause is enabled.
-        *
-        * # When the auto_mode is not set to none and this flag is
-        * set to 1, then the auto_pause bits on this port are being
-        * advertised and autoneg pause results are being interpreted.
-        * # When the auto_mode is not set to none and this
-        * flag is set to 0, the pause is forced as indicated in
-        * force_pause, and also advertised as auto_pause bits, but
-        * the autoneg results are not interpreted since the pause
-        * configuration is being forced.
-        * # When the auto_mode is set to none and this flag is set to
-        * 1, auto_pause bits should be ignored and should be set to 0.
+        * When set to 1, then the HWRM shall enable FEC CLAUSE 74 (Fire Code)
+        * on this port if supported.
+        * When set to 0, then this flag shall be ignored.
+        * If FEC CLAUSE 74 is not supported, then the HWRM shall ignore this
+        * flag.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_PAUSE_AUTONEG_PAUSE \
-               UINT32_C(0x4)
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FEC_CLAUSE74_ENABLE \
+               UINT32_C(0x400)
        /*
-        * Current setting for auto_link_speed. This field is only
-        * valid when auto_mode is set to "one_speed" or "one_or_below".
+        * When set to 1, then the HWRM shall disable FEC CLAUSE 74 (Fire Code)
+        * on this port if supported.
+        * When set to 0, then this flag shall be ignored.
+        * If FEC CLAUSE 74 is not supported, then the HWRM shall ignore this
+        * flag.
         */
-       uint16_t        auto_link_speed;
-       /* 100Mb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_100MB UINT32_C(0x1)
-       /* 1Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_1GB   UINT32_C(0xa)
-       /* 2Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_2GB   UINT32_C(0x14)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_2_5GB UINT32_C(0x19)
-       /* 10Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_10GB  UINT32_C(0x64)
-       /* 20Mb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_20GB  UINT32_C(0xc8)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_25GB  UINT32_C(0xfa)
-       /* 40Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_40GB  UINT32_C(0x190)
-       /* 50Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_50GB  UINT32_C(0x1f4)
-       /* 100Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_100GB UINT32_C(0x3e8)
-       /* 10Mb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_10MB \
-               UINT32_C(0xffff)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_10MB
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FEC_CLAUSE74_DISABLE \
+               UINT32_C(0x800)
        /*
-        * Current setting for auto_link_speed_mask that is used to
-        * advertise speeds during autonegotiation.
-        * This field is only valid when auto_mode is set to "mask".
-        * The speeds specified in this field shall be a subset of
-        * supported speeds on this port.
+        * When set to 1, then the HWRM shall enable FEC CLAUSE 91 (Reed Solomon)
+        * on this port if supported.
+        * When set to 0, then this flag shall be ignored.
+        * If FEC CLAUSE 91 is not supported, then the HWRM shall ignore this
+        * flag.
         */
-       uint16_t        auto_link_speed_mask;
-       /* 100Mb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_100MBHD \
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FEC_CLAUSE91_ENABLE \
+               UINT32_C(0x1000)
+       /*
+        * When set to 1, then the HWRM shall disable FEC CLAUSE 91 (Reed Solomon)
+        * on this port if supported.
+        * When set to 0, then this flag shall be ignored.
+        * If FEC CLAUSE 91 is not supported, then the HWRM shall ignore this
+        * flag.
+        */
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FEC_CLAUSE91_DISABLE \
+               UINT32_C(0x2000)
+       /*
+        * When this bit is set to '1', the link shall be forced to
+        * be taken down.
+        *
+        * # When this bit is set to '1', all other
+        * command input settings related to the link speed shall
+        * be ignored.
+        * Once the link state is forced down, it can be
+        * explicitly cleared from that state by setting this flag
+        * to '0'.
+        * # If this flag is set to '0', then the link shall be
+        * cleared from forced down state if the link is in forced
+        * down state.
+        * There may be conditions (e.g. out-of-band or sideband
+        * configuration changes for the link) outside the scope
+        * of the HWRM implementation that may clear forced down
+        * link state.
+        */
+       #define HWRM_PORT_PHY_CFG_INPUT_FLAGS_FORCE_LINK_DWN \
+               UINT32_C(0x4000)
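+       /*
+        * Editorial sketch (not part of the generated header): the
+        * flag bits above are independent masks and are OR'ed into
+        * this 32-bit flags word, e.g. to request EEE with TX LPI:
+        *
+        *   uint32_t flags = HWRM_PORT_PHY_CFG_INPUT_FLAGS_EEE_ENABLE |
+        *           HWRM_PORT_PHY_CFG_INPUT_FLAGS_EEE_TX_LPI_ENABLE;
+        *   req.flags = rte_cpu_to_le_32(flags);
+        *
+        * FLAGS_EEE_ENABLE is assumed to be defined earlier in this
+        * field (outside this hunk); rte_cpu_to_le_32() is the usual
+        * DPDK helper, as HWRM fields are little-endian.
+        */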
+       uint32_t        enables;
+       /*
+        * This bit must be '1' for the auto_mode field to be
+        * configured.
+        */
+       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_MODE \
                UINT32_C(0x1)
-       /* 100Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_100MB \
+       /*
+        * This bit must be '1' for the auto_duplex field to be
+        * configured.
+        */
+       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_DUPLEX \
                UINT32_C(0x2)
-       /* 1Gb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_1GBHD \
+       /*
+        * This bit must be '1' for the auto_pause field to be
+        * configured.
+        */
+       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_PAUSE \
                UINT32_C(0x4)
-       /* 1Gb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_1GB \
+       /*
+        * This bit must be '1' for the auto_link_speed field to be
+        * configured.
+        */
+       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_LINK_SPEED \
                UINT32_C(0x8)
-       /* 2Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_2GB \
+       /*
+        * This bit must be '1' for the auto_link_speed_mask field to be
+        * configured.
+        */
+       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_LINK_SPEED_MASK \
                UINT32_C(0x10)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_2_5GB \
+       /*
+        * This bit must be '1' for the wirespeed field to be
+        * configured.
+        */
+       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_WIRESPEED \
                UINT32_C(0x20)
-       /* 10Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_10GB \
+       /*
+        * This bit must be '1' for the lpbk field to be
+        * configured.
+        */
+       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_LPBK \
                UINT32_C(0x40)
-       /* 20Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_20GB \
+       /*
+        * This bit must be '1' for the preemphasis field to be
+        * configured.
+        */
+       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_PREEMPHASIS \
                UINT32_C(0x80)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_25GB \
-               UINT32_C(0x100)
-       /* 40Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_40GB \
-               UINT32_C(0x200)
-       /* 50Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_50GB \
-               UINT32_C(0x400)
-       /* 100Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_100GB \
-               UINT32_C(0x800)
-       /* 10Mb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_10MBHD \
-               UINT32_C(0x1000)
-       /* 10Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_10MB \
-               UINT32_C(0x2000)
-       /* Current setting for wirespeed. */
-       uint8_t wirespeed;
-       /* Wirespeed feature is disabled. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_WIRESPEED_OFF UINT32_C(0x0)
-       /* Wirespeed feature is enabled. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_WIRESPEED_ON  UINT32_C(0x1)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_WIRESPEED_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_WIRESPEED_ON
-       /* Current setting for loopback. */
-       uint8_t lpbk;
-       /* No loopback is selected.  Normal operation. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LPBK_NONE     UINT32_C(0x0)
        /*
-        * The HW will be configured with local loopback such that
-        * host data is sent back to the host without modification.
+        * This bit must be '1' for the force_pause field to be
+        * configured.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LPBK_LOCAL    UINT32_C(0x1)
+       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_FORCE_PAUSE \
+               UINT32_C(0x100)
        /*
-        * The HW will be configured with remote loopback such that
-        * port logic will send packets back out the transmitter that
-        * are received.
+        * This bit must be '1' for the eee_link_speed_mask field to be
+        * configured.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LPBK_REMOTE   UINT32_C(0x2)
+       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_EEE_LINK_SPEED_MASK \
+               UINT32_C(0x200)
        /*
-        * The HW will be configured with external loopback such that
-        * host data is sent on the trasmitter and based on the external
-        * loopback connection the data will be received without modification.
+        * This bit must be '1' for the tx_lpi_timer field to be
+        * configured.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LPBK_EXTERNAL UINT32_C(0x3)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LPBK_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_LPBK_EXTERNAL
+       #define HWRM_PORT_PHY_CFG_INPUT_ENABLES_TX_LPI_TIMER \
+               UINT32_C(0x400)
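+       /*
+        * Minimal sketch of the enables convention documented above:
+        * a field below is only honoured when its enables bit is set,
+        * so a caller always pairs the two, e.g.:
+        *
+        *   req.enables |= rte_cpu_to_le_32(
+        *           HWRM_PORT_PHY_CFG_INPUT_ENABLES_AUTO_MODE);
+        *   req.auto_mode = HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_ALL_SPEEDS;
+        */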
+       /* Port ID of port that is to be configured. */
+       uint16_t        port_id;
        /*
-        * Current setting of forced pause.
-        * When the pause configuration is not being forced, then
-        * this value shall be set to 0.
+        * This is the speed that will be used if the force
+        * bit is '1'.  If an unsupported speed is selected, an error
+        * will be generated.
         */
-       uint8_t force_pause;
+       uint16_t        force_link_speed;
+       /* 100Mb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_100MB UINT32_C(0x1)
+       /* 1Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_1GB   UINT32_C(0xa)
+       /* 2Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_2GB   UINT32_C(0x14)
+       /* 2.5Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_2_5GB UINT32_C(0x19)
+       /* 10Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_10GB  UINT32_C(0x64)
+       /* 20Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_20GB  UINT32_C(0xc8)
+       /* 25Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_25GB  UINT32_C(0xfa)
+       /* 40Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_40GB  UINT32_C(0x190)
+       /* 50Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_50GB  UINT32_C(0x1f4)
+       /* 100Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_100GB UINT32_C(0x3e8)
+       /* 10Mb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_10MB  UINT32_C(0xffff)
+       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_LAST \
+               HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_10MB
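+       /*
+        * Editorial note: apart from the 10Mb sentinel (0xffff), these
+        * codes are the link speed in units of 100 Mbps (0x1 = 100Mb,
+        * 0xa = 1Gb, 0x3e8 = 100Gb). A hypothetical decoder:
+        *
+        *   static inline uint32_t hwrm_speed_code_to_mbps(uint16_t c)
+        *   {
+        *           return c == 0xffff ? 10 : (uint32_t)c * 100;
+        *   }
+        */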
        /*
-        * When this bit is '1', Generation of tx pause messages
-        * is supported. Disabled otherwise.
+        * This value is used to identify what autoneg mode is
+        * used when the link speed is not being forced.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_PAUSE_TX     UINT32_C(0x1)
+       uint8_t auto_mode;
+       /* Disable autoneg or autoneg disabled. No speeds are selected. */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_NONE         UINT32_C(0x0)
+       /* Select all possible speeds for autoneg mode. */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_ALL_SPEEDS   UINT32_C(0x1)
        /*
-        * When this bit is '1', Reception of rx pause messages
-        * is supported. Disabled otherwise.
+        * Select only the auto_link_speed speed for autoneg mode. This mode has
+        * been DEPRECATED. An HWRM client should not use this mode.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_PAUSE_RX     UINT32_C(0x2)
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_ONE_SPEED    UINT32_C(0x2)
        /*
-        * This value indicates the current status of the optics module on
-        * this port.
+        * Select the auto_link_speed or any speed below that speed for autoneg.
+        * This mode has been DEPRECATED. An HWRM client should not use this mode.
         */
-       uint8_t module_status;
-       /* Module is inserted and accepted */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_NONE \
-               UINT32_C(0x0)
-       /* Module is rejected and transmit side Laser is disabled. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_DISABLETX \
-               UINT32_C(0x1)
-       /* Module mismatch warning. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_WARNINGMSG \
-               UINT32_C(0x2)
-       /* Module is rejected and powered down. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_PWRDOWN \
-               UINT32_C(0x3)
-       /* Module is not inserted. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_NOTINSERTED \
-               UINT32_C(0x4)
-       /* Module status is not applicable. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_NOTAPPLICABLE \
-               UINT32_C(0xff)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_NOTAPPLICABLE
-       /* Current setting for preemphasis. */
-       uint32_t        preemphasis;
-       /* This field represents the major version of the PHY. */
-       uint8_t phy_maj;
-       /* This field represents the minor version of the PHY. */
-       uint8_t phy_min;
-       /* This field represents the build version of the PHY. */
-       uint8_t phy_bld;
-       /* This value represents a PHY type. */
-       uint8_t phy_type;
-       /* Unknown */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_UNKNOWN \
-               UINT32_C(0x0)
-       /* BASE-CR */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASECR \
-               UINT32_C(0x1)
-       /* BASE-KR4 (Deprecated) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR4 \
-               UINT32_C(0x2)
-       /* BASE-LR */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASELR \
-               UINT32_C(0x3)
-       /* BASE-SR */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASESR \
-               UINT32_C(0x4)
-       /* BASE-KR2 (Deprecated) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR2 \
-               UINT32_C(0x5)
-       /* BASE-KX */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKX \
-               UINT32_C(0x6)
-       /* BASE-KR */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR \
-               UINT32_C(0x7)
-       /* BASE-T */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASET \
-               UINT32_C(0x8)
-       /* EEE capable BASE-T */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASETE \
-               UINT32_C(0x9)
-       /* SGMII connected external PHY */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_SGMIIEXTPHY \
-               UINT32_C(0xa)
-       /* 25G_BASECR_CA_L */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASECR_CA_L \
-               UINT32_C(0xb)
-       /* 25G_BASECR_CA_S */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASECR_CA_S \
-               UINT32_C(0xc)
-       /* 25G_BASECR_CA_N */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASECR_CA_N \
-               UINT32_C(0xd)
-       /* 25G_BASESR */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASESR \
-               UINT32_C(0xe)
-       /* 100G_BASECR4 */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASECR4 \
-               UINT32_C(0xf)
-       /* 100G_BASESR4 */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASESR4 \
-               UINT32_C(0x10)
-       /* 100G_BASELR4 */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASELR4 \
-               UINT32_C(0x11)
-       /* 100G_BASEER4 */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASEER4 \
-               UINT32_C(0x12)
-       /* 100G_BASESR10 */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASESR10 \
-               UINT32_C(0x13)
-       /* 40G_BASECR4 */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASECR4 \
-               UINT32_C(0x14)
-       /* 40G_BASESR4 */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASESR4 \
-               UINT32_C(0x15)
-       /* 40G_BASELR4 */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASELR4 \
-               UINT32_C(0x16)
-       /* 40G_BASEER4 */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASEER4 \
-               UINT32_C(0x17)
-       /* 40G_ACTIVE_CABLE */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_ACTIVE_CABLE \
-               UINT32_C(0x18)
-       /* 1G_baseT */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASET \
-               UINT32_C(0x19)
-       /* 1G_baseSX */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASESX \
-               UINT32_C(0x1a)
-       /* 1G_baseCX */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASECX \
-               UINT32_C(0x1b)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASECX
-       /* This value represents a media type. */
-       uint8_t media_type;
-       /* Unknown */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_UNKNOWN UINT32_C(0x0)
-       /* Twisted Pair */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_TP      UINT32_C(0x1)
-       /* Direct Attached Copper */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_DAC     UINT32_C(0x2)
-       /* Fiber */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_FIBRE   UINT32_C(0x3)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_FIBRE
-       /* This value represents a transceiver type. */
-       uint8_t xcvr_pkg_type;
-       /* PHY and MAC are in the same package */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_PKG_TYPE_XCVR_INTERNAL \
-               UINT32_C(0x1)
-       /* PHY and MAC are in different packages */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_PKG_TYPE_XCVR_EXTERNAL \
-               UINT32_C(0x2)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_PKG_TYPE_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_PKG_TYPE_XCVR_EXTERNAL
-       uint8_t eee_config_phy_addr;
-       /* This field represents PHY address. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_ADDR_MASK \
-               UINT32_C(0x1f)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_ADDR_SFT               0
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_ONE_OR_BELOW UINT32_C(0x3)
        /*
-        * This field represents flags related to EEE configuration.
-        * These EEE configuration flags are valid only when the
-        * auto_mode is not set to none (in other words autonegotiation
-        * is enabled).
+        * Select the speeds based on the corresponding link speed mask value
+        * that is provided.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_EEE_CONFIG_MASK \
-               UINT32_C(0xe0)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_EEE_CONFIG_SFT             5
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_SPEED_MASK   UINT32_C(0x4)
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_LAST \
+               HWRM_PORT_PHY_CFG_INPUT_AUTO_MODE_SPEED_MASK
        /*
-        * When set to 1, Energy Efficient Ethernet (EEE) mode is enabled.
-        * Speeds for autoneg with EEE mode enabled
-        * are based on eee_link_speed_mask.
+        * This is the duplex setting that will be used if the autoneg_mode
+        * is "one_speed" or "one_or_below".
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_EEE_CONFIG_EEE_ENABLED \
-               UINT32_C(0x20)
+       uint8_t auto_duplex;
+       /* Half Duplex will be requested. */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_DUPLEX_HALF UINT32_C(0x0)
+       /* Full duplex will be requested. */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_DUPLEX_FULL UINT32_C(0x1)
+       /* Both Half and Full duplex will be requested. */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_DUPLEX_BOTH UINT32_C(0x2)
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_DUPLEX_LAST \
+               HWRM_PORT_PHY_CFG_INPUT_AUTO_DUPLEX_BOTH
        /*
-        * This flag is valid only when eee_enabled is set to 1.
-        *
-        * # If eee_enabled is set to 0, then EEE mode is disabled
-        * and this flag shall be ignored.
-        * # If eee_enabled is set to 1 and this flag is set to 1,
-        * then Energy Efficient Ethernet (EEE) mode is enabled
-        * and in use.
-        * # If eee_enabled is set to 1 and this flag is set to 0,
-        * then Energy Efficient Ethernet (EEE) mode is enabled
-        * but is currently not in use.
+        * This value is used to configure the pause that will be
+        * used for autonegotiation.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_EEE_CONFIG_EEE_ACTIVE \
-               UINT32_C(0x40)
+       uint8_t auto_pause;
        /*
-        * This flag is valid only when eee_enabled is set to 1.
-        *
-        * # If eee_enabled is set to 0, then EEE mode is disabled
-        * and this flag shall be ignored.
-        * # If eee_enabled is set to 1 and this flag is set to 1,
-        * then Energy Efficient Ethernet (EEE) mode is enabled
-        * and TX LPI is enabled.
-        * # If eee_enabled is set to 1 and this flag is set to 0,
-        * then Energy Efficient Ethernet (EEE) mode is enabled
-        * but TX LPI is disabled.
+        * When this bit is '1', Generation of tx pause messages
+        * has been requested. Disabled otherwise.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_EEE_CONFIG_EEE_TX_LPI \
-               UINT32_C(0x80)
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_PAUSE_TX \
+               UINT32_C(0x1)
        /*
-        * When set to 1, the parallel detection is used to determine
-        * the speed of the link partner.
-        *
-        * Parallel detection is used when a autonegotiation capable
-        * device is connected to a link parter that is not capable
-        * of autonegotiation.
+        * When this bit is '1', Reception of rx pause messages
+        * has been requested. Disabled otherwise.
         */
-       uint8_t parallel_detect;
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_PAUSE_RX \
+               UINT32_C(0x2)
        /*
-        * When set to 1, the parallel detection is used to determine
-        * the speed of the link partner.
+        * When set to 1, the advertisement of pause is enabled.
         *
-        * Parallel detection is used when a autonegotiation capable
-        * device is connected to a link parter that is not capable
-        * of autonegotiation.
+        * # When the auto_mode is not set to none and this flag is
+        * set to 1, then the auto_pause bits on this port are being
+        * advertised and autoneg pause results are being interpreted.
+        * # When the auto_mode is not set to none and this
+        * flag is set to 0, the pause is forced as indicated in
+        * force_pause, and also advertised as auto_pause bits, but
+        * the autoneg results are not interpreted since the pause
+        * configuration is being forced.
+        * # When the auto_mode is set to none and this flag is set to
+        * 1, auto_pause bits should be ignored and should be set to 0.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_PARALLEL_DETECT     UINT32_C(0x1)
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_PAUSE_AUTONEG_PAUSE \
+               UINT32_C(0x4)
+       uint8_t unused_0;
        /*
-        * The advertised speeds for the port by the link partner.
-        * Each advertised speed will be set to '1'.
+        * This is the speed that will be used if the autoneg_mode
+        * is "one_speed" or "one_or_below".  If an unsupported speed
+        * is selected, an error will be generated.
         */
-       uint16_t        link_partner_adv_speeds;
-       /* 100Mb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_100MBHD \
-               UINT32_C(0x1)
-       /* 100Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_100MB \
-               UINT32_C(0x2)
-       /* 1Gb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_1GBHD \
-               UINT32_C(0x4)
-       /* 1Gb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_1GB \
+       uint16_t        auto_link_speed;
+       /* 100Mb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_100MB UINT32_C(0x1)
+       /* 1Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_1GB   UINT32_C(0xa)
+       /* 2Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_2GB   UINT32_C(0x14)
+       /* 2.5Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_2_5GB UINT32_C(0x19)
+       /* 10Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_10GB  UINT32_C(0x64)
+       /* 20Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_20GB  UINT32_C(0xc8)
+       /* 25Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_25GB  UINT32_C(0xfa)
+       /* 40Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_40GB  UINT32_C(0x190)
+       /* 50Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_50GB  UINT32_C(0x1f4)
+       /* 100Gb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_100GB UINT32_C(0x3e8)
+       /* 10Mb link speed */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_10MB  UINT32_C(0xffff)
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_LAST \
+               HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_10MB
+       /*
+        * This is a mask of link speeds that will be used if
+        * autoneg_mode is "mask".  If unsupported speed is enabled
+        * an error will be generated.
+        */
+       uint16_t        auto_link_speed_mask;
+       /* 100Mb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_100MBHD \
+               UINT32_C(0x1)
+       /* 100Mb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_100MB \
+               UINT32_C(0x2)
+       /* 1Gb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_1GBHD \
+               UINT32_C(0x4)
+       /* 1Gb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_1GB \
                UINT32_C(0x8)
        /* 2Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_2GB \
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_2GB \
                UINT32_C(0x10)
        /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_2_5GB \
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_2_5GB \
                UINT32_C(0x20)
        /* 10Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_10GB \
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_10GB \
                UINT32_C(0x40)
        /* 20Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_20GB \
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_20GB \
                UINT32_C(0x80)
        /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_25GB \
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_25GB \
                UINT32_C(0x100)
        /* 40Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_40GB \
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_40GB \
                UINT32_C(0x200)
        /* 50Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_50GB \
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_50GB \
                UINT32_C(0x400)
        /* 100Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_100GB \
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_100GB \
                UINT32_C(0x800)
        /* 10Mb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_10MBHD \
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_10MBHD \
                UINT32_C(0x1000)
        /* 10Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_10MB \
+       #define HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_10MB \
                UINT32_C(0x2000)
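+       /*
+        * Sketch: advertising only 10Gb and 25Gb via this mask (the
+        * AUTO_LINK_SPEED_MASK enables bit must be set and auto_mode
+        * must be "mask", as described above):
+        *
+        *   req.auto_link_speed_mask = rte_cpu_to_le_16(
+        *           HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_10GB |
+        *           HWRM_PORT_PHY_CFG_INPUT_AUTO_LINK_SPEED_MASK_25GB);
+        */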
+       /* This value controls the wirespeed feature. */
+       uint8_t wirespeed;
+       /* Wirespeed feature is disabled. */
+       #define HWRM_PORT_PHY_CFG_INPUT_WIRESPEED_OFF UINT32_C(0x0)
+       /* Wirespeed feature is enabled. */
+       #define HWRM_PORT_PHY_CFG_INPUT_WIRESPEED_ON  UINT32_C(0x1)
+       #define HWRM_PORT_PHY_CFG_INPUT_WIRESPEED_LAST \
+               HWRM_PORT_PHY_CFG_INPUT_WIRESPEED_ON
+       /* This value controls the loopback setting for the PHY. */
+       uint8_t lpbk;
+       /* No loopback is selected.  Normal operation. */
+       #define HWRM_PORT_PHY_CFG_INPUT_LPBK_NONE     UINT32_C(0x0)
        /*
-        * The advertised autoneg for the port by the link partner.
-        * This field is deprecated and should be set to 0.
+        * The HW will be configured with local loopback such that
+        * host data is sent back to the host without modification.
         */
-       uint8_t link_partner_adv_auto_mode;
-       /* Disable autoneg or autoneg disabled. No speeds are selected. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_AUTO_MODE_NONE \
-               UINT32_C(0x0)
-       /* Select all possible speeds for autoneg mode. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_AUTO_MODE_ALL_SPEEDS \
-               UINT32_C(0x1)
+       #define HWRM_PORT_PHY_CFG_INPUT_LPBK_LOCAL    UINT32_C(0x1)
        /*
-        * Select only the auto_link_speed speed for autoneg mode. This mode has
-        * been DEPRECATED. An HWRM client should not use this mode.
+        * The HW will be configured with remote loopback such that
+        * port logic will send packets back out the transmitter that
+        * are received.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_AUTO_MODE_ONE_SPEED \
-               UINT32_C(0x2)
+       #define HWRM_PORT_PHY_CFG_INPUT_LPBK_REMOTE   UINT32_C(0x2)
        /*
-        * Select the auto_link_speed or any speed below that speed for autoneg.
-        * This mode has been DEPRECATED. An HWRM client should not use this mode.
+        * The HW will be configured with external loopback such that
+        * host data is sent on the transmitter and based on the external
+        * loopback connection the data will be received without modification.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_AUTO_MODE_ONE_OR_BELOW \
-               UINT32_C(0x3)
+       #define HWRM_PORT_PHY_CFG_INPUT_LPBK_EXTERNAL UINT32_C(0x3)
+       #define HWRM_PORT_PHY_CFG_INPUT_LPBK_LAST \
+               HWRM_PORT_PHY_CFG_INPUT_LPBK_EXTERNAL
        /*
-        * Select the speeds based on the corresponding link speed mask value
-        * that is provided.
+        * This value is used to configure the pause that will be
+        * used for force mode.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK \
-               UINT32_C(0x4)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_AUTO_MODE_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK
-       /* The advertised pause settings on the port by the link partner. */
-       uint8_t link_partner_adv_pause;
+       uint8_t force_pause;
        /*
         * When this bit is '1', Generation of tx pause messages
         * is supported. Disabled otherwise.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_PAUSE_TX \
-               UINT32_C(0x1)
+       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_PAUSE_TX     UINT32_C(0x1)
        /*
         * When this bit is '1', Reception of rx pause messages
         * is supported. Disabled otherwise.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_PAUSE_RX \
-               UINT32_C(0x2)
+       #define HWRM_PORT_PHY_CFG_INPUT_FORCE_PAUSE_RX     UINT32_C(0x2)
+       uint8_t unused_1;
        /*
-        * Current setting for link speed mask that is used to
+        * This value controls the pre-emphasis to be used for the
+        * link.  The driver should not set this value (use
+        * enable.preemphasis = 0) unless it is sure of the setting.
+        * Normally the HWRM FW will determine the proper pre-emphasis.
+        */
+       uint32_t        preemphasis;
+       /*
+        * Setting for link speed mask that is used to
         * advertise speeds during autonegotiation when EEE is enabled.
-        * This field is valid only when eee_enabled flags is set to 1.
+        * This field is valid only when EEE is enabled.
         * The speeds specified in this field shall be a subset of
         * speeds specified in auto_link_speed_mask.
+        * If EEE is enabled, then at least one speed shall be provided
+        * in this mask.
         */
-       uint16_t        adv_eee_link_speed_mask;
+       uint16_t        eee_link_speed_mask;
        /* Reserved */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_ADV_EEE_LINK_SPEED_MASK_RSVD1 \
+       #define HWRM_PORT_PHY_CFG_INPUT_EEE_LINK_SPEED_MASK_RSVD1 \
                UINT32_C(0x1)
        /* 100Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_ADV_EEE_LINK_SPEED_MASK_100MB \
+       #define HWRM_PORT_PHY_CFG_INPUT_EEE_LINK_SPEED_MASK_100MB \
                UINT32_C(0x2)
        /* Reserved */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_ADV_EEE_LINK_SPEED_MASK_RSVD2 \
+       #define HWRM_PORT_PHY_CFG_INPUT_EEE_LINK_SPEED_MASK_RSVD2 \
                UINT32_C(0x4)
        /* 1Gb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_ADV_EEE_LINK_SPEED_MASK_1GB \
+       #define HWRM_PORT_PHY_CFG_INPUT_EEE_LINK_SPEED_MASK_1GB \
                UINT32_C(0x8)
        /* Reserved */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_ADV_EEE_LINK_SPEED_MASK_RSVD3 \
+       #define HWRM_PORT_PHY_CFG_INPUT_EEE_LINK_SPEED_MASK_RSVD3 \
                UINT32_C(0x10)
        /* Reserved */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_ADV_EEE_LINK_SPEED_MASK_RSVD4 \
+       #define HWRM_PORT_PHY_CFG_INPUT_EEE_LINK_SPEED_MASK_RSVD4 \
                UINT32_C(0x20)
        /* 10Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_ADV_EEE_LINK_SPEED_MASK_10GB \
+       #define HWRM_PORT_PHY_CFG_INPUT_EEE_LINK_SPEED_MASK_10GB \
                UINT32_C(0x40)
+       uint8_t unused_2[2];
        /*
-        * Current setting for link speed mask that is advertised by
-        * the link partner when EEE is enabled.
-        * This field is valid only when eee_enabled flags is set to 1.
+        * Requested setting of the TX LPI timer in microseconds.
+        * This field is valid only when EEE is enabled and TX LPI is
+        * enabled.
         */
-       uint16_t        link_partner_adv_eee_link_speed_mask;
-       /* Reserved */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_RSVD1 \
-               UINT32_C(0x1)
-       /* 100Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_100MB \
-               UINT32_C(0x2)
-       /* Reserved */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_RSVD2 \
-               UINT32_C(0x4)
-       /* 1Gb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_1GB \
-               UINT32_C(0x8)
-       /* Reserved */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_RSVD3 \
-               UINT32_C(0x10)
-       /* Reserved */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_RSVD4 \
-               UINT32_C(0x20)
-       /* 10Gb link speed */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_10GB \
-               UINT32_C(0x40)
-       uint32_t        xcvr_identifier_type_tx_lpi_timer;
+       uint32_t        tx_lpi_timer;
+       #define HWRM_PORT_PHY_CFG_INPUT_TX_LPI_TIMER_MASK UINT32_C(0xffffff)
+       #define HWRM_PORT_PHY_CFG_INPUT_TX_LPI_TIMER_SFT 0
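+       /*
+        * Sketch: the timer value occupies the low 24 bits, so it is
+        * installed with the usual mask/shift idiom (usec is a caller
+        * variable assumed here):
+        *
+        *   req.tx_lpi_timer = rte_cpu_to_le_32(
+        *           (usec << HWRM_PORT_PHY_CFG_INPUT_TX_LPI_TIMER_SFT) &
+        *           HWRM_PORT_PHY_CFG_INPUT_TX_LPI_TIMER_MASK);
+        */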
+       uint32_t        unused_3;
+} __attribute__((packed));
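+
+/*
+ * Editorial usage sketch, not part of the generated header: forcing a
+ * port to 10Gb with pause disabled. bnxt_hwrm_send_message() stands in
+ * for whatever transport routine the driver provides, and the FORCE
+ * flag is assumed to be defined earlier in the flags field (outside
+ * this hunk):
+ *
+ *   struct hwrm_port_phy_cfg_input req = { 0 };
+ *
+ *   req.port_id = rte_cpu_to_le_16(port_id);
+ *   req.flags = rte_cpu_to_le_32(HWRM_PORT_PHY_CFG_INPUT_FLAGS_FORCE);
+ *   req.enables = rte_cpu_to_le_32(
+ *           HWRM_PORT_PHY_CFG_INPUT_ENABLES_FORCE_PAUSE);
+ *   req.force_link_speed = rte_cpu_to_le_16(
+ *           HWRM_PORT_PHY_CFG_INPUT_FORCE_LINK_SPEED_10GB);
+ *   req.force_pause = 0;
+ *   rc = bnxt_hwrm_send_message(bp, &req, sizeof(req));
+ */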
+
+/* hwrm_port_phy_cfg_output (size:128b/16B) */
+struct hwrm_port_phy_cfg_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       uint8_t unused_0[7];
        /*
-        * Current setting of TX LPI timer in microseconds.
-        * This field is valid only when_eee_enabled flag is set to 1
-        * and tx_lpi_enabled is set to 1.
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_TX_LPI_TIMER_MASK \
-               UINT32_C(0xffffff)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_TX_LPI_TIMER_SFT             0
-       /* This value represents transceiver identifier type. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_MASK \
-               UINT32_C(0xff000000)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_SFT     24
-       /* Unknown */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_UNKNOWN \
-               (UINT32_C(0x0) << 24)
-       /* SFP/SFP+/SFP28 */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_SFP \
-               (UINT32_C(0x3) << 24)
-       /* QSFP+ */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_QSFP \
-               (UINT32_C(0xc) << 24)
-       /* QSFP+ */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_QSFPPLUS \
-               (UINT32_C(0xd) << 24)
-       /* QSFP28 */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_QSFP28 \
-               (UINT32_C(0x11) << 24)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_QSFP28
+       uint8_t valid;
+} __attribute__((packed));
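+
+/*
+ * Sketch of the completion convention described for 'valid' above
+ * (the polling flow is an assumption): because the field is written
+ * last, a caller polling a DMA'ed response buffer (resp_addr_va is a
+ * hypothetical virtual address of that buffer) can spin on it before
+ * reading any other field:
+ *
+ *   volatile struct hwrm_port_phy_cfg_output *resp = resp_addr_va;
+ *   while (resp->valid == 0)
+ *           rte_delay_us(1);
+ *   rte_io_rmb();   (orders the valid read before the field reads)
+ */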
+
+/* hwrm_port_phy_cfg_cmd_err (size:64b/8B) */
+struct hwrm_port_phy_cfg_cmd_err {
        /*
-        * This value represents the current configuration of
-        * Forward Error Correction (FEC) on the port.
+        * Command-specific error codes that go to
+        * the cmd_err field in the Common HWRM Error Response.
         */
-       uint16_t        fec_cfg;
+       uint8_t code;
+       /* Unknown error */
+       #define HWRM_PORT_PHY_CFG_CMD_ERR_CODE_UNKNOWN       UINT32_C(0x0)
+       /* Unable to complete operation due to invalid speed */
+       #define HWRM_PORT_PHY_CFG_CMD_ERR_CODE_ILLEGAL_SPEED UINT32_C(0x1)
        /*
-        * When set to 1, then FEC is not supported on this port. If this flag
-        * is set to 1, then all other FEC configuration flags shall be ignored.
-        * When set to 0, then FEC is supported as indicated by other
-        * configuration flags.
-        * If no cable is attached and the HWRM does not yet know the FEC
-        * capability, then the HWRM shall set this flag to 1 when reporting
-        * FEC capability.
-        */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_NONE_SUPPORTED \
-               UINT32_C(0x1)
-       /*
-        * When set to 1, then FEC autonegotiation is supported on this port.
-        * When set to 0, then FEC autonegotiation is not supported on this port.
-        */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_AUTONEG_SUPPORTED \
-               UINT32_C(0x2)
-       /*
-        * When set to 1, then FEC autonegotiation is enabled on this port.
-        * When set to 0, then FEC autonegotiation is disabled if supported.
-        * This flag should be ignored if FEC autonegotiation is not supported on this port.
-        */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_AUTONEG_ENABLED \
-               UINT32_C(0x4)
-       /*
-        * When set to 1, then FEC CLAUSE 74 (Fire Code) is supported on this port.
-        * When set to 0, then FEC CLAUSE 74 (Fire Code) is not supported on this port.
-        */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_CLAUSE74_SUPPORTED \
-               UINT32_C(0x8)
-       /*
-        * When set to 1, then FEC CLAUSE 74 (Fire Code) is enabled on this port.
-        * When set to 0, then FEC CLAUSE 74 (Fire Code) is disabled if supported.
-        * This flag should be ignored if FEC CLAUSE 74 is not supported on this port.
-        */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_CLAUSE74_ENABLED \
-               UINT32_C(0x10)
-       /*
-        * When set to 1, then FEC CLAUSE 91 (Reed Solomon) is supported on this port.
-        * When set to 0, then FEC CLAUSE 91 (Reed Solomon) is not supported on this port.
-        */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_CLAUSE91_SUPPORTED \
-               UINT32_C(0x20)
-       /*
-        * When set to 1, then FEC CLAUSE 91 (Reed Solomon) is enabled on this port.
-        * When set to 0, then FEC CLAUSE 91 (Reed Solomon) is disabled if supported.
-        * This flag should be ignored if FEC CLAUSE 91 is not supported on this port.
-        */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_CLAUSE91_ENABLED \
-               UINT32_C(0x40)
-       /*
-        * This value is indicates the duplex of the current
-        * connection state.
-        */
-       uint8_t duplex_state;
-       /* Half Duplex connection. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_STATE_HALF UINT32_C(0x0)
-       /* Full duplex connection. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_STATE_FULL UINT32_C(0x1)
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_STATE_LAST \
-               HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_STATE_FULL
-       /* Option flags fields. */
-       uint8_t option_flags;
-       /* When this bit is '1', Media auto detect is enabled. */
-       #define HWRM_PORT_PHY_QCFG_OUTPUT_OPTION_FLAGS_MEDIA_AUTO_DETECT \
-               UINT32_C(0x1)
-       /*
-        * Up to 16 bytes of null padded ASCII string representing
-        * PHY vendor.
-        * If the string is set to null, then the vendor name is not
-        * available.
-        */
-       char    phy_vendor_name[16];
-       /*
-        * Up to 16 bytes of null padded ASCII string that
-        * identifies vendor specific part number of the PHY.
-        * If the string is set to null, then the vendor specific
-        * part number is not available.
-        */
-       char    phy_vendor_partnumber[16];
-       uint8_t unused_2[7];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * Retry the command since the PHY is not ready.
+        * The retry count is returned in opaque_0.
+        * It is only valid for the first command, and the value
+        * will not change for successive calls; however, if 0 is
+        * returned at any time, it should be treated as an
+        * unrecoverable failure.
+        *
+        * The retry interval in milliseconds is returned in
+        * opaque_1. This specifies the time that the user should
+        * wait before issuing the next port_phy_cfg command.
         */
-       uint8_t valid;
+       #define HWRM_PORT_PHY_CFG_CMD_ERR_CODE_RETRY         UINT32_C(0x2)
+       #define HWRM_PORT_PHY_CFG_CMD_ERR_CODE_LAST \
+               HWRM_PORT_PHY_CFG_CMD_ERR_CODE_RETRY
+       uint8_t unused_0[7];
 } __attribute__((packed));
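+
+/*
+ * Editorial sketch of the retry contract above. opaque_0/opaque_1 live
+ * in the common HWRM error response, and issue_port_phy_cfg() is a
+ * hypothetical wrapper around the command:
+ *
+ *   uint32_t retries = opaque_0;       (retry budget from first error)
+ *   uint32_t interval_ms = opaque_1;   (wait between attempts)
+ *
+ *   while (retries-- > 0) {
+ *           rte_delay_ms(interval_ms);
+ *           rc = issue_port_phy_cfg(bp);
+ *           if (rc != HWRM_PORT_PHY_CFG_CMD_ERR_CODE_RETRY)
+ *                   break;
+ *   }
+ */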
 
-/*********************
- * hwrm_port_mac_cfg *
- *********************/
+/**********************
+ * hwrm_port_phy_qcfg *
+ **********************/
 
 
-/* hwrm_port_mac_cfg_input (size:320b/40B) */
-struct hwrm_port_mac_cfg_input {
+/* hwrm_port_phy_qcfg_input (size:192b/24B) */
+struct hwrm_port_phy_qcfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -11118,1000 +11111,1217 @@ struct hwrm_port_mac_cfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
+       /* Port ID of port that is to be queried. */
+       uint16_t        port_id;
+       uint8_t unused_0[6];
+} __attribute__((packed));
+
+/* hwrm_port_phy_qcfg_output (size:768b/96B) */
+struct hwrm_port_phy_qcfg_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       /* This value indicates the current link status. */
+       uint8_t link;
+       /* There is no link or cable detected. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_NO_LINK UINT32_C(0x0)
+       /* There is no link, but a cable has been detected. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SIGNAL  UINT32_C(0x1)
+       /* There is a link. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_LINK    UINT32_C(0x2)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_LINK_LINK
+       uint8_t unused_0;
+       /* This value indicates the current link speed of the connection. */
+       uint16_t        link_speed;
+       /* 100Mb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_100MB UINT32_C(0x1)
+       /* 1Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_1GB   UINT32_C(0xa)
+       /* 2Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_2GB   UINT32_C(0x14)
+       /* 2.5Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_2_5GB UINT32_C(0x19)
+       /* 10Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10GB  UINT32_C(0x64)
+       /* 20Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_20GB  UINT32_C(0xc8)
+       /* 25Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_25GB  UINT32_C(0xfa)
+       /* 40Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_40GB  UINT32_C(0x190)
+       /* 50Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_50GB  UINT32_C(0x1f4)
+       /* 100Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_100GB UINT32_C(0x3e8)
+       /* 10Mb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10MB  UINT32_C(0xffff)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10MB
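+       /*
+        * Sketch (editorial): drivers typically translate these codes
+        * to DPDK speeds with a switch, e.g.:
+        *
+        *   switch (rte_le_to_cpu_16(resp->link_speed)) {
+        *   case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_10GB:
+        *           return ETH_SPEED_NUM_10G;
+        *   case HWRM_PORT_PHY_QCFG_OUTPUT_LINK_SPEED_25GB:
+        *           return ETH_SPEED_NUM_25G;
+        *   default:
+        *           return ETH_SPEED_NUM_NONE;
+        *   }
+        */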
        /*
-        * In this field, there are a number of CoS mappings related flags
-        * that are used to configure CoS mappings and their corresponding
-        * priorities in the hardware.
-        * For the priorities of CoS mappings, the HWRM uses the following
-        * priority order (high to low) by default:
-        * # vlan pri
-        * # ip_dscp
-        * # tunnel_vlan_pri
-        * # default cos
-        *
-        * A subset of CoS mappings can be enabled.
-        * If a priority is not specified for an enabled CoS mapping, the
-        * priority will be assigned in the above order for the enabled CoS
-        * mappings. For example, if vlan_pri and ip_dscp CoS mappings are
-        * enabled and their priorities are not specified, the following
-        * priority order (high to low) will be used by the HWRM:
-        * # vlan_pri
-        * # ip_dscp
-        * # default cos
-        *
-        * vlan_pri CoS mapping together with default CoS with lower priority
-        * are enabled by default by the HWRM.
+        * This value indicates the duplex of the current
+        * configuration.
         */
-       uint32_t        flags;
+       uint8_t duplex_cfg;
+       /* Half Duplex connection. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_HALF UINT32_C(0x0)
+       /* Full duplex connection. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_FULL UINT32_C(0x1)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_CFG_FULL
        /*
-        * When this bit is '1', this command will configure
-        * the MAC to match the current link state of the PHY.
-        * If the link is not established on the PHY, then this
-        * bit has no effect.
+        * This value is used to indicate the current
+        * pause configuration. When autoneg is enabled, this value
+        * represents the autoneg results of the pause configuration.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_MATCH_LINK \
-               UINT32_C(0x1)
+       uint8_t pause;
        /*
-        * When this bit is set to '1', the inner VLAN PRI to CoS mapping
-        * is requested to be enabled.
+        * When this bit is '1', Generation of tx pause messages
+        * is supported. Disabled otherwise.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_VLAN_PRI2COS_ENABLE \
-               UINT32_C(0x2)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_TX     UINT32_C(0x1)
        /*
-        * When this bit is set to '1', tunnel VLAN PRI field to
-        * CoS mapping is requested to be enabled.
+        * When this bit is '1', Reception of rx pause messages
+        * is supported. Disabled otherwise.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_TUNNEL_PRI2COS_ENABLE \
-               UINT32_C(0x4)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PAUSE_RX     UINT32_C(0x2)
        /*
-        * When this bit is set to '1', the IP DSCP to CoS mapping is
-        * requested to be enabled.
+        * The supported speeds for the port. This is a bit mask.
+        * For each speed that is supported, the corresponding
+        * bit will be set to '1'.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_IP_DSCP2COS_ENABLE \
+       uint16_t        support_speeds;
+       /* 100Mb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_100MBHD \
+               UINT32_C(0x1)
+       /* 100Mb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_100MB \
+               UINT32_C(0x2)
+       /* 1Gb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_1GBHD \
+               UINT32_C(0x4)
+       /* 1Gb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_1GB \
                UINT32_C(0x8)
-       /*
-        * When this bit is '1', the HWRM is requested to
-        * enable timestamp capture capability on the receive side
-        * of this port.
-        */
-       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_PTP_RX_TS_CAPTURE_ENABLE \
+       /* 2Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_2GB \
                UINT32_C(0x10)
-       /*
-        * When this bit is '1', the HWRM is requested to
-        * disable timestamp capture capability on the receive side
-        * of this port.
-        */
-       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_PTP_RX_TS_CAPTURE_DISABLE \
+       /* 2.5Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_2_5GB \
                UINT32_C(0x20)
-       /*
-        * When this bit is '1', the HWRM is requested to
-        * enable timestamp capture capability on the transmit side
-        * of this port.
-        */
-       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_PTP_TX_TS_CAPTURE_ENABLE \
+       /* 10Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_10GB \
                UINT32_C(0x40)
-       /*
-        * When this bit is '1', the HWRM is requested to
-        * disable timestamp capture capability on the transmit side
-        * of this port.
-        */
-       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_PTP_TX_TS_CAPTURE_DISABLE \
+       /* 20Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_20GB \
                UINT32_C(0x80)
+       /* 25Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_25GB \
+               UINT32_C(0x100)
+       /* 40Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_40GB \
+               UINT32_C(0x200)
+       /* 50Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_50GB \
+               UINT32_C(0x400)
+       /* 100Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_100GB \
+               UINT32_C(0x800)
+       /* 10Mb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_10MBHD \
+               UINT32_C(0x1000)
+       /* 10Mb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_10MB \
+               UINT32_C(0x2000)
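+       /*
+        * Sketch: support_speeds is a plain bit mask, so a capability
+        * check is a simple test:
+        *
+        *   uint16_t speeds = rte_le_to_cpu_16(resp->support_speeds);
+        *   int has_25g = !!(speeds &
+        *           HWRM_PORT_PHY_QCFG_OUTPUT_SUPPORT_SPEEDS_25GB);
+        */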
        /*
-        * When this bit is '1', the Out-Of-Box WoL is requested to
-        * be enabled on this port.
+        * Current setting of forced link speed.
+        * When the link speed is not being forced, this
+        * value shall be set to 0.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_OOB_WOL_ENABLE \
-               UINT32_C(0x100)
+       uint16_t        force_link_speed;
+       /* 100Mb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_100MB UINT32_C(0x1)
+       /* 1Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_1GB   UINT32_C(0xa)
+       /* 2Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_2GB   UINT32_C(0x14)
+       /* 2.5Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_2_5GB UINT32_C(0x19)
+       /* 10Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_10GB  UINT32_C(0x64)
+       /* 20Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_20GB  UINT32_C(0xc8)
+       /* 25Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_25GB  UINT32_C(0xfa)
+       /* 40Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_40GB \
+               UINT32_C(0x190)
+       /* 50Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_50GB \
+               UINT32_C(0x1f4)
+       /* 100Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_100GB \
+               UINT32_C(0x3e8)
+       /* 10Mb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_10MB \
+               UINT32_C(0xffff)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_10MB
+       /* Current setting of auto negotiation mode. */
+       uint8_t auto_mode;
+       /* Disable autoneg or autoneg disabled. No speeds are selected. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_NONE         UINT32_C(0x0)
+       /* Select all possible speeds for autoneg mode. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_ALL_SPEEDS   UINT32_C(0x1)
        /*
-        * When this bit is '1', the the Out-Of-Box WoL is requested to
-        * be disabled on this port.
+        * Select only the auto_link_speed speed for autoneg mode. This mode has
+        * been DEPRECATED. An HWRM client should not use this mode.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_OOB_WOL_DISABLE \
-               UINT32_C(0x200)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_ONE_SPEED    UINT32_C(0x2)
        /*
-        * When this bit is set to '1', the inner VLAN PRI to CoS mapping
-        * is requested to be disabled.
+        * Select the auto_link_speed or any speed below that speed for autoneg.
+        * This mode has been DEPRECATED. An HWRM client should not use this mode.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_VLAN_PRI2COS_DISABLE \
-               UINT32_C(0x400)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_ONE_OR_BELOW UINT32_C(0x3)
        /*
-        * When this bit is set to '1', tunnel VLAN PRI field to
-        * CoS mapping is requested to be disabled.
+        * Select the speeds based on the corresponding link speed mask value
+        * that is provided.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_TUNNEL_PRI2COS_DISABLE \
-               UINT32_C(0x800)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_SPEED_MASK   UINT32_C(0x4)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_MODE_SPEED_MASK
        /*
-        * When this bit is set to '1', the IP DSCP to CoS mapping is
-        * requested to be disabled.
+        * Current setting of pause autonegotiation.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_IP_DSCP2COS_DISABLE \
-               UINT32_C(0x1000)
-       uint32_t        enables;
+       uint8_t auto_pause;
        /*
-        * This bit must be '1' for the ipg field to be
-        * configured.
+        * When this bit is '1', generation of tx pause messages
+        * has been requested. Disabled otherwise.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_IPG \
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_PAUSE_TX \
                UINT32_C(0x1)
        /*
-        * This bit must be '1' for the lpbk field to be
-        * configured.
+        * When this bit is '1', reception of rx pause messages
+        * has been requested. Disabled otherwise.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_LPBK \
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_PAUSE_RX \
                UINT32_C(0x2)
        /*
-        * This bit must be '1' for the vlan_pri2cos_map_pri field to be
-        * configured.
+        * When set to 1, the advertisement of pause is enabled.
+        *
+        * # When the auto_mode is not set to none and this flag is
+        * set to 1, then the auto_pause bits on this port are being
+        * advertised and autoneg pause results are being interpreted.
+        * # When the auto_mode is not set to none and this
+        * flag is set to 0, the pause is forced as indicated in
+        * force_pause, and also advertised as auto_pause bits, but
+        * the autoneg results are not interpreted since the pause
+        * configuration is being forced.
+        * # When the auto_mode is set to none and this flag is set to
+        * 1, auto_pause bits should be ignored and should be set to 0.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_VLAN_PRI2COS_MAP_PRI \
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_PAUSE_AUTONEG_PAUSE \
                UINT32_C(0x4)
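+       /*
+        * Illustrative usage, a sketch only (the variable name `resp` is
+        * assumed): a consumer would typically test
+        *   resp->auto_pause & HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_PAUSE_TX
+        * and honour the result only when AUTONEG_PAUSE is set and
+        * auto_mode is not none, per the rules above.
+        */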
        /*
-        * This bit must be '1' for the tunnel_pri2cos_map_pri field to be
-        * configured.
+        * Current setting for auto_link_speed. This field is only
+        * valid when auto_mode is set to "one_speed" or "one_or_below".
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_TUNNEL_PRI2COS_MAP_PRI \
-               UINT32_C(0x10)
+       uint16_t        auto_link_speed;
+       /* 100Mb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_100MB UINT32_C(0x1)
+       /* 1Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_1GB   UINT32_C(0xa)
+       /* 2Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_2GB   UINT32_C(0x14)
+       /* 2.5Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_2_5GB UINT32_C(0x19)
+       /* 10Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_10GB  UINT32_C(0x64)
+       /* 20Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_20GB  UINT32_C(0xc8)
+       /* 25Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_25GB  UINT32_C(0xfa)
+       /* 40Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_40GB  UINT32_C(0x190)
+       /* 50Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_50GB  UINT32_C(0x1f4)
+       /* 100Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_100GB UINT32_C(0x3e8)
+       /* 10Mb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_10MB \
+               UINT32_C(0xffff)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_10MB
        /*
-        * This bit must be '1' for the dscp2cos_map_pri field to be
-        * configured.
+        * Current setting for auto_link_speed_mask that is used to
+        * advertise speeds during autonegotiation.
+        * This field is only valid when auto_mode is set to "mask".
+        * The speeds specified in this field shall be a subset of
+        * supported speeds on this port.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_DSCP2COS_MAP_PRI \
+       uint16_t        auto_link_speed_mask;
+       /* 100Mb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_100MBHD \
+               UINT32_C(0x1)
+       /* 100Mb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_100MB \
+               UINT32_C(0x2)
+       /* 1Gb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_1GBHD \
+               UINT32_C(0x4)
+       /* 1Gb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_1GB \
+               UINT32_C(0x8)
+       /* 2Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_2GB \
+               UINT32_C(0x10)
+       /* 2.5Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_2_5GB \
                UINT32_C(0x20)
-       /*
-        * This bit must be '1' for the rx_ts_capture_ptp_msg_type field to be
-        * configured.
-        */
-       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE \
+       /* 10Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_10GB \
                UINT32_C(0x40)
-       /*
-        * This bit must be '1' for the tx_ts_capture_ptp_msg_type field to be
-        * configured.
-        */
-       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_TX_TS_CAPTURE_PTP_MSG_TYPE \
+       /* 20Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_20GB \
                UINT32_C(0x80)
-       /*
-        * This bit must be '1' for the cos_field_cfg field to be
-        * configured.
-        */
-       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_COS_FIELD_CFG \
+       /* 25Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_25GB \
                UINT32_C(0x100)
-       /* Port ID of port that is to be configured. */
-       uint16_t        port_id;
-       /*
-        * This value is used to configure the minimum IPG that will
-        * be sent between packets by this port.
-        */
-       uint8_t ipg;
-       /* This value controls the loopback setting for the MAC. */
+       /* 40Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_40GB \
+               UINT32_C(0x200)
+       /* 50Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_50GB \
+               UINT32_C(0x400)
+       /* 100Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_100GB \
+               UINT32_C(0x800)
+       /* 10Mb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_10MBHD \
+               UINT32_C(0x1000)
+       /* 10Mb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_AUTO_LINK_SPEED_MASK_10MB \
+               UINT32_C(0x2000)
+       /* Current setting for wirespeed. */
+       uint8_t wirespeed;
+       /* Wirespeed feature is disabled. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_WIRESPEED_OFF UINT32_C(0x0)
+       /* Wirespeed feature is enabled. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_WIRESPEED_ON  UINT32_C(0x1)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_WIRESPEED_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_WIRESPEED_ON
+       /* Current setting for loopback. */
        uint8_t lpbk;
        /* No loopback is selected.  Normal operation. */
-       #define HWRM_PORT_MAC_CFG_INPUT_LPBK_NONE   UINT32_C(0x0)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LPBK_NONE     UINT32_C(0x0)
        /*
         * The HW will be configured with local loopback such that
         * host data is sent back to the host without modification.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_LPBK_LOCAL  UINT32_C(0x1)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LPBK_LOCAL    UINT32_C(0x1)
        /*
         * The HW will be configured with remote loopback such that
         * port logic will send packets back out the transmitter that
         * are received.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_LPBK_REMOTE UINT32_C(0x2)
-       #define HWRM_PORT_MAC_CFG_INPUT_LPBK_LAST \
-               HWRM_PORT_MAC_CFG_INPUT_LPBK_REMOTE
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LPBK_REMOTE   UINT32_C(0x2)
        /*
-        * This value controls the priority setting of VLAN PRI to CoS
-        * mapping based on VLAN Tags of inner packet headers of
-        * tunneled packets or packet headers of non-tunneled packets.
-        *
-        * # Each XXX_pri variable shall have a unique priority value
-        * when it is being specified.
-        * # When comparing priorities of mappings, higher value
-        * indicates higher priority.
-        * For example, a value of 0-3 is returned where 0 is being
-        * the lowest priority and 3 is being the highest priority.
+        * The HW will be configured with external loopback such that
+        * host data is sent on the transmitter and, based on the external
+        * loopback connection, the data will be received without modification.
         */
-       uint8_t vlan_pri2cos_map_pri;
-       /* Reserved field. */
-       uint8_t reserved1;
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LPBK_EXTERNAL UINT32_C(0x3)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LPBK_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_LPBK_EXTERNAL
        /*
-        * This value controls the priority setting of VLAN PRI to CoS
-        * mapping based on VLAN Tags of tunneled header.
-        * This mapping only applies when tunneled headers
-        * are present.
-        *
-        * # Each XXX_pri variable shall have a unique priority value
-        * when it is being specified.
-        * # When comparing priorities of mappings, higher value
-        * indicates higher priority.
-        * For example, a value of 0-3 is returned where 0 is being
-        * the lowest priority and 3 is being the highest priority.
+        * Current setting of forced pause.
+        * When the pause configuration is not being forced, then
+        * this value shall be set to 0.
         */
-       uint8_t tunnel_pri2cos_map_pri;
+       uint8_t force_pause;
        /*
-        * This value controls the priority setting of IP DSCP to CoS
-        * mapping based on inner IP header of tunneled packets or
-        * IP header of non-tunneled packets.
-        *
-        * # Each XXX_pri variable shall have a unique priority value
-        * when it is being specified.
-        * # When comparing priorities of mappings, higher value
-        * indicates higher priority.
-        * For example, a value of 0-3 is returned where 0 is being
-        * the lowest priority and 3 is being the highest priority.
+        * When this bit is '1', generation of tx pause messages
+        * is supported. Disabled otherwise.
         */
-       uint8_t dscp2pri_map_pri;
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_PAUSE_TX     UINT32_C(0x1)
        /*
-        * This is a 16-bit bit mask that is used to request a
-        * specific configuration of time stamp capture of PTP messages
-        * on the receive side of this port.
-        * This field shall be ignored if the ptp_rx_ts_capture_enable
-        * flag is not set in this command.
-        * Otherwise, if bit 'i' is set, then the HWRM is being
-        * requested to configure the receive side of the port to
-        * capture the time stamp of every received PTP message
-        * with messageType field value set to i.
+        * When this bit is '1', reception of rx pause messages
+        * is supported. Disabled otherwise.
         */
-       uint16_t        rx_ts_capture_ptp_msg_type;
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_PAUSE_RX     UINT32_C(0x2)
        /*
-        * This is a 16-bit bit mask that is used to request a
-        * specific configuration of time stamp capture of PTP messages
-        * on the transmit side of this port.
-        * This field shall be ignored if the ptp_tx_ts_capture_enable
-        * flag is not set in this command.
-        * Otherwise, if bit 'i' is set, then the HWRM is being
-        * requested to configure the transmit sied of the port to
-        * capture the time stamp of every transmitted PTP message
-        * with messageType field value set to i.
+        * This value indicates the current status of the optics module on
+        * this port.
         */
-       uint16_t        tx_ts_capture_ptp_msg_type;
-       /* Configuration of CoS fields. */
-       uint8_t cos_field_cfg;
-       /* Reserved */
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_RSVD1 \
+       uint8_t module_status;
+       /* Module is inserted and accepted */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_NONE \
+               UINT32_C(0x0)
+       /* Module is rejected and the transmit side laser is disabled. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_DISABLETX \
                UINT32_C(0x1)
-       /*
-        * This field is used to specify selection of VLAN PRI value
-        * based on whether one or two VLAN Tags are present in
-        * the inner packet headers of tunneled packets or
-        * non-tunneled packets.
-        * This field is valid only if inner VLAN PRI to CoS mapping
-        * is enabled.
-        * If VLAN PRI to CoS mapping is not enabled, then this
-        * field shall be ignored.
-        */
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_MASK \
+       /* Module mismatch warning. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_WARNINGMSG \
+               UINT32_C(0x2)
+       /* Module is rejected and powered down. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_PWRDOWN \
+               UINT32_C(0x3)
+       /* Module is not inserted. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_NOTINSERTED \
+               UINT32_C(0x4)
+       /* Module status is not applicable. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_NOTAPPLICABLE \
+               UINT32_C(0xff)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_MODULE_STATUS_NOTAPPLICABLE
+       /* Current setting for preemphasis. */
+       uint32_t        preemphasis;
+       /* This field represents the major version of the PHY. */
+       uint8_t phy_maj;
+       /* This field represents the minor version of the PHY. */
+       uint8_t phy_min;
+       /* This field represents the build version of the PHY. */
+       uint8_t phy_bld;
+       /* This value represents a PHY type. */
+       uint8_t phy_type;
+       /* Unknown */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_UNKNOWN \
+               UINT32_C(0x0)
+       /* BASE-CR */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASECR \
+               UINT32_C(0x1)
+       /* BASE-KR4 (Deprecated) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR4 \
+               UINT32_C(0x2)
+       /* BASE-LR */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASELR \
+               UINT32_C(0x3)
+       /* BASE-SR */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASESR \
+               UINT32_C(0x4)
+       /* BASE-KR2 (Deprecated) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR2 \
+               UINT32_C(0x5)
+       /* BASE-KX */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKX \
                UINT32_C(0x6)
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_SFT \
-               1
+       /* BASE-KR */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASEKR \
+               UINT32_C(0x7)
+       /* BASE-T */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASET \
+               UINT32_C(0x8)
+       /* EEE capable BASE-T */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_BASETE \
+               UINT32_C(0x9)
+       /* SGMII connected external PHY */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_SGMIIEXTPHY \
+               UINT32_C(0xa)
+       /* 25G_BASECR_CA_L */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASECR_CA_L \
+               UINT32_C(0xb)
+       /* 25G_BASECR_CA_S */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASECR_CA_S \
+               UINT32_C(0xc)
+       /* 25G_BASECR_CA_N */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASECR_CA_N \
+               UINT32_C(0xd)
+       /* 25G_BASESR */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_25G_BASESR \
+               UINT32_C(0xe)
+       /* 100G_BASECR4 */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASECR4 \
+               UINT32_C(0xf)
+       /* 100G_BASESR4 */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASESR4 \
+               UINT32_C(0x10)
+       /* 100G_BASELR4 */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASELR4 \
+               UINT32_C(0x11)
+       /* 100G_BASEER4 */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASEER4 \
+               UINT32_C(0x12)
+       /* 100G_BASESR10 */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_100G_BASESR10 \
+               UINT32_C(0x13)
+       /* 40G_BASECR4 */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASECR4 \
+               UINT32_C(0x14)
+       /* 40G_BASESR4 */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASESR4 \
+               UINT32_C(0x15)
+       /* 40G_BASELR4 */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASELR4 \
+               UINT32_C(0x16)
+       /* 40G_BASEER4 */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_BASEER4 \
+               UINT32_C(0x17)
+       /* 40G_ACTIVE_CABLE */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_40G_ACTIVE_CABLE \
+               UINT32_C(0x18)
+       /* 1G_baseT */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASET \
+               UINT32_C(0x19)
+       /* 1G_baseSX */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASESX \
+               UINT32_C(0x1a)
+       /* 1G_baseCX */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASECX \
+               UINT32_C(0x1b)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_PHY_TYPE_1G_BASECX
+       /* This value represents a media type. */
+       uint8_t media_type;
+       /* Unknown */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_UNKNOWN UINT32_C(0x0)
+       /* Twisted Pair */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_TP      UINT32_C(0x1)
+       /* Direct Attached Copper */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_DAC     UINT32_C(0x2)
+       /* Fiber */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_FIBRE   UINT32_C(0x3)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_MEDIA_TYPE_FIBRE
+       /* This value represents a transceiver type. */
+       uint8_t xcvr_pkg_type;
+       /* PHY and MAC are in the same package */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_PKG_TYPE_XCVR_INTERNAL \
+               UINT32_C(0x1)
+       /* PHY and MAC are in different packages */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_PKG_TYPE_XCVR_EXTERNAL \
+               UINT32_C(0x2)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_PKG_TYPE_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_PKG_TYPE_XCVR_EXTERNAL
+       uint8_t eee_config_phy_addr;
+       /* This field represents PHY address. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_ADDR_MASK \
+               UINT32_C(0x1f)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PHY_ADDR_SFT               0
        /*
-        * Select inner VLAN PRI when 1 or 2 VLAN Tags are
-        * present in the inner packet headers
+        * This field represents flags related to EEE configuration.
+        * These EEE configuration flags are valid only when the
+        * auto_mode is not set to none (in other words, autonegotiation
+        * is enabled).
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_INNERMOST \
-               (UINT32_C(0x0) << 1)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_EEE_CONFIG_MASK \
+               UINT32_C(0xe0)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_EEE_CONFIG_SFT             5
        /*
-        * Select outer VLAN Tag PRI when 2 VLAN Tags are
-        * present in the inner packet headers.
-        * No VLAN PRI shall be selected for this configuration
-        * if only one VLAN Tag is present in the inner
-        * packet headers.
+        * When set to 1, Energy Efficient Ethernet (EEE) mode is enabled.
+        * Speeds for autoneg with EEE mode enabled
+        * are based on eee_link_speed_mask.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_OUTER \
-               (UINT32_C(0x1) << 1)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_EEE_CONFIG_EEE_ENABLED \
+               UINT32_C(0x20)
        /*
-        * Select outermost VLAN PRI when 1 or 2 VLAN Tags
-        * are present in the inner packet headers
+        * This flag is valid only when eee_enabled is set to 1.
+        *
+        * # If eee_enabled is set to 0, then EEE mode is disabled
+        * and this flag shall be ignored.
+        * # If eee_enabled is set to 1 and this flag is set to 1,
+        * then Energy Efficient Ethernet (EEE) mode is enabled
+        * and in use.
+        * # If eee_enabled is set to 1 and this flag is set to 0,
+        * then Energy Efficient Ethernet (EEE) mode is enabled
+        * but is currently not in use.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_OUTERMOST \
-               (UINT32_C(0x2) << 1)
-       /* Unspecified */
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED \
-               (UINT32_C(0x3) << 1)
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_LAST \
-               HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED
-       /*
-        * This field is used to specify selection of tunnel VLAN
-        * PRI value based on whether one or two VLAN Tags are
-        * present in tunnel headers.
-        * This field is valid only if tunnel VLAN PRI to CoS mapping
-        * is enabled.
-        * If tunnel VLAN PRI to CoS mapping is not enabled, then this
-        * field shall be ignored.
-        */
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_MASK \
-               UINT32_C(0x18)
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_SFT \
-               3
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_EEE_CONFIG_EEE_ACTIVE \
+               UINT32_C(0x40)
        /*
-        * Select inner VLAN PRI when 1 or 2 VLAN Tags are
-        * present in the tunnel packet headers
+        * This flag is valid only when eee_enabled is set to 1.
+        *
+        * # If eee_enabled is set to 0, then EEE mode is disabled
+        * and this flag shall be ignored.
+        * # If eee_enabled is set to 1 and this flag is set to 1,
+        * then Energy Efficient Ethernet (EEE) mode is enabled
+        * and TX LPI is enabled.
+        * # If eee_enabled is set to 1 and this flag is set to 0,
+        * then Energy Efficient Ethernet (EEE) mode is enabled
+        * but TX LPI is disabled.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_INNERMOST \
-               (UINT32_C(0x0) << 3)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_EEE_CONFIG_EEE_TX_LPI \
+               UINT32_C(0x80)
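+       /*
+        * Illustrative decode, a sketch only (the variable name `v` is
+        * assumed): the PHY address and the EEE flags share this byte, so
+        *   addr = (v & HWRM_PORT_PHY_QCFG_OUTPUT_PHY_ADDR_MASK) >>
+        *          HWRM_PORT_PHY_QCFG_OUTPUT_PHY_ADDR_SFT;
+        * while the EEE_CONFIG_* flags above are tested directly on `v`.
+        */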
        /*
-        * Select outer VLAN Tag PRI when 2 VLAN Tags are
-        * present in the tunnel packet headers.
-        * No tunnel VLAN PRI shall be selected for this
-        * configuration if only one VLAN Tag is present in
-        * the tunnel packet headers.
+        * When set to 1, parallel detection is used to determine
+        * the speed of the link partner.
+        *
+        * Parallel detection is used when an autonegotiation-capable
+        * device is connected to a link partner that is not capable
+        * of autonegotiation.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTER \
-               (UINT32_C(0x1) << 3)
+       uint8_t parallel_detect;
        /*
-        * Select outermost VLAN PRI when 1 or 2 VLAN Tags
-        * are present in the tunnel packet headers
+        * When set to 1, parallel detection is used to determine
+        * the speed of the link partner.
+        *
+        * Parallel detection is used when an autonegotiation-capable
+        * device is connected to a link partner that is not capable
+        * of autonegotiation.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTERMOST \
-               (UINT32_C(0x2) << 3)
-       /* Unspecified */
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED \
-               (UINT32_C(0x3) << 3)
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_LAST \
-               HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_PARALLEL_DETECT     UINT32_C(0x1)
        /*
-        * This field shall be used to provide default CoS value
-        * that has been configured on this port.
-        * This field is valid only if default CoS mapping
-        * is enabled.
-        * If default CoS mapping is not enabled, then this
-        * field shall be ignored.
+        * The advertised speeds for the port by the link partner.
+        * Each advertised speed will be set to '1'.
         */
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_DEFAULT_COS_MASK \
-               UINT32_C(0xe0)
-       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_DEFAULT_COS_SFT \
-               5
-       uint8_t unused_0[3];
-} __attribute__((packed));
-
-/* hwrm_port_mac_cfg_output (size:128b/16B) */
-struct hwrm_port_mac_cfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
+       uint16_t        link_partner_adv_speeds;
+       /* 100Mb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_100MBHD \
+               UINT32_C(0x1)
+       /* 100Mb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_100MB \
+               UINT32_C(0x2)
+       /* 1Gb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_1GBHD \
+               UINT32_C(0x4)
+       /* 1Gb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_1GB \
+               UINT32_C(0x8)
+       /* 2Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_2GB \
+               UINT32_C(0x10)
+       /* 2.5Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_2_5GB \
+               UINT32_C(0x20)
+       /* 10Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_10GB \
+               UINT32_C(0x40)
+       /* 20Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_20GB \
+               UINT32_C(0x80)
+       /* 25Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_25GB \
+               UINT32_C(0x100)
+       /* 40Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_40GB \
+               UINT32_C(0x200)
+       /* 50Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_50GB \
+               UINT32_C(0x400)
+       /* 100Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_100GB \
+               UINT32_C(0x800)
+       /* 10Mb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_10MBHD \
+               UINT32_C(0x1000)
+       /* 10Mb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_SPEEDS_10MB \
+               UINT32_C(0x2000)
        /*
-        * This is the configured maximum length of Ethernet packet
-        * payload that is allowed to be received on the port.
-        * This value does not include the number of bytes used by
-        * Ethernet header and trailer (CRC).
+        * The advertised autoneg for the port by the link partner.
+        * This field is deprecated and should be set to 0.
         */
-       uint16_t        mru;
+       uint8_t link_partner_adv_auto_mode;
+       /* Autoneg is disabled; no speeds are selected. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_AUTO_MODE_NONE \
+               UINT32_C(0x0)
+       /* Select all possible speeds for autoneg mode. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_AUTO_MODE_ALL_SPEEDS \
+               UINT32_C(0x1)
        /*
-        * This is the configured maximum length of Ethernet packet
-        * payload that is allowed to be transmitted on the port.
-        * This value does not include the number of bytes used by
-        * Ethernet header and trailer (CRC).
+        * Select only the auto_link_speed speed for autoneg mode. This mode has
+        * been DEPRECATED. An HWRM client should not use this mode.
         */
-       uint16_t        mtu;
-       /* Current configuration of the IPG value. */
-       uint8_t ipg;
-       /* Current value of the loopback value. */
-       uint8_t lpbk;
-       /* No loopback is selected.  Normal operation. */
-       #define HWRM_PORT_MAC_CFG_OUTPUT_LPBK_NONE   UINT32_C(0x0)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_AUTO_MODE_ONE_SPEED \
+               UINT32_C(0x2)
        /*
-        * The HW will be configured with local loopback such that
-        * host data is sent back to the host without modification.
+        * Select the auto_link_speed or any speed below that speed for autoneg.
+        * This mode has been DEPRECATED. An HWRM client should not use this mode.
         */
-       #define HWRM_PORT_MAC_CFG_OUTPUT_LPBK_LOCAL  UINT32_C(0x1)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_AUTO_MODE_ONE_OR_BELOW \
+               UINT32_C(0x3)
        /*
-        * The HW will be configured with remote loopback such that
-        * port logic will send packets back out the transmitter that
-        * are received.
+        * Select the speeds based on the corresponding link speed mask value
+        * that is provided.
         */
-       #define HWRM_PORT_MAC_CFG_OUTPUT_LPBK_REMOTE UINT32_C(0x2)
-       #define HWRM_PORT_MAC_CFG_OUTPUT_LPBK_LAST \
-               HWRM_PORT_MAC_CFG_OUTPUT_LPBK_REMOTE
-       uint8_t unused_0;
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK \
+               UINT32_C(0x4)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_AUTO_MODE_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK
+       /* The advertised pause settings on the port by the link partner. */
+       uint8_t link_partner_adv_pause;
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * When this bit is '1', generation of tx pause messages
+        * is supported. Disabled otherwise.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/**********************
- * hwrm_port_mac_qcfg *
- **********************/
-
-
-/* hwrm_port_mac_qcfg_input (size:192b/24B) */
-struct hwrm_port_mac_qcfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_PAUSE_TX \
+               UINT32_C(0x1)
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * When this bit is '1', reception of rx pause messages
+        * is supported. Disabled otherwise.
         */
-       uint16_t        cmpl_ring;
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_PAUSE_RX \
+               UINT32_C(0x2)
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * Current setting for link speed mask that is used to
+        * advertise speeds during autonegotiation when EEE is enabled.
+        * This field is valid only when the eee_enabled flag is set to 1.
+        * The speeds specified in this field shall be a subset of
+        * speeds specified in auto_link_speed_mask.
         */
-       uint16_t        seq_id;
+       uint16_t        adv_eee_link_speed_mask;
+       /* Reserved */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_ADV_EEE_LINK_SPEED_MASK_RSVD1 \
+               UINT32_C(0x1)
+       /* 100Mb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_ADV_EEE_LINK_SPEED_MASK_100MB \
+               UINT32_C(0x2)
+       /* Reserved */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_ADV_EEE_LINK_SPEED_MASK_RSVD2 \
+               UINT32_C(0x4)
+       /* 1Gb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_ADV_EEE_LINK_SPEED_MASK_1GB \
+               UINT32_C(0x8)
+       /* Reserved */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_ADV_EEE_LINK_SPEED_MASK_RSVD3 \
+               UINT32_C(0x10)
+       /* Reserved */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_ADV_EEE_LINK_SPEED_MASK_RSVD4 \
+               UINT32_C(0x20)
+       /* 10Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_ADV_EEE_LINK_SPEED_MASK_10GB \
+               UINT32_C(0x40)
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * Current setting for link speed mask that is advertised by
+        * the link partner when EEE is enabled.
+        * This field is valid only when the eee_enabled flag is set to 1.
         */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+       uint16_t        link_partner_adv_eee_link_speed_mask;
+       /* Reserved */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_RSVD1 \
+               UINT32_C(0x1)
+       /* 100Mb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_100MB \
+               UINT32_C(0x2)
+       /* Reserved */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_RSVD2 \
+               UINT32_C(0x4)
+       /* 1Gb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_1GB \
+               UINT32_C(0x8)
+       /* Reserved */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_RSVD3 \
+               UINT32_C(0x10)
+       /* Reserved */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_RSVD4 \
+               UINT32_C(0x20)
+       /* 10Gb link speed */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_LINK_PARTNER_ADV_EEE_LINK_SPEED_MASK_10GB \
+               UINT32_C(0x40)
+       uint32_t        xcvr_identifier_type_tx_lpi_timer;
+       /*
+        * Current setting of TX LPI timer in microseconds.
+        * This field is valid only when the eee_enabled flag is set to 1
+        * and tx_lpi_enabled is set to 1.
         */
-       uint64_t        resp_addr;
-       /* Port ID of port that is to be configured. */
-       uint16_t        port_id;
-       uint8_t unused_0[6];
-} __attribute__((packed));
-
-/* hwrm_port_mac_qcfg_output (size:192b/24B) */
-struct hwrm_port_mac_qcfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_TX_LPI_TIMER_MASK \
+               UINT32_C(0xffffff)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_TX_LPI_TIMER_SFT             0
+       /* This value represents transceiver identifier type. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_MASK \
+               UINT32_C(0xff000000)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_SFT     24
+       /* Unknown */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_UNKNOWN \
+               (UINT32_C(0x0) << 24)
+       /* SFP/SFP+/SFP28 */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_SFP \
+               (UINT32_C(0x3) << 24)
+       /* QSFP */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_QSFP \
+               (UINT32_C(0xc) << 24)
+       /* QSFP+ */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_QSFPPLUS \
+               (UINT32_C(0xd) << 24)
+       /* QSFP28 */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_QSFP28 \
+               (UINT32_C(0x11) << 24)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_QSFP28
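+       /*
+        * Illustrative decode, a sketch only (the variable name `v` is
+        * assumed): the TX LPI timer and the transceiver identifier share
+        * this 32-bit word, so
+        *   lpi_us = v & HWRM_PORT_PHY_QCFG_OUTPUT_TX_LPI_TIMER_MASK;
+        *   xcvr = (v & HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_MASK)
+        *          >> HWRM_PORT_PHY_QCFG_OUTPUT_XCVR_IDENTIFIER_TYPE_SFT;
+        */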
        /*
-        * This is the configured maximum length of Ethernet packet
-        * payload that is allowed to be received on the port.
-        * This value does not include the number of bytes used by the
-        * Ethernet header and trailer (CRC).
+        * This value represents the current configuration of
+        * Forward Error Correction (FEC) on the port.
         */
-       uint16_t        mru;
+       uint16_t        fec_cfg;
        /*
-        * This is the configured maximum length of Ethernet packet
-        * payload that is allowed to be transmitted on the port.
-        * This value does not include the number of bytes used by the
-        * Ethernet header and trailer (CRC).
+        * When set to 1, then FEC is not supported on this port. If this flag
+        * is set to 1, then all other FEC configuration flags shall be ignored.
+        * When set to 0, then FEC is supported as indicated by other
+        * configuration flags.
+        * If no cable is attached and the HWRM does not yet know the FEC
+        * capability, then the HWRM shall set this flag to 1 when reporting
+        * FEC capability.
         */
-       uint16_t        mtu;
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_NONE_SUPPORTED \
+               UINT32_C(0x1)
        /*
-        * The minimum IPG that will
-        * be sent between packets by this port.
+        * When set to 1, then FEC autonegotiation is supported on this port.
+        * When set to 0, then FEC autonegotiation is not supported on this port.
         */
-       uint8_t ipg;
-       /* The loopback setting for the MAC. */
-       uint8_t lpbk;
-       /* No loopback is selected.  Normal operation. */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_LPBK_NONE   UINT32_C(0x0)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_AUTONEG_SUPPORTED \
+               UINT32_C(0x2)
        /*
-        * The HW will be configured with local loopback such that
-        * host data is sent back to the host without modification.
+        * When set to 1, then FEC autonegotiation is enabled on this port.
+        * When set to 0, then FEC autonegotiation is disabled if supported.
+        * This flag should be ignored if FEC autonegotiation is not supported on this port.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_LPBK_LOCAL  UINT32_C(0x1)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_AUTONEG_ENABLED \
+               UINT32_C(0x4)
        /*
-        * The HW will be configured with remote loopback such that
-        * port logic will send packets back out the transmitter that
-        * are received.
+        * When set to 1, then FEC CLAUSE 74 (Fire Code) is supported on this port.
+        * When set to 0, then FEC CLAUSE 74 (Fire Code) is not supported on this port.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_LPBK_REMOTE UINT32_C(0x2)
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_LPBK_LAST \
-               HWRM_PORT_MAC_QCFG_OUTPUT_LPBK_REMOTE
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_CLAUSE74_SUPPORTED \
+               UINT32_C(0x8)
        /*
-        * Priority setting for VLAN PRI to CoS mapping.
-        * # Each XXX_pri variable shall have a unique priority value
-        * when it is being used.
-        * # When comparing priorities of mappings, higher value
-        * indicates higher priority.
-        * For example, a value of 0-3 is returned where 0 is being
-        * the lowest priority and 3 is being the highest priority.
-        * # If the correspoding CoS mapping is not enabled, then this
-        * field should be ignored.
-        * # This value indicates the normalized priority value retained
-        * in the HWRM.
+        * When set to 1, then FEC CLAUSE 74 (Fire Code) is enabled on this port.
+        * When set to 0, then FEC CLAUSE 74 (Fire Code) is disabled if supported.
+        * This flag should be ignored if FEC CLAUSE 74 is not supported on this port.
         */
-       uint8_t vlan_pri2cos_map_pri;
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_CLAUSE74_ENABLED \
+               UINT32_C(0x10)
        /*
-        * In this field, a number of CoS mappings related flags
-        * are used to indicate configured CoS mappings.
+        * When set to 1, then FEC CLAUSE 91 (Reed Solomon) is supported on this port.
+        * When set to 0, then FEC CLAUSE 91 (Reed Solomon) is not supported on this port.
         */
-       uint8_t flags;
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_CLAUSE91_SUPPORTED \
+               UINT32_C(0x20)
        /*
-        * When this bit is set to '1', the inner VLAN PRI to CoS mapping
-        * is enabled.
+        * When set to 1, then FEC CLAUSE 91 (Reed Solomon) is enabled on this port.
+        * When set to 0, then FEC CLAUSE 91 (Reed Solomon) is disabled if supported.
+        * This flag should be ignored if FEC CLAUSE 91 is not supported on this port.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_FLAGS_VLAN_PRI2COS_ENABLE \
-               UINT32_C(0x1)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_FEC_CFG_FEC_CLAUSE91_ENABLED \
+               UINT32_C(0x40)
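+       /*
+        * Illustrative check order, an editorial suggestion based on the
+        * rules above: test FEC_NONE_SUPPORTED first and, only when it is
+        * 0, inspect each *_SUPPORTED bit before its *_ENABLED bit, since
+        * an ENABLED flag is meaningful only if the corresponding
+        * SUPPORTED flag is set.
+        */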
        /*
-        * When this bit is set to '1', tunnel VLAN PRI field to
-        * CoS mapping is enabled.
+        * This value indicates the duplex of the current
+        * connection state.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_FLAGS_TUNNEL_PRI2COS_ENABLE \
-               UINT32_C(0x2)
+       uint8_t duplex_state;
+       /* Half Duplex connection. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_STATE_HALF UINT32_C(0x0)
+       /* Full duplex connection. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_STATE_FULL UINT32_C(0x1)
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_STATE_LAST \
+               HWRM_PORT_PHY_QCFG_OUTPUT_DUPLEX_STATE_FULL
+       /* Option flags fields. */
+       uint8_t option_flags;
+       /* When this bit is '1', Media auto detect is enabled. */
+       #define HWRM_PORT_PHY_QCFG_OUTPUT_OPTION_FLAGS_MEDIA_AUTO_DETECT \
+               UINT32_C(0x1)
        /*
-        * When this bit is set to '1', the IP DSCP to CoS mapping is
-        * enabled.
+        * Up to 16 bytes of null padded ASCII string representing
+        * PHY vendor.
+        * If the string is set to null, then the vendor name is not
+        * available.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_FLAGS_IP_DSCP2COS_ENABLE \
-               UINT32_C(0x4)
+       char    phy_vendor_name[16];
        /*
-        * When this bit is '1', the Out-Of-Box WoL is enabled on this
-        * port.
+        * Up to 16 bytes of null padded ASCII string that
+        * identifies vendor specific part number of the PHY.
+        * If the string is set to null, then the vendor specific
+        * part number is not available.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_FLAGS_OOB_WOL_ENABLE \
-               UINT32_C(0x8)
-       /* When this bit is '1', PTP is enabled for RX on this port. */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_FLAGS_PTP_RX_TS_CAPTURE_ENABLE \
-               UINT32_C(0x10)
-       /* When this bit is '1', PTP is enabled for TX on this port. */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_FLAGS_PTP_TX_TS_CAPTURE_ENABLE \
-               UINT32_C(0x20)
+       char    phy_vendor_partnumber[16];
+       uint8_t unused_2[7];
        /*
-        * Priority setting for tunnel VLAN PRI to CoS mapping.
-        * # Each XXX_pri variable shall have a unique priority value
-        * when it is being used.
-        * # When comparing priorities of mappings, higher value
-        * indicates higher priority.
-        * For example, a value of 0-3 is returned where 0 is being
-        * the lowest priority and 3 is being the highest priority.
-        * # If the correspoding CoS mapping is not enabled, then this
-        * field should be ignored.
-        * # This value indicates the normalized priority value retained
-        * in the HWRM.
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       uint8_t tunnel_pri2cos_map_pri;
+       uint8_t valid;
+} __attribute__((packed));
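+
+/*
+ * Illustrative helper, a minimal sketch rather than part of the generated
+ * HSI: it assumes the encoding noted above, i.e. HWRM speed codes are the
+ * link speed in 100 Mbps units with 0xffff reserved for 10Mb. The function
+ * name is hypothetical.
+ */
+static inline uint32_t
+hwrm_link_speed_code_to_mbps(uint16_t code)
+{
+       if (code == HWRM_PORT_PHY_QCFG_OUTPUT_FORCE_LINK_SPEED_10MB)
+               return 10;
+       if (code == 0)
+               return 0; /* speed not forced or not valid */
+       return (uint32_t)code * 100;
+}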
+
+/*********************
+ * hwrm_port_mac_cfg *
+ *********************/
+
+
+/* hwrm_port_mac_cfg_input (size:320b/40B) */
+struct hwrm_port_mac_cfg_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * Priority setting for DSCP to PRI mapping.
-        * # Each XXX_pri variable shall have a unique priority value
-        * when it is being used.
-        * # When comparing priorities of mappings, higher value
-        * indicates higher priority.
-        * For example, a value of 0-3 is returned where 0 is being
-        * the lowest priority and 3 is being the highest priority.
-        * # If the correspoding CoS mapping is not enabled, then this
-        * field should be ignored.
-        * # This value indicates the normalized priority value retained
-        * in the HWRM.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       uint8_t dscp2pri_map_pri;
+       uint16_t        cmpl_ring;
        /*
-        * This is a 16-bit bit mask that represents the
-        * current configuration of time stamp capture of PTP messages
-        * on the receive side of this port.
-        * If bit 'i' is set, then the receive side of the port
-        * is configured to capture the time stamp of every
-        * received PTP message with messageType field value set
-        * to i.
-        * If all bits are set to 0 (i.e. field value set 0),
-        * then the receive side of the port is not configured
-        * to capture timestamp for PTP messages.
-        * If all bits are set to 1, then the receive side of the
-        * port is configured to capture timestamp for all PTP
-        * messages.
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       uint16_t        rx_ts_capture_ptp_msg_type;
+       uint16_t        seq_id;
        /*
-        * This is a 16-bit bit mask that represents the
-        * current configuration of time stamp capture of PTP messages
-        * on the transmit side of this port.
-        * If bit 'i' is set, then the transmit side of the port
-        * is configured to capture the time stamp of every
-        * received PTP message with messageType field value set
-        * to i.
-        * If all bits are set to 0 (i.e. field value set 0),
-        * then the transmit side of the port is not configured
-        * to capture timestamp for PTP messages.
-        * If all bits are set to 1, then the transmit side of the
-        * port is configured to capture timestamp for all PTP
-        * messages.
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       uint16_t        tx_ts_capture_ptp_msg_type;
-       /* Configuration of CoS fields. */
-       uint8_t cos_field_cfg;
-       /* Reserved */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_RSVD \
-               UINT32_C(0x1)
+       uint16_t        target_id;
        /*
-        * This field is used for selecting VLAN PRI value
-        * based on whether one or two VLAN Tags are present in
-        * the inner packet headers of tunneled packets or
-        * non-tunneled packets.
+        * A physical address pointer pointing to a host buffer that the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_MASK \
-               UINT32_C(0x6)
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_SFT \
-               1
+       uint64_t        resp_addr;
        /*
-        * Select inner VLAN PRI when 1 or 2 VLAN Tags are
-        * present in the inner packet headers
+        * This field contains a number of CoS-mapping-related flags
+        * that are used to configure CoS mappings and their
+        * corresponding priorities in the hardware.
+        * For the priorities of CoS mappings, the HWRM uses the following
+        * priority order (high to low) by default:
+        * # vlan_pri
+        * # ip_dscp
+        * # tunnel_vlan_pri
+        * # default cos
+        *
+        * A subset of CoS mappings can be enabled.
+        * If a priority is not specified for an enabled CoS mapping, the
+        * priority will be assigned in the above order for the enabled CoS
+        * mappings. For example, if vlan_pri and ip_dscp CoS mappings are
+        * enabled and their priorities are not specified, the following
+        * priority order (high to low) will be used by the HWRM:
+        * # vlan_pri
+        * # ip_dscp
+        * # default cos
+        *
+        * The vlan_pri CoS mapping, together with the lower-priority
+        * default CoS mapping, is enabled by default by the HWRM.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_INNERMOST \
-               (UINT32_C(0x0) << 1)
+       uint32_t        flags;
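+       /*
+        * Illustrative composition, a sketch only: to enable the inner
+        * VLAN PRI and IP DSCP mappings with the default priority order
+        * described above, a caller would OR together the corresponding
+        * enable flags defined below, e.g.
+        *   HWRM_PORT_MAC_CFG_INPUT_FLAGS_VLAN_PRI2COS_ENABLE |
+        *   HWRM_PORT_MAC_CFG_INPUT_FLAGS_IP_DSCP2COS_ENABLE
+        */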
        /*
-        * Select outer VLAN Tag PRI when 2 VLAN Tags are
-        * present in the inner packet headers.
-        * No VLAN PRI is selected for this configuration
-        * if only one VLAN Tag is present in the inner
-        * packet headers.
+        * When this bit is '1', this command will configure
+        * the MAC to match the current link state of the PHY.
+        * If the link is not established on the PHY, then this
+        * bit has no effect.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_OUTER \
-               (UINT32_C(0x1) << 1)
+       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_MATCH_LINK \
+               UINT32_C(0x1)
        /*
-        * Select outermost VLAN PRI when 1 or 2 VLAN Tags
-        * are present in the inner packet headers
+        * When this bit is set to '1', the inner VLAN PRI to CoS mapping
+        * is requested to be enabled.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_OUTERMOST \
-               (UINT32_C(0x2) << 1)
-       /* Unspecified */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED \
-               (UINT32_C(0x3) << 1)
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_LAST \
-               HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED
+       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_VLAN_PRI2COS_ENABLE \
+               UINT32_C(0x2)
        /*
-        * This field is used for selecting tunnel VLAN PRI value
-        * based on whether one or two VLAN Tags are present in
-        * the tunnel headers of tunneled packets. This selection
-        * does not apply to non-tunneled packets.
+        * When this bit is set to '1', tunnel VLAN PRI field to
+        * CoS mapping is requested to be enabled.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_MASK \
-               UINT32_C(0x18)
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_SFT \
-               3
+       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_TUNNEL_PRI2COS_ENABLE \
+               UINT32_C(0x4)
        /*
-        * Select inner VLAN PRI when 1 or 2 VLAN Tags are
-        * present in the tunnel packet headers
+        * When this bit is set to '1', the IP DSCP to CoS mapping is
+        * requested to be enabled.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_INNERMOST \
-               (UINT32_C(0x0) << 3)
+       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_IP_DSCP2COS_ENABLE \
+               UINT32_C(0x8)
        /*
-        * Select outer VLAN Tag PRI when 2 VLAN Tags are
-        * present in the tunnel packet headers.
-        * No VLAN PRI is selected for this configuration
-        * if only one VLAN Tag is present in the tunnel
-        * packet headers.
+        * When this bit is '1', the HWRM is requested to
+        * enable timestamp capture capability on the receive side
+        * of this port.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTER \
-               (UINT32_C(0x1) << 3)
+       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_PTP_RX_TS_CAPTURE_ENABLE \
+               UINT32_C(0x10)
        /*
-        * Select outermost VLAN PRI when 1 or 2 VLAN Tags
-        * are present in the tunnel packet headers
+        * When this bit is '1', the HWRM is requested to
+        * disable timestamp capture capability on the receive side
+        * of this port.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTERMOST \
-               (UINT32_C(0x2) << 3)
-       /* Unspecified */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED \
-               (UINT32_C(0x3) << 3)
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_LAST \
-               HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED
+       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_PTP_RX_TS_CAPTURE_DISABLE \
+               UINT32_C(0x20)
        /*
-        * This field is used to provide default CoS value that
-        * has been configured on this port.
+        * When this bit is '1', the HWRM is requested to
+        * enable timestamp capture capability on the transmit side
+        * of this port.
         */
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_DEFAULT_COS_MASK \
-               UINT32_C(0xe0)
-       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_DEFAULT_COS_SFT \
-               5
+       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_PTP_TX_TS_CAPTURE_ENABLE \
+               UINT32_C(0x40)
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * When this bit is '1', the HWRM is requested to
+        * disable timestamp capture capability on the transmit side
+        * of this port.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/**************************
- * hwrm_port_mac_ptp_qcfg *
- **************************/
-
-
-/* hwrm_port_mac_ptp_qcfg_input (size:192b/24B) */
-struct hwrm_port_mac_ptp_qcfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_PTP_TX_TS_CAPTURE_DISABLE \
+               UINT32_C(0x80)
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * When this bit is '1', the Out-Of-Box WoL is requested to
+        * be enabled on this port.
         */
-       uint16_t        cmpl_ring;
+       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_OOB_WOL_ENABLE \
+               UINT32_C(0x100)
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * When this bit is '1', the Out-Of-Box WoL is requested to
+        * be disabled on this port.
         */
-       uint16_t        seq_id;
+       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_OOB_WOL_DISABLE \
+               UINT32_C(0x200)
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * When this bit is set to '1', the inner VLAN PRI to CoS mapping
+        * is requested to be disabled.
         */
-       uint16_t        target_id;
+       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_VLAN_PRI2COS_DISABLE \
+               UINT32_C(0x400)
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * When this bit is set to '1', tunnel VLAN PRI field to
+        * CoS mapping is requested to be disabled.
         */
-       uint64_t        resp_addr;
-       /* Port ID of port that is being queried. */
-       uint16_t        port_id;
-       uint8_t unused_0[6];
-} __attribute__((packed));
-
-/* hwrm_port_mac_ptp_qcfg_output (size:640b/80B) */
-struct hwrm_port_mac_ptp_qcfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
+       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_TUNNEL_PRI2COS_DISABLE \
+               UINT32_C(0x800)
        /*
-        * In this field, a number of PTP related flags
-        * are used to indicate configured PTP capabilities.
+        * When this bit is set to '1', the IP DSCP to CoS mapping is
+        * requested to be disabled.
         */
-       uint8_t flags;
+       #define HWRM_PORT_MAC_CFG_INPUT_FLAGS_IP_DSCP2COS_DISABLE \
+               UINT32_C(0x1000)
+       uint32_t        enables;
        /*
-        * When this bit is set to '1', the PTP related registers are
-        * directly accessible by the host.
+        * This bit must be '1' for the ipg field to be
+        * configured.
         */
-       #define HWRM_PORT_MAC_PTP_QCFG_OUTPUT_FLAGS_DIRECT_ACCESS \
+       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_IPG \
                UINT32_C(0x1)
        /*
-        * When this bit is set to '1', the PTP information is accessible
-        * via HWRM commands.
+        * This bit must be '1' for the lpbk field to be
+        * configured.
         */
-       #define HWRM_PORT_MAC_PTP_QCFG_OUTPUT_FLAGS_HWRM_ACCESS \
+       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_LPBK \
                UINT32_C(0x2)
-       uint8_t unused_0[3];
-       /* Offset of the PTP register for the lower 32 bits of timestamp for RX. */
-       uint32_t        rx_ts_reg_off_lower;
-       /* Offset of the PTP register for the upper 32 bits of timestamp for RX. */
-       uint32_t        rx_ts_reg_off_upper;
-       /* Offset of the PTP register for the sequence ID for RX. */
-       uint32_t        rx_ts_reg_off_seq_id;
-       /* Offset of the first PTP source ID for RX. */
-       uint32_t        rx_ts_reg_off_src_id_0;
-       /* Offset of the second PTP source ID for RX. */
-       uint32_t        rx_ts_reg_off_src_id_1;
-       /* Offset of the third PTP source ID for RX. */
-       uint32_t        rx_ts_reg_off_src_id_2;
-       /* Offset of the domain ID for RX. */
-       uint32_t        rx_ts_reg_off_domain_id;
-       /* Offset of the PTP FIFO register for RX. */
-       uint32_t        rx_ts_reg_off_fifo;
-       /* Offset of the PTP advance FIFO register for RX. */
-       uint32_t        rx_ts_reg_off_fifo_adv;
-       /* PTP timestamp granularity for RX. */
-       uint32_t        rx_ts_reg_off_granularity;
-       /* Offset of the PTP register for the lower 32 bits of timestamp for TX. */
-       uint32_t        tx_ts_reg_off_lower;
-       /* Offset of the PTP register for the upper 32 bits of timestamp for TX. */
-       uint32_t        tx_ts_reg_off_upper;
-       /* Offset of the PTP register for the sequence ID for TX. */
-       uint32_t        tx_ts_reg_off_seq_id;
-       /* Offset of the PTP FIFO register for TX. */
-       uint32_t        tx_ts_reg_off_fifo;
-       /* PTP timestamp granularity for TX. */
-       uint32_t        tx_ts_reg_off_granularity;
-       uint8_t unused_1[7];
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * This bit must be '1' for the vlan_pri2cos_map_pri field to be
+        * configured.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/********************
- * hwrm_port_qstats *
- ********************/
-
-
-/* hwrm_port_qstats_input (size:320b/40B) */
-struct hwrm_port_qstats_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_VLAN_PRI2COS_MAP_PRI \
+               UINT32_C(0x4)
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * This bit must be '1' for the tunnel_pri2cos_map_pri field to be
+        * configured.
         */
-       uint16_t        cmpl_ring;
+       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_TUNNEL_PRI2COS_MAP_PRI \
+               UINT32_C(0x10)
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * This bit must be '1' for the dscp2cos_map_pri field to be
+        * configured.
         */
-       uint16_t        seq_id;
+       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_DSCP2COS_MAP_PRI \
+               UINT32_C(0x20)
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * This bit must be '1' for the rx_ts_capture_ptp_msg_type field to be
+        * configured.
         */
-       uint16_t        target_id;
+       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE \
+               UINT32_C(0x40)
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * This bit must be '1' for the tx_ts_capture_ptp_msg_type field to be
+        * configured.
         */
-       uint64_t        resp_addr;
-       /* Port ID of port that is being queried. */
+       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_TX_TS_CAPTURE_PTP_MSG_TYPE \
+               UINT32_C(0x80)
+       /*
+        * This bit must be '1' for the cos_field_cfg field to be
+        * configured.
+        */
+       #define HWRM_PORT_MAC_CFG_INPUT_ENABLES_COS_FIELD_CFG \
+               UINT32_C(0x100)
+       /* Port ID of port that is to be configured. */
        uint16_t        port_id;
-       uint8_t unused_0[6];
        /*
-        * This is the host address where
-        * Tx port statistics will be stored
+        * This value is used to configure the minimum IPG (inter-packet
+        * gap) that will be inserted between packets transmitted by
+        * this port.
         */
-       uint64_t        tx_stat_host_addr;
+       uint8_t ipg;
+       /* This value controls the loopback setting for the MAC. */
+       uint8_t lpbk;
+       /* No loopback is selected.  Normal operation. */
+       #define HWRM_PORT_MAC_CFG_INPUT_LPBK_NONE   UINT32_C(0x0)
        /*
-        * This is the host address where
-        * Rx port statistics will be stored
+        * The HW will be configured with local loopback such that
+        * host data is sent back to the host without modification.
         */
-       uint64_t        rx_stat_host_addr;
-} __attribute__((packed));
-
-/* hwrm_port_qstats_output (size:128b/16B) */
-struct hwrm_port_qstats_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* The size of TX port statistics block in bytes. */
-       uint16_t        tx_stat_size;
-       /* The size of RX port statistics block in bytes. */
-       uint16_t        rx_stat_size;
-       uint8_t unused_0[3];
+       #define HWRM_PORT_MAC_CFG_INPUT_LPBK_LOCAL  UINT32_C(0x1)
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * The HW will be configured with remote loopback such that
+        * port logic will send packets back out the transmitter that
+        * are received.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/************************
- * hwrm_port_qstats_ext *
- ************************/
-
-
-/* hwrm_port_qstats_ext_input (size:320b/40B) */
-struct hwrm_port_qstats_ext_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       #define HWRM_PORT_MAC_CFG_INPUT_LPBK_REMOTE UINT32_C(0x2)
+       #define HWRM_PORT_MAC_CFG_INPUT_LPBK_LAST \
+               HWRM_PORT_MAC_CFG_INPUT_LPBK_REMOTE
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * This value controls the priority setting of VLAN PRI to CoS
+        * mapping based on VLAN Tags of inner packet headers of
+        * tunneled packets or packet headers of non-tunneled packets.
+        *
+        * # Each XXX_pri variable shall have a unique priority value
+        * when it is specified.
+        * # When comparing priorities of mappings, a higher value
+        * indicates a higher priority.
+        * For example, with values in the range 0-3, 0 is the lowest
+        * priority and 3 is the highest priority.
         */
-       uint16_t        cmpl_ring;
+       uint8_t vlan_pri2cos_map_pri;
+       /* Reserved field. */
+       uint8_t reserved1;
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * This value controls the priority setting of VLAN PRI to CoS
+        * mapping based on VLAN Tags of tunneled header.
+        * This mapping only applies when tunneled headers
+        * are present.
+        *
+        * # Each XXX_pri variable shall have a unique priority value
+        * when it is specified.
+        * # When comparing priorities of mappings, a higher value
+        * indicates a higher priority.
+        * For example, with values in the range 0-3, 0 is the lowest
+        * priority and 3 is the highest priority.
         */
-       uint16_t        seq_id;
+       uint8_t tunnel_pri2cos_map_pri;
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * This value controls the priority setting of IP DSCP to CoS
+        * mapping based on inner IP header of tunneled packets or
+        * IP header of non-tunneled packets.
+        *
+        * # Each XXX_pri variable shall have a unique priority value
+        * when it is specified.
+        * # When comparing priorities of mappings, a higher value
+        * indicates a higher priority.
+        * For example, with values in the range 0-3, 0 is the lowest
+        * priority and 3 is the highest priority.
         */
-       uint16_t        target_id;
+       uint8_t dscp2pri_map_pri;
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * This is a 16-bit bit mask that is used to request a
+        * specific configuration of time stamp capture of PTP messages
+        * on the receive side of this port.
+        * This field shall be ignored if the ptp_rx_ts_capture_enable
+        * flag is not set in this command.
+        * Otherwise, if bit 'i' is set, then the HWRM is being
+        * requested to configure the receive side of the port to
+        * capture the time stamp of every received PTP message
+        * with messageType field value set to i.
         */
-       uint64_t        resp_addr;
-       /* Port ID of port that is being queried. */
-       uint16_t        port_id;
+       uint16_t        rx_ts_capture_ptp_msg_type;
        /*
-        * The size of TX port extended
-        * statistics block in bytes.
+        * This is a 16-bit bit mask that is used to request a
+        * specific configuration of time stamp capture of PTP messages
+        * on the transmit side of this port.
+        * This field shall be ignored if the ptp_tx_ts_capture_enable
+        * flag is not set in this command.
+        * Otherwise, if bit 'i' is set, then the HWRM is being
+        * requested to configure the transmit side of the port to
+        * capture the time stamp of every transmitted PTP message
+        * with messageType field value set to i.
         */
-       uint16_t        tx_stat_size;
+       uint16_t        tx_ts_capture_ptp_msg_type;
+       /* Configuration of CoS fields. */
+       uint8_t cos_field_cfg;
+       /* Reserved */
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_RSVD1 \
+               UINT32_C(0x1)
        /*
-        * The size of RX port extended
-        * statistics block in bytes
+        * This field is used to specify selection of VLAN PRI value
+        * based on whether one or two VLAN Tags are present in
+        * the inner packet headers of tunneled packets or
+        * non-tunneled packets.
+        * This field is valid only if inner VLAN PRI to CoS mapping
+        * is enabled.
+        * If VLAN PRI to CoS mapping is not enabled, then this
+        * field shall be ignored.
         */
-       uint16_t        rx_stat_size;
-       uint8_t unused_0[2];
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_MASK \
+               UINT32_C(0x6)
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_SFT \
+               1
        /*
-        * This is the host address where
-        * Tx port statistics will be stored
+        * Select inner VLAN PRI when 1 or 2 VLAN Tags are
+        * present in the inner packet headers
         */
-       uint64_t        tx_stat_host_addr;
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_INNERMOST \
+               (UINT32_C(0x0) << 1)
        /*
-        * This is the host address where
-        * Rx port statistics will be stored
+        * Select outer VLAN Tag PRI when 2 VLAN Tags are
+        * present in the inner packet headers.
+        * No VLAN PRI shall be selected for this configuration
+        * if only one VLAN Tag is present in the inner
+        * packet headers.
         */
-       uint64_t        rx_stat_host_addr;
-} __attribute__((packed));
-
-/* hwrm_port_qstats_ext_output (size:128b/16B) */
-struct hwrm_port_qstats_ext_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* The size of TX port statistics block in bytes. */
-       uint16_t        tx_stat_size;
-       /* The size of RX port statistics block in bytes. */
-       uint16_t        rx_stat_size;
-       uint8_t unused_0[3];
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_OUTER \
+               (UINT32_C(0x1) << 1)
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * Select outermost VLAN PRI when 1 or 2 VLAN Tags
+        * are present in the inner packet headers
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/*************************
- * hwrm_port_lpbk_qstats *
- *************************/
-
-
-/* hwrm_port_lpbk_qstats_input (size:128b/16B) */
-struct hwrm_port_lpbk_qstats_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_OUTERMOST \
+               (UINT32_C(0x2) << 1)
+       /* Unspecified */
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED \
+               (UINT32_C(0x3) << 1)
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_LAST \
+               HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * This field is used to specify selection of tunnel VLAN
+        * PRI value based on whether one or two VLAN Tags are
+        * present in tunnel headers.
+        * This field is valid only if tunnel VLAN PRI to CoS mapping
+        * is enabled.
+        * If tunnel VLAN PRI to CoS mapping is not enabled, then this
+        * field shall be ignored.
         */
-       uint16_t        cmpl_ring;
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_MASK \
+               UINT32_C(0x18)
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_SFT \
+               3
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * Select inner VLAN PRI when 1 or 2 VLAN Tags are
+        * present in the tunnel packet headers
         */
-       uint16_t        seq_id;
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_INNERMOST \
+               (UINT32_C(0x0) << 3)
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * Select outer VLAN Tag PRI when 2 VLAN Tags are
+        * present in the tunnel packet headers.
+        * No tunnel VLAN PRI shall be selected for this
+        * configuration if only one VLAN Tag is present in
+        * the tunnel packet headers.
         */
-       uint16_t        target_id;
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTER \
+               (UINT32_C(0x1) << 3)
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * Select outermost VLAN PRI when 1 or 2 VLAN Tags
+        * are present in the tunnel packet headers
         */
-       uint64_t        resp_addr;
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTERMOST \
+               (UINT32_C(0x2) << 3)
+       /* Unspecified */
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED \
+               (UINT32_C(0x3) << 3)
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_LAST \
+               HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED
+       /*
+        * This field shall be used to provide the default CoS value
+        * that has been configured on this port.
+        * This field is valid only if default CoS mapping
+        * is enabled.
+        * If default CoS mapping is not enabled, then this
+        * field shall be ignored.
+        */
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_DEFAULT_COS_MASK \
+               UINT32_C(0xe0)
+       #define HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_DEFAULT_COS_SFT \
+               5
+       uint8_t unused_0[3];
 } __attribute__((packed));
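
The request above follows the usual HWRM convention: a bit in `enables` marks the corresponding field as meaningful, while `flags` carries the actual on/off requests. As a minimal sketch of how a consumer might fill this structure (assuming a hypothetical `bnxt_hwrm_send_message()` transport helper that populates the common `req_type`/`cmpl_ring`/`seq_id`/`resp_addr` header), enabling RX PTP timestamp capture for Sync and Delay_Req messages could look like this:

```c
#include <string.h>
#include <rte_byteorder.h>

/* Hypothetical helper: fills the common request header fields and
 * posts the HWRM request; not part of this header. */
int bnxt_hwrm_send_message(void *req, size_t len);

static int
port_enable_rx_ptp_ts(uint16_t port)
{
	struct hwrm_port_mac_cfg_input req;

	memset(&req, 0, sizeof(req));
	req.port_id = rte_cpu_to_le_16(port);
	/* Request RX timestamp capture... */
	req.flags = rte_cpu_to_le_32(
		HWRM_PORT_MAC_CFG_INPUT_FLAGS_PTP_RX_TS_CAPTURE_ENABLE);
	/* ...and mark the message-type mask below as valid. */
	req.enables = rte_cpu_to_le_32(
		HWRM_PORT_MAC_CFG_INPUT_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE);
	/* Bit i selects PTP messageType i: capture only Sync (0x0)
	 * and Delay_Req (0x1) event messages. */
	req.rx_ts_capture_ptp_msg_type =
		rte_cpu_to_le_16((1 << 0) | (1 << 1));

	return bnxt_hwrm_send_message(&req, sizeof(req));
}
```

HWRM structures are little-endian on the wire, hence the `rte_cpu_to_le_*()` conversions on every multi-byte field.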
 
-/* hwrm_port_lpbk_qstats_output (size:768b/96B) */
-struct hwrm_port_lpbk_qstats_output {
+/* hwrm_port_mac_cfg_output (size:128b/16B) */
+struct hwrm_port_mac_cfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -12120,87 +12330,40 @@ struct hwrm_port_lpbk_qstats_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* Number of transmitted unicast frames */
-       uint64_t        lpbk_ucast_frames;
-       /* Number of transmitted multicast frames */
-       uint64_t        lpbk_mcast_frames;
-       /* Number of transmitted broadcast frames */
-       uint64_t        lpbk_bcast_frames;
-       /* Number of transmitted bytes for unicast traffic */
-       uint64_t        lpbk_ucast_bytes;
-       /* Number of transmitted bytes for multicast traffic */
-       uint64_t        lpbk_mcast_bytes;
-       /* Number of transmitted bytes for broadcast traffic */
-       uint64_t        lpbk_bcast_bytes;
-       /* Total Tx Drops for loopback traffic reported by STATS block */
-       uint64_t        tx_stat_discard;
-       /* Total Tx Error Drops for loopback traffic reported by STATS block */
-       uint64_t        tx_stat_error;
-       /* Total Rx Drops for loopback traffic reported by STATS block */
-       uint64_t        rx_stat_discard;
-       /* Total Rx Error Drops for loopback traffic reported by STATS block */
-       uint64_t        rx_stat_error;
-       uint8_t unused_0[7];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
-
-/***********************
- * hwrm_port_clr_stats *
- ***********************/
-
-
-/* hwrm_port_clr_stats_input (size:192b/24B) */
-struct hwrm_port_clr_stats_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * This is the configured maximum length of Ethernet packet
+        * payload that is allowed to be received on the port.
+        * This value does not include the number of bytes used by
+        * Ethernet header and trailer (CRC).
         */
-       uint16_t        cmpl_ring;
+       uint16_t        mru;
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * This is the configured maximum length of Ethernet packet
+        * payload that is allowed to be transmitted on the port.
+        * This value does not include the number of bytes used by
+        * Ethernet header and trailer (CRC).
         */
-       uint16_t        seq_id;
+       uint16_t        mtu;
+       /* Current configuration of the IPG value. */
+       uint8_t ipg;
+       /* Current value of the loopback setting. */
+       uint8_t lpbk;
+       /* No loopback is selected.  Normal operation. */
+       #define HWRM_PORT_MAC_CFG_OUTPUT_LPBK_NONE   UINT32_C(0x0)
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * The HW will be configured with local loopback such that
+        * host data is sent back to the host without modification.
         */
-       uint16_t        target_id;
+       #define HWRM_PORT_MAC_CFG_OUTPUT_LPBK_LOCAL  UINT32_C(0x1)
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * The HW will be configured with remote loopback such that
+        * port logic will send packets back out the transmitter that
+        * are received.
         */
-       uint64_t        resp_addr;
-       /* Port ID of port that is being queried. */
-       uint16_t        port_id;
-       uint8_t unused_0[6];
-} __attribute__((packed));
-
-/* hwrm_port_clr_stats_output (size:128b/16B) */
-struct hwrm_port_clr_stats_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t unused_0[7];
+       #define HWRM_PORT_MAC_CFG_OUTPUT_LPBK_REMOTE UINT32_C(0x2)
+       #define HWRM_PORT_MAC_CFG_OUTPUT_LPBK_LAST \
+               HWRM_PORT_MAC_CFG_OUTPUT_LPBK_REMOTE
+       uint8_t unused_0;
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -12211,13 +12374,13 @@ struct hwrm_port_clr_stats_output {
        uint8_t valid;
 } __attribute__((packed));
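
The MASK/SFT define pairs scattered through `cos_field_cfg` encode multi-bit subfields within a single byte: shift the value into place and bound it with the mask. A hedged sketch, continuing the request object from the previous example (the numeric priorities and CoS value are illustrative only):

```c
/* Continuing the zeroed `req` from the sketch above. */
uint8_t default_cos = 2;	/* example CoS value; fits the 3-bit field */

/* Turn the mappings on... */
req.flags |= rte_cpu_to_le_32(
	HWRM_PORT_MAC_CFG_INPUT_FLAGS_VLAN_PRI2COS_ENABLE |
	HWRM_PORT_MAC_CFG_INPUT_FLAGS_IP_DSCP2COS_ENABLE);
/* ...and declare which fields carry configuration. */
req.enables |= rte_cpu_to_le_32(
	HWRM_PORT_MAC_CFG_INPUT_ENABLES_VLAN_PRI2COS_MAP_PRI |
	HWRM_PORT_MAC_CFG_INPUT_ENABLES_DSCP2COS_MAP_PRI |
	HWRM_PORT_MAC_CFG_INPUT_ENABLES_LPBK |
	HWRM_PORT_MAC_CFG_INPUT_ENABLES_COS_FIELD_CFG);
/* Unique priority per mapping; the higher value wins on conflict. */
req.vlan_pri2cos_map_pri = 3;
req.dscp2pri_map_pri = 2;
/* No loopback: normal operation. */
req.lpbk = HWRM_PORT_MAC_CFG_INPUT_LPBK_NONE;
/* Multi-bit subfields go through their SFT/MASK pair: select the
 * outermost VLAN PRI and program the default CoS. */
req.cos_field_cfg =
	HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_VLAN_PRI_SEL_OUTERMOST |
	((default_cos <<
	  HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_DEFAULT_COS_SFT) &
	 HWRM_PORT_MAC_CFG_INPUT_COS_FIELD_CFG_DEFAULT_COS_MASK);
```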
 
-/****************************
- * hwrm_port_lpbk_clr_stats *
- ****************************/
+/**********************
+ * hwrm_port_mac_qcfg *
+ **********************/
 
 
-/* hwrm_port_lpbk_clr_stats_input (size:128b/16B) */
-struct hwrm_port_lpbk_clr_stats_input {
+/* hwrm_port_mac_qcfg_input (size:192b/24B) */
+struct hwrm_port_mac_qcfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -12245,10 +12408,13 @@ struct hwrm_port_lpbk_clr_stats_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
+       /* Port ID of port that is to be configured. */
+       uint16_t        port_id;
+       uint8_t unused_0[6];
 } __attribute__((packed));
 
-/* hwrm_port_lpbk_clr_stats_output (size:128b/16B) */
-struct hwrm_port_lpbk_clr_stats_output {
+/* hwrm_port_mac_qcfg_output (size:192b/24B) */
+struct hwrm_port_mac_qcfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -12257,84 +12423,236 @@ struct hwrm_port_lpbk_clr_stats_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       uint8_t unused_0[7];
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * This is the configured maximum length of Ethernet packet
+        * payload that is allowed to be received on the port.
+        * This value does not include the number of bytes used by the
+        * Ethernet header and trailer (CRC).
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/**********************
- * hwrm_port_ts_query *
- **********************/
-
-
-/* hwrm_port_ts_query_input (size:192b/24B) */
-struct hwrm_port_ts_query_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       uint16_t        mru;
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * This is the configured maximum length of Ethernet packet
+        * payload that is allowed to be transmitted on the port.
+        * This value does not include the number of bytes used by the
+        * Ethernet header and trailer (CRC).
         */
-       uint16_t        cmpl_ring;
+       uint16_t        mtu;
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * The minimum IPG (inter-packet gap) that will
+        * be inserted between packets sent by this port.
         */
-       uint16_t        seq_id;
+       uint8_t ipg;
+       /* The loopback setting for the MAC. */
+       uint8_t lpbk;
+       /* No loopback is selected.  Normal operation. */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_LPBK_NONE   UINT32_C(0x0)
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * The HW will be configured with local loopback such that
+        * host data is sent back to the host without modification.
         */
-       uint16_t        target_id;
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_LPBK_LOCAL  UINT32_C(0x1)
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * The HW will be configured with remote loopback such that
+        * port logic will send packets back out the transmitter that
+        * are received.
         */
-       uint64_t        resp_addr;
-       uint32_t        flags;
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_LPBK_REMOTE UINT32_C(0x2)
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_LPBK_LAST \
+               HWRM_PORT_MAC_QCFG_OUTPUT_LPBK_REMOTE
        /*
-        * Enumeration denoting the RX, TX type of the resource.
-        * This enumeration is used for resources that are similar for both
-        * TX and RX paths of the chip.
+        * Priority setting for VLAN PRI to CoS mapping.
+        * # Each XXX_pri variable shall have a unique priority value
+        * when it is used.
+        * # When comparing priorities of mappings, a higher value
+        * indicates a higher priority.
+        * For example, with values in the range 0-3, 0 is the lowest
+        * priority and 3 is the highest priority.
+        * # If the corresponding CoS mapping is not enabled, then this
+        * field should be ignored.
+        * # This value indicates the normalized priority value retained
+        * in the HWRM.
         */
-       #define HWRM_PORT_TS_QUERY_INPUT_FLAGS_PATH     UINT32_C(0x1)
-       /* tx path */
-       #define HWRM_PORT_TS_QUERY_INPUT_FLAGS_PATH_TX    UINT32_C(0x0)
-       /* rx path */
-       #define HWRM_PORT_TS_QUERY_INPUT_FLAGS_PATH_RX    UINT32_C(0x1)
-       #define HWRM_PORT_TS_QUERY_INPUT_FLAGS_PATH_LAST \
-               HWRM_PORT_TS_QUERY_INPUT_FLAGS_PATH_RX
-       /* Port ID of port that is being queried. */
-       uint16_t        port_id;
-       uint8_t unused_0[2];
-} __attribute__((packed));
-
-/* hwrm_port_ts_query_output (size:192b/24B) */
-struct hwrm_port_ts_query_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* Timestamp value of PTP message captured. */
-       uint64_t        ptp_msg_ts;
-       /* Sequence ID of the PTP message captured. */
-       uint16_t        ptp_msg_seqid;
-       uint8_t unused_0[5];
+       uint8_t vlan_pri2cos_map_pri;
+       /*
+        * In this field, a number of CoS mapping related flags
+        * are used to indicate configured CoS mappings.
+        */
+       uint8_t flags;
+       /*
+        * When this bit is set to '1', the inner VLAN PRI to CoS mapping
+        * is enabled.
+        */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_FLAGS_VLAN_PRI2COS_ENABLE \
+               UINT32_C(0x1)
+       /*
+        * When this bit is set to '1', tunnel VLAN PRI field to
+        * CoS mapping is enabled.
+        */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_FLAGS_TUNNEL_PRI2COS_ENABLE \
+               UINT32_C(0x2)
+       /*
+        * When this bit is set to '1', the IP DSCP to CoS mapping is
+        * enabled.
+        */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_FLAGS_IP_DSCP2COS_ENABLE \
+               UINT32_C(0x4)
+       /*
+        * When this bit is '1', the Out-Of-Box WoL is enabled on this
+        * port.
+        */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_FLAGS_OOB_WOL_ENABLE \
+               UINT32_C(0x8)
+       /* When this bit is '1', PTP is enabled for RX on this port. */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_FLAGS_PTP_RX_TS_CAPTURE_ENABLE \
+               UINT32_C(0x10)
+       /* When this bit is '1', PTP is enabled for TX on this port. */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_FLAGS_PTP_TX_TS_CAPTURE_ENABLE \
+               UINT32_C(0x20)
+       /*
+        * Priority setting for tunnel VLAN PRI to CoS mapping.
+        * # Each XXX_pri variable shall have a unique priority value
+        * when it is used.
+        * # When comparing priorities of mappings, a higher value
+        * indicates a higher priority.
+        * For example, with values in the range 0-3, 0 is the lowest
+        * priority and 3 is the highest priority.
+        * # If the corresponding CoS mapping is not enabled, then this
+        * field should be ignored.
+        * # This value indicates the normalized priority value retained
+        * in the HWRM.
+        */
+       uint8_t tunnel_pri2cos_map_pri;
+       /*
+        * Priority setting for DSCP to PRI mapping.
+        * # Each XXX_pri variable shall have a unique priority value
+        * when it is used.
+        * # When comparing priorities of mappings, a higher value
+        * indicates a higher priority.
+        * For example, with values in the range 0-3, 0 is the lowest
+        * priority and 3 is the highest priority.
+        * # If the corresponding CoS mapping is not enabled, then this
+        * field should be ignored.
+        * # This value indicates the normalized priority value retained
+        * in the HWRM.
+        */
+       uint8_t dscp2pri_map_pri;
+       /*
+        * This is a 16-bit bit mask that represents the
+        * current configuration of time stamp capture of PTP messages
+        * on the receive side of this port.
+        * If bit 'i' is set, then the receive side of the port
+        * is configured to capture the time stamp of every
+        * received PTP message with messageType field value set
+        * to i.
+        * If all bits are set to 0 (i.e. the field value is 0),
+        * then the receive side of the port is not configured
+        * to capture timestamp for PTP messages.
+        * If all bits are set to 1, then the receive side of the
+        * port is configured to capture timestamp for all PTP
+        * messages.
+        */
+       uint16_t        rx_ts_capture_ptp_msg_type;
+       /*
+        * This is a 16-bit bit mask that represents the
+        * current configuration of time stamp capture of PTP messages
+        * on the transmit side of this port.
+        * If bit 'i' is set, then the transmit side of the port
+        * is configured to capture the time stamp of every
+        * transmitted PTP message with messageType field value set
+        * to i.
+        * If all bits are set to 0 (i.e. the field value is 0),
+        * then the transmit side of the port is not configured
+        * to capture timestamp for PTP messages.
+        * If all bits are set to 1, then the transmit side of the
+        * port is configured to capture timestamp for all PTP
+        * messages.
+        */
+       uint16_t        tx_ts_capture_ptp_msg_type;
+       /* Configuration of CoS fields. */
+       uint8_t cos_field_cfg;
+       /* Reserved */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_RSVD \
+               UINT32_C(0x1)
+       /*
+        * This field is used for selecting VLAN PRI value
+        * based on whether one or two VLAN Tags are present in
+        * the inner packet headers of tunneled packets or
+        * non-tunneled packets.
+        */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_MASK \
+               UINT32_C(0x6)
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_SFT \
+               1
+       /*
+        * Select inner VLAN PRI when 1 or 2 VLAN Tags are
+        * present in the inner packet headers
+        */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_INNERMOST \
+               (UINT32_C(0x0) << 1)
+       /*
+        * Select outer VLAN Tag PRI when 2 VLAN Tags are
+        * present in the inner packet headers.
+        * No VLAN PRI is selected for this configuration
+        * if only one VLAN Tag is present in the inner
+        * packet headers.
+        */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_OUTER \
+               (UINT32_C(0x1) << 1)
+       /*
+        * Select outermost VLAN PRI when 1 or 2 VLAN Tags
+        * are present in the inner packet headers
+        */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_OUTERMOST \
+               (UINT32_C(0x2) << 1)
+       /* Unspecified */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED \
+               (UINT32_C(0x3) << 1)
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_LAST \
+               HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED
+       /*
+        * This field is used for selecting tunnel VLAN PRI value
+        * based on whether one or two VLAN Tags are present in
+        * the tunnel headers of tunneled packets. This selection
+        * does not apply to non-tunneled packets.
+        */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_MASK \
+               UINT32_C(0x18)
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_SFT \
+               3
+       /*
+        * Select inner VLAN PRI when 1 or 2 VLAN Tags are
+        * present in the tunnel packet headers
+        */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_INNERMOST \
+               (UINT32_C(0x0) << 3)
+       /*
+        * Select outer VLAN Tag PRI when 2 VLAN Tags are
+        * present in the tunnel packet headers.
+        * No VLAN PRI is selected for this configuration
+        * if only one VLAN Tag is present in the tunnel
+        * packet headers.
+        */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTER \
+               (UINT32_C(0x1) << 3)
+       /*
+        * Select outermost VLAN PRI when 1 or 2 VLAN Tags
+        * are present in the tunnel packet headers
+        */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTERMOST \
+               (UINT32_C(0x2) << 3)
+       /* Unspecified */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED \
+               (UINT32_C(0x3) << 3)
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_LAST \
+               HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED
+       /*
+        * This field is used to provide the default CoS value that
+        * has been configured on this port.
+        */
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_DEFAULT_COS_MASK \
+               UINT32_C(0xe0)
+       #define HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_DEFAULT_COS_SFT \
+               5
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -12345,13 +12663,13 @@ struct hwrm_port_ts_query_output {
        uint8_t valid;
 } __attribute__((packed));
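
Decoding a query response is the mirror image: test bits in `flags`, and recover subfields with the MASK/SFT pairs in the opposite order. A small sketch, assuming `resp` points at a completed response (the trailing `valid` byte already observed as '1'):

```c
#include <stdio.h>
#include <rte_byteorder.h>

static void
port_mac_qcfg_dump(const struct hwrm_port_mac_qcfg_output *resp)
{
	uint8_t default_cos;
	uint16_t rx_mask;

	if (resp->flags & HWRM_PORT_MAC_QCFG_OUTPUT_FLAGS_IP_DSCP2COS_ENABLE)
		printf("DSCP->CoS mapping on, priority %u\n",
		       (unsigned int)resp->dscp2pri_map_pri);

	/* MASK/SFT pairs decode in the opposite order from encoding. */
	default_cos = (resp->cos_field_cfg &
		HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_DEFAULT_COS_MASK) >>
		HWRM_PORT_MAC_QCFG_OUTPUT_COS_FIELD_CFG_DEFAULT_COS_SFT;
	printf("default CoS %u\n", (unsigned int)default_cos);

	/* Bit i set means PTP messageType i is timestamped on receive. */
	rx_mask = rte_le_to_cpu_16(resp->rx_ts_capture_ptp_msg_type);
	if (rx_mask & (1 << 0))
		printf("RX timestamping Sync messages\n");
}
```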
 
-/***********************
- * hwrm_port_phy_qcaps *
- ***********************/
+/**************************
+ * hwrm_port_mac_ptp_qcfg *
+ **************************/
 
 
-/* hwrm_port_phy_qcaps_input (size:192b/24B) */
-struct hwrm_port_phy_qcaps_input {
+/* hwrm_port_mac_ptp_qcfg_input (size:192b/24B) */
+struct hwrm_port_mac_ptp_qcfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -12384,8 +12702,8 @@ struct hwrm_port_phy_qcaps_input {
        uint8_t unused_0[6];
 } __attribute__((packed));
 
-/* hwrm_port_phy_qcaps_output (size:192b/24B) */
-struct hwrm_port_phy_qcaps_output {
+/* hwrm_port_mac_ptp_qcfg_output (size:640b/80B) */
+struct hwrm_port_mac_ptp_qcfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -12394,193 +12712,55 @@ struct hwrm_port_phy_qcaps_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* PHY capability flags */
+       /*
+        * In this field, a number of PTP related flags
+        * are used to indicate configured PTP capabilities.
+        */
        uint8_t flags;
        /*
-        * If set to 1, then this field indicates that the
-        * link is capable of supporting EEE.
+        * When this bit is set to '1', the PTP related registers are
+        * directly accessible by the host.
         */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_FLAGS_EEE_SUPPORTED \
+       #define HWRM_PORT_MAC_PTP_QCFG_OUTPUT_FLAGS_DIRECT_ACCESS \
                UINT32_C(0x1)
        /*
-        * If set to 1, then this field indicates that the
-        * PHY is capable of supporting external loopback.
+        * When this bit is set to '1', the PTP information is accessible
+        * via HWRM commands.
         */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_FLAGS_EXTERNAL_LPBK_SUPPORTED \
+       #define HWRM_PORT_MAC_PTP_QCFG_OUTPUT_FLAGS_HWRM_ACCESS \
                UINT32_C(0x2)
-       /*
-        * Reserved field. The HWRM shall set this field to 0.
-        * An HWRM client shall ignore this field.
-        */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_FLAGS_RSVD1_MASK \
-               UINT32_C(0xfc)
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_FLAGS_RSVD1_SFT                   2
-       /* Number of front panel ports for this device. */
-       uint8_t port_cnt;
-       /* Not supported or unknown */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_PORT_CNT_UNKNOWN UINT32_C(0x0)
-       /* single port device */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_PORT_CNT_1       UINT32_C(0x1)
-       /* 2-port device */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_PORT_CNT_2       UINT32_C(0x2)
-       /* 3-port device */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_PORT_CNT_3       UINT32_C(0x3)
-       /* 4-port device */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_PORT_CNT_4       UINT32_C(0x4)
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_PORT_CNT_LAST \
-               HWRM_PORT_PHY_QCAPS_OUTPUT_PORT_CNT_4
-       /*
-        * This is a bit mask to indicate what speeds are supported
-        * as forced speeds on this link.
-        * For each speed that can be forced on this link, the
-        * corresponding mask bit shall be set to '1'.
-        */
-       uint16_t        supported_speeds_force_mode;
-       /* 100Mb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_100MBHD \
-               UINT32_C(0x1)
-       /* 100Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_100MB \
-               UINT32_C(0x2)
-       /* 1Gb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_1GBHD \
-               UINT32_C(0x4)
-       /* 1Gb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_1GB \
-               UINT32_C(0x8)
-       /* 2Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_2GB \
-               UINT32_C(0x10)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_2_5GB \
-               UINT32_C(0x20)
-       /* 10Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_10GB \
-               UINT32_C(0x40)
-       /* 20Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_20GB \
-               UINT32_C(0x80)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_25GB \
-               UINT32_C(0x100)
-       /* 40Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_40GB \
-               UINT32_C(0x200)
-       /* 50Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_50GB \
-               UINT32_C(0x400)
-       /* 100Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_100GB \
-               UINT32_C(0x800)
-       /* 10Mb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_10MBHD \
-               UINT32_C(0x1000)
-       /* 10Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_10MB \
-               UINT32_C(0x2000)
-       /*
-        * This is a bit mask to indicate what speeds are supported
-        * for autonegotiation on this link.
-        * For each speed that can be autonegotiated on this link, the
-        * corresponding mask bit shall be set to '1'.
-        */
-       uint16_t        supported_speeds_auto_mode;
-       /* 100Mb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_100MBHD \
-               UINT32_C(0x1)
-       /* 100Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_100MB \
-               UINT32_C(0x2)
-       /* 1Gb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_1GBHD \
-               UINT32_C(0x4)
-       /* 1Gb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_1GB \
-               UINT32_C(0x8)
-       /* 2Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_2GB \
-               UINT32_C(0x10)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_2_5GB \
-               UINT32_C(0x20)
-       /* 10Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_10GB \
-               UINT32_C(0x40)
-       /* 20Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_20GB \
-               UINT32_C(0x80)
-       /* 25Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_25GB \
-               UINT32_C(0x100)
-       /* 40Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_40GB \
-               UINT32_C(0x200)
-       /* 50Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_50GB \
-               UINT32_C(0x400)
-       /* 100Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_100GB \
-               UINT32_C(0x800)
-       /* 10Mb link speed (Half-duplex) */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_10MBHD \
-               UINT32_C(0x1000)
-       /* 10Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_10MB \
-               UINT32_C(0x2000)
-       /*
-        * This is a bit mask to indicate what speeds are supported
-        * for EEE on this link.
-        * For each speed that can be autonegotiated when EEE is enabled
-        * on this link, the corresponding mask bit shall be set to '1'.
-        * This field is only valid when the eee_suppotred is set to '1'.
-        */
-       uint16_t        supported_speeds_eee_mode;
-       /* Reserved */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_EEE_MODE_RSVD1 \
-               UINT32_C(0x1)
-       /* 100Mb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_EEE_MODE_100MB \
-               UINT32_C(0x2)
-       /* Reserved */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_EEE_MODE_RSVD2 \
-               UINT32_C(0x4)
-       /* 1Gb link speed (Full-duplex) */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_EEE_MODE_1GB \
-               UINT32_C(0x8)
-       /* Reserved */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_EEE_MODE_RSVD3 \
-               UINT32_C(0x10)
-       /* Reserved */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_EEE_MODE_RSVD4 \
-               UINT32_C(0x20)
-       /* 10Gb link speed */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_EEE_MODE_10GB \
-               UINT32_C(0x40)
-       uint32_t        tx_lpi_timer_low;
-       /*
-        * The lowest value of TX LPI timer that can be set on this link
-        * when EEE is enabled. This value is in microseconds.
-        * This field is valid only when_eee_supported is set to '1'.
-        */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_TX_LPI_TIMER_LOW_MASK \
-               UINT32_C(0xffffff)
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_TX_LPI_TIMER_LOW_SFT 0
-       /*
-        * Reserved field. The HWRM shall set this field to 0.
-        * An HWRM client shall ignore this field.
-        */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_RSVD2_MASK \
-               UINT32_C(0xff000000)
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_RSVD2_SFT            24
-       uint32_t        valid_tx_lpi_timer_high;
-       /*
-        * The highest value of TX LPI timer that can be set on this link
-        * when EEE is enabled. This value is in microseconds.
-        * This field is valid only when_eee_supported is set to '1'.
-        */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_TX_LPI_TIMER_HIGH_MASK \
-               UINT32_C(0xffffff)
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_TX_LPI_TIMER_HIGH_SFT 0
+       uint8_t unused_0[3];
+       /* Offset of the PTP register for the lower 32 bits of timestamp for RX. */
+       uint32_t        rx_ts_reg_off_lower;
+       /* Offset of the PTP register for the upper 32 bits of timestamp for RX. */
+       uint32_t        rx_ts_reg_off_upper;
+       /* Offset of the PTP register for the sequence ID for RX. */
+       uint32_t        rx_ts_reg_off_seq_id;
+       /* Offset of the first PTP source ID for RX. */
+       uint32_t        rx_ts_reg_off_src_id_0;
+       /* Offset of the second PTP source ID for RX. */
+       uint32_t        rx_ts_reg_off_src_id_1;
+       /* Offset of the third PTP source ID for RX. */
+       uint32_t        rx_ts_reg_off_src_id_2;
+       /* Offset of the domain ID for RX. */
+       uint32_t        rx_ts_reg_off_domain_id;
+       /* Offset of the PTP FIFO register for RX. */
+       uint32_t        rx_ts_reg_off_fifo;
+       /* Offset of the PTP advance FIFO register for RX. */
+       uint32_t        rx_ts_reg_off_fifo_adv;
+       /* Offset of the PTP timestamp granularity register for RX. */
+       uint32_t        rx_ts_reg_off_granularity;
+       /* Offset of the PTP register for the lower 32 bits of timestamp for TX. */
+       uint32_t        tx_ts_reg_off_lower;
+       /* Offset of the PTP register for the upper 32 bits of timestamp for TX. */
+       uint32_t        tx_ts_reg_off_upper;
+       /* Offset of the PTP register for the sequence ID for TX. */
+       uint32_t        tx_ts_reg_off_seq_id;
+       /* Offset of the PTP FIFO register for TX. */
+       uint32_t        tx_ts_reg_off_fifo;
+       /* Offset of the PTP timestamp granularity register for TX. */
+       uint32_t        tx_ts_reg_off_granularity;
+       uint8_t unused_1[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -12588,581 +12768,416 @@ struct hwrm_port_phy_qcaps_output {
         * When writing a command completion or response to an internal processor,
         * the order of writes has to be such that this field is written last.
         */
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_VALID_MASK \
-               UINT32_C(0xff000000)
-       #define HWRM_PORT_PHY_QCAPS_OUTPUT_VALID_SFT             24
+       uint8_t valid;
 } __attribute__((packed));
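
The rx_ts_reg_off_lower/rx_ts_reg_off_upper pair above splits the 64-bit
RX PTP timestamp across two 32-bit registers, and the TX side mirrors it.
A minimal sketch of how a driver could combine the halves, assuming the
advertised offsets index into a memory-mapped register window; the bar
pointer and reg_read32() helper are hypothetical and not part of this
header:

#include <stdint.h>

/* Hypothetical 32-bit MMIO accessor; real drivers use their own. */
static inline uint32_t reg_read32(volatile uint8_t *bar, uint32_t off)
{
	return *(volatile uint32_t *)(bar + off);
}

/*
 * Combine the timestamp halves located at the offsets reported in
 * rx_ts_reg_off_lower and rx_ts_reg_off_upper. Whether reading one
 * half latches the other is device-specific and not specified here.
 */
static uint64_t rx_ptp_timestamp(volatile uint8_t *bar,
				 uint32_t off_lower, uint32_t off_upper)
{
	uint64_t lo = reg_read32(bar, off_lower);
	uint64_t hi = reg_read32(bar, off_upper);

	return (hi << 32) | lo;
}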
 
-/***************************
- * hwrm_port_phy_i2c_write *
- ***************************/
-
-
-/* hwrm_port_phy_i2c_write_input (size:832b/104B) */
-struct hwrm_port_phy_i2c_write_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+/* Port Tx Statistics Formats */
+/* tx_port_stats (size:3264b/408B) */
+struct tx_port_stats {
+       /* Total Number of 64 Bytes frames transmitted */
+       uint64_t        tx_64b_frames;
+       /* Total Number of 65-127 Bytes frames transmitted */
+       uint64_t        tx_65b_127b_frames;
+       /* Total Number of 128-255 Bytes frames transmitted */
+       uint64_t        tx_128b_255b_frames;
+       /* Total Number of 256-511 Bytes frames transmitted */
+       uint64_t        tx_256b_511b_frames;
+       /* Total Number of 512-1023 Bytes frames transmitted */
+       uint64_t        tx_512b_1023b_frames;
+       /* Total Number of 1024-1518 Bytes frames transmitted */
+       uint64_t        tx_1024b_1518b_frames;
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * Total number of good VLAN frames (excludes FCS errors)
+        * transmitted that are 1519 to 1522 bytes in length
+        * inclusive (excluding framing bits but including FCS bytes).
         */
-       uint16_t        cmpl_ring;
+       uint64_t        tx_good_vlan_frames;
+       /* Total Number of 1519-2047 Bytes frames transmitted */
+       uint64_t        tx_1519b_2047b_frames;
+       /* Total Number of 2048-4095 Bytes frames transmitted */
+       uint64_t        tx_2048b_4095b_frames;
+       /* Total Number of 4096-9216 Bytes frames transmitted */
+       uint64_t        tx_4096b_9216b_frames;
+       /* Total Number of 9217-16383 Bytes frames transmitted */
+       uint64_t        tx_9217b_16383b_frames;
+       /* Total Number of good frames transmitted */
+       uint64_t        tx_good_frames;
+       /* Total Number of frames transmitted */
+       uint64_t        tx_total_frames;
+       /* Total number of unicast frames transmitted */
+       uint64_t        tx_ucast_frames;
+       /* Total number of multicast frames transmitted */
+       uint64_t        tx_mcast_frames;
+       /* Total number of broadcast frames transmitted */
+       uint64_t        tx_bcast_frames;
+       /* Total number of PAUSE control frames transmitted */
+       uint64_t        tx_pause_frames;
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * Total number of PFC/per-priority PAUSE
+        * control frames transmitted
         */
-       uint64_t        resp_addr;
-       uint32_t        flags;
-       uint32_t        enables;
+       uint64_t        tx_pfc_frames;
+       /* Total number of jabber frames transmitted */
+       uint64_t        tx_jabber_frames;
+       /* Total number of frames transmitted with FCS error */
+       uint64_t        tx_fcs_err_frames;
+       /* Total number of control frames transmitted */
+       uint64_t        tx_control_frames;
+       /* Total number of over-sized frames transmitted */
+       uint64_t        tx_oversz_frames;
+       /* Total number of frames with single deferral */
+       uint64_t        tx_single_dfrl_frames;
+       /* Total number of frames with multiple deferrals */
+       uint64_t        tx_multi_dfrl_frames;
+       /* Total number of frames with single collision */
+       uint64_t        tx_single_coll_frames;
+       /* Total number of frames with multiple collisions */
+       uint64_t        tx_multi_coll_frames;
+       /* Total number of frames with late collisions */
+       uint64_t        tx_late_coll_frames;
+       /* Total number of frames with excessive collisions */
+       uint64_t        tx_excessive_coll_frames;
+       /* Total number of fragmented frames transmitted */
+       uint64_t        tx_frag_frames;
+       /* Total number of transmit errors */
+       uint64_t        tx_err;
+       /* Total number of single VLAN tagged frames transmitted */
+       uint64_t        tx_tagged_frames;
+       /* Total number of double VLAN tagged frames transmitted */
+       uint64_t        tx_dbl_tagged_frames;
+       /* Total number of runt frames transmitted */
+       uint64_t        tx_runt_frames;
+       /* Total number of TX FIFO under runs */
+       uint64_t        tx_fifo_underruns;
        /*
-        * This bit must be '1' for the page_offset field to be
-        * configured.
+        * Total number of PFC frames with PFC enabled bit for
+        * Pri 0 transmitted
         */
-       #define HWRM_PORT_PHY_I2C_WRITE_INPUT_ENABLES_PAGE_OFFSET \
-               UINT32_C(0x1)
-       /* Port ID of port. */
-       uint16_t        port_id;
-       /* 8-bit I2C slave address. */
-       uint8_t i2c_slave_addr;
-       uint8_t unused_0;
-       /* The page number that is being accessed over I2C. */
-       uint16_t        page_number;
-       /* Offset within the page that is being accessed over I2C. */
-       uint16_t        page_offset;
+       uint64_t        tx_pfc_ena_frames_pri0;
        /*
-        * Length of data to write, in bytes starting at the offset
-        * specified above. If the offset is not specified, then
-        * the data shall be written from the beginning of the page.
+        * Total number of PFC frames with PFC enabled bit for
+        * Pri 1 transmitted
         */
-       uint8_t data_length;
-       uint8_t unused_1[7];
-       /* Up to 64B of data. */
-       uint32_t        data[16];
-} __attribute__((packed));
-
-/* hwrm_port_phy_i2c_write_output (size:128b/16B) */
-struct hwrm_port_phy_i2c_write_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t unused_0[7];
+       uint64_t        tx_pfc_ena_frames_pri1;
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * Total number of PFC frames with PFC enabled bit for
+        * Pri 2 transmitted
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/**************************
- * hwrm_port_phy_i2c_read *
- **************************/
-
-
-/* hwrm_port_phy_i2c_read_input (size:320b/40B) */
-struct hwrm_port_phy_i2c_read_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       uint64_t        tx_pfc_ena_frames_pri2;
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * Total number of PFC frames with PFC enabled bit for
+        * Pri 3 transmitted
         */
-       uint16_t        cmpl_ring;
+       uint64_t        tx_pfc_ena_frames_pri3;
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * Total number of PFC frames with PFC enabled bit for
+        * Pri 4 transmitted
         */
-       uint16_t        seq_id;
+       uint64_t        tx_pfc_ena_frames_pri4;
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * Total number of PFC frames with PFC enabled bit for
+        * Pri 5 transmitted
         */
-       uint16_t        target_id;
+       uint64_t        tx_pfc_ena_frames_pri5;
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * Total number of PFC frames with PFC enabled bit for
+        * Pri 6 transmitted
         */
-       uint64_t        resp_addr;
-       uint32_t        flags;
-       uint32_t        enables;
+       uint64_t        tx_pfc_ena_frames_pri6;
        /*
-        * This bit must be '1' for the page_offset field to be
-        * configured.
+        * Total number of PFC frames with PFC enabled bit for
+        * Pri 7 transmitted
         */
-       #define HWRM_PORT_PHY_I2C_READ_INPUT_ENABLES_PAGE_OFFSET \
-               UINT32_C(0x1)
-       /* Port ID of port. */
-       uint16_t        port_id;
-       /* 8-bit I2C slave address. */
-       uint8_t i2c_slave_addr;
-       uint8_t unused_0;
-       /* The page number that is being accessed over I2C. */
-       uint16_t        page_number;
-       /* Offset within the page that is being accessed over I2C. */
-       uint16_t        page_offset;
+       uint64_t        tx_pfc_ena_frames_pri7;
+       /* Total number of EEE LPI Events on TX */
+       uint64_t        tx_eee_lpi_events;
+       /* EEE LPI Duration Counter on TX */
+       uint64_t        tx_eee_lpi_duration;
        /*
-        * Length of data to read, in bytes starting at the offset
-        * specified above. If the offset is not specified, then
-        * the data shall be read from the beginning of the page.
+        * Total number of Link Level Flow Control (LLFC) messages
+        * transmitted
         */
-       uint8_t data_length;
-       uint8_t unused_1[7];
+       uint64_t        tx_llfc_logical_msgs;
+       /* Total number of HCFC messages transmitted */
+       uint64_t        tx_hcfc_msgs;
+       /* Total number of TX collisions */
+       uint64_t        tx_total_collisions;
+       /* Total number of transmitted bytes */
+       uint64_t        tx_bytes;
+       /* Total number of end-to-end HOL frames */
+       uint64_t        tx_xthol_frames;
+       /* Total Tx Drops per Port reported by STATS block */
+       uint64_t        tx_stat_discard;
+       /* Total Tx Error Drops per Port reported by STATS block */
+       uint64_t        tx_stat_error;
 } __attribute__((packed));
 
-/* hwrm_port_phy_i2c_read_output (size:640b/80B) */
-struct hwrm_port_phy_i2c_read_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* Up to 64B of data. */
-       uint32_t        data[16];
-       uint8_t unused_0[7];
+/* Port Rx Statistics Formats */
+/* rx_port_stats (size:4224b/528B) */
+struct rx_port_stats {
+       /* Total Number of 64 Bytes frames received */
+       uint64_t        rx_64b_frames;
+       /* Total Number of 65-127 Bytes frames received */
+       uint64_t        rx_65b_127b_frames;
+       /* Total Number of 128-255 Bytes frames received */
+       uint64_t        rx_128b_255b_frames;
+       /* Total Number of 256-511 Bytes frames received */
+       uint64_t        rx_256b_511b_frames;
+       /* Total Number of 512-1023 Bytes frames received */
+       uint64_t        rx_512b_1023b_frames;
+       /* Total Number of 1024-1518 Bytes frames received */
+       uint64_t        rx_1024b_1518b_frames;
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * Total number of good VLAN frames (excludes FCS errors)
+        * received that are 1519 to 1522 bytes in length
+        * inclusive (excluding framing bits but including FCS bytes).
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/*********************
- * hwrm_port_led_cfg *
- *********************/
-
-
-/* hwrm_port_led_cfg_input (size:512b/64B) */
-struct hwrm_port_led_cfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       uint64_t        rx_good_vlan_frames;
+       /* Total Number of 1519-2047 Bytes frames received */
+       uint64_t        rx_1519b_2047b_frames;
+       /* Total Number of 2048-4095 Bytes frames received */
+       uint64_t        rx_2048b_4095b_frames;
+       /* Total Number of 4096-9216 Bytes frames received */
+       uint64_t        rx_4096b_9216b_frames;
+       /* Total Number of 9217-16383 Bytes frames received */
+       uint64_t        rx_9217b_16383b_frames;
+       /* Total number of frames received */
+       uint64_t        rx_total_frames;
+       /* Total number of unicast frames received */
+       uint64_t        rx_ucast_frames;
+       /* Total number of multicast frames received */
+       uint64_t        rx_mcast_frames;
+       /* Total number of broadcast frames received */
+       uint64_t        rx_bcast_frames;
+       /* Total number of received frames with FCS error */
+       uint64_t        rx_fcs_err_frames;
+       /* Total number of control frames received */
+       uint64_t        rx_ctrl_frames;
+       /* Total number of PAUSE frames received */
+       uint64_t        rx_pause_frames;
+       /* Total number of PFC frames received */
+       uint64_t        rx_pfc_frames;
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * Total number of frames received with an unsupported
+        * opcode
         */
-       uint16_t        cmpl_ring;
+       uint64_t        rx_unsupported_opcode_frames;
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * Total number of frames received with an unsupported
+        * DA for pause and PFC
         */
-       uint16_t        seq_id;
+       uint64_t        rx_unsupported_da_pausepfc_frames;
+       /* Total number of frames received with an unsupported SA */
+       uint64_t        rx_wrong_sa_frames;
+       /* Total number of received packets with alignment error */
+       uint64_t        rx_align_err_frames;
+       /* Total number of received frames with out-of-range length */
+       uint64_t        rx_oor_len_frames;
+       /* Total number of received frames with error termination */
+       uint64_t        rx_code_err_frames;
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * Total number of received frames in which a false carrier was
+        * detected during idle, as defined by RX_ER samples active
+        * and RXD is 0xE. The event is reported along with the
+        * statistics generated on the next received frame. Only
+        * one false carrier condition can be detected and logged
+        * between frames.
+        *
+        * Carrier event, valid for 10M/100M speed modes only.
         */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
-       uint32_t        enables;
+       uint64_t        rx_false_carrier_frames;
+       /* Total number of over-sized frames received */
+       uint64_t        rx_ovrsz_frames;
+       /* Total number of jabber packets received */
+       uint64_t        rx_jbr_frames;
+       /* Total number of received frames with MTU error */
+       uint64_t        rx_mtu_err_frames;
+       /* Total number of received frames with CRC match */
+       uint64_t        rx_match_crc_frames;
+       /* Total number of frames received promiscuously */
+       uint64_t        rx_promiscuous_frames;
        /*
-        * This bit must be '1' for the led0_id field to be
-        * configured.
+        * Total number of received frames with one or two VLAN
+        * tags
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED0_ID \
-               UINT32_C(0x1)
+       uint64_t        rx_tagged_frames;
+       /* Total number of received frames with two VLAN tags */
+       uint64_t        rx_double_tagged_frames;
+       /* Total number of truncated frames received */
+       uint64_t        rx_trunc_frames;
+       /* Total number of good frames (without errors) received */
+       uint64_t        rx_good_frames;
        /*
-        * This bit must be '1' for the led0_state field to be
-        * configured.
+        * Total number of received PFC frames with transition from
+        * XON to XOFF on Pri 0
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED0_STATE \
-               UINT32_C(0x2)
+       uint64_t        rx_pfc_xon2xoff_frames_pri0;
        /*
-        * This bit must be '1' for the led0_color field to be
-        * configured.
+        * Total number of received PFC frames with transition from
+        * XON to XOFF on Pri 1
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED0_COLOR \
-               UINT32_C(0x4)
+       uint64_t        rx_pfc_xon2xoff_frames_pri1;
        /*
-        * This bit must be '1' for the led0_blink_on field to be
-        * configured.
+        * Total number of received PFC frames with transition from
+        * XON to XOFF on Pri 2
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED0_BLINK_ON \
-               UINT32_C(0x8)
+       uint64_t        rx_pfc_xon2xoff_frames_pri2;
        /*
-        * This bit must be '1' for the led0_blink_off field to be
-        * configured.
+        * Total number of received PFC frames with transition from
+        * XON to XOFF on Pri 3
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED0_BLINK_OFF \
-               UINT32_C(0x10)
+       uint64_t        rx_pfc_xon2xoff_frames_pri3;
        /*
-        * This bit must be '1' for the led0_group_id field to be
-        * configured.
+        * Total number of received PFC frames with transition from
+        * XON to XOFF on Pri 4
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED0_GROUP_ID \
-               UINT32_C(0x20)
+       uint64_t        rx_pfc_xon2xoff_frames_pri4;
        /*
-        * This bit must be '1' for the led1_id field to be
-        * configured.
+        * Total number of received PFC frames with transition from
+        * XON to XOFF on Pri 5
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED1_ID \
-               UINT32_C(0x40)
+       uint64_t        rx_pfc_xon2xoff_frames_pri5;
        /*
-        * This bit must be '1' for the led1_state field to be
-        * configured.
+        * Total number of received PFC frames with transition from
+        * XON to XOFF on Pri 6
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED1_STATE \
-               UINT32_C(0x80)
+       uint64_t        rx_pfc_xon2xoff_frames_pri6;
        /*
-        * This bit must be '1' for the led1_color field to be
-        * configured.
+        * Total number of received PFC frames with transition from
+        * XON to XOFF on Pri 7
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED1_COLOR \
-               UINT32_C(0x100)
+       uint64_t        rx_pfc_xon2xoff_frames_pri7;
        /*
-        * This bit must be '1' for the led1_blink_on field to be
-        * configured.
+        * Total number of received PFC frames with PFC enabled
+        * bit for Pri 0
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED1_BLINK_ON \
-               UINT32_C(0x200)
+       uint64_t        rx_pfc_ena_frames_pri0;
        /*
-        * This bit must be '1' for the led1_blink_off field to be
-        * configured.
+        * Total number of received PFC frames with PFC enabled
+        * bit for Pri 1
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED1_BLINK_OFF \
-               UINT32_C(0x400)
+       uint64_t        rx_pfc_ena_frames_pri1;
        /*
-        * This bit must be '1' for the led1_group_id field to be
-        * configured.
+        * Total number of received PFC frames with PFC enabled
+        * bit for Pri 2
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED1_GROUP_ID \
-               UINT32_C(0x800)
+       uint64_t        rx_pfc_ena_frames_pri2;
        /*
-        * This bit must be '1' for the led2_id field to be
-        * configured.
+        * Total number of received PFC frames with PFC enabled
+        * bit for Pri 3
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED2_ID \
-               UINT32_C(0x1000)
+       uint64_t        rx_pfc_ena_frames_pri3;
        /*
-        * This bit must be '1' for the led2_state field to be
-        * configured.
+        * Total number of received PFC frames with PFC enabled
+        * bit for Pri 4
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED2_STATE \
-               UINT32_C(0x2000)
+       uint64_t        rx_pfc_ena_frames_pri4;
        /*
-        * This bit must be '1' for the led2_color field to be
-        * configured.
+        * Total number of received PFC frames with PFC enabled
+        * bit for Pri 5
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED2_COLOR \
-               UINT32_C(0x4000)
+       uint64_t        rx_pfc_ena_frames_pri5;
        /*
-        * This bit must be '1' for the led2_blink_on field to be
-        * configured.
+        * Total number of received PFC frames with PFC enabled
+        * bit for Pri 6
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED2_BLINK_ON \
-               UINT32_C(0x8000)
+       uint64_t        rx_pfc_ena_frames_pri6;
        /*
-        * This bit must be '1' for the led2_blink_off field to be
-        * configured.
+        * Total number of received PFC frames with PFC enabled
+        * bit for Pri 7
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED2_BLINK_OFF \
-               UINT32_C(0x10000)
+       uint64_t        rx_pfc_ena_frames_pri7;
+       /* Total Number of frames received with SCH CRC error */
+       uint64_t        rx_sch_crc_err_frames;
+       /* Total Number of under-sized frames received */
+       uint64_t        rx_undrsz_frames;
+       /* Total Number of fragmented frames received */
+       uint64_t        rx_frag_frames;
+       /* Total number of RX EEE LPI Events */
+       uint64_t        rx_eee_lpi_events;
+       /* EEE LPI Duration Counter on RX */
+       uint64_t        rx_eee_lpi_duration;
        /*
-        * This bit must be '1' for the led2_group_id field to be
-        * configured.
+        * Total number of physical type Link Level Flow Control
+        * (LLFC) messages received
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED2_GROUP_ID \
-               UINT32_C(0x20000)
+       uint64_t        rx_llfc_physical_msgs;
        /*
-        * This bit must be '1' for the led3_id field to be
-        * configured.
+        * Total number of logical type Link Level Flow Control
+        * (LLFC) messages received
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED3_ID \
-               UINT32_C(0x40000)
+       uint64_t        rx_llfc_logical_msgs;
        /*
-        * This bit must be '1' for the led3_state field to be
-        * configured.
+        * Total number of logical type Link Level Flow Control
+        * (LLFC) messages received with CRC error
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED3_STATE \
-               UINT32_C(0x80000)
+       uint64_t        rx_llfc_msgs_with_crc_err;
+       /* Total number of HCFC messages received */
+       uint64_t        rx_hcfc_msgs;
+       /* Total number of HCFC messages received with CRC error */
+       uint64_t        rx_hcfc_msgs_with_crc_err;
+       /* Total number of received bytes */
+       uint64_t        rx_bytes;
+       /* Total number of bytes received in runt frames */
+       uint64_t        rx_runt_bytes;
+       /* Total number of runt frames received */
+       uint64_t        rx_runt_frames;
+       /* Total Rx Discards per Port reported by STATS block */
+       uint64_t        rx_stat_discard;
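+       /* Total Rx Error Drops per Port reported by STATS block */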
+       uint64_t        rx_stat_err;
+} __attribute__((packed));
+
+/********************
+ * hwrm_port_qstats *
+ ********************/
+
+
+/* hwrm_port_qstats_input (size:320b/40B) */
+struct hwrm_port_qstats_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * This bit must be '1' for the led3_color field to be
-        * configured.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED3_COLOR \
-               UINT32_C(0x100000)
+       uint16_t        cmpl_ring;
        /*
-        * This bit must be '1' for the led3_blink_on field to be
-        * configured.
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED3_BLINK_ON \
-               UINT32_C(0x200000)
+       uint16_t        seq_id;
        /*
-        * This bit must be '1' for the led3_blink_off field to be
-        * configured.
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED3_BLINK_OFF \
-               UINT32_C(0x400000)
+       uint16_t        target_id;
        /*
-        * This bit must be '1' for the led3_group_id field to be
-        * configured.
+        * A physical address pointer pointing to a host buffer that the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED3_GROUP_ID \
-               UINT32_C(0x800000)
-       /* Port ID of port whose LEDs are configured. */
+       uint64_t        resp_addr;
+       /* Port ID of port that is being queried. */
        uint16_t        port_id;
+       uint8_t unused_0[6];
        /*
-        * The number of LEDs that are being configured.
-        * Up to 4 LEDs can be configured with this command.
+        * This is the host address where
+        * Tx port statistics will be stored
         */
-       uint8_t num_leds;
-       /* Reserved field. */
-       uint8_t rsvd;
-       /* An identifier for the LED #0. */
-       uint8_t led0_id;
-       /* The requested state of the LED #0. */
-       uint8_t led0_state;
-       /* Default state of the LED */
-       #define HWRM_PORT_LED_CFG_INPUT_LED0_STATE_DEFAULT  UINT32_C(0x0)
-       /* Off */
-       #define HWRM_PORT_LED_CFG_INPUT_LED0_STATE_OFF      UINT32_C(0x1)
-       /* On */
-       #define HWRM_PORT_LED_CFG_INPUT_LED0_STATE_ON       UINT32_C(0x2)
-       /* Blink */
-       #define HWRM_PORT_LED_CFG_INPUT_LED0_STATE_BLINK    UINT32_C(0x3)
-       /* Blink Alternately */
-       #define HWRM_PORT_LED_CFG_INPUT_LED0_STATE_BLINKALT UINT32_C(0x4)
-       #define HWRM_PORT_LED_CFG_INPUT_LED0_STATE_LAST \
-               HWRM_PORT_LED_CFG_INPUT_LED0_STATE_BLINKALT
-       /* The requested color of LED #0. */
-       uint8_t led0_color;
-       /* Default */
-       #define HWRM_PORT_LED_CFG_INPUT_LED0_COLOR_DEFAULT    UINT32_C(0x0)
-       /* Amber */
-       #define HWRM_PORT_LED_CFG_INPUT_LED0_COLOR_AMBER      UINT32_C(0x1)
-       /* Green */
-       #define HWRM_PORT_LED_CFG_INPUT_LED0_COLOR_GREEN      UINT32_C(0x2)
-       /* Green or Amber */
-       #define HWRM_PORT_LED_CFG_INPUT_LED0_COLOR_GREENAMBER UINT32_C(0x3)
-       #define HWRM_PORT_LED_CFG_INPUT_LED0_COLOR_LAST \
-               HWRM_PORT_LED_CFG_INPUT_LED0_COLOR_GREENAMBER
-       uint8_t unused_0;
-       /*
-        * If the LED #0 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED on between cycles.
-        */
-       uint16_t        led0_blink_on;
-       /*
-        * If the LED #0 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED off between cycles.
-        */
-       uint16_t        led0_blink_off;
-       /*
-        * An identifier for the group of LEDs that LED #0 belongs
-        * to.
-        * If set to 0, then the LED #0 shall not be grouped and
-        * shall be treated as an individual resource.
-        * For all other non-zero values of this field, LED #0 shall
-        * be grouped together with the LEDs with the same group ID
-        * value.
-        */
-       uint8_t led0_group_id;
-       /* Reserved field. */
-       uint8_t rsvd0;
-       /* An identifier for the LED #1. */
-       uint8_t led1_id;
-       /* The requested state of the LED #1. */
-       uint8_t led1_state;
-       /* Default state of the LED */
-       #define HWRM_PORT_LED_CFG_INPUT_LED1_STATE_DEFAULT  UINT32_C(0x0)
-       /* Off */
-       #define HWRM_PORT_LED_CFG_INPUT_LED1_STATE_OFF      UINT32_C(0x1)
-       /* On */
-       #define HWRM_PORT_LED_CFG_INPUT_LED1_STATE_ON       UINT32_C(0x2)
-       /* Blink */
-       #define HWRM_PORT_LED_CFG_INPUT_LED1_STATE_BLINK    UINT32_C(0x3)
-       /* Blink Alternately */
-       #define HWRM_PORT_LED_CFG_INPUT_LED1_STATE_BLINKALT UINT32_C(0x4)
-       #define HWRM_PORT_LED_CFG_INPUT_LED1_STATE_LAST \
-               HWRM_PORT_LED_CFG_INPUT_LED1_STATE_BLINKALT
-       /* The requested color of LED #1. */
-       uint8_t led1_color;
-       /* Default */
-       #define HWRM_PORT_LED_CFG_INPUT_LED1_COLOR_DEFAULT    UINT32_C(0x0)
-       /* Amber */
-       #define HWRM_PORT_LED_CFG_INPUT_LED1_COLOR_AMBER      UINT32_C(0x1)
-       /* Green */
-       #define HWRM_PORT_LED_CFG_INPUT_LED1_COLOR_GREEN      UINT32_C(0x2)
-       /* Green or Amber */
-       #define HWRM_PORT_LED_CFG_INPUT_LED1_COLOR_GREENAMBER UINT32_C(0x3)
-       #define HWRM_PORT_LED_CFG_INPUT_LED1_COLOR_LAST \
-               HWRM_PORT_LED_CFG_INPUT_LED1_COLOR_GREENAMBER
-       uint8_t unused_1;
-       /*
-        * If the LED #1 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED on between cycles.
-        */
-       uint16_t        led1_blink_on;
-       /*
-        * If the LED #1 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED off between cycles.
-        */
-       uint16_t        led1_blink_off;
-       /*
-        * An identifier for the group of LEDs that LED #1 belongs
-        * to.
-        * If set to 0, then the LED #1 shall not be grouped and
-        * shall be treated as an individual resource.
-        * For all other non-zero values of this field, LED #1 shall
-        * be grouped together with the LEDs with the same group ID
-        * value.
-        */
-       uint8_t led1_group_id;
-       /* Reserved field. */
-       uint8_t rsvd1;
-       /* An identifier for the LED #2. */
-       uint8_t led2_id;
-       /* The requested state of the LED #2. */
-       uint8_t led2_state;
-       /* Default state of the LED */
-       #define HWRM_PORT_LED_CFG_INPUT_LED2_STATE_DEFAULT  UINT32_C(0x0)
-       /* Off */
-       #define HWRM_PORT_LED_CFG_INPUT_LED2_STATE_OFF      UINT32_C(0x1)
-       /* On */
-       #define HWRM_PORT_LED_CFG_INPUT_LED2_STATE_ON       UINT32_C(0x2)
-       /* Blink */
-       #define HWRM_PORT_LED_CFG_INPUT_LED2_STATE_BLINK    UINT32_C(0x3)
-       /* Blink Alternately */
-       #define HWRM_PORT_LED_CFG_INPUT_LED2_STATE_BLINKALT UINT32_C(0x4)
-       #define HWRM_PORT_LED_CFG_INPUT_LED2_STATE_LAST \
-               HWRM_PORT_LED_CFG_INPUT_LED2_STATE_BLINKALT
-       /* The requested color of LED #2. */
-       uint8_t led2_color;
-       /* Default */
-       #define HWRM_PORT_LED_CFG_INPUT_LED2_COLOR_DEFAULT    UINT32_C(0x0)
-       /* Amber */
-       #define HWRM_PORT_LED_CFG_INPUT_LED2_COLOR_AMBER      UINT32_C(0x1)
-       /* Green */
-       #define HWRM_PORT_LED_CFG_INPUT_LED2_COLOR_GREEN      UINT32_C(0x2)
-       /* Green or Amber */
-       #define HWRM_PORT_LED_CFG_INPUT_LED2_COLOR_GREENAMBER UINT32_C(0x3)
-       #define HWRM_PORT_LED_CFG_INPUT_LED2_COLOR_LAST \
-               HWRM_PORT_LED_CFG_INPUT_LED2_COLOR_GREENAMBER
-       uint8_t unused_2;
-       /*
-        * If the LED #2 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED on between cycles.
-        */
-       uint16_t        led2_blink_on;
-       /*
-        * If the LED #2 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED off between cycles.
-        */
-       uint16_t        led2_blink_off;
-       /*
-        * An identifier for the group of LEDs that LED #2 belongs
-        * to.
-        * If set to 0, then the LED #2 shall not be grouped and
-        * shall be treated as an individual resource.
-        * For all other non-zero values of this field, LED #2 shall
-        * be grouped together with the LEDs with the same group ID
-        * value.
-        */
-       uint8_t led2_group_id;
-       /* Reserved field. */
-       uint8_t rsvd2;
-       /* An identifier for the LED #3. */
-       uint8_t led3_id;
-       /* The requested state of the LED #3. */
-       uint8_t led3_state;
-       /* Default state of the LED */
-       #define HWRM_PORT_LED_CFG_INPUT_LED3_STATE_DEFAULT  UINT32_C(0x0)
-       /* Off */
-       #define HWRM_PORT_LED_CFG_INPUT_LED3_STATE_OFF      UINT32_C(0x1)
-       /* On */
-       #define HWRM_PORT_LED_CFG_INPUT_LED3_STATE_ON       UINT32_C(0x2)
-       /* Blink */
-       #define HWRM_PORT_LED_CFG_INPUT_LED3_STATE_BLINK    UINT32_C(0x3)
-       /* Blink Alternately */
-       #define HWRM_PORT_LED_CFG_INPUT_LED3_STATE_BLINKALT UINT32_C(0x4)
-       #define HWRM_PORT_LED_CFG_INPUT_LED3_STATE_LAST \
-               HWRM_PORT_LED_CFG_INPUT_LED3_STATE_BLINKALT
-       /* The requested color of LED #3. */
-       uint8_t led3_color;
-       /* Default */
-       #define HWRM_PORT_LED_CFG_INPUT_LED3_COLOR_DEFAULT    UINT32_C(0x0)
-       /* Amber */
-       #define HWRM_PORT_LED_CFG_INPUT_LED3_COLOR_AMBER      UINT32_C(0x1)
-       /* Green */
-       #define HWRM_PORT_LED_CFG_INPUT_LED3_COLOR_GREEN      UINT32_C(0x2)
-       /* Green or Amber */
-       #define HWRM_PORT_LED_CFG_INPUT_LED3_COLOR_GREENAMBER UINT32_C(0x3)
-       #define HWRM_PORT_LED_CFG_INPUT_LED3_COLOR_LAST \
-               HWRM_PORT_LED_CFG_INPUT_LED3_COLOR_GREENAMBER
-       uint8_t unused_3;
-       /*
-        * If the LED #3 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED on between cycles.
-        */
-       uint16_t        led3_blink_on;
-       /*
-        * If the LED #3 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED off between cycles.
-        */
-       uint16_t        led3_blink_off;
+       uint64_t        tx_stat_host_addr;
        /*
-        * An identifier for the group of LEDs that LED #3 belongs
-        * to.
-        * If set to 0, then the LED #3 shall not be grouped and
-        * shall be treated as an individual resource.
-        * For all other non-zero values of this field, LED #3 shall
-        * be grouped together with the LEDs with the same group ID
-        * value.
+        * This is the host address where
+        * Rx port statistics will be stored
         */
-       uint8_t led3_group_id;
-       /* Reserved field. */
-       uint8_t rsvd3;
+       uint64_t        rx_stat_host_addr;
 } __attribute__((packed));
 
-/* hwrm_port_led_cfg_output (size:128b/16B) */
-struct hwrm_port_led_cfg_output {
+/* hwrm_port_qstats_output (size:128b/16B) */
+struct hwrm_port_qstats_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -13171,7 +13186,11 @@ struct hwrm_port_led_cfg_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       uint8_t unused_0[7];
+       /* The size of TX port statistics block in bytes. */
+       uint16_t        tx_stat_size;
+       /* The size of RX port statistics block in bytes. */
+       uint16_t        rx_stat_size;
+       uint8_t unused_0[3];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -13182,13 +13201,161 @@ struct hwrm_port_led_cfg_output {
        uint8_t valid;
 } __attribute__((packed));
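
hwrm_port_qstats is a DMA-based query: the host passes port_id plus two
DMA addresses, the firmware writes a tx_port_stats block (408B) and an
rx_port_stats block (528B) to them, reports the byte counts it actually
wrote, and sets valid last. A sketch under those assumptions; hwrm_send()
is a hypothetical transport stand-in, the HWRM_PORT_QSTATS command ID is
assumed to be defined elsewhere in this header, and byte-order conversion
of the little-endian HSI fields is omitted for brevity:

static int query_port_stats(uint16_t port_id,
			    uint64_t tx_dma, uint64_t rx_dma)
{
	struct hwrm_port_qstats_input req = { 0 };
	struct hwrm_port_qstats_output resp = { 0 };

	req.req_type = HWRM_PORT_QSTATS;	/* assumed defined in this header */
	req.port_id = port_id;
	/* DMA-able buffers sized for the stats blocks defined above. */
	req.tx_stat_host_addr = tx_dma;	/* >= sizeof(struct tx_port_stats) */
	req.rx_stat_host_addr = rx_dma;	/* >= sizeof(struct rx_port_stats) */

	if (hwrm_send(&req, sizeof(req), &resp, sizeof(resp)) != 0)	/* hypothetical */
		return -1;

	/* valid is written last; read it as '1' before trusting the rest. */
	if (resp.valid != 1)
		return -1;

	/* resp.tx_stat_size/resp.rx_stat_size report bytes actually written. */
	return 0;
}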
 
-/**********************
- * hwrm_port_led_qcfg *
- **********************/
+/* Port Tx Statistics extended Formats */
+/* tx_port_stats_ext (size:2048b/256B) */
+struct tx_port_stats_ext {
+       /* Total number of tx bytes count on cos queue 0 */
+       uint64_t        tx_bytes_cos0;
+       /* Total number of tx bytes count on cos queue 1 */
+       uint64_t        tx_bytes_cos1;
+       /* Total number of tx bytes count on cos queue 2 */
+       uint64_t        tx_bytes_cos2;
+       /* Total number of tx bytes count on cos queue 3 */
+       uint64_t        tx_bytes_cos3;
+       /* Total number of tx bytes count on cos queue 4 */
+       uint64_t        tx_bytes_cos4;
+       /* Total number of tx bytes count on cos queue 5 */
+       uint64_t        tx_bytes_cos5;
+       /* Total number of tx bytes count on cos queue 6 */
+       uint64_t        tx_bytes_cos6;
+       /* Total number of tx bytes count on cos queue 7 */
+       uint64_t        tx_bytes_cos7;
+       /* Total number of tx packets count on cos queue 0 */
+       uint64_t        tx_packets_cos0;
+       /* Total number of tx packets count on cos queue 1 */
+       uint64_t        tx_packets_cos1;
+       /* Total number of tx packets count on cos queue 2 */
+       uint64_t        tx_packets_cos2;
+       /* Total number of tx packets count on cos queue 3 */
+       uint64_t        tx_packets_cos3;
+       /* Total number of tx packets count on cos queue 4 */
+       uint64_t        tx_packets_cos4;
+       /* Total number of tx packets count on cos queue 5 */
+       uint64_t        tx_packets_cos5;
+       /* Total number of tx packets count on cos queue 6 */
+       uint64_t        tx_packets_cos6;
+       /* Total number of tx packets count on cos queue 7 */
+       uint64_t        tx_packets_cos7;
+       /* Time duration between transmitting an XON -> XOFF and a subsequent XOFF -> XON for priority 0 */
+       uint64_t        pfc_pri0_tx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 0 */
+       uint64_t        pfc_pri0_tx_transitions;
+       /* Time duration between transmitting an XON -> XOFF and a subsequent XOFF -> XON for priority 1 */
+       uint64_t        pfc_pri1_tx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 1 */
+       uint64_t        pfc_pri1_tx_transitions;
+       /* Time duration between transmitting an XON -> XOFF and a subsequent XOFF -> XON for priority 2 */
+       uint64_t        pfc_pri2_tx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 2 */
+       uint64_t        pfc_pri2_tx_transitions;
+       /* Time duration between transmitting an XON -> XOFF and a subsequent XOFF -> XON for priority 3 */
+       uint64_t        pfc_pri3_tx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 3 */
+       uint64_t        pfc_pri3_tx_transitions;
+       /* Time duration between transmitting an XON -> XOFF and a subsequent XOFF -> XON for priority 4 */
+       uint64_t        pfc_pri4_tx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 4 */
+       uint64_t        pfc_pri4_tx_transitions;
+       /* Time duration between transmitting an XON -> XOFF and a subsequent XOFF -> XON for priority 5 */
+       uint64_t        pfc_pri5_tx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 5 */
+       uint64_t        pfc_pri5_tx_transitions;
+       /* Time duration between transmitting an XON -> XOFF and a subsequent XOFF -> XON for priority 6 */
+       uint64_t        pfc_pri6_tx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 6 */
+       uint64_t        pfc_pri6_tx_transitions;
+       /* Time duration between transmitting an XON -> XOFF and a subsequent XOFF -> XON for priority 7 */
+       uint64_t        pfc_pri7_tx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 7 */
+       uint64_t        pfc_pri7_tx_transitions;
+} __attribute__((packed));
+
+/* Port Rx Statistics extended Formats */
+/* rx_port_stats_ext (size:2368b/296B) */
+struct rx_port_stats_ext {
+       /* Number of times link state changed to down */
+       uint64_t        link_down_events;
+       /* Number of times idle rings were found with the pause bit set */
+       uint64_t        continuous_pause_events;
+       /* Number of times the pause bit was cleared and the rings resumed */
+       uint64_t        resume_pause_events;
+       /* Number of times the ROCE cos queue PFC was disabled to avoid a pause flood/burst */
+       uint64_t        continuous_roce_pause_events;
+       /* Number of times the ROCE cos queue PFC was re-enabled */
+       uint64_t        resume_roce_pause_events;
+       /* Total number of rx bytes count on cos queue 0 */
+       uint64_t        rx_bytes_cos0;
+       /* Total number of rx bytes count on cos queue 1 */
+       uint64_t        rx_bytes_cos1;
+       /* Total number of rx bytes count on cos queue 2 */
+       uint64_t        rx_bytes_cos2;
+       /* Total number of rx bytes count on cos queue 3 */
+       uint64_t        rx_bytes_cos3;
+       /* Total number of rx bytes count on cos queue 4 */
+       uint64_t        rx_bytes_cos4;
+       /* Total number of rx bytes count on cos queue 5 */
+       uint64_t        rx_bytes_cos5;
+       /* Total number of rx bytes count on cos queue 6 */
+       uint64_t        rx_bytes_cos6;
+       /* Total number of rx bytes count on cos queue 7 */
+       uint64_t        rx_bytes_cos7;
+       /* Total number of rx packets count on cos queue 0 */
+       uint64_t        rx_packets_cos0;
+       /* Total number of rx packets count on cos queue 1 */
+       uint64_t        rx_packets_cos1;
+       /* Total number of rx packets count on cos queue 2 */
+       uint64_t        rx_packets_cos2;
+       /* Total number of rx packets count on cos queue 3 */
+       uint64_t        rx_packets_cos3;
+       /* Total number of rx packets count on cos queue 4 */
+       uint64_t        rx_packets_cos4;
+       /* Total number of rx packets count on cos queue 5 */
+       uint64_t        rx_packets_cos5;
+       /* Total number of rx packets count on cos queue 6 */
+       uint64_t        rx_packets_cos6;
+       /* Total number of rx packets count on cos queue 7 */
+       uint64_t        rx_packets_cos7;
+       /* Time duration between receiving an XON -> XOFF and a subsequent XOFF -> XON for priority 0 */
+       uint64_t        pfc_pri0_rx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 0 */
+       uint64_t        pfc_pri0_rx_transitions;
+       /* Time duration between receiving an XON -> XOFF and a subsequent XOFF -> XON for priority 1 */
+       uint64_t        pfc_pri1_rx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 1 */
+       uint64_t        pfc_pri1_rx_transitions;
+       /* Time duration between receiving an XON -> XOFF and a subsequent XOFF -> XON for priority 2 */
+       uint64_t        pfc_pri2_rx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 2 */
+       uint64_t        pfc_pri2_rx_transitions;
+       /* Time duration between receiving an XON -> XOFF and a subsequent XOFF -> XON for priority 3 */
+       uint64_t        pfc_pri3_rx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 3 */
+       uint64_t        pfc_pri3_rx_transitions;
+       /* Time duration between receiving an XON -> XOFF and a subsequent XOFF -> XON for priority 4 */
+       uint64_t        pfc_pri4_rx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 4 */
+       uint64_t        pfc_pri4_rx_transitions;
+       /* Time duration between receiving an XON -> XOFF and a subsequent XOFF -> XON for priority 5 */
+       uint64_t        pfc_pri5_rx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 5 */
+       uint64_t        pfc_pri5_rx_transitions;
+       /* Time duration between receiving an XON -> XOFF and a subsequent XOFF -> XON for priority 6 */
+       uint64_t        pfc_pri6_rx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 6 */
+       uint64_t        pfc_pri6_rx_transitions;
+       /* Time duration between receiving an XON -> XOFF and a subsequent XOFF -> XON for priority 7 */
+       uint64_t        pfc_pri7_rx_duration_us;
+       /* Number of times XON -> XOFF and XOFF -> XON transitions occurred for priority 7 */
+       uint64_t        pfc_pri7_rx_transitions;
+} __attribute__((packed));
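
Each priority pairs a cumulative XOFF residence time with a transition
count, so dividing one by the other gives an approximate mean pause
length per PFC event. A small sketch over the rx-side fields; the
averaging is an interpretation of these counters, not a semantic the
firmware defines:

/* Approximate mean XOFF duration in microseconds per PFC event for
 * priority 0, from the paired counters in rx_port_stats_ext. */
static uint64_t avg_pfc_pri0_pause_us(const struct rx_port_stats_ext *s)
{
	if (s->pfc_pri0_rx_transitions == 0)
		return 0;

	return s->pfc_pri0_rx_duration_us / s->pfc_pri0_rx_transitions;
}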
+
+/************************
+ * hwrm_port_qstats_ext *
+ ************************/
 
 
-/* hwrm_port_led_qcfg_input (size:192b/24B) */
-struct hwrm_port_led_qcfg_input {
+/* hwrm_port_qstats_ext_input (size:320b/40B) */
+struct hwrm_port_qstats_ext_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -13216,267 +13383,131 @@ struct hwrm_port_led_qcfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* Port ID of port whose LED configuration is being queried. */
+       /* Port ID of port that is being queried. */
        uint16_t        port_id;
-       uint8_t unused_0[6];
-} __attribute__((packed));
-
-/* hwrm_port_led_qcfg_output (size:448b/56B) */
-struct hwrm_port_led_qcfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
        /*
-        * The number of LEDs that are configured on this port.
-        * Up to 4 LEDs can be returned in the response.
+        * The size of TX port extended
+        * statistics block in bytes.
         */
-       uint8_t num_leds;
-       /* An identifier for the LED #0. */
-       uint8_t led0_id;
-       /* The type of LED #0. */
-       uint8_t led0_type;
-       /* Speed LED */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_TYPE_SPEED    UINT32_C(0x0)
-       /* Activity LED */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_TYPE_ACTIVITY UINT32_C(0x1)
-       /* Invalid */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_TYPE_INVALID  UINT32_C(0xff)
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_TYPE_LAST \
-               HWRM_PORT_LED_QCFG_OUTPUT_LED0_TYPE_INVALID
-       /* The current state of the LED #0. */
-       uint8_t led0_state;
-       /* Default state of the LED */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_DEFAULT  UINT32_C(0x0)
-       /* Off */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_OFF      UINT32_C(0x1)
-       /* On */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_ON       UINT32_C(0x2)
-       /* Blink */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_BLINK    UINT32_C(0x3)
-       /* Blink Alternately */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_BLINKALT UINT32_C(0x4)
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_LAST \
-               HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_BLINKALT
-       /* The color of LED #0. */
-       uint8_t led0_color;
-       /* Default */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_COLOR_DEFAULT    UINT32_C(0x0)
-       /* Amber */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_COLOR_AMBER      UINT32_C(0x1)
-       /* Green */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_COLOR_GREEN      UINT32_C(0x2)
-       /* Green or Amber */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_COLOR_GREENAMBER UINT32_C(0x3)
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_COLOR_LAST \
-               HWRM_PORT_LED_QCFG_OUTPUT_LED0_COLOR_GREENAMBER
-       uint8_t unused_0;
+       uint16_t        tx_stat_size;
        /*
-        * If the LED #0 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED on between cycles.
+        * The size of RX port extended
+        * statistics block in bytes.
         */
-       uint16_t        led0_blink_on;
+       uint16_t        rx_stat_size;
+       uint8_t unused_0[2];
        /*
-        * If the LED #0 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED off between cycles.
+        * This is the host address where
+        * Tx port statistics will be stored
         */
-       uint16_t        led0_blink_off;
+       uint64_t        tx_stat_host_addr;
        /*
-        * An identifier for the group of LEDs that LED #0 belongs
-        * to.
-        * If set to 0, then the LED #0 is not grouped.
-        * For all other non-zero values of this field, LED #0 is
-        * grouped together with the LEDs with the same group ID
-        * value.
+        * This is the host address where
+        * Rx port statistics will be stored
         */
-       uint8_t led0_group_id;
-       /* An identifier for the LED #1. */
-       uint8_t led1_id;
-       /* The type of LED #1. */
-       uint8_t led1_type;
-       /* Speed LED */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_TYPE_SPEED    UINT32_C(0x0)
-       /* Activity LED */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_TYPE_ACTIVITY UINT32_C(0x1)
-       /* Invalid */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_TYPE_INVALID  UINT32_C(0xff)
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_TYPE_LAST \
-               HWRM_PORT_LED_QCFG_OUTPUT_LED1_TYPE_INVALID
-       /* The current state of the LED #1. */
-       uint8_t led1_state;
-       /* Default state of the LED */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_STATE_DEFAULT  UINT32_C(0x0)
-       /* Off */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_STATE_OFF      UINT32_C(0x1)
-       /* On */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_STATE_ON       UINT32_C(0x2)
-       /* Blink */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_STATE_BLINK    UINT32_C(0x3)
-       /* Blink Alternately */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_STATE_BLINKALT UINT32_C(0x4)
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_STATE_LAST \
-               HWRM_PORT_LED_QCFG_OUTPUT_LED1_STATE_BLINKALT
-       /* The color of LED #1. */
-       uint8_t led1_color;
-       /* Default */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_COLOR_DEFAULT    UINT32_C(0x0)
-       /* Amber */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_COLOR_AMBER      UINT32_C(0x1)
-       /* Green */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_COLOR_GREEN      UINT32_C(0x2)
-       /* Green or Amber */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_COLOR_GREENAMBER UINT32_C(0x3)
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_COLOR_LAST \
-               HWRM_PORT_LED_QCFG_OUTPUT_LED1_COLOR_GREENAMBER
-       uint8_t unused_1;
+       uint64_t        rx_stat_host_addr;
+} __attribute__((packed));
+
+/* hwrm_port_qstats_ext_output (size:128b/16B) */
+struct hwrm_port_qstats_ext_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       /* The size of TX port statistics block in bytes. */
+       uint16_t        tx_stat_size;
+       /* The size of RX port statistics block in bytes. */
+       uint16_t        rx_stat_size;
+       /* Total number of active cos queues available. */
+       uint16_t        total_active_cos_queues;
+       uint8_t flags;
        /*
-        * If the LED #1 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED on between cycles.
+        * If set to '1', this bit indicates that clearing
+        * RoCE-specific counters is supported.
         */
-       uint16_t        led1_blink_on;
+       #define HWRM_PORT_QSTATS_EXT_OUTPUT_FLAGS_CLEAR_ROCE_COUNTERS_SUPPORTED \
+               UINT32_C(0x1)
        /*
-        * If the LED #1 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED off between cycles.
+        * This field is used in Output records to indicate that the output
+        * has been completely written to RAM. A value of '1' means that the
+        * output has been completely written.
+        * When writing a command completion or response to an internal
+        * processor, the order of writes has to be such that this field is
+        * written last.
         */
-       uint16_t        led1_blink_off;
+       uint8_t valid;
+} __attribute__((packed));
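
The valid byte that closes every HWRM response implements the completion
handshake described in the comment above: firmware writes it last, so a
driver can poll it and then order all subsequent reads behind it. A minimal
polling sketch, assuming DPDK's rte_io_rmb() and rte_delay_us(); the
timeout value and function name are arbitrary examples:

#include <errno.h>
#include <rte_atomic.h>		/* rte_io_rmb() */
#include <rte_cycles.h>		/* rte_delay_us() */

#define HWRM_VALID_TIMEOUT_US	500000	/* example timeout, not from spec */

static int
hwrm_wait_response(volatile struct hwrm_port_qstats_ext_output *resp)
{
	unsigned int us;

	for (us = 0; us < HWRM_VALID_TIMEOUT_US; us++) {
		/* Firmware writes 'valid' last, so a nonzero value means
		 * the rest of the response has already landed in RAM ...
		 */
		if (resp->valid) {
			/* ... but order the reads that follow after it. */
			rte_io_rmb();
			return 0;
		}
		rte_delay_us(1);
	}
	return -ETIMEDOUT;
}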
+
+/*************************
+ * hwrm_port_lpbk_qstats *
+ *************************/
+
+
+/* hwrm_port_lpbk_qstats_input (size:128b/16B) */
+struct hwrm_port_lpbk_qstats_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * An identifier for the group of LEDs that LED #1 belongs
-        * to.
-        * If set to 0, then the LED #1 is not grouped.
-        * For all other non-zero values of this field, LED #1 is
-        * grouped together with the LEDs with the same group ID
-        * value.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       uint8_t led1_group_id;
-       /* An identifier for the LED #2. */
-       uint8_t led2_id;
-       /* The type of LED #2. */
-       uint8_t led2_type;
-       /* Speed LED */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_TYPE_SPEED    UINT32_C(0x0)
-       /* Activity LED */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_TYPE_ACTIVITY UINT32_C(0x1)
-       /* Invalid */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_TYPE_INVALID  UINT32_C(0xff)
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_TYPE_LAST \
-               HWRM_PORT_LED_QCFG_OUTPUT_LED2_TYPE_INVALID
-       /* The current state of the LED #2. */
-       uint8_t led2_state;
-       /* Default state of the LED */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_STATE_DEFAULT  UINT32_C(0x0)
-       /* Off */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_STATE_OFF      UINT32_C(0x1)
-       /* On */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_STATE_ON       UINT32_C(0x2)
-       /* Blink */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_STATE_BLINK    UINT32_C(0x3)
-       /* Blink Alternately */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_STATE_BLINKALT UINT32_C(0x4)
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_STATE_LAST \
-               HWRM_PORT_LED_QCFG_OUTPUT_LED2_STATE_BLINKALT
-       /* The color of LED #2. */
-       uint8_t led2_color;
-       /* Default */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_COLOR_DEFAULT    UINT32_C(0x0)
-       /* Amber */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_COLOR_AMBER      UINT32_C(0x1)
-       /* Green */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_COLOR_GREEN      UINT32_C(0x2)
-       /* Green or Amber */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_COLOR_GREENAMBER UINT32_C(0x3)
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_COLOR_LAST \
-               HWRM_PORT_LED_QCFG_OUTPUT_LED2_COLOR_GREENAMBER
-       uint8_t unused_2;
+       uint16_t        cmpl_ring;
        /*
-        * If the LED #2 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED on between cycles.
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       uint16_t        led2_blink_on;
+       uint16_t        seq_id;
        /*
-        * If the LED #2 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED off between cycles.
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       uint16_t        led2_blink_off;
+       uint16_t        target_id;
        /*
-        * An identifier for the group of LEDs that LED #2 belongs
-        * to.
-        * If set to 0, then the LED #2 is not grouped.
-        * For all other non-zero values of this field, LED #2 is
-        * grouped together with the LEDs with the same group ID
-        * value.
+        * The physical address of a host buffer to which the command's
+        * response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       uint8_t led2_group_id;
-       /* An identifier for the LED #3. */
-       uint8_t led3_id;
-       /* The type of LED #3. */
-       uint8_t led3_type;
-       /* Speed LED */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_TYPE_SPEED    UINT32_C(0x0)
-       /* Activity LED */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_TYPE_ACTIVITY UINT32_C(0x1)
-       /* Invalid */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_TYPE_INVALID  UINT32_C(0xff)
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_TYPE_LAST \
-               HWRM_PORT_LED_QCFG_OUTPUT_LED3_TYPE_INVALID
-       /* The current state of the LED #3. */
-       uint8_t led3_state;
-       /* Default state of the LED */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_STATE_DEFAULT  UINT32_C(0x0)
-       /* Off */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_STATE_OFF      UINT32_C(0x1)
-       /* On */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_STATE_ON       UINT32_C(0x2)
-       /* Blink */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_STATE_BLINK    UINT32_C(0x3)
-       /* Blink Alternately */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_STATE_BLINKALT UINT32_C(0x4)
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_STATE_LAST \
-               HWRM_PORT_LED_QCFG_OUTPUT_LED3_STATE_BLINKALT
-       /* The color of LED #3. */
-       uint8_t led3_color;
-       /* Default */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_COLOR_DEFAULT    UINT32_C(0x0)
-       /* Amber */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_COLOR_AMBER      UINT32_C(0x1)
-       /* Green */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_COLOR_GREEN      UINT32_C(0x2)
-       /* Green or Amber */
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_COLOR_GREENAMBER UINT32_C(0x3)
-       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_COLOR_LAST \
-               HWRM_PORT_LED_QCFG_OUTPUT_LED3_COLOR_GREENAMBER
-       uint8_t unused_3;
-       /*
-        * If the LED #3 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED on between cycles.
-        */
-       uint16_t        led3_blink_on;
-       /*
-        * If the LED #3 state is "blink" or "blinkalt", then
-        * this field represents the requested time in milliseconds
-        * to keep LED off between cycles.
-        */
-       uint16_t        led3_blink_off;
-       /*
-        * An identifier for the group of LEDs that LED #3 belongs
-        * to.
-        * If set to 0, then the LED #3 is not grouped.
-        * For all other non-zero values of this field, LED #3 is
-        * grouped together with the LEDs with the same group ID
-        * value.
-        */
-       uint8_t led3_group_id;
-       uint8_t unused_4[6];
+       uint64_t        resp_addr;
+} __attribute__((packed));
+
+/* hwrm_port_lpbk_qstats_output (size:768b/96B) */
+struct hwrm_port_lpbk_qstats_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       /* Number of transmitted unicast frames */
+       uint64_t        lpbk_ucast_frames;
+       /* Number of transmitted multicast frames */
+       uint64_t        lpbk_mcast_frames;
+       /* Number of transmitted broadcast frames */
+       uint64_t        lpbk_bcast_frames;
+       /* Number of transmitted bytes for unicast traffic */
+       uint64_t        lpbk_ucast_bytes;
+       /* Number of transmitted bytes for multicast traffic */
+       uint64_t        lpbk_mcast_bytes;
+       /* Number of transmitted bytes for broadcast traffic */
+       uint64_t        lpbk_bcast_bytes;
+       /* Total Tx Drops for loopback traffic reported by STATS block */
+       uint64_t        tx_stat_discard;
+       /* Total Tx Error Drops for loopback traffic reported by STATS block */
+       uint64_t        tx_stat_error;
+       /* Total Rx Drops for loopback traffic reported by STATS block */
+       uint64_t        rx_stat_discard;
+       /* Total Rx Error Drops for loopback traffic reported by STATS block */
+       uint64_t        rx_stat_error;
+       uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -13488,12 +13519,12 @@ struct hwrm_port_led_qcfg_output {
 } __attribute__((packed));
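
Counters in the loopback statistics response above arrive in the firmware's
little-endian layout and must be byte-swapped before use. A small sketch
that folds the frame and byte counters into totals (the helper name and the
aggregation itself are illustrative, not part of the interface):

#include <stdint.h>
#include <rte_byteorder.h>

static void
lpbk_qstats_totals(const struct hwrm_port_lpbk_qstats_output *resp,
		   uint64_t *frames, uint64_t *bytes)
{
	*frames = rte_le_to_cpu_64(resp->lpbk_ucast_frames) +
		  rte_le_to_cpu_64(resp->lpbk_mcast_frames) +
		  rte_le_to_cpu_64(resp->lpbk_bcast_frames);
	*bytes = rte_le_to_cpu_64(resp->lpbk_ucast_bytes) +
		 rte_le_to_cpu_64(resp->lpbk_mcast_bytes) +
		 rte_le_to_cpu_64(resp->lpbk_bcast_bytes);
}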
 
 /***********************
- * hwrm_port_led_qcaps *
+ * hwrm_port_clr_stats *
  ***********************/
 
 
-/* hwrm_port_led_qcaps_input (size:192b/24B) */
-struct hwrm_port_led_qcaps_input {
+/* hwrm_port_clr_stats_input (size:192b/24B) */
+struct hwrm_port_clr_stats_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -13521,13 +13552,24 @@ struct hwrm_port_led_qcaps_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* Port ID of port whose LED configuration is being queried. */
+       /* Port ID of port that is being queried. */
        uint16_t        port_id;
-       uint8_t unused_0[6];
+       uint8_t flags;
+       /*
+        * If set to 1, the following RoCE-specific counters are cleared:
+        * * RoCE-associated TX/RX CoS counters
+        * * CNP-associated TX/RX CoS counters
+        * * RoCE/CNP-specific TX/RX flow counters
+        * Firmware determines the RoCE/CNP CoS queue based on the QoS profile.
+        * This flag is honored only when RoCE is enabled on that port.
+        */
+       #define HWRM_PORT_CLR_STATS_INPUT_FLAGS_ROCE_COUNTERS     UINT32_C(0x1)
+       uint8_t unused_0[5];
 } __attribute__((packed));
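
Building a clear-stats request then amounts to setting port_id and, when the
caller knows RoCE is active on the port, the ROCE_COUNTERS flag; per the
comment above, firmware honors the flag only when RoCE is enabled. A hedged
sketch (the helper name and bool parameter are illustrative):

#include <stdbool.h>
#include <string.h>
#include <rte_byteorder.h>

static void
port_clr_stats_prepare(struct hwrm_port_clr_stats_input *req,
		       uint16_t port_id, bool clear_roce)
{
	memset(req, 0, sizeof(*req));
	req->port_id = rte_cpu_to_le_16(port_id);
	if (clear_roce)
		req->flags |= HWRM_PORT_CLR_STATS_INPUT_FLAGS_ROCE_COUNTERS;
}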
 
-/* hwrm_port_led_qcaps_output (size:384b/48B) */
-struct hwrm_port_led_qcaps_output {
+/* hwrm_port_clr_stats_output (size:128b/16B) */
+struct hwrm_port_clr_stats_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -13536,750 +13578,678 @@ struct hwrm_port_led_qcaps_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
+       uint8_t unused_0[7];
        /*
-        * The number of LEDs that are configured on this port.
-        * Up to 4 LEDs can be returned in the response.
-        */
-       uint8_t num_leds;
-       /* Reserved for future use. */
-       uint8_t unused[3];
-       /* An identifier for the LED #0. */
-       uint8_t led0_id;
-       /* The type of LED #0. */
-       uint8_t led0_type;
-       /* Speed LED */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_TYPE_SPEED    UINT32_C(0x0)
-       /* Activity LED */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_TYPE_ACTIVITY UINT32_C(0x1)
-       /* Invalid */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_TYPE_INVALID  UINT32_C(0xff)
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_TYPE_LAST \
-               HWRM_PORT_LED_QCAPS_OUTPUT_LED0_TYPE_INVALID
-       /*
-        * An identifier for the group of LEDs that LED #0 belongs
-        * to.
-        * If set to 0, then the LED #0 cannot be grouped.
-        * For all other non-zero values of this field, LED #0 is
-        * grouped together with the LEDs with the same group ID
-        * value.
+        * This field is used in Output records to indicate that the output
+        * has been completely written to RAM. A value of '1' means that the
+        * output has been completely written.
+        * When writing a command completion or response to an internal
+        * processor, the order of writes has to be such that this field is
+        * written last.
         */
-       uint8_t led0_group_id;
-       uint8_t unused_0;
-       /* The states supported by LED #0. */
-       uint16_t        led0_state_caps;
+       uint8_t valid;
+} __attribute__((packed));
+
+/***********************
+ * hwrm_port_phy_qcaps *
+ ***********************/
+
+
+/* hwrm_port_phy_qcaps_input (size:192b/24B) */
+struct hwrm_port_phy_qcaps_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * If set to 1, this LED is enabled.
-        * If set to 0, this LED is disabled.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_STATE_CAPS_ENABLED \
-               UINT32_C(0x1)
+       uint16_t        cmpl_ring;
        /*
-        * If set to 1, off state is supported on this LED.
-        * If set to 0, off state is not supported on this LED.
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_STATE_CAPS_OFF_SUPPORTED \
-               UINT32_C(0x2)
+       uint16_t        seq_id;
        /*
-        * If set to 1, on state is supported on this LED.
-        * If set to 0, on state is not supported on this LED.
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_STATE_CAPS_ON_SUPPORTED \
-               UINT32_C(0x4)
+       uint16_t        target_id;
        /*
-        * If set to 1, blink state is supported on this LED.
-        * If set to 0, blink state is not supported on this LED.
+        * The physical address of a host buffer to which the command's
+        * response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_STATE_CAPS_BLINK_SUPPORTED \
-               UINT32_C(0x8)
+       uint64_t        resp_addr;
+       /* Port ID of port that is being queried. */
+       uint16_t        port_id;
+       uint8_t unused_0[6];
+} __attribute__((packed));
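
Every *_input structure in this file opens with the same five-field request
header whose semantics are spelled out in the comments above. A sketch of
filling it, assuming DPDK byte-order helpers; the all-ones values mirror the
bnxt driver's convention for "no completion ring" and "target is the HWRM
itself", but treat the constants and the sequence handling here as
assumptions:

#include <string.h>
#include <rte_byteorder.h>

static void
hwrm_req_hdr_init(struct hwrm_port_phy_qcaps_input *req,
		  uint16_t req_type, uint16_t seq, uint64_t resp_iova)
{
	memset(req, 0, sizeof(*req));
	req->req_type = rte_cpu_to_le_16(req_type);
	req->cmpl_ring = rte_cpu_to_le_16(0xffff);	/* no completion ring */
	req->seq_id = rte_cpu_to_le_16(seq);		/* opaque to firmware */
	req->target_id = rte_cpu_to_le_16(0xffff);	/* 0xFFFF = the HWRM */
	req->resp_addr = rte_cpu_to_le_64(resp_iova);	/* DMA-able buffer */
}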
+
+/* hwrm_port_phy_qcaps_output (size:192b/24B) */
+struct hwrm_port_phy_qcaps_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       /* PHY capability flags */
+       uint8_t flags;
        /*
-        * If set to 1, blink_alt state is supported on this LED.
-        * If set to 0, blink_alt state is not supported on this LED.
+        * If set to 1, then this field indicates that the
+        * link is capable of supporting EEE.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_STATE_CAPS_BLINK_ALT_SUPPORTED \
-               UINT32_C(0x10)
-       /* The colors supported by LED #0. */
-       uint16_t        led0_color_caps;
-       /* reserved. */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_COLOR_CAPS_RSVD \
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_FLAGS_EEE_SUPPORTED \
                UINT32_C(0x1)
        /*
-        * If set to 1, Amber color is supported on this LED.
-        * If set to 0, Amber color is not supported on this LED.
+        * If set to 1, then this field indicates that the
+        * PHY is capable of supporting external loopback.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_COLOR_CAPS_AMBER_SUPPORTED \
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_FLAGS_EXTERNAL_LPBK_SUPPORTED \
                UINT32_C(0x2)
        /*
-        * If set to 1, Green color is supported on this LED.
-        * If set to 0, Green color is not supported on this LED.
-        */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_COLOR_CAPS_GREEN_SUPPORTED \
-               UINT32_C(0x4)
-       /* An identifier for the LED #1. */
-       uint8_t led1_id;
-       /* The type of LED #1. */
-       uint8_t led1_type;
-       /* Speed LED */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_TYPE_SPEED    UINT32_C(0x0)
-       /* Activity LED */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_TYPE_ACTIVITY UINT32_C(0x1)
-       /* Invalid */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_TYPE_INVALID  UINT32_C(0xff)
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_TYPE_LAST \
-               HWRM_PORT_LED_QCAPS_OUTPUT_LED1_TYPE_INVALID
-       /*
-        * An identifier for the group of LEDs that LED #1 belongs
-        * to.
-        * If set to 0, then the LED #0 cannot be grouped.
-        * For all other non-zero values of this field, LED #0 is
-        * grouped together with the LEDs with the same group ID
-        * value.
+        * Reserved field. The HWRM shall set this field to 0.
+        * An HWRM client shall ignore this field.
         */
-       uint8_t led1_group_id;
-       uint8_t unused_1;
-       /* The states supported by LED #1. */
-       uint16_t        led1_state_caps;
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_FLAGS_RSVD1_MASK \
+               UINT32_C(0xfc)
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_FLAGS_RSVD1_SFT                   2
+       /* Number of front panel ports for this device. */
+       uint8_t port_cnt;
+       /* Not supported or unknown */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_PORT_CNT_UNKNOWN UINT32_C(0x0)
+       /* single port device */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_PORT_CNT_1       UINT32_C(0x1)
+       /* 2-port device */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_PORT_CNT_2       UINT32_C(0x2)
+       /* 3-port device */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_PORT_CNT_3       UINT32_C(0x3)
+       /* 4-port device */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_PORT_CNT_4       UINT32_C(0x4)
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_PORT_CNT_LAST \
+               HWRM_PORT_PHY_QCAPS_OUTPUT_PORT_CNT_4
        /*
-        * If set to 1, this LED is enabled.
-        * If set to 0, this LED is disabled.
+        * This is a bit mask to indicate what speeds are supported
+        * as forced speeds on this link.
+        * For each speed that can be forced on this link, the
+        * corresponding mask bit shall be set to '1'.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_STATE_CAPS_ENABLED \
+       uint16_t        supported_speeds_force_mode;
+       /* 100Mb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_100MBHD \
                UINT32_C(0x1)
-       /*
-        * If set to 1, off state is supported on this LED.
-        * If set to 0, off state is not supported on this LED.
-        */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_STATE_CAPS_OFF_SUPPORTED \
+       /* 100Mb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_100MB \
                UINT32_C(0x2)
-       /*
-        * If set to 1, on state is supported on this LED.
-        * If set to 0, on state is not supported on this LED.
-        */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_STATE_CAPS_ON_SUPPORTED \
+       /* 1Gb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_1GBHD \
                UINT32_C(0x4)
+       /* 1Gb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_1GB \
+               UINT32_C(0x8)
+       /* 2Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_2GB \
+               UINT32_C(0x10)
+       /* 2.5Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_2_5GB \
+               UINT32_C(0x20)
+       /* 10Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_10GB \
+               UINT32_C(0x40)
+       /* 20Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_20GB \
+               UINT32_C(0x80)
+       /* 25Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_25GB \
+               UINT32_C(0x100)
+       /* 40Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_40GB \
+               UINT32_C(0x200)
+       /* 50Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_50GB \
+               UINT32_C(0x400)
+       /* 100Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_100GB \
+               UINT32_C(0x800)
+       /* 10Mb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_10MBHD \
+               UINT32_C(0x1000)
+       /* 10Mb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_10MB \
+               UINT32_C(0x2000)
        /*
-        * If set to 1, blink state is supported on this LED.
-        * If set to 0, blink state is not supported on this LED.
+        * This is a bit mask to indicate what speeds are supported
+        * for autonegotiation on this link.
+        * For each speed that can be autonegotiated on this link, the
+        * corresponding mask bit shall be set to '1'.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_STATE_CAPS_BLINK_SUPPORTED \
+       uint16_t        supported_speeds_auto_mode;
+       /* 100Mb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_100MBHD \
+               UINT32_C(0x1)
+       /* 100Mb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_100MB \
+               UINT32_C(0x2)
+       /* 1Gb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_1GBHD \
+               UINT32_C(0x4)
+       /* 1Gb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_1GB \
                UINT32_C(0x8)
+       /* 2Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_2GB \
+               UINT32_C(0x10)
+       /* 2.5Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_2_5GB \
+               UINT32_C(0x20)
+       /* 10Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_10GB \
+               UINT32_C(0x40)
+       /* 20Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_20GB \
+               UINT32_C(0x80)
+       /* 25Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_25GB \
+               UINT32_C(0x100)
+       /* 40Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_40GB \
+               UINT32_C(0x200)
+       /* 50Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_50GB \
+               UINT32_C(0x400)
+       /* 100Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_100GB \
+               UINT32_C(0x800)
+       /* 10Mb link speed (Half-duplex) */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_10MBHD \
+               UINT32_C(0x1000)
+       /* 10Mb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_AUTO_MODE_10MB \
+               UINT32_C(0x2000)
        /*
-        * If set to 1, blink_alt state is supported on this LED.
-        * If set to 0, blink_alt state is not supported on this LED.
+        * This is a bit mask to indicate what speeds are supported
+        * for EEE on this link.
+        * For each speed that can be autonegotiated when EEE is enabled
+        * on this link, the corresponding mask bit shall be set to '1'.
+        * This field is only valid when eee_supported is set to '1'.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_STATE_CAPS_BLINK_ALT_SUPPORTED \
-               UINT32_C(0x10)
-       /* The colors supported by LED #1. */
-       uint16_t        led1_color_caps;
-       /* reserved. */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_COLOR_CAPS_RSVD \
+       uint16_t        supported_speeds_eee_mode;
+       /* Reserved */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_EEE_MODE_RSVD1 \
                UINT32_C(0x1)
+       /* 100Mb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_EEE_MODE_100MB \
+               UINT32_C(0x2)
+       /* Reserved */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_EEE_MODE_RSVD2 \
+               UINT32_C(0x4)
+       /* 1Gb link speed (Full-duplex) */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_EEE_MODE_1GB \
+               UINT32_C(0x8)
+       /* Reserved */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_EEE_MODE_RSVD3 \
+               UINT32_C(0x10)
+       /* Reserved */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_EEE_MODE_RSVD4 \
+               UINT32_C(0x20)
+       /* 10Gb link speed */
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_EEE_MODE_10GB \
+               UINT32_C(0x40)
+       uint32_t        tx_lpi_timer_low;
        /*
-        * If set to 1, Amber color is supported on this LED.
-        * If set to 0, Amber color is not supported on this LED.
+        * The lowest value of TX LPI timer that can be set on this link
+        * when EEE is enabled. This value is in microseconds.
+        * This field is valid only when eee_supported is set to '1'.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_COLOR_CAPS_AMBER_SUPPORTED \
-               UINT32_C(0x2)
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_TX_LPI_TIMER_LOW_MASK \
+               UINT32_C(0xffffff)
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_TX_LPI_TIMER_LOW_SFT 0
        /*
-        * If set to 1, Green color is supported on this LED.
-        * If set to 0, Green color is not supported on this LED.
+        * Reserved field. The HWRM shall set this field to 0.
+        * An HWRM client shall ignore this field.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_COLOR_CAPS_GREEN_SUPPORTED \
-               UINT32_C(0x4)
-       /* An identifier for the LED #2. */
-       uint8_t led2_id;
-       /* The type of LED #2. */
-       uint8_t led2_type;
-       /* Speed LED */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_TYPE_SPEED    UINT32_C(0x0)
-       /* Activity LED */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_TYPE_ACTIVITY UINT32_C(0x1)
-       /* Invalid */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_TYPE_INVALID  UINT32_C(0xff)
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_TYPE_LAST \
-               HWRM_PORT_LED_QCAPS_OUTPUT_LED2_TYPE_INVALID
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_RSVD2_MASK \
+               UINT32_C(0xff000000)
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_RSVD2_SFT            24
+       uint32_t        valid_tx_lpi_timer_high;
        /*
-        * An identifier for the group of LEDs that LED #0 belongs
-        * to.
-        * If set to 0, then the LED #0 cannot be grouped.
-        * For all other non-zero values of this field, LED #0 is
-        * grouped together with the LEDs with the same group ID
-        * value.
+        * The highest value of TX LPI timer that can be set on this link
+        * when EEE is enabled. This value is in microseconds.
+        * This field is valid only when eee_supported is set to '1'.
         */
-       uint8_t led2_group_id;
-       uint8_t unused_2;
-       /* The states supported by LED #2. */
-       uint16_t        led2_state_caps;
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_TX_LPI_TIMER_HIGH_MASK \
+               UINT32_C(0xffffff)
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_TX_LPI_TIMER_HIGH_SFT 0
        /*
-        * If set to 1, this LED is enabled.
-        * If set to 0, this LED is disabled.
+        * This field is used in Output records to indicate that the output
+        * has been completely written to RAM. A value of '1' means that the
+        * output has been completely written.
+        * When writing a command completion or response to an internal
+        * processor, the order of writes has to be such that this field is
+        * written last.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_STATE_CAPS_ENABLED \
-               UINT32_C(0x1)
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_VALID_MASK \
+               UINT32_C(0xff000000)
+       #define HWRM_PORT_PHY_QCAPS_OUTPUT_VALID_SFT             24
+} __attribute__((packed));
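
The MASK/SFT macro pairs above describe sub-fields packed into a single
little-endian word; a field is recovered as (word & MASK) >> SFT. A decoding
sketch for a few of the capability fields (the printf reporting is a
stand-in for whatever a real driver would do with the values):

#include <stdio.h>
#include <stdint.h>
#include <rte_byteorder.h>

static void
phy_qcaps_decode(const struct hwrm_port_phy_qcaps_output *resp)
{
	uint16_t force = rte_le_to_cpu_16(resp->supported_speeds_force_mode);
	uint32_t word;

	if (force & HWRM_PORT_PHY_QCAPS_OUTPUT_SUPPORTED_SPEEDS_FORCE_MODE_10GB)
		printf("10Gb forced speed supported\n");

	/* tx_lpi_timer_low occupies bits 23:0 of its word. */
	word = rte_le_to_cpu_32(resp->tx_lpi_timer_low);
	printf("lowest TX LPI timer: %u us\n",
	       (unsigned int)((word &
			HWRM_PORT_PHY_QCAPS_OUTPUT_TX_LPI_TIMER_LOW_MASK) >>
			HWRM_PORT_PHY_QCAPS_OUTPUT_TX_LPI_TIMER_LOW_SFT));

	/* The 'valid' byte shares its word with tx_lpi_timer_high (31:24). */
	word = rte_le_to_cpu_32(resp->valid_tx_lpi_timer_high);
	if ((word & HWRM_PORT_PHY_QCAPS_OUTPUT_VALID_MASK) >>
	    HWRM_PORT_PHY_QCAPS_OUTPUT_VALID_SFT)
		printf("response completely written\n");
}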
+
+/*********************
+ * hwrm_port_led_cfg *
+ *********************/
+
+
+/* hwrm_port_led_cfg_input (size:512b/64B) */
+struct hwrm_port_led_cfg_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * If set to 1, off state is supported on this LED.
-        * If set to 0, off state is not supported on this LED.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_STATE_CAPS_OFF_SUPPORTED \
-               UINT32_C(0x2)
+       uint16_t        cmpl_ring;
        /*
-        * If set to 1, on state is supported on this LED.
-        * If set to 0, on state is not supported on this LED.
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_STATE_CAPS_ON_SUPPORTED \
-               UINT32_C(0x4)
+       uint16_t        seq_id;
        /*
-        * If set to 1, blink state is supported on this LED.
-        * If set to 0, blink state is not supported on this LED.
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_STATE_CAPS_BLINK_SUPPORTED \
-               UINT32_C(0x8)
+       uint16_t        target_id;
        /*
-        * If set to 1, blink_alt state is supported on this LED.
-        * If set to 0, blink_alt state is not supported on this LED.
+        * The physical address of a host buffer to which the command's
+        * response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_STATE_CAPS_BLINK_ALT_SUPPORTED \
-               UINT32_C(0x10)
-       /* The colors supported by LED #2. */
-       uint16_t        led2_color_caps;
-       /* reserved. */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_COLOR_CAPS_RSVD \
+       uint64_t        resp_addr;
+       uint32_t        enables;
+       /*
+        * This bit must be '1' for the led0_id field to be
+        * configured.
+        */
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED0_ID \
                UINT32_C(0x1)
        /*
-        * If set to 1, Amber color is supported on this LED.
-        * If set to 0, Amber color is not supported on this LED.
+        * This bit must be '1' for the led0_state field to be
+        * configured.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_COLOR_CAPS_AMBER_SUPPORTED \
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED0_STATE \
                UINT32_C(0x2)
        /*
-        * If set to 1, Green color is supported on this LED.
-        * If set to 0, Green color is not supported on this LED.
+        * This bit must be '1' for the led0_color field to be
+        * configured.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_COLOR_CAPS_GREEN_SUPPORTED \
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED0_COLOR \
                UINT32_C(0x4)
-       /* An identifier for the LED #3. */
-       uint8_t led3_id;
-       /* The type of LED #3. */
-       uint8_t led3_type;
-       /* Speed LED */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_TYPE_SPEED    UINT32_C(0x0)
-       /* Activity LED */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_TYPE_ACTIVITY UINT32_C(0x1)
-       /* Invalid */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_TYPE_INVALID  UINT32_C(0xff)
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_TYPE_LAST \
-               HWRM_PORT_LED_QCAPS_OUTPUT_LED3_TYPE_INVALID
        /*
-        * An identifier for the group of LEDs that LED #3 belongs
-        * to.
-        * If set to 0, then the LED #0 cannot be grouped.
-        * For all other non-zero values of this field, LED #0 is
-        * grouped together with the LEDs with the same group ID
-        * value.
+        * This bit must be '1' for the led0_blink_on field to be
+        * configured.
         */
-       uint8_t led3_group_id;
-       uint8_t unused_3;
-       /* The states supported by LED #3. */
-       uint16_t        led3_state_caps;
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED0_BLINK_ON \
+               UINT32_C(0x8)
        /*
-        * If set to 1, this LED is enabled.
-        * If set to 0, this LED is disabled.
+        * This bit must be '1' for the led0_blink_off field to be
+        * configured.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_STATE_CAPS_ENABLED \
-               UINT32_C(0x1)
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED0_BLINK_OFF \
+               UINT32_C(0x10)
        /*
-        * If set to 1, off state is supported on this LED.
-        * If set to 0, off state is not supported on this LED.
+        * This bit must be '1' for the led0_group_id field to be
+        * configured.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_STATE_CAPS_OFF_SUPPORTED \
-               UINT32_C(0x2)
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED0_GROUP_ID \
+               UINT32_C(0x20)
        /*
-        * If set to 1, on state is supported on this LED.
-        * If set to 0, on state is not supported on this LED.
+        * This bit must be '1' for the led1_id field to be
+        * configured.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_STATE_CAPS_ON_SUPPORTED \
-               UINT32_C(0x4)
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED1_ID \
+               UINT32_C(0x40)
        /*
-        * If set to 1, blink state is supported on this LED.
-        * If set to 0, blink state is not supported on this LED.
+        * This bit must be '1' for the led1_state field to be
+        * configured.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_STATE_CAPS_BLINK_SUPPORTED \
-               UINT32_C(0x8)
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED1_STATE \
+               UINT32_C(0x80)
        /*
-        * If set to 1, blink_alt state is supported on this LED.
-        * If set to 0, blink_alt state is not supported on this LED.
+        * This bit must be '1' for the led1_color field to be
+        * configured.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_STATE_CAPS_BLINK_ALT_SUPPORTED \
-               UINT32_C(0x10)
-       /* The colors supported by LED #3. */
-       uint16_t        led3_color_caps;
-       /* reserved. */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_COLOR_CAPS_RSVD \
-               UINT32_C(0x1)
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED1_COLOR \
+               UINT32_C(0x100)
        /*
-        * If set to 1, Amber color is supported on this LED.
-        * If set to 0, Amber color is not supported on this LED.
+        * This bit must be '1' for the led1_blink_on field to be
+        * configured.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_COLOR_CAPS_AMBER_SUPPORTED \
-               UINT32_C(0x2)
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED1_BLINK_ON \
+               UINT32_C(0x200)
        /*
-        * If set to 1, Green color is supported on this LED.
-        * If set to 0, Green color is not supported on this LED.
+        * This bit must be '1' for the led1_blink_off field to be
+        * configured.
         */
-       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_COLOR_CAPS_GREEN_SUPPORTED \
-               UINT32_C(0x4)
-       uint8_t unused_4[3];
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED1_BLINK_OFF \
+               UINT32_C(0x400)
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * This bit must be '1' for the led1_group_id field to be
+        * configured.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/***********************
- * hwrm_queue_qportcfg *
- ***********************/
-
-
-/* hwrm_queue_qportcfg_input (size:192b/24B) */
-struct hwrm_queue_qportcfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED1_GROUP_ID \
+               UINT32_C(0x800)
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * This bit must be '1' for the led2_id field to be
+        * configured.
         */
-       uint16_t        cmpl_ring;
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED2_ID \
+               UINT32_C(0x1000)
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * This bit must be '1' for the led2_state field to be
+        * configured.
         */
-       uint16_t        seq_id;
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED2_STATE \
+               UINT32_C(0x2000)
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * This bit must be '1' for the led2_color field to be
+        * configured.
         */
-       uint16_t        target_id;
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED2_COLOR \
+               UINT32_C(0x4000)
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * This bit must be '1' for the led2_blink_on field to be
+        * configured.
         */
-       uint64_t        resp_addr;
-       uint32_t        flags;
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED2_BLINK_ON \
+               UINT32_C(0x8000)
        /*
-        * Enumeration denoting the RX, TX type of the resource.
-        * This enumeration is used for resources that are similar for both
-        * TX and RX paths of the chip.
+        * This bit must be '1' for the led2_blink_off field to be
+        * configured.
         */
-       #define HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH     UINT32_C(0x1)
-       /* tx path */
-       #define HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_TX    UINT32_C(0x0)
-       /* rx path */
-       #define HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_RX    UINT32_C(0x1)
-       #define HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_LAST \
-               HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_RX
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED2_BLINK_OFF \
+               UINT32_C(0x10000)
        /*
-        * Port ID of port for which the queue configuration is being
-        * queried.  This field is only required when sent by IPC.
+        * This bit must be '1' for the led2_group_id field to be
+        * configured.
         */
-       uint16_t        port_id;
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED2_GROUP_ID \
+               UINT32_C(0x20000)
        /*
-        * Drivers will set this capability when it can use
-        * queue_idx_service_profile to map the queues to application.
+        * This bit must be '1' for the led3_id field to be
+        * configured.
         */
-       uint8_t drv_qmap_cap;
-       /* disabled */
-       #define HWRM_QUEUE_QPORTCFG_INPUT_DRV_QMAP_CAP_DISABLED UINT32_C(0x0)
-       /* enabled */
-       #define HWRM_QUEUE_QPORTCFG_INPUT_DRV_QMAP_CAP_ENABLED  UINT32_C(0x1)
-       #define HWRM_QUEUE_QPORTCFG_INPUT_DRV_QMAP_CAP_LAST \
-               HWRM_QUEUE_QPORTCFG_INPUT_DRV_QMAP_CAP_ENABLED
-       uint8_t unused_0;
-} __attribute__((packed));
-
-/* hwrm_queue_qportcfg_output (size:256b/32B) */
-struct hwrm_queue_qportcfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED3_ID \
+               UINT32_C(0x40000)
        /*
-        * The maximum number of queues that can be configured on this
-        * port.
-        * Valid values range from 1 through 8.
+        * This bit must be '1' for the led3_state field to be
+        * configured.
         */
-       uint8_t max_configurable_queues;
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED3_STATE \
+               UINT32_C(0x80000)
        /*
-        * The maximum number of lossless queues that can be configured
-        * on this port.
-        * Valid values range from 0 through 8.
+        * This bit must be '1' for the led3_color field to be
+        * configured.
         */
-       uint8_t max_configurable_lossless_queues;
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED3_COLOR \
+               UINT32_C(0x100000)
        /*
-        * Bitmask indicating which queues can be configured by the
-        * hwrm_queue_cfg command.
-        *
-        * Each bit represents a specific queue where bit 0 represents
-        * queue 0 and bit 7 represents queue 7.
-        * # A value of 0 indicates that the queue is not configurable
-        * by the hwrm_queue_cfg command.
-        * # A value of 1 indicates that the queue is configurable.
-        * # A hwrm_queue_cfg command shall return error when trying to
-        * configure a queue not configurable.
+        * This bit must be '1' for the led3_blink_on field to be
+        * configured.
         */
-       uint8_t queue_cfg_allowed;
-       /* Information about queue configuration. */
-       uint8_t queue_cfg_info;
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED3_BLINK_ON \
+               UINT32_C(0x200000)
        /*
-        * If this flag is set to '1', then the queues are
-        * configured asymmetrically on TX and RX sides.
-        * If this flag is set to '0', then the queues are
-        * configured symmetrically on TX and RX sides. For
-        * symmetric configuration, the queue configuration
-        * including queue ids and service profiles on the
-        * TX side is the same as the corresponding queue
-        * configuration on the RX side.
+        * This bit must be '1' for the led3_blink_off field to be
+        * configured.
         */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_CFG_INFO_ASYM_CFG \
-               UINT32_C(0x1)
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED3_BLINK_OFF \
+               UINT32_C(0x400000)
        /*
-        * Bitmask indicating which queues can be configured by the
-        * hwrm_queue_pfcenable_cfg command.
-        *
-        * Each bit represents a specific priority where bit 0 represents
-        * priority 0 and bit 7 represents priority 7.
-        * # A value of 0 indicates that the priority is not configurable by
-        * the hwrm_queue_pfcenable_cfg command.
-        * # A value of 1 indicates that the priority is configurable.
-        * # A hwrm_queue_pfcenable_cfg command shall return error when
-        * trying to configure a priority that is not configurable.
+        * This bit must be '1' for the led3_group_id field to be
+        * configured.
         */
-       uint8_t queue_pfcenable_cfg_allowed;
+       #define HWRM_PORT_LED_CFG_INPUT_ENABLES_LED3_GROUP_ID \
+               UINT32_C(0x800000)
+       /* Port ID of port whose LEDs are configured. */
+       uint16_t        port_id;
        /*
-        * Bitmask indicating which queues can be configured by the
-        * hwrm_queue_pri2cos_cfg command.
-        *
-        * Each bit represents a specific queue where bit 0 represents
-        * queue 0 and bit 7 represents queue 7.
-        * # A value of 0 indicates that the queue is not configurable
-        * by the hwrm_queue_pri2cos_cfg command.
-        * # A value of 1 indicates that the queue is configurable.
-        * # A hwrm_queue_pri2cos_cfg command shall return error when
-        * trying to configure a queue that is not configurable.
+        * The number of LEDs that are being configured.
+        * Up to 4 LEDs can be configured with this command.
         */
-       uint8_t queue_pri2cos_cfg_allowed;
+       uint8_t num_leds;
+       /* Reserved field. */
+       uint8_t rsvd;
+       /* An identifier for the LED #0. */
+       uint8_t led0_id;
+       /* The requested state of the LED #0. */
+       uint8_t led0_state;
+       /* Default state of the LED */
+       #define HWRM_PORT_LED_CFG_INPUT_LED0_STATE_DEFAULT  UINT32_C(0x0)
+       /* Off */
+       #define HWRM_PORT_LED_CFG_INPUT_LED0_STATE_OFF      UINT32_C(0x1)
+       /* On */
+       #define HWRM_PORT_LED_CFG_INPUT_LED0_STATE_ON       UINT32_C(0x2)
+       /* Blink */
+       #define HWRM_PORT_LED_CFG_INPUT_LED0_STATE_BLINK    UINT32_C(0x3)
+       /* Blink Alternately */
+       #define HWRM_PORT_LED_CFG_INPUT_LED0_STATE_BLINKALT UINT32_C(0x4)
+       #define HWRM_PORT_LED_CFG_INPUT_LED0_STATE_LAST \
+               HWRM_PORT_LED_CFG_INPUT_LED0_STATE_BLINKALT
+       /* The requested color of LED #0. */
+       uint8_t led0_color;
+       /* Default */
+       #define HWRM_PORT_LED_CFG_INPUT_LED0_COLOR_DEFAULT    UINT32_C(0x0)
+       /* Amber */
+       #define HWRM_PORT_LED_CFG_INPUT_LED0_COLOR_AMBER      UINT32_C(0x1)
+       /* Green */
+       #define HWRM_PORT_LED_CFG_INPUT_LED0_COLOR_GREEN      UINT32_C(0x2)
+       /* Green or Amber */
+       #define HWRM_PORT_LED_CFG_INPUT_LED0_COLOR_GREENAMBER UINT32_C(0x3)
+       #define HWRM_PORT_LED_CFG_INPUT_LED0_COLOR_LAST \
+               HWRM_PORT_LED_CFG_INPUT_LED0_COLOR_GREENAMBER
+       uint8_t unused_0;
        /*
-        * Bitmask indicating which queues can be configured by the
-        * hwrm_queue_pri2cos_cfg command.
-        *
-        * Each bit represents a specific queue where bit 0 represents
-        * queue 0 and bit 7 represents queue 7.
-        * # A value of 0 indicates that the queue is not configurable
-        * by the hwrm_queue_pri2cos_cfg command.
-        * # A value of 1 indicates that the queue is configurable.
-        * # A hwrm_queue_pri2cos_cfg command shall return error when
-        * trying to configure a queue not configurable.
+        * If the LED #0 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED on between cycles.
         */
-       uint8_t queue_cos2bw_cfg_allowed;
+       uint16_t        led0_blink_on;
        /*
-        * ID of CoS Queue 0.
-        * FF - Invalid id
-        *
-        * # This ID can be used on any subsequent call to an hwrm command
-        * that takes a queue id.
-        * # IDs must always be queried by this command before any use
-        * by the driver or software.
-        * # Any driver or software should not make any assumptions about
-        * queue IDs.
-        * # A value of 0xff indicates that the queue is not available.
-        * # Available queues may not be in sequential order.
+        * If the LED #0 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED off between cycles.
         */
-       uint8_t queue_id0;
-       /* This value is applicable to CoS queues only. */
-       uint8_t queue_id0_service_profile;
-       /* Lossy (best-effort) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LOSSY \
-               UINT32_C(0x0)
-       /* Lossless (legacy) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS \
-               UINT32_C(0x1)
-       /* Lossless RoCE */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS_ROCE \
-               UINT32_C(0x1)
-       /* Lossy RoCE CNP */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LOSSY_ROCE_CNP \
-               UINT32_C(0x2)
-       /* Lossless NIC */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS_NIC \
-               UINT32_C(0x3)
-       /* Set to 0xFF... (All Fs) if there is no service profile specified */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN \
-               UINT32_C(0xff)
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LAST \
-               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN
+       uint16_t        led0_blink_off;
        /*
-        * ID of CoS Queue 1.
-        * FF - Invalid id
-        *
-        * # This ID can be used on any subsequent call to an hwrm command
-        * that takes a queue id.
-        * # IDs must always be queried by this command before any use
-        * by the driver or software.
-        * # Any driver or software should not make any assumptions about
-        * queue IDs.
-        * # A value of 0xff indicates that the queue is not available.
-        * # Available queues may not be in sequential order.
+        * An identifier for the group of LEDs that LED #0 belongs
+        * to.
+        * If set to 0, then the LED #0 shall not be grouped and
+        * shall be treated as an individual resource.
+        * For all other non-zero values of this field, LED #0 shall
+        * be grouped together with the LEDs with the same group ID
+        * value.
         */
-       uint8_t queue_id1;
-       /* This value is applicable to CoS queues only. */
-       uint8_t queue_id1_service_profile;
-       /* Lossy (best-effort) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_LOSSY \
-               UINT32_C(0x0)
-       /* Lossless (legacy) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS \
-               UINT32_C(0x1)
-       /* Lossless RoCE */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS_ROCE \
-               UINT32_C(0x1)
-       /* Lossy RoCE CNP */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_LOSSY_ROCE_CNP \
-               UINT32_C(0x2)
-       /* Lossless NIC */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS_NIC \
-               UINT32_C(0x3)
-       /* Set to 0xFF... (All Fs) if there is no service profile specified */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN \
-               UINT32_C(0xff)
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_LAST \
-               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN
+       uint8_t led0_group_id;
+       /* Reserved field. */
+       uint8_t rsvd0;
+       /* An identifier for the LED #1. */
+       uint8_t led1_id;
+       /* The requested state of the LED #1. */
+       uint8_t led1_state;
+       /* Default state of the LED */
+       #define HWRM_PORT_LED_CFG_INPUT_LED1_STATE_DEFAULT  UINT32_C(0x0)
+       /* Off */
+       #define HWRM_PORT_LED_CFG_INPUT_LED1_STATE_OFF      UINT32_C(0x1)
+       /* On */
+       #define HWRM_PORT_LED_CFG_INPUT_LED1_STATE_ON       UINT32_C(0x2)
+       /* Blink */
+       #define HWRM_PORT_LED_CFG_INPUT_LED1_STATE_BLINK    UINT32_C(0x3)
+       /* Blink Alternately */
+       #define HWRM_PORT_LED_CFG_INPUT_LED1_STATE_BLINKALT UINT32_C(0x4)
+       #define HWRM_PORT_LED_CFG_INPUT_LED1_STATE_LAST \
+               HWRM_PORT_LED_CFG_INPUT_LED1_STATE_BLINKALT
+       /* The requested color of LED #1. */
+       uint8_t led1_color;
+       /* Default */
+       #define HWRM_PORT_LED_CFG_INPUT_LED1_COLOR_DEFAULT    UINT32_C(0x0)
+       /* Amber */
+       #define HWRM_PORT_LED_CFG_INPUT_LED1_COLOR_AMBER      UINT32_C(0x1)
+       /* Green */
+       #define HWRM_PORT_LED_CFG_INPUT_LED1_COLOR_GREEN      UINT32_C(0x2)
+       /* Green or Amber */
+       #define HWRM_PORT_LED_CFG_INPUT_LED1_COLOR_GREENAMBER UINT32_C(0x3)
+       #define HWRM_PORT_LED_CFG_INPUT_LED1_COLOR_LAST \
+               HWRM_PORT_LED_CFG_INPUT_LED1_COLOR_GREENAMBER
+       uint8_t unused_1;
        /*
-        * ID of CoS Queue 2.
-        * FF - Invalid id
-        *
-        * # This ID can be used on any subsequent call to an hwrm command
-        * that takes a queue id.
-        * # IDs must always be queried by this command before any use
-        * by the driver or software.
-        * # Any driver or software should not make any assumptions about
-        * queue IDs.
-        * # A value of 0xff indicates that the queue is not available.
-        * # Available queues may not be in sequential order.
+        * If the LED #1 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED on between cycles.
         */
-       uint8_t queue_id2;
-       /* This value is applicable to CoS queues only. */
-       uint8_t queue_id2_service_profile;
-       /* Lossy (best-effort) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_LOSSY \
-               UINT32_C(0x0)
-       /* Lossless (legacy) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS \
-               UINT32_C(0x1)
-       /* Lossless RoCE */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS_ROCE \
-               UINT32_C(0x1)
-       /* Lossy RoCE CNP */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_LOSSY_ROCE_CNP \
-               UINT32_C(0x2)
-       /* Lossless NIC */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS_NIC \
-               UINT32_C(0x3)
-       /* Set to 0xFF... (All Fs) if there is no service profile specified */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN \
-               UINT32_C(0xff)
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_LAST \
-               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN
+       uint16_t        led1_blink_on;
        /*
-        * ID of CoS Queue 3.
-        * FF - Invalid id
-        *
-        * # This ID can be used on any subsequent call to an hwrm command
-        * that takes a queue id.
-        * # IDs must always be queried by this command before any use
-        * by the driver or software.
-        * # Any driver or software should not make any assumptions about
-        * queue IDs.
-        * # A value of 0xff indicates that the queue is not available.
-        * # Available queues may not be in sequential order.
+        * If the LED #1 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED off between cycles.
         */
-       uint8_t queue_id3;
-       /* This value is applicable to CoS queues only. */
-       uint8_t queue_id3_service_profile;
-       /* Lossy (best-effort) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_LOSSY \
-               UINT32_C(0x0)
-       /* Lossless (legacy) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS \
-               UINT32_C(0x1)
-       /* Lossless RoCE */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS_ROCE \
-               UINT32_C(0x1)
-       /* Lossy RoCE CNP */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_LOSSY_ROCE_CNP \
-               UINT32_C(0x2)
-       /* Lossless NIC */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS_NIC \
-               UINT32_C(0x3)
-       /* Set to 0xFF... (All Fs) if there is no service profile specified */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN \
-               UINT32_C(0xff)
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_LAST \
-               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN
+       uint16_t        led1_blink_off;
        /*
-        * ID of CoS Queue 4.
-        * FF - Invalid id
-        *
-        * # This ID can be used on any subsequent call to an hwrm command
-        * that takes a queue id.
-        * # IDs must always be queried by this command before any use
-        * by the driver or software.
-        * # Any driver or software should not make any assumptions about
-        * queue IDs.
-        * # A value of 0xff indicates that the queue is not available.
-        * # Available queues may not be in sequential order.
+        * An identifier for the group of LEDs that LED #1 belongs
+        * to.
+        * If set to 0, then the LED #1 shall not be grouped and
+        * shall be treated as an individual resource.
+        * For all other non-zero values of this field, LED #1 shall
+        * be grouped together with the LEDs with the same group ID
+        * value.
         */
-       uint8_t queue_id4;
-       /* This value is applicable to CoS queues only. */
-       uint8_t queue_id4_service_profile;
-       /* Lossy (best-effort) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_LOSSY \
-               UINT32_C(0x0)
-       /* Lossless (legacy) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS \
-               UINT32_C(0x1)
-       /* Lossless RoCE */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS_ROCE \
-               UINT32_C(0x1)
-       /* Lossy RoCE CNP */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_LOSSY_ROCE_CNP \
-               UINT32_C(0x2)
-       /* Lossless NIC */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS_NIC \
-               UINT32_C(0x3)
-       /* Set to 0xFF... (All Fs) if there is no service profile specified */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN \
-               UINT32_C(0xff)
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_LAST \
-               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN
+       uint8_t led1_group_id;
+       /* Reserved field. */
+       uint8_t rsvd1;
+       /* An identifier for the LED #2. */
+       uint8_t led2_id;
+       /* The requested state of the LED #2. */
+       uint8_t led2_state;
+       /* Default state of the LED */
+       #define HWRM_PORT_LED_CFG_INPUT_LED2_STATE_DEFAULT  UINT32_C(0x0)
+       /* Off */
+       #define HWRM_PORT_LED_CFG_INPUT_LED2_STATE_OFF      UINT32_C(0x1)
+       /* On */
+       #define HWRM_PORT_LED_CFG_INPUT_LED2_STATE_ON       UINT32_C(0x2)
+       /* Blink */
+       #define HWRM_PORT_LED_CFG_INPUT_LED2_STATE_BLINK    UINT32_C(0x3)
+       /* Blink Alternately */
+       #define HWRM_PORT_LED_CFG_INPUT_LED2_STATE_BLINKALT UINT32_C(0x4)
+       #define HWRM_PORT_LED_CFG_INPUT_LED2_STATE_LAST \
+               HWRM_PORT_LED_CFG_INPUT_LED2_STATE_BLINKALT
+       /* The requested color of LED #2. */
+       uint8_t led2_color;
+       /* Default */
+       #define HWRM_PORT_LED_CFG_INPUT_LED2_COLOR_DEFAULT    UINT32_C(0x0)
+       /* Amber */
+       #define HWRM_PORT_LED_CFG_INPUT_LED2_COLOR_AMBER      UINT32_C(0x1)
+       /* Green */
+       #define HWRM_PORT_LED_CFG_INPUT_LED2_COLOR_GREEN      UINT32_C(0x2)
+       /* Green or Amber */
+       #define HWRM_PORT_LED_CFG_INPUT_LED2_COLOR_GREENAMBER UINT32_C(0x3)
+       #define HWRM_PORT_LED_CFG_INPUT_LED2_COLOR_LAST \
+               HWRM_PORT_LED_CFG_INPUT_LED2_COLOR_GREENAMBER
+       uint8_t unused_2;
        /*
-        * ID of CoS Queue 5.
-        * FF - Invalid id
-        *
-        * # This ID can be used on any subsequent call to an hwrm command
-        * that takes a queue id.
-        * # IDs must always be queried by this command before any use
-        * by the driver or software.
-        * # Any driver or software should not make any assumptions about
-        * queue IDs.
-        * # A value of 0xff indicates that the queue is not available.
-        * # Available queues may not be in sequential order.
+        * If the LED #2 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED on between cycles.
         */
-       uint8_t queue_id5;
-       /* This value is applicable to CoS queues only. */
-       uint8_t queue_id5_service_profile;
-       /* Lossy (best-effort) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_LOSSY \
-               UINT32_C(0x0)
-       /* Lossless (legacy) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS \
-               UINT32_C(0x1)
-       /* Lossless RoCE */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS_ROCE \
-               UINT32_C(0x1)
-       /* Lossy RoCE CNP */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_LOSSY_ROCE_CNP \
-               UINT32_C(0x2)
-       /* Lossless NIC */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS_NIC \
-               UINT32_C(0x3)
-       /* Set to 0xFF... (All Fs) if there is no service profile specified */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN \
-               UINT32_C(0xff)
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_LAST \
-               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN
+       uint16_t        led2_blink_on;
        /*
-        * ID of CoS Queue 6.
-        * FF - Invalid id
-        *
-        * # This ID can be used on any subsequent call to an hwrm command
-        * that takes a queue id.
-        * # IDs must always be queried by this command before any use
-        * by the driver or software.
-        * # Any driver or software should not make any assumptions about
-        * queue IDs.
-        * # A value of 0xff indicates that the queue is not available.
-        * # Available queues may not be in sequential order.
+        * If the LED #2 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED off between cycles.
         */
-       uint8_t queue_id6;
-       /* This value is applicable to CoS queues only. */
-       uint8_t queue_id6_service_profile;
-       /* Lossy (best-effort) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_LOSSY \
-               UINT32_C(0x0)
-       /* Lossless (legacy) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS \
-               UINT32_C(0x1)
-       /* Lossless RoCE */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS_ROCE \
-               UINT32_C(0x1)
-       /* Lossy RoCE CNP */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_LOSSY_ROCE_CNP \
-               UINT32_C(0x2)
-       /* Lossless NIC */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS_NIC \
-               UINT32_C(0x3)
-       /* Set to 0xFF... (All Fs) if there is no service profile specified */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN \
-               UINT32_C(0xff)
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_LAST \
-               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN
+       uint16_t        led2_blink_off;
        /*
-        * ID of CoS Queue 7.
-        * FF - Invalid id
-        *
-        * # This ID can be used on any subsequent call to an hwrm command
-        * that takes a queue id.
-        * # IDs must always be queried by this command before any use
-        * by the driver or software.
-        * # Any driver or software should not make any assumptions about
-        * queue IDs.
-        * # A value of 0xff indicates that the queue is not available.
-        * # Available queues may not be in sequential order.
+        * An identifier for the group of LEDs that LED #2 belongs
+        * to.
+        * If set to 0, then the LED #2 shall not be grouped and
+        * shall be treated as an individual resource.
+        * For all other non-zero values of this field, LED #2 shall
+        * be grouped together with the LEDs with the same group ID
+        * value.
         */
-       uint8_t queue_id7;
-       /* This value is applicable to CoS queues only. */
-       uint8_t queue_id7_service_profile;
-       /* Lossy (best-effort) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_LOSSY \
-               UINT32_C(0x0)
-       /* Lossless (legacy) */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS \
-               UINT32_C(0x1)
-       /* Lossless RoCE */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS_ROCE \
-               UINT32_C(0x1)
-       /* Lossy RoCE CNP */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_LOSSY_ROCE_CNP \
-               UINT32_C(0x2)
-       /* Lossless NIC */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS_NIC \
-               UINT32_C(0x3)
-       /* Set to 0xFF... (All Fs) if there is no service profile specified */
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN \
-               UINT32_C(0xff)
-       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_LAST \
-               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN
+       uint8_t led2_group_id;
+       /* Reserved field. */
+       uint8_t rsvd2;
+       /* An identifier for the LED #3. */
+       uint8_t led3_id;
+       /* The requested state of the LED #3. */
+       uint8_t led3_state;
+       /* Default state of the LED */
+       #define HWRM_PORT_LED_CFG_INPUT_LED3_STATE_DEFAULT  UINT32_C(0x0)
+       /* Off */
+       #define HWRM_PORT_LED_CFG_INPUT_LED3_STATE_OFF      UINT32_C(0x1)
+       /* On */
+       #define HWRM_PORT_LED_CFG_INPUT_LED3_STATE_ON       UINT32_C(0x2)
+       /* Blink */
+       #define HWRM_PORT_LED_CFG_INPUT_LED3_STATE_BLINK    UINT32_C(0x3)
+       /* Blink Alternately */
+       #define HWRM_PORT_LED_CFG_INPUT_LED3_STATE_BLINKALT UINT32_C(0x4)
+       #define HWRM_PORT_LED_CFG_INPUT_LED3_STATE_LAST \
+               HWRM_PORT_LED_CFG_INPUT_LED3_STATE_BLINKALT
+       /* The requested color of LED #3. */
+       uint8_t led3_color;
+       /* Default */
+       #define HWRM_PORT_LED_CFG_INPUT_LED3_COLOR_DEFAULT    UINT32_C(0x0)
+       /* Amber */
+       #define HWRM_PORT_LED_CFG_INPUT_LED3_COLOR_AMBER      UINT32_C(0x1)
+       /* Green */
+       #define HWRM_PORT_LED_CFG_INPUT_LED3_COLOR_GREEN      UINT32_C(0x2)
+       /* Green or Amber */
+       #define HWRM_PORT_LED_CFG_INPUT_LED3_COLOR_GREENAMBER UINT32_C(0x3)
+       #define HWRM_PORT_LED_CFG_INPUT_LED3_COLOR_LAST \
+               HWRM_PORT_LED_CFG_INPUT_LED3_COLOR_GREENAMBER
+       uint8_t unused_3;
+       /*
+        * If the LED #3 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED on between cycles.
+        */
+       uint16_t        led3_blink_on;
+       /*
+        * If the LED #3 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED off between cycles.
+        */
+       uint16_t        led3_blink_off;
+       /*
+        * An identifier for the group of LEDs that LED #3 belongs
+        * to.
+        * If set to 0, then the LED #3 shall not be grouped and
+        * shall be treated as an individual resource.
+        * For all other non-zero values of this field, LED #3 shall
+        * be grouped together with the LEDs with the same group ID
+        * value.
+        */
+       uint8_t led3_group_id;
+       /* Reserved field. */
+       uint8_t rsvd3;
+} __attribute__((packed));
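
For orientation, a minimal usage sketch follows; it is not part of this patch. The helper name is hypothetical, the request-header fields (req_type, seq_id, resp_addr, port_id) and the enables bitmap declared earlier in this structure are assumed to be filled in by the driver's usual HWRM request plumbing, and multi-byte fields may still need host-to-little-endian conversion per the driver's conventions.

#include <stdint.h>
#include <string.h>

/* Hypothetical helper: request a 500 ms on / 500 ms off blink on LED #1. */
static void port_led1_blink(struct hwrm_port_led_cfg_input *req,
			    uint8_t led_id)
{
	memset(req, 0, sizeof(*req));
	req->led1_id = led_id;	/* ID as reported by hwrm_port_led_qcaps */
	req->led1_state = HWRM_PORT_LED_CFG_INPUT_LED1_STATE_BLINK;
	req->led1_color = HWRM_PORT_LED_CFG_INPUT_LED1_COLOR_DEFAULT;
	req->led1_blink_on = 500;	/* ms LED on per blink cycle */
	req->led1_blink_off = 500;	/* ms LED off per blink cycle */
	req->led1_group_id = 0;		/* ungrouped, individual resource */
}
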
+
+/* hwrm_port_led_cfg_output (size:128b/16B) */
+struct hwrm_port_led_cfg_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -14290,13 +14260,13 @@ struct hwrm_queue_qportcfg_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/*******************
- * hwrm_queue_qcfg *
- *******************/
+/**********************
+ * hwrm_port_led_qcfg *
+ **********************/
 
 
-/* hwrm_queue_qcfg_input (size:192b/24B) */
-struct hwrm_queue_qcfg_input {
+/* hwrm_port_led_qcfg_input (size:192b/24B) */
+struct hwrm_port_led_qcfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -14324,25 +14294,13 @@ struct hwrm_queue_qcfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint32_t        flags;
-       /*
-        * Enumeration denoting the RX, TX type of the resource.
-        * This enumeration is used for resources that are similar for both
-        * TX and RX paths of the chip.
-        */
-       #define HWRM_QUEUE_QCFG_INPUT_FLAGS_PATH     UINT32_C(0x1)
-       /* tx path */
-       #define HWRM_QUEUE_QCFG_INPUT_FLAGS_PATH_TX    UINT32_C(0x0)
-       /* rx path */
-       #define HWRM_QUEUE_QCFG_INPUT_FLAGS_PATH_RX    UINT32_C(0x1)
-       #define HWRM_QUEUE_QCFG_INPUT_FLAGS_PATH_LAST \
-               HWRM_QUEUE_QCFG_INPUT_FLAGS_PATH_RX
-       /* Queue ID of the queue. */
-       uint32_t        queue_id;
+       /* Port ID of port whose LED configuration is being queried. */
+       uint16_t        port_id;
+       uint8_t unused_0[6];
 } __attribute__((packed));
 
-/* hwrm_queue_qcfg_output (size:128b/16B) */
-struct hwrm_queue_qcfg_output {
+/* hwrm_port_led_qcfg_output (size:448b/56B) */
+struct hwrm_port_led_qcfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -14352,134 +14310,251 @@ struct hwrm_queue_qcfg_output {
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
        /*
-        * This value is a the estimate packet length used in the
-        * TX arbiter.
-        */
-       uint32_t        queue_len;
-       /* This value is applicable to CoS queues only. */
-       uint8_t service_profile;
-       /* Lossy (best-effort) */
-       #define HWRM_QUEUE_QCFG_OUTPUT_SERVICE_PROFILE_LOSSY    UINT32_C(0x0)
-       /* Lossless */
-       #define HWRM_QUEUE_QCFG_OUTPUT_SERVICE_PROFILE_LOSSLESS UINT32_C(0x1)
-       /* Set to 0xFF... (All Fs) if there is no service profile specified */
-       #define HWRM_QUEUE_QCFG_OUTPUT_SERVICE_PROFILE_UNKNOWN  UINT32_C(0xff)
-       #define HWRM_QUEUE_QCFG_OUTPUT_SERVICE_PROFILE_LAST \
-               HWRM_QUEUE_QCFG_OUTPUT_SERVICE_PROFILE_UNKNOWN
-       /* Information about queue configuration. */
-       uint8_t queue_cfg_info;
-       /*
-        * If this flag is set to '1', then the queue is
-        * configured asymmetrically on TX and RX sides.
-        * If this flag is set to '0', then this queue is
-        * configured symmetrically on TX and RX sides.
+        * The number of LEDs that are configured on this port.
+        * Up to 4 LEDs can be returned in the response.
         */
-       #define HWRM_QUEUE_QCFG_OUTPUT_QUEUE_CFG_INFO_ASYM_CFG \
-               UINT32_C(0x1)
+       uint8_t num_leds;
+       /* An identifier for the LED #0. */
+       uint8_t led0_id;
+       /* The type of LED #0. */
+       uint8_t led0_type;
+       /* Speed LED */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_TYPE_SPEED    UINT32_C(0x0)
+       /* Activity LED */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_TYPE_ACTIVITY UINT32_C(0x1)
+       /* Invalid */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_TYPE_INVALID  UINT32_C(0xff)
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_TYPE_LAST \
+               HWRM_PORT_LED_QCFG_OUTPUT_LED0_TYPE_INVALID
+       /* The current state of the LED #0. */
+       uint8_t led0_state;
+       /* Default state of the LED */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_DEFAULT  UINT32_C(0x0)
+       /* Off */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_OFF      UINT32_C(0x1)
+       /* On */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_ON       UINT32_C(0x2)
+       /* Blink */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_BLINK    UINT32_C(0x3)
+       /* Blink Alternately */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_BLINKALT UINT32_C(0x4)
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_LAST \
+               HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_BLINKALT
+       /* The color of LED #0. */
+       uint8_t led0_color;
+       /* Default */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_COLOR_DEFAULT    UINT32_C(0x0)
+       /* Amber */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_COLOR_AMBER      UINT32_C(0x1)
+       /* Green */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_COLOR_GREEN      UINT32_C(0x2)
+       /* Green or Amber */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_COLOR_GREENAMBER UINT32_C(0x3)
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED0_COLOR_LAST \
+               HWRM_PORT_LED_QCFG_OUTPUT_LED0_COLOR_GREENAMBER
        uint8_t unused_0;
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * If the LED #0 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED on between cycles.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/******************
- * hwrm_queue_cfg *
- ******************/
-
-
-/* hwrm_queue_cfg_input (size:320b/40B) */
-struct hwrm_queue_cfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       uint16_t        led0_blink_on;
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * If the LED #0 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED off between cycles.
         */
-       uint16_t        cmpl_ring;
+       uint16_t        led0_blink_off;
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * An identifier for the group of LEDs that LED #0 belongs
+        * to.
+        * If set to 0, then the LED #0 is not grouped.
+        * For all other non-zero values of this field, LED #0 is
+        * grouped together with the LEDs with the same group ID
+        * value.
         */
-       uint16_t        seq_id;
+       uint8_t led0_group_id;
+       /* An identifier for the LED #1. */
+       uint8_t led1_id;
+       /* The type of LED #1. */
+       uint8_t led1_type;
+       /* Speed LED */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_TYPE_SPEED    UINT32_C(0x0)
+       /* Activity LED */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_TYPE_ACTIVITY UINT32_C(0x1)
+       /* Invalid */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_TYPE_INVALID  UINT32_C(0xff)
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_TYPE_LAST \
+               HWRM_PORT_LED_QCFG_OUTPUT_LED1_TYPE_INVALID
+       /* The current state of the LED #1. */
+       uint8_t led1_state;
+       /* Default state of the LED */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_STATE_DEFAULT  UINT32_C(0x0)
+       /* Off */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_STATE_OFF      UINT32_C(0x1)
+       /* On */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_STATE_ON       UINT32_C(0x2)
+       /* Blink */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_STATE_BLINK    UINT32_C(0x3)
+       /* Blink Alternately */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_STATE_BLINKALT UINT32_C(0x4)
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_STATE_LAST \
+               HWRM_PORT_LED_QCFG_OUTPUT_LED1_STATE_BLINKALT
+       /* The color of LED #1. */
+       uint8_t led1_color;
+       /* Default */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_COLOR_DEFAULT    UINT32_C(0x0)
+       /* Amber */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_COLOR_AMBER      UINT32_C(0x1)
+       /* Green */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_COLOR_GREEN      UINT32_C(0x2)
+       /* Green or Amber */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_COLOR_GREENAMBER UINT32_C(0x3)
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED1_COLOR_LAST \
+               HWRM_PORT_LED_QCFG_OUTPUT_LED1_COLOR_GREENAMBER
+       uint8_t unused_1;
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * If the LED #1 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED on between cycles.
         */
-       uint16_t        target_id;
+       uint16_t        led1_blink_on;
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * If the LED #1 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED off between cycles.
         */
-       uint64_t        resp_addr;
-       uint32_t        flags;
+       uint16_t        led1_blink_off;
        /*
-        * Enumeration denoting the RX, TX, or both directions applicable to the resource.
-        * This enumeration is used for resources that are similar for both
-        * TX and RX paths of the chip.
+        * An identifier for the group of LEDs that LED #1 belongs
+        * to.
+        * If set to 0, then the LED #1 is not grouped.
+        * For all other non-zero values of this field, LED #1 is
+        * grouped together with the LEDs with the same group ID
+        * value.
         */
-       #define HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_MASK UINT32_C(0x3)
-       #define HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_SFT  0
-       /* tx path */
-       #define HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_TX     UINT32_C(0x0)
-       /* rx path */
-       #define HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_RX     UINT32_C(0x1)
-       /* Bi-directional (Symmetrically applicable to TX and RX paths) */
-       #define HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_BIDIR  UINT32_C(0x2)
-       #define HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_LAST \
-               HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_BIDIR
-       uint32_t        enables;
+       uint8_t led1_group_id;
+       /* An identifier for the LED #2. */
+       uint8_t led2_id;
+       /* The type of LED #2. */
+       uint8_t led2_type;
+       /* Speed LED */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_TYPE_SPEED    UINT32_C(0x0)
+       /* Activity LED */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_TYPE_ACTIVITY UINT32_C(0x1)
+       /* Invalid */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_TYPE_INVALID  UINT32_C(0xff)
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_TYPE_LAST \
+               HWRM_PORT_LED_QCFG_OUTPUT_LED2_TYPE_INVALID
+       /* The current state of the LED #2. */
+       uint8_t led2_state;
+       /* Default state of the LED */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_STATE_DEFAULT  UINT32_C(0x0)
+       /* Off */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_STATE_OFF      UINT32_C(0x1)
+       /* On */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_STATE_ON       UINT32_C(0x2)
+       /* Blink */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_STATE_BLINK    UINT32_C(0x3)
+       /* Blink Alternately */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_STATE_BLINKALT UINT32_C(0x4)
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_STATE_LAST \
+               HWRM_PORT_LED_QCFG_OUTPUT_LED2_STATE_BLINKALT
+       /* The color of LED #2. */
+       uint8_t led2_color;
+       /* Default */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_COLOR_DEFAULT    UINT32_C(0x0)
+       /* Amber */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_COLOR_AMBER      UINT32_C(0x1)
+       /* Green */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_COLOR_GREEN      UINT32_C(0x2)
+       /* Green or Amber */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_COLOR_GREENAMBER UINT32_C(0x3)
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED2_COLOR_LAST \
+               HWRM_PORT_LED_QCFG_OUTPUT_LED2_COLOR_GREENAMBER
+       uint8_t unused_2;
        /*
-        * This bit must be '1' for the dflt_len field to be
-        * configured.
+        * If the LED #2 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED on between cycles.
         */
-       #define HWRM_QUEUE_CFG_INPUT_ENABLES_DFLT_LEN            UINT32_C(0x1)
+       uint16_t        led2_blink_on;
        /*
-        * This bit must be '1' for the service_profile field to be
-        * configured.
+        * If the LED #2 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED off between cycles.
         */
-       #define HWRM_QUEUE_CFG_INPUT_ENABLES_SERVICE_PROFILE     UINT32_C(0x2)
-       /* Queue ID of queue that is to be configured by this function. */
-       uint32_t        queue_id;
+       uint16_t        led2_blink_off;
        /*
-        * This value is a the estimate packet length used in the
-        * TX arbiter.
-        * Set to 0xFF... (All Fs) to not adjust this value.
+        * An identifier for the group of LEDs that LED #2 belongs
+        * to.
+        * If set to 0, then the LED #2 is not grouped.
+        * For all other non-zero values of this field, LED #2 is
+        * grouped together with the LEDs with the same group ID
+        * value.
         */
-       uint32_t        dflt_len;
-       /* This value is applicable to CoS queues only. */
-       uint8_t service_profile;
-       /* Lossy (best-effort) */
-       #define HWRM_QUEUE_CFG_INPUT_SERVICE_PROFILE_LOSSY    UINT32_C(0x0)
-       /* Lossless */
-       #define HWRM_QUEUE_CFG_INPUT_SERVICE_PROFILE_LOSSLESS UINT32_C(0x1)
-       /* Set to 0xFF... (All Fs) if there is no service profile specified */
-       #define HWRM_QUEUE_CFG_INPUT_SERVICE_PROFILE_UNKNOWN  UINT32_C(0xff)
-       #define HWRM_QUEUE_CFG_INPUT_SERVICE_PROFILE_LAST \
-               HWRM_QUEUE_CFG_INPUT_SERVICE_PROFILE_UNKNOWN
-       uint8_t unused_0[7];
-} __attribute__((packed));
-
-/* hwrm_queue_cfg_output (size:128b/16B) */
-struct hwrm_queue_cfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t unused_0[7];
+       uint8_t led2_group_id;
+       /* An identifier for the LED #3. */
+       uint8_t led3_id;
+       /* The type of LED #3. */
+       uint8_t led3_type;
+       /* Speed LED */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_TYPE_SPEED    UINT32_C(0x0)
+       /* Activity LED */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_TYPE_ACTIVITY UINT32_C(0x1)
+       /* Invalid */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_TYPE_INVALID  UINT32_C(0xff)
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_TYPE_LAST \
+               HWRM_PORT_LED_QCFG_OUTPUT_LED3_TYPE_INVALID
+       /* The current state of the LED #3. */
+       uint8_t led3_state;
+       /* Default state of the LED */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_STATE_DEFAULT  UINT32_C(0x0)
+       /* Off */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_STATE_OFF      UINT32_C(0x1)
+       /* On */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_STATE_ON       UINT32_C(0x2)
+       /* Blink */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_STATE_BLINK    UINT32_C(0x3)
+       /* Blink Alternately */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_STATE_BLINKALT UINT32_C(0x4)
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_STATE_LAST \
+               HWRM_PORT_LED_QCFG_OUTPUT_LED3_STATE_BLINKALT
+       /* The color of LED #3. */
+       uint8_t led3_color;
+       /* Default */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_COLOR_DEFAULT    UINT32_C(0x0)
+       /* Amber */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_COLOR_AMBER      UINT32_C(0x1)
+       /* Green */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_COLOR_GREEN      UINT32_C(0x2)
+       /* Green or Amber */
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_COLOR_GREENAMBER UINT32_C(0x3)
+       #define HWRM_PORT_LED_QCFG_OUTPUT_LED3_COLOR_LAST \
+               HWRM_PORT_LED_QCFG_OUTPUT_LED3_COLOR_GREENAMBER
+       uint8_t unused_3;
+       /*
+        * If the LED #3 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED on between cycles.
+        */
+       uint16_t        led3_blink_on;
+       /*
+        * If the LED #3 state is "blink" or "blinkalt", then
+        * this field represents the requested time in milliseconds
+        * to keep LED off between cycles.
+        */
+       uint16_t        led3_blink_off;
+       /*
+        * An identifier for the group of LEDs that LED #3 belongs
+        * to.
+        * If set to 0, then the LED #3 is not grouped.
+        * For all other non-zero values of this field, LED #3 is
+        * grouped together with the LEDs with the same group ID
+        * value.
+        */
+       uint8_t led3_group_id;
+       uint8_t unused_4[6];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -14490,13 +14565,13 @@ struct hwrm_queue_cfg_output {
        uint8_t valid;
 } __attribute__((packed));
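
As a reading aid (again not part of this patch), a sketch of how a caller might interpret the first LED entry of this response. It assumes the response has already been validated (the `valid` byte read as '1') and that multi-byte fields have been converted from little-endian wire order where the driver requires it.

#include <stdio.h>

/* Illustration only: report the configuration of LED #0, if present. */
static void dump_led0(const struct hwrm_port_led_qcfg_output *resp)
{
	if (resp->num_leds == 0)
		return;		/* no LEDs configured on this port */

	if (resp->led0_state == HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_BLINK ||
	    resp->led0_state == HWRM_PORT_LED_QCFG_OUTPUT_LED0_STATE_BLINKALT)
		printf("LED0 (id %u): blink, %u ms on / %u ms off\n",
		       (unsigned)resp->led0_id,
		       (unsigned)resp->led0_blink_on,
		       (unsigned)resp->led0_blink_off);
	else
		printf("LED0 (id %u): state %u\n",
		       (unsigned)resp->led0_id,
		       (unsigned)resp->led0_state);
}
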
 
-/*****************************
- * hwrm_queue_pfcenable_qcfg *
- *****************************/
+/***********************
+ * hwrm_port_led_qcaps *
+ ***********************/
 
 
-/* hwrm_queue_pfcenable_qcfg_input (size:192b/24B) */
-struct hwrm_queue_pfcenable_qcfg_input {
+/* hwrm_port_led_qcaps_input (size:192b/24B) */
+struct hwrm_port_led_qcaps_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -14524,17 +14599,13 @@ struct hwrm_queue_pfcenable_qcfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /*
-        * Port ID of port for which the table is being configured.
-        * The HWRM needs to check whether this function is allowed
-        * to configure pri2cos mapping on this port.
-        */
+       /* Port ID of port whose LED configuration is being queried. */
        uint16_t        port_id;
        uint8_t unused_0[6];
 } __attribute__((packed));
 
-/* hwrm_queue_pfcenable_qcfg_output (size:128b/16B) */
-struct hwrm_queue_pfcenable_qcfg_output {
+/* hwrm_port_led_qcaps_output (size:384b/48B) */
+struct hwrm_port_led_qcaps_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -14543,121 +14614,298 @@ struct hwrm_queue_pfcenable_qcfg_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       uint32_t        flags;
-       /* If set to 1, then PFC is enabled on PRI 0. */
-       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI0_PFC_ENABLED \
-               UINT32_C(0x1)
-       /* If set to 1, then PFC is enabled on PRI 1. */
-       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI1_PFC_ENABLED \
-               UINT32_C(0x2)
-       /* If set to 1, then PFC is enabled on PRI 2. */
-       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI2_PFC_ENABLED \
-               UINT32_C(0x4)
-       /* If set to 1, then PFC is enabled on PRI 3. */
-       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI3_PFC_ENABLED \
-               UINT32_C(0x8)
-       /* If set to 1, then PFC is enabled on PRI 4. */
-       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI4_PFC_ENABLED \
-               UINT32_C(0x10)
-       /* If set to 1, then PFC is enabled on PRI 5. */
-       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI5_PFC_ENABLED \
-               UINT32_C(0x20)
-       /* If set to 1, then PFC is enabled on PRI 6. */
-       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI6_PFC_ENABLED \
-               UINT32_C(0x40)
-       /* If set to 1, then PFC is enabled on PRI 7. */
-       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI7_PFC_ENABLED \
-               UINT32_C(0x80)
-       uint8_t unused_0[3];
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * The number of LEDs that are configured on this port.
+        * Up to 4 LEDs can be returned in the response.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/****************************
- * hwrm_queue_pfcenable_cfg *
- ****************************/
-
-
-/* hwrm_queue_pfcenable_cfg_input (size:192b/24B) */
-struct hwrm_queue_pfcenable_cfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       uint8_t num_leds;
+       /* Reserved for future use. */
+       uint8_t unused[3];
+       /* An identifier for the LED #0. */
+       uint8_t led0_id;
+       /* The type of LED #0. */
+       uint8_t led0_type;
+       /* Speed LED */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_TYPE_SPEED    UINT32_C(0x0)
+       /* Activity LED */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_TYPE_ACTIVITY UINT32_C(0x1)
+       /* Invalid */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_TYPE_INVALID  UINT32_C(0xff)
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_TYPE_LAST \
+               HWRM_PORT_LED_QCAPS_OUTPUT_LED0_TYPE_INVALID
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * An identifier for the group of LEDs that LED #0 belongs
+        * to.
+        * If set to 0, then the LED #0 cannot be grouped.
+        * For all other non-zero values of this field, LED #0 is
+        * grouped together with the LEDs with the same group ID
+        * value.
         */
-       uint16_t        cmpl_ring;
+       uint8_t led0_group_id;
+       uint8_t unused_0;
+       /* The states supported by LED #0. */
+       uint16_t        led0_state_caps;
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * If set to 1, this LED is enabled.
+        * If set to 0, this LED is disabled.
         */
-       uint16_t        seq_id;
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_STATE_CAPS_ENABLED \
+               UINT32_C(0x1)
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * If set to 1, off state is supported on this LED.
+        * If set to 0, off state is not supported on this LED.
         */
-       uint16_t        target_id;
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_STATE_CAPS_OFF_SUPPORTED \
+               UINT32_C(0x2)
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * If set to 1, on state is supported on this LED.
+        * If set to 0, on state is not supported on this LED.
         */
-       uint64_t        resp_addr;
-       uint32_t        flags;
-       /* If set to 1, then PFC is requested to be enabled on PRI 0. */
-       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI0_PFC_ENABLED \
-               UINT32_C(0x1)
-       /* If set to 1, then PFC is requested to be enabled on PRI 1. */
-       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI1_PFC_ENABLED \
-               UINT32_C(0x2)
-       /* If set to 1, then PFC is requested to  be enabled on PRI 2. */
-       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI2_PFC_ENABLED \
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_STATE_CAPS_ON_SUPPORTED \
                UINT32_C(0x4)
-       /* If set to 1, then PFC is requested to  be enabled on PRI 3. */
-       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI3_PFC_ENABLED \
+       /*
+        * If set to 1, blink state is supported on this LED.
+        * If set to 0, blink state is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_STATE_CAPS_BLINK_SUPPORTED \
                UINT32_C(0x8)
-       /* If set to 1, then PFC is requested to  be enabled on PRI 4. */
-       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI4_PFC_ENABLED \
+       /*
+        * If set to 1, blink_alt state is supported on this LED.
+        * If set to 0, blink_alt state is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_STATE_CAPS_BLINK_ALT_SUPPORTED \
                UINT32_C(0x10)
-       /* If set to 1, then PFC is requested to  be enabled on PRI 5. */
-       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI5_PFC_ENABLED \
-               UINT32_C(0x20)
-       /* If set to 1, then PFC is requested to  be enabled on PRI 6. */
-       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI6_PFC_ENABLED \
-               UINT32_C(0x40)
-       /* If set to 1, then PFC is requested to  be enabled on PRI 7. */
-       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI7_PFC_ENABLED \
-               UINT32_C(0x80)
+       /* The colors supported by LED #0. */
+       uint16_t        led0_color_caps;
+       /* reserved. */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_COLOR_CAPS_RSVD \
+               UINT32_C(0x1)
        /*
-        * Port ID of port for which the table is being configured.
-        * The HWRM needs to check whether this function is allowed
-        * to configure pri2cos mapping on this port.
+        * If set to 1, Amber color is supported on this LED.
+        * If set to 0, Amber color is not supported on this LED.
         */
-       uint16_t        port_id;
-       uint8_t unused_0[2];
-} __attribute__((packed));
-
-/* hwrm_queue_pfcenable_cfg_output (size:128b/16B) */
-struct hwrm_queue_pfcenable_cfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t unused_0[7];
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_COLOR_CAPS_AMBER_SUPPORTED \
+               UINT32_C(0x2)
+       /*
+        * If set to 1, Green color is supported on this LED.
+        * If set to 0, Green color is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED0_COLOR_CAPS_GREEN_SUPPORTED \
+               UINT32_C(0x4)
+       /* An identifier for the LED #1. */
+       uint8_t led1_id;
+       /* The type of LED #1. */
+       uint8_t led1_type;
+       /* Speed LED */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_TYPE_SPEED    UINT32_C(0x0)
+       /* Activity LED */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_TYPE_ACTIVITY UINT32_C(0x1)
+       /* Invalid */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_TYPE_INVALID  UINT32_C(0xff)
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_TYPE_LAST \
+               HWRM_PORT_LED_QCAPS_OUTPUT_LED1_TYPE_INVALID
+       /*
+        * An identifier for the group of LEDs that LED #1 belongs
+        * to.
+        * If set to 0, then the LED #1 cannot be grouped.
+        * For all other non-zero values of this field, LED #1 is
+        * grouped together with the LEDs with the same group ID
+        * value.
+        */
+       uint8_t led1_group_id;
+       uint8_t unused_1;
+       /* The states supported by LED #1. */
+       uint16_t        led1_state_caps;
+       /*
+        * If set to 1, this LED is enabled.
+        * If set to 0, this LED is disabled.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_STATE_CAPS_ENABLED \
+               UINT32_C(0x1)
+       /*
+        * If set to 1, off state is supported on this LED.
+        * If set to 0, off state is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_STATE_CAPS_OFF_SUPPORTED \
+               UINT32_C(0x2)
+       /*
+        * If set to 1, on state is supported on this LED.
+        * If set to 0, on state is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_STATE_CAPS_ON_SUPPORTED \
+               UINT32_C(0x4)
+       /*
+        * If set to 1, blink state is supported on this LED.
+        * If set to 0, blink state is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_STATE_CAPS_BLINK_SUPPORTED \
+               UINT32_C(0x8)
+       /*
+        * If set to 1, blink_alt state is supported on this LED.
+        * If set to 0, blink_alt state is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_STATE_CAPS_BLINK_ALT_SUPPORTED \
+               UINT32_C(0x10)
+       /* The colors supported by LED #1. */
+       uint16_t        led1_color_caps;
+       /* reserved. */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_COLOR_CAPS_RSVD \
+               UINT32_C(0x1)
+       /*
+        * If set to 1, Amber color is supported on this LED.
+        * If set to 0, Amber color is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_COLOR_CAPS_AMBER_SUPPORTED \
+               UINT32_C(0x2)
+       /*
+        * If set to 1, Green color is supported on this LED.
+        * If set to 0, Green color is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED1_COLOR_CAPS_GREEN_SUPPORTED \
+               UINT32_C(0x4)
+       /* An identifier for the LED #2. */
+       uint8_t led2_id;
+       /* The type of LED #2. */
+       uint8_t led2_type;
+       /* Speed LED */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_TYPE_SPEED    UINT32_C(0x0)
+       /* Activity LED */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_TYPE_ACTIVITY UINT32_C(0x1)
+       /* Invalid */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_TYPE_INVALID  UINT32_C(0xff)
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_TYPE_LAST \
+               HWRM_PORT_LED_QCAPS_OUTPUT_LED2_TYPE_INVALID
+       /*
+        * An identifier for the group of LEDs that LED #2 belongs
+        * to.
+        * If set to 0, then the LED #2 cannot be grouped.
+        * For all other non-zero values of this field, LED #2 is
+        * grouped together with the LEDs with the same group ID
+        * value.
+        */
+       uint8_t led2_group_id;
+       uint8_t unused_2;
+       /* The states supported by LED #2. */
+       uint16_t        led2_state_caps;
+       /*
+        * If set to 1, this LED is enabled.
+        * If set to 0, this LED is disabled.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_STATE_CAPS_ENABLED \
+               UINT32_C(0x1)
+       /*
+        * If set to 1, off state is supported on this LED.
+        * If set to 0, off state is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_STATE_CAPS_OFF_SUPPORTED \
+               UINT32_C(0x2)
+       /*
+        * If set to 1, on state is supported on this LED.
+        * If set to 0, on state is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_STATE_CAPS_ON_SUPPORTED \
+               UINT32_C(0x4)
+       /*
+        * If set to 1, blink state is supported on this LED.
+        * If set to 0, blink state is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_STATE_CAPS_BLINK_SUPPORTED \
+               UINT32_C(0x8)
+       /*
+        * If set to 1, blink_alt state is supported on this LED.
+        * If set to 0, blink_alt state is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_STATE_CAPS_BLINK_ALT_SUPPORTED \
+               UINT32_C(0x10)
+       /* The colors supported by LED #2. */
+       uint16_t        led2_color_caps;
+       /* reserved. */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_COLOR_CAPS_RSVD \
+               UINT32_C(0x1)
+       /*
+        * If set to 1, Amber color is supported on this LED.
+        * If set to 0, Amber color is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_COLOR_CAPS_AMBER_SUPPORTED \
+               UINT32_C(0x2)
+       /*
+        * If set to 1, Green color is supported on this LED.
+        * If set to 0, Green color is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED2_COLOR_CAPS_GREEN_SUPPORTED \
+               UINT32_C(0x4)
+       /* An identifier for the LED #3. */
+       uint8_t led3_id;
+       /* The type of LED #3. */
+       uint8_t led3_type;
+       /* Speed LED */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_TYPE_SPEED    UINT32_C(0x0)
+       /* Activity LED */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_TYPE_ACTIVITY UINT32_C(0x1)
+       /* Invalid */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_TYPE_INVALID  UINT32_C(0xff)
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_TYPE_LAST \
+               HWRM_PORT_LED_QCAPS_OUTPUT_LED3_TYPE_INVALID
+       /*
+        * An identifier for the group of LEDs that LED #3 belongs
+        * to.
+        * If set to 0, then the LED #3 cannot be grouped.
+        * For all other non-zero values of this field, LED #3 is
+        * grouped together with the LEDs with the same group ID
+        * value.
+        */
+       uint8_t led3_group_id;
+       uint8_t unused_3;
+       /* The states supported by LED #3. */
+       uint16_t        led3_state_caps;
+       /*
+        * If set to 1, this LED is enabled.
+        * If set to 0, this LED is disabled.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_STATE_CAPS_ENABLED \
+               UINT32_C(0x1)
+       /*
+        * If set to 1, off state is supported on this LED.
+        * If set to 0, off state is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_STATE_CAPS_OFF_SUPPORTED \
+               UINT32_C(0x2)
+       /*
+        * If set to 1, on state is supported on this LED.
+        * If set to 0, on state is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_STATE_CAPS_ON_SUPPORTED \
+               UINT32_C(0x4)
+       /*
+        * If set to 1, blink state is supported on this LED.
+        * If set to 0, blink state is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_STATE_CAPS_BLINK_SUPPORTED \
+               UINT32_C(0x8)
+       /*
+        * If set to 1, blink_alt state is supported on this LED.
+        * If set to 0, blink_alt state is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_STATE_CAPS_BLINK_ALT_SUPPORTED \
+               UINT32_C(0x10)
+       /* The colors supported by LED #3. */
+       uint16_t        led3_color_caps;
+       /* reserved. */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_COLOR_CAPS_RSVD \
+               UINT32_C(0x1)
+       /*
+        * If set to 1, Amber color is supported on this LED.
+        * If set to 0, Amber color is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_COLOR_CAPS_AMBER_SUPPORTED \
+               UINT32_C(0x2)
+       /*
+        * If set to 1, Green color is supported on this LED.
+        * If set to 0, Green color is not supported on this LED.
+        */
+       #define HWRM_PORT_LED_QCAPS_OUTPUT_LED3_COLOR_CAPS_GREEN_SUPPORTED \
+               UINT32_C(0x4)
+       uint8_t unused_4[3];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -14668,13 +14916,13 @@ struct hwrm_queue_pfcenable_cfg_output {
        uint8_t valid;
 } __attribute__((packed));
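
A hedged sketch of the intended capability check: before requesting a blink state through hwrm_port_led_cfg, a driver can test the per-LED state-caps bits returned above. This is illustrative only, not part of the patch, and assumes the 16-bit caps field is already in host byte order.

#include <stdint.h>

/* Illustration only: LED #0 must be enabled and advertise blink support. */
static int led0_supports_blink(const struct hwrm_port_led_qcaps_output *caps)
{
	uint16_t sc = caps->led0_state_caps;

	return (sc & HWRM_PORT_LED_QCAPS_OUTPUT_LED0_STATE_CAPS_ENABLED) &&
	       (sc & HWRM_PORT_LED_QCAPS_OUTPUT_LED0_STATE_CAPS_BLINK_SUPPORTED);
}
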
 
-/***************************
- * hwrm_queue_pri2cos_qcfg *
- ***************************/
+/***********************
+ * hwrm_queue_qportcfg *
+ ***********************/
 
 
-/* hwrm_queue_pri2cos_qcfg_input (size:192b/24B) */
-struct hwrm_queue_pri2cos_qcfg_input {
+/* hwrm_queue_qportcfg_input (size:192b/24B) */
+struct hwrm_queue_qportcfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -14708,31 +14956,34 @@ struct hwrm_queue_pri2cos_qcfg_input {
         * This enumeration is used for resources that are similar for both
         * TX and RX paths of the chip.
         */
-       #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_PATH      UINT32_C(0x1)
+       #define HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH     UINT32_C(0x1)
        /* tx path */
-       #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_PATH_TX     UINT32_C(0x0)
+       #define HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_TX    UINT32_C(0x0)
        /* rx path */
-       #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_PATH_RX     UINT32_C(0x1)
-       #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_PATH_LAST \
-               HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_PATH_RX
+       #define HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_RX    UINT32_C(0x1)
+       #define HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_LAST \
+               HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_RX
        /*
-        * When this bit is set to '0', the query is
-        * for VLAN PRI field in tunnel headers.
-        * When this bit is set to '1', the query is
-        * for VLAN PRI field in inner packet headers.
+        * Port ID of port for which the queue configuration is being
+        * queried.  This field is only required when sent by IPC.
         */
-       #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN     UINT32_C(0x2)
+       uint16_t        port_id;
        /*
-        * Port ID of port for which the table is being configured.
-        * The HWRM needs to check whether this function is allowed
-        * to configure pri2cos mapping on this port.
+        * A driver will set this capability when it can use
+        * queue_idx_service_profile to map the queues to the application.
         */
-       uint8_t port_id;
-       uint8_t unused_0[3];
+       uint8_t drv_qmap_cap;
+       /* disabled */
+       #define HWRM_QUEUE_QPORTCFG_INPUT_DRV_QMAP_CAP_DISABLED UINT32_C(0x0)
+       /* enabled */
+       #define HWRM_QUEUE_QPORTCFG_INPUT_DRV_QMAP_CAP_ENABLED  UINT32_C(0x1)
+       #define HWRM_QUEUE_QPORTCFG_INPUT_DRV_QMAP_CAP_LAST \
+               HWRM_QUEUE_QPORTCFG_INPUT_DRV_QMAP_CAP_ENABLED
+       uint8_t unused_0;
 } __attribute__((packed));
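
Illustrative only (assumptions: the request-header fields are handled by the driver's HWRM plumbing, and the uint32_t flags member is declared in the unchanged part of this structure): preparing a TX-path queue configuration query that also advertises the driver's queue-mapping capability.

#include <stdint.h>
#include <string.h>

/* Hypothetical helper: query TX-path queue configuration for a port. */
static void queue_qportcfg_prepare(struct hwrm_queue_qportcfg_input *req,
				   uint16_t port_id)
{
	memset(req, 0, sizeof(*req));
	req->flags = HWRM_QUEUE_QPORTCFG_INPUT_FLAGS_PATH_TX;
	req->port_id = port_id;		/* only required when sent by IPC */
	req->drv_qmap_cap = HWRM_QUEUE_QPORTCFG_INPUT_DRV_QMAP_CAP_ENABLED;
}
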
 
-/* hwrm_queue_pri2cos_qcfg_output (size:192b/24B) */
-struct hwrm_queue_pri2cos_qcfg_output {
+/* hwrm_queue_qportcfg_output (size:256b/32B) */
+struct hwrm_queue_qportcfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -14742,239 +14993,562 @@ struct hwrm_queue_pri2cos_qcfg_output {
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
        /*
-        * CoS Queue assigned to priority 0.  This value can only
-        * be changed before traffic has started.
-        * A value of 0xff indicates that no CoS queue is assigned to the
-        * specified priority.
-        */
-       uint8_t pri0_cos_queue_id;
-       /*
-        * CoS Queue assigned to priority 1.  This value can only
-        * be changed before traffic has started.
-        * A value of 0xff indicates that no CoS queue is assigned to the
-        * specified priority.
-        */
-       uint8_t pri1_cos_queue_id;
-       /*
-        * CoS Queue assigned to priority 2  This value can only
-        * be changed before traffic has started.
-        * A value of 0xff indicates that no CoS queue is assigned to the
-        * specified priority.
-        */
-       uint8_t pri2_cos_queue_id;
-       /*
-        * CoS Queue assigned to priority 3.  This value can only
-        * be changed before traffic has started.
-        * A value of 0xff indicates that no CoS queue is assigned to the
-        * specified priority.
-        */
-       uint8_t pri3_cos_queue_id;
-       /*
-        * CoS Queue assigned to priority 4.  This value can only
-        * be changed before traffic has started.
-        * A value of 0xff indicates that no CoS queue is assigned to the
-        * specified priority.
-        */
-       uint8_t pri4_cos_queue_id;
-       /*
-        * CoS Queue assigned to priority 5.  This value can only
-        * be changed before traffic has started.
-        * A value of 0xff indicates that no CoS queue is assigned to the
-        * specified priority.
+        * The maximum number of queues that can be configured on this
+        * port.
+        * Valid values range from 1 through 8.
         */
-       uint8_t pri5_cos_queue_id;
+       uint8_t max_configurable_queues;
        /*
-        * CoS Queue assigned to priority 6.  This value can only
-        * be changed before traffic has started.
-        * A value of 0xff indicates that no CoS queue is assigned to the
-        * specified priority.
+        * The maximum number of lossless queues that can be configured
+        * on this port.
+        * Valid values range from 0 through 8.
         */
-       uint8_t pri6_cos_queue_id;
+       uint8_t max_configurable_lossless_queues;
        /*
-        * CoS Queue assigned to priority 7.  This value can only
-        * be changed before traffic has started.
-        * A value of 0xff indicates that no CoS queue is assigned to the
-        * specified priority.
+        * Bitmask indicating which queues can be configured by the
+        * hwrm_queue_cfg command.
+        *
+        * Each bit represents a specific queue where bit 0 represents
+        * queue 0 and bit 7 represents queue 7.
+        * # A value of 0 indicates that the queue is not configurable
+        * by the hwrm_queue_cfg command.
+        * # A value of 1 indicates that the queue is configurable.
+        * # A hwrm_queue_cfg command shall return an error when trying
+        * to configure a queue that is not configurable.
         */
-       uint8_t pri7_cos_queue_id;
+       uint8_t queue_cfg_allowed;
        /* Information about queue configuration. */
        uint8_t queue_cfg_info;
        /*
-        * If this flag is set to '1', then the PRI to CoS
-        * configuration is asymmetric on TX and RX sides.
-        * If this flag is set to '0', then PRI to CoS configuration
-        * is symmetric on TX and RX sides.
+        * If this flag is set to '1', then the queues are
+        * configured asymmetrically on TX and RX sides.
+        * If this flag is set to '0', then the queues are
+        * configured symmetrically on TX and RX sides. For
+        * symmetric configuration, the queue configuration
+        * including queue ids and service profiles on the
+        * TX side is the same as the corresponding queue
+        * configuration on the RX side.
         */
-       #define HWRM_QUEUE_PRI2COS_QCFG_OUTPUT_QUEUE_CFG_INFO_ASYM_CFG \
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_CFG_INFO_ASYM_CFG \
                UINT32_C(0x1)
-       uint8_t unused_0[6];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
-
-/**************************
- * hwrm_queue_pri2cos_cfg *
- **************************/
-
-
-/* hwrm_queue_pri2cos_cfg_input (size:320b/40B) */
-struct hwrm_queue_pri2cos_cfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * Bitmask indicating which queues can be configured by the
+        * hwrm_queue_pfcenable_cfg command.
+        *
+        * Each bit represents a specific priority where bit 0 represents
+        * priority 0 and bit 7 represents priority 7.
+        * # A value of 0 indicates that the priority is not configurable by
+        * the hwrm_queue_pfcenable_cfg command.
+        * # A value of 1 indicates that the priority is configurable.
+        * # A hwrm_queue_pfcenable_cfg command shall return an error
+        * when trying to configure a priority that is not configurable.
         */
-       uint64_t        resp_addr;
-       uint32_t        flags;
+       uint8_t queue_pfcenable_cfg_allowed;
        /*
-        * Enumeration denoting the RX, TX, or both directions applicable to the resource.
-        * This enumeration is used for resources that are similar for both
-        * TX and RX paths of the chip.
+        * Bitmask indicating which queues can be configured by the
+        * hwrm_queue_pri2cos_cfg command.
+        *
+        * Each bit represents a specific queue where bit 0 represents
+        * queue 0 and bit 7 represents queue 7.
+        * # A value of 0 indicates that the queue is not configurable
+        * by the hwrm_queue_pri2cos_cfg command.
+        * # A value of 1 indicates that the queue is configurable.
+        * # A hwrm_queue_pri2cos_cfg command shall return an error when
+        * trying to configure a queue that is not configurable.
         */
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_MASK UINT32_C(0x3)
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_SFT  0
-       /* tx path */
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_TX     UINT32_C(0x0)
-       /* rx path */
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_RX     UINT32_C(0x1)
-       /* Bi-directional (Symmetrically applicable to TX and RX paths) */
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_BIDIR  UINT32_C(0x2)
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_LAST \
-               HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_BIDIR
+       uint8_t queue_pri2cos_cfg_allowed;
        /*
-        * When this bit is set to '0', the mapping is requested
-        * for VLAN PRI field in tunnel headers.
-        * When this bit is set to '1', the mapping is requested
-        * for VLAN PRI field in inner packet headers.
+        * Bitmask indicating which queues can be configured by the
+        * hwrm_queue_cos2bw_cfg command.
+        *
+        * Each bit represents a specific queue where bit 0 represents
+        * queue 0 and bit 7 represents queue 7.
+        * # A value of 0 indicates that the queue is not configurable
+        * by the hwrm_queue_cos2bw_cfg command.
+        * # A value of 1 indicates that the queue is configurable.
+        * # A hwrm_queue_cos2bw_cfg command shall return an error when
+        * trying to configure a queue that is not configurable.
         */
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_IVLAN     UINT32_C(0x4)
-       uint32_t        enables;
+       uint8_t queue_cos2bw_cfg_allowed;
        /*
-        * This bit must be '1' for the pri0_cos_queue_id field to be
-        * configured.
+        * ID of CoS Queue 0.
+        * 0xFF - Invalid ID
+        *
+        * # This ID can be used on any subsequent call to an hwrm command
+        * that takes a queue id.
+        * # IDs must always be queried by this command before any use
+        * by the driver or software.
+        * # Drivers and software must not make assumptions about
+        * queue IDs.
+        * # A value of 0xff indicates that the queue is not available.
+        * # Available queues may not be in sequential order.
         */
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI0_COS_QUEUE_ID \
+       uint8_t queue_id0;
+       /* This value is applicable to CoS queues only. */
+       uint8_t queue_id0_service_profile;
+       /* Lossy (best-effort) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LOSSY \
+               UINT32_C(0x0)
+       /* Lossless (legacy) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS \
                UINT32_C(0x1)
-       /*
-        * This bit must be '1' for the pri1_cos_queue_id field to be
-        * configured.
-        */
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI1_COS_QUEUE_ID \
+       /* Lossless RoCE */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS_ROCE \
+               UINT32_C(0x1)
+       /* Lossy RoCE CNP */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LOSSY_ROCE_CNP \
                UINT32_C(0x2)
+       /* Lossless NIC */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS_NIC \
+               UINT32_C(0x3)
+       /* Set to 0xFF... (All Fs) if there is no service profile specified */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN \
+               UINT32_C(0xff)
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_LAST \
+               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN
        /*
-        * This bit must be '1' for the pri2_cos_queue_id field to be
-        * configured.
-        */
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI2_COS_QUEUE_ID \
-               UINT32_C(0x4)
-       /*
-        * This bit must be '1' for the pri3_cos_queue_id field to be
-        * configured.
-        */
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI3_COS_QUEUE_ID \
-               UINT32_C(0x8)
-       /*
-        * This bit must be '1' for the pri4_cos_queue_id field to be
-        * configured.
-        */
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI4_COS_QUEUE_ID \
-               UINT32_C(0x10)
-       /*
-        * This bit must be '1' for the pri5_cos_queue_id field to be
-        * configured.
-        */
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI5_COS_QUEUE_ID \
-               UINT32_C(0x20)
-       /*
-        * This bit must be '1' for the pri6_cos_queue_id field to be
-        * configured.
-        */
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI6_COS_QUEUE_ID \
-               UINT32_C(0x40)
-       /*
-        * This bit must be '1' for the pri7_cos_queue_id field to be
-        * configured.
+        * ID of CoS Queue 1.
+        * 0xFF - Invalid ID
+        *
+        * # This ID can be used on any subsequent call to an hwrm command
+        * that takes a queue id.
+        * # IDs must always be queried by this command before any use
+        * by the driver or software.
+        * # Drivers and software must not make assumptions about
+        * queue IDs.
+        * # A value of 0xff indicates that the queue is not available.
+        * # Available queues may not be in sequential order.
         */
-       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI7_COS_QUEUE_ID \
-               UINT32_C(0x80)
+       uint8_t queue_id1;
+       /* This value is applicable to CoS queues only. */
+       uint8_t queue_id1_service_profile;
+       /* Lossy (best-effort) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_LOSSY \
+               UINT32_C(0x0)
+       /* Lossless (legacy) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS \
+               UINT32_C(0x1)
+       /* Lossless RoCE */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS_ROCE \
+               UINT32_C(0x1)
+       /* Lossy RoCE CNP */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_LOSSY_ROCE_CNP \
+               UINT32_C(0x2)
+       /* Lossless NIC */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS_NIC \
+               UINT32_C(0x3)
+       /* Set to 0xFF... (All Fs) if there is no service profile specified */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN \
+               UINT32_C(0xff)
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_LAST \
+               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN
        /*
-        * Port ID of port for which the table is being configured.
-        * The HWRM needs to check whether this function is allowed
-        * to configure pri2cos mapping on this port.
+        * ID of CoS Queue 2.
+        * 0xFF - Invalid ID
+        *
+        * # This ID can be used on any subsequent call to an hwrm command
+        * that takes a queue id.
+        * # IDs must always be queried by this command before any use
+        * by the driver or software.
+        * # Drivers and software must not make assumptions about
+        * queue IDs.
+        * # A value of 0xff indicates that the queue is not available.
+        * # Available queues may not be in sequential order.
         */
-       uint8_t port_id;
+       uint8_t queue_id2;
+       /* This value is applicable to CoS queues only. */
+       uint8_t queue_id2_service_profile;
+       /* Lossy (best-effort) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_LOSSY \
+               UINT32_C(0x0)
+       /* Lossless (legacy) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS \
+               UINT32_C(0x1)
+       /* Lossless RoCE */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS_ROCE \
+               UINT32_C(0x1)
+       /* Lossy RoCE CNP */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_LOSSY_ROCE_CNP \
+               UINT32_C(0x2)
+       /* Lossless NIC */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS_NIC \
+               UINT32_C(0x3)
+       /* Set to 0xFF... (All Fs) if there is no service profile specified */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN \
+               UINT32_C(0xff)
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_LAST \
+               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN
        /*
-        * CoS Queue assigned to priority 0.  This value can only
-        * be changed before traffic has started.
+        * ID of CoS Queue 3.
+        * 0xFF - Invalid ID
+        *
+        * # This ID can be used on any subsequent call to an hwrm command
+        * that takes a queue id.
+        * # IDs must always be queried by this command before any use
+        * by the driver or software.
+        * # Drivers and software must not make assumptions about
+        * queue IDs.
+        * # A value of 0xff indicates that the queue is not available.
+        * # Available queues may not be in sequential order.
         */
-       uint8_t pri0_cos_queue_id;
+       uint8_t queue_id3;
+       /* This value is applicable to CoS queues only. */
+       uint8_t queue_id3_service_profile;
+       /* Lossy (best-effort) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_LOSSY \
+               UINT32_C(0x0)
+       /* Lossless (legacy) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS \
+               UINT32_C(0x1)
+       /* Lossless RoCE */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS_ROCE \
+               UINT32_C(0x1)
+       /* Lossy RoCE CNP */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_LOSSY_ROCE_CNP \
+               UINT32_C(0x2)
+       /* Lossless NIC */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS_NIC \
+               UINT32_C(0x3)
+       /* Set to 0xFF... (All Fs) if there is no service profile specified */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN \
+               UINT32_C(0xff)
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_LAST \
+               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN
        /*
-        * CoS Queue assigned to priority 1.  This value can only
-        * be changed before traffic has started.
+        * ID of CoS Queue 4.
+        * 0xFF - Invalid ID
+        *
+        * # This ID can be used on any subsequent call to an hwrm command
+        * that takes a queue id.
+        * # IDs must always be queried by this command before any use
+        * by the driver or software.
+        * # Drivers and software must not make assumptions about
+        * queue IDs.
+        * # A value of 0xff indicates that the queue is not available.
+        * # Available queues may not be in sequential order.
         */
-       uint8_t pri1_cos_queue_id;
+       uint8_t queue_id4;
+       /* This value is applicable to CoS queues only. */
+       uint8_t queue_id4_service_profile;
+       /* Lossy (best-effort) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_LOSSY \
+               UINT32_C(0x0)
+       /* Lossless (legacy) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS \
+               UINT32_C(0x1)
+       /* Lossless RoCE */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS_ROCE \
+               UINT32_C(0x1)
+       /* Lossy RoCE CNP */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_LOSSY_ROCE_CNP \
+               UINT32_C(0x2)
+       /* Lossless NIC */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS_NIC \
+               UINT32_C(0x3)
+       /* Set to 0xFF... (All Fs) if there is no service profile specified */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN \
+               UINT32_C(0xff)
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_LAST \
+               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN
        /*
-        * CoS Queue assigned to priority 2  This value can only
-        * be changed before traffic has started.
+        * ID of CoS Queue 5.
+        * 0xFF - Invalid ID
+        *
+        * # This ID can be used on any subsequent call to an hwrm command
+        * that takes a queue id.
+        * # IDs must always be queried by this command before any use
+        * by the driver or software.
+        * # Drivers and software must not make assumptions about
+        * queue IDs.
+        * # A value of 0xff indicates that the queue is not available.
+        * # Available queues may not be in sequential order.
         */
-       uint8_t pri2_cos_queue_id;
+       uint8_t queue_id5;
+       /* This value is applicable to CoS queues only. */
+       uint8_t queue_id5_service_profile;
+       /* Lossy (best-effort) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_LOSSY \
+               UINT32_C(0x0)
+       /* Lossless (legacy) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS \
+               UINT32_C(0x1)
+       /* Lossless RoCE */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS_ROCE \
+               UINT32_C(0x1)
+       /* Lossy RoCE CNP */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_LOSSY_ROCE_CNP \
+               UINT32_C(0x2)
+       /* Lossless NIC */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS_NIC \
+               UINT32_C(0x3)
+       /* Set to 0xFF... (All Fs) if there is no service profile specified */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN \
+               UINT32_C(0xff)
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_LAST \
+               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN
        /*
-        * CoS Queue assigned to priority 3.  This value can only
-        * be changed before traffic has started.
+        * ID of CoS Queue 6.
+        * 0xFF - Invalid ID
+        *
+        * # This ID can be used on any subsequent call to an hwrm command
+        * that takes a queue id.
+        * # IDs must always be queried by this command before any use
+        * by the driver or software.
+        * # Drivers and software must not make assumptions about
+        * queue IDs.
+        * # A value of 0xff indicates that the queue is not available.
+        * # Available queues may not be in sequential order.
         */
-       uint8_t pri3_cos_queue_id;
+       uint8_t queue_id6;
+       /* This value is applicable to CoS queues only. */
+       uint8_t queue_id6_service_profile;
+       /* Lossy (best-effort) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_LOSSY \
+               UINT32_C(0x0)
+       /* Lossless (legacy) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS \
+               UINT32_C(0x1)
+       /* Lossless RoCE */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS_ROCE \
+               UINT32_C(0x1)
+       /* Lossy RoCE CNP */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_LOSSY_ROCE_CNP \
+               UINT32_C(0x2)
+       /* Lossless NIC */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS_NIC \
+               UINT32_C(0x3)
+       /* Set to 0xFF... (All Fs) if there is no service profile specified */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN \
+               UINT32_C(0xff)
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_LAST \
+               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN
        /*
-        * CoS Queue assigned to priority 4.  This value can only
-        * be changed before traffic has started.
+        * ID of CoS Queue 7.
+        * 0xFF - Invalid ID
+        *
+        * # This ID can be used on any subsequent call to an hwrm command
+        * that takes a queue id.
+        * # IDs must always be queried by this command before any use
+        * by the driver or software.
+        * # Drivers and software must not make assumptions about
+        * queue IDs.
+        * # A value of 0xff indicates that the queue is not available.
+        * # Available queues may not be in sequential order.
         */
-       uint8_t pri4_cos_queue_id;
+       uint8_t queue_id7;
+       /* This value is applicable to CoS queues only. */
+       uint8_t queue_id7_service_profile;
+       /* Lossy (best-effort) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_LOSSY \
+               UINT32_C(0x0)
+       /* Lossless (legacy) */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS \
+               UINT32_C(0x1)
+       /* Lossless RoCE */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS_ROCE \
+               UINT32_C(0x1)
+       /* Lossy RoCE CNP */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_LOSSY_ROCE_CNP \
+               UINT32_C(0x2)
+       /* Lossless NIC */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS_NIC \
+               UINT32_C(0x3)
+       /* Set to 0xFF... (All Fs) if there is no service profile specified */
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN \
+               UINT32_C(0xff)
+       #define HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_LAST \
+               HWRM_QUEUE_QPORTCFG_OUTPUT_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN
        /*
-        * CoS Queue assigned to priority 5.  This value can only
-        * be changed before traffic has started.
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       uint8_t pri5_cos_queue_id;
+       uint8_t valid;
+} __attribute__((packed));
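
Because this response forbids any assumption about queue IDs (0xff marks an unavailable queue, and valid IDs need not be sequential), a consumer has to filter the eight (queue_id, service_profile) byte pairs explicitly. A minimal sketch, relying only on the packed layout above in which queue_id0..queue_id7 and their service profiles are consecutive uint8_t pairs:

#include <stdint.h>

static int
collect_cos_queues(const struct hwrm_queue_qportcfg_output *resp,
		   uint8_t ids[8], uint8_t profiles[8])
{
	/* queue_id0, queue_id0_service_profile, queue_id1, ... form
	 * consecutive byte pairs in the packed struct, so a byte
	 * pointer can walk them. */
	const uint8_t *pair = &resp->queue_id0;
	int i, n = 0;

	for (i = 0; i < resp->max_configurable_queues && i < 8; i++) {
		if (pair[2 * i] == 0xff)
			continue;	/* queue not available */
		ids[n] = pair[2 * i];
		profiles[n] = pair[2 * i + 1];
		n++;
	}
	return n;	/* number of usable queues found */
}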
+
+/*******************
+ * hwrm_queue_qcfg *
+ *******************/
+
+
+/* hwrm_queue_qcfg_input (size:192b/24B) */
+struct hwrm_queue_qcfg_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * CoS Queue assigned to priority 6.  This value can only
-        * be changed before traffic has started.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       uint8_t pri6_cos_queue_id;
+       uint16_t        cmpl_ring;
        /*
-        * CoS Queue assigned to priority 7.  This value can only
-        * be changed before traffic has started.
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       uint8_t pri7_cos_queue_id;
+       uint16_t        seq_id;
+       /*
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
+        */
+       uint16_t        target_id;
+       /*
+        * A physical address pointer to the host buffer to which the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
+        */
+       uint64_t        resp_addr;
+       uint32_t        flags;
+       /*
+        * Enumeration denoting the RX or TX type of the resource.
+        * This enumeration is used for resources that are similar for both
+        * TX and RX paths of the chip.
+        */
+       #define HWRM_QUEUE_QCFG_INPUT_FLAGS_PATH     UINT32_C(0x1)
+       /* tx path */
+       #define HWRM_QUEUE_QCFG_INPUT_FLAGS_PATH_TX    UINT32_C(0x0)
+       /* rx path */
+       #define HWRM_QUEUE_QCFG_INPUT_FLAGS_PATH_RX    UINT32_C(0x1)
+       #define HWRM_QUEUE_QCFG_INPUT_FLAGS_PATH_LAST \
+               HWRM_QUEUE_QCFG_INPUT_FLAGS_PATH_RX
+       /* ID of the queue being queried. */
+       uint32_t        queue_id;
+} __attribute__((packed));
+
+/* hwrm_queue_qcfg_output (size:128b/16B) */
+struct hwrm_queue_qcfg_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       /*
+        * This value is the estimated packet length used in the
+        * TX arbiter.
+        */
+       uint32_t        queue_len;
+       /* This value is applicable to CoS queues only. */
+       uint8_t service_profile;
+       /* Lossy (best-effort) */
+       #define HWRM_QUEUE_QCFG_OUTPUT_SERVICE_PROFILE_LOSSY    UINT32_C(0x0)
+       /* Lossless */
+       #define HWRM_QUEUE_QCFG_OUTPUT_SERVICE_PROFILE_LOSSLESS UINT32_C(0x1)
+       /* Set to 0xFF... (All Fs) if there is no service profile specified */
+       #define HWRM_QUEUE_QCFG_OUTPUT_SERVICE_PROFILE_UNKNOWN  UINT32_C(0xff)
+       #define HWRM_QUEUE_QCFG_OUTPUT_SERVICE_PROFILE_LAST \
+               HWRM_QUEUE_QCFG_OUTPUT_SERVICE_PROFILE_UNKNOWN
+       /* Information about queue configuration. */
+       uint8_t queue_cfg_info;
+       /*
+        * If this flag is set to '1', then the queue is
+        * configured asymmetrically on TX and RX sides.
+        * If this flag is set to '0', then the queue is
+        * configured symmetrically on TX and RX sides.
+        */
+       #define HWRM_QUEUE_QCFG_OUTPUT_QUEUE_CFG_INFO_ASYM_CFG \
+               UINT32_C(0x1)
+       uint8_t unused_0;
+       /*
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
+        */
+       uint8_t valid;
+} __attribute__((packed));
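
The 'valid' byte documented above is the completion handshake: firmware writes it last, so a driver must observe it as '1' before trusting any other response field. A minimal, hypothetical polling sketch; the spin count is illustrative, and a real driver would add its own delay and memory-barrier discipline:

#include <stdint.h>

static int
hwrm_resp_wait_valid(const volatile uint8_t *valid, unsigned int spins)
{
	while (spins--) {
		if (*valid == 1)
			return 0;	/* response fully written to RAM */
	}
	return -1;	/* firmware never completed the response */
}

A caller would gate any read of the response fields on something like hwrm_resp_wait_valid(&resp->valid, 100000).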
+
+/******************
+ * hwrm_queue_cfg *
+ ******************/
+
+
+/* hwrm_queue_cfg_input (size:320b/40B) */
+struct hwrm_queue_cfg_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /*
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        */
+       uint16_t        cmpl_ring;
+       /*
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        */
+       uint16_t        seq_id;
+       /*
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
+        */
+       uint16_t        target_id;
+       /*
+        * A physical address pointer to the host buffer to which the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
+        */
+       uint64_t        resp_addr;
+       uint32_t        flags;
+       /*
+        * Enumeration denoting the RX, TX, or both directions applicable to the resource.
+        * This enumeration is used for resources that are similar for both
+        * TX and RX paths of the chip.
+        */
+       #define HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_MASK UINT32_C(0x3)
+       #define HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_SFT  0
+       /* tx path */
+       #define HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_TX     UINT32_C(0x0)
+       /* rx path */
+       #define HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_RX     UINT32_C(0x1)
+       /* Bi-directional (Symmetrically applicable to TX and RX paths) */
+       #define HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_BIDIR  UINT32_C(0x2)
+       #define HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_LAST \
+               HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_BIDIR
+       uint32_t        enables;
+       /*
+        * This bit must be '1' for the dflt_len field to be
+        * configured.
+        */
+       #define HWRM_QUEUE_CFG_INPUT_ENABLES_DFLT_LEN            UINT32_C(0x1)
+       /*
+        * This bit must be '1' for the service_profile field to be
+        * configured.
+        */
+       #define HWRM_QUEUE_CFG_INPUT_ENABLES_SERVICE_PROFILE     UINT32_C(0x2)
+       /* ID of the queue to be configured by this function. */
+       uint32_t        queue_id;
+       /*
+        * This value is the estimated packet length used in the
+        * TX arbiter.
+        * Set to 0xFF... (All Fs) to not adjust this value.
+        */
+       uint32_t        dflt_len;
+       /* This value is applicable to CoS queues only. */
+       uint8_t service_profile;
+       /* Lossy (best-effort) */
+       #define HWRM_QUEUE_CFG_INPUT_SERVICE_PROFILE_LOSSY    UINT32_C(0x0)
+       /* Lossless */
+       #define HWRM_QUEUE_CFG_INPUT_SERVICE_PROFILE_LOSSLESS UINT32_C(0x1)
+       /* Set to 0xFF... (All Fs) if there is no service profile specified */
+       #define HWRM_QUEUE_CFG_INPUT_SERVICE_PROFILE_UNKNOWN  UINT32_C(0xff)
+       #define HWRM_QUEUE_CFG_INPUT_SERVICE_PROFILE_LAST \
+               HWRM_QUEUE_CFG_INPUT_SERVICE_PROFILE_UNKNOWN
        uint8_t unused_0[7];
 } __attribute__((packed));
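
Each optional field in this request is gated by a bit in 'enables', so a caller sets only the bits for the fields it actually writes. A minimal, hypothetical sketch marking a queue lossless on both paths; the request-type constant and the send path are assumed to exist elsewhere:

#include <stdint.h>
#include <string.h>
#include <rte_byteorder.h>

static void
prep_queue_cfg_lossless(struct hwrm_queue_cfg_input *req, uint32_t qid)
{
	memset(req, 0, sizeof(*req));
	/* Apply symmetrically to the TX and RX paths. */
	req->flags = rte_cpu_to_le_32(HWRM_QUEUE_CFG_INPUT_FLAGS_PATH_BIDIR);
	/* Only service_profile is written, so only its enable bit is
	 * set; dflt_len is left unconfigured by firmware. */
	req->enables =
		rte_cpu_to_le_32(HWRM_QUEUE_CFG_INPUT_ENABLES_SERVICE_PROFILE);
	req->queue_id = rte_cpu_to_le_32(qid);
	req->service_profile = HWRM_QUEUE_CFG_INPUT_SERVICE_PROFILE_LOSSLESS;
}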
 
-/* hwrm_queue_pri2cos_cfg_output (size:128b/16B) */
-struct hwrm_queue_pri2cos_cfg_output {
+/* hwrm_queue_cfg_output (size:128b/16B) */
+struct hwrm_queue_cfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -14994,13 +15568,13 @@ struct hwrm_queue_pri2cos_cfg_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/**************************
- * hwrm_queue_cos2bw_qcfg *
- **************************/
+/*****************************
+ * hwrm_queue_pfcenable_qcfg *
+ *****************************/
 
 
-/* hwrm_queue_cos2bw_qcfg_input (size:192b/24B) */
-struct hwrm_queue_cos2bw_qcfg_input {
+/* hwrm_queue_pfcenable_qcfg_input (size:192b/24B) */
+struct hwrm_queue_pfcenable_qcfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -15031,14 +15605,14 @@ struct hwrm_queue_cos2bw_qcfg_input {
        /*
         * Port ID of port for which the table is being configured.
         * The HWRM needs to check whether this function is allowed
-        * to configure TC BW assignment on this port.
+        * to configure PFC settings on this port.
         */
        uint16_t        port_id;
        uint8_t unused_0[6];
 } __attribute__((packed));
 
-/* hwrm_queue_cos2bw_qcfg_output (size:896b/112B) */
-struct hwrm_queue_cos2bw_qcfg_output {
+/* hwrm_queue_pfcenable_qcfg_output (size:128b/16B) */
+struct hwrm_queue_pfcenable_qcfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -15047,2649 +15621,112 @@ struct hwrm_queue_cos2bw_qcfg_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* ID of CoS Queue 0. */
-       uint8_t queue_id0;
-       uint8_t unused_0;
-       uint16_t        unused_1;
-       /*
-        * Minimum BW allocated to CoS Queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id0_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to CoS Queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id0_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id0_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when the
-        * tsa_assign is 0 - Strict Priority (SP)
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id0_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id0_bw_weight;
-       /* ID of CoS Queue 1. */
-       uint8_t queue_id1;
-       /*
-        * Minimum BW allocated to CoS Queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id1_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to CoS queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id1_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id1_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when the
-        * tsa_assign is 0 - Strict Priority (SP)
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id1_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id1_bw_weight;
-       /* ID of CoS Queue 2. */
-       uint8_t queue_id2;
-       /*
-        * Minimum BW allocated to CoS Queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id2_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to CoS queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id2_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id2_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when the
-        * tsa_assign is 0 - Strict Priority (SP)
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id2_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id2_bw_weight;
-       /* ID of CoS Queue 3. */
-       uint8_t queue_id3;
-       /*
-        * Minimum BW allocated to CoS Queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id3_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to CoS queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id3_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id3_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when
-        * tsa_assign is 0 (Strict Priority).
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id3_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id3_bw_weight;
-       /* ID of CoS Queue 4. */
-       uint8_t queue_id4;
-       /*
-        * Minimum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id4_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id4_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id4_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when
-        * tsa_assign is 0 (Strict Priority).
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id4_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id4_bw_weight;
-       /* ID of CoS Queue 5. */
-       uint8_t queue_id5;
-       /*
-        * Minimum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id5_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id5_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id5_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when
-        * tsa_assign is 0 (Strict Priority).
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id5_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id5_bw_weight;
-       /* ID of CoS Queue 6. */
-       uint8_t queue_id6;
-       /*
-        * Minimum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id6_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id6_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id6_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when
-        * tsa_assign is 0 (Strict Priority).
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id6_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id6_bw_weight;
-       /* ID of CoS Queue 7. */
-       uint8_t queue_id7;
-       /*
-        * Minimum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id7_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id7_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id7_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when
-        * tsa_assign is 0 (Strict Priority).
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id7_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id7_bw_weight;
-       uint8_t unused_2[4];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM. This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal
-        * processor, the order of writes has to be such that this field
-        * is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
-
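For reference, here is a minimal sketch of how a driver might consume one of the bandwidth words in the qcfg output above, once the `valid` byte confirms the response is complete. Only the struct, its fields, the HWRM_QUEUE_COS2BW_QCFG_OUTPUT_* macros, and DPDK's rte_le_to_cpu_32() come from this header; the function itself is hypothetical and not necessarily how the bnxt driver does it.

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>
#include <rte_byteorder.h>	/* rte_le_to_cpu_32() */

static void
show_queue_id4_min_bw(const struct hwrm_queue_cos2bw_qcfg_output *resp)
{
	uint32_t w, value, unit;

	/* HWRM output is little-endian; firmware writes 'valid' last. */
	if (resp->valid != 1)
		return;
	w = rte_le_to_cpu_32(resp->queue_id4_min_bw);

	/* Bits 0..27 carry the value, bit 28 the scale, bits 29..31 the unit. */
	value = (w & HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_MASK) >>
		HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_SFT;
	unit = w & HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MASK;

	if (unit == HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_PERCENT1_100)
		printf("queue 4 min bw: %" PRIu32 "/100 percent\n", value);
	else
		printf("queue 4 min bw: %" PRIu32 " (%s)\n", value,
		       (w & HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_SCALE) ?
		       "bytes" : "bits");
}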
-/*************************
- * hwrm_queue_cos2bw_cfg *
- *************************/
-
-
-/* hwrm_queue_cos2bw_cfg_input (size:1024b/128B) */
-struct hwrm_queue_cos2bw_cfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF9-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer to the host buffer into which the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
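	/*
	 * Illustrative note (not part of the generated header): a driver
	 * fills the header fields above before posting the request, e.g.
	 *
	 *     req.req_type = rte_cpu_to_le_16(HWRM_QUEUE_COS2BW_CFG);
	 *     req.seq_id = rte_cpu_to_le_16(my_seq++);
	 *     req.resp_addr = rte_cpu_to_le_64(resp_buf_iova);
	 *
	 * where HWRM_QUEUE_COS2BW_CFG is the request-type constant defined
	 * elsewhere in this header, rte_cpu_to_le_*() are DPDK byte-order
	 * helpers, and my_seq/resp_buf_iova are hypothetical driver state.
	 */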
-       uint32_t        flags;
-       uint32_t        enables;
-       /*
-        * If this bit is set to 1, then all queue_id0 related
-        * parameters in this command are valid.
-        */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID0_VALID \
-               UINT32_C(0x1)
-       /*
-        * If this bit is set to 1, then all queue_id1 related
-        * parameters in this command are valid.
-        */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID1_VALID \
-               UINT32_C(0x2)
-       /*
-        * If this bit is set to 1, then all queue_id2 related
-        * parameters in this command are valid.
-        */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID2_VALID \
-               UINT32_C(0x4)
-       /*
-        * If this bit is set to 1, then all queue_id3 related
-        * parameters in this command are valid.
-        */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID3_VALID \
-               UINT32_C(0x8)
-       /*
-        * If this bit is set to 1, then all queue_id4 related
-        * parameters in this command are valid.
-        */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID4_VALID \
-               UINT32_C(0x10)
-       /*
-        * If this bit is set to 1, then all queue_id5 related
-        * parameters in this command are valid.
-        */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID5_VALID \
-               UINT32_C(0x20)
-       /*
-        * If this bit is set to 1, then all queue_id6 related
-        * parameters in this command are valid.
-        */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID6_VALID \
-               UINT32_C(0x40)
-       /*
-        * If this bit is set to 1, then all queue_id7 related
-        * parameters in this command are valid.
-        */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID7_VALID \
-               UINT32_C(0x80)
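	/*
	 * Illustrative usage sketch (not part of the generated header):
	 * a driver reconfiguring only CoS queues 0 and 1 would set
	 *
	 *     req.enables = rte_cpu_to_le_32(
	 *             HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID0_VALID |
	 *             HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID1_VALID);
	 *
	 * leaving the remaining queue_idN parameter groups ignored by the
	 * firmware. rte_cpu_to_le_32() is DPDK's byte-order helper; "req"
	 * is a hypothetical struct hwrm_queue_cos2bw_cfg_input instance.
	 */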
-       /*
-        * Port ID of port for which the table is being configured.
-        * The HWRM needs to check whether this function is allowed
-        * to configure TC BW assignment on this port.
-        */
-       uint16_t        port_id;
-       /* ID of CoS Queue 0. */
-       uint8_t queue_id0;
-       uint8_t unused_0;
-       /*
-        * Minimum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id0_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID
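	/*
	 * Illustrative encoding sketch (hypothetical numbers): to request
	 * a guaranteed 25.00% of port bandwidth for queue 0:
	 *
	 *     req.queue_id0_min_bw = rte_cpu_to_le_32(
	 *       (2500 << HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_SFT) |
	 *       HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_PERCENT1_100);
	 *
	 * 2500 encodes 25.00 in 1/100th-of-a-percent units.
	 */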
-       /*
-        * Maximum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id0_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id0_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when
-        * tsa_assign is 0 (Strict Priority).
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id0_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id0_bw_weight;
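	/*
	 * Illustrative sketch (hypothetical values): strict priority for
	 * queue 0 at the highest level, with the ETS weight left unused:
	 *
	 *     req.queue_id0_tsa_assign =
	 *             HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_TSA_ASSIGN_SP;
	 *     req.queue_id0_pri_lvl = 0;      (0..7 are valid for SP)
	 *     req.queue_id0_bw_weight = 0;
	 *
	 * For ETS, tsa_assign would instead be ..._TSA_ASSIGN_ETS and
	 * bw_weight would carry this queue's share of leftover bandwidth.
	 */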
-       /* ID of CoS Queue 1. */
-       uint8_t queue_id1;
-       /*
-        * Minimum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id1_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id1_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id1_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when
-        * tsa_assign is 0 (Strict Priority).
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id1_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id1_bw_weight;
-       /* ID of CoS Queue 2. */
-       uint8_t queue_id2;
-       /*
-        * Minimum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id2_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id2_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id2_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when
-        * tsa_assign is 0 (Strict Priority).
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id2_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id2_bw_weight;
-       /* ID of CoS Queue 3. */
-       uint8_t queue_id3;
-       /*
-        * Minimum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id3_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id3_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id3_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when
-        * tsa_assign is 0 (Strict Priority).
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id3_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id3_bw_weight;
-       /* ID of CoS Queue 4. */
-       uint8_t queue_id4;
-       /*
-        * Minimum BW allocated to the CoS queue.
-        * The HWRM will translate this value into a byte counter and
-        * a time interval used for this CoS inside the device.
-        */
-       uint32_t        queue_id4_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to CoS queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id4_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id4_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when the
-        * tsa_assign is 0 - Strict Priority (SP)
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id4_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id4_bw_weight;
-       /* ID of CoS Queue 5. */
-       uint8_t queue_id5;
-       /*
-        * Minimum BW allocated to CoS Queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id5_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to CoS queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id5_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id5_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when the
-        * tsa_assign is 0 - Strict Priority (SP)
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id5_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id5_bw_weight;
-       /* ID of CoS Queue 6. */
-       uint8_t queue_id6;
-       /*
-        * Minimum BW allocated to CoS Queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id6_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to CoS queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id6_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id6_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when the
-        * tsa_assign is 0 - Strict Priority (SP)
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id6_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id6_bw_weight;
-       /* ID of CoS Queue 7. */
-       uint8_t queue_id7;
-       /*
-        * Minimum BW allocated to CoS Queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id7_min_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID
-       /*
-        * Maximum BW allocated to CoS queue.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this COS inside the device.
-        */
-       uint32_t        queue_id7_max_bw;
-       /* The bandwidth value. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_SCALE_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID
-       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
-       uint8_t queue_id7_tsa_assign;
-       /* Strict Priority */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_TSA_ASSIGN_SP \
-               UINT32_C(0x0)
-       /* Enhanced Transmission Selection */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_TSA_ASSIGN_ETS \
-               UINT32_C(0x1)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST \
-               UINT32_C(0x2)
-       /* reserved. */
-       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST \
-               UINT32_C(0xff)
-       /*
-        * Priority level for strict priority. Valid only when the
-        * tsa_assign is 0 - Strict Priority (SP)
-        * 0..7 - Valid values.
-        * 8..255 - Reserved.
-        */
-       uint8_t queue_id7_pri_lvl;
-       /*
-        * Weight used to allocate remaining BW for this COS after
-        * servicing guaranteed bandwidths for all COS.
-        */
-       uint8_t queue_id7_bw_weight;
-       uint8_t unused_1[5];
-} __attribute__((packed));
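
/*
 * Illustrative sketch, not part of the generated header: composing one of
 * the 32-bit min/max bandwidth words described above. The 28-bit value
 * occupies bits 0..27, the scale bit is bit 28 and the 3-bit unit field
 * sits at bits 29..31. The helper name is an assumption for illustration;
 * byte-order conversion to little-endian is left to the caller.
 */
#include <stdint.h>

static inline uint32_t
bnxt_cos2bw_bw_percent(uint32_t hundredths_of_a_percent)
{
	/* e.g. 2500 encodes 25.00% of total bandwidth */
	return (hundredths_of_a_percent &
		HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_MASK) |
	       HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_PERCENT1_100;
}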
-
-/* hwrm_queue_cos2bw_cfg_output (size:128b/16B) */
-struct hwrm_queue_cos2bw_cfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t unused_0[7];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
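
/*
 * Illustrative sketch, not part of the generated header: since firmware
 * writes the 'valid' byte of every output record last, a driver can poll
 * it to learn when the whole response has landed in host memory. The
 * helper name, poll budget and the rte_io_rmb()/rte_delay_us() choices
 * are assumptions for illustration.
 */
#include <errno.h>
#include <rte_atomic.h>
#include <rte_cycles.h>

static int
bnxt_wait_resp_valid(volatile struct hwrm_queue_cos2bw_cfg_output *resp,
		     unsigned int max_polls)
{
	while (max_polls--) {
		if (resp->valid) {
			rte_io_rmb(); /* read payload only after 'valid' is seen */
			return 0;
		}
		rte_delay_us(1);
	}
	return -ETIMEDOUT;
}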
-
-/*************************
- * hwrm_queue_dscp_qcaps *
- *************************/
-
-
-/* hwrm_queue_dscp_qcaps_input (size:192b/24B) */
-struct hwrm_queue_dscp_qcaps_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
-       /*
-        * Port ID of port for which the table is being configured.
-        * The HWRM needs to check whether this function is allowed
-        * to configure pri2cos mapping on this port.
-        */
-       uint8_t port_id;
-       uint8_t unused_0[7];
-} __attribute__((packed));
-
-/* hwrm_queue_dscp_qcaps_output (size:128b/16B) */
-struct hwrm_queue_dscp_qcaps_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* The number of bits provided by the hardware for the DSCP value. */
-       uint8_t num_dscp_bits;
-       uint8_t unused_0;
-       /* Max number of DSCP-MASK-PRI entries supported. */
-       uint16_t        max_entries;
-       uint8_t unused_1[3];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
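
/*
 * Illustrative sketch, not part of the generated header: using the qcaps
 * response to size the host buffer for a later dscp2pri query. Assumes
 * each DSCP-MASK-PRI tuple occupies 3 bytes (24 bits), as the qcfg
 * command below describes; the helper name is hypothetical.
 */
#include <stddef.h>
#include <rte_byteorder.h>

static inline size_t
bnxt_dscp2pri_buf_size(const struct hwrm_queue_dscp_qcaps_output *qcaps)
{
	/* max_entries is little-endian on the wire; 24 bits per tuple */
	return (size_t)rte_le_to_cpu_16(qcaps->max_entries) * 3;
}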
-
-/****************************
- * hwrm_queue_dscp2pri_qcfg *
- ****************************/
-
-
-/* hwrm_queue_dscp2pri_qcfg_input (size:256b/32B) */
-struct hwrm_queue_dscp2pri_qcfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
-       /*
-        * This is the host address where the 24-bits DSCP-MASK-PRI
-        * tuple(s) will be copied to.
-        */
-       uint64_t        dest_data_addr;
-       /*
-        * Port ID of port for which the table is being configured.
-        * The HWRM needs to check whether this function is allowed
-        * to configure pri2cos mapping on this port.
-        */
-       uint8_t port_id;
-       uint8_t unused_0;
-       /* Size of the buffer pointed to by dest_data_addr. */
-       uint16_t        dest_data_buffer_size;
-       uint8_t unused_1[4];
-} __attribute__((packed));
-
-/* hwrm_queue_dscp2pri_qcfg_output (size:128b/16B) */
-struct hwrm_queue_dscp2pri_qcfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /*
-        * A count of the number of DSCP-MASK-PRI tuple(s) pointed to
-        * by the dest_data_addr.
-        */
-       uint16_t        entry_cnt;
-       /*
-        * This is the default PRI which un-initialized DSCP values are
-        * mapped to.
-        */
-       uint8_t default_pri;
-       uint8_t unused_0[4];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
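
/*
 * Illustrative sketch, not part of the generated header: one plausible
 * host-memory layout for a 24-bit DSCP-MASK-PRI tuple copied through
 * dest_data_addr. The field order (dscp, mask, pri) is an assumption for
 * illustration; the header itself does not define the tuple structure.
 */
#include <stdint.h>

struct bnxt_dscp2pri_entry {
	uint8_t dscp; /* DSCP value to match */
	uint8_t mask; /* mask applied to the DSCP value before matching */
	uint8_t pri;  /* priority that matching packets map to */
} __attribute__((packed));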
-
-/***************************
- * hwrm_queue_dscp2pri_cfg *
- ***************************/
-
-
-/* hwrm_queue_dscp2pri_cfg_input (size:320b/40B) */
-struct hwrm_queue_dscp2pri_cfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
-       /*
-        * This is the host address where the 24-bits DSCP-MASK-PRI tuple
-        * will be copied from.
-        */
-       uint64_t        src_data_addr;
-       uint32_t        flags;
-       /* use_hw_default_pri is 1 b */
-       #define HWRM_QUEUE_DSCP2PRI_CFG_INPUT_FLAGS_USE_HW_DEFAULT_PRI \
-               UINT32_C(0x1)
-       uint32_t        enables;
-       /*
-        * This bit must be '1' for the default_pri field to be
-        * configured.
-        */
-       #define HWRM_QUEUE_DSCP2PRI_CFG_INPUT_ENABLES_DEFAULT_PRI \
-               UINT32_C(0x1)
-       /*
-        * Port ID of port for which the table is being configured.
-        * The HWRM needs to check whether this function is allowed
-        * to configure pri2cos mapping on this port.
-        */
-       uint8_t port_id;
-       /*
-        * This is the default PRI which un-initialized DSCP values will be
-        * mapped to.
-        */
-       uint8_t default_pri;
-       /*
-        * A count of the number of DSCP-MASK-PRI tuple(s) in the data pointed
-        * to by src_data_addr.
-        */
-       uint16_t        entry_cnt;
-       uint8_t unused_0[4];
-} __attribute__((packed));
-
-/* hwrm_queue_dscp2pri_cfg_output (size:128b/16B) */
-struct hwrm_queue_dscp2pri_cfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t unused_0[7];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
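
/*
 * Illustrative sketch, not part of the generated header: filling the cfg
 * request above before sending it. 'map_addr' (the IOVA of the DMA-able
 * tuple buffer) and the helper name are assumptions for illustration;
 * multi-byte HWRM fields are little-endian, hence the conversions.
 */
#include <stdint.h>
#include <rte_byteorder.h>

static void
bnxt_fill_dscp2pri_cfg(struct hwrm_queue_dscp2pri_cfg_input *req,
		       uint64_t map_addr, uint16_t entries, uint8_t port_id)
{
	req->src_data_addr = rte_cpu_to_le_64(map_addr);
	req->enables =
		rte_cpu_to_le_32(HWRM_QUEUE_DSCP2PRI_CFG_INPUT_ENABLES_DEFAULT_PRI);
	req->default_pri = 0; /* un-initialized DSCP values map to PRI 0 */
	req->entry_cnt = rte_cpu_to_le_16(entries);
	req->port_id = port_id;
}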
-
-/*******************
- * hwrm_vnic_alloc *
- *******************/
-
-
-/* hwrm_vnic_alloc_input (size:192b/24B) */
-struct hwrm_vnic_alloc_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
-       uint32_t        flags;
-       /*
-        * When this bit is '1', this VNIC is requested to
-        * be the default VNIC for this function.
-        */
-       #define HWRM_VNIC_ALLOC_INPUT_FLAGS_DEFAULT     UINT32_C(0x1)
-       uint8_t unused_0[4];
-} __attribute__((packed));
-
-/* hwrm_vnic_alloc_output (size:128b/16B) */
-struct hwrm_vnic_alloc_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* Logical vnic ID */
-       uint32_t        vnic_id;
-       uint8_t unused_0[3];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
-
-/******************
- * hwrm_vnic_free *
- ******************/
-
-
-/* hwrm_vnic_free_input (size:192b/24B) */
-struct hwrm_vnic_free_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
-       /* Logical vnic ID */
-       uint32_t        vnic_id;
-       uint8_t unused_0[4];
-} __attribute__((packed));
-
-/* hwrm_vnic_free_output (size:128b/16B) */
-struct hwrm_vnic_free_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t unused_0[7];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
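
/*
 * Illustrative sketch, not part of the generated header: the usual
 * pairing of the two commands above. A VNIC allocated here is identified
 * by the returned logical vnic_id and is released later with
 * hwrm_vnic_free on that same ID. bnxt_hwrm_send() is a hypothetical
 * transport helper that fills the common header fields and blocks until
 * the response's 'valid' byte is set.
 */
#include <stddef.h>
#include <stdint.h>
#include <rte_byteorder.h>

int bnxt_hwrm_send(void *ctx, void *req, size_t req_len,
		   void *resp, size_t resp_len); /* hypothetical transport */

static int
bnxt_vnic_alloc_default(void *hwrm_ctx, uint32_t *vnic_id)
{
	struct hwrm_vnic_alloc_input req = { 0 };
	struct hwrm_vnic_alloc_output resp = { 0 };
	int rc;

	req.flags = rte_cpu_to_le_32(HWRM_VNIC_ALLOC_INPUT_FLAGS_DEFAULT);
	rc = bnxt_hwrm_send(hwrm_ctx, &req, sizeof(req), &resp, sizeof(resp));
	if (rc == 0)
		*vnic_id = rte_le_to_cpu_32(resp.vnic_id);
	return rc;
}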
-
-/*****************
- * hwrm_vnic_cfg *
- *****************/
-
-
-/* hwrm_vnic_cfg_input (size:320b/40B) */
-struct hwrm_vnic_cfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
        uint32_t        flags;
-       /*
-        * When this bit is '1', the VNIC is requested to
-        * be the default VNIC for the function.
-        */
-       #define HWRM_VNIC_CFG_INPUT_FLAGS_DEFAULT \
-               UINT32_C(0x1)
-       /*
-        * When this bit is '1', the VNIC is being configured to
-        * strip VLAN in the RX path.
-        * If set to '0', then VLAN stripping is disabled on
-        * this VNIC.
-        */
-       #define HWRM_VNIC_CFG_INPUT_FLAGS_VLAN_STRIP_MODE \
-               UINT32_C(0x2)
-       /*
-        * When this bit is '1', the VNIC is being configured to
-        * buffer receive packets in the hardware until the host
-        * posts new receive buffers.
-        * If set to '0', then bd_stall is being configured to be
-        * disabled on this VNIC.
-        */
-       #define HWRM_VNIC_CFG_INPUT_FLAGS_BD_STALL_MODE \
-               UINT32_C(0x4)
-       /*
-        * When this bit is '1', the VNIC is being configured to
-        * receive both RoCE and non-RoCE traffic.
-        * If set to '0', then this VNIC is not configured to be
-        * operating in dual VNIC mode.
-        */
-       #define HWRM_VNIC_CFG_INPUT_FLAGS_ROCE_DUAL_VNIC_MODE \
-               UINT32_C(0x8)
-       /*
-        * When this flag is set to '1', the VNIC is requested to
-        * be configured to receive only RoCE traffic.
-        * If this flag is set to '0', then this flag shall be
-        * ignored by the HWRM.
-        * If roce_dual_vnic_mode flag is set to '1'
-        * or roce_mirroring_capable_vnic_mode flag to 1,
-        * then the HWRM client shall not set this flag to '1'.
-        */
-       #define HWRM_VNIC_CFG_INPUT_FLAGS_ROCE_ONLY_VNIC_MODE \
-               UINT32_C(0x10)
-       /*
-        * When a VNIC uses one destination ring group for certain
-        * application (e.g. Receive Flow Steering) where
-        * exact match is used to direct packets to a VNIC with one
-        * destination ring group only, there is no need to configure
-        * RSS indirection table for that VNIC as only one destination
-        * ring group is used.
-        *
-        * This flag is used to enable a mode where
-        * RSS is enabled in the VNIC using a RSS context
-        * for computing RSS hash but the RSS indirection table is
-        * not configured using hwrm_vnic_rss_cfg.
-        *
-        * If this mode is enabled, then the driver should not program
-        * RSS indirection table for the RSS context that is used for
-        * computing RSS hash only.
-        */
-       #define HWRM_VNIC_CFG_INPUT_FLAGS_RSS_DFLT_CR_MODE \
-               UINT32_C(0x20)
-       /*
-        * When this bit is '1', the VNIC is being configured to
-        * receive both RoCE and non-RoCE traffic, but forward only the
-        * RoCE traffic further. Also, RoCE traffic can be mirrored to
-        * L2 driver.
-        */
-       #define HWRM_VNIC_CFG_INPUT_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_MODE \
-               UINT32_C(0x40)
-       uint32_t        enables;
-       /*
-        * This bit must be '1' for the dflt_ring_grp field to be
-        * configured.
-        */
-       #define HWRM_VNIC_CFG_INPUT_ENABLES_DFLT_RING_GRP \
+       /* If set to 1, then PFC is enabled on PRI 0. */
+       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI0_PFC_ENABLED \
                UINT32_C(0x1)
-       /*
-        * This bit must be '1' for the rss_rule field to be
-        * configured.
-        */
-       #define HWRM_VNIC_CFG_INPUT_ENABLES_RSS_RULE \
+       /* If set to 1, then PFC is enabled on PRI 1. */
+       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI1_PFC_ENABLED \
                UINT32_C(0x2)
-       /*
-        * This bit must be '1' for the cos_rule field to be
-        * configured.
-        */
-       #define HWRM_VNIC_CFG_INPUT_ENABLES_COS_RULE \
-               UINT32_C(0x4)
-       /*
-        * This bit must be '1' for the lb_rule field to be
-        * configured.
-        */
-       #define HWRM_VNIC_CFG_INPUT_ENABLES_LB_RULE \
+       /* If set to 1, then PFC is enabled on PRI 2. */
+       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI2_PFC_ENABLED \
+               UINT32_C(0x4)
+       /* If set to 1, then PFC is enabled on PRI 3. */
+       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI3_PFC_ENABLED \
                UINT32_C(0x8)
-       /*
-        * This bit must be '1' for the mru field to be
-        * configured.
-        */
-       #define HWRM_VNIC_CFG_INPUT_ENABLES_MRU \
+       /* If set to 1, then PFC is enabled on PRI 4. */
+       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI4_PFC_ENABLED \
                UINT32_C(0x10)
-       /*
-        * This bit must be '1' for the default_rx_ring_id field to be
-        * configured.
-        */
-       #define HWRM_VNIC_CFG_INPUT_ENABLES_DEFAULT_RX_RING_ID \
+       /* If set to 1, then PFC is enabled on PRI 5. */
+       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI5_PFC_ENABLED \
                UINT32_C(0x20)
-       /*
-        * This bit must be '1' for the default_cmpl_ring_id field to be
-        * configured.
-        */
-       #define HWRM_VNIC_CFG_INPUT_ENABLES_DEFAULT_CMPL_RING_ID \
+       /* If set to 1, then PFC is enabled on PRI 6. */
+       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI6_PFC_ENABLED \
                UINT32_C(0x40)
-       /* Logical vnic ID */
-       uint16_t        vnic_id;
-       /*
-        * Default Completion ring for the VNIC.  This ring will
-        * be chosen if packet does not match any RSS rules and if
-        * there is no COS rule.
-        */
-       uint16_t        dflt_ring_grp;
+       /* If set to 1, then PFC is enabled on PRI 7. */
+       #define HWRM_QUEUE_PFCENABLE_QCFG_OUTPUT_FLAGS_PRI7_PFC_ENABLED \
+               UINT32_C(0x80)
+       uint8_t unused_0[3];
        /*
-        * RSS ID for RSS rule/table structure.  0xFF... (All Fs) if
-        * there is no RSS rule.
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       uint16_t        rss_rule;
+       uint8_t valid;
+} __attribute__((packed));
+
+/****************************
+ * hwrm_queue_pfcenable_cfg *
+ ****************************/
+
+
+/* hwrm_queue_pfcenable_cfg_input (size:192b/24B) */
+struct hwrm_queue_pfcenable_cfg_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * RSS ID for COS rule/table structure.  0xFF... (All Fs) if
-        * there is no COS rule.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       uint16_t        cos_rule;
+       uint16_t        cmpl_ring;
        /*
-        * RSS ID for load balancing rule/table structure.
-        * 0xFF... (All Fs) if there is no LB rule.
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       uint16_t        lb_rule;
+       uint16_t        seq_id;
        /*
-        * The maximum receive unit of the vnic.
-        * Each vnic is associated with a function.
-        * The vnic mru value overwrites the mru setting of the
-        * associated function.
-        * The HWRM shall make sure that vnic mru does not exceed
-        * the mru of the port the function is associated with.
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       uint16_t        mru;
+       uint16_t        target_id;
        /*
-        * Default Rx ring for the VNIC.  This ring will
-        * be chosen if packet does not match any RSS rules.
-        * The aggregation ring associated with the Rx ring is
-        * implied based on the Rx ring specified when the
-        * aggregation ring was allocated.
+        * A physical address pointer pointing to a host buffer that the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       uint16_t        default_rx_ring_id;
+       uint64_t        resp_addr;
+       uint32_t        flags;
+       /* If set to 1, then PFC is requested to be enabled on PRI 0. */
+       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI0_PFC_ENABLED \
+               UINT32_C(0x1)
+       /* If set to 1, then PFC is requested to be enabled on PRI 1. */
+       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI1_PFC_ENABLED \
+               UINT32_C(0x2)
+       /* If set to 1, then PFC is requested to be enabled on PRI 2. */
+       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI2_PFC_ENABLED \
+               UINT32_C(0x4)
+       /* If set to 1, then PFC is requested to be enabled on PRI 3. */
+       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI3_PFC_ENABLED \
+               UINT32_C(0x8)
+       /* If set to 1, then PFC is requested to be enabled on PRI 4. */
+       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI4_PFC_ENABLED \
+               UINT32_C(0x10)
+       /* If set to 1, then PFC is requested to be enabled on PRI 5. */
+       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI5_PFC_ENABLED \
+               UINT32_C(0x20)
+       /* If set to 1, then PFC is requested to be enabled on PRI 6. */
+       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI6_PFC_ENABLED \
+               UINT32_C(0x40)
+       /* If set to 1, then PFC is requested to be enabled on PRI 7. */
+       #define HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI7_PFC_ENABLED \
+               UINT32_C(0x80)
        /*
-        * Default completion ring for the VNIC.  This ring will
-        * be chosen if packet does not match any RSS rules.
+        * Port ID of port for which the table is being configured.
+        * The HWRM needs to check whether this function is allowed
+        * to configure pri2cos mapping on this port.
         */
-       uint16_t        default_cmpl_ring_id;
+       uint16_t        port_id;
+       uint8_t unused_0[2];
 } __attribute__((packed));
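
/*
 * Illustrative sketch, not part of the generated header: building the
 * flags word of the cfg request above from an 802.1p priority bitmap.
 * The helper name is hypothetical; it relies on the PRIn_PFC_ENABLED
 * flags being the consecutive single bits 0x1..0x80 defined above.
 */
#include <stdint.h>

static inline uint32_t
bnxt_pfc_enable_flags(uint8_t pri_bitmap)
{
	uint32_t flags = 0;

	if (pri_bitmap & (1U << 0))
		flags |= HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI0_PFC_ENABLED;
	if (pri_bitmap & (1U << 1))
		flags |= HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI1_PFC_ENABLED;
	if (pri_bitmap & (1U << 2))
		flags |= HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI2_PFC_ENABLED;
	if (pri_bitmap & (1U << 3))
		flags |= HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI3_PFC_ENABLED;
	if (pri_bitmap & (1U << 4))
		flags |= HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI4_PFC_ENABLED;
	if (pri_bitmap & (1U << 5))
		flags |= HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI5_PFC_ENABLED;
	if (pri_bitmap & (1U << 6))
		flags |= HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI6_PFC_ENABLED;
	if (pri_bitmap & (1U << 7))
		flags |= HWRM_QUEUE_PFCENABLE_CFG_INPUT_FLAGS_PRI7_PFC_ENABLED;
	return flags;
}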
 
-/* hwrm_vnic_cfg_output (size:128b/16B) */
-struct hwrm_vnic_cfg_output {
+/* hwrm_queue_pfcenable_cfg_output (size:128b/16B) */
+struct hwrm_queue_pfcenable_cfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -17709,13 +15746,13 @@ struct hwrm_vnic_cfg_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/******************
- * hwrm_vnic_qcfg *
- ******************/
+/***************************
+ * hwrm_queue_pri2cos_qcfg *
+ ***************************/
 
 
-/* hwrm_vnic_qcfg_input (size:256b/32B) */
-struct hwrm_vnic_qcfg_input {
+/* hwrm_queue_pri2cos_qcfg_input (size:192b/24B) */
+struct hwrm_queue_pri2cos_qcfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -17743,21 +15780,37 @@ struct hwrm_vnic_qcfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint32_t        enables;
+       uint32_t        flags;
        /*
-        * This bit must be '1' for the vf_id_valid field to be
-        * configured.
+        * Enumeration denoting the RX, TX type of the resource.
+        * This enumeration is used for resources that are similar for both
+        * TX and RX paths of the chip.
         */
-       #define HWRM_VNIC_QCFG_INPUT_ENABLES_VF_ID_VALID     UINT32_C(0x1)
-       /* Logical vnic ID */
-       uint32_t        vnic_id;
-       /* ID of Virtual Function whose VNIC resource is being queried. */
-       uint16_t        vf_id;
-       uint8_t unused_0[6];
+       #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_PATH      UINT32_C(0x1)
+       /* tx path */
+       #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_PATH_TX     UINT32_C(0x0)
+       /* rx path */
+       #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_PATH_RX     UINT32_C(0x1)
+       #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_PATH_LAST \
+               HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_PATH_RX
+       /*
+        * When this bit is set to '0', the query is
+        * for VLAN PRI field in tunnel headers.
+        * When this bit is set to '1', the query is
+        * for VLAN PRI field in inner packet headers.
+        */
+       #define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN     UINT32_C(0x2)
+       /*
+        * Port ID of port for which the table is being configured.
+        * The HWRM needs to check whether this function is allowed
+        * to configure pri2cos mapping on this port.
+        */
+       uint8_t port_id;
+       uint8_t unused_0[3];
 } __attribute__((packed));
 
-/* hwrm_vnic_qcfg_output (size:256b/32B) */
-struct hwrm_vnic_qcfg_output {
+/* hwrm_queue_pri2cos_qcfg_output (size:192b/24B) */
+struct hwrm_queue_pri2cos_qcfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -17766,94 +15819,73 @@ struct hwrm_vnic_qcfg_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* Default Completion ring for the VNIC. */
-       uint16_t        dflt_ring_grp;
-       /*
-        * RSS ID for RSS rule/table structure.  0xFF... (All Fs) if
-        * there is no RSS rule.
-        */
-       uint16_t        rss_rule;
        /*
-        * RSS ID for COS rule/table structure.  0xFF... (All Fs) if
-        * there is no COS rule.
+        * CoS Queue assigned to priority 0.  This value can only
+        * be changed before traffic has started.
+        * A value of 0xff indicates that no CoS queue is assigned to the
+        * specified priority.
         */
-       uint16_t        cos_rule;
+       uint8_t pri0_cos_queue_id;
        /*
-        * RSS ID for load balancing rule/table structure.
-        * 0xFF... (All Fs) if there is no LB rule.
+        * CoS Queue assigned to priority 1.  This value can only
+        * be changed before traffic has started.
+        * A value of 0xff indicates that no CoS queue is assigned to the
+        * specified priority.
         */
-       uint16_t        lb_rule;
-       /* The maximum receive unit of the vnic. */
-       uint16_t        mru;
-       uint8_t unused_0[2];
-       uint32_t        flags;
+       uint8_t pri1_cos_queue_id;
        /*
-        * When this bit is '1', the VNIC is the default VNIC for
-        * the function.
+        * CoS Queue assigned to priority 2.  This value can only
+        * be changed before traffic has started.
+        * A value of 0xff indicates that no CoS queue is assigned to the
+        * specified priority.
         */
-       #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_DEFAULT \
-               UINT32_C(0x1)
+       uint8_t pri2_cos_queue_id;
        /*
-        * When this bit is '1', the VNIC is configured to
-        * strip VLAN in the RX path.
-        * If set to '0', then VLAN stripping is disabled on
-        * this VNIC.
+        * CoS Queue assigned to priority 3.  This value can only
+        * be changed before traffic has started.
+        * A value of 0xff indicates that no CoS queue is assigned to the
+        * specified priority.
         */
-       #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_VLAN_STRIP_MODE \
-               UINT32_C(0x2)
+       uint8_t pri3_cos_queue_id;
        /*
-        * When this bit is '1', the VNIC is configured to
-        * buffer receive packets in the hardware until the host
-        * posts new receive buffers.
-        * If set to '0', then bd_stall is disabled on
-        * this VNIC.
+        * CoS Queue assigned to priority 4.  This value can only
+        * be changed before traffic has started.
+        * A value of 0xff indicates that no CoS queue is assigned to the
+        * specified priority.
         */
-       #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_BD_STALL_MODE \
-               UINT32_C(0x4)
+       uint8_t pri4_cos_queue_id;
        /*
-        * When this bit is '1', the VNIC is configured to
-        * receive both RoCE and non-RoCE traffic.
-        * If set to '0', then this VNIC is not configured to
-        * operate in dual VNIC mode.
+        * CoS Queue assigned to priority 5.  This value can only
+        * be changed before traffic has started.
+        * A value of 0xff indicates that no CoS queue is assigned to the
+        * specified priority.
         */
-       #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_ROCE_DUAL_VNIC_MODE \
-               UINT32_C(0x8)
-       /*
-        * When this flag is set to '1', the VNIC is configured to
-        * receive only RoCE traffic.
-        * When this flag is set to '0', the VNIC is not configured
-        * to receive only RoCE traffic.
-        * If roce_dual_vnic_mode flag and this flag both are set
-        * to '1', then it is an invalid configuration of the
-        * VNIC. The HWRM should not allow that type of
-        * mis-configuration by HWRM clients.
+       uint8_t pri5_cos_queue_id;
+       /*
+        * CoS Queue assigned to priority 6.  This value can only
+        * be changed before traffic has started.
+        * A value of 0xff indicates that no CoS queue is assigned to the
+        * specified priority.
         */
-       #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_ROCE_ONLY_VNIC_MODE \
-               UINT32_C(0x10)
+       uint8_t pri6_cos_queue_id;
        /*
-        * When a VNIC uses one destination ring group for certain
-        * application (e.g. Receive Flow Steering) where
-        * exact match is used to direct packets to a VNIC with one
-        * destination ring group only, there is no need to configure
-        * RSS indirection table for that VNIC as only one destination
-        * ring group is used.
-        *
-        * When this bit is set to '1', then the VNIC is enabled in a
-        * mode where RSS is enabled in the VNIC using a RSS context
-        * for computing RSS hash but the RSS indirection table is
-        * not configured.
+        * CoS Queue assigned to priority 7.  This value can only
+        * be changed before traffic has started.
+        * A value of 0xff indicates that no CoS queue is assigned to the
+        * specified priority.
         */
-       #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_RSS_DFLT_CR_MODE \
-               UINT32_C(0x20)
+       uint8_t pri7_cos_queue_id;
+       /* Information about queue configuration. */
+       uint8_t queue_cfg_info;
        /*
-        * When this bit is '1', the VNIC is configured to
-        * receive both RoCE and non-RoCE traffic, but forward only
-        * RoCE traffic further. Also RoCE traffic can be mirrored to
-        * L2 driver.
+        * If this flag is set to '1', then the PRI to CoS
+        * configuration is asymmetric on TX and RX sides.
+        * If this flag is set to '0', then PRI to CoS configuration
+        * is symmetric on TX and RX sides.
         */
-       #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_MODE \
-               UINT32_C(0x40)
-       uint8_t unused_1[7];
+       #define HWRM_QUEUE_PRI2COS_QCFG_OUTPUT_QUEUE_CFG_INFO_ASYM_CFG \
+               UINT32_C(0x1)
+       uint8_t unused_0[6];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -17864,13 +15896,13 @@ struct hwrm_vnic_qcfg_output {
        uint8_t valid;
 } __attribute__((packed));
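
A minimal consumer-side sketch for the response above. The enclosing
struct name (hwrm_queue_pri2cos_qcfg_output) is inferred from the macro
prefix, and obtaining the completed response from the firmware channel
is left to the caller; both are assumptions, not part of this header.

static int
decode_pri2cos_qcfg(const struct hwrm_queue_pri2cos_qcfg_output *resp,
		    uint8_t cos_by_pri[8])
{
	/*
	 * The eight priN_cos_queue_id fields are consecutive uint8_t
	 * members of a packed struct, so they can be walked as an array.
	 */
	const uint8_t *ids = &resp->pri0_cos_queue_id;
	int i;

	if (!resp->valid)
		return -1;	/* response not completely written yet */

	for (i = 0; i < 8; i++)
		cos_by_pri[i] = ids[i];	/* 0xff means no CoS queue assigned */

	/* Non-zero when the TX and RX mappings differ. */
	return !!(resp->queue_cfg_info &
		  HWRM_QUEUE_PRI2COS_QCFG_OUTPUT_QUEUE_CFG_INFO_ASYM_CFG);
}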
 
-/*******************
- * hwrm_vnic_qcaps *
- *******************/
+/**************************
+ * hwrm_queue_pri2cos_cfg *
+ **************************/
 
 
-/* hwrm_vnic_qcaps_input (size:192b/24B) */
-struct hwrm_vnic_qcaps_input {
+/* hwrm_queue_pri2cos_cfg_input (size:320b/40B) */
+struct hwrm_queue_pri2cos_cfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -17898,88 +15930,138 @@ struct hwrm_vnic_qcaps_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint32_t        enables;
-       uint8_t unused_0[4];
-} __attribute__((packed));
-
-/* hwrm_vnic_qcaps_output (size:192b/24B) */
-struct hwrm_vnic_qcaps_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* The maximum receive unit that is settable on a vnic. */
-       uint16_t        mru;
-       uint8_t unused_0[2];
        uint32_t        flags;
-       /* Unused. */
-       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_UNUSED \
+       /*
+        * Enumeration denoting the RX, TX, or both directions applicable to the resource.
+        * This enumeration is used for resources that are similar for both
+        * TX and RX paths of the chip.
+        */
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_MASK UINT32_C(0x3)
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_SFT  0
+       /* tx path */
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_TX     UINT32_C(0x0)
+       /* rx path */
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_RX     UINT32_C(0x1)
+       /* Bi-directional (Symmetrically applicable to TX and RX paths) */
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_BIDIR  UINT32_C(0x2)
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_LAST \
+               HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_BIDIR
+       /*
+        * When this bit is set to '0', the mapping is requested
+        * for VLAN PRI field in tunnel headers.
+        * When this bit is set to '1', the mapping is requested
+        * for VLAN PRI field in inner packet headers.
+        */
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_IVLAN     UINT32_C(0x4)
+       uint32_t        enables;
+       /*
+        * This bit must be '1' for the pri0_cos_queue_id field to be
+        * configured.
+        */
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI0_COS_QUEUE_ID \
                UINT32_C(0x1)
        /*
-        * When this bit is '1', the capability of stripping VLAN in
-        * the RX path is supported on VNIC(s).
-        * If set to '0', then VLAN stripping capability is
-        * not supported on VNIC(s).
+        * This bit must be '1' for the pri1_cos_queue_id field to be
+        * configured.
         */
-       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_VLAN_STRIP_CAP \
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI1_COS_QUEUE_ID \
                UINT32_C(0x2)
        /*
-        * When this bit is '1', the capability to buffer receive
-        * packets in the hardware until the host posts new receive buffers
-        * is supported on VNIC(s).
-        * If set to '0', then bd_stall capability is not supported
-        * on VNIC(s).
+        * This bit must be '1' for the pri2_cos_queue_id field to be
+        * configured.
         */
-       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_BD_STALL_CAP \
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI2_COS_QUEUE_ID \
                UINT32_C(0x4)
        /*
-        * When this bit is '1', the capability to
-        * receive both RoCE and non-RoCE traffic on VNIC(s) is
-        * supported.
-        * If set to '0', then the capability to receive
-        * both RoCE and non-RoCE traffic on VNIC(s) is
-        * not supported.
+        * This bit must be '1' for the pri3_cos_queue_id field to be
+        * configured.
         */
-       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_ROCE_DUAL_VNIC_CAP \
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI3_COS_QUEUE_ID \
                UINT32_C(0x8)
        /*
-        * When this bit is set to '1', the capability to configure
-        * a VNIC to receive only RoCE traffic is supported.
-        * When this flag is set to '0', the VNIC capability to
-        * configure to receive only RoCE traffic is not supported.
+        * This bit must be '1' for the pri4_cos_queue_id field to be
+        * configured.
         */
-       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_ROCE_ONLY_VNIC_CAP \
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI4_COS_QUEUE_ID \
                UINT32_C(0x10)
        /*
-        * When this bit is set to '1', then the capability to enable
-        * a VNIC in a mode where RSS context without configuring
-        * RSS indirection table is supported (for RSS hash computation).
-        * When this bit is set to '0', then a VNIC can not be configured
-        * with a mode to enable RSS context without configuring RSS
-        * indirection table.
+        * This bit must be '1' for the pri5_cos_queue_id field to be
+        * configured.
         */
-       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_RSS_DFLT_CR_CAP \
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI5_COS_QUEUE_ID \
                UINT32_C(0x20)
        /*
-        * When this bit is '1', the capability to
-        * mirror the the RoCE traffic is supported.
-        * If set to '0', then the capability to mirror the
-        * RoCE traffic is not supported.
+        * This bit must be '1' for the pri6_cos_queue_id field to be
+        * configured.
         */
-       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_CAP \
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI6_COS_QUEUE_ID \
                UINT32_C(0x40)
        /*
-        * When this bit is '1', the outermost RSS hashing capability
-        * is supported. If set to '0', then the outermost RSS hashing
-        * capability is not supported.
+        * This bit must be '1' for the pri7_cos_queue_id field to be
+        * configured.
         */
-       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_OUTERMOST_RSS_CAP \
+       #define HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI7_COS_QUEUE_ID \
                UINT32_C(0x80)
-       uint8_t unused_1[7];
+       /*
+        * Port ID of port for which the table is being configured.
+        * The HWRM needs to check whether this function is allowed
+        * to configure pri2cos mapping on this port.
+        */
+       uint8_t port_id;
+       /*
+        * CoS Queue assigned to priority 0.  This value can only
+        * be changed before traffic has started.
+        */
+       uint8_t pri0_cos_queue_id;
+       /*
+        * CoS Queue assigned to priority 1.  This value can only
+        * be changed before traffic has started.
+        */
+       uint8_t pri1_cos_queue_id;
+       /*
+        * CoS Queue assigned to priority 2.  This value can only
+        * be changed before traffic has started.
+        */
+       uint8_t pri2_cos_queue_id;
+       /*
+        * CoS Queue assigned to priority 3.  This value can only
+        * be changed before traffic has started.
+        */
+       uint8_t pri3_cos_queue_id;
+       /*
+        * CoS Queue assigned to priority 4.  This value can only
+        * be changed before traffic has started.
+        */
+       uint8_t pri4_cos_queue_id;
+       /*
+        * CoS Queue assigned to priority 5.  This value can only
+        * be changed before traffic has started.
+        */
+       uint8_t pri5_cos_queue_id;
+       /*
+        * CoS Queue assigned to priority 6.  This value can only
+        * be changed before traffic has started.
+        */
+       uint8_t pri6_cos_queue_id;
+       /*
+        * CoS Queue assigned to priority 7.  This value can only
+        * be changed before traffic has started.
+        */
+       uint8_t pri7_cos_queue_id;
+       uint8_t unused_0[7];
+} __attribute__((packed));
+
+/* hwrm_queue_pri2cos_cfg_output (size:128b/16B) */
+struct hwrm_queue_pri2cos_cfg_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -17990,13 +16072,13 @@ struct hwrm_vnic_qcaps_output {
        uint8_t valid;
 } __attribute__((packed));
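
A request-building sketch for this command, mapping priorities 0 and 1
to caller-supplied CoS queue IDs on both paths. Filling the transport
header (req_type, cmpl_ring, seq_id, resp_addr) and sending the message
are device-specific and deliberately omitted; rte_cpu_to_le_32() is used
because HWRM fields are little-endian on the wire.

#include <string.h>
#include <rte_byteorder.h>

static void
build_pri2cos_cfg(struct hwrm_queue_pri2cos_cfg_input *req,
		  uint8_t port_id, uint8_t cos0, uint8_t cos1)
{
	memset(req, 0, sizeof(*req));
	/* Apply the mapping symmetrically to TX and RX. */
	req->flags = rte_cpu_to_le_32(
		HWRM_QUEUE_PRI2COS_CFG_INPUT_FLAGS_PATH_BIDIR);
	/* Only the priority fields flagged in 'enables' are consumed. */
	req->enables = rte_cpu_to_le_32(
		HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI0_COS_QUEUE_ID |
		HWRM_QUEUE_PRI2COS_CFG_INPUT_ENABLES_PRI1_COS_QUEUE_ID);
	req->port_id = port_id;
	req->pri0_cos_queue_id = cos0;
	req->pri1_cos_queue_id = cos1;
}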
 
-/*********************
- * hwrm_vnic_tpa_cfg *
- *********************/
+/**************************
+ * hwrm_queue_cos2bw_qcfg *
+ **************************/
 
 
-/* hwrm_vnic_tpa_cfg_input (size:320b/40B) */
-struct hwrm_vnic_tpa_cfg_input {
+/* hwrm_queue_cos2bw_qcfg_input (size:192b/24B) */
+struct hwrm_queue_cos2bw_qcfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -18018,331 +16100,1010 @@ struct hwrm_vnic_tpa_cfg_input {
         */
        uint16_t        target_id;
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * A physical address pointing to a host buffer to which the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
+        */
+       uint64_t        resp_addr;
+       /*
+        * Port ID of port for which the table is being configured.
+        * The HWRM needs to check whether this function is allowed
+        * to configure TC BW assignment on this port.
+        */
+       uint16_t        port_id;
+       uint8_t unused_0[6];
+} __attribute__((packed));
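
The output structure that follows packs each queue's minimum and maximum
bandwidth into one 32-bit word: bits 27:0 carry the value, bit 28 the
bits-vs-bytes scale, and bits 31:29 the unit. Below is a decode sketch
using the queue 0 min_bw macros (every *_min_bw/*_max_bw word shares the
same layout); it assumes the word was read straight out of the
little-endian response buffer.

#include <rte_byteorder.h>

static void
decode_cos2bw_word(uint32_t le_bw, uint32_t *value, int *is_bytes,
		   uint32_t *unit)
{
	uint32_t bw = rte_le_to_cpu_32(le_bw);

	/* Bits 27:0: the bandwidth value itself (shift is 0). */
	*value = bw &
		HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_MASK;
	/* Bit 28: granularity, 0 = bits, 1 = bytes. */
	*is_bytes = !!(bw &
		HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_SCALE);
	/* Bits 31:29: unit (mega, kilo, base, giga, percent, invalid). */
	*unit = (bw &
		HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MASK)
		>> HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_SFT;
}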
+
+/* hwrm_queue_cos2bw_qcfg_output (size:896b/112B) */
+struct hwrm_queue_cos2bw_qcfg_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       /* ID of CoS Queue 0. */
+       uint8_t queue_id0;
+       uint8_t unused_0;
+       uint16_t        unused_1;
+       /*
+        * Minimum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       uint64_t        resp_addr;
-       uint32_t        flags;
+       uint32_t        queue_id0_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * When this bit is '1', the VNIC shall be configured to
-        * perform transparent packet aggregation (TPA) of
-        * non-tunneled TCP packets.
+        * Maximum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_TPA \
+       uint32_t        queue_id0_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id0_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_TSA_ASSIGN_ETS \
                UINT32_C(0x1)
-       /*
-        * When this bit is '1', the VNIC shall be configured to
-        * perform transparent packet aggregation (TPA) of
-        * tunneled TCP packets.
-        */
-       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_ENCAP_TPA \
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST \
                UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * When this bit is '1', the VNIC shall be configured to
-        * perform transparent packet aggregation (TPA) according
-        * to Windows Receive Segment Coalescing (RSC) rules.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_RSC_WND_UPDATE \
-               UINT32_C(0x4)
+       uint8_t queue_id0_pri_lvl;
        /*
-        * When this bit is '1', the VNIC shall be configured to
-        * perform transparent packet aggregation (TPA) according
-        * to Linux Generic Receive Offload (GRO) rules.
+        * Weight used to allocate remaining BW for this CoS after
+        * servicing guaranteed bandwidths for all CoS queues.
         */
-       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO \
-               UINT32_C(0x8)
+       uint8_t queue_id0_bw_weight;
+       /* ID of CoS Queue 1. */
+       uint8_t queue_id1;
        /*
-        * When this bit is '1', the VNIC shall be configured to
-        * perform transparent packet aggregation (TPA) for TCP
-        * packets with IP ECN set to non-zero.
+        * Minimum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_AGG_WITH_ECN \
-               UINT32_C(0x10)
+       uint32_t        queue_id1_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * When this bit is '1', the VNIC shall be configured to
-        * perform transparent packet aggregation (TPA) for
-        * GRE tunneled TCP packets only if all packets have the
-        * same GRE sequence.
+        * Maximum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_AGG_WITH_SAME_GRE_SEQ \
-               UINT32_C(0x20)
+       uint32_t        queue_id1_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id1_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_TSA_ASSIGN_ETS \
+               UINT32_C(0x1)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST \
+               UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * When this bit is '1' and the GRO mode is enabled,
-        * the VNIC shall be configured to
-        * perform transparent packet aggregation (TPA) for
-        * TCP/IPv4 packets with consecutively increasing IPIDs.
-        * In other words, the last packet that is being
-        * aggregated to an already existing aggregation context
-        * shall have IPID 1 more than the IPID of the last packet
-        * that was aggregated in that aggregation context.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO_IPID_CHECK \
-               UINT32_C(0x40)
+       uint8_t queue_id1_pri_lvl;
        /*
-        * When this bit is '1' and the GRO mode is enabled,
-        * the VNIC shall be configured to
-        * perform transparent packet aggregation (TPA) for
-        * TCP packets with the same TTL (IPv4) or Hop limit (IPv6)
-        * value.
+        * Weight used to allocate remaining BW for this CoS after
+        * servicing guaranteed bandwidths for all CoS queues.
         */
-       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO_TTL_CHECK \
-               UINT32_C(0x80)
-       uint32_t        enables;
+       uint8_t queue_id1_bw_weight;
+       /* ID of CoS Queue 2. */
+       uint8_t queue_id2;
        /*
-        * This bit must be '1' for the max_agg_segs field to be
-        * configured.
+        * Minimum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGG_SEGS      UINT32_C(0x1)
+       uint32_t        queue_id2_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * This bit must be '1' for the max_aggs field to be
-        * configured.
+        * Maximum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGGS          UINT32_C(0x2)
+       uint32_t        queue_id2_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id2_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_TSA_ASSIGN_ETS \
+               UINT32_C(0x1)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST \
+               UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * This bit must be '1' for the max_agg_timer field to be
-        * configured.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGG_TIMER     UINT32_C(0x4)
+       uint8_t queue_id2_pri_lvl;
        /*
-        * This bit must be '1' for the min_agg_len field to be
-        * configured.
+        * Weight used to allocate remaining BW for this CoS after
+        * servicing guaranteed bandwidths for all CoS queues.
         */
-       #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MIN_AGG_LEN       UINT32_C(0x8)
-       /* Logical vnic ID */
-       uint16_t        vnic_id;
+       uint8_t queue_id2_bw_weight;
+       /* ID of CoS Queue 3. */
+       uint8_t queue_id3;
        /*
-        * This is the maximum number of TCP segments that can
-        * be aggregated (unit is Log2). Max value is 31.
+        * Minimum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       uint16_t        max_agg_segs;
-       /* 1 segment */
-       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_1   UINT32_C(0x0)
-       /* 2 segments */
-       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_2   UINT32_C(0x1)
-       /* 4 segments */
-       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_4   UINT32_C(0x2)
-       /* 8 segments */
-       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_8   UINT32_C(0x3)
-       /* Any segment size larger than this is not valid */
-       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_MAX UINT32_C(0x1f)
-       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_LAST \
-               HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_MAX
+       uint32_t        queue_id3_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * This is the maximum number of aggregations this VNIC is
-        * allowed (unit is Log2). Max value is 7
+        * Maximum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       uint16_t        max_aggs;
-       /* 1 aggregation */
-       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_1   UINT32_C(0x0)
-       /* 2 aggregations */
-       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_2   UINT32_C(0x1)
-       /* 4 aggregations */
-       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_4   UINT32_C(0x2)
-       /* 8 aggregations */
-       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_8   UINT32_C(0x3)
-       /* 16 aggregations */
-       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_16  UINT32_C(0x4)
-       /* Any aggregation size larger than this is not valid */
-       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_MAX UINT32_C(0x7)
-       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_LAST \
-               HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_MAX
-       uint8_t unused_0[2];
+       uint32_t        queue_id3_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id3_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_TSA_ASSIGN_ETS \
+               UINT32_C(0x1)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST \
+               UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * This is the maximum amount of time allowed for
-        * an aggregation context to complete after it was initiated.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       uint32_t        max_agg_timer;
+       uint8_t queue_id3_pri_lvl;
        /*
-        * This is the minimum amount of payload length required to
-        * start an aggregation context.
+        * Weight used to allocate remaining BW for this CoS after
+        * servicing guaranteed bandwidths for all CoS queues.
         */
-       uint32_t        min_agg_len;
-} __attribute__((packed));
-
-/* hwrm_vnic_tpa_cfg_output (size:128b/16B) */
-struct hwrm_vnic_tpa_cfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t unused_0[7];
+       uint8_t queue_id3_bw_weight;
+       /* ID of CoS Queue 4. */
+       uint8_t queue_id4;
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * Minimum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/**********************
- * hwrm_vnic_tpa_qcfg *
- **********************/
-
-
-/* hwrm_vnic_tpa_qcfg_input (size:192b/24B) */
-struct hwrm_vnic_tpa_qcfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       uint32_t        queue_id4_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * Maximum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       uint16_t        cmpl_ring;
+       uint32_t        queue_id4_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id4_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_TSA_ASSIGN_ETS \
+               UINT32_C(0x1)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST \
+               UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       uint16_t        seq_id;
+       uint8_t queue_id4_pri_lvl;
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * Weight used to allocate remaining BW for this CoS after
+        * servicing guaranteed bandwidths for all CoS queues.
         */
-       uint16_t        target_id;
+       uint8_t queue_id4_bw_weight;
+       /* ID of CoS Queue 5. */
+       uint8_t queue_id5;
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * Minimum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       uint64_t        resp_addr;
-       /* Logical vnic ID */
-       uint16_t        vnic_id;
-       uint8_t unused_0[6];
-} __attribute__((packed));
-
-/* hwrm_vnic_tpa_qcfg_output (size:256b/32B) */
-struct hwrm_vnic_tpa_qcfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint32_t        flags;
+       uint32_t        queue_id5_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * When this bit is '1', the VNIC is configured to
-        * perform transparent packet aggregation (TPA) of
-        * non-tunneled TCP packets.
+        * Maximum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_FLAGS_TPA \
+       uint32_t        queue_id5_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is a 3-bit field. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id5_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_TSA_ASSIGN_ETS \
                UINT32_C(0x1)
-       /*
-        * When this bit is '1', the VNIC is configured to
-        * perform transparent packet aggregation (TPA) of
-        * tunneled TCP packets.
-        */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_FLAGS_ENCAP_TPA \
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST \
                UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * When this bit is '1', the VNIC is configured to
-        * perform transparent packet aggregation (TPA) according
-        * to Windows Receive Segment Coalescing (RSC) rules.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority, SP).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_FLAGS_RSC_WND_UPDATE \
-               UINT32_C(0x4)
+       uint8_t queue_id5_pri_lvl;
        /*
-        * When this bit is '1', the VNIC is configured to
-        * perform transparent packet aggregation (TPA) according
-        * to Linux Generic Receive Offload (GRO) rules.
+        * Weight used to allocate remaining BW for this COS after
+        * servicing guaranteed bandwidths for all COS.
         */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_FLAGS_GRO \
-               UINT32_C(0x8)
+       uint8_t queue_id5_bw_weight;
+       /* ID of CoS Queue 6. */
+       uint8_t queue_id6;
        /*
-        * When this bit is '1', the VNIC is configured to
-        * perform transparent packet aggregation (TPA) for TCP
-        * packets with IP ECN set to non-zero.
+        * Minimum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this COS inside the device.
         */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_FLAGS_AGG_WITH_ECN \
-               UINT32_C(0x10)
+       uint32_t        queue_id6_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is a 3-bit field. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * When this bit is '1', the VNIC is configured to
-        * perform transparent packet aggregation (TPA) for
-        * GRE tunneled TCP packets only if all packets have the
-        * same GRE sequence.
+        * Maximum BW allocated to CoS queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this COS inside the device.
         */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_FLAGS_AGG_WITH_SAME_GRE_SEQ \
-               UINT32_C(0x20)
+       uint32_t        queue_id6_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is a 3-bit field. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id6_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_TSA_ASSIGN_ETS \
+               UINT32_C(0x1)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST \
+               UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * When this bit is '1' and the GRO mode is enabled,
-        * the VNIC is configured to
-        * perform transparent packet aggregation (TPA) for
-        * TCP/IPv4 packets with consecutively increasing IPIDs.
-        * In other words, the last packet that is being
-        * aggregated to an already existing aggregation context
-        * shall have IPID 1 more than the IPID of the last packet
-        * that was aggregated in that aggregation context.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority, SP).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_FLAGS_GRO_IPID_CHECK \
-               UINT32_C(0x40)
+       uint8_t queue_id6_pri_lvl;
        /*
-        * When this bit is '1' and the GRO mode is enabled,
-        * the VNIC is configured to
-        * perform transparent packet aggregation (TPA) for
-        * TCP packets with the same TTL (IPv4) or Hop limit (IPv6)
-        * value.
+        * Weight used to allocate remaining BW for this COS after
+        * servicing guaranteed bandwidths for all COS.
         */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_FLAGS_GRO_TTL_CHECK \
-               UINT32_C(0x80)
+       uint8_t queue_id6_bw_weight;
+       /* ID of CoS Queue 7. */
+       uint8_t queue_id7;
        /*
-        * This is the maximum number of TCP segments that can
-        * be aggregated (unit is Log2). Max value is 31.
+        * Minimum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this COS inside the device.
         */
-       uint16_t        max_agg_segs;
-       /* 1 segment */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGG_SEGS_1   UINT32_C(0x0)
-       /* 2 segments */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGG_SEGS_2   UINT32_C(0x1)
-       /* 4 segments */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGG_SEGS_4   UINT32_C(0x2)
-       /* 8 segments */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGG_SEGS_8   UINT32_C(0x3)
-       /* Any segment size larger than this is not valid */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGG_SEGS_MAX UINT32_C(0x1f)
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGG_SEGS_LAST \
-               HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGG_SEGS_MAX
+       uint32_t        queue_id7_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is a 3-bit field. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * This is the maximum number of aggregations this VNIC is
-        * allowed (unit is Log2). Max value is 7
+        * Maximum BW allocated to CoS queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this COS inside the device.
         */
-       uint16_t        max_aggs;
-       /* 1 aggregation */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGGS_1   UINT32_C(0x0)
-       /* 2 aggregations */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGGS_2   UINT32_C(0x1)
-       /* 4 aggregations */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGGS_4   UINT32_C(0x2)
-       /* 8 aggregations */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGGS_8   UINT32_C(0x3)
-       /* 16 aggregations */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGGS_16  UINT32_C(0x4)
-       /* Any aggregation size larger than this is not valid */
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGGS_MAX UINT32_C(0x7)
-       #define HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGGS_LAST \
-               HWRM_VNIC_TPA_QCFG_OUTPUT_MAX_AGGS_MAX
+       uint32_t        queue_id7_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is a 3-bit field. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id7_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_TSA_ASSIGN_ETS \
+               UINT32_C(0x1)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST \
+               UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * This is the maximum amount of time allowed for
-        * an aggregation context to complete after it was initiated.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority, SP).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       uint32_t        max_agg_timer;
+       uint8_t queue_id7_pri_lvl;
        /*
-        * This is the minimum amount of payload length required to
-        * start an aggregation context.
+        * Weight used to allocate remaining BW for this COS after
+        * servicing guaranteed bandwidths for all COS.
         */
-       uint32_t        min_agg_len;
-       uint8_t unused_0[7];
+       uint8_t queue_id7_bw_weight;
+       uint8_t unused_2[4];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -18353,13 +17114,13 @@ struct hwrm_vnic_tpa_qcfg_output {
        uint8_t valid;
 } __attribute__((packed));
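
Editorial note on the bandwidth words above: every queue_idX_min_bw/max_bw field shares one packed layout. Bits 27:0 hold the value, bit 28 selects bit versus byte granularity, and bits 31:29 select the unit (the even encodings 0x0/0x2/0x4/0x6 mean mega/kilo/base/giga in base 10, 0x1 reinterprets the value as 1/100th of a percent of total bandwidth, and 0x7 is invalid). The following minimal sketch, which is not part of the generated header, decodes one such word using the queue_id5 macros defined above; the helper name is ours, and it assumes the caller has already converted the little-endian wire value to host order (e.g. with rte_le_to_cpu_32()).

static inline void
bnxt_cos_bw_decode(uint32_t bw, uint32_t *value, int *in_bytes, uint32_t *unit)
{
        /* Bits 27:0 - the raw bandwidth value. */
        *value = (bw &
            HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_MASK) >>
            HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_SFT;
        /* Bit 28 - granularity: clear means bits, set means bytes. */
        *in_bytes = (bw &
            HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_SCALE) != 0;
        /* Bits 31:29 - unit; compare against the _BW_VALUE_UNIT_* values. */
        *unit = bw &
            HWRM_QUEUE_COS2BW_QCFG_OUTPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MASK;
}

A driver would normally run such a decode only after the response's valid byte reads '1', per the comment at the end of the output record above.
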
 
-/*********************
- * hwrm_vnic_rss_cfg *
- *********************/
+/*************************
+ * hwrm_queue_cos2bw_cfg *
+ *************************/
 
 
-/* hwrm_vnic_rss_cfg_input (size:384b/48B) */
-struct hwrm_vnic_rss_cfg_input {
+/* hwrm_queue_cos2bw_cfg_input (size:1024b/128B) */
+struct hwrm_queue_cos2bw_cfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -18387,408 +17148,1044 @@ struct hwrm_vnic_rss_cfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint32_t        hash_type;
+       uint32_t        flags;
+       uint32_t        enables;
        /*
-        * When this bit is '1', the RSS hash shall be computed
-        * over source and destination IPv4 addresses of IPv4
-        * packets.
+        * If this bit is set to 1, then all queue_id0 related
+        * parameters in this command are valid.
         */
-       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV4         UINT32_C(0x1)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID0_VALID \
+               UINT32_C(0x1)
        /*
-        * When this bit is '1', the RSS hash shall be computed
-        * over source/destination IPv4 addresses and
-        * source/destination ports of TCP/IPv4 packets.
+        * If this bit is set to 1, then all queue_id1 related
+        * parameters in this command are valid.
         */
-       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_TCP_IPV4     UINT32_C(0x2)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID1_VALID \
+               UINT32_C(0x2)
        /*
-        * When this bit is '1', the RSS hash shall be computed
-        * over source/destination IPv4 addresses and
-        * source/destination ports of UDP/IPv4 packets.
+        * If this bit is set to 1, then all queue_id2 related
+        * parameters in this command are valid.
         */
-       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV4     UINT32_C(0x4)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID2_VALID \
+               UINT32_C(0x4)
        /*
-        * When this bit is '1', the RSS hash shall be computed
-        * over source and destination IPv6 addresses of IPv6
-        * packets.
+        * If this bit is set to 1, then all queue_id3 related
+        * parameters in this command are valid.
         */
-       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV6         UINT32_C(0x8)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID3_VALID \
+               UINT32_C(0x8)
        /*
-        * When this bit is '1', the RSS hash shall be computed
-        * over source/destination IPv6 addresses and
-        * source/destination ports of TCP/IPv6 packets.
+        * If this bit is set to 1, then all queue_id4 related
+        * parameters in this command are valid.
         */
-       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_TCP_IPV6     UINT32_C(0x10)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID4_VALID \
+               UINT32_C(0x10)
        /*
-        * When this bit is '1', the RSS hash shall be computed
-        * over source/destination IPv6 addresses and
-        * source/destination ports of UDP/IPv6 packets.
+        * If this bit is set to 1, then all queue_id5 related
+        * parameters in this command are valid.
         */
-       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV6     UINT32_C(0x20)
-       /* VNIC ID of VNIC associated with RSS table being configured. */
-       uint16_t        vnic_id;
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID5_VALID \
+               UINT32_C(0x20)
        /*
-        * Specifies which VNIC ring table pair to configure.
-        * Valid values range from 0 to 7.
+        * If this bit is set to 1, then all queue_id6 related
+        * parameters in this command are valid.
         */
-       uint8_t ring_table_pair_index;
-       /* Flags to specify different RSS hash modes. */
-       uint8_t hash_mode_flags;
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID6_VALID \
+               UINT32_C(0x40)
        /*
-        * When this bit is '1', it indicates using current RSS
-        * hash mode setting configured in the device.
+        * If this bit is set to 1, then all queue_id7 related
+        * parameters in this command are valid.
         */
-       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_MODE_FLAGS_DEFAULT \
-               UINT32_C(0x1)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID7_VALID \
+               UINT32_C(0x80)
        /*
-        * When this bit is '1', it indicates requesting support of
-        * RSS hashing over innermost 4 tuples {l3.src, l3.dest,
-        * l4.src, l4.dest} for tunnel packets. For non-tunnel
-        * packets, the RSS hash is computed over the normal
-        * src/dest l3 and src/dest l4 headers.
+        * Port ID of port for which the table is being configured.
+        * The HWRM needs to check whether this function is allowed
+        * to configure TC BW assignment on this port.
         */
-       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_MODE_FLAGS_INNERMOST_4 \
-               UINT32_C(0x2)
+       uint16_t        port_id;
+       /* ID of CoS Queue 0. */
+       uint8_t queue_id0;
+       uint8_t unused_0;
+       /*
+        * Minimum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this COS inside the device.
+        */
+       uint32_t        queue_id0_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is a 3-bit field. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * When this bit is '1', it indicates requesting support of
-        * RSS hashing over innermost 2 tuples {l3.src, l3.dest} for
-        * tunnel packets. For non-tunnel packets, the RSS hash is
-        * computed over the normal src/dest l3 headers.
+        * Maximum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this COS inside the device.
         */
-       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_MODE_FLAGS_INNERMOST_2 \
-               UINT32_C(0x4)
+       uint32_t        queue_id0_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is a 3-bit field. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id0_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_TSA_ASSIGN_ETS \
+               UINT32_C(0x1)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST \
+               UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
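        /*
         * Editorial sketch, not part of the generated interface: assuming
         * req is a zeroed struct hwrm_queue_cos2bw_cfg_input (the struct
         * declared at the top of this hunk) and that the driver converts
         * multi-byte fields to little endian before sending (e.g. with
         * rte_cpu_to_le_32()), a guaranteed minimum of 100 Mb for CoS
         * queue 0 could be encoded roughly as:
         *
         *   req.enables |=
         *       HWRM_QUEUE_COS2BW_CFG_INPUT_ENABLES_COS_QUEUE_ID0_VALID;
         *   req.queue_id0_min_bw =
         *       (100 << HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_SFT) |
         *       HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_SCALE_BITS |
         *       HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MEGA;
         */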
        /*
-        * When this bit is '1', it indicates requesting support of
-        * RSS hashing over outermost 4 tuples {t_l3.src, t_l3.dest,
-        * t_l4.src, t_l4.dest} for tunnel packets. For non-tunnel
-        * packets, the RSS hash is computed over the normal
-        * src/dest l3 and src/dest l4 headers.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority, SP).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_MODE_FLAGS_OUTERMOST_4 \
-               UINT32_C(0x8)
+       uint8_t queue_id0_pri_lvl;
        /*
-        * When this bit is '1', it indicates requesting support of
-        * RSS hashing over outermost 2 tuples {t_l3.src, t_l3.dest} for
-        * tunnel packets. For non-tunnel packets, the RSS hash is
-        * computed over the normal src/dest l3 headers.
+        * Weight used to allocate remaining BW for this COS after
+        * servicing guaranteed bandwidths for all COS.
         */
-       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_MODE_FLAGS_OUTERMOST_2 \
-               UINT32_C(0x10)
-       /* This is the address for rss ring group table */
-       uint64_t        ring_grp_tbl_addr;
-       /* This is the address for rss hash key table */
-       uint64_t        hash_key_tbl_addr;
-       /* Index to the rss indirection table. */
-       uint16_t        rss_ctx_idx;
-       uint8_t unused_1[6];
-} __attribute__((packed));
-
-/* hwrm_vnic_rss_cfg_output (size:128b/16B) */
-struct hwrm_vnic_rss_cfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t unused_0[7];
+       uint8_t queue_id0_bw_weight;
+       /* ID of CoS Queue 1. */
+       uint8_t queue_id1;
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * Minimum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this COS inside the device.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/**********************
- * hwrm_vnic_rss_qcfg *
- **********************/
-
-
-/* hwrm_vnic_rss_qcfg_input (size:192b/24B) */
-struct hwrm_vnic_rss_qcfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       uint32_t        queue_id1_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is a 3-bit field. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * Maximum BW allocated to CoS queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this COS inside the device.
         */
-       uint16_t        cmpl_ring;
+       uint32_t        queue_id1_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is a 3-bit field. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id1_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_TSA_ASSIGN_ETS \
+               UINT32_C(0x1)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST \
+               UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority, SP).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       uint16_t        seq_id;
+       uint8_t queue_id1_pri_lvl;
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * Weight used to allocate remaining BW for this COS after
+        * servicing guaranteed bandwidths for all COS.
         */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+       uint8_t queue_id1_bw_weight;
+       /* ID of CoS Queue 2. */
+       uint8_t queue_id2;
+       /*
+        * Minimum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this COS inside the device.
         */
-       uint64_t        resp_addr;
-       /* Index to the rss indirection table. */
-       uint16_t        rss_ctx_idx;
-       uint8_t unused_0[6];
-} __attribute__((packed));
-
-/* hwrm_vnic_rss_qcfg_output (size:512b/64B) */
-struct hwrm_vnic_rss_qcfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint32_t        hash_type;
+       uint32_t        queue_id2_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is a 3-bit field. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * When this bit is '1', the RSS hash shall be computed
-        * over source and destination IPv4 addresses of IPv4
-        * packets.
+        * Maximum BW allocated to CoS queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this COS inside the device.
         */
-       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_TYPE_IPV4         UINT32_C(0x1)
+       uint32_t        queue_id2_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is a 3-bit field. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id2_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_TSA_ASSIGN_ETS \
+               UINT32_C(0x1)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST \
+               UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * When this bit is '1', the RSS hash shall be computed
-        * over source/destination IPv4 addresses and
-        * source/destination ports of TCP/IPv4 packets.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority, SP).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_TYPE_TCP_IPV4     UINT32_C(0x2)
+       uint8_t queue_id2_pri_lvl;
        /*
-        * When this bit is '1', the RSS hash shall be computed
-        * over source/destination IPv4 addresses and
-        * source/destination ports of UDP/IPv4 packets.
+        * Weight used to allocate remaining BW for this COS after
+        * servicing guaranteed bandwidths for all COS.
         */
-       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_TYPE_UDP_IPV4     UINT32_C(0x4)
+       uint8_t queue_id2_bw_weight;
+       /* ID of CoS Queue 3. */
+       uint8_t queue_id3;
        /*
-        * When this bit is '1', the RSS hash shall be computed
-        * over source and destination IPv6 addresses of IPv6
-        * packets.
+        * Minimum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this COS inside the device.
         */
-       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_TYPE_IPV6         UINT32_C(0x8)
+       uint32_t        queue_id3_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is a 3-bit field. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * When this bit is '1', the RSS hash shall be computed
-        * over source/destination IPv6 addresses and
-        * source/destination ports of TCP/IPv6 packets.
+        * Maximum BW allocated to CoS queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this COS inside the device.
         */
-       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_TYPE_TCP_IPV6     UINT32_C(0x10)
+       uint32_t        queue_id3_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is a 3-bit field. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id3_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_TSA_ASSIGN_ETS \
+               UINT32_C(0x1)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST \
+               UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * When this bit is '1', the RSS hash shall be computed
-        * over source/destination IPv6 addresses and
-        * source/destination ports of UDP/IPv6 packets.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority, SP).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_TYPE_UDP_IPV6     UINT32_C(0x20)
-       uint8_t unused_0[4];
-       /* This is the value of rss hash key */
-       uint32_t        hash_key[10];
-       /* Flags to specify different RSS hash modes. */
-       uint8_t hash_mode_flags;
+       uint8_t queue_id3_pri_lvl;
        /*
-        * When this bit is '1', it indicates using current RSS
-        * hash mode setting configured in the device.
+        * Weight used to allocate remaining BW for this COS after
+        * servicing guaranteed bandwidths for all COS.
         */
-       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_MODE_FLAGS_DEFAULT \
-               UINT32_C(0x1)
+       uint8_t queue_id3_bw_weight;
+       /* ID of CoS Queue 4. */
+       uint8_t queue_id4;
        /*
-        * When this bit is '1', it indicates requesting support of
-        * RSS hashing over innermost 4 tuples {l3.src, l3.dest,
-        * l4.src, l4.dest} for tunnel packets. For non-tunnel
-        * packets, the RSS hash is computed over the normal
-        * src/dest l3 and src/dest l4 headers.
+        * Minimum BW allocated to CoS Queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this COS inside the device.
         */
-       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_MODE_FLAGS_INNERMOST_4 \
-               UINT32_C(0x2)
+       uint32_t        queue_id4_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits wide. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * When this bit is '1', it indicates requesting support of
-        * RSS hashing over innermost 2 tuples {l3.src, l3.dest} for
-        * tunnel packets. For none-tunnel packets, the RSS hash is
-        * computed over the normal src/dest l3 headers.
+        * Maximum BW allocated to the CoS queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_MODE_FLAGS_INNERMOST_2 \
-               UINT32_C(0x4)
+       uint32_t        queue_id4_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits wide. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id4_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_TSA_ASSIGN_ETS \
+               UINT32_C(0x1)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST \
+               UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * When this bit is '1', it indicates requesting support of
-        * RSS hashing over outermost 4 tuples {t_l3.src, t_l3.dest,
-        * t_l4.src, t_l4.dest} for tunnel packets. For none-tunnel
-        * packets, the RSS hash is computed over the normal
-        * src/dest l3 and src/dest l4 headers.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_MODE_FLAGS_OUTERMOST_4 \
-               UINT32_C(0x8)
+       uint8_t queue_id4_pri_lvl;
        /*
-        * When this bit is '1', it indicates requesting support of
-        * RSS hashing over outermost 2 tuples {t_l3.src, t_l3.dest} for
-        * tunnel packets. For none-tunnel packets, the RSS hash is
-        * computed over the normal src/dest l3 headers.
+        * Weight used to allocate the remaining BW for this CoS after
+        * servicing the guaranteed bandwidths of all CoS queues.
         */
-       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_MODE_FLAGS_OUTERMOST_2 \
-               UINT32_C(0x10)
-       uint8_t unused_1[6];
+       uint8_t queue_id4_bw_weight;
+       /* ID of CoS Queue 5. */
+       uint8_t queue_id5;
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * Minimum BW allocated to the CoS queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/**************************
- * hwrm_vnic_plcmodes_cfg *
- **************************/
-
-
-/* hwrm_vnic_plcmodes_cfg_input (size:320b/40B) */
-struct hwrm_vnic_plcmodes_cfg_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       uint32_t        queue_id5_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits wide. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * Maximum BW allocated to the CoS queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       uint16_t        cmpl_ring;
+       uint32_t        queue_id5_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits wide. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id5_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_TSA_ASSIGN_ETS \
+               UINT32_C(0x1)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST \
+               UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       uint16_t        seq_id;
+       uint8_t queue_id5_pri_lvl;
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * Weight used to allocate the remaining BW for this CoS after
+        * servicing the guaranteed bandwidths of all CoS queues.
         */
-       uint16_t        target_id;
+       uint8_t queue_id5_bw_weight;
+       /* ID of CoS Queue 6. */
+       uint8_t queue_id6;
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * Minimum BW allocated to the CoS queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       uint64_t        resp_addr;
-       uint32_t        flags;
+       uint32_t        queue_id6_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits wide. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * When this bit is '1', the VNIC shall be configured to
-        * use regular placement algorithm.
-        * By default, the regular placement algorithm shall be
-        * enabled on the VNIC.
+        * Maximum BW allocated to the CoS queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       #define HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_REGULAR_PLACEMENT \
+       uint32_t        queue_id6_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits wide. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id6_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_TSA_ASSIGN_ETS \
                UINT32_C(0x1)
-       /*
-        * When this bit is '1', the VNIC shall be configured
-        * use the jumbo placement algorithm.
-        */
-       #define HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_JUMBO_PLACEMENT \
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST \
                UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * When this bit is '1', the VNIC shall be configured
-        * to enable Header-Data split for IPv4 packets according
-        * to the following rules:
-        * # If the packet is identified as TCP/IPv4, then the
-        * packet is split at the beginning of the TCP payload.
-        * # If the packet is identified as UDP/IPv4, then the
-        * packet is split at the beginning of UDP payload.
-        * # If the packet is identified as non-TCP and non-UDP
-        * IPv4 packet, then the packet is split at the beginning
-        * of the upper layer protocol header carried in the IPv4
-        * packet.
-        */
-       #define HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_HDS_IPV4 \
-               UINT32_C(0x4)
-       /*
-        * When this bit is '1', the VNIC shall be configured
-        * to enable Header-Data split for IPv6 packets according
-        * to the following rules:
-        * # If the packet is identified as TCP/IPv6, then the
-        * packet is split at the beginning of the TCP payload.
-        * # If the packet is identified as UDP/IPv6, then the
-        * packet is split at the beginning of UDP payload.
-        * # If the packet is identified as non-TCP and non-UDP
-        * IPv6 packet, then the packet is split at the beginning
-        * of the upper layer protocol header carried in the IPv6
-        * packet.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       #define HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_HDS_IPV6 \
-               UINT32_C(0x8)
+       uint8_t queue_id6_pri_lvl;
        /*
-        * When this bit is '1', the VNIC shall be configured
-        * to enable Header-Data split for FCoE packets at the
-        * beginning of FC payload.
+        * Weight used to allocate the remaining BW for this CoS after
+        * servicing the guaranteed bandwidths of all CoS queues.
         */
-       #define HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_HDS_FCOE \
-               UINT32_C(0x10)
+       uint8_t queue_id6_bw_weight;
+       /* ID of CoS Queue 7. */
+       uint8_t queue_id7;
        /*
-        * When this bit is '1', the VNIC shall be configured
-        * to enable Header-Data split for RoCE packets at the
-        * beginning of RoCE payload (after BTH/GRH headers).
+        * Minimum BW allocated to the CoS queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       #define HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_HDS_ROCE \
-               UINT32_C(0x20)
-       uint32_t        enables;
+       uint32_t        queue_id7_min_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits wide. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID
        /*
-        * This bit must be '1' for the jumbo_thresh_valid field to be
-        * configured.
+        * Maximum BW allocated to the CoS queue.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this CoS inside the device.
         */
-       #define HWRM_VNIC_PLCMODES_CFG_INPUT_ENABLES_JUMBO_THRESH_VALID \
+       uint32_t        queue_id7_max_bw;
+       /* The bandwidth value. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_SFT \
+               0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_SCALE_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is 3 bits wide. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_SFT \
+               29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID
+       /* Transmission Selection Algorithm (TSA) for CoS Queue. */
+       uint8_t queue_id7_tsa_assign;
+       /* Strict Priority */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_TSA_ASSIGN_SP \
+               UINT32_C(0x0)
+       /* Enhanced Transmission Selection */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_TSA_ASSIGN_ETS \
                UINT32_C(0x1)
-       /*
-        * This bit must be '1' for the hds_offset_valid field to be
-        * configured.
-        */
-       #define HWRM_VNIC_PLCMODES_CFG_INPUT_ENABLES_HDS_OFFSET_VALID \
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST \
                UINT32_C(0x2)
+       /* reserved. */
+       #define HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST \
+               UINT32_C(0xff)
        /*
-        * This bit must be '1' for the hds_threshold_valid field to be
-        * configured.
-        */
-       #define HWRM_VNIC_PLCMODES_CFG_INPUT_ENABLES_HDS_THRESHOLD_VALID \
-               UINT32_C(0x4)
-       /* Logical vnic ID */
-       uint32_t        vnic_id;
-       /*
-        * When jumbo placement algorithm is enabled, this value
-        * is used to determine the threshold for jumbo placement.
-        * Packets with length larger than this value will be
-        * placed according to the jumbo placement algorithm.
-        */
-       uint16_t        jumbo_thresh;
-       /*
-        * This value is used to determine the offset into
-        * packet buffer where the split data (payload) will be
-        * placed according to one of of HDS placement algorithm.
-        *
-        * The lengths of packet buffers provided for split data
-        * shall be larger than this value.
+        * Priority level for strict priority. Valid only when
+        * tsa_assign is 0 (Strict Priority).
+        * 0..7 - Valid values.
+        * 8..255 - Reserved.
         */
-       uint16_t        hds_offset;
+       uint8_t queue_id7_pri_lvl;
        /*
-        * When one of the HDS placement algorithm is enabled, this
-        * value is used to determine the threshold for HDS
-        * placement.
-        * Packets with length larger than this value will be
-        * placed according to the HDS placement algorithm.
-        * This value shall be in multiple of 4 bytes.
+        * Weight used to allocate the remaining BW for this CoS after
+        * servicing the guaranteed bandwidths of all CoS queues.
         */
-       uint16_t        hds_threshold;
-       uint8_t unused_0[6];
+       uint8_t queue_id7_bw_weight;
+       uint8_t unused_1[5];
 } __attribute__((packed));
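
The min_bw/max_bw words above pack three items into a single uint32_t:
a 28-bit bandwidth value (bits 0..27), a scale bit selecting bits or
bytes (bit 28), and a 3-bit unit (bits 29..31). A minimal sketch of the
encoding, using only macros defined above; the helper name is ours, not
the driver's, and a megabit rate happens to be the all-zero unit
encoding (SCALE_BITS plus UNIT_MEGA):

    /* Hypothetical helper: encode `mbps` megabits as a min_bw word. */
    static uint32_t
    encode_min_bw_mbps(uint32_t mbps)
    {
            uint32_t bw;

            /* The 28-bit bandwidth value occupies bits 0..27. */
            bw = (mbps << HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_SFT) &
                 HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_MASK;
            /* Bit 28: 0 selects a bit count, 1 a byte count. */
            bw |= HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_SCALE_BITS;
            /* Bits 29..31: MEGA means Mb, given SCALE_BITS. */
            bw |= HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MEGA;
            return bw;
    }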
 
-/* hwrm_vnic_plcmodes_cfg_output (size:128b/16B) */
-struct hwrm_vnic_plcmodes_cfg_output {
+/* hwrm_queue_cos2bw_cfg_output (size:128b/16B) */
+struct hwrm_queue_cos2bw_cfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -18808,13 +18205,13 @@ struct hwrm_vnic_plcmodes_cfg_output {
        uint8_t valid;
 } __attribute__((packed));
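
The TSA fields repeat the same pattern for each CoS queue in the
request: SP queues are ordered by pri_lvl, while ETS queues share the
leftover bandwidth by bw_weight. An illustrative fragment; the helper
name, priority level and weight are placeholders of ours, and the
request is assumed to be zeroed beforehand:

    static void
    fill_cos2bw_tsa(struct hwrm_queue_cos2bw_cfg_input *req)
    {
            /* Queue 4: strict priority at the highest level (0..7). */
            req->queue_id4_tsa_assign =
                    HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID4_TSA_ASSIGN_SP;
            req->queue_id4_pri_lvl = 7;

            /* Queue 5: ETS; the weight shares the remaining bandwidth. */
            req->queue_id5_tsa_assign =
                    HWRM_QUEUE_COS2BW_CFG_INPUT_QUEUE_ID5_TSA_ASSIGN_ETS;
            req->queue_id5_bw_weight = 50;
    }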
 
-/***************************
- * hwrm_vnic_plcmodes_qcfg *
- ***************************/
+/*******************
+ * hwrm_vnic_alloc *
+ *******************/
 
 
-/* hwrm_vnic_plcmodes_qcfg_input (size:192b/24B) */
-struct hwrm_vnic_plcmodes_qcfg_input {
+/* hwrm_vnic_alloc_input (size:192b/24B) */
+struct hwrm_vnic_alloc_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -18842,138 +18239,17 @@ struct hwrm_vnic_plcmodes_qcfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* Logical vnic ID */
-       uint32_t        vnic_id;
-       uint8_t unused_0[4];
-} __attribute__((packed));
-
-/* hwrm_vnic_plcmodes_qcfg_output (size:192b/24B) */
-struct hwrm_vnic_plcmodes_qcfg_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
        uint32_t        flags;
        /*
-        * When this bit is '1', the VNIC is configured to
-        * use regular placement algorithm.
-        */
-       #define HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_REGULAR_PLACEMENT \
-               UINT32_C(0x1)
-       /*
-        * When this bit is '1', the VNIC is configured to
-        * use the jumbo placement algorithm.
-        */
-       #define HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_JUMBO_PLACEMENT \
-               UINT32_C(0x2)
-       /*
-        * When this bit is '1', the VNIC is configured
-        * to enable Header-Data split for IPv4 packets.
-        */
-       #define HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_HDS_IPV4 \
-               UINT32_C(0x4)
-       /*
-        * When this bit is '1', the VNIC is configured
-        * to enable Header-Data split for IPv6 packets.
-        */
-       #define HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_HDS_IPV6 \
-               UINT32_C(0x8)
-       /*
-        * When this bit is '1', the VNIC is configured
-        * to enable Header-Data split for FCoE packets.
-        */
-       #define HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_HDS_FCOE \
-               UINT32_C(0x10)
-       /*
-        * When this bit is '1', the VNIC is configured
-        * to enable Header-Data split for RoCE packets.
-        */
-       #define HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_HDS_ROCE \
-               UINT32_C(0x20)
-       /*
-        * When this bit is '1', the VNIC is configured
-        * to be the default VNIC of the requesting function.
-        */
-       #define HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_DFLT_VNIC \
-               UINT32_C(0x40)
-       /*
-        * When jumbo placement algorithm is enabled, this value
-        * is used to determine the threshold for jumbo placement.
-        * Packets with length larger than this value will be
-        * placed according to the jumbo placement algorithm.
-        */
-       uint16_t        jumbo_thresh;
-       /*
-        * This value is used to determine the offset into
-        * packet buffer where the split data (payload) will be
-        * placed according to one of of HDS placement algorithm.
-        *
-        * The lengths of packet buffers provided for split data
-        * shall be larger than this value.
-        */
-       uint16_t        hds_offset;
-       /*
-        * When one of the HDS placement algorithm is enabled, this
-        * value is used to determine the threshold for HDS
-        * placement.
-        * Packets with length larger than this value will be
-        * placed according to the HDS placement algorithm.
-        * This value shall be in multiple of 4 bytes.
-        */
-       uint16_t        hds_threshold;
-       uint8_t unused_0[5];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
-
-/**********************************
- * hwrm_vnic_rss_cos_lb_ctx_alloc *
- **********************************/
-
-
-/* hwrm_vnic_rss_cos_lb_ctx_alloc_input (size:128b/16B) */
-struct hwrm_vnic_rss_cos_lb_ctx_alloc_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * When this bit is '1', this VNIC is requested to
+        * be the default VNIC for this function.
         */
-       uint64_t        resp_addr;
+       #define HWRM_VNIC_ALLOC_INPUT_FLAGS_DEFAULT     UINT32_C(0x1)
+       uint8_t unused_0[4];
 } __attribute__((packed));
 
-/* hwrm_vnic_rss_cos_lb_ctx_alloc_output (size:128b/16B) */
-struct hwrm_vnic_rss_cos_lb_ctx_alloc_output {
+/* hwrm_vnic_alloc_output (size:128b/16B) */
+struct hwrm_vnic_alloc_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -18982,9 +18258,9 @@ struct hwrm_vnic_rss_cos_lb_ctx_alloc_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* rss_cos_lb_ctx_id is 16 b */
-       uint16_t        rss_cos_lb_ctx_id;
-       uint8_t unused_0[5];
+       /* Logical vnic ID */
+       uint32_t        vnic_id;
+       uint8_t unused_0[3];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -18995,13 +18271,13 @@ struct hwrm_vnic_rss_cos_lb_ctx_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
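
A sketch of the alloc round trip. hwrm_send() below is a hypothetical
stand-in for the driver's mailbox path, which also fills req_type,
seq_id and resp_addr; on-wire fields are little-endian, hence the
rte_cpu_to_le_32()/rte_le_to_cpu_32() conversions from <rte_byteorder.h>:

    static int
    alloc_default_vnic(uint32_t *vnic_id)
    {
            struct hwrm_vnic_alloc_input req = { 0 };
            struct hwrm_vnic_alloc_output resp = { 0 };

            /* Ask for this VNIC to become the function's default. */
            req.flags = rte_cpu_to_le_32(HWRM_VNIC_ALLOC_INPUT_FLAGS_DEFAULT);
            if (hwrm_send(&req, sizeof(req), &resp, sizeof(resp)) != 0 ||
                resp.valid != 1)
                    return -1;
            *vnic_id = rte_le_to_cpu_32(resp.vnic_id);
            return 0;
    }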
 
-/*********************************
- * hwrm_vnic_rss_cos_lb_ctx_free *
- *********************************/
+/******************
+ * hwrm_vnic_free *
+ ******************/
 
 
-/* hwrm_vnic_rss_cos_lb_ctx_free_input (size:192b/24B) */
-struct hwrm_vnic_rss_cos_lb_ctx_free_input {
+/* hwrm_vnic_free_input (size:192b/24B) */
+struct hwrm_vnic_free_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -19029,13 +18305,13 @@ struct hwrm_vnic_rss_cos_lb_ctx_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* rss_cos_lb_ctx_id is 16 b */
-       uint16_t        rss_cos_lb_ctx_id;
-       uint8_t unused_0[6];
+       /* Logical vnic ID */
+       uint32_t        vnic_id;
+       uint8_t unused_0[4];
 } __attribute__((packed));
 
-/* hwrm_vnic_rss_cos_lb_ctx_free_output (size:128b/16B) */
-struct hwrm_vnic_rss_cos_lb_ctx_free_output {
+/* hwrm_vnic_free_output (size:128b/16B) */
+struct hwrm_vnic_free_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -19055,13 +18331,13 @@ struct hwrm_vnic_rss_cos_lb_ctx_free_output {
        uint8_t valid;
 } __attribute__((packed));
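
Freeing mirrors allocation: only the logical VNIC ID returned by
hwrm_vnic_alloc needs to be supplied. A minimal sketch (helper name
ours):

    static void
    fill_vnic_free(struct hwrm_vnic_free_input *req, uint32_t vnic_id)
    {
            /* Release a VNIC previously returned by hwrm_vnic_alloc. */
            req->vnic_id = rte_cpu_to_le_32(vnic_id);
    }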
 
-/*******************
- * hwrm_ring_alloc *
- *******************/
+/*****************
+ * hwrm_vnic_cfg *
+ *****************/
 
 
-/* hwrm_ring_alloc_input (size:640b/80B) */
-struct hwrm_ring_alloc_input {
+/* hwrm_vnic_cfg_input (size:320b/40B) */
+struct hwrm_vnic_cfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -19089,268 +18365,168 @@ struct hwrm_ring_alloc_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint32_t        enables;
-       /*
-        * This bit must be '1' for the ring_arb_cfg field to be
-        * configured.
-        */
-       #define HWRM_RING_ALLOC_INPUT_ENABLES_RING_ARB_CFG \
-               UINT32_C(0x2)
-       /*
-        * This bit must be '1' for the stat_ctx_id_valid field to be
-        * configured.
-        */
-       #define HWRM_RING_ALLOC_INPUT_ENABLES_STAT_CTX_ID_VALID \
-               UINT32_C(0x8)
-       /*
-        * This bit must be '1' for the max_bw_valid field to be
-        * configured.
-        */
-       #define HWRM_RING_ALLOC_INPUT_ENABLES_MAX_BW_VALID \
-               UINT32_C(0x20)
-       /*
-        * This bit must be '1' for the rx_ring_id field to be
-        * configured.
-        */
-       #define HWRM_RING_ALLOC_INPUT_ENABLES_RX_RING_ID_VALID \
-               UINT32_C(0x40)
-       /*
-        * This bit must be '1' for the nq_ring_id field to be
-        * configured.
-        */
-       #define HWRM_RING_ALLOC_INPUT_ENABLES_NQ_RING_ID_VALID \
-               UINT32_C(0x80)
+       uint32_t        flags;
        /*
-        * This bit must be '1' for the rx_buf_size field to be
-        * configured.
+        * When this bit is '1', the VNIC is requested to
+        * be the default VNIC for the function.
         */
-       #define HWRM_RING_ALLOC_INPUT_ENABLES_RX_BUF_SIZE_VALID \
-               UINT32_C(0x100)
-       /* Ring Type. */
-       uint8_t ring_type;
-       /* L2 Completion Ring (CR) */
-       #define HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL   UINT32_C(0x0)
-       /* TX Ring (TR) */
-       #define HWRM_RING_ALLOC_INPUT_RING_TYPE_TX        UINT32_C(0x1)
-       /* RX Ring (RR) */
-       #define HWRM_RING_ALLOC_INPUT_RING_TYPE_RX        UINT32_C(0x2)
-       /* RoCE Notification Completion Ring (ROCE_CR) */
-       #define HWRM_RING_ALLOC_INPUT_RING_TYPE_ROCE_CMPL UINT32_C(0x3)
-       /* RX Aggregation Ring */
-       #define HWRM_RING_ALLOC_INPUT_RING_TYPE_RX_AGG    UINT32_C(0x4)
-       /* Notification Queue */
-       #define HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ        UINT32_C(0x5)
-       #define HWRM_RING_ALLOC_INPUT_RING_TYPE_LAST \
-               HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ
-       uint8_t unused_0[3];
+       #define HWRM_VNIC_CFG_INPUT_FLAGS_DEFAULT \
+               UINT32_C(0x1)
        /*
-        * This value is a pointer to the page table for the
-        * Ring.
+        * When this bit is '1', the VNIC is being configured to
+        * strip VLAN in the RX path.
+        * If set to '0', then VLAN stripping is disabled on
+        * this VNIC.
         */
-       uint64_t        page_tbl_addr;
-       /* First Byte Offset of the first entry in the first page. */
-       uint32_t        fbo;
+       #define HWRM_VNIC_CFG_INPUT_FLAGS_VLAN_STRIP_MODE \
+               UINT32_C(0x2)
        /*
-        * Actual page size in 2^page_size. The supported range is increments
-        * in powers of 2 from 16 bytes to 1GB.
-        * - 4 = 16 B
-        *     Page size is 16 B.
-        * - 12 = 4 KB
-        *     Page size is 4 KB.
-        * - 13 = 8 KB
-        *     Page size is 8 KB.
-        * - 16 = 64 KB
-        *     Page size is 64 KB.
-        * - 21 = 2 MB
-        *     Page size is 2 MB.
-        * - 22 = 4 MB
-        *     Page size is 4 MB.
-        * - 30 = 1 GB
-        *     Page size is 1 GB.
+        * When this bit is '1', the VNIC is being configured to
+        * buffer receive packets in the hardware until the host
+        * posts new receive buffers.
+        * If set to '0', then bd_stall is disabled on this VNIC.
         */
-       uint8_t page_size;
+       #define HWRM_VNIC_CFG_INPUT_FLAGS_BD_STALL_MODE \
+               UINT32_C(0x4)
        /*
-        * This value indicates the depth of page table.
-        * For this version of the specification, value other than 0 or
-        * 1 shall be considered as an invalid value.
-        * When the page_tbl_depth = 0, then it is treated as a
-        * special case with the following.
-        * 1. FBO and page size fields are not valid.
-        * 2. page_tbl_addr is the physical address of the first
-        *    element of the ring.
+        * When this bit is '1', the VNIC is being configured to
+        * receive both RoCE and non-RoCE traffic.
+        * If set to '0', then this VNIC is not configured to
+        * operate in dual VNIC mode.
         */
-       uint8_t page_tbl_depth;
-       uint8_t unused_1[2];
+       #define HWRM_VNIC_CFG_INPUT_FLAGS_ROCE_DUAL_VNIC_MODE \
+               UINT32_C(0x8)
        /*
-        * Number of 16B units in the ring.  Minimum size for
-        * a ring is 16 16B entries.
+        * When this flag is set to '1', the VNIC is requested to
+        * be configured to receive only RoCE traffic.
+        * If this flag is set to '0', then this flag shall be
+        * ignored by the HWRM.
+        * If the roce_dual_vnic_mode flag is set to '1',
+        * or the roce_mirroring_capable_vnic_mode flag is set
+        * to '1', then the HWRM client shall not set this flag to '1'.
         */
-       uint32_t        length;
+       #define HWRM_VNIC_CFG_INPUT_FLAGS_ROCE_ONLY_VNIC_MODE \
+               UINT32_C(0x10)
        /*
-        * Logical ring number for the ring to be allocated.
-        * This value determines the position in the doorbell
-        * area where the update to the ring will be made.
+        * When a VNIC uses one destination ring group for a certain
+        * application (e.g. Receive Flow Steering), where an
+        * exact match is used to direct packets to a VNIC with one
+        * destination ring group only, there is no need to configure
+        * an RSS indirection table for that VNIC as only one
+        * destination ring group is used.
         *
-        * For completion rings, this value is also the MSI-X
-        * vector number for the function the completion ring is
-        * associated with.
+        * This flag is used to enable a mode where
+        * RSS is enabled in the VNIC using an RSS context
+        * for computing the RSS hash, but the RSS indirection
+        * table is not configured using hwrm_vnic_rss_cfg.
+        *
+        * If this mode is enabled, then the driver should not program
+        * the RSS indirection table for the RSS context that is used
+        * for computing the RSS hash only.
         */
-       uint16_t        logical_id;
+       #define HWRM_VNIC_CFG_INPUT_FLAGS_RSS_DFLT_CR_MODE \
+               UINT32_C(0x20)
        /*
-        * This field is used only when ring_type is a TX ring.
-        * This value indicates what completion ring the TX ring
-        * is associated with.
+        * When this bit is '1', the VNIC is being configured to
+        * receive both RoCE and non-RoCE traffic, but forward only the
+        * RoCE traffic further. Also, RoCE traffic can be mirrored to
+        * the L2 driver.
         */
-       uint16_t        cmpl_ring_id;
+       #define HWRM_VNIC_CFG_INPUT_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_MODE \
+               UINT32_C(0x40)
+       uint32_t        enables;
        /*
-        * This field is used only when ring_type is a TX ring.
-        * This value indicates what CoS queue the TX ring
-        * is associated with.
+        * This bit must be '1' for the dflt_ring_grp field to be
+        * configured.
         */
-       uint16_t        queue_id;
+       #define HWRM_VNIC_CFG_INPUT_ENABLES_DFLT_RING_GRP \
+               UINT32_C(0x1)
        /*
-        * When allocating a Rx ring or Rx aggregation ring, this field
-        * specifies the size of the buffer descriptors posted to the ring.
+        * This bit must be '1' for the rss_rule field to be
+        * configured.
         */
-       uint16_t        rx_buf_size;
+       #define HWRM_VNIC_CFG_INPUT_ENABLES_RSS_RULE \
+               UINT32_C(0x2)
        /*
-        * When allocating an Rx aggregation ring, this field
-        * specifies the associated Rx ring ID.
+        * This bit must be '1' for the cos_rule field to be
+        * configured.
         */
-       uint16_t        rx_ring_id;
+       #define HWRM_VNIC_CFG_INPUT_ENABLES_COS_RULE \
+               UINT32_C(0x4)
        /*
-        * When allocating a completion ring, this field
-        * specifies the associated NQ ring ID.
+        * This bit must be '1' for the lb_rule field to be
+        * configured.
         */
-       uint16_t        nq_ring_id;
+       #define HWRM_VNIC_CFG_INPUT_ENABLES_LB_RULE \
+               UINT32_C(0x8)
        /*
-        * This field is used only when ring_type is a TX ring.
-        * This field is used to configure arbitration related
-        * parameters for a TX ring.
+        * This bit must be '1' for the mru field to be
+        * configured.
         */
-       uint16_t        ring_arb_cfg;
-       /* Arbitration policy used for the ring. */
-       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_MASK \
-               UINT32_C(0xf)
-       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_SFT       0
+       #define HWRM_VNIC_CFG_INPUT_ENABLES_MRU \
+               UINT32_C(0x10)
        /*
-        * Use strict priority for the TX ring.
-        * Priority value is specified in arb_policy_param
+        * This bit must be '1' for the default_rx_ring_id field to be
+        * configured.
         */
-       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_SP \
-               UINT32_C(0x1)
+       #define HWRM_VNIC_CFG_INPUT_ENABLES_DEFAULT_RX_RING_ID \
+               UINT32_C(0x20)
        /*
-        * Use weighted fair queue arbitration for the TX ring.
-        * Weight is specified in arb_policy_param
+        * This bit must be '1' for the default_cmpl_ring_id field to be
+        * configured.
         */
-       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_WFQ \
-               UINT32_C(0x2)
-       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_LAST \
-               HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_WFQ
-       /* Reserved field. */
-       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_RSVD_MASK \
-               UINT32_C(0xf0)
-       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_RSVD_SFT             4
+       #define HWRM_VNIC_CFG_INPUT_ENABLES_DEFAULT_CMPL_RING_ID \
+               UINT32_C(0x40)
+       /* Logical vnic ID */
+       uint16_t        vnic_id;
        /*
-        * Arbitration policy specific parameter.
-        * # For strict priority arbitration policy, this field
-        * represents a priority value. If set to 0, then the priority
-        * is not specified and the HWRM is allowed to select
-        * any priority for this TX ring.
-        * # For weighted fair queue arbitration policy, this field
-        * represents a weight value. If set to 0, then the weight
-        * is not specified and the HWRM is allowed to select
-        * any weight for this TX ring.
+        * Default Completion ring for the VNIC.  This ring will
+        * be chosen if a packet does not match any RSS rules and if
+        * there is no COS rule.
         */
-       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_PARAM_MASK \
-               UINT32_C(0xff00)
-       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_PARAM_SFT 8
-       uint16_t        unused_3;
+       uint16_t        dflt_ring_grp;
        /*
-        * This field is reserved for the future use.
-        * It shall be set to 0.
+        * RSS ID for RSS rule/table structure.  0xFF... (All Fs) if
+        * there is no RSS rule.
         */
-       uint32_t        reserved3;
+       uint16_t        rss_rule;
        /*
-        * This field is used only when ring_type is a TX ring.
-        * This input indicates what statistics context this ring
-        * should be associated with.
+        * RSS ID for COS rule/table structure.  0xFF... (All Fs) if
+        * there is no COS rule.
         */
-       uint32_t        stat_ctx_id;
+       uint16_t        cos_rule;
        /*
-        * This field is reserved for the future use.
-        * It shall be set to 0.
+        * RSS ID for load balancing rule/table structure.
+        * 0xFF... (All Fs) if there is no LB rule.
         */
-       uint32_t        reserved4;
+       uint16_t        lb_rule;
        /*
-        * This field is used only when ring_type is a TX ring
-        * to specify maximum BW allocated to the TX ring.
-        * The HWRM will translate this value into byte counter and
-        * time interval used for this ring inside the device.
+        * The maximum receive unit of the VNIC.
+        * Each VNIC is associated with a function.
+        * The VNIC mru value overrides the mru setting of the
+        * associated function.
+        * The HWRM shall make sure that the VNIC mru does not
+        * exceed the mru of the port the function is associated with.
         */
-       uint32_t        max_bw;
-       /* The bandwidth value. */
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_SFT              0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_SCALE_LAST \
-               HWRM_RING_ALLOC_INPUT_MAX_BW_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_SFT         29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_LAST \
-               HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_INVALID
+       uint16_t        mru;
        /*
-        * This field is used only when ring_type is a Completion ring.
-        * This value indicates what interrupt mode should be used
-        * on this completion ring.
-        * Note: In the legacy interrupt mode, no more than 16
-        * completion rings are allowed.
+        * Default Rx ring for the VNIC.  This ring will
+        * be chosen if a packet does not match any RSS rules.
+        * The aggregation ring associated with the Rx ring is
+        * implied based on the Rx ring specified when the
+        * aggregation ring was allocated.
         */
-       uint8_t int_mode;
-       /* Legacy INTA */
-       #define HWRM_RING_ALLOC_INPUT_INT_MODE_LEGACY UINT32_C(0x0)
-       /* Reserved */
-       #define HWRM_RING_ALLOC_INPUT_INT_MODE_RSVD   UINT32_C(0x1)
-       /* MSI-X */
-       #define HWRM_RING_ALLOC_INPUT_INT_MODE_MSIX   UINT32_C(0x2)
-       /* No Interrupt - Polled mode */
-       #define HWRM_RING_ALLOC_INPUT_INT_MODE_POLL   UINT32_C(0x3)
-       #define HWRM_RING_ALLOC_INPUT_INT_MODE_LAST \
-               HWRM_RING_ALLOC_INPUT_INT_MODE_POLL
-       uint8_t unused_4[3];
+       uint16_t        default_rx_ring_id;
+       /*
+        * Default completion ring for the VNIC.  This ring will
+        * be chosen if a packet does not match any RSS rules.
+        */
+       uint16_t        default_cmpl_ring_id;
 } __attribute__((packed));
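
The enables word gates every optional field in this request: firmware
applies a field only when its corresponding bit is set. A hedged sketch
with a hypothetical helper name and a placeholder jumbo MRU:

    static void
    fill_vnic_cfg(struct hwrm_vnic_cfg_input *req,
                  uint16_t vnic_id, uint16_t ring_grp_id)
    {
            /* Only the MRU and the default ring group are changed. */
            req->enables = rte_cpu_to_le_32(
                    HWRM_VNIC_CFG_INPUT_ENABLES_MRU |
                    HWRM_VNIC_CFG_INPUT_ENABLES_DFLT_RING_GRP);
            req->vnic_id = rte_cpu_to_le_16(vnic_id);
            req->dflt_ring_grp = rte_cpu_to_le_16(ring_grp_id);
            req->mru = rte_cpu_to_le_16(9000);  /* placeholder value */
    }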
 
-/* hwrm_ring_alloc_output (size:128b/16B) */
-struct hwrm_ring_alloc_output {
+/* hwrm_vnic_cfg_output (size:128b/16B) */
+struct hwrm_vnic_cfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -19359,14 +18535,7 @@ struct hwrm_ring_alloc_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /*
-        * Physical number of ring allocated.
-        * This value shall be unique for a ring type.
-        */
-       uint16_t        ring_id;
-       /* Logical number of ring allocated. */
-       uint16_t        logical_ring_id;
-       uint8_t unused_0[3];
+       uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -19378,12 +18547,12 @@ struct hwrm_ring_alloc_output {
 } __attribute__((packed));
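
Every *_output record here ends with the same valid byte, which the comment above specifies is written last. A minimal sketch of the polling contract that implies; hwrm_resp_ready() is a hypothetical helper, and the GCC/Clang __atomic acquire fence is an assumption standing in for the driver's own barrier:

#include <stdint.h>

/* Spin until the response 'valid' byte reads 1; firmware writes it
 * last, so the rest of the response is complete once it is set. */
static int hwrm_resp_ready(const volatile uint8_t *valid,
			   unsigned long spins)
{
	while (spins--) {
		if (*valid == 1) {
			/* Order the valid read before any later reads
			 * of the response body. */
			__atomic_thread_fence(__ATOMIC_ACQUIRE);
			return 1;
		}
	}
	return 0; /* timed out */
}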
 
 /******************
- * hwrm_ring_free *
+ * hwrm_vnic_qcfg *
  ******************/
 
 
-/* hwrm_ring_free_input (size:192b/24B) */
-struct hwrm_ring_free_input {
+/* hwrm_vnic_qcfg_input (size:256b/32B) */
+struct hwrm_vnic_qcfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -19411,30 +18580,21 @@ struct hwrm_ring_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* Ring Type. */
-       uint8_t ring_type;
-       /* L2 Completion Ring (CR) */
-       #define HWRM_RING_FREE_INPUT_RING_TYPE_L2_CMPL   UINT32_C(0x0)
-       /* TX Ring (TR) */
-       #define HWRM_RING_FREE_INPUT_RING_TYPE_TX        UINT32_C(0x1)
-       /* RX Ring (RR) */
-       #define HWRM_RING_FREE_INPUT_RING_TYPE_RX        UINT32_C(0x2)
-       /* RoCE Notification Completion Ring (ROCE_CR) */
-       #define HWRM_RING_FREE_INPUT_RING_TYPE_ROCE_CMPL UINT32_C(0x3)
-       /* RX Aggregation Ring */
-       #define HWRM_RING_FREE_INPUT_RING_TYPE_RX_AGG    UINT32_C(0x4)
-       /* Notification Queue */
-       #define HWRM_RING_FREE_INPUT_RING_TYPE_NQ        UINT32_C(0x5)
-       #define HWRM_RING_FREE_INPUT_RING_TYPE_LAST \
-               HWRM_RING_FREE_INPUT_RING_TYPE_NQ
-       uint8_t unused_0;
-       /* Physical number of ring allocated. */
-       uint16_t        ring_id;
-       uint8_t unused_1[4];
+       uint32_t        enables;
+       /*
+        * This bit must be '1' for the vf_id field to be
+        * configured.
+        */
+       #define HWRM_VNIC_QCFG_INPUT_ENABLES_VF_ID_VALID     UINT32_C(0x1)
+       /* Logical vnic ID */
+       uint32_t        vnic_id;
+       /* ID of Virtual Function whose VNIC resource is being queried. */
+       uint16_t        vf_id;
+       uint8_t unused_0[6];
 } __attribute__((packed));
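
The enables/field pairing used throughout these structures shows up here: vf_id is only honored when its enables bit is set. A hedged sketch of filling the request (hypothetical helper, endianness conversion elided):

#include <stdint.h>
#include <string.h>

/* Hypothetical helper: query a VNIC, optionally on behalf of a VF. */
static void vnic_qcfg_fill(struct hwrm_vnic_qcfg_input *req,
			   uint32_t vnic_id, int query_vf, uint16_t vf_id)
{
	memset(req, 0, sizeof(*req));
	req->vnic_id = vnic_id;
	if (query_vf) {
		/* vf_id is ignored unless this enables bit is set. */
		req->enables |= HWRM_VNIC_QCFG_INPUT_ENABLES_VF_ID_VALID;
		req->vf_id = vf_id;
	}
}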
 
-/* hwrm_ring_free_output (size:128b/16B) */
-struct hwrm_ring_free_output {
+/* hwrm_vnic_qcfg_output (size:256b/32B) */
+struct hwrm_vnic_qcfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -19443,7 +18603,94 @@ struct hwrm_ring_free_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       uint8_t unused_0[7];
+       /* Default ring group for the VNIC. */
+       uint16_t        dflt_ring_grp;
+       /*
+        * RSS ID for RSS rule/table structure.  0xFF... (All Fs) if
+        * there is no RSS rule.
+        */
+       uint16_t        rss_rule;
+       /*
+        * RSS ID for COS rule/table structure.  0xFF... (All Fs) if
+        * there is no COS rule.
+        */
+       uint16_t        cos_rule;
+       /*
+        * RSS ID for load balancing rule/table structure.
+        * 0xFF... (All Fs) if there is no LB rule.
+        */
+       uint16_t        lb_rule;
+       /* The maximum receive unit of the vnic. */
+       uint16_t        mru;
+       uint8_t unused_0[2];
+       uint32_t        flags;
+       /*
+        * When this bit is '1', the VNIC is the default VNIC for
+        * the function.
+        */
+       #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_DEFAULT \
+               UINT32_C(0x1)
+       /*
+        * When this bit is '1', the VNIC is configured to
+        * strip VLAN in the RX path.
+        * If set to '0', then VLAN stripping is disabled on
+        * this VNIC.
+        */
+       #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_VLAN_STRIP_MODE \
+               UINT32_C(0x2)
+       /*
+        * When this bit is '1', the VNIC is configured to
+        * buffer receive packets in the hardware until the host
+        * posts new receive buffers.
+        * If set to '0', then bd_stall is disabled on
+        * this VNIC.
+        */
+       #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_BD_STALL_MODE \
+               UINT32_C(0x4)
+       /*
+        * When this bit is '1', the VNIC is configured to
+        * receive both RoCE and non-RoCE traffic.
+        * If set to '0', then this VNIC is not configured to
+        * operate in dual VNIC mode.
+        */
+       #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_ROCE_DUAL_VNIC_MODE \
+               UINT32_C(0x8)
+       /*
+        * When this flag is set to '1', the VNIC is configured to
+        * receive only RoCE traffic.
+        * When this flag is set to '0', the VNIC is not configured
+        * to receive only RoCE traffic.
+        * If the roce_dual_vnic_mode flag and this flag are both
+        * set to '1', then the configuration of the VNIC is
+        * invalid. The HWRM should not allow that type of
+        * misconfiguration by HWRM clients.
+        */
+       #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_ROCE_ONLY_VNIC_MODE \
+               UINT32_C(0x10)
+       /*
+        * When a VNIC uses one destination ring group for a certain
+        * application (e.g. Receive Flow Steering) where
+        * exact match is used to direct packets to a VNIC with one
+        * destination ring group only, there is no need to configure
+        * RSS indirection table for that VNIC as only one destination
+        * ring group is used.
+        *
+        * When this bit is set to '1', then the VNIC is enabled in a
+        * mode where RSS is enabled in the VNIC using a RSS context
+        * for computing RSS hash but the RSS indirection table is
+        * not configured.
+        */
+       #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_RSS_DFLT_CR_MODE \
+               UINT32_C(0x20)
+       /*
+        * When this bit is '1', the VNIC is configured to
+        * receive both RoCE and non-RoCE traffic, but forward only
+        * RoCE traffic further. RoCE traffic can also be mirrored
+        * to the L2 driver.
+        */
+       #define HWRM_VNIC_QCFG_OUTPUT_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_MODE \
+               UINT32_C(0x40)
+       uint8_t unused_1[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -19454,13 +18701,13 @@ struct hwrm_ring_free_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/**************************************
- * hwrm_ring_cmpl_ring_qaggint_params *
- **************************************/
+/*******************
+ * hwrm_vnic_qcaps *
+ *******************/
 
 
-/* hwrm_ring_cmpl_ring_qaggint_params_input (size:192b/24B) */
-struct hwrm_ring_cmpl_ring_qaggint_params_input {
+/* hwrm_vnic_qcaps_input (size:192b/24B) */
+struct hwrm_vnic_qcaps_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -19488,13 +18735,12 @@ struct hwrm_ring_cmpl_ring_qaggint_params_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* Physical number of completion ring. */
-       uint16_t        ring_id;
-       uint8_t unused_0[6];
+       uint32_t        enables;
+       uint8_t unused_0[4];
 } __attribute__((packed));
 
-/* hwrm_ring_cmpl_ring_qaggint_params_output (size:256b/32B) */
-struct hwrm_ring_cmpl_ring_qaggint_params_output {
+/* hwrm_vnic_qcaps_output (size:192b/24B) */
+struct hwrm_vnic_qcaps_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -19503,53 +18749,74 @@ struct hwrm_ring_cmpl_ring_qaggint_params_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       uint16_t        flags;
-       /*
-        * When this bit is set to '1', interrupt max
-        * timer is reset whenever a completion is received.
-        */
-       #define HWRM_RING_CMPL_RING_QAGGINT_PARAMS_OUTPUT_FLAGS_TIMER_RESET \
+       /* The maximum receive unit that is settable on a vnic. */
+       uint16_t        mru;
+       uint8_t unused_0[2];
+       uint32_t        flags;
+       /* Unused. */
+       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_UNUSED \
                UINT32_C(0x1)
        /*
-        * When this bit is set to '1', ring idle mode
-        * aggregation will be enabled.
+        * When this bit is '1', the capability of stripping VLAN in
+        * the RX path is supported on VNIC(s).
+        * If set to '0', then VLAN stripping capability is
+        * not supported on VNIC(s).
         */
-       #define HWRM_RING_CMPL_RING_QAGGINT_PARAMS_OUTPUT_FLAGS_RING_IDLE \
+       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_VLAN_STRIP_CAP \
                UINT32_C(0x2)
        /*
-        * Number of completions to aggregate before DMA
-        * during the normal mode.
+        * When this bit is '1', the capability to buffer receive
+        * packets in the hardware until the host posts new receive buffers
+        * is supported on VNIC(s).
+        * If set to '0', then bd_stall capability is not supported
+        * on VNIC(s).
         */
-       uint16_t        num_cmpl_dma_aggr;
+       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_BD_STALL_CAP \
+               UINT32_C(0x4)
        /*
-        * Number of completions to aggregate before DMA
-        * during the interrupt mode.
+        * When this bit is '1', the capability to
+        * receive both RoCE and non-RoCE traffic on VNIC(s) is
+        * supported.
+        * If set to '0', then the capability to receive
+        * both RoCE and non-RoCE traffic on VNIC(s) is
+        * not supported.
         */
-       uint16_t        num_cmpl_dma_aggr_during_int;
+       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_ROCE_DUAL_VNIC_CAP \
+               UINT32_C(0x8)
        /*
-        * Timer in unit of 80-nsec used to aggregate completions before
-        * DMA during the normal mode (not in interrupt mode).
+        * When this bit is set to '1', the capability to configure
+        * a VNIC to receive only RoCE traffic is supported.
+        * When this flag is set to '0', the capability to configure
+        * a VNIC to receive only RoCE traffic is not supported.
         */
-       uint16_t        cmpl_aggr_dma_tmr;
+       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_ROCE_ONLY_VNIC_CAP \
+               UINT32_C(0x10)
        /*
-        * Timer in unit of 80-nsec used to aggregate completions before
-        * DMA during the interrupt mode.
+        * When this bit is set to '1', the capability to enable a
+        * VNIC in a mode where an RSS context is used for RSS hash
+        * computation without configuring the RSS indirection table
+        * is supported.
+        * When this bit is set to '0', a VNIC cannot be configured
+        * in such a mode.
         */
-       uint16_t        cmpl_aggr_dma_tmr_during_int;
-       /* Minimum time (in unit of 80-nsec) between two interrupts. */
-       uint16_t        int_lat_tmr_min;
+       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_RSS_DFLT_CR_CAP \
+               UINT32_C(0x20)
        /*
-        * Maximum wait time (in unit of 80-nsec) spent aggregating
-        * completions before signaling the interrupt after the
-        * interrupt is enabled.
+        * When this bit is '1', the capability to
+        * mirror the RoCE traffic is supported.
+        * If set to '0', then the capability to mirror the
+        * RoCE traffic is not supported.
         */
-       uint16_t        int_lat_tmr_max;
+       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_CAP \
+               UINT32_C(0x40)
        /*
-        * Minimum number of completions aggregated before signaling
-        * an interrupt.
+        * When this bit is '1', the outermost RSS hashing capability
+        * is supported. If set to '0', then the outermost RSS hashing
+        * capability is not supported.
         */
-       uint16_t        num_cmpl_aggr_int;
-       uint8_t unused_0[7];
+       #define HWRM_VNIC_QCAPS_OUTPUT_FLAGS_OUTERMOST_RSS_CAP \
+               UINT32_C(0x80)
+       uint8_t unused_1[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -19560,13 +18827,13 @@ struct hwrm_ring_cmpl_ring_qaggint_params_output {
        uint8_t valid;
 } __attribute__((packed));
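
A short, illustrative sketch of consuming the capability bits above (hypothetical helper; little-endian conversion of the response elided):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper: report what the queried VNIC capabilities allow. */
static void vnic_qcaps_log(const struct hwrm_vnic_qcaps_output *resp)
{
	uint32_t flags = resp->flags; /* le32-to-cpu conversion elided */

	if (flags & HWRM_VNIC_QCAPS_OUTPUT_FLAGS_VLAN_STRIP_CAP)
		printf("VLAN stripping supported\n");
	if (flags & HWRM_VNIC_QCAPS_OUTPUT_FLAGS_RSS_DFLT_CR_CAP)
		printf("RSS context without indirection table supported\n");
	if (flags & HWRM_VNIC_QCAPS_OUTPUT_FLAGS_OUTERMOST_RSS_CAP)
		printf("outermost RSS hashing supported\n");
	printf("max settable MRU: %u\n", (unsigned int)resp->mru);
}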
 
-/*****************************************
- * hwrm_ring_cmpl_ring_cfg_aggint_params *
- *****************************************/
+/*********************
+ * hwrm_vnic_tpa_cfg *
+ *********************/
 
 
-/* hwrm_ring_cmpl_ring_cfg_aggint_params_input (size:320b/40B) */
-struct hwrm_ring_cmpl_ring_cfg_aggint_params_input {
+/* hwrm_vnic_tpa_cfg_input (size:320b/40B) */
+struct hwrm_vnic_tpa_cfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -19594,109 +18861,145 @@ struct hwrm_ring_cmpl_ring_cfg_aggint_params_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* Physical number of completion ring. */
-       uint16_t        ring_id;
-       uint16_t        flags;
+       uint32_t        flags;
        /*
-        * When this bit is set to '1', interrupt latency max
-        * timer is reset whenever a completion is received.
+        * When this bit is '1', the VNIC shall be configured to
+        * perform transparent packet aggregation (TPA) of
+        * non-tunneled TCP packets.
         */
-       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_FLAGS_TIMER_RESET \
+       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_TPA \
                UINT32_C(0x1)
        /*
-        * When this bit is set to '1', ring idle mode
-        * aggregation will be enabled.
+        * When this bit is '1', the VNIC shall be configured to
+        * perform transparent packet aggregation (TPA) of
+        * tunneled TCP packets.
         */
-       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_FLAGS_RING_IDLE \
+       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_ENCAP_TPA \
                UINT32_C(0x2)
        /*
-        * Set this flag to 1 when configuring parameters on a
-        * notification queue. Set this flag to 0 when configuring
-        * parameters on a completion queue.
+        * When this bit is '1', the VNIC shall be configured to
+        * perform transparent packet aggregation (TPA) according
+        * to Windows Receive Segment Coalescing (RSC) rules.
         */
-       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_FLAGS_IS_NQ \
+       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_RSC_WND_UPDATE \
                UINT32_C(0x4)
        /*
-        * Number of completions to aggregate before DMA
-        * during the normal mode.
+        * When this bit is '1', the VNIC shall be configured to
+        * perform transparent packet aggregation (TPA) according
+        * to Linux Generic Receive Offload (GRO) rules.
         */
-       uint16_t        num_cmpl_dma_aggr;
+       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO \
+               UINT32_C(0x8)
        /*
-        * Number of completions to aggregate before DMA
-        * during the interrupt mode.
+        * When this bit is '1', the VNIC shall be configured to
+        * perform transparent packet aggregation (TPA) for TCP
+        * packets with IP ECN set to non-zero.
         */
-       uint16_t        num_cmpl_dma_aggr_during_int;
+       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_AGG_WITH_ECN \
+               UINT32_C(0x10)
        /*
-        * Timer in unit of 80-nsec used to aggregate completions before
-        * DMA during the normal mode (not in interrupt mode).
+        * When this bit is '1', the VNIC shall be configured to
+        * perform transparent packet aggregation (TPA) for
+        * GRE tunneled TCP packets only if all packets have the
+        * same GRE sequence.
         */
-       uint16_t        cmpl_aggr_dma_tmr;
+       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_AGG_WITH_SAME_GRE_SEQ \
+               UINT32_C(0x20)
        /*
-        * Timer in unit of 80-nsec used to aggregate completions before
-        * DMA during the interrupt mode.
+        * When this bit is '1' and the GRO mode is enabled,
+        * the VNIC shall be configured to
+        * perform transparent packet aggregation (TPA) for
+        * TCP/IPv4 packets with consecutively increasing IPIDs.
+        * In other words, the last packet that is being
+        * aggregated to an already existing aggregation context
+        * shall have IPID 1 more than the IPID of the last packet
+        * that was aggregated in that aggregation context.
         */
-       uint16_t        cmpl_aggr_dma_tmr_during_int;
-       /* Minimum time (in unit of 80-nsec) between two interrupts. */
-       uint16_t        int_lat_tmr_min;
+       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO_IPID_CHECK \
+               UINT32_C(0x40)
        /*
-        * Maximum wait time (in unit of 80-nsec) spent aggregating
-        * cmpls before signaling the interrupt after the
-        * interrupt is enabled.
+        * When this bit is '1' and the GRO mode is enabled,
+        * the VNIC shall be configured to
+        * perform transparent packet aggregation (TPA) for
+        * TCP packets with the same TTL (IPv4) or Hop limit (IPv6)
+        * value.
         */
-       uint16_t        int_lat_tmr_max;
+       #define HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO_TTL_CHECK \
+               UINT32_C(0x80)
+       uint32_t        enables;
        /*
-        * Minimum number of completions aggregated before signaling
-        * an interrupt.
+        * This bit must be '1' for the max_agg_segs field to be
+        * configured.
         */
-       uint16_t        num_cmpl_aggr_int;
+       #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGG_SEGS      UINT32_C(0x1)
        /*
-        * Bitfield that indicates which parameters are to be applied. Only
-        * required when configuring devices with notification queues, and
-        * used in that case to set certain parameters on completion queues
-        * and others on notification queues.
+        * This bit must be '1' for the max_aggs field to be
+        * configured.
         */
-       uint16_t        enables;
+       #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGGS          UINT32_C(0x2)
        /*
-        * This bit must be '1' for the num_cmpl_dma_aggr field to be
+        * This bit must be '1' for the max_agg_timer field to be
         * configured.
         */
-       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_ENABLES_NUM_CMPL_DMA_AGGR \
-               UINT32_C(0x1)
+       #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGG_TIMER     UINT32_C(0x4)
        /*
-        * This bit must be '1' for the num_cmpl_dma_aggr_during_int field to be
+        * This bit must be '1' for the min_agg_len field to be
         * configured.
         */
-       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_ENABLES_NUM_CMPL_DMA_AGGR_DURING_INT \
-               UINT32_C(0x2)
+       #define HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MIN_AGG_LEN       UINT32_C(0x8)
+       /* Logical vnic ID */
+       uint16_t        vnic_id;
        /*
-        * This bit must be '1' for the cmpl_aggr_dma_tmr field to be
-        * configured.
+        * This is the maximum number of TCP segments that can
+        * be aggregated (unit is Log2). Max value is 31.
         */
-       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_ENABLES_CMPL_AGGR_DMA_TMR \
-               UINT32_C(0x4)
+       uint16_t        max_agg_segs;
+       /* 1 segment */
+       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_1   UINT32_C(0x0)
+       /* 2 segments */
+       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_2   UINT32_C(0x1)
+       /* 4 segments */
+       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_4   UINT32_C(0x2)
+       /* 8 segments */
+       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_8   UINT32_C(0x3)
+       /* Any segment size larger than this is not valid */
+       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_MAX UINT32_C(0x1f)
+       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_LAST \
+               HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_MAX
        /*
-        * This bit must be '1' for the int_lat_tmr_min field to be
-        * configured.
+        * This is the maximum number of aggregations this VNIC is
+        * allowed (unit is Log2). Max value is 7.
         */
-       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_ENABLES_INT_LAT_TMR_MIN \
-               UINT32_C(0x8)
+       uint16_t        max_aggs;
+       /* 1 aggregation */
+       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_1   UINT32_C(0x0)
+       /* 2 aggregations */
+       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_2   UINT32_C(0x1)
+       /* 4 aggregations */
+       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_4   UINT32_C(0x2)
+       /* 8 aggregations */
+       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_8   UINT32_C(0x3)
+       /* 16 aggregations */
+       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_16  UINT32_C(0x4)
+       /* Any aggregation size larger than this is not valid */
+       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_MAX UINT32_C(0x7)
+       #define HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_LAST \
+               HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_MAX
+       uint8_t unused_0[2];
        /*
-        * This bit must be '1' for the int_lat_tmr_max field to be
-        * configured.
+        * This is the maximum amount of time allowed for
+        * an aggregation context to complete after it was initiated.
         */
-       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_ENABLES_INT_LAT_TMR_MAX \
-               UINT32_C(0x10)
+       uint32_t        max_agg_timer;
        /*
-        * This bit must be '1' for the num_cmpl_aggr_int field to be
-        * configured.
+        * This is the minimum payload length required to
+        * start an aggregation context.
         */
-       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_ENABLES_NUM_CMPL_AGGR_INT \
-               UINT32_C(0x20)
-       uint8_t unused_0[4];
+       uint32_t        min_agg_len;
 } __attribute__((packed));
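
Since max_agg_segs and max_aggs are log2-encoded, a worked example helps: writing MAX_AGG_SEGS_8 allows 2^3 = 8 segments per aggregation, and MAX_AGGS_16 allows 2^4 = 16 concurrent aggregations. A minimal sketch (hypothetical helper, endianness conversion elided):

#include <stdint.h>
#include <string.h>

/* Hypothetical helper: enable TPA with log2-encoded limits. */
static void vnic_tpa_fill(struct hwrm_vnic_tpa_cfg_input *req,
			  uint16_t vnic_id, int gro)
{
	memset(req, 0, sizeof(*req));
	req->vnic_id = vnic_id;
	req->flags = HWRM_VNIC_TPA_CFG_INPUT_FLAGS_TPA |
		     HWRM_VNIC_TPA_CFG_INPUT_FLAGS_ENCAP_TPA;
	if (gro)
		req->flags |= HWRM_VNIC_TPA_CFG_INPUT_FLAGS_GRO;
	/* Each limit below is honored only when its enables bit is set. */
	req->enables = HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGG_SEGS |
		       HWRM_VNIC_TPA_CFG_INPUT_ENABLES_MAX_AGGS;
	req->max_agg_segs = HWRM_VNIC_TPA_CFG_INPUT_MAX_AGG_SEGS_8; /* 2^3 */
	req->max_aggs = HWRM_VNIC_TPA_CFG_INPUT_MAX_AGGS_16;        /* 2^4 */
}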
 
-/* hwrm_ring_cmpl_ring_cfg_aggint_params_output (size:128b/16B) */
-struct hwrm_ring_cmpl_ring_cfg_aggint_params_output {
+/* hwrm_vnic_tpa_cfg_output (size:128b/16B) */
+struct hwrm_vnic_tpa_cfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -19716,13 +19019,13 @@ struct hwrm_ring_cmpl_ring_cfg_aggint_params_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/*******************
- * hwrm_ring_reset *
- *******************/
+/*********************
+ * hwrm_vnic_rss_cfg *
+ *********************/
 
 
-/* hwrm_ring_reset_input (size:192b/24B) */
-struct hwrm_ring_reset_input {
+/* hwrm_vnic_rss_cfg_input (size:384b/48B) */
+struct hwrm_vnic_rss_cfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -19750,26 +19053,103 @@ struct hwrm_ring_reset_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* Ring Type. */
-       uint8_t ring_type;
-       /* L2 Completion Ring (CR) */
-       #define HWRM_RING_RESET_INPUT_RING_TYPE_L2_CMPL   UINT32_C(0x0)
-       /* TX Ring (TR) */
-       #define HWRM_RING_RESET_INPUT_RING_TYPE_TX        UINT32_C(0x1)
-       /* RX Ring (RR) */
-       #define HWRM_RING_RESET_INPUT_RING_TYPE_RX        UINT32_C(0x2)
-       /* RoCE Notification Completion Ring (ROCE_CR) */
-       #define HWRM_RING_RESET_INPUT_RING_TYPE_ROCE_CMPL UINT32_C(0x3)
-       #define HWRM_RING_RESET_INPUT_RING_TYPE_LAST \
-               HWRM_RING_RESET_INPUT_RING_TYPE_ROCE_CMPL
-       uint8_t unused_0;
-       /* Physical number of the ring. */
-       uint16_t        ring_id;
-       uint8_t unused_1[4];
+       uint32_t        hash_type;
+       /*
+        * When this bit is '1', the RSS hash shall be computed
+        * over source and destination IPv4 addresses of IPv4
+        * packets.
+        */
+       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV4         UINT32_C(0x1)
+       /*
+        * When this bit is '1', the RSS hash shall be computed
+        * over source/destination IPv4 addresses and
+        * source/destination ports of TCP/IPv4 packets.
+        */
+       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_TCP_IPV4     UINT32_C(0x2)
+       /*
+        * When this bit is '1', the RSS hash shall be computed
+        * over source/destination IPv4 addresses and
+        * source/destination ports of UDP/IPv4 packets.
+        */
+       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV4     UINT32_C(0x4)
+       /*
+        * When this bit is '1', the RSS hash shall be computed
+        * over source and destination IPv6 addresses of IPv6
+        * packets.
+        */
+       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV6         UINT32_C(0x8)
+       /*
+        * When this bit is '1', the RSS hash shall be computed
+        * over source/destination IPv6 addresses and
+        * source/destination ports of TCP/IPv6 packets.
+        */
+       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_TCP_IPV6     UINT32_C(0x10)
+       /*
+        * When this bit is '1', the RSS hash shall be computed
+        * over source/destination IPv6 addresses and
+        * source/destination ports of UDP/IPv6 packets.
+        */
+       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV6     UINT32_C(0x20)
+       /* VNIC ID of VNIC associated with RSS table being configured. */
+       uint16_t        vnic_id;
+       /*
+        * Specifies which VNIC ring table pair to configure.
+        * Valid values range from 0 to 7.
+        */
+       uint8_t ring_table_pair_index;
+       /* Flags to specify different RSS hash modes. */
+       uint8_t hash_mode_flags;
+       /*
+        * When this bit is '1', it indicates using the current RSS
+        * hash mode setting configured in the device.
+        */
+       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_MODE_FLAGS_DEFAULT \
+               UINT32_C(0x1)
+       /*
+        * When this bit is '1', it indicates requesting support of
+        * RSS hashing over innermost 4 tuples {l3.src, l3.dest,
+        * l4.src, l4.dest} for tunnel packets. For non-tunnel
+        * packets, the RSS hash is computed over the normal
+        * src/dest l3 and src/dest l4 headers.
+        */
+       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_MODE_FLAGS_INNERMOST_4 \
+               UINT32_C(0x2)
+       /*
+        * When this bit is '1', it indicates requesting support of
+        * RSS hashing over innermost 2 tuples {l3.src, l3.dest} for
+        * tunnel packets. For non-tunnel packets, the RSS hash is
+        * computed over the normal src/dest l3 headers.
+        */
+       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_MODE_FLAGS_INNERMOST_2 \
+               UINT32_C(0x4)
+       /*
+        * When this bit is '1', it indicates requesting support of
+        * RSS hashing over outermost 4 tuples {t_l3.src, t_l3.dest,
+        * t_l4.src, t_l4.dest} for tunnel packets. For non-tunnel
+        * packets, the RSS hash is computed over the normal
+        * src/dest l3 and src/dest l4 headers.
+        */
+       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_MODE_FLAGS_OUTERMOST_4 \
+               UINT32_C(0x8)
+       /*
+        * When this bit is '1', it indicates requesting support of
+        * RSS hashing over outermost 2 tuples {t_l3.src, t_l3.dest} for
+        * tunnel packets. For non-tunnel packets, the RSS hash is
+        * computed over the normal src/dest l3 headers.
+        */
+       #define HWRM_VNIC_RSS_CFG_INPUT_HASH_MODE_FLAGS_OUTERMOST_2 \
+               UINT32_C(0x10)
+       /* This is the address of the RSS ring group table. */
+       uint64_t        ring_grp_tbl_addr;
+       /* This is the address of the RSS hash key table. */
+       uint64_t        hash_key_tbl_addr;
+       /* Index to the RSS indirection table. */
+       uint16_t        rss_ctx_idx;
+       uint8_t unused_1[6];
 } __attribute__((packed));
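
A hedged sketch of building the request above: the hash_type bits are ORed together, and the two table addresses must be physical (DMA-able) addresses of the ring group table and the hash key (40 bytes, matching the hash_key[10] array in the qcfg output below). The helper name and endianness handling are assumptions:

#include <stdint.h>
#include <string.h>

/* Hypothetical helper: hash TCP/UDP over IPv4/IPv6 on 4-tuples. */
static void vnic_rss_fill(struct hwrm_vnic_rss_cfg_input *req,
			  uint16_t vnic_id, uint16_t rss_ctx_idx,
			  uint64_t grp_tbl_pa, uint64_t key_tbl_pa)
{
	memset(req, 0, sizeof(*req));
	req->hash_type = HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV4 |
			 HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_TCP_IPV4 |
			 HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV4 |
			 HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_IPV6 |
			 HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_TCP_IPV6 |
			 HWRM_VNIC_RSS_CFG_INPUT_HASH_TYPE_UDP_IPV6;
	req->vnic_id = vnic_id;
	req->rss_ctx_idx = rss_ctx_idx;
	/* Physical (DMA) addresses of the ring group and key tables. */
	req->ring_grp_tbl_addr = grp_tbl_pa;
	req->hash_key_tbl_addr = key_tbl_pa;
}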
 
-/* hwrm_ring_reset_output (size:128b/16B) */
-struct hwrm_ring_reset_output {
+/* hwrm_vnic_rss_cfg_output (size:128b/16B) */
+struct hwrm_vnic_rss_cfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -19789,13 +19169,13 @@ struct hwrm_ring_reset_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/***********************
- * hwrm_ring_grp_alloc *
- ***********************/
+/**********************
+ * hwrm_vnic_rss_qcfg *
+ **********************/
 
 
-/* hwrm_ring_grp_alloc_input (size:192b/24B) */
-struct hwrm_ring_grp_alloc_input {
+/* hwrm_vnic_rss_qcfg_input (size:192b/24B) */
+struct hwrm_vnic_rss_qcfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -19823,31 +19203,13 @@ struct hwrm_ring_grp_alloc_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /*
-        * This value identifies the CR associated with the ring
-        * group.
-        */
-       uint16_t        cr;
-       /*
-        * This value identifies the main RR associated with the ring
-        * group.
-        */
-       uint16_t        rr;
-       /*
-        * This value identifies the aggregation RR associated with
-        * the ring group.  If this value is 0xFF... (All Fs), then no
-        * Aggregation ring will be set.
-        */
-       uint16_t        ar;
-       /*
-        * This value identifies the statistics context associated
-        * with the ring group.
-        */
-       uint16_t        sc;
+       /* Index to the RSS indirection table. */
+       uint16_t        rss_ctx_idx;
+       uint8_t unused_0[6];
 } __attribute__((packed));
 
-/* hwrm_ring_grp_alloc_output (size:128b/16B) */
-struct hwrm_ring_grp_alloc_output {
+/* hwrm_vnic_rss_qcfg_output (size:512b/64B) */
+struct hwrm_vnic_rss_qcfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -19856,73 +19218,89 @@ struct hwrm_ring_grp_alloc_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
+       uint32_t        hash_type;
        /*
-        * This is the ring group ID value.  Use this value to program
-        * the default ring group for the VNIC or as table entries
-        * in an RSS/COS context.
+        * When this bit is '1', the RSS hash shall be computed
+        * over source and destination IPv4 addresses of IPv4
+        * packets.
         */
-       uint32_t        ring_group_id;
-       uint8_t unused_0[3];
+       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_TYPE_IPV4         UINT32_C(0x1)
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * When this bit is '1', the RSS hash shall be computed
+        * over source/destination IPv4 addresses and
+        * source/destination ports of TCP/IPv4 packets.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/**********************
- * hwrm_ring_grp_free *
- **********************/
-
-
-/* hwrm_ring_grp_free_input (size:192b/24B) */
-struct hwrm_ring_grp_free_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_TYPE_TCP_IPV4     UINT32_C(0x2)
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * When this bit is '1', the RSS hash shall be computed
+        * over source/destination IPv4 addresses and
+        * source/destination ports of UDP/IPv4 packets.
+        */
+       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_TYPE_UDP_IPV4     UINT32_C(0x4)
+       /*
+        * When this bit is '1', the RSS hash shall be computed
+        * over source and destination IPv6 addresses of IPv6
+        * packets.
+        */
+       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_TYPE_IPV6         UINT32_C(0x8)
+       /*
+        * When this bit is '1', the RSS hash shall be computed
+        * over source/destination IPv6 addresses and
+        * source/destination ports of TCP/IPv6 packets.
+        */
+       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_TYPE_TCP_IPV6     UINT32_C(0x10)
+       /*
+        * When this bit is '1', the RSS hash shall be computed
+        * over source/destination IPv6 addresses and
+        * source/destination ports of UDP/IPv6 packets.
+        */
+       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_TYPE_UDP_IPV6     UINT32_C(0x20)
+       uint8_t unused_0[4];
+       /* This is the value of the RSS hash key. */
+       uint32_t        hash_key[10];
+       /* Flags to specify different RSS hash modes. */
+       uint8_t hash_mode_flags;
+       /*
+        * When this bit is '1', it indicates using the current RSS
+        * hash mode setting configured in the device.
+        */
+       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_MODE_FLAGS_DEFAULT \
+               UINT32_C(0x1)
+       /*
+        * When this bit is '1', it indicates requesting support of
+        * RSS hashing over innermost 4 tuples {l3.src, l3.dest,
+        * l4.src, l4.dest} for tunnel packets. For non-tunnel
+        * packets, the RSS hash is computed over the normal
+        * src/dest l3 and src/dest l4 headers.
         */
-       uint16_t        cmpl_ring;
+       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_MODE_FLAGS_INNERMOST_4 \
+               UINT32_C(0x2)
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * When this bit is '1', it indicates requesting support of
+        * RSS hashing over innermost 2 tuples {l3.src, l3.dest} for
+        * tunnel packets. For non-tunnel packets, the RSS hash is
+        * computed over the normal src/dest l3 headers.
         */
-       uint16_t        seq_id;
+       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_MODE_FLAGS_INNERMOST_2 \
+               UINT32_C(0x4)
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * When this bit is '1', it indicates requesting support of
+        * RSS hashing over outermost 4 tuples {t_l3.src, t_l3.dest,
+        * t_l4.src, t_l4.dest} for tunnel packets. For non-tunnel
+        * packets, the RSS hash is computed over the normal
+        * src/dest l3 and src/dest l4 headers.
         */
-       uint16_t        target_id;
+       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_MODE_FLAGS_OUTERMOST_4 \
+               UINT32_C(0x8)
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * When this bit is '1', it indicates requesting support of
+        * RSS hashing over outermost 2 tuples {t_l3.src, t_l3.dest} for
+        * tunnel packets. For non-tunnel packets, the RSS hash is
+        * computed over the normal src/dest l3 headers.
         */
-       uint64_t        resp_addr;
-       /* This is the ring group ID value. */
-       uint32_t        ring_group_id;
-       uint8_t unused_0[4];
-} __attribute__((packed));
-
-/* hwrm_ring_grp_free_output (size:128b/16B) */
-struct hwrm_ring_grp_free_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t unused_0[7];
+       #define HWRM_VNIC_RSS_QCFG_OUTPUT_HASH_MODE_FLAGS_OUTERMOST_2 \
+               UINT32_C(0x10)
+       uint8_t unused_1[6];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -19933,13 +19311,13 @@ struct hwrm_ring_grp_free_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/****************************
- * hwrm_cfa_l2_filter_alloc *
- ****************************/
+/**************************
+ * hwrm_vnic_plcmodes_cfg *
+ **************************/
 
 
-/* hwrm_cfa_l2_filter_alloc_input (size:768b/96B) */
-struct hwrm_cfa_l2_filter_alloc_input {
+/* hwrm_vnic_plcmodes_cfg_input (size:320b/40B) */
+struct hwrm_vnic_plcmodes_cfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -19969,344 +19347,251 @@ struct hwrm_cfa_l2_filter_alloc_input {
        uint64_t        resp_addr;
        uint32_t        flags;
        /*
-        * Enumeration denoting the RX, TX type of the resource.
-        * This enumeration is used for resources that are similar for both
-        * TX and RX paths of the chip.
-        */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH \
-               UINT32_C(0x1)
-       /* tx path */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH_TX \
-               UINT32_C(0x0)
-       /* rx path */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH_RX \
-               UINT32_C(0x1)
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH_LAST \
-               HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH_RX
-       /* Setting of this flag indicates the applicability to the loopback path. */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_LOOPBACK \
-               UINT32_C(0x2)
-       /*
-        * Setting of this flag indicates drop action. If this flag is not set,
-        * then it should be considered accept action.
-        */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_DROP \
-               UINT32_C(0x4)
-       /*
-        * If this flag is set, all t_l2_* fields are invalid
-        * and they should not be specified.
-        * If this flag is set, then l2_* fields refer to
-        * fields of outermost L2 header.
-        */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_OUTERMOST \
-               UINT32_C(0x8)
-       uint32_t        enables;
-       /*
-        * This bit must be '1' for the l2_addr field to be
-        * configured.
+        * When this bit is '1', the VNIC shall be configured to
+        * use the regular placement algorithm.
+        * By default, the regular placement algorithm shall be
+        * enabled on the VNIC.
         */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR \
+       #define HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_REGULAR_PLACEMENT \
                UINT32_C(0x1)
        /*
-        * This bit must be '1' for the l2_addr_mask field to be
-        * configured.
+        * When this bit is '1', the VNIC shall be configured
+        * to use the jumbo placement algorithm.
         */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR_MASK \
+       #define HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_JUMBO_PLACEMENT \
                UINT32_C(0x2)
        /*
-        * This bit must be '1' for the l2_ovlan field to be
-        * configured.
+        * When this bit is '1', the VNIC shall be configured
+        * to enable Header-Data split for IPv4 packets according
+        * to the following rules:
+        * # If the packet is identified as TCP/IPv4, then the
+        * packet is split at the beginning of the TCP payload.
+        * # If the packet is identified as UDP/IPv4, then the
+        * packet is split at the beginning of UDP payload.
+        * # If the packet is identified as non-TCP and non-UDP
+        * IPv4 packet, then the packet is split at the beginning
+        * of the upper layer protocol header carried in the IPv4
+        * packet.
         */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_OVLAN \
+       #define HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_HDS_IPV4 \
                UINT32_C(0x4)
        /*
-        * This bit must be '1' for the l2_ovlan_mask field to be
-        * configured.
+        * When this bit is '1', the VNIC shall be configured
+        * to enable Header-Data split for IPv6 packets according
+        * to the following rules:
+        * # If the packet is identified as TCP/IPv6, then the
+        * packet is split at the beginning of the TCP payload.
+        * # If the packet is identified as UDP/IPv6, then the
+        * packet is split at the beginning of UDP payload.
+        * # If the packet is identified as non-TCP and non-UDP
+        * IPv6 packet, then the packet is split at the beginning
+        * of the upper layer protocol header carried in the IPv6
+        * packet.
         */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_OVLAN_MASK \
+       #define HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_HDS_IPV6 \
                UINT32_C(0x8)
        /*
-        * This bit must be '1' for the l2_ivlan field to be
-        * configured.
+        * When this bit is '1', the VNIC shall be configured
+        * to enable Header-Data split for FCoE packets at the
+        * beginning of FC payload.
         */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_IVLAN \
+       #define HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_HDS_FCOE \
                UINT32_C(0x10)
        /*
-        * This bit must be '1' for the l2_ivlan_mask field to be
-        * configured.
+        * When this bit is '1', the VNIC shall be configured
+        * to enable Header-Data split for RoCE packets at the
+        * beginning of RoCE payload (after BTH/GRH headers).
         */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_IVLAN_MASK \
+       #define HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_HDS_ROCE \
                UINT32_C(0x20)
+       uint32_t        enables;
        /*
-        * This bit must be '1' for the t_l2_addr field to be
-        * configured.
-        */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_T_L2_ADDR \
-               UINT32_C(0x40)
-       /*
-        * This bit must be '1' for the t_l2_addr_mask field to be
-        * configured.
-        */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_T_L2_ADDR_MASK \
-               UINT32_C(0x80)
-       /*
-        * This bit must be '1' for the t_l2_ovlan field to be
-        * configured.
-        */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_T_L2_OVLAN \
-               UINT32_C(0x100)
-       /*
-        * This bit must be '1' for the t_l2_ovlan_mask field to be
-        * configured.
-        */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_T_L2_OVLAN_MASK \
-               UINT32_C(0x200)
-       /*
-        * This bit must be '1' for the t_l2_ivlan field to be
-        * configured.
-        */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_T_L2_IVLAN \
-               UINT32_C(0x400)
-       /*
-        * This bit must be '1' for the t_l2_ivlan_mask field to be
-        * configured.
-        */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_T_L2_IVLAN_MASK \
-               UINT32_C(0x800)
-       /*
-        * This bit must be '1' for the src_type field to be
+        * This bit must be '1' for the jumbo_thresh field to be
         * configured.
         */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_SRC_TYPE \
-               UINT32_C(0x1000)
+       #define HWRM_VNIC_PLCMODES_CFG_INPUT_ENABLES_JUMBO_THRESH_VALID \
+               UINT32_C(0x1)
        /*
-        * This bit must be '1' for the src_id field to be
+        * This bit must be '1' for the hds_offset field to be
         * configured.
         */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_SRC_ID \
-               UINT32_C(0x2000)
+       #define HWRM_VNIC_PLCMODES_CFG_INPUT_ENABLES_HDS_OFFSET_VALID \
+               UINT32_C(0x2)
        /*
-        * This bit must be '1' for the tunnel_type field to be
+        * This bit must be '1' for the hds_threshold field to be
         * configured.
         */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_TYPE \
-               UINT32_C(0x4000)
+       #define HWRM_VNIC_PLCMODES_CFG_INPUT_ENABLES_HDS_THRESHOLD_VALID \
+               UINT32_C(0x4)
+       /* Logical vnic ID */
+       uint32_t        vnic_id;
        /*
-        * This bit must be '1' for the dst_id field to be
-        * configured.
+        * When the jumbo placement algorithm is enabled, this value
+        * is used to determine the threshold for jumbo placement.
+        * Packets with length larger than this value will be
+        * placed according to the jumbo placement algorithm.
         */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_DST_ID \
-               UINT32_C(0x8000)
+       uint16_t        jumbo_thresh;
        /*
-        * This bit must be '1' for the mirror_vnic_id field to be
-        * configured.
+        * This value is used to determine the offset into the
+        * packet buffer where the split data (payload) will be
+        * placed according to one of the HDS placement algorithms.
+        *
+        * The lengths of packet buffers provided for split data
+        * shall be larger than this value.
         */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID \
-               UINT32_C(0x10000)
+       uint16_t        hds_offset;
        /*
-        * This value sets the match value for the L2 MAC address.
-        * Destination MAC address for RX path.
-        * Source MAC address for TX path.
+        * When one of the HDS placement algorithms is enabled, this
+        * value is used to determine the threshold for HDS
+        * placement.
+        * Packets with length larger than this value will be
+        * placed according to the HDS placement algorithm.
+        * This value shall be a multiple of 4 bytes.
         */
-       uint8_t l2_addr[6];
-       uint8_t unused_0[2];
+       uint16_t        hds_threshold;
+       uint8_t unused_0[6];
+} __attribute__((packed));
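
Tying the placement flags, enables bits, and thresholds together, a minimal sketch (hypothetical helper, endianness conversion elided); note that hds_threshold must be a multiple of 4 bytes per the comment above:

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* Hypothetical helper: jumbo placement plus IPv4/IPv6 header-data split. */
static void vnic_plcmodes_fill(struct hwrm_vnic_plcmodes_cfg_input *req,
			       uint32_t vnic_id, uint16_t jumbo_thresh,
			       uint16_t hds_threshold)
{
	assert(hds_threshold % 4 == 0); /* spec: multiple of 4 bytes */
	memset(req, 0, sizeof(*req));
	req->vnic_id = vnic_id;
	req->flags = HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_JUMBO_PLACEMENT |
		     HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_HDS_IPV4 |
		     HWRM_VNIC_PLCMODES_CFG_INPUT_FLAGS_HDS_IPV6;
	/* Each threshold is honored only when its enables bit is set. */
	req->enables =
		HWRM_VNIC_PLCMODES_CFG_INPUT_ENABLES_JUMBO_THRESH_VALID |
		HWRM_VNIC_PLCMODES_CFG_INPUT_ENABLES_HDS_THRESHOLD_VALID;
	req->jumbo_thresh = jumbo_thresh;
	req->hds_threshold = hds_threshold;
}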
+
+/* hwrm_vnic_plcmodes_cfg_output (size:128b/16B) */
+struct hwrm_vnic_plcmodes_cfg_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       uint8_t unused_0[7];
        /*
-        * This value sets the mask value for the L2 address.
-        * A value of 0 will mask the corresponding bit from
-        * compare.
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       uint8_t l2_addr_mask[6];
-       /* This value sets VLAN ID value for outer VLAN. */
-       uint16_t        l2_ovlan;
+       uint8_t valid;
+} __attribute__((packed));
+
+/***************************
+ * hwrm_vnic_plcmodes_qcfg *
+ ***************************/
+
+
+/* hwrm_vnic_plcmodes_qcfg_input (size:192b/24B) */
+struct hwrm_vnic_plcmodes_qcfg_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * This value sets the mask value for the ovlan id.
-        * A value of 0 will mask the corresponding bit from
-        * compare.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       uint16_t        l2_ovlan_mask;
-       /* This value sets VLAN ID value for inner VLAN. */
-       uint16_t        l2_ivlan;
+       uint16_t        cmpl_ring;
        /*
-        * This value sets the mask value for the ivlan id.
-        * A value of 0 will mask the corresponding bit from
-        * compare.
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       uint16_t        l2_ivlan_mask;
-       uint8_t unused_1[2];
+       uint16_t        seq_id;
        /*
-        * This value sets the match value for the tunnel
-        * L2 MAC address.
-        * Destination MAC address for RX path.
-        * Source MAC address for TX path.
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       uint8_t t_l2_addr[6];
-       uint8_t unused_2[2];
+       uint16_t        target_id;
        /*
-        * This value sets the mask value for the tunnel L2
-        * address.
-        * A value of 0 will mask the corresponding bit from
-        * compare.
+        * A physical address pointer pointing to a host buffer that the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       uint8_t t_l2_addr_mask[6];
-       /* This value sets VLAN ID value for tunnel outer VLAN. */
-       uint16_t        t_l2_ovlan;
+       uint64_t        resp_addr;
+       /* Logical vnic ID */
+       uint32_t        vnic_id;
+       uint8_t unused_0[4];
+} __attribute__((packed));
+
+/* hwrm_vnic_plcmodes_qcfg_output (size:192b/24B) */
+struct hwrm_vnic_plcmodes_qcfg_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       uint32_t        flags;
        /*
-        * This value sets the mask value for the tunnel ovlan id.
-        * A value of 0 will mask the corresponding bit from
-        * compare.
+        * When this bit is '1', the VNIC is configured to
+        * use the regular placement algorithm.
         */
-       uint16_t        t_l2_ovlan_mask;
-       /* This value sets VLAN ID value for tunnel inner VLAN. */
-       uint16_t        t_l2_ivlan;
+       #define HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_REGULAR_PLACEMENT \
+               UINT32_C(0x1)
        /*
-        * This value sets the mask value for the tunnel ivlan id.
-        * A value of 0 will mask the corresponding bit from
-        * compare.
+        * When this bit is '1', the VNIC is configured to
+        * use the jumbo placement algorithm.
         */
-       uint16_t        t_l2_ivlan_mask;
-       /* This value identifies the type of source of the packet. */
-       uint8_t src_type;
-       /* Network port */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_NPORT UINT32_C(0x0)
-       /* Physical function */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_PF    UINT32_C(0x1)
-       /* Virtual function */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_VF    UINT32_C(0x2)
-       /* Virtual NIC of a function */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_VNIC  UINT32_C(0x3)
-       /* Embedded processor for CFA management */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_KONG  UINT32_C(0x4)
-       /* Embedded processor for OOB management */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_APE   UINT32_C(0x5)
-       /* Embedded processor for RoCE */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_BONO  UINT32_C(0x6)
-       /* Embedded processor for network proxy functions */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_TANG  UINT32_C(0x7)
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_LAST \
-               HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_TANG
-       uint8_t unused_3;
+       #define HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_JUMBO_PLACEMENT \
+               UINT32_C(0x2)
        /*
-        * This value is the id of the source.
-        * For a network port, it represents port_id.
-        * For a physical function, it represents fid.
-        * For a virtual function, it represents vf_id.
-        * For a vnic, it represents vnic_id.
-        * For embedded processors, this id is not valid.
-        *
-        * Notes:
-        * 1. The function ID is implied if it src_id is
-        *    not provided for a src_type that is either
+        * When this bit is '1', the VNIC is configured
+        * to enable Header-Data split for IPv4 packets.
         */
-       uint32_t        src_id;
-       /* Tunnel Type. */
-       uint8_t tunnel_type;
-       /* Non-tunnel */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL \
-               UINT32_C(0x0)
-       /* Virtual eXtensible Local Area Network (VXLAN) */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
-               UINT32_C(0x1)
-       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NVGRE \
-               UINT32_C(0x2)
-       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_L2GRE \
-               UINT32_C(0x3)
-       /* IP in IP */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPIP \
+       #define HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_HDS_IPV4 \
                UINT32_C(0x4)
-       /* Generic Network Virtualization Encapsulation (Geneve) */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
-               UINT32_C(0x5)
-       /* Multi-Protocol Lable Switching (MPLS) */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_MPLS \
-               UINT32_C(0x6)
-       /* Stateless Transport Tunnel (STT) */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_STT \
-               UINT32_C(0x7)
-       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPGRE \
+       /*
+        * When this bit is '1', the VNIC is configured
+        * to enable Header-Data split for IPv6 packets.
+        */
+       #define HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_HDS_IPV6 \
                UINT32_C(0x8)
-       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
-               UINT32_C(0x9)
-       /* Any tunneled traffic */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
-               UINT32_C(0xff)
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_LAST \
-               HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL
-       uint8_t unused_4;
        /*
-        * If set, this value shall represent the
-        * Logical VNIC ID of the destination VNIC for the RX
-        * path and network port id of the destination port for
-        * the TX path.
+        * When this bit is '1', the VNIC is configured
+        * to enable Header-Data split for FCoE packets.
         */
-       uint16_t        dst_id;
+       #define HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_HDS_FCOE \
+               UINT32_C(0x10)
        /*
-        * Logical VNIC ID of the VNIC where traffic is
-        * mirrored.
+        * When this bit is '1', the VNIC is configured
+        * to enable Header-Data split for RoCE packets.
         */
-       uint16_t        mirror_vnic_id;
+       #define HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_HDS_ROCE \
+               UINT32_C(0x20)
        /*
-        * This hint is provided to help in placing
-        * the filter in the filter table.
+        * When this bit is '1', the VNIC is configured
+        * to be the default VNIC of the requesting function.
         */
-       uint8_t pri_hint;
-       /* No preference */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_PRI_HINT_NO_PREFER \
-               UINT32_C(0x0)
-       /* Above the given filter */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_PRI_HINT_ABOVE_FILTER \
-               UINT32_C(0x1)
-       /* Below the given filter */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_PRI_HINT_BELOW_FILTER \
-               UINT32_C(0x2)
-       /* As high as possible */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_PRI_HINT_MAX \
-               UINT32_C(0x3)
-       /* As low as possible */
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_PRI_HINT_MIN \
-               UINT32_C(0x4)
-       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_PRI_HINT_LAST \
-               HWRM_CFA_L2_FILTER_ALLOC_INPUT_PRI_HINT_MIN
-       uint8_t unused_5;
-       uint32_t        unused_6;
+       #define HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_DFLT_VNIC \
+               UINT32_C(0x40)
        /*
-        * This is the ID of the filter that goes along with
-        * the pri_hint.
-        *
-        * This field is valid only for the following values.
-        * 1 - Above the given filter
-        * 2 - Below the given filter
+        * When the jumbo placement algorithm is enabled, this value
+        * is used to determine the threshold for jumbo placement.
+        * Packets with length larger than this value will be
+        * placed according to the jumbo placement algorithm.
         */
-       uint64_t        l2_filter_id_hint;
-} __attribute__((packed));
-
-/* hwrm_cfa_l2_filter_alloc_output (size:192b/24B) */
-struct hwrm_cfa_l2_filter_alloc_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
+       uint16_t        jumbo_thresh;
        /*
-        * This value identifies a set of CFA data structures used for an L2
-        * context.
+        * This value is used to determine the offset into the
+        * packet buffer where the split data (payload) will be
+        * placed according to one of the HDS placement algorithms.
+        *
+        * The lengths of packet buffers provided for split data
+        * shall be larger than this value.
         */
-       uint64_t        l2_filter_id;
+       uint16_t        hds_offset;
        /*
-        * This is the ID of the flow associated with this
-        * filter.
-        * This value shall be used to match and associate the
-        * flow identifier returned in completion records.
-        * A value of 0xFFFFFFFF shall indicate no flow id.
+        * When one of the HDS placement algorithms is enabled, this
+        * value is used to determine the threshold for HDS
+        * placement.
+        * Packets with length larger than this value will be
+        * placed according to the HDS placement algorithm.
+        * This value shall be a multiple of 4 bytes.
         */
-       uint32_t        flow_id;
-       uint8_t unused_0[3];
+       uint16_t        hds_threshold;
+       uint8_t unused_0[5];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -20317,13 +19602,13 @@ struct hwrm_cfa_l2_filter_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
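
A minimal sketch of consuming this response, for orientation only: it assumes the header above is included together with <stdio.h> and rte_byteorder.h, and that the caller has already extracted the response's 32-bit FLAGS word (defined earlier in this structure) into host byte order.

```c
#include <stdio.h>
#include <rte_byteorder.h>

/* Sketch: report the queried VNIC placement modes. "flags" is the
 * response's HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_* word, already
 * converted to host byte order by the caller.
 */
static void
dump_plcmodes(const struct hwrm_vnic_plcmodes_qcfg_output *resp,
	      uint32_t flags)
{
	uint16_t hds_thresh = rte_le_to_cpu_16(resp->hds_threshold);

	if (flags & HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_JUMBO_PLACEMENT)
		printf("jumbo placement above %u bytes\n",
		       rte_le_to_cpu_16(resp->jumbo_thresh));
	if (flags & HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_HDS_IPV4)
		printf("header-data split enabled for IPv4\n");
	if (flags & HWRM_VNIC_PLCMODES_QCFG_OUTPUT_FLAGS_HDS_IPV6)
		printf("header-data split enabled for IPv6\n");
	/* per the field comment, hds_threshold is a multiple of 4 bytes */
	if (hds_thresh & 0x3)
		printf("warning: hds_threshold %u not 4-byte aligned\n",
		       hds_thresh);
}
```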
 
-/***************************
- * hwrm_cfa_l2_filter_free *
- ***************************/
+/**********************************
+ * hwrm_vnic_rss_cos_lb_ctx_alloc *
+ **********************************/
 
 
-/* hwrm_cfa_l2_filter_free_input (size:192b/24B) */
-struct hwrm_cfa_l2_filter_free_input {
+/* hwrm_vnic_rss_cos_lb_ctx_alloc_input (size:128b/16B) */
+struct hwrm_vnic_rss_cos_lb_ctx_alloc_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -20351,15 +19636,10 @@ struct hwrm_cfa_l2_filter_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /*
-        * This value identifies a set of CFA data structures used for an L2
-        * context.
-        */
-       uint64_t        l2_filter_id;
 } __attribute__((packed));
 
-/* hwrm_cfa_l2_filter_free_output (size:128b/16B) */
-struct hwrm_cfa_l2_filter_free_output {
+/* hwrm_vnic_rss_cos_lb_ctx_alloc_output (size:128b/16B) */
+struct hwrm_vnic_rss_cos_lb_ctx_alloc_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -20368,7 +19648,9 @@ struct hwrm_cfa_l2_filter_free_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       uint8_t unused_0[7];
+       /* rss_cos_lb_ctx_id is 16 b */
+       uint16_t        rss_cos_lb_ctx_id;
+       uint8_t unused_0[5];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -20379,13 +19661,13 @@ struct hwrm_cfa_l2_filter_free_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/**************************
- * hwrm_cfa_l2_filter_cfg *
- **************************/
+/*********************************
+ * hwrm_vnic_rss_cos_lb_ctx_free *
+ *********************************/
 
 
-/* hwrm_cfa_l2_filter_cfg_input (size:320b/40B) */
-struct hwrm_cfa_l2_filter_cfg_input {
+/* hwrm_vnic_rss_cos_lb_ctx_free_input (size:192b/24B) */
+struct hwrm_vnic_rss_cos_lb_ctx_free_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -20413,58 +19695,13 @@ struct hwrm_cfa_l2_filter_cfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint32_t        flags;
-       /*
-        * Enumeration denoting the RX, TX type of the resource.
-        * This enumeration is used for resources that are similar for both
-        * TX and RX paths of the chip.
-        */
-       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_PATH     UINT32_C(0x1)
-       /* tx path */
-       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_PATH_TX    UINT32_C(0x0)
-       /* rx path */
-       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_PATH_RX    UINT32_C(0x1)
-       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_PATH_LAST \
-               HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_PATH_RX
-       /*
-        * Setting of this flag indicates drop action. If this flag is not set,
-        * then it should be considered accept action.
-        */
-       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_DROP     UINT32_C(0x2)
-       uint32_t        enables;
-       /*
-        * This bit must be '1' for the dst_id field to be
-        * configured.
-        */
-       #define HWRM_CFA_L2_FILTER_CFG_INPUT_ENABLES_DST_ID \
-               UINT32_C(0x1)
-       /*
-        * This bit must be '1' for the new_mirror_vnic_id field to be
-        * configured.
-        */
-       #define HWRM_CFA_L2_FILTER_CFG_INPUT_ENABLES_NEW_MIRROR_VNIC_ID \
-               UINT32_C(0x2)
-       /*
-        * This value identifies a set of CFA data structures used for an L2
-        * context.
-        */
-       uint64_t        l2_filter_id;
-       /*
-        * If set, this value shall represent the
-        * Logical VNIC ID of the destination VNIC for the RX
-        * path and network port id of the destination port for
-        * the TX path.
-        */
-       uint32_t        dst_id;
-       /*
-        * New Logical VNIC ID of the VNIC where traffic is
-        * mirrored.
-        */
-       uint32_t        new_mirror_vnic_id;
+       /* rss_cos_lb_ctx_id is 16 b */
+       uint16_t        rss_cos_lb_ctx_id;
+       uint8_t unused_0[6];
 } __attribute__((packed));
 
-/* hwrm_cfa_l2_filter_cfg_output (size:128b/16B) */
-struct hwrm_cfa_l2_filter_cfg_output {
+/* hwrm_vnic_rss_cos_lb_ctx_free_output (size:128b/16B) */
+struct hwrm_vnic_rss_cos_lb_ctx_free_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -20484,13 +19721,13 @@ struct hwrm_cfa_l2_filter_cfg_output {
        uint8_t valid;
 } __attribute__((packed));
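
The alloc/free pair above is symmetric around the 16-bit context ID. A hedged sketch of the round trip: bnxt_hwrm_send() is a hypothetical transport helper that fills the common request header (req_type, seq_id, resp_addr) and DMAs the firmware response back, and the HWRM_VNIC_RSS_COS_LB_CTX_* command IDs are the ones defined elsewhere in this header.

```c
#include <rte_byteorder.h>

/* Sketch: allocate an RSS/CoS/LB context, use it, then free it. */
static int
rss_ctx_roundtrip(void *hwrm)
{
	struct hwrm_vnic_rss_cos_lb_ctx_alloc_output aresp = { 0 };
	struct hwrm_vnic_rss_cos_lb_ctx_free_input freq = { 0 };
	struct hwrm_vnic_rss_cos_lb_ctx_free_output fresp = { 0 };
	uint16_t ctx_id;
	int rc;

	/* the alloc request has no payload beyond the common header */
	rc = bnxt_hwrm_send(hwrm, HWRM_VNIC_RSS_COS_LB_CTX_ALLOC,
			    NULL, 0, &aresp, sizeof(aresp));
	if (rc != 0)
		return rc;
	ctx_id = rte_le_to_cpu_16(aresp.rss_cos_lb_ctx_id);

	/* ... program ctx_id into the VNIC's RSS configuration ... */

	freq.rss_cos_lb_ctx_id = rte_cpu_to_le_16(ctx_id);
	return bnxt_hwrm_send(hwrm, HWRM_VNIC_RSS_COS_LB_CTX_FREE,
			      &freq, sizeof(freq), &fresp, sizeof(fresp));
}
```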
 
-/***************************
- * hwrm_cfa_l2_set_rx_mask *
- ***************************/
+/*******************
+ * hwrm_ring_alloc *
+ *******************/
 
 
-/* hwrm_cfa_l2_set_rx_mask_input (size:448b/56B) */
-struct hwrm_cfa_l2_set_rx_mask_input {
+/* hwrm_ring_alloc_input (size:704b/88B) */
+struct hwrm_ring_alloc_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -20518,134 +19755,286 @@ struct hwrm_cfa_l2_set_rx_mask_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* VNIC ID */
-       uint32_t        vnic_id;
-       uint32_t        mask;
+       uint32_t        enables;
        /*
-        * When this bit is '1', the function is requested to accept
-        * multi-cast packets specified by the multicast addr table.
+        * This bit must be '1' for the ring_arb_cfg field to be
+        * configured.
         */
-       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_MCAST \
+       #define HWRM_RING_ALLOC_INPUT_ENABLES_RING_ARB_CFG \
                UINT32_C(0x2)
        /*
-        * When this bit is '1', the function is requested to accept
-        * all multi-cast packets.
+        * This bit must be '1' for the stat_ctx_id_valid field to be
+        * configured.
         */
-       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST \
-               UINT32_C(0x4)
+       #define HWRM_RING_ALLOC_INPUT_ENABLES_STAT_CTX_ID_VALID \
+               UINT32_C(0x8)
        /*
-        * When this bit is '1', the function is requested to accept
-        * broadcast packets.
+        * This bit must be '1' for the max_bw_valid field to be
+        * configured.
+        */
+       #define HWRM_RING_ALLOC_INPUT_ENABLES_MAX_BW_VALID \
+               UINT32_C(0x20)
+       /*
+        * This bit must be '1' for the rx_ring_id field to be
+        * configured.
+        */
+       #define HWRM_RING_ALLOC_INPUT_ENABLES_RX_RING_ID_VALID \
+               UINT32_C(0x40)
+       /*
+        * This bit must be '1' for the nq_ring_id field to be
+        * configured.
+        */
+       #define HWRM_RING_ALLOC_INPUT_ENABLES_NQ_RING_ID_VALID \
+               UINT32_C(0x80)
+       /*
+        * This bit must be '1' for the rx_buf_size field to be
+        * configured.
+        */
+       #define HWRM_RING_ALLOC_INPUT_ENABLES_RX_BUF_SIZE_VALID \
+               UINT32_C(0x100)
+       /* Ring Type. */
+       uint8_t ring_type;
+       /* L2 Completion Ring (CR) */
+       #define HWRM_RING_ALLOC_INPUT_RING_TYPE_L2_CMPL   UINT32_C(0x0)
+       /* TX Ring (TR) */
+       #define HWRM_RING_ALLOC_INPUT_RING_TYPE_TX        UINT32_C(0x1)
+       /* RX Ring (RR) */
+       #define HWRM_RING_ALLOC_INPUT_RING_TYPE_RX        UINT32_C(0x2)
+       /* RoCE Notification Completion Ring (ROCE_CR) */
+       #define HWRM_RING_ALLOC_INPUT_RING_TYPE_ROCE_CMPL UINT32_C(0x3)
+       /* RX Aggregation Ring */
+       #define HWRM_RING_ALLOC_INPUT_RING_TYPE_RX_AGG    UINT32_C(0x4)
+       /* Notification Queue */
+       #define HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ        UINT32_C(0x5)
+       #define HWRM_RING_ALLOC_INPUT_RING_TYPE_LAST \
+               HWRM_RING_ALLOC_INPUT_RING_TYPE_NQ
+       uint8_t unused_0;
+       /* Ring allocation flags. */
+       uint16_t        flags;
+       /*
+        * For Rx rings, the incoming packet data can be placed at either
+        * a 0B or 2B offset from the start of the Rx packet buffer. When
+        * '1', the received packet will be padded with 2B of zeros at the
+        * front of the packet. Note that this flag is only used for
+        * Rx rings and is ignored for all other rings, including Rx
+        * Aggregation rings.
+        */
+       #define HWRM_RING_ALLOC_INPUT_FLAGS_RX_SOP_PAD     UINT32_C(0x1)
+       /*
+        * This value is a pointer to the page table for the
+        * Ring.
+        */
+       uint64_t        page_tbl_addr;
+       /* First Byte Offset of the first entry in the first page. */
+       uint32_t        fbo;
+       /*
+        * Actual page size is 2^page_size bytes. The supported sizes
+        * are powers of 2 from 16 bytes to 1 GB:
+        * - 4 = 16 B
+        *     Page size is 16 B.
+        * - 12 = 4 KB
+        *     Page size is 4 KB.
+        * - 13 = 8 KB
+        *     Page size is 8 KB.
+        * - 16 = 64 KB
+        *     Page size is 64 KB.
+        * - 21 = 2 MB
+        *     Page size is 2 MB.
+        * - 22 = 4 MB
+        *     Page size is 4 MB.
+        * - 30 = 1 GB
+        *     Page size is 1 GB.
+        */
+       uint8_t page_size;
+       /*
+        * This value indicates the depth of page table.
+        * For this version of the specification, any value other than
+        * 0 or 1 shall be considered invalid.
+        * When page_tbl_depth = 0, it is treated as a
+        * special case with the following semantics.
+        * 1. FBO and page size fields are not valid.
+        * 2. page_tbl_addr is the physical address of the first
+        *    element of the ring.
+        */
+       uint8_t page_tbl_depth;
+       uint8_t unused_1[2];
+       /*
+        * Number of 16B units in the ring. The minimum size for
+        * a ring is 16 entries of 16 B each.
+        */
+       uint32_t        length;
+       /*
+        * Logical ring number for the ring to be allocated.
+        * This value determines the position in the doorbell
+        * area where the update to the ring will be made.
+        *
+        * For completion rings, this value is also the MSI-X
+        * vector number for the function the completion ring is
+        * associated with.
+        */
+       uint16_t        logical_id;
+       /*
+        * This field is used only when ring_type is a TX ring.
+        * This value indicates what completion ring the TX ring
+        * is associated with.
+        */
+       uint16_t        cmpl_ring_id;
+       /*
+        * This field is used only when ring_type is a TX ring.
+        * This value indicates what CoS queue the TX ring
+        * is associated with.
+        */
+       uint16_t        queue_id;
+       /*
+        * When allocating a Rx ring or Rx aggregation ring, this field
+        * specifies the size of the buffer descriptors posted to the ring.
+        */
+       uint16_t        rx_buf_size;
+       /*
+        * When allocating an Rx aggregation ring, this field
+        * specifies the associated Rx ring ID.
+        */
+       uint16_t        rx_ring_id;
+       /*
+        * When allocating a completion ring, this field
+        * specifies the associated NQ ring ID.
+        */
+       uint16_t        nq_ring_id;
+       /*
+        * This field is used only when ring_type is a TX ring.
+        * This field is used to configure arbitration related
+        * parameters for a TX ring.
+        */
+       uint16_t        ring_arb_cfg;
+       /* Arbitration policy used for the ring. */
+       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_MASK \
+               UINT32_C(0xf)
+       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_SFT       0
+       /*
+        * Use strict priority for the TX ring.
+        * Priority value is specified in arb_policy_param
         */
-       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_BCAST \
-               UINT32_C(0x8)
+       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_SP \
+               UINT32_C(0x1)
        /*
-        * When this bit is '1', the function is requested to be
-        * put in the promiscuous mode.
-        *
-        * The HWRM should accept any function to set up
-        * promiscuous mode.
-        *
-        * The HWRM shall follow the semantics below for the
-        * promiscuous mode support.
-        * # When partitioning is not enabled on a port
-        * (i.e. single PF on the port), then the PF shall
-        * be allowed to be in the promiscuous mode. When the
-        * PF is in the promiscuous mode, then it shall
-        * receive all host bound traffic on that port.
-        * # When partitioning is enabled on a port
-        * (i.e. multiple PFs per port) and a PF on that
-        * port is in the promiscuous mode, then the PF
-        * receives all traffic within that partition as
-        * identified by a unique identifier for the
-        * PF (e.g. S-Tag). If a unique outer VLAN
-        * for the PF is specified, then the setting of
-        * promiscuous mode on that PF shall result in the
-        * PF receiving all host bound traffic with matching
-        * outer VLAN.
-        * # A VF shall can be set in the promiscuous mode.
-        * In the promiscuous mode, the VF does not receive any
-        * traffic unless a unique outer VLAN for the
-        * VF is specified. If a unique outer VLAN
-        * for the VF is specified, then the setting of
-        * promiscuous mode on that VF shall result in the
-        * VF receiving all host bound traffic with the
-        * matching outer VLAN.
-        * # The HWRM shall allow the setting of promiscuous
-        * mode on a function independently from the
-        * promiscuous mode settings on other functions.
+        * Use weighted fair queue arbitration for the TX ring.
+        * Weight is specified in arb_policy_param
         */
-       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_PROMISCUOUS \
-               UINT32_C(0x10)
+       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_WFQ \
+               UINT32_C(0x2)
+       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_LAST \
+               HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_WFQ
+       /* Reserved field. */
+       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_RSVD_MASK \
+               UINT32_C(0xf0)
+       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_RSVD_SFT             4
        /*
-        * If this flag is set, the corresponding RX
-        * filters shall be set up to cover multicast/broadcast
-        * filters for the outermost Layer 2 destination MAC
-        * address field.
+        * Arbitration policy specific parameter.
+        * # For strict priority arbitration policy, this field
+        * represents a priority value. If set to 0, then the priority
+        * is not specified and the HWRM is allowed to select
+        * any priority for this TX ring.
+        * # For weighted fair queue arbitration policy, this field
+        * represents a weight value. If set to 0, then the weight
+        * is not specified and the HWRM is allowed to select
+        * any weight for this TX ring.
         */
-       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_OUTERMOST \
-               UINT32_C(0x20)
+       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_PARAM_MASK \
+               UINT32_C(0xff00)
+       #define HWRM_RING_ALLOC_INPUT_RING_ARB_CFG_ARB_POLICY_PARAM_SFT 8
+       uint16_t        unused_3;
        /*
-        * If this flag is set, the corresponding RX
-        * filters shall be set up to cover multicast/broadcast
-        * filters for the VLAN-tagged packets that match the
-        * TPID and VID fields of VLAN tags in the VLAN tag
-        * table specified in this command.
+        * This field is reserved for future use.
+        * It shall be set to 0.
         */
-       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_VLANONLY \
-               UINT32_C(0x40)
+       uint32_t        reserved3;
        /*
-        * If this flag is set, the corresponding RX
-        * filters shall be set up to cover multicast/broadcast
-        * filters for non-VLAN tagged packets and VLAN-tagged
-        * packets that match the TPID and VID fields of VLAN
-        * tags in the VLAN tag table specified in this command.
+        * This field is used only when ring_type is a TX ring.
+        * This input indicates what statistics context this ring
+        * should be associated with.
         */
-       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_VLAN_NONVLAN \
-               UINT32_C(0x80)
+       uint32_t        stat_ctx_id;
        /*
-        * If this flag is set, the corresponding RX
-        * filters shall be set up to cover multicast/broadcast
-        * filters for non-VLAN tagged packets and VLAN-tagged
-        * packets matching any VLAN tag.
-        *
-        * If this flag is set, then the HWRM shall ignore
-        * VLAN tags specified in vlan_tag_tbl.
-        *
-        * If none of vlanonly, vlan_nonvlan, and anyvlan_nonvlan
-        * flags is set, then the HWRM shall ignore
-        * VLAN tags specified in vlan_tag_tbl.
-        *
-        * The HWRM client shall set at most one flag out of
-        * vlanonly, vlan_nonvlan, and anyvlan_nonvlan.
+        * This field is reserved for future use.
+        * It shall be set to 0.
         */
-       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ANYVLAN_NONVLAN \
-               UINT32_C(0x100)
-       /* This is the address for mcast address tbl. */
-       uint64_t        mc_tbl_addr;
+       uint32_t        reserved4;
        /*
-        * This value indicates how many entries in mc_tbl are valid.
-        * Each entry is 6 bytes.
+        * This field is used only when ring_type is a TX ring
+        * to specify the maximum BW allocated to the TX ring.
+        * The HWRM will translate this value into a byte counter and
+        * a time interval used for this ring inside the device.
         */
-       uint32_t        num_mc_entries;
-       uint8_t unused_0[4];
+       uint32_t        max_bw;
+       /* The bandwidth value. */
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_MASK \
+               UINT32_C(0xfffffff)
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_SFT              0
+       /* The granularity of the value (bits or bytes). */
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_SCALE \
+               UINT32_C(0x10000000)
+       /* Value is in bits. */
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_SCALE_BITS \
+               (UINT32_C(0x0) << 28)
+       /* Value is in bytes. */
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_SCALE_BYTES \
+               (UINT32_C(0x1) << 28)
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_SCALE_LAST \
+               HWRM_RING_ALLOC_INPUT_MAX_BW_SCALE_BYTES
+       /* bw_value_unit is 3 b */
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_MASK \
+               UINT32_C(0xe0000000)
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_SFT         29
+       /* Value is in Mb or MB (base 10). */
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_MEGA \
+               (UINT32_C(0x0) << 29)
+       /* Value is in Kb or KB (base 10). */
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_KILO \
+               (UINT32_C(0x2) << 29)
+       /* Value is in bits or bytes. */
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_BASE \
+               (UINT32_C(0x4) << 29)
+       /* Value is in Gb or GB (base 10). */
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_GIGA \
+               (UINT32_C(0x6) << 29)
+       /* Value is in 1/100th of a percentage of total bandwidth. */
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 \
+               (UINT32_C(0x1) << 29)
+       /* Invalid unit */
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_INVALID \
+               (UINT32_C(0x7) << 29)
+       #define HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_LAST \
+               HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_INVALID
        /*
-        * This is the address for VLAN tag table.
-        * Each VLAN entry in the table is 4 bytes of a VLAN tag
-        * including TPID, PCP, DEI, and VID fields in network byte
-        * order.
+        * This field is used only when ring_type is a Completion ring.
+        * This value indicates what interrupt mode should be used
+        * on this completion ring.
+        * Note: In the legacy interrupt mode, no more than 16
+        * completion rings are allowed.
         */
-       uint64_t        vlan_tag_tbl_addr;
+       uint8_t int_mode;
+       /* Legacy INTA */
+       #define HWRM_RING_ALLOC_INPUT_INT_MODE_LEGACY UINT32_C(0x0)
+       /* Reserved */
+       #define HWRM_RING_ALLOC_INPUT_INT_MODE_RSVD   UINT32_C(0x1)
+       /* MSI-X */
+       #define HWRM_RING_ALLOC_INPUT_INT_MODE_MSIX   UINT32_C(0x2)
+       /* No Interrupt - Polled mode */
+       #define HWRM_RING_ALLOC_INPUT_INT_MODE_POLL   UINT32_C(0x3)
+       #define HWRM_RING_ALLOC_INPUT_INT_MODE_LAST \
+               HWRM_RING_ALLOC_INPUT_INT_MODE_POLL
+       uint8_t unused_4[3];
        /*
-        * This value indicates how many entries in vlan_tag_tbl are
-        * valid. Each entry is 4 bytes.
+        * The cq_handle is specified when allocating a completion ring. For
+        * devices that support NQs, this cq_handle will be included in the
+        * NQE to specify which CQ should be read to retrieve the completion
+        * record.
         */
-       uint32_t        num_vlan_tags;
-       uint8_t unused_1[4];
+       uint64_t        cq_handle;
 } __attribute__((packed));
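
To make the encodings above concrete (length in 16B units, the page_tbl_depth == 0 contiguous-ring special case, and the max_bw value/scale/unit packing), here is a non-authoritative sketch of filling a TX ring request; the 10 Gb/s cap is purely illustrative.

```c
#include <rte_byteorder.h>

/* Sketch: TX hwrm_ring_alloc request for a physically contiguous
 * descriptor area. ring_bytes is assumed to be a multiple of 16.
 */
static void
fill_tx_ring_alloc(struct hwrm_ring_alloc_input *req, uint64_t ring_pa,
		   uint32_t ring_bytes, uint16_t logical_id,
		   uint16_t cmpl_ring_id, uint16_t queue_id)
{
	req->ring_type = HWRM_RING_ALLOC_INPUT_RING_TYPE_TX;
	/* depth 0: page_tbl_addr is the ring base; fbo/page_size unused */
	req->page_tbl_depth = 0;
	req->page_tbl_addr = rte_cpu_to_le_64(ring_pa);
	req->fbo = 0;
	req->length = rte_cpu_to_le_32(ring_bytes / 16);	/* 16B units */
	req->logical_id = rte_cpu_to_le_16(logical_id);
	req->cmpl_ring_id = rte_cpu_to_le_16(cmpl_ring_id);
	req->queue_id = rte_cpu_to_le_16(queue_id);

	/* illustrative: cap the ring at 10000 Mb/s (bits scale, mega unit) */
	req->enables = rte_cpu_to_le_32(
		HWRM_RING_ALLOC_INPUT_ENABLES_MAX_BW_VALID);
	req->max_bw = rte_cpu_to_le_32(
		(UINT32_C(10000) & HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_MASK) |
		HWRM_RING_ALLOC_INPUT_MAX_BW_SCALE_BITS |
		HWRM_RING_ALLOC_INPUT_MAX_BW_BW_VALUE_UNIT_MEGA);
}
```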
 
-/* hwrm_cfa_l2_set_rx_mask_output (size:128b/16B) */
-struct hwrm_cfa_l2_set_rx_mask_output {
+/* hwrm_ring_alloc_output (size:128b/16B) */
+struct hwrm_ring_alloc_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -20654,7 +20043,14 @@ struct hwrm_cfa_l2_set_rx_mask_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       uint8_t unused_0[7];
+       /*
+        * Physical number of the ring allocated.
+        * This value shall be unique for a ring type.
+        */
+       uint16_t        ring_id;
+       /* Logical number of ring allocated. */
+       uint16_t        logical_ring_id;
+       uint8_t unused_0[3];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -20665,31 +20061,13 @@ struct hwrm_cfa_l2_set_rx_mask_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/* hwrm_cfa_l2_set_rx_mask_cmd_err (size:64b/8B) */
-struct hwrm_cfa_l2_set_rx_mask_cmd_err {
-       /*
-        * command specific error codes that goes to
-        * the cmd_err field in Common HWRM Error Response.
-        */
-       uint8_t code;
-       /* Unknown error */
-       #define HWRM_CFA_L2_SET_RX_MASK_CMD_ERR_CODE_UNKNOWN \
-               UINT32_C(0x0)
-       /* Unable to complete operation due to conflict with Ntuple Filter */
-       #define HWRM_CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR \
-               UINT32_C(0x1)
-       #define HWRM_CFA_L2_SET_RX_MASK_CMD_ERR_CODE_LAST \
-               HWRM_CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR
-       uint8_t unused_0[7];
-} __attribute__((packed));
-
-/*******************************
- * hwrm_cfa_vlan_antispoof_cfg *
- *******************************/
+/******************
+ * hwrm_ring_free *
+ ******************/
 
 
-/* hwrm_cfa_vlan_antispoof_cfg_input (size:256b/32B) */
-struct hwrm_cfa_vlan_antispoof_cfg_input {
+/* hwrm_ring_free_input (size:192b/24B) */
+struct hwrm_ring_free_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -20717,27 +20095,30 @@ struct hwrm_cfa_vlan_antispoof_cfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /*
-        * Function ID of the function that is being configured.
-        * Only valid for a VF FID configured by the PF.
-        */
-       uint16_t        fid;
-       uint8_t unused_0[2];
-       /* Number of VLAN entries in the vlan_tag_mask_tbl. */
-       uint32_t        num_vlan_entries;
-       /*
-        * The vlan_tag_mask_tbl_addr is the DMA address of the VLAN
-        * antispoof table. Each table entry contains the 16-bit TPID
-        * (0x8100 or 0x88a8 only), 16-bit VLAN ID, and a 16-bit mask,
-        * all in network order to match hwrm_cfa_l2_set_rx_mask.
-        * For an individual VLAN entry, the mask value should be 0xfff
-        * for the 12-bit VLAN ID.
-        */
-       uint64_t        vlan_tag_mask_tbl_addr;
+       /* Ring Type. */
+       uint8_t ring_type;
+       /* L2 Completion Ring (CR) */
+       #define HWRM_RING_FREE_INPUT_RING_TYPE_L2_CMPL   UINT32_C(0x0)
+       /* TX Ring (TR) */
+       #define HWRM_RING_FREE_INPUT_RING_TYPE_TX        UINT32_C(0x1)
+       /* RX Ring (RR) */
+       #define HWRM_RING_FREE_INPUT_RING_TYPE_RX        UINT32_C(0x2)
+       /* RoCE Notification Completion Ring (ROCE_CR) */
+       #define HWRM_RING_FREE_INPUT_RING_TYPE_ROCE_CMPL UINT32_C(0x3)
+       /* RX Aggregation Ring */
+       #define HWRM_RING_FREE_INPUT_RING_TYPE_RX_AGG    UINT32_C(0x4)
+       /* Notification Queue */
+       #define HWRM_RING_FREE_INPUT_RING_TYPE_NQ        UINT32_C(0x5)
+       #define HWRM_RING_FREE_INPUT_RING_TYPE_LAST \
+               HWRM_RING_FREE_INPUT_RING_TYPE_NQ
+       uint8_t unused_0;
+       /* Physical number of the ring allocated. */
+       uint16_t        ring_id;
+       uint8_t unused_1[4];
 } __attribute__((packed));
 
-/* hwrm_cfa_vlan_antispoof_cfg_output (size:128b/16B) */
-struct hwrm_cfa_vlan_antispoof_cfg_output {
+/* hwrm_ring_free_output (size:128b/16B) */
+struct hwrm_ring_free_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -20757,13 +20138,13 @@ struct hwrm_cfa_vlan_antispoof_cfg_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/********************************
- * hwrm_cfa_vlan_antispoof_qcfg *
- ********************************/
+/*******************
+ * hwrm_ring_reset *
+ *******************/
 
 
-/* hwrm_cfa_vlan_antispoof_qcfg_input (size:256b/32B) */
-struct hwrm_cfa_vlan_antispoof_qcfg_input {
+/* hwrm_ring_reset_input (size:192b/24B) */
+struct hwrm_ring_reset_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -20791,30 +20172,26 @@ struct hwrm_cfa_vlan_antispoof_qcfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /*
-        * Function ID of the function that is being queried.
-        * Only valid for a VF FID queried by the PF.
-        */
-       uint16_t        fid;
-       uint8_t unused_0[2];
-       /*
-        * Maximum number of VLAN entries the firmware is allowed to DMA
-        * to vlan_tag_mask_tbl.
-        */
-       uint32_t        max_vlan_entries;
-       /*
-        * The vlan_tag_mask_tbl_addr is the DMA address of the VLAN
-        * antispoof table to which firmware will DMA to. Each table
-        * entry will contain the 16-bit TPID (0x8100 or 0x88a8 only),
-        * 16-bit VLAN ID, and a 16-bit mask, all in network order to
-        * match hwrm_cfa_l2_set_rx_mask. For an individual VLAN entry,
-        * the mask value should be 0xfff for the 12-bit VLAN ID.
-        */
-       uint64_t        vlan_tag_mask_tbl_addr;
+       /* Ring Type. */
+       uint8_t ring_type;
+       /* L2 Completion Ring (CR) */
+       #define HWRM_RING_RESET_INPUT_RING_TYPE_L2_CMPL   UINT32_C(0x0)
+       /* TX Ring (TR) */
+       #define HWRM_RING_RESET_INPUT_RING_TYPE_TX        UINT32_C(0x1)
+       /* RX Ring (RR) */
+       #define HWRM_RING_RESET_INPUT_RING_TYPE_RX        UINT32_C(0x2)
+       /* RoCE Notification Completion Ring (ROCE_CR) */
+       #define HWRM_RING_RESET_INPUT_RING_TYPE_ROCE_CMPL UINT32_C(0x3)
+       #define HWRM_RING_RESET_INPUT_RING_TYPE_LAST \
+               HWRM_RING_RESET_INPUT_RING_TYPE_ROCE_CMPL
+       uint8_t unused_0;
+       /* Physical number of the ring. */
+       uint16_t        ring_id;
+       uint8_t unused_1[4];
 } __attribute__((packed));
 
-/* hwrm_cfa_vlan_antispoof_qcfg_output (size:128b/16B) */
-struct hwrm_cfa_vlan_antispoof_qcfg_output {
+/* hwrm_ring_reset_output (size:128b/16B) */
+struct hwrm_ring_reset_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -20823,9 +20200,7 @@ struct hwrm_cfa_vlan_antispoof_qcfg_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* Number of valid entries DMAd by firmware to vlan_tag_mask_tbl. */
-       uint32_t        num_vlan_entries;
-       uint8_t unused_0[3];
+       uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -20836,13 +20211,13 @@ struct hwrm_cfa_vlan_antispoof_qcfg_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/********************************
- * hwrm_cfa_tunnel_filter_alloc *
- ********************************/
+/**************************
+ * hwrm_ring_aggint_qcaps *
+ **************************/
 
 
-/* hwrm_cfa_tunnel_filter_alloc_input (size:704b/88B) */
-struct hwrm_cfa_tunnel_filter_alloc_input {
+/* hwrm_ring_aggint_qcaps_input (size:128b/16B) */
+struct hwrm_ring_aggint_qcaps_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -20870,230 +20245,111 @@ struct hwrm_cfa_tunnel_filter_alloc_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint32_t        flags;
-       /* Setting of this flag indicates the applicability to the loopback path. */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_FLAGS_LOOPBACK \
-               UINT32_C(0x1)
-       uint32_t        enables;
+} __attribute__((packed));
+
+/* hwrm_ring_aggint_qcaps_output (size:384b/48B) */
+struct hwrm_ring_aggint_qcaps_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       uint32_t        cmpl_params;
        /*
-        * This bit must be '1' for the l2_filter_id field to be
-        * configured.
+        * When this bit is set to '1', int_lat_tmr_min can be configured
+        * on completion rings.
         */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_L2_FILTER_ID \
+       #define HWRM_RING_AGGINT_QCAPS_OUTPUT_CMPL_PARAMS_INT_LAT_TMR_MIN \
                UINT32_C(0x1)
        /*
-        * This bit must be '1' for the l2_addr field to be
-        * configured.
+        * When this bit is set to '1', int_lat_tmr_max can be configured
+        * on completion rings.
         */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR \
+       #define HWRM_RING_AGGINT_QCAPS_OUTPUT_CMPL_PARAMS_INT_LAT_TMR_MAX \
                UINT32_C(0x2)
        /*
-        * This bit must be '1' for the l2_ivlan field to be
-        * configured.
+        * When this bit is set to '1', timer_reset can be enabled
+        * on completion rings.
         */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_L2_IVLAN \
+       #define HWRM_RING_AGGINT_QCAPS_OUTPUT_CMPL_PARAMS_TIMER_RESET \
                UINT32_C(0x4)
        /*
-        * This bit must be '1' for the l3_addr field to be
-        * configured.
+        * When this bit is set to '1', ring_idle can be enabled
+        * on completion rings.
         */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_L3_ADDR \
+       #define HWRM_RING_AGGINT_QCAPS_OUTPUT_CMPL_PARAMS_RING_IDLE \
                UINT32_C(0x8)
        /*
-        * This bit must be '1' for the l3_addr_type field to be
-        * configured.
+        * When this bit is set to '1', num_cmpl_dma_aggr can be configured
+        * on completion rings.
         */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_L3_ADDR_TYPE \
+       #define HWRM_RING_AGGINT_QCAPS_OUTPUT_CMPL_PARAMS_NUM_CMPL_DMA_AGGR \
                UINT32_C(0x10)
        /*
-        * This bit must be '1' for the t_l3_addr_type field to be
-        * configured.
+        * When this bit is set to '1', num_cmpl_dma_aggr_during_int can be configured
+        * on completion rings.
         */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_T_L3_ADDR_TYPE \
+       #define HWRM_RING_AGGINT_QCAPS_OUTPUT_CMPL_PARAMS_NUM_CMPL_DMA_AGGR_DURING_INT \
                UINT32_C(0x20)
        /*
-        * This bit must be '1' for the t_l3_addr field to be
-        * configured.
+        * When this bit is set to '1', cmpl_aggr_dma_tmr can be configured
+        * on completion rings.
         */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_T_L3_ADDR \
+       #define HWRM_RING_AGGINT_QCAPS_OUTPUT_CMPL_PARAMS_CMPL_AGGR_DMA_TMR \
                UINT32_C(0x40)
-       /*
-        * This bit must be '1' for the tunnel_type field to be
-        * configured.
-        */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_TYPE \
-               UINT32_C(0x80)
-       /*
-        * This bit must be '1' for the vni field to be
-        * configured.
-        */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_VNI \
-               UINT32_C(0x100)
-       /*
-        * This bit must be '1' for the dst_vnic_id field to be
-        * configured.
-        */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_DST_VNIC_ID \
-               UINT32_C(0x200)
-       /*
-        * This bit must be '1' for the mirror_vnic_id field to be
-        * configured.
-        */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID \
-               UINT32_C(0x400)
-       /*
-        * This value identifies a set of CFA data structures used for an L2
-        * context.
-        */
-       uint64_t        l2_filter_id;
-       /*
-        * This value sets the match value for the inner L2
-        * MAC address.
-        * Destination MAC address for RX path.
-        * Source MAC address for TX path.
-        */
-       uint8_t l2_addr[6];
-       /*
-        * This value sets VLAN ID value for inner VLAN.
-        * Only 12-bits of VLAN ID are used in setting the filter.
-        */
-       uint16_t        l2_ivlan;
-       /*
-        * The value of inner destination IP address to be used in filtering.
-        * For IPv4, first four bytes represent the IP address.
-        */
-       uint32_t        l3_addr[4];
-       /*
-        * The value of tunnel destination IP address to be used in filtering.
-        * For IPv4, first four bytes represent the IP address.
-        */
-       uint32_t        t_l3_addr[4];
-       /*
-        * This value indicates the type of inner IP address.
-        * 4 - IPv4
-        * 6 - IPv6
-        * All others are invalid.
-        */
-       uint8_t l3_addr_type;
-       /*
-        * This value indicates the type of tunnel IP address.
-        * 4 - IPv4
-        * 6 - IPv6
-        * All others are invalid.
-        */
-       uint8_t t_l3_addr_type;
-       /* Tunnel Type. */
-       uint8_t tunnel_type;
-       /* Non-tunnel */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL \
-               UINT32_C(0x0)
-       /* Virtual eXtensible Local Area Network (VXLAN) */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
-               UINT32_C(0x1)
-       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NVGRE \
-               UINT32_C(0x2)
-       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_L2GRE \
-               UINT32_C(0x3)
-       /* IP in IP */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPIP \
-               UINT32_C(0x4)
-       /* Generic Network Virtualization Encapsulation (Geneve) */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
-               UINT32_C(0x5)
-       /* Multi-Protocol Lable Switching (MPLS) */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_MPLS \
-               UINT32_C(0x6)
-       /* Stateless Transport Tunnel (STT) */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_STT \
-               UINT32_C(0x7)
-       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPGRE \
-               UINT32_C(0x8)
-       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
-               UINT32_C(0x9)
-       /* Any tunneled traffic */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
-               UINT32_C(0xff)
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_LAST \
-               HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL
-       /*
-        * tunnel_flags allows the user to indicate the tunnel tag detection
-        * for the tunnel type specified in tunnel_type.
-        */
-       uint8_t tunnel_flags;
-       /*
-        * If the tunnel_type is geneve, then this bit indicates if we
-        * need to match the geneve OAM packet.
-        * If the tunnel_type is nvgre or gre, then this bit indicates if
-        * we need to detect checksum present bit in geneve header.
-        * If the tunnel_type is mpls, then this bit indicates if we need
-        * to match mpls packet with explicit IPV4/IPV6 null header.
-        */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_FLAGS_TUN_FLAGS_OAM_CHECKSUM_EXPLHDR \
-               UINT32_C(0x1)
-       /*
-        * If the tunnel_type is geneve, then this bit indicates if we
-        * need to detect the critical option bit set in the oam packet.
-        * If the tunnel_type is nvgre or gre, then this bit indicates
-        * if we need to match nvgre packets with key present bit set in
-        * gre header.
-        * If the tunnel_type is mpls, then this bit indicates if we
-        * need to match mpls packet with S bit from inner/second label.
-        */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_FLAGS_TUN_FLAGS_CRITICAL_OPT_S1 \
-               UINT32_C(0x2)
-       /*
-        * If the tunnel_type is geneve, then this bit indicates if we
-        * need to match geneve packet with extended header bit set in
-        * geneve header.
-        * If the tunnel_type is nvgre or gre, then this bit indicates
-        * if we need to match nvgre packets with sequence number
-        * present bit set in gre header.
-        * If the tunnel_type is mpls, then this bit indicates if we
-        * need to match mpls packet with S bit from out/first label.
-        */
-       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_FLAGS_TUN_FLAGS_EXTHDR_SEQNUM_S0 \
-               UINT32_C(0x4)
-       /*
-        * Virtual Network Identifier (VNI). Only valid with
-        * tunnel_types VXLAN, NVGRE, and Geneve.
-        * Only lower 24-bits of VNI field are used
-        * in setting up the filter.
+       /*
+        * When this bit is set to '1', cmpl_aggr_dma_tmr_during_int can be configured
+        * on completion rings.
         */
-       uint32_t        vni;
-       /* Logical VNIC ID of the destination VNIC. */
-       uint32_t        dst_vnic_id;
+       #define HWRM_RING_AGGINT_QCAPS_OUTPUT_CMPL_PARAMS_CMPL_AGGR_DMA_TMR_DURING_INT \
+               UINT32_C(0x80)
        /*
-        * Logical VNIC ID of the VNIC where traffic is
-        * mirrored.
+        * When this bit is set to '1', num_cmpl_aggr_int can be configured
+        * on completion rings.
         */
-       uint32_t        mirror_vnic_id;
-} __attribute__((packed));
-
-/* hwrm_cfa_tunnel_filter_alloc_output (size:192b/24B) */
-struct hwrm_cfa_tunnel_filter_alloc_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* This value is an opaque id into CFA data structures. */
-       uint64_t        tunnel_filter_id;
+       #define HWRM_RING_AGGINT_QCAPS_OUTPUT_CMPL_PARAMS_NUM_CMPL_AGGR_INT \
+               UINT32_C(0x100)
+       uint32_t        nq_params;
        /*
-        * This is the ID of the flow associated with this
-        * filter.
-        * This value shall be used to match and associate the
-        * flow identifier returned in completion records.
-        * A value of 0xFFFFFFFF shall indicate no flow id.
+        * When this bit is set to '1', int_lat_tmr_min can be configured
+        * on notification queues.
         */
-       uint32_t        flow_id;
-       uint8_t unused_0[3];
+       #define HWRM_RING_AGGINT_QCAPS_OUTPUT_NQ_PARAMS_INT_LAT_TMR_MIN \
+               UINT32_C(0x1)
+       /* Minimum value for num_cmpl_dma_aggr */
+       uint16_t        num_cmpl_dma_aggr_min;
+       /* Maximum value for num_cmpl_dma_aggr */
+       uint16_t        num_cmpl_dma_aggr_max;
+       /* Minimum value for num_cmpl_dma_aggr_during_int */
+       uint16_t        num_cmpl_dma_aggr_during_int_min;
+       /* Maximum value for num_cmpl_dma_aggr_during_int */
+       uint16_t        num_cmpl_dma_aggr_during_int_max;
+       /* Minimum value for cmpl_aggr_dma_tmr */
+       uint16_t        cmpl_aggr_dma_tmr_min;
+       /* Maximum value for cmpl_aggr_dma_tmr */
+       uint16_t        cmpl_aggr_dma_tmr_max;
+       /* Minimum value for cmpl_aggr_dma_tmr_during_int */
+       uint16_t        cmpl_aggr_dma_tmr_during_int_min;
+       /* Maximum value for cmpl_aggr_dma_tmr_during_int */
+       uint16_t        cmpl_aggr_dma_tmr_during_int_max;
+       /* Minimum value for int_lat_tmr_min */
+       uint16_t        int_lat_tmr_min_min;
+       /* Maximum value for int_lat_tmr_min */
+       uint16_t        int_lat_tmr_min_max;
+       /* Minimum value for int_lat_tmr_max */
+       uint16_t        int_lat_tmr_max_min;
+       /* Maximum value for int_lat_tmr_max */
+       uint16_t        int_lat_tmr_max_max;
+       /* Minimum value for num_cmpl_aggr_int */
+       uint16_t        num_cmpl_aggr_int_min;
+       /* Maximum value for num_cmpl_aggr_int */
+       uint16_t        num_cmpl_aggr_int_max;
+       /* The units for timer parameters, in nanoseconds. */
+       uint16_t        timer_units;
+       uint8_t unused_0[1];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -21104,13 +20360,13 @@ struct hwrm_cfa_tunnel_filter_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
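
In practice a driver checks the relevant capability bit before touching a parameter and clamps its preferred value to the advertised range. A small sketch under those assumptions:

```c
#include <rte_byteorder.h>

/* Sketch: clamp a desired num_cmpl_dma_aggr to the queried range.
 * Returns the clamped value; *supported is 0 when the capability
 * bit says the parameter cannot be configured on completion rings.
 */
static uint16_t
clamp_num_cmpl_dma_aggr(const struct hwrm_ring_aggint_qcaps_output *qcaps,
			uint16_t wanted, int *supported)
{
	uint16_t lo = rte_le_to_cpu_16(qcaps->num_cmpl_dma_aggr_min);
	uint16_t hi = rte_le_to_cpu_16(qcaps->num_cmpl_dma_aggr_max);

	*supported = !!(rte_le_to_cpu_32(qcaps->cmpl_params) &
		HWRM_RING_AGGINT_QCAPS_OUTPUT_CMPL_PARAMS_NUM_CMPL_DMA_AGGR);
	if (wanted < lo)
		return lo;
	if (wanted > hi)
		return hi;
	return wanted;
}
```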
 
-/*******************************
- * hwrm_cfa_tunnel_filter_free *
- *******************************/
+/**************************************
+ * hwrm_ring_cmpl_ring_qaggint_params *
+ **************************************/
 
 
-/* hwrm_cfa_tunnel_filter_free_input (size:192b/24B) */
-struct hwrm_cfa_tunnel_filter_free_input {
+/* hwrm_ring_cmpl_ring_qaggint_params_input (size:192b/24B) */
+struct hwrm_ring_cmpl_ring_qaggint_params_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -21138,12 +20394,13 @@ struct hwrm_cfa_tunnel_filter_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* This value is an opaque id into CFA data structures. */
-       uint64_t        tunnel_filter_id;
+       /* Physical number of the completion ring. */
+       uint16_t        ring_id;
+       uint8_t unused_0[6];
 } __attribute__((packed));
 
-/* hwrm_cfa_tunnel_filter_free_output (size:128b/16B) */
-struct hwrm_cfa_tunnel_filter_free_output {
+/* hwrm_ring_cmpl_ring_qaggint_params_output (size:256b/32B) */
+struct hwrm_ring_cmpl_ring_qaggint_params_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -21152,6 +20409,52 @@ struct hwrm_cfa_tunnel_filter_free_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
+       uint16_t        flags;
+       /*
+        * When this bit is set to '1', interrupt max
+        * timer is reset whenever a completion is received.
+        */
+       #define HWRM_RING_CMPL_RING_QAGGINT_PARAMS_OUTPUT_FLAGS_TIMER_RESET \
+               UINT32_C(0x1)
+       /*
+        * When this bit is set to '1', ring idle mode
+        * aggregation will be enabled.
+        */
+       #define HWRM_RING_CMPL_RING_QAGGINT_PARAMS_OUTPUT_FLAGS_RING_IDLE \
+               UINT32_C(0x2)
+       /*
+        * Number of completions to aggregate before DMA
+        * during the normal mode.
+        */
+       uint16_t        num_cmpl_dma_aggr;
+       /*
+        * Number of completions to aggregate before DMA
+        * during the interrupt mode.
+        */
+       uint16_t        num_cmpl_dma_aggr_during_int;
+       /*
+        * Timer in unit of 80-nsec used to aggregate completions before
+        * DMA during the normal mode (not in interrupt mode).
+        */
+       uint16_t        cmpl_aggr_dma_tmr;
+       /*
+        * Timer in unit of 80-nsec used to aggregate completions before
+        * DMA during the interrupt mode.
+        */
+       uint16_t        cmpl_aggr_dma_tmr_during_int;
+       /* Minimum time (in unit of 80-nsec) between two interrupts. */
+       uint16_t        int_lat_tmr_min;
+       /*
+        * Maximum wait time (in unit of 80-nsec) spent aggregating
+        * completions before signaling the interrupt after the
+        * interrupt is enabled.
+        */
+       uint16_t        int_lat_tmr_max;
+       /*
+        * Minimum number of completions aggregated before signaling
+        * an interrupt.
+        */
+       uint16_t        num_cmpl_aggr_int;
        uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
@@ -21163,13 +20466,13 @@ struct hwrm_cfa_tunnel_filter_free_output {
        uint8_t valid;
 } __attribute__((packed));
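
The timer fields in this response are in ticks, not nanoseconds. A hedged helper to convert them back, preferring the tick size advertised in hwrm_ring_aggint_qcaps_output.timer_units and falling back to the 80 nsec noted in the comments above when the device reports none:

```c
#include <rte_byteorder.h>

/* Sketch: int_lat_tmr_max in nanoseconds. timer_units comes from the
 * ring_aggint_qcaps response; 0 falls back to the documented 80 ns.
 */
static uint32_t
int_lat_tmr_max_ns(const struct hwrm_ring_cmpl_ring_qaggint_params_output *p,
		   uint16_t timer_units)
{
	uint32_t ns_per_tick = timer_units != 0 ? timer_units : 80;

	return (uint32_t)rte_le_to_cpu_16(p->int_lat_tmr_max) * ns_per_tick;
}
```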
 
-/***************************************
- * hwrm_cfa_redirect_tunnel_type_alloc *
- ***************************************/
+/*****************************************
+ * hwrm_ring_cmpl_ring_cfg_aggint_params *
+ *****************************************/
 
 
-/* hwrm_cfa_redirect_tunnel_type_alloc_input (size:192b/24B) */
-struct hwrm_cfa_redirect_tunnel_type_alloc_input {
+/* hwrm_ring_cmpl_ring_cfg_aggint_params_input (size:320b/40B) */
+struct hwrm_ring_cmpl_ring_cfg_aggint_params_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -21197,58 +20500,109 @@ struct hwrm_cfa_redirect_tunnel_type_alloc_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* The destination function id, to whom the traffic is redirected. */
-       uint16_t        dest_fid;
-       /* Tunnel Type. */
-       uint8_t tunnel_type;
-       /* Non-tunnel */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL \
-               UINT32_C(0x0)
-       /* Virtual eXtensible Local Area Network (VXLAN) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
+       /* Physical number of the completion ring. */
+       uint16_t        ring_id;
+       uint16_t        flags;
+       /*
+        * When this bit is set to '1', interrupt latency max
+        * timer is reset whenever a completion is received.
+        */
+       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_FLAGS_TIMER_RESET \
                UINT32_C(0x1)
-       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_NVGRE \
+       /*
+        * When this bit is set to '1', ring idle mode
+        * aggregation will be enabled.
+        */
+       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_FLAGS_RING_IDLE \
                UINT32_C(0x2)
-       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_L2GRE \
-               UINT32_C(0x3)
-       /* IP in IP */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_IPIP \
+       /*
+        * Set this flag to 1 when configuring parameters on a
+        * notification queue. Set this flag to 0 when configuring
+        * parameters on a completion queue.
+        */
+       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_FLAGS_IS_NQ \
                UINT32_C(0x4)
-       /* Generic Network Virtualization Encapsulation (Geneve) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
-               UINT32_C(0x5)
-       /* Multi-Protocol Lable Switching (MPLS) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_MPLS \
-               UINT32_C(0x6)
-       /* Stateless Transport Tunnel (STT) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_STT \
-               UINT32_C(0x7)
-       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_IPGRE \
-               UINT32_C(0x8)
-       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
-               UINT32_C(0x9)
-       /* Enhance Generic Routing Encapsulation (GRE version 1) inside IP datagram payload */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_IPGRE_V1 \
-               UINT32_C(0xa)
-       /* Any tunneled traffic */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
-               UINT32_C(0xff)
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_LAST \
-               HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL
-       /* Tunnel alloc flags. */
-       uint8_t flags;
-       /* Setting of this flag indicates modify existing redirect tunnel to new destination function ID. */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_FLAGS_MODIFY_DST \
+       /*
+        * Number of completions to aggregate before DMA
+        * during the normal mode.
+        */
+       uint16_t        num_cmpl_dma_aggr;
+       /*
+        * Number of completions to aggregate before DMA
+        * during the interrupt mode.
+        */
+       uint16_t        num_cmpl_dma_aggr_during_int;
+       /*
+        * Timer in unit of 80-nsec used to aggregate completions before
+        * DMA during the normal mode (not in interrupt mode).
+        */
+       uint16_t        cmpl_aggr_dma_tmr;
+       /*
+        * Timer in unit of 80-nsec used to aggregate completions before
+        * DMA during the interrupt mode.
+        */
+       uint16_t        cmpl_aggr_dma_tmr_during_int;
+       /* Minimum time (in unit of 80-nsec) between two interrupts. */
+       uint16_t        int_lat_tmr_min;
+       /*
+        * Maximum wait time (in unit of 80-nsec) spent aggregating
+        * cmpls before signaling the interrupt after the
+        * interrupt is enabled.
+        */
+       uint16_t        int_lat_tmr_max;
+       /*
+        * Minimum number of completions aggregated before signaling
+        * an interrupt.
+        */
+       uint16_t        num_cmpl_aggr_int;
+       /*
+        * Bitfield that indicates which parameters are to be applied. Only
+        * required when configuring devices with notification queues, and
+        * used in that case to set certain parameters on completion queues
+        * and others on notification queues.
+        */
+       uint16_t        enables;
+       /*
+        * This bit must be '1' for the num_cmpl_dma_aggr field to be
+        * configured.
+        */
+       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_ENABLES_NUM_CMPL_DMA_AGGR \
                UINT32_C(0x1)
+       /*
+        * This bit must be '1' for the num_cmpl_dma_aggr_during_int field to be
+        * configured.
+        */
+       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_ENABLES_NUM_CMPL_DMA_AGGR_DURING_INT \
+               UINT32_C(0x2)
+       /*
+        * This bit must be '1' for the cmpl_aggr_dma_tmr field to be
+        * configured.
+        */
+       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_ENABLES_CMPL_AGGR_DMA_TMR \
+               UINT32_C(0x4)
+       /*
+        * This bit must be '1' for the int_lat_tmr_min field to be
+        * configured.
+        */
+       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_ENABLES_INT_LAT_TMR_MIN \
+               UINT32_C(0x8)
+       /*
+        * This bit must be '1' for the int_lat_tmr_max field to be
+        * configured.
+        */
+       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_ENABLES_INT_LAT_TMR_MAX \
+               UINT32_C(0x10)
+       /*
+        * This bit must be '1' for the num_cmpl_aggr_int field to be
+        * configured.
+        */
+       #define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_ENABLES_NUM_CMPL_AGGR_INT \
+               UINT32_C(0x20)
        uint8_t unused_0[4];
 } __attribute__((packed));
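
The coalescing knobs above are easier to read in use. Below is a minimal sketch, not Broadcom's or DPDK's actual driver code: hwrm_send() is a hypothetical mailbox helper, the HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS request-type constant is assumed from the command-ID list elsewhere in this file, and rte_cpu_to_le_*() come from DPDK's <rte_byteorder.h> (HWRM fields are little-endian).

#include <rte_byteorder.h>

int hwrm_send(void *req, uint32_t len);	/* hypothetical transport */

/* Signal after 8 aggregated completions, or after ~100 usec at the
 * latest; the timer fields are in 80 ns units per the comments above. */
static int example_cfg_aggint(uint16_t ring_id)
{
	struct hwrm_ring_cmpl_ring_cfg_aggint_params_input req = { 0 };

	req.req_type = rte_cpu_to_le_16(HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS);
	req.ring_id = rte_cpu_to_le_16(ring_id);
	req.flags = rte_cpu_to_le_16(
		HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_FLAGS_TIMER_RESET);
	req.num_cmpl_aggr_int = rte_cpu_to_le_16(8);
	req.int_lat_tmr_max = rte_cpu_to_le_16((100 * 1000) / 80);
	req.enables = rte_cpu_to_le_16(
		HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_ENABLES_NUM_CMPL_AGGR_INT |
		HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS_INPUT_ENABLES_INT_LAT_TMR_MAX);
	return hwrm_send(&req, sizeof(req));
}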
 
-/* hwrm_cfa_redirect_tunnel_type_alloc_output (size:128b/16B) */
-struct hwrm_cfa_redirect_tunnel_type_alloc_output {
+/* hwrm_ring_cmpl_ring_cfg_aggint_params_output (size:128b/16B) */
+struct hwrm_ring_cmpl_ring_cfg_aggint_params_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -21268,13 +20622,13 @@ struct hwrm_cfa_redirect_tunnel_type_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/**************************************
- * hwrm_cfa_redirect_tunnel_type_free *
- **************************************/
-
+/***********************
+ * hwrm_ring_grp_alloc *
+ ***********************/
 
-/* hwrm_cfa_redirect_tunnel_type_free_input (size:192b/24B) */
-struct hwrm_cfa_redirect_tunnel_type_free_input {
+
+/* hwrm_ring_grp_alloc_input (size:192b/24B) */
+struct hwrm_ring_grp_alloc_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -21302,53 +20656,31 @@ struct hwrm_cfa_redirect_tunnel_type_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* The destination function id, to whom the traffic is redirected. */
-       uint16_t        dest_fid;
-       /* Tunnel Type. */
-       uint8_t tunnel_type;
-       /* Non-tunnel */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_NONTUNNEL \
-               UINT32_C(0x0)
-       /* Virtual eXtensible Local Area Network (VXLAN) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_VXLAN \
-               UINT32_C(0x1)
-       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_NVGRE \
-               UINT32_C(0x2)
-       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_L2GRE \
-               UINT32_C(0x3)
-       /* IP in IP */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_IPIP \
-               UINT32_C(0x4)
-       /* Generic Network Virtualization Encapsulation (Geneve) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_GENEVE \
-               UINT32_C(0x5)
-       /* Multi-Protocol Lable Switching (MPLS) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_MPLS \
-               UINT32_C(0x6)
-       /* Stateless Transport Tunnel (STT) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_STT \
-               UINT32_C(0x7)
-       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_IPGRE \
-               UINT32_C(0x8)
-       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_VXLAN_V4 \
-               UINT32_C(0x9)
-       /* Enhance Generic Routing Encapsulation (GRE version 1) inside IP datagram payload */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_IPGRE_V1 \
-               UINT32_C(0xa)
-       /* Any tunneled traffic */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_ANYTUNNEL \
-               UINT32_C(0xff)
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_LAST \
-               HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_ANYTUNNEL
-       uint8_t unused_0[5];
+       /*
+        * This value identifies the CR associated with the ring
+        * group.
+        */
+       uint16_t        cr;
+       /*
+        * This value identifies the main RR associated with the ring
+        * group.
+        */
+       uint16_t        rr;
+       /*
+        * This value identifies the aggregation RR associated with
+        * the ring group.  If this value is 0xFF... (All Fs), then no
+        * Aggregation ring will be set.
+        */
+       uint16_t        ar;
+       /*
+        * This value identifies the statistics context associated
+        * with the ring group.
+        */
+       uint16_t        sc;
 } __attribute__((packed));
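
For illustration, a sketch of tying the four IDs together, reusing the hypothetical hwrm_send() and an HWRM_RING_GRP_ALLOC command constant assumed from elsewhere in this file; the ring and stats-context IDs would come from earlier *_alloc responses. Per the `ar` description, all-Fs (0xffff for this 16-bit field) selects no aggregation ring.

static int example_ring_grp_alloc(uint16_t cpl_ring, uint16_t rx_ring,
				  uint16_t stat_ctx)
{
	struct hwrm_ring_grp_alloc_input req = { 0 };

	req.req_type = rte_cpu_to_le_16(HWRM_RING_GRP_ALLOC);
	req.cr = rte_cpu_to_le_16(cpl_ring);	/* completion ring */
	req.rr = rte_cpu_to_le_16(rx_ring);	/* main RX ring */
	req.ar = rte_cpu_to_le_16(0xffff);	/* no aggregation ring */
	req.sc = rte_cpu_to_le_16(stat_ctx);	/* statistics context */
	return hwrm_send(&req, sizeof(req));
}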
 
-/* hwrm_cfa_redirect_tunnel_type_free_output (size:128b/16B) */
-struct hwrm_cfa_redirect_tunnel_type_free_output {
+/* hwrm_ring_grp_alloc_output (size:128b/16B) */
+struct hwrm_ring_grp_alloc_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -21357,7 +20689,13 @@ struct hwrm_cfa_redirect_tunnel_type_free_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       uint8_t unused_0[7];
+       /*
+        * This is the ring group ID value.  Use this value to program
+        * the default ring group for the VNIC or as table entries
+        * in an RSS/COS context.
+        */
+       uint32_t        ring_group_id;
+       uint8_t unused_0[3];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -21368,13 +20706,13 @@ struct hwrm_cfa_redirect_tunnel_type_free_output {
        uint8_t valid;
 } __attribute__((packed));
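
The `valid` byte convention above is the contract for every HWRM response: firmware DMAs the record into host memory and writes `valid` last, so a driver must poll that byte before reading the payload. A sketch of the pattern, with an arbitrary 1 ms timeout standing in for a real driver's policy:

#include <errno.h>
#include <rte_atomic.h>
#include <rte_cycles.h>

static int example_wait_valid(volatile struct hwrm_ring_grp_alloc_output *resp)
{
	int i;

	for (i = 0; i < 1000; i++) {
		if (resp->valid == 1) {
			rte_io_rmb();	/* order payload reads after valid */
			return 0;
		}
		rte_delay_us(1);
	}
	return -ETIMEDOUT;	/* firmware never completed the response */
}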
 
-/**************************************
- * hwrm_cfa_redirect_tunnel_type_info *
- **************************************/
+/**********************
+ * hwrm_ring_grp_free *
+ **********************/
 
 
-/* hwrm_cfa_redirect_tunnel_type_info_input (size:192b/24B) */
-struct hwrm_cfa_redirect_tunnel_type_info_input {
+/* hwrm_ring_grp_free_input (size:192b/24B) */
+struct hwrm_ring_grp_free_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -21402,181 +20740,39 @@ struct hwrm_cfa_redirect_tunnel_type_info_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* The source function id. */
-       uint16_t        src_fid;
-       /* Tunnel Type. */
-       uint8_t tunnel_type;
-       /* Non-tunnel */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_NONTUNNEL \
-               UINT32_C(0x0)
-       /* Virtual eXtensible Local Area Network (VXLAN) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_VXLAN \
-               UINT32_C(0x1)
-       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_NVGRE \
-               UINT32_C(0x2)
-       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_L2GRE \
-               UINT32_C(0x3)
-       /* IP in IP */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_IPIP \
-               UINT32_C(0x4)
-       /* Generic Network Virtualization Encapsulation (Geneve) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_GENEVE \
-               UINT32_C(0x5)
-       /* Multi-Protocol Lable Switching (MPLS) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_MPLS \
-               UINT32_C(0x6)
-       /* Stateless Transport Tunnel (STT) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_STT \
-               UINT32_C(0x7)
-       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_IPGRE \
-               UINT32_C(0x8)
-       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_VXLAN_V4 \
-               UINT32_C(0x9)
-       /* Enhance Generic Routing Encapsulation (GRE version 1) inside IP datagram payload */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_IPGRE_V1 \
-               UINT32_C(0xa)
-       /* Any tunneled traffic */
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_ANYTUNNEL \
-               UINT32_C(0xff)
-       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_LAST \
-               HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_ANYTUNNEL
-       uint8_t unused_0[5];
-} __attribute__((packed));
-
-/* hwrm_cfa_redirect_tunnel_type_info_output (size:128b/16B) */
-struct hwrm_cfa_redirect_tunnel_type_info_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* The destination function id, to whom the traffic is redirected. */
-       uint16_t        dest_fid;
-       uint8_t unused_0[5];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
-
-/* hwrm_vxlan_ipv4_hdr (size:128b/16B) */
-struct hwrm_vxlan_ipv4_hdr {
-       /* IPv4 version and header length. */
-       uint8_t ver_hlen;
-       /* IPv4 header length */
-       #define HWRM_VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_MASK UINT32_C(0xf)
-       #define HWRM_VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT 0
-       /* Version */
-       #define HWRM_VXLAN_IPV4_HDR_VER_HLEN_VERSION_MASK      UINT32_C(0xf0)
-       #define HWRM_VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT       4
-       /* IPv4 type of service. */
-       uint8_t tos;
-       /* IPv4 identification. */
-       uint16_t        ip_id;
-       /* IPv4 flags and offset. */
-       uint16_t        flags_frag_offset;
-       /* IPv4 TTL. */
-       uint8_t ttl;
-       /* IPv4 protocol. */
-       uint8_t protocol;
-       /* IPv4 source address. */
-       uint32_t        src_ip_addr;
-       /* IPv4 destination address. */
-       uint32_t        dest_ip_addr;
-} __attribute__((packed));
-
-/* hwrm_vxlan_ipv6_hdr (size:320b/40B) */
-struct hwrm_vxlan_ipv6_hdr {
-       /* IPv6 version, traffic class and flow label. */
-       uint32_t        ver_tc_flow_label;
-       /* IPv6 version shift */
-       #define HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_VER_SFT \
-               UINT32_C(0x1c)
-       /* IPv6 version mask */
-       #define HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_VER_MASK \
-               UINT32_C(0xf0000000)
-       /* IPv6 TC shift */
-       #define HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_TC_SFT \
-               UINT32_C(0x14)
-       /* IPv6 TC mask */
-       #define HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_TC_MASK \
-               UINT32_C(0xff00000)
-       /* IPv6 flow label shift */
-       #define HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_SFT \
-               UINT32_C(0x0)
-       /* IPv6 flow label mask */
-       #define HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_MASK \
-               UINT32_C(0xfffff)
-       #define HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_LAST \
-               HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_MASK
-       /* IPv6 payload length. */
-       uint16_t        payload_len;
-       /* IPv6 next header. */
-       uint8_t next_hdr;
-       /* IPv6 TTL. */
-       uint8_t ttl;
-       /* IPv6 source address. */
-       uint32_t        src_ip_addr[4];
-       /* IPv6 destination address. */
-       uint32_t        dest_ip_addr[4];
+       /* This is the ring group ID value. */
+       uint32_t        ring_group_id;
+       uint8_t unused_0[4];
 } __attribute__((packed));
-
-/* hwrm_cfa_encap_data_vxlan (size:576b/72B) */
-struct hwrm_cfa_encap_data_vxlan {
-       /* Source MAC address. */
-       uint8_t src_mac_addr[6];
-       /* reserved. */
-       uint16_t        unused_0;
-       /* Destination MAC address. */
-       uint8_t dst_mac_addr[6];
-       /* Number of VLAN tags. */
-       uint8_t num_vlan_tags;
-       /* reserved. */
-       uint8_t unused_1;
-       /* Outer VLAN TPID. */
-       uint16_t        ovlan_tpid;
-       /* Outer VLAN TCI. */
-       uint16_t        ovlan_tci;
-       /* Inner VLAN TPID. */
-       uint16_t        ivlan_tpid;
-       /* Inner VLAN TCI. */
-       uint16_t        ivlan_tci;
-       /* L3 header fields. */
-       uint32_t        l3[10];
-       /* IP version mask. */
-       #define HWRM_CFA_ENCAP_DATA_VXLAN_L3_VER_MASK UINT32_C(0xf)
-       /* IP version 4. */
-       #define HWRM_CFA_ENCAP_DATA_VXLAN_L3_VER_IPV4 UINT32_C(0x4)
-       /* IP version 6. */
-       #define HWRM_CFA_ENCAP_DATA_VXLAN_L3_VER_IPV6 UINT32_C(0x6)
-       #define HWRM_CFA_ENCAP_DATA_VXLAN_L3_LAST \
-               HWRM_CFA_ENCAP_DATA_VXLAN_L3_VER_IPV6
-       /* UDP source port. */
-       uint16_t        src_port;
-       /* UDP destination port. */
-       uint16_t        dst_port;
-       /* VXLAN Network Identifier. */
-       uint32_t        vni;
+
+/* hwrm_ring_grp_free_output (size:128b/16B) */
+struct hwrm_ring_grp_free_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       uint8_t unused_0[7];
+       /*
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
+        */
+       uint8_t valid;
 } __attribute__((packed));
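
Teardown mirrors allocation: the 32-bit ring_group_id returned in hwrm_ring_grp_alloc_output is echoed back verbatim. A sketch, again via the hypothetical hwrm_send():

static int example_ring_grp_free(uint32_t ring_group_id)
{
	struct hwrm_ring_grp_free_input req = { 0 };

	req.req_type = rte_cpu_to_le_16(HWRM_RING_GRP_FREE);
	req.ring_group_id = rte_cpu_to_le_32(ring_group_id);
	return hwrm_send(&req, sizeof(req));
}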
 
-/*******************************
- * hwrm_cfa_encap_record_alloc *
- *******************************/
+/****************************
+ * hwrm_cfa_l2_filter_alloc *
+ ****************************/
 
 
-/* hwrm_cfa_encap_record_alloc_input (size:832b/104B) */
-struct hwrm_cfa_encap_record_alloc_input {
+/* hwrm_cfa_l2_filter_alloc_input (size:768b/96B) */
+struct hwrm_cfa_l2_filter_alloc_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -21603,46 +20799,343 @@ struct hwrm_cfa_encap_record_alloc_input {
         * physical address (HPA) or a guest physical address (GPA) and must
         * point to a physically contiguous block of memory.
         */
-       uint64_t        resp_addr;
-       uint32_t        flags;
-       /* Setting of this flag indicates the applicability to the loopback path. */
-       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_FLAGS_LOOPBACK \
-               UINT32_C(0x1)
-       /* Encapsulation Type. */
-       uint8_t encap_type;
+       uint64_t        resp_addr;
+       uint32_t        flags;
+       /*
+        * Enumeration denoting the RX, TX type of the resource.
+        * This enumeration is used for resources that are similar for both
+        * TX and RX paths of the chip.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH \
+               UINT32_C(0x1)
+       /* tx path */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH_TX \
+               UINT32_C(0x0)
+       /* rx path */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH_RX \
+               UINT32_C(0x1)
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH_LAST \
+               HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH_RX
+       /* Setting of this flag indicates the applicability to the loopback path. */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_LOOPBACK \
+               UINT32_C(0x2)
+       /*
+        * Setting of this flag indicates drop action. If this flag is not set,
+        * then it should be considered accept action.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_DROP \
+               UINT32_C(0x4)
+       /*
+        * If this flag is set, all t_l2_* fields are invalid
+        * and they should not be specified.
+        * If this flag is set, then l2_* fields refer to
+        * fields of outermost L2 header.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_OUTERMOST \
+               UINT32_C(0x8)
+       /*
+        * Enumeration denoting NO_ROCE_L2 to support old drivers.
+        * New drivers use L2 for L2-only traffic and ROCE for RoCE plus L2 traffic.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_TRAFFIC_MASK \
+               UINT32_C(0x30)
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_TRAFFIC_SFT       4
+       /* To support old drivers */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_TRAFFIC_NO_ROCE_L2 \
+               (UINT32_C(0x0) << 4)
+       /* Only L2 traffic */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_TRAFFIC_L2 \
+               (UINT32_C(0x1) << 4)
+       /* RoCE & L2 traffic */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_TRAFFIC_ROCE \
+               (UINT32_C(0x2) << 4)
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_TRAFFIC_LAST \
+               HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_TRAFFIC_ROCE
+       uint32_t        enables;
+       /*
+        * This bit must be '1' for the l2_addr field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR \
+               UINT32_C(0x1)
+       /*
+        * This bit must be '1' for the l2_addr_mask field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR_MASK \
+               UINT32_C(0x2)
+       /*
+        * This bit must be '1' for the l2_ovlan field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_OVLAN \
+               UINT32_C(0x4)
+       /*
+        * This bit must be '1' for the l2_ovlan_mask field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_OVLAN_MASK \
+               UINT32_C(0x8)
+       /*
+        * This bit must be '1' for the l2_ivlan field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_IVLAN \
+               UINT32_C(0x10)
+       /*
+        * This bit must be '1' for the l2_ivlan_mask field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_IVLAN_MASK \
+               UINT32_C(0x20)
+       /*
+        * This bit must be '1' for the t_l2_addr field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_T_L2_ADDR \
+               UINT32_C(0x40)
+       /*
+        * This bit must be '1' for the t_l2_addr_mask field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_T_L2_ADDR_MASK \
+               UINT32_C(0x80)
+       /*
+        * This bit must be '1' for the t_l2_ovlan field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_T_L2_OVLAN \
+               UINT32_C(0x100)
+       /*
+        * This bit must be '1' for the t_l2_ovlan_mask field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_T_L2_OVLAN_MASK \
+               UINT32_C(0x200)
+       /*
+        * This bit must be '1' for the t_l2_ivlan field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_T_L2_IVLAN \
+               UINT32_C(0x400)
+       /*
+        * This bit must be '1' for the t_l2_ivlan_mask field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_T_L2_IVLAN_MASK \
+               UINT32_C(0x800)
+       /*
+        * This bit must be '1' for the src_type field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_SRC_TYPE \
+               UINT32_C(0x1000)
+       /*
+        * This bit must be '1' for the src_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_SRC_ID \
+               UINT32_C(0x2000)
+       /*
+        * This bit must be '1' for the tunnel_type field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_TYPE \
+               UINT32_C(0x4000)
+       /*
+        * This bit must be '1' for the dst_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_DST_ID \
+               UINT32_C(0x8000)
+       /*
+        * This bit must be '1' for the mirror_vnic_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID \
+               UINT32_C(0x10000)
+       /*
+        * This value sets the match value for the L2 MAC address.
+        * Destination MAC address for RX path.
+        * Source MAC address for TX path.
+        */
+       uint8_t l2_addr[6];
+       uint8_t unused_0[2];
+       /*
+        * This value sets the mask value for the L2 address.
+        * A value of 0 will mask the corresponding bit from
+        * compare.
+        */
+       uint8_t l2_addr_mask[6];
+       /* This value sets VLAN ID value for outer VLAN. */
+       uint16_t        l2_ovlan;
+       /*
+        * This value sets the mask value for the ovlan id.
+        * A value of 0 will mask the corresponding bit from
+        * compare.
+        */
+       uint16_t        l2_ovlan_mask;
+       /* This value sets VLAN ID value for inner VLAN. */
+       uint16_t        l2_ivlan;
+       /*
+        * This value sets the mask value for the ivlan id.
+        * A value of 0 will mask the corresponding bit from
+        * compare.
+        */
+       uint16_t        l2_ivlan_mask;
+       uint8_t unused_1[2];
+       /*
+        * This value sets the match value for the tunnel
+        * L2 MAC address.
+        * Destination MAC address for RX path.
+        * Source MAC address for TX path.
+        */
+       uint8_t t_l2_addr[6];
+       uint8_t unused_2[2];
+       /*
+        * This value sets the mask value for the tunnel L2
+        * address.
+        * A value of 0 will mask the corresponding bit from
+        * compare.
+        */
+       uint8_t t_l2_addr_mask[6];
+       /* This value sets VLAN ID value for tunnel outer VLAN. */
+       uint16_t        t_l2_ovlan;
+       /*
+        * This value sets the mask value for the tunnel ovlan id.
+        * A value of 0 will mask the corresponding bit from
+        * compare.
+        */
+       uint16_t        t_l2_ovlan_mask;
+       /* This value sets VLAN ID value for tunnel inner VLAN. */
+       uint16_t        t_l2_ivlan;
+       /*
+        * This value sets the mask value for the tunnel ivlan id.
+        * A value of 0 will mask the corresponding bit from
+        * compare.
+        */
+       uint16_t        t_l2_ivlan_mask;
+       /* This value identifies the type of source of the packet. */
+       uint8_t src_type;
+       /* Network port */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_NPORT UINT32_C(0x0)
+       /* Physical function */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_PF    UINT32_C(0x1)
+       /* Virtual function */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_VF    UINT32_C(0x2)
+       /* Virtual NIC of a function */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_VNIC  UINT32_C(0x3)
+       /* Embedded processor for CFA management */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_KONG  UINT32_C(0x4)
+       /* Embedded processor for OOB management */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_APE   UINT32_C(0x5)
+       /* Embedded processor for RoCE */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_BONO  UINT32_C(0x6)
+       /* Embedded processor for network proxy functions */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_TANG  UINT32_C(0x7)
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_LAST \
+               HWRM_CFA_L2_FILTER_ALLOC_INPUT_SRC_TYPE_TANG
+       uint8_t unused_3;
+       /*
+        * This value is the id of the source.
+        * For a network port, it represents port_id.
+        * For a physical function, it represents fid.
+        * For a virtual function, it represents vf_id.
+        * For a vnic, it represents vnic_id.
+        * For embedded processors, this id is not valid.
+        *
+        * Notes:
+        * 1. The function ID is implied if it src_id is
+        *    not provided for a src_type that is either
+        */
+       uint32_t        src_id;
+       /* Tunnel Type. */
+       uint8_t tunnel_type;
+       /* Non-tunnel */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL \
+               UINT32_C(0x0)
        /* Virtual eXtensible Local Area Network (VXLAN) */
-       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_VXLAN \
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
                UINT32_C(0x1)
        /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
-       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_NVGRE \
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NVGRE \
                UINT32_C(0x2)
-       /* Generic Routing Encapsulation (GRE) after inside Ethernet payload */
-       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_L2GRE \
+       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_L2GRE \
                UINT32_C(0x3)
        /* IP in IP */
-       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_IPIP \
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPIP \
                UINT32_C(0x4)
        /* Generic Network Virtualization Encapsulation (Geneve) */
-       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_GENEVE \
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
                UINT32_C(0x5)
        /* Multi-Protocol Label Switching (MPLS) */
-       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_MPLS \
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_MPLS \
                UINT32_C(0x6)
-       /* VLAN */
-       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_VLAN \
+       /* Stateless Transport Tunnel (STT) */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_STT \
                UINT32_C(0x7)
        /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
-       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_IPGRE \
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPGRE \
                UINT32_C(0x8)
-       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_LAST \
-               HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_IPGRE
-       uint8_t unused_0[3];
-       /* This value is encap data used for the given encap type. */
-       uint32_t        encap_data[20];
+       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+               UINT32_C(0x9)
+       /* Any tunneled traffic */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
+               UINT32_C(0xff)
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_LAST \
+               HWRM_CFA_L2_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL
+       uint8_t unused_4;
+       /*
+        * If set, this value shall represent the
+        * Logical VNIC ID of the destination VNIC for the RX
+        * path and network port id of the destination port for
+        * the TX path.
+        */
+       uint16_t        dst_id;
+       /*
+        * Logical VNIC ID of the VNIC where traffic is
+        * mirrored.
+        */
+       uint16_t        mirror_vnic_id;
+       /*
+        * This hint is provided to help in placing
+        * the filter in the filter table.
+        */
+       uint8_t pri_hint;
+       /* No preference */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_PRI_HINT_NO_PREFER \
+               UINT32_C(0x0)
+       /* Above the given filter */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_PRI_HINT_ABOVE_FILTER \
+               UINT32_C(0x1)
+       /* Below the given filter */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_PRI_HINT_BELOW_FILTER \
+               UINT32_C(0x2)
+       /* As high as possible */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_PRI_HINT_MAX \
+               UINT32_C(0x3)
+       /* As low as possible */
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_PRI_HINT_MIN \
+               UINT32_C(0x4)
+       #define HWRM_CFA_L2_FILTER_ALLOC_INPUT_PRI_HINT_LAST \
+               HWRM_CFA_L2_FILTER_ALLOC_INPUT_PRI_HINT_MIN
+       uint8_t unused_5;
+       uint32_t        unused_6;
+       /*
+        * This is the ID of the filter that goes along with
+        * the pri_hint.
+        *
+        * This field is valid only for the following values.
+        * 1 - Above the given filter
+        * 2 - Below the given filter
+        */
+       uint64_t        l2_filter_id_hint;
 } __attribute__((packed));
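
Most users of this request set only a small subset of the fields and advertise that subset through `enables`. A sketch of the common case, an exact-match RX unicast filter steering one MAC to a VNIC, under the same assumptions as the earlier examples (hypothetical hwrm_send(), command constant from this file's command-ID list):

#include <string.h>

/* An all-ones l2_addr_mask makes every address bit significant; a 0
 * bit would be masked out of the compare, per the comments above. */
static int example_l2_filter_alloc(const uint8_t mac[6], uint16_t vnic_id)
{
	struct hwrm_cfa_l2_filter_alloc_input req = { 0 };

	req.req_type = rte_cpu_to_le_16(HWRM_CFA_L2_FILTER_ALLOC);
	req.flags = rte_cpu_to_le_32(
		HWRM_CFA_L2_FILTER_ALLOC_INPUT_FLAGS_PATH_RX);
	req.enables = rte_cpu_to_le_32(
		HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR |
		HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR_MASK |
		HWRM_CFA_L2_FILTER_ALLOC_INPUT_ENABLES_DST_ID);
	memcpy(req.l2_addr, mac, 6);
	memset(req.l2_addr_mask, 0xff, 6);
	req.dst_id = rte_cpu_to_le_16(vnic_id);	/* destination VNIC (RX) */
	return hwrm_send(&req, sizeof(req));
}

The l2_filter_id returned in the output record below is the handle later passed to the free and cfg commands, and flow_id matches the flow identifier reported in RX completion records.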
 
-/* hwrm_cfa_encap_record_alloc_output (size:128b/16B) */
-struct hwrm_cfa_encap_record_alloc_output {
+/* hwrm_cfa_l2_filter_alloc_output (size:192b/24B) */
+struct hwrm_cfa_l2_filter_alloc_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -21651,8 +21144,19 @@ struct hwrm_cfa_encap_record_alloc_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* This value is an opaque id into CFA data structures. */
-       uint32_t        encap_record_id;
+       /*
+        * This value identifies a set of CFA data structures used for an L2
+        * context.
+        */
+       uint64_t        l2_filter_id;
+       /*
+        * This is the ID of the flow associated with this
+        * filter.
+        * This value shall be used to match and associate the
+        * flow identifier returned in completion records.
+        * A value of 0xFFFFFFFF shall indicate no flow id.
+        */
+       uint32_t        flow_id;
        uint8_t unused_0[3];
        /*
         * This field is used in Output records to indicate that the output
@@ -21664,13 +21168,13 @@ struct hwrm_cfa_encap_record_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/******************************
- * hwrm_cfa_encap_record_free *
- ******************************/
+/***************************
+ * hwrm_cfa_l2_filter_free *
+ ***************************/
 
 
-/* hwrm_cfa_encap_record_free_input (size:192b/24B) */
-struct hwrm_cfa_encap_record_free_input {
+/* hwrm_cfa_l2_filter_free_input (size:192b/24B) */
+struct hwrm_cfa_l2_filter_free_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -21698,13 +21202,15 @@ struct hwrm_cfa_encap_record_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* This value is an opaque id into CFA data structures. */
-       uint32_t        encap_record_id;
-       uint8_t unused_0[4];
+       /*
+        * This value identifies a set of CFA data structures used for an L2
+        * context.
+        */
+       uint64_t        l2_filter_id;
 } __attribute__((packed));
 
-/* hwrm_cfa_encap_record_free_output (size:128b/16B) */
-struct hwrm_cfa_encap_record_free_output {
+/* hwrm_cfa_l2_filter_free_output (size:128b/16B) */
+struct hwrm_cfa_l2_filter_free_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -21724,13 +21230,13 @@ struct hwrm_cfa_encap_record_free_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/********************************
- * hwrm_cfa_ntuple_filter_alloc *
- ********************************/
+/**************************
+ * hwrm_cfa_l2_filter_cfg *
+ **************************/
 
 
-/* hwrm_cfa_ntuple_filter_alloc_input (size:1024b/128B) */
-struct hwrm_cfa_ntuple_filter_alloc_input {
+/* hwrm_cfa_l2_filter_cfg_input (size:320b/40B) */
+struct hwrm_cfa_l2_filter_cfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -21759,315 +21265,260 @@ struct hwrm_cfa_ntuple_filter_alloc_input {
         */
        uint64_t        resp_addr;
        uint32_t        flags;
-       /* Setting of this flag indicates the applicability to the loopback path. */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_LOOPBACK \
-               UINT32_C(0x1)
-       /*
-        * Setting of this flag indicates drop action. If this flag is not set,
-        * then it should be considered accept action.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_DROP \
-               UINT32_C(0x2)
-       /*
-        * Setting of this flag indicates that a meter is expected to be attached
-        * to this flow. This hint can be used when choosing the action record
-        * format required for the flow.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_METER \
-               UINT32_C(0x4)
-       uint32_t        enables;
        /*
-        * This bit must be '1' for the l2_filter_id field to be
-        * configured.
+        * Enumeration denoting the RX, TX type of the resource.
+        * This enumeration is used for resources that are similar for both
+        * TX and RX paths of the chip.
         */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_L2_FILTER_ID \
+       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_PATH \
                UINT32_C(0x1)
+       /* tx path */
+       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_PATH_TX \
+               UINT32_C(0x0)
+       /* rx path */
+       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_PATH_RX \
+               UINT32_C(0x1)
+       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_PATH_LAST \
+               HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_PATH_RX
        /*
-        * This bit must be '1' for the ethertype field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_ETHERTYPE \
-               UINT32_C(0x2)
-       /*
-        * This bit must be '1' for the tunnel_type field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_TYPE \
-               UINT32_C(0x4)
-       /*
-        * This bit must be '1' for the src_macaddr field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_MACADDR \
-               UINT32_C(0x8)
-       /*
-        * This bit must be '1' for the ipaddr_type field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_IPADDR_TYPE \
-               UINT32_C(0x10)
-       /*
-        * This bit must be '1' for the src_ipaddr field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_IPADDR \
-               UINT32_C(0x20)
-       /*
-        * This bit must be '1' for the src_ipaddr_mask field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_IPADDR_MASK \
-               UINT32_C(0x40)
-       /*
-        * This bit must be '1' for the dst_ipaddr field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_IPADDR \
-               UINT32_C(0x80)
-       /*
-        * This bit must be '1' for the dst_ipaddr_mask field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_IPADDR_MASK \
-               UINT32_C(0x100)
-       /*
-        * This bit must be '1' for the ip_protocol field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_IP_PROTOCOL \
-               UINT32_C(0x200)
-       /*
-        * This bit must be '1' for the src_port field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_PORT \
-               UINT32_C(0x400)
-       /*
-        * This bit must be '1' for the src_port_mask field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_PORT_MASK \
-               UINT32_C(0x800)
-       /*
-        * This bit must be '1' for the dst_port field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_PORT \
-               UINT32_C(0x1000)
-       /*
-        * This bit must be '1' for the dst_port_mask field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_PORT_MASK \
-               UINT32_C(0x2000)
-       /*
-        * This bit must be '1' for the pri_hint field to be
-        * configured.
+        * Setting of this flag indicates drop action. If this flag is not set,
+        * then it should be considered accept action.
         */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_PRI_HINT \
-               UINT32_C(0x4000)
+       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_DROP \
+               UINT32_C(0x2)
        /*
-        * This bit must be '1' for the ntuple_filter_id field to be
-        * configured.
+        * Enumeration denoting NO_ROCE_L2 to support old drivers.
+        * New drivers use L2 for L2-only traffic and ROCE for RoCE plus L2 traffic.
         */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_NTUPLE_FILTER_ID \
-               UINT32_C(0x8000)
+       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_TRAFFIC_MASK \
+               UINT32_C(0xc)
+       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_TRAFFIC_SFT       2
+       /* To support old drivers */
+       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_TRAFFIC_NO_ROCE_L2 \
+               (UINT32_C(0x0) << 2)
+       /* Only L2 traffic */
+       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_TRAFFIC_L2 \
+               (UINT32_C(0x1) << 2)
+       /* RoCE & L2 traffic */
+       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_TRAFFIC_ROCE \
+               (UINT32_C(0x2) << 2)
+       #define HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_TRAFFIC_LAST \
+               HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_TRAFFIC_ROCE
+       uint32_t        enables;
        /*
         * This bit must be '1' for the dst_id field to be
         * configured.
         */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_ID \
-               UINT32_C(0x10000)
-       /*
-        * This bit must be '1' for the mirror_vnic_id field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID \
-               UINT32_C(0x20000)
+       #define HWRM_CFA_L2_FILTER_CFG_INPUT_ENABLES_DST_ID \
+               UINT32_C(0x1)
        /*
-        * This bit must be '1' for the dst_macaddr field to be
+        * This bit must be '1' for the new_mirror_vnic_id field to be
         * configured.
         */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_MACADDR \
-               UINT32_C(0x40000)
+       #define HWRM_CFA_L2_FILTER_CFG_INPUT_ENABLES_NEW_MIRROR_VNIC_ID \
+               UINT32_C(0x2)
        /*
         * This value identifies a set of CFA data structures used for an L2
         * context.
         */
        uint64_t        l2_filter_id;
        /*
-        * This value indicates the source MAC address in
-        * the Ethernet header.
+        * If set, this value shall represent the
+        * Logical VNIC ID of the destination VNIC for the RX
+        * path and network port id of the destination port for
+        * the TX path.
         */
-       uint8_t src_macaddr[6];
-       /* This value indicates the ethertype in the Ethernet header. */
-       uint16_t        ethertype;
+       uint32_t        dst_id;
        /*
-        * This value indicates the type of IP address.
-        * 4 - IPv4
-        * 6 - IPv6
-        * All others are invalid.
+        * New Logical VNIC ID of the VNIC where traffic is
+        * mirrored.
         */
-       uint8_t ip_addr_type;
-       /* invalid */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_UNKNOWN \
-               UINT32_C(0x0)
-       /* IPv4 */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV4 \
-               UINT32_C(0x4)
-       /* IPv6 */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV6 \
-               UINT32_C(0x6)
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_LAST \
-               HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV6
+       uint32_t        new_mirror_vnic_id;
+} __attribute__((packed));
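
The cfg command lets a driver retarget a live filter without a free/alloc cycle. A sketch that re-points an existing RX filter at a new destination VNIC, applying only the field flagged in `enables` (same hypothetical hwrm_send() as above):

static int example_l2_filter_retarget(uint64_t l2_filter_id, uint32_t new_dst)
{
	struct hwrm_cfa_l2_filter_cfg_input req = { 0 };

	req.req_type = rte_cpu_to_le_16(HWRM_CFA_L2_FILTER_CFG);
	req.flags = rte_cpu_to_le_32(
		HWRM_CFA_L2_FILTER_CFG_INPUT_FLAGS_PATH_RX);
	req.enables = rte_cpu_to_le_32(
		HWRM_CFA_L2_FILTER_CFG_INPUT_ENABLES_DST_ID);
	req.l2_filter_id = rte_cpu_to_le_64(l2_filter_id);
	req.dst_id = rte_cpu_to_le_32(new_dst);
	return hwrm_send(&req, sizeof(req));
}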
+
+/* hwrm_cfa_l2_filter_cfg_output (size:128b/16B) */
+struct hwrm_cfa_l2_filter_cfg_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       uint8_t unused_0[7];
        /*
-        * The value of protocol filed in IP header.
-        * Applies to UDP and TCP traffic.
-        * 6 - TCP
-        * 17 - UDP
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       uint8_t ip_protocol;
-       /* invalid */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_UNKNOWN \
-               UINT32_C(0x0)
-       /* TCP */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_TCP \
-               UINT32_C(0x6)
-       /* UDP */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_UDP \
-               UINT32_C(0x11)
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_LAST \
-               HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_UDP
+       uint8_t valid;
+} __attribute__((packed));
+
+/***************************
+ * hwrm_cfa_l2_set_rx_mask *
+ ***************************/
+
+
+/* hwrm_cfa_l2_set_rx_mask_input (size:448b/56B) */
+struct hwrm_cfa_l2_set_rx_mask_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * If set, this value shall represent the
-        * Logical VNIC ID of the destination VNIC for the RX
-        * path and network port id of the destination port for
-        * the TX path.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       uint16_t        dst_id;
+       uint16_t        cmpl_ring;
        /*
-        * Logical VNIC ID of the VNIC where traffic is
-        * mirrored.
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       uint16_t        mirror_vnic_id;
+       uint16_t        seq_id;
        /*
-        * This value indicates the tunnel type for this filter.
-        * If this field is not specified, then the filter shall
-        * apply to both non-tunneled and tunneled packets.
-        * If this field conflicts with the tunnel_type specified
-        * in the l2_filter_id, then the HWRM shall return an
-        * error for this command.
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       uint8_t tunnel_type;
-       /* Non-tunnel */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL \
-               UINT32_C(0x0)
-       /* Virtual eXtensible Local Area Network (VXLAN) */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
-               UINT32_C(0x1)
-       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NVGRE \
-               UINT32_C(0x2)
-       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_L2GRE \
-               UINT32_C(0x3)
-       /* IP in IP */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPIP \
-               UINT32_C(0x4)
-       /* Generic Network Virtualization Encapsulation (Geneve) */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
-               UINT32_C(0x5)
-       /* Multi-Protocol Lable Switching (MPLS) */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_MPLS \
-               UINT32_C(0x6)
-       /* Stateless Transport Tunnel (STT) */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_STT \
-               UINT32_C(0x7)
-       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPGRE \
-               UINT32_C(0x8)
-       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
-               UINT32_C(0x9)
-       /* Any tunneled traffic */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
-               UINT32_C(0xff)
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_LAST \
-               HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL
+       uint16_t        target_id;
        /*
-        * This hint is provided to help in placing
-        * the filter in the filter table.
+        * A physical address pointer pointing to a host buffer that the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       uint8_t pri_hint;
-       /* No preference */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_NO_PREFER \
-               UINT32_C(0x0)
-       /* Above the given filter */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_ABOVE \
-               UINT32_C(0x1)
-       /* Below the given filter */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_BELOW \
+       uint64_t        resp_addr;
+       /* VNIC ID */
+       uint32_t        vnic_id;
+       uint32_t        mask;
+       /*
+        * When this bit is '1', the function is requested to accept
+        * multi-cast packets specified by the multicast addr table.
+        */
+       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_MCAST \
                UINT32_C(0x2)
-       /* As high as possible */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_HIGHEST \
-               UINT32_C(0x3)
-       /* As low as possible */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_LOWEST \
+       /*
+        * When this bit is '1', the function is requested to accept
+        * all multi-cast packets.
+        */
+       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ALL_MCAST \
                UINT32_C(0x4)
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_LAST \
-               HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_LOWEST
        /*
-        * The value of source IP address to be used in filtering.
-        * For IPv4, first four bytes represent the IP address.
+        * When this bit is '1', the function is requested to accept
+        * broadcast packets.
         */
-       uint32_t        src_ipaddr[4];
+       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_BCAST \
+               UINT32_C(0x8)
        /*
-        * The value of source IP address mask to be used in
-        * filtering.
-        * For IPv4, first four bytes represent the IP address mask.
+        * When this bit is '1', the function is requested to be
+        * put in the promiscuous mode.
+        *
+        * The HWRM should accept any function to set up
+        * promiscuous mode.
+        *
+        * The HWRM shall follow the semantics below for the
+        * promiscuous mode support.
+        * # When partitioning is not enabled on a port
+        * (i.e. single PF on the port), then the PF shall
+        * be allowed to be in the promiscuous mode. When the
+        * PF is in the promiscuous mode, then it shall
+        * receive all host bound traffic on that port.
+        * # When partitioning is enabled on a port
+        * (i.e. multiple PFs per port) and a PF on that
+        * port is in the promiscuous mode, then the PF
+        * receives all traffic within that partition as
+        * identified by a unique identifier for the
+        * PF (e.g. S-Tag). If a unique outer VLAN
+        * for the PF is specified, then the setting of
+        * promiscuous mode on that PF shall result in the
+        * PF receiving all host bound traffic with matching
+        * outer VLAN.
+        * # A VF may be set in the promiscuous mode.
+        * In the promiscuous mode, the VF does not receive any
+        * traffic unless a unique outer VLAN for the
+        * VF is specified. If a unique outer VLAN
+        * for the VF is specified, then the setting of
+        * promiscuous mode on that VF shall result in the
+        * VF receiving all host bound traffic with the
+        * matching outer VLAN.
+        * # The HWRM shall allow the setting of promiscuous
+        * mode on a function independently from the
+        * promiscuous mode settings on other functions.
         */
-       uint32_t        src_ipaddr_mask[4];
+       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_PROMISCUOUS \
+               UINT32_C(0x10)
        /*
-        * The value of destination IP address to be used in filtering.
-        * For IPv4, first four bytes represent the IP address.
+        * If this flag is set, the corresponding RX
+        * filters shall be set up to cover multicast/broadcast
+        * filters for the outermost Layer 2 destination MAC
+        * address field.
         */
-       uint32_t        dst_ipaddr[4];
+       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_OUTERMOST \
+               UINT32_C(0x20)
        /*
-        * The value of destination IP address mask to be used in
-        * filtering.
-        * For IPv4, first four bytes represent the IP address mask.
+        * If this flag is set, the corresponding RX
+        * filters shall be set up to cover multicast/broadcast
+        * filters for the VLAN-tagged packets that match the
+        * TPID and VID fields of VLAN tags in the VLAN tag
+        * table specified in this command.
         */
-       uint32_t        dst_ipaddr_mask[4];
+       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_VLANONLY \
+               UINT32_C(0x40)
        /*
-        * The value of source port to be used in filtering.
-        * Applies to UDP and TCP traffic.
+        * If this flag is set, the corresponding RX
+        * filters shall be set up to cover multicast/broadcast
+        * filters for non-VLAN tagged packets and VLAN-tagged
+        * packets that match the TPID and VID fields of VLAN
+        * tags in the VLAN tag table specified in this command.
         */
-       uint16_t        src_port;
+       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_VLAN_NONVLAN \
+               UINT32_C(0x80)
        /*
-        * The value of source port mask to be used in filtering.
-        * Applies to UDP and TCP traffic.
+        * If this flag is set, the corresponding RX
+        * filters shall be set up to cover multicast/broadcast
+        * filters for non-VLAN tagged packets and VLAN-tagged
+        * packets matching any VLAN tag.
+        *
+        * If this flag is set, then the HWRM shall ignore
+        * VLAN tags specified in vlan_tag_tbl.
+        *
+        * If none of vlanonly, vlan_nonvlan, and anyvlan_nonvlan
+        * flags is set, then the HWRM shall ignore
+        * VLAN tags specified in vlan_tag_tbl.
+        *
+        * The HWRM client shall set at most one flag out of
+        * vlanonly, vlan_nonvlan, and anyvlan_nonvlan.
         */
-       uint16_t        src_port_mask;
+       #define HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_ANYVLAN_NONVLAN \
+               UINT32_C(0x100)
+       /* This is the address for mcast address tbl. */
+       uint64_t        mc_tbl_addr;
        /*
-        * The value of destination port to be used in filtering.
-        * Applies to UDP and TCP traffic.
+        * This value indicates how many entries in mc_tbl are valid.
+        * Each entry is 6 bytes.
         */
-       uint16_t        dst_port;
+       uint32_t        num_mc_entries;
+       uint8_t unused_0[4];
        /*
-        * The value of destination port mask to be used in
-        * filtering.
-        * Applies to UDP and TCP traffic.
+        * This is the address for VLAN tag table.
+        * Each VLAN entry in the table is 4 bytes of a VLAN tag
+        * including TPID, PCP, DEI, and VID fields in network byte
+        * order.
         */
-       uint16_t        dst_port_mask;
+       uint64_t        vlan_tag_tbl_addr;
        /*
-        * This is the ID of the filter that goes along with
-        * the pri_hint.
+        * This value indicates how many entries in vlan_tag_tbl are
+        * valid. Each entry is 4 bytes.
         */
-       uint64_t        ntuple_filter_id_hint;
+       uint32_t        num_vlan_tags;
+       uint8_t unused_1[4];
 } __attribute__((packed));
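
A minimal sketch of how a caller might fill in this request, shown to make the
mask flags concrete. It assumes the request's `mask` word declared earlier in
this structure; `sketch_l2_set_rx_mask` and `mc_tbl_iova` are illustrative
names only, not HWRM or DPDK APIs.

static void
sketch_l2_set_rx_mask(struct hwrm_cfa_l2_set_rx_mask_input *req,
                      uint64_t mc_tbl_iova, uint32_t n_mc)
{
        /* Promiscuous plus outermost-L2 multicast/broadcast coverage. */
        req->mask = HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_PROMISCUOUS |
                    HWRM_CFA_L2_SET_RX_MASK_INPUT_MASK_OUTERMOST;
        /* Each valid mc_tbl entry is a 6-byte MAC address. */
        req->mc_tbl_addr = mc_tbl_iova;
        req->num_mc_entries = n_mc;
        /* Note: at most one of the VLANONLY, VLAN_NONVLAN and
         * ANYVLAN_NONVLAN flags may be set in a single request. */
}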
 
-/* hwrm_cfa_ntuple_filter_alloc_output (size:192b/24B) */
-struct hwrm_cfa_ntuple_filter_alloc_output {
+/* hwrm_cfa_l2_set_rx_mask_output (size:128b/16B) */
+struct hwrm_cfa_l2_set_rx_mask_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -22076,17 +21527,7 @@ struct hwrm_cfa_ntuple_filter_alloc_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* This value is an opaque id into CFA data structures. */
-       uint64_t        ntuple_filter_id;
-       /*
-        * This is the ID of the flow associated with this
-        * filter.
-        * This value shall be used to match and associate the
-        * flow identifier returned in completion records.
-        * A value of 0xFFFFFFFF shall indicate no flow id.
-        */
-       uint32_t        flow_id;
-       uint8_t unused_0[3];
+       uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -22097,31 +21538,31 @@ struct hwrm_cfa_ntuple_filter_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
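
Since the `valid` byte is documented as the last thing firmware writes, a
caller polling a DMA'd response can use it as a completion marker. A hedged
sketch, assuming the response record sits in host memory; rte_rmb() and
rte_delay_us() are real DPDK helpers, while the function name is illustrative.

#include <rte_atomic.h>
#include <rte_cycles.h>

/* Hypothetical helper: spin until the response is completely written.
 * Returns 0 once valid == 1, or -1 after max_tries polls. */
static int
sketch_wait_valid(volatile struct hwrm_cfa_l2_set_rx_mask_output *resp,
                  unsigned int max_tries)
{
        while (max_tries--) {
                if (resp->valid == 1) {
                        rte_rmb(); /* order the valid read before the payload */
                        return 0;
                }
                rte_delay_us(1);
        }
        return -1;
}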
 
-/* hwrm_cfa_ntuple_filter_alloc_cmd_err (size:64b/8B) */
-struct hwrm_cfa_ntuple_filter_alloc_cmd_err {
+/* hwrm_cfa_l2_set_rx_mask_cmd_err (size:64b/8B) */
+struct hwrm_cfa_l2_set_rx_mask_cmd_err {
        /*
        * Command-specific error code that goes into
        * the cmd_err field in the Common HWRM Error Response.
         */
        uint8_t code;
        /* Unknown error */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN \
+       #define HWRM_CFA_L2_SET_RX_MASK_CMD_ERR_CODE_UNKNOWN \
                UINT32_C(0x0)
-       /* Unable to complete operation due to conflict with Rx Mask VLAN */
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR \
+       /* Unable to complete operation due to conflict with Ntuple Filter */
+       #define HWRM_CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR \
                UINT32_C(0x1)
-       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_LAST \
-               HWRM_CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR
+       #define HWRM_CFA_L2_SET_RX_MASK_CMD_ERR_CODE_LAST \
+               HWRM_CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR
        uint8_t unused_0[7];
 } __attribute__((packed));
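
For completeness, a small sketch of mapping the command-specific error code
above to a message; the function name is illustrative.

static const char *
sketch_rx_mask_cmd_err_str(uint8_t code)
{
        switch (code) {
        case HWRM_CFA_L2_SET_RX_MASK_CMD_ERR_CODE_UNKNOWN:
                return "unknown error";
        case HWRM_CFA_L2_SET_RX_MASK_CMD_ERR_CODE_NTUPLE_FILTER_CONFLICT_ERR:
                return "conflicts with an existing ntuple filter";
        default:
                return "unrecognized cmd_err code";
        }
}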
 
 /*******************************
- * hwrm_cfa_ntuple_filter_free *
+ * hwrm_cfa_vlan_antispoof_cfg *
  *******************************/
 
 
-/* hwrm_cfa_ntuple_filter_free_input (size:192b/24B) */
-struct hwrm_cfa_ntuple_filter_free_input {
+/* hwrm_cfa_vlan_antispoof_cfg_input (size:256b/32B) */
+struct hwrm_cfa_vlan_antispoof_cfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -22149,12 +21590,27 @@ struct hwrm_cfa_ntuple_filter_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* This value is an opaque id into CFA data structures. */
-       uint64_t        ntuple_filter_id;
+       /*
+        * Function ID of the function that is being configured.
+        * Only valid for a VF FID configured by the PF.
+        */
+       uint16_t        fid;
+       uint8_t unused_0[2];
+       /* Number of VLAN entries in the vlan_tag_mask_tbl. */
+       uint32_t        num_vlan_entries;
+       /*
+        * The vlan_tag_mask_tbl_addr is the DMA address of the VLAN
+        * antispoof table. Each table entry contains the 16-bit TPID
+        * (0x8100 or 0x88a8 only), 16-bit VLAN ID, and a 16-bit mask,
+        * all in network order to match hwrm_cfa_l2_set_rx_mask.
+        * For an individual VLAN entry, the mask value should be 0xfff
+        * for the 12-bit VLAN ID.
+        */
+       uint64_t        vlan_tag_mask_tbl_addr;
 } __attribute__((packed));
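
The 6-byte entry format described for vlan_tag_mask_tbl_addr can be sketched
as a packed struct. rte_cpu_to_be_16() is DPDK's real byte-order helper; the
struct and function names are illustrative.

#include <rte_byteorder.h>

/* One VLAN antispoof table entry: three 16-bit fields in network order. */
struct sketch_vlan_antispoof_entry {
        uint16_t tpid;  /* 0x8100 or 0x88a8 only */
        uint16_t vid;   /* 12-bit VLAN ID in the low bits */
        uint16_t mask;  /* 0x0fff to match the VID exactly */
} __attribute__((packed));

static void
sketch_fill_vlan_entry(struct sketch_vlan_antispoof_entry *e, uint16_t vid)
{
        e->tpid = rte_cpu_to_be_16(0x8100);
        e->vid  = rte_cpu_to_be_16(vid & 0x0fff);
        e->mask = rte_cpu_to_be_16(0x0fff);
}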
 
-/* hwrm_cfa_ntuple_filter_free_output (size:128b/16B) */
-struct hwrm_cfa_ntuple_filter_free_output {
+/* hwrm_cfa_vlan_antispoof_cfg_output (size:128b/16B) */
+struct hwrm_cfa_vlan_antispoof_cfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -22174,13 +21630,13 @@ struct hwrm_cfa_ntuple_filter_free_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/******************************
- * hwrm_cfa_ntuple_filter_cfg *
- ******************************/
+/********************************
+ * hwrm_cfa_vlan_antispoof_qcfg *
+ ********************************/
 
 
-/* hwrm_cfa_ntuple_filter_cfg_input (size:384b/48B) */
-struct hwrm_cfa_ntuple_filter_cfg_input {
+/* hwrm_cfa_vlan_antispoof_qcfg_input (size:256b/32B) */
+struct hwrm_cfa_vlan_antispoof_qcfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -22208,59 +21664,30 @@ struct hwrm_cfa_ntuple_filter_cfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint32_t        enables;
-       /*
-        * This bit must be '1' for the new_dst_id field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_ENABLES_NEW_DST_ID \
-               UINT32_C(0x1)
-       /*
-        * This bit must be '1' for the new_mirror_vnic_id field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_ENABLES_NEW_MIRROR_VNIC_ID \
-               UINT32_C(0x2)
-       /*
-        * This bit must be '1' for the new_meter_instance_id field to be
-        * configured.
-        */
-       #define HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_ENABLES_NEW_METER_INSTANCE_ID \
-               UINT32_C(0x4)
-       uint8_t unused_0[4];
-       /* This value is an opaque id into CFA data structures. */
-       uint64_t        ntuple_filter_id;
-       /*
-        * If set, this value shall represent the new
-        * Logical VNIC ID of the destination VNIC for the RX
-        * path and new network port id of the destination port for
-        * the TX path.
-        */
-       uint32_t        new_dst_id;
        /*
-        * New Logical VNIC ID of the VNIC where traffic is
-        * mirrored.
+        * Function ID of the function that is being queried.
+        * Only valid for a VF FID queried by the PF.
         */
-       uint32_t        new_mirror_vnic_id;
+       uint16_t        fid;
+       uint8_t unused_0[2];
        /*
-        * New meter to attach to the flow. Specifying the
-        * invalid instance ID is used to remove any existing
-        * meter from the flow.
+        * Maximum number of VLAN entries the firmware is allowed to DMA
+        * to vlan_tag_mask_tbl.
         */
-       uint16_t        new_meter_instance_id;
+       uint32_t        max_vlan_entries;
        /*
-        * A value of 0xffff is considered invalid and implies the
-        * instance is not configured.
+        * The vlan_tag_mask_tbl_addr is the DMA address of the VLAN
+        * antispoof table to which firmware will DMA. Each table
+        * entry will contain the 16-bit TPID (0x8100 or 0x88a8 only),
+        * 16-bit VLAN ID, and a 16-bit mask, all in network order to
+        * match hwrm_cfa_l2_set_rx_mask. For an individual VLAN entry,
+        * the mask value should be 0xfff for the 12-bit VLAN ID.
         */
-       #define HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_NEW_METER_INSTANCE_ID_INVALID \
-               UINT32_C(0xffff)
-       #define HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_NEW_METER_INSTANCE_ID_LAST \
-               HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_NEW_METER_INSTANCE_ID_INVALID
-       uint8_t unused_1[6];
+       uint64_t        vlan_tag_mask_tbl_addr;
 } __attribute__((packed));
 
-/* hwrm_cfa_ntuple_filter_cfg_output (size:128b/16B) */
-struct hwrm_cfa_ntuple_filter_cfg_output {
+/* hwrm_cfa_vlan_antispoof_qcfg_output (size:128b/16B) */
+struct hwrm_cfa_vlan_antispoof_qcfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -22269,7 +21696,9 @@ struct hwrm_cfa_ntuple_filter_cfg_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       uint8_t unused_0[7];
+       /* Number of valid entries DMA'd by firmware to vlan_tag_mask_tbl. */
+       uint32_t        num_vlan_entries;
+       uint8_t unused_0[3];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -22280,13 +21709,13 @@ struct hwrm_cfa_ntuple_filter_cfg_output {
        uint8_t valid;
 } __attribute__((packed));
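
The query mirrors the cfg-side table format: the driver supplies a DMA buffer
and a ceiling, and firmware reports back how many 6-byte entries it actually
wrote. A sketch using DPDK's real rte_zmalloc() and rte_malloc_virt2iova();
the helper name is illustrative.

#include <rte_malloc.h>

/* Prepare a qcfg request so firmware can DMA up to max_entries
 * 6-byte entries into a freshly allocated, IOVA-addressable table. */
static void *
sketch_prep_antispoof_qcfg(struct hwrm_cfa_vlan_antispoof_qcfg_input *req,
                           uint16_t fid, uint32_t max_entries)
{
        void *tbl = rte_zmalloc("antispoof_tbl", max_entries * 6, 64);

        if (tbl == NULL)
                return NULL;
        req->fid = fid;
        req->max_vlan_entries = max_entries;
        req->vlan_tag_mask_tbl_addr = rte_malloc_virt2iova(tbl);
        return tbl; /* output.num_vlan_entries entries are valid on completion */
}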
 
-/**************************
- * hwrm_cfa_em_flow_alloc *
- **************************/
+/********************************
+ * hwrm_cfa_tunnel_filter_alloc *
+ ********************************/
 
 
-/* hwrm_cfa_em_flow_alloc_input (size:896b/112B) */
-struct hwrm_cfa_em_flow_alloc_input {
+/* hwrm_cfa_tunnel_filter_alloc_input (size:704b/88B) */
+struct hwrm_cfa_tunnel_filter_alloc_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -22309,312 +21738,216 @@ struct hwrm_cfa_em_flow_alloc_input {
        uint16_t        target_id;
        /*
         * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
-       uint32_t        flags;
-       /*
-        * Enumeration denoting the RX, TX type of the resource.
-        * This enumeration is used for resources that are similar for both
-        * TX and RX paths of the chip.
-        */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH         UINT32_C(0x1)
-       /* tx path */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_TX        UINT32_C(0x0)
-       /* rx path */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_RX        UINT32_C(0x1)
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_LAST \
-               HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_RX
-       /*
-        * Setting of this flag indicates enabling of a byte counter for a given
-        * flow.
-        */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_BYTE_CTR     UINT32_C(0x2)
-       /*
-        * Setting of this flag indicates enabling of a packet counter for a given
-        * flow.
-        */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PKT_CTR      UINT32_C(0x4)
-       /* Setting of this flag indicates de-capsulation action for the given flow. */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_DECAP        UINT32_C(0x8)
-       /* Setting of this flag indicates encapsulation action for the given flow. */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_ENCAP        UINT32_C(0x10)
-       /*
-        * Setting of this flag indicates drop action. If this flag is not set,
-        * then it should be considered accept action.
-        */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_DROP         UINT32_C(0x20)
-       /*
-        * Setting of this flag indicates that a meter is expected to be attached
-        * to this flow. This hint can be used when choosing the action record
-        * format required for the flow.
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_METER        UINT32_C(0x40)
+       uint64_t        resp_addr;
+       uint32_t        flags;
+       /* Setting of this flag indicates that the filter applies to the loopback path. */
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_FLAGS_LOOPBACK \
+               UINT32_C(0x1)
        uint32_t        enables;
        /*
         * This bit must be '1' for the l2_filter_id field to be
         * configured.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_L2_FILTER_ID \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_L2_FILTER_ID \
                UINT32_C(0x1)
        /*
-        * This bit must be '1' for the tunnel_type field to be
+        * This bit must be '1' for the l2_addr field to be
         * configured.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_TUNNEL_TYPE \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR \
                UINT32_C(0x2)
        /*
-        * This bit must be '1' for the tunnel_id field to be
+        * This bit must be '1' for the l2_ivlan field to be
         * configured.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_TUNNEL_ID \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_L2_IVLAN \
                UINT32_C(0x4)
        /*
-        * This bit must be '1' for the src_macaddr field to be
+        * This bit must be '1' for the l3_addr field to be
         * configured.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_MACADDR \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_L3_ADDR \
                UINT32_C(0x8)
        /*
-        * This bit must be '1' for the dst_macaddr field to be
+        * This bit must be '1' for the l3_addr_type field to be
         * configured.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_MACADDR \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_L3_ADDR_TYPE \
                UINT32_C(0x10)
        /*
-        * This bit must be '1' for the ovlan_vid field to be
+        * This bit must be '1' for the t_l3_addr_type field to be
         * configured.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_OVLAN_VID \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_T_L3_ADDR_TYPE \
                UINT32_C(0x20)
        /*
-        * This bit must be '1' for the ivlan_vid field to be
+        * This bit must be '1' for the t_l3_addr field to be
         * configured.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IVLAN_VID \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_T_L3_ADDR \
                UINT32_C(0x40)
        /*
-        * This bit must be '1' for the ethertype field to be
+        * This bit must be '1' for the tunnel_type field to be
         * configured.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_ETHERTYPE \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_TYPE \
                UINT32_C(0x80)
        /*
-        * This bit must be '1' for the src_ipaddr field to be
+        * This bit must be '1' for the vni field to be
         * configured.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_IPADDR \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_VNI \
                UINT32_C(0x100)
        /*
-        * This bit must be '1' for the dst_ipaddr field to be
+        * This bit must be '1' for the dst_vnic_id field to be
         * configured.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_IPADDR \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_DST_VNIC_ID \
                UINT32_C(0x200)
        /*
-        * This bit must be '1' for the ipaddr_type field to be
+        * This bit must be '1' for the mirror_vnic_id field to be
         * configured.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IPADDR_TYPE \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID \
                UINT32_C(0x400)
        /*
-        * This bit must be '1' for the ip_protocol field to be
-        * configured.
-        */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IP_PROTOCOL \
-               UINT32_C(0x800)
-       /*
-        * This bit must be '1' for the src_port field to be
-        * configured.
+        * This value identifies a set of CFA data structures used for an L2
+        * context.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_PORT \
-               UINT32_C(0x1000)
+       uint64_t        l2_filter_id;
        /*
-        * This bit must be '1' for the dst_port field to be
-        * configured.
+        * This value sets the match value for the inner L2
+        * MAC address.
+        * Destination MAC address for RX path.
+        * Source MAC address for TX path.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_PORT \
-               UINT32_C(0x2000)
+       uint8_t l2_addr[6];
        /*
-        * This bit must be '1' for the dst_id field to be
-        * configured.
+        * This value sets the VLAN ID for the inner VLAN.
+        * Only 12-bits of VLAN ID are used in setting the filter.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_ID \
-               UINT32_C(0x4000)
+       uint16_t        l2_ivlan;
        /*
-        * This bit must be '1' for the mirror_vnic_id field to be
-        * configured.
+        * The value of inner destination IP address to be used in filtering.
+        * For IPv4, first four bytes represent the IP address.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID \
-               UINT32_C(0x8000)
+       uint32_t        l3_addr[4];
        /*
-        * This bit must be '1' for the encap_record_id field to be
-        * configured.
+        * The value of tunnel destination IP address to be used in filtering.
+        * For IPv4, first four bytes represent the IP address.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_ENCAP_RECORD_ID \
-               UINT32_C(0x10000)
+       uint32_t        t_l3_addr[4];
        /*
-        * This bit must be '1' for the meter_instance_id field to be
-        * configured.
+        * This value indicates the type of inner IP address.
+        * 4 - IPv4
+        * 6 - IPv6
+        * All others are invalid.
         */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_METER_INSTANCE_ID \
-               UINT32_C(0x20000)
+       uint8_t l3_addr_type;
        /*
-        * This value identifies a set of CFA data structures used for an L2
-        * context.
+        * This value indicates the type of tunnel IP address.
+        * 4 - IPv4
+        * 6 - IPv6
+        * All others are invalid.
         */
-       uint64_t        l2_filter_id;
+       uint8_t t_l3_addr_type;
        /* Tunnel Type. */
        uint8_t tunnel_type;
        /* Non-tunnel */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL \
                UINT32_C(0x0)
        /* Virtual eXtensible Local Area Network (VXLAN) */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
                UINT32_C(0x1)
        /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_NVGRE \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NVGRE \
                UINT32_C(0x2)
        /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_L2GRE \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_L2GRE \
                UINT32_C(0x3)
        /* IP in IP */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_IPIP \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPIP \
                UINT32_C(0x4)
        /* Generic Network Virtualization Encapsulation (Geneve) */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
                UINT32_C(0x5)
        /* Multi-Protocol Label Switching (MPLS) */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_MPLS \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_MPLS \
                UINT32_C(0x6)
        /* Stateless Transport Tunnel (STT) */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_STT \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_STT \
                UINT32_C(0x7)
        /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_IPGRE \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPGRE \
                UINT32_C(0x8)
        /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
                UINT32_C(0x9)
        /* Any tunneled traffic */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
                UINT32_C(0xff)
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_LAST \
-               HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL
-       uint8_t unused_0[3];
-       /*
-        * Tunnel identifier.
-        * Virtual Network Identifier (VNI). Only valid with
-        * tunnel_types VXLAN, NVGRE, and Geneve.
-        * Only lower 24-bits of VNI field are used
-        * in setting up the filter.
-        */
-       uint32_t        tunnel_id;
-       /*
-        * This value indicates the source MAC address in
-        * the Ethernet header.
-        */
-       uint8_t src_macaddr[6];
-       /* The meter instance to attach to the flow. */
-       uint16_t        meter_instance_id;
-       /*
-        * A value of 0xffff is considered invalid and implies the
-        * instance is not configured.
-        */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_METER_INSTANCE_ID_INVALID \
-               UINT32_C(0xffff)
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_METER_INSTANCE_ID_LAST \
-               HWRM_CFA_EM_FLOW_ALLOC_INPUT_METER_INSTANCE_ID_INVALID
-       /*
-        * This value indicates the destination MAC address in
-        * the Ethernet header.
-        */
-       uint8_t dst_macaddr[6];
-       /*
-        * This value indicates the VLAN ID of the outer VLAN tag
-        * in the Ethernet header.
-        */
-       uint16_t        ovlan_vid;
-       /*
-        * This value indicates the VLAN ID of the inner VLAN tag
-        * in the Ethernet header.
-        */
-       uint16_t        ivlan_vid;
-       /* This value indicates the ethertype in the Ethernet header. */
-       uint16_t        ethertype;
-       /*
-        * This value indicates the type of IP address.
-        * 4 - IPv4
-        * 6 - IPv6
-        * All others are invalid.
-        */
-       uint8_t ip_addr_type;
-       /* invalid */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_UNKNOWN UINT32_C(0x0)
-       /* IPv4 */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_IPV4    UINT32_C(0x4)
-       /* IPv6 */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_IPV6    UINT32_C(0x6)
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_LAST \
-               HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_IPV6
-       /*
-        * The value of the protocol field in the IP header.
-        * Applies to UDP and TCP traffic.
-        * 6 - TCP
-        * 17 - UDP
-        */
-       uint8_t ip_protocol;
-       /* invalid */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_UNKNOWN UINT32_C(0x0)
-       /* TCP */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_TCP     UINT32_C(0x6)
-       /* UDP */
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_UDP     UINT32_C(0x11)
-       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_LAST \
-               HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_UDP
-       uint8_t unused_1[2];
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_LAST \
+               HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL
        /*
-        * The value of source IP address to be used in filtering.
-        * For IPv4, first four bytes represent the IP address.
+        * tunnel_flags lets the caller indicate which tunnel tag detection
+        * options apply for the tunnel type specified in tunnel_type.
         */
-       uint32_t        src_ipaddr[4];
+       uint8_t tunnel_flags;
        /*
-        * big_endian = True
-        *     The value of destination IP address to be used in filtering.
-        *     For IPv4, first four bytes represent the IP address.
+        * If the tunnel_type is geneve, then this bit indicates if we
+        * need to match the geneve OAM packet.
+        * If the tunnel_type is nvgre or gre, then this bit indicates if
+        * we need to detect the checksum present bit in the gre header.
+        * If the tunnel_type is mpls, then this bit indicates if we need
+        * to match mpls packet with explicit IPV4/IPV6 null header.
         */
-       uint32_t        dst_ipaddr[4];
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_FLAGS_TUN_FLAGS_OAM_CHECKSUM_EXPLHDR \
+               UINT32_C(0x1)
        /*
-        * The value of source port to be used in filtering.
-        * Applies to UDP and TCP traffic.
+        * If the tunnel_type is geneve, then this bit indicates if we
+        * need to detect the critical option bit set in the oam packet.
+        * If the tunnel_type is nvgre or gre, then this bit indicates
+        * if we need to match nvgre packets with key present bit set in
+        * gre header.
+        * If the tunnel_type is mpls, then this bit indicates if we
+        * need to match mpls packet with S bit from inner/second label.
         */
-       uint16_t        src_port;
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_FLAGS_TUN_FLAGS_CRITICAL_OPT_S1 \
+               UINT32_C(0x2)
        /*
-        * The value of destination port to be used in filtering.
-        * Applies to UDP and TCP traffic.
+        * If the tunnel_type is geneve, then this bit indicates if we
+        * need to match geneve packet with extended header bit set in
+        * geneve header.
+        * If the tunnel_type is nvgre or gre, then this bit indicates
+        * if we need to match nvgre packets with sequence number
+        * present bit set in gre header.
+        * If the tunnel_type is mpls, then this bit indicates if we
+        * need to match mpls packet with S bit from outer/first label.
         */
-       uint16_t        dst_port;
+       #define HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_FLAGS_TUN_FLAGS_EXTHDR_SEQNUM_S0 \
+               UINT32_C(0x4)
        /*
-        * If set, this value shall represent the
-        * Logical VNIC ID of the destination VNIC for the RX
-        * path and network port id of the destination port for
-        * the TX path.
+        * Virtual Network Identifier (VNI). Only valid with
+        * tunnel_types VXLAN, NVGRE, and Geneve.
+        * Only lower 24-bits of VNI field are used
+        * in setting up the filter.
         */
-       uint16_t        dst_id;
+       uint32_t        vni;
+       /* Logical VNIC ID of the destination VNIC. */
+       uint32_t        dst_vnic_id;
        /*
         * Logical VNIC ID of the VNIC where traffic is
         * mirrored.
         */
-       uint16_t        mirror_vnic_id;
-       /* Logical ID of the encapsulation record. */
-       uint32_t        encap_record_id;
-       uint8_t unused_2[4];
+       uint32_t        mirror_vnic_id;
 } __attribute__((packed));
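
To make the enables/tunnel_type interplay concrete: firmware only consumes the
fields whose enables bits are set. A hedged sketch of requesting a VXLAN
filter on an inner destination MAC; the sketch_ name is illustrative.

#include <string.h>

static void
sketch_vxlan_tunnel_filter(struct hwrm_cfa_tunnel_filter_alloc_input *req,
                           uint64_t l2_filter_id, const uint8_t mac[6],
                           uint32_t vni)
{
        req->enables =
                HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_L2_FILTER_ID |
                HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_L2_ADDR |
                HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_TYPE |
                HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_ENABLES_VNI;
        req->l2_filter_id = l2_filter_id;
        memcpy(req->l2_addr, mac, sizeof(req->l2_addr));
        req->tunnel_type =
                HWRM_CFA_TUNNEL_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN;
        req->vni = vni & 0xffffff; /* only the low 24 bits are used */
}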
 
-/* hwrm_cfa_em_flow_alloc_output (size:192b/24B) */
-struct hwrm_cfa_em_flow_alloc_output {
+/* hwrm_cfa_tunnel_filter_alloc_output (size:192b/24B) */
+struct hwrm_cfa_tunnel_filter_alloc_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -22624,7 +21957,7 @@ struct hwrm_cfa_em_flow_alloc_output {
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
        /* This value is an opaque id into CFA data structures. */
-       uint64_t        em_filter_id;
+       uint64_t        tunnel_filter_id;
        /*
         * This is the ID of the flow associated with this
         * filter.
@@ -22644,72 +21977,13 @@ struct hwrm_cfa_em_flow_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/*************************
- * hwrm_cfa_em_flow_free *
- *************************/
-
-
-/* hwrm_cfa_em_flow_free_input (size:192b/24B) */
-struct hwrm_cfa_em_flow_free_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
-       /* This value is an opaque id into CFA data structures. */
-       uint64_t        em_filter_id;
-} __attribute__((packed));
-
-/* hwrm_cfa_em_flow_free_output (size:128b/16B) */
-struct hwrm_cfa_em_flow_free_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t unused_0[7];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
-
-/************************
- * hwrm_cfa_em_flow_cfg *
- ************************/
+/*******************************
+ * hwrm_cfa_tunnel_filter_free *
+ *******************************/
 
 
-/* hwrm_cfa_em_flow_cfg_input (size:384b/48B) */
-struct hwrm_cfa_em_flow_cfg_input {
+/* hwrm_cfa_tunnel_filter_free_input (size:192b/24B) */
+struct hwrm_cfa_tunnel_filter_free_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -22737,59 +22011,12 @@ struct hwrm_cfa_em_flow_cfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint32_t        enables;
-       /*
-        * This bit must be '1' for the new_dst_id field to be
-        * configured.
-        */
-       #define HWRM_CFA_EM_FLOW_CFG_INPUT_ENABLES_NEW_DST_ID \
-               UINT32_C(0x1)
-       /*
-        * This bit must be '1' for the new_mirror_vnic_id field to be
-        * configured.
-        */
-       #define HWRM_CFA_EM_FLOW_CFG_INPUT_ENABLES_NEW_MIRROR_VNIC_ID \
-               UINT32_C(0x2)
-       /*
-        * This bit must be '1' for the new_meter_instance_id field to be
-        * configured.
-        */
-       #define HWRM_CFA_EM_FLOW_CFG_INPUT_ENABLES_NEW_METER_INSTANCE_ID \
-               UINT32_C(0x4)
-       uint8_t unused_0[4];
        /* This value is an opaque id into CFA data structures. */
-       uint64_t        em_filter_id;
-       /*
-        * If set, this value shall represent the new
-        * Logical VNIC ID of the destination VNIC for the RX
-        * path and network port id of the destination port for
-        * the TX path.
-        */
-       uint32_t        new_dst_id;
-       /*
-        * New Logical VNIC ID of the VNIC where traffic is
-        * mirrored.
-        */
-       uint32_t        new_mirror_vnic_id;
-       /*
-        * New meter to attach to the flow. Specifying the
-        * invalid instance ID is used to remove any existing
-        * meter from the flow.
-        */
-       uint16_t        new_meter_instance_id;
-       /*
-        * A value of 0xffff is considered invalid and implies the
-        * instance is not configured.
-        */
-       #define HWRM_CFA_EM_FLOW_CFG_INPUT_NEW_METER_INSTANCE_ID_INVALID \
-               UINT32_C(0xffff)
-       #define HWRM_CFA_EM_FLOW_CFG_INPUT_NEW_METER_INSTANCE_ID_LAST \
-               HWRM_CFA_EM_FLOW_CFG_INPUT_NEW_METER_INSTANCE_ID_INVALID
-       uint8_t unused_1[6];
+       uint64_t        tunnel_filter_id;
 } __attribute__((packed));
 
-/* hwrm_cfa_em_flow_cfg_output (size:128b/16B) */
-struct hwrm_cfa_em_flow_cfg_output {
+/* hwrm_cfa_tunnel_filter_free_output (size:128b/16B) */
+struct hwrm_cfa_tunnel_filter_free_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -22809,13 +22036,13 @@ struct hwrm_cfa_em_flow_cfg_output {
        uint8_t valid;
 } __attribute__((packed));
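
The free request simply echoes the opaque tunnel_filter_id returned by the
alloc response. A sketch of the pairing; the transport call is hypothetical.

static void
sketch_free_tunnel_filter(const struct hwrm_cfa_tunnel_filter_alloc_output *out,
                          struct hwrm_cfa_tunnel_filter_free_input *req)
{
        req->tunnel_filter_id = out->tunnel_filter_id;
        /* ...then send via the driver's HWRM transport, e.g. a
         * hypothetical sketch_hwrm_send(req, sizeof(*req)). */
}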
 
-/********************************
- * hwrm_cfa_meter_profile_alloc *
- ********************************/
+/***************************************
+ * hwrm_cfa_redirect_tunnel_type_alloc *
+ ***************************************/
 
 
-/* hwrm_cfa_meter_profile_alloc_input (size:320b/40B) */
-struct hwrm_cfa_meter_profile_alloc_input {
+/* hwrm_cfa_redirect_tunnel_type_alloc_input (size:192b/24B) */
+struct hwrm_cfa_redirect_tunnel_type_alloc_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -22841,222 +22068,60 @@ struct hwrm_cfa_meter_profile_alloc_input {
         * command's response data will be written. This can be either a host
         * physical address (HPA) or a guest physical address (GPA) and must
         * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
-       uint8_t flags;
-       /*
-        * Enumeration denoting the RX, TX type of the resource.
-        * This enumeration is used for resources that are similar for both
-        * TX and RX paths of the chip.
-        */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_FLAGS_PATH     UINT32_C(0x1)
-       /* tx path */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_FLAGS_PATH_TX \
-               UINT32_C(0x0)
-       /* rx path */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_FLAGS_PATH_RX \
-               UINT32_C(0x1)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_FLAGS_PATH_LAST \
-               HWRM_CFA_METER_PROFILE_ALLOC_INPUT_FLAGS_PATH_RX
-       /* The meter algorithm type. */
-       uint8_t meter_type;
-       /* RFC 2697 (srTCM) */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_METER_TYPE_RFC2697 \
-               UINT32_C(0x0)
-       /* RFC 2698 (trTCM) */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_METER_TYPE_RFC2698 \
-               UINT32_C(0x1)
-       /* RFC 4115 (trTCM) */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_METER_TYPE_RFC4115 \
-               UINT32_C(0x2)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_METER_TYPE_LAST \
-               HWRM_CFA_METER_PROFILE_ALLOC_INPUT_METER_TYPE_RFC4115
-       /*
-        * This field is reserved for future use.
-        * It shall be set to 0.
-        */
-       uint16_t        reserved1;
-       /*
-        * This field is reserved for future use.
-        * It shall be set to 0.
-        */
-       uint32_t        reserved2;
-       /* A meter rate specified in bytes-per-second. */
-       uint32_t        commit_rate;
-       /* The bandwidth value. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_SCALE_LAST \
-               HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_BW_VALUE_UNIT_LAST \
-               HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_RATE_BW_VALUE_UNIT_INVALID
-       /* A meter burst size specified in bytes. */
-       uint32_t        commit_burst;
-       /* The bandwidth value. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_SCALE_LAST \
-               HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_BW_VALUE_UNIT_LAST \
-               HWRM_CFA_METER_PROFILE_ALLOC_INPUT_COMMIT_BURST_BW_VALUE_UNIT_INVALID
-       /* A meter rate specified in bytes-per-second. */
-       uint32_t        excess_peak_rate;
-       /* The bandwidth value. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_SCALE_LAST \
-               HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_LAST \
-               HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_INVALID
-       /* A meter burst size specified in bytes. */
-       uint32_t        excess_peak_burst;
-       /* The bandwidth value. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_SCALE_LAST \
-               HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_LAST \
-               HWRM_CFA_METER_PROFILE_ALLOC_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_INVALID
+        */
+       uint64_t        resp_addr;
+       /* The destination function id, to which the traffic is redirected. */
+       uint16_t        dest_fid;
+       /* Tunnel Type. */
+       uint8_t tunnel_type;
+       /* Non-tunnel */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL \
+               UINT32_C(0x0)
+       /* Virtual eXtensible Local Area Network (VXLAN) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
+               UINT32_C(0x1)
+       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_NVGRE \
+               UINT32_C(0x2)
+       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_L2GRE \
+               UINT32_C(0x3)
+       /* IP in IP */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_IPIP \
+               UINT32_C(0x4)
+       /* Generic Network Virtualization Encapsulation (Geneve) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
+               UINT32_C(0x5)
+       /* Multi-Protocol Lable Switching (MPLS) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_MPLS \
+               UINT32_C(0x6)
+       /* Stateless Transport Tunnel (STT) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_STT \
+               UINT32_C(0x7)
+       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_IPGRE \
+               UINT32_C(0x8)
+       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+               UINT32_C(0x9)
+       /* Enhanced Generic Routing Encapsulation (GRE version 1) inside IP datagram payload */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_IPGRE_V1 \
+               UINT32_C(0xa)
+       /* Any tunneled traffic */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
+               UINT32_C(0xff)
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_LAST \
+               HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL
+       /* Tunnel alloc flags. */
+       uint8_t flags;
+       /* Setting of this flag indicates that an existing redirect tunnel should be modified to use the new destination function ID. */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_FLAGS_MODIFY_DST \
+               UINT32_C(0x1)
+       uint8_t unused_0[4];
 } __attribute__((packed));
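
A short sketch of redirecting all VXLAN-tunneled traffic to another function,
including the MODIFY_DST path for retargeting an existing redirect; the
function name is illustrative.

static void
sketch_redirect_vxlan(struct hwrm_cfa_redirect_tunnel_type_alloc_input *req,
                      uint16_t dest_fid, int modify_existing)
{
        req->dest_fid = dest_fid;
        req->tunnel_type =
                HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_TUNNEL_TYPE_VXLAN;
        /* MODIFY_DST retargets an already-allocated redirect rather
         * than creating a new one. */
        req->flags = modify_existing ?
                HWRM_CFA_REDIRECT_TUNNEL_TYPE_ALLOC_INPUT_FLAGS_MODIFY_DST : 0;
}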
 
-/* hwrm_cfa_meter_profile_alloc_output (size:128b/16B) */
-struct hwrm_cfa_meter_profile_alloc_output {
+/* hwrm_cfa_redirect_tunnel_type_alloc_output (size:128b/16B) */
+struct hwrm_cfa_redirect_tunnel_type_alloc_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -23065,17 +22130,7 @@ struct hwrm_cfa_meter_profile_alloc_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* This value identifies a meter profile in CFA. */
-       uint16_t        meter_profile_id;
-       /*
-        * A value of 0xffff is considered invalid and implies the
-        * profile is not configured.
-        */
-       #define HWRM_CFA_METER_PROFILE_ALLOC_OUTPUT_METER_PROFILE_ID_INVALID \
-               UINT32_C(0xffff)
-       #define HWRM_CFA_METER_PROFILE_ALLOC_OUTPUT_METER_PROFILE_ID_LAST \
-               HWRM_CFA_METER_PROFILE_ALLOC_OUTPUT_METER_PROFILE_ID_INVALID
-       uint8_t unused_0[5];
+       uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -23086,13 +22141,13 @@ struct hwrm_cfa_meter_profile_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/*******************************
- * hwrm_cfa_meter_profile_free *
- *******************************/
+/**************************************
+ * hwrm_cfa_redirect_tunnel_type_free *
+ **************************************/
 
 
-/* hwrm_cfa_meter_profile_free_input (size:192b/24B) */
-struct hwrm_cfa_meter_profile_free_input {
+/* hwrm_cfa_redirect_tunnel_type_free_input (size:192b/24B) */
+struct hwrm_cfa_redirect_tunnel_type_free_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -23120,37 +22175,53 @@ struct hwrm_cfa_meter_profile_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint8_t flags;
-       /*
-        * Enumeration denoting the RX, TX type of the resource.
-        * This enumeration is used for resources that are similar for both
-        * TX and RX paths of the chip.
-        */
-       #define HWRM_CFA_METER_PROFILE_FREE_INPUT_FLAGS_PATH     UINT32_C(0x1)
-       /* tx path */
-       #define HWRM_CFA_METER_PROFILE_FREE_INPUT_FLAGS_PATH_TX \
+       /* The destination function id, to which the traffic is redirected. */
+       uint16_t        dest_fid;
+       /* Tunnel Type. */
+       uint8_t tunnel_type;
+       /* Non-tunnel */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_NONTUNNEL \
                UINT32_C(0x0)
-       /* rx path */
-       #define HWRM_CFA_METER_PROFILE_FREE_INPUT_FLAGS_PATH_RX \
+       /* Virtual eXtensible Local Area Network (VXLAN) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_VXLAN \
                UINT32_C(0x1)
-       #define HWRM_CFA_METER_PROFILE_FREE_INPUT_FLAGS_PATH_LAST \
-               HWRM_CFA_METER_PROFILE_FREE_INPUT_FLAGS_PATH_RX
-       uint8_t unused_0;
-       /* This value identifies a meter profile in CFA. */
-       uint16_t        meter_profile_id;
-       /*
-        * A value of 0xffff is considered invalid and implies the
-        * profile is not configured.
-        */
-       #define HWRM_CFA_METER_PROFILE_FREE_INPUT_METER_PROFILE_ID_INVALID \
-               UINT32_C(0xffff)
-       #define HWRM_CFA_METER_PROFILE_FREE_INPUT_METER_PROFILE_ID_LAST \
-               HWRM_CFA_METER_PROFILE_FREE_INPUT_METER_PROFILE_ID_INVALID
-       uint8_t unused_1[4];
+       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_NVGRE \
+               UINT32_C(0x2)
+       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_L2GRE \
+               UINT32_C(0x3)
+       /* IP in IP */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_IPIP \
+               UINT32_C(0x4)
+       /* Generic Network Virtualization Encapsulation (Geneve) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_GENEVE \
+               UINT32_C(0x5)
+       /* Multi-Protocol Label Switching (MPLS) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_MPLS \
+               UINT32_C(0x6)
+       /* Stateless Transport Tunnel (STT) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_STT \
+               UINT32_C(0x7)
+       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_IPGRE \
+               UINT32_C(0x8)
+       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+               UINT32_C(0x9)
+       /* Enhanced Generic Routing Encapsulation (GRE version 1) inside IP datagram payload */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_IPGRE_V1 \
+               UINT32_C(0xa)
+       /* Any tunneled traffic */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_ANYTUNNEL \
+               UINT32_C(0xff)
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_LAST \
+               HWRM_CFA_REDIRECT_TUNNEL_TYPE_FREE_INPUT_TUNNEL_TYPE_ANYTUNNEL
+       uint8_t unused_0[5];
 } __attribute__((packed));
 
-/* hwrm_cfa_meter_profile_free_output (size:128b/16B) */
-struct hwrm_cfa_meter_profile_free_output {
+/* hwrm_cfa_redirect_tunnel_type_free_output (size:128b/16B) */
+struct hwrm_cfa_redirect_tunnel_type_free_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -23170,13 +22241,13 @@ struct hwrm_cfa_meter_profile_free_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/******************************
- * hwrm_cfa_meter_profile_cfg *
- ******************************/
+/**************************************
+ * hwrm_cfa_redirect_tunnel_type_info *
+ **************************************/
 
 
-/* hwrm_cfa_meter_profile_cfg_input (size:320b/40B) */
-struct hwrm_cfa_meter_profile_cfg_input {
+/* hwrm_cfa_redirect_tunnel_type_info_input (size:192b/24B) */
+struct hwrm_cfa_redirect_tunnel_type_info_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -23204,223 +22275,53 @@ struct hwrm_cfa_meter_profile_cfg_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint8_t flags;
-       /*
-        * Enumeration denoting the RX, TX type of the resource.
-        * This enumeration is used for resources that are similar for both
-        * TX and RX paths of the chip.
-        */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_FLAGS_PATH     UINT32_C(0x1)
-       /* tx path */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_FLAGS_PATH_TX    UINT32_C(0x0)
-       /* rx path */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_FLAGS_PATH_RX    UINT32_C(0x1)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_FLAGS_PATH_LAST \
-               HWRM_CFA_METER_PROFILE_CFG_INPUT_FLAGS_PATH_RX
-       /* The meter algorithm type. */
-       uint8_t meter_type;
-       /* RFC 2697 (srTCM) */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_METER_TYPE_RFC2697 \
+       /* The source function id. */
+       uint16_t        src_fid;
+       /* Tunnel Type. */
+       uint8_t tunnel_type;
+       /* Non-tunnel */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_NONTUNNEL \
                UINT32_C(0x0)
-       /* RFC 2698 (trTCM) */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_METER_TYPE_RFC2698 \
+       /* Virtual eXtensible Local Area Network (VXLAN) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_VXLAN \
                UINT32_C(0x1)
-       /* RFC 4115 (trTCM) */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_METER_TYPE_RFC4115 \
+       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_NVGRE \
                UINT32_C(0x2)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_METER_TYPE_LAST \
-               HWRM_CFA_METER_PROFILE_CFG_INPUT_METER_TYPE_RFC4115
-       /* This value identifies a meter profile in CFA. */
-       uint16_t        meter_profile_id;
-       /*
-        * A value of 0xfff is considered invalid and implies the
-        * profile is not configured.
-        */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_METER_PROFILE_ID_INVALID \
-               UINT32_C(0xffff)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_METER_PROFILE_ID_LAST \
-               HWRM_CFA_METER_PROFILE_CFG_INPUT_METER_PROFILE_ID_INVALID
-       /*
-        * This field is reserved for the future use.
-        * It shall be set to 0.
-        */
-       uint32_t        reserved;
-       /* A meter rate specified in bytes-per-second. */
-       uint32_t        commit_rate;
-       /* The bandwidth value. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_SCALE_LAST \
-               HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_BW_VALUE_UNIT_LAST \
-               HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_RATE_BW_VALUE_UNIT_INVALID
-       /* A meter burst size specified in bytes. */
-       uint32_t        commit_burst;
-       /* The bandwidth value. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_SCALE_LAST \
-               HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_BW_VALUE_UNIT_LAST \
-               HWRM_CFA_METER_PROFILE_CFG_INPUT_COMMIT_BURST_BW_VALUE_UNIT_INVALID
-       /* A meter rate specified in bytes-per-second. */
-       uint32_t        excess_peak_rate;
-       /* The bandwidth value. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_SCALE_LAST \
-               HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_LAST \
-               HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_RATE_BW_VALUE_UNIT_INVALID
-       /* A meter burst size specified in bytes. */
-       uint32_t        excess_peak_burst;
-       /* The bandwidth value. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_BW_VALUE_MASK \
-               UINT32_C(0xfffffff)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_BW_VALUE_SFT \
-               0
-       /* The granularity of the value (bits or bytes). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_SCALE \
-               UINT32_C(0x10000000)
-       /* Value is in bits. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_SCALE_BITS \
-               (UINT32_C(0x0) << 28)
-       /* Value is in bytes. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_SCALE_BYTES \
-               (UINT32_C(0x1) << 28)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_SCALE_LAST \
-               HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_SCALE_BYTES
-       /* bw_value_unit is 3 b */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_MASK \
-               UINT32_C(0xe0000000)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_SFT \
-               29
-       /* Value is in Mb or MB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_MEGA \
-               (UINT32_C(0x0) << 29)
-       /* Value is in Kb or KB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_KILO \
-               (UINT32_C(0x2) << 29)
-       /* Value is in bits or bytes. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_BASE \
-               (UINT32_C(0x4) << 29)
-       /* Value is in Gb or GB (base 10). */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_GIGA \
-               (UINT32_C(0x6) << 29)
-       /* Value is in 1/100th of a percentage of total bandwidth. */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_PERCENT1_100 \
-               (UINT32_C(0x1) << 29)
-       /* Invalid unit */
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_INVALID \
-               (UINT32_C(0x7) << 29)
-       #define HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_LAST \
-               HWRM_CFA_METER_PROFILE_CFG_INPUT_EXCESS_PEAK_BURST_BW_VALUE_UNIT_INVALID
+       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_L2GRE \
+               UINT32_C(0x3)
+       /* IP in IP */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_IPIP \
+               UINT32_C(0x4)
+       /* Generic Network Virtualization Encapsulation (Geneve) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_GENEVE \
+               UINT32_C(0x5)
+       /* Multi-Protocol Label Switching (MPLS) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_MPLS \
+               UINT32_C(0x6)
+       /* Stateless Transport Tunnel (STT) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_STT \
+               UINT32_C(0x7)
+       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_IPGRE \
+               UINT32_C(0x8)
+       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+               UINT32_C(0x9)
+       /* Enhanced Generic Routing Encapsulation (GRE version 1) inside IP datagram payload */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_IPGRE_V1 \
+               UINT32_C(0xa)
+       /* Any tunneled traffic */
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_ANYTUNNEL \
+               UINT32_C(0xff)
+       #define HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_LAST \
+               HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_ANYTUNNEL
+       uint8_t unused_0[5];
 } __attribute__((packed));
 
-/* hwrm_cfa_meter_profile_cfg_output (size:128b/16B) */
-struct hwrm_cfa_meter_profile_cfg_output {
+/* hwrm_cfa_redirect_tunnel_type_info_output (size:128b/16B) */
+struct hwrm_cfa_redirect_tunnel_type_info_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -23429,7 +22330,9 @@ struct hwrm_cfa_meter_profile_cfg_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       uint8_t unused_0[7];
+       /* The destination function id, to which the traffic is redirected. */
+       uint16_t        dest_fid;
+       uint8_t unused_0[5];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -23440,13 +22343,120 @@ struct hwrm_cfa_meter_profile_cfg_output {
        uint8_t valid;
 } __attribute__((packed));
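
/*
 * A minimal round-trip sketch: the request carries src_fid and
 * tunnel_type, and the response yields dest_fid once 'valid' reads 1.
 * issue_hwrm() is a hypothetical transport helper, not a real API;
 * rte_cpu_to_le_16()/rte_le_to_cpu_16() (rte_byteorder.h) are used
 * because HWRM fields are little-endian.
 */
static int
query_redirect_dest(uint16_t src_fid, uint16_t *dest_fid)
{
	struct hwrm_cfa_redirect_tunnel_type_info_input req;
	struct hwrm_cfa_redirect_tunnel_type_info_output resp;

	memset(&req, 0, sizeof(req));
	memset(&resp, 0, sizeof(resp));
	req.src_fid = rte_cpu_to_le_16(src_fid);
	req.tunnel_type =
		HWRM_CFA_REDIRECT_TUNNEL_TYPE_INFO_INPUT_TUNNEL_TYPE_VXLAN;
	if (issue_hwrm(&req, sizeof(req), &resp, sizeof(resp)) != 0 ||
	    resp.valid != 1)
		return -1;
	*dest_fid = rte_le_to_cpu_16(resp.dest_fid);
	return 0;
}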
 
-/*********************************
- * hwrm_cfa_meter_instance_alloc *
- *********************************/
+/* hwrm_vxlan_ipv4_hdr (size:128b/16B) */
+struct hwrm_vxlan_ipv4_hdr {
+       /* IPv4 version and header length. */
+       uint8_t ver_hlen;
+       /* IPv4 header length */
+       #define HWRM_VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_MASK UINT32_C(0xf)
+       #define HWRM_VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT 0
+       /* Version */
+       #define HWRM_VXLAN_IPV4_HDR_VER_HLEN_VERSION_MASK      UINT32_C(0xf0)
+       #define HWRM_VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT       4
+       /* IPv4 type of service. */
+       uint8_t tos;
+       /* IPv4 identification. */
+       uint16_t        ip_id;
+       /* IPv4 flags and offset. */
+       uint16_t        flags_frag_offset;
+       /* IPv4 TTL. */
+       uint8_t ttl;
+       /* IPv4 protocol. */
+       uint8_t protocol;
+       /* IPv4 source address. */
+       uint32_t        src_ip_addr;
+       /* IPv4 destination address. */
+       uint32_t        dest_ip_addr;
+} __attribute__((packed));
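
/*
 * The VERSION/HEADER_LENGTH mask-and-shift pairs pack two nibbles into
 * ver_hlen; for a plain 20-byte IPv4 header the byte works out to
 * 0x45. A minimal sketch:
 */
static void
fill_outer_ipv4(struct hwrm_vxlan_ipv4_hdr *ip4)
{
	memset(ip4, 0, sizeof(*ip4));
	/* Version 4 in the high nibble, 5 32-bit words (20 B) in the low. */
	ip4->ver_hlen = (4 << HWRM_VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT) |
			(5 & HWRM_VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_MASK);
	ip4->ttl = 64;
	ip4->protocol = 17;	/* UDP, which carries the VXLAN payload */
}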
+
+/* hwrm_vxlan_ipv6_hdr (size:320b/40B) */
+struct hwrm_vxlan_ipv6_hdr {
+       /* IPv6 version, traffic class and flow label. */
+       uint32_t        ver_tc_flow_label;
+       /* IPv6 version shift */
+       #define HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_VER_SFT \
+               UINT32_C(0x1c)
+       /* IPv6 version mask */
+       #define HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_VER_MASK \
+               UINT32_C(0xf0000000)
+       /* IPv6 TC shift */
+       #define HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_TC_SFT \
+               UINT32_C(0x14)
+       /* IPv6 TC mask */
+       #define HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_TC_MASK \
+               UINT32_C(0xff00000)
+       /* IPv6 flow label shift */
+       #define HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_SFT \
+               UINT32_C(0x0)
+       /* IPv6 flow label mask */
+       #define HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_MASK \
+               UINT32_C(0xfffff)
+       #define HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_LAST \
+               HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_MASK
+       /* IPv6 payload length. */
+       uint16_t        payload_len;
+       /* IPv6 next header. */
+       uint8_t next_hdr;
+       /* IPv6 TTL. */
+       uint8_t ttl;
+       /* IPv6 source address. */
+       uint32_t        src_ip_addr[4];
+       /* IPv6 destination address. */
+       uint32_t        dest_ip_addr[4];
+} __attribute__((packed));
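
/*
 * Same packing idea for IPv6, except version, traffic class and flow
 * label share one 32-bit word (note the _SFT macros are spelled as
 * UINT32_C() constants: 28, 20 and 0). A sketch, converted to the
 * little-endian byte order HWRM expects:
 */
static void
fill_outer_ipv6(struct hwrm_vxlan_ipv6_hdr *ip6, uint32_t tc, uint32_t flow)
{
	memset(ip6, 0, sizeof(*ip6));
	ip6->ver_tc_flow_label = rte_cpu_to_le_32(
		(UINT32_C(6) << HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_VER_SFT) |
		(tc << HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_TC_SFT) |
		(flow & HWRM_VXLAN_IPV6_HDR_VER_TC_FLOW_LABEL_FLOW_LABEL_MASK));
	ip6->ttl = 64;
}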
+
+/* hwrm_cfa_encap_data_vxlan (size:640b/80B) */
+struct hwrm_cfa_encap_data_vxlan {
+       /* Source MAC address. */
+       uint8_t src_mac_addr[6];
+       /* reserved. */
+       uint16_t        unused_0;
+       /* Destination MAC address. */
+       uint8_t dst_mac_addr[6];
+       /* Number of VLAN tags. */
+       uint8_t num_vlan_tags;
+       /* reserved. */
+       uint8_t unused_1;
+       /* Outer VLAN TPID. */
+       uint16_t        ovlan_tpid;
+       /* Outer VLAN TCI. */
+       uint16_t        ovlan_tci;
+       /* Inner VLAN TPID. */
+       uint16_t        ivlan_tpid;
+       /* Inner VLAN TCI. */
+       uint16_t        ivlan_tci;
+       /* L3 header fields. */
+       uint32_t        l3[10];
+       /* IP version mask. */
+       #define HWRM_CFA_ENCAP_DATA_VXLAN_L3_VER_MASK UINT32_C(0xf)
+       /* IP version 4. */
+       #define HWRM_CFA_ENCAP_DATA_VXLAN_L3_VER_IPV4 UINT32_C(0x4)
+       /* IP version 6. */
+       #define HWRM_CFA_ENCAP_DATA_VXLAN_L3_VER_IPV6 UINT32_C(0x6)
+       #define HWRM_CFA_ENCAP_DATA_VXLAN_L3_LAST \
+               HWRM_CFA_ENCAP_DATA_VXLAN_L3_VER_IPV6
+       /* UDP source port. */
+       uint16_t        src_port;
+       /* UDP destination port. */
+       uint16_t        dst_port;
+       /* VXLAN Network Identifier. */
+       uint32_t        vni;
+       /* 3 bytes of reserved fields from the 1st dword of the VXLAN header. */
+       uint8_t hdr_rsvd0[3];
+       /* 1 byte of reserved field from the 2nd dword of the VXLAN header. */
+       uint8_t hdr_rsvd1;
+       /* VXLAN header flags field. */
+       uint8_t hdr_flags;
+       uint8_t unused[3];
+} __attribute__((packed));
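
/*
 * l3[10] (40 bytes) is sized for the larger hwrm_vxlan_ipv6_hdr; an
 * IPv4 header fills only its first 16 bytes, and the L3_VER constants
 * (0x4/0x6) tell firmware which layout to parse. A sketch of an
 * IPv4/UDP VXLAN encap; the 0x08 flag value is an assumption taken
 * from the RFC 7348 'I' (VNI present) bit, which this header does not
 * itself define.
 */
static void
fill_vxlan_encap(struct hwrm_cfa_encap_data_vxlan *encap,
		 const uint8_t src_mac[6], const uint8_t dst_mac[6],
		 const struct hwrm_vxlan_ipv4_hdr *ip4, uint32_t vni)
{
	memset(encap, 0, sizeof(*encap));
	memcpy(encap->src_mac_addr, src_mac, 6);
	memcpy(encap->dst_mac_addr, dst_mac, 6);
	memcpy(encap->l3, ip4, sizeof(*ip4));	/* 16 of the 40 l3 bytes */
	encap->dst_port = rte_cpu_to_le_16(4789);	/* IANA VXLAN port */
	encap->vni = rte_cpu_to_le_32(vni);
	encap->hdr_flags = 0x08;	/* assumed: VNI-valid flag */
}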
+
+/*******************************
+ * hwrm_cfa_encap_record_alloc *
+ *******************************/
 
 
-/* hwrm_cfa_meter_instance_alloc_input (size:192b/24B) */
-struct hwrm_cfa_meter_instance_alloc_input {
+/* hwrm_cfa_encap_record_alloc_input (size:832b/104B) */
+struct hwrm_cfa_encap_record_alloc_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -23474,38 +22484,48 @@ struct hwrm_cfa_meter_instance_alloc_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint8_t flags;
-       /*
-        * Enumeration denoting the RX, TX type of the resource.
-        * This enumeration is used for resources that are similar for both
-        * TX and RX paths of the chip.
-        */
-       #define HWRM_CFA_METER_INSTANCE_ALLOC_INPUT_FLAGS_PATH \
+       uint32_t        flags;
+       /* Setting this flag indicates that the record applies to the loopback path. */
+       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_FLAGS_LOOPBACK \
                UINT32_C(0x1)
-       /* tx path */
-       #define HWRM_CFA_METER_INSTANCE_ALLOC_INPUT_FLAGS_PATH_TX \
-               UINT32_C(0x0)
-       /* rx path */
-       #define HWRM_CFA_METER_INSTANCE_ALLOC_INPUT_FLAGS_PATH_RX \
+       /* Encapsulation Type. */
+       uint8_t encap_type;
+       /* Virtual eXtensible Local Area Network (VXLAN) */
+       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_VXLAN \
                UINT32_C(0x1)
-       #define HWRM_CFA_METER_INSTANCE_ALLOC_INPUT_FLAGS_PATH_LAST \
-               HWRM_CFA_METER_INSTANCE_ALLOC_INPUT_FLAGS_PATH_RX
-       uint8_t unused_0;
-       /* This value identifies a meter profile in CFA. */
-       uint16_t        meter_profile_id;
-       /*
-        * A value of 0xfff is considered invalid and implies the
-        * profile is not configured.
-        */
-       #define HWRM_CFA_METER_INSTANCE_ALLOC_INPUT_METER_PROFILE_ID_INVALID \
-               UINT32_C(0xffff)
-       #define HWRM_CFA_METER_INSTANCE_ALLOC_INPUT_METER_PROFILE_ID_LAST \
-               HWRM_CFA_METER_INSTANCE_ALLOC_INPUT_METER_PROFILE_ID_INVALID
-       uint8_t unused_1[4];
+       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
+       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_NVGRE \
+               UINT32_C(0x2)
+       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
+       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_L2GRE \
+               UINT32_C(0x3)
+       /* IP in IP */
+       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_IPIP \
+               UINT32_C(0x4)
+       /* Generic Network Virtualization Encapsulation (Geneve) */
+       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_GENEVE \
+               UINT32_C(0x5)
+       /* Multi-Protocol Label Switching (MPLS) */
+       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_MPLS \
+               UINT32_C(0x6)
+       /* VLAN */
+       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_VLAN \
+               UINT32_C(0x7)
+       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
+       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_IPGRE \
+               UINT32_C(0x8)
+       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
+       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_VXLAN_V4 \
+               UINT32_C(0x9)
+       #define HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_LAST \
+               HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_VXLAN_V4
+       uint8_t unused_0[3];
+       /* This value is encap data used for the given encap type. */
+       uint32_t        encap_data[20];
 } __attribute__((packed));
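
/*
 * encap_data[20] is 80 bytes, exactly sizeof(struct
 * hwrm_cfa_encap_data_vxlan), so the per-type payload is copied in
 * verbatim. A minimal sketch for a VXLAN record:
 */
static void
fill_encap_record_alloc(struct hwrm_cfa_encap_record_alloc_input *req,
			const struct hwrm_cfa_encap_data_vxlan *encap)
{
	memset(req, 0, sizeof(*req));
	req->encap_type = HWRM_CFA_ENCAP_RECORD_ALLOC_INPUT_ENCAP_TYPE_VXLAN;
	memcpy(req->encap_data, encap, sizeof(*encap));
}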
 
-/* hwrm_cfa_meter_instance_alloc_output (size:128b/16B) */
-struct hwrm_cfa_meter_instance_alloc_output {
+/* hwrm_cfa_encap_record_alloc_output (size:128b/16B) */
+struct hwrm_cfa_encap_record_alloc_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -23514,17 +22534,9 @@ struct hwrm_cfa_meter_instance_alloc_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* This value identifies a meter instance in CFA. */
-       uint16_t        meter_instance_id;
-       /*
-        * A value of 0xfff is considered invalid and implies the
-        * instance is not configured.
-        */
-       #define HWRM_CFA_METER_INSTANCE_ALLOC_OUTPUT_METER_INSTANCE_ID_INVALID \
-               UINT32_C(0xffff)
-       #define HWRM_CFA_METER_INSTANCE_ALLOC_OUTPUT_METER_INSTANCE_ID_LAST \
-               HWRM_CFA_METER_INSTANCE_ALLOC_OUTPUT_METER_INSTANCE_ID_INVALID
-       uint8_t unused_0[5];
+       /* This value is an opaque id into CFA data structures. */
+       uint32_t        encap_record_id;
+       uint8_t unused_0[3];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -23535,13 +22547,13 @@ struct hwrm_cfa_meter_instance_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
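
/*
 * Because firmware writes 'valid' last, a caller should check it (and
 * error_code) before trusting encap_record_id; a consumer sketch:
 */
static int
read_encap_record_id(const struct hwrm_cfa_encap_record_alloc_output *resp,
		     uint32_t *record_id)
{
	if (rte_le_to_cpu_16(resp->error_code) != 0 || resp->valid != 1)
		return -1;
	*record_id = rte_le_to_cpu_32(resp->encap_record_id);
	return 0;
}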
 
-/********************************
- * hwrm_cfa_meter_instance_free *
- ********************************/
+/******************************
+ * hwrm_cfa_encap_record_free *
+ ******************************/
 
 
-/* hwrm_cfa_meter_instance_free_input (size:192b/24B) */
-struct hwrm_cfa_meter_instance_free_input {
+/* hwrm_cfa_encap_record_free_input (size:192b/24B) */
+struct hwrm_cfa_encap_record_free_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -23569,37 +22581,13 @@ struct hwrm_cfa_meter_instance_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint8_t flags;
-       /*
-        * Enumeration denoting the RX, TX type of the resource.
-        * This enumeration is used for resources that are similar for both
-        * TX and RX paths of the chip.
-        */
-       #define HWRM_CFA_METER_INSTANCE_FREE_INPUT_FLAGS_PATH     UINT32_C(0x1)
-       /* tx path */
-       #define HWRM_CFA_METER_INSTANCE_FREE_INPUT_FLAGS_PATH_TX \
-               UINT32_C(0x0)
-       /* rx path */
-       #define HWRM_CFA_METER_INSTANCE_FREE_INPUT_FLAGS_PATH_RX \
-               UINT32_C(0x1)
-       #define HWRM_CFA_METER_INSTANCE_FREE_INPUT_FLAGS_PATH_LAST \
-               HWRM_CFA_METER_INSTANCE_FREE_INPUT_FLAGS_PATH_RX
-       uint8_t unused_0;
-       /* This value identifies a meter instance in CFA. */
-       uint16_t        meter_instance_id;
-       /*
-        * A value of 0xfff is considered invalid and implies the
-        * instance is not configured.
-        */
-       #define HWRM_CFA_METER_INSTANCE_FREE_INPUT_METER_INSTANCE_ID_INVALID \
-               UINT32_C(0xffff)
-       #define HWRM_CFA_METER_INSTANCE_FREE_INPUT_METER_INSTANCE_ID_LAST \
-               HWRM_CFA_METER_INSTANCE_FREE_INPUT_METER_INSTANCE_ID_INVALID
-       uint8_t unused_1[4];
+       /* This value is an opaque id into CFA data structures. */
+       uint32_t        encap_record_id;
+       uint8_t unused_0[4];
 } __attribute__((packed));
 
-/* hwrm_cfa_meter_instance_free_output (size:128b/16B) */
-struct hwrm_cfa_meter_instance_free_output {
+/* hwrm_cfa_encap_record_free_output (size:128b/16B) */
+struct hwrm_cfa_encap_record_free_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -23619,13 +22607,13 @@ struct hwrm_cfa_meter_instance_free_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/*******************************
- * hwrm_cfa_decap_filter_alloc *
- *******************************/
+/********************************
+ * hwrm_cfa_ntuple_filter_alloc *
+ ********************************/
 
 
-/* hwrm_cfa_decap_filter_alloc_input (size:832b/104B) */
-struct hwrm_cfa_decap_filter_alloc_input {
+/* hwrm_cfa_ntuple_filter_alloc_input (size:1024b/128B) */
+struct hwrm_cfa_ntuple_filter_alloc_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -23654,190 +22642,147 @@ struct hwrm_cfa_decap_filter_alloc_input {
         */
        uint64_t        resp_addr;
        uint32_t        flags;
-       /* ovs_tunnel is 1 b */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_FLAGS_OVS_TUNNEL \
+       /* Setting this flag indicates that the filter applies to the loopback path. */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_LOOPBACK \
                UINT32_C(0x1)
+       /*
+        * Setting of this flag indicates a drop action. If this flag is not
+        * set, then it should be considered an accept action.
+        */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_DROP \
+               UINT32_C(0x2)
+       /*
+        * Setting of this flag indicates that a meter is expected to be attached
+        * to this flow. This hint can be used when choosing the action record
+        * format required for the flow.
+        */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_METER \
+               UINT32_C(0x4)
        uint32_t        enables;
        /*
-        * This bit must be '1' for the tunnel_type field to be
+        * This bit must be '1' for the l2_filter_id field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_TYPE \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_L2_FILTER_ID \
                UINT32_C(0x1)
        /*
-        * This bit must be '1' for the tunnel_id field to be
+        * This bit must be '1' for the ethertype field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_ID \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_ETHERTYPE \
                UINT32_C(0x2)
        /*
-        * This bit must be '1' for the src_macaddr field to be
+        * This bit must be '1' for the tunnel_type field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_SRC_MACADDR \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_TYPE \
                UINT32_C(0x4)
        /*
-        * This bit must be '1' for the dst_macaddr field to be
+        * This bit must be '1' for the src_macaddr field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_DST_MACADDR \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_MACADDR \
                UINT32_C(0x8)
        /*
-        * This bit must be '1' for the ovlan_vid field to be
+        * This bit must be '1' for the ipaddr_type field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_OVLAN_VID \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_IPADDR_TYPE \
                UINT32_C(0x10)
        /*
-        * This bit must be '1' for the ivlan_vid field to be
+        * This bit must be '1' for the src_ipaddr field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_IVLAN_VID \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_IPADDR \
                UINT32_C(0x20)
        /*
-        * This bit must be '1' for the t_ovlan_vid field to be
+        * This bit must be '1' for the src_ipaddr_mask field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_T_OVLAN_VID \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_IPADDR_MASK \
                UINT32_C(0x40)
        /*
-        * This bit must be '1' for the t_ivlan_vid field to be
+        * This bit must be '1' for the dst_ipaddr field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_T_IVLAN_VID \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_IPADDR \
                UINT32_C(0x80)
        /*
-        * This bit must be '1' for the ethertype field to be
+        * This bit must be '1' for the dst_ipaddr_mask field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_ETHERTYPE \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_IPADDR_MASK \
                UINT32_C(0x100)
        /*
-        * This bit must be '1' for the src_ipaddr field to be
+        * This bit must be '1' for the ip_protocol field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_SRC_IPADDR \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_IP_PROTOCOL \
                UINT32_C(0x200)
        /*
-        * This bit must be '1' for the dst_ipaddr field to be
+        * This bit must be '1' for the src_port field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_DST_IPADDR \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_PORT \
                UINT32_C(0x400)
        /*
-        * This bit must be '1' for the ipaddr_type field to be
+        * This bit must be '1' for the src_port_mask field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_IPADDR_TYPE \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_PORT_MASK \
                UINT32_C(0x800)
        /*
-        * This bit must be '1' for the ip_protocol field to be
+        * This bit must be '1' for the dst_port field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_IP_PROTOCOL \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_PORT \
                UINT32_C(0x1000)
        /*
-        * This bit must be '1' for the src_port field to be
+        * This bit must be '1' for the dst_port_mask field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_SRC_PORT \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_PORT_MASK \
                UINT32_C(0x2000)
        /*
-        * This bit must be '1' for the dst_port field to be
+        * This bit must be '1' for the pri_hint field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_DST_PORT \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_PRI_HINT \
                UINT32_C(0x4000)
        /*
-        * This bit must be '1' for the dst_id field to be
+        * This bit must be '1' for the ntuple_filter_id field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_DST_ID \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_NTUPLE_FILTER_ID \
                UINT32_C(0x8000)
        /*
-        * This bit must be '1' for the mirror_vnic_id field to be
+        * This bit must be '1' for the dst_id field to be
         * configured.
         */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_ID \
                UINT32_C(0x10000)
        /*
-        * Tunnel identifier.
-        * Virtual Network Identifier (VNI). Only valid with
-        * tunnel_types VXLAN, NVGRE, and Geneve.
-        * Only lower 24-bits of VNI field are used
-        * in setting up the filter.
-        */
-       uint32_t        tunnel_id;
-       /* Tunnel Type. */
-       uint8_t tunnel_type;
-       /* Non-tunnel */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL \
-               UINT32_C(0x0)
-       /* Virtual eXtensible Local Area Network (VXLAN) */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
-               UINT32_C(0x1)
-       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NVGRE \
-               UINT32_C(0x2)
-       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_L2GRE \
-               UINT32_C(0x3)
-       /* IP in IP */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPIP \
-               UINT32_C(0x4)
-       /* Generic Network Virtualization Encapsulation (Geneve) */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
-               UINT32_C(0x5)
-       /* Multi-Protocol Lable Switching (MPLS) */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_MPLS \
-               UINT32_C(0x6)
-       /* Stateless Transport Tunnel (STT) */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_STT \
-               UINT32_C(0x7)
-       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPGRE \
-               UINT32_C(0x8)
-       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
-               UINT32_C(0x9)
-       /* Any tunneled traffic */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
-               UINT32_C(0xff)
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_LAST \
-               HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL
-       uint8_t unused_0;
-       uint16_t        unused_1;
-       /*
-        * This value indicates the source MAC address in
-        * the Ethernet header.
-        */
-       uint8_t src_macaddr[6];
-       uint8_t unused_2[2];
-       /*
-        * This value indicates the destination MAC address in
-        * the Ethernet header.
-        */
-       uint8_t dst_macaddr[6];
-       /*
-        * This value indicates the VLAN ID of the outer VLAN tag
-        * in the Ethernet header.
+        * This bit must be '1' for the mirror_vnic_id field to be
+        * configured.
         */
-       uint16_t        ovlan_vid;
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID \
+               UINT32_C(0x20000)
        /*
-        * This value indicates the VLAN ID of the inner VLAN tag
-        * in the Ethernet header.
+        * This bit must be '1' for the dst_macaddr field to be
+        * configured.
         */
-       uint16_t        ivlan_vid;
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_MACADDR \
+               UINT32_C(0x40000)
        /*
-        * This value indicates the VLAN ID of the outer VLAN tag
-        * in the tunnel Ethernet header.
+        * This value identifies a set of CFA data structures used for an L2
+        * context.
         */
-       uint16_t        t_ovlan_vid;
+       uint64_t        l2_filter_id;
        /*
-        * This value indicates the VLAN ID of the inner VLAN tag
-        * in the tunnel Ethernet header.
+        * This value indicates the source MAC address in
+        * the Ethernet header.
         */
-       uint16_t        t_ivlan_vid;
+       uint8_t src_macaddr[6];
        /* This value indicates the ethertype in the Ethernet header. */
        uint16_t        ethertype;
        /*
@@ -23848,16 +22793,16 @@ struct hwrm_cfa_decap_filter_alloc_input {
         */
        uint8_t ip_addr_type;
        /* invalid */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_UNKNOWN \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_UNKNOWN \
                UINT32_C(0x0)
        /* IPv4 */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV4 \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV4 \
                UINT32_C(0x4)
        /* IPv6 */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV6 \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV6 \
                UINT32_C(0x6)
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_LAST \
-               HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV6
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_LAST \
+               HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV6
        /*
        * The value of the protocol field in the IP header.
         * Applies to UDP and TCP traffic.
@@ -23866,53 +22811,146 @@ struct hwrm_cfa_decap_filter_alloc_input {
         */
        uint8_t ip_protocol;
        /* invalid */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_PROTOCOL_UNKNOWN \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_UNKNOWN \
                UINT32_C(0x0)
        /* TCP */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_PROTOCOL_TCP \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_TCP \
                UINT32_C(0x6)
        /* UDP */
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_PROTOCOL_UDP \
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_UDP \
                UINT32_C(0x11)
-       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_PROTOCOL_LAST \
-               HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_PROTOCOL_UDP
-       uint16_t        unused_3;
-       uint32_t        unused_4;
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_LAST \
+               HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_UDP
+       /*
+        * If set, this value shall represent the
+        * Logical VNIC ID of the destination VNIC for the RX
+        * path and network port id of the destination port for
+        * the TX path.
+        */
+       uint16_t        dst_id;
+       /*
+        * Logical VNIC ID of the VNIC where traffic is
+        * mirrored.
+        */
+       uint16_t        mirror_vnic_id;
+       /*
+        * This value indicates the tunnel type for this filter.
+        * If this field is not specified, then the filter shall
+        * apply to both non-tunneled and tunneled packets.
+        * If this field conflicts with the tunnel_type specified
+        * in the l2_filter_id, then the HWRM shall return an
+        * error for this command.
+        */
+       uint8_t tunnel_type;
+       /* Non-tunnel */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL \
+               UINT32_C(0x0)
+       /* Virtual eXtensible Local Area Network (VXLAN) */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
+               UINT32_C(0x1)
+       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NVGRE \
+               UINT32_C(0x2)
+       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_L2GRE \
+               UINT32_C(0x3)
+       /* IP in IP */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPIP \
+               UINT32_C(0x4)
+       /* Generic Network Virtualization Encapsulation (Geneve) */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
+               UINT32_C(0x5)
+       /* Multi-Protocol Label Switching (MPLS) */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_MPLS \
+               UINT32_C(0x6)
+       /* Stateless Transport Tunnel (STT) */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_STT \
+               UINT32_C(0x7)
+       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPGRE \
+               UINT32_C(0x8)
+       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+               UINT32_C(0x9)
+       /* Any tunneled traffic */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
+               UINT32_C(0xff)
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_LAST \
+               HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL
+       /*
+        * This hint is provided to help in placing
+        * the filter in the filter table.
+        */
+       uint8_t pri_hint;
+       /* No preference */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_NO_PREFER \
+               UINT32_C(0x0)
+       /* Above the given filter */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_ABOVE \
+               UINT32_C(0x1)
+       /* Below the given filter */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_BELOW \
+               UINT32_C(0x2)
+       /* As high as possible */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_HIGHEST \
+               UINT32_C(0x3)
+       /* As low as possible */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_LOWEST \
+               UINT32_C(0x4)
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_LAST \
+               HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_PRI_HINT_LOWEST
        /*
         * The value of source IP address to be used in filtering.
         * For IPv4, first four bytes represent the IP address.
         */
-       uint32_t        src_ipaddr[4];
+       uint32_t        src_ipaddr[4];
+       /*
+        * The value of source IP address mask to be used in
+        * filtering.
+        * For IPv4, first four bytes represent the IP address mask.
+        */
+       uint32_t        src_ipaddr_mask[4];
        /*
         * The value of destination IP address to be used in filtering.
         * For IPv4, first four bytes represent the IP address.
         */
        uint32_t        dst_ipaddr[4];
+       /*
+        * The value of destination IP address mask to be used in
+        * filtering.
+        * For IPv4, first four bytes represent the IP address mask.
+        */
+       uint32_t        dst_ipaddr_mask[4];
        /*
         * The value of source port to be used in filtering.
         * Applies to UDP and TCP traffic.
         */
        uint16_t        src_port;
+       /*
+        * The value of source port mask to be used in filtering.
+        * Applies to UDP and TCP traffic.
+        */
+       uint16_t        src_port_mask;
        /*
         * The value of destination port to be used in filtering.
         * Applies to UDP and TCP traffic.
         */
        uint16_t        dst_port;
        /*
-        * If set, this value shall represent the
-        * Logical VNIC ID of the destination VNIC for the RX
-        * path.
+        * The value of destination port mask to be used in
+        * filtering.
+        * Applies to UDP and TCP traffic.
         */
-       uint16_t        dst_id;
+       uint16_t        dst_port_mask;
        /*
-        * If set, this value shall represent the L2 context that matches the L2
-        * information of the decap filter.
+        * This is the ID of the filter that goes along with
+        * the pri_hint.
         */
-       uint16_t        l2_ctxt_ref_id;
+       uint64_t        ntuple_filter_id_hint;
 } __attribute__((packed));
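
/*
 * Every match field has a matching 'enables' bit, so a filter is built
 * by setting exactly the bits for the fields populated. A sketch that
 * drops UDP traffic from one IPv4 source to the VXLAN port; the
 * l2_filter_id is assumed to come from a prior L2 filter allocation
 * and to be stored little-endian already.
 */
static void
fill_drop_filter(struct hwrm_cfa_ntuple_filter_alloc_input *req,
		 uint64_t l2_filter_id)
{
	memset(req, 0, sizeof(*req));
	req->flags = rte_cpu_to_le_32(
		HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_FLAGS_DROP);
	req->enables = rte_cpu_to_le_32(
		HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_L2_FILTER_ID |
		HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_IPADDR_TYPE |
		HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_SRC_IPADDR |
		HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_IP_PROTOCOL |
		HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_ENABLES_DST_PORT);
	req->l2_filter_id = l2_filter_id;
	req->ip_addr_type = HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV4;
	req->ip_protocol = HWRM_CFA_NTUPLE_FILTER_ALLOC_INPUT_IP_PROTOCOL_UDP;
	req->src_ipaddr[0] = rte_cpu_to_le_32(0xc0a80001);	/* 192.168.0.1 */
	req->dst_port = rte_cpu_to_le_16(4789);
}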
 
-/* hwrm_cfa_decap_filter_alloc_output (size:128b/16B) */
-struct hwrm_cfa_decap_filter_alloc_output {
+/* hwrm_cfa_ntuple_filter_alloc_output (size:192b/24B) */
+struct hwrm_cfa_ntuple_filter_alloc_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -23922,7 +22960,15 @@ struct hwrm_cfa_decap_filter_alloc_output {
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
        /* This value is an opaque id into CFA data structures. */
-       uint32_t        decap_filter_id;
+       uint64_t        ntuple_filter_id;
+       /*
+        * This is the ID of the flow associated with this
+        * filter.
+        * This value shall be used to match and associate the
+        * flow identifier returned in completion records.
+        * A value of 0xFFFFFFFF shall indicate no flow id.
+        */
+       uint32_t        flow_id;
        uint8_t unused_0[3];
        /*
         * This field is used in Output records to indicate that the output
@@ -23934,73 +22980,31 @@ struct hwrm_cfa_decap_filter_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
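
/*
 * On success the 64-bit ntuple_filter_id is the handle for later
 * free/cfg calls, while flow_id (0xffffffff meaning "no flow") is what
 * completion records carry. A consumer sketch:
 */
static int
read_ntuple_alloc_result(const struct hwrm_cfa_ntuple_filter_alloc_output *resp,
			 uint64_t *filter_id, uint32_t *flow_id)
{
	if (resp->valid != 1)
		return -1;
	*filter_id = rte_le_to_cpu_64(resp->ntuple_filter_id);
	*flow_id = rte_le_to_cpu_32(resp->flow_id);	/* may be 0xffffffff */
	return 0;
}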
 
-/******************************
- * hwrm_cfa_decap_filter_free *
- ******************************/
-
-
-/* hwrm_cfa_decap_filter_free_input (size:192b/24B) */
-struct hwrm_cfa_decap_filter_free_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
+/* hwrm_cfa_ntuple_filter_alloc_cmd_err (size:64b/8B) */
+struct hwrm_cfa_ntuple_filter_alloc_cmd_err {
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * command-specific error codes that go to
+        * the cmd_err field in Common HWRM Error Response.
         */
-       uint64_t        resp_addr;
-       /* This value is an opaque id into CFA data structures. */
-       uint32_t        decap_filter_id;
-       uint8_t unused_0[4];
-} __attribute__((packed));
-
-/* hwrm_cfa_decap_filter_free_output (size:128b/16B) */
-struct hwrm_cfa_decap_filter_free_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
+       uint8_t code;
+       /* Unknown error */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN \
+               UINT32_C(0x0)
+       /* Unable to complete operation due to conflict with Rx Mask VLAN */
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR \
+               UINT32_C(0x1)
+       #define HWRM_CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_LAST \
+               HWRM_CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR
        uint8_t unused_0[7];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
 } __attribute__((packed));
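
/*
 * When the common HWRM error response reports a command-specific
 * failure, its cmd_err bytes can be read through this struct (the
 * overlay itself is an assumed access pattern; the header only defines
 * the layout). A decoding sketch:
 */
static const char *
ntuple_alloc_err_str(const struct hwrm_cfa_ntuple_filter_alloc_cmd_err *e)
{
	switch (e->code) {
	case HWRM_CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_RX_MASK_VLAN_CONFLICT_ERR:
		return "conflict with Rx mask VLAN settings";
	case HWRM_CFA_NTUPLE_FILTER_ALLOC_CMD_ERR_CODE_UNKNOWN:
	default:
		return "unknown";
	}
}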
 
-/***********************
- * hwrm_cfa_flow_alloc *
- ***********************/
+/*******************************
+ * hwrm_cfa_ntuple_filter_free *
+ *******************************/
 
 
-/* hwrm_cfa_flow_alloc_input (size:1024b/128B) */
-struct hwrm_cfa_flow_alloc_input {
+/* hwrm_cfa_ntuple_filter_free_input (size:192b/24B) */
+struct hwrm_cfa_ntuple_filter_free_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -24027,155 +23031,13 @@ struct hwrm_cfa_flow_alloc_input {
         * physical address (HPA) or a guest physical address (GPA) and must
         * point to a physically contiguous block of memory.
         */
-       uint64_t        resp_addr;
-       uint16_t        flags;
-       /* tunnel is 1 b */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_TUNNEL       UINT32_C(0x1)
-       /* num_vlan is 2 b */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_NUM_VLAN_MASK UINT32_C(0x6)
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_NUM_VLAN_SFT 1
-       /* no tags */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_NUM_VLAN_NONE \
-               (UINT32_C(0x0) << 1)
-       /* 1 tag */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_NUM_VLAN_ONE \
-               (UINT32_C(0x1) << 1)
-       /* 2 tags */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_NUM_VLAN_TWO \
-               (UINT32_C(0x2) << 1)
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_NUM_VLAN_LAST \
-               HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_NUM_VLAN_TWO
-       /* Enumeration denoting the Flow Type. */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_MASK UINT32_C(0x38)
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_SFT 3
-       /* L2 flow */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_L2 \
-               (UINT32_C(0x0) << 3)
-       /* IPV4 flow */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_IPV4 \
-               (UINT32_C(0x1) << 3)
-       /* IPV6 flow */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_IPV6 \
-               (UINT32_C(0x2) << 3)
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_LAST \
-               HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_IPV6
-       /*
-        * Tx Flow: vf fid.
-        * Rx Flow: pf fid.
-        */
-       uint16_t        src_fid;
-       /* Tunnel handle valid when tunnel flag is set. */
-       uint32_t        tunnel_handle;
-       uint16_t        action_flags;
-       /*
-        * Setting of this flag indicates drop action. If this flag is not set,
-        * then it should be considered accept action.
-        */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_FWD \
-               UINT32_C(0x1)
-       /* recycle is 1 b */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_RECYCLE \
-               UINT32_C(0x2)
-       /*
-        * Setting of this flag indicates drop action. If this flag is not set,
-        * then it should be considered accept action.
-        */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_DROP \
-               UINT32_C(0x4)
-       /* meter is 1 b */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_METER \
-               UINT32_C(0x8)
-       /* tunnel is 1 b */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_TUNNEL \
-               UINT32_C(0x10)
-       /* nat_src is 1 b */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_NAT_SRC \
-               UINT32_C(0x20)
-       /* nat_dest is 1 b */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_NAT_DEST \
-               UINT32_C(0x40)
-       /* nat_ipv4_address is 1 b */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_NAT_IPV4_ADDRESS \
-               UINT32_C(0x80)
-       /* l2_header_rewrite is 1 b */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_L2_HEADER_REWRITE \
-               UINT32_C(0x100)
-       /* ttl_decrement is 1 b */
-       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_TTL_DECREMENT \
-               UINT32_C(0x200)
-       /*
-        * Tx Flow: pf or vf fid.
-        * Rx Flow: vf fid.
-        */
-       uint16_t        dst_fid;
-       /* VLAN tpid, valid when push_vlan flag is set. */
-       uint16_t        l2_rewrite_vlan_tpid;
-       /* VLAN tci, valid when push_vlan flag is set. */
-       uint16_t        l2_rewrite_vlan_tci;
-       /* Meter id, valid when meter flag is set. */
-       uint16_t        act_meter_id;
-       /* Flow with the same l2 context tcam key. */
-       uint16_t        ref_flow_handle;
-       /* This value sets the match value for the ethertype. */
-       uint16_t        ethertype;
-       /* valid when num tags is 1 or 2. */
-       uint16_t        outer_vlan_tci;
-       /* This value sets the match value for the Destination MAC address. */
-       uint16_t        dmac[3];
-       /* valid when num tags is 2. */
-       uint16_t        inner_vlan_tci;
-       /* This value sets the match value for the Source MAC address. */
-       uint16_t        smac[3];
-       /* The bit length of destination IP address mask. */
-       uint8_t ip_dst_mask_len;
-       /* The bit length of source IP address mask. */
-       uint8_t ip_src_mask_len;
-       /* The value of destination IPv4/IPv6 address. */
-       uint32_t        ip_dst[4];
-       /* The source IPv4/IPv6 address. */
-       uint32_t        ip_src[4];
-       /*
-        * The value of source port.
-        * Applies to UDP and TCP traffic.
-        */
-       uint16_t        l4_src_port;
-       /*
-        * The value of source port mask.
-        * Applies to UDP and TCP traffic.
-        */
-       uint16_t        l4_src_port_mask;
-       /*
-        * The value of destination port.
-        * Applies to UDP and TCP traffic.
-        */
-       uint16_t        l4_dst_port;
-       /*
-        * The value of destination port mask.
-        * Applies to UDP and TCP traffic.
-        */
-       uint16_t        l4_dst_port_mask;
-       /*
-        * NAT IPv4/6 address based on address type flag.
-        * 0 values are ignored.
-        */
-       uint32_t        nat_ip_address[4];
-       /* L2 header re-write Destination MAC address. */
-       uint16_t        l2_rewrite_dmac[3];
-       /*
-        * The NAT source/destination port based on direction flag.
-        * Applies to UDP and TCP traffic.
-        * 0 values are ignored.
-        */
-       uint16_t        nat_port;
-       /* L2 header re-write Source MAC address. */
-       uint16_t        l2_rewrite_smac[3];
-       /* The value of ip protocol. */
-       uint8_t ip_proto;
-       uint8_t unused_0;
+       uint64_t        resp_addr;
+       /* This value is an opaque id into CFA data structures. */
+       uint64_t        ntuple_filter_id;
 } __attribute__((packed));
 
-/* hwrm_cfa_flow_alloc_output (size:128b/16B) */
-struct hwrm_cfa_flow_alloc_output {
+/* hwrm_cfa_ntuple_filter_free_output (size:128b/16B) */
+struct hwrm_cfa_ntuple_filter_free_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -24184,9 +23046,7 @@ struct hwrm_cfa_flow_alloc_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* Flow record index. */
-       uint16_t        flow_handle;
-       uint8_t unused_0[5];
+       uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -24197,13 +23057,13 @@ struct hwrm_cfa_flow_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
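
/*
 * Illustrative sketch, assuming DPDK's rte_delay_us() helper: firmware
 * writes the trailing 'valid' byte last, so a driver polling a response
 * buffer in host memory spins on it before trusting the rest of the
 * record. Timeout and barrier handling are simplified here.
 */
static inline int
wait_resp_valid(volatile struct hwrm_cfa_ntuple_filter_free_output *resp,
		unsigned int max_tries)
{
	while (max_tries--) {
		if (resp->valid == 1)
			return 0;	/* response fully written to RAM */
		rte_delay_us(1);
	}
	return -1;			/* timed out */
}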
 
-/**********************
- * hwrm_cfa_flow_free *
- **********************/
+/******************************
+ * hwrm_cfa_ntuple_filter_cfg *
+ ******************************/
 
 
-/* hwrm_cfa_flow_free_input (size:192b/24B) */
-struct hwrm_cfa_flow_free_input {
+/* hwrm_cfa_ntuple_filter_cfg_input (size:384b/48B) */
+struct hwrm_cfa_ntuple_filter_cfg_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -24231,13 +23091,59 @@ struct hwrm_cfa_flow_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* Flow record index. */
-       uint16_t        flow_handle;
-       uint8_t unused_0[6];
+       uint32_t        enables;
+       /*
+        * This bit must be '1' for the new_dst_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_ENABLES_NEW_DST_ID \
+               UINT32_C(0x1)
+       /*
+        * This bit must be '1' for the new_mirror_vnic_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_ENABLES_NEW_MIRROR_VNIC_ID \
+               UINT32_C(0x2)
+       /*
+        * This bit must be '1' for the new_meter_instance_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_ENABLES_NEW_METER_INSTANCE_ID \
+               UINT32_C(0x4)
+       uint8_t unused_0[4];
+       /* This value is an opaque id into CFA data structures. */
+       uint64_t        ntuple_filter_id;
+       /*
+        * If set, this value shall represent the new
+        * Logical VNIC ID of the destination VNIC for the RX
+        * path and new network port id of the destination port for
+        * the TX path.
+        */
+       uint32_t        new_dst_id;
+       /*
+        * New Logical VNIC ID of the VNIC where traffic is
+        * mirrored.
+        */
+       uint32_t        new_mirror_vnic_id;
+       /*
+        * New meter to attach to the flow. Specifying the
+        * invalid instance ID removes any existing meter
+        * from the flow.
+        */
+       uint16_t        new_meter_instance_id;
+       /*
+        * A value of 0xffff is considered invalid and implies the
+        * instance is not configured.
+        */
+       #define HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_NEW_METER_INSTANCE_ID_INVALID \
+               UINT32_C(0xffff)
+       #define HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_NEW_METER_INSTANCE_ID_LAST \
+               HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_NEW_METER_INSTANCE_ID_INVALID
+       uint8_t unused_1[6];
 } __attribute__((packed));
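
/*
 * Illustrative sketch, not part of the generated header: a _cfg field is
 * only honored when its bit in 'enables' is set. For example, redirecting
 * an existing ntuple filter to a new destination while leaving its mirror
 * and meter settings untouched:
 */
static inline void
build_ntuple_cfg_req(struct hwrm_cfa_ntuple_filter_cfg_input *req,
		     uint64_t filter_id, uint32_t dst_id)
{
	memset(req, 0, sizeof(*req));
	req->ntuple_filter_id = filter_id;
	req->new_dst_id = dst_id;
	req->enables = HWRM_CFA_NTUPLE_FILTER_CFG_INPUT_ENABLES_NEW_DST_ID;
	/* new_mirror_vnic_id / new_meter_instance_id are ignored: bits unset */
}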
 
-/* hwrm_cfa_flow_free_output (size:256b/32B) */
-struct hwrm_cfa_flow_free_output {
+/* hwrm_cfa_ntuple_filter_cfg_output (size:128b/16B) */
+struct hwrm_cfa_ntuple_filter_cfg_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -24246,10 +23152,6 @@ struct hwrm_cfa_flow_free_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* packet is 64 b */
-       uint64_t        packet;
-       /* byte is 64 b */
-       uint64_t        byte;
        uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
@@ -24261,13 +23163,13 @@ struct hwrm_cfa_flow_free_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/**********************
- * hwrm_cfa_flow_info *
- **********************/
+/**************************
+ * hwrm_cfa_em_flow_alloc *
+ **************************/
 
 
-/* hwrm_cfa_flow_info_input (size:192b/24B) */
-struct hwrm_cfa_flow_info_input {
+/* hwrm_cfa_em_flow_alloc_input (size:896b/112B) */
+struct hwrm_cfa_em_flow_alloc_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -24295,288 +23197,307 @@ struct hwrm_cfa_flow_info_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* Flow record index. */
-       uint16_t        flow_handle;
-       /* Max flow handle */
-       #define HWRM_CFA_FLOW_INFO_INPUT_FLOW_HANDLE_MAX_MASK \
-               UINT32_C(0xfff)
-       #define HWRM_CFA_FLOW_INFO_INPUT_FLOW_HANDLE_MAX_SFT     0
-       /* CNP flow handle */
-       #define HWRM_CFA_FLOW_INFO_INPUT_FLOW_HANDLE_CNP_CNT \
+       uint32_t        flags;
+       /*
+        * Enumeration denoting the RX, TX type of the resource.
+        * This enumeration is used for resources that are similar for both
+        * TX and RX paths of the chip.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH         UINT32_C(0x1)
+       /* tx path */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_TX        UINT32_C(0x0)
+       /* rx path */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_RX        UINT32_C(0x1)
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_LAST \
+               HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_RX
+       /*
+        * Setting of this flag indicates enabling of a byte counter for a given
+        * flow.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_BYTE_CTR     UINT32_C(0x2)
+       /*
+        * Setting of this flag indicates enabling of a packet counter for a given
+        * flow.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PKT_CTR      UINT32_C(0x4)
+       /* Setting of this flag indicates de-capsulation action for the given flow. */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_DECAP        UINT32_C(0x8)
+       /* Setting of this flag indicates encapsulation action for the given flow. */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_ENCAP        UINT32_C(0x10)
+       /*
+        * Setting of this flag indicates drop action. If this flag is not set,
+        * then it should be considered an accept action.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_DROP         UINT32_C(0x20)
+       /*
+        * Setting of this flag indicates that a meter is expected to be attached
+        * to this flow. This hint can be used when choosing the action record
+        * format required for the flow.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_METER        UINT32_C(0x40)
+       uint32_t        enables;
+       /*
+        * This bit must be '1' for the l2_filter_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_L2_FILTER_ID \
+               UINT32_C(0x1)
+       /*
+        * This bit must be '1' for the tunnel_type field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_TUNNEL_TYPE \
+               UINT32_C(0x2)
+       /*
+        * This bit must be '1' for the tunnel_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_TUNNEL_ID \
+               UINT32_C(0x4)
+       /*
+        * This bit must be '1' for the src_macaddr field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_MACADDR \
+               UINT32_C(0x8)
+       /*
+        * This bit must be '1' for the dst_macaddr field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_MACADDR \
+               UINT32_C(0x10)
+       /*
+        * This bit must be '1' for the ovlan_vid field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_OVLAN_VID \
+               UINT32_C(0x20)
+       /*
+        * This bit must be '1' for the ivlan_vid field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IVLAN_VID \
+               UINT32_C(0x40)
+       /*
+        * This bit must be '1' for the ethertype field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_ETHERTYPE \
+               UINT32_C(0x80)
+       /*
+        * This bit must be '1' for the src_ipaddr field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_IPADDR \
+               UINT32_C(0x100)
+       /*
+        * This bit must be '1' for the dst_ipaddr field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_IPADDR \
+               UINT32_C(0x200)
+       /*
+        * This bit must be '1' for the ipaddr_type field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IPADDR_TYPE \
+               UINT32_C(0x400)
+       /*
+        * This bit must be '1' for the ip_protocol field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IP_PROTOCOL \
+               UINT32_C(0x800)
+       /*
+        * This bit must be '1' for the src_port field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_PORT \
                UINT32_C(0x1000)
-       /* Direction rx = 1 */
-       #define HWRM_CFA_FLOW_INFO_INPUT_FLOW_HANDLE_DIR_RX \
+       /*
+        * This bit must be '1' for the dst_port field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_PORT \
+               UINT32_C(0x2000)
+       /*
+        * This bit must be '1' for the dst_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_ID \
+               UINT32_C(0x4000)
+       /*
+        * This bit must be '1' for the mirror_vnic_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID \
                UINT32_C(0x8000)
-       uint8_t unused_0[6];
-} __attribute__((packed));
-
-/* hwrm_cfa_flow_info_output (size:448b/56B) */
-struct hwrm_cfa_flow_info_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* flags is 8 b */
-       uint8_t flags;
-       /* profile is 8 b */
-       uint8_t profile;
-       /* src_fid is 16 b */
-       uint16_t        src_fid;
-       /* dst_fid is 16 b */
-       uint16_t        dst_fid;
-       /* l2_ctxt_id is 16 b */
-       uint16_t        l2_ctxt_id;
-       /* em_info is 64 b */
-       uint64_t        em_info;
-       /* tcam_info is 64 b */
-       uint64_t        tcam_info;
-       /* vfp_tcam_info is 64 b */
-       uint64_t        vfp_tcam_info;
-       /* ar_id is 16 b */
-       uint16_t        ar_id;
-       /* flow_handle is 16 b */
-       uint16_t        flow_handle;
-       /* tunnel_handle is 32 b */
-       uint32_t        tunnel_handle;
-       uint8_t unused_0[7];
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * This bit must be '1' for the encap_record_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_ENCAP_RECORD_ID \
+               UINT32_C(0x10000)
+       /*
+        * This bit must be '1' for the meter_instance_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_METER_INSTANCE_ID \
+               UINT32_C(0x20000)
+       /*
+        * This value identifies a set of CFA data structures used for an L2
+        * context.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/***********************
- * hwrm_cfa_flow_flush *
- ***********************/
-
-
-/* hwrm_cfa_flow_flush_input (size:192b/24B) */
-struct hwrm_cfa_flow_flush_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       uint64_t        l2_filter_id;
+       /* Tunnel Type. */
+       uint8_t tunnel_type;
+       /* Non-tunnel */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL \
+               UINT32_C(0x0)
+       /* Virtual eXtensible Local Area Network (VXLAN) */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
+               UINT32_C(0x1)
+       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_NVGRE \
+               UINT32_C(0x2)
+       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_L2GRE \
+               UINT32_C(0x3)
+       /* IP in IP */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_IPIP \
+               UINT32_C(0x4)
+       /* Generic Network Virtualization Encapsulation (Geneve) */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
+               UINT32_C(0x5)
+       /* Multi-Protocol Label Switching (MPLS) */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_MPLS \
+               UINT32_C(0x6)
+       /* Stateless Transport Tunnel (STT) */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_STT \
+               UINT32_C(0x7)
+       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_IPGRE \
+               UINT32_C(0x8)
+       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+               UINT32_C(0x9)
+       /* Any tunneled traffic */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
+               UINT32_C(0xff)
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_LAST \
+               HWRM_CFA_EM_FLOW_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL
+       uint8_t unused_0[3];
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * Tunnel identifier.
+        * Virtual Network Identifier (VNI). Only valid with
+        * tunnel_types VXLAN, NVGRE, and Geneve.
+        * Only the lower 24 bits of the VNI field are used
+        * in setting up the filter.
         */
-       uint16_t        cmpl_ring;
+       uint32_t        tunnel_id;
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * This value indicates the source MAC address in
+        * the Ethernet header.
         */
-       uint16_t        seq_id;
+       uint8_t src_macaddr[6];
+       /* The meter instance to attach to the flow. */
+       uint16_t        meter_instance_id;
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * A value of 0xffff is considered invalid and implies the
+        * instance is not configured.
         */
-       uint16_t        target_id;
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_METER_INSTANCE_ID_INVALID \
+               UINT32_C(0xffff)
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_METER_INSTANCE_ID_LAST \
+               HWRM_CFA_EM_FLOW_ALLOC_INPUT_METER_INSTANCE_ID_INVALID
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * This value indicates the destination MAC address in
+        * the Ethernet header.
         */
-       uint64_t        resp_addr;
-       uint32_t        flags;
-       uint8_t unused_0[4];
-} __attribute__((packed));
-
-/* hwrm_cfa_flow_flush_output (size:128b/16B) */
-struct hwrm_cfa_flow_flush_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t unused_0[7];
+       uint8_t dst_macaddr[6];
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * This value indicates the VLAN ID of the outer VLAN tag
+        * in the Ethernet header.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/***********************
- * hwrm_cfa_flow_stats *
- ***********************/
-
-
-/* hwrm_cfa_flow_stats_input (size:320b/40B) */
-struct hwrm_cfa_flow_stats_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       uint16_t        ovlan_vid;
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * This value indicates the VLAN ID of the inner VLAN tag
+        * in the Ethernet header.
         */
-       uint16_t        cmpl_ring;
+       uint16_t        ivlan_vid;
+       /* This value indicates the ethertype in the Ethernet header. */
+       uint16_t        ethertype;
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * This value indicates the type of IP address.
+        * 4 - IPv4
+        * 6 - IPv6
+        * All others are invalid.
         */
-       uint16_t        seq_id;
+       uint8_t ip_addr_type;
+       /* invalid */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_UNKNOWN UINT32_C(0x0)
+       /* IPv4 */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_IPV4    UINT32_C(0x4)
+       /* IPv6 */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_IPV6    UINT32_C(0x6)
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_LAST \
+               HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_IPV6
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * The value of the protocol field in the IP header.
+        * Applies to UDP and TCP traffic.
+        * 6 - TCP
+        * 17 - UDP
         */
-       uint16_t        target_id;
+       uint8_t ip_protocol;
+       /* invalid */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_UNKNOWN UINT32_C(0x0)
+       /* TCP */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_TCP     UINT32_C(0x6)
+       /* UDP */
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_UDP     UINT32_C(0x11)
+       #define HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_LAST \
+               HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_UDP
+       uint8_t unused_1[2];
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * The value of source IP address to be used in filtering.
+        * For IPv4, the first four bytes represent the IP address.
         */
-       uint64_t        resp_addr;
-       /* Flow handle. */
-       uint16_t        num_flows;
-       /* Flow handle. */
-       uint16_t        flow_handle_0;
-       /* Flow handle. */
-       uint16_t        flow_handle_1;
-       /* Flow handle. */
-       uint16_t        flow_handle_2;
-       /* Flow handle. */
-       uint16_t        flow_handle_3;
-       /* Flow handle. */
-       uint16_t        flow_handle_4;
-       /* Flow handle. */
-       uint16_t        flow_handle_5;
-       /* Flow handle. */
-       uint16_t        flow_handle_6;
-       /* Flow handle. */
-       uint16_t        flow_handle_7;
-       /* Flow handle. */
-       uint16_t        flow_handle_8;
-       /* Flow handle. */
-       uint16_t        flow_handle_9;
-       uint8_t unused_0[2];
-} __attribute__((packed));
-
-/* hwrm_cfa_flow_stats_output (size:1408b/176B) */
-struct hwrm_cfa_flow_stats_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* packet_0 is 64 b */
-       uint64_t        packet_0;
-       /* packet_1 is 64 b */
-       uint64_t        packet_1;
-       /* packet_2 is 64 b */
-       uint64_t        packet_2;
-       /* packet_3 is 64 b */
-       uint64_t        packet_3;
-       /* packet_4 is 64 b */
-       uint64_t        packet_4;
-       /* packet_5 is 64 b */
-       uint64_t        packet_5;
-       /* packet_6 is 64 b */
-       uint64_t        packet_6;
-       /* packet_7 is 64 b */
-       uint64_t        packet_7;
-       /* packet_8 is 64 b */
-       uint64_t        packet_8;
-       /* packet_9 is 64 b */
-       uint64_t        packet_9;
-       /* byte_0 is 64 b */
-       uint64_t        byte_0;
-       /* byte_1 is 64 b */
-       uint64_t        byte_1;
-       /* byte_2 is 64 b */
-       uint64_t        byte_2;
-       /* byte_3 is 64 b */
-       uint64_t        byte_3;
-       /* byte_4 is 64 b */
-       uint64_t        byte_4;
-       /* byte_5 is 64 b */
-       uint64_t        byte_5;
-       /* byte_6 is 64 b */
-       uint64_t        byte_6;
-       /* byte_7 is 64 b */
-       uint64_t        byte_7;
-       /* byte_8 is 64 b */
-       uint64_t        byte_8;
-       /* byte_9 is 64 b */
-       uint64_t        byte_9;
-       uint8_t unused_0[7];
+       uint32_t        src_ipaddr[4];
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * The value of destination IP address to be used in filtering,
+        * stored big endian.
+        * For IPv4, the first four bytes represent the IP address.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/**************************
- * hwrm_cfa_vf_pair_alloc *
- **************************/
-
-
-/* hwrm_cfa_vf_pair_alloc_input (size:448b/56B) */
-struct hwrm_cfa_vf_pair_alloc_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       uint32_t        dst_ipaddr[4];
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * The value of source port to be used in filtering.
+        * Applies to UDP and TCP traffic.
         */
-       uint16_t        cmpl_ring;
+       uint16_t        src_port;
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * The value of destination port to be used in filtering.
+        * Applies to UDP and TCP traffic.
         */
-       uint16_t        seq_id;
+       uint16_t        dst_port;
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * If set, this value shall represent the
+        * Logical VNIC ID of the destination VNIC for the RX
+        * path and network port id of the destination port for
+        * the TX path.
         */
-       uint16_t        target_id;
+       uint16_t        dst_id;
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * Logical VNIC ID of the VNIC where traffic is
+        * mirrored.
         */
-       uint64_t        resp_addr;
-       /* Logical VF number (range: 0 -> MAX_VFS -1). */
-       uint16_t        vf_a_id;
-       /* Logical VF number (range: 0 -> MAX_VFS -1). */
-       uint16_t        vf_b_id;
-       uint8_t unused_0[4];
-       /* VF Pair name (32 byte string). */
-       char    pair_name[32];
+       uint16_t        mirror_vnic_id;
+       /* Logical ID of the encapsulation record. */
+       uint32_t        encap_record_id;
+       uint8_t unused_2[4];
 } __attribute__((packed));
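
/*
 * Illustrative sketch, not part of the generated header: an exact-match
 * flow is described by OR-ing the relevant 'enables' bits and filling only
 * those fields. A minimal RX-path IPv4/TCP 4-tuple match under an existing
 * L2 filter could look like this (little-endian conversions via
 * rte_cpu_to_le_*() omitted for brevity):
 */
static inline void
build_em_tcp_flow(struct hwrm_cfa_em_flow_alloc_input *req,
		  uint64_t l2_filter_id, uint32_t sip, uint32_t dip,
		  uint16_t sport, uint16_t dport)
{
	memset(req, 0, sizeof(*req));
	req->flags = HWRM_CFA_EM_FLOW_ALLOC_INPUT_FLAGS_PATH_RX;
	req->enables = HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_L2_FILTER_ID |
		       HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_IPADDR |
		       HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_IPADDR |
		       HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IPADDR_TYPE |
		       HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_IP_PROTOCOL |
		       HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_SRC_PORT |
		       HWRM_CFA_EM_FLOW_ALLOC_INPUT_ENABLES_DST_PORT;
	req->l2_filter_id = l2_filter_id;
	req->ip_addr_type = HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_ADDR_TYPE_IPV4;
	req->ip_protocol = HWRM_CFA_EM_FLOW_ALLOC_INPUT_IP_PROTOCOL_TCP;
	req->src_ipaddr[0] = sip;	/* IPv4: first word carries the address */
	req->dst_ipaddr[0] = dip;
	req->src_port = sport;
	req->dst_port = dport;
}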
 
-/* hwrm_cfa_vf_pair_alloc_output (size:128b/16B) */
-struct hwrm_cfa_vf_pair_alloc_output {
+/* hwrm_cfa_em_flow_alloc_output (size:192b/24B) */
+struct hwrm_cfa_em_flow_alloc_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -24585,7 +23506,17 @@ struct hwrm_cfa_vf_pair_alloc_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       uint8_t unused_0[7];
+       /* This value is an opaque id into CFA data structures. */
+       uint64_t        em_filter_id;
+       /*
+        * This is the ID of the flow associated with this
+        * filter.
+        * This value shall be used to match and associate the
+        * flow identifier returned in completion records.
+        * A value of 0xFFFFFFFF shall indicate no flow id.
+        */
+       uint32_t        flow_id;
+       uint8_t unused_0[3];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -24597,12 +23528,12 @@ struct hwrm_cfa_vf_pair_alloc_output {
 } __attribute__((packed));
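
/*
 * Illustrative sketch: a successful alloc returns both the opaque
 * em_filter_id (needed later for hwrm_cfa_em_flow_free) and a flow_id used
 * to match completion records; 0xFFFFFFFF means no flow id was assigned.
 */
static inline int
em_flow_has_id(const struct hwrm_cfa_em_flow_alloc_output *resp)
{
	return resp->flow_id != UINT32_C(0xFFFFFFFF);
}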
 
 /*************************
- * hwrm_cfa_vf_pair_free *
+ * hwrm_cfa_em_flow_free *
  *************************/
 
 
-/* hwrm_cfa_vf_pair_free_input (size:384b/48B) */
-struct hwrm_cfa_vf_pair_free_input {
+/* hwrm_cfa_em_flow_free_input (size:192b/24B) */
+struct hwrm_cfa_em_flow_free_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -24630,12 +23561,12 @@ struct hwrm_cfa_vf_pair_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* VF Pair name (32 byte string). */
-       char    pair_name[32];
+       /* This value is an opaque id into CFA data structures. */
+       uint64_t        em_filter_id;
 } __attribute__((packed));
 
-/* hwrm_cfa_vf_pair_free_output (size:128b/16B) */
-struct hwrm_cfa_vf_pair_free_output {
+/* hwrm_cfa_em_flow_free_output (size:128b/16B) */
+struct hwrm_cfa_em_flow_free_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -24655,13 +23586,13 @@ struct hwrm_cfa_vf_pair_free_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/*************************
- * hwrm_cfa_vf_pair_info *
- *************************/
+/*******************************
+ * hwrm_cfa_decap_filter_alloc *
+ *******************************/
 
 
-/* hwrm_cfa_vf_pair_info_input (size:448b/56B) */
-struct hwrm_cfa_vf_pair_info_input {
+/* hwrm_cfa_decap_filter_alloc_input (size:832b/104B) */
+struct hwrm_cfa_decap_filter_alloc_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -24690,177 +23621,265 @@ struct hwrm_cfa_vf_pair_info_input {
         */
        uint64_t        resp_addr;
        uint32_t        flags;
-       /* If this flag is set, lookup by name else lookup by index. */
-       #define HWRM_CFA_VF_PAIR_INFO_INPUT_FLAGS_LOOKUP_TYPE     UINT32_C(0x1)
-       /* vf pair table index. */
-       uint16_t        vf_pair_index;
-       uint8_t unused_0[2];
-       /* VF Pair name (32 byte string). */
-       char    vf_pair_name[32];
-} __attribute__((packed));
-
-/* hwrm_cfa_vf_pair_info_output (size:512b/64B) */
-struct hwrm_cfa_vf_pair_info_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* vf pair table index. */
-       uint16_t        next_vf_pair_index;
-       /* vf pair member a's vf_fid. */
-       uint16_t        vf_a_fid;
-       /* vf pair member a's Linux logical VF number. */
-       uint16_t        vf_a_index;
-       /* vf pair member b's vf_fid. */
-       uint16_t        vf_b_fid;
-       /* vf pair member a's Linux logical VF number. */
-       uint16_t        vf_b_index;
-       /* vf pair state. */
-       uint8_t pair_state;
-       /* Pair has been allocated */
-       #define HWRM_CFA_VF_PAIR_INFO_OUTPUT_PAIR_STATE_ALLOCATED UINT32_C(0x1)
-       /* Both pair members are active */
-       #define HWRM_CFA_VF_PAIR_INFO_OUTPUT_PAIR_STATE_ACTIVE    UINT32_C(0x2)
-       #define HWRM_CFA_VF_PAIR_INFO_OUTPUT_PAIR_STATE_LAST \
-               HWRM_CFA_VF_PAIR_INFO_OUTPUT_PAIR_STATE_ACTIVE
-       uint8_t unused_0[5];
-       /* VF Pair name (32 byte string). */
-       char    pair_name[32];
-       uint8_t unused_1[7];
+       /* ovs_tunnel is 1 b */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_FLAGS_OVS_TUNNEL \
+               UINT32_C(0x1)
+       uint32_t        enables;
+       /*
+        * This bit must be '1' for the tunnel_type field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_TYPE \
+               UINT32_C(0x1)
+       /*
+        * This bit must be '1' for the tunnel_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_ID \
+               UINT32_C(0x2)
+       /*
+        * This bit must be '1' for the src_macaddr field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_SRC_MACADDR \
+               UINT32_C(0x4)
+       /*
+        * This bit must be '1' for the dst_macaddr field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_DST_MACADDR \
+               UINT32_C(0x8)
+       /*
+        * This bit must be '1' for the ovlan_vid field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_OVLAN_VID \
+               UINT32_C(0x10)
+       /*
+        * This bit must be '1' for the ivlan_vid field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_IVLAN_VID \
+               UINT32_C(0x20)
+       /*
+        * This bit must be '1' for the t_ovlan_vid field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_T_OVLAN_VID \
+               UINT32_C(0x40)
+       /*
+        * This bit must be '1' for the t_ivlan_vid field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_T_IVLAN_VID \
+               UINT32_C(0x80)
+       /*
+        * This bit must be '1' for the ethertype field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_ETHERTYPE \
+               UINT32_C(0x100)
+       /*
+        * This bit must be '1' for the src_ipaddr field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_SRC_IPADDR \
+               UINT32_C(0x200)
+       /*
+        * This bit must be '1' for the dst_ipaddr field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_DST_IPADDR \
+               UINT32_C(0x400)
+       /*
+        * This bit must be '1' for the ipaddr_type field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_IPADDR_TYPE \
+               UINT32_C(0x800)
+       /*
+        * This bit must be '1' for the ip_protocol field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_IP_PROTOCOL \
+               UINT32_C(0x1000)
+       /*
+        * This bit must be '1' for the src_port field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_SRC_PORT \
+               UINT32_C(0x2000)
+       /*
+        * This bit must be '1' for the dst_port field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_DST_PORT \
+               UINT32_C(0x4000)
+       /*
+        * This bit must be '1' for the dst_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_DST_ID \
+               UINT32_C(0x8000)
+       /*
+        * This bit must be '1' for the mirror_vnic_id field to be
+        * configured.
+        */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_MIRROR_VNIC_ID \
+               UINT32_C(0x10000)
+       /*
+        * Tunnel identifier.
+        * Virtual Network Identifier (VNI). Only valid with
+        * tunnel_types VXLAN, NVGRE, and Geneve.
+        * Only the lower 24 bits of the VNI field are used
+        * in setting up the filter.
+        */
+       uint32_t        tunnel_id;
+       /* Tunnel Type. */
+       uint8_t tunnel_type;
+       /* Non-tunnel */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL \
+               UINT32_C(0x0)
+       /* Virtual eXtensible Local Area Network (VXLAN) */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
+               UINT32_C(0x1)
+       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_NVGRE \
+               UINT32_C(0x2)
+       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_L2GRE \
+               UINT32_C(0x3)
+       /* IP in IP */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPIP \
+               UINT32_C(0x4)
+       /* Generic Network Virtualization Encapsulation (Geneve) */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
+               UINT32_C(0x5)
+       /* Multi-Protocol Label Switching (MPLS) */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_MPLS \
+               UINT32_C(0x6)
+       /* Stateless Transport Tunnel (STT) */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_STT \
+               UINT32_C(0x7)
+       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_IPGRE \
+               UINT32_C(0x8)
+       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+               UINT32_C(0x9)
+       /* Any tunneled traffic */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL \
+               UINT32_C(0xff)
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_LAST \
+               HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL
+       uint8_t unused_0;
+       uint16_t        unused_1;
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * This value indicates the source MAC address in
+        * the Ethernet header.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/***********************
- * hwrm_cfa_pair_alloc *
- ***********************/
-
-
-/* hwrm_cfa_pair_alloc_input (size:576b/72B) */
-struct hwrm_cfa_pair_alloc_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       uint8_t src_macaddr[6];
+       uint8_t unused_2[2];
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * This value indicates the destination MAC address in
+        * the Ethernet header.
         */
-       uint16_t        cmpl_ring;
+       uint8_t dst_macaddr[6];
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * This value indicates the VLAN ID of the outer VLAN tag
+        * in the Ethernet header.
         */
-       uint16_t        seq_id;
+       uint16_t        ovlan_vid;
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * This value indicates the VLAN ID of the inner VLAN tag
+        * in the Ethernet header.
         */
-       uint16_t        target_id;
+       uint16_t        ivlan_vid;
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * This value indicates the VLAN ID of the outer VLAN tag
+        * in the tunnel Ethernet header.
         */
-       uint64_t        resp_addr;
-       /* Pair mode (0-vf2fn, 1-rep2fn, 2-rep2rep, 3-proxy, 4-pfpair, 5-rep2fn_mod). */
-       uint8_t pair_mode;
-       /* Pair between VF on local host with PF or VF on specified host. */
-       #define HWRM_CFA_PAIR_ALLOC_INPUT_PAIR_MODE_VF2FN         UINT32_C(0x0)
-       /* Pair between REP on local host with PF or VF on specified host. */
-       #define HWRM_CFA_PAIR_ALLOC_INPUT_PAIR_MODE_REP2FN        UINT32_C(0x1)
-       /* Pair between REP on local host with REP on specified host. */
-       #define HWRM_CFA_PAIR_ALLOC_INPUT_PAIR_MODE_REP2REP       UINT32_C(0x2)
-       /* Pair for the proxy interface. */
-       #define HWRM_CFA_PAIR_ALLOC_INPUT_PAIR_MODE_PROXY         UINT32_C(0x3)
-       /* Pair for the PF interface. */
-       #define HWRM_CFA_PAIR_ALLOC_INPUT_PAIR_MODE_PFPAIR        UINT32_C(0x4)
-       /* Modify exiting rep2fn pair and move pair to new PF. */
-       #define HWRM_CFA_PAIR_ALLOC_INPUT_PAIR_MODE_REP2FN_MOD    UINT32_C(0x5)
-       /* Modify exiting rep2fn pairs paired with same PF and move pairs to new PF. */
-       #define HWRM_CFA_PAIR_ALLOC_INPUT_PAIR_MODE_REP2FN_MODALL UINT32_C(0x6)
-       #define HWRM_CFA_PAIR_ALLOC_INPUT_PAIR_MODE_LAST \
-               HWRM_CFA_PAIR_ALLOC_INPUT_PAIR_MODE_REP2FN_MODALL
-       uint8_t unused_0;
-       /* Logical VF number (range: 0 -> MAX_VFS -1). */
-       uint16_t        vf_a_id;
-       /* Logical Host (0xff-local host). */
-       uint8_t host_b_id;
-       /* Logical PF (0xff-PF for command channel). */
-       uint8_t pf_b_id;
-       /* Logical VF number (range: 0 -> MAX_VFS -1). */
-       uint16_t        vf_b_id;
-       /* Loopback port (0xff-internal loopback), valid for mode-3. */
-       uint8_t port_id;
-       /* Priority used for encap of loopback packets valid for mode-3. */
-       uint8_t pri;
-       /* New PF for rep2fn modify, valid for mode 5. */
-       uint16_t        new_pf_fid;
-       uint32_t        enables;
+       uint16_t        t_ovlan_vid;
        /*
-        * This bit must be '1' for the q_ab field to be
-        * configured.
+        * This value indicates the VLAN ID of the inner VLAN tag
+        * in the tunnel Ethernet header.
         */
-       #define HWRM_CFA_PAIR_ALLOC_INPUT_ENABLES_Q_AB_VALID      UINT32_C(0x1)
+       uint16_t        t_ivlan_vid;
+       /* This value indicates the ethertype in the Ethernet header. */
+       uint16_t        ethertype;
        /*
-        * This bit must be '1' for the q_ba field to be
-        * configured.
+        * This value indicates the type of IP address.
+        * 4 - IPv4
+        * 6 - IPv6
+        * All others are invalid.
         */
-       #define HWRM_CFA_PAIR_ALLOC_INPUT_ENABLES_Q_BA_VALID      UINT32_C(0x2)
+       uint8_t ip_addr_type;
+       /* invalid */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_UNKNOWN \
+               UINT32_C(0x0)
+       /* IPv4 */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV4 \
+               UINT32_C(0x4)
+       /* IPv6 */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV6 \
+               UINT32_C(0x6)
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_LAST \
+               HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_ADDR_TYPE_IPV6
        /*
-        * This bit must be '1' for the fc_ab field to be
-        * configured.
+        * The value of the protocol field in the IP header.
+        * Applies to UDP and TCP traffic.
+        * 6 - TCP
+        * 17 - UDP
+        */
+       uint8_t ip_protocol;
+       /* invalid */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_PROTOCOL_UNKNOWN \
+               UINT32_C(0x0)
+       /* TCP */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_PROTOCOL_TCP \
+               UINT32_C(0x6)
+       /* UDP */
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_PROTOCOL_UDP \
+               UINT32_C(0x11)
+       #define HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_PROTOCOL_LAST \
+               HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_IP_PROTOCOL_UDP
+       uint16_t        unused_3;
+       uint32_t        unused_4;
+       /*
+        * The value of source IP address to be used in filtering.
+        * For IPv4, the first four bytes represent the IP address.
         */
-       #define HWRM_CFA_PAIR_ALLOC_INPUT_ENABLES_FC_AB_VALID     UINT32_C(0x4)
+       uint32_t        src_ipaddr[4];
        /*
-        * This bit must be '1' for the fc_ba field to be
-        * configured.
+        * The value of destination IP address to be used in filtering.
+        * For IPv4, the first four bytes represent the IP address.
         */
-       #define HWRM_CFA_PAIR_ALLOC_INPUT_ENABLES_FC_BA_VALID     UINT32_C(0x8)
-       /* VF Pair name (32 byte string). */
-       char    pair_name[32];
+       uint32_t        dst_ipaddr[4];
        /*
-        * The q_ab value specifies the logical index of the TX/RX CoS
-        * queue to be assigned for traffic in the A to B direction of
-        * the interface pair. The default value is 0.
+        * The value of source port to be used in filtering.
+        * Applies to UDP and TCP traffic.
         */
-       uint8_t q_ab;
+       uint16_t        src_port;
        /*
-        * The q_ba value specifies the logical index of the TX/RX CoS
-        * queue to be assigned for traffic in the B to A direction of
-        * the interface pair. The default value is 1.
+        * The value of destination port to be used in filtering.
+        * Applies to UDP and TCP traffic.
         */
-       uint8_t q_ba;
+       uint16_t        dst_port;
        /*
-        * Specifies whether RX ring flow control is disabled (0) or enabled
-        * (1) in the A to B direction. The default value is 0, meaning that
-        * packets will be dropped when the B-side RX rings are full.
+        * If set, this value shall represent the
+        * Logical VNIC ID of the destination VNIC for the RX
+        * path.
         */
-       uint8_t fc_ab;
+       uint16_t        dst_id;
        /*
-        * Specifies whether RX ring flow control is disabled (0) or enabled
-        * (1) in the B to A direction. The default value is 1, meaning that
-        * the RX CoS queue will be flow controlled when the A-side RX rings
-        * are full.
+        * If set, this value shall represent the L2 context that matches the L2
+        * information of the decap filter.
         */
-       uint8_t fc_ba;
-       uint8_t unused_1[4];
+       uint16_t        l2_ctxt_ref_id;
 } __attribute__((packed));
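
/*
 * Illustrative sketch, not part of the generated header: a decap filter
 * that steers VXLAN traffic with a given VNI to a destination VNIC. Only
 * the lower 24 bits of tunnel_id are meaningful for VXLAN; endianness
 * conversion is again omitted.
 */
static inline void
build_vxlan_decap_req(struct hwrm_cfa_decap_filter_alloc_input *req,
		      uint32_t vni, uint16_t dst_vnic)
{
	memset(req, 0, sizeof(*req));
	req->enables = HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_TYPE |
		       HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_TUNNEL_ID |
		       HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_ENABLES_DST_ID;
	req->tunnel_type = HWRM_CFA_DECAP_FILTER_ALLOC_INPUT_TUNNEL_TYPE_VXLAN;
	req->tunnel_id = vni & 0xffffff;	/* VNI uses the lower 24 bits */
	req->dst_id = dst_vnic;
}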
 
-/* hwrm_cfa_pair_alloc_output (size:192b/24B) */
-struct hwrm_cfa_pair_alloc_output {
+/* hwrm_cfa_decap_filter_alloc_output (size:128b/16B) */
+struct hwrm_cfa_decap_filter_alloc_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -24869,15 +23888,9 @@ struct hwrm_cfa_pair_alloc_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* Only valid for modes 1 and 2. */
-       uint16_t        rx_cfa_code_a;
-       /* Only valid for modes 1 and 2. */
-       uint16_t        tx_cfa_action_a;
-       /* Only valid for mode 2. */
-       uint16_t        rx_cfa_code_b;
-       /* Only valid for mode 2. */
-       uint16_t        tx_cfa_action_b;
-       uint8_t unused_0[7];
+       /* This value is an opaque id into CFA data structures. */
+       uint32_t        decap_filter_id;
+       uint8_t unused_0[3];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -24888,13 +23901,13 @@ struct hwrm_cfa_pair_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/**********************
- * hwrm_cfa_pair_free *
- **********************/
+/******************************
+ * hwrm_cfa_decap_filter_free *
+ ******************************/
 
 
-/* hwrm_cfa_pair_free_input (size:384b/48B) */
-struct hwrm_cfa_pair_free_input {
+/* hwrm_cfa_decap_filter_free_input (size:192b/24B) */
+struct hwrm_cfa_decap_filter_free_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -24922,12 +23935,13 @@ struct hwrm_cfa_pair_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* VF Pair name (32 byte string). */
-       char    pair_name[32];
+       /* This value is an opaque id into CFA data structures. */
+       uint32_t        decap_filter_id;
+       uint8_t unused_0[4];
 } __attribute__((packed));
 
-/* hwrm_cfa_pair_free_output (size:128b/16B) */
-struct hwrm_cfa_pair_free_output {
+/* hwrm_cfa_decap_filter_free_output (size:128b/16B) */
+struct hwrm_cfa_decap_filter_free_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -24947,13 +23961,13 @@ struct hwrm_cfa_pair_free_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/**********************
- * hwrm_cfa_pair_info *
- **********************/
+/***********************
+ * hwrm_cfa_flow_alloc *
+ ***********************/
 
 
-/* hwrm_cfa_pair_info_input (size:448b/56B) */
-struct hwrm_cfa_pair_info_input {
+/* hwrm_cfa_flow_alloc_input (size:1024b/128B) */
+struct hwrm_cfa_flow_alloc_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -24981,140 +23995,213 @@ struct hwrm_cfa_pair_info_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       uint32_t        flags;
-       /* If this flag is set, lookup by name else lookup by index. */
-       #define HWRM_CFA_PAIR_INFO_INPUT_FLAGS_LOOKUP_TYPE      UINT32_C(0x1)
-       /* If this flag is set, lookup by PF id and VF id. */
-       #define HWRM_CFA_PAIR_INFO_INPUT_FLAGS_LOOKUP_REPRE     UINT32_C(0x2)
-       /* Pair table index. */
-       uint16_t        pair_index;
-       /* Pair pf index. */
-       uint8_t pair_pfid;
-       /* Pair vf index. */
-       uint8_t pair_vfid;
-       /* Pair name (32 byte string). */
-       char    pair_name[32];
-} __attribute__((packed));
-
-/* hwrm_cfa_pair_info_output (size:576b/72B) */
-struct hwrm_cfa_pair_info_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* Pair table index. */
-       uint16_t        next_pair_index;
-       /* Pair member a's fid. */
-       uint16_t        a_fid;
-       /* Logical host number. */
-       uint8_t host_a_index;
-       /* Logical PF number. */
-       uint8_t pf_a_index;
-       /* Pair member a's Linux logical VF number. */
-       uint16_t        vf_a_index;
-       /* Rx CFA code. */
-       uint16_t        rx_cfa_code_a;
-       /* Tx CFA action. */
-       uint16_t        tx_cfa_action_a;
-       /* Pair member b's fid. */
-       uint16_t        b_fid;
-       /* Logical host number. */
-       uint8_t host_b_index;
-       /* Logical PF number. */
-       uint8_t pf_b_index;
-       /* Pair member a's Linux logical VF number. */
-       uint16_t        vf_b_index;
-       /* Rx CFA code. */
-       uint16_t        rx_cfa_code_b;
-       /* Tx CFA action. */
-       uint16_t        tx_cfa_action_b;
-       /* Pair mode (0-vf2fn, 1-rep2fn, 2-rep2rep, 3-proxy, 4-pfpair). */
-       uint8_t pair_mode;
-       /* Pair between VF on local host with PF or VF on specified host. */
-       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_MODE_VF2FN   UINT32_C(0x0)
-       /* Pair between REP on local host with PF or VF on specified host. */
-       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_MODE_REP2FN  UINT32_C(0x1)
-       /* Pair between REP on local host with REP on specified host. */
-       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_MODE_REP2REP UINT32_C(0x2)
-       /* Pair for the proxy interface. */
-       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_MODE_PROXY   UINT32_C(0x3)
-       /* Pair for the PF interface. */
-       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_MODE_PFPAIR  UINT32_C(0x4)
-       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_MODE_LAST \
-               HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_MODE_PFPAIR
-       /* Pair state. */
-       uint8_t pair_state;
-       /* Pair has been allocated */
-       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_STATE_ALLOCATED UINT32_C(0x1)
-       /* Both pair members are active */
-       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_STATE_ACTIVE    UINT32_C(0x2)
-       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_STATE_LAST \
-               HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_STATE_ACTIVE
-       /* Pair name (32 byte string). */
-       char    pair_name[32];
-       uint8_t unused_0[7];
+       uint16_t        flags;
+       /* tunnel is 1 b */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_TUNNEL \
+               UINT32_C(0x1)
+       /* num_vlan is 2 b */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_NUM_VLAN_MASK \
+               UINT32_C(0x6)
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_NUM_VLAN_SFT           1
+       /* no tags */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_NUM_VLAN_NONE \
+               (UINT32_C(0x0) << 1)
+       /* 1 tag */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_NUM_VLAN_ONE \
+               (UINT32_C(0x1) << 1)
+       /* 2 tags */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_NUM_VLAN_TWO \
+               (UINT32_C(0x2) << 1)
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_NUM_VLAN_LAST \
+               HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_NUM_VLAN_TWO
+       /* Enumeration denoting the Flow Type. */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_MASK \
+               UINT32_C(0x38)
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_SFT           3
+       /* L2 flow */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_L2 \
+               (UINT32_C(0x0) << 3)
+       /* IPV4 flow */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_IPV4 \
+               (UINT32_C(0x1) << 3)
+       /* IPV6 flow */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_IPV6 \
+               (UINT32_C(0x2) << 3)
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_LAST \
+               HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_IPV6
        /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
+        * When set to 1, indicates TX flow offload for the function specified
+        * in src_fid; dst_fid should be set to an invalid value. To indicate a
+        * VM to VM flow, both the path_tx and path_rx flags need to be set.
+        * For the virtio vSwitch offload case, src_fid and dst_fid are set to
+        * the same fid value. For the SR-IOV vSwitch offload case, src_fid and
+        * dst_fid must be set to the same VF FID belonging to children VFs of
+        * the same PF to indicate a VM to VM flow.
         */
-       uint8_t valid;
-} __attribute__((packed));
-
-/**********************
- * hwrm_cfa_vfr_alloc *
- **********************/
-
-
-/* hwrm_cfa_vfr_alloc_input (size:448b/56B) */
-struct hwrm_cfa_vfr_alloc_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_PATH_TX \
+               UINT32_C(0x40)
        /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
+        * When set to 1, indicates RX flow offload for the function specified
+        * in dst_fid; src_fid should be set to an invalid value.
         */
-       uint16_t        cmpl_ring;
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_PATH_RX \
+               UINT32_C(0x80)
        /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
+        * Set to 1 to indicate that matching of the VXLAN VNI from the custom
+        * VXLAN header is required; the VXLAN VNI value is stored in the first
+        * 24 bits of the dmac field. This flag is only valid when the flow
+        * direction is RX.
         */
-       uint16_t        seq_id;
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_MATCH_VXLAN_IP_VNI \
+               UINT32_C(0x100)
        /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
+        * Tx Flow: vf fid.
+        * Rx Flow: pf fid.
         */
-       uint16_t        target_id;
+       uint16_t        src_fid;
+       /* Tunnel handle valid when tunnel flag is set. */
+       uint32_t        tunnel_handle;
+       uint16_t        action_flags;
        /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
+        * Setting of this flag indicates the forward action.
         */
-       uint64_t        resp_addr;
-       /* Logical VF number (range: 0 -> MAX_VFS -1). */
-       uint16_t        vf_id;
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_FWD \
+               UINT32_C(0x1)
+       /* recycle is 1 b */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_RECYCLE \
+               UINT32_C(0x2)
        /*
-        * This field is reserved for the future use.
-        * It shall be set to 0.
+        * Setting of this flag indicates drop action. If this flag is not set,
+        * then it should be considered accept action.
         */
-       uint16_t        reserved;
-       uint8_t unused_0[4];
-       /* VF Representor name (32 byte string). */
-       char    vfr_name[32];
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_DROP \
+               UINT32_C(0x4)
+       /* meter is 1 b */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_METER \
+               UINT32_C(0x8)
+       /* tunnel is 1 b */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_TUNNEL \
+               UINT32_C(0x10)
+       /* nat_src is 1 b */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_NAT_SRC \
+               UINT32_C(0x20)
+       /* nat_dest is 1 b */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_NAT_DEST \
+               UINT32_C(0x40)
+       /* nat_ipv4_address is 1 b */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_NAT_IPV4_ADDRESS \
+               UINT32_C(0x80)
+       /* l2_header_rewrite is 1 b */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_L2_HEADER_REWRITE \
+               UINT32_C(0x100)
+       /* ttl_decrement is 1 b */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_TTL_DECREMENT \
+               UINT32_C(0x200)
+       /*
+        * If set to 1 and the flow direction is TX, it indicates decap of the
+        * L2 header and encap of the tunnel header. If set to 1 and the flow
+        * direction is RX, it indicates decap of the tunnel header and encap of
+        * the L2 header. The type of tunnel is specified in the tunnel_type
+        * field.
+        */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_TUNNEL_IP \
+               UINT32_C(0x400)
+       /*
+        * Tx Flow: pf or vf fid.
+        * Rx Flow: vf fid.
+        */
+       uint16_t        dst_fid;
+       /* VLAN tpid, valid when push_vlan flag is set. */
+       uint16_t        l2_rewrite_vlan_tpid;
+       /* VLAN tci, valid when push_vlan flag is set. */
+       uint16_t        l2_rewrite_vlan_tci;
+       /* Meter id, valid when meter flag is set. */
+       uint16_t        act_meter_id;
+       /* Handle of a flow that shares the same L2 context TCAM key. */
+       uint16_t        ref_flow_handle;
+       /* This value sets the match value for the ethertype. */
+       uint16_t        ethertype;
+       /* valid when num tags is 1 or 2. */
+       uint16_t        outer_vlan_tci;
+       /* This value sets the match value for the Destination MAC address. */
+       uint16_t        dmac[3];
+       /* valid when num tags is 2. */
+       uint16_t        inner_vlan_tci;
+       /* This value sets the match value for the Source MAC address. */
+       uint16_t        smac[3];
+       /* The bit length of destination IP address mask. */
+       uint8_t ip_dst_mask_len;
+       /* The bit length of source IP address mask. */
+       uint8_t ip_src_mask_len;
+       /* The value of destination IPv4/IPv6 address. */
+       uint32_t        ip_dst[4];
+       /* The source IPv4/IPv6 address. */
+       uint32_t        ip_src[4];
+       /*
+        * The value of source port.
+        * Applies to UDP and TCP traffic.
+        */
+       uint16_t        l4_src_port;
+       /*
+        * The value of source port mask.
+        * Applies to UDP and TCP traffic.
+        */
+       uint16_t        l4_src_port_mask;
+       /*
+        * The value of destination port.
+        * Applies to UDP and TCP traffic.
+        */
+       uint16_t        l4_dst_port;
+       /*
+        * The value of destination port mask.
+        * Applies to UDP and TCP traffic.
+        */
+       uint16_t        l4_dst_port_mask;
+       /*
+        * NAT IPv4/6 address based on address type flag.
+        * 0 values are ignored.
+        */
+       uint32_t        nat_ip_address[4];
+       /* L2 header re-write Destination MAC address. */
+       uint16_t        l2_rewrite_dmac[3];
+       /*
+        * The NAT source/destination port based on direction flag.
+        * Applies to UDP and TCP traffic.
+        * 0 values are ignored.
+        */
+       uint16_t        nat_port;
+       /* L2 header re-write Source MAC address. */
+       uint16_t        l2_rewrite_smac[3];
+       /* The value of ip protocol. */
+       uint8_t ip_proto;
+       /* Tunnel Type. */
+       uint8_t tunnel_type;
+       /* Non-tunnel */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL UINT32_C(0x0)
+       /* Virtual eXtensible Local Area Network (VXLAN) */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_TUNNEL_TYPE_VXLAN     UINT32_C(0x1)
+       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_TUNNEL_TYPE_NVGRE     UINT32_C(0x2)
+       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_TUNNEL_TYPE_L2GRE     UINT32_C(0x3)
+       /* IP in IP */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_TUNNEL_TYPE_IPIP      UINT32_C(0x4)
+       /* Generic Network Virtualization Encapsulation (Geneve) */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_TUNNEL_TYPE_GENEVE    UINT32_C(0x5)
+       /* Multi-Protocol Label Switching (MPLS) */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_TUNNEL_TYPE_MPLS      UINT32_C(0x6)
+       /* Stateless Transport Tunnel (STT) */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_TUNNEL_TYPE_STT       UINT32_C(0x7)
+       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_TUNNEL_TYPE_IPGRE     UINT32_C(0x8)
+       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4  UINT32_C(0x9)
+       /* Any tunneled traffic */
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL UINT32_C(0xff)
+       #define HWRM_CFA_FLOW_ALLOC_INPUT_TUNNEL_TYPE_LAST \
+               HWRM_CFA_FLOW_ALLOC_INPUT_TUNNEL_TYPE_ANYTUNNEL
 } __attribute__((packed));
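
To make the layout above concrete, here is a minimal sketch of building a TX IPv4/TCP forward-flow request from these definitions. It assumes this header is included, that the struct's leading flags field (declared earlier in the structure, above this hunk) is the target of the FLAGS_* values, and that hwrm_send() is a hypothetical transport which also fills the common request header (req_type, seq_id, resp_addr) and handles HWRM endianness.

	#include <stdint.h>
	#include <string.h>

	extern int hwrm_send(void *req, size_t len);	/* hypothetical transport */

	/* Sketch only: allocate a TX IPv4/TCP flow forwarding src_fid -> dst_fid. */
	static int cfa_flow_alloc_tx_ipv4(uint16_t src_fid, uint16_t dst_fid)
	{
		struct hwrm_cfa_flow_alloc_input req;

		memset(&req, 0, sizeof(req));
		req.flags = HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_PATH_TX |
			    HWRM_CFA_FLOW_ALLOC_INPUT_FLAGS_FLOWTYPE_IPV4;	/* assumed flags field */
		req.action_flags = HWRM_CFA_FLOW_ALLOC_INPUT_ACTION_FLAGS_FWD;
		req.src_fid = src_fid;	/* TX flow: VF FID */
		req.dst_fid = dst_fid;	/* TX flow: PF or VF FID */
		req.ip_proto = 6;	/* TCP */
		req.tunnel_type = HWRM_CFA_FLOW_ALLOC_INPUT_TUNNEL_TYPE_NONTUNNEL;
		return hwrm_send(&req, sizeof(req));
	}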
 
-/* hwrm_cfa_vfr_alloc_output (size:128b/16B) */
-struct hwrm_cfa_vfr_alloc_output {
+/* hwrm_cfa_flow_alloc_output (size:256b/32B) */
+struct hwrm_cfa_flow_alloc_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -25123,11 +24210,20 @@ struct hwrm_cfa_vfr_alloc_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* Rx CFA code. */
-       uint16_t        rx_cfa_code;
-       /* Tx CFA action. */
-       uint16_t        tx_cfa_action;
-       uint8_t unused_0[3];
+       /* Flow record index. */
+       uint16_t        flow_handle;
+       uint8_t unused_0[2];
+       /*
+        * This is the ID of the flow associated with this
+        * filter.
+        * This value shall be used to match and associate the
+        * flow identifier returned in completion records.
+        * A value of 0xFFFFFFFF shall indicate no flow id.
+        */
+       uint32_t        flow_id;
+       /* This value identifies a set of CFA data structures used for a flow. */
+       uint64_t        ext_flow_handle;
+       uint8_t unused_1[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -25138,13 +24234,13 @@ struct hwrm_cfa_vfr_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
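
The response is consumed by checking the flow_id sentinel described above; a small sketch, assuming this header is included:

	#include <stdint.h>

	/* Sketch: true if the firmware assigned a usable flow id (not 0xFFFFFFFF). */
	static int cfa_flow_id_valid(const struct hwrm_cfa_flow_alloc_output *resp)
	{
		return resp->flow_id != UINT32_C(0xFFFFFFFF);
	}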
 
-/*********************
- * hwrm_cfa_vfr_free *
- *********************/
+/**********************
+ * hwrm_cfa_flow_free *
+ **********************/
 
 
-/* hwrm_cfa_vfr_free_input (size:384b/48B) */
-struct hwrm_cfa_vfr_free_input {
+/* hwrm_cfa_flow_free_input (size:256b/32B) */
+struct hwrm_cfa_flow_free_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -25172,12 +24268,15 @@ struct hwrm_cfa_vfr_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* VF Representor name (32 byte string). */
-       char    vfr_name[32];
+       /* Flow record index. */
+       uint16_t        flow_handle;
+       uint8_t unused_0[6];
+       /* This value identifies a set of CFA data structures used for a flow. */
+       uint64_t        ext_flow_handle;
 } __attribute__((packed));
 
-/* hwrm_cfa_vfr_free_output (size:128b/16B) */
-struct hwrm_cfa_vfr_free_output {
+/* hwrm_cfa_flow_free_output (size:256b/32B) */
+struct hwrm_cfa_flow_free_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -25186,6 +24285,10 @@ struct hwrm_cfa_vfr_free_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
+       /* Packet counter for the freed flow (64 b). */
+       uint64_t        packet;
+       /* Byte counter for the freed flow (64 b). */
+       uint64_t        byte;
        uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
@@ -25197,13 +24300,13 @@ struct hwrm_cfa_vfr_free_output {
        uint8_t valid;
 } __attribute__((packed));
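
For symmetry with the alloc sketch, a minimal sketch of freeing a flow through the input structure above, using the same hypothetical hwrm_send() transport; the output then carries the flow's final packet/byte counters.

	#include <stdint.h>
	#include <string.h>

	extern int hwrm_send(void *req, size_t len);	/* hypothetical transport */

	/* Sketch: free a flow previously returned by hwrm_cfa_flow_alloc. */
	static int cfa_flow_free(uint16_t flow_handle, uint64_t ext_flow_handle)
	{
		struct hwrm_cfa_flow_free_input req;

		memset(&req, 0, sizeof(req));
		req.flow_handle = flow_handle;
		req.ext_flow_handle = ext_flow_handle;
		return hwrm_send(&req, sizeof(req));
	}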
 
-/******************************
- * hwrm_tunnel_dst_port_query *
- ******************************/
+/***********************
+ * hwrm_cfa_flow_flush *
+ ***********************/
 
 
-/* hwrm_tunnel_dst_port_query_input (size:192b/24B) */
-struct hwrm_tunnel_dst_port_query_input {
+/* hwrm_cfa_flow_flush_input (size:192b/24B) */
+struct hwrm_cfa_flow_flush_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -25231,27 +24334,12 @@ struct hwrm_tunnel_dst_port_query_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* Tunnel Type. */
-       uint8_t tunnel_type;
-       /* Virtual eXtensible Local Area Network (VXLAN) */
-       #define HWRM_TUNNEL_DST_PORT_QUERY_INPUT_TUNNEL_TYPE_VXLAN \
-               UINT32_C(0x1)
-       /* Generic Network Virtualization Encapsulation (Geneve) */
-       #define HWRM_TUNNEL_DST_PORT_QUERY_INPUT_TUNNEL_TYPE_GENEVE \
-               UINT32_C(0x5)
-       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
-       #define HWRM_TUNNEL_DST_PORT_QUERY_INPUT_TUNNEL_TYPE_VXLAN_V4 \
-               UINT32_C(0x9)
-       /* Enhance Generic Routing Encapsulation (GRE version 1) inside IP datagram payload */
-       #define HWRM_TUNNEL_DST_PORT_QUERY_INPUT_TUNNEL_TYPE_IPGRE_V1 \
-               UINT32_C(0xa)
-       #define HWRM_TUNNEL_DST_PORT_QUERY_INPUT_TUNNEL_TYPE_LAST \
-               HWRM_TUNNEL_DST_PORT_QUERY_INPUT_TUNNEL_TYPE_IPGRE_V1
-       uint8_t unused_0[7];
+       uint32_t        flags;
+       uint8_t unused_0[4];
 } __attribute__((packed));
 
-/* hwrm_tunnel_dst_port_query_output (size:128b/16B) */
-struct hwrm_tunnel_dst_port_query_output {
+/* hwrm_cfa_flow_flush_output (size:128b/16B) */
+struct hwrm_cfa_flow_flush_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -25260,25 +24348,7 @@ struct hwrm_tunnel_dst_port_query_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /*
-        * This field represents the identifier of L4 destination port
-        * used for the given tunnel type. This field is valid for
-        * specific tunnel types that use layer 4 (e.g. UDP)
-        * transports for tunneling.
-        */
-       uint16_t        tunnel_dst_port_id;
-       /*
-        * This field represents the value of L4 destination port
-        * identified by tunnel_dst_port_id. This field is valid for
-        * specific tunnel types that use layer 4 (e.g. UDP)
-        * transports for tunneling.
-        * This field is in network byte order.
-        *
-        * A value of 0 means that the destination port is not
-        * configured.
-        */
-       uint16_t        tunnel_dst_port_val;
-       uint8_t unused_0[3];
+       uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -25289,13 +24359,13 @@ struct hwrm_tunnel_dst_port_query_output {
        uint8_t valid;
 } __attribute__((packed));
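
hwrm_cfa_flow_flush takes only an opaque flags word, and no flag bits are defined in this hunk, so a sketch simply leaves flags at zero (hypothetical hwrm_send() as before):

	#include <string.h>

	extern int hwrm_send(void *req, size_t len);	/* hypothetical transport */

	/* Sketch: flush flows; flags stays 0 since no bits are defined here. */
	static int cfa_flow_flush(void)
	{
		struct hwrm_cfa_flow_flush_input req;

		memset(&req, 0, sizeof(req));
		return hwrm_send(&req, sizeof(req));
	}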
 
-/******************************
- * hwrm_tunnel_dst_port_alloc *
- ******************************/
+/***********************
+ * hwrm_cfa_flow_stats *
+ ***********************/
 
 
-/* hwrm_tunnel_dst_port_alloc_input (size:192b/24B) */
-struct hwrm_tunnel_dst_port_alloc_input {
+/* hwrm_cfa_flow_stats_input (size:640b/80B) */
+struct hwrm_cfa_flow_stats_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -25323,39 +24393,53 @@ struct hwrm_tunnel_dst_port_alloc_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* Tunnel Type. */
-       uint8_t tunnel_type;
-       /* Virtual eXtensible Local Area Network (VXLAN) */
-       #define HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
-               UINT32_C(0x1)
-       /* Generic Network Virtualization Encapsulation (Geneve) */
-       #define HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
-               UINT32_C(0x5)
-       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
-       #define HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
-               UINT32_C(0x9)
-       /* Enhance Generic Routing Encapsulation (GRE version 1) inside IP datagram payload */
-       #define HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_IPGRE_V1 \
-               UINT32_C(0xa)
-       #define HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_LAST \
-               HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_IPGRE_V1
-       uint8_t unused_0;
-       /*
-        * This field represents the value of L4 destination port used
-        * for the given tunnel type. This field is valid for
-        * specific tunnel types that use layer 4 (e.g. UDP)
-        * transports for tunneling.
-        *
-        * This field is in network byte order.
-        *
-        * A value of 0 shall fail the command.
-        */
-       uint16_t        tunnel_dst_port_val;
-       uint8_t unused_1[4];
+       /* Number of valid flow handles in this command. */
+       uint16_t        num_flows;
+       /* Flow handle. */
+       uint16_t        flow_handle_0;
+       /* Flow handle. */
+       uint16_t        flow_handle_1;
+       /* Flow handle. */
+       uint16_t        flow_handle_2;
+       /* Flow handle. */
+       uint16_t        flow_handle_3;
+       /* Flow handle. */
+       uint16_t        flow_handle_4;
+       /* Flow handle. */
+       uint16_t        flow_handle_5;
+       /* Flow handle. */
+       uint16_t        flow_handle_6;
+       /* Flow handle. */
+       uint16_t        flow_handle_7;
+       /* Flow handle. */
+       uint16_t        flow_handle_8;
+       /* Flow handle. */
+       uint16_t        flow_handle_9;
+       uint8_t unused_0[2];
+       /* Flow ID of a flow. */
+       uint32_t        flow_id_0;
+       /* Flow ID of a flow. */
+       uint32_t        flow_id_1;
+       /* Flow ID of a flow. */
+       uint32_t        flow_id_2;
+       /* Flow ID of a flow. */
+       uint32_t        flow_id_3;
+       /* Flow ID of a flow. */
+       uint32_t        flow_id_4;
+       /* Flow ID of a flow. */
+       uint32_t        flow_id_5;
+       /* Flow ID of a flow. */
+       uint32_t        flow_id_6;
+       /* Flow ID of a flow. */
+       uint32_t        flow_id_7;
+       /* Flow ID of a flow. */
+       uint32_t        flow_id_8;
+       /* Flow ID of a flow. */
+       uint32_t        flow_id_9;
 } __attribute__((packed));
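
The stats command batches up to ten flows per call; num_flows says how many of the flow_handle_0..flow_handle_9 slots are valid, and the counters in the response appear to correspond positionally. A single-flow sketch, with the usual assumed transport:

	#include <stdint.h>
	#include <string.h>

	extern int hwrm_send(void *req, size_t len);	/* hypothetical transport */

	/* Sketch: query counters for one flow via slot 0. */
	static int cfa_flow_stats_one(uint16_t flow_handle)
	{
		struct hwrm_cfa_flow_stats_input req;

		memset(&req, 0, sizeof(req));
		req.num_flows = 1;
		req.flow_handle_0 = flow_handle;
		return hwrm_send(&req, sizeof(req));
	}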
 
-/* hwrm_tunnel_dst_port_alloc_output (size:128b/16B) */
-struct hwrm_tunnel_dst_port_alloc_output {
+/* hwrm_cfa_flow_stats_output (size:1408b/176B) */
+struct hwrm_cfa_flow_stats_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -25364,12 +24448,47 @@ struct hwrm_tunnel_dst_port_alloc_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /*
-        * Identifier of a tunnel L4 destination port value. Only applies to tunnel
-        * types that has l4 destination port parameters.
-        */
-       uint16_t        tunnel_dst_port_id;
-       uint8_t unused_0[5];
+       /* Packet count for the flow in flow_handle_0 (64 b). */
+       uint64_t        packet_0;
+       /* Packet count for the flow in flow_handle_1 (64 b). */
+       uint64_t        packet_1;
+       /* Packet count for the flow in flow_handle_2 (64 b). */
+       uint64_t        packet_2;
+       /* Packet count for the flow in flow_handle_3 (64 b). */
+       uint64_t        packet_3;
+       /* Packet count for the flow in flow_handle_4 (64 b). */
+       uint64_t        packet_4;
+       /* Packet count for the flow in flow_handle_5 (64 b). */
+       uint64_t        packet_5;
+       /* Packet count for the flow in flow_handle_6 (64 b). */
+       uint64_t        packet_6;
+       /* Packet count for the flow in flow_handle_7 (64 b). */
+       uint64_t        packet_7;
+       /* Packet count for the flow in flow_handle_8 (64 b). */
+       uint64_t        packet_8;
+       /* Packet count for the flow in flow_handle_9 (64 b). */
+       uint64_t        packet_9;
+       /* Byte count for the flow in flow_handle_0 (64 b). */
+       uint64_t        byte_0;
+       /* Byte count for the flow in flow_handle_1 (64 b). */
+       uint64_t        byte_1;
+       /* Byte count for the flow in flow_handle_2 (64 b). */
+       uint64_t        byte_2;
+       /* Byte count for the flow in flow_handle_3 (64 b). */
+       uint64_t        byte_3;
+       /* Byte count for the flow in flow_handle_4 (64 b). */
+       uint64_t        byte_4;
+       /* Byte count for the flow in flow_handle_5 (64 b). */
+       uint64_t        byte_5;
+       /* Byte count for the flow in flow_handle_6 (64 b). */
+       uint64_t        byte_6;
+       /* Byte count for the flow in flow_handle_7 (64 b). */
+       uint64_t        byte_7;
+       /* Byte count for the flow in flow_handle_8 (64 b). */
+       uint64_t        byte_8;
+       /* Byte count for the flow in flow_handle_9 (64 b). */
+       uint64_t        byte_9;
+       uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -25380,13 +24499,13 @@ struct hwrm_tunnel_dst_port_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/*****************************
- * hwrm_tunnel_dst_port_free *
- *****************************/
+/**********************
+ * hwrm_cfa_pair_info *
+ **********************/
 
 
-/* hwrm_tunnel_dst_port_free_input (size:192b/24B) */
-struct hwrm_tunnel_dst_port_free_input {
+/* hwrm_cfa_pair_info_input (size:448b/56B) */
+struct hwrm_cfa_pair_info_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -25414,33 +24533,23 @@ struct hwrm_tunnel_dst_port_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* Tunnel Type. */
-       uint8_t tunnel_type;
-       /* Virtual eXtensible Local Area Network (VXLAN) */
-       #define HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_VXLAN \
-               UINT32_C(0x1)
-       /* Generic Network Virtualization Encapsulation (Geneve) */
-       #define HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_GENEVE \
-               UINT32_C(0x5)
-       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
-       #define HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_VXLAN_V4 \
-               UINT32_C(0x9)
-       /* Enhance Generic Routing Encapsulation (GRE version 1) inside IP datagram payload */
-       #define HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_IPGRE_V1 \
-               UINT32_C(0xa)
-       #define HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_LAST \
-               HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_IPGRE_V1
-       uint8_t unused_0;
-       /*
-        * Identifier of a tunnel L4 destination port value. Only applies to tunnel
-        * types that has l4 destination port parameters.
-        */
-       uint16_t        tunnel_dst_port_id;
-       uint8_t unused_1[4];
+       uint32_t        flags;
+       /* If this flag is set, lookup is by name; otherwise lookup is by index. */
+       #define HWRM_CFA_PAIR_INFO_INPUT_FLAGS_LOOKUP_TYPE      UINT32_C(0x1)
+       /* If this flag is set, lookup is by PF id and VF id. */
+       #define HWRM_CFA_PAIR_INFO_INPUT_FLAGS_LOOKUP_REPRE     UINT32_C(0x2)
+       /* Pair table index. */
+       uint16_t        pair_index;
+       /* Pair pf index. */
+       uint8_t pair_pfid;
+       /* Pair vf index. */
+       uint8_t pair_vfid;
+       /* Pair name (32 byte string). */
+       char    pair_name[32];
 } __attribute__((packed));
 
-/* hwrm_tunnel_dst_port_free_output (size:128b/16B) */
-struct hwrm_tunnel_dst_port_free_output {
+/* hwrm_cfa_pair_info_output (size:576b/72B) */
+struct hwrm_cfa_pair_info_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -25449,68 +24558,74 @@ struct hwrm_tunnel_dst_port_free_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       uint8_t unused_1[7];
+       /* Pair table index. */
+       uint16_t        next_pair_index;
+       /* Pair member a's fid. */
+       uint16_t        a_fid;
+       /* Logical host number. */
+       uint8_t host_a_index;
+       /* Logical PF number. */
+       uint8_t pf_a_index;
+       /* Pair member a's Linux logical VF number. */
+       uint16_t        vf_a_index;
+       /* Rx CFA code. */
+       uint16_t        rx_cfa_code_a;
+       /* Tx CFA action. */
+       uint16_t        tx_cfa_action_a;
+       /* Pair member b's fid. */
+       uint16_t        b_fid;
+       /* Logical host number. */
+       uint8_t host_b_index;
+       /* Logical PF number. */
+       uint8_t pf_b_index;
+       /* Pair member b's Linux logical VF number. */
+       uint16_t        vf_b_index;
+       /* Rx CFA code. */
+       uint16_t        rx_cfa_code_b;
+       /* Tx CFA action. */
+       uint16_t        tx_cfa_action_b;
+       /* Pair mode (0-vf2fn, 1-rep2fn, 2-rep2rep, 3-proxy, 4-pfpair). */
+       uint8_t pair_mode;
+       /* Pair between VF on local host with PF or VF on specified host. */
+       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_MODE_VF2FN   UINT32_C(0x0)
+       /* Pair between REP on local host with PF or VF on specified host. */
+       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_MODE_REP2FN  UINT32_C(0x1)
+       /* Pair between REP on local host with REP on specified host. */
+       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_MODE_REP2REP UINT32_C(0x2)
+       /* Pair for the proxy interface. */
+       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_MODE_PROXY   UINT32_C(0x3)
+       /* Pair for the PF interface. */
+       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_MODE_PFPAIR  UINT32_C(0x4)
+       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_MODE_LAST \
+               HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_MODE_PFPAIR
+       /* Pair state. */
+       uint8_t pair_state;
+       /* Pair has been allocated */
+       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_STATE_ALLOCATED UINT32_C(0x1)
+       /* Both pair members are active */
+       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_STATE_ACTIVE    UINT32_C(0x2)
+       #define HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_STATE_LAST \
+               HWRM_CFA_PAIR_INFO_OUTPUT_PAIR_STATE_ACTIVE
+       /* Pair name (32 byte string). */
+       char    pair_name[32];
+       uint8_t unused_0[7];
        /*
         * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
-
-/* ctx_hw_stats (size:1280b/160B) */
-struct ctx_hw_stats {
-       /* Number of received unicast packets */
-       uint64_t        rx_ucast_pkts;
-       /* Number of received multicast packets */
-       uint64_t        rx_mcast_pkts;
-       /* Number of received broadcast packets */
-       uint64_t        rx_bcast_pkts;
-       /* Number of discarded packets on received path */
-       uint64_t        rx_discard_pkts;
-       /* Number of dropped packets on received path */
-       uint64_t        rx_drop_pkts;
-       /* Number of received bytes for unicast traffic */
-       uint64_t        rx_ucast_bytes;
-       /* Number of received bytes for multicast traffic */
-       uint64_t        rx_mcast_bytes;
-       /* Number of received bytes for broadcast traffic */
-       uint64_t        rx_bcast_bytes;
-       /* Number of transmitted unicast packets */
-       uint64_t        tx_ucast_pkts;
-       /* Number of transmitted multicast packets */
-       uint64_t        tx_mcast_pkts;
-       /* Number of transmitted broadcast packets */
-       uint64_t        tx_bcast_pkts;
-       /* Number of discarded packets on transmit path */
-       uint64_t        tx_discard_pkts;
-       /* Number of dropped packets on transmit path */
-       uint64_t        tx_drop_pkts;
-       /* Number of transmitted bytes for unicast traffic */
-       uint64_t        tx_ucast_bytes;
-       /* Number of transmitted bytes for multicast traffic */
-       uint64_t        tx_mcast_bytes;
-       /* Number of transmitted bytes for broadcast traffic */
-       uint64_t        tx_bcast_bytes;
-       /* Number of TPA packets */
-       uint64_t        tpa_pkts;
-       /* Number of TPA bytes */
-       uint64_t        tpa_bytes;
-       /* Number of TPA events */
-       uint64_t        tpa_events;
-       /* Number of TPA aborts */
-       uint64_t        tpa_aborts;
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
+        */
+       uint8_t valid;
 } __attribute__((packed));
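
A sketch of the two lookup modes selected by the input flags above: with LOOKUP_TYPE set the pair is found by pair_name, otherwise by pair_index (assumed transport as before):

	#include <stdint.h>
	#include <string.h>

	extern int hwrm_send(void *req, size_t len);	/* hypothetical transport */

	/* Sketch: look up a pair by name when given, else by table index. */
	static int cfa_pair_info(const char *name, uint16_t index)
	{
		struct hwrm_cfa_pair_info_input req;

		memset(&req, 0, sizeof(req));
		if (name != NULL) {
			req.flags = HWRM_CFA_PAIR_INFO_INPUT_FLAGS_LOOKUP_TYPE;
			strncpy(req.pair_name, name, sizeof(req.pair_name) - 1);
		} else {
			req.pair_index = index;
		}
		return hwrm_send(&req, sizeof(req));
	}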
 
-/***********************
- * hwrm_stat_ctx_alloc *
- ***********************/
+/***************************************
+ * hwrm_cfa_redirect_query_tunnel_type *
+ ***************************************/
 
 
-/* hwrm_stat_ctx_alloc_input (size:256b/32B) */
-struct hwrm_stat_ctx_alloc_input {
+/* hwrm_cfa_redirect_query_tunnel_type_input (size:192b/24B) */
+struct hwrm_cfa_redirect_query_tunnel_type_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -25538,36 +24653,13 @@ struct hwrm_stat_ctx_alloc_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* This is the address for statistic block. */
-       uint64_t        stats_dma_addr;
-       /*
-        * The statistic block update period in ms.
-        * e.g. 250ms, 500ms, 750ms, 1000ms.
-        * If update_period_ms is 0, then the stats update
-        * shall be never done and the DMA address shall not be used.
-        * In this case, the stat block can only be read by
-        * hwrm_stat_ctx_query command.
-        */
-       uint32_t        update_period_ms;
-       /*
-        * This field is used to specify statistics context specific
-        * configuration flags.
-        */
-       uint8_t stat_ctx_flags;
-       /*
-        * When this bit is set to '1', the statistics context shall be
-        * allocated for RoCE traffic only. In this case, traffic other
-        * than offloaded RoCE traffic shall not be included in this
-        * statistic context.
-        * When this bit is set to '0', the statistics context shall be
-        * used for the network traffic other than offloaded RoCE traffic.
-        */
-       #define HWRM_STAT_CTX_ALLOC_INPUT_STAT_CTX_FLAGS_ROCE     UINT32_C(0x1)
-       uint8_t unused_0[3];
+       /* The source function id. */
+       uint16_t        src_fid;
+       uint8_t unused_0[6];
 } __attribute__((packed));
 
-/* hwrm_stat_ctx_alloc_output (size:128b/16B) */
-struct hwrm_stat_ctx_alloc_output {
+/* hwrm_cfa_redirect_query_tunnel_type_output (size:128b/16B) */
+struct hwrm_cfa_redirect_query_tunnel_type_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -25576,8 +24668,44 @@ struct hwrm_stat_ctx_alloc_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* This is the statistics context ID value. */
-       uint32_t        stat_ctx_id;
+       /* Tunnel Mask. */
+       uint32_t        tunnel_mask;
+       /* Non-tunnel */
+       #define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE_OUTPUT_TUNNEL_MASK_NONTUNNEL \
+               UINT32_C(0x1)
+       /* Virtual eXtensible Local Area Network (VXLAN) */
+       #define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE_OUTPUT_TUNNEL_MASK_VXLAN \
+               UINT32_C(0x2)
+       /* Network Virtualization Generic Routing Encapsulation (NVGRE) */
+       #define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE_OUTPUT_TUNNEL_MASK_NVGRE \
+               UINT32_C(0x4)
+       /* Generic Routing Encapsulation (GRE) inside Ethernet payload */
+       #define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE_OUTPUT_TUNNEL_MASK_L2GRE \
+               UINT32_C(0x8)
+       /* IP in IP */
+       #define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE_OUTPUT_TUNNEL_MASK_IPIP \
+               UINT32_C(0x10)
+       /* Generic Network Virtualization Encapsulation (Geneve) */
+       #define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE_OUTPUT_TUNNEL_MASK_GENEVE \
+               UINT32_C(0x20)
+       /* Multi-Protocol Label Switching (MPLS) */
+       #define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE_OUTPUT_TUNNEL_MASK_MPLS \
+               UINT32_C(0x40)
+       /* Stateless Transport Tunnel (STT) */
+       #define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE_OUTPUT_TUNNEL_MASK_STT \
+               UINT32_C(0x80)
+       /* Generic Routing Encapsulation (GRE) inside IP datagram payload */
+       #define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE_OUTPUT_TUNNEL_MASK_IPGRE \
+               UINT32_C(0x100)
+       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
+       #define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE_OUTPUT_TUNNEL_MASK_VXLAN_V4 \
+               UINT32_C(0x200)
+       /* Enhanced Generic Routing Encapsulation (GRE version 1) inside IP datagram payload */
+       #define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE_OUTPUT_TUNNEL_MASK_IPGRE_V1 \
+               UINT32_C(0x400)
+       /* Any tunneled traffic */
+       #define HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE_OUTPUT_TUNNEL_MASK_ANYTUNNEL \
+               UINT32_C(0x800)
        uint8_t unused_0[3];
        /*
         * This field is used in Output records to indicate that the output
@@ -25589,13 +24717,13 @@ struct hwrm_stat_ctx_alloc_output {
        uint8_t valid;
 } __attribute__((packed));
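
Since the response carries a bitmask rather than an enumeration, callers test individual TUNNEL_MASK_* bits; a one-function sketch:

	/* Sketch: true if the queried function supports VXLAN redirection. */
	static int
	redirect_supports_vxlan(const struct hwrm_cfa_redirect_query_tunnel_type_output *o)
	{
		return (o->tunnel_mask &
			HWRM_CFA_REDIRECT_QUERY_TUNNEL_TYPE_OUTPUT_TUNNEL_MASK_VXLAN) != 0;
	}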
 
-/**********************
- * hwrm_stat_ctx_free *
- **********************/
+/******************************
+ * hwrm_tunnel_dst_port_query *
+ ******************************/
 
 
-/* hwrm_stat_ctx_free_input (size:192b/24B) */
-struct hwrm_stat_ctx_free_input {
+/* hwrm_tunnel_dst_port_query_input (size:192b/24B) */
+struct hwrm_tunnel_dst_port_query_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -25623,13 +24751,27 @@ struct hwrm_stat_ctx_free_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* ID of the statistics context that is being queried. */
-       uint32_t        stat_ctx_id;
-       uint8_t unused_0[4];
+       /* Tunnel Type. */
+       uint8_t tunnel_type;
+       /* Virtual eXtensible Local Area Network (VXLAN) */
+       #define HWRM_TUNNEL_DST_PORT_QUERY_INPUT_TUNNEL_TYPE_VXLAN \
+               UINT32_C(0x1)
+       /* Generic Network Virtualization Encapsulation (Geneve) */
+       #define HWRM_TUNNEL_DST_PORT_QUERY_INPUT_TUNNEL_TYPE_GENEVE \
+               UINT32_C(0x5)
+       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
+       #define HWRM_TUNNEL_DST_PORT_QUERY_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+               UINT32_C(0x9)
+       /* Enhanced Generic Routing Encapsulation (GRE version 1) inside IP datagram payload */
+       #define HWRM_TUNNEL_DST_PORT_QUERY_INPUT_TUNNEL_TYPE_IPGRE_V1 \
+               UINT32_C(0xa)
+       #define HWRM_TUNNEL_DST_PORT_QUERY_INPUT_TUNNEL_TYPE_LAST \
+               HWRM_TUNNEL_DST_PORT_QUERY_INPUT_TUNNEL_TYPE_IPGRE_V1
+       uint8_t unused_0[7];
 } __attribute__((packed));
 
-/* hwrm_stat_ctx_free_output (size:128b/16B) */
-struct hwrm_stat_ctx_free_output {
+/* hwrm_tunnel_dst_port_query_output (size:128b/16B) */
+struct hwrm_tunnel_dst_port_query_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -25638,8 +24780,24 @@ struct hwrm_stat_ctx_free_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* This is the statistics context ID value. */
-       uint32_t        stat_ctx_id;
+       /*
+        * This field represents the identifier of L4 destination port
+        * used for the given tunnel type. This field is valid for
+        * specific tunnel types that use layer 4 (e.g. UDP)
+        * transports for tunneling.
+        */
+       uint16_t        tunnel_dst_port_id;
+       /*
+        * This field represents the value of L4 destination port
+        * identified by tunnel_dst_port_id. This field is valid for
+        * specific tunnel types that use layer 4 (e.g. UDP)
+        * transports for tunneling.
+        * This field is in network byte order.
+        *
+        * A value of 0 means that the destination port is not
+        * configured.
+        */
+       uint16_t        tunnel_dst_port_val;
        uint8_t unused_0[3];
        /*
         * This field is used in Output records to indicate that the output
@@ -25651,13 +24809,13 @@ struct hwrm_stat_ctx_free_output {
        uint8_t valid;
 } __attribute__((packed));
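
Because tunnel_dst_port_val is carried in network byte order and 0 means unconfigured, interpreting the response takes one byte swap; a sketch using ntohs() from <arpa/inet.h> as the assumed helper:

	#include <stdint.h>
	#include <arpa/inet.h>	/* ntohs */

	/* Sketch: configured UDP destination port in host order (0 if unset). */
	static uint16_t
	tunnel_dst_port_host(const struct hwrm_tunnel_dst_port_query_output *o)
	{
		return ntohs(o->tunnel_dst_port_val);
	}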
 
-/***********************
- * hwrm_stat_ctx_query *
- ***********************/
+/******************************
+ * hwrm_tunnel_dst_port_alloc *
+ ******************************/
 
 
-/* hwrm_stat_ctx_query_input (size:192b/24B) */
-struct hwrm_stat_ctx_query_input {
+/* hwrm_tunnel_dst_port_alloc_input (size:192b/24B) */
+struct hwrm_tunnel_dst_port_alloc_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -25685,13 +24843,39 @@ struct hwrm_stat_ctx_query_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* ID of the statistics context that is being queried. */
-       uint32_t        stat_ctx_id;
-       uint8_t unused_0[4];
+       /* Tunnel Type. */
+       uint8_t tunnel_type;
+       /* Virtual eXtensible Local Area Network (VXLAN) */
+       #define HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_VXLAN \
+               UINT32_C(0x1)
+       /* Generic Network Virtualization Encapsulation (Geneve) */
+       #define HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_GENEVE \
+               UINT32_C(0x5)
+       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
+       #define HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+               UINT32_C(0x9)
+       /* Enhanced Generic Routing Encapsulation (GRE version 1) inside IP datagram payload */
+       #define HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_IPGRE_V1 \
+               UINT32_C(0xa)
+       #define HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_LAST \
+               HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_IPGRE_V1
+       uint8_t unused_0;
+       /*
+        * This field represents the value of L4 destination port used
+        * for the given tunnel type. This field is valid for
+        * specific tunnel types that use layer 4 (e.g. UDP)
+        * transports for tunneling.
+        *
+        * This field is in network byte order.
+        *
+        * A value of 0 shall fail the command.
+        */
+       uint16_t        tunnel_dst_port_val;
+       uint8_t unused_1[4];
 } __attribute__((packed));
 
-/* hwrm_stat_ctx_query_output (size:1408b/176B) */
-struct hwrm_stat_ctx_query_output {
+/* hwrm_tunnel_dst_port_alloc_output (size:128b/16B) */
+struct hwrm_tunnel_dst_port_alloc_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -25700,47 +24884,12 @@ struct hwrm_stat_ctx_query_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       /* Number of transmitted unicast packets */
-       uint64_t        tx_ucast_pkts;
-       /* Number of transmitted multicast packets */
-       uint64_t        tx_mcast_pkts;
-       /* Number of transmitted broadcast packets */
-       uint64_t        tx_bcast_pkts;
-       /* Number of transmitted packets with error */
-       uint64_t        tx_err_pkts;
-       /* Number of dropped packets on transmit path */
-       uint64_t        tx_drop_pkts;
-       /* Number of transmitted bytes for unicast traffic */
-       uint64_t        tx_ucast_bytes;
-       /* Number of transmitted bytes for multicast traffic */
-       uint64_t        tx_mcast_bytes;
-       /* Number of transmitted bytes for broadcast traffic */
-       uint64_t        tx_bcast_bytes;
-       /* Number of received unicast packets */
-       uint64_t        rx_ucast_pkts;
-       /* Number of received multicast packets */
-       uint64_t        rx_mcast_pkts;
-       /* Number of received broadcast packets */
-       uint64_t        rx_bcast_pkts;
-       /* Number of received packets with error */
-       uint64_t        rx_err_pkts;
-       /* Number of dropped packets on received path */
-       uint64_t        rx_drop_pkts;
-       /* Number of received bytes for unicast traffic */
-       uint64_t        rx_ucast_bytes;
-       /* Number of received bytes for multicast traffic */
-       uint64_t        rx_mcast_bytes;
-       /* Number of received bytes for broadcast traffic */
-       uint64_t        rx_bcast_bytes;
-       /* Number of aggregated unicast packets */
-       uint64_t        rx_agg_pkts;
-       /* Number of aggregated unicast bytes */
-       uint64_t        rx_agg_bytes;
-       /* Number of aggregation events */
-       uint64_t        rx_agg_events;
-       /* Number of aborted aggregations */
-       uint64_t        rx_agg_aborts;
-       uint8_t unused_0[7];
+       /*
+        * Identifier of a tunnel L4 destination port value. Only applies to
+        * tunnel types that have L4 destination port parameters.
+        */
+       uint16_t        tunnel_dst_port_id;
+       uint8_t unused_0[5];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -25751,13 +24900,13 @@ struct hwrm_stat_ctx_query_output {
        uint8_t valid;
 } __attribute__((packed));
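
A sketch of allocating the IANA-assigned VXLAN port with this command; per the comments above, the value must be in network byte order and must not be 0. htons() and hwrm_send() are assumptions:

	#include <string.h>
	#include <arpa/inet.h>	/* htons */

	extern int hwrm_send(void *req, size_t len);	/* hypothetical transport */

	/* Sketch: register UDP destination port 4789 for VXLAN parsing. */
	static int tunnel_vxlan_port_alloc(void)
	{
		struct hwrm_tunnel_dst_port_alloc_input req;

		memset(&req, 0, sizeof(req));
		req.tunnel_type = HWRM_TUNNEL_DST_PORT_ALLOC_INPUT_TUNNEL_TYPE_VXLAN;
		req.tunnel_dst_port_val = htons(4789);	/* network byte order; 0 fails */
		return hwrm_send(&req, sizeof(req));
	}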
 
-/***************************
- * hwrm_stat_ctx_clr_stats *
- ***************************/
+/*****************************
+ * hwrm_tunnel_dst_port_free *
+ *****************************/
 
 
-/* hwrm_stat_ctx_clr_stats_input (size:192b/24B) */
-struct hwrm_stat_ctx_clr_stats_input {
+/* hwrm_tunnel_dst_port_free_input (size:192b/24B) */
+struct hwrm_tunnel_dst_port_free_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -25785,13 +24934,33 @@ struct hwrm_stat_ctx_clr_stats_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
-       /* ID of the statistics context that is being queried. */
-       uint32_t        stat_ctx_id;
-       uint8_t unused_0[4];
+       /* Tunnel Type. */
+       uint8_t tunnel_type;
+       /* Virtual eXtensible Local Area Network (VXLAN) */
+       #define HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_VXLAN \
+               UINT32_C(0x1)
+       /* Generic Network Virtualization Encapsulation (Geneve) */
+       #define HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_GENEVE \
+               UINT32_C(0x5)
+       /* IPV4 over virtual eXtensible Local Area Network (IPV4oVXLAN) */
+       #define HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_VXLAN_V4 \
+               UINT32_C(0x9)
+       /* Enhanced Generic Routing Encapsulation (GRE version 1) inside IP datagram payload */
+       #define HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_IPGRE_V1 \
+               UINT32_C(0xa)
+       #define HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_LAST \
+               HWRM_TUNNEL_DST_PORT_FREE_INPUT_TUNNEL_TYPE_IPGRE_V1
+       uint8_t unused_0;
+       /*
+        * Identifier of a tunnel L4 destination port value. Only applies to
+        * tunnel types that have L4 destination port parameters.
+        */
+       uint16_t        tunnel_dst_port_id;
+       uint8_t unused_1[4];
 } __attribute__((packed));
-
-/* hwrm_stat_ctx_clr_stats_output (size:128b/16B) */
-struct hwrm_stat_ctx_clr_stats_output {
+
+/* hwrm_tunnel_dst_port_free_output (size:128b/16B) */
+struct hwrm_tunnel_dst_port_free_output {
        /* The specific error status for the command. */
        uint16_t        error_code;
        /* The HWRM command request type. */
@@ -25800,7 +24969,7 @@ struct hwrm_stat_ctx_clr_stats_output {
        uint16_t        seq_id;
        /* The length of the response data in number of bytes. */
        uint16_t        resp_len;
-       uint8_t unused_0[7];
+       uint8_t unused_1[7];
        /*
         * This field is used in Output records to indicate that the output
         * is completely written to RAM.  This field should be read as '1'
@@ -25811,13 +24980,58 @@ struct hwrm_stat_ctx_clr_stats_output {
        uint8_t valid;
 } __attribute__((packed));
 
-/********************
- * hwrm_pcie_qstats *
- ********************/
+/* Periodic statistics context DMA to host. */
+/* ctx_hw_stats (size:1280b/160B) */
+struct ctx_hw_stats {
+       /* Number of received unicast packets */
+       uint64_t        rx_ucast_pkts;
+       /* Number of received multicast packets */
+       uint64_t        rx_mcast_pkts;
+       /* Number of received broadcast packets */
+       uint64_t        rx_bcast_pkts;
+       /* Number of discarded packets on received path */
+       uint64_t        rx_discard_pkts;
+       /* Number of dropped packets on received path */
+       uint64_t        rx_drop_pkts;
+       /* Number of received bytes for unicast traffic */
+       uint64_t        rx_ucast_bytes;
+       /* Number of received bytes for multicast traffic */
+       uint64_t        rx_mcast_bytes;
+       /* Number of received bytes for broadcast traffic */
+       uint64_t        rx_bcast_bytes;
+       /* Number of transmitted unicast packets */
+       uint64_t        tx_ucast_pkts;
+       /* Number of transmitted multicast packets */
+       uint64_t        tx_mcast_pkts;
+       /* Number of transmitted broadcast packets */
+       uint64_t        tx_bcast_pkts;
+       /* Number of discarded packets on transmit path */
+       uint64_t        tx_discard_pkts;
+       /* Number of dropped packets on transmit path */
+       uint64_t        tx_drop_pkts;
+       /* Number of transmitted bytes for unicast traffic */
+       uint64_t        tx_ucast_bytes;
+       /* Number of transmitted bytes for multicast traffic */
+       uint64_t        tx_mcast_bytes;
+       /* Number of transmitted bytes for broadcast traffic */
+       uint64_t        tx_bcast_bytes;
+       /* Number of TPA packets */
+       uint64_t        tpa_pkts;
+       /* Number of TPA bytes */
+       uint64_t        tpa_bytes;
+       /* Number of TPA events */
+       uint64_t        tpa_events;
+       /* Number of TPA aborts */
+       uint64_t        tpa_aborts;
+} __attribute__((packed));
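
Since this block is DMA'd to the host on the period configured at stat context allocation, consumers read the fields in place; a sketch aggregating the RX packet counters (any wire-endianness conversion is assumed to have been done already):

	#include <stdint.h>

	/* Sketch: total received packets from a DMA'd statistics block. */
	static uint64_t ctx_total_rx_pkts(const struct ctx_hw_stats *s)
	{
		return s->rx_ucast_pkts + s->rx_mcast_pkts + s->rx_bcast_pkts;
	}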
 
+/***********************
+ * hwrm_stat_ctx_alloc *
+ ***********************/
 
-/* hwrm_pcie_qstats_input (size:256b/32B) */
-struct hwrm_pcie_qstats_input {
+
+/* hwrm_stat_ctx_alloc_input (size:256b/32B) */
+struct hwrm_stat_ctx_alloc_input {
        /* The HWRM command request type. */
        uint16_t        req_type;
        /*
@@ -25845,412 +25059,348 @@ struct hwrm_pcie_qstats_input {
         * point to a physically contiguous block of memory.
         */
        uint64_t        resp_addr;
+       /* This is the address for statistic block. */
+       uint64_t        stats_dma_addr;
        /*
-        * The size of PCIe statistics block in bytes.
-        * Firmware will DMA the PCIe statistics to
-        * the host with this field size in the response.
-        */
-       uint16_t        pcie_stat_size;
-       uint8_t unused_0[6];
-       /*
-        * This is the host address where
-        * PCIe statistics will be stored
-        */
-       uint64_t        pcie_stat_host_addr;
-} __attribute__((packed));
-
-/* hwrm_pcie_qstats_output (size:128b/16B) */
-struct hwrm_pcie_qstats_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       /* The size of PCIe statistics block in bytes. */
-       uint16_t        pcie_stat_size;
-       uint8_t unused_0[5];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
-
-/* Port Tx Statistics Formats */
-/* tx_port_stats (size:3264b/408B) */
-struct tx_port_stats {
-       /* Total Number of 64 Bytes frames transmitted */
-       uint64_t        tx_64b_frames;
-       /* Total Number of 65-127 Bytes frames transmitted */
-       uint64_t        tx_65b_127b_frames;
-       /* Total Number of 128-255 Bytes frames transmitted */
-       uint64_t        tx_128b_255b_frames;
-       /* Total Number of 256-511 Bytes frames transmitted */
-       uint64_t        tx_256b_511b_frames;
-       /* Total Number of 512-1023 Bytes frames transmitted */
-       uint64_t        tx_512b_1023b_frames;
-       /* Total Number of 1024-1518 Bytes frames transmitted */
-       uint64_t        tx_1024b_1518_frames;
-       /*
-        * Total Number of each good VLAN (exludes FCS errors)
-        * frame transmitted which is 1519 to 1522 bytes in length
-        * inclusive (excluding framing bits but including FCS bytes).
-        */
-       uint64_t        tx_good_vlan_frames;
-       /* Total Number of 1519-2047 Bytes frames transmitted */
-       uint64_t        tx_1519b_2047_frames;
-       /* Total Number of 2048-4095 Bytes frames transmitted */
-       uint64_t        tx_2048b_4095b_frames;
-       /* Total Number of 4096-9216 Bytes frames transmitted */
-       uint64_t        tx_4096b_9216b_frames;
-       /* Total Number of 9217-16383 Bytes frames transmitted */
-       uint64_t        tx_9217b_16383b_frames;
-       /* Total Number of good frames transmitted */
-       uint64_t        tx_good_frames;
-       /* Total Number of frames transmitted */
-       uint64_t        tx_total_frames;
-       /* Total number of unicast frames transmitted */
-       uint64_t        tx_ucast_frames;
-       /* Total number of multicast frames transmitted */
-       uint64_t        tx_mcast_frames;
-       /* Total number of broadcast frames transmitted */
-       uint64_t        tx_bcast_frames;
-       /* Total number of PAUSE control frames transmitted */
-       uint64_t        tx_pause_frames;
-       /*
-        * Total number of PFC/per-priority PAUSE
-        * control frames transmitted
-        */
-       uint64_t        tx_pfc_frames;
-       /* Total number of jabber frames transmitted */
-       uint64_t        tx_jabber_frames;
-       /* Total number of frames transmitted with FCS error */
-       uint64_t        tx_fcs_err_frames;
-       /* Total number of control frames transmitted */
-       uint64_t        tx_control_frames;
-       /* Total number of over-sized frames transmitted */
-       uint64_t        tx_oversz_frames;
-       /* Total number of frames with single deferral */
-       uint64_t        tx_single_dfrl_frames;
-       /* Total number of frames with multiple deferrals */
-       uint64_t        tx_multi_dfrl_frames;
-       /* Total number of frames with single collision */
-       uint64_t        tx_single_coll_frames;
-       /* Total number of frames with multiple collisions */
-       uint64_t        tx_multi_coll_frames;
-       /* Total number of frames with late collisions */
-       uint64_t        tx_late_coll_frames;
-       /* Total number of frames with excessive collisions */
-       uint64_t        tx_excessive_coll_frames;
-       /* Total number of fragmented frames transmitted */
-       uint64_t        tx_frag_frames;
-       /* Total number of transmit errors */
-       uint64_t        tx_err;
-       /* Total number of single VLAN tagged frames transmitted */
-       uint64_t        tx_tagged_frames;
-       /* Total number of double VLAN tagged frames transmitted */
-       uint64_t        tx_dbl_tagged_frames;
-       /* Total number of runt frames transmitted */
-       uint64_t        tx_runt_frames;
-       /* Total number of TX FIFO under runs */
-       uint64_t        tx_fifo_underruns;
-       /*
-        * Total number of PFC frames with PFC enabled bit for
-        * Pri 0 transmitted
-        */
-       uint64_t        tx_pfc_ena_frames_pri0;
-       /*
-        * Total number of PFC frames with PFC enabled bit for
-        * Pri 1 transmitted
-        */
-       uint64_t        tx_pfc_ena_frames_pri1;
-       /*
-        * Total number of PFC frames with PFC enabled bit for
-        * Pri 2 transmitted
-        */
-       uint64_t        tx_pfc_ena_frames_pri2;
-       /*
-        * Total number of PFC frames with PFC enabled bit for
-        * Pri 3 transmitted
-        */
-       uint64_t        tx_pfc_ena_frames_pri3;
-       /*
-        * Total number of PFC frames with PFC enabled bit for
-        * Pri 4 transmitted
-        */
-       uint64_t        tx_pfc_ena_frames_pri4;
-       /*
-        * Total number of PFC frames with PFC enabled bit for
-        * Pri 5 transmitted
-        */
-       uint64_t        tx_pfc_ena_frames_pri5;
-       /*
-        * Total number of PFC frames with PFC enabled bit for
-        * Pri 6 transmitted
-        */
-       uint64_t        tx_pfc_ena_frames_pri6;
-       /*
-        * Total number of PFC frames with PFC enabled bit for
-        * Pri 7 transmitted
-        */
-       uint64_t        tx_pfc_ena_frames_pri7;
-       /* Total number of EEE LPI Events on TX */
-       uint64_t        tx_eee_lpi_events;
-       /* EEE LPI Duration Counter on TX */
-       uint64_t        tx_eee_lpi_duration;
-       /*
-        * Total number of Link Level Flow Control (LLFC) messages
-        * transmitted
+        * The statistic block update period in ms.
+        * e.g. 250ms, 500ms, 750ms, 1000ms.
+        * If update_period_ms is 0, then the stats update
+        * shall never be done and the DMA address shall not be used.
+        * In this case, the stat block can only be read by the
+        * hwrm_stat_ctx_query command.
         */
-       uint64_t        tx_llfc_logical_msgs;
-       /* Total number of HCFC messages transmitted */
-       uint64_t        tx_hcfc_msgs;
-       /* Total number of TX collisions */
-       uint64_t        tx_total_collisions;
-       /* Total number of transmitted bytes */
-       uint64_t        tx_bytes;
-       /* Total number of end-to-end HOL frames */
-       uint64_t        tx_xthol_frames;
-       /* Total Tx Drops per Port reported by STATS block */
-       uint64_t        tx_stat_discard;
-       /* Total Tx Error Drops per Port reported by STATS block */
-       uint64_t        tx_stat_error;
-} __attribute__((packed));
-
-/* Port Rx Statistics Formats */
-/* rx_port_stats (size:4224b/528B) */
-struct rx_port_stats {
-       /* Total Number of 64 Bytes frames received */
-       uint64_t        rx_64b_frames;
-       /* Total Number of 65-127 Bytes frames received */
-       uint64_t        rx_65b_127b_frames;
-       /* Total Number of 128-255 Bytes frames received */
-       uint64_t        rx_128b_255b_frames;
-       /* Total Number of 256-511 Bytes frames received */
-       uint64_t        rx_256b_511b_frames;
-       /* Total Number of 512-1023 Bytes frames received */
-       uint64_t        rx_512b_1023b_frames;
-       /* Total Number of 1024-1518 Bytes frames received */
-       uint64_t        rx_1024b_1518_frames;
+       uint32_t        update_period_ms;
        /*
-        * Total Number of each good VLAN (exludes FCS errors)
-        * frame received which is 1519 to 1522 bytes in length
-        * inclusive (excluding framing bits but including FCS bytes).
+        * This field is used to specify statistics context specific
+        * configuration flags.
         */
-       uint64_t        rx_good_vlan_frames;
-       /* Total Number of 1519-2047 Bytes frames received */
-       uint64_t        rx_1519b_2047b_frames;
-       /* Total Number of 2048-4095 Bytes frames received */
-       uint64_t        rx_2048b_4095b_frames;
-       /* Total Number of 4096-9216 Bytes frames received */
-       uint64_t        rx_4096b_9216b_frames;
-       /* Total Number of 9217-16383 Bytes frames received */
-       uint64_t        rx_9217b_16383b_frames;
-       /* Total number of frames received */
-       uint64_t        rx_total_frames;
-       /* Total number of unicast frames received */
-       uint64_t        rx_ucast_frames;
-       /* Total number of multicast frames received */
-       uint64_t        rx_mcast_frames;
-       /* Total number of broadcast frames received */
-       uint64_t        rx_bcast_frames;
-       /* Total number of received frames with FCS error */
-       uint64_t        rx_fcs_err_frames;
-       /* Total number of control frames received */
-       uint64_t        rx_ctrl_frames;
-       /* Total number of PAUSE frames received */
-       uint64_t        rx_pause_frames;
-       /* Total number of PFC frames received */
-       uint64_t        rx_pfc_frames;
+       uint8_t stat_ctx_flags;
        /*
-        * Total number of frames received with an unsupported
-        * opcode
+        * When this bit is set to '1', the statistics context shall be
+        * allocated for RoCE traffic only. In this case, traffic other
+        * than offloaded RoCE traffic shall not be included in this
+        * statistic context.
+        * When this bit is set to '0', the statistics context shall be
+        * used for the network traffic other than offloaded RoCE traffic.
         */
-       uint64_t        rx_unsupported_opcode_frames;
+       #define HWRM_STAT_CTX_ALLOC_INPUT_STAT_CTX_FLAGS_ROCE     UINT32_C(0x1)
+       uint8_t unused_0[3];
+} __attribute__((packed));
+
+/* hwrm_stat_ctx_alloc_output (size:128b/16B) */
+struct hwrm_stat_ctx_alloc_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       /* This is the statistics context ID value. */
+       uint32_t        stat_ctx_id;
+       uint8_t unused_0[3];
        /*
-        * Total number of frames received with an unsupported
-        * DA for pause and PFC
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       uint64_t        rx_unsupported_da_pausepfc_frames;
-       /* Total number of frames received with an unsupported SA */
-       uint64_t        rx_wrong_sa_frames;
-       /* Total number of received packets with alignment error */
-       uint64_t        rx_align_err_frames;
-       /* Total number of received frames with out-of-range length */
-       uint64_t        rx_oor_len_frames;
-       /* Total number of received frames with error termination */
-       uint64_t        rx_code_err_frames;
+       uint8_t valid;
+} __attribute__((packed));
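
For illustration, a minimal sketch of driving this command pair. The
hwrm_send() transport helper is hypothetical, and the stats_dma_addr
field is assumed from the portion of the input structure preceding this
excerpt; real drivers go through their own HWRM mailbox plumbing.

/* Hypothetical transport helper, declared only for the sketch. */
static int hwrm_send(void *req, size_t req_len, void *resp, size_t resp_len);

/*
 * Hedged sketch: allocate a statistics context that firmware updates
 * every 500 ms.
 */
static int
example_stat_ctx_alloc(uint64_t stats_dma_addr, uint32_t *ctx_id)
{
	struct hwrm_stat_ctx_alloc_input req = { 0 };
	struct hwrm_stat_ctx_alloc_output resp = { 0 };

	req.req_type = rte_cpu_to_le_16(HWRM_STAT_CTX_ALLOC);
	req.stats_dma_addr = rte_cpu_to_le_64(stats_dma_addr);
	req.update_period_ms = rte_cpu_to_le_32(500);
	req.stat_ctx_flags = 0;	/* normal traffic, not RoCE-only */

	if (hwrm_send(&req, sizeof(req), &resp, sizeof(resp)) != 0)
		return -1;
	/* The 'valid' byte is written last; check it before trusting resp. */
	if (resp.valid != 1)
		return -1;
	*ctx_id = rte_le_to_cpu_32(resp.stat_ctx_id);
	return 0;
}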
+
+/**********************
+ * hwrm_stat_ctx_free *
+ **********************/
+
+
+/* hwrm_stat_ctx_free_input (size:192b/24B) */
+struct hwrm_stat_ctx_free_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * Total number of received frames with a false carrier is
-        * detected during idle, as defined by RX_ER samples active
-        * and RXD is 0xE. The event is reported along with the
-        * statistics generated on the next received frame. Only
-        * one false carrier condition can be detected and logged
-        * between frames.
-        *
-        * Carrier event, valid for 10M/100M speed modes only.
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       uint64_t        rx_false_carrier_frames;
-       /* Total number of over-sized frames received */
-       uint64_t        rx_ovrsz_frames;
-       /* Total number of jabber packets received */
-       uint64_t        rx_jbr_frames;
-       /* Total number of received frames with MTU error */
-       uint64_t        rx_mtu_err_frames;
-       /* Total number of received frames with CRC match */
-       uint64_t        rx_match_crc_frames;
-       /* Total number of frames received promiscuously */
-       uint64_t        rx_promiscuous_frames;
+       uint16_t        cmpl_ring;
        /*
-        * Total number of received frames with one or two VLAN
-        * tags
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       uint64_t        rx_tagged_frames;
-       /* Total number of received frames with two VLAN tags */
-       uint64_t        rx_double_tagged_frames;
-       /* Total number of truncated frames received */
-       uint64_t        rx_trunc_frames;
-       /* Total number of good frames (without errors) received */
-       uint64_t        rx_good_frames;
+       uint16_t        seq_id;
        /*
-        * Total number of received PFC frames with transition from
-        * XON to XOFF on Pri 0
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       uint64_t        rx_pfc_xon2xoff_frames_pri0;
+       uint16_t        target_id;
        /*
-        * Total number of received PFC frames with transition from
-        * XON to XOFF on Pri 1
+        * A physical address pointer to a host buffer into which the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       uint64_t        rx_pfc_xon2xoff_frames_pri1;
+       uint64_t        resp_addr;
+       /* ID of the statistics context that is being freed. */
+       uint32_t        stat_ctx_id;
+       uint8_t unused_0[4];
+} __attribute__((packed));
+
+/* hwrm_stat_ctx_free_output (size:128b/16B) */
+struct hwrm_stat_ctx_free_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       /* This is the statistics context ID value. */
+       uint32_t        stat_ctx_id;
+       uint8_t unused_0[3];
        /*
-        * Total number of received PFC frames with transition from
-        * XON to XOFF on Pri 2
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       uint64_t        rx_pfc_xon2xoff_frames_pri2;
+       uint8_t valid;
+} __attribute__((packed));
+
+/***********************
+ * hwrm_stat_ctx_query *
+ ***********************/
+
+
+/* hwrm_stat_ctx_query_input (size:192b/24B) */
+struct hwrm_stat_ctx_query_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * Total number of received PFC frames with transition from
-        * XON to XOFF on Pri 3
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       uint64_t        rx_pfc_xon2xoff_frames_pri3;
+       uint16_t        cmpl_ring;
        /*
-        * Total number of received PFC frames with transition from
-        * XON to XOFF on Pri 4
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       uint64_t        rx_pfc_xon2xoff_frames_pri4;
+       uint16_t        seq_id;
        /*
-        * Total number of received PFC frames with transition from
-        * XON to XOFF on Pri 5
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       uint64_t        rx_pfc_xon2xoff_frames_pri5;
+       uint16_t        target_id;
        /*
-        * Total number of received PFC frames with transition from
-        * XON to XOFF on Pri 6
+        * A physical address pointer to a host buffer into which the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       uint64_t        rx_pfc_xon2xoff_frames_pri6;
+       uint64_t        resp_addr;
+       /* ID of the statistics context that is being queried. */
+       uint32_t        stat_ctx_id;
+       uint8_t unused_0[4];
+} __attribute__((packed));
+
+/* hwrm_stat_ctx_query_output (size:1408b/176B) */
+struct hwrm_stat_ctx_query_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       /* Number of transmitted unicast packets */
+       uint64_t        tx_ucast_pkts;
+       /* Number of transmitted multicast packets */
+       uint64_t        tx_mcast_pkts;
+       /* Number of transmitted broadcast packets */
+       uint64_t        tx_bcast_pkts;
+       /* Number of transmitted packets with error */
+       uint64_t        tx_err_pkts;
+       /* Number of dropped packets on transmit path */
+       uint64_t        tx_drop_pkts;
+       /* Number of transmitted bytes for unicast traffic */
+       uint64_t        tx_ucast_bytes;
+       /* Number of transmitted bytes for multicast traffic */
+       uint64_t        tx_mcast_bytes;
+       /* Number of transmitted bytes for broadcast traffic */
+       uint64_t        tx_bcast_bytes;
+       /* Number of received unicast packets */
+       uint64_t        rx_ucast_pkts;
+       /* Number of received multicast packets */
+       uint64_t        rx_mcast_pkts;
+       /* Number of received broadcast packets */
+       uint64_t        rx_bcast_pkts;
+       /* Number of received packets with error */
+       uint64_t        rx_err_pkts;
+       /* Number of dropped packets on received path */
+       uint64_t        rx_drop_pkts;
+       /* Number of received bytes for unicast traffic */
+       uint64_t        rx_ucast_bytes;
+       /* Number of received bytes for multicast traffic */
+       uint64_t        rx_mcast_bytes;
+       /* Number of received bytes for broadcast traffic */
+       uint64_t        rx_bcast_bytes;
+       /* Number of aggregated unicast packets */
+       uint64_t        rx_agg_pkts;
+       /* Number of aggregated unicast bytes */
+       uint64_t        rx_agg_bytes;
+       /* Number of aggregation events */
+       uint64_t        rx_agg_events;
+       /* Number of aborted aggregations */
+       uint64_t        rx_agg_aborts;
+       uint8_t unused_0[7];
        /*
-        * Total number of received PFC frames with transition from
-        * XON to XOFF on Pri 7
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       uint64_t        rx_pfc_xon2xoff_frames_pri7;
+       uint8_t valid;
+} __attribute__((packed));
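
Every counter in this response is a little-endian 64-bit value, so
post-processing is just byte-order conversion and addition. A small
sketch deriving packet totals from a completed response, using only
fields defined above:

/* Sketch: fold the per-class counters into simple totals. */
static void
example_query_totals(const struct hwrm_stat_ctx_query_output *o,
		     uint64_t *tx_pkts, uint64_t *rx_pkts)
{
	*tx_pkts = rte_le_to_cpu_64(o->tx_ucast_pkts) +
		   rte_le_to_cpu_64(o->tx_mcast_pkts) +
		   rte_le_to_cpu_64(o->tx_bcast_pkts);
	*rx_pkts = rte_le_to_cpu_64(o->rx_ucast_pkts) +
		   rte_le_to_cpu_64(o->rx_mcast_pkts) +
		   rte_le_to_cpu_64(o->rx_bcast_pkts);
}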
+
+/***************************
+ * hwrm_stat_ctx_clr_stats *
+ ***************************/
+
+
+/* hwrm_stat_ctx_clr_stats_input (size:192b/24B) */
+struct hwrm_stat_ctx_clr_stats_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * Total number of received PFC frames with PFC enabled
-        * bit for Pri 0
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       uint64_t        rx_pfc_ena_frames_pri0;
+       uint16_t        cmpl_ring;
        /*
-        * Total number of received PFC frames with PFC enabled
-        * bit for Pri 1
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       uint64_t        rx_pfc_ena_frames_pri1;
+       uint16_t        seq_id;
        /*
-        * Total number of received PFC frames with PFC enabled
-        * bit for Pri 2
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       uint64_t        rx_pfc_ena_frames_pri2;
+       uint16_t        target_id;
        /*
-        * Total number of received PFC frames with PFC enabled
-        * bit for Pri 3
+        * A physical address pointer to a host buffer into which the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       uint64_t        rx_pfc_ena_frames_pri3;
+       uint64_t        resp_addr;
+       /* ID of the statistics context whose statistics are being cleared. */
+       uint32_t        stat_ctx_id;
+       uint8_t unused_0[4];
+} __attribute__((packed));
+
+/* hwrm_stat_ctx_clr_stats_output (size:128b/16B) */
+struct hwrm_stat_ctx_clr_stats_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       uint8_t unused_0[7];
        /*
-        * Total number of received PFC frames with PFC enabled
-        * bit for Pri 4
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
         */
-       uint64_t        rx_pfc_ena_frames_pri4;
+       uint8_t valid;
+} __attribute__((packed));
+
+/********************
+ * hwrm_pcie_qstats *
+ ********************/
+
+
+/* hwrm_pcie_qstats_input (size:256b/32B) */
+struct hwrm_pcie_qstats_input {
+       /* The HWRM command request type. */
+       uint16_t        req_type;
        /*
-        * Total number of received PFC frames with PFC enabled
-        * bit for Pri 5
+        * The completion ring to send the completion event on. This should
+        * be the NQ ID returned from the `nq_alloc` HWRM command.
         */
-       uint64_t        rx_pfc_ena_frames_pri5;
+       uint16_t        cmpl_ring;
        /*
-        * Total number of received PFC frames with PFC enabled
-        * bit for Pri 6
+        * The sequence ID is used by the driver for tracking multiple
+        * commands. This ID is treated as opaque data by the firmware and
+        * the value is returned in the `hwrm_resp_hdr` upon completion.
         */
-       uint64_t        rx_pfc_ena_frames_pri6;
+       uint16_t        seq_id;
        /*
-        * Total number of received PFC frames with PFC enabled
-        * bit for Pri 7
+        * The target ID of the command:
+        * * 0x0-0xFFF8 - The function ID
+        * * 0xFFF8-0xFFFE - Reserved for internal processors
+        * * 0xFFFF - HWRM
         */
-       uint64_t        rx_pfc_ena_frames_pri7;
-       /* Total Number of frames received with SCH CRC error */
-       uint64_t        rx_sch_crc_err_frames;
-       /* Total Number of under-sized frames received */
-       uint64_t        rx_undrsz_frames;
-       /* Total Number of fragmented frames received */
-       uint64_t        rx_frag_frames;
-       /* Total number of RX EEE LPI Events */
-       uint64_t        rx_eee_lpi_events;
-       /* EEE LPI Duration Counter on RX */
-       uint64_t        rx_eee_lpi_duration;
+       uint16_t        target_id;
        /*
-        * Total number of physical type Link Level Flow Control
-        * (LLFC) messages received
+        * A physical address pointer to a host buffer into which the
+        * command's response data will be written. This can be either a host
+        * physical address (HPA) or a guest physical address (GPA) and must
+        * point to a physically contiguous block of memory.
         */
-       uint64_t        rx_llfc_physical_msgs;
+       uint64_t        resp_addr;
        /*
-        * Total number of logical type Link Level Flow Control
-        * (LLFC) messages received
+        * The size of the PCIe statistics block in bytes.
+        * Firmware will DMA the PCIe statistics to the host
+        * buffer and report the size actually written in the
+        * response.
         */
-       uint64_t        rx_llfc_logical_msgs;
+       uint16_t        pcie_stat_size;
+       uint8_t unused_0[6];
        /*
-        * Total number of logical type Link Level Flow Control
-        * (LLFC) messages received with CRC error
+        * This is the host address where
+        * PCIe statistics will be stored
         */
-       uint64_t        rx_llfc_msgs_with_crc_err;
-       /* Total number of HCFC messages received */
-       uint64_t        rx_hcfc_msgs;
-       /* Total number of HCFC messages received with CRC error */
-       uint64_t        rx_hcfc_msgs_with_crc_err;
-       /* Total number of received bytes */
-       uint64_t        rx_bytes;
-       /* Total number of bytes received in runt frames */
-       uint64_t        rx_runt_bytes;
-       /* Total number of runt frames received */
-       uint64_t        rx_runt_frames;
-       /* Total Rx Discards per Port reported by STATS block */
-       uint64_t        rx_stat_discard;
-       uint64_t        rx_stat_err;
+       uint64_t        pcie_stat_host_addr;
 } __attribute__((packed));
 
-/* Port Rx Statistics extended Formats */
-/* rx_port_stats_ext (size:320b/40B) */
-struct rx_port_stats_ext {
-       /* Number of times link state changed to down */
-       uint64_t        link_down_events;
-       /* Number of times the idle rings with pause bit are found */
-       uint64_t        continuous_pause_events;
-       /* Number of times the active rings pause bit resumed back */
-       uint64_t        resume_pause_events;
-       /* Number of times, the ROCE cos queue PFC is disabled to avoid pause flood/burst */
-       uint64_t        continuous_roce_pause_events;
-       /* Number of times, the ROCE cos queue PFC is enabled back */
-       uint64_t        resume_roce_pause_events;
+/* hwrm_pcie_qstats_output (size:128b/16B) */
+struct hwrm_pcie_qstats_output {
+       /* The specific error status for the command. */
+       uint16_t        error_code;
+       /* The HWRM command request type. */
+       uint16_t        req_type;
+       /* The sequence ID from the original command. */
+       uint16_t        seq_id;
+       /* The length of the response data in number of bytes. */
+       uint16_t        resp_len;
+       /* The size of PCIe statistics block in bytes. */
+       uint16_t        pcie_stat_size;
+       uint8_t unused_0[5];
+       /*
+        * This field is used in Output records to indicate that the output
+        * is completely written to RAM.  This field should be read as '1'
+        * to indicate that the output has been completely written.
+        * When writing a command completion or response to an internal processor,
+        * the order of writes has to be such that this field is written last.
+        */
+       uint8_t valid;
 } __attribute__((packed));
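
Unlike the inline responses above, hwrm_pcie_qstats returns its payload
out of band: the host supplies a DMA-able buffer and its size, and
firmware writes the statistics block there, reporting the size actually
written in the response. A hedged sketch of the setup; using a memzone
and sizing the buffer from the pcie_ctx_hw_stats layout (one of the
PCIe statistics formats below) is a plausible approach, not necessarily
the driver's canonical one.

/* Sketch: hand firmware a physically contiguous buffer for PCIe stats. */
static void
example_pcie_qstats_setup(void)
{
	const struct rte_memzone *mz = rte_memzone_reserve(
			"example_pcie_qstats",
			sizeof(struct pcie_ctx_hw_stats), SOCKET_ID_ANY, 0);
	struct hwrm_pcie_qstats_input req = { 0 };

	if (mz == NULL)
		return;
	req.pcie_stat_size = rte_cpu_to_le_16(
			sizeof(struct pcie_ctx_hw_stats));
	req.pcie_stat_host_addr = rte_cpu_to_le_64(mz->iova);
	/* ...send via the HWRM channel; on success, firmware has written
	 * the reported number of bytes of statistics at mz->addr. */
}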
 
 /* PCIe Statistics Formats */
@@ -28109,103 +27259,4 @@ struct hwrm_nvm_validate_option_cmd_err {
        uint8_t unused_0[7];
 } __attribute__((packed));
 
-/*****************************
- * hwrm_nvm_factory_defaults *
- *****************************/
-
-
-/* hwrm_nvm_factory_defaults_input (size:192b/24B) */
-struct hwrm_nvm_factory_defaults_input {
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /*
-        * The completion ring to send the completion event on. This should
-        * be the NQ ID returned from the `nq_alloc` HWRM command.
-        */
-       uint16_t        cmpl_ring;
-       /*
-        * The sequence ID is used by the driver for tracking multiple
-        * commands. This ID is treated as opaque data by the firmware and
-        * the value is returned in the `hwrm_resp_hdr` upon completion.
-        */
-       uint16_t        seq_id;
-       /*
-        * The target ID of the command:
-        * * 0x0-0xFFF8 - The function ID
-        * * 0xFFF8-0xFFFE - Reserved for internal processors
-        * * 0xFFFF - HWRM
-        */
-       uint16_t        target_id;
-       /*
-        * A physical address pointer pointing to a host buffer that the
-        * command's response data will be written. This can be either a host
-        * physical address (HPA) or a guest physical address (GPA) and must
-        * point to a physically contiguous block of memory.
-        */
-       uint64_t        resp_addr;
-       /* mode is 8 b */
-       uint8_t mode;
-       /* If set to 1, it will trigger restoration of factory default settings */
-       #define HWRM_NVM_FACTORY_DEFAULTS_INPUT_MODE_RESTORE UINT32_C(0x0)
-       /* If set to 1, it will trigger creation of factory default settings */
-       #define HWRM_NVM_FACTORY_DEFAULTS_INPUT_MODE_CREATE  UINT32_C(0x1)
-       #define HWRM_NVM_FACTORY_DEFAULTS_INPUT_MODE_LAST \
-               HWRM_NVM_FACTORY_DEFAULTS_INPUT_MODE_CREATE
-       uint8_t unused_0[7];
-} __attribute__((packed));
-
-/* hwrm_nvm_factory_defaults_output (size:128b/16B) */
-struct hwrm_nvm_factory_defaults_output {
-       /* The specific error status for the command. */
-       uint16_t        error_code;
-       /* The HWRM command request type. */
-       uint16_t        req_type;
-       /* The sequence ID from the original command. */
-       uint16_t        seq_id;
-       /* The length of the response data in number of bytes. */
-       uint16_t        resp_len;
-       uint8_t result;
-       /* factory defaults created successfully. */
-       #define HWRM_NVM_FACTORY_DEFAULTS_OUTPUT_RESULT_CREATE_OK \
-               UINT32_C(0x0)
-       /* factory defaults restored successfully. */
-       #define HWRM_NVM_FACTORY_DEFAULTS_OUTPUT_RESULT_RESTORE_OK \
-               UINT32_C(0x1)
-       /* factory defaults already created. */
-       #define HWRM_NVM_FACTORY_DEFAULTS_OUTPUT_RESULT_CREATE_ALREADY \
-               UINT32_C(0x2)
-       #define HWRM_NVM_FACTORY_DEFAULTS_OUTPUT_RESULT_LAST \
-               HWRM_NVM_FACTORY_DEFAULTS_OUTPUT_RESULT_CREATE_ALREADY
-       uint8_t unused_0[6];
-       /*
-        * This field is used in Output records to indicate that the output
-        * is completely written to RAM.  This field should be read as '1'
-        * to indicate that the output has been completely written.
-        * When writing a command completion or response to an internal processor,
-        * the order of writes has to be such that this field is written last.
-        */
-       uint8_t valid;
-} __attribute__((packed));
-
-/* hwrm_nvm_factory_defaults_cmd_err (size:64b/8B) */
-struct hwrm_nvm_factory_defaults_cmd_err {
-       /*
-        * command specific error codes that goes to
-        * the cmd_err field in Common HWRM Error Response.
-        */
-       uint8_t code;
-       /* Unknown error */
-       #define HWRM_NVM_FACTORY_DEFAULTS_CMD_ERR_CODE_UNKNOWN \
-               UINT32_C(0x0)
-       /* valid configuration not present to create defaults */
-       #define HWRM_NVM_FACTORY_DEFAULTS_CMD_ERR_CODE_NO_VALID_CFG \
-               UINT32_C(0x1)
-       /* No saved configuration present to restore, restore failed */
-       #define HWRM_NVM_FACTORY_DEFAULTS_CMD_ERR_CODE_NO_SAVED_CFG \
-               UINT32_C(0x2)
-       #define HWRM_NVM_FACTORY_DEFAULTS_CMD_ERR_CODE_LAST \
-               HWRM_NVM_FACTORY_DEFAULTS_CMD_ERR_CODE_NO_SAVED_CFG
-       uint8_t unused_0[7];
-} __attribute__((packed));
-
 #endif /* _HSI_STRUCT_DEF_DPDK_H_ */
index acad16a..1893e3c 100644
@@ -8,6 +8,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 #
 LIB = librte_pmd_bond.a
 
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
index 602d288..00374ed 100644
@@ -3,6 +3,7 @@
 
 name = 'bond' #, james bond :-)
 version = 2
+allow_experimental_apis = true
 sources = files('rte_eth_bond_api.c', 'rte_eth_bond_pmd.c', 'rte_eth_bond_flow.c',
        'rte_eth_bond_args.c', 'rte_eth_bond_8023ad.c', 'rte_eth_bond_alb.c')
 
index f8cea4b..dd847c6 100644
@@ -130,7 +130,7 @@ static const struct ether_addr lacp_mac_addr = {
        .addr_bytes = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x02 }
 };
 
-struct port mode_8023ad_ports[RTE_MAX_ETHPORTS];
+struct port bond_mode_8023ad_ports[RTE_MAX_ETHPORTS];
 
 static void
 timer_cancel(uint64_t *timer)
@@ -187,7 +187,7 @@ set_warning_flags(struct port *port, uint16_t flags)
 static void
 show_warnings(uint16_t slave_id)
 {
-       struct port *port = &mode_8023ad_ports[slave_id];
+       struct port *port = &bond_mode_8023ad_ports[slave_id];
        uint8_t warnings;
 
        do {
@@ -260,7 +260,7 @@ static void
 rx_machine(struct bond_dev_private *internals, uint16_t slave_id,
                struct lacpdu *lacp)
 {
-       struct port *agg, *port = &mode_8023ad_ports[slave_id];
+       struct port *agg, *port = &bond_mode_8023ad_ports[slave_id];
        uint64_t timeout;
 
        if (SM_FLAG(port, BEGIN)) {
@@ -319,7 +319,7 @@ rx_machine(struct bond_dev_private *internals, uint16_t slave_id,
                ACTOR_STATE_CLR(port, DEFAULTED);
 
                /* If LACP partner params match this port actor params */
-               agg = &mode_8023ad_ports[port->aggregator_port_id];
+               agg = &bond_mode_8023ad_ports[port->aggregator_port_id];
                bool match = port->actor.system_priority ==
                        lacp->partner.port_params.system_priority &&
                        is_same_ether_addr(&agg->actor.system,
@@ -380,7 +380,7 @@ rx_machine(struct bond_dev_private *internals, uint16_t slave_id,
 static void
 periodic_machine(struct bond_dev_private *internals, uint16_t slave_id)
 {
-       struct port *port = &mode_8023ad_ports[slave_id];
+       struct port *port = &bond_mode_8023ad_ports[slave_id];
        /* Calculate if either site is LACP enabled */
        uint64_t timeout;
        uint8_t active = ACTOR_STATE(port, LACP_ACTIVE) ||
@@ -442,7 +442,7 @@ periodic_machine(struct bond_dev_private *internals, uint16_t slave_id)
 static void
 mux_machine(struct bond_dev_private *internals, uint16_t slave_id)
 {
-       struct port *port = &mode_8023ad_ports[slave_id];
+       struct port *port = &bond_mode_8023ad_ports[slave_id];
 
        /* Save current state for later use */
        const uint8_t state_mask = STATE_SYNCHRONIZATION | STATE_DISTRIBUTING |
@@ -545,7 +545,7 @@ mux_machine(struct bond_dev_private *internals, uint16_t slave_id)
 static void
 tx_machine(struct bond_dev_private *internals, uint16_t slave_id)
 {
-       struct port *agg, *port = &mode_8023ad_ports[slave_id];
+       struct port *agg, *port = &bond_mode_8023ad_ports[slave_id];
 
        struct rte_mbuf *lacp_pkt = NULL;
        struct lacpdu_header *hdr;
@@ -591,7 +591,7 @@ tx_machine(struct bond_dev_private *internals, uint16_t slave_id)
        lacpdu->actor.info_length = sizeof(struct lacpdu_actor_partner_params);
        memcpy(&hdr->lacpdu.actor.port_params, &port->actor,
                        sizeof(port->actor));
-       agg = &mode_8023ad_ports[port->aggregator_port_id];
+       agg = &bond_mode_8023ad_ports[port->aggregator_port_id];
        ether_addr_copy(&agg->actor.system, &hdr->lacpdu.actor.port_params.system);
        lacpdu->actor.state = port->actor_state;
 
@@ -677,11 +677,11 @@ selection_logic(struct bond_dev_private *internals, uint8_t slave_id)
 
        slaves = internals->active_slaves;
        slaves_count = internals->active_slave_count;
-       port = &mode_8023ad_ports[slave_id];
+       port = &bond_mode_8023ad_ports[slave_id];
 
        /* Search for aggregator suitable for this port */
        for (i = 0; i < slaves_count; ++i) {
-               agg = &mode_8023ad_ports[slaves[i]];
+               agg = &bond_mode_8023ad_ports[slaves[i]];
                /* Skip ports that are not aggregators */
                if (agg->aggregator_port_id != slaves[i])
                        continue;
@@ -824,7 +824,7 @@ bond_mode_8023ad_periodic_cb(void *arg)
                } else
                        key = 0;
 
-               port = &mode_8023ad_ports[slave_id];
+               port = &bond_mode_8023ad_ports[slave_id];
 
                key = rte_cpu_to_be_16(key);
                if (key != port->actor.key) {
@@ -844,7 +844,7 @@ bond_mode_8023ad_periodic_cb(void *arg)
 
        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
-               port = &mode_8023ad_ports[slave_id];
+               port = &bond_mode_8023ad_ports[slave_id];
 
                if ((port->actor.key &
                                rte_cpu_to_be_16(BOND_LINK_FULL_DUPLEX_KEY)) == 0) {
@@ -907,7 +907,7 @@ bond_mode_8023ad_activate_slave(struct rte_eth_dev *bond_dev,
 {
        struct bond_dev_private *internals = bond_dev->data->dev_private;
 
-       struct port *port = &mode_8023ad_ports[slave_id];
+       struct port *port = &bond_mode_8023ad_ports[slave_id];
        struct port_params initial = {
                        .system = { { 0 } },
                        .system_priority = rte_cpu_to_be_16(0xFFFF),
@@ -1008,7 +1008,7 @@ bond_mode_8023ad_deactivate_slave(struct rte_eth_dev *bond_dev __rte_unused,
        struct port *port = NULL;
        uint8_t old_partner_state;
 
-       port = &mode_8023ad_ports[slave_id];
+       port = &bond_mode_8023ad_ports[slave_id];
 
        ACTOR_STATE_CLR(port, AGGREGATION);
        port->selected = UNSELECTED;
@@ -1045,7 +1045,7 @@ bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev)
 
        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
-               slave = &mode_8023ad_ports[slave_id];
+               slave = &bond_mode_8023ad_ports[slave_id];
                rte_eth_macaddr_get(slave_id, &slave_addr);
 
                if (is_same_ether_addr(&slave_addr, &slave->actor.system))
@@ -1058,7 +1058,7 @@ bond_mode_8023ad_mac_address_update(struct rte_eth_dev *bond_dev)
                        continue;
 
                for (j = 0; j < internals->active_slave_count; j++) {
-                       agg_slave = &mode_8023ad_ports[internals->active_slaves[j]];
+                       agg_slave = &bond_mode_8023ad_ports[internals->active_slaves[j]];
                        if (agg_slave->aggregator_port_id == slave_id)
                                SM_FLAG_SET(agg_slave, NTT);
                }
@@ -1191,7 +1191,7 @@ bond_mode_8023ad_handle_slow_pkt(struct bond_dev_private *internals,
                                  uint16_t slave_id, struct rte_mbuf *pkt)
 {
        struct mode8023ad_private *mode4 = &internals->mode4;
-       struct port *port = &mode_8023ad_ports[slave_id];
+       struct port *port = &bond_mode_8023ad_ports[slave_id];
        struct marker_header *m_hdr;
        uint64_t marker_timer, old_marker_timer;
        int retval;
@@ -1395,7 +1395,7 @@ rte_eth_bond_8023ad_slave_info(uint16_t port_id, uint16_t slave_id,
                                internals->active_slave_count)
                return -EINVAL;
 
-       port = &mode_8023ad_ports[slave_id];
+       port = &bond_mode_8023ad_ports[slave_id];
        info->selected = port->selected;
 
        info->actor_state = port->actor_state;
@@ -1447,7 +1447,7 @@ rte_eth_bond_8023ad_ext_collect(uint16_t port_id, uint16_t slave_id,
        if (res != 0)
                return res;
 
-       port = &mode_8023ad_ports[slave_id];
+       port = &bond_mode_8023ad_ports[slave_id];
 
        if (enabled)
                ACTOR_STATE_SET(port, COLLECTING);
@@ -1468,7 +1468,7 @@ rte_eth_bond_8023ad_ext_distrib(uint16_t port_id, uint16_t slave_id,
        if (res != 0)
                return res;
 
-       port = &mode_8023ad_ports[slave_id];
+       port = &bond_mode_8023ad_ports[slave_id];
 
        if (enabled)
                ACTOR_STATE_SET(port, DISTRIBUTING);
@@ -1488,7 +1488,7 @@ rte_eth_bond_8023ad_ext_distrib_get(uint16_t port_id, uint16_t slave_id)
        if (err != 0)
                return err;
 
-       port = &mode_8023ad_ports[slave_id];
+       port = &bond_mode_8023ad_ports[slave_id];
        return ACTOR_STATE(port, DISTRIBUTING);
 }
 
@@ -1502,7 +1502,7 @@ rte_eth_bond_8023ad_ext_collect_get(uint16_t port_id, uint16_t slave_id)
        if (err != 0)
                return err;
 
-       port = &mode_8023ad_ports[slave_id];
+       port = &bond_mode_8023ad_ports[slave_id];
        return ACTOR_STATE(port, COLLECTING);
 }
 
@@ -1517,7 +1517,7 @@ rte_eth_bond_8023ad_ext_slowtx(uint16_t port_id, uint16_t slave_id,
        if (res != 0)
                return res;
 
-       port = &mode_8023ad_ports[slave_id];
+       port = &bond_mode_8023ad_ports[slave_id];
 
        if (rte_pktmbuf_pkt_len(lacp_pkt) < sizeof(struct lacpdu_header))
                return -EINVAL;
@@ -1546,7 +1546,7 @@ bond_mode_8023ad_ext_periodic_cb(void *arg)
 
        for (i = 0; i < internals->active_slave_count; i++) {
                slave_id = internals->active_slaves[i];
-               port = &mode_8023ad_ports[slave_id];
+               port = &bond_mode_8023ad_ports[slave_id];
 
                if (rte_ring_dequeue(port->rx_ring, &pkt) == 0) {
                        struct rte_mbuf *lacp_pkt = pkt;
index 0f490a5..c51426b 100644
@@ -174,7 +174,7 @@ struct mode8023ad_private {
  * The pool of *port* structures. The size of the pool
  * is configured at compile-time in the <rte_eth_bond_8023ad.c> file.
  */
-extern struct port mode_8023ad_ports[];
+extern struct port bond_mode_8023ad_ports[];
 
 /* Forward declaration */
 struct bond_dev_private;
index 8bc04cf..21bcd50 100644
@@ -245,9 +245,9 @@ slave_rte_flow_prepare(uint16_t slave_id, struct bond_dev_private *internals)
        }
        TAILQ_FOREACH(flow, &internals->flow_list, next) {
                flow->flows[slave_id] = rte_flow_create(slave_port_id,
-                                                       &flow->fd->attr,
-                                                       flow->fd->items,
-                                                       flow->fd->actions,
+                                                       flow->rule.attr,
+                                                       flow->rule.pattern,
+                                                       flow->rule.actions,
                                                        &ferror);
                if (flow->flows[slave_id] == NULL) {
                        RTE_BOND_LOG(ERR, "Cannot create flow for slave"
@@ -269,6 +269,173 @@ slave_rte_flow_prepare(uint16_t slave_id, struct bond_dev_private *internals)
        return 0;
 }
 
+static void
+eth_bond_slave_inherit_dev_info_rx_first(struct bond_dev_private *internals,
+                                        const struct rte_eth_dev_info *di)
+{
+       struct rte_eth_rxconf *rxconf_i = &internals->default_rxconf;
+
+       internals->reta_size = di->reta_size;
+
+       /* Inherit Rx offload capabilities from the first slave device */
+       internals->rx_offload_capa = di->rx_offload_capa;
+       internals->rx_queue_offload_capa = di->rx_queue_offload_capa;
+       internals->flow_type_rss_offloads = di->flow_type_rss_offloads;
+
+       /* Inherit maximum Rx packet size from the first slave device */
+       internals->candidate_max_rx_pktlen = di->max_rx_pktlen;
+
+       /* Inherit default Rx queue settings from the first slave device */
+       memcpy(rxconf_i, &di->default_rxconf, sizeof(*rxconf_i));
+
+       /*
+        * Turn off descriptor prefetch and writeback by default for all
+        * slave devices. Applications may tweak this setting if need be.
+        */
+       rxconf_i->rx_thresh.pthresh = 0;
+       rxconf_i->rx_thresh.hthresh = 0;
+       rxconf_i->rx_thresh.wthresh = 0;
+
+       /* Setting this to zero should effectively enable default values */
+       rxconf_i->rx_free_thresh = 0;
+
+       /* Disable deferred start by default for all slave devices */
+       rxconf_i->rx_deferred_start = 0;
+}
+
+static void
+eth_bond_slave_inherit_dev_info_tx_first(struct bond_dev_private *internals,
+                                        const struct rte_eth_dev_info *di)
+{
+       struct rte_eth_txconf *txconf_i = &internals->default_txconf;
+
+       /* Inherit Tx offload capabilities from the first slave device */
+       internals->tx_offload_capa = di->tx_offload_capa;
+       internals->tx_queue_offload_capa = di->tx_queue_offload_capa;
+
+       /* Inherit default Tx queue settings from the first slave device */
+       memcpy(txconf_i, &di->default_txconf, sizeof(*txconf_i));
+
+       /*
+        * Turn off descriptor prefetch and writeback by default for all
+        * slave devices. Applications may tweak this setting if need be.
+        */
+       txconf_i->tx_thresh.pthresh = 0;
+       txconf_i->tx_thresh.hthresh = 0;
+       txconf_i->tx_thresh.wthresh = 0;
+
+       /*
+        * Setting these parameters to zero assumes that default
+        * values will be configured implicitly by slave devices.
+        */
+       txconf_i->tx_free_thresh = 0;
+       txconf_i->tx_rs_thresh = 0;
+
+       /* Disable deferred start by default for all slave devices */
+       txconf_i->tx_deferred_start = 0;
+}
+
+static void
+eth_bond_slave_inherit_dev_info_rx_next(struct bond_dev_private *internals,
+                                       const struct rte_eth_dev_info *di)
+{
+       struct rte_eth_rxconf *rxconf_i = &internals->default_rxconf;
+       const struct rte_eth_rxconf *rxconf = &di->default_rxconf;
+
+       internals->rx_offload_capa &= di->rx_offload_capa;
+       internals->rx_queue_offload_capa &= di->rx_queue_offload_capa;
+       internals->flow_type_rss_offloads &= di->flow_type_rss_offloads;
+
+       /*
+        * If at least one slave device suggests enabling this
+        * setting by default, enable it for all slave devices
+        * since disabling it may not necessarily be supported.
+        */
+       if (rxconf->rx_drop_en == 1)
+               rxconf_i->rx_drop_en = 1;
+
+       /*
+        * Adding a new slave device may cause some of previously inherited
+        * offloads to be withdrawn from the internal rx_queue_offload_capa
+        * value. Thus, the new internal value of default Rx queue offloads
+        * has to be masked by rx_queue_offload_capa to make sure that only
+        * commonly supported offloads are preserved from both the previous
+        * value and the value being inherited from the new slave device.
+        */
+       rxconf_i->offloads = (rxconf_i->offloads | rxconf->offloads) &
+                            internals->rx_queue_offload_capa;
+
+       /*
+        * RETA size is the GCD of all slaves' RETA sizes; since every
+        * size is a power of 2, the smaller one is the GCD.
+        */
+       if (internals->reta_size > di->reta_size)
+               internals->reta_size = di->reta_size;
+
+       if (!internals->max_rx_pktlen &&
+           di->max_rx_pktlen < internals->candidate_max_rx_pktlen)
+               internals->candidate_max_rx_pktlen = di->max_rx_pktlen;
+}
+
+static void
+eth_bond_slave_inherit_dev_info_tx_next(struct bond_dev_private *internals,
+                                       const struct rte_eth_dev_info *di)
+{
+       struct rte_eth_txconf *txconf_i = &internals->default_txconf;
+       const struct rte_eth_txconf *txconf = &di->default_txconf;
+
+       internals->tx_offload_capa &= di->tx_offload_capa;
+       internals->tx_queue_offload_capa &= di->tx_queue_offload_capa;
+
+       /*
+        * Adding a new slave device may cause some of previously inherited
+        * offloads to be withdrawn from the internal tx_queue_offload_capa
+        * value. Thus, the new internal value of default Tx queue offloads
+        * has to be masked by tx_queue_offload_capa to make sure that only
+        * commonly supported offloads are preserved from both the previous
+        * value and the value being inherited from the new slave device.
+        */
+       txconf_i->offloads = (txconf_i->offloads | txconf->offloads) &
+                            internals->tx_queue_offload_capa;
+}
+
+static void
+eth_bond_slave_inherit_desc_lim_first(struct rte_eth_desc_lim *bond_desc_lim,
+               const struct rte_eth_desc_lim *slave_desc_lim)
+{
+       memcpy(bond_desc_lim, slave_desc_lim, sizeof(*bond_desc_lim));
+}
+
+static int
+eth_bond_slave_inherit_desc_lim_next(struct rte_eth_desc_lim *bond_desc_lim,
+               const struct rte_eth_desc_lim *slave_desc_lim)
+{
+       bond_desc_lim->nb_max = RTE_MIN(bond_desc_lim->nb_max,
+                                       slave_desc_lim->nb_max);
+       bond_desc_lim->nb_min = RTE_MAX(bond_desc_lim->nb_min,
+                                       slave_desc_lim->nb_min);
+       bond_desc_lim->nb_align = RTE_MAX(bond_desc_lim->nb_align,
+                                         slave_desc_lim->nb_align);
+
+       if (bond_desc_lim->nb_min > bond_desc_lim->nb_max ||
+           bond_desc_lim->nb_align > bond_desc_lim->nb_max) {
+               RTE_BOND_LOG(ERR, "Failed to inherit descriptor limits");
+               return -EINVAL;
+       }
+
+       /* Treat maximum number of segments equal to 0 as unspecified */
+       if (slave_desc_lim->nb_seg_max != 0 &&
+           (bond_desc_lim->nb_seg_max == 0 ||
+            slave_desc_lim->nb_seg_max < bond_desc_lim->nb_seg_max))
+               bond_desc_lim->nb_seg_max = slave_desc_lim->nb_seg_max;
+       if (slave_desc_lim->nb_mtu_seg_max != 0 &&
+           (bond_desc_lim->nb_mtu_seg_max == 0 ||
+            slave_desc_lim->nb_mtu_seg_max < bond_desc_lim->nb_mtu_seg_max))
+               bond_desc_lim->nb_mtu_seg_max = slave_desc_lim->nb_mtu_seg_max;
+
+       return 0;
+}
+
 static int
 __eth_bond_slave_add_lock_free(uint16_t bonded_port_id, uint16_t slave_port_id)
 {
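
The helpers added above split slave inheritance into a seed step and a
narrowing step: the first slave's capabilities are copied wholesale,
and every later slave can only restrict them (bitwise AND of offload
masks, RTE_MIN of nb_max, RTE_MAX of nb_min and nb_align). A worked
sketch of the descriptor-limit merge, with invented sample values:

/* Sketch with made-up numbers: merging a second slave's Rx limits. */
static void
example_desc_lim_merge(void)
{
	struct rte_eth_desc_lim bond = {
		.nb_max = 4096, .nb_min = 32, .nb_align = 8 };
	const struct rte_eth_desc_lim slave = {
		.nb_max = 2048, .nb_min = 64, .nb_align = 32 };

	bond.nb_max = RTE_MIN(bond.nb_max, slave.nb_max);	/* 2048 */
	bond.nb_min = RTE_MAX(bond.nb_min, slave.nb_min);	/*   64 */
	bond.nb_align = RTE_MAX(bond.nb_align, slave.nb_align);/*   32 */
	/*
	 * Any ring size in [64, 2048] that is a multiple of 32 now
	 * satisfies both slaves; if nb_min ever exceeded nb_max the
	 * slaves would be incompatible, which is exactly the -EINVAL
	 * path in eth_bond_slave_inherit_desc_lim_next() above.
	 */
}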
@@ -326,34 +493,28 @@ __eth_bond_slave_add_lock_free(uint16_t bonded_port_id, uint16_t slave_port_id)
                internals->nb_rx_queues = slave_eth_dev->data->nb_rx_queues;
                internals->nb_tx_queues = slave_eth_dev->data->nb_tx_queues;
 
-               internals->reta_size = dev_info.reta_size;
+               eth_bond_slave_inherit_dev_info_rx_first(internals, &dev_info);
+               eth_bond_slave_inherit_dev_info_tx_first(internals, &dev_info);
 
-               /* Take the first dev's offload capabilities */
-               internals->rx_offload_capa = dev_info.rx_offload_capa;
-               internals->tx_offload_capa = dev_info.tx_offload_capa;
-               internals->rx_queue_offload_capa = dev_info.rx_queue_offload_capa;
-               internals->tx_queue_offload_capa = dev_info.tx_queue_offload_capa;
-               internals->flow_type_rss_offloads = dev_info.flow_type_rss_offloads;
+               eth_bond_slave_inherit_desc_lim_first(&internals->rx_desc_lim,
+                                                     &dev_info.rx_desc_lim);
+               eth_bond_slave_inherit_desc_lim_first(&internals->tx_desc_lim,
+                                                     &dev_info.tx_desc_lim);
+       } else {
+               int ret;
 
-               /* Inherit first slave's max rx packet size */
-               internals->candidate_max_rx_pktlen = dev_info.max_rx_pktlen;
+               eth_bond_slave_inherit_dev_info_rx_next(internals, &dev_info);
+               eth_bond_slave_inherit_dev_info_tx_next(internals, &dev_info);
 
-       } else {
-               internals->rx_offload_capa &= dev_info.rx_offload_capa;
-               internals->tx_offload_capa &= dev_info.tx_offload_capa;
-               internals->rx_queue_offload_capa &= dev_info.rx_queue_offload_capa;
-               internals->tx_queue_offload_capa &= dev_info.tx_queue_offload_capa;
-               internals->flow_type_rss_offloads &= dev_info.flow_type_rss_offloads;
-
-               /* RETA size is GCD of all slaves RETA sizes, so, if all sizes will be
-                * the power of 2, the lower one is GCD
-                */
-               if (internals->reta_size > dev_info.reta_size)
-                       internals->reta_size = dev_info.reta_size;
+               ret = eth_bond_slave_inherit_desc_lim_next(
+                               &internals->rx_desc_lim, &dev_info.rx_desc_lim);
+               if (ret != 0)
+                       return ret;
 
-               if (!internals->max_rx_pktlen &&
-                   dev_info.max_rx_pktlen < internals->candidate_max_rx_pktlen)
-                       internals->candidate_max_rx_pktlen = dev_info.max_rx_pktlen;
+               ret = eth_bond_slave_inherit_desc_lim_next(
+                               &internals->tx_desc_lim, &dev_info.tx_desc_lim);
+               if (ret != 0)
+                       return ret;
        }
 
        bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf &=
index 31e4bca..f94d46c 100644
@@ -2,8 +2,11 @@
  * Copyright 2018 Mellanox Technologies, Ltd
  */
 
+#include <stddef.h>
+#include <string.h>
 #include <sys/queue.h>
 
+#include <rte_errno.h>
 #include <rte_malloc.h>
 #include <rte_tailq.h>
 #include <rte_flow.h>
@@ -16,19 +19,33 @@ bond_flow_alloc(int numa_node, const struct rte_flow_attr *attr,
                   const struct rte_flow_action *actions)
 {
        struct rte_flow *flow;
-       size_t fdsz;
+       const struct rte_flow_conv_rule rule = {
+               .attr_ro = attr,
+               .pattern_ro = items,
+               .actions_ro = actions,
+       };
+       struct rte_flow_error error;
+       int ret;
 
-       fdsz = rte_flow_copy(NULL, 0, attr, items, actions);
-       flow = rte_zmalloc_socket(NULL, sizeof(struct rte_flow) + fdsz,
+       ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, NULL, 0, &rule, &error);
+       if (ret < 0) {
+               RTE_BOND_LOG(ERR, "Unable to process flow rule (%s): %s",
+                            error.message ? error.message : "unspecified",
+                            strerror(rte_errno));
+               return NULL;
+       }
+       flow = rte_zmalloc_socket(NULL, offsetof(struct rte_flow, rule) + ret,
                                  RTE_CACHE_LINE_SIZE, numa_node);
        if (unlikely(flow == NULL)) {
                RTE_BOND_LOG(ERR, "Could not allocate new flow");
                return NULL;
        }
-       flow->fd = (void *)((uintptr_t)flow + sizeof(*flow));
-       if (unlikely(rte_flow_copy(flow->fd, fdsz, attr, items, actions) !=
-                    fdsz)) {
-               RTE_BOND_LOG(ERR, "Failed to copy flow description");
+       ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, &flow->rule, ret, &rule,
+                           &error);
+       if (ret < 0) {
+               RTE_BOND_LOG(ERR, "Failed to copy flow rule (%s): %s",
+                            error.message ? error.message : "unspecified",
+                            strerror(rte_errno));
                rte_free(flow);
                return NULL;
        }
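
The switch from rte_flow_copy() to rte_flow_conv() keeps the familiar
two-pass sizing idiom: a first call with a NULL destination returns the
number of bytes the converted rule needs, and a second call performs
the copy into a buffer of that size. Stripped of the bonding specifics,
the pattern is:

/* Sketch: two-pass rte_flow_conv(), error handling abbreviated. */
static void
example_two_pass_conv(const struct rte_flow_conv_rule *rule)
{
	struct rte_flow_error error;
	struct rte_flow_conv_rule *dst;
	int size;

	/* Pass 1: a NULL destination just computes the required size. */
	size = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, NULL, 0, rule, &error);
	if (size < 0)
		return;
	/* Pass 2: convert into a buffer of exactly that size. */
	dst = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
	if (dst != NULL &&
	    rte_flow_conv(RTE_FLOW_CONV_OP_RULE, dst, size,
			  rule, &error) >= 0) {
		/* dst now holds a self-contained copy of the rule's
		 * attr, pattern and actions. */
	}
	rte_free(dst);
}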
index 58f7377..156f31c 100644
@@ -37,7 +37,8 @@ get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
 {
        size_t vlan_offset = 0;
 
-       if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
+       if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
+               rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
                struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
 
                vlan_offset = sizeof(struct vlan_hdr);
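
This hunk extends the VLAN-offset helper to also recognise the QinQ
(IEEE 802.1ad) outer ethertype. The check in isolation, using the
rte_ether.h constants:

/* Sketch: does a big-endian ethertype open a VLAN tag chain? */
static inline int
example_is_vlan_tagged(uint16_t proto_be)
{
	return proto_be == rte_cpu_to_be_16(ETHER_TYPE_VLAN) ||	/* 0x8100 */
	       proto_be == rte_cpu_to_be_16(ETHER_TYPE_QINQ);	/* 0x88A8 */
}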
@@ -57,28 +58,34 @@ bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
        struct bond_dev_private *internals;
 
-       uint16_t num_rx_slave = 0;
        uint16_t num_rx_total = 0;
-
+       uint16_t slave_count;
+       uint16_t active_slave;
        int i;
 
        /* Cast to structure, containing bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
-
        internals = bd_rx_q->dev_private;
+       slave_count = internals->active_slave_count;
+       active_slave = internals->active_slave;
 
+       for (i = 0; i < slave_count && nb_pkts; i++) {
+               uint16_t num_rx_slave;
 
-       for (i = 0; i < internals->active_slave_count && nb_pkts; i++) {
                /* Offset of pointer to *bufs increases as packets are received
                 * from other slaves */
-               num_rx_slave = rte_eth_rx_burst(internals->active_slaves[i],
-                               bd_rx_q->queue_id, bufs + num_rx_total, nb_pkts);
-               if (num_rx_slave) {
-                       num_rx_total += num_rx_slave;
-                       nb_pkts -= num_rx_slave;
-               }
+               num_rx_slave =
+                       rte_eth_rx_burst(internals->active_slaves[active_slave],
+                                        bd_rx_q->queue_id,
+                                        bufs + num_rx_total, nb_pkts);
+               num_rx_total += num_rx_slave;
+               nb_pkts -= num_rx_slave;
+               if (++active_slave == slave_count)
+                       active_slave = 0;
        }
 
+       if (++internals->active_slave == slave_count)
+               internals->active_slave = 0;
        return num_rx_total;
 }
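
The rewritten burst keeps a per-device active_slave cursor: polling
starts at the cursor, wraps modulo slave_count, and the stored cursor
advances so that no slave is systematically polled first. A simplified
sketch of the rotation; poll_one() is a hypothetical stand-in for
rte_eth_rx_burst() on a single slave, and this variant persists the
full rotation rather than the driver's advance-by-one:

/* Hypothetical single-slave receive, declared only for the sketch. */
static uint16_t poll_one(uint16_t port, uint16_t budget);

/* Sketch: cursor-based round-robin receive across slaves. */
static uint16_t
example_rr_rx(uint16_t *cursor, const uint16_t *slaves,
	      uint16_t slave_count, uint16_t budget)
{
	uint16_t got = 0;
	uint16_t i;

	for (i = 0; i < slave_count && budget > 0; i++) {
		uint16_t n = poll_one(slaves[*cursor], budget);

		got += n;
		budget -= n;
		if (++(*cursor) == slave_count)
			*cursor = 0;
	}
	return got;
}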
 
@@ -257,25 +264,32 @@ bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
        uint16_t num_rx_total = 0;      /* Total number of received packets */
        uint16_t slaves[RTE_MAX_ETHPORTS];
        uint16_t slave_count;
-
-       uint16_t i, idx;
+       uint16_t active_slave;
+       uint16_t i;
 
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
+       active_slave = internals->active_slave;
        memcpy(slaves, internals->active_slaves,
                        sizeof(internals->active_slaves[0]) * slave_count);
 
-       for (i = 0, idx = internals->active_slave;
-                       i < slave_count && num_rx_total < nb_pkts; i++, idx++) {
-               idx = idx % slave_count;
+       for (i = 0; i < slave_count && nb_pkts; i++) {
+               uint16_t num_rx_slave;
 
                /* Read packets from this slave */
-               num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
-                               &bufs[num_rx_total], nb_pkts - num_rx_total);
+               num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
+                                               bd_rx_q->queue_id,
+                                               bufs + num_rx_total, nb_pkts);
+               num_rx_total += num_rx_slave;
+               nb_pkts -= num_rx_slave;
+
+               if (++active_slave == slave_count)
+                       active_slave = 0;
        }
 
-       internals->active_slave = idx;
+       if (++internals->active_slave == slave_count)
+               internals->active_slave = 0;
 
        return num_rx_total;
 }
@@ -300,10 +314,10 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
 
-       uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+       uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-       uint16_t i, j;
+       uint16_t i;
 
        if (unlikely(nb_bufs == 0))
                return 0;
@@ -320,7 +334,7 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
 
        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
-               struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
+               struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
 
                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
@@ -358,34 +372,12 @@ bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
 
                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
-                       slave_tx_fail_count[i] = slave_nb_bufs[i] -
+                       int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
-                       total_tx_fail_count += slave_tx_fail_count[i];
-
-                       /*
-                        * Shift bufs to beginning of array to allow reordering
-                        * later
-                        */
-                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
-                               slave_bufs[i][j] =
-                                       slave_bufs[i][(slave_tx_count - 1) + j];
-                       }
-               }
-       }
-
-       /*
-        * If there are tx burst failures we move packets to end of bufs to
-        * preserve expected PMD behaviour of all failed transmitted being
-        * at the end of the input mbuf array
-        */
-       if (unlikely(total_tx_fail_count > 0)) {
-               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-               for (i = 0; i < slave_count; i++) {
-                       if (slave_tx_fail_count[i] > 0) {
-                               for (j = 0; j < slave_tx_fail_count[i]; j++)
-                                       bufs[bufs_idx++] = slave_bufs[i][j];
-                       }
+                       total_tx_fail_count += slave_tx_fail_count;
+                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
+                              &slave_bufs[i][slave_tx_count],
+                              slave_tx_fail_count * sizeof(bufs[0]));
                }
        }
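
The ethdev contract requires that any mbufs a PMD fails to transmit
remain at the tail of the caller's bufs[] array. The old code rebuilt
that tail with per-slave shift loops plus a second pass; the new code
reaches the same layout with one memcpy per failing slave, growing the
tail backwards as failures accumulate. The invariant in miniature
(names mirror the hunk above; slave_tx_count is shown per slave here
for clarity):

/* Sketch: compact unsent mbufs to the tail of bufs[]. */
static uint16_t
example_compact_tx_failures(struct rte_mbuf **bufs, uint16_t nb_bufs,
			    struct rte_mbuf **slave_bufs[],
			    const uint16_t *slave_nb_bufs,
			    const uint16_t *slave_tx_count,
			    uint16_t slave_count)
{
	uint16_t total_fail = 0;
	uint16_t i;

	for (i = 0; i < slave_count; i++) {
		uint16_t fail = slave_nb_bufs[i] - slave_tx_count[i];

		if (fail == 0)
			continue;
		total_fail += fail;
		memcpy(&bufs[nb_bufs - total_fail],
		       &slave_bufs[i][slave_tx_count[i]],
		       fail * sizeof(bufs[0]));
	}
	/* bufs[nb_bufs - total_fail .. nb_bufs - 1] now holds every
	 * unsent mbuf, as the ethdev API expects. */
	return total_fail;
}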
 
@@ -400,8 +392,9 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
        /* Cast to structure, containing bonded device's port id and queue id */
        struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
        struct bond_dev_private *internals = bd_rx_q->dev_private;
-       struct ether_addr bond_mac;
-
+       struct rte_eth_dev *bonded_eth_dev =
+                                       &rte_eth_devices[internals->port_id];
+       struct ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
        struct ether_hdr *hdr;
 
        const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
@@ -414,7 +407,6 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
        uint8_t i, j, k;
        uint8_t subtype;
 
-       rte_eth_macaddr_get(internals->port_id, &bond_mac);
        /* Copy slave list to protect against slave up/down changes during rx
         * bursting */
        slave_count = internals->active_slave_count;
@@ -428,7 +420,7 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
        }
        for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
                j = num_rx_total;
-               collecting = ACTOR_STATE(&mode_8023ad_ports[slaves[idx]],
+               collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
                                         COLLECTING);
 
                /* Read packets from this slave */
@@ -457,9 +449,11 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                         * in collecting state or bonding interface is not in promiscuous
                         * mode and packet address does not match. */
                        if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
-                               !collecting || (!promisc &&
-                                       !is_multicast_ether_addr(&hdr->d_addr) &&
-                                       !is_same_ether_addr(&bond_mac, &hdr->d_addr)))) {
+                               !collecting ||
+                               (!promisc &&
+                                !is_multicast_ether_addr(&hdr->d_addr) &&
+                                !is_same_ether_addr(bond_mac,
+                                                    &hdr->d_addr)))) {
 
                                if (hdr->ether_type == ether_type_slow_be) {
                                        bond_mode_8023ad_handle_slow_pkt(
@@ -480,7 +474,9 @@ bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
                        idx = 0;
        }
 
-       internals->active_slave = idx;
+       if (++internals->active_slave == slave_count)
+               internals->active_slave = 0;
+
        return num_rx_total;
 }
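With this change the starting-slave cursor advances exactly once per burst and wraps explicitly, rather than being left at whatever idx the receive loop ended on, so internals->active_slave can never hold a value outside [0, slave_count). A minimal sketch of the rotating-cursor idiom (hypothetical helper name):

    #include <stdint.h>

    /* Advance a round-robin start cursor, wrapping before it can ever
     * equal slave_count. */
    static inline uint16_t
    next_start_slave(uint16_t active_slave, uint16_t slave_count)
    {
            if (++active_slave == slave_count)
                    active_slave = 0;
            return active_slave;
    }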
 
@@ -715,8 +711,8 @@ bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
                                tx_fail_total += tx_fail_slave;
 
                                memcpy(&bufs[nb_pkts - tx_fail_total],
-                                               &slave_bufs[i][num_tx_slave],
-                                               tx_fail_slave * sizeof(bufs[0]));
+                                      &slave_bufs[i][num_tx_slave],
+                                      tx_fail_slave * sizeof(bufs[0]));
                        }
                        num_tx_total += num_tx_slave;
                }
@@ -1221,10 +1217,10 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[nb_bufs];
 
-       uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+       uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-       uint16_t i, j;
+       uint16_t i;
 
        if (unlikely(nb_bufs == 0))
                return 0;
@@ -1265,34 +1261,12 @@ bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
 
                /* If tx burst fails move packets to end of bufs */
                if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
-                       slave_tx_fail_count[i] = slave_nb_bufs[i] -
+                       int slave_tx_fail_count = slave_nb_bufs[i] -
                                        slave_tx_count;
-                       total_tx_fail_count += slave_tx_fail_count[i];
-
-                       /*
-                        * Shift bufs to beginning of array to allow reordering
-                        * later
-                        */
-                       for (j = 0; j < slave_tx_fail_count[i]; j++) {
-                               slave_bufs[i][j] =
-                                       slave_bufs[i][(slave_tx_count - 1) + j];
-                       }
-               }
-       }
-
-       /*
-        * If there are tx burst failures we move packets to end of bufs to
-        * preserve expected PMD behaviour of all failed transmitted being
-        * at the end of the input mbuf array
-        */
-       if (unlikely(total_tx_fail_count > 0)) {
-               int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-               for (i = 0; i < slave_count; i++) {
-                       if (slave_tx_fail_count[i] > 0) {
-                               for (j = 0; j < slave_tx_fail_count[i]; j++)
-                                       bufs[bufs_idx++] = slave_bufs[i][j];
-                       }
+                       total_tx_fail_count += slave_tx_fail_count;
+                       memcpy(&bufs[nb_bufs - total_tx_fail_count],
+                              &slave_bufs[i][slave_tx_count],
+                              slave_tx_fail_count * sizeof(bufs[0]));
                }
        }
 
@@ -1319,10 +1293,10 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
        /* Mapping array generated by hash function to map mbufs to slaves */
        uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };
 
-       uint16_t slave_tx_count, slave_tx_fail_count[RTE_MAX_ETHPORTS] = { 0 };
+       uint16_t slave_tx_count;
        uint16_t total_tx_count = 0, total_tx_fail_count = 0;
 
-       uint16_t i, j;
+       uint16_t i;
 
        if (unlikely(nb_bufs == 0))
                return 0;
@@ -1338,7 +1312,7 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
 
        dist_slave_count = 0;
        for (i = 0; i < slave_count; i++) {
-               struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
+               struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
 
                if (ACTOR_STATE(port, DISTRIBUTING))
                        dist_slave_port_ids[dist_slave_count++] =
@@ -1380,46 +1354,20 @@ bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
 
                        /* If tx burst fails move packets to end of bufs */
                        if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
-                               slave_tx_fail_count[i] = slave_nb_bufs[i] -
+                               int slave_tx_fail_count = slave_nb_bufs[i] -
                                                slave_tx_count;
-                               total_tx_fail_count += slave_tx_fail_count[i];
-
-                               /*
-                                * Shift bufs to beginning of array to allow
-                                * reordering later
-                                */
-                               for (j = 0; j < slave_tx_fail_count[i]; j++)
-                                       slave_bufs[i][j] =
-                                               slave_bufs[i]
-                                                       [(slave_tx_count - 1)
-                                                       + j];
-                       }
-               }
+                               total_tx_fail_count += slave_tx_fail_count;
 
-               /*
-                * If there are tx burst failures we move packets to end of
-                * bufs to preserve expected PMD behaviour of all failed
-                * transmitted being at the end of the input mbuf array
-                */
-               if (unlikely(total_tx_fail_count > 0)) {
-                       int bufs_idx = nb_bufs - total_tx_fail_count - 1;
-
-                       for (i = 0; i < slave_count; i++) {
-                               if (slave_tx_fail_count[i] > 0) {
-                                       for (j = 0;
-                                               j < slave_tx_fail_count[i];
-                                               j++) {
-                                               bufs[bufs_idx++] =
-                                                       slave_bufs[i][j];
-                                       }
-                               }
+                               memcpy(&bufs[nb_bufs - total_tx_fail_count],
+                                      &slave_bufs[i][slave_tx_count],
+                                      slave_tx_fail_count * sizeof(bufs[0]));
                        }
                }
        }
 
        /* Check for LACP control packets and send if available */
        for (i = 0; i < slave_count; i++) {
-               struct port *port = &mode_8023ad_ports[slave_port_ids[i]];
+               struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
                struct rte_mbuf *ctrl_pkt = NULL;
 
                if (likely(rte_ring_empty(port->tx_ring)))
@@ -1770,7 +1718,7 @@ slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
        int errval = 0;
        struct bond_dev_private *internals = (struct bond_dev_private *)
                bonded_eth_dev->data->dev_private;
-       struct port *port = &mode_8023ad_ports[slave_eth_dev->data->port_id];
+       struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
 
        if (port->slow_pool == NULL) {
                char mem_name[256];
@@ -1847,12 +1795,11 @@ slave_configure(struct rte_eth_dev *bonded_eth_dev,
 
        /* If RSS is enabled for bonding, try to enable it for slaves  */
        if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
-               if (bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len
-                               != 0) {
+               if (internals->rss_key_len != 0) {
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
-                                       bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
+                                       internals->rss_key_len;
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
-                                       bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key;
+                                       internals->rss_key;
                } else {
                        slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
                }
@@ -2210,7 +2157,7 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev)
 
                /* Discard all messages to/from mode 4 state machines */
                for (i = 0; i < internals->active_slave_count; i++) {
-                       port = &mode_8023ad_ports[internals->active_slaves[i]];
+                       port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
 
                        RTE_ASSERT(port->rx_ring != NULL);
                        while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
@@ -2229,12 +2176,15 @@ bond_ethdev_stop(struct rte_eth_dev *eth_dev)
                        tlb_last_obytets[internals->active_slaves[i]] = 0;
        }
 
-       internals->link_status_polling_enabled = 0;
-       for (i = 0; i < internals->slave_count; i++)
-               internals->slaves[i].last_link_status = 0;
-
        eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
        eth_dev->data->dev_started = 0;
+
+       internals->link_status_polling_enabled = 0;
+       for (i = 0; i < internals->slave_count; i++) {
+               internals->slaves[i].last_link_status = 0;
+               rte_eth_dev_stop(internals->slaves[i].port_id);
+               deactivate_slave(eth_dev, internals->slaves[i].port_id);
+       }
 }
 
 void
@@ -2303,6 +2253,16 @@ bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        dev_info->max_rx_queues = max_nb_rx_queues;
        dev_info->max_tx_queues = max_nb_tx_queues;
 
+       memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
+              sizeof(dev_info->default_rxconf));
+       memcpy(&dev_info->default_txconf, &internals->default_txconf,
+              sizeof(dev_info->default_txconf));
+
+       memcpy(&dev_info->rx_desc_lim, &internals->rx_desc_lim,
+              sizeof(dev_info->rx_desc_lim));
+       memcpy(&dev_info->tx_desc_lim, &internals->tx_desc_lim,
+              sizeof(dev_info->tx_desc_lim));
+
        /**
         * If dedicated hw queues enabled for link bonding device in LACP mode
         * then we need to reduce the maximum number of data path queues by 1.
@@ -3123,6 +3083,14 @@ bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
        /* Initially allow to choose any offload type */
        internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
 
+       memset(&internals->default_rxconf, 0,
+              sizeof(internals->default_rxconf));
+       memset(&internals->default_txconf, 0,
+              sizeof(internals->default_txconf));
+
+       memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
+       memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
+
        memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
        memset(internals->slaves, 0, sizeof(internals->slaves));
 
@@ -3162,10 +3130,9 @@ bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
 
 err:
        rte_free(internals);
-       if (eth_dev != NULL) {
-               rte_free(eth_dev->data->mac_addrs);
-               rte_eth_dev_release_port(eth_dev);
-       }
+       if (eth_dev != NULL)
+               eth_dev->data->dev_private = NULL;
+       rte_eth_dev_release_port(eth_dev);
        return -1;
 }
 
@@ -3186,8 +3153,7 @@ bond_probe(struct rte_vdev_device *dev)
        name = rte_vdev_device_name(dev);
        RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
 
-       if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
-           strlen(rte_vdev_device_args(dev)) == 0) {
+       if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                eth_dev = rte_eth_dev_attach_secondary(name);
                if (!eth_dev) {
                        RTE_BOND_LOG(ERR, "Failed to probe %s", name);
@@ -3302,6 +3268,9 @@ bond_remove(struct rte_vdev_device *dev)
        if (eth_dev == NULL)
                return -ENODEV;
 
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return rte_eth_dev_release_port(eth_dev);
+
        RTE_ASSERT(eth_dev->device == &dev->device);
 
        internals = eth_dev->data->dev_private;
@@ -3324,8 +3293,6 @@ bond_remove(struct rte_vdev_device *dev)
        rte_mempool_free(internals->mode6.mempool);
        rte_bitmap_free(internals->vlan_filter_bmp);
        rte_free(internals->vlan_filter_bmpmem);
-       rte_free(eth_dev->data->dev_private);
-       rte_free(eth_dev->data->mac_addrs);
 
        rte_eth_dev_release_port(eth_dev);
 
@@ -3353,16 +3320,30 @@ bond_ethdev_configure(struct rte_eth_dev *dev)
 
        unsigned i, j;
 
-       /* If RSS is enabled, fill table and key with default values */
+       /*
+        * If RSS is enabled, fill table with default values and
+        * set key to the value specified in the port RSS configuration.
+        * Fall back to the default RSS key if no key is specified.
+        */
        if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
-               dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = internals->rss_key;
-               dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len = 0;
-               memcpy(internals->rss_key, default_rss_key, 40);
+               if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
+                       internals->rss_key_len =
+                               dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
+                       memcpy(internals->rss_key,
+                              dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
+                              internals->rss_key_len);
+               } else {
+                       internals->rss_key_len = sizeof(default_rss_key);
+                       memcpy(internals->rss_key, default_rss_key,
+                              internals->rss_key_len);
+               }
 
                for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
                        internals->reta_conf[i].mask = ~0LL;
                        for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
-                               internals->reta_conf[i].reta[j] = j % dev->data->nb_rx_queues;
+                               internals->reta_conf[i].reta[j] =
+                                               (i * RTE_RETA_GROUP_SIZE + j) %
+                                               dev->data->nb_rx_queues;
                }
        }
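Two behaviours change in this hunk: the bonding device now honours an application-supplied RSS key (falling back to the built-in default key only when none is given), and the RETA fill uses the global entry index rather than the per-group offset, so the queue pattern stays continuous across 64-entry groups when nb_rx_queues does not divide RTE_RETA_GROUP_SIZE. A self-contained check of the fill formula, with hypothetical constants standing in for the RTE_* defines:

    #include <stdio.h>

    #define RETA_GROUP_SIZE 64   /* stands in for RTE_RETA_GROUP_SIZE */
    #define RETA_GROUPS     8    /* 512-entry table, as in the driver */

    int main(void)
    {
            unsigned int nb_rx_queues = 3;
            unsigned int i, j;

            for (i = 0; i < RETA_GROUPS; i++)
                    for (j = 0; j < RETA_GROUP_SIZE; j++)
                            printf("reta[%u] = %u\n",
                                   i * RETA_GROUP_SIZE + j,
                                   (i * RETA_GROUP_SIZE + j) % nb_rx_queues);
            return 0;
    }

With the old per-group formula (j % nb_rx_queues), entries 63 and 64 would both map to queue 0 here; the global index keeps the 0,1,2 cycle unbroken across the group boundary.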
 
@@ -3618,7 +3599,7 @@ int bond_logtype;
 
 RTE_INIT(bond_init_log)
 {
-       bond_logtype = rte_log_register("pmd.net.bon");
+       bond_logtype = rte_log_register("pmd.net.bond");
        if (bond_logtype >= 0)
                rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
 }
index 43e0e44..3ea5d68 100644 (file)
@@ -5,9 +5,11 @@
 #ifndef _RTE_ETH_BOND_PRIVATE_H_
 #define _RTE_ETH_BOND_PRIVATE_H_
 
+#include <stdint.h>
 #include <sys/queue.h>
 
 #include <rte_ethdev_driver.h>
+#include <rte_flow.h>
 #include <rte_spinlock.h>
 #include <rte_bitmap.h>
 #include <rte_flow_driver.h>
@@ -93,7 +95,8 @@ struct rte_flow {
        /* Slaves flows */
        struct rte_flow *flows[RTE_MAX_ETHPORTS];
        /* Flow description for synchronization */
-       struct rte_flow_desc *fd;
+       struct rte_flow_conv_rule rule;
+       uint8_t rule_data[];
 };
 
 typedef void (*burst_xmit_hash_t)(struct rte_mbuf **buf, uint16_t nb_pkts,
@@ -160,6 +163,11 @@ struct bond_dev_private {
        /** Bit mask of RSS offloads, the bit offset also means flow type */
        uint64_t flow_type_rss_offloads;
 
+       struct rte_eth_rxconf default_rxconf;   /**< Default RxQ conf. */
+       struct rte_eth_txconf default_txconf;   /**< Default TxQ conf. */
+       struct rte_eth_desc_lim rx_desc_lim;    /**< Rx descriptor limits */
+       struct rte_eth_desc_lim tx_desc_lim;    /**< Tx descriptor limits */
+
        uint16_t reta_size;
        struct rte_eth_rss_reta_entry64 reta_conf[ETH_RSS_RETA_SIZE_512 /
                        RTE_RETA_GROUP_SIZE];
index 5d66c4b..68466f1 100644 (file)
@@ -53,6 +53,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_CXGBE_PMD) += cxgbe_filter.c
 SRCS-$(CONFIG_RTE_LIBRTE_CXGBE_PMD) += cxgbe_flow.c
 SRCS-$(CONFIG_RTE_LIBRTE_CXGBE_PMD) += t4_hw.c
 SRCS-$(CONFIG_RTE_LIBRTE_CXGBE_PMD) += clip_tbl.c
+SRCS-$(CONFIG_RTE_LIBRTE_CXGBE_PMD) += mps_tcam.c
+SRCS-$(CONFIG_RTE_LIBRTE_CXGBE_PMD) += l2t.c
 SRCS-$(CONFIG_RTE_LIBRTE_CXGBE_PMD) += t4vf_hw.c
 
 include $(RTE_SDK)/mk/rte.lib.mk
index e98dd21..47cfc5f 100644 (file)
@@ -324,7 +324,11 @@ struct adapter {
 
        unsigned int clipt_start; /* CLIP table start */
        unsigned int clipt_end;   /* CLIP table end */
+       unsigned int l2t_start;   /* Layer 2 table start */
+       unsigned int l2t_end;     /* Layer 2 table end */
        struct clip_tbl *clipt;   /* CLIP table */
+       struct l2t_data *l2t;     /* Layer 2 table */
+       struct mpstcam_table *mpstcam;
 
        struct tid_info tids;     /* Info used to access TID related tables */
 };
index 157201d..fd20066 100644 (file)
@@ -157,6 +157,7 @@ struct tp_params {
        int port_shift;
        int protocol_shift;
        int ethertype_shift;
+       int macmatch_shift;
 
        u64 hash_filter_mask;
 };
@@ -270,6 +271,7 @@ struct adapter_params {
 
        bool ulptx_memwrite_dsgl;          /* use of T5 DSGL allowed */
        u8 fw_caps_support;               /* 32-bit Port Capabilities */
+       u8 filter2_wr_support;            /* FW support for FILTER2_WR */
 };
 
 /* Firmware Port Capabilities types.
@@ -388,6 +390,12 @@ int t4_free_vi(struct adapter *adap, unsigned int mbox,
 int t4_set_rxmode(struct adapter *adap, unsigned int mbox, unsigned int viid,
                  int mtu, int promisc, int all_multi, int bcast, int vlanex,
                  bool sleep_ok);
+int t4_free_raw_mac_filt(struct adapter *adap, unsigned int viid,
+                        const u8 *addr, const u8 *mask, unsigned int idx,
+                        u8 lookup_type, u8 port_id, bool sleep_ok);
+int t4_alloc_raw_mac_filt(struct adapter *adap, unsigned int viid,
+                         const u8 *addr, const u8 *mask, unsigned int idx,
+                         u8 lookup_type, u8 port_id, bool sleep_ok);
 int t4_change_mac(struct adapter *adap, unsigned int mbox, unsigned int viid,
                  int idx, const u8 *addr, bool persist, bool add_smt);
 int t4_enable_vi_params(struct adapter *adap, unsigned int mbox,
index 31762c9..701e0b1 100644 (file)
@@ -4161,6 +4161,112 @@ int t4_set_rxmode(struct adapter *adap, unsigned int mbox, unsigned int viid,
                return t4vf_wr_mbox(adap, &c, sizeof(c), NULL);
 }
 
+/**
+ *     t4_alloc_raw_mac_filt - Adds a raw MAC entry in the MPS TCAM
+ *     @adap: the adapter
+ *     @viid: the VI id
+ *     @addr: the MAC address
+ *     @mask: the mask
+ *     @idx: index at which to add this entry
+ *     @lookup_type: MAC address for inner (1) or outer (0) header
+ *     @port_id: the port index
+ *     @sleep_ok: call is allowed to sleep
+ *
+ *     Adds the MAC entry at the specified index using the raw MAC interface.
+ *
+ *     Returns a negative error number or the allocated index for this MAC.
+ */
+int t4_alloc_raw_mac_filt(struct adapter *adap, unsigned int viid,
+                         const u8 *addr, const u8 *mask, unsigned int idx,
+                         u8 lookup_type, u8 port_id, bool sleep_ok)
+{
+       int ret = 0;
+       struct fw_vi_mac_cmd c;
+       struct fw_vi_mac_raw *p = &c.u.raw;
+       u32 val;
+
+       memset(&c, 0, sizeof(c));
+       c.op_to_viid = cpu_to_be32(V_FW_CMD_OP(FW_VI_MAC_CMD) |
+                                  F_FW_CMD_REQUEST | F_FW_CMD_WRITE |
+                                  V_FW_VI_MAC_CMD_VIID(viid));
+       val = V_FW_CMD_LEN16(1) |
+             V_FW_VI_MAC_CMD_ENTRY_TYPE(FW_VI_MAC_TYPE_RAW);
+       c.freemacs_to_len16 = cpu_to_be32(val);
+
+       /* Specify the raw index at which to add this entry */
+       p->raw_idx_pkd = cpu_to_be32(V_FW_VI_MAC_CMD_RAW_IDX(idx));
+
+       /* Lookup Type. Outer header: 0, Inner header: 1 */
+       p->data0_pkd = cpu_to_be32(V_DATALKPTYPE(lookup_type) |
+                                  V_DATAPORTNUM(port_id));
+       /* Lookup mask and port mask */
+       p->data0m_pkd = cpu_to_be64(V_DATALKPTYPE(M_DATALKPTYPE) |
+                                   V_DATAPORTNUM(M_DATAPORTNUM));
+
+       /* Copy the address and the mask */
+       memcpy((u8 *)&p->data1[0] + 2, addr, ETHER_ADDR_LEN);
+       memcpy((u8 *)&p->data1m[0] + 2, mask, ETHER_ADDR_LEN);
+
+       ret = t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, sleep_ok);
+       if (ret == 0) {
+               ret = G_FW_VI_MAC_CMD_RAW_IDX(be32_to_cpu(p->raw_idx_pkd));
+               if (ret != (int)idx)
+                       ret = -ENOMEM;
+       }
+
+       return ret;
+}
+
+/**
+ *     t4_free_raw_mac_filt - Frees a raw MAC entry in the MPS TCAM
+ *     @adap: the adapter
+ *     @viid: the VI id
+ *     @addr: the MAC address
+ *     @mask: the mask
+ *     @idx: index of the entry in mps tcam
+ *     @lookup_type: MAC address for inner (1) or outer (0) header
+ *     @port_id: the port index
+ *     @sleep_ok: call is allowed to sleep
+ *
+ *     Removes the MAC entry at the specified index using the raw MAC interface.
+ *
+ *     Returns a negative error number on failure.
+ */
+int t4_free_raw_mac_filt(struct adapter *adap, unsigned int viid,
+                        const u8 *addr, const u8 *mask, unsigned int idx,
+                        u8 lookup_type, u8 port_id, bool sleep_ok)
+{
+       struct fw_vi_mac_cmd c;
+       struct fw_vi_mac_raw *p = &c.u.raw;
+       u32 raw;
+
+       memset(&c, 0, sizeof(c));
+       c.op_to_viid = cpu_to_be32(V_FW_CMD_OP(FW_VI_MAC_CMD) |
+                                  F_FW_CMD_REQUEST | F_FW_CMD_WRITE |
+                                  V_FW_CMD_EXEC(0) |
+                                  V_FW_VI_MAC_CMD_VIID(viid));
+       raw = V_FW_VI_MAC_CMD_ENTRY_TYPE(FW_VI_MAC_TYPE_RAW);
+       c.freemacs_to_len16 = cpu_to_be32(V_FW_VI_MAC_CMD_FREEMACS(0) |
+                                         raw |
+                                         V_FW_CMD_LEN16(1));
+
+       p->raw_idx_pkd = cpu_to_be32(V_FW_VI_MAC_CMD_RAW_IDX(idx) |
+                                    FW_VI_MAC_ID_BASED_FREE);
+
+       /* Lookup Type. Outer header: 0, Inner header: 1 */
+       p->data0_pkd = cpu_to_be32(V_DATALKPTYPE(lookup_type) |
+                                  V_DATAPORTNUM(port_id));
+       /* Lookup mask and port mask */
+       p->data0m_pkd = cpu_to_be64(V_DATALKPTYPE(M_DATALKPTYPE) |
+                                   V_DATAPORTNUM(M_DATAPORTNUM));
+
+       /* Copy the address and the mask */
+       memcpy((u8 *)&p->data1[0] + 2, addr, ETHER_ADDR_LEN);
+       memcpy((u8 *)&p->data1m[0] + 2, mask, ETHER_ADDR_LEN);
+
+       return t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, sleep_ok);
+}
+
 /**
  * t4_change_mac - modifies the exact-match filter for a MAC address
  * @adap: the adapter
@@ -5145,6 +5251,8 @@ int t4_init_tp_params(struct adapter *adap)
                                                               F_PROTOCOL);
        adap->params.tp.ethertype_shift = t4_filter_field_shift(adap,
                                                                F_ETHERTYPE);
+       adap->params.tp.macmatch_shift = t4_filter_field_shift(adap,
+                                                              F_MACMATCH);
 
        /*
         * If TP_INGRESS_CONFIG.VNID == 0, then TP_VLAN_PRI_MAP.VNIC_ID
index 5d433c9..9e052b0 100644 (file)
@@ -11,7 +11,9 @@ enum {
        CPL_SET_TCB_FIELD     = 0x5,
        CPL_ABORT_REQ         = 0xA,
        CPL_ABORT_RPL         = 0xB,
+       CPL_L2T_WRITE_REQ     = 0x12,
        CPL_TID_RELEASE       = 0x1A,
+       CPL_L2T_WRITE_RPL     = 0x23,
        CPL_ACT_OPEN_RPL      = 0x25,
        CPL_ABORT_RPL_RSS     = 0x2D,
        CPL_SET_TCB_RPL       = 0x3A,
@@ -30,6 +32,7 @@ enum CPL_error {
 
 enum {
        ULP_MODE_NONE          = 0,
+       ULP_MODE_TCPDDP        = 5,
 };
 
 enum {
@@ -66,6 +69,9 @@ union opcode_tid {
 #define M_TID_TID    0x3fff
 #define G_TID_TID(x) (((x) >> S_TID_TID) & M_TID_TID)
 
+#define S_TID_QID    14
+#define V_TID_QID(x) ((x) << S_TID_QID)
+
 struct rss_header {
        __u8 opcode;
 #if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN
@@ -133,6 +139,12 @@ struct work_request_hdr {
 #define V_TCAM_BYPASS(x) ((__u64)(x) << S_TCAM_BYPASS)
 #define F_TCAM_BYPASS    V_TCAM_BYPASS(1ULL)
 
+#define S_L2T_IDX    36
+#define V_L2T_IDX(x) ((__u64)(x) << S_L2T_IDX)
+
+#define S_NAGLE    49
+#define V_NAGLE(x) ((__u64)(x) << S_NAGLE)
+
 /* option 2 fields */
 #define S_RSS_QUEUE    0
 #define V_RSS_QUEUE(x) ((x) << S_RSS_QUEUE)
@@ -151,6 +163,9 @@ struct work_request_hdr {
 #define S_CCTRL_ECN    27
 #define V_CCTRL_ECN(x) ((x) << S_CCTRL_ECN)
 
+#define S_SACK_EN    30
+#define V_SACK_EN(x) ((x) << S_SACK_EN)
+
 #define S_T5_OPT_2_VALID    31
 #define V_T5_OPT_2_VALID(x) ((x) << S_T5_OPT_2_VALID)
 #define F_T5_OPT_2_VALID    V_T5_OPT_2_VALID(1U)
@@ -421,6 +436,35 @@ struct cpl_rx_pkt {
        __be16 err_vec;
 };
 
+struct cpl_l2t_write_req {
+       WR_HDR;
+       union opcode_tid ot;
+       __be16 params;
+       __be16 l2t_idx;
+       __be16 vlan;
+       __u8   dst_mac[6];
+};
+
+/* cpl_l2t_write_req.params fields */
+#define S_L2T_W_PORT    8
+#define V_L2T_W_PORT(x) ((x) << S_L2T_W_PORT)
+
+#define S_L2T_W_LPBK    10
+#define V_L2T_W_LPBK(x) ((x) << S_L2T_W_LPBK)
+
+#define S_L2T_W_ARPMISS         11
+#define V_L2T_W_ARPMISS(x)      ((x) << S_L2T_W_ARPMISS)
+
+#define S_L2T_W_NOREPLY    15
+#define V_L2T_W_NOREPLY(x) ((x) << S_L2T_W_NOREPLY)
+
+struct cpl_l2t_write_rpl {
+       RSS_HDR
+       union opcode_tid ot;
+       __u8 status;
+       __u8 rsvd[3];
+};
+
 /* rx_pkt.l2info fields */
 #define S_RXF_UDP    22
 #define V_RXF_UDP(x) ((x) << S_RXF_UDP)
index 6f872ed..af8c741 100644 (file)
 #define MPS_T5_CLS_SRAM_H(idx) (A_MPS_T5_CLS_SRAM_H + (idx) * 8)
 #define NUM_MPS_T5_CLS_SRAM_H_INSTANCES 512
 
+#define S_DATAPORTNUM    12
+#define M_DATAPORTNUM    0xfU
+#define V_DATAPORTNUM(x) ((x) << S_DATAPORTNUM)
+
+#define S_DATALKPTYPE    10
+#define M_DATALKPTYPE    0x3U
+#define V_DATALKPTYPE(x) ((x) << S_DATALKPTYPE)
+
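These defines follow the S_/M_/V_/G_ convention used throughout the Chelsio headers: S_ is the field's bit offset, M_ its right-aligned width mask, V_ packs a value into a register word, and G_ extracts it again. A self-contained demonstration with a hypothetical field FOO occupying bits 12:10 (not a real register field):

    #include <assert.h>

    #define S_FOO    10
    #define M_FOO    0x7U
    #define V_FOO(x) ((x) << S_FOO)
    #define G_FOO(x) (((x) >> S_FOO) & M_FOO)

    int main(void)
    {
            unsigned int reg = V_FOO(5U);   /* place 101b at bits 12:10 */

            assert(reg == 0x1400);
            assert(G_FOO(reg) == 5U);
            return 0;
    }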
 /* registers for module SGE */
 #define SGE_BASE_ADDR 0x1000
 
index 25435f9..68cda77 100644 (file)
@@ -6,6 +6,9 @@
 #ifndef _T4_TCB_DEFS_H
 #define _T4_TCB_DEFS_H
 
+/* 95:32 */
+#define W_TCB_T_FLAGS    1
+
 /* 105:96 */
 #define W_TCB_RSS_INFO    3
 #define S_TCB_RSS_INFO    0
@@ -23,4 +26,6 @@
 #define M_TCB_T_RTT_TS_RECENT_AGE    0xffffffffULL
 #define V_TCB_T_RTT_TS_RECENT_AGE(x) ((x) << S_TCB_T_RTT_TS_RECENT_AGE)
 
+#define S_TF_CCTRL_RFR    62
+
 #endif /* _T4_TCB_DEFS_H */
index e80b58a..06d3ef3 100644 (file)
@@ -61,6 +61,7 @@ enum fw_wr_opcodes {
        FW_ETH_TX_PKTS_WR       = 0x09,
        FW_ETH_TX_PKT_VM_WR     = 0x11,
        FW_ETH_TX_PKTS_VM_WR    = 0x12,
+       FW_FILTER2_WR           = 0x77,
        FW_ETH_TX_PKTS2_WR      = 0x78,
 };
 
@@ -165,7 +166,7 @@ enum fw_filter_wr_cookie {
        FW_FILTER_WR_EINVAL,
 };
 
-struct fw_filter_wr {
+struct fw_filter2_wr {
        __be32 op_pkd;
        __be32 len16_pkd;
        __be64 r3;
@@ -195,6 +196,19 @@ struct fw_filter_wr {
        __be16 fpm;
        __be16 r7;
        __u8   sma[6];
+       __be16 r8;
+       __u8   filter_type_swapmac;
+       __u8   natmode_to_ulp_type;
+       __be16 newlport;
+       __be16 newfport;
+       __u8   newlip[16];
+       __u8   newfip[16];
+       __be32 natseqcheck;
+       __be32 r9;
+       __be64 r10;
+       __be64 r11;
+       __be64 r12;
+       __be64 r13;
 };
 
 #define S_FW_FILTER_WR_TID     12
@@ -300,6 +314,15 @@ struct fw_filter_wr {
 #define S_FW_FILTER_WR_MATCHTYPEM      0
 #define V_FW_FILTER_WR_MATCHTYPEM(x)   ((x) << S_FW_FILTER_WR_MATCHTYPEM)
 
+#define S_FW_FILTER2_WR_SWAPMAC                0
+#define V_FW_FILTER2_WR_SWAPMAC(x)     ((x) << S_FW_FILTER2_WR_SWAPMAC)
+
+#define S_FW_FILTER2_WR_NATMODE                5
+#define V_FW_FILTER2_WR_NATMODE(x)     ((x) << S_FW_FILTER2_WR_NATMODE)
+
+#define S_FW_FILTER2_WR_ULP_TYPE       0
+#define V_FW_FILTER2_WR_ULP_TYPE(x)    ((x) << S_FW_FILTER2_WR_ULP_TYPE)
+
 /******************************************************************************
  *  C O M M A N D s
  *********************/
@@ -655,6 +678,7 @@ enum fw_params_param_dev {
        FW_PARAMS_PARAM_DEV_FWREV       = 0x0B, /* fw version */
        FW_PARAMS_PARAM_DEV_TPREV       = 0x0C, /* tp version */
        FW_PARAMS_PARAM_DEV_ULPTX_MEMWRITE_DSGL = 0x17,
+       FW_PARAMS_PARAM_DEV_FILTER2_WR  = 0x1D,
 };
 
 /*
@@ -665,6 +689,8 @@ enum fw_params_param_pfvf {
        FW_PARAMS_PARAM_PFVF_CLIP_END = 0x04,
        FW_PARAMS_PARAM_PFVF_FILTER_START = 0x05,
        FW_PARAMS_PARAM_PFVF_FILTER_END = 0x06,
+       FW_PARAMS_PARAM_PFVF_L2T_START = 0x13,
+       FW_PARAMS_PARAM_PFVF_L2T_END = 0x14,
        FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP = 0x31,
        FW_PARAMS_PARAM_PFVF_PORT_CAPS32 = 0x3A
 };
@@ -1280,12 +1306,17 @@ struct fw_vi_cmd {
 /* Special VI_MAC command index ids */
 #define FW_VI_MAC_ADD_MAC              0x3FF
 #define FW_VI_MAC_ADD_PERSIST_MAC      0x3FE
+#define FW_VI_MAC_ID_BASED_FREE         0x3FC
 
 enum fw_vi_mac_smac {
        FW_VI_MAC_MPS_TCAM_ENTRY,
        FW_VI_MAC_SMT_AND_MPSTCAM
 };
 
+enum fw_vi_mac_entry_types {
+       FW_VI_MAC_TYPE_RAW = 0x2,
+};
+
 struct fw_vi_mac_cmd {
        __be32 op_to_viid;
        __be32 freemacs_to_len16;
@@ -1297,6 +1328,13 @@ struct fw_vi_mac_cmd {
                struct fw_vi_mac_hash {
                        __be64 hashvec;
                } hash;
+               struct fw_vi_mac_raw {
+                       __be32 raw_idx_pkd;
+                       __be32 data0_pkd;
+                       __be32 data1[2];
+                       __be64 data0m_pkd;
+                       __be32 data1m[2];
+               } raw;
        } u;
 };
 
@@ -1306,6 +1344,12 @@ struct fw_vi_mac_cmd {
 #define G_FW_VI_MAC_CMD_VIID(x)        \
        (((x) >> S_FW_VI_MAC_CMD_VIID) & M_FW_VI_MAC_CMD_VIID)
 
+#define S_FW_VI_MAC_CMD_FREEMACS       31
+#define V_FW_VI_MAC_CMD_FREEMACS(x)    ((x) << S_FW_VI_MAC_CMD_FREEMACS)
+
+#define S_FW_VI_MAC_CMD_ENTRY_TYPE      23
+#define V_FW_VI_MAC_CMD_ENTRY_TYPE(x)   ((x) << S_FW_VI_MAC_CMD_ENTRY_TYPE)
+
 #define S_FW_VI_MAC_CMD_VALID          15
 #define M_FW_VI_MAC_CMD_VALID          0x1
 #define V_FW_VI_MAC_CMD_VALID(x)       ((x) << S_FW_VI_MAC_CMD_VALID)
@@ -1325,6 +1369,12 @@ struct fw_vi_mac_cmd {
 #define G_FW_VI_MAC_CMD_IDX(x) \
        (((x) >> S_FW_VI_MAC_CMD_IDX) & M_FW_VI_MAC_CMD_IDX)
 
+#define S_FW_VI_MAC_CMD_RAW_IDX         16
+#define M_FW_VI_MAC_CMD_RAW_IDX         0xffff
+#define V_FW_VI_MAC_CMD_RAW_IDX(x)      ((x) << S_FW_VI_MAC_CMD_RAW_IDX)
+#define G_FW_VI_MAC_CMD_RAW_IDX(x)      \
+       (((x) >> S_FW_VI_MAC_CMD_RAW_IDX) & M_FW_VI_MAC_CMD_RAW_IDX)
+
 struct fw_vi_rxmode_cmd {
        __be32 op_to_viid;
        __be32 retval_len16;
index 5e6f5c9..eb58f88 100644 (file)
                                    ETH_RSS_IPV6_UDP_EX)
 #define CXGBE_RSS_HF_ALL (ETH_RSS_IP | ETH_RSS_TCP | ETH_RSS_UDP)
 
+/* Tx/Rx Offloads supported */
+#define CXGBE_TX_OFFLOADS (DEV_TX_OFFLOAD_VLAN_INSERT | \
+                          DEV_TX_OFFLOAD_IPV4_CKSUM | \
+                          DEV_TX_OFFLOAD_UDP_CKSUM | \
+                          DEV_TX_OFFLOAD_TCP_CKSUM | \
+                          DEV_TX_OFFLOAD_TCP_TSO)
+
+#define CXGBE_RX_OFFLOADS (DEV_RX_OFFLOAD_VLAN_STRIP | \
+                          DEV_RX_OFFLOAD_IPV4_CKSUM | \
+                          DEV_RX_OFFLOAD_UDP_CKSUM | \
+                          DEV_RX_OFFLOAD_TCP_CKSUM | \
+                          DEV_RX_OFFLOAD_JUMBO_FRAME | \
+                          DEV_RX_OFFLOAD_SCATTER)
+
 #define CXGBE_DEVARG_KEEP_OVLAN "keep_ovlan"
 #define CXGBE_DEVARG_FORCE_LINK_UP "force_link_up"
 
index 4dcad7a..b2f83ea 100644 (file)
  */
 #include "t4_pci_id_tbl.h"
 
-#define CXGBE_TX_OFFLOADS (DEV_TX_OFFLOAD_VLAN_INSERT |\
-                          DEV_TX_OFFLOAD_IPV4_CKSUM |\
-                          DEV_TX_OFFLOAD_UDP_CKSUM |\
-                          DEV_TX_OFFLOAD_TCP_CKSUM |\
-                          DEV_TX_OFFLOAD_TCP_TSO)
-
-#define CXGBE_RX_OFFLOADS (DEV_RX_OFFLOAD_VLAN_STRIP |\
-                          DEV_RX_OFFLOAD_CRC_STRIP |\
-                          DEV_RX_OFFLOAD_IPV4_CKSUM |\
-                          DEV_RX_OFFLOAD_JUMBO_FRAME |\
-                          DEV_RX_OFFLOAD_UDP_CKSUM |\
-                          DEV_RX_OFFLOAD_TCP_CKSUM)
-
 uint16_t cxgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                         uint16_t nb_pkts)
 {
@@ -341,6 +328,7 @@ void cxgbe_dev_close(struct rte_eth_dev *eth_dev)
 int cxgbe_dev_start(struct rte_eth_dev *eth_dev)
 {
        struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
+       struct rte_eth_rxmode *rx_conf = &eth_dev->data->dev_conf.rxmode;
        struct adapter *adapter = pi->adapter;
        int err = 0, i;
 
@@ -361,6 +349,11 @@ int cxgbe_dev_start(struct rte_eth_dev *eth_dev)
                        goto out;
        }
 
+       if (rx_conf->offloads & DEV_RX_OFFLOAD_SCATTER)
+               eth_dev->data->scattered_rx = 1;
+       else
+               eth_dev->data->scattered_rx = 0;
+
        cxgbe_enable_rx_queues(pi);
 
        err = setup_rss(pi);
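cxgbe_dev_start() now mirrors the negotiated DEV_RX_OFFLOAD_SCATTER flag into data->scattered_rx (and cxgbe_dev_stop() clears it below), which is how an ethdev PMD signals that the scatter-aware receive path must be used. A minimal sketch of the pattern, assuming the usual ethdev driver structures; the wrapper name is hypothetical:

    #include <rte_ethdev_driver.h>

    /* Mirror the SCATTER rx offload into the scattered_rx flag that the
     * ethdev layer consults when selecting the rx burst path. */
    static void
    sync_scattered_rx(struct rte_eth_dev *dev)
    {
            struct rte_eth_rxmode *rx = &dev->data->dev_conf.rxmode;

            dev->data->scattered_rx =
                    (rx->offloads & DEV_RX_OFFLOAD_SCATTER) ? 1 : 0;
    }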
@@ -407,26 +400,16 @@ void cxgbe_dev_stop(struct rte_eth_dev *eth_dev)
         *  have been disabled
         */
        t4_sge_eth_clear_queues(pi);
+       eth_dev->data->scattered_rx = 0;
 }
 
 int cxgbe_dev_configure(struct rte_eth_dev *eth_dev)
 {
        struct port_info *pi = (struct port_info *)(eth_dev->data->dev_private);
        struct adapter *adapter = pi->adapter;
-       uint64_t configured_offloads;
        int err;
 
        CXGBE_FUNC_TRACE();
-       configured_offloads = eth_dev->data->dev_conf.rxmode.offloads;
-
-       /* KEEP_CRC offload flag is not supported by PMD
-        * can remove the below block when DEV_RX_OFFLOAD_CRC_STRIP removed
-        */
-       if (rte_eth_dev_must_keep_crc(configured_offloads)) {
-               dev_info(adapter, "can't disable hw crc strip\n");
-               eth_dev->data->dev_conf.rxmode.offloads |=
-                       DEV_RX_OFFLOAD_CRC_STRIP;
-       }
 
        if (!(adapter->flags & FW_QUEUE_BOUND)) {
                err = setup_sge_fwevtq(adapter);
@@ -1075,11 +1058,9 @@ static int cxgbe_get_regs(struct rte_eth_dev *eth_dev,
 int cxgbe_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *addr)
 {
        struct port_info *pi = (struct port_info *)(dev->data->dev_private);
-       struct adapter *adapter = pi->adapter;
        int ret;
 
-       ret = t4_change_mac(adapter, adapter->mbox, pi->viid,
-                           pi->xact_addr_filt, (u8 *)addr, true, true);
+       ret = cxgbe_mpstcam_modify(pi, (int)pi->xact_addr_filt, (u8 *)addr);
        if (ret < 0) {
                dev_err(adapter, "failed to set mac addr; err = %d\n",
                        ret);
index 7f0d380..ef1102b 100644 (file)
@@ -8,6 +8,7 @@
 #include "t4_regs.h"
 #include "cxgbe_filter.h"
 #include "clip_tbl.h"
+#include "l2t.h"
 
 /**
  * Initialize Hash Filters
@@ -65,7 +66,8 @@ int validate_filter(struct adapter *adapter, struct ch_filter_specification *fs)
 #define U(_mask, _field) \
        (!(fconf & (_mask)) && S(_field))
 
-       if (U(F_PORT, iport) || U(F_ETHERTYPE, ethtype) || U(F_PROTOCOL, proto))
+       if (U(F_PORT, iport) || U(F_ETHERTYPE, ethtype) ||
+           U(F_PROTOCOL, proto) || U(F_MACMATCH, macidx))
                return -EOPNOTSUPP;
 
 #undef S
@@ -87,6 +89,12 @@ int validate_filter(struct adapter *adapter, struct ch_filter_specification *fs)
        if (fs->val.iport >= adapter->params.nports)
                return -ERANGE;
 
+       if (!fs->cap && fs->nat_mode && !adapter->params.filter2_wr_support)
+               return -EOPNOTSUPP;
+
+       if (!fs->cap && fs->swapmac && !adapter->params.filter2_wr_support)
+               return -EOPNOTSUPP;
+
        return 0;
 }
 
@@ -164,6 +172,16 @@ static void set_tcb_field(struct adapter *adapter, unsigned int ftid,
        t4_mgmt_tx(ctrlq, mbuf);
 }
 
+/**
+ * Set one of the t_flags bits in the TCB.
+ */
+static void set_tcb_tflag(struct adapter *adap, unsigned int ftid,
+                         unsigned int bit_pos, unsigned int val, int no_reply)
+{
+       set_tcb_field(adap, ftid,  W_TCB_T_FLAGS, 1ULL << bit_pos,
+                     (unsigned long long)val << bit_pos, no_reply);
+}
+
 /**
  * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command.
  */
@@ -257,6 +275,8 @@ static u64 hash_filter_ntuple(const struct filter_entry *f)
 
        if (tp->ethertype_shift >= 0 && f->fs.mask.ethtype)
                ntuple |= (u64)(f->fs.val.ethtype) << tp->ethertype_shift;
+       if (tp->macmatch_shift >= 0 && f->fs.mask.macidx)
+               ntuple |= (u64)(f->fs.val.macidx) << tp->macmatch_shift;
 
        if (ntuple != tp->hash_filter_mask)
                return 0;
@@ -425,7 +445,10 @@ static void mk_act_open_req6(struct filter_entry *f, struct rte_mbuf *mbuf,
        req->local_ip_lo = local_lo;
        req->peer_ip_hi = peer_hi;
        req->peer_ip_lo = peer_lo;
-       req->opt0 = cpu_to_be64(V_DELACK(f->fs.hitcnts) |
+       req->opt0 = cpu_to_be64(V_NAGLE(f->fs.newvlan == VLAN_REMOVE ||
+                                       f->fs.newvlan == VLAN_REWRITE) |
+                               V_DELACK(f->fs.hitcnts) |
+                               V_L2T_IDX(f->l2t ? f->l2t->idx : 0) |
                                V_SMAC_SEL((cxgbe_port_viid(f->dev) & 0x7F)
                                           << 1) |
                                V_TX_CHAN(f->fs.eport) |
@@ -436,6 +459,7 @@ static void mk_act_open_req6(struct filter_entry *f, struct rte_mbuf *mbuf,
                            V_RSS_QUEUE(f->fs.iq) |
                            F_T5_OPT_2_VALID |
                            F_RX_CHANNEL |
+                           V_SACK_EN(f->fs.swapmac) |
                            V_CONG_CNTRL((f->fs.action == FILTER_DROP) |
                                         (f->fs.dirsteer << 1)) |
                            V_CCTRL_ECN(f->fs.action == FILTER_SWITCH));
@@ -468,7 +492,10 @@ static void mk_act_open_req(struct filter_entry *f, struct rte_mbuf *mbuf,
                        f->fs.val.lip[2] << 16 | f->fs.val.lip[3] << 24;
        req->peer_ip = f->fs.val.fip[0] | f->fs.val.fip[1] << 8 |
                        f->fs.val.fip[2] << 16 | f->fs.val.fip[3] << 24;
-       req->opt0 = cpu_to_be64(V_DELACK(f->fs.hitcnts) |
+       req->opt0 = cpu_to_be64(V_NAGLE(f->fs.newvlan == VLAN_REMOVE ||
+                                       f->fs.newvlan == VLAN_REWRITE) |
+                               V_DELACK(f->fs.hitcnts) |
+                               V_L2T_IDX(f->l2t ? f->l2t->idx : 0) |
                                V_SMAC_SEL((cxgbe_port_viid(f->dev) & 0x7F)
                                           << 1) |
                                V_TX_CHAN(f->fs.eport) |
@@ -479,6 +506,7 @@ static void mk_act_open_req(struct filter_entry *f, struct rte_mbuf *mbuf,
                            V_RSS_QUEUE(f->fs.iq) |
                            F_T5_OPT_2_VALID |
                            F_RX_CHANNEL |
+                           V_SACK_EN(f->fs.swapmac) |
                            V_CONG_CNTRL((f->fs.action == FILTER_DROP) |
                                         (f->fs.dirsteer << 1)) |
                            V_CCTRL_ECN(f->fs.action == FILTER_SWITCH));
@@ -518,6 +546,22 @@ static int cxgbe_set_hash_filter(struct rte_eth_dev *dev,
        f->dev = dev;
        f->fs.iq = iq;
 
+       /*
+        * If the new filter requires loopback Destination MAC and/or VLAN
+        * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
+        * the filter.
+        */
+       if (f->fs.newvlan == VLAN_INSERT ||
+           f->fs.newvlan == VLAN_REWRITE) {
+               /* allocate L2T entry for new filter */
+               f->l2t = cxgbe_l2t_alloc_switching(dev, f->fs.vlan,
+                                                  f->fs.eport, f->fs.dmac);
+               if (!f->l2t) {
+                       ret = -ENOMEM;
+                       goto out_err;
+               }
+       }
+
        atid = cxgbe_alloc_atid(t, f);
        if (atid < 0)
                goto out_err;
@@ -591,6 +635,7 @@ void clear_filter(struct filter_entry *f)
 
 /**
  * t4_mk_filtdelwr - create a delete filter WR
+ * @adap: adapter context
  * @ftid: the filter ID
  * @wr: the filter work request to populate
  * @qid: ingress queue to receive the delete notification
@@ -598,10 +643,14 @@ void clear_filter(struct filter_entry *f)
  * Creates a filter work request to delete the supplied filter.  If @qid is
  * negative the delete notification is suppressed.
  */
-static void t4_mk_filtdelwr(unsigned int ftid, struct fw_filter_wr *wr, int qid)
+static void t4_mk_filtdelwr(struct adapter *adap, unsigned int ftid,
+                           struct fw_filter2_wr *wr, int qid)
 {
        memset(wr, 0, sizeof(*wr));
-       wr->op_pkd = cpu_to_be32(V_FW_WR_OP(FW_FILTER_WR));
+       if (adap->params.filter2_wr_support)
+               wr->op_pkd = cpu_to_be32(V_FW_WR_OP(FW_FILTER2_WR));
+       else
+               wr->op_pkd = cpu_to_be32(V_FW_WR_OP(FW_FILTER_WR));
        wr->len16_pkd = cpu_to_be32(V_FW_WR_LEN16(sizeof(*wr) / 16));
        wr->tid_to_iq = cpu_to_be32(V_FW_FILTER_WR_TID(ftid) |
                                    V_FW_FILTER_WR_NOREPLY(qid < 0));
@@ -619,7 +668,7 @@ static int del_filter_wr(struct rte_eth_dev *dev, unsigned int fidx)
        struct adapter *adapter = ethdev2adap(dev);
        struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
        struct rte_mbuf *mbuf;
-       struct fw_filter_wr *fwr;
+       struct fw_filter2_wr *fwr;
        struct sge_ctrl_txq *ctrlq;
        unsigned int port_id = ethdev2pinfo(dev)->port_id;
 
@@ -631,8 +680,8 @@ static int del_filter_wr(struct rte_eth_dev *dev, unsigned int fidx)
        mbuf->data_len = sizeof(*fwr);
        mbuf->pkt_len = mbuf->data_len;
 
-       fwr = rte_pktmbuf_mtod(mbuf, struct fw_filter_wr *);
-       t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id);
+       fwr = rte_pktmbuf_mtod(mbuf, struct fw_filter2_wr *);
+       t4_mk_filtdelwr(adapter, f->tid, fwr, adapter->sge.fw_evtq.abs_id);
 
        /*
         * Mark the filter as "pending" and ship off the Filter Work Request.
@@ -648,11 +697,24 @@ int set_filter_wr(struct rte_eth_dev *dev, unsigned int fidx)
        struct adapter *adapter = ethdev2adap(dev);
        struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
        struct rte_mbuf *mbuf;
-       struct fw_filter_wr *fwr;
+       struct fw_filter2_wr *fwr;
        struct sge_ctrl_txq *ctrlq;
        unsigned int port_id = ethdev2pinfo(dev)->port_id;
        int ret;
 
+       /*
+        * If the new filter requires loopback Destination MAC and/or VLAN
+        * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
+        * the filter.
+        */
+       if (f->fs.newvlan) {
+               /* allocate L2T entry for new filter */
+               f->l2t = cxgbe_l2t_alloc_switching(f->dev, f->fs.vlan,
+                                                  f->fs.eport, f->fs.dmac);
+               if (!f->l2t)
+                       return -ENOMEM;
+       }
+
        ctrlq = &adapter->sge.ctrlq[port_id];
        mbuf = rte_pktmbuf_alloc(ctrlq->mb_pool);
        if (!mbuf) {
@@ -663,13 +725,16 @@ int set_filter_wr(struct rte_eth_dev *dev, unsigned int fidx)
        mbuf->data_len = sizeof(*fwr);
        mbuf->pkt_len = mbuf->data_len;
 
-       fwr = rte_pktmbuf_mtod(mbuf, struct fw_filter_wr *);
+       fwr = rte_pktmbuf_mtod(mbuf, struct fw_filter2_wr *);
        memset(fwr, 0, sizeof(*fwr));
 
        /*
         * Construct the work request to set the filter.
         */
-       fwr->op_pkd = cpu_to_be32(V_FW_WR_OP(FW_FILTER_WR));
+       if (adapter->params.filter2_wr_support)
+               fwr->op_pkd = cpu_to_be32(V_FW_WR_OP(FW_FILTER2_WR));
+       else
+               fwr->op_pkd = cpu_to_be32(V_FW_WR_OP(FW_FILTER_WR));
        fwr->len16_pkd = cpu_to_be32(V_FW_WR_LEN16(sizeof(*fwr) / 16));
        fwr->tid_to_iq =
                cpu_to_be32(V_FW_FILTER_WR_TID(f->tid) |
@@ -680,9 +745,16 @@ int set_filter_wr(struct rte_eth_dev *dev, unsigned int fidx)
                cpu_to_be32(V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) |
                            V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) |
                            V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) |
+                           V_FW_FILTER_WR_INSVLAN
+                               (f->fs.newvlan == VLAN_INSERT ||
+                                f->fs.newvlan == VLAN_REWRITE) |
+                           V_FW_FILTER_WR_RMVLAN
+                               (f->fs.newvlan == VLAN_REMOVE ||
+                                f->fs.newvlan == VLAN_REWRITE) |
                            V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) |
                            V_FW_FILTER_WR_TXCHAN(f->fs.eport) |
-                           V_FW_FILTER_WR_PRIO(f->fs.prio));
+                           V_FW_FILTER_WR_PRIO(f->fs.prio) |
+                           V_FW_FILTER_WR_L2TIX(f->l2t ? f->l2t->idx : 0));
        fwr->ethtype = cpu_to_be16(f->fs.val.ethtype);
        fwr->ethtypem = cpu_to_be16(f->fs.mask.ethtype);
        fwr->smac_sel = 0;
@@ -691,7 +763,9 @@ int set_filter_wr(struct rte_eth_dev *dev, unsigned int fidx)
                            V_FW_FILTER_WR_RX_RPL_IQ(adapter->sge.fw_evtq.abs_id
                                                     ));
        fwr->maci_to_matchtypem =
-               cpu_to_be32(V_FW_FILTER_WR_PORT(f->fs.val.iport) |
+               cpu_to_be32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) |
+                           V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) |
+                           V_FW_FILTER_WR_PORT(f->fs.val.iport) |
                            V_FW_FILTER_WR_PORTM(f->fs.mask.iport));
        fwr->ptcl = f->fs.val.proto;
        fwr->ptclm = f->fs.mask.proto;
@@ -704,6 +778,20 @@ int set_filter_wr(struct rte_eth_dev *dev, unsigned int fidx)
        fwr->fp = cpu_to_be16(f->fs.val.fport);
        fwr->fpm = cpu_to_be16(f->fs.mask.fport);
 
+       if (adapter->params.filter2_wr_support) {
+               fwr->filter_type_swapmac =
+                        V_FW_FILTER2_WR_SWAPMAC(f->fs.swapmac);
+               fwr->natmode_to_ulp_type =
+                       V_FW_FILTER2_WR_ULP_TYPE(f->fs.nat_mode ?
+                                                ULP_MODE_TCPDDP :
+                                                ULP_MODE_NONE) |
+                       V_FW_FILTER2_WR_NATMODE(f->fs.nat_mode);
+               memcpy(fwr->newlip, f->fs.nat_lip, sizeof(fwr->newlip));
+               memcpy(fwr->newfip, f->fs.nat_fip, sizeof(fwr->newfip));
+               fwr->newlport = cpu_to_be16(f->fs.nat_lport);
+               fwr->newfport = cpu_to_be16(f->fs.nat_fport);
+       }
+
        /*
         * Mark the filter as "pending" and ship off the Filter Work Request.
         * When we get the Work Request Reply we'll clear the pending status.
@@ -1046,6 +1134,9 @@ void hash_filter_rpl(struct adapter *adap, const struct cpl_act_open_rpl *rpl)
                                      V_TCB_TIMESTAMP(0ULL) |
                                      V_TCB_T_RTT_TS_RECENT_AGE(0ULL),
                                      1);
+               if (f->fs.newvlan == VLAN_INSERT ||
+                   f->fs.newvlan == VLAN_REWRITE)
+                       set_tcb_tflag(adap, tid, S_TF_CCTRL_RFR, 1, 1);
                break;
        }
        default:
index af8fa75..b7bcbf5 100644 (file)
@@ -77,6 +77,7 @@ struct ch_filter_tuple {
  * Filter specification
  */
 struct ch_filter_specification {
+       void *private;
        /* Administrative fields for filter. */
        uint32_t hitcnts:1;     /* count filter hits in TCB */
        uint32_t prio:1;        /* filter has priority over active/server */
@@ -99,6 +100,22 @@ struct ch_filter_specification {
        uint32_t iq:10;         /* ingress queue */
 
        uint32_t eport:2;       /* egress port to switch packet out */
+       uint32_t swapmac:1;     /* swap SMAC/DMAC for loopback packet */
+       uint32_t newvlan:2;     /* rewrite VLAN Tag */
+       uint8_t dmac[ETHER_ADDR_LEN];   /* new destination MAC address */
+       uint16_t vlan;          /* VLAN Tag to insert */
+
+       /*
+        * Switch proxy/rewrite fields.  An ingress packet which matches a
+        * filter with "switch" set will be looped back out as an egress
+        * packet -- potentially with some header rewriting.
+        */
+       uint32_t nat_mode:3;    /* specify NAT operation mode */
+
+       uint8_t nat_lip[16];    /* local IP to use after NAT'ing */
+       uint8_t nat_fip[16];    /* foreign IP to use after NAT'ing */
+       uint16_t nat_lport;     /* local port number to use after NAT'ing */
+       uint16_t nat_fport;     /* foreign port number to use after NAT'ing */
 
        /* Filter rule value/mask pairs. */
        struct ch_filter_tuple val;
@@ -111,6 +128,23 @@ enum {
        FILTER_SWITCH
 };
 
+enum {
+       VLAN_REMOVE = 1,
+       VLAN_INSERT,
+       VLAN_REWRITE
+};
+
+enum {
+       NAT_MODE_NONE = 0,      /* No NAT performed */
+       NAT_MODE_DIP,           /* NAT on Dst IP */
+       NAT_MODE_DIP_DP,        /* NAT on Dst IP, Dst Port */
+       NAT_MODE_DIP_DP_SIP,    /* NAT on Dst IP, Dst Port and Src IP */
+       NAT_MODE_DIP_DP_SP,     /* NAT on Dst IP, Dst Port and Src Port */
+       NAT_MODE_SIP_SP,        /* NAT on Src IP and Src Port */
+       NAT_MODE_DIP_SIP_SP,    /* NAT on Dst IP, Src IP and Src Port */
+       NAT_MODE_ALL            /* NAT on entire 4-tuple */
+};
+
 enum filter_type {
        FILTER_TYPE_IPV4 = 0,
        FILTER_TYPE_IPV6,
@@ -145,6 +179,7 @@ struct filter_entry {
        u32 pending:1;              /* filter action is pending FW reply */
        struct filter_ctx *ctx;     /* caller's completion hook */
        struct clip_entry *clipt;   /* CLIP Table entry for IPv6 */
+       struct l2t_entry *l2t;      /* Layer Two Table entry for dmac */
        struct rte_eth_dev *dev;    /* Port's rte eth device */
        void *private;              /* For use by apps using filter_entry */
 
index 01c945f..54ec7e5 100644 (file)
@@ -95,6 +95,8 @@ cxgbe_fill_filter_region(struct adapter *adap,
                ntuple_mask |= (u64)fs->mask.ethtype << tp->ethertype_shift;
        if (tp->port_shift >= 0)
                ntuple_mask |= (u64)fs->mask.iport << tp->port_shift;
+       if (tp->macmatch_shift >= 0)
+               ntuple_mask |= (u64)fs->mask.macidx << tp->macmatch_shift;
 
        if (ntuple_mask != hash_filter_mask)
                return;
@@ -102,6 +104,46 @@ cxgbe_fill_filter_region(struct adapter *adap,
        fs->cap = 1;    /* use hash region */
 }
 
+static int
+ch_rte_parsetype_eth(const void *dmask, const struct rte_flow_item *item,
+                    struct ch_filter_specification *fs,
+                    struct rte_flow_error *e)
+{
+       const struct rte_flow_item_eth *spec = item->spec;
+       const struct rte_flow_item_eth *umask = item->mask;
+       const struct rte_flow_item_eth *mask;
+
+       /* If user has not given any mask, then use chelsio supported mask. */
+       mask = umask ? umask : (const struct rte_flow_item_eth *)dmask;
+
+       /* We don't support SRC_MAC filtering */
+       if (!is_zero_ether_addr(&mask->src))
+               return rte_flow_error_set(e, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+                                         item,
+                                         "src mac filtering not supported");
+
+       if (!is_zero_ether_addr(&mask->dst)) {
+               const u8 *addr = (const u8 *)&spec->dst.addr_bytes[0];
+               const u8 *m = (const u8 *)&mask->dst.addr_bytes[0];
+               struct rte_flow *flow = (struct rte_flow *)fs->private;
+               struct port_info *pi = (struct port_info *)
+                                       (flow->dev->data->dev_private);
+               int idx;
+
+               idx = cxgbe_mpstcam_alloc(pi, addr, m);
+               if (idx <= 0)
+                       return rte_flow_error_set(e, idx,
+                                                 RTE_FLOW_ERROR_TYPE_ITEM,
+                                                 NULL, "unable to allocate mac"
+                                                 " entry in h/w");
+               CXGBE_FILL_FS(idx, 0x1ff, macidx);
+       }
+
+       CXGBE_FILL_FS(be16_to_cpu(spec->type),
+                     be16_to_cpu(mask->type), ethtype);
+       return 0;
+}
+
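ch_rte_parsetype_eth() accepts destination-MAC and ethertype matches but rejects any non-zero source-MAC mask, allocating an MPS TCAM entry for the destination address and recording its index in the filter's macidx field. A hedged sketch of the spec/mask pair an application might pass for such a match (the addresses are hypothetical):

    #include <rte_byteorder.h>
    #include <rte_flow.h>

    /* Match one destination MAC plus the IPv4 ethertype; the src mask
     * stays all-zero because source-MAC filtering is rejected above. */
    static const struct rte_flow_item_eth eth_spec = {
            .dst.addr_bytes = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 },
            .type = RTE_BE16(0x0800),
    };
    static const struct rte_flow_item_eth eth_mask = {
            .dst.addr_bytes = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
            .type = RTE_BE16(0xffff),
    };

These would be wired into a struct rte_flow_item of type RTE_FLOW_ITEM_TYPE_ETH as its .spec and .mask.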
 static int
 ch_rte_parsetype_port(const void *dmask, const struct rte_flow_item *item,
                      struct ch_filter_specification *fs,
@@ -326,18 +368,200 @@ static int cxgbe_get_fidx(struct rte_flow *flow, unsigned int *fidx)
        return 0;
 }
 
+static int
+cxgbe_get_flow_item_index(const struct rte_flow_item items[], u32 type)
+{
+       const struct rte_flow_item *i;
+       int j, index = -ENOENT;
+
+       for (i = items, j = 0; i->type != RTE_FLOW_ITEM_TYPE_END; i++, j++) {
+               if (i->type == type) {
+                       index = j;
+                       break;
+               }
+       }
+
+       return index;
+}
+
+static int
+ch_rte_parse_nat(uint8_t nmode, struct ch_filter_specification *fs)
+{
+       /* nmode:
+        * BIT_0 = [src_ip],   BIT_1 = [dst_ip]
+        * BIT_2 = [src_port], BIT_3 = [dst_port]
+        *
+        * Only the cases below are supported, as per our spec.
+        */
+       switch (nmode) {
+       case 0:  /* 0000b */
+               fs->nat_mode = NAT_MODE_NONE;
+               break;
+       case 2:  /* 0010b */
+               fs->nat_mode = NAT_MODE_DIP;
+               break;
+       case 5:  /* 0101b */
+               fs->nat_mode = NAT_MODE_SIP_SP;
+               break;
+       case 7:  /* 0111b */
+               fs->nat_mode = NAT_MODE_DIP_SIP_SP;
+               break;
+       case 10: /* 1010b */
+               fs->nat_mode = NAT_MODE_DIP_DP;
+               break;
+       case 11: /* 1011b */
+               fs->nat_mode = NAT_MODE_DIP_DP_SIP;
+               break;
+       case 14: /* 1110b */
+               fs->nat_mode = NAT_MODE_DIP_DP_SP;
+               break;
+       case 15: /* 1111b */
+               fs->nat_mode = NAT_MODE_ALL;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
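The nmode bitmap is accumulated by the action parser below -- per the bitmap comment above, bit 0 records a source-IP rewrite, bit 1 a destination-IP rewrite, and bits 2/3 the corresponding port rewrites -- and ch_rte_parse_nat() then maps the finished mask onto one of the supported NAT_MODE_* values. A self-contained walk-through of one supported combination (the destination-port bit is assumed from the bitmap comment, since the SET_TP_* handling is truncated below):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint8_t nmode = 0;

            nmode |= 1 << 1;   /* SET_IPV4_DST / SET_IPV6_DST seen */
            nmode |= 1 << 3;   /* destination-port rewrite, per the bitmap */

            assert(nmode == 10);   /* 1010b -> NAT_MODE_DIP_DP */
            return 0;
    }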
 static int
 ch_rte_parse_atype_switch(const struct rte_flow_action *a,
+                         const struct rte_flow_item items[],
+                         uint8_t *nmode,
                          struct ch_filter_specification *fs,
                          struct rte_flow_error *e)
 {
+       const struct rte_flow_action_of_set_vlan_vid *vlanid;
+       const struct rte_flow_action_of_push_vlan *pushvlan;
+       const struct rte_flow_action_set_ipv4 *ipv4;
+       const struct rte_flow_action_set_ipv6 *ipv6;
+       const struct rte_flow_action_set_tp *tp_port;
        const struct rte_flow_action_phy_port *port;
+       int item_index;
 
        switch (a->type) {
+       case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+               vlanid = (const struct rte_flow_action_of_set_vlan_vid *)
+                         a->conf;
+               fs->newvlan = VLAN_REWRITE;
+               fs->vlan = vlanid->vlan_vid;
+               break;
+       case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+               pushvlan = (const struct rte_flow_action_of_push_vlan *)
+                           a->conf;
+               if (pushvlan->ethertype != ETHER_TYPE_VLAN)
+                       return rte_flow_error_set(e, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION, a,
+                                                 "only ethertype 0x8100 "
+                                                 "supported for push vlan.");
+               fs->newvlan = VLAN_INSERT;
+               break;
+       case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+               fs->newvlan = VLAN_REMOVE;
+               break;
        case RTE_FLOW_ACTION_TYPE_PHY_PORT:
                port = (const struct rte_flow_action_phy_port *)a->conf;
                fs->eport = port->index;
                break;
+       case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+               item_index = cxgbe_get_flow_item_index(items,
+                                                      RTE_FLOW_ITEM_TYPE_IPV4);
+               if (item_index < 0)
+                       return rte_flow_error_set(e, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION, a,
+                                                 "No RTE_FLOW_ITEM_TYPE_IPV4 "
+                                                 "found.");
+
+               ipv4 = (const struct rte_flow_action_set_ipv4 *)a->conf;
+               memcpy(fs->nat_fip, &ipv4->ipv4_addr, sizeof(ipv4->ipv4_addr));
+               *nmode |= 1 << 0;
+               break;
+       case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+               item_index = cxgbe_get_flow_item_index(items,
+                                                      RTE_FLOW_ITEM_TYPE_IPV4);
+               if (item_index < 0)
+                       return rte_flow_error_set(e, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION, a,
+                                                 "No RTE_FLOW_ITEM_TYPE_IPV4 "
+                                                 "found.");
+
+               ipv4 = (const struct rte_flow_action_set_ipv4 *)a->conf;
+               memcpy(fs->nat_lip, &ipv4->ipv4_addr, sizeof(ipv4->ipv4_addr));
+               *nmode |= 1 << 1;
+               break;
+       case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+               item_index = cxgbe_get_flow_item_index(items,
+                                                      RTE_FLOW_ITEM_TYPE_IPV6);
+               if (item_index < 0)
+                       return rte_flow_error_set(e, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION, a,
+                                                 "No RTE_FLOW_ITEM_TYPE_IPV6 "
+                                                 "found.");
+
+               ipv6 = (const struct rte_flow_action_set_ipv6 *)a->conf;
+               memcpy(fs->nat_fip, ipv6->ipv6_addr, sizeof(ipv6->ipv6_addr));
+               *nmode |= 1 << 0;
+               break;
+       case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+               item_index = cxgbe_get_flow_item_index(items,
+                                                      RTE_FLOW_ITEM_TYPE_IPV6);
+               if (item_index < 0)
+                       return rte_flow_error_set(e, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION, a,
+                                                 "No RTE_FLOW_ITEM_TYPE_IPV6 "
+                                                 "found.");
+
+               ipv6 = (const struct rte_flow_action_set_ipv6 *)a->conf;
+               memcpy(fs->nat_lip, ipv6->ipv6_addr, sizeof(ipv6->ipv6_addr));
+               *nmode |= 1 << 1;
+               break;
+       case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+               item_index = cxgbe_get_flow_item_index(items,
+                                                      RTE_FLOW_ITEM_TYPE_TCP);
+               if (item_index < 0) {
+                       item_index =
+                               cxgbe_get_flow_item_index(items,
+                                               RTE_FLOW_ITEM_TYPE_UDP);
+                       if (item_index < 0)
+                               return rte_flow_error_set(e, EINVAL,
+                                               RTE_FLOW_ERROR_TYPE_ACTION, a,
+                                               "No RTE_FLOW_ITEM_TYPE_TCP or "
+                                               "RTE_FLOW_ITEM_TYPE_UDP found");
+               }
+
+               tp_port = (const struct rte_flow_action_set_tp *)a->conf;
+               fs->nat_fport = be16_to_cpu(tp_port->port);
+               *nmode |= 1 << 2;
+               break;
+       case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+               item_index = cxgbe_get_flow_item_index(items,
+                                                      RTE_FLOW_ITEM_TYPE_TCP);
+               if (item_index < 0) {
+                       item_index =
+                               cxgbe_get_flow_item_index(items,
+                                               RTE_FLOW_ITEM_TYPE_UDP);
+                       if (item_index < 0)
+                               return rte_flow_error_set(e, EINVAL,
+                                               RTE_FLOW_ERROR_TYPE_ACTION, a,
+                                               "No RTE_FLOW_ITEM_TYPE_TCP or "
+                                               "RTE_FLOW_ITEM_TYPE_UDP found");
+               }
+
+               tp_port = (const struct rte_flow_action_set_tp *)a->conf;
+               fs->nat_lport = be16_to_cpu(tp_port->port);
+               *nmode |= 1 << 3;
+               break;
+       case RTE_FLOW_ACTION_TYPE_MAC_SWAP:
+               item_index = cxgbe_get_flow_item_index(items,
+                                                      RTE_FLOW_ITEM_TYPE_ETH);
+               if (item_index < 0)
+                       return rte_flow_error_set(e, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION, a,
+                                                 "No RTE_FLOW_ITEM_TYPE_ETH "
+                                                 "found");
+               fs->swapmac = 1;
+               break;
        default:
                /* We are not supposed to come here */
                return rte_flow_error_set(e, EINVAL,
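To make the nmode encoding concrete, here is a minimal sketch of how the bits accumulated by ch_rte_parse_atype_switch() map onto a NAT mode (a hedged example against the functions above; the wrapper function itself is hypothetical):

static int example_nat_mode(struct ch_filter_specification *fs)
{
	uint8_t nmode = 0;

	nmode |= 1 << 1;	/* a RTE_FLOW_ACTION_TYPE_SET_IPV4_DST was seen */
	nmode |= 1 << 3;	/* a RTE_FLOW_ACTION_TYPE_SET_TP_DST was seen */

	/* nmode == 1010b (10), which ch_rte_parse_nat() maps to
	 * NAT_MODE_DIP_DP: rewrite destination IP and destination port.
	 * An unsupported combination such as 0001b returns -EINVAL.
	 */
	return ch_rte_parse_nat(nmode, fs);
}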
@@ -350,10 +574,12 @@ ch_rte_parse_atype_switch(const struct rte_flow_action *a,
 
 static int
 cxgbe_rtef_parse_actions(struct rte_flow *flow,
+                        const struct rte_flow_item items[],
                         const struct rte_flow_action action[],
                         struct rte_flow_error *e)
 {
        struct ch_filter_specification *fs = &flow->fs;
+       uint8_t nmode = 0, nat_ipv4 = 0, nat_ipv6 = 0;
        const struct rte_flow_action_queue *q;
        const struct rte_flow_action *a;
        char abit = 0;
@@ -391,7 +617,22 @@ cxgbe_rtef_parse_actions(struct rte_flow *flow,
                case RTE_FLOW_ACTION_TYPE_COUNT:
                        fs->hitcnts = 1;
                        break;
+               case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+               case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+               case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
                case RTE_FLOW_ACTION_TYPE_PHY_PORT:
+               case RTE_FLOW_ACTION_TYPE_MAC_SWAP:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+                       nat_ipv4++;
+                       goto action_switch;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+                       nat_ipv6++;
+                       goto action_switch;
+               case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+action_switch:
                        /* We allow multiple switch actions, but switch is
                         * not compatible with either queue or drop
                         */
@@ -399,7 +640,14 @@ cxgbe_rtef_parse_actions(struct rte_flow *flow,
                                return rte_flow_error_set(e, EINVAL,
                                                RTE_FLOW_ERROR_TYPE_ACTION, a,
                                                "overlapping action specified");
-                       ret = ch_rte_parse_atype_switch(a, fs, e);
+                       if (nat_ipv4 && nat_ipv6)
+                               return rte_flow_error_set(e, EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION, a,
+                                       "Can't mix IPv4 and IPv6"
+                                       " NAT addresses");
+
+                       ret = ch_rte_parse_atype_switch(a, items, &nmode, fs,
+                                                       e);
                        if (ret)
                                return ret;
                        fs->action = FILTER_SWITCH;
@@ -412,11 +660,24 @@ cxgbe_rtef_parse_actions(struct rte_flow *flow,
                }
        }
 
+       if (ch_rte_parse_nat(nmode, fs))
+               return rte_flow_error_set(e, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, a,
+                                         "invalid settings for switch action");
        return 0;
 }
 
-struct chrte_fparse parseitem[] = {
-               [RTE_FLOW_ITEM_TYPE_PHY_PORT] = {
+static struct chrte_fparse parseitem[] = {
+       [RTE_FLOW_ITEM_TYPE_ETH] = {
+               .fptr  = ch_rte_parsetype_eth,
+               .dmask = &(const struct rte_flow_item_eth){
+                       .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+                       .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
+                       .type = 0xffff,
+               }
+       },
+
+       [RTE_FLOW_ITEM_TYPE_PHY_PORT] = {
                .fptr = ch_rte_parsetype_port,
                .dmask = &(const struct rte_flow_item_phy_port){
                        .index = 0x7,
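For a sense of what exercises this parsing path end to end, a hedged sketch of an action list an application could hand to rte_flow_create() (addresses and ports are illustrative only; assumes rte_flow.h and rte_byteorder.h):

/* Switch matched packets out of physical port 1, rewriting the
 * destination IPv4 address and destination L4 port on the way out.
 * The pattern must contain IPV4 and TCP/UDP items, or the item-index
 * checks above will reject the actions.
 */
struct rte_flow_action_set_ipv4 new_dip = {
	.ipv4_addr = RTE_BE32(0x0a000001),	/* 10.0.0.1 */
};
struct rte_flow_action_set_tp new_dport = {
	.port = RTE_BE16(8080),
};
struct rte_flow_action_phy_port out_port = {
	.index = 1,
};
struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_SET_IPV4_DST, .conf = &new_dip },
	{ .type = RTE_FLOW_ACTION_TYPE_SET_TP_DST, .conf = &new_dport },
	{ .type = RTE_FLOW_ACTION_TYPE_PHY_PORT, .conf = &out_port },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};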
@@ -454,10 +715,10 @@ cxgbe_rtef_parse_items(struct rte_flow *flow,
        char repeat[ARRAY_SIZE(parseitem)] = {0};
 
        for (i = items; i->type != RTE_FLOW_ITEM_TYPE_END; i++) {
-               struct chrte_fparse *idx = &flow->item_parser[i->type];
+               struct chrte_fparse *idx;
                int ret;
 
-               if (i->type > ARRAY_SIZE(parseitem))
+               if (i->type >= ARRAY_SIZE(parseitem))
                        return rte_flow_error_set(e, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  i, "Item not supported");
@@ -478,6 +739,7 @@ cxgbe_rtef_parse_items(struct rte_flow *flow,
                        if (ret)
                                return ret;
 
+                       idx = &flow->item_parser[i->type];
                        if (!idx || !idx->fptr) {
                                return rte_flow_error_set(e, ENOTSUP,
                                                RTE_FLOW_ERROR_TYPE_ITEM, i,
@@ -503,7 +765,6 @@ cxgbe_flow_parse(struct rte_flow *flow,
                 struct rte_flow_error *e)
 {
        int ret;
-
        /* parse user request into ch_filter_specification */
        ret = cxgbe_rtef_parse_attr(flow, attr, e);
        if (ret)
@@ -511,7 +772,7 @@ cxgbe_flow_parse(struct rte_flow *flow,
        ret = cxgbe_rtef_parse_items(flow, item, e);
        if (ret)
                return ret;
-       return cxgbe_rtef_parse_actions(flow, action, e);
+       return cxgbe_rtef_parse_actions(flow, item, action, e);
 }
 
 static int __cxgbe_flow_create(struct rte_eth_dev *dev, struct rte_flow *flow)
@@ -582,6 +843,7 @@ cxgbe_flow_create(struct rte_eth_dev *dev,
 
        flow->item_parser = parseitem;
        flow->dev = dev;
+       flow->fs.private = (void *)flow;
 
        if (cxgbe_flow_parse(flow, attr, item, action, e)) {
                t4_os_free(flow);
@@ -636,6 +898,17 @@ static int __cxgbe_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
                return ctx.result;
        }
 
+       fs = &flow->fs;
+       if (fs->mask.macidx) {
+               struct port_info *pi = (struct port_info *)
+                                       (dev->data->dev_private);
+               int ret;
+
+               ret = cxgbe_mpstcam_remove(pi, fs->val.macidx);
+               if (!ret)
+                       return ret;
+       }
+
        return 0;
 }
 
index 0f75047..718bf3d 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <rte_flow_driver.h>
 #include "cxgbe_filter.h"
+#include "mps_tcam.h"
 #include "cxgbe.h"
 
 #define CXGBE_FLOW_POLL_US  10
index c3938e8..88dc851 100644 (file)
@@ -38,6 +38,8 @@
 #include "t4_msg.h"
 #include "cxgbe.h"
 #include "clip_tbl.h"
+#include "l2t.h"
+#include "mps_tcam.h"
 
 /**
  * Allocate a chunk of memory. The allocated memory is cleared.
@@ -99,6 +101,10 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
                const struct cpl_act_open_rpl *p = (const void *)rsp;
 
                hash_filter_rpl(q->adapter, p);
+       } else if (opcode == CPL_L2T_WRITE_RPL) {
+               const struct cpl_l2t_write_rpl *p = (const void *)rsp;
+
+               do_l2t_write_rpl(q->adapter, p);
        } else {
                dev_err(adapter, "unexpected CPL %#x on FW event queue\n",
                        opcode);
@@ -1135,13 +1141,17 @@ static int adap_init0(struct adapter *adap)
         V_FW_PARAMS_PARAM_Y(0) | \
         V_FW_PARAMS_PARAM_Z(0))
 
-       params[0] = FW_PARAM_PFVF(FILTER_START);
-       params[1] = FW_PARAM_PFVF(FILTER_END);
-       ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2, params, val);
+       params[0] = FW_PARAM_PFVF(L2T_START);
+       params[1] = FW_PARAM_PFVF(L2T_END);
+       params[2] = FW_PARAM_PFVF(FILTER_START);
+       params[3] = FW_PARAM_PFVF(FILTER_END);
+       ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 4, params, val);
        if (ret < 0)
                goto bye;
-       adap->tids.ftid_base = val[0];
-       adap->tids.nftids = val[1] - val[0] + 1;
+       adap->l2t_start = val[0];
+       adap->l2t_end = val[1];
+       adap->tids.ftid_base = val[2];
+       adap->tids.nftids = val[3] - val[2] + 1;
 
        params[0] = FW_PARAM_PFVF(CLIP_START);
        params[1] = FW_PARAM_PFVF(CLIP_END);
@@ -1170,6 +1180,16 @@ static int adap_init0(struct adapter *adap)
                        goto bye;
        }
 
+       /* See if FW supports FW_FILTER2 work request */
+       if (is_t4(adap->params.chip)) {
+               adap->params.filter2_wr_support = 0;
+       } else {
+               params[0] = FW_PARAM_DEV(FILTER2_WR);
+               ret = t4_query_params(adap, adap->mbox, adap->pf, 0,
+                                     1, params, val);
+               adap->params.filter2_wr_support = (ret == 0 && val[0] != 0);
+       }
+
        /* query tid-related parameters */
        params[0] = FW_PARAM_DEV(NTID);
        ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 1,
@@ -1332,10 +1352,8 @@ int link_start(struct port_info *pi)
        ret = t4_set_rxmode(adapter, adapter->mbox, pi->viid, mtu, -1, -1,
                            -1, 1, true);
        if (ret == 0) {
-               ret = t4_change_mac(adapter, adapter->mbox, pi->viid,
-                                   pi->xact_addr_filt,
-                                   (u8 *)&pi->eth_dev->data->mac_addrs[0],
-                                   true, true);
+               ret = cxgbe_mpstcam_modify(pi, (int)pi->xact_addr_filt,
+                               (u8 *)&pi->eth_dev->data->mac_addrs[0]);
                if (ret >= 0) {
                        pi->xact_addr_filt = ret;
                        ret = 0;
@@ -1679,10 +1697,12 @@ void cxgbe_close(struct adapter *adapter)
        int i;
 
        if (adapter->flags & FULL_INIT_DONE) {
-               if (is_pf4(adapter))
-                       t4_intr_disable(adapter);
                tid_free(&adapter->tids);
+               t4_cleanup_mpstcam(adapter);
                t4_cleanup_clip_tbl(adapter);
+               t4_cleanup_l2t(adapter);
+               if (is_pf4(adapter))
+                       t4_intr_disable(adapter);
                t4_sge_tx_monitor_stop(adapter);
                t4_free_sge_resources(adapter);
                for_each_port(adapter, i) {
@@ -1690,12 +1710,7 @@ void cxgbe_close(struct adapter *adapter)
                        if (pi->viid != 0)
                                t4_free_vi(adapter, adapter->mbox,
                                           adapter->pf, 0, pi->viid);
-                       rte_free(pi->eth_dev->data->mac_addrs);
-                       /* Skip first port since it'll be freed by DPDK stack */
-                       if (i) {
-                               rte_free(pi->eth_dev->data->dev_private);
-                               rte_eth_dev_release_port(pi->eth_dev);
-                       }
+                       rte_eth_dev_release_port(pi->eth_dev);
                }
                adapter->flags &= ~FULL_INIT_DONE;
        }
@@ -1855,12 +1870,23 @@ allocate_mac:
                dev_warn(adapter, "could not allocate CLIP. Continuing\n");
        }
 
+       adapter->l2t = t4_init_l2t(adapter->l2t_start, adapter->l2t_end);
+       if (!adapter->l2t) {
+               /* We tolerate a lack of L2T, giving up some functionality */
+               dev_warn(adapter, "could not allocate L2T. Continuing\n");
+       }
+
        if (tid_init(&adapter->tids) < 0) {
                /* Disable filtering support */
                dev_warn(adapter, "could not allocate TID table, "
                         "filter support disabled. Continuing\n");
        }
 
+       adapter->mpstcam = t4_init_mpstcam(adapter);
+       if (!adapter->mpstcam)
+               dev_warn(adapter, "could not allocate mps tcam table."
+                        " Continuing\n");
+
        if (is_hashfilter(adapter)) {
                if (t4_read_reg(adapter, A_LE_DB_CONFIG) & F_HASHEN) {
                        u32 hash_base, hash_reg;
@@ -1887,14 +1913,7 @@ out_free:
                if (pi->viid != 0)
                        t4_free_vi(adapter, adapter->mbox, adapter->pf,
                                   0, pi->viid);
-               /* Skip first port since it'll be de-allocated by DPDK */
-               if (i == 0)
-                       continue;
-               if (pi->eth_dev) {
-                       if (pi->eth_dev->data->dev_private)
-                               rte_free(pi->eth_dev->data->dev_private);
-                       rte_eth_dev_release_port(pi->eth_dev);
-               }
+               rte_eth_dev_release_port(pi->eth_dev);
        }
 
        if (adapter->flags & FW_OK)
index 4214d03..6223e12 100644 (file)
@@ -282,14 +282,7 @@ out_free:
                if (pi->viid != 0)
                        t4_free_vi(adapter, adapter->mbox, adapter->pf,
                                   0, pi->viid);
-               /* Skip first port since it'll be de-allocated by DPDK */
-               if (i == 0)
-                       continue;
-               if (pi->eth_dev) {
-                       if (pi->eth_dev->data->dev_private)
-                               rte_free(pi->eth_dev->data->dev_private);
-                       rte_eth_dev_release_port(pi->eth_dev);
-               }
+               rte_eth_dev_release_port(pi->eth_dev);
        }
        return -err;
 }
diff --git a/drivers/net/cxgbe/l2t.c b/drivers/net/cxgbe/l2t.c
new file mode 100644 (file)
index 0000000..814188f
--- /dev/null
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Chelsio Communications.
+ * All rights reserved.
+ */
+#include "common.h"
+#include "l2t.h"
+
+/**
+ * cxgbe_l2t_release - Release associated L2T entry
+ * @e: L2T entry to release
+ *
+ * Drops a reference to the L2T entry. Once the reference count
+ * reaches zero, the entry can be reused by the L2T table.
+ */
+void cxgbe_l2t_release(struct l2t_entry *e)
+{
+       if (rte_atomic32_read(&e->refcnt) != 0)
+               rte_atomic32_dec(&e->refcnt);
+}
+
+/**
+ * Process a CPL_L2T_WRITE_RPL. Note that the TID in the reply is really
+ * the L2T index it refers to.
+ */
+void do_l2t_write_rpl(struct adapter *adap, const struct cpl_l2t_write_rpl *rpl)
+{
+       struct l2t_data *d = adap->l2t;
+       unsigned int tid = GET_TID(rpl);
+       unsigned int l2t_idx = tid % L2T_SIZE;
+
+       if (unlikely(rpl->status != CPL_ERR_NONE)) {
+               dev_err(adap,
+                       "Unexpected L2T_WRITE_RPL status %u for entry %u\n",
+                       rpl->status, l2t_idx);
+               return;
+       }
+
+       if (tid & F_SYNC_WR) {
+               struct l2t_entry *e = &d->l2tab[l2t_idx - d->l2t_start];
+
+               t4_os_lock(&e->lock);
+               if (e->state != L2T_STATE_SWITCHING)
+                       e->state = L2T_STATE_VALID;
+               t4_os_unlock(&e->lock);
+       }
+}
+
+/**
+ * Write an L2T entry.  Must be called with the entry locked.
+ * The write may be synchronous or asynchronous.
+ */
+static int write_l2e(struct rte_eth_dev *dev, struct l2t_entry *e, int sync,
+                    bool loopback, bool arpmiss)
+{
+       struct adapter *adap = ethdev2adap(dev);
+       struct l2t_data *d = adap->l2t;
+       struct rte_mbuf *mbuf;
+       struct cpl_l2t_write_req *req;
+       struct sge_ctrl_txq *ctrlq;
+       unsigned int l2t_idx = e->idx + d->l2t_start;
+       unsigned int port_id = ethdev2pinfo(dev)->port_id;
+
+       ctrlq = &adap->sge.ctrlq[port_id];
+       mbuf = rte_pktmbuf_alloc(ctrlq->mb_pool);
+       if (!mbuf)
+               return -ENOMEM;
+
+       mbuf->data_len = sizeof(*req);
+       mbuf->pkt_len = mbuf->data_len;
+
+       req = rte_pktmbuf_mtod(mbuf, struct cpl_l2t_write_req *);
+       INIT_TP_WR(req, 0);
+
+       OPCODE_TID(req) =
+               cpu_to_be32(MK_OPCODE_TID(CPL_L2T_WRITE_REQ,
+                                         l2t_idx | V_SYNC_WR(sync) |
+                                         V_TID_QID(adap->sge.fw_evtq.abs_id)));
+       req->params = cpu_to_be16(V_L2T_W_PORT(e->lport) |
+                                 V_L2T_W_LPBK(loopback) |
+                                 V_L2T_W_ARPMISS(arpmiss) |
+                                 V_L2T_W_NOREPLY(!sync));
+       req->l2t_idx = cpu_to_be16(l2t_idx);
+       req->vlan = cpu_to_be16(e->vlan);
+       rte_memcpy(req->dst_mac, e->dmac, ETHER_ADDR_LEN);
+
+       if (loopback)
+               memset(req->dst_mac, 0, ETHER_ADDR_LEN);
+
+       t4_mgmt_tx(ctrlq, mbuf);
+
+       if (sync && e->state != L2T_STATE_SWITCHING)
+               e->state = L2T_STATE_SYNC_WRITE;
+
+       return 0;
+}
+
+/**
+ * find_or_alloc_l2e - Find/Allocate a free L2T entry
+ * @d: L2T table
+ * @vlan: VLAN id to compare/add
+ * @port: port id to compare/add
+ * @dmac: Destination MAC address to compare/add
+ * Returns pointer to the L2T entry found/created
+ *
+ * Finds/Allocates an L2T entry to be used by a switching rule of a filter.
+ */
+static struct l2t_entry *find_or_alloc_l2e(struct l2t_data *d, u16 vlan,
+                                          u8 port, u8 *dmac)
+{
+       struct l2t_entry *end, *e;
+       struct l2t_entry *first_free = NULL;
+
+       for (e = &d->l2tab[0], end = &d->l2tab[d->l2t_size]; e != end; ++e) {
+               if (rte_atomic32_read(&e->refcnt) == 0) {
+                       if (!first_free)
+                               first_free = e;
+               } else {
+                       if (e->state == L2T_STATE_SWITCHING) {
+                               if ((!memcmp(e->dmac, dmac, ETHER_ADDR_LEN)) &&
+                                   e->vlan == vlan && e->lport == port)
+                                       goto exists;
+                       }
+               }
+       }
+
+       if (first_free) {
+               e = first_free;
+               goto found;
+       }
+
+       return NULL;
+
+found:
+       e->state = L2T_STATE_UNUSED;
+
+exists:
+       return e;
+}
+
+static struct l2t_entry *t4_l2t_alloc_switching(struct rte_eth_dev *dev,
+                                               u16 vlan, u8 port,
+                                               u8 *eth_addr)
+{
+       struct adapter *adap = ethdev2adap(dev);
+       struct l2t_data *d = adap->l2t;
+       struct l2t_entry *e;
+       int ret = 0;
+
+       t4_os_write_lock(&d->lock);
+       e = find_or_alloc_l2e(d, vlan, port, eth_addr);
+       if (e) {
+               t4_os_lock(&e->lock);
+               if (!rte_atomic32_read(&e->refcnt)) {
+                       e->state = L2T_STATE_SWITCHING;
+                       e->vlan = vlan;
+                       e->lport = port;
+                       rte_memcpy(e->dmac, eth_addr, ETHER_ADDR_LEN);
+                       rte_atomic32_set(&e->refcnt, 1);
+                       ret = write_l2e(dev, e, 0, !L2T_LPBK, !L2T_ARPMISS);
+                       if (ret < 0)
+                               dev_debug(adap, "Failed to write L2T entry: %d",
+                                         ret);
+               } else {
+                       rte_atomic32_inc(&e->refcnt);
+               }
+               t4_os_unlock(&e->lock);
+       }
+       t4_os_write_unlock(&d->lock);
+
+       return ret ? NULL : e;
+}
+
+/**
+ * cxgbe_l2t_alloc_switching - Allocate an L2T entry for a switching rule
+ * @dev: rte_eth_dev pointer
+ * @vlan: VLAN Id
+ * @port: Associated port
+ * @dmac: Destination MAC address to add to L2T
+ * Returns pointer to the allocated l2t entry
+ *
+ * Allocates an L2T entry for use by a switching rule of a filter
+ */
+struct l2t_entry *cxgbe_l2t_alloc_switching(struct rte_eth_dev *dev, u16 vlan,
+                                           u8 port, u8 *dmac)
+{
+       return t4_l2t_alloc_switching(dev, vlan, port, dmac);
+}
+
+/**
+ * Initialize L2 Table
+ */
+struct l2t_data *t4_init_l2t(unsigned int l2t_start, unsigned int l2t_end)
+{
+       unsigned int l2t_size;
+       unsigned int i;
+       struct l2t_data *d;
+
+       if (l2t_start >= l2t_end || l2t_end >= L2T_SIZE)
+               return NULL;
+       l2t_size = l2t_end - l2t_start + 1;
+
+       d = t4_os_alloc(sizeof(*d) + l2t_size * sizeof(struct l2t_entry));
+       if (!d)
+               return NULL;
+
+       d->l2t_start = l2t_start;
+       d->l2t_size = l2t_size;
+
+       t4_os_rwlock_init(&d->lock);
+
+       for (i = 0; i < d->l2t_size; ++i) {
+               d->l2tab[i].idx = i;
+               d->l2tab[i].state = L2T_STATE_UNUSED;
+               t4_os_lock_init(&d->l2tab[i].lock);
+               rte_atomic32_set(&d->l2tab[i].refcnt, 0);
+       }
+
+       return d;
+}
+
+/**
+ * Cleanup L2 Table
+ */
+void t4_cleanup_l2t(struct adapter *adap)
+{
+       if (adap->l2t)
+               t4_os_free(adap->l2t);
+}
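A hedged usage sketch for the new L2T API, with error handling trimmed (assumes the surrounding driver context; vlan and dmac stand in for caller-supplied values):

struct l2t_entry *e;

/* Find an existing switching entry with matching (vlan, port, dmac)
 * and take a reference, or allocate and write a new one.
 */
e = cxgbe_l2t_alloc_switching(dev, vlan, pi->port_id, dmac);
if (!e)
	return -ENOMEM;		/* table full or the L2T write failed */

/* ... program the filter with hardware index e->idx + l2t_start ... */

cxgbe_l2t_release(e);		/* drop the reference when the filter dies */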
diff --git a/drivers/net/cxgbe/l2t.h b/drivers/net/cxgbe/l2t.h
new file mode 100644 (file)
index 0000000..22a34e3
--- /dev/null
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Chelsio Communications.
+ * All rights reserved.
+ */
+#ifndef _CXGBE_L2T_H_
+#define _CXGBE_L2T_H_
+
+#include "t4_msg.h"
+
+enum {
+       L2T_SIZE = 4096       /* # of L2T entries */
+};
+
+enum {
+       L2T_STATE_VALID,      /* entry is up to date */
+       L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */
+
+       /* when state is one of the below the entry is not hashed */
+       L2T_STATE_SWITCHING,  /* entry is being used by a switching filter */
+       L2T_STATE_UNUSED      /* entry not in use */
+};
+
+/*
+ * State for the corresponding entry of the HW L2 table.
+ */
+struct l2t_entry {
+       u16 state;                  /* entry state */
+       u16 idx;                    /* entry index within in-memory table */
+       u16 vlan;                   /* VLAN TCI (id: bits 0-11, prio: bits 13-15) */
+       u8  lport;                  /* destination port */
+       u8  dmac[ETHER_ADDR_LEN];   /* destination MAC address */
+       rte_spinlock_t lock;        /* entry lock */
+       rte_atomic32_t refcnt;      /* entry reference count */
+};
+
+struct l2t_data {
+       unsigned int l2t_start;     /* start index of our piece of the L2T */
+       unsigned int l2t_size;      /* number of entries in l2tab */
+       rte_rwlock_t lock;          /* table rw lock */
+       struct l2t_entry l2tab[0];  /* MUST BE LAST */
+};
+
+#define L2T_LPBK       true
+#define L2T_ARPMISS    true
+
+/* identifies sync vs async L2T_WRITE_REQs */
+#define S_SYNC_WR    12
+#define V_SYNC_WR(x) ((x) << S_SYNC_WR)
+#define F_SYNC_WR    V_SYNC_WR(1)
+
+struct l2t_data *t4_init_l2t(unsigned int l2t_start, unsigned int l2t_end);
+void t4_cleanup_l2t(struct adapter *adap);
+struct l2t_entry *cxgbe_l2t_alloc_switching(struct rte_eth_dev *dev, u16 vlan,
+                                           u8 port, u8 *dmac);
+void cxgbe_l2t_release(struct l2t_entry *e);
+void do_l2t_write_rpl(struct adapter *p, const struct cpl_l2t_write_rpl *rpl);
+#endif /* _CXGBE_L2T_H_ */
index 7c69a34..c51af26 100644 (file)
@@ -9,6 +9,8 @@ sources = files('cxgbe_ethdev.c',
        'cxgbe_filter.c',
        'cxgbe_flow.c',
        'clip_tbl.c',
+       'mps_tcam.c',
+       'l2t.c',
        'base/t4_hw.c',
        'base/t4vf_hw.c')
 includes += include_directories('base')
diff --git a/drivers/net/cxgbe/mps_tcam.c b/drivers/net/cxgbe/mps_tcam.c
new file mode 100644 (file)
index 0000000..02ec69a
--- /dev/null
@@ -0,0 +1,243 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Chelsio Communications.
+ * All rights reserved.
+ */
+
+#include "mps_tcam.h"
+
+static inline bool
+match_entry(struct mps_tcam_entry *entry, const u8 *eth_addr, const u8 *mask)
+{
+       if (!memcmp(eth_addr, entry->eth_addr, ETHER_ADDR_LEN) &&
+           !memcmp(mask, entry->mask, ETHER_ADDR_LEN))
+               return true;
+       return false;
+}
+
+static int cxgbe_update_free_idx(struct mpstcam_table *t)
+{
+       struct mps_tcam_entry *entry = t->entry;
+       u16 i, next = t->free_idx + 1;
+
+       if (entry[t->free_idx].state == MPS_ENTRY_UNUSED)
+               /* free_idx already points to a free entry */
+               return 0;
+
+       /* loop until we wrap back around to the index where we started */
+       for (i = next; i != t->free_idx; i++) {
+               if (i == t->size)
+                       /* wrap around and resume the search from the start */
+                       i = 0;
+
+               if (entry[i].state == MPS_ENTRY_UNUSED) {
+                       t->free_idx = i;
+                       return 0;
+               }
+       }
+
+       return -1;      /* table is full */
+}
+
+static struct mps_tcam_entry *
+cxgbe_mpstcam_lookup(struct mpstcam_table *t, const u8 *eth_addr,
+                    const u8 *mask)
+{
+       struct mps_tcam_entry *entry = t->entry;
+       int i;
+
+       if (!entry)
+               return NULL;
+
+       for (i = 0; i < t->size; i++) {
+               if (entry[i].state == MPS_ENTRY_UNUSED)
+                       continue;       /* entry is not being used */
+               if (match_entry(&entry[i], eth_addr, mask))
+                       return &entry[i];
+       }
+
+       return NULL;
+}
+
+int cxgbe_mpstcam_alloc(struct port_info *pi, const u8 *eth_addr,
+                       const u8 *mask)
+{
+       struct adapter *adap = pi->adapter;
+       struct mpstcam_table *mpstcam = adap->mpstcam;
+       struct mps_tcam_entry *entry;
+       int ret;
+
+       if (!adap->mpstcam) {
+               dev_err(adap, "mpstcam table is not available\n");
+               return -EOPNOTSUPP;
+       }
+
+       /* If entry already present, return it. */
+       t4_os_write_lock(&mpstcam->lock);
+       entry = cxgbe_mpstcam_lookup(adap->mpstcam, eth_addr, mask);
+       if (entry) {
+               rte_atomic32_add(&entry->refcnt, 1);
+               t4_os_write_unlock(&mpstcam->lock);
+               return entry->idx;
+       }
+
+       if (mpstcam->full) {
+               t4_os_write_unlock(&mpstcam->lock);
+               dev_err(adap, "mps-tcam table is full\n");
+               return -ENOMEM;
+       }
+
+       ret = t4_alloc_raw_mac_filt(adap, pi->viid, eth_addr, mask,
+                                   mpstcam->free_idx, 0, pi->port_id, false);
+       if (ret <= 0) {
+               t4_os_write_unlock(&mpstcam->lock);
+               return ret;
+       }
+
+       /* Fill in the new values */
+       entry = &mpstcam->entry[ret];
+       memcpy(entry->eth_addr, eth_addr, ETHER_ADDR_LEN);
+       memcpy(entry->mask, mask, ETHER_ADDR_LEN);
+       rte_atomic32_set(&entry->refcnt, 1);
+       entry->state = MPS_ENTRY_USED;
+
+       if (cxgbe_update_free_idx(mpstcam))
+               mpstcam->full = true;
+
+       t4_os_write_unlock(&mpstcam->lock);
+       return ret;
+}
+
+int cxgbe_mpstcam_modify(struct port_info *pi, int idx, const u8 *addr)
+{
+       struct adapter *adap = pi->adapter;
+       struct mpstcam_table *mpstcam = adap->mpstcam;
+       struct mps_tcam_entry *entry;
+
+       if (!mpstcam)
+               return -EOPNOTSUPP;
+       t4_os_write_lock(&mpstcam->lock);
+       if (idx != -1 && idx >= mpstcam->size) {
+               t4_os_write_unlock(&mpstcam->lock);
+               return -EINVAL;
+       }
+       if (idx >= 0) {
+               entry = &mpstcam->entry[idx];
+               /* user wants to modify an existing entry.
+                * verify if entry exists
+                */
+               if (entry->state != MPS_ENTRY_USED) {
+                       t4_os_write_unlock(&mpstcam->lock);
+                       return -EINVAL;
+               }
+       }
+
+       idx = t4_change_mac(adap, adap->mbox, pi->viid, idx, addr, true, true);
+       if (idx < 0) {
+               t4_os_write_unlock(&mpstcam->lock);
+               return idx;
+       }
+
+       /* idx can now be different from what user provided */
+       entry = &mpstcam->entry[idx];
+       memcpy(entry->eth_addr, addr, ETHER_ADDR_LEN);
+       /* NOTE: t4_change_mac() returns an index different from the
+        * user-provided one only when the user-provided index was -1
+        */
+       if (entry->state == MPS_ENTRY_UNUSED) {
+               rte_atomic32_set(&entry->refcnt, 1);
+               entry->state = MPS_ENTRY_USED;
+       }
+
+       if (cxgbe_update_free_idx(mpstcam))
+               mpstcam->full = true;
+
+       t4_os_write_unlock(&mpstcam->lock);
+       return idx;
+}
+
+/**
+ * hold appropriate locks while calling this.
+ */
+static inline void reset_mpstcam_entry(struct mps_tcam_entry *entry)
+{
+       memset(entry->eth_addr, 0, ETHER_ADDR_LEN);
+       memset(entry->mask, 0, ETHER_ADDR_LEN);
+       rte_atomic32_clear(&entry->refcnt);
+       entry->state = MPS_ENTRY_UNUSED;
+}
+
+/**
+ * ret < 0: fatal error
+ * ret = 0: entry removed in h/w
+ * ret > 0: updated refcount.
+ */
+int cxgbe_mpstcam_remove(struct port_info *pi, u16 idx)
+{
+       struct adapter *adap = pi->adapter;
+       struct mpstcam_table *t = adap->mpstcam;
+       struct mps_tcam_entry *entry;
+       int ret;
+
+       if (!t)
+               return -EOPNOTSUPP;
+       t4_os_write_lock(&t->lock);
+       entry = &t->entry[idx];
+       if (entry->state == MPS_ENTRY_UNUSED) {
+               t4_os_write_unlock(&t->lock);
+               return -EINVAL;
+       }
+
+       if (rte_atomic32_read(&entry->refcnt) == 1)
+               ret = t4_free_raw_mac_filt(adap, pi->viid, entry->eth_addr,
+                                          entry->mask, idx, 1, pi->port_id,
+                                          false);
+       else
+               ret = rte_atomic32_sub_return(&entry->refcnt, 1);
+
+       if (ret == 0) {
+               reset_mpstcam_entry(entry);
+               t->full = false;        /* We have at least 1 free entry */
+               cxgbe_update_free_idx(t);
+       }
+
+       t4_os_write_unlock(&t->lock);
+       return ret;
+}
+
+struct mpstcam_table *t4_init_mpstcam(struct adapter *adap)
+{
+       struct mpstcam_table *t;
+       int i;
+       u16 size = adap->params.arch.mps_tcam_size;
+
+       t = t4_os_alloc(sizeof(*t) + size * sizeof(struct mps_tcam_entry));
+       if (!t)
+               return NULL;
+
+       t4_os_rwlock_init(&t->lock);
+       t->full = false;
+       t->size = size;
+
+       for (i = 0; i < size; i++) {
+               reset_mpstcam_entry(&t->entry[i]);
+               t->entry[i].mpstcam = t;
+               t->entry[i].idx = i;
+       }
+
+       /* The first entry is reserved by the chip; its state stays
+        * MPS_ENTRY_USED until t4_cleanup_mpstcam()
+        */
+       t->entry[0].state = MPS_ENTRY_USED;
+       t->free_idx = 1;
+
+       return t;
+}
+
+void t4_cleanup_mpstcam(struct adapter *adap)
+{
+       /* entry[] is a flexible array member allocated together with
+        * the table in t4_init_mpstcam(), so freeing the table frees
+        * the entries too; freeing entry separately would be invalid
+        */
+       if (adap->mpstcam)
+               t4_os_free(adap->mpstcam);
+}
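Because cxgbe_mpstcam_remove() folds three outcomes into a single return value, callers have to branch on its sign; a hedged caller sketch (names taken from the surrounding driver code):

int ret = cxgbe_mpstcam_remove(pi, idx);

if (ret < 0)
	dev_err(adap, "failed to remove mps-tcam entry %u\n", idx);
else if (ret == 0)
	; /* last reference dropped; entry cleared in hardware */
else
	; /* entry still shared; ret is the remaining reference count */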
diff --git a/drivers/net/cxgbe/mps_tcam.h b/drivers/net/cxgbe/mps_tcam.h
new file mode 100644 (file)
index 0000000..c3d6fe0
--- /dev/null
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Chelsio Communications.
+ * All rights reserved.
+ */
+
+#ifndef _CXGBE_MPSTCAM_H_
+#define _CXGBE_MPSTCAM_H_
+
+#include "common.h"
+
+enum {
+       MPS_ENTRY_UNUSED,       /* Keep this first so a memset to 0 yields
+                                * the correct state. Other states, such as
+                                * MPS_ENTRY_BUSY, can be added later, e.g.
+                                * to reduce contention while mailboxing a
+                                * request to firmware, or to denote
+                                * per-entry attributes
+                                */
+       MPS_ENTRY_USED,
+};
+
+struct mps_tcam_entry {
+       u8 state;
+       u16 idx;
+
+       /* add data here which uniquely defines an entry */
+       u8 eth_addr[ETHER_ADDR_LEN];
+       u8 mask[ETHER_ADDR_LEN];
+
+       struct mpstcam_table *mpstcam; /* backptr */
+       rte_atomic32_t refcnt;
+};
+
+struct mpstcam_table {
+       u16 size;
+       rte_rwlock_t lock;
+       u16 free_idx;   /* next free index */
+       bool full;      /* since free index can be present
+                        * anywhere in the table, size and
+                        * free_idx cannot alone determine
+                        * if the table is full
+                        */
+       struct mps_tcam_entry entry[0];
+};
+
+struct mpstcam_table *t4_init_mpstcam(struct adapter *adap);
+void t4_cleanup_mpstcam(struct adapter *adap);
+int cxgbe_mpstcam_alloc(struct port_info *pi, const u8 *mac, const u8 *mask);
+int cxgbe_mpstcam_remove(struct port_info *pi, u16 idx);
+int cxgbe_mpstcam_modify(struct port_info *pi, int idx, const u8 *addr);
+
+#endif /* _CXGBE_MPSTCAM_H_ */
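To make the free_idx/full interplay concrete, a small worked example with size = 4 and entry 0 reserved by the chip, matching the init conditions in t4_init_mpstcam():

	start:     [USED, UNUSED, UNUSED, UNUSED]  free_idx = 1, full = false
	alloc:     [USED, USED,   UNUSED, UNUSED]  free_idx = 2, full = false
	alloc:     [USED, USED,   USED,   UNUSED]  free_idx = 3, full = false
	alloc:     [USED, USED,   USED,   USED  ]  scan wraps, nothing free, full = true
	remove #2: [USED, USED,   UNUSED, USED  ]  full = false, free_idx = 2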
index 4ea40d1..f9d2d48 100644 (file)
@@ -1873,10 +1873,9 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
        /* Size needs to be multiple of 16, including status entry. */
        iq->size = cxgbe_roundup(iq->size, 16);
 
-       snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-                eth_dev->device->driver->name,
-                fwevtq ? "fwq_ring" : "rx_ring",
-                eth_dev->data->port_id, queue_id);
+       snprintf(z_name, sizeof(z_name), "eth_p%d_q%d_%s",
+                       eth_dev->data->port_id, queue_id,
+                       fwevtq ? "fwq_ring" : "rx_ring");
        snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name);
 
        iq->desc = alloc_ring(iq->size, iq->iqe_len, 0, &iq->phys_addr, NULL, 0,
@@ -1938,10 +1937,9 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
                        fl->size = s->fl_starve_thres - 1 + 2 * 8;
                fl->size = cxgbe_roundup(fl->size, 8);
 
-               snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-                        eth_dev->device->driver->name,
-                        fwevtq ? "fwq_ring" : "fl_ring",
-                        eth_dev->data->port_id, queue_id);
+               snprintf(z_name, sizeof(z_name), "eth_p%d_q%d_%s",
+                               eth_dev->data->port_id, queue_id,
+                               fwevtq ? "fwq_ring" : "fl_ring");
                snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name);
 
                fl->desc = alloc_ring(fl->size, sizeof(__be64),
@@ -2144,9 +2142,8 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
        /* Add status entries */
        nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
 
-       snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-                eth_dev->device->driver->name, "tx_ring",
-                eth_dev->data->port_id, queue_id);
+       snprintf(z_name, sizeof(z_name), "eth_p%d_q%d_%s",
+                       eth_dev->data->port_id, queue_id, "tx_ring");
        snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name);
 
        txq->q.desc = alloc_ring(txq->q.size, sizeof(struct tx_desc),
@@ -2223,9 +2220,8 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
        /* Add status entries */
        nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
 
-       snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-                eth_dev->device->driver->name, "ctrl_tx_ring",
-                eth_dev->data->port_id, queue_id);
+       snprintf(z_name, sizeof(z_name), "eth_p%d_q%d_%s",
+                       eth_dev->data->port_id, queue_id, "ctrl_tx_ring");
        snprintf(z_name_sw, sizeof(z_name_sw), "%s_sw_ring", z_name);
 
        txq->q.desc = alloc_ring(txq->q.size, sizeof(struct tx_desc),
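The renamed ring memzones come out short and driver-agnostic; a sketch of the resulting names (port and queue values illustrative):

char z_name[RTE_MEMZONE_NAMESIZE];

/* yields "eth_p0_q3_rx_ring"; the software ring then becomes
 * "eth_p0_q3_rx_ring_sw_ring", independent of the driver name
 */
snprintf(z_name, sizeof(z_name), "eth_p%d_q%d_%s", 0, 3, "rx_ring");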
index d7a0a50..1c4f7d9 100644 (file)
@@ -38,6 +38,7 @@ LDLIBS += -lrte_bus_dpaa
 LDLIBS += -lrte_mempool_dpaa
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_common_dpaax
 
 # install this header file
 SYMLINK-$(CONFIG_RTE_LIBRTE_DPAA_PMD)-include := rte_pmd_dpaa.h
index 7a950ac..d0572b3 100644 (file)
 
 /* Supported Rx offloads */
 static uint64_t dev_rx_offloads_sup =
-               DEV_RX_OFFLOAD_JUMBO_FRAME;
+               DEV_RX_OFFLOAD_JUMBO_FRAME |
+               DEV_RX_OFFLOAD_SCATTER;
 
 /* Rx offloads which cannot be disabled */
 static uint64_t dev_rx_offloads_nodis =
                DEV_RX_OFFLOAD_IPV4_CKSUM |
                DEV_RX_OFFLOAD_UDP_CKSUM |
                DEV_RX_OFFLOAD_TCP_CKSUM |
-               DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
-               DEV_RX_OFFLOAD_CRC_STRIP |
-               DEV_RX_OFFLOAD_SCATTER;
+               DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM;
 
 /* Supported Tx offloads */
 static uint64_t dev_tx_offloads_sup;
@@ -148,11 +147,30 @@ dpaa_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
        struct dpaa_if *dpaa_intf = dev->data->dev_private;
        uint32_t frame_size = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN
                                + VLAN_TAG_SIZE;
+       uint32_t buffsz = dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
 
        PMD_INIT_FUNC_TRACE();
 
        if (mtu < ETHER_MIN_MTU || frame_size > DPAA_MAX_RX_PKT_LEN)
                return -EINVAL;
+       /*
+        * Refuse mtu that requires the support of scattered packets
+        * when this feature has not been enabled before.
+        */
+       if (dev->data->min_rx_buf_size &&
+               !dev->data->scattered_rx && frame_size > buffsz) {
+               DPAA_PMD_ERR("SG not enabled, frame will not fit in one buffer");
+               return -EINVAL;
+       }
+
+       /* check <seg size> * <max_seg>  >= max_frame */
+       if (dev->data->min_rx_buf_size && dev->data->scattered_rx &&
+               (frame_size > buffsz * DPAA_SGT_MAX_ENTRIES)) {
+               DPAA_PMD_ERR("Frame too big to fit in max SG list (%d bytes)",
+                               buffsz * DPAA_SGT_MAX_ENTRIES);
+               return -EINVAL;
+       }
+
        if (frame_size > ETHER_MAX_LEN)
                dev->data->dev_conf.rxmode.offloads &=
                                                DEV_RX_OFFLOAD_JUMBO_FRAME;
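A worked instance of the two checks above, assuming default mbufs (2048-byte data room plus 128 bytes of headroom) and a 9000-byte MTU:

	buffsz     = (2048 + 128) - RTE_PKTMBUF_HEADROOM(128) = 2048
	frame_size = 9000 + ETHER_HDR_LEN(14) + ETHER_CRC_LEN(4) + VLAN_TAG_SIZE(4) = 9022

	9022 > 2048: rejected unless scattered Rx is enabled;
	9022 <= 2048 * DPAA_SGT_MAX_ENTRIES(16) = 32768: accepted with scatter.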
@@ -194,15 +212,32 @@ dpaa_eth_dev_configure(struct rte_eth_dev *dev)
        }
 
        if (rx_offloads & DEV_RX_OFFLOAD_JUMBO_FRAME) {
+               uint32_t max_len;
+
+               DPAA_PMD_DEBUG("enabling jumbo");
+
                if (dev->data->dev_conf.rxmode.max_rx_pkt_len <=
-                   DPAA_MAX_RX_PKT_LEN) {
-                       fman_if_set_maxfrm(dpaa_intf->fif,
-                               dev->data->dev_conf.rxmode.max_rx_pkt_len);
-                       return 0;
-               } else {
-                       return -1;
+                   DPAA_MAX_RX_PKT_LEN)
+                       max_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
+               else {
+                       DPAA_PMD_INFO("requested jumbo max len %d exceeds "
+                               "supported max %d, capping",
+                               dev->data->dev_conf.rxmode.max_rx_pkt_len,
+                               DPAA_MAX_RX_PKT_LEN);
+                       max_len = DPAA_MAX_RX_PKT_LEN;
                }
+
+               fman_if_set_maxfrm(dpaa_intf->fif, max_len);
+               dev->data->mtu = max_len
+                               - ETHER_HDR_LEN - ETHER_CRC_LEN - VLAN_TAG_SIZE;
        }
+
+       if (rx_offloads & DEV_RX_OFFLOAD_SCATTER) {
+               DPAA_PMD_DEBUG("enabling scatter mode");
+               fman_if_set_sg(dpaa_intf->fif, 1);
+               dev->data->scattered_rx = 1;
+       }
+
        return 0;
 }
 
@@ -300,15 +335,21 @@ static void dpaa_eth_dev_info(struct rte_eth_dev *dev,
 
        dev_info->max_rx_queues = dpaa_intf->nb_rx_queues;
        dev_info->max_tx_queues = dpaa_intf->nb_tx_queues;
-       dev_info->min_rx_bufsize = DPAA_MIN_RX_BUF_SIZE;
        dev_info->max_rx_pktlen = DPAA_MAX_RX_PKT_LEN;
        dev_info->max_mac_addrs = DPAA_MAX_MAC_FILTER;
        dev_info->max_hash_mac_addrs = 0;
        dev_info->max_vfs = 0;
        dev_info->max_vmdq_pools = ETH_16_POOLS;
        dev_info->flow_type_rss_offloads = DPAA_RSS_OFFLOAD_ALL;
-       dev_info->speed_capa = (ETH_LINK_SPEED_1G |
-                               ETH_LINK_SPEED_10G);
+
+       if (dpaa_intf->fif->mac_type == fman_mac_1g)
+               dev_info->speed_capa = ETH_LINK_SPEED_1G;
+       else if (dpaa_intf->fif->mac_type == fman_mac_10g)
+               dev_info->speed_capa = (ETH_LINK_SPEED_1G | ETH_LINK_SPEED_10G);
+       else
+               DPAA_PMD_ERR("invalid link_speed: %s, %d",
+                            dpaa_intf->name, dpaa_intf->fif->mac_type);
+
        dev_info->rx_offload_capa = dev_rx_offloads_sup |
                                        dev_rx_offloads_nodis;
        dev_info->tx_offload_capa = dev_tx_offloads_sup |
@@ -514,6 +555,7 @@ int dpaa_eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
        struct qm_mcc_initfq opts = {0};
        u32 flags = 0;
        int ret;
+       u32 buffsz = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -527,6 +569,28 @@ int dpaa_eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
        DPAA_PMD_INFO("Rx queue setup for queue index: %d fq_id (0x%x)",
                        queue_idx, rxq->fqid);
 
+       /* Max packet can fit in single buffer */
+       if (dev->data->dev_conf.rxmode.max_rx_pkt_len <= buffsz) {
+               ;
+       } else if (dev->data->dev_conf.rxmode.offloads &
+                       DEV_RX_OFFLOAD_SCATTER) {
+               if (dev->data->dev_conf.rxmode.max_rx_pkt_len >
+                       buffsz * DPAA_SGT_MAX_ENTRIES) {
+                       DPAA_PMD_ERR("max Rx pkt size %d too big to fit "
+                               "in max SG list (%d bytes)",
+                               dev->data->dev_conf.rxmode.max_rx_pkt_len,
+                               buffsz * DPAA_SGT_MAX_ENTRIES);
+                       rte_errno = EOVERFLOW;
+                       return -rte_errno;
+               }
+       } else {
+               DPAA_PMD_WARN("The requested maximum Rx packet size (%u) is"
+                    " larger than a single mbuf (%u) and scattered"
+                    " mode has not been requested",
+                    dev->data->dev_conf.rxmode.max_rx_pkt_len,
+                    buffsz);
+       }
+
        if (!dpaa_intf->bp_info || dpaa_intf->bp_info->mp != mp) {
                struct fman_if_ic_params icp;
                uint32_t fd_offset;
@@ -553,10 +617,13 @@ int dpaa_eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                fman_if_set_bp(dpaa_intf->fif, mp->size,
                               dpaa_intf->bp_info->bpid, bp_size);
                dpaa_intf->valid = 1;
-               DPAA_PMD_INFO("if =%s - fd_offset = %d offset = %d",
-                           dpaa_intf->name, fd_offset,
-                       fman_if_get_fdoff(dpaa_intf->fif));
+               DPAA_PMD_DEBUG("if:%s fd_offset = %d offset = %d",
+                               dpaa_intf->name, fd_offset,
+                               fman_if_get_fdoff(dpaa_intf->fif));
        }
+       DPAA_PMD_DEBUG("if:%s sg_on = %d, max_frm =%d", dpaa_intf->name,
+               fman_if_get_sg_enable(dpaa_intf->fif),
+               dev->data->dev_conf.rxmode.max_rx_pkt_len);
        /* checking if push mode only, no error check for now */
        if (dpaa_push_mode_max_queue > dpaa_push_queue_idx) {
                dpaa_push_queue_idx++;
@@ -594,8 +661,13 @@ int dpaa_eth_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                                "ret:%d(%s)", rxq->fqid, ret, strerror(ret));
                        return ret;
                }
-               rxq->cb.dqrr_dpdk_pull_cb = dpaa_rx_cb;
-               rxq->cb.dqrr_prepare = dpaa_rx_cb_prepare;
+               if (dpaa_svr_family == SVR_LS1043A_FAMILY) {
+                       rxq->cb.dqrr_dpdk_pull_cb = dpaa_rx_cb_no_prefetch;
+               } else {
+                       rxq->cb.dqrr_dpdk_pull_cb = dpaa_rx_cb;
+                       rxq->cb.dqrr_prepare = dpaa_rx_cb_prepare;
+               }
+
                rxq->is_static = true;
        }
        dev->data->rx_queues[queue_idx] = rxq;
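On the application side, a hedged configuration sketch that satisfies these checks when large frames are wanted (port_id and sizes illustrative; assumes rte_ethdev.h):

struct rte_eth_conf conf = {0};

conf.rxmode.offloads |= DEV_RX_OFFLOAD_JUMBO_FRAME | DEV_RX_OFFLOAD_SCATTER;
conf.rxmode.max_rx_pkt_len = 9022;	/* must stay <= buffsz * DPAA_SGT_MAX_ENTRIES */

if (rte_eth_dev_configure(port_id, 1, 1, &conf) < 0)
	rte_exit(EXIT_FAILURE, "cannot configure port\n");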
@@ -630,7 +702,8 @@ dpaa_eth_eventq_attach(const struct rte_eth_dev *dev,
        struct qm_mcc_initfq opts = {0};
 
        if (dpaa_push_mode_max_queue)
-               DPAA_PMD_WARN("PUSH mode already enabled for first %d queues.\n"
+               DPAA_PMD_WARN("PUSH mode q and EVENTDEV are not compatible\n"
+                             "PUSH mode already enabled for first %d queues.\n"
                              "To disable set DPAA_PUSH_QUEUES_NUMBER to 0\n",
                              dpaa_push_mode_max_queue);
 
@@ -1012,7 +1085,7 @@ static int dpaa_rx_queue_init(struct qman_fq *fq, struct qman_cgr *cgr_rx,
 {
        struct qm_mcc_initfq opts = {0};
        int ret;
-       u32 flags = 0;
+       u32 flags = QMAN_FQ_FLAG_NO_ENQUEUE;
        struct qm_mcc_initcgr cgr_opts = {
                .we_mask = QM_CGR_WE_CS_THRES |
                                QM_CGR_WE_CSTD_EN |
@@ -1025,15 +1098,18 @@ static int dpaa_rx_queue_init(struct qman_fq *fq, struct qman_cgr *cgr_rx,
 
        PMD_INIT_FUNC_TRACE();
 
-       ret = qman_reserve_fqid(fqid);
-       if (ret) {
-               DPAA_PMD_ERR("reserve rx fqid 0x%x failed with ret: %d",
-                            fqid, ret);
-               return -EINVAL;
+       if (fqid) {
+               ret = qman_reserve_fqid(fqid);
+               if (ret) {
+                       DPAA_PMD_ERR("reserve rx fqid 0x%x failed with ret: %d",
+                                    fqid, ret);
+                       return -EINVAL;
+               }
+       } else {
+               flags |= QMAN_FQ_FLAG_DYNAMIC_FQID;
        }
-
        DPAA_PMD_DEBUG("creating rx fq %p, fqid 0x%x", fq, fqid);
-       ret = qman_create_fq(fqid, QMAN_FQ_FLAG_NO_ENQUEUE, fq);
+       ret = qman_create_fq(fqid, flags, fq);
        if (ret) {
                DPAA_PMD_ERR("create rx fqid 0x%x failed with ret: %d",
                        fqid, ret);
@@ -1052,7 +1128,7 @@ static int dpaa_rx_queue_init(struct qman_fq *fq, struct qman_cgr *cgr_rx,
                if (ret) {
                        DPAA_PMD_WARN(
                                "rx taildrop init fail on rx fqid 0x%x(ret=%d)",
-                               fqid, ret);
+                               fq->fqid, ret);
                        goto without_cgr;
                }
                opts.we_mask |= QM_INITFQ_WE_CGID;
@@ -1060,7 +1136,7 @@ static int dpaa_rx_queue_init(struct qman_fq *fq, struct qman_cgr *cgr_rx,
                opts.fqd.fq_ctrl |= QM_FQCTRL_CGE;
        }
 without_cgr:
-       ret = qman_init_fq(fq, flags, &opts);
+       ret = qman_init_fq(fq, 0, &opts);
        if (ret)
                DPAA_PMD_ERR("init rx fqid 0x%x failed with ret:%d", fqid, ret);
        return ret;
@@ -1213,7 +1289,7 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev)
                if (default_q)
                        fqid = cfg->rx_def;
                else
-                       fqid = DPAA_PCD_FQID_START + dpaa_intf->ifid *
+                       fqid = DPAA_PCD_FQID_START + dpaa_intf->fif->mac_idx *
                                DPAA_PCD_FQID_MULTIPLIER + loop;
 
                if (dpaa_intf->cgr_rx)
@@ -1304,6 +1380,9 @@ dpaa_dev_init(struct rte_eth_dev *eth_dev)
        fman_if_reset_mcast_filter_table(fman_intf);
        /* Reset interface statistics */
        fman_if_stats_reset(fman_intf);
+       /* Disable SG by default */
+       fman_if_set_sg(fman_intf, 0);
+       fman_if_set_maxfrm(fman_intf, ETHER_MAX_LEN + VLAN_TAG_SIZE);
 
        return 0;
 
@@ -1360,10 +1439,6 @@ dpaa_dev_uninit(struct rte_eth_dev *dev)
        rte_free(dpaa_intf->tx_queues);
        dpaa_intf->tx_queues = NULL;
 
-       /* free memory for storing MAC addresses */
-       rte_free(dev->data->mac_addrs);
-       dev->data->mac_addrs = NULL;
-
        dev->dev_ops = NULL;
        dev->rx_pkt_burst = NULL;
        dev->tx_pkt_burst = NULL;
@@ -1372,7 +1447,7 @@ dpaa_dev_uninit(struct rte_eth_dev *dev)
 }
 
 static int
-rte_dpaa_probe(struct rte_dpaa_driver *dpaa_drv,
+rte_dpaa_probe(struct rte_dpaa_driver *dpaa_drv __rte_unused,
               struct rte_dpaa_device *dpaa_dev)
 {
        int diag;
@@ -1456,7 +1531,6 @@ rte_dpaa_probe(struct rte_dpaa_driver *dpaa_drv,
        }
 
        eth_dev->device = &dpaa_dev->device;
-       eth_dev->device->driver = &dpaa_drv->driver;
        dpaa_dev->eth_dev = eth_dev;
 
        /* Invoke PMD device initialization function */
@@ -1466,9 +1540,6 @@ rte_dpaa_probe(struct rte_dpaa_driver *dpaa_drv,
                return 0;
        }
 
-       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-               rte_free(eth_dev->data->dev_private);
-
        rte_eth_dev_release_port(eth_dev);
        return diag;
 }
@@ -1483,9 +1554,6 @@ rte_dpaa_remove(struct rte_dpaa_device *dpaa_dev)
        eth_dev = dpaa_dev->eth_dev;
        dpaa_dev_uninit(eth_dev);
 
-       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-               rte_free(eth_dev->data->dev_private);
-
        rte_eth_dev_release_port(eth_dev);
 
        return 0;
index c79b9f8..2fc7231 100644 (file)
 /* Alignment to use for cpu-local structs to avoid coherency problems. */
 #define MAX_CACHELINE                  64
 
-#define DPAA_MIN_RX_BUF_SIZE 512
 #define DPAA_MAX_RX_PKT_LEN  10240
 
+#define DPAA_SGT_MAX_ENTRIES 16 /* maximum number of entries in SG Table */
+
 /* RX queue tail drop threshold (CGR Based) in frame count */
 #define CGR_RX_PERFQ_THRESH 256
 
@@ -63,7 +64,7 @@
 #define DPAA_PCD_FQID_START            0x400
 #define DPAA_PCD_FQID_MULTIPLIER       0x100
 #define DPAA_DEFAULT_NUM_PCD_QUEUES    1
-#define DPAA_MAX_NUM_PCD_QUEUES                32
+#define DPAA_MAX_NUM_PCD_QUEUES                4
 
 #define DPAA_IF_TX_PRIORITY            3
 #define DPAA_IF_RX_PRIORITY            0
index 168b77e..c4471c2 100644 (file)
@@ -306,8 +306,6 @@ dpaa_eth_sg_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
        int i = 0;
        uint8_t fd_offset = fd->offset;
 
-       DPAA_DP_LOG(DEBUG, "Received an SG frame");
-
        vaddr = DPAA_MEMPOOL_PTOV(bp_info, qm_fd_addr(fd));
        if (!vaddr) {
                DPAA_PMD_ERR("unable to convert physical address");
@@ -349,6 +347,8 @@ dpaa_eth_sg_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
                }
                prev_seg = cur_seg;
        }
+       DPAA_DP_LOG(DEBUG, "Received an SG frame len =%d, num_sg =%d",
+                       first_seg->pkt_len, first_seg->nb_segs);
 
        dpaa_eth_packet_info(first_seg, vaddr);
        rte_pktmbuf_free_seg(temp);
@@ -367,22 +367,21 @@ dpaa_eth_fd_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
        uint16_t offset;
        uint32_t length;
 
-       DPAA_DP_LOG(DEBUG, " FD--->MBUF");
-
        if (unlikely(format == qm_fd_sg))
                return dpaa_eth_sg_to_mbuf(fd, ifid);
 
-       ptr = DPAA_MEMPOOL_PTOV(bp_info, qm_fd_addr(fd));
-
-       rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF));
-
        offset = (fd->opaque & DPAA_FD_OFFSET_MASK) >> DPAA_FD_OFFSET_SHIFT;
        length = fd->opaque & DPAA_FD_LENGTH_MASK;
 
+       DPAA_DP_LOG(DEBUG, " FD--->MBUF off %d len = %d", offset, length);
+
        /* Ignoring case when format != qm_fd_contig */
        dpaa_display_frame(fd);
+       ptr = DPAA_MEMPOOL_PTOV(bp_info, qm_fd_addr(fd));
 
        mbuf = (struct rte_mbuf *)((char *)ptr - bp_info->meta_data_size);
+       /* Prefetch the Parse results and packet data to L1 */
+       rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF));
 
        mbuf->data_off = offset;
        mbuf->data_len = length;
@@ -398,8 +397,9 @@ dpaa_eth_fd_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
        return mbuf;
 }
 
+/* Specific for LS1043 */
 void
-dpaa_rx_cb(struct qman_fq **fq, struct qm_dqrr_entry **dqrr,
+dpaa_rx_cb_no_prefetch(struct qman_fq **fq, struct qm_dqrr_entry **dqrr,
           void **bufs, int num_bufs)
 {
        struct rte_mbuf *mbuf;
@@ -411,17 +411,13 @@ dpaa_rx_cb(struct qman_fq **fq, struct qm_dqrr_entry **dqrr,
        uint32_t length;
        uint8_t format;
 
-       if (dpaa_svr_family != SVR_LS1046A_FAMILY) {
-               bp_info = DPAA_BPID_TO_POOL_INFO(dqrr[0]->fd.bpid);
-               ptr = rte_dpaa_mem_ptov(qm_fd_addr(&dqrr[0]->fd));
-               rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF));
-               bufs[0] = (struct rte_mbuf *)((char *)ptr -
-                               bp_info->meta_data_size);
-       }
+       bp_info = DPAA_BPID_TO_POOL_INFO(dqrr[0]->fd.bpid);
+       ptr = rte_dpaa_mem_ptov(qm_fd_addr(&dqrr[0]->fd));
+       rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF));
+       bufs[0] = (struct rte_mbuf *)((char *)ptr - bp_info->meta_data_size);
 
        for (i = 0; i < num_bufs; i++) {
-               if (dpaa_svr_family != SVR_LS1046A_FAMILY &&
-                   i < num_bufs - 1) {
+               if (i < num_bufs - 1) {
                        bp_info = DPAA_BPID_TO_POOL_INFO(dqrr[i + 1]->fd.bpid);
                        ptr = rte_dpaa_mem_ptov(qm_fd_addr(&dqrr[i + 1]->fd));
                        rte_prefetch0((void *)((uint8_t *)ptr +
@@ -458,6 +454,46 @@ dpaa_rx_cb(struct qman_fq **fq, struct qm_dqrr_entry **dqrr,
        }
 }
 
+void
+dpaa_rx_cb(struct qman_fq **fq, struct qm_dqrr_entry **dqrr,
+          void **bufs, int num_bufs)
+{
+       struct rte_mbuf *mbuf;
+       const struct qm_fd *fd;
+       struct dpaa_if *dpaa_intf;
+       uint16_t offset, i;
+       uint32_t length;
+       uint8_t format;
+
+       for (i = 0; i < num_bufs; i++) {
+               fd = &dqrr[i]->fd;
+               dpaa_intf = fq[0]->dpaa_intf;
+
+               format = (fd->opaque & DPAA_FD_FORMAT_MASK) >>
+                               DPAA_FD_FORMAT_SHIFT;
+               if (unlikely(format == qm_fd_sg)) {
+                       bufs[i] = dpaa_eth_sg_to_mbuf(fd, dpaa_intf->ifid);
+                       continue;
+               }
+
+               offset = (fd->opaque & DPAA_FD_OFFSET_MASK) >>
+                               DPAA_FD_OFFSET_SHIFT;
+               length = fd->opaque & DPAA_FD_LENGTH_MASK;
+
+               mbuf = bufs[i];
+               mbuf->data_off = offset;
+               mbuf->data_len = length;
+               mbuf->pkt_len = length;
+               mbuf->port = dpaa_intf->ifid;
+
+               mbuf->nb_segs = 1;
+               mbuf->ol_flags = 0;
+               mbuf->next = NULL;
+               rte_mbuf_refcnt_set(mbuf, 1);
+               dpaa_eth_packet_info(mbuf, mbuf->buf_addr);
+       }
+}
+
 void dpaa_rx_cb_prepare(struct qm_dqrr_entry *dq, void **bufs)
 {
        struct dpaa_bp_info *bp_info = DPAA_BPID_TO_POOL_INFO(dq->fd.bpid);
@@ -468,8 +504,7 @@ void dpaa_rx_cb_prepare(struct qm_dqrr_entry *dq, void **bufs)
         * So we prefetch the annotation beforehand, so that it is available
         * in cache when accessed.
         */
-       if (dpaa_svr_family == SVR_LS1046A_FAMILY)
-               rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF));
+       rte_prefetch0((void *)((uint8_t *)ptr + DEFAULT_RX_ICEOF));
 
        *bufs = (struct rte_mbuf *)((char *)ptr - bp_info->meta_data_size);
 }
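
With the SVR checks removed, the prefetch policy is split across two callbacks: dpaa_rx_cb_no_prefetch() translates and prefetches each frame inline (the LS1043 case, per the comment above), while dpaa_rx_cb() relies on dpaa_rx_cb_prepare() having already converted every entry of bufs[]. A hedged sketch of the selection, which happens in the Rx setup path rather than in these hunks (the variable name is illustrative):

    /* Illustrative only: pick the qman Rx callback per SoC family. */
    if (dpaa_svr_family == SVR_LS1043A_FAMILY)
        rx_cb = dpaa_rx_cb_no_prefetch;  /* ptov + prefetch done inline */
    else
        rx_cb = dpaa_rx_cb;              /* bufs[] pre-translated by prepare */
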
@@ -870,6 +905,19 @@ dpaa_eth_queue_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
                                DPAA_TX_BURST_SIZE : nb_bufs;
                for (loop = 0; loop < frames_to_send; loop++) {
                        mbuf = *(bufs++);
+                       seqn = mbuf->seqn;
+                       if (seqn != DPAA_INVALID_MBUF_SEQN) {
+                               index = seqn - 1;
+                               if (DPAA_PER_LCORE_DQRR_HELD & (1 << index)) {
+                                       flags[loop] =
+                                          ((index & QM_EQCR_DCA_IDXMASK) << 8);
+                                       flags[loop] |= QMAN_ENQUEUE_FLAG_DCA;
+                                       DPAA_PER_LCORE_DQRR_SIZE--;
+                                       DPAA_PER_LCORE_DQRR_HELD &=
+                                                               ~(1 << index);
+                               }
+                       }
+
                        if (likely(RTE_MBUF_DIRECT(mbuf))) {
                                mp = mbuf->pool;
                                bp_info = DPAA_MEMPOOL_TO_POOL_INFO(mp);
@@ -916,18 +964,6 @@ dpaa_eth_queue_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
                                        goto send_pkts;
                                }
                        }
-                       seqn = mbuf->seqn;
-                       if (seqn != DPAA_INVALID_MBUF_SEQN) {
-                               index = seqn - 1;
-                               if (DPAA_PER_LCORE_DQRR_HELD & (1 << index)) {
-                                       flags[loop] =
-                                          ((index & QM_EQCR_DCA_IDXMASK) << 8);
-                                       flags[loop] |= QMAN_ENQUEUE_FLAG_DCA;
-                                       DPAA_PER_LCORE_DQRR_SIZE--;
-                                       DPAA_PER_LCORE_DQRR_HELD &=
-                                                               ~(1 << index);
-                               }
-                       }
                }
 
 send_pkts:
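
Moving the sequence-number handling to the top of the loop makes the DCA (discrete consumption acknowledgement) bookkeeping happen before any early "goto send_pkts" exit. Distilled into a helper for clarity (a sketch, not part of the patch; the assumption is that seqn, when not DPAA_INVALID_MBUF_SEQN, carries the held DQRR ring index plus one):

    static inline uint32_t
    dpaa_tx_dca_flags(uint32_t seqn)
    {
        uint32_t index = seqn - 1;  /* seqn is 1-based */

        /* Ask QMan to consume the held DQRR entry as part of this enqueue */
        return ((index & QM_EQCR_DCA_IDXMASK) << 8) | QMAN_ENQUEUE_FLAG_DCA;
    }
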
index d3e6351..75b093c 100644 (file)
@@ -32,8 +32,6 @@
 /* L4 Type field: TCP */
 #define DPAA_L4_PARSE_RESULT_TCP       0x20
 
-#define DPAA_SGT_MAX_ENTRIES 16 /* maximum number of entries in SG Table */
-
 #define DPAA_MAX_DEQUEUE_NUM_FRAMES    63
        /**< Maximum number of frames to be dequeued in a single rx call */
 
@@ -272,4 +270,7 @@ void dpaa_rx_cb(struct qman_fq **fq,
                struct qm_dqrr_entry **dqrr, void **bufs, int num_bufs);
 
 void dpaa_rx_cb_prepare(struct qm_dqrr_entry *dq, void **bufs);
+
+void dpaa_rx_cb_no_prefetch(struct qman_fq **fq,
+                   struct qm_dqrr_entry **dqrr, void **bufs, int num_bufs);
 #endif
index 9b0b143..ca5f7a3 100644 (file)
@@ -25,7 +25,7 @@ CFLAGS += -I$(RTE_SDK)/lib/librte_eal/linuxapp/eal
 EXPORT_MAP := rte_pmd_dpaa2_version.map
 
 # library version
-LIBABIVER := 1
+LIBABIVER := 2
 
 # depends on fslmc bus which uses experimental API
 CFLAGS += -DALLOW_EXPERIMENTAL_API
@@ -40,5 +40,6 @@ LDLIBS += -lrte_bus_fslmc
 LDLIBS += -lrte_mempool_dpaa2
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
+LDLIBS += -lrte_common_dpaax
 
 include $(RTE_SDK)/mk/rte.lib.mk
index 779cdf2..adb730b 100644 (file)
@@ -213,6 +213,46 @@ struct dpaa2_annot_hdr {
 #define DPAA2_L3_IPv6_UDP (L3_IPV6_1_PRESENT | L3_IPV6_1_UNICAST | \
        L3_PROTO_UDP_PRESENT | L4_UNKNOWN_PROTOCOL)
 
+/**
+ * Macros to get values in word5
+ */
+#define SHIM_OFFSET_1(var)             ((uint64_t)(var) & 0xFF00000000000000)
+#define SHIM_OFFSET_2(var)             ((uint64_t)(var) & 0x00FF000000000000)
+#define IP_PID_OFFSET(var)             ((uint64_t)(var) & 0x0000FF0000000000)
+#define ETH_OFFSET(var)                        ((uint64_t)(var) & 0x000000FF00000000)
+#define LLC_SNAP_OFFSET(var)           ((uint64_t)(var) & 0x00000000FF000000)
+#define VLAN_TCI_OFFSET_1(var)         ((uint64_t)(var) & 0x0000000000FF0000)
+#define VLAN_TCI_OFFSET_N(var)         ((uint64_t)(var) & 0x000000000000FF00)
+#define LAST_ETYPE_OFFSET(var)         ((uint64_t)(var) & 0x00000000000000FF)
+
+/**
+ * Macros to get values in word6
+ */
+#define PPPOE_OFFSET(var)              ((uint64_t)(var) & 0xFF00000000000000)
+#define MPLS_OFFSET_1(var)             ((uint64_t)(var) & 0x00FF000000000000)
+#define MPLS_OFFSET_N(var)             ((uint64_t)(var) & 0x0000FF0000000000)
+#define ARP_OR_IP_OFFSET_1(var)                ((uint64_t)(var) & 0x000000FF00000000)
+#define IP_N_OR_MIN_ENCAP_OFFSET(var)  ((uint64_t)(var) & 0x00000000FF000000)
+#define GRE_OFFSET(var)                        ((uint64_t)(var) & 0x0000000000FF0000)
+#define L4_OFFSET(var)                 ((uint64_t)(var) & 0x000000000000FF00)
+#define GTP_OR_ESP_OR_IPSEC_OFFSET(var)        ((uint64_t)(var) & 0x00000000000000FF)
+
+/**
+ * Macros to get values in word7
+ */
+#define IPV6_ROUTING_HDR_OFFSET_1(var) ((uint64_t)(var) & 0xFF00000000000000)
+#define IPV6_ROUTING_HDR_OFFSET_2(var) ((uint64_t)(var) & 0x00FF000000000000)
+#define NEXT_HDR_OFFSET(var)           ((uint64_t)(var) & 0x0000FF0000000000)
+#define IPV6_FRAG_OFFSET(var)          ((uint64_t)(var) & 0x000000FF00000000)
+#define GROSS_RUNNING_SUM(var)         ((uint64_t)(var) & 0x00000000FFFF0000)
+#define RUNNING_SUM(var)               ((uint64_t)(var) & 0x000000000000FFFF)
+
+/**
+ * Macros to get values in word8
+ */
+#define PARSE_ERROR_CODE(var)          ((uint64_t)(var) & 0xFF00000000000000)
+#define SOFT_PARSING_CONTEXT(var)      ((uint64_t)(var) & 0x00FFFFFFFFFFFFFF)
+
 /* Debug frame, otherwise supposed to be discarded */
 #define DPAA2_ETH_FAS_DISC           0x80000000
 /* MACSEC frame */
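
Each macro isolates one byte (or, for the running sums, two bytes) of the annotation word; the caller still shifts the masked value down before using it as a byte offset. For example, dpaa2_dev_rx_parse_slow() later in this patch recovers the first VLAN TCI this way:

    /* VLAN_TCI_OFFSET_1() keeps bits 23:16 of word5, so shifting right by
     * 16 yields the TCI's byte offset within the frame. */
    uint16_t *vlan_tci = rte_pktmbuf_mtod_offset(mbuf, uint16_t *,
            (VLAN_TCI_OFFSET_1(annotation->word5) >> 16));
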
index c504736..fa71807 100644 (file)
@@ -39,7 +39,6 @@ static uint64_t dev_rx_offloads_sup =
 
 /* Rx offloads which cannot be disabled */
 static uint64_t dev_rx_offloads_nodis =
-               DEV_RX_OFFLOAD_CRC_STRIP |
                DEV_RX_OFFLOAD_SCATTER;
 
 /* Supported Tx offloads */
@@ -292,6 +291,35 @@ fail:
        return -1;
 }
 
+static void
+dpaa2_free_rx_tx_queues(struct rte_eth_dev *dev)
+{
+       struct dpaa2_dev_priv *priv = dev->data->dev_private;
+       struct dpaa2_queue *dpaa2_q;
+       int i;
+
+       PMD_INIT_FUNC_TRACE();
+
+       /* Queue allocation base */
+       if (priv->rx_vq[0]) {
+               /* cleaning up queue storage */
+               for (i = 0; i < priv->nb_rx_queues; i++) {
+                       dpaa2_q = (struct dpaa2_queue *)priv->rx_vq[i];
+                       if (dpaa2_q->q_storage)
+                               rte_free(dpaa2_q->q_storage);
+               }
+               /* cleanup tx queue cscn */
+               for (i = 0; i < priv->nb_tx_queues; i++) {
+                       dpaa2_q = (struct dpaa2_queue *)priv->tx_vq[i];
+                       if (dpaa2_q->cscn)
+                               rte_free(dpaa2_q->cscn);
+               }
+               /* free memory for all queues (RX+TX) */
+               rte_free(priv->rx_vq[0]);
+               priv->rx_vq[0] = NULL;
+       }
+}
+
 static int
 dpaa2_eth_dev_configure(struct rte_eth_dev *dev)
 {
@@ -406,7 +434,8 @@ dpaa2_eth_dev_configure(struct rte_eth_dev *dev)
                }
        }
 
-       dpaa2_vlan_offload_set(dev, ETH_VLAN_FILTER_MASK);
+       if (rx_offloads & DEV_RX_OFFLOAD_VLAN_FILTER)
+               dpaa2_vlan_offload_set(dev, ETH_VLAN_FILTER_MASK);
 
        /* update the current status */
        dpaa2_dev_link_update(dev, 0);
@@ -569,7 +598,8 @@ dpaa2_dev_tx_queue_setup(struct rte_eth_dev *dev,
                 */
                cong_notif_cfg.threshold_exit = CONG_EXIT_TX_THRESHOLD;
                cong_notif_cfg.message_ctx = 0;
-               cong_notif_cfg.message_iova = (size_t)dpaa2_q->cscn;
+               cong_notif_cfg.message_iova =
+                               (size_t)DPAA2_VADDR_TO_IOVA(dpaa2_q->cscn);
                cong_notif_cfg.dest_cfg.dest_type = DPNI_DEST_NONE;
                cong_notif_cfg.notification_mode =
                                         DPNI_CONG_OPT_WRITE_MEM_ON_ENTER |
@@ -867,23 +897,13 @@ dpaa2_dev_stop(struct rte_eth_dev *dev)
 static void
 dpaa2_dev_close(struct rte_eth_dev *dev)
 {
-       struct rte_eth_dev_data *data = dev->data;
        struct dpaa2_dev_priv *priv = dev->data->dev_private;
        struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-       int i, ret;
+       int ret;
        struct rte_eth_link link;
-       struct dpaa2_queue *dpaa2_q;
 
        PMD_INIT_FUNC_TRACE();
 
-       for (i = 0; i < data->nb_tx_queues; i++) {
-               dpaa2_q = (struct dpaa2_queue *)data->tx_queues[i];
-               if (!dpaa2_q->cscn) {
-                       rte_free(dpaa2_q->cscn);
-                       dpaa2_q->cscn = NULL;
-               }
-       }
-
        /* Clean the device first */
        ret = dpni_reset(dpni, CMD_PRI_LOW, priv->token);
        if (ret) {
@@ -1117,6 +1137,8 @@ int dpaa2_dev_stats_get(struct rte_eth_dev *dev,
        int32_t  retcode;
        uint8_t page0 = 0, page1 = 1, page2 = 2;
        union dpni_statistics value;
+       int i;
+       struct dpaa2_queue *dpaa2_rxq, *dpaa2_txq;
 
        memset(&value, 0, sizeof(union dpni_statistics));
 
@@ -1164,6 +1186,21 @@ int dpaa2_dev_stats_get(struct rte_eth_dev *dev,
        stats->oerrors = value.page_2.egress_discarded_frames;
        stats->imissed = value.page_2.ingress_nobuffer_discards;
 
+       /* Fill in per queue stats */
+       for (i = 0; (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) &&
+               (i < priv->nb_rx_queues || i < priv->nb_tx_queues); ++i) {
+               dpaa2_rxq = (struct dpaa2_queue *)priv->rx_vq[i];
+               dpaa2_txq = (struct dpaa2_queue *)priv->tx_vq[i];
+               if (dpaa2_rxq)
+                       stats->q_ipackets[i] = dpaa2_rxq->rx_pkts;
+               if (dpaa2_txq)
+                       stats->q_opackets[i] = dpaa2_txq->tx_pkts;
+
+               /* Byte counting is not implemented */
+               stats->q_ibytes[i]   = 0;
+               stats->q_obytes[i]   = 0;
+       }
+
        return 0;
 
 err:
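
With the per-queue counters filled in above, applications see the packet counts through the standard stats API; byte counters stay at zero as noted. A short usage sketch (port_id is assumed to be a valid, started port):

    #include <stdio.h>
    #include <inttypes.h>
    #include <rte_ethdev.h>

    struct rte_eth_stats stats;
    unsigned int i;

    if (rte_eth_stats_get(port_id, &stats) == 0) {
        for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++)
            printf("q%u: %" PRIu64 " rx pkts, %" PRIu64 " tx pkts\n",
                   i, stats.q_ipackets[i], stats.q_opackets[i]);
    }
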
@@ -1323,6 +1360,8 @@ dpaa2_dev_stats_reset(struct rte_eth_dev *dev)
        struct dpaa2_dev_priv *priv = dev->data->dev_private;
        struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
        int32_t  retcode;
+       int i;
+       struct dpaa2_queue *dpaa2_q;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -1335,6 +1374,19 @@ dpaa2_dev_stats_reset(struct rte_eth_dev *dev)
        if (retcode)
                goto error;
 
+       /* Reset the per queue stats in dpaa2_queue structure */
+       for (i = 0; i < priv->nb_rx_queues; i++) {
+               dpaa2_q = (struct dpaa2_queue *)priv->rx_vq[i];
+               if (dpaa2_q)
+                       dpaa2_q->rx_pkts = 0;
+       }
+
+       for (i = 0; i < priv->nb_tx_queues; i++) {
+               dpaa2_q = (struct dpaa2_queue *)priv->tx_vq[i];
+               if (dpaa2_q)
+                       dpaa2_q->tx_pkts = 0;
+       }
+
        return;
 
 error:
@@ -1360,7 +1412,7 @@ dpaa2_dev_link_update(struct rte_eth_dev *dev,
 
        ret = dpni_get_link_state(dpni, CMD_PRI_LOW, priv->token, &state);
        if (ret < 0) {
-               DPAA2_PMD_ERR("error: dpni_get_link_state %d", ret);
+               DPAA2_PMD_DEBUG("error: dpni_get_link_state %d", ret);
                return -1;
        }
 
@@ -1422,7 +1474,7 @@ dpaa2_dev_set_link_up(struct rte_eth_dev *dev)
        }
        ret = dpni_get_link_state(dpni, CMD_PRI_LOW, priv->token, &state);
        if (ret < 0) {
-               DPAA2_PMD_ERR("Unable to get link state (%d)", ret);
+               DPAA2_PMD_DEBUG("Unable to get link state (%d)", ret);
                return -1;
        }
 
@@ -1785,6 +1837,74 @@ static struct eth_dev_ops dpaa2_ethdev_ops = {
        .rss_hash_conf_get    = dpaa2_dev_rss_hash_conf_get,
 };
 
+/* Populate the MAC address from the physical device (u-boot/firmware) and/or
+ * the one set by higher layers such as MC (restool).
+ * On success, the prime MAC address is written to mac_entry.
+ */
+static int
+populate_mac_addr(struct fsl_mc_io *dpni_dev, struct dpaa2_dev_priv *priv,
+                 struct ether_addr *mac_entry)
+{
+       int ret;
+       struct ether_addr phy_mac, prime_mac;
+
+       memset(&phy_mac, 0, sizeof(struct ether_addr));
+       memset(&prime_mac, 0, sizeof(struct ether_addr));
+
+       /* Get the physical device MAC address */
+       ret = dpni_get_port_mac_addr(dpni_dev, CMD_PRI_LOW, priv->token,
+                                    phy_mac.addr_bytes);
+       if (ret) {
+               DPAA2_PMD_ERR("DPNI get physical port MAC failed: %d", ret);
+               goto cleanup;
+       }
+
+       ret = dpni_get_primary_mac_addr(dpni_dev, CMD_PRI_LOW, priv->token,
+                                       prime_mac.addr_bytes);
+       if (ret) {
+               DPAA2_PMD_ERR("DPNI get Prime port MAC failed: %d", ret);
+               goto cleanup;
+       }
+
+       /* Now that both MACs have been obtained:
+        *  if phy_mac is non-zero and differs from prime_mac, overwrite
+        *     prime_mac with phy_mac and return it;
+        *  if phy_mac is zero but prime_mac is not, return prime_mac;
+        *  if both are zero, create a random MAC, set it as prime and
+        *     return it.
+        */
+       if (!is_zero_ether_addr(&phy_mac)) {
+               /* If the addresses are not same, overwrite prime */
+               if (!is_same_ether_addr(&phy_mac, &prime_mac)) {
+                       ret = dpni_set_primary_mac_addr(dpni_dev, CMD_PRI_LOW,
+                                                       priv->token,
+                                                       phy_mac.addr_bytes);
+                       if (ret) {
+                               DPAA2_PMD_ERR("Unable to set MAC Address: %d",
+                                             ret);
+                               goto cleanup;
+                       }
+                       memcpy(&prime_mac, &phy_mac, sizeof(struct ether_addr));
+               }
+       } else if (is_zero_ether_addr(&prime_mac)) {
+               /* If both the physical and prime MACs are zero, create a random one */
+               eth_random_addr(prime_mac.addr_bytes);
+               ret = dpni_set_primary_mac_addr(dpni_dev, CMD_PRI_LOW,
+                                               priv->token,
+                                               prime_mac.addr_bytes);
+               if (ret) {
+                       DPAA2_PMD_ERR("Unable to set MAC Address: %d", ret);
+                       goto cleanup;
+               }
+       }
+
+       /* prime_mac now holds the final MAC address */
+       memcpy(mac_entry, &prime_mac, sizeof(struct ether_addr));
+       return 0;
+
+cleanup:
+       return -1;
+}
+
 static int
 dpaa2_dev_init(struct rte_eth_dev *eth_dev)
 {
@@ -1867,7 +1987,10 @@ dpaa2_dev_init(struct rte_eth_dev *eth_dev)
                goto init_err;
        }
 
-       /* Allocate memory for storing MAC addresses */
+       /* Allocate memory for storing MAC addresses.
+        * A table of mac_filter_entries size is allocated so that the RTE
+        * ether library can add MAC entries when rte_eth_dev_mac_addr_add
+        * is called.
+        */
        eth_dev->data->mac_addrs = rte_zmalloc("dpni",
                ETHER_ADDR_LEN * attr.mac_filter_entries, 0);
        if (eth_dev->data->mac_addrs == NULL) {
@@ -1878,12 +2001,11 @@ dpaa2_dev_init(struct rte_eth_dev *eth_dev)
                goto init_err;
        }
 
-       ret = dpni_get_primary_mac_addr(dpni_dev, CMD_PRI_LOW,
-                                       priv->token,
-                       (uint8_t *)(eth_dev->data->mac_addrs[0].addr_bytes));
+       ret = populate_mac_addr(dpni_dev, priv, &eth_dev->data->mac_addrs[0]);
        if (ret) {
-               DPAA2_PMD_ERR("DPNI get mac address failed:Err Code = %d",
-                            ret);
+               DPAA2_PMD_ERR("Unable to fetch MAC Address for device");
+               rte_free(eth_dev->data->mac_addrs);
+               eth_dev->data->mac_addrs = NULL;
                goto init_err;
        }
 
@@ -1927,8 +2049,7 @@ dpaa2_dev_uninit(struct rte_eth_dev *eth_dev)
 {
        struct dpaa2_dev_priv *priv = eth_dev->data->dev_private;
        struct fsl_mc_io *dpni = (struct fsl_mc_io *)priv->hw;
-       int i, ret;
-       struct dpaa2_queue *dpaa2_q;
+       int ret;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -1942,23 +2063,7 @@ dpaa2_dev_uninit(struct rte_eth_dev *eth_dev)
 
        dpaa2_dev_close(eth_dev);
 
-       if (priv->rx_vq[0]) {
-               /* cleaning up queue storage */
-               for (i = 0; i < priv->nb_rx_queues; i++) {
-                       dpaa2_q = (struct dpaa2_queue *)priv->rx_vq[i];
-                       if (dpaa2_q->q_storage)
-                               rte_free(dpaa2_q->q_storage);
-               }
-               /*free the all queue memory */
-               rte_free(priv->rx_vq[0]);
-               priv->rx_vq[0] = NULL;
-       }
-
-       /* free memory for storing MAC addresses */
-       if (eth_dev->data->mac_addrs) {
-               rte_free(eth_dev->data->mac_addrs);
-               eth_dev->data->mac_addrs = NULL;
-       }
+       dpaa2_free_rx_tx_queues(eth_dev);
 
        /* Close the device at underlying layer*/
        ret = dpni_close(dpni, CMD_PRI_LOW, priv->token);
@@ -2008,7 +2113,6 @@ rte_dpaa2_probe(struct rte_dpaa2_driver *dpaa2_drv,
        }
 
        eth_dev->device = &dpaa2_dev->device;
-       eth_dev->device->driver = &dpaa2_drv->driver;
 
        dpaa2_dev->eth_dev = eth_dev;
        eth_dev->data->rx_mbuf_alloc_failed = 0;
@@ -2023,8 +2127,6 @@ rte_dpaa2_probe(struct rte_dpaa2_driver *dpaa2_drv,
                return 0;
        }
 
-       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-               rte_free(eth_dev->data->dev_private);
        rte_eth_dev_release_port(eth_dev);
        return diag;
 }
@@ -2037,8 +2139,6 @@ rte_dpaa2_remove(struct rte_dpaa2_device *dpaa2_dev)
        eth_dev = dpaa2_dev->eth_dev;
        dpaa2_dev_uninit(eth_dev);
 
-       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-               rte_free(eth_dev->data->dev_private);
        rte_eth_dev_release_port(eth_dev);
 
        return 0;
index ef109a6..eab943d 100644 (file)
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  *   Copyright (c) 2016 Freescale Semiconductor, Inc. All rights reserved.
- *   Copyright 2016 NXP
+ *   Copyright 2016-2018 NXP
  *
  */
 
 #include "dpaa2_ethdev.h"
 #include "base/dpaa2_hw_dpni_annot.h"
 
+static inline uint32_t __attribute__((hot))
+dpaa2_dev_rx_parse_slow(struct rte_mbuf *mbuf,
+                       struct dpaa2_annot_hdr *annotation);
+
 #define DPAA2_MBUF_TO_CONTIG_FD(_mbuf, _fd, _bpid)  do { \
        DPAA2_SET_FD_ADDR(_fd, DPAA2_MBUF_VADDR_TO_IOVA(_mbuf)); \
        DPAA2_SET_FD_LEN(_fd, _mbuf->data_len); \
        DPAA2_SET_ONLY_FD_BPID(_fd, _bpid); \
        DPAA2_SET_FD_OFFSET(_fd, _mbuf->data_off); \
-       DPAA2_SET_FD_ASAL(_fd, DPAA2_ASAL_VAL); \
+       DPAA2_SET_FD_FRC(_fd, 0);               \
+       DPAA2_RESET_FD_CTRL(_fd);               \
+       DPAA2_RESET_FD_FLC(_fd);                \
 } while (0)
 
 static inline void __attribute__((hot))
-dpaa2_dev_rx_parse_frc(struct rte_mbuf *m, uint16_t frc)
+dpaa2_dev_rx_parse_new(struct rte_mbuf *m, const struct qbman_fd *fd)
 {
-       DPAA2_PMD_DP_DEBUG("frc = 0x%x\t", frc);
+       uint16_t frc = DPAA2_GET_FD_FRC_PARSE_SUM(fd);
 
        m->packet_type = RTE_PTYPE_UNKNOWN;
        switch (frc) {
@@ -91,29 +97,45 @@ dpaa2_dev_rx_parse_frc(struct rte_mbuf *m, uint16_t frc)
                m->packet_type = RTE_PTYPE_L2_ETHER |
                        RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_ICMP;
                break;
-       case DPAA2_PKT_TYPE_VLAN_1:
-       case DPAA2_PKT_TYPE_VLAN_2:
-               m->ol_flags |= PKT_RX_VLAN;
-               break;
-       /* More switch cases can be added */
-       /* TODO: Add handling for checksum error check from FRC */
        default:
-               m->packet_type = RTE_PTYPE_UNKNOWN;
+               m->packet_type = dpaa2_dev_rx_parse_slow(m,
+                 (void *)((size_t)DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd))
+                        + DPAA2_FD_PTA_SIZE));
        }
+       m->hash.rss = fd->simple.flc_hi;
+       m->ol_flags |= PKT_RX_RSS_HASH;
 }
 
 static inline uint32_t __attribute__((hot))
-dpaa2_dev_rx_parse_slow(struct dpaa2_annot_hdr *annotation)
+dpaa2_dev_rx_parse_slow(struct rte_mbuf *mbuf,
+                       struct dpaa2_annot_hdr *annotation)
 {
        uint32_t pkt_type = RTE_PTYPE_UNKNOWN;
+       uint16_t *vlan_tci;
+
+       DPAA2_PMD_DP_DEBUG("(slow parse)annotation(3)=0x%" PRIx64 "\t"
+                       "(4)=0x%" PRIx64 "\t",
+                       annotation->word3, annotation->word4);
+
+       if (BIT_ISSET_AT_POS(annotation->word3, L2_VLAN_1_PRESENT)) {
+               vlan_tci = rte_pktmbuf_mtod_offset(mbuf, uint16_t *,
+                       (VLAN_TCI_OFFSET_1(annotation->word5) >> 16));
+               mbuf->vlan_tci = rte_be_to_cpu_16(*vlan_tci);
+               mbuf->ol_flags |= PKT_RX_VLAN;
+               pkt_type |= RTE_PTYPE_L2_ETHER_VLAN;
+       } else if (BIT_ISSET_AT_POS(annotation->word3, L2_VLAN_N_PRESENT)) {
+               vlan_tci = rte_pktmbuf_mtod_offset(mbuf, uint16_t *,
+                       (VLAN_TCI_OFFSET_1(annotation->word5) >> 16));
+               mbuf->vlan_tci = rte_be_to_cpu_16(*vlan_tci);
+               mbuf->ol_flags |= PKT_RX_VLAN | PKT_RX_QINQ;
+               pkt_type |= RTE_PTYPE_L2_ETHER_QINQ;
+       }
 
-       DPAA2_PMD_DP_DEBUG("(slow parse) Annotation = 0x%" PRIx64 "\t",
-                          annotation->word4);
        if (BIT_ISSET_AT_POS(annotation->word3, L2_ARP_PRESENT)) {
-               pkt_type = RTE_PTYPE_L2_ETHER_ARP;
+               pkt_type |= RTE_PTYPE_L2_ETHER_ARP;
                goto parse_done;
        } else if (BIT_ISSET_AT_POS(annotation->word3, L2_ETH_MAC_PRESENT)) {
-               pkt_type = RTE_PTYPE_L2_ETHER;
+               pkt_type |= RTE_PTYPE_L2_ETHER;
        } else {
                goto parse_done;
        }
@@ -135,6 +157,11 @@ dpaa2_dev_rx_parse_slow(struct dpaa2_annot_hdr *annotation)
                goto parse_done;
        }
 
+       if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L3CE))
+               mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+       else if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L4CE))
+               mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
+
        if (BIT_ISSET_AT_POS(annotation->word4, L3_IP_1_FIRST_FRAGMENT |
            L3_IP_1_MORE_FRAGMENT |
            L3_IP_N_FIRST_FRAGMENT |
@@ -173,16 +200,15 @@ dpaa2_dev_rx_parse(struct rte_mbuf *mbuf, void *hw_annot_addr)
        DPAA2_PMD_DP_DEBUG("(fast parse) Annotation = 0x%" PRIx64 "\t",
                           annotation->word4);
 
-       /* Check offloads first */
-       if (BIT_ISSET_AT_POS(annotation->word3,
-                            L2_VLAN_1_PRESENT | L2_VLAN_N_PRESENT))
-               mbuf->ol_flags |= PKT_RX_VLAN;
-
        if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L3CE))
                mbuf->ol_flags |= PKT_RX_IP_CKSUM_BAD;
        else if (BIT_ISSET_AT_POS(annotation->word8, DPAA2_ETH_FAS_L4CE))
                mbuf->ol_flags |= PKT_RX_L4_CKSUM_BAD;
 
+       /* Check detailed parsing requirement */
+       if (annotation->word3 & 0x7FFFFC3FFFF)
+               return dpaa2_dev_rx_parse_slow(mbuf, annotation);
+
        /* Return some common types from parse processing */
        switch (annotation->word4) {
        case DPAA2_L3_IPv4:
@@ -205,7 +231,7 @@ dpaa2_dev_rx_parse(struct rte_mbuf *mbuf, void *hw_annot_addr)
                break;
        }
 
-       return dpaa2_dev_rx_parse_slow(annotation);
+       return dpaa2_dev_rx_parse_slow(mbuf, annotation);
 }
 
 static inline struct rte_mbuf *__attribute__((hot))
@@ -236,8 +262,7 @@ eth_sg_fd_to_mbuf(const struct qbman_fd *fd)
        first_seg->nb_segs = 1;
        first_seg->next = NULL;
        if (dpaa2_svr_family == SVR_LX2160A)
-               dpaa2_dev_rx_parse_frc(first_seg,
-                               DPAA2_GET_FD_FRC_PARSE_SUM(fd));
+               dpaa2_dev_rx_parse_new(first_seg, fd);
        else
                first_seg->packet_type = dpaa2_dev_rx_parse(first_seg,
                        (void *)((size_t)DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd))
@@ -293,7 +318,7 @@ eth_fd_to_mbuf(const struct qbman_fd *fd)
         */
 
        if (dpaa2_svr_family == SVR_LX2160A)
-               dpaa2_dev_rx_parse_frc(mbuf, DPAA2_GET_FD_FRC_PARSE_SUM(fd));
+               dpaa2_dev_rx_parse_new(mbuf, fd);
        else
                mbuf->packet_type = dpaa2_dev_rx_parse(mbuf,
                        (void *)((size_t)DPAA2_IOVA_TO_VADDR(DPAA2_GET_FD_ADDR(fd))
@@ -476,8 +501,7 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                }
        }
        swp = DPAA2_PER_LCORE_ETHRX_PORTAL;
-       pull_size = (nb_pkts > DPAA2_DQRR_RING_SIZE) ?
-                                              DPAA2_DQRR_RING_SIZE : nb_pkts;
+       pull_size = (nb_pkts > dpaa2_dqrr_size) ? dpaa2_dqrr_size : nb_pkts;
        if (unlikely(!q_storage->active_dqs)) {
                q_storage->toggle = 0;
                dq_storage = q_storage->dq_storage[q_storage->toggle];
@@ -555,10 +579,12 @@ dpaa2_dev_prefetch_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                }
                fd = qbman_result_DQ_fd(dq_storage);
 
-               next_fd = qbman_result_DQ_fd(dq_storage + 1);
-               /* Prefetch Annotation address for the parse results */
-               rte_prefetch0((void *)(size_t)(DPAA2_GET_FD_ADDR(next_fd)
-                               + DPAA2_FD_PTA_SIZE + 16));
+               if (dpaa2_svr_family != SVR_LX2160A) {
+                       next_fd = qbman_result_DQ_fd(dq_storage + 1);
+                       /* Prefetch Annotation address for the parse results */
+                       rte_prefetch0((void *)(size_t)(DPAA2_GET_FD_ADDR(
+                                     next_fd) + DPAA2_FD_PTA_SIZE + 16));
+               }
 
                if (unlikely(DPAA2_FD_GET_FORMAT(fd) == qbman_fd_sg))
                        bufs[num_rx] = eth_sg_fd_to_mbuf(fd);
@@ -685,7 +711,6 @@ dpaa2_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        /*Prepare enqueue descriptor*/
        qbman_eq_desc_clear(&eqdesc);
        qbman_eq_desc_set_no_orp(&eqdesc, DPAA2_EQ_RESP_ERR_FQ);
-       qbman_eq_desc_set_response(&eqdesc, 0, 0);
        qbman_eq_desc_set_qd(&eqdesc, priv->qdid,
                             dpaa2_q->flow_id, dpaa2_q->tc_index);
        /*Clear the unused FD fields before sending*/
@@ -699,7 +724,8 @@ dpaa2_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                                goto skip_tx;
                }
 
-               frames_to_send = (nb_pkts >> 3) ? MAX_TX_RING_SLOTS : nb_pkts;
+               frames_to_send = (nb_pkts > dpaa2_eqcr_size) ?
+                       dpaa2_eqcr_size : nb_pkts;
 
                for (loop = 0; loop < frames_to_send; loop++) {
                        if ((*bufs)->seqn) {
@@ -712,9 +738,6 @@ dpaa2_dev_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                                (*bufs)->seqn = DPAA2_INVALID_MBUF_SEQN;
                        }
 
-                       fd_arr[loop].simple.frc = 0;
-                       DPAA2_RESET_FD_CTRL((&fd_arr[loop]));
-                       DPAA2_SET_FD_FLC((&fd_arr[loop]), (size_t)NULL);
                        if (likely(RTE_MBUF_DIRECT(*bufs))) {
                                mp = (*bufs)->pool;
                                /* Check the basic scenario and set
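
The burst size is now clamped to the runtime EQCR ring size instead of a fixed constant, so SoCs with differently sized enqueue rings are handled by the same loop. A worked illustration of the clamp (the numbers are assumed for the example):

    /* With nb_pkts = 20 and dpaa2_eqcr_size = 8, the outer loop issues
     * bursts of 8, 8 and 4 frames. */
    frames_to_send = (nb_pkts > dpaa2_eqcr_size) ? dpaa2_eqcr_size : nb_pkts;
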
index 9f22816..44b5604 100644 (file)
@@ -121,6 +121,7 @@ int dpni_create(struct fsl_mc_io *mc_io,
        cmd_params->num_queues = cfg->num_queues;
        cmd_params->num_tcs = cfg->num_tcs;
        cmd_params->mac_filter_entries = cfg->mac_filter_entries;
+       cmd_params->num_rx_tcs = cfg->num_rx_tcs;
        cmd_params->vlan_filter_entries =  cfg->vlan_filter_entries;
        cmd_params->qos_entries = cfg->qos_entries;
        cmd_params->fs_entries = cpu_to_le16(cfg->fs_entries);
@@ -664,9 +665,14 @@ int dpni_get_buffer_layout(struct fsl_mc_io *mc_io,
 
        /* retrieve response parameters */
        rsp_params = (struct dpni_rsp_get_buffer_layout *)cmd.params;
-       layout->pass_timestamp = dpni_get_field(rsp_params->flags, PASS_TS);
-       layout->pass_parser_result = dpni_get_field(rsp_params->flags, PASS_PR);
-       layout->pass_frame_status = dpni_get_field(rsp_params->flags, PASS_FS);
+       layout->pass_timestamp =
+                               (int)dpni_get_field(rsp_params->flags, PASS_TS);
+       layout->pass_parser_result =
+                               (int)dpni_get_field(rsp_params->flags, PASS_PR);
+       layout->pass_frame_status =
+                               (int)dpni_get_field(rsp_params->flags, PASS_FS);
+       layout->pass_sw_opaque =
+                       (int)dpni_get_field(rsp_params->flags, PASS_SWO);
        layout->private_data_size = le16_to_cpu(rsp_params->private_data_size);
        layout->data_align = le16_to_cpu(rsp_params->data_align);
        layout->data_head_room = le16_to_cpu(rsp_params->head_room);
@@ -702,10 +708,11 @@ int dpni_set_buffer_layout(struct fsl_mc_io *mc_io,
                                          token);
        cmd_params = (struct dpni_cmd_set_buffer_layout *)cmd.params;
        cmd_params->qtype = qtype;
-       cmd_params->options = cpu_to_le16(layout->options);
+       cmd_params->options = cpu_to_le16((uint16_t)layout->options);
        dpni_set_field(cmd_params->flags, PASS_TS, layout->pass_timestamp);
        dpni_set_field(cmd_params->flags, PASS_PR, layout->pass_parser_result);
        dpni_set_field(cmd_params->flags, PASS_FS, layout->pass_frame_status);
+       dpni_set_field(cmd_params->flags, PASS_SWO, layout->pass_sw_opaque);
        cmd_params->private_data_size = cpu_to_le16(layout->private_data_size);
        cmd_params->data_align = cpu_to_le16(layout->data_align);
        cmd_params->head_room = cpu_to_le16(layout->data_head_room);
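
A hedged sketch of how a caller would request the new software-opaque pass-through in the buffer layout; DPNI_QUEUE_RX and the option flag come from the header changes later in this patch, and the snippet is illustrative rather than a recommended configuration:

    struct dpni_buffer_layout layout = {
        .options = DPNI_BUF_LAYOUT_OPT_SW_OPAQUE,
        .pass_sw_opaque = 1,
    };

    err = dpni_set_buffer_layout(mc_io, CMD_PRI_LOW, token,
                                 DPNI_QUEUE_RX, &layout);
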
@@ -893,6 +900,7 @@ int dpni_set_link_cfg(struct fsl_mc_io *mc_io,
        cmd_params = (struct dpni_cmd_set_link_cfg *)cmd.params;
        cmd_params->rate = cpu_to_le32(cfg->rate);
        cmd_params->options = cpu_to_le64(cfg->options);
+       cmd_params->advertising = cpu_to_le64(cfg->advertising);
 
        /* send command to mc*/
        return mc_send_command(mc_io, &cmd);
@@ -929,8 +937,11 @@ int dpni_get_link_state(struct fsl_mc_io *mc_io,
        /* retrieve response parameters */
        rsp_params = (struct dpni_rsp_get_link_state *)cmd.params;
        state->up = dpni_get_field(rsp_params->flags, LINK_STATE);
+       state->state_valid = dpni_get_field(rsp_params->flags, STATE_VALID);
        state->rate = le32_to_cpu(rsp_params->rate);
        state->options = le64_to_cpu(rsp_params->options);
+       state->supported = le64_to_cpu(rsp_params->supported);
+       state->advertising = le64_to_cpu(rsp_params->advertising);
 
        return 0;
 }
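
The response now also reports whether the state is valid and which link modes are supported and advertised. A short usage sketch (the struct fields mirror the assignments above):

    struct dpni_link_state state = { 0 };

    if (dpni_get_link_state(dpni, CMD_PRI_LOW, token, &state) == 0 &&
        state.state_valid)
        printf("link %s, %u Mbps\n",
               state.up ? "up" : "down", state.rate);
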
@@ -1471,6 +1482,9 @@ int dpni_set_rx_tc_dist(struct fsl_mc_io *mc_io,
        dpni_set_field(cmd_params->keep_hash_key,
                       KEEP_HASH_KEY,
                       cfg->fs_cfg.keep_hash_key);
+       dpni_set_field(cmd_params->keep_hash_key,
+                      KEEP_ENTRIES,
+                      cfg->fs_cfg.keep_entries);
 
        /* send command to mc*/
        return mc_send_command(mc_io, &cmd);
@@ -1764,8 +1778,8 @@ int dpni_get_queue(struct fsl_mc_io *mc_io,
  * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
  * @token:     Token of DPNI object
  * @page:      Selects the statistics page to retrieve, see
- *             DPNI_GET_STATISTICS output. Pages are numbered 0 to 2.
- * @param:  Custom parameter for some pages used to select
+ *             DPNI_GET_STATISTICS output. Pages are numbered 0 to 3.
+ * @param:     Custom parameter for some pages used to select
  *             a certain statistic source, for example the TC.
  * @stat:      Structure containing the statistics
  *
@@ -1941,3 +1955,111 @@ int dpni_get_taildrop(struct fsl_mc_io *mc_io,
 
        return 0;
 }
+
+/**
+ * dpni_set_opr() - Set Order Restoration configuration.
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPNI object
+ * @tc:                Traffic class, in range 0 to NUM_TCS - 1
+ * @index:     Selects the specific queue out of the set allocated
+ *                     for the same TC. Value must be in range 0 to
+ *                     NUM_QUEUES - 1
+ * @options:   Configuration mode options
+ *                     can be OPR_OPT_CREATE or OPR_OPT_RETIRE
+ * @cfg:       Configuration options for the OPR
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ */
+int dpni_set_opr(struct fsl_mc_io *mc_io,
+                uint32_t cmd_flags,
+                uint16_t token,
+                uint8_t tc,
+                uint8_t index,
+                uint8_t options,
+                struct opr_cfg *cfg)
+{
+       struct dpni_cmd_set_opr *cmd_params;
+       struct mc_command cmd = { 0 };
+
+       /* prepare command */
+       cmd.header = mc_encode_cmd_header(
+                       DPNI_CMDID_SET_OPR,
+                       cmd_flags,
+                       token);
+       cmd_params = (struct dpni_cmd_set_opr *)cmd.params;
+       cmd_params->tc_id = tc;
+       cmd_params->index = index;
+       cmd_params->options = options;
+       cmd_params->oloe = cfg->oloe;
+       cmd_params->oeane = cfg->oeane;
+       cmd_params->olws = cfg->olws;
+       cmd_params->oa = cfg->oa;
+       cmd_params->oprrws = cfg->oprrws;
+
+       /* send command to mc*/
+       return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_get_opr() - Retrieve Order Restoration config and query.
+ * @mc_io:     Pointer to MC portal's I/O object
+ * @cmd_flags: Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:     Token of DPNI object
+ * @tc:                Traffic class, in range 0 to NUM_TCS - 1
+ * @index:     Selects the specific queue out of the set allocated
+ *                     for the same TC. Value must be in range 0 to
+ *                     NUM_QUEUES - 1
+ * @cfg:       Returned OPR configuration
+ * @qry:       Returned OPR query
+ *
+ * Return:     '0' on Success; Error code otherwise.
+ */
+int dpni_get_opr(struct fsl_mc_io *mc_io,
+                uint32_t cmd_flags,
+                uint16_t token,
+                uint8_t tc,
+                uint8_t index,
+                struct opr_cfg *cfg,
+                struct opr_qry *qry)
+{
+       struct dpni_rsp_get_opr *rsp_params;
+       struct dpni_cmd_get_opr *cmd_params;
+       struct mc_command cmd = { 0 };
+       int err;
+
+       /* prepare command */
+       cmd.header = mc_encode_cmd_header(DPNI_CMDID_GET_OPR,
+                                         cmd_flags,
+                                         token);
+       cmd_params = (struct dpni_cmd_get_opr *)cmd.params;
+       cmd_params->index = index;
+       cmd_params->tc_id = tc;
+
+       /* send command to mc*/
+       err = mc_send_command(mc_io, &cmd);
+       if (err)
+               return err;
+
+       /* retrieve response parameters */
+       rsp_params = (struct dpni_rsp_get_opr *)cmd.params;
+       cfg->oloe = rsp_params->oloe;
+       cfg->oeane = rsp_params->oeane;
+       cfg->olws = rsp_params->olws;
+       cfg->oa = rsp_params->oa;
+       cfg->oprrws = rsp_params->oprrws;
+       qry->rip = dpni_get_field(rsp_params->flags, RIP);
+       qry->enable = dpni_get_field(rsp_params->flags, OPR_ENABLE);
+       qry->nesn = le16_to_cpu(rsp_params->nesn);
+       qry->ndsn = le16_to_cpu(rsp_params->ndsn);
+       qry->ea_tseq = le16_to_cpu(rsp_params->ea_tseq);
+       qry->tseq_nlis = dpni_get_field(rsp_params->tseq_nlis, TSEQ_NLIS);
+       qry->ea_hseq = le16_to_cpu(rsp_params->ea_hseq);
+       qry->hseq_nlis = dpni_get_field(rsp_params->hseq_nlis, HSEQ_NLIS);
+       qry->ea_hptr = le16_to_cpu(rsp_params->ea_hptr);
+       qry->ea_tptr = le16_to_cpu(rsp_params->ea_tptr);
+       qry->opr_vid = le16_to_cpu(rsp_params->opr_vid);
+       qry->opr_id = le16_to_cpu(rsp_params->opr_id);
+
+       return 0;
+}
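
A hedged usage sketch for the new Order Restoration calls; OPR_OPT_CREATE and struct opr_cfg come from fsl_dpopr.h (which this release starts including from fsl_dpni.h), and the field values below are illustrative assumptions, not recommended settings:

    struct opr_cfg cfg = {
        .oprrws = 3,   /* restoration window size code (assumed) */
        .oa     = 1,   /* auto-advance (assumed) */
        .olws   = 0,
        .oeane  = 0,
        .oloe   = 0,
    };

    err = dpni_set_opr(mc_io, CMD_PRI_LOW, token, 0 /* tc */,
                       0 /* index */, OPR_OPT_CREATE, &cfg);
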
index 4de70f3..02fe8d5 100644 (file)
@@ -71,45 +71,41 @@ struct dpkg_mask {
 /**
  * struct dpkg_extract - A structure for defining a single extraction
  * @type: Determines how the union below is interpreted:
- *             DPKG_EXTRACT_FROM_HDR: selects 'from_hdr';
- *             DPKG_EXTRACT_FROM_DATA: selects 'from_data';
- *             DPKG_EXTRACT_FROM_PARSE: selects 'from_parse'
+ *     DPKG_EXTRACT_FROM_HDR: selects 'from_hdr';
+ *     DPKG_EXTRACT_FROM_DATA: selects 'from_data';
+ *     DPKG_EXTRACT_FROM_PARSE: selects 'from_parse'
  * @extract: Selects extraction method
+ * @extract.from_hdr: Used when 'type = DPKG_EXTRACT_FROM_HDR'
+ * @extract.from_data: Used when 'type = DPKG_EXTRACT_FROM_DATA'
+ * @extract.from_parse:  Used when 'type = DPKG_EXTRACT_FROM_PARSE'
+ * @extract.from_hdr.prot: Any of the supported headers
+ * @extract.from_hdr.type: Defines the type of header extraction:
+ *     DPKG_FROM_HDR: use size & offset below;
+ *     DPKG_FROM_FIELD: use field, size and offset below;
+ *     DPKG_FULL_FIELD: use field below
+ * @extract.from_hdr.field: One of the supported fields (NH_FLD_)
+ * @extract.from_hdr.size: Size in bytes
+ * @extract.from_hdr.offset: Byte offset
+ * @extract.from_hdr.hdr_index: Clear for cases not listed below;
+ *     Used for protocols that may have more than a single
+ *     header, 0 indicates an outer header;
+ *     Supported protocols (possible values):
+ *     NET_PROT_VLAN (0, HDR_INDEX_LAST);
+ *     NET_PROT_MPLS (0, 1, HDR_INDEX_LAST);
+ *     NET_PROT_IP(0, HDR_INDEX_LAST);
+ *     NET_PROT_IPv4(0, HDR_INDEX_LAST);
+ *     NET_PROT_IPv6(0, HDR_INDEX_LAST);
+ * @extract.from_data.size: Size in bytes
+ * @extract.from_data.offset: Byte offset
+ * @extract.from_parse.size: Size in bytes
+ * @extract.from_parse.offset: Byte offset
  * @num_of_byte_masks: Defines the number of valid entries in the array below;
  *             This is also the number of bytes to be used as masks
  * @masks: Masks parameters
  */
 struct dpkg_extract {
        enum dpkg_extract_type type;
-       /**
-        * union extract - Selects extraction method
-        * @from_hdr - Used when 'type = DPKG_EXTRACT_FROM_HDR'
-        * @from_data - Used when 'type = DPKG_EXTRACT_FROM_DATA'
-        * @from_parse - Used when 'type = DPKG_EXTRACT_FROM_PARSE'
-        */
        union {
-               /**
-                * struct from_hdr - Used when 'type = DPKG_EXTRACT_FROM_HDR'
-                * @prot: Any of the supported headers
-                * @type: Defines the type of header extraction:
-                *      DPKG_FROM_HDR: use size & offset below;
-                *      DPKG_FROM_FIELD: use field, size and offset below;
-                *      DPKG_FULL_FIELD: use field below
-                * @field: One of the supported fields (NH_FLD_)
-                *
-                * @size: Size in bytes
-                * @offset: Byte offset
-                * @hdr_index: Clear for cases not listed below;
-                *      Used for protocols that may have more than a single
-                *      header, 0 indicates an outer header;
-                *      Supported protocols (possible values):
-                *      NET_PROT_VLAN (0, HDR_INDEX_LAST);
-                *      NET_PROT_MPLS (0, 1, HDR_INDEX_LAST);
-                *      NET_PROT_IP(0, HDR_INDEX_LAST);
-                *      NET_PROT_IPv4(0, HDR_INDEX_LAST);
-                *      NET_PROT_IPv6(0, HDR_INDEX_LAST);
-                */
-
                struct {
                        enum net_prot prot;
                        enum dpkg_extract_from_hdr_type type;
@@ -118,23 +114,10 @@ struct dpkg_extract {
                        uint8_t offset;
                        uint8_t hdr_index;
                } from_hdr;
-               /**
-                * struct from_data
-                *      Used when 'type = DPKG_EXTRACT_FROM_DATA'
-                * @size: Size in bytes
-                * @offset: Byte offset
-                */
                struct {
                        uint8_t size;
                        uint8_t offset;
                } from_data;
-
-               /**
-                * struct from_parse
-                *      Used when 'type = DPKG_EXTRACT_FROM_PARSE'
-                * @size: Size in bytes
-                * @offset: Byte offset
-                */
                struct {
                        uint8_t size;
                        uint8_t offset;
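
After the restructuring, the union members are plain anonymous structs documented once at the top of the declaration. For orientation, a minimal extraction built on this layout (NET_PROT_IP and NH_FLD_IP_SRC are assumed to come from fsl_net.h):

    /* Illustrative: extract the full IP source address field. */
    struct dpkg_extract ext = {
        .type = DPKG_EXTRACT_FROM_HDR,
        .extract.from_hdr = {
            .prot  = NET_PROT_IP,
            .type  = DPKG_FULL_FIELD,
            .field = NH_FLD_IP_SRC,
        },
    };
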
index f0edcd2..de1bcb5 100644 (file)
@@ -8,6 +8,7 @@
 #define __FSL_DPNI_H
 
 #include <fsl_dpkg.h>
+#include <fsl_dpopr.h>
 
 struct fsl_mc_io;
 
@@ -77,6 +78,20 @@ struct fsl_mc_io;
  */
 #define DPNI_OPT_NO_FS                         0x000020
 
+/**
+ * Enable the Order Restoration support
+ */
+#define DPNI_OPT_HAS_OPR                               0x000040
+
+/**
+ * Order Point Records are shared for the entire TC
+ */
+#define DPNI_OPT_OPR_PER_TC                            0x000080
+/**
+ * All Tx traffic classes will use a single sender (ignore num_queues for tx)
+ */
+#define DPNI_OPT_SINGLE_SENDER                 0x000100
+
 int dpni_open(struct fsl_mc_io *mc_io,
              uint32_t cmd_flags,
              int dpni_id,
@@ -88,71 +103,74 @@ int dpni_close(struct fsl_mc_io *mc_io,
 
 /**
  * struct dpni_cfg - Structure representing DPNI configuration
- * @mac_addr:  Primary MAC address
- * @adv:       Advanced parameters; default is all zeros;
- *             use this structure to change default settings
+ * @options: Any combination of the following options:
+ *             DPNI_OPT_TX_FRM_RELEASE
+ *             DPNI_OPT_NO_MAC_FILTER
+ *             DPNI_OPT_HAS_POLICING
+ *             DPNI_OPT_SHARED_CONGESTION
+ *             DPNI_OPT_HAS_KEY_MASKING
+ *             DPNI_OPT_NO_FS
+ *             DPNI_OPT_SINGLE_SENDER
+ * @fs_entries: Number of entries in the flow steering table.
+ *             This table is used to select the ingress queue for
+ *             ingress traffic, targeting a GPP core or another.
+ *             In addition it can be used to discard traffic that
+ *             matches the set rule. It is either an exact match table
+ *             or a TCAM table, depending on DPNI_OPT_HAS_KEY_MASKING
+ *             bit in OPTIONS field. This field is ignored if
+ *             DPNI_OPT_NO_FS bit is set in OPTIONS field. Otherwise,
+ *             value 0 defaults to 64. Maximum supported value is 1024.
+ *             Note that the total number of entries is limited on the
+ *             SoC to as low as 512 entries if TCAM is used.
+ * @vlan_filter_entries: Number of entries in the VLAN address filtering
+ *             table. This is an exact match table used to filter
+ *             ingress traffic based on VLAN IDs. Value 0 disables VLAN
+ *             filtering. Maximum supported value is 16.
+ * @mac_filter_entries: Number of entries in the MAC address filtering
+ *             table. This is an exact match table and allows both
+ *             unicast and multicast entries. The primary MAC address
+ *             of the network interface is not part of this table,
+ *             this contains only entries in addition to it. This
+ *             field is ignored if DPNI_OPT_NO_MAC_FILTER is set in
+ *             OPTIONS field. Otherwise, value 0 defaults to 80.
+ *             Maximum supported value is 80.
+ * @num_queues: Number of Tx and Rx queues used for traffic
+ *             distribution. This is orthogonal to QoS and is only
+ *             used to distribute traffic to multiple GPP cores.
+ *             This configuration affects the number of Tx queues
+ *             (logical FQs, all associated with a single CEETM queue),
+ *             Rx queues and Tx confirmation queues, if applicable.
+ *             Value 0 defaults to one queue. Maximum supported value
+ *             is 8.
+ * @num_tcs: Number of traffic classes (TCs), reserved for the DPNI.
+ *             TCs can have different priority levels for the purpose
+ *             of Tx scheduling (see DPNI_SET_TX_PRIORITIES), different
+ *             BPs (DPNI_SET_POOLS), policers. There are dedicated QM
+ *             queues for traffic classes (including class queues on
+ *             Tx). Value 0 defaults to one TC. Maximum supported value
+ *             is 16. There are maximum 16 TCs for Tx and 8 TCs for Rx.
+ *             When num_tcs>8 Tx will use this value but Rx will have
+ *             only 8 traffic classes.
+ * @num_rx_tcs: if set to a non-zero value, the number of TCs used for Rx.
+ *             Maximum value is 8. If set to zero, the number of Rx TCs
+ *             will be initialized with the value provided in the num_tcs
+ *             parameter.
+ * @qos_entries: Number of entries in the QoS classification table. This
+ *             table is used to select the TC for ingress traffic. It
+ *             is either an exact match or a TCAM table, depending on
+ *             DPNI_OPT_HAS_KEY_MASKING bit in OPTIONS field. This
+ *             field is ignored if the DPNI has a single TC. Otherwise,
+ *             a value of 0 defaults to 64. Maximum supported value
+ *             is 64.
  */
 struct dpni_cfg {
-       /**
-        * @options: Any combination of the following options:
-        *              DPNI_OPT_TX_FRM_RELEASE
-        *              DPNI_OPT_NO_MAC_FILTER
-        *              DPNI_OPT_HAS_POLICING
-        *              DPNI_OPT_SHARED_CONGESTION
-        *              DPNI_OPT_HAS_KEY_MASKING
-        *              DPNI_OPT_NO_FS
-        * @fs_entries: Number of entries in the flow steering table.
-        *              This table is used to select the ingress queue for
-        *              ingress traffic, targeting a GPP core or another.
-        *              In addition it can be used to discard traffic that
-        *              matches the set rule. It is either an exact match table
-        *              or a TCAM table, depending on DPNI_OPT_ HAS_KEY_MASKING
-        *              bit in OPTIONS field. This field is ignored if
-        *              DPNI_OPT_NO_FS bit is set in OPTIONS field. Otherwise,
-        *              value 0 defaults to 64. Maximum supported value is 1024.
-        *              Note that the total number of entries is limited on the
-        *              SoC to as low as 512 entries if TCAM is used.
-        * @vlan_filter_entries: Number of entries in the VLAN address filtering
-        *              table. This is an exact match table used to filter
-        *              ingress traffic based on VLAN IDs. Value 0 disables VLAN
-        *              filtering. Maximum supported value is 16.
-        * @mac_filter_entries: Number of entries in the MAC address filtering
-        *              table. This is an exact match table and allows both
-        *              unicast and multicast entries. The primary MAC address
-        *              of the network interface is not part of this table,
-        *              this contains only entries in addition to it. This
-        *              field is ignored if DPNI_OPT_ NO_MAC_FILTER is set in
-        *              OPTIONS field. Otherwise, value 0 defaults to 80.
-        *              Maximum supported value is 80.
-        * @num_queues: Number of Tx and Rx queues used for traffic
-        *              distribution. This is orthogonal to QoS and is only
-        *              used to distribute traffic to multiple GPP cores.
-        *              This configuration affects the number of Tx queues
-        *              (logical FQs, all associated with a single CEETM queue),
-        *              Rx queues and Tx confirmation queues, if applicable.
-        *              Value 0 defaults to one queue. Maximum supported value
-        *              is 8.
-        * @num_tcs: Number of traffic classes (TCs), reserved for the DPNI.
-        *              TCs can have different priority levels for the purpose
-        *              of Tx scheduling (see DPNI_SET_TX_SELECTION), different
-        *              BPs (DPNI_ SET_POOLS), policers. There are dedicated QM
-        *              queues for traffic classes (including class queues on
-        *              Tx). Value 0 defaults to one TC. Maximum supported value
-        *              is 8.
-        * @qos_entries: Number of entries in the QoS classification table. This
-        *              table is used to select the TC for ingress traffic. It
-        *              is either an exact match or a TCAM table, depending on
-        *              DPNI_OPT_ HAS_KEY_MASKING bit in OPTIONS field. This
-        *              field is ignored if the DPNI has a single TC. Otherwise,
-        *              a value of 0 defaults to 64. Maximum supported value
-        *              is 64.
-        */
        uint32_t options;
        uint16_t fs_entries;
        uint8_t  vlan_filter_entries;
        uint8_t  mac_filter_entries;
        uint8_t  num_queues;
        uint8_t  num_tcs;
+       uint8_t  num_rx_tcs;
        uint8_t  qos_entries;
 };
 
@@ -172,17 +190,14 @@ int dpni_destroy(struct fsl_mc_io *mc_io,
  * @num_dpbp:  Number of DPBPs
  * @pools:     Array of buffer pools parameters; The number of valid entries
  *             must match 'num_dpbp' value
+ * @pools.dpbp_id:     DPBP object ID
+ * @pools.priority:    Priority mask that indicates TCs used with this buffer.
+ *                    If set to 0x00, MC will assume the value 0xff.
+ * @pools.buffer_size: Buffer size
+ * @pools.backup_pool: Backup pool
  */
 struct dpni_pools_cfg {
        uint8_t num_dpbp;
-       /**
-        * struct pools - Buffer pools parameters
-        * @dpbp_id: DPBP object ID
-        * @priority: priority mask that indicates TC's used with this buffer.
-        * I set to 0x00 MC will assume value 0xff.
-        * @buffer_size: Buffer size
-        * @backup_pool: Backup pool
-        */
        struct {
                int             dpbp_id;
                uint8_t         priority_mask;
@@ -296,6 +311,8 @@ int dpni_clear_irq_status(struct fsl_mc_io *mc_io,
  *                     variants,
  *                     - 0x422 - WRIOP version 1.1.2, used on LS1088 and
  *                     variants.
+ *                     - 0xC00 - WRIOP version 3.0.0, used on LX2160 and
+ *                     variants.
  */
 struct dpni_attr {
        uint32_t options;
@@ -320,6 +337,13 @@ int dpni_get_attributes(struct fsl_mc_io *mc_io,
  * DPNI errors
  */
 
+/**
+ * Discard error. When set all discarded frames in wriop will be enqueued to
+ * error queue. To be used in dpni_set_errors_behavior() only if error_action
+ * parameter is set to DPNI_ERROR_ACTION_SEND_TO_ERROR_QUEUE.
+ */
+#define DPNI_ERROR_DISC                0x80000000
+
 /**
  * Extract out of frame header error
  */
@@ -408,6 +432,10 @@ int dpni_set_errors_behavior(struct fsl_mc_io *mc_io,
  * Select to modify the data-tail-room setting
  */
 #define DPNI_BUF_LAYOUT_OPT_DATA_TAIL_ROOM     0x00000040
+/**
+ * Select to modify the sw-opaque value setting
+ */
+#define DPNI_BUF_LAYOUT_OPT_SW_OPAQUE          0x00000080
 
 /**
  * struct dpni_buffer_layout - Structure representing DPNI buffer layout
@@ -427,6 +455,7 @@ struct dpni_buffer_layout {
        int pass_timestamp;
        int pass_parser_result;
        int pass_frame_status;
+       int pass_sw_opaque;
        uint16_t private_data_size;
        uint16_t data_align;
        uint16_t data_head_room;
@@ -501,16 +530,48 @@ int dpni_get_tx_data_offset(struct fsl_mc_io *mc_io,
 
 #define DPNI_STATISTICS_CNT            7
 
+/**
+ * union dpni_statistics - Union describing the DPNI statistics
+ * @page_0: Page_0 statistics structure
+ * @page_0.ingress_all_frames: Ingress frame count
+ * @page_0.ingress_all_bytes: Ingress byte count
+ * @page_0.ingress_multicast_frames: Ingress multicast frame count
+ * @page_0.ingress_multicast_bytes: Ingress multicast byte count
+ * @page_0.ingress_broadcast_frames: Ingress broadcast frame count
+ * @page_0.ingress_broadcast_bytes: Ingress broadcast byte count
+ * @page_1: Page_1 statistics structure
+ * @page_1.egress_all_frames: Egress frame count
+ * @page_1.egress_all_bytes: Egress byte count
+ * @page_1.egress_multicast_frames: Egress multicast frame count
+ * @page_1.egress_multicast_bytes: Egress multicast byte count
+ * @page_1.egress_broadcast_frames: Egress broadcast frame count
+ * @page_1.egress_broadcast_bytes: Egress broadcast byte count
+ * @page_2: Page_2 statistics structure
+ * @page_2.ingress_filtered_frames: Ingress filtered frame count
+ * @page_2.ingress_discarded_frames: Ingress discarded frame count
+ * @page_2.ingress_nobuffer_discards: Ingress discarded frame count due to
+ *     lack of buffers
+ * @page_2.egress_discarded_frames: Egress discarded frame count
+ * @page_2.egress_confirmed_frames: Egress confirmed frame count
+ * @page_3: Page_3 statistics structure with values for the selected TC
+ * @page_3.ceetm_dequeue_bytes: Cumulative count of the number of bytes dequeued
+ * @page_3.ceetm_dequeue_frames: Cumulative count of the number of frames
+ *     dequeued
+ * @page_3.ceetm_reject_bytes: Cumulative count of the number of bytes in all
+ *     frames whose enqueue was rejected
+ * @page_3.ceetm_reject_frames: Cumulative count of all frame enqueues rejected
+ * @page_4: congestion point drops for the selected TC
+ * @page_4.cgr_reject_frames: number of rejected frames due to congestion point
+ * @page_4.cgr_reject_bytes: number of rejected bytes due to congestion point
+ * @page_5: policer statistics per TC
+ * @page_5.policer_cnt_red: number of red colored frames
+ * @page_5.policer_cnt_yellow: number of yellow colored frames
+ * @page_5.policer_cnt_green: number of green colored frames
+ * @page_5.policer_cnt_re_red: number of recolored red frames
+ * @page_5.policer_cnt_re_yellow: number of recolored yellow frames
+ * @raw: raw statistics structure, used to index counters
+ */
 union dpni_statistics {
-       /**
-        * struct page_0 - Page_0 statistics structure
-        * @ingress_all_frames: Ingress frame count
-        * @ingress_all_bytes: Ingress byte count
-        * @ingress_multicast_frames: Ingress multicast frame count
-        * @ingress_multicast_bytes: Ingress multicast byte count
-        * @ingress_broadcast_frames: Ingress broadcast frame count
-        * @ingress_broadcast_bytes: Ingress broadcast byte count
-        */
        struct {
                uint64_t ingress_all_frames;
                uint64_t ingress_all_bytes;
@@ -519,15 +580,6 @@ union dpni_statistics {
                uint64_t ingress_broadcast_frames;
                uint64_t ingress_broadcast_bytes;
        } page_0;
-       /**
-        * struct page_1 - Page_1 statistics structure
-        * @egress_all_frames: Egress frame count
-        * @egress_all_bytes: Egress byte count
-        * @egress_multicast_frames: Egress multicast frame count
-        * @egress_multicast_bytes: Egress multicast byte count
-        * @egress_broadcast_frames: Egress broadcast frame count
-        * @egress_broadcast_bytes: Egress broadcast byte count
-        */
        struct {
                uint64_t egress_all_frames;
                uint64_t egress_all_bytes;
@@ -536,15 +588,6 @@ union dpni_statistics {
                uint64_t egress_broadcast_frames;
                uint64_t egress_broadcast_bytes;
        } page_1;
-       /**
-        * struct page_2 - Page_2 statistics structure
-        * @ingress_filtered_frames: Ingress filtered frame count
-        * @ingress_discarded_frames: Ingress discarded frame count
-        * @ingress_nobuffer_discards: Ingress discarded frame count due to
-        *                                      lack of buffers
-        * @egress_discarded_frames: Egress discarded frame count
-        * @egress_confirmed_frames: Egress confirmed frame count
-        */
        struct {
                uint64_t ingress_filtered_frames;
                uint64_t ingress_discarded_frames;
@@ -552,26 +595,23 @@ union dpni_statistics {
                uint64_t egress_discarded_frames;
                uint64_t egress_confirmed_frames;
        } page_2;
-       /**
-        * struct page_3 - Page_3 statistics structure with values for the
-        *                      selected TC
-        * @ceetm_dequeue_bytes: Cumulative count of the number of bytes
-        *                      dequeued
-        * @ceetm_dequeue_frames: Cumulative count of the number of frames
-        *                      dequeued
-        * @ceetm_reject_bytes: Cumulative count of the number of bytes in all
-        *                      frames whose enqueue was rejected
-        * @ceetm_reject_frames: Cumulative count of all frame enqueues rejected
-        */
        struct {
                uint64_t ceetm_dequeue_bytes;
                uint64_t ceetm_dequeue_frames;
                uint64_t ceetm_reject_bytes;
                uint64_t ceetm_reject_frames;
        } page_3;
-       /**
-        * struct raw - raw statistics structure, used to index counters
-        */
+       struct {
+               uint64_t cgr_reject_frames;
+               uint64_t cgr_reject_bytes;
+       } page_4;
+       struct {
+               uint64_t policer_cnt_red;
+               uint64_t policer_cnt_yellow;
+               uint64_t policer_cnt_green;
+               uint64_t policer_cnt_re_red;
+               uint64_t policer_cnt_re_yellow;
+       } page_5;
        struct {
                uint64_t counter[DPNI_STATISTICS_CNT];
        } raw;
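
A minimal sketch, not part of the patch, of how the raw view aliases the
per-page counters documented above: after fetching page 4, raw.counter[0]
and raw.counter[1] hold the same values as the page_4 fields (the fetch
call shown in the comment is an assumption; check the dpni_get_statistics()
prototype in this header).

    union dpni_statistics stats = { 0 };

    /* ... dpni_get_statistics(mc_io, CMD_PRI_LOW, token, 4, 0, &stats); ... */
    uint64_t frames = stats.page_4.cgr_reject_frames; /* == stats.raw.counter[0] */
    uint64_t bytes  = stats.raw.counter[1];           /* == page_4.cgr_reject_bytes */
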
@@ -602,10 +642,12 @@ union dpni_statistics {
 * struct dpni_link_cfg - Structure representing DPNI link configuration
  * @rate: Rate
  * @options: Mask of available options; use 'DPNI_LINK_OPT_<X>' values
+ * @advertising: Speeds that are advertised for autoneg (bitmap)
  */
 struct dpni_link_cfg {
        uint32_t rate;
        uint64_t options;
+       uint64_t advertising;
 };
 
 int dpni_set_link_cfg(struct fsl_mc_io *mc_io,
@@ -618,11 +660,17 @@ int dpni_set_link_cfg(struct fsl_mc_io *mc_io,
  * @rate:      Rate
  * @options:   Mask of available options; use 'DPNI_LINK_OPT_<X>' values
  * @up:                Link state; '0' for down, '1' for up
+ * @state_valid: Ignore/Update the state of the link
+ * @supported: Speeds capability of the phy (bitmap)
+ * @advertising: Speeds that are advertised for autoneg (bitmap)
  */
 struct dpni_link_state {
        uint32_t rate;
        uint64_t options;
        int up;
+       int     state_valid;
+       uint64_t supported;
+       uint64_t advertising;
 };
 
 int dpni_get_link_state(struct fsl_mc_io *mc_io,
@@ -750,11 +798,20 @@ enum dpni_fs_miss_action {
  * struct dpni_fs_tbl_cfg - Flow Steering table configuration
  * @miss_action:       Miss action selection
  * @default_flow_id:   Used when 'miss_action = DPNI_FS_MISS_EXPLICIT_FLOWID'
+ * @keep_hash_key: used only when miss_action is set to DPNI_FS_MISS_HASH. When
+ *     set to one, unclassified frames will be distributed according to the
+ *     previously used hash key. If set to zero, the hash key will be replaced
+ *     with the key provided for flow steering.
+ * @keep_entries: if set to one, the command will not delete the entries that
+ *     already exist in the FS table. Use this option with caution: if the
+ *     table entries are not compatible with the distribution key, the packets
+ *     will not be classified properly.
  */
 struct dpni_fs_tbl_cfg {
        enum dpni_fs_miss_action miss_action;
        uint16_t default_flow_id;
        char keep_hash_key;
+       uint8_t keep_entries;
 };
 
 /**
@@ -915,34 +972,52 @@ int dpni_get_congestion_notification(struct fsl_mc_io *mc_io,
 
 /**
  * struct dpni_queue - Queue structure
- * @user_context:      User data, presented to the user along with any frames
- *                     from this queue. Not relevant for Tx queues.
+ * @destination: Destination structure
+ * @destination.id: ID of the destination, only relevant if DEST_TYPE is > 0.
+ *     Identifies either a DPIO or a DPCON object.
+ *     Not relevant for Tx queues.
+ * @destination.type:  May be one of the following:
+ *     0 - No destination, queue can be manually
+ *             queried, but will not push traffic or
+ *             notifications to a DPIO;
+ *     1 - The destination is a DPIO. When traffic
+ *             becomes available in the queue a FQDAN
+ *             (FQ data available notification) will be
+ *             generated to selected DPIO;
+ *     2 - The destination is a DPCON. The queue is
+ *             associated with a DPCON object for the
+ *             purpose of scheduling between multiple
+ *             queues. The DPCON may be independently
+ *             configured to generate notifications.
+ *             Not relevant for Tx queues.
+ * @destination.hold_active: Hold active, maintains a queue scheduled for longer
+ *     in a DPIO during dequeue to reduce spread of traffic.
+ *     Only relevant if queues are
+ *     not affined to a single DPIO.
+ * @user_context: User data, presented to the user along with any frames
+ *     from this queue. Not relevant for Tx queues.
+ * @flc: FD Flow Context structure
+ * @flc.value: Default FLC value for traffic dequeued from
+ *      this queue.  Please check description of FD
+ *      structure for more information.
+ *      Note that FLC values set using dpni_add_fs_entry,
+ *      if any, take precedence over values per queue.
+ * @flc.stash_control: Boolean, indicates whether the 6 least
+ *      significant bits are used for stash control.  If set, the 6
+ *      least significant bits in value are interpreted as follows:
+ *      - bits 0-1: indicates the number of 64 byte units of context
+ *      that are stashed.  FLC value is interpreted as a memory address
+ *      in this case, excluding the 6 LS bits.
+ *      - bits 2-3: indicates the number of 64 byte units of frame
+ *      annotation to be stashed.  Annotation is placed at FD[ADDR].
+ *      - bits 4-5: indicates the number of 64 byte units of frame
+ *      data to be stashed.  Frame data is placed at FD[ADDR] +
+ *      FD[OFFSET].
+ *      For more details check the Frame Descriptor section in the
+ *      hardware documentation.
  */
 struct dpni_queue {
-       /**
-        * struct destination - Destination structure
-        * @id: ID of the destination, only relevant if DEST_TYPE is > 0.
-        *                      Identifies either a DPIO or a DPCON object.
-        *                      Not relevant for Tx queues.
-        * @type:       May be one of the following:
-        *                      0 - No destination, queue can be manually
-        *                              queried, but will not push traffic or
-        *                              notifications to a DPIO;
-        *                      1 - The destination is a DPIO. When traffic
-        *                              becomes available in the queue a FQDAN
-        *                              (FQ data available notification) will be
-        *                              generated to selected DPIO;
-        *                      2 - The destination is a DPCON. The queue is
-        *                              associated with a DPCON object for the
-        *                              purpose of scheduling between multiple
-        *                              queues. The DPCON may be independently
-        *                              configured to generate notifications.
-        *                              Not relevant for Tx queues.
-        * @hold_active: Hold active, maintains a queue scheduled for longer
-        *              in a DPIO during dequeue to reduce spread of traffic.
-        *              Only relevant if queues are
-        *              not affined to a single DPIO.
-        */
        struct {
                uint16_t id;
                enum dpni_dest type;
@@ -950,28 +1025,6 @@ struct dpni_queue {
                uint8_t priority;
        } destination;
        uint64_t user_context;
-       /**
-        * struct flc - FD FLow Context structure
-        * @value: Default FLC value for traffic dequeued from
-        *      this queue.  Please check description of FD
-        *      structure for more information.
-        *      Note that FLC values set using dpni_add_fs_entry,
-        *      if any, take precedence over values per queue.
-        * @stash_control: Boolean, indicates whether the 6 lowest
-        *      - significant bits are used for stash control.
-        *      significant bits are used for stash control.  If set, the 6
-        *      least significant bits in value are interpreted as follows:
-        *      - bits 0-1: indicates the number of 64 byte units of context
-        *      that are stashed.  FLC value is interpreted as a memory address
-        *      in this case, excluding the 6 LS bits.
-        *      - bits 2-3: indicates the number of 64 byte units of frame
-        *      annotation to be stashed.  Annotation is placed at FD[ADDR].
-        *      - bits 4-5: indicates the number of 64 byte units of frame
-        *      data to be stashed.  Frame data is placed at FD[ADDR] +
-        *      FD[OFFSET].
-        *      For more details check the Frame Descriptor section in the
-        *      hardware documentation.
-        */
        struct {
                uint64_t value;
                char stash_control;
@@ -1132,4 +1185,21 @@ int dpni_get_taildrop(struct fsl_mc_io *mc_io,
                      uint8_t tc,
                      uint8_t q_index,
                      struct dpni_taildrop *taildrop);
+
+int dpni_set_opr(struct fsl_mc_io *mc_io,
+                uint32_t cmd_flags,
+                uint16_t token,
+                uint8_t tc,
+                uint8_t index,
+                uint8_t options,
+                struct opr_cfg *cfg);
+
+int dpni_get_opr(struct fsl_mc_io *mc_io,
+                uint32_t cmd_flags,
+                uint16_t token,
+                uint8_t tc,
+                uint8_t index,
+                struct opr_cfg *cfg,
+                struct opr_qry *qry);
+
 #endif /* __FSL_DPNI_H */
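
A minimal sketch, assuming a 64-byte-aligned context buffer, of composing
an FLC value with the stash-control encoding documented for @flc above
(the helper name and the chosen unit counts are illustrative, not from the
patch):

    static void
    set_stash(struct dpni_queue *queue, uint64_t ctx_addr)
    {
            uint64_t flc = ctx_addr & ~0x3FULL; /* address, 6 LS bits cleared */

            flc |= 1 << 0;  /* bits 0-1: stash one 64-byte unit of context */
            flc |= 1 << 2;  /* bits 2-3: stash one 64-byte unit of annotation */
            flc |= 2 << 4;  /* bits 4-5: stash two 64-byte units of frame data */

            queue->flc.value = flc;
            queue->flc.stash_control = 1;  /* interpret the 6 LS bits as above */
    }
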
index eb3e998..3df5bcf 100644 (file)
@@ -9,19 +9,21 @@
 
 /* DPNI Version */
 #define DPNI_VER_MAJOR                         7
-#define DPNI_VER_MINOR                         3
+#define DPNI_VER_MINOR                         8
 
 #define DPNI_CMD_BASE_VERSION                  1
 #define DPNI_CMD_VERSION_2                     2
+#define DPNI_CMD_VERSION_3                     3
 #define DPNI_CMD_ID_OFFSET                     4
 
 #define DPNI_CMD(id)   (((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_BASE_VERSION)
 #define DPNI_CMD_V2(id)        (((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_VERSION_2)
+#define DPNI_CMD_V3(id)        (((id) << DPNI_CMD_ID_OFFSET) | DPNI_CMD_VERSION_3)
 
 /* Command IDs */
 #define DPNI_CMDID_OPEN                                DPNI_CMD(0x801)
 #define DPNI_CMDID_CLOSE                       DPNI_CMD(0x800)
-#define DPNI_CMDID_CREATE                      DPNI_CMD(0x901)
+#define DPNI_CMDID_CREATE                      DPNI_CMD_V2(0x901)
 #define DPNI_CMDID_DESTROY                     DPNI_CMD(0x981)
 #define DPNI_CMDID_GET_API_VERSION             DPNI_CMD(0xa01)
 
 #define DPNI_CMDID_GET_QDID                    DPNI_CMD(0x210)
 #define DPNI_CMDID_GET_SP_INFO                 DPNI_CMD(0x211)
 #define DPNI_CMDID_GET_TX_DATA_OFFSET          DPNI_CMD(0x212)
-#define DPNI_CMDID_GET_LINK_STATE              DPNI_CMD(0x215)
+#define DPNI_CMDID_GET_LINK_STATE              DPNI_CMD_V2(0x215)
 #define DPNI_CMDID_SET_MAX_FRAME_LENGTH                DPNI_CMD(0x216)
 #define DPNI_CMDID_GET_MAX_FRAME_LENGTH                DPNI_CMD(0x217)
-#define DPNI_CMDID_SET_LINK_CFG                        DPNI_CMD(0x21A)
+#define DPNI_CMDID_SET_LINK_CFG                        DPNI_CMD_V2(0x21A)
 #define DPNI_CMDID_SET_TX_SHAPING              DPNI_CMD_V2(0x21B)
 
 #define DPNI_CMDID_SET_MCAST_PROMISC           DPNI_CMD(0x220)
@@ -65,7 +67,7 @@
 #define DPNI_CMDID_REMOVE_VLAN_ID              DPNI_CMD(0x232)
 #define DPNI_CMDID_CLR_VLAN_FILTERS            DPNI_CMD(0x233)
 
-#define DPNI_CMDID_SET_RX_TC_DIST              DPNI_CMD_V2(0x235)
+#define DPNI_CMDID_SET_RX_TC_DIST              DPNI_CMD_V3(0x235)
 
 #define DPNI_CMDID_GET_STATISTICS              DPNI_CMD_V2(0x25D)
 #define DPNI_CMDID_RESET_STATISTICS            DPNI_CMD(0x25E)
@@ -76,8 +78,8 @@
 
 #define DPNI_CMDID_GET_PORT_MAC_ADDR           DPNI_CMD(0x263)
 
-#define DPNI_CMDID_GET_BUFFER_LAYOUT           DPNI_CMD(0x264)
-#define DPNI_CMDID_SET_BUFFER_LAYOUT           DPNI_CMD(0x265)
+#define DPNI_CMDID_GET_BUFFER_LAYOUT           DPNI_CMD_V2(0x264)
+#define DPNI_CMDID_SET_BUFFER_LAYOUT           DPNI_CMD_V2(0x265)
 
 #define DPNI_CMDID_SET_CONGESTION_NOTIFICATION DPNI_CMD(0x267)
 #define DPNI_CMDID_GET_CONGESTION_NOTIFICATION DPNI_CMD(0x268)
@@ -87,6 +89,8 @@
 #define DPNI_CMDID_SET_OFFLOAD                 DPNI_CMD(0x26C)
 #define DPNI_CMDID_SET_TX_CONFIRMATION_MODE    DPNI_CMD(0x266)
 #define DPNI_CMDID_GET_TX_CONFIRMATION_MODE    DPNI_CMD(0x26D)
+#define DPNI_CMDID_SET_OPR                     DPNI_CMD(0x26e)
+#define DPNI_CMDID_GET_OPR                     DPNI_CMD(0x26f)
 
 /* Macros for accessing command fields smaller than 1byte */
 #define DPNI_MASK(field)       \
@@ -113,6 +117,7 @@ struct dpni_cmd_create {
        uint8_t qos_entries;
        uint8_t pad3;
        uint16_t fs_entries;
+       uint8_t num_rx_tcs;
 };
 
 struct dpni_cmd_destroy {
@@ -228,6 +233,8 @@ struct dpni_cmd_set_errors_behavior {
 #define DPNI_PASS_PR_SIZE              1
 #define DPNI_PASS_FS_SHIFT             2
 #define DPNI_PASS_FS_SIZE              1
+#define DPNI_PASS_SWO_SHIFT            3
+#define DPNI_PASS_SWO_SIZE             1
 
 struct dpni_cmd_get_buffer_layout {
        uint8_t qtype;
@@ -307,10 +314,13 @@ struct dpni_cmd_set_link_cfg {
        uint32_t rate;
        uint32_t pad1;
        uint64_t options;
+       uint64_t advertising;
 };
 
 #define DPNI_LINK_STATE_SHIFT          0
 #define DPNI_LINK_STATE_SIZE           1
+#define DPNI_STATE_VALID_SHIFT         1
+#define DPNI_STATE_VALID_SIZE          1
 
 struct dpni_rsp_get_link_state {
        uint32_t pad0;
@@ -320,6 +330,8 @@ struct dpni_rsp_get_link_state {
        uint32_t rate;
        uint32_t pad2;
        uint64_t options;
+       uint64_t supported;
+       uint64_t advertising;
 };
 
 struct dpni_cmd_set_max_frame_length {
@@ -415,6 +427,8 @@ struct dpni_cmd_set_tx_priorities {
 #define DPNI_MISS_ACTION_SIZE          4
 #define DPNI_KEEP_HASH_KEY_SHIFT       7
 #define DPNI_KEEP_HASH_KEY_SIZE                1
+#define DPNI_KEEP_ENTRIES_SHIFT                6
+#define DPNI_KEEP_ENTRIES_SIZE         1
 
 struct dpni_cmd_set_rx_tc_dist {
        uint16_t dist_size;
@@ -601,5 +615,64 @@ struct dpni_rsp_get_congestion_notification {
        uint32_t threshold_exit;
 };
 
+struct dpni_cmd_set_opr {
+       uint8_t pad0;
+       uint8_t tc_id;
+       uint8_t index;
+       uint8_t options;
+       uint8_t pad1[7];
+       uint8_t oloe;
+       uint8_t oeane;
+       uint8_t olws;
+       uint8_t oa;
+       uint8_t oprrws;
+};
+
+struct dpni_cmd_get_opr {
+       uint8_t pad;
+       uint8_t tc_id;
+       uint8_t index;
+};
+
+#define DPNI_RIP_SHIFT 0
+#define DPNI_RIP_SIZE          1
+#define DPNI_OPR_ENABLE_SHIFT  1
+#define DPNI_OPR_ENABLE_SIZE   1
+#define DPNI_TSEQ_NLIS_SHIFT   0
+#define DPNI_TSEQ_NLIS_SIZE    1
+#define DPNI_HSEQ_NLIS_SHIFT   0
+#define DPNI_HSEQ_NLIS_SIZE    1
+
+struct dpni_rsp_get_opr {
+       uint64_t pad0;
+       /* from LSB: rip:1 enable:1 */
+       uint8_t flags;
+       uint16_t pad1;
+       uint8_t oloe;
+       uint8_t oeane;
+       uint8_t olws;
+       uint8_t oa;
+       uint8_t oprrws;
+       uint16_t nesn;
+       uint16_t pad8;
+       uint16_t ndsn;
+       uint16_t pad2;
+       uint16_t ea_tseq;
+       /* only the LSB */
+       uint8_t tseq_nlis;
+       uint8_t pad3;
+       uint16_t ea_hseq;
+       /* only the LSB */
+       uint8_t hseq_nlis;
+       uint8_t pad4;
+       uint16_t ea_hptr;
+       uint16_t pad5;
+       uint16_t ea_tptr;
+       uint16_t pad6;
+       uint16_t opr_vid;
+       uint16_t pad7;
+       uint16_t opr_id;
+};
+
 #pragma pack(pop)
 #endif /* _FSL_DPNI_CMD_H */
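
For reference, the versioned command macros above simply pack the command
ID with an ABI version in the low nibble, so the bump of
DPNI_CMDID_SET_RX_TC_DIST works out as:

    /* DPNI_CMD_V3(0x235) == (0x235 << DPNI_CMD_ID_OFFSET) | DPNI_CMD_VERSION_3
     *                    == 0x2350 | 0x3 == 0x2353
     * The old DPNI_CMD_V2(0x235) encoded 0x2352; only the version nibble
     * changes, the command ID stays 0x235.
     */
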
index 964870b..0dc0131 100644 (file)
 #define NH_FLD_SCTP_CHUNK_DATA_STREAM_SQN     (NH_FLD_SCTP_CHUNK_DATA_TYPE << 5)
 #define NH_FLD_SCTP_CHUNK_DATA_PAYLOAD_PID    (NH_FLD_SCTP_CHUNK_DATA_TYPE << 6)
 #define NH_FLD_SCTP_CHUNK_DATA_UNORDERED      (NH_FLD_SCTP_CHUNK_DATA_TYPE << 7)
-#define NH_FLD_SCTP_CHUNK_DATA_BEGGINNING     (NH_FLD_SCTP_CHUNK_DATA_TYPE << 8)
+#define NH_FLD_SCTP_CHUNK_DATA_BEGGINING      (NH_FLD_SCTP_CHUNK_DATA_TYPE << 8)
 #define NH_FLD_SCTP_CHUNK_DATA_END            (NH_FLD_SCTP_CHUNK_DATA_TYPE << 9)
 #define NH_FLD_SCTP_CHUNK_DATA_ALL_FIELDS \
        ((NH_FLD_SCTP_CHUNK_DATA_TYPE << 10) - 1)
index 213f0d7..b345952 100644 (file)
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright 2018 NXP
 
+version = 2
+
 if host_machine.system() != 'linux'
         build = false
 endif
index 9c87e88..0ed6276 100644 (file)
@@ -10,6 +10,7 @@ LIB = librte_pmd_e1000.a
 
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
 LDLIBS += -lrte_bus_pci
index 7c279db..397dd94 100644 (file)
@@ -1257,6 +1257,11 @@ STATIC s32 e1000_init_hw_82571(struct e1000_hw *hw)
         */
        e1000_clear_hw_cntrs_82571(hw);
 
+       /* MSI-X configure for 82574 */
+       if (mac->type == e1000_82574)
+               E1000_WRITE_REG(hw, E1000_IVAR,
+                               (E1000_IVAR_INT_ALLOC_VALID << 16));
+
        return ret_val;
 }
 
index b886804..5958ea1 100644 (file)
@@ -48,7 +48,7 @@
 
 #include "../e1000_logs.h"
 
-#define DELAY(x) rte_delay_us(x)
+#define DELAY(x) rte_delay_us_sleep(x)
 #define usec_delay(x) DELAY(x)
 #define usec_delay_irq(x) DELAY(x)
 #define msec_delay(x) DELAY(1000*(x))
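
The switch from rte_delay_us() to rte_delay_us_sleep() changes these base
driver delays from busy-polling to sleeping; a sketch of the difference,
assuming the 18.11 EAL API in rte_cycles.h:

    #include <rte_cycles.h>

    rte_delay_us(1000);       /* busy-polls the TSC for ~1 ms, core stays hot */
    rte_delay_us_sleep(1000); /* sleeps (nanosleep) and frees the core */
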
index 5e1716d..f26f242 100644 (file)
@@ -25,6 +25,9 @@ error_cflags = ['-Wno-uninitialized', '-Wno-unused-parameter',
        '-Wno-unused-variable', '-Wno-misleading-indentation',
        '-Wno-implicit-fallthrough']
 c_args = cflags
+if allow_experimental_apis
+       c_args += '-DALLOW_EXPERIMENTAL_API'
+endif
 foreach flag: error_cflags
        if cc.has_argument(flag)
                c_args += flag
index 902001f..94edff0 100644 (file)
@@ -236,7 +236,8 @@ struct igb_ethertype_filter {
 struct igb_rte_flow_rss_conf {
        struct rte_flow_action_rss conf; /**< RSS parameters. */
        uint8_t key[IGB_HKEY_MAX_INDEX * sizeof(uint32_t)]; /* Hash key. */
-       uint16_t queue[IGB_MAX_RX_QUEUE_NUM]; /**< Queues indices to use. */
+       /* Queues indices to use. */
+       uint16_t queue[IGB_MAX_RX_QUEUE_NUM_82576];
 };
 
 /*
@@ -506,7 +507,8 @@ int eth_igb_syn_filter_set(struct rte_eth_dev *dev,
 int eth_igb_add_del_flex_filter(struct rte_eth_dev *dev,
                        struct rte_eth_flex_filter *filter,
                        bool add);
-int igb_rss_conf_init(struct igb_rte_flow_rss_conf *out,
+int igb_rss_conf_init(struct rte_eth_dev *dev,
+                     struct igb_rte_flow_rss_conf *out,
                      const struct rte_flow_action_rss *in);
 int igb_action_rss_same(const struct rte_flow_action_rss *comp,
                        const struct rte_flow_action_rss *with);
index 053e855..8230824 100644 (file)
@@ -329,9 +329,6 @@ eth_em_dev_uninit(struct rte_eth_dev *eth_dev)
        eth_dev->rx_pkt_burst = NULL;
        eth_dev->tx_pkt_burst = NULL;
 
-       rte_free(eth_dev->data->mac_addrs);
-       eth_dev->data->mac_addrs = NULL;
-
        /* disable uio intr before callback unregister */
        rte_intr_disable(intr_handle);
        rte_intr_callback_unregister(intr_handle,
@@ -1444,7 +1441,8 @@ eth_em_interrupt_setup(struct rte_eth_dev *dev)
        /* clear interrupt */
        E1000_READ_REG(hw, E1000_ICR);
        regval = E1000_READ_REG(hw, E1000_IMS);
-       E1000_WRITE_REG(hw, E1000_IMS, regval | E1000_ICR_LSC);
+       E1000_WRITE_REG(hw, E1000_IMS,
+                       regval | E1000_ICR_LSC | E1000_ICR_OTHER);
        return 0;
 }
 
@@ -1494,7 +1492,7 @@ em_rxq_intr_enable(struct e1000_hw *hw)
 static void
 em_lsc_intr_disable(struct e1000_hw *hw)
 {
-       E1000_WRITE_REG(hw, E1000_IMC, E1000_IMS_LSC);
+       E1000_WRITE_REG(hw, E1000_IMC, E1000_IMS_LSC | E1000_IMS_OTHER);
        E1000_WRITE_FLUSH(hw);
 }
 
index 7d2ac4e..a9cd765 100644 (file)
@@ -1160,6 +1160,7 @@ em_get_tx_port_offloads_capa(struct rte_eth_dev *dev)
 
        RTE_SET_USED(dev);
        tx_offload_capa =
+               DEV_TX_OFFLOAD_MULTI_SEGS  |
                DEV_TX_OFFLOAD_VLAN_INSERT |
                DEV_TX_OFFLOAD_IPV4_CKSUM  |
                DEV_TX_OFFLOAD_UDP_CKSUM   |
@@ -1363,7 +1364,6 @@ em_get_rx_port_offloads_capa(struct rte_eth_dev *dev)
                DEV_RX_OFFLOAD_IPV4_CKSUM  |
                DEV_RX_OFFLOAD_UDP_CKSUM   |
                DEV_RX_OFFLOAD_TCP_CKSUM   |
-               DEV_RX_OFFLOAD_CRC_STRIP   |
                DEV_RX_OFFLOAD_KEEP_CRC    |
                DEV_RX_OFFLOAD_SCATTER;
        if (max_rx_pktlen > ETHER_MAX_LEN)
@@ -1417,12 +1417,13 @@ eth_em_rx_queue_setup(struct rte_eth_dev *dev,
        }
 
        /*
-        * EM devices don't support drop_en functionality
+        * EM devices don't support drop_en functionality.
+        * It's an optimization that does nothing on single-queue devices,
+        * so just log the issue and carry on.
         */
        if (rx_conf->rx_drop_en) {
-               PMD_INIT_LOG(ERR, "drop_en functionality not supported by "
+               PMD_INIT_LOG(NOTICE, "drop_en functionality not supported by "
                             "device");
-               return -EINVAL;
        }
 
        /* Free memory prior to re-allocation if needed. */
@@ -1459,7 +1460,7 @@ eth_em_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->rx_free_thresh = rx_conf->rx_free_thresh;
        rxq->queue_id = queue_idx;
        rxq->port_id = dev->data->port_id;
-       if (rte_eth_dev_must_keep_crc(dev->data->dev_conf.rxmode.offloads))
+       if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
                rxq->crc_len = ETHER_CRC_LEN;
        else
                rxq->crc_len = 0;
@@ -1795,7 +1796,7 @@ eth_em_rx_init(struct rte_eth_dev *dev)
                 * Reset crc_len in case it was changed after queue setup by a
                 *  call to configure
                 */
-               if (rte_eth_dev_must_keep_crc(dev->data->dev_conf.rxmode.offloads))
+               if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
                        rxq->crc_len = ETHER_CRC_LEN;
                else
                        rxq->crc_len = 0;
@@ -1877,7 +1878,7 @@ eth_em_rx_init(struct rte_eth_dev *dev)
        }
 
        /* Setup the Receive Control Register. */
-       if (rte_eth_dev_must_keep_crc(dev->data->dev_conf.rxmode.offloads))
+       if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
                rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
        else
                rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */
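
The hunks above replace rte_eth_dev_must_keep_crc() with a direct test of
DEV_RX_OFFLOAD_KEEP_CRC, since CRC stripping is now the default behavior.
A minimal sketch, not part of the patch, of the resulting rule:

    static inline uint8_t
    crc_len_from_offloads(uint64_t rx_offloads)
    {
            /* keep the 4-byte FCS only when the application asked for it */
            return (rx_offloads & DEV_RX_OFFLOAD_KEEP_CRC) ? ETHER_CRC_LEN : 0;
    }
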
index 64dfe68..d9d29d2 100644 (file)
@@ -74,6 +74,7 @@ static void eth_igb_stop(struct rte_eth_dev *dev);
 static int  eth_igb_dev_set_link_up(struct rte_eth_dev *dev);
 static int  eth_igb_dev_set_link_down(struct rte_eth_dev *dev);
 static void eth_igb_close(struct rte_eth_dev *dev);
+static int eth_igb_reset(struct rte_eth_dev *dev);
 static void eth_igb_promiscuous_enable(struct rte_eth_dev *dev);
 static void eth_igb_promiscuous_disable(struct rte_eth_dev *dev);
 static void eth_igb_allmulticast_enable(struct rte_eth_dev *dev);
@@ -351,6 +352,7 @@ static const struct eth_dev_ops eth_igb_ops = {
        .dev_set_link_up      = eth_igb_dev_set_link_up,
        .dev_set_link_down    = eth_igb_dev_set_link_down,
        .dev_close            = eth_igb_close,
+       .dev_reset            = eth_igb_reset,
        .promiscuous_enable   = eth_igb_promiscuous_enable,
        .promiscuous_disable  = eth_igb_promiscuous_disable,
        .allmulticast_enable  = eth_igb_allmulticast_enable,
@@ -915,9 +917,6 @@ eth_igb_dev_uninit(struct rte_eth_dev *eth_dev)
        /* Reset any pending lock */
        igb_reset_swfw_lock(hw);
 
-       rte_free(eth_dev->data->mac_addrs);
-       eth_dev->data->mac_addrs = NULL;
-
        /* uninitialize PF if max_vfs not zero */
        igb_pf_host_uninit(eth_dev);
 
@@ -1071,9 +1070,6 @@ eth_igbvf_dev_uninit(struct rte_eth_dev *eth_dev)
        eth_dev->rx_pkt_burst = NULL;
        eth_dev->tx_pkt_burst = NULL;
 
-       rte_free(eth_dev->data->mac_addrs);
-       eth_dev->data->mac_addrs = NULL;
-
        /* disable uio intr before callback unregister */
        rte_intr_disable(&pci_dev->intr_handle);
        rte_intr_callback_unregister(&pci_dev->intr_handle,
@@ -1593,6 +1589,33 @@ eth_igb_close(struct rte_eth_dev *dev)
        rte_eth_linkstatus_set(dev, &link);
 }
 
+/*
+ * Reset PF device.
+ */
+static int
+eth_igb_reset(struct rte_eth_dev *dev)
+{
+       int ret;
+
+       /* When a DPDK PMD PF begins to reset the PF port, it should notify
+        * all its VFs to make them align with it. The detailed notification
+        * mechanism is PMD specific and is currently not implemented.
+        * To avoid unexpected behavior in the VFs, reset of a PF with
+        * SR-IOV active is currently not supported. It might be supported
+        * later.
+        */
+       if (dev->data->sriov.active)
+               return -ENOTSUP;
+
+       ret = eth_igb_dev_uninit(dev);
+       if (ret)
+               return ret;
+
+       ret = eth_igb_dev_init(dev);
+
+       return ret;
+}
+
 static int
 igb_get_rx_buffer_size(struct e1000_hw *hw)
 {
@@ -3197,14 +3220,14 @@ igbvf_dev_configure(struct rte_eth_dev *dev)
         * Keep the persistent behavior the same as Host PF
         */
 #ifndef RTE_LIBRTE_E1000_PF_DISABLE_STRIP_CRC
-       if (rte_eth_dev_must_keep_crc(conf->rxmode.offloads)) {
+       if (conf->rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
                PMD_INIT_LOG(NOTICE, "VF can't disable HW CRC Strip");
-               conf->rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
+               conf->rxmode.offloads &= ~DEV_RX_OFFLOAD_KEEP_CRC;
        }
 #else
-       if (!rte_eth_dev_must_keep_crc(conf->rxmode.offloads)) {
+       if (!(conf->rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)) {
                PMD_INIT_LOG(NOTICE, "VF can't enable HW CRC Strip");
-               conf->rxmode.offloads &= ~DEV_RX_OFFLOAD_CRC_STRIP;
+               conf->rxmode.offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
        }
 #endif
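
A hypothetical application-side sketch of exercising the new .dev_reset op
(rte_eth_dev_reset() dispatches to eth_igb_reset(); reset_port() and its
parameters are illustrative, not from the patch):

    static int
    reset_port(uint16_t port_id, const struct rte_eth_conf *conf,
               uint16_t nb_rxq, uint16_t nb_txq)
    {
            int ret;

            rte_eth_dev_stop(port_id);
            ret = rte_eth_dev_reset(port_id);
            if (ret != 0)   /* -ENOTSUP for an igb PF with SR-IOV active */
                    return ret;

            /* the port comes back uninitialized: configure and restart */
            ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, conf);
            if (ret < 0)
                    return ret;
            /* re-setup RX/TX queues here before starting */
            return rte_eth_dev_start(port_id);
    }
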
 
index 0738529..3368349 100644 (file)
@@ -1307,6 +1307,7 @@ igb_parse_rss_filter(struct rte_eth_dev *dev,
                        struct igb_rte_flow_rss_conf *rss_conf,
                        struct rte_flow_error *error)
 {
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        const struct rte_flow_action *act;
        const struct rte_flow_action_rss *rss;
        uint16_t n, index;
@@ -1357,11 +1358,14 @@ igb_parse_rss_filter(struct rte_eth_dev *dev,
                return rte_flow_error_set
                        (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, act,
                         "RSS hash key must be exactly 40 bytes");
-       if (rss->queue_num > RTE_DIM(rss_conf->queue))
+       if (((hw->mac.type == e1000_82576) &&
+            (rss->queue_num > IGB_MAX_RX_QUEUE_NUM_82576)) ||
+           ((hw->mac.type != e1000_82576) &&
+            (rss->queue_num > IGB_MAX_RX_QUEUE_NUM)))
                return rte_flow_error_set
                        (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION, act,
                         "too many queues for RSS context");
-       if (igb_rss_conf_init(rss_conf, rss))
+       if (igb_rss_conf_init(dev, rss_conf, rss))
                return rte_flow_error_set
                        (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION, act,
                         "RSS context initialization failure");
@@ -1574,7 +1578,7 @@ igb_flow_create(struct rte_eth_dev *dev,
                                PMD_DRV_LOG(ERR, "failed to allocate memory");
                                goto out;
                        }
-                       igb_rss_conf_init(&rss_filter_ptr->filter_info,
+                       igb_rss_conf_init(dev, &rss_filter_ptr->filter_info,
                                          &rss_conf.conf);
                        TAILQ_INSERT_TAIL(&igb_filter_rss_list,
                                rss_filter_ptr, entries);
index b955068..25ff5f6 100644 (file)
@@ -1452,10 +1452,10 @@ igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
 uint64_t
 igb_get_tx_port_offloads_capa(struct rte_eth_dev *dev)
 {
-       uint64_t rx_offload_capa;
+       uint64_t tx_offload_capa;
 
        RTE_SET_USED(dev);
-       rx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT |
+       tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT |
                          DEV_TX_OFFLOAD_IPV4_CKSUM  |
                          DEV_TX_OFFLOAD_UDP_CKSUM   |
                          DEV_TX_OFFLOAD_TCP_CKSUM   |
@@ -1463,17 +1463,17 @@ igb_get_tx_port_offloads_capa(struct rte_eth_dev *dev)
                          DEV_TX_OFFLOAD_TCP_TSO     |
                          DEV_TX_OFFLOAD_MULTI_SEGS;
 
-       return rx_offload_capa;
+       return tx_offload_capa;
 }
 
 uint64_t
 igb_get_tx_queue_offloads_capa(struct rte_eth_dev *dev)
 {
-       uint64_t rx_queue_offload_capa;
+       uint64_t tx_queue_offload_capa;
 
-       rx_queue_offload_capa = igb_get_tx_port_offloads_capa(dev);
+       tx_queue_offload_capa = igb_get_tx_port_offloads_capa(dev);
 
-       return rx_queue_offload_capa;
+       return tx_queue_offload_capa;
 }
 
 int
@@ -1638,7 +1638,6 @@ igb_get_rx_port_offloads_capa(struct rte_eth_dev *dev)
                          DEV_RX_OFFLOAD_UDP_CKSUM   |
                          DEV_RX_OFFLOAD_TCP_CKSUM   |
                          DEV_RX_OFFLOAD_JUMBO_FRAME |
-                         DEV_RX_OFFLOAD_CRC_STRIP   |
                          DEV_RX_OFFLOAD_KEEP_CRC    |
                          DEV_RX_OFFLOAD_SCATTER;
 
@@ -1721,7 +1720,7 @@ eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
                queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
        rxq->port_id = dev->data->port_id;
-       if (rte_eth_dev_must_keep_crc(dev->data->dev_conf.rxmode.offloads))
+       if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
                rxq->crc_len = ETHER_CRC_LEN;
        else
                rxq->crc_len = 0;
@@ -2374,7 +2373,7 @@ eth_igb_rx_init(struct rte_eth_dev *dev)
                 * Reset crc_len in case it was changed after queue setup by a
                 *  call to configure
                 */
-               if (rte_eth_dev_must_keep_crc(dev->data->dev_conf.rxmode.offloads))
+               if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
                        rxq->crc_len = ETHER_CRC_LEN;
                else
                        rxq->crc_len = 0;
@@ -2506,7 +2505,7 @@ eth_igb_rx_init(struct rte_eth_dev *dev)
        E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
 
        /* Setup the Receive Control Register. */
-       if (rte_eth_dev_must_keep_crc(dev->data->dev_conf.rxmode.offloads)) {
+       if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
                rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
 
                /* clear STRCRC bit in all queues */
@@ -2852,11 +2851,17 @@ igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 }
 
 int
-igb_rss_conf_init(struct igb_rte_flow_rss_conf *out,
+igb_rss_conf_init(struct rte_eth_dev *dev,
+                 struct igb_rte_flow_rss_conf *out,
                  const struct rte_flow_action_rss *in)
 {
+       struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
        if (in->key_len > RTE_DIM(out->key) ||
-           in->queue_num > RTE_DIM(out->queue))
+           ((hw->mac.type == e1000_82576) &&
+            (in->queue_num > IGB_MAX_RX_QUEUE_NUM_82576)) ||
+           ((hw->mac.type != e1000_82576) &&
+            (in->queue_num > IGB_MAX_RX_QUEUE_NUM)))
                return -EINVAL;
        out->conf = (struct rte_flow_action_rss){
                .func = in->func,
@@ -2945,7 +2950,7 @@ igb_config_rss_filter(struct rte_eth_dev *dev,
                rss_conf.rss_key = rss_intel_key; /* Default hash key */
        igb_hw_rss_hash_set(hw, &rss_conf);
 
-       if (igb_rss_conf_init(&filter_info->rss_info, &conf->conf))
+       if (igb_rss_conf_init(dev, &filter_info->rss_info, &conf->conf))
                return -EINVAL;
 
        return 0;
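
The 82576-specific bound now duplicated in igb_parse_rss_filter() and
igb_rss_conf_init() could be read as one rule; a hypothetical helper
(igb_max_rss_queue_num() is not part of the patch):

    static inline uint16_t
    igb_max_rss_queue_num(const struct e1000_hw *hw)
    {
            /* 82576 exposes more RX queues than the other igb MACs */
            return hw->mac.type == e1000_82576 ?
                    IGB_MAX_RX_QUEUE_NUM_82576 : IGB_MAX_RX_QUEUE_NUM;
    }
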
index cf45699..d0901d3 100644 (file)
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
+allow_experimental_apis = true
+
 subdir('base')
 objs = [base_objs]
 
index c255dc6..0c0ed93 100644 (file)
@@ -55,7 +55,7 @@
 
 #define DRV_MODULE_VER_MAJOR   1
 #define DRV_MODULE_VER_MINOR   1
-#define DRV_MODULE_VER_SUBMINOR        0
+#define DRV_MODULE_VER_SUBMINOR        1
 
 #define ENA_IO_TXQ_IDX(q)      (2 * (q))
 #define ENA_IO_RXQ_IDX(q)      (2 * (q) + 1)
@@ -239,6 +239,8 @@ static void ena_rx_queue_release_bufs(struct ena_ring *ring);
 static void ena_tx_queue_release_bufs(struct ena_ring *ring);
 static int ena_link_update(struct rte_eth_dev *dev,
                           int wait_to_complete);
+static int ena_create_io_queue(struct ena_ring *ring);
+static void ena_free_io_queues_all(struct ena_adapter *adapter);
 static int ena_queue_restart(struct ena_ring *ring);
 static int ena_queue_restart_all(struct rte_eth_dev *dev,
                                 enum ena_ring_type ring_type);
@@ -510,7 +512,8 @@ static void ena_close(struct rte_eth_dev *dev)
        struct ena_adapter *adapter =
                (struct ena_adapter *)(dev->data->dev_private);
 
-       ena_stop(dev);
+       if (adapter->state == ENA_ADAPTER_STATE_RUNNING)
+               ena_stop(dev);
        adapter->state = ENA_ADAPTER_STATE_CLOSED;
 
        ena_rx_queue_release_all(dev);
@@ -746,21 +749,12 @@ static void ena_tx_queue_release_all(struct rte_eth_dev *dev)
 static void ena_rx_queue_release(void *queue)
 {
        struct ena_ring *ring = (struct ena_ring *)queue;
-       struct ena_adapter *adapter = ring->adapter;
-       int ena_qid;
 
        ena_assert_msg(ring->configured,
                       "API violation - releasing not configured queue");
        ena_assert_msg(ring->adapter->state != ENA_ADAPTER_STATE_RUNNING,
                       "API violation");
 
-       /* Destroy HW queue */
-       ena_qid = ENA_IO_RXQ_IDX(ring->id);
-       ena_com_destroy_io_queue(&adapter->ena_dev, ena_qid);
-
-       /* Free all bufs */
-       ena_rx_queue_release_bufs(ring);
-
        /* Free ring resources */
        if (ring->rx_buffer_info)
                rte_free(ring->rx_buffer_info);
@@ -779,18 +773,12 @@ static void ena_rx_queue_release(void *queue)
 static void ena_tx_queue_release(void *queue)
 {
        struct ena_ring *ring = (struct ena_ring *)queue;
-       struct ena_adapter *adapter = ring->adapter;
-       int ena_qid;
 
        ena_assert_msg(ring->configured,
                       "API violation. Releasing not configured queue");
        ena_assert_msg(ring->adapter->state != ENA_ADAPTER_STATE_RUNNING,
                       "API violation");
 
-       /* Destroy HW queue */
-       ena_qid = ENA_IO_TXQ_IDX(ring->id);
-       ena_com_destroy_io_queue(&adapter->ena_dev, ena_qid);
-
        /* Free all bufs */
        ena_tx_queue_release_bufs(ring);
 
@@ -1078,10 +1066,86 @@ static void ena_stop(struct rte_eth_dev *dev)
                (struct ena_adapter *)(dev->data->dev_private);
 
        rte_timer_stop_sync(&adapter->timer_wd);
+       ena_free_io_queues_all(adapter);
 
        adapter->state = ENA_ADAPTER_STATE_STOPPED;
 }
 
+static int ena_create_io_queue(struct ena_ring *ring)
+{
+       struct ena_adapter *adapter;
+       struct ena_com_dev *ena_dev;
+       struct ena_com_create_io_ctx ctx =
+               /* policy set to _HOST just to satisfy icc compiler */
+               { ENA_ADMIN_PLACEMENT_POLICY_HOST,
+                 0, 0, 0, 0, 0 };
+       uint16_t ena_qid;
+       int rc;
+
+       adapter = ring->adapter;
+       ena_dev = &adapter->ena_dev;
+
+       if (ring->type == ENA_RING_TYPE_TX) {
+               ena_qid = ENA_IO_TXQ_IDX(ring->id);
+               ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
+               ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
+               ctx.queue_size = adapter->tx_ring_size;
+       } else {
+               ena_qid = ENA_IO_RXQ_IDX(ring->id);
+               ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
+               ctx.queue_size = adapter->rx_ring_size;
+       }
+       ctx.qid = ena_qid;
+       ctx.msix_vector = -1; /* interrupts not used */
+       ctx.numa_node = ena_cpu_to_node(ring->id);
+
+       rc = ena_com_create_io_queue(ena_dev, &ctx);
+       if (rc) {
+               RTE_LOG(ERR, PMD,
+                       "failed to create io queue #%d (qid:%d) rc: %d\n",
+                       ring->id, ena_qid, rc);
+               return rc;
+       }
+
+       rc = ena_com_get_io_handlers(ena_dev, ena_qid,
+                                    &ring->ena_com_io_sq,
+                                    &ring->ena_com_io_cq);
+       if (rc) {
+               RTE_LOG(ERR, PMD,
+                       "Failed to get io queue handlers. queue num %d rc: %d\n",
+                       ring->id, rc);
+               ena_com_destroy_io_queue(ena_dev, ena_qid);
+               return rc;
+       }
+
+       if (ring->type == ENA_RING_TYPE_TX)
+               ena_com_update_numa_node(ring->ena_com_io_cq, ctx.numa_node);
+
+       return 0;
+}
+
+static void ena_free_io_queues_all(struct ena_adapter *adapter)
+{
+       struct rte_eth_dev *eth_dev = adapter->rte_dev;
+       struct ena_com_dev *ena_dev = &adapter->ena_dev;
+       int i;
+       uint16_t ena_qid;
+       uint16_t nb_rxq = eth_dev->data->nb_rx_queues;
+       uint16_t nb_txq = eth_dev->data->nb_tx_queues;
+
+       for (i = 0; i < nb_txq; ++i) {
+               ena_qid = ENA_IO_TXQ_IDX(i);
+               ena_com_destroy_io_queue(ena_dev, ena_qid);
+       }
+
+       for (i = 0; i < nb_rxq; ++i) {
+               ena_qid = ENA_IO_RXQ_IDX(i);
+               ena_com_destroy_io_queue(ena_dev, ena_qid);
+
+               ena_rx_queue_release_bufs(&adapter->rx_ring[i]);
+       }
+}
+
 static int ena_queue_restart(struct ena_ring *ring)
 {
        int rc, bufs_num;
@@ -1089,6 +1153,12 @@ static int ena_queue_restart(struct ena_ring *ring)
        ena_assert_msg(ring->configured == 1,
                       "Trying to restart unconfigured queue\n");
 
+       rc = ena_create_io_queue(ring);
+       if (rc) {
+               PMD_INIT_LOG(ERR, "Failed to create IO queue!\n");
+               return rc;
+       }
+
        ring->next_to_clean = 0;
        ring->next_to_use = 0;
 
@@ -1111,17 +1181,10 @@ static int ena_tx_queue_setup(struct rte_eth_dev *dev,
                              __rte_unused unsigned int socket_id,
                              const struct rte_eth_txconf *tx_conf)
 {
-       struct ena_com_create_io_ctx ctx =
-               /* policy set to _HOST just to satisfy icc compiler */
-               { ENA_ADMIN_PLACEMENT_POLICY_HOST,
-                 ENA_COM_IO_QUEUE_DIRECTION_TX, 0, 0, 0, 0 };
        struct ena_ring *txq = NULL;
        struct ena_adapter *adapter =
                (struct ena_adapter *)(dev->data->dev_private);
        unsigned int i;
-       int ena_qid;
-       int rc;
-       struct ena_com_dev *ena_dev = &adapter->ena_dev;
 
        txq = &adapter->tx_ring[queue_idx];
 
@@ -1146,37 +1209,6 @@ static int ena_tx_queue_setup(struct rte_eth_dev *dev,
                return -EINVAL;
        }
 
-       ena_qid = ENA_IO_TXQ_IDX(queue_idx);
-
-       ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX;
-       ctx.qid = ena_qid;
-       ctx.msix_vector = -1; /* admin interrupts not used */
-       ctx.mem_queue_type = ena_dev->tx_mem_queue_type;
-       ctx.queue_size = adapter->tx_ring_size;
-       ctx.numa_node = ena_cpu_to_node(queue_idx);
-
-       rc = ena_com_create_io_queue(ena_dev, &ctx);
-       if (rc) {
-               RTE_LOG(ERR, PMD,
-                       "failed to create io TX queue #%d (qid:%d) rc: %d\n",
-                       queue_idx, ena_qid, rc);
-               return rc;
-       }
-       txq->ena_com_io_cq = &ena_dev->io_cq_queues[ena_qid];
-       txq->ena_com_io_sq = &ena_dev->io_sq_queues[ena_qid];
-
-       rc = ena_com_get_io_handlers(ena_dev, ena_qid,
-                                    &txq->ena_com_io_sq,
-                                    &txq->ena_com_io_cq);
-       if (rc) {
-               RTE_LOG(ERR, PMD,
-                       "Failed to get TX queue handlers. TX queue num %d rc: %d\n",
-                       queue_idx, rc);
-               goto err_destroy_io_queue;
-       }
-
-       ena_com_update_numa_node(txq->ena_com_io_cq, ctx.numa_node);
-
        txq->port_id = dev->data->port_id;
        txq->next_to_clean = 0;
        txq->next_to_use = 0;
@@ -1188,8 +1220,7 @@ static int ena_tx_queue_setup(struct rte_eth_dev *dev,
                                          RTE_CACHE_LINE_SIZE);
        if (!txq->tx_buffer_info) {
                RTE_LOG(ERR, PMD, "failed to alloc mem for tx buffer info\n");
-               rc = -ENOMEM;
-               goto err_destroy_io_queue;
+               return -ENOMEM;
        }
 
        txq->empty_tx_reqs = rte_zmalloc("txq->empty_tx_reqs",
@@ -1197,8 +1228,8 @@ static int ena_tx_queue_setup(struct rte_eth_dev *dev,
                                         RTE_CACHE_LINE_SIZE);
        if (!txq->empty_tx_reqs) {
                RTE_LOG(ERR, PMD, "failed to alloc mem for tx reqs\n");
-               rc = -ENOMEM;
-               goto err_free;
+               rte_free(txq->tx_buffer_info);
+               return -ENOMEM;
        }
 
        for (i = 0; i < txq->ring_size; i++)
@@ -1214,13 +1245,6 @@ static int ena_tx_queue_setup(struct rte_eth_dev *dev,
        dev->data->tx_queues[queue_idx] = txq;
 
        return 0;
-
-err_free:
-       rte_free(txq->tx_buffer_info);
-
-err_destroy_io_queue:
-       ena_com_destroy_io_queue(ena_dev, ena_qid);
-       return rc;
 }
 
 static int ena_rx_queue_setup(struct rte_eth_dev *dev,
@@ -1230,16 +1254,10 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
                              __rte_unused const struct rte_eth_rxconf *rx_conf,
                              struct rte_mempool *mp)
 {
-       struct ena_com_create_io_ctx ctx =
-               /* policy set to _HOST just to satisfy icc compiler */
-               { ENA_ADMIN_PLACEMENT_POLICY_HOST,
-                 ENA_COM_IO_QUEUE_DIRECTION_RX, 0, 0, 0, 0 };
        struct ena_adapter *adapter =
                (struct ena_adapter *)(dev->data->dev_private);
        struct ena_ring *rxq = NULL;
-       uint16_t ena_qid = 0;
-       int i, rc = 0;
-       struct ena_com_dev *ena_dev = &adapter->ena_dev;
+       int i;
 
        rxq = &adapter->rx_ring[queue_idx];
        if (rxq->configured) {
@@ -1263,36 +1281,6 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
                return -EINVAL;
        }
 
-       ena_qid = ENA_IO_RXQ_IDX(queue_idx);
-
-       ctx.qid = ena_qid;
-       ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX;
-       ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST;
-       ctx.msix_vector = -1; /* admin interrupts not used */
-       ctx.queue_size = adapter->rx_ring_size;
-       ctx.numa_node = ena_cpu_to_node(queue_idx);
-
-       rc = ena_com_create_io_queue(ena_dev, &ctx);
-       if (rc) {
-               RTE_LOG(ERR, PMD, "failed to create io RX queue #%d rc: %d\n",
-                       queue_idx, rc);
-               return rc;
-       }
-
-       rxq->ena_com_io_cq = &ena_dev->io_cq_queues[ena_qid];
-       rxq->ena_com_io_sq = &ena_dev->io_sq_queues[ena_qid];
-
-       rc = ena_com_get_io_handlers(ena_dev, ena_qid,
-                                    &rxq->ena_com_io_sq,
-                                    &rxq->ena_com_io_cq);
-       if (rc) {
-               RTE_LOG(ERR, PMD,
-                       "Failed to get RX queue handlers. RX queue num %d rc: %d\n",
-                       queue_idx, rc);
-               ena_com_destroy_io_queue(ena_dev, ena_qid);
-               return rc;
-       }
-
        rxq->port_id = dev->data->port_id;
        rxq->next_to_clean = 0;
        rxq->next_to_use = 0;
@@ -1304,7 +1292,6 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
                                          RTE_CACHE_LINE_SIZE);
        if (!rxq->rx_buffer_info) {
                RTE_LOG(ERR, PMD, "failed to alloc mem for rx buffer info\n");
-               ena_com_destroy_io_queue(ena_dev, ena_qid);
                return -ENOMEM;
        }
 
@@ -1315,7 +1302,6 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
                RTE_LOG(ERR, PMD, "failed to alloc mem for empty rx reqs\n");
                rte_free(rxq->rx_buffer_info);
                rxq->rx_buffer_info = NULL;
-               ena_com_destroy_io_queue(ena_dev, ena_qid);
                return -ENOMEM;
        }
 
@@ -1326,7 +1312,7 @@ static int ena_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->configured = 1;
        dev->data->rx_queues[queue_idx] = rxq;
 
-       return rc;
+       return 0;
 }
 
 static int ena_populate_rx_queue(struct ena_ring *rxq, unsigned int count)
@@ -1703,7 +1689,7 @@ static int eth_ena_dev_uninit(struct rte_eth_dev *eth_dev)
                (struct ena_adapter *)(eth_dev->data->dev_private);
 
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-               return -EPERM;
+               return 0;
 
        if (adapter->state != ENA_ADAPTER_STATE_CLOSED)
                ena_close(eth_dev);
@@ -1924,7 +1910,7 @@ static uint16_t eth_ena_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 
                /* fill mbuf attributes if any */
                ena_rx_mbuf_prepare(mbuf_head, &ena_rx_ctx);
-               mbuf_head->hash.rss = (uint32_t)rx_ring->id;
+               mbuf_head->hash.rss = ena_rx_ctx.hash;
 
                /* pass to DPDK application head mbuf */
                rx_pkts[recv_idx] = mbuf_head;
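
Taken together, the ENA changes move hardware queue creation out of queue
setup; a sketch of the resulting lifecycle, using the names from the diff
above:

    /* ena_rx/tx_queue_setup()   - allocate software ring state only
     * ena_queue_restart()       - ena_create_io_queue() creates the HW queue
     * ena_stop()                - ena_free_io_queues_all() destroys HW queues
     * ena_rx/tx_queue_release() - free software ring state only
     *
     * HW queues therefore exist only between dev_start and dev_stop, which
     * lets the port be stopped and restarted without releasing the rings.
     */
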
diff --git a/drivers/net/enetc/Makefile b/drivers/net/enetc/Makefile
new file mode 100644 (file)
index 0000000..9895501
--- /dev/null
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 NXP
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_enetc.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+EXPORT_MAP := rte_pmd_enetc_version.map
+LIBABIVER := 1
+
+SRCS-$(CONFIG_RTE_LIBRTE_ENETC_PMD) += enetc_ethdev.c
+SRCS-$(CONFIG_RTE_LIBRTE_ENETC_PMD) += enetc_rxtx.c
+
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool
+LDLIBS += -lrte_ethdev
+LDLIBS += -lrte_bus_pci
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/enetc/base/enetc_hw.h b/drivers/net/enetc/base/enetc_hw.h
new file mode 100644 (file)
index 0000000..f36fa11
--- /dev/null
@@ -0,0 +1,226 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 NXP
+ */
+
+#ifndef _ENETC_HW_H_
+#define _ENETC_HW_H_
+#include <rte_io.h>
+
+#define BIT(x)         ((uint64_t)1 << ((x)))
+
+/* ENETC device IDs */
+#define ENETC_DEV_ID_VF                0xef00
+#define ENETC_DEV_ID           0xe100
+
+/* ENETC register block BAR */
+#define ENETC_BAR_REGS                 0x0
+
+/* SI regs, offset: 0h */
+#define ENETC_SIMR                     0x0
+#define ENETC_SIMR_EN                  BIT(31)
+
+#define ENETC_SIPMAR0                  0x80
+#define ENETC_SIPMAR1                  0x84
+
+#define ENETC_SICAPR0                  0x900
+#define ENETC_SICAPR1                  0x904
+
+#define ENETC_SIMSITRV(n)              (0xB00 + (n) * 0x4)
+#define ENETC_SIMSIRRV(n)              (0xB80 + (n) * 0x4)
+
+#define ENETC_SICCAPR                  0x1200
+
+/* enum for BD type */
+enum enetc_bdr_type {TX, RX};
+
+#define ENETC_BDR(type, n, off)                (0x8000 + (type) * 0x100 + (n) * 0x200 \
+                                                       + (off))
+/* RX BDR reg offsets */
+#define ENETC_RBMR             0x0 /* RX BDR mode register*/
+#define ENETC_RBMR_EN          BIT(31)
+
+#define ENETC_RBSR             0x4  /* Rx BDR status register*/
+#define ENETC_RBBSR            0x8  /* Rx BDR buffer size register*/
+#define ENETC_RBCIR            0xc  /* Rx BDR consumer index register*/
+#define ENETC_RBBAR0           0x10 /* Rx BDR base address register 0 */
+#define ENETC_RBBAR1           0x14 /* Rx BDR base address register 1*/
+#define ENETC_RBPIR            0x18 /* Rx BDR producer index register*/
+#define ENETC_RBLENR           0x20 /* Rx BDR length register*/
+#define ENETC_RBIER            0xa0 /* Rx BDR interrupt enable register*/
+#define ENETC_RBIER_RXTIE      BIT(0)
+#define ENETC_RBIDR            0xa4 /* Rx BDR interrupt detect register*/
+#define ENETC_RBICIR0          0xa8 /* Rx BDR interrupt coalescing register 0*/
+#define ENETC_RBICIR0_ICEN     BIT(31)
+
+
+#define ENETC_TBMR     0x0  /* Tx BDR mode register (TBMR) 32 RW */
+#define ENETC_TBSR     0x4  /* Tx BDR status register (TBSR) 32 RO */
+#define ENETC_TBBAR0   0x10 /* Tx BDR base address register 0 (TBBAR0) 32 RW */
+#define ENETC_TBBAR1   0x14 /* Tx BDR base address register 1 (TBBAR1) 32 RW */
+#define ENETC_TBCIR    0x18 /* Tx BDR consumer index register (TBCIR) 32 RW */
+#define ENETC_TBCISR   0x1C /* Tx BDR consumer index shadow register 32 RW */
+#define ENETC_TBIER    0xA0 /* Tx BDR interrupt enable register 32 RW */
+#define ENETC_TBIDR    0xA4 /* Tx BDR interrupt detect register 32 RO */
+#define ENETC_TBICR0   0xA8 /* Tx BDR interrupt coalescing register 0 32 RW */
+#define ENETC_TBICR1   0xAC /* Tx BDR interrupt coalescing register 1 32 RW */
+#define ENETC_TBLENR   0x20
+
+#define ENETC_TBCISR_IDX_MASK          0xffff
+#define ENETC_TBIER_TXFIE              BIT(1)
+
+#define ENETC_RTBLENR_LEN(n)           ((n) & ~0x7)
+#define ENETC_TBMR_EN                  BIT(31)
+
+/* Port regs, offset: 1_0000h */
+#define ENETC_PORT_BASE                        0x10000
+#define ENETC_PMR                      0x00000
+#define ENETC_PMR_EN                   (BIT(16) | BIT(17) | BIT(18))
+#define ENETC_PSR                      0x00004 /* RO */
+#define ENETC_PSIPMR                   0x00018
+#define ENETC_PSIPMR_SET_UP(n)         (0x1 << (n)) /* n = SI index */
+#define ENETC_PSIPMR_SET_MP(n)         (0x1 << ((n) + 8))
+#define ENETC_PSIPMR_SET_VLAN_MP(n)    (0x1 << ((n) + 16))
+#define ENETC_PSIPMAR0(n)              (0x00100 + (n) * 0x20)
+#define ENETC_PSIPMAR1(n)              (0x00104 + (n) * 0x20)
+#define ENETC_PCAPR0                   0x00900
+#define ENETC_PCAPR1                   0x00904
+
+#define ENETC_PV0CFGR(n)               (0x00920 + (n) * 0x10)
+#define ENETC_PVCFGR_SET_TXBDR(val)    ((val) & 0xff)
+#define ENETC_PVCFGR_SET_RXBDR(val)    (((val) & 0xff) << 16)
+
+#define ENETC_PM0_CMD_CFG              0x08008
+#define ENETC_PM0_TX_EN                        BIT(0)
+#define ENETC_PM0_RX_EN                        BIT(1)
+
+#define ENETC_PM0_MAXFRM               0x08014
+#define ENETC_SET_MAXFRM(val)          ((val) << 16)
+
+#define ENETC_PM0_STATUS               0x08304
+#define ENETC_LINK_MODE                        0x0000000000080000ULL
+#define ENETC_LINK_STATUS              0x0000000000010000ULL
+#define ENETC_LINK_SPEED_MASK          0x0000000000060000ULL
+#define ENETC_LINK_SPEED_10M           0x0ULL
+#define ENETC_LINK_SPEED_100M          0x0000000000020000ULL
+#define ENETC_LINK_SPEED_1G            0x0000000000040000ULL
+
+/* Global regs, offset: 2_0000h */
+#define ENETC_GLOBAL_BASE              0x20000
+#define ENETC_G_EIPBRR0                        0x00bf8
+#define ENETC_G_EIPBRR1                        0x00bfc
+
+/* general register accessors */
+#define enetc_rd_reg(reg)      rte_read32((void *)(reg))
+#define enetc_wr_reg(reg, val) rte_write32((val), (void *)(reg))
+#define enetc_rd(hw, off)      enetc_rd_reg((size_t)(hw)->reg + (off))
+#define enetc_wr(hw, off, val) enetc_wr_reg((size_t)(hw)->reg + (off), val)
+/* port register accessors - PF only */
+#define enetc_port_rd(hw, off) enetc_rd_reg((size_t)(hw)->port + (off))
+#define enetc_port_wr(hw, off, val) \
+                               enetc_wr_reg((size_t)(hw)->port + (off), val)
+/* global register accessors - PF only */
+#define enetc_global_rd(hw, off) \
+                               enetc_rd_reg((size_t)(hw)->global + (off))
+#define enetc_global_wr(hw, off, val) \
+                               enetc_wr_reg((size_t)(hw)->global + (off), val)
+/* BDR register accessors, see ENETC_BDR() */
+#define enetc_bdr_rd(hw, t, n, off) \
+                               enetc_rd(hw, ENETC_BDR(t, n, off))
+#define enetc_bdr_wr(hw, t, n, off, val) \
+                               enetc_wr(hw, ENETC_BDR(t, n, off), val)
+
+#define enetc_txbdr_rd(hw, n, off) enetc_bdr_rd(hw, TX, n, off)
+#define enetc_rxbdr_rd(hw, n, off) enetc_bdr_rd(hw, RX, n, off)
+#define enetc_txbdr_wr(hw, n, off, val) \
+                               enetc_bdr_wr(hw, TX, n, off, val)
+#define enetc_rxbdr_wr(hw, n, off, val) \
+                               enetc_bdr_wr(hw, RX, n, off, val)
+
+#define ENETC_TX_ADDR(txq, addr) ((void *)((txq)->enetc_txbdr + (addr)))
+
+#define ENETC_TXBD_FLAGS_IE            BIT(13)
+#define ENETC_TXBD_FLAGS_F             BIT(15)
+
+/* ENETC Parsed values (Little Endian) */
+#define ENETC_PKT_TYPE_ETHER            0x0060
+#define ENETC_PKT_TYPE_IPV4             0x0000
+#define ENETC_PKT_TYPE_IPV6             0x0020
+#define ENETC_PKT_TYPE_IPV4_TCP \
+                       (0x0010 | ENETC_PKT_TYPE_IPV4)
+#define ENETC_PKT_TYPE_IPV6_TCP \
+                       (0x0010 | ENETC_PKT_TYPE_IPV6)
+#define ENETC_PKT_TYPE_IPV4_UDP \
+                       (0x0011 | ENETC_PKT_TYPE_IPV4)
+#define ENETC_PKT_TYPE_IPV6_UDP \
+                       (0x0011 | ENETC_PKT_TYPE_IPV6)
+#define ENETC_PKT_TYPE_IPV4_SCTP \
+                       (0x0013 | ENETC_PKT_TYPE_IPV4)
+#define ENETC_PKT_TYPE_IPV6_SCTP \
+                       (0x0013 | ENETC_PKT_TYPE_IPV6)
+#define ENETC_PKT_TYPE_IPV4_ICMP \
+                       (0x0003 | ENETC_PKT_TYPE_IPV4)
+#define ENETC_PKT_TYPE_IPV6_ICMP \
+                       (0x0003 | ENETC_PKT_TYPE_IPV6)
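+
+/*
+ * Worked example (informational): the composed values above expand to plain
+ * constants, e.g. IPv6 + UDP = 0x0020 | 0x0011 = 0x0031, so the Rx path can
+ * switch directly on the 16-bit parse summary. Assumes a compiler with C11
+ * _Static_assert support.
+ */
+_Static_assert(ENETC_PKT_TYPE_IPV6_UDP == 0x0031,
+              "parse summary composes the L3 base with the L4 code");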
+
+/* PCI device info */
+struct enetc_hw {
+       void *reg;      /* SI registers, used by all PCI functions */
+       void *port;     /* Port registers, PF only */
+       void *global;   /* IP global registers, PF only */
+};
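+
+/*
+ * Illustrative sketch (not part of this patch; the helper name is ours):
+ * toggling a Tx BDR with the accessor macros above, given a mapped hw and
+ * the TX/RX BDR indexing defined earlier in this header.
+ */
+static inline void
+enetc_txbdr_set_en_example(struct enetc_hw *hw, int idx, int en)
+{
+       uint32_t tbmr = enetc_txbdr_rd(hw, idx, ENETC_TBMR);
+
+       if (en)
+               tbmr |= ENETC_TBMR_EN;
+       else
+               tbmr &= ~ENETC_TBMR_EN;
+       enetc_txbdr_wr(hw, idx, ENETC_TBMR, tbmr);
+}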
+
+struct enetc_eth_mac_info {
+       uint8_t addr[ETHER_ADDR_LEN];
+       uint8_t perm_addr[ETHER_ADDR_LEN];
+       uint8_t get_link_status;
+};
+
+struct enetc_eth_hw {
+       struct rte_eth_dev *ndev;
+       struct enetc_hw hw;
+       uint16_t device_id;
+       uint16_t vendor_id;
+       uint8_t revision_id;
+       struct enetc_eth_mac_info mac;
+};
+
+/* Transmit Descriptor */
+struct enetc_tx_desc {
+       uint64_t addr;
+       uint16_t frm_len;
+       uint16_t buf_len;
+       uint32_t flags_errors;
+};
+
+/* TX Buffer Descriptors (BD) */
+struct enetc_tx_bd {
+       uint64_t addr;
+       uint16_t buf_len;
+       uint16_t frm_len;
+       uint16_t err_csum;
+       uint16_t flags;
+};
+
+/* RX buffer descriptor */
+union enetc_rx_bd {
+       struct {
+               uint64_t addr;
+               uint8_t reserved[8];
+       } w;
+       struct {
+               uint16_t inet_csum;
+               uint16_t parse_summary;
+               uint32_t rss_hash;
+               uint16_t buf_len;
+               uint16_t vlan_opt;
+               union {
+                       struct {
+                               uint16_t flags;
+                               uint16_t error;
+                       };
+                       uint32_t lstatus;
+               };
+       } r;
+};
+
+#endif
diff --git a/drivers/net/enetc/enetc.h b/drivers/net/enetc/enetc.h
new file mode 100644 (file)
index 0000000..0e80d1c
--- /dev/null
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 NXP
+ */
+
+#ifndef _ENETC_H_
+#define _ENETC_H_
+
+#include <rte_time.h>
+
+#include "base/enetc_hw.h"
+
+#define PCI_VENDOR_ID_FREESCALE 0x1957
+
+/* Max TX rings per ENETC. */
+#define MAX_TX_RINGS   2
+
+/* Max RX rings per ENETC. */
+#define MAX_RX_RINGS   1
+
+/* Max BD counts per Ring. */
+#define MAX_BD_COUNT   64000
+/* Min BD counts per Ring. */
+#define MIN_BD_COUNT   32
+/* BD ALIGN */
+#define BD_ALIGN       8
+
+/*
+ * upper_32_bits - return bits 32-63 of a number
+ * @n: the number we're accessing
+ *
+ * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
+ * the "right shift count >= width of type" warning when that quantity is
+ * 32-bits.
+ */
+#define upper_32_bits(n) ((uint32_t)(((n) >> 16) >> 16))
+
+/*
+ * lower_32_bits - return bits 0-31 of a number
+ * @n: the number we're accessing
+ */
+#define lower_32_bits(n) ((uint32_t)(n))
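+
+/*
+ * Illustrative sketch (not part of this patch; the helper name is ours):
+ * splitting a 64-bit ring base address across a lo/hi 32-bit register pair
+ * with the helpers above, as the BDR setup code does with ENETC_TBBAR0/1.
+ */
+static inline void
+enetc_split_addr_example(uint64_t addr, uint32_t *lo, uint32_t *hi)
+{
+       *lo = lower_32_bits(addr); /* bits 0-31 */
+       *hi = upper_32_bits(addr); /* bits 32-63, warning-free on 32-bit */
+}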
+
+#define ENETC_TXBD(BDR, i) (&(((struct enetc_tx_bd *)((BDR).bd_base))[i]))
+#define ENETC_RXBD(BDR, i) (&(((union enetc_rx_bd *)((BDR).bd_base))[i]))
+
+struct enetc_swbd {
+       struct rte_mbuf *buffer_addr;
+};
+
+struct enetc_bdr {
+       struct rte_eth_dev *ndev;
+       struct rte_mempool *mb_pool;   /* mbuf pool to populate RX ring. */
+       void *bd_base;                  /* points to Rx or Tx BD ring */
+       union {
+               void *tcir;
+               void *rcir;
+       };
+       uint16_t index;
+       int bd_count; /* # of BDs */
+       int next_to_use;
+       int next_to_clean;
+       struct enetc_swbd *q_swbd;
+       union {
+               void *tcisr; /* Tx */
+               int next_to_alloc; /* Rx */
+       };
+};
+
+/*
+ * Structure to store private data for each driver instance (for each port).
+ */
+struct enetc_eth_adapter {
+       struct rte_eth_dev *ndev;
+       struct enetc_eth_hw hw;
+};
+
+#define ENETC_DEV_PRIVATE(adapter) \
+       ((struct enetc_eth_adapter *)adapter)
+
+#define ENETC_DEV_PRIVATE_TO_HW(adapter) \
+       (&((struct enetc_eth_adapter *)adapter)->hw)
+
+#define ENETC_DEV_PRIVATE_TO_STATS(adapter) \
+       (&((struct enetc_eth_adapter *)adapter)->stats)
+
+#define ENETC_DEV_PRIVATE_TO_INTR(adapter) \
+       (&((struct enetc_eth_adapter *)adapter)->intr)
+
+#define ENETC_GET_HW_ADDR(reg, addr) ((void *)(((size_t)reg) + (addr)))
+#define ENETC_REG_READ(addr) (*(volatile uint32_t *)(addr))
+#define ENETC_REG_WRITE(addr, val) (*(volatile uint32_t *)(addr) = (val))
+#define ENETC_REG_WRITE_RELAXED(addr, val) (*(volatile uint32_t *)(addr) = (val))
+
+/*
+ * RX/TX ENETC function prototypes
+ */
+uint16_t enetc_xmit_pkts(void *txq, struct rte_mbuf **tx_pkts,
+               uint16_t nb_pkts);
+uint16_t enetc_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
+               uint16_t nb_pkts);
+
+
+int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt);
+
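+/*
+ * Free BD count in a ring; one slot is always kept unused so that
+ * next_to_use == next_to_clean unambiguously means "empty".
+ * E.g. bd_count = 8, next_to_clean = 2, next_to_use = 5:
+ * 8 + 2 - 5 - 1 = 4 free BDs.
+ */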
+static inline int
+enetc_bd_unused(struct enetc_bdr *bdr)
+{
+       if (bdr->next_to_clean > bdr->next_to_use)
+               return bdr->next_to_clean - bdr->next_to_use - 1;
+
+       return bdr->bd_count + bdr->next_to_clean - bdr->next_to_use - 1;
+}
+#endif /* _ENETC_H_ */
diff --git a/drivers/net/enetc/enetc_ethdev.c b/drivers/net/enetc/enetc_ethdev.c
new file mode 100644 (file)
index 0000000..023fe75
--- /dev/null
@@ -0,0 +1,629 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 NXP
+ */
+
+#include <stdbool.h>
+#include <rte_ethdev_pci.h>
+
+#include "enetc_logs.h"
+#include "enetc.h"
+
+int enetc_logtype_pmd;
+
+/* Functions Prototypes */
+static int enetc_dev_configure(struct rte_eth_dev *dev);
+static int enetc_dev_start(struct rte_eth_dev *dev);
+static void enetc_dev_stop(struct rte_eth_dev *dev);
+static void enetc_dev_close(struct rte_eth_dev *dev);
+static void enetc_dev_infos_get(struct rte_eth_dev *dev,
+                               struct rte_eth_dev_info *dev_info);
+static int enetc_link_update(struct rte_eth_dev *dev, int wait_to_complete);
+static int enetc_hardware_init(struct enetc_eth_hw *hw);
+static int enetc_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+               uint16_t nb_rx_desc, unsigned int socket_id,
+               const struct rte_eth_rxconf *rx_conf,
+               struct rte_mempool *mb_pool);
+static void enetc_rx_queue_release(void *rxq);
+static int enetc_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+               uint16_t nb_tx_desc, unsigned int socket_id,
+               const struct rte_eth_txconf *tx_conf);
+static void enetc_tx_queue_release(void *txq);
+static const uint32_t *enetc_supported_ptypes_get(struct rte_eth_dev *dev);
+
+/*
+ * The set of PCI devices this driver supports
+ */
+static const struct rte_pci_id pci_id_enetc_map[] = {
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID) },
+       { RTE_PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_VF) },
+       { .vendor_id = 0, /* sentinel */ },
+};
+
+/* Features supported by this driver */
+static const struct eth_dev_ops enetc_ops = {
+       .dev_configure        = enetc_dev_configure,
+       .dev_start            = enetc_dev_start,
+       .dev_stop             = enetc_dev_stop,
+       .dev_close            = enetc_dev_close,
+       .link_update          = enetc_link_update,
+       .dev_infos_get        = enetc_dev_infos_get,
+       .rx_queue_setup       = enetc_rx_queue_setup,
+       .rx_queue_release     = enetc_rx_queue_release,
+       .tx_queue_setup       = enetc_tx_queue_setup,
+       .tx_queue_release     = enetc_tx_queue_release,
+       .dev_supported_ptypes_get = enetc_supported_ptypes_get,
+};
+
+/**
+ * Initialisation of the enetc device
+ *
+ * @param eth_dev
+ *   - Pointer to the structure rte_eth_dev
+ *
+ * @return
+ *   - On success, zero.
+ *   - On failure, negative value.
+ */
+static int
+enetc_dev_init(struct rte_eth_dev *eth_dev)
+{
+       int error = 0;
+       struct rte_pci_device *pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
+       struct enetc_eth_hw *hw =
+               ENETC_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+
+       PMD_INIT_FUNC_TRACE();
+       eth_dev->dev_ops = &enetc_ops;
+       eth_dev->rx_pkt_burst = &enetc_recv_pkts;
+       eth_dev->tx_pkt_burst = &enetc_xmit_pkts;
+
+       /* Retrieving and storing the HW base address of device */
+       hw->hw.reg = (void *)pci_dev->mem_resource[0].addr;
+       hw->device_id = pci_dev->id.device_id;
+
+       error = enetc_hardware_init(hw);
+       if (error != 0) {
+               ENETC_PMD_ERR("Hardware initialization failed");
+               return error;
+       }
+
+       /* Allocate memory for storing MAC addresses */
+       eth_dev->data->mac_addrs = rte_zmalloc("enetc_eth", ETHER_ADDR_LEN, 0);
+       if (!eth_dev->data->mac_addrs) {
+               ENETC_PMD_ERR("Failed to allocate %d bytes needed to "
+                             "store MAC addresses",
+                             ETHER_ADDR_LEN);
+               return -ENOMEM;
+       }
+
+       /* Copy the permanent MAC address */
+       ether_addr_copy((struct ether_addr *)hw->mac.addr,
+                       &eth_dev->data->mac_addrs[0]);
+
+       ENETC_PMD_DEBUG("port_id %d vendorID=0x%x deviceID=0x%x",
+                       eth_dev->data->port_id, pci_dev->id.vendor_id,
+                       pci_dev->id.device_id);
+       return 0;
+}
+
+static int
+enetc_dev_uninit(struct rte_eth_dev *eth_dev __rte_unused)
+{
+       PMD_INIT_FUNC_TRACE();
+       return 0;
+}
+
+static int
+enetc_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+       PMD_INIT_FUNC_TRACE();
+       return 0;
+}
+
+static int
+enetc_dev_start(struct rte_eth_dev *dev)
+{
+       struct enetc_eth_hw *hw =
+               ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint32_t val;
+
+       PMD_INIT_FUNC_TRACE();
+       val = ENETC_REG_READ(ENETC_GET_HW_ADDR(hw->hw.port,
+                            ENETC_PM0_CMD_CFG));
+       ENETC_REG_WRITE(ENETC_GET_HW_ADDR(hw->hw.port, ENETC_PM0_CMD_CFG),
+                       val | ENETC_PM0_TX_EN | ENETC_PM0_RX_EN);
+
+       /* Enable port */
+       val = ENETC_REG_READ(ENETC_GET_HW_ADDR(hw->hw.port, ENETC_PMR));
+       ENETC_REG_WRITE(ENETC_GET_HW_ADDR(hw->hw.port, ENETC_PMR),
+                       val | ENETC_PMR_EN);
+
+       return 0;
+}
+
+static void
+enetc_dev_stop(struct rte_eth_dev *dev)
+{
+       struct enetc_eth_hw *hw =
+               ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint32_t val;
+
+       PMD_INIT_FUNC_TRACE();
+       /* Disable port */
+       val = ENETC_REG_READ(ENETC_GET_HW_ADDR(hw->hw.port, ENETC_PMR));
+       ENETC_REG_WRITE(ENETC_GET_HW_ADDR(hw->hw.port, ENETC_PMR),
+                       val & (~ENETC_PMR_EN));
+
+       val = ENETC_REG_READ(ENETC_GET_HW_ADDR(hw->hw.port,
+                            ENETC_PM0_CMD_CFG));
+       ENETC_REG_WRITE(ENETC_GET_HW_ADDR(hw->hw.port, ENETC_PM0_CMD_CFG),
+                       val & (~(ENETC_PM0_TX_EN | ENETC_PM0_RX_EN)));
+}
+
+static void
+enetc_dev_close(struct rte_eth_dev *dev)
+{
+       uint16_t i;
+
+       PMD_INIT_FUNC_TRACE();
+       enetc_dev_stop(dev);
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               enetc_rx_queue_release(dev->data->rx_queues[i]);
+               dev->data->rx_queues[i] = NULL;
+       }
+       dev->data->nb_rx_queues = 0;
+
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               enetc_tx_queue_release(dev->data->tx_queues[i]);
+               dev->data->tx_queues[i] = NULL;
+       }
+       dev->data->nb_tx_queues = 0;
+}
+
+static const uint32_t *
+enetc_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused)
+{
+       static const uint32_t ptypes[] = {
+               RTE_PTYPE_L2_ETHER,
+               RTE_PTYPE_L3_IPV4,
+               RTE_PTYPE_L3_IPV6,
+               RTE_PTYPE_L4_TCP,
+               RTE_PTYPE_L4_UDP,
+               RTE_PTYPE_L4_SCTP,
+               RTE_PTYPE_L4_ICMP,
+               RTE_PTYPE_UNKNOWN
+       };
+
+       return ptypes;
+}
+
+/* return 0 means link status changed, -1 means not changed */
+static int
+enetc_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused)
+{
+       struct enetc_eth_hw *hw =
+               ENETC_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       struct rte_eth_link link;
+       uint32_t status;
+
+       PMD_INIT_FUNC_TRACE();
+
+       memset(&link, 0, sizeof(link));
+
+       status = ENETC_REG_READ(ENETC_GET_HW_ADDR(hw->hw.port,
+                               ENETC_PM0_STATUS));
+
+       if (status & ENETC_LINK_MODE)
+               link.link_duplex = ETH_LINK_FULL_DUPLEX;
+       else
+               link.link_duplex = ETH_LINK_HALF_DUPLEX;
+
+       if (status & ENETC_LINK_STATUS)
+               link.link_status = ETH_LINK_UP;
+       else
+               link.link_status = ETH_LINK_DOWN;
+
+       switch (status & ENETC_LINK_SPEED_MASK) {
+       case ENETC_LINK_SPEED_1G:
+               link.link_speed = ETH_SPEED_NUM_1G;
+               break;
+
+       case ENETC_LINK_SPEED_100M:
+               link.link_speed = ETH_SPEED_NUM_100M;
+               break;
+
+       default:
+       case ENETC_LINK_SPEED_10M:
+               link.link_speed = ETH_SPEED_NUM_10M;
+       }
+
+       return rte_eth_linkstatus_set(dev, &link);
+}
+
+static int
+enetc_hardware_init(struct enetc_eth_hw *hw)
+{
+       uint32_t psipmr = 0;
+
+       PMD_INIT_FUNC_TRACE();
+       /* Calculating and storing the base HW addresses */
+       hw->hw.port = (void *)((size_t)hw->hw.reg + ENETC_PORT_BASE);
+       hw->hw.global = (void *)((size_t)hw->hw.reg + ENETC_GLOBAL_BASE);
+
+       /* Enabling Station Interface */
+       ENETC_REG_WRITE(ENETC_GET_HW_ADDR(hw->hw.reg, ENETC_SIMR),
+                                         ENETC_SIMR_EN);
+
+       /* Set to accept broadcast packets on each interface */
+       psipmr |= ENETC_PSIPMR_SET_UP(0) | ENETC_PSIPMR_SET_MP(0) |
+                 ENETC_PSIPMR_SET_VLAN_MP(0);
+       psipmr |= ENETC_PSIPMR_SET_UP(1) | ENETC_PSIPMR_SET_MP(1) |
+                 ENETC_PSIPMR_SET_VLAN_MP(1);
+       psipmr |= ENETC_PSIPMR_SET_UP(2) | ENETC_PSIPMR_SET_MP(2) |
+                 ENETC_PSIPMR_SET_VLAN_MP(2);
+
+       ENETC_REG_WRITE(ENETC_GET_HW_ADDR(hw->hw.port, ENETC_PSIPMR),
+                       psipmr);
+
+       /* Enabling broadcast address */
+       ENETC_REG_WRITE(ENETC_GET_HW_ADDR(hw->hw.port, ENETC_PSIPMAR0(0)),
+                       0xFFFFFFFF);
+       ENETC_REG_WRITE(ENETC_GET_HW_ADDR(hw->hw.port, ENETC_PSIPMAR1(0)),
+                       0xFFFF << 16);
+
+       return 0;
+}
+
+static void
+enetc_dev_infos_get(struct rte_eth_dev *dev __rte_unused,
+                   struct rte_eth_dev_info *dev_info)
+{
+       PMD_INIT_FUNC_TRACE();
+       dev_info->rx_desc_lim = (struct rte_eth_desc_lim) {
+               .nb_max = MAX_BD_COUNT,
+               .nb_min = MIN_BD_COUNT,
+               .nb_align = BD_ALIGN,
+       };
+       dev_info->tx_desc_lim = (struct rte_eth_desc_lim) {
+               .nb_max = MAX_BD_COUNT,
+               .nb_min = MIN_BD_COUNT,
+               .nb_align = BD_ALIGN,
+       };
+       dev_info->max_rx_queues = MAX_RX_RINGS;
+       dev_info->max_tx_queues = MAX_TX_RINGS;
+       dev_info->max_rx_pktlen = 1500;
+}
+
+static int
+enetc_alloc_txbdr(struct enetc_bdr *txr, uint16_t nb_desc)
+{
+       int size;
+
+       size = nb_desc * sizeof(struct enetc_swbd);
+       txr->q_swbd = rte_malloc(NULL, size, RTE_CACHE_LINE_SIZE);
+       if (txr->q_swbd == NULL)
+               return -ENOMEM;
+
+       size = nb_desc * sizeof(struct enetc_tx_bd);
+       txr->bd_base = rte_malloc(NULL, size, RTE_CACHE_LINE_SIZE);
+       if (txr->bd_base == NULL) {
+               rte_free(txr->q_swbd);
+               txr->q_swbd = NULL;
+               return -ENOMEM;
+       }
+
+       txr->bd_count = nb_desc;
+       txr->next_to_clean = 0;
+       txr->next_to_use = 0;
+
+       return 0;
+}
+
+static void
+enetc_free_bdr(struct enetc_bdr *rxr)
+{
+       rte_free(rxr->q_swbd);
+       rte_free(rxr->bd_base);
+       rxr->q_swbd = NULL;
+       rxr->bd_base = NULL;
+}
+
+static void
+enetc_setup_txbdr(struct enetc_hw *hw, struct enetc_bdr *tx_ring)
+{
+       int idx = tx_ring->index;
+       uintptr_t base_addr;
+       uint32_t tbmr;
+
+       base_addr = (uintptr_t)tx_ring->bd_base;
+       enetc_txbdr_wr(hw, idx, ENETC_TBBAR0,
+                      lower_32_bits((uint64_t)base_addr));
+       enetc_txbdr_wr(hw, idx, ENETC_TBBAR1,
+                      upper_32_bits((uint64_t)base_addr));
+       enetc_txbdr_wr(hw, idx, ENETC_TBLENR,
+                      ENETC_RTBLENR_LEN(tx_ring->bd_count));
+
+       tbmr = ENETC_TBMR_EN;
+       /* enable ring */
+       enetc_txbdr_wr(hw, idx, ENETC_TBMR, tbmr);
+       enetc_txbdr_wr(hw, idx, ENETC_TBCIR, 0);
+       enetc_txbdr_wr(hw, idx, ENETC_TBCISR, 0);
+       tx_ring->tcir = (void *)((size_t)hw->reg +
+                       ENETC_BDR(TX, idx, ENETC_TBCIR));
+       tx_ring->tcisr = (void *)((size_t)hw->reg +
+                        ENETC_BDR(TX, idx, ENETC_TBCISR));
+}
+
+static int
+enetc_alloc_tx_resources(struct rte_eth_dev *dev,
+                        uint16_t queue_idx,
+                        uint16_t nb_desc)
+{
+       int err;
+       struct enetc_bdr *tx_ring;
+       struct rte_eth_dev_data *data = dev->data;
+       struct enetc_eth_adapter *priv =
+                       ENETC_DEV_PRIVATE(data->dev_private);
+
+       tx_ring = rte_zmalloc(NULL, sizeof(struct enetc_bdr), 0);
+       if (tx_ring == NULL) {
+               ENETC_PMD_ERR("Failed to allocate TX ring memory");
+               return -ENOMEM;
+       }
+
+       err = enetc_alloc_txbdr(tx_ring, nb_desc);
+       if (err)
+               goto fail;
+
+       tx_ring->index = queue_idx;
+       tx_ring->ndev = dev;
+       enetc_setup_txbdr(&priv->hw.hw, tx_ring);
+       data->tx_queues[queue_idx] = tx_ring;
+
+       return 0;
+fail:
+       rte_free(tx_ring);
+
+       return err;
+}
+
+static int
+enetc_tx_queue_setup(struct rte_eth_dev *dev,
+                    uint16_t queue_idx,
+                    uint16_t nb_desc,
+                    unsigned int socket_id __rte_unused,
+                    const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+       int err = 0;
+
+       PMD_INIT_FUNC_TRACE();
+       if (nb_desc > MAX_BD_COUNT)
+               return -1;
+
+       err = enetc_alloc_tx_resources(dev, queue_idx, nb_desc);
+
+       return err;
+}
+
+static void
+enetc_tx_queue_release(void *txq)
+{
+       if (txq == NULL)
+               return;
+
+       struct enetc_bdr *tx_ring = (struct enetc_bdr *)txq;
+       struct enetc_eth_hw *eth_hw =
+               ENETC_DEV_PRIVATE_TO_HW(tx_ring->ndev->data->dev_private);
+       struct enetc_hw *hw;
+       struct enetc_swbd *tx_swbd;
+       int i;
+       uint32_t val;
+
+       /* Disable the ring */
+       hw = &eth_hw->hw;
+       val = enetc_txbdr_rd(hw, tx_ring->index, ENETC_TBMR);
+       val &= (~ENETC_TBMR_EN);
+       enetc_txbdr_wr(hw, tx_ring->index, ENETC_TBMR, val);
+
+       /* Clean the ring */
+       i = tx_ring->next_to_clean;
+       tx_swbd = &tx_ring->q_swbd[i];
+       while (tx_swbd->buffer_addr != NULL) {
+               rte_pktmbuf_free(tx_swbd->buffer_addr);
+               tx_swbd->buffer_addr = NULL;
+               tx_swbd++;
+               i++;
+               if (unlikely(i == tx_ring->bd_count)) {
+                       i = 0;
+                       tx_swbd = &tx_ring->q_swbd[i];
+               }
+       }
+
+       enetc_free_bdr(tx_ring);
+       rte_free(tx_ring);
+}
+
+static int
+enetc_alloc_rxbdr(struct enetc_bdr *rxr,
+                 uint16_t nb_rx_desc)
+{
+       int size;
+
+       size = nb_rx_desc * sizeof(struct enetc_swbd);
+       rxr->q_swbd = rte_malloc(NULL, size, RTE_CACHE_LINE_SIZE);
+       if (rxr->q_swbd == NULL)
+               return -ENOMEM;
+
+       size = nb_rx_desc * sizeof(union enetc_rx_bd);
+       rxr->bd_base = rte_malloc(NULL, size, RTE_CACHE_LINE_SIZE);
+       if (rxr->bd_base == NULL) {
+               rte_free(rxr->q_swbd);
+               rxr->q_swbd = NULL;
+               return -ENOMEM;
+       }
+
+       rxr->bd_count = nb_rx_desc;
+       rxr->next_to_clean = 0;
+       rxr->next_to_use = 0;
+       rxr->next_to_alloc = 0;
+
+       return 0;
+}
+
+static void
+enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring,
+                 struct rte_mempool *mb_pool)
+{
+       int idx = rx_ring->index;
+       uintptr_t base_addr;
+       uint16_t buf_size;
+
+       base_addr = (uintptr_t)rx_ring->bd_base;
+       enetc_rxbdr_wr(hw, idx, ENETC_RBBAR0,
+                      lower_32_bits((uint64_t)base_addr));
+       enetc_rxbdr_wr(hw, idx, ENETC_RBBAR1,
+                      upper_32_bits((uint64_t)base_addr));
+       enetc_rxbdr_wr(hw, idx, ENETC_RBLENR,
+                      ENETC_RTBLENR_LEN(rx_ring->bd_count));
+
+       rx_ring->mb_pool = mb_pool;
+       /* enable ring */
+       enetc_rxbdr_wr(hw, idx, ENETC_RBMR, ENETC_RBMR_EN);
+       enetc_rxbdr_wr(hw, idx, ENETC_RBPIR, 0);
+       rx_ring->rcir = (void *)((size_t)hw->reg +
+                       ENETC_BDR(RX, idx, ENETC_RBCIR));
+       enetc_refill_rx_ring(rx_ring, (enetc_bd_unused(rx_ring)));
+       buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rx_ring->mb_pool) -
+                  RTE_PKTMBUF_HEADROOM);
+       enetc_rxbdr_wr(hw, idx, ENETC_RBBSR, buf_size);
+}
+
+static int
+enetc_alloc_rx_resources(struct rte_eth_dev *dev,
+                        uint16_t rx_queue_id,
+                        uint16_t nb_rx_desc,
+                        struct rte_mempool *mb_pool)
+{
+       int err;
+       struct enetc_bdr *rx_ring;
+       struct rte_eth_dev_data *data = dev->data;
+       struct enetc_eth_adapter *adapter =
+                       ENETC_DEV_PRIVATE(data->dev_private);
+
+       rx_ring = rte_zmalloc(NULL, sizeof(struct enetc_bdr), 0);
+       if (rx_ring == NULL) {
+               ENETC_PMD_ERR("Failed to allocate RX ring memory");
+               err = -ENOMEM;
+               return err;
+       }
+
+       err = enetc_alloc_rxbdr(rx_ring, nb_rx_desc);
+       if (err)
+               goto fail;
+
+       rx_ring->index = rx_queue_id;
+       rx_ring->ndev = dev;
+       enetc_setup_rxbdr(&adapter->hw.hw, rx_ring, mb_pool);
+       data->rx_queues[rx_queue_id] = rx_ring;
+
+       return 0;
+fail:
+       rte_free(rx_ring);
+
+       return err;
+}
+
+static int
+enetc_rx_queue_setup(struct rte_eth_dev *dev,
+                    uint16_t rx_queue_id,
+                    uint16_t nb_rx_desc,
+                    unsigned int socket_id __rte_unused,
+                    const struct rte_eth_rxconf *rx_conf __rte_unused,
+                    struct rte_mempool *mb_pool)
+{
+       int err = 0;
+
+       PMD_INIT_FUNC_TRACE();
+       if (nb_rx_desc > MAX_BD_COUNT)
+               return -1;
+
+       err = enetc_alloc_rx_resources(dev, rx_queue_id,
+                                      nb_rx_desc,
+                                      mb_pool);
+
+       return err;
+}
+
+static void
+enetc_rx_queue_release(void *rxq)
+{
+       if (rxq == NULL)
+               return;
+
+       struct enetc_bdr *rx_ring = (struct enetc_bdr *)rxq;
+       struct enetc_eth_hw *eth_hw =
+               ENETC_DEV_PRIVATE_TO_HW(rx_ring->ndev->data->dev_private);
+       struct enetc_swbd *q_swbd;
+       struct enetc_hw *hw;
+       uint32_t val;
+       int i;
+
+       /* Disable the ring */
+       hw = &eth_hw->hw;
+       val = enetc_rxbdr_rd(hw, rx_ring->index, ENETC_RBMR);
+       val &= (~ENETC_RBMR_EN);
+       enetc_rxbdr_wr(hw, rx_ring->index, ENETC_RBMR, val);
+
+       /* Clean the ring */
+       i = rx_ring->next_to_clean;
+       q_swbd = &rx_ring->q_swbd[i];
+       while (i != rx_ring->next_to_use) {
+               rte_pktmbuf_free(q_swbd->buffer_addr);
+               q_swbd->buffer_addr = NULL;
+               q_swbd++;
+               i++;
+               if (unlikely(i == rx_ring->bd_count)) {
+                       i = 0;
+                       q_swbd = &rx_ring->q_swbd[i];
+               }
+       }
+
+       enetc_free_bdr(rx_ring);
+       rte_free(rx_ring);
+}
+
+static int
+enetc_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+                          struct rte_pci_device *pci_dev)
+{
+       return rte_eth_dev_pci_generic_probe(pci_dev,
+                                            sizeof(struct enetc_eth_adapter),
+                                            enetc_dev_init);
+}
+
+static int
+enetc_pci_remove(struct rte_pci_device *pci_dev)
+{
+       return rte_eth_dev_pci_generic_remove(pci_dev, enetc_dev_uninit);
+}
+
+static struct rte_pci_driver rte_enetc_pmd = {
+       .id_table = pci_id_enetc_map,
+       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_IOVA_AS_VA,
+       .probe = enetc_pci_probe,
+       .remove = enetc_pci_remove,
+};
+
+RTE_PMD_REGISTER_PCI(net_enetc, rte_enetc_pmd);
+RTE_PMD_REGISTER_PCI_TABLE(net_enetc, pci_id_enetc_map);
+RTE_PMD_REGISTER_KMOD_DEP(net_enetc, "* vfio-pci");
+
+RTE_INIT(enetc_pmd_init_log)
+{
+       enetc_logtype_pmd = rte_log_register("pmd.net.enetc");
+       if (enetc_logtype_pmd >= 0)
+               rte_log_set_level(enetc_logtype_pmd, RTE_LOG_NOTICE);
+}
diff --git a/drivers/net/enetc/enetc_logs.h b/drivers/net/enetc/enetc_logs.h
new file mode 100644 (file)
index 0000000..c8a6c0c
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 NXP
+ */
+
+#ifndef _ENETC_LOGS_H_
+#define _ENETC_LOGS_H_
+
+extern int enetc_logtype_pmd;
+
+#define ENETC_PMD_LOG(level, fmt, args...) \
+       rte_log(RTE_LOG_ ## level, enetc_logtype_pmd, "enetc_net: " \
+               fmt "\n", ##args)
+
+#define ENETC_PMD_DEBUG(fmt, args...) \
+       rte_log(RTE_LOG_DEBUG, enetc_logtype_pmd, "enetc_net: %s(): "\
+               fmt "\n", __func__, ##args)
+
+#define PMD_INIT_FUNC_TRACE() ENETC_PMD_DEBUG(">>")
+
+#define ENETC_PMD_CRIT(fmt, args...) \
+       ENETC_PMD_LOG(CRIT, fmt, ## args)
+#define ENETC_PMD_INFO(fmt, args...) \
+       ENETC_PMD_LOG(INFO, fmt, ## args)
+#define ENETC_PMD_ERR(fmt, args...) \
+       ENETC_PMD_LOG(ERR, fmt, ## args)
+#define ENETC_PMD_WARN(fmt, args...) \
+       ENETC_PMD_LOG(WARNING, fmt, ## args)
+
+/* DP Logs, toggled out at compile time if level lower than current level */
+#define ENETC_PMD_DP_LOG(level, fmt, args...) \
+       RTE_LOG_DP(level, PMD, fmt, ## args)
+
+#define ENETC_PMD_DP_DEBUG(fmt, args...) \
+       ENETC_PMD_DP_LOG(DEBUG, fmt, ## args)
+#define ENETC_PMD_DP_INFO(fmt, args...) \
+       ENETC_PMD_DP_LOG(INFO, fmt, ## args)
+#define ENETC_PMD_DP_WARN(fmt, args...) \
+       ENETC_PMD_DP_LOG(WARNING, fmt, ## args)
+
+#endif /* _ENETC_LOGS_H_ */
diff --git a/drivers/net/enetc/enetc_rxtx.c b/drivers/net/enetc/enetc_rxtx.c
new file mode 100644 (file)
index 0000000..631e243
--- /dev/null
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 NXP
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "rte_ethdev.h"
+#include "rte_malloc.h"
+#include "rte_memzone.h"
+
+#include "base/enetc_hw.h"
+#include "enetc.h"
+#include "enetc_logs.h"
+
+#define ENETC_RXBD_BUNDLE 8 /* Number of BDs to update at once */
+
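+/*
+ * Free already-transmitted mbufs: walk from next_to_clean up to the
+ * hardware consumer index mirrored in the TBCISR shadow register (tcisr),
+ * releasing one software BD per completed frame.
+ */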
+static int
+enetc_clean_tx_ring(struct enetc_bdr *tx_ring)
+{
+       int tx_frm_cnt = 0;
+       struct enetc_swbd *tx_swbd;
+       int i;
+
+       i = tx_ring->next_to_clean;
+       tx_swbd = &tx_ring->q_swbd[i];
+       while ((int)(enetc_rd_reg(tx_ring->tcisr) &
+              ENETC_TBCISR_IDX_MASK) != i) {
+               rte_pktmbuf_free(tx_swbd->buffer_addr);
+               tx_swbd->buffer_addr = NULL;
+               tx_swbd++;
+               i++;
+               if (unlikely(i == tx_ring->bd_count)) {
+                       i = 0;
+                       tx_swbd = &tx_ring->q_swbd[0];
+               }
+
+               tx_frm_cnt++;
+       }
+
+       tx_ring->next_to_clean = i;
+       return tx_frm_cnt;
+}
+
+uint16_t
+enetc_xmit_pkts(void *tx_queue,
+               struct rte_mbuf **tx_pkts,
+               uint16_t nb_pkts)
+{
+       struct enetc_swbd *tx_swbd;
+       int i, start;
+       struct enetc_tx_bd *txbd;
+       struct enetc_bdr *tx_ring = (struct enetc_bdr *)tx_queue;
+
+       i = tx_ring->next_to_use;
+       start = 0;
+       while (nb_pkts--) {
+               enetc_clean_tx_ring(tx_ring);
+               tx_ring->q_swbd[i].buffer_addr = tx_pkts[start];
+               txbd = ENETC_TXBD(*tx_ring, i);
+               tx_swbd = &tx_ring->q_swbd[i];
+               txbd->frm_len = tx_pkts[start]->pkt_len;
+               txbd->buf_len = txbd->frm_len;
+               txbd->flags = rte_cpu_to_le_16(ENETC_TXBD_FLAGS_F);
+               txbd->addr = (uint64_t)(uintptr_t)
+                       rte_cpu_to_le_64((size_t)tx_swbd->buffer_addr->buf_addr +
+                                        tx_swbd->buffer_addr->data_off);
+               i++;
+               start++;
+               if (unlikely(i == tx_ring->bd_count))
+                       i = 0;
+       }
+
+       tx_ring->next_to_use = i;
+       enetc_wr_reg(tx_ring->tcir, i);
+       return start;
+}
+
+int
+enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
+{
+       struct enetc_swbd *rx_swbd;
+       union enetc_rx_bd *rxbd;
+       int i, j;
+
+       i = rx_ring->next_to_use;
+       rx_swbd = &rx_ring->q_swbd[i];
+       rxbd = ENETC_RXBD(*rx_ring, i);
+       for (j = 0; j < buff_cnt; j++) {
+               rx_swbd->buffer_addr = rte_mbuf_raw_alloc(rx_ring->mb_pool);
+               rxbd->w.addr = (uint64_t)(uintptr_t)
+                              rx_swbd->buffer_addr->buf_addr +
+                              rx_swbd->buffer_addr->data_off;
+               /* clear 'R' as well */
+               rxbd->r.lstatus = 0;
+               rx_swbd++;
+               rxbd++;
+               i++;
+               if (unlikely(i == rx_ring->bd_count)) {
+                       i = 0;
+                       rxbd = ENETC_RXBD(*rx_ring, 0);
+                       rx_swbd = &rx_ring->q_swbd[i];
+               }
+       }
+
+       if (likely(j)) {
+               rx_ring->next_to_alloc = i;
+               rx_ring->next_to_use = i;
+               enetc_wr_reg(rx_ring->rcir, i);
+       }
+
+       return j;
+}
+
+
+static inline void __attribute__((hot))
+enetc_dev_rx_parse(struct rte_mbuf *m, uint16_t parse_results)
+{
+       ENETC_PMD_DP_DEBUG("parse summary = 0x%x   ", parse_results);
+
+       m->packet_type = RTE_PTYPE_UNKNOWN;
+       switch (parse_results) {
+       case ENETC_PKT_TYPE_ETHER:
+               m->packet_type = RTE_PTYPE_L2_ETHER;
+               break;
+       case ENETC_PKT_TYPE_IPV4:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                                RTE_PTYPE_L3_IPV4;
+               break;
+       case ENETC_PKT_TYPE_IPV6:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                                RTE_PTYPE_L3_IPV6;
+               break;
+       case ENETC_PKT_TYPE_IPV4_TCP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                                RTE_PTYPE_L3_IPV4 |
+                                RTE_PTYPE_L4_TCP;
+               break;
+       case ENETC_PKT_TYPE_IPV6_TCP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                                RTE_PTYPE_L3_IPV6 |
+                                RTE_PTYPE_L4_TCP;
+               break;
+       case ENETC_PKT_TYPE_IPV4_UDP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                                RTE_PTYPE_L3_IPV4 |
+                                RTE_PTYPE_L4_UDP;
+               break;
+       case ENETC_PKT_TYPE_IPV6_UDP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                                RTE_PTYPE_L3_IPV6 |
+                                RTE_PTYPE_L4_UDP;
+               break;
+       case ENETC_PKT_TYPE_IPV4_SCTP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                                RTE_PTYPE_L3_IPV4 |
+                                RTE_PTYPE_L4_SCTP;
+               break;
+       case ENETC_PKT_TYPE_IPV6_SCTP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                                RTE_PTYPE_L3_IPV6 |
+                                RTE_PTYPE_L4_SCTP;
+               break;
+       case ENETC_PKT_TYPE_IPV4_ICMP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                                RTE_PTYPE_L3_IPV4 |
+                                RTE_PTYPE_L4_ICMP;
+               break;
+       case ENETC_PKT_TYPE_IPV6_ICMP:
+               m->packet_type = RTE_PTYPE_L2_ETHER |
+                                RTE_PTYPE_L3_IPV6 |
+                                RTE_PTYPE_L4_ICMP;
+               break;
+       /* More switch cases can be added */
+       default:
+               m->packet_type = RTE_PTYPE_UNKNOWN;
+       }
+}
+
+static int
+enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
+                   struct rte_mbuf **rx_pkts,
+                   int work_limit)
+{
+       int rx_frm_cnt = 0;
+       int cleaned_cnt, i;
+       struct enetc_swbd *rx_swbd;
+
+       cleaned_cnt = enetc_bd_unused(rx_ring);
+       /* next descriptor to process */
+       i = rx_ring->next_to_clean;
+       rx_swbd = &rx_ring->q_swbd[i];
+       while (likely(rx_frm_cnt < work_limit)) {
+               union enetc_rx_bd *rxbd;
+               uint32_t bd_status;
+
+               if (cleaned_cnt >= ENETC_RXBD_BUNDLE) {
+                       int count = enetc_refill_rx_ring(rx_ring, cleaned_cnt);
+
+                       cleaned_cnt -= count;
+               }
+
+               rxbd = ENETC_RXBD(*rx_ring, i);
+               bd_status = rte_le_to_cpu_32(rxbd->r.lstatus);
+               if (!bd_status)
+                       break;
+
+               rx_swbd->buffer_addr->pkt_len = rxbd->r.buf_len;
+               rx_swbd->buffer_addr->data_len = rxbd->r.buf_len;
+               rx_swbd->buffer_addr->hash.rss = rxbd->r.rss_hash;
+               rx_swbd->buffer_addr->ol_flags = 0;
+               enetc_dev_rx_parse(rx_swbd->buffer_addr,
+                                  rxbd->r.parse_summary);
+               rx_pkts[rx_frm_cnt] = rx_swbd->buffer_addr;
+               cleaned_cnt++;
+               rx_swbd++;
+               i++;
+               if (unlikely(i == rx_ring->bd_count)) {
+                       i = 0;
+                       rx_swbd = &rx_ring->q_swbd[i];
+               }
+
+               rx_ring->next_to_clean = i;
+               rx_frm_cnt++;
+       }
+
+       return rx_frm_cnt;
+}
+
+uint16_t
+enetc_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts,
+               uint16_t nb_pkts)
+{
+       struct enetc_bdr *rx_ring = (struct enetc_bdr *)rxq;
+
+       return enetc_clean_rx_ring(rx_ring, rx_pkts, nb_pkts);
+}
diff --git a/drivers/net/enetc/meson.build b/drivers/net/enetc/meson.build
new file mode 100644 (file)
index 0000000..733156b
--- /dev/null
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 NXP
+
+if host_machine.system() != 'linux'
+       build = false
+endif
+
+sources = files('enetc_ethdev.c',
+               'enetc_rxtx.c')
+
+includes += include_directories('base')
diff --git a/drivers/net/enetc/rte_pmd_enetc_version.map b/drivers/net/enetc/rte_pmd_enetc_version.map
new file mode 100644 (file)
index 0000000..521e51f
--- /dev/null
@@ -0,0 +1,4 @@
+DPDK_18.11 {
+
+       local: *;
+};
index 7c6c29c..e39e476 100644 (file)
@@ -39,4 +39,32 @@ SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_intr.c
 SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_rq.c
 SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += base/vnic_rss.c
 
+# The current implementation assumes 64-bit pointers
+CC_AVX2_SUPPORT=0
+ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
+# Figure out if the compiler supports avx2. The extra check using
+# -march=core-avx2 is necessary to support users who build for the
+# 'default' machine (corei7 which has no avx2) and run the binary on
+# newer CPUs that have avx2.
+# This part is verbatim from i40e makefile.
+ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2)
+       CC_AVX2_SUPPORT=1
+else
+       CC_AVX2_SUPPORT=\
+       $(shell $(CC) -march=core-avx2 -dM -E - </dev/null 2>&1 | \
+       grep -q AVX2 && echo 1)
+       ifeq ($(CC_AVX2_SUPPORT), 1)
+               ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y)
+                       CFLAGS_enic_rxtx_vec_avx2.o += -march=core-avx2
+               else
+                       CFLAGS_enic_rxtx_vec_avx2.o += -mavx2
+               endif
+       endif
+endif
+endif
+
+ifeq ($(CC_AVX2_SUPPORT), 1)
+       SRCS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic_rxtx_vec_avx2.c
+endif
+
 include $(RTE_SDK)/mk/rte.lib.mk
index 16e8814..fd303fe 100644 (file)
@@ -57,6 +57,9 @@ struct vnic_dev {
        void (*free_consistent)(void *priv,
                size_t size, void *vaddr,
                dma_addr_t dma_handle);
+       struct vnic_counter_counts *flow_counters;
+       dma_addr_t flow_counters_pa;
+       u8 flow_counters_dma_active;
 };
 
 #define VNIC_MAX_RES_HDR_SIZE \
@@ -64,6 +67,8 @@ struct vnic_dev {
        sizeof(struct vnic_resource) * RES_TYPE_MAX)
 #define VNIC_RES_STRIDE        128
 
+#define VNIC_MAX_FLOW_COUNTERS 2048
+
 void *vnic_dev_priv(struct vnic_dev *vdev)
 {
        return vdev->priv;
@@ -611,6 +616,35 @@ int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats)
        return vnic_dev_cmd(vdev, CMD_STATS_DUMP, &a0, &a1, wait);
 }
 
+/*
+ * Configure counter DMA
+ */
+int vnic_dev_counter_dma_cfg(struct vnic_dev *vdev, u32 period,
+                            u32 num_counters)
+{
+       u64 args[3];
+       int wait = 1000;
+       int err;
+
+       if (num_counters > VNIC_MAX_FLOW_COUNTERS)
+               return -ENOMEM;
+       if (period > 0 && (period < VNIC_COUNTER_DMA_MIN_PERIOD ||
+           num_counters == 0))
+               return -EINVAL;
+
+       args[0] = num_counters;
+       args[1] = vdev->flow_counters_pa;
+       args[2] = period;
+       err =  vnic_dev_cmd_args(vdev, CMD_COUNTER_DMA_CONFIG, args, 3, wait);
+
+       /* record if DMAs need to be stopped on close */
+       if (!err)
+               vdev->flow_counters_dma_active = (num_counters != 0 &&
+                                                 period != 0);
+
+       return err;
+}
+
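+/*
+ * Usage sketch (illustrative, not part of this patch): after
+ * vnic_dev_alloc_counter_mem() has set up flow_counters/flow_counters_pa,
+ * enable a periodic DMA of the first n counters into that buffer:
+ *
+ *     if (vnic_dev_alloc_counter_mem(vdev) == 0)
+ *             vnic_dev_counter_dma_cfg(vdev, VNIC_COUNTER_DMA_MIN_PERIOD, n);
+ */
+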
 int vnic_dev_close(struct vnic_dev *vdev)
 {
        u64 a0 = 0, a1 = 0;
@@ -939,6 +973,24 @@ int vnic_dev_alloc_stats_mem(struct vnic_dev *vdev)
        return vdev->stats == NULL ? -ENOMEM : 0;
 }
 
+/*
+ * Initialize for up to VNIC_MAX_FLOW_COUNTERS
+ */
+int vnic_dev_alloc_counter_mem(struct vnic_dev *vdev)
+{
+       char name[NAME_MAX];
+       static u32 instance;
+
+       snprintf((char *)name, sizeof(name), "vnic_flow_ctrs-%u", instance++);
+       vdev->flow_counters = vdev->alloc_consistent(vdev->priv,
+                                            sizeof(struct vnic_counter_counts)
+                                            * VNIC_MAX_FLOW_COUNTERS,
+                                            &vdev->flow_counters_pa,
+                                            (u8 *)name);
+       vdev->flow_counters_dma_active = 0;
+       return vdev->flow_counters == NULL ? -ENOMEM : 0;
+}
+
 void vnic_dev_unregister(struct vnic_dev *vdev)
 {
        if (vdev) {
@@ -951,6 +1003,16 @@ void vnic_dev_unregister(struct vnic_dev *vdev)
                        vdev->free_consistent(vdev->priv,
                                sizeof(struct vnic_stats),
                                vdev->stats, vdev->stats_pa);
+               if (vdev->flow_counters) {
+                       /* turn off counter DMAs before freeing memory */
+                       if (vdev->flow_counters_dma_active)
+                               vnic_dev_counter_dma_cfg(vdev, 0, 0);
+
+                       vdev->free_consistent(vdev->priv,
+                               sizeof(struct vnic_counter_counts)
+                               * VNIC_MAX_FLOW_COUNTERS,
+                               vdev->flow_counters, vdev->flow_counters_pa);
+               }
                if (vdev->fw_info)
                        vdev->free_consistent(vdev->priv,
                                sizeof(struct vnic_devcmd_fw_info),
@@ -1094,3 +1156,46 @@ int vnic_dev_capable_vxlan(struct vnic_dev *vdev)
                (a1 & (FEATURE_VXLAN_IPV6 | FEATURE_VXLAN_MULTI_WQ)) ==
                (FEATURE_VXLAN_IPV6 | FEATURE_VXLAN_MULTI_WQ);
 }
+
+bool vnic_dev_counter_alloc(struct vnic_dev *vdev, uint32_t *idx)
+{
+       u64 a0 = 0;
+       u64 a1 = 0;
+       int wait = 1000;
+
+       if (vnic_dev_cmd(vdev, CMD_COUNTER_ALLOC, &a0, &a1, wait))
+               return false;
+       *idx = (uint32_t)a0;
+       return true;
+}
+
+bool vnic_dev_counter_free(struct vnic_dev *vdev, uint32_t idx)
+{
+       u64 a0 = idx;
+       u64 a1 = 0;
+       int wait = 1000;
+
+       return vnic_dev_cmd(vdev, CMD_COUNTER_FREE, &a0, &a1,
+                           wait) == 0;
+}
+
+bool vnic_dev_counter_query(struct vnic_dev *vdev, uint32_t idx,
+                           bool reset, uint64_t *packets, uint64_t *bytes)
+{
+       u64 a0 = idx;
+       u64 a1 = reset ? 1 : 0;
+       int wait = 1000;
+
+       if (reset) {
+               /* query/reset returns updated counters */
+               if (vnic_dev_cmd(vdev, CMD_COUNTER_QUERY, &a0, &a1, wait))
+                       return false;
+               *packets = a0;
+               *bytes = a1;
+       } else {
+               /* Get values DMA'd from the adapter */
+               *packets = vdev->flow_counters[idx].vcc_packets;
+               *bytes = vdev->flow_counters[idx].vcc_bytes;
+       }
+       return true;
+}
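+
+/*
+ * Lifecycle sketch (illustrative, not part of this patch): allocate a
+ * counter, query it with an immediate reset, then release it.
+ *
+ *     uint32_t idx;
+ *     uint64_t pkts, bytes;
+ *
+ *     if (vnic_dev_counter_alloc(vdev, &idx)) {
+ *             if (vnic_dev_counter_query(vdev, idx, true, &pkts, &bytes))
+ *                     printf("pkts=%"PRIu64" bytes=%"PRIu64"\n",
+ *                            pkts, bytes);
+ *             vnic_dev_counter_free(vdev, idx);
+ *     }
+ */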
index 270a47b..de2645c 100644 (file)
@@ -118,6 +118,8 @@ int vnic_dev_spec(struct vnic_dev *vdev, unsigned int offset, size_t size,
        void *value);
 int vnic_dev_stats_clear(struct vnic_dev *vdev);
 int vnic_dev_stats_dump(struct vnic_dev *vdev, struct vnic_stats **stats);
+int vnic_dev_counter_dma_cfg(struct vnic_dev *vdev, u32 period,
+                            u32 num_counters);
 int vnic_dev_hang_notify(struct vnic_dev *vdev);
 int vnic_dev_packet_filter(struct vnic_dev *vdev, int directed, int multicast,
        int broadcast, int promisc, int allmulti);
@@ -170,6 +172,7 @@ struct vnic_dev *vnic_dev_register(struct vnic_dev *vdev,
        unsigned int num_bars);
 struct rte_pci_device *vnic_dev_get_pdev(struct vnic_dev *vdev);
 int vnic_dev_alloc_stats_mem(struct vnic_dev *vdev);
+int vnic_dev_alloc_counter_mem(struct vnic_dev *vdev);
 int vnic_dev_cmd_init(struct vnic_dev *vdev, int fallback);
 int vnic_dev_get_size(void);
 int vnic_dev_int13(struct vnic_dev *vdev, u64 arg, u32 op);
@@ -187,4 +190,9 @@ int vnic_dev_overlay_offload_ctrl(struct vnic_dev *vdev,
 int vnic_dev_overlay_offload_cfg(struct vnic_dev *vdev, u8 overlay,
        u16 vxlan_udp_port_number);
 int vnic_dev_capable_vxlan(struct vnic_dev *vdev);
+bool vnic_dev_counter_alloc(struct vnic_dev *vdev, uint32_t *idx);
+bool vnic_dev_counter_free(struct vnic_dev *vdev, uint32_t idx);
+bool vnic_dev_counter_query(struct vnic_dev *vdev, uint32_t idx,
+                           bool reset, uint64_t *packets, uint64_t *bytes);
+
 #endif /* _VNIC_DEV_H_ */
index a22d8a7..3aad2db 100644 (file)
@@ -598,6 +598,48 @@ enum vnic_devcmd_cmd {
         *                       a3 = bitmask of supported actions
         */
        CMD_ADD_ADV_FILTER = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 77),
+
+       /*
+        * Allocate a counter for use with CMD_ADD_FILTER
+        * out:(u32) a0 = counter index
+        */
+       CMD_COUNTER_ALLOC = _CMDC(_CMD_DIR_READ, _CMD_VTYPE_ENET, 85),
+
+       /*
+        * Free a counter
+        * in: (u32) a0 = counter_id
+        */
+       CMD_COUNTER_FREE = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 86),
+
+       /*
+        * Read a counter
+        * in: (u32) a0 = counter_id
+        *     (u32) a1 = clear counter if non-zero
+        * out:(u64) a0 = packet count
+        *     (u64) a1 = byte count
+        */
+       CMD_COUNTER_QUERY = _CMDC(_CMD_DIR_RW, _CMD_VTYPE_ENET, 87),
+
+       /*
+        * Configure periodic counter DMA.  This will trigger an immediate
+        * DMA of the counters (unless period == 0), and then schedule a DMA
+        * of the counters every <period> milliseconds until disabled.
+        * Each new COUNTER_DMA_CONFIG will override all previous commands on
+        * this vnic.
+        * Setting a2 (period) = 0 will disable periodic DMAs
+        * If a0 (num_counters) != 0, an immediate DMA will always be done,
+        * irrespective of the value in a2.
+        * in: (u32) a0 = number of counters to DMA
+        *     (u64) a1 = host target DMA address
+        *     (u32) a2 = DMA period in milliseconds (0 to disable)
+        */
+       CMD_COUNTER_DMA_CONFIG = _CMDC(_CMD_DIR_WRITE, _CMD_VTYPE_ENET, 88),
+#define VNIC_COUNTER_DMA_MIN_PERIOD 500
+
+       /*
+        * Clear all counters on a vnic
+        */
+       CMD_COUNTER_CLEAR_ALL = _CMDC(_CMD_DIR_NONE, _CMD_VTYPE_ENET, 89),
 };
 
 /* Modes for exchanging advanced filter capabilities. The modes supported by
@@ -863,9 +905,11 @@ struct filter_action {
 #define FILTER_ACTION_RQ_STEERING_FLAG (1 << 0)
 #define FILTER_ACTION_FILTER_ID_FLAG   (1 << 1)
 #define FILTER_ACTION_DROP_FLAG                (1 << 2)
+#define FILTER_ACTION_COUNTER_FLAG      (1 << 3)
 #define FILTER_ACTION_V2_ALL           (FILTER_ACTION_RQ_STEERING_FLAG \
+                                        | FILTER_ACTION_FILTER_ID_FLAG \
                                         | FILTER_ACTION_DROP_FLAG \
-                                        | FILTER_ACTION_FILTER_ID_FLAG)
+                                        | FILTER_ACTION_COUNTER_FLAG)
 
 /* Version 2 of filter action must be a strict extension of struct filter_action
  * where the first fields exactly match in size and meaning.
@@ -875,7 +919,8 @@ struct filter_action_v2 {
        u32 rq_idx;
        u32 flags;                     /* use FILTER_ACTION_XXX_FLAG defines */
        u16 filter_id;
-       u_int8_t reserved[32];         /* for future expansion */
+       u32 counter_index;
+       uint8_t reserved[28];         /* for future expansion */
 } __attribute__((packed));
 
 /* Specifies the filter type. */
@@ -941,9 +986,9 @@ enum {
 };
 
 struct filter_tlv {
-       u_int32_t type;
-       u_int32_t length;
-       u_int32_t val[0];
+       uint32_t type;
+       uint32_t length;
+       uint32_t val[0];
 };
 
 /* Data for CMD_ADD_FILTER is 2 TLV and filter + action structs */
@@ -957,10 +1002,10 @@ struct filter_tlv {
  * drivers should use this instead of "sizeof (struct filter_v2)" when
  * computing length for TLV.
  */
-static inline u_int32_t
+static inline uint32_t
 vnic_filter_size(struct filter_v2 *fp)
 {
-       u_int32_t size;
+       uint32_t size;
 
        switch (fp->type) {
        case FILTER_USNIC_ID:
@@ -999,10 +1044,10 @@ enum {
  * drivers should use this instead of "sizeof (struct filter_action_v2)"
  * when computing length for TLV.
  */
-static inline u_int32_t
+static inline uint32_t
 vnic_action_size(struct filter_action_v2 *fap)
 {
-       u_int32_t size;
+       uint32_t size;
 
        switch (fap->type) {
        case FILTER_ACTION_RQ_STEERING:
@@ -1122,4 +1167,13 @@ typedef enum {
        GRPINTR_UPD_VECT,
 } grpintr_subcmd_t;
 
+/*
+ * Structure for counter DMA
+ * (DMAed by CMD_COUNTER_DMA_CONFIG)
+ */
+struct vnic_counter_counts {
+       u64 vcc_packets;
+       u64 vcc_bytes;
+};
+
 #endif /* _VNIC_DEVCMD_H_ */
index 7c27bd5..7bca3ca 100644 (file)
@@ -39,6 +39,9 @@
 #define PAGE_ROUND_UP(x) \
        ((((unsigned long)(x)) + ENIC_PAGE_SIZE-1) & (~(ENIC_PAGE_SIZE-1)))
 
+/* must be >= VNIC_COUNTER_DMA_MIN_PERIOD */
+#define VNIC_FLOW_COUNTER_UPDATE_MSECS 500
+
 #define ENICPMD_VFIO_PATH          "/dev/vfio/vfio"
 /*#define ENIC_DESC_COUNT_MAKE_ODD (x) do{if ((~(x)) & 1) { (x)--; } }while(0)*/
 
@@ -94,6 +97,7 @@ struct rte_flow {
        LIST_ENTRY(rte_flow) next;
        u16 enic_filter_id;
        struct filter_v2 enic_filter;
+       int counter_idx; /* NIC allocated counter index (-1 = invalid) */
 };
 
 /* Per-instance private data structure */
@@ -104,6 +108,11 @@ struct enic {
        struct vnic_dev_bar bar0;
        struct vnic_dev *vdev;
 
+       /*
+        * mbuf_initializer contains 64 bits of mbuf rearm_data, used by
+        * the avx2 handler at this time.
+        */
+       uint64_t mbuf_initializer;
        unsigned int port_id;
        bool overlay_offload;
        struct rte_eth_dev *rte_dev;
@@ -126,6 +135,7 @@ struct enic {
        u8 filter_actions; /* HW supported actions */
        bool vxlan;
        bool disable_overlay; /* devargs disable_overlay=1 */
+       uint8_t enable_avx2_rx;  /* devargs enable-avx2-rx=1 */
        bool nic_cfg_chk;     /* NIC_CFG_CHK available */
        bool udp_rss_weak;    /* Bodega style UDP RSS */
        uint8_t ig_vlan_rewrite_mode; /* devargs ig-vlan-rewrite */
@@ -165,6 +175,7 @@ struct enic {
        rte_spinlock_t mtu_lock;
 
        LIST_HEAD(enic_flows, rte_flow) flows;
+       int max_flow_counter;
        rte_spinlock_t flows_lock;
 
        /* RSS */
@@ -326,6 +337,7 @@ uint16_t enic_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                        uint16_t nb_pkts);
 int enic_set_mtu(struct enic *enic, uint16_t new_mtu);
 int enic_link_update(struct enic *enic);
+bool enic_use_vector_rx_handler(struct enic *enic);
 void enic_fdir_info(struct enic *enic);
 void enic_fdir_info_get(struct enic *enic, struct rte_eth_fdir_info *stats);
 void copy_fltr_v1(struct filter_v2 *fltr, struct rte_eth_fdir_input *input,
index b3d5777..996bb55 100644 (file)
@@ -37,6 +37,7 @@ static const struct rte_pci_id pci_id_enic_map[] = {
 };
 
 #define ENIC_DEVARG_DISABLE_OVERLAY "disable-overlay"
+#define ENIC_DEVARG_ENABLE_AVX2_RX "enable-avx2-rx"
 #define ENIC_DEVARG_IG_VLAN_REWRITE "ig-vlan-rewrite"
 
 RTE_INIT(enicpmd_init_log)
@@ -521,10 +522,34 @@ static const uint32_t *enicpmd_dev_supported_ptypes_get(struct rte_eth_dev *dev)
                RTE_PTYPE_L4_NONFRAG,
                RTE_PTYPE_UNKNOWN
        };
+       static const uint32_t ptypes_overlay[] = {
+               RTE_PTYPE_L2_ETHER,
+               RTE_PTYPE_L2_ETHER_VLAN,
+               RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
+               RTE_PTYPE_L3_IPV6_EXT_UNKNOWN,
+               RTE_PTYPE_L4_TCP,
+               RTE_PTYPE_L4_UDP,
+               RTE_PTYPE_L4_FRAG,
+               RTE_PTYPE_L4_NONFRAG,
+               RTE_PTYPE_TUNNEL_GRENAT,
+               RTE_PTYPE_INNER_L2_ETHER,
+               RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN,
+               RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN,
+               RTE_PTYPE_INNER_L4_TCP,
+               RTE_PTYPE_INNER_L4_UDP,
+               RTE_PTYPE_INNER_L4_FRAG,
+               RTE_PTYPE_INNER_L4_NONFRAG,
+               RTE_PTYPE_UNKNOWN
+       };
 
-       if (dev->rx_pkt_burst == enic_recv_pkts ||
-           dev->rx_pkt_burst == enic_noscatter_recv_pkts)
-               return ptypes;
+       if (dev->rx_pkt_burst != enic_dummy_recv_pkts &&
+           dev->rx_pkt_burst != NULL) {
+               struct enic *enic = pmd_priv(dev);
+               if (enic->overlay_offload)
+                       return ptypes_overlay;
+               else
+                       return ptypes;
+       }
        return NULL;
 }
 
@@ -915,22 +940,27 @@ static const struct eth_dev_ops enicpmd_eth_dev_ops = {
        .udp_tunnel_port_del  = enicpmd_dev_udp_tunnel_port_del,
 };
 
-static int enic_parse_disable_overlay(__rte_unused const char *key,
-                                     const char *value,
-                                     void *opaque)
+static int enic_parse_zero_one(const char *key,
+                              const char *value,
+                              void *opaque)
 {
        struct enic *enic;
+       bool b;
 
        enic = (struct enic *)opaque;
        if (strcmp(value, "0") == 0) {
-               enic->disable_overlay = false;
+               b = false;
        } else if (strcmp(value, "1") == 0) {
-               enic->disable_overlay = true;
+               b = true;
        } else {
-               dev_err(enic, "Invalid value for " ENIC_DEVARG_DISABLE_OVERLAY
-                       ": expected=0|1 given=%s\n", value);
+               dev_err(enic, "Invalid value for %s"
+                       ": expected=0|1 given=%s\n", key, value);
                return -EINVAL;
        }
+       if (strcmp(key, ENIC_DEVARG_DISABLE_OVERLAY) == 0)
+               enic->disable_overlay = b;
+       if (strcmp(key, ENIC_DEVARG_ENABLE_AVX2_RX) == 0)
+               enic->enable_avx2_rx = b;
        return 0;
 }
 
@@ -971,6 +1001,7 @@ static int enic_check_devargs(struct rte_eth_dev *dev)
 {
        static const char *const valid_keys[] = {
                ENIC_DEVARG_DISABLE_OVERLAY,
+               ENIC_DEVARG_ENABLE_AVX2_RX,
                ENIC_DEVARG_IG_VLAN_REWRITE,
                NULL};
        struct enic *enic = pmd_priv(dev);
@@ -979,6 +1010,7 @@ static int enic_check_devargs(struct rte_eth_dev *dev)
        ENICPMD_FUNC_TRACE();
 
        enic->disable_overlay = false;
+       enic->enable_avx2_rx = false;
        enic->ig_vlan_rewrite_mode = IG_VLAN_REWRITE_MODE_PASS_THRU;
        if (!dev->device->devargs)
                return 0;
@@ -986,7 +1018,9 @@ static int enic_check_devargs(struct rte_eth_dev *dev)
        if (!kvlist)
                return -EINVAL;
        if (rte_kvargs_process(kvlist, ENIC_DEVARG_DISABLE_OVERLAY,
-                              enic_parse_disable_overlay, enic) < 0 ||
+                              enic_parse_zero_one, enic) < 0 ||
+           rte_kvargs_process(kvlist, ENIC_DEVARG_ENABLE_AVX2_RX,
+                              enic_parse_zero_one, enic) < 0 ||
            rte_kvargs_process(kvlist, ENIC_DEVARG_IG_VLAN_REWRITE,
                               enic_parse_ig_vlan_rewrite, enic) < 0) {
                rte_kvargs_free(kvlist);
@@ -996,7 +1030,6 @@ static int enic_check_devargs(struct rte_eth_dev *dev)
        return 0;
 }
 
-struct enic *enicpmd_list_head = NULL;
 /* Initialize the driver
  * It returns 0 on success.
  */
@@ -1044,7 +1077,8 @@ static int eth_enic_pci_remove(struct rte_pci_device *pci_dev)
 
 static struct rte_pci_driver rte_enic_pmd = {
        .id_table = pci_id_enic_map,
-       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+                    RTE_PCI_DRV_IOVA_AS_VA,
        .probe = eth_enic_pci_probe,
        .remove = eth_enic_pci_remove,
 };
@@ -1054,4 +1088,5 @@ RTE_PMD_REGISTER_PCI_TABLE(net_enic, pci_id_enic_map);
 RTE_PMD_REGISTER_KMOD_DEP(net_enic, "* igb_uio | uio_pci_generic | vfio-pci");
 RTE_PMD_REGISTER_PARAM_STRING(net_enic,
        ENIC_DEVARG_DISABLE_OVERLAY "=0|1 "
+       ENIC_DEVARG_ENABLE_AVX2_RX "=0|1 "
        ENIC_DEVARG_IG_VLAN_REWRITE "=trunk|untag|priority|pass");
index 0cf04ae..bb9ed03 100644 (file)
@@ -289,6 +289,15 @@ static const enum rte_flow_action_type enic_supported_actions_v2_drop[] = {
        RTE_FLOW_ACTION_TYPE_END,
 };
 
+static const enum rte_flow_action_type enic_supported_actions_v2_count[] = {
+       RTE_FLOW_ACTION_TYPE_QUEUE,
+       RTE_FLOW_ACTION_TYPE_MARK,
+       RTE_FLOW_ACTION_TYPE_FLAG,
+       RTE_FLOW_ACTION_TYPE_DROP,
+       RTE_FLOW_ACTION_TYPE_COUNT,
+       RTE_FLOW_ACTION_TYPE_END,
+};
+
 /** Action capabilities indexed by NIC version information */
 static const struct enic_action_cap enic_action_cap[] = {
        [FILTER_ACTION_RQ_STEERING_FLAG] = {
@@ -303,6 +312,10 @@ static const struct enic_action_cap enic_action_cap[] = {
                .actions = enic_supported_actions_v2_drop,
                .copy_fn = enic_copy_action_v2,
        },
+       [FILTER_ACTION_COUNTER_FLAG] = {
+               .actions = enic_supported_actions_v2_count,
+               .copy_fn = enic_copy_action_v2,
+       },
 };
 
 static int
@@ -1068,6 +1081,10 @@ enic_copy_action_v2(const struct rte_flow_action actions[],
                        enic_action->flags |= FILTER_ACTION_DROP_FLAG;
                        break;
                }
+               case RTE_FLOW_ACTION_TYPE_COUNT: {
+                       enic_action->flags |= FILTER_ACTION_COUNTER_FLAG;
+                       break;
+               }
                case RTE_FLOW_ACTION_TYPE_VOID:
                        continue;
                default:
@@ -1112,7 +1129,9 @@ enic_get_action_cap(struct enic *enic)
        uint8_t actions;
 
        actions = enic->filter_actions;
-       if (actions & FILTER_ACTION_DROP_FLAG)
+       if (actions & FILTER_ACTION_COUNTER_FLAG)
+               ea = &enic_action_cap[FILTER_ACTION_COUNTER_FLAG];
+       else if (actions & FILTER_ACTION_DROP_FLAG)
                ea = &enic_action_cap[FILTER_ACTION_DROP_FLAG];
        else if (actions & FILTER_ACTION_FILTER_ID_FLAG)
                ea = &enic_action_cap[FILTER_ACTION_FILTER_ID_FLAG];
@@ -1395,8 +1414,10 @@ enic_flow_add_filter(struct enic *enic, struct filter_v2 *enic_filter,
                   struct rte_flow_error *error)
 {
        struct rte_flow *flow;
-       int ret;
-       u16 entry;
+       int err;
+       uint16_t entry;
+       int ctr_idx;
+       int last_max_flow_ctr;
 
        FLOW_TRACE();
 
@@ -1407,20 +1428,64 @@ enic_flow_add_filter(struct enic *enic, struct filter_v2 *enic_filter,
                return NULL;
        }
 
+       flow->counter_idx = -1;
+       last_max_flow_ctr = -1;
+       if (enic_action->flags & FILTER_ACTION_COUNTER_FLAG) {
+               if (!vnic_dev_counter_alloc(enic->vdev, (uint32_t *)&ctr_idx)) {
+                       rte_flow_error_set(error, ENOMEM,
+                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                          NULL, "cannot allocate counter");
+                       goto unwind_flow_alloc;
+               }
+               flow->counter_idx = ctr_idx;
+               enic_action->counter_index = ctr_idx;
+
+               /* If index is the largest, increase the counter DMA size */
+               if (ctr_idx > enic->max_flow_counter) {
+                       err = vnic_dev_counter_dma_cfg(enic->vdev,
+                                                VNIC_FLOW_COUNTER_UPDATE_MSECS,
+                                                ctr_idx + 1);
+                       if (err) {
+                               rte_flow_error_set(error, -err,
+                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                          NULL, "counter DMA config failed");
+                               goto unwind_ctr_alloc;
+                       }
+                       last_max_flow_ctr = enic->max_flow_counter;
+                       enic->max_flow_counter = ctr_idx;
+               }
+       }
+
        /* entry[in] is the queue id, entry[out] is the filter Id for delete */
        entry = enic_action->rq_idx;
-       ret = vnic_dev_classifier(enic->vdev, CLSF_ADD, &entry, enic_filter,
+       err = vnic_dev_classifier(enic->vdev, CLSF_ADD, &entry, enic_filter,
                                  enic_action);
-       if (!ret) {
-               flow->enic_filter_id = entry;
-               flow->enic_filter = *enic_filter;
-       } else {
-               rte_flow_error_set(error, ret, RTE_FLOW_ERROR_TYPE_HANDLE,
+       if (err) {
+               rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "vnic_dev_classifier error");
-               rte_free(flow);
-               return NULL;
+               goto unwind_ctr_dma_cfg;
        }
+
+       flow->enic_filter_id = entry;
+       flow->enic_filter = *enic_filter;
+
        return flow;
+
+/* Error unwind: release resources in reverse order of acquisition */
+unwind_ctr_dma_cfg:
+       if (last_max_flow_ctr != -1) {
+               /* reduce counter DMA size */
+               vnic_dev_counter_dma_cfg(enic->vdev,
+                                        VNIC_FLOW_COUNTER_UPDATE_MSECS,
+                                        last_max_flow_ctr + 1);
+               enic->max_flow_counter = last_max_flow_ctr;
+       }
+unwind_ctr_alloc:
+       if (flow->counter_idx != -1)
+               vnic_dev_counter_free(enic->vdev, ctr_idx);
+unwind_flow_alloc:
+       rte_free(flow);
+       return NULL;
 }
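
The error paths above follow the usual goto-unwind idiom: each failure jumps past the cleanup of resources that were never acquired, and cleanup runs in reverse order of acquisition. A generic sketch of the pattern, with hypothetical acquire/release helpers:

/* Sketch only; acquire_a/acquire_b/release_a are hypothetical. */
static int setup_two_resources(void)
{
	if (acquire_a() != 0)
		goto fail_a;
	if (acquire_b() != 0)
		goto fail_b;
	return 0;
fail_b:
	release_a();	/* undo only what actually succeeded */
fail_a:
	return -1;
}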
 
 /**
@@ -1435,18 +1500,29 @@ enic_flow_add_filter(struct enic *enic, struct filter_v2 *enic_filter,
  * @param error[out]
  */
 static int
-enic_flow_del_filter(struct enic *enic, u16 filter_id,
+enic_flow_del_filter(struct enic *enic, struct rte_flow *flow,
                   struct rte_flow_error *error)
 {
-       int ret;
+       u16 filter_id;
+       int err;
 
        FLOW_TRACE();
 
-       ret = vnic_dev_classifier(enic->vdev, CLSF_DEL, &filter_id, NULL, NULL);
-       if (!ret)
-               rte_flow_error_set(error, ret, RTE_FLOW_ERROR_TYPE_HANDLE,
+       filter_id = flow->enic_filter_id;
+       err = vnic_dev_classifier(enic->vdev, CLSF_DEL, &filter_id, NULL, NULL);
+       if (err) {
+               rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "vnic_dev_classifier failed");
-       return ret;
+               return -err;
+       }
+
+       if (flow->counter_idx != -1) {
+               if (!vnic_dev_counter_free(enic->vdev, flow->counter_idx))
+                       dev_err(enic, "counter free failed, idx: %d\n",
+                               flow->counter_idx);
+               flow->counter_idx = -1;
+       }
+       return 0;
 }
 
 /*
@@ -1529,9 +1605,10 @@ enic_flow_destroy(struct rte_eth_dev *dev, struct rte_flow *flow,
        FLOW_TRACE();
 
        rte_spinlock_lock(&enic->flows_lock);
-       enic_flow_del_filter(enic, flow->enic_filter_id, error);
+       enic_flow_del_filter(enic, flow, error);
        LIST_REMOVE(flow, next);
        rte_spinlock_unlock(&enic->flows_lock);
+       rte_free(flow);
        return 0;
 }
 
@@ -1553,13 +1630,77 @@ enic_flow_flush(struct rte_eth_dev *dev, struct rte_flow_error *error)
 
        while (!LIST_EMPTY(&enic->flows)) {
                flow = LIST_FIRST(&enic->flows);
-               enic_flow_del_filter(enic, flow->enic_filter_id, error);
+               enic_flow_del_filter(enic, flow, error);
                LIST_REMOVE(flow, next);
+               rte_free(flow);
        }
        rte_spinlock_unlock(&enic->flows_lock);
        return 0;
 }
 
+static int
+enic_flow_query_count(struct rte_eth_dev *dev,
+                     struct rte_flow *flow, void *data,
+                     struct rte_flow_error *error)
+{
+       struct enic *enic = pmd_priv(dev);
+       struct rte_flow_query_count *query;
+       uint64_t packets, bytes;
+
+       FLOW_TRACE();
+
+       if (flow->counter_idx == -1) {
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                         NULL,
+                                         "flow does not have counter");
+       }
+       query = (struct rte_flow_query_count *)data;
+       if (!vnic_dev_counter_query(enic->vdev, flow->counter_idx,
+                                   !!query->reset, &packets, &bytes)) {
+               return rte_flow_error_set
+                       (error, EINVAL,
+                        RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                        NULL,
+                        "cannot read counter");
+       }
+       query->hits_set = 1;
+       query->bytes_set = 1;
+       query->hits = packets;
+       query->bytes = bytes;
+       return 0;
+}
+
+static int
+enic_flow_query(struct rte_eth_dev *dev,
+               struct rte_flow *flow,
+               const struct rte_flow_action *actions,
+               void *data,
+               struct rte_flow_error *error)
+{
+       int ret = 0;
+
+       FLOW_TRACE();
+
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+               switch (actions->type) {
+               case RTE_FLOW_ACTION_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+                       ret = enic_flow_query_count(dev, flow, data, error);
+                       break;
+               default:
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "action not supported");
+               }
+               if (ret < 0)
+                       return ret;
+       }
+       return 0;
+}
+
 /**
  * Flow callback registration.
  *
@@ -1570,4 +1711,5 @@ const struct rte_flow_ops enic_flow_ops = {
        .create = enic_flow_create,
        .destroy = enic_flow_destroy,
        .flush = enic_flow_flush,
+       .query = enic_flow_query,
 };
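
With .query now registered, an application can attach a COUNT action at flow-creation time and read the counter back through the generic API. A minimal sketch, assuming the flow was created with RTE_FLOW_ACTION_TYPE_COUNT in its action list (port id and error handling are illustrative):

#include <stdio.h>
#include <inttypes.h>
#include <rte_flow.h>

static int query_flow_counter(uint16_t port_id, struct rte_flow *flow)
{
	struct rte_flow_query_count count = { .reset = 1 };
	/* The enic handler iterates to END, so pass a terminated array. */
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_COUNT },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error;

	if (rte_flow_query(port_id, flow, actions, &count, &error) != 0)
		return -1;
	if (count.hits_set && count.bytes_set)
		printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
		       count.hits, count.bytes);
	return 0;
}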
index fd940c5..e81c3f3 100644 (file)
@@ -514,12 +514,29 @@ static void enic_prep_wq_for_simple_tx(struct enic *enic, uint16_t queue_idx)
        }
 }
 
+/*
+ * The 'strong' version is in enic_rxtx_vec_avx2.c. This weak version is
+ * used when that file is not compiled.
+ */
+bool __attribute__((weak))
+enic_use_vector_rx_handler(__rte_unused struct enic *enic)
+{
+       return false;
+}
+
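
A hedged sketch of what the strong counterpart in enic_rxtx_vec_avx2.c plausibly checks before claiming the Rx path; the exact conditions live in that file and are not reproduced in this hunk:

/* Sketch only: assumed conditions, not the verbatim strong version. */
bool
enic_use_vector_rx_handler(struct enic *enic)
{
	if (!enic->enable_avx2_rx)		/* devarg must opt in */
		return false;
	if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
		return false;			/* CPU must support AVX2 */
	enic->rte_dev->rx_pkt_burst = &enic_noscatter_vec_recv_pkts;
	return true;
}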
 static void pick_rx_handler(struct enic *enic)
 {
        struct rte_eth_dev *eth_dev;
 
-       /* Use the non-scatter, simplified RX handler if possible. */
+       /*
+        * Preference order:
+        * 1. The vectorized handler if possible and requested.
+        * 2. The non-scatter, simplified handler if scatter Rx is not used.
+        * 3. The default handler as a fallback.
+        */
        eth_dev = enic->rte_dev;
+       if (enic_use_vector_rx_handler(enic))
+               return;
        if (enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0) {
                PMD_INIT_LOG(DEBUG, " use the non-scatter Rx handler");
                eth_dev->rx_pkt_burst = &enic_noscatter_recv_pkts;
@@ -534,6 +551,25 @@ int enic_enable(struct enic *enic)
        unsigned int index;
        int err;
        struct rte_eth_dev *eth_dev = enic->rte_dev;
+       uint64_t simple_tx_offloads;
+       uintptr_t p;
+
+       if (enic->enable_avx2_rx) {
+               struct rte_mbuf mb_def = { .buf_addr = 0 };
+
+               /*
+                * mbuf_initializer contains const-after-init fields of
+                * receive mbufs (i.e. 64 bits of fields from rearm_data).
+                * It is currently used by the vectorized handler.
+                */
+               mb_def.nb_segs = 1;
+               mb_def.data_off = RTE_PKTMBUF_HEADROOM;
+               mb_def.port = enic->port_id;
+               rte_mbuf_refcnt_set(&mb_def, 1);
+               rte_compiler_barrier();
+               p = (uintptr_t)&mb_def.rearm_data;
+               enic->mbuf_initializer = *(uint64_t *)p;
+       }
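
The value captured here lets the vector Rx path reinitialize a received mbuf's rearm_data fields (nb_segs, port, refcnt, and data_off) with a single 64-bit store; rx_one() in enic_rxtx_vec_avx2.c below does exactly that:

	*(uint64_t *)&mb->rearm_data = enic->mbuf_initializer;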
 
        eth_dev->data->dev_link.link_speed = vnic_dev_port_speed(enic->vdev);
        eth_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
@@ -572,10 +608,17 @@ int enic_enable(struct enic *enic)
        }
 
        /*
-        * Use the simple TX handler if possible. All offloads must be
-        * disabled.
+        * Use the simple TX handler if possible. Only checksum offloads
+        * and VLAN insertion are supported.
         */
-       if (eth_dev->data->dev_conf.txmode.offloads == 0) {
+       simple_tx_offloads = enic->tx_offload_capa &
+               (DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
+                DEV_TX_OFFLOAD_VLAN_INSERT |
+                DEV_TX_OFFLOAD_IPV4_CKSUM |
+                DEV_TX_OFFLOAD_UDP_CKSUM |
+                DEV_TX_OFFLOAD_TCP_CKSUM);
+       if ((eth_dev->data->dev_conf.txmode.offloads &
+            ~simple_tx_offloads) == 0) {
                PMD_INIT_LOG(DEBUG, " use the simple tx handler");
                eth_dev->tx_pkt_burst = &enic_simple_xmit_pkts;
                for (index = 0; index < enic->wq_count; index++)
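
The subset test above admits any combination of the listed checksum/VLAN offloads and nothing else. A worked illustration (values are examples, not from the patch):

/* requested & ~supported == 0  <=>  requested is a subset of supported */
uint64_t requested = DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_VLAN_INSERT;
uint64_t leftover = requested & ~simple_tx_offloads;	/* 0: simple path OK */
/* Adding DEV_TX_OFFLOAD_TCP_TSO would leave a nonzero remainder and
 * fall back to the default enic_xmit_pkts handler. */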
@@ -1639,11 +1682,25 @@ static int enic_dev_init(struct enic *enic)
 
        LIST_INIT(&enic->flows);
        rte_spinlock_init(&enic->flows_lock);
+       enic->max_flow_counter = -1;
 
        /* set up link status checking */
        vnic_dev_notify_set(enic->vdev, -1); /* No Intr for notify */
 
        enic->overlay_offload = false;
+       if (enic->disable_overlay && enic->vxlan) {
+               /*
+                * Explicitly disable overlay offload as the setting is
+                * sticky, and resetting vNIC does not disable it.
+                */
+               if (vnic_dev_overlay_offload_ctrl(enic->vdev,
+                                                 OVERLAY_FEATURE_VXLAN,
+                                                 OVERLAY_OFFLOAD_DISABLE)) {
+                       dev_err(enic, "failed to disable overlay offload\n");
+               } else {
+                       dev_info(enic, "Overlay offload is disabled\n");
+               }
+       }
        if (!enic->disable_overlay && enic->vxlan &&
            /* 'VXLAN feature' enables VXLAN, NVGRE, and GENEVE. */
            vnic_dev_overlay_offload_ctrl(enic->vdev,
@@ -1653,11 +1710,9 @@ static int enic_dev_init(struct enic *enic)
                        DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM |
                        DEV_TX_OFFLOAD_GENEVE_TNL_TSO |
                        DEV_TX_OFFLOAD_VXLAN_TNL_TSO;
-               /*
-                * Do not add PKT_TX_OUTER_{IPV4,IPV6} as they are not
-                * 'offload' flags (i.e. not part of PKT_TX_OFFLOAD_MASK).
-                */
                enic->tx_offload_mask |=
+                       PKT_TX_OUTER_IPV6 |
+                       PKT_TX_OUTER_IPV4 |
                        PKT_TX_OUTER_IP_CKSUM |
                        PKT_TX_TUNNEL_MASK;
                enic->overlay_offload = true;
@@ -1708,14 +1763,20 @@ int enic_probe(struct enic *enic)
                enic_free_consistent);
 
        /*
-        * Allocate the consistent memory for stats upfront so both primary and
-        * secondary processes can dump stats.
+        * Allocate the consistent memory for stats and counters upfront so
+        * both primary and secondary processes can access them.
         */
        err = vnic_dev_alloc_stats_mem(enic->vdev);
        if (err) {
                dev_err(enic, "Failed to allocate cmd memory, aborting\n");
                goto err_out_unregister;
        }
+       err = vnic_dev_alloc_counter_mem(enic->vdev);
+       if (err) {
+               dev_err(enic, "Failed to allocate counter memory, aborting\n");
+               goto err_out_unregister;
+       }
+
        /* Issue device open to get device in known state */
        err = enic_dev_open(enic);
        if (err) {
index 8d493ff..24b2844 100644 (file)
@@ -85,7 +85,7 @@ int enic_get_vnic_config(struct enic *enic)
        vnic_dev_capable_udp_rss_weak(enic->vdev, &enic->nic_cfg_chk,
                                      &enic->udp_rss_weak);
 
-       dev_info(enic, "Flow api filter mode: %s Actions: %s%s%s\n",
+       dev_info(enic, "Flow api filter mode: %s Actions: %s%s%s%s\n",
                ((enic->flow_filter_mode == FILTER_DPDK_1) ? "DPDK" :
                ((enic->flow_filter_mode == FILTER_USNIC_IP) ? "USNIC" :
                ((enic->flow_filter_mode == FILTER_IPV4_5TUPLE) ? "5TUPLE" :
@@ -95,7 +95,9 @@ int enic_get_vnic_config(struct enic *enic)
                ((enic->filter_actions & FILTER_ACTION_FILTER_ID_FLAG) ?
                 "tag " : ""),
                ((enic->filter_actions & FILTER_ACTION_DROP_FLAG) ?
-                "drop " : ""));
+                "drop " : ""),
+               ((enic->filter_actions & FILTER_ACTION_COUNTER_FLAG) ?
+                "count " : ""));
 
        c->wq_desc_count =
                min_t(u32, ENIC_MAX_WQ_DESCS,
@@ -195,13 +197,14 @@ int enic_get_vnic_config(struct enic *enic)
        enic->rx_offload_capa =
                DEV_RX_OFFLOAD_SCATTER |
                DEV_RX_OFFLOAD_JUMBO_FRAME |
-               DEV_RX_OFFLOAD_CRC_STRIP |
                DEV_RX_OFFLOAD_VLAN_STRIP |
                DEV_RX_OFFLOAD_IPV4_CKSUM |
                DEV_RX_OFFLOAD_UDP_CKSUM |
                DEV_RX_OFFLOAD_TCP_CKSUM;
        enic->tx_offload_mask =
-               PKT_TX_VLAN_PKT |
+               PKT_TX_IPV6 |
+               PKT_TX_IPV4 |
+               PKT_TX_VLAN |
                PKT_TX_IP_CKSUM |
                PKT_TX_L4_MASK |
                PKT_TX_TCP_SEG;
index 7129e12..5189ee6 100644 (file)
@@ -11,6 +11,7 @@
 #include "enic_compat.h"
 #include "rq_enet_desc.h"
 #include "enic.h"
+#include "enic_rxtx_common.h"
 #include <rte_ether.h>
 #include <rte_ip.h>
 #include <rte_tcp.h>
 #define rte_packet_prefetch(p) do {} while (0)
 #endif
 
-static inline uint16_t
-enic_cq_rx_desc_ciflags(struct cq_enet_rq_desc *crd)
-{
-       return le16_to_cpu(crd->completed_index_flags) & ~CQ_DESC_COMP_NDX_MASK;
-}
-
-static inline uint16_t
-enic_cq_rx_desc_bwflags(struct cq_enet_rq_desc *crd)
-{
-       return le16_to_cpu(crd->bytes_written_flags) &
-                          ~CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
-}
-
-static inline uint8_t
-enic_cq_rx_desc_packet_error(uint16_t bwflags)
-{
-       return (bwflags & CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) ==
-               CQ_ENET_RQ_DESC_FLAGS_TRUNCATED;
-}
-
-static inline uint8_t
-enic_cq_rx_desc_eop(uint16_t ciflags)
-{
-       return (ciflags & CQ_ENET_RQ_DESC_FLAGS_EOP)
-               == CQ_ENET_RQ_DESC_FLAGS_EOP;
-}
-
-static inline uint8_t
-enic_cq_rx_desc_csum_not_calc(struct cq_enet_rq_desc *cqrd)
-{
-       return (le16_to_cpu(cqrd->q_number_rss_type_flags) &
-               CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) ==
-               CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC;
-}
-
-static inline uint8_t
-enic_cq_rx_desc_ipv4_csum_ok(struct cq_enet_rq_desc *cqrd)
-{
-       return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) ==
-               CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK;
-}
-
-static inline uint8_t
-enic_cq_rx_desc_tcp_udp_csum_ok(struct cq_enet_rq_desc *cqrd)
-{
-       return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) ==
-               CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK;
-}
-
-static inline uint8_t
-enic_cq_rx_desc_rss_type(struct cq_enet_rq_desc *cqrd)
-{
-       return (uint8_t)((le16_to_cpu(cqrd->q_number_rss_type_flags) >>
-               CQ_DESC_Q_NUM_BITS) & CQ_ENET_RQ_DESC_RSS_TYPE_MASK);
-}
-
-static inline uint32_t
-enic_cq_rx_desc_rss_hash(struct cq_enet_rq_desc *cqrd)
-{
-       return le32_to_cpu(cqrd->rss_hash);
-}
-
-static inline uint16_t
-enic_cq_rx_desc_vlan(struct cq_enet_rq_desc *cqrd)
-{
-       return le16_to_cpu(cqrd->vlan);
-}
-
-static inline uint16_t
-enic_cq_rx_desc_n_bytes(struct cq_desc *cqd)
-{
-       struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
-       return le16_to_cpu(cqrd->bytes_written_flags) &
-               CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
-}
-
-
-static inline uint8_t
-enic_cq_rx_check_err(struct cq_desc *cqd)
-{
-       struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
-       uint16_t bwflags;
-
-       bwflags = enic_cq_rx_desc_bwflags(cqrd);
-       if (unlikely(enic_cq_rx_desc_packet_error(bwflags)))
-               return 1;
-       return 0;
-}
-
-/* Lookup table to translate RX CQ flags to mbuf flags. */
-static inline uint32_t
-enic_cq_rx_flags_to_pkt_type(struct cq_desc *cqd, uint8_t tnl)
-{
-       struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
-       uint8_t cqrd_flags = cqrd->flags;
-       /*
-        * Odd-numbered entries are for tunnel packets. All packet type info
-        * applies to the inner packet, and there is no info on the outer
-        * packet. The outer flags in these entries exist only to avoid
-        * changing enic_cq_rx_to_pkt_flags(). They are cleared from mbuf
-        * afterwards.
-        *
-        * Also, as there is no tunnel type info (VXLAN, NVGRE, or GENEVE), set
-        * RTE_PTYPE_TUNNEL_GRENAT..
-        */
-       static const uint32_t cq_type_table[128] __rte_cache_aligned = {
-               [0x00] = RTE_PTYPE_UNKNOWN,
-               [0x01] = RTE_PTYPE_UNKNOWN |
-                        RTE_PTYPE_TUNNEL_GRENAT |
-                        RTE_PTYPE_INNER_L2_ETHER,
-               [0x20] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG,
-               [0x21] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG |
-                        RTE_PTYPE_TUNNEL_GRENAT |
-                        RTE_PTYPE_INNER_L2_ETHER |
-                        RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                        RTE_PTYPE_INNER_L4_NONFRAG,
-               [0x22] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP,
-               [0x23] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
-                        RTE_PTYPE_TUNNEL_GRENAT |
-                        RTE_PTYPE_INNER_L2_ETHER |
-                        RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                        RTE_PTYPE_INNER_L4_UDP,
-               [0x24] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP,
-               [0x25] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP |
-                        RTE_PTYPE_TUNNEL_GRENAT |
-                        RTE_PTYPE_INNER_L2_ETHER |
-                        RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                        RTE_PTYPE_INNER_L4_TCP,
-               [0x60] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
-               [0x61] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
-                        RTE_PTYPE_TUNNEL_GRENAT |
-                        RTE_PTYPE_INNER_L2_ETHER |
-                        RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                        RTE_PTYPE_INNER_L4_FRAG,
-               [0x62] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
-               [0x63] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
-                        RTE_PTYPE_TUNNEL_GRENAT |
-                        RTE_PTYPE_INNER_L2_ETHER |
-                        RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                        RTE_PTYPE_INNER_L4_FRAG,
-               [0x64] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
-               [0x65] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
-                        RTE_PTYPE_TUNNEL_GRENAT |
-                        RTE_PTYPE_INNER_L2_ETHER |
-                        RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
-                        RTE_PTYPE_INNER_L4_FRAG,
-               [0x10] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG,
-               [0x11] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG |
-                        RTE_PTYPE_TUNNEL_GRENAT |
-                        RTE_PTYPE_INNER_L2_ETHER |
-                        RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                        RTE_PTYPE_INNER_L4_NONFRAG,
-               [0x12] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP,
-               [0x13] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
-                        RTE_PTYPE_TUNNEL_GRENAT |
-                        RTE_PTYPE_INNER_L2_ETHER |
-                        RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                        RTE_PTYPE_INNER_L4_UDP,
-               [0x14] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP,
-               [0x15] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP |
-                        RTE_PTYPE_TUNNEL_GRENAT |
-                        RTE_PTYPE_INNER_L2_ETHER |
-                        RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                        RTE_PTYPE_INNER_L4_TCP,
-               [0x50] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
-               [0x51] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
-                        RTE_PTYPE_TUNNEL_GRENAT |
-                        RTE_PTYPE_INNER_L2_ETHER |
-                        RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                        RTE_PTYPE_INNER_L4_FRAG,
-               [0x52] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
-               [0x53] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
-                        RTE_PTYPE_TUNNEL_GRENAT |
-                        RTE_PTYPE_INNER_L2_ETHER |
-                        RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                        RTE_PTYPE_INNER_L4_FRAG,
-               [0x54] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
-               [0x55] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
-                        RTE_PTYPE_TUNNEL_GRENAT |
-                        RTE_PTYPE_INNER_L2_ETHER |
-                        RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
-                        RTE_PTYPE_INNER_L4_FRAG,
-               /* All others reserved */
-       };
-       cqrd_flags &= CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT
-               | CQ_ENET_RQ_DESC_FLAGS_IPV4 | CQ_ENET_RQ_DESC_FLAGS_IPV6
-               | CQ_ENET_RQ_DESC_FLAGS_TCP | CQ_ENET_RQ_DESC_FLAGS_UDP;
-       return cq_type_table[cqrd_flags + tnl];
-}
-
-static inline void
-enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf)
-{
-       struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
-       uint16_t bwflags, pkt_flags = 0, vlan_tci;
-       bwflags = enic_cq_rx_desc_bwflags(cqrd);
-       vlan_tci = enic_cq_rx_desc_vlan(cqrd);
-
-       /* VLAN STRIPPED flag. The L2 packet type updated here also */
-       if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) {
-               pkt_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
-               mbuf->packet_type |= RTE_PTYPE_L2_ETHER;
-       } else {
-               if (vlan_tci != 0)
-                       mbuf->packet_type |= RTE_PTYPE_L2_ETHER_VLAN;
-               else
-                       mbuf->packet_type |= RTE_PTYPE_L2_ETHER;
-       }
-       mbuf->vlan_tci = vlan_tci;
-
-       if ((cqd->type_color & CQ_DESC_TYPE_MASK) == CQ_DESC_TYPE_CLASSIFIER) {
-               struct cq_enet_rq_clsf_desc *clsf_cqd;
-               uint16_t filter_id;
-               clsf_cqd = (struct cq_enet_rq_clsf_desc *)cqd;
-               filter_id = clsf_cqd->filter_id;
-               if (filter_id) {
-                       pkt_flags |= PKT_RX_FDIR;
-                       if (filter_id != ENIC_MAGIC_FILTER_ID) {
-                               mbuf->hash.fdir.hi = clsf_cqd->filter_id;
-                               pkt_flags |= PKT_RX_FDIR_ID;
-                       }
-               }
-       } else if (enic_cq_rx_desc_rss_type(cqrd)) {
-               /* RSS flag */
-               pkt_flags |= PKT_RX_RSS_HASH;
-               mbuf->hash.rss = enic_cq_rx_desc_rss_hash(cqrd);
-       }
-
-       /* checksum flags */
-       if (mbuf->packet_type & (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6)) {
-               if (!enic_cq_rx_desc_csum_not_calc(cqrd)) {
-                       uint32_t l4_flags;
-                       l4_flags = mbuf->packet_type & RTE_PTYPE_L4_MASK;
-
-                       /*
-                        * When overlay offload is enabled, the NIC may
-                        * set ipv4_csum_ok=1 if the inner packet is IPv6..
-                        * So, explicitly check for IPv4 before checking
-                        * ipv4_csum_ok.
-                        */
-                       if (mbuf->packet_type & RTE_PTYPE_L3_IPV4) {
-                               if (enic_cq_rx_desc_ipv4_csum_ok(cqrd))
-                                       pkt_flags |= PKT_RX_IP_CKSUM_GOOD;
-                               else
-                                       pkt_flags |= PKT_RX_IP_CKSUM_BAD;
-                       }
-
-                       if (l4_flags == RTE_PTYPE_L4_UDP ||
-                           l4_flags == RTE_PTYPE_L4_TCP) {
-                               if (enic_cq_rx_desc_tcp_udp_csum_ok(cqrd))
-                                       pkt_flags |= PKT_RX_L4_CKSUM_GOOD;
-                               else
-                                       pkt_flags |= PKT_RX_L4_CKSUM_BAD;
-                       }
-               }
-       }
-
-       mbuf->ol_flags = pkt_flags;
-}
-
 /* dummy receive function to replace actual function in
  * order to do safe reconfiguration operations.
  */
@@ -707,7 +448,7 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
        wq_desc_avail = vnic_wq_desc_avail(wq);
        head_idx = wq->head_idx;
        desc_count = wq->ring.desc_count;
-       ol_flags_mask = PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK;
+       ol_flags_mask = PKT_TX_VLAN | PKT_TX_IP_CKSUM | PKT_TX_L4_MASK;
        tx_oversized = &enic->soft_stats.tx_oversized;
 
        nb_pkts = RTE_MIN(nb_pkts, ENIC_TX_XMIT_MAX);
@@ -735,7 +476,7 @@ uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
                mss = 0;
                vlan_id = tx_pkt->vlan_tci;
-               vlan_tag_insert = !!(ol_flags & PKT_TX_VLAN_PKT);
+               vlan_tag_insert = !!(ol_flags & PKT_TX_VLAN);
                bus_addr = (dma_addr_t)
                           (tx_pkt->buf_iova + tx_pkt->data_off);
 
@@ -840,12 +581,33 @@ static void enqueue_simple_pkts(struct rte_mbuf **pkts,
                                struct enic *enic)
 {
        struct rte_mbuf *p;
+       uint16_t mss;
 
        while (n) {
                n--;
                p = *pkts++;
                desc->address = p->buf_iova + p->data_off;
                desc->length = p->pkt_len;
+               /* VLAN insert */
+               desc->vlan_tag = p->vlan_tci;
+               desc->header_length_flags &=
+                       ((1 << WQ_ENET_FLAGS_EOP_SHIFT) |
+                        (1 << WQ_ENET_FLAGS_CQ_ENTRY_SHIFT));
+               if (p->ol_flags & PKT_TX_VLAN) {
+                       desc->header_length_flags |=
+                               1 << WQ_ENET_FLAGS_VLAN_TAG_INSERT_SHIFT;
+               }
+               /*
+                * Checksum offload. We use WQ_ENET_OFFLOAD_MODE_CSUM, which
+                * is 0, so no need to set offload_mode.
+                */
+               mss = 0;
+               if (p->ol_flags & PKT_TX_IP_CKSUM)
+                       mss |= ENIC_CALC_IP_CKSUM << WQ_ENET_MSS_SHIFT;
+               if (p->ol_flags & PKT_TX_L4_MASK)
+                       mss |= ENIC_CALC_TCP_UDP_CKSUM << WQ_ENET_MSS_SHIFT;
+               desc->mss_loopback = mss;
+
                /*
                 * The app should not send oversized
                 * packets. tx_pkt_prepare includes a check as
diff --git a/drivers/net/enic/enic_rxtx_common.h b/drivers/net/enic/enic_rxtx_common.h
new file mode 100644 (file)
index 0000000..bfbb490
--- /dev/null
@@ -0,0 +1,271 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2008-2018 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ */
+
+#ifndef _ENIC_RXTX_COMMON_H_
+#define _ENIC_RXTX_COMMON_H_
+
+static inline uint16_t
+enic_cq_rx_desc_ciflags(struct cq_enet_rq_desc *crd)
+{
+       return le16_to_cpu(crd->completed_index_flags) & ~CQ_DESC_COMP_NDX_MASK;
+}
+
+static inline uint16_t
+enic_cq_rx_desc_bwflags(struct cq_enet_rq_desc *crd)
+{
+       return le16_to_cpu(crd->bytes_written_flags) &
+                          ~CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
+}
+
+static inline uint8_t
+enic_cq_rx_desc_packet_error(uint16_t bwflags)
+{
+       return (bwflags & CQ_ENET_RQ_DESC_FLAGS_TRUNCATED) ==
+               CQ_ENET_RQ_DESC_FLAGS_TRUNCATED;
+}
+
+static inline uint8_t
+enic_cq_rx_desc_eop(uint16_t ciflags)
+{
+       return (ciflags & CQ_ENET_RQ_DESC_FLAGS_EOP)
+               == CQ_ENET_RQ_DESC_FLAGS_EOP;
+}
+
+static inline uint8_t
+enic_cq_rx_desc_csum_not_calc(struct cq_enet_rq_desc *cqrd)
+{
+       return (le16_to_cpu(cqrd->q_number_rss_type_flags) &
+               CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC) ==
+               CQ_ENET_RQ_DESC_FLAGS_CSUM_NOT_CALC;
+}
+
+static inline uint8_t
+enic_cq_rx_desc_ipv4_csum_ok(struct cq_enet_rq_desc *cqrd)
+{
+       return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK) ==
+               CQ_ENET_RQ_DESC_FLAGS_IPV4_CSUM_OK;
+}
+
+static inline uint8_t
+enic_cq_rx_desc_tcp_udp_csum_ok(struct cq_enet_rq_desc *cqrd)
+{
+       return (cqrd->flags & CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK) ==
+               CQ_ENET_RQ_DESC_FLAGS_TCP_UDP_CSUM_OK;
+}
+
+static inline uint8_t
+enic_cq_rx_desc_rss_type(struct cq_enet_rq_desc *cqrd)
+{
+       return (uint8_t)((le16_to_cpu(cqrd->q_number_rss_type_flags) >>
+               CQ_DESC_Q_NUM_BITS) & CQ_ENET_RQ_DESC_RSS_TYPE_MASK);
+}
+
+static inline uint32_t
+enic_cq_rx_desc_rss_hash(struct cq_enet_rq_desc *cqrd)
+{
+       return le32_to_cpu(cqrd->rss_hash);
+}
+
+static inline uint16_t
+enic_cq_rx_desc_vlan(struct cq_enet_rq_desc *cqrd)
+{
+       return le16_to_cpu(cqrd->vlan);
+}
+
+static inline uint16_t
+enic_cq_rx_desc_n_bytes(struct cq_desc *cqd)
+{
+       struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
+       return le16_to_cpu(cqrd->bytes_written_flags) &
+               CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
+}
+
+
+static inline uint8_t
+enic_cq_rx_check_err(struct cq_desc *cqd)
+{
+       struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
+       uint16_t bwflags;
+
+       bwflags = enic_cq_rx_desc_bwflags(cqrd);
+       if (unlikely(enic_cq_rx_desc_packet_error(bwflags)))
+               return 1;
+       return 0;
+}
+
+/* Lookup table to translate RX CQ flags to mbuf flags. */
+static uint32_t
+enic_cq_rx_flags_to_pkt_type(struct cq_desc *cqd, uint8_t tnl)
+{
+       struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
+       uint8_t cqrd_flags = cqrd->flags;
+       /*
+        * Odd-numbered entries are for tunnel packets. All packet type info
+        * applies to the inner packet, and there is no info on the outer
+        * packet. The outer flags in these entries exist only to avoid
+        * changing enic_cq_rx_to_pkt_flags(). They are cleared from mbuf
+        * afterwards.
+        *
+        * Also, as there is no tunnel type info (VXLAN, NVGRE, or GENEVE), set
+        * RTE_PTYPE_TUNNEL_GRENAT.
+        */
+       static const uint32_t cq_type_table[128] __rte_cache_aligned = {
+               [0x00] = RTE_PTYPE_UNKNOWN,
+               [0x01] = RTE_PTYPE_UNKNOWN |
+                        RTE_PTYPE_TUNNEL_GRENAT |
+                        RTE_PTYPE_INNER_L2_ETHER,
+               [0x20] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG,
+               [0x21] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG |
+                        RTE_PTYPE_TUNNEL_GRENAT |
+                        RTE_PTYPE_INNER_L2_ETHER |
+                        RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                        RTE_PTYPE_INNER_L4_NONFRAG,
+               [0x22] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP,
+               [0x23] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
+                        RTE_PTYPE_TUNNEL_GRENAT |
+                        RTE_PTYPE_INNER_L2_ETHER |
+                        RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                        RTE_PTYPE_INNER_L4_UDP,
+               [0x24] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP,
+               [0x25] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP |
+                        RTE_PTYPE_TUNNEL_GRENAT |
+                        RTE_PTYPE_INNER_L2_ETHER |
+                        RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                        RTE_PTYPE_INNER_L4_TCP,
+               [0x60] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
+               [0x61] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
+                        RTE_PTYPE_TUNNEL_GRENAT |
+                        RTE_PTYPE_INNER_L2_ETHER |
+                        RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                        RTE_PTYPE_INNER_L4_FRAG,
+               [0x62] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
+               [0x63] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
+                        RTE_PTYPE_TUNNEL_GRENAT |
+                        RTE_PTYPE_INNER_L2_ETHER |
+                        RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                        RTE_PTYPE_INNER_L4_FRAG,
+               [0x64] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
+               [0x65] = RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
+                        RTE_PTYPE_TUNNEL_GRENAT |
+                        RTE_PTYPE_INNER_L2_ETHER |
+                        RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
+                        RTE_PTYPE_INNER_L4_FRAG,
+               [0x10] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG,
+               [0x11] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_NONFRAG |
+                        RTE_PTYPE_TUNNEL_GRENAT |
+                        RTE_PTYPE_INNER_L2_ETHER |
+                        RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                        RTE_PTYPE_INNER_L4_NONFRAG,
+               [0x12] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP,
+               [0x13] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP |
+                        RTE_PTYPE_TUNNEL_GRENAT |
+                        RTE_PTYPE_INNER_L2_ETHER |
+                        RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                        RTE_PTYPE_INNER_L4_UDP,
+               [0x14] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP,
+               [0x15] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP |
+                        RTE_PTYPE_TUNNEL_GRENAT |
+                        RTE_PTYPE_INNER_L2_ETHER |
+                        RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                        RTE_PTYPE_INNER_L4_TCP,
+               [0x50] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
+               [0x51] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
+                        RTE_PTYPE_TUNNEL_GRENAT |
+                        RTE_PTYPE_INNER_L2_ETHER |
+                        RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                        RTE_PTYPE_INNER_L4_FRAG,
+               [0x52] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
+               [0x53] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
+                        RTE_PTYPE_TUNNEL_GRENAT |
+                        RTE_PTYPE_INNER_L2_ETHER |
+                        RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                        RTE_PTYPE_INNER_L4_FRAG,
+               [0x54] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG,
+               [0x55] = RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_FRAG |
+                        RTE_PTYPE_TUNNEL_GRENAT |
+                        RTE_PTYPE_INNER_L2_ETHER |
+                        RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
+                        RTE_PTYPE_INNER_L4_FRAG,
+               /* All others reserved */
+       };
+       cqrd_flags &= CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT
+               | CQ_ENET_RQ_DESC_FLAGS_IPV4 | CQ_ENET_RQ_DESC_FLAGS_IPV6
+               | CQ_ENET_RQ_DESC_FLAGS_TCP | CQ_ENET_RQ_DESC_FLAGS_UDP;
+       return cq_type_table[cqrd_flags + tnl];
+}
+
+static void
+enic_cq_rx_to_pkt_flags(struct cq_desc *cqd, struct rte_mbuf *mbuf)
+{
+       struct cq_enet_rq_desc *cqrd = (struct cq_enet_rq_desc *)cqd;
+       uint16_t bwflags, pkt_flags = 0, vlan_tci;
+       bwflags = enic_cq_rx_desc_bwflags(cqrd);
+       vlan_tci = enic_cq_rx_desc_vlan(cqrd);
+
+       /* VLAN stripped flag. The L2 packet type is also updated here. */
+       if (bwflags & CQ_ENET_RQ_DESC_FLAGS_VLAN_STRIPPED) {
+               pkt_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
+               mbuf->packet_type |= RTE_PTYPE_L2_ETHER;
+       } else {
+               if (vlan_tci != 0) {
+                       pkt_flags |= PKT_RX_VLAN;
+                       mbuf->packet_type |= RTE_PTYPE_L2_ETHER_VLAN;
+               } else {
+                       mbuf->packet_type |= RTE_PTYPE_L2_ETHER;
+               }
+       }
+       mbuf->vlan_tci = vlan_tci;
+
+       if ((cqd->type_color & CQ_DESC_TYPE_MASK) == CQ_DESC_TYPE_CLASSIFIER) {
+               struct cq_enet_rq_clsf_desc *clsf_cqd;
+               uint16_t filter_id;
+               clsf_cqd = (struct cq_enet_rq_clsf_desc *)cqd;
+               filter_id = clsf_cqd->filter_id;
+               if (filter_id) {
+                       pkt_flags |= PKT_RX_FDIR;
+                       if (filter_id != ENIC_MAGIC_FILTER_ID) {
+                               mbuf->hash.fdir.hi = clsf_cqd->filter_id;
+                               pkt_flags |= PKT_RX_FDIR_ID;
+                       }
+               }
+       } else if (enic_cq_rx_desc_rss_type(cqrd)) {
+               /* RSS flag */
+               pkt_flags |= PKT_RX_RSS_HASH;
+               mbuf->hash.rss = enic_cq_rx_desc_rss_hash(cqrd);
+       }
+
+       /* checksum flags */
+       if (mbuf->packet_type & (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L3_IPV6)) {
+               if (!enic_cq_rx_desc_csum_not_calc(cqrd)) {
+                       uint32_t l4_flags;
+                       l4_flags = mbuf->packet_type & RTE_PTYPE_L4_MASK;
+
+                       /*
+                        * When overlay offload is enabled, the NIC may
+                        * set ipv4_csum_ok=1 if the inner packet is IPv6.
+                        * So, explicitly check for IPv4 before checking
+                        * ipv4_csum_ok.
+                        */
+                       if (mbuf->packet_type & RTE_PTYPE_L3_IPV4) {
+                               if (enic_cq_rx_desc_ipv4_csum_ok(cqrd))
+                                       pkt_flags |= PKT_RX_IP_CKSUM_GOOD;
+                               else
+                                       pkt_flags |= PKT_RX_IP_CKSUM_BAD;
+                       }
+
+                       if (l4_flags == RTE_PTYPE_L4_UDP ||
+                           l4_flags == RTE_PTYPE_L4_TCP) {
+                               if (enic_cq_rx_desc_tcp_udp_csum_ok(cqrd))
+                                       pkt_flags |= PKT_RX_L4_CKSUM_GOOD;
+                               else
+                                       pkt_flags |= PKT_RX_L4_CKSUM_BAD;
+                       }
+               }
+       }
+
+       mbuf->ol_flags = pkt_flags;
+}
+
+#endif /* _ENIC_RXTX_COMMON_H_ */
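
As a worked example of the lookup above: an IPv4 TCP completion sets CQ_ENET_RQ_DESC_FLAGS_IPV4 and CQ_ENET_RQ_DESC_FLAGS_TCP, the mask reduces cqrd_flags to 0x24, and cq_type_table[0x24] yields RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP; with tnl == 1 the odd neighbor 0x25 adds the GRENAT tunnel and inner-packet types. In sketch form, with the index value inferred from the table layout itself:

/* Illustrative only; 0x24 = FLAGS_IPV4 | FLAGS_TCP per the entries above. */
uint8_t idx = CQ_ENET_RQ_DESC_FLAGS_IPV4 | CQ_ENET_RQ_DESC_FLAGS_TCP;
uint32_t ptype = cq_type_table[idx + tnl];	/* tnl = 1 selects 0x25 */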
diff --git a/drivers/net/enic/enic_rxtx_vec_avx2.c b/drivers/net/enic/enic_rxtx_vec_avx2.c
new file mode 100644 (file)
index 0000000..d218549
--- /dev/null
@@ -0,0 +1,831 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2008-2018 Cisco Systems, Inc.  All rights reserved.
+ * Copyright 2007 Nuova Systems, Inc.  All rights reserved.
+ */
+
+#include <rte_mbuf.h>
+#include <rte_ethdev_driver.h>
+
+#include "enic_compat.h"
+#include "rq_enet_desc.h"
+#include "enic.h"
+#include "enic_rxtx_common.h"
+
+#include <x86intrin.h>
+
+static struct rte_mbuf *
+rx_one(struct cq_enet_rq_desc *cqd, struct rte_mbuf *mb, struct enic *enic)
+{
+       bool tnl;
+
+       *(uint64_t *)&mb->rearm_data = enic->mbuf_initializer;
+       mb->data_len = cqd->bytes_written_flags &
+               CQ_ENET_RQ_DESC_BYTES_WRITTEN_MASK;
+       mb->pkt_len = mb->data_len;
+       tnl = enic->overlay_offload && (cqd->completed_index_flags &
+                                       CQ_ENET_RQ_DESC_FLAGS_FCOE) != 0;
+       mb->packet_type =
+               enic_cq_rx_flags_to_pkt_type((struct cq_desc *)cqd, tnl);
+       enic_cq_rx_to_pkt_flags((struct cq_desc *)cqd, mb);
+       /* Wipe the outer types set by enic_cq_rx_flags_to_pkt_type() */
+       if (tnl) {
+               mb->packet_type &= ~(RTE_PTYPE_L3_MASK |
+                                    RTE_PTYPE_L4_MASK);
+       }
+       return mb;
+}
+
+static uint16_t
+enic_noscatter_vec_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+                            uint16_t nb_pkts)
+{
+       struct rte_mbuf **rx, **rxmb;
+       uint16_t cq_idx, nb_rx, max_rx;
+       struct cq_enet_rq_desc *cqd;
+       struct rq_enet_desc *rqd;
+       struct vnic_cq *cq;
+       struct vnic_rq *rq;
+       struct enic *enic;
+       uint8_t color;
+
+       rq = rx_queue;
+       enic = vnic_dev_priv(rq->vdev);
+       cq = &enic->cq[enic_cq_rq(enic, rq->index)];
+       cq_idx = cq->to_clean;
+
+       /*
+        * Fill up the reserve of free mbufs. Below, we restock the receive
+        * ring with these mbufs to avoid allocation failures.
+        */
+       if (rq->num_free_mbufs == 0) {
+               if (rte_mempool_get_bulk(rq->mp, (void **)rq->free_mbufs,
+                                        ENIC_RX_BURST_MAX))
+                       return 0;
+               rq->num_free_mbufs = ENIC_RX_BURST_MAX;
+       }
+       /* Receive until the end of the ring, at most. */
+       max_rx = RTE_MIN(nb_pkts, rq->num_free_mbufs);
+       max_rx = RTE_MIN(max_rx, cq->ring.desc_count - cq_idx);
+
+       rxmb = rq->mbuf_ring + cq_idx;
+       color = cq->last_color;
+       cqd = (struct cq_enet_rq_desc *)(cq->ring.descs) + cq_idx;
+       rx = rx_pkts;
+       if (max_rx == 0 ||
+           (cqd->type_color & CQ_DESC_COLOR_MASK_NOSHIFT) == color)
+               return 0;
+
+       /* Step 1: Process one packet so the 256-bit loads below are aligned */
+       if (cq_idx & 0x1) {
+               if (unlikely(cqd->bytes_written_flags &
+                            CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) {
+                       rte_pktmbuf_free(*rxmb++);
+                       rte_atomic64_inc(&enic->soft_stats.rx_packet_errors);
+               } else {
+                       *rx++ = rx_one(cqd, *rxmb++, enic);
+               }
+               cqd++;
+               max_rx--;
+       }
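
Each cq_enet_rq_desc is 16 bytes, so the 256-bit loads in the main loop consume exactly two descriptors; handling one packet scalar when cq_idx is odd realigns the descriptor pointer to a 32-byte boundary. A compile-time check making that size assumption explicit:

	/* Two 16-byte descriptors per 256-bit load (assumption made explicit) */
	RTE_BUILD_BUG_ON(sizeof(struct cq_enet_rq_desc) != 16);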
+
+       const __m256i mask =
+               _mm256_set_epi8(/* Second descriptor */
+                       0xff, /* type_color */
+                       (CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT |
+                        CQ_ENET_RQ_DESC_FLAGS_IPV4 |
+                        CQ_ENET_RQ_DESC_FLAGS_IPV6 |
+                        CQ_ENET_RQ_DESC_FLAGS_TCP |
+                        CQ_ENET_RQ_DESC_FLAGS_UDP), /* flags */
+                       0, 0, /* checksum_fcoe */
+                       0xff, 0xff, /* vlan */
+                       0x3f, 0xff, /* bytes_written_flags */
+                       0xff, 0xff, 0xff, 0xff, /* rss_hash */
+                       0xff, 0xff, /* q_number_rss_type_flags */
+                       0, 0, /* completed_index_flags */
+                       /* First descriptor */
+                       0xff, /* type_color */
+                       (CQ_ENET_RQ_DESC_FLAGS_IPV4_FRAGMENT |
+                        CQ_ENET_RQ_DESC_FLAGS_IPV4 |
+                        CQ_ENET_RQ_DESC_FLAGS_IPV6 |
+                        CQ_ENET_RQ_DESC_FLAGS_TCP |
+                        CQ_ENET_RQ_DESC_FLAGS_UDP), /* flags */
+                       0, 0, /* checksum_fcoe */
+                       0xff, 0xff, /* vlan */
+                       0x3f, 0xff, /* bytes_written_flags */
+                       0xff, 0xff, 0xff, 0xff, /* rss_hash */
+                       0xff, 0xff, /* q_number_rss_type_flags */
+                       0, 0 /* completed_index_flags */
+                       );
+       const __m256i shuffle_mask =
+               _mm256_set_epi8(/* Second descriptor */
+                       7, 6, 5, 4,             /* rss = rss_hash */
+                       11, 10,                 /* vlan_tci = vlan */
+                       9, 8,                   /* data_len = bytes_written */
+                       0x80, 0x80, 9, 8,       /* pkt_len = bytes_written */
+                       0x80, 0x80, 0x80, 0x80, /* packet_type = 0 */
+                       /* First descriptor */
+                       7, 6, 5, 4,             /* rss = rss_hash */
+                       11, 10,                 /* vlan_tci = vlan */
+                       9, 8,                   /* data_len = bytes_written */
+                       0x80, 0x80, 9, 8,       /* pkt_len = bytes_written */
+                       0x80, 0x80, 0x80, 0x80  /* packet_type = 0 */
+                       );
+       /* Used to collect 8 flags from 8 desc into one register */
+       const __m256i flags_shuffle_mask =
+               _mm256_set_epi8(/* Second descriptor */
+                       1, 3, 9, 14,
+                       1, 3, 9, 14,
+                       1, 3, 9, 14,
+                       1, 3, 9, 14,
+                       /* First descriptor */
+                       1, 3, 9, 14,
+                       1, 3, 9, 14,
+                       1, 3, 9, 14,
+                       /*
+                        * Byte 3: upper byte of completed_index_flags
+                        *         bit 5 = fcoe (tunnel)
+                        * Byte 2: upper byte of q_number_rss_type_flags
+                        *         bits 2,3,4,5 = rss type
+                        *         bit 6 = csum_not_calc
+                        * Byte 1: upper byte of bytes_written_flags
+                        *         bit 6 = truncated
+                        *         bit 7 = vlan stripped
+                        * Byte 0: flags
+                        */
+                       1, 3, 9, 14
+                       );
+       /* Used to collect 8 VLAN IDs from 8 desc into one register */
+       const __m256i vlan_shuffle_mask =
+               _mm256_set_epi8(/* Second descriptor */
+                       0x80, 0x80, 11, 10,
+                       0x80, 0x80, 11, 10,
+                       0x80, 0x80, 11, 10,
+                       0x80, 0x80, 11, 10,
+                       /* First descriptor */
+                       0x80, 0x80, 11, 10,
+                       0x80, 0x80, 11, 10,
+                       0x80, 0x80, 11, 10,
+                       0x80, 0x80, 11, 10);
+       /* PKT_RX_RSS_HASH is 1<<1 so fits in 8-bit integer */
+       const __m256i rss_shuffle =
+               _mm256_set_epi8(/* second 128 bits */
+                       PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+                       PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+                       PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+                       PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+                       PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+                       0, /* rss_types = 0 */
+                       /* first 128 bits */
+                       PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+                       PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+                       PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+                       PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+                       PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH,
+                       0 /* rss_types = 0 */);
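
The masks above all rely on one idiom: _mm256_shuffle_epi8 performs sixteen parallel 4-bit table lookups per 128-bit lane, translating a register full of per-packet flag nibbles in a single instruction. A stand-alone sketch of the trick:

#include <immintrin.h>

/* Per lane: result[i] = (idx[i] & 0x80) ? 0 : table[idx[i] & 0x0f] */
static inline __m256i
lut16_lookup(__m256i table, __m256i idx)
{
	return _mm256_shuffle_epi8(table, idx);
}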
+       /*
+        * VLAN offload flags.
+        * shuffle index:
+        * vlan_stripped => bit 0
+        * vlan_id == 0  => bit 1
+        */
+       const __m256i vlan_shuffle =
+               _mm256_set_epi32(0, 0, 0, 0,
+                       PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, 0,
+                       PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN);
+       /* Use the same shuffle index as vlan_shuffle */
+       const __m256i vlan_ptype_shuffle =
+               _mm256_set_epi32(0, 0, 0, 0,
+                                RTE_PTYPE_L2_ETHER,
+                                RTE_PTYPE_L2_ETHER,
+                                RTE_PTYPE_L2_ETHER,
+                                RTE_PTYPE_L2_ETHER_VLAN);
+       /*
+        * CKSUM flags. Shift right so they fit in 8-bit integers.
+        * shuffle index:
+        * ipv4_csum_ok    => bit 3
+        * ip4             => bit 2
+        * tcp_or_udp      => bit 1
+        * tcp_udp_csum_ok => bit 0
+        */
+       const __m256i csum_shuffle =
+               _mm256_set_epi8(/* second 128 bits */
+                       /* 1111 ip4+ip4_ok+l4+l4_ok */
+                       ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1),
+                       /* 1110 ip4_ok+ip4+l4+!l4_ok */
+                       ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1),
+                       (PKT_RX_IP_CKSUM_GOOD >> 1), /* 1101 ip4+ip4_ok */
+                       (PKT_RX_IP_CKSUM_GOOD >> 1), /* 1100 ip4_ok+ip4 */
+                       (PKT_RX_L4_CKSUM_GOOD >> 1), /* 1011 l4+l4_ok */
+                       (PKT_RX_L4_CKSUM_BAD >> 1),  /* 1010 l4+!l4_ok */
+                       0, /* 1001 */
+                       0, /* 1000 */
+                       /* 0111 !ip4_ok+ip4+l4+l4_ok */
+                       ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD) >> 1),
+                       /* 0110 !ip4_ok+ip4+l4+!l4_ok */
+                       ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1),
+                       (PKT_RX_IP_CKSUM_BAD >> 1),  /* 0101 !ip4_ok+ip4 */
+                       (PKT_RX_IP_CKSUM_BAD >> 1),  /* 0100 !ip4_ok+ip4 */
+                       (PKT_RX_L4_CKSUM_GOOD >> 1), /* 0011 l4+l4_ok */
+                       (PKT_RX_L4_CKSUM_BAD >> 1),  /* 0010 l4+!l4_ok */
+                       0, /* 0001 */
+                       0, /* 0000 */
+                       /* first 128 bits */
+                       ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1),
+                       ((PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1),
+                       (PKT_RX_IP_CKSUM_GOOD >> 1),
+                       (PKT_RX_IP_CKSUM_GOOD >> 1),
+                       (PKT_RX_L4_CKSUM_GOOD >> 1),
+                       (PKT_RX_L4_CKSUM_BAD >> 1),
+                       0, 0,
+                       ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD) >> 1),
+                       ((PKT_RX_IP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1),
+                       (PKT_RX_IP_CKSUM_BAD >> 1),
+                       (PKT_RX_IP_CKSUM_BAD >> 1),
+                       (PKT_RX_L4_CKSUM_GOOD >> 1),
+                       (PKT_RX_L4_CKSUM_BAD >> 1),
+                       0, 0);
+       /*
+        * Non-fragment PTYPEs.
+        * Shuffle 4-bit index:
+        * ip6 => bit 0
+        * ip4 => bit 1
+        * udp => bit 2
+        * tcp => bit 3
+        *   bit
+        * 3 2 1 0
+        * -------
+        * 0 0 0 0 unknown
+        * 0 0 0 1 ip6 | nonfrag
+        * 0 0 1 0 ip4 | nonfrag
+        * 0 0 1 1 unknown
+        * 0 1 0 0 unknown
+        * 0 1 0 1 ip6 | udp
+        * 0 1 1 0 ip4 | udp
+        * 0 1 1 1 unknown
+        * 1 0 0 0 unknown
+        * 1 0 0 1 ip6 | tcp
+        * 1 0 1 0 ip4 | tcp
+        * 1 0 1 1 unknown
+        * 1 1 0 0 unknown
+        * 1 1 0 1 unknown
+        * 1 1 1 0 unknown
+        * 1 1 1 1 unknown
+        *
+        * PTYPEs do not fit in 8 bits, so shift right by 4.
+        */
+       const __m256i nonfrag_ptype_shuffle =
+               _mm256_set_epi8(/* second 128 bits */
+                       RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
+                       (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
+                       (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_NONFRAG) >> 4,
+                       (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_NONFRAG) >> 4,
+                       RTE_PTYPE_UNKNOWN,
+                       /* first 128 bits */
+                       RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
+                       (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_TCP) >> 4,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
+                       (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | RTE_PTYPE_L4_UDP) >> 4,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_NONFRAG) >> 4,
+                       (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_NONFRAG) >> 4,
+                       RTE_PTYPE_UNKNOWN);
+       /* Fragment PTYPEs. Use the same shuffle index as above. */
+       const __m256i frag_ptype_shuffle =
+               _mm256_set_epi8(/* second 128 bits */
+                       RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_FRAG) >> 4,
+                       (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_FRAG) >> 4,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_FRAG) >> 4,
+                       (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_FRAG) >> 4,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_FRAG) >> 4,
+                       (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_FRAG) >> 4,
+                       RTE_PTYPE_UNKNOWN,
+                       /* first 128 bits */
+                       RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_FRAG) >> 4,
+                       (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_FRAG) >> 4,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_FRAG) >> 4,
+                       (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_FRAG) >> 4,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       (RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_FRAG) >> 4,
+                       (RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+                        RTE_PTYPE_L4_FRAG) >> 4,
+                       RTE_PTYPE_UNKNOWN);
+       /*
+        * Tunnel PTYPEs. Use the same shuffle index as above.
+        * L4 types are not part of this table. They come from non-tunnel
+        * types above.
+        */
+       const __m256i tnl_l3_ptype_shuffle =
+               _mm256_set_epi8(/* second 128 bits */
+                       RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
+                       RTE_PTYPE_UNKNOWN,
+                       /* first 128 bits */
+                       RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
+                       RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+                       RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN >> 16,
+                       RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN >> 16,
+                       RTE_PTYPE_UNKNOWN);
+
+       const __m256i mbuf_init = _mm256_set_epi64x(0, enic->mbuf_initializer,
+                                                   0, enic->mbuf_initializer);
+
+       /*
+        * --- cq desc fields ---    offset
+        * completed_index_flags    - 0   use: fcoe
+        * q_number_rss_type_flags  - 2   use: rss types, csum_not_calc
+        * rss_hash                 - 4   ==> mbuf.hash.rss
+        * bytes_written_flags      - 8   ==> mbuf.pkt_len,data_len
+        *                                use: truncated, vlan_stripped
+        * vlan                     - 10  ==> mbuf.vlan_tci
+        * checksum_fcoe            - 12  (unused)
+        * flags                    - 14  use: all bits
+        * type_color               - 15  (unused)
+        *
+        * --- mbuf fields ---       offset
+        * rearm_data              ---- 16
+        * data_off    - 0      (mbuf_init) -+
+        * refcnt      - 2      (mbuf_init)  |
+        * nb_segs     - 4      (mbuf_init)  | 16B 128b
+        * port        - 6      (mbuf_init)  |
+        * ol_flag     - 8      (from cqd)  -+
+        * rx_descriptor_fields1   ---- 32
+        * packet_type - 0      (from cqd)  -+
+        * pkt_len     - 4      (from cqd)   |
+        * data_len    - 8      (from cqd)   | 16B 128b
+        * vlan_tci    - 10     (from cqd)   |
+        * rss         - 12     (from cqd)  -+
+        */
+
+       __m256i overlay_enabled =
+               _mm256_set1_epi32((uint32_t)enic->overlay_offload);
+
+       /* Step 2: Process 8 packets per loop using SIMD */
+       while (max_rx > 7 && (((cqd + 7)->type_color &
+                              CQ_DESC_COLOR_MASK_NOSHIFT) != color)) {
+               /* Load 8 16B CQ descriptors */
+               __m256i cqd01 = _mm256_load_si256((void *)cqd);
+               __m256i cqd23 = _mm256_load_si256((void *)(cqd + 2));
+               __m256i cqd45 = _mm256_load_si256((void *)(cqd + 4));
+               __m256i cqd67 = _mm256_load_si256((void *)(cqd + 6));
+               /* Copy 8 mbuf pointers to rx_pkts */
+               _mm256_storeu_si256((void *)rx,
+                                   _mm256_loadu_si256((void *)rxmb));
+               _mm256_storeu_si256((void *)(rx + 4),
+                                   _mm256_loadu_si256((void *)(rxmb + 4)));
+
+               /*
+                * Collect 8 flags (each 32 bits) into one register.
+                * 4 shuffles, 3 blends, 1 permute for 8 desc: 1 inst/desc
+                */
+               __m256i flags01 =
+                       _mm256_shuffle_epi8(cqd01, flags_shuffle_mask);
+               /*
+                * Shuffle above produces 8 x 32-bit flags for 8 descriptors
+                * in this order: 0, 0, 0, 0, 1, 1, 1, 1
+                * The duplicates in each 128-bit lane simplify the blending
+                * below.
+                */
+               __m256i flags23 =
+                       _mm256_shuffle_epi8(cqd23, flags_shuffle_mask);
+               __m256i flags45 =
+                       _mm256_shuffle_epi8(cqd45, flags_shuffle_mask);
+               __m256i flags67 =
+                       _mm256_shuffle_epi8(cqd67, flags_shuffle_mask);
+               /* 1st blend produces flags for desc: 0, 2, 0, 0, 1, 3, 1, 1 */
+               __m256i flags0_3 = _mm256_blend_epi32(flags01, flags23, 0x22);
+               /* 2nd blend produces flags for desc: 4, 4, 4, 6, 5, 5, 5, 7 */
+               __m256i flags4_7 = _mm256_blend_epi32(flags45, flags67, 0x88);
+               /* 3rd blend produces flags for desc: 0, 2, 4, 6, 1, 3, 5, 7 */
+               __m256i flags0_7 = _mm256_blend_epi32(flags0_3, flags4_7, 0xcc);
+               /*
+                * Swap to reorder flags in this order: 1, 3, 5, 7, 0, 2, 4, 6
+                * This order simplifies blend operations way below that
+                * produce 'rearm' data for each mbuf.
+                */
+               flags0_7 = _mm256_permute4x64_epi64(flags0_7,
+                       (1 << 6) + (0 << 4) + (3 << 2) + 2);
+
+               /*
+                * Check the truncated bits and bail out early if any are set.
+                * 6 avx inst, 1 or, 1 if-then-else for 8 desc: 1 inst/desc
+                */
+               __m256i trunc =
+                       _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 17), 31);
+               trunc = _mm256_add_epi64(trunc, _mm256_permute4x64_epi64(trunc,
+                       (1 << 6) + (0 << 4) + (3 << 2) + 2));
+               /* 0:63 contains 1+3+0+2 and 64:127 contains 5+7+4+6 */
+               if (_mm256_extract_epi64(trunc, 0) ||
+                   _mm256_extract_epi64(trunc, 1))
+                       break;
+
+               /*
+                * Compute PKT_RX_RSS_HASH.
+                * Use 2 shifts and 1 shuffle for 8 desc: 0.375 inst/desc
+                * RSS types in byte 0, 4, 8, 12, 16, 20, 24, 28
+                * Everything else is zero.
+                */
+               __m256i rss_types =
+                       _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 10), 28);
+               /*
+                * RSS flags (PKT_RX_RSS_HASH) are in
+                * byte 0, 4, 8, 12, 16, 20, 24, 28
+                * Everything else is zero.
+                */
+               __m256i rss_flags = _mm256_shuffle_epi8(rss_shuffle, rss_types);
+
+               /*
+                * Compute CKSUM flags. First build the index and then
+                * use it to shuffle csum_shuffle.
+                * 20 instructions including const loads: 2.5 inst/desc
+                */
+               /*
+                * csum_not_calc (bit 22)
+                * csum_not_calc (0) => 0xffffffff
+                * csum_not_calc (1) => 0x0
+                */
+               const __m256i zero4 = _mm256_setzero_si256();
+               const __m256i mask22 = _mm256_set1_epi32(0x400000);
+               __m256i csum_not_calc = _mm256_cmpeq_epi32(zero4,
+                       _mm256_and_si256(flags0_7, mask22));
+               /*
+                * (tcp|udp) && !fragment => bit 1
+                * tcp = bit 2, udp = bit 1, frag = bit 6
+                */
+               const __m256i mask1 = _mm256_set1_epi32(0x2);
+               __m256i tcp_udp =
+                       _mm256_andnot_si256(_mm256_srli_epi32(flags0_7, 5),
+                               _mm256_or_si256(flags0_7,
+                                       _mm256_srli_epi32(flags0_7, 1)));
+               tcp_udp = _mm256_and_si256(tcp_udp, mask1);
+               /* ipv4 (bit 5) => bit 2 */
+               const __m256i mask2 = _mm256_set1_epi32(0x4);
+               __m256i ipv4 = _mm256_and_si256(mask2,
+                       _mm256_srli_epi32(flags0_7, 3));
+               /*
+                * ipv4_csum_ok (bit 3) => bit 3
+                * tcp_udp_csum_ok (bit 0) => bit 0
+                * 0x9
+                */
+               const __m256i mask0_3 = _mm256_set1_epi32(0x9);
+               __m256i csum_idx = _mm256_and_si256(flags0_7, mask0_3);
+               csum_idx = _mm256_and_si256(csum_not_calc,
+                       _mm256_or_si256(_mm256_or_si256(csum_idx, ipv4),
+                               tcp_udp));
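+               /*
+                * csum_idx now holds, per descriptor: bit 3 = ipv4_csum_ok,
+                * bit 2 = ipv4, bit 1 = (tcp|udp) && !fragment,
+                * bit 0 = tcp_udp_csum_ok; all bits are cleared when
+                * csum_not_calc is set.
+                */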
+               __m256i csum_flags =
+                       _mm256_shuffle_epi8(csum_shuffle, csum_idx);
+               /* Shift left to restore CKSUM flags. See csum_shuffle. */
+               csum_flags = _mm256_slli_epi32(csum_flags, 1);
+               /* Combine csum flags and offload flags: 0.125 inst/desc */
+               rss_flags = _mm256_or_si256(rss_flags, csum_flags);
+
+               /*
+                * Collect 8 VLAN IDs and compute vlan_id != 0 on each.
+                * 4 shuffles, 3 blends, 1 permute, 1 cmp, 1 sub for 8 desc:
+                * 1.25 inst/desc
+                */
+               __m256i vlan01 = _mm256_shuffle_epi8(cqd01, vlan_shuffle_mask);
+               __m256i vlan23 = _mm256_shuffle_epi8(cqd23, vlan_shuffle_mask);
+               __m256i vlan45 = _mm256_shuffle_epi8(cqd45, vlan_shuffle_mask);
+               __m256i vlan67 = _mm256_shuffle_epi8(cqd67, vlan_shuffle_mask);
+               __m256i vlan0_3 = _mm256_blend_epi32(vlan01, vlan23, 0x22);
+               __m256i vlan4_7 = _mm256_blend_epi32(vlan45, vlan67, 0x88);
+               /* desc: 0, 2, 4, 6, 1, 3, 5, 7 */
+               __m256i vlan0_7 = _mm256_blend_epi32(vlan0_3, vlan4_7, 0xcc);
+               /* desc: 1, 3, 5, 7, 0, 2, 4, 6 */
+               vlan0_7 = _mm256_permute4x64_epi64(vlan0_7,
+                       (1 << 6) + (0 << 4) + (3 << 2) + 2);
+               /*
+                * Comparing vlan_id with 0 produces 0xffffffff (-1) for
+                * vlan 0 and 0 for a non-zero vlan. Subtracting that
+                * result from 0 then produces 0 - (-1) = 1 for vlan 0
+                * and 0 - 0 = 0 for a non-zero vlan.
+                */
+               vlan0_7 = _mm256_cmpeq_epi32(zero4, vlan0_7);
+               /* vlan_id != 0 => 0, vlan_id == 0 => 1 */
+               vlan0_7 = _mm256_sub_epi32(zero4, vlan0_7);
+
+               /*
+                * Compute PKT_RX_VLAN and PKT_RX_VLAN_STRIPPED.
+                * Use 3 shifts, 1 or, 1 shuffle for 8 desc: 0.625 inst/desc
+                * VLAN offload flags in byte 0, 4, 8, 12, 16, 20, 24, 28
+                * Everything else is zero.
+                */
+               __m256i vlan_idx =
+                       _mm256_or_si256(/* vlan_stripped => bit 0 */
+                               _mm256_srli_epi32(_mm256_slli_epi32(flags0_7,
+                                       16), 31),
+                               /* (vlan_id == 0) => bit 1 */
+                               _mm256_slli_epi32(vlan0_7, 1));
+               /*
+                * The index captures 4 cases.
+                * stripped, id == 0     ==> 11b = 3
+                * stripped, id != 0     ==> 01b = 1
+                * not stripped, id == 0 ==> 10b = 2
+                * not stripped, id != 0 ==> 00b = 0
+                */
+               __m256i vlan_flags = _mm256_permutevar8x32_epi32(vlan_shuffle,
+                       vlan_idx);
+               /* Combine vlan and offload flags: 0.125 inst/desc */
+               rss_flags = _mm256_or_si256(rss_flags, vlan_flags);
+
+               /*
+                * Compute non-tunnel PTYPEs.
+                * 17 inst / 8 desc = 2.125 inst/desc
+                */
+               /* ETHER and ETHER_VLAN */
+               __m256i vlan_ptype =
+                       _mm256_permutevar8x32_epi32(vlan_ptype_shuffle,
+                               vlan_idx);
+               /* Build the ptype index from flags */
+               tcp_udp = _mm256_slli_epi32(flags0_7, 29);
+               tcp_udp = _mm256_slli_epi32(_mm256_srli_epi32(tcp_udp, 30), 2);
+               __m256i ip4_ip6 =
+                       _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 26), 30);
+               __m256i ptype_idx = _mm256_or_si256(tcp_udp, ip4_ip6);
+               __m256i frag_bit =
+                       _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 25), 31);
+               __m256i nonfrag_ptype =
+                       _mm256_shuffle_epi8(nonfrag_ptype_shuffle, ptype_idx);
+               __m256i frag_ptype =
+                       _mm256_shuffle_epi8(frag_ptype_shuffle, ptype_idx);
+               /*
+                * Zero out the unwanted types and combine the remaining bits.
+                * The effect is the same as selecting non-frag or frag types
+                * depending on the frag bit.
+                */
+               nonfrag_ptype = _mm256_and_si256(nonfrag_ptype,
+                       _mm256_cmpeq_epi32(zero4, frag_bit));
+               frag_ptype = _mm256_and_si256(frag_ptype,
+                       _mm256_cmpgt_epi32(frag_bit, zero4));
+               __m256i ptype = _mm256_or_si256(nonfrag_ptype, frag_ptype);
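+               /* Undo the >> 4 applied when building the shuffle tables */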
+               ptype = _mm256_slli_epi32(ptype, 4);
+               /*
+                * Compute tunnel PTYPEs.
+                * 15 inst / 8 desc = 1.875 inst/desc
+                */
+               __m256i tnl_l3_ptype =
+                       _mm256_shuffle_epi8(tnl_l3_ptype_shuffle, ptype_idx);
+               tnl_l3_ptype = _mm256_slli_epi32(tnl_l3_ptype, 16);
+               /*
+                * Shift non-tunnel L4 types to make them tunnel types.
+                * RTE_PTYPE_L4_TCP << 16 == RTE_PTYPE_INNER_L4_TCP
+                */
+               __m256i tnl_l4_ptype =
+                       _mm256_slli_epi32(_mm256_and_si256(ptype,
+                               _mm256_set1_epi32(RTE_PTYPE_L4_MASK)), 16);
+               __m256i tnl_ptype =
+                       _mm256_or_si256(tnl_l3_ptype, tnl_l4_ptype);
+               tnl_ptype = _mm256_or_si256(tnl_ptype,
+                       _mm256_set1_epi32(RTE_PTYPE_TUNNEL_GRENAT |
+                               RTE_PTYPE_INNER_L2_ETHER));
+               /*
+                * Select non-tunnel or tunnel types by zeroing out the
+                * unwanted ones.
+                */
+               __m256i tnl_flags = _mm256_and_si256(overlay_enabled,
+                       _mm256_srli_epi32(_mm256_slli_epi32(flags0_7, 2), 31));
+               tnl_ptype = _mm256_and_si256(tnl_ptype,
+                       _mm256_sub_epi32(zero4, tnl_flags));
+               ptype = _mm256_and_si256(ptype,
+                       _mm256_cmpeq_epi32(zero4, tnl_flags));
+               /*
+                * Combine types and swap to have ptypes in the same order
+                * as desc.
+                * desc: 0 2 4 6 1 3 5 7
+                * 3 inst / 8 desc = 0.375 inst/desc
+                */
+               ptype = _mm256_or_si256(ptype, tnl_ptype);
+               ptype = _mm256_or_si256(ptype, vlan_ptype);
+               ptype = _mm256_permute4x64_epi64(ptype,
+                       (1 << 6) + (0 << 4) + (3 << 2) + 2);
+
+               /*
+                * Mask packet length.
+                * Use 4 ands: 0.5 instructions/desc
+                */
+               cqd01 = _mm256_and_si256(cqd01, mask);
+               cqd23 = _mm256_and_si256(cqd23, mask);
+               cqd45 = _mm256_and_si256(cqd45, mask);
+               cqd67 = _mm256_and_si256(cqd67, mask);
+               /*
+                * Shuffle. Two 16B sets of the mbuf fields.
+                * packet_type, pkt_len, data_len, vlan_tci, rss
+                */
+               __m256i rearm01 = _mm256_shuffle_epi8(cqd01, shuffle_mask);
+               __m256i rearm23 = _mm256_shuffle_epi8(cqd23, shuffle_mask);
+               __m256i rearm45 = _mm256_shuffle_epi8(cqd45, shuffle_mask);
+               __m256i rearm67 = _mm256_shuffle_epi8(cqd67, shuffle_mask);
+
+               /*
+                * Blend in ptypes
+                * 4 blends and 3 shuffles for 8 desc: 0.875 inst/desc
+                */
+               rearm01 = _mm256_blend_epi32(rearm01, ptype, 0x11);
+               rearm23 = _mm256_blend_epi32(rearm23,
+                       _mm256_shuffle_epi32(ptype, 1), 0x11);
+               rearm45 = _mm256_blend_epi32(rearm45,
+                       _mm256_shuffle_epi32(ptype, 2), 0x11);
+               rearm67 = _mm256_blend_epi32(rearm67,
+                       _mm256_shuffle_epi32(ptype, 3), 0x11);
+
+               /*
+                * Move rss_flags into ol_flags in mbuf_init.
+                * Use 1 shift and 1 blend for each desc: 2 inst/desc
+                */
+               __m256i mbuf_init4_5 = _mm256_blend_epi32(mbuf_init,
+                       rss_flags, 0x44);
+               __m256i mbuf_init2_3 = _mm256_blend_epi32(mbuf_init,
+                       _mm256_slli_si256(rss_flags, 4), 0x44);
+               __m256i mbuf_init0_1 = _mm256_blend_epi32(mbuf_init,
+                       _mm256_slli_si256(rss_flags, 8), 0x44);
+               __m256i mbuf_init6_7 = _mm256_blend_epi32(mbuf_init,
+                       _mm256_srli_si256(rss_flags, 4), 0x44);
+
+               /*
+                * Build rearm, one per desc.
+                * 8 blends and 4 permutes: 1.5 inst/desc
+                */
+               __m256i rearm0 = _mm256_blend_epi32(rearm01,
+                       mbuf_init0_1, 0xf0);
+               __m256i rearm1 = _mm256_blend_epi32(mbuf_init0_1,
+                       rearm01, 0xf0);
+               __m256i rearm2 = _mm256_blend_epi32(rearm23,
+                       mbuf_init2_3, 0xf0);
+               __m256i rearm3 = _mm256_blend_epi32(mbuf_init2_3,
+                       rearm23, 0xf0);
+               /* Swap the upper and lower 128-bit halves */
+               rearm0 = _mm256_permute4x64_epi64(rearm0,
+                       (1 << 6) + (0 << 4) + (3 << 2) + 2);
+               rearm2 = _mm256_permute4x64_epi64(rearm2,
+                       (1 << 6) + (0 << 4) + (3 << 2) + 2);
+               /* Second set of 4 descriptors */
+               __m256i rearm4 = _mm256_blend_epi32(rearm45,
+                       mbuf_init4_5, 0xf0);
+               __m256i rearm5 = _mm256_blend_epi32(mbuf_init4_5,
+                       rearm45, 0xf0);
+               __m256i rearm6 = _mm256_blend_epi32(rearm67,
+                       mbuf_init6_7, 0xf0);
+               __m256i rearm7 = _mm256_blend_epi32(mbuf_init6_7,
+                       rearm67, 0xf0);
+               rearm4 = _mm256_permute4x64_epi64(rearm4,
+                       (1 << 6) + (0 << 4) + (3 << 2) + 2);
+               rearm6 = _mm256_permute4x64_epi64(rearm6,
+                       (1 << 6) + (0 << 4) + (3 << 2) + 2);
+
+               /*
+                * Write out 32B of mbuf fields.
+                * data_off    - off 0  (mbuf_init)
+                * refcnt      - 2      (mbuf_init)
+                * nb_segs     - 4      (mbuf_init)
+                * port        - 6      (mbuf_init)
+                * ol_flag     - 8      (from cqd)
+                * packet_type - 16     (from cqd)
+                * pkt_len     - 20     (from cqd)
+                * data_len    - 24     (from cqd)
+                * vlan_tci    - 26     (from cqd)
+                * rss         - 28     (from cqd)
+                */
+               _mm256_storeu_si256((__m256i *)&rxmb[0]->rearm_data, rearm0);
+               _mm256_storeu_si256((__m256i *)&rxmb[1]->rearm_data, rearm1);
+               _mm256_storeu_si256((__m256i *)&rxmb[2]->rearm_data, rearm2);
+               _mm256_storeu_si256((__m256i *)&rxmb[3]->rearm_data, rearm3);
+               _mm256_storeu_si256((__m256i *)&rxmb[4]->rearm_data, rearm4);
+               _mm256_storeu_si256((__m256i *)&rxmb[5]->rearm_data, rearm5);
+               _mm256_storeu_si256((__m256i *)&rxmb[6]->rearm_data, rearm6);
+               _mm256_storeu_si256((__m256i *)&rxmb[7]->rearm_data, rearm7);
+
+               max_rx -= 8;
+               cqd += 8;
+               rx += 8;
+               rxmb += 8;
+       }
+
+       /*
+        * Step 3: Slow path to handle a small (<8) number of packets and
+        * occasional truncated packets.
+        */
+       while (max_rx && ((cqd->type_color &
+                          CQ_DESC_COLOR_MASK_NOSHIFT) != color)) {
+               if (unlikely(cqd->bytes_written_flags &
+                            CQ_ENET_RQ_DESC_FLAGS_TRUNCATED)) {
+                       rte_pktmbuf_free(*rxmb++);
+                       rte_atomic64_inc(&enic->soft_stats.rx_packet_errors);
+               } else {
+                       *rx++ = rx_one(cqd, *rxmb++, enic);
+               }
+               cqd++;
+               max_rx--;
+       }
+
+       /* Number of descriptors visited */
+       nb_rx = cqd - (struct cq_enet_rq_desc *)(cq->ring.descs) - cq_idx;
+       if (nb_rx == 0)
+               return 0;
+       rqd = ((struct rq_enet_desc *)rq->ring.descs) + cq_idx;
+       rxmb = rq->mbuf_ring + cq_idx;
+       cq_idx += nb_rx;
+       rq->rx_nb_hold += nb_rx;
+       if (unlikely(cq_idx == cq->ring.desc_count)) {
+               cq_idx = 0;
+               cq->last_color ^= CQ_DESC_COLOR_MASK_NOSHIFT;
+       }
+       cq->to_clean = cq_idx;
+
+       /* Step 4: Restock RQ with new mbufs */
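+       /* Consume the next nb_rx replacement mbufs from the stash */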
+       memcpy(rxmb, rq->free_mbufs + ENIC_RX_BURST_MAX - rq->num_free_mbufs,
+              sizeof(struct rte_mbuf *) * nb_rx);
+       rq->num_free_mbufs -= nb_rx;
+       while (nb_rx) {
+               rqd->address = (*rxmb)->buf_iova + RTE_PKTMBUF_HEADROOM;
+               nb_rx--;
+               rqd++;
+               rxmb++;
+       }
+       if (rq->rx_nb_hold > rq->rx_free_thresh) {
+               rq->posted_index = enic_ring_add(rq->ring.desc_count,
+                                                rq->posted_index,
+                                                rq->rx_nb_hold);
+               rq->rx_nb_hold = 0;
+               rte_wmb();
+               iowrite32_relaxed(rq->posted_index,
+                                 &rq->ctrl->posted_index);
+       }
+
+       return rx - rx_pkts;
+}
+
+bool
+enic_use_vector_rx_handler(struct enic *enic)
+{
+       struct rte_eth_dev *eth_dev;
+       struct rte_fdir_conf *fconf;
+
+       eth_dev = enic->rte_dev;
+       /* The user must explicitly request the avx2 handler */
+       if (!enic->enable_avx2_rx)
+               return false;
+       /* Do not support scatter Rx */
+       if (!(enic->rq_count > 0 && enic->rq[0].data_queue_enable == 0))
+               return false;
+       /* Do not support fdir/flow */
+       fconf = &eth_dev->data->dev_conf.fdir_conf;
+       if (fconf->mode != RTE_FDIR_MODE_NONE)
+               return false;
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) {
+               PMD_INIT_LOG(DEBUG, " use the non-scatter avx2 Rx handler");
+               eth_dev->rx_pkt_burst = &enic_noscatter_vec_recv_pkts;
+               return true;
+       }
+       return false;
+}
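For context, the avx2 handler is strictly opt-in: it runs only when the user
sets the enic devarg, Rx scatter is disabled, flow director is unused, and the
CPU reports AVX2. A minimal sketch of opting in from application code follows;
the helper is hypothetical and the "enable-avx2-rx" devarg spelling is an
assumption to verify against the enic guide. Note that rte_dev_probe() is
experimental in 18.11 and requires ALLOW_EXPERIMENTAL_API.

    #include <stdio.h>
    #include <rte_dev.h>

    /* Hypothetical helper: hot-plug an enic port with the avx2 Rx path
     * requested; the devarg name is an assumption (see the enic guide). */
    static int
    attach_enic_with_avx2_rx(const char *pci_addr)
    {
            char devargs[128];

            snprintf(devargs, sizeof(devargs), "%s,enable-avx2-rx=1", pci_addr);
            /* Equivalent to passing "-w <addr>,enable-avx2-rx=1" to EAL */
            return rte_dev_probe(devargs);
    }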
index bfd4e23..0644871 100644 (file)
@@ -17,3 +17,19 @@ sources = files(
        )
 deps += ['hash']
 includes += include_directories('base')
+
+# The current implementation assumes 64-bit pointers
+if dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX2') and cc.sizeof('void *') == 8
+       sources += files('enic_rxtx_vec_avx2.c')
+# Build the avx2 handler if the compiler supports it, even though 'machine'
+# does not. This is to support users who build for the min supported machine
+# and need to run the binary on newer CPUs too.
+# This part is from i40e meson.build
+elif cc.has_argument('-mavx2') and cc.sizeof('void *') == 8
+       enic_avx2_lib = static_library('enic_avx2_lib',
+                       'enic_rxtx_vec_avx2.c',
+                       dependencies: [static_rte_ethdev, static_rte_bus_pci],
+                       include_directories: includes,
+                       c_args: [cflags, '-mavx2'])
+       objs += enic_avx2_lib.extract_objects('enic_rxtx_vec_avx2.c')
+endif
index 657919f..06e859e 100644 (file)
@@ -71,7 +71,7 @@ failsafe_hotplug_alarm_install(struct rte_eth_dev *dev)
                return -EINVAL;
        if (PRIV(dev)->pending_alarm)
                return 0;
-       ret = rte_eal_alarm_set(hotplug_poll * 1000,
+       ret = rte_eal_alarm_set(failsafe_hotplug_poll * 1000,
                                fs_hotplug_alarm,
                                dev);
        if (ret) {
@@ -225,7 +225,7 @@ fs_eth_dev_create(struct rte_vdev_device *vdev)
                goto unregister_new_callback;
        }
        mac = &dev->data->mac_addrs[0];
-       if (mac_from_arg) {
+       if (failsafe_mac_from_arg) {
                /*
                 * If MAC address was provided as a parameter,
                 * apply to all probed slaves.
@@ -280,7 +280,8 @@ free_args:
 free_subs:
        fs_sub_device_free(dev);
 free_dev:
-       rte_free(PRIV(dev));
+       /* mac_addrs must not be freed alone because it is part of dev_private */
+       dev->data->mac_addrs = NULL;
        rte_eth_dev_release_port(dev);
        return -1;
 }
@@ -304,7 +305,9 @@ fs_rte_eth_free(const char *name)
        ret = pthread_mutex_destroy(&PRIV(dev)->hotplug_mutex);
        if (ret)
                ERROR("Error while destroying hotplug mutex");
-       rte_free(PRIV(dev));
+       rte_free(PRIV(dev)->mcast_addrs);
+       /* mac_addrs must not be freed alone because it is part of dev_private */
+       dev->data->mac_addrs = NULL;
        rte_eth_dev_release_port(dev);
        return ret;
 }
index 626883c..c4b220c 100644 (file)
 typedef int (parse_cb)(struct rte_eth_dev *dev, const char *params,
                uint8_t head);
 
-uint64_t hotplug_poll = FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS;
-int mac_from_arg = 0;
+uint64_t failsafe_hotplug_poll = FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS;
+int failsafe_mac_from_arg;
 
-const char *pmd_failsafe_init_parameters[] = {
+static const char * const pmd_failsafe_init_parameters[] = {
        PMD_FAILSAFE_HOTPLUG_POLL_KVARG,
        PMD_FAILSAFE_MAC_KVARG,
        NULL,
@@ -420,7 +420,7 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
                if (arg_count == 1) {
                        ret = rte_kvargs_process(kvlist,
                                        PMD_FAILSAFE_HOTPLUG_POLL_KVARG,
-                                       &fs_get_u64_arg, &hotplug_poll);
+                                       &fs_get_u64_arg, &failsafe_hotplug_poll);
                        if (ret < 0)
                                goto free_kvlist;
                }
@@ -435,7 +435,7 @@ failsafe_args_parse(struct rte_eth_dev *dev, const char *params)
                        if (ret < 0)
                                goto free_kvlist;
 
-                       mac_from_arg = 1;
+                       failsafe_mac_from_arg = 1;
                }
        }
        PRIV(dev)->state = DEV_PARSED;
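As a usage sketch of the two parameters parsed above, a fail-safe port can be
instantiated from code with rte_vdev_init(); the PCI address and MAC below are
purely illustrative.

    #include <rte_bus_vdev.h>

    /* Create a fail-safe port over one PCI sub-device, with a fixed MAC
     * and a 256 ms hotplug poll period. */
    static int
    create_failsafe_port(void)
    {
            return rte_vdev_init("net_failsafe0",
                    "dev(0000:84:00.0),mac=de:ad:be:ef:01:02,hotplug_poll=256");
    }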
index ce1633f..8a888b1 100644 (file)
@@ -144,8 +144,7 @@ fs_bus_uninit(struct rte_eth_dev *dev)
        int ret = 0;
 
        FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
-               sdev_ret = rte_eal_hotplug_remove(sdev->bus->name,
-                                                       sdev->dev->name);
+               sdev_ret = rte_dev_remove(sdev->dev);
                if (sdev_ret) {
                        ERROR("Failed to remove requested device %s (err: %d)",
                              sdev->dev->name, sdev_ret);
index 5b5cb3b..1783165 100644 (file)
@@ -179,6 +179,23 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
                        return ret;
                }
        }
+       /*
+        * Propagate multicast MAC addresses to sub-devices
+        * if a non-zero number of addresses is set.
+        * The check avoids breaking fail-safe on sub-devices
+        * that do not support the operation when the feature
+        * is not actually used.
+        */
+       if (PRIV(dev)->nb_mcast_addr > 0) {
+               DEBUG("Configuring multicast MAC addresses");
+               ret = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev),
+                                                  PRIV(dev)->mcast_addrs,
+                                                  PRIV(dev)->nb_mcast_addr);
+               if (ret) {
+                       ERROR("Failed to apply multicast MAC addresses");
+                       return ret;
+               }
+       }
        /* VLAN filter */
        vfc1 = &dev->data->vlan_filter_conf;
        vfc2 = &edev->data->vlan_filter_conf;
@@ -230,9 +247,9 @@ fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
                        DEBUG("Creating flow #%" PRIu32, i++);
                        flow->flows[SUB_ID(sdev)] =
                                rte_flow_create(PORT_ID(sdev),
-                                               &flow->fd->attr,
-                                               flow->fd->items,
-                                               flow->fd->actions,
+                                               flow->rule.attr,
+                                               flow->rule.pattern,
+                                               flow->rule.actions,
                                                &ferror);
                        ret = rte_errno;
                        if (ret)
@@ -265,8 +282,7 @@ fs_dev_remove(struct sub_device *sdev)
                sdev->state = DEV_PROBED;
                /* fallthrough */
        case DEV_PROBED:
-               ret = rte_eal_hotplug_remove(sdev->bus->name,
-                                            sdev->dev->name);
+               ret = rte_dev_remove(sdev->dev);
                if (ret) {
                        ERROR("Bus detach failed for sub_device %u",
                              SUB_ID(sdev));
@@ -366,6 +382,88 @@ failsafe_dev_remove(struct rte_eth_dev *dev)
                }
 }
 
+static int
+failsafe_eth_dev_rx_queues_sync(struct rte_eth_dev *dev)
+{
+       struct rxq *rxq;
+       int ret;
+       uint16_t i;
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               rxq = dev->data->rx_queues[i];
+
+               if (rxq->info.conf.rx_deferred_start &&
+                   dev->data->rx_queue_state[i] ==
+                                               RTE_ETH_QUEUE_STATE_STARTED) {
+                       /*
+                        * The sub-device Rx queue is not started on device
+                        * start when the deferred start flag is set, so it
+                        * must be started manually if the corresponding
+                        * fail-safe Rx queue was started earlier.
+                        */
+                       ret = dev->dev_ops->rx_queue_start(dev, i);
+                       if (ret) {
+                               ERROR("Could not synchronize Rx queue %d", i);
+                               return ret;
+                       }
+               } else if (dev->data->rx_queue_state[i] ==
+                                               RTE_ETH_QUEUE_STATE_STOPPED) {
+                       /*
+                        * The sub-device Rx queue must be stopped manually
+                        * if the corresponding fail-safe Rx queue was
+                        * stopped earlier.
+                        */
+                       ret = dev->dev_ops->rx_queue_stop(dev, i);
+                       if (ret) {
+                               ERROR("Could not synchronize Rx queue %d", i);
+                               return ret;
+                       }
+               }
+       }
+       return 0;
+}
+
+static int
+failsafe_eth_dev_tx_queues_sync(struct rte_eth_dev *dev)
+{
+       struct txq *txq;
+       int ret;
+       uint16_t i;
+
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               txq = dev->data->tx_queues[i];
+
+               if (txq->info.conf.tx_deferred_start &&
+                   dev->data->tx_queue_state[i] ==
+                                               RTE_ETH_QUEUE_STATE_STARTED) {
+                       /*
+                        * The sub-device Tx queue is not started on device
+                        * start when the deferred start flag is set, so it
+                        * must be started manually if the corresponding
+                        * fail-safe Tx queue was started earlier.
+                        */
+                       ret = dev->dev_ops->tx_queue_start(dev, i);
+                       if (ret) {
+                               ERROR("Could not synchronize Tx queue %d", i);
+                               return ret;
+                       }
+               } else if (dev->data->tx_queue_state[i] ==
+                                               RTE_ETH_QUEUE_STATE_STOPPED) {
+                       /*
+                        * The sub-device Tx queue must be stopped manually
+                        * if the corresponding fail-safe Tx queue was
+                        * stopped earlier.
+                        */
+                       ret = dev->dev_ops->tx_queue_stop(dev, i);
+                       if (ret) {
+                               ERROR("Could not synchronize Tx queue %d", i);
+                               return ret;
+                       }
+               }
+       }
+       return 0;
+}
+
 int
 failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
 {
@@ -422,6 +520,12 @@ failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
        if (PRIV(dev)->state < DEV_STARTED)
                return 0;
        ret = dev->dev_ops->dev_start(dev);
+       if (ret)
+               goto err_remove;
+       ret = failsafe_eth_dev_rx_queues_sync(dev);
+       if (ret)
+               goto err_remove;
+       ret = failsafe_eth_dev_tx_queues_sync(dev);
        if (ret)
                goto err_remove;
        return 0;
@@ -466,7 +570,7 @@ failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
        /* Switch as soon as possible tx_dev. */
        fs_switch_dev(sdev->fs_dev, sdev);
        /* Use safe bursts in any case. */
-       set_burst_fn(sdev->fs_dev, 1);
+       failsafe_set_burst_fn(sdev->fs_dev, 1);
        /*
         * Async removal, the sub-PMD will try to unregister
         * the callback at the source of the current thread context.
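The queue-sync helpers added above mirror the application-facing deferred-start
flow. A minimal sketch of that flow against the fail-safe port itself, assuming
port_id and mb_pool are already set up and omitting error handling:

    #include <rte_ethdev.h>
    #include <rte_lcore.h>

    static void
    deferred_start_example(uint16_t port_id, struct rte_mempool *mb_pool)
    {
            struct rte_eth_dev_info dev_info;
            struct rte_eth_rxconf rxconf;

            rte_eth_dev_info_get(port_id, &dev_info);
            rxconf = dev_info.default_rxconf;
            rxconf.rx_deferred_start = 1;  /* skip this queue in dev_start() */
            rte_eth_rx_queue_setup(port_id, 0, 512, rte_socket_id(),
                                   &rxconf, mb_pool);
            rte_eth_dev_start(port_id);             /* queue 0 stays stopped */
            rte_eth_dev_rx_queue_start(port_id, 0); /* start it explicitly */
    }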
index bfe42fc..5e2b5f7 100644 (file)
@@ -3,8 +3,11 @@
  * Copyright 2017 Mellanox Technologies, Ltd
  */
 
+#include <stddef.h>
+#include <string.h>
 #include <sys/queue.h>
 
+#include <rte_errno.h>
 #include <rte_malloc.h>
 #include <rte_tailq.h>
 #include <rte_flow.h>
@@ -18,19 +21,33 @@ fs_flow_allocate(const struct rte_flow_attr *attr,
                 const struct rte_flow_action *actions)
 {
        struct rte_flow *flow;
-       size_t fdsz;
+       const struct rte_flow_conv_rule rule = {
+               .attr_ro = attr,
+               .pattern_ro = items,
+               .actions_ro = actions,
+       };
+       struct rte_flow_error error;
+       int ret;
 
-       fdsz = rte_flow_copy(NULL, 0, attr, items, actions);
-       flow = rte_zmalloc(NULL,
-                          sizeof(struct rte_flow) + fdsz,
+       ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, NULL, 0, &rule, &error);
+       if (ret < 0) {
+               ERROR("Unable to process flow rule (%s): %s",
+                     error.message ? error.message : "unspecified",
+                     strerror(rte_errno));
+               return NULL;
+       }
+       flow = rte_zmalloc(NULL, offsetof(struct rte_flow, rule) + ret,
                           RTE_CACHE_LINE_SIZE);
        if (flow == NULL) {
                ERROR("Could not allocate new flow");
                return NULL;
        }
-       flow->fd = (void *)((uintptr_t)flow + sizeof(*flow));
-       if (rte_flow_copy(flow->fd, fdsz, attr, items, actions) != fdsz) {
-               ERROR("Failed to copy flow description");
+       ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, &flow->rule, ret, &rule,
+                           &error);
+       if (ret < 0) {
+               ERROR("Failed to copy flow rule (%s): %s",
+                     error.message ? error.message : "unspecified",
+                     strerror(rte_errno));
                rte_free(flow);
                return NULL;
        }
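The two-call rte_flow_conv() pattern above is generic: the first call with a
NULL destination returns the number of bytes the converted rule needs, and the
second call performs the deep copy. A standalone sketch of the same pattern
with plain malloc(), as a hypothetical helper:

    #include <stdlib.h>
    #include <rte_flow.h>

    static struct rte_flow_conv_rule *
    rule_dup(const struct rte_flow_attr *attr,
             const struct rte_flow_item *pattern,
             const struct rte_flow_action *actions)
    {
            const struct rte_flow_conv_rule rule = {
                    .attr_ro = attr,
                    .pattern_ro = pattern,
                    .actions_ro = actions,
            };
            struct rte_flow_error error;
            struct rte_flow_conv_rule *dst;
            int size;

            /* First pass: compute the size of the converted rule */
            size = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, NULL, 0, &rule, &error);
            if (size < 0)
                    return NULL;
            dst = malloc(size);
            if (dst == NULL)
                    return NULL;
            /* Second pass: deep-copy attr/pattern/actions into dst */
            if (rte_flow_conv(RTE_FLOW_CONV_OP_RULE, dst, size,
                              &rule, &error) < 0) {
                    free(dst);
                    return NULL;
            }
            return dst;
    }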
index fc6ec37..1c2cb71 100644 (file)
@@ -372,7 +372,7 @@ void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev)
        for (qid = 0; qid < ETH(sdev)->data->nb_rx_queues; qid++) {
                if (qid < fsdev->data->nb_rx_queues) {
                        fsrxq = fsdev->data->rx_queues[qid];
-                       if (fsrxq->enable_events)
+                       if (fsrxq != NULL && fsrxq->enable_events)
                                rte_eth_dev_rx_intr_disable(PORT_ID(sdev),
                                                            qid);
                }
index 24e91c9..7f8bcd4 100644 (file)
@@ -57,7 +57,6 @@ static struct rte_eth_dev_info default_infos = {
                DEV_RX_OFFLOAD_VLAN_FILTER |
                DEV_RX_OFFLOAD_VLAN_EXTEND |
                DEV_RX_OFFLOAD_JUMBO_FRAME |
-               DEV_RX_OFFLOAD_CRC_STRIP |
                DEV_RX_OFFLOAD_SCATTER |
                DEV_RX_OFFLOAD_TIMESTAMP |
                DEV_RX_OFFLOAD_SECURITY,
@@ -74,7 +73,6 @@ static struct rte_eth_dev_info default_infos = {
                DEV_RX_OFFLOAD_VLAN_FILTER |
                DEV_RX_OFFLOAD_VLAN_EXTEND |
                DEV_RX_OFFLOAD_JUMBO_FRAME |
-               DEV_RX_OFFLOAD_CRC_STRIP |
                DEV_RX_OFFLOAD_SCATTER |
                DEV_RX_OFFLOAD_TIMESTAMP |
                DEV_RX_OFFLOAD_SECURITY,
@@ -88,6 +86,9 @@ static struct rte_eth_dev_info default_infos = {
                        ETH_RSS_IP |
                        ETH_RSS_UDP |
                        ETH_RSS_TCP,
+       .dev_capa =
+               RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
+               RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP,
 };
 
 static int
@@ -170,6 +171,27 @@ fs_dev_configure(struct rte_eth_dev *dev)
        return 0;
 }
 
+static void
+fs_set_queues_state_start(struct rte_eth_dev *dev)
+{
+       struct rxq *rxq;
+       struct txq *txq;
+       uint16_t i;
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               rxq = dev->data->rx_queues[i];
+               if (rxq != NULL && !rxq->info.conf.rx_deferred_start)
+                       dev->data->rx_queue_state[i] =
+                                               RTE_ETH_QUEUE_STATE_STARTED;
+       }
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               txq = dev->data->tx_queues[i];
+               if (txq != NULL && !txq->info.conf.tx_deferred_start)
+                       dev->data->tx_queue_state[i] =
+                                               RTE_ETH_QUEUE_STATE_STARTED;
+       }
+}
+
 static int
 fs_dev_start(struct rte_eth_dev *dev)
 {
@@ -204,13 +226,30 @@ fs_dev_start(struct rte_eth_dev *dev)
                }
                sdev->state = DEV_STARTED;
        }
-       if (PRIV(dev)->state < DEV_STARTED)
+       if (PRIV(dev)->state < DEV_STARTED) {
                PRIV(dev)->state = DEV_STARTED;
+               fs_set_queues_state_start(dev);
+       }
        fs_switch_dev(dev, NULL);
        fs_unlock(dev, 0);
        return 0;
 }
 
+static void
+fs_set_queues_state_stop(struct rte_eth_dev *dev)
+{
+       uint16_t i;
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++)
+               if (dev->data->rx_queues[i] != NULL)
+                       dev->data->rx_queue_state[i] =
+                                               RTE_ETH_QUEUE_STATE_STOPPED;
+       for (i = 0; i < dev->data->nb_tx_queues; i++)
+               if (dev->data->tx_queues[i] != NULL)
+                       dev->data->tx_queue_state[i] =
+                                               RTE_ETH_QUEUE_STATE_STOPPED;
+}
+
 static void
 fs_dev_stop(struct rte_eth_dev *dev)
 {
@@ -225,6 +264,7 @@ fs_dev_stop(struct rte_eth_dev *dev)
                sdev->state = DEV_STARTED - 1;
        }
        failsafe_rx_intr_uninstall(dev);
+       fs_set_queues_state_stop(dev);
        fs_unlock(dev, 0);
 }
 
@@ -294,6 +334,112 @@ fs_dev_close(struct rte_eth_dev *dev)
        fs_unlock(dev, 0);
 }
 
+static int
+fs_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+{
+       struct sub_device *sdev;
+       uint8_t i;
+       int ret;
+       int err = 0;
+       bool failure = true;
+
+       fs_lock(dev, 0);
+       FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+               uint16_t port_id = ETH(sdev)->data->port_id;
+
+               ret = rte_eth_dev_rx_queue_stop(port_id, rx_queue_id);
+               ret = fs_err(sdev, ret);
+               if (ret) {
+                       ERROR("Rx queue stop failed for subdevice %d", i);
+                       err = ret;
+               } else {
+                       failure = false;
+               }
+       }
+       dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
+       fs_unlock(dev, 0);
+       /* Return 0 if at least one queue stop succeeded */
+       return (failure) ? err : 0;
+}
+
+static int
+fs_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
+{
+       struct sub_device *sdev;
+       uint8_t i;
+       int ret;
+
+       fs_lock(dev, 0);
+       FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+               uint16_t port_id = ETH(sdev)->data->port_id;
+
+               ret = rte_eth_dev_rx_queue_start(port_id, rx_queue_id);
+               ret = fs_err(sdev, ret);
+               if (ret) {
+                       ERROR("Rx queue start failed for subdevice %d", i);
+                       fs_rx_queue_stop(dev, rx_queue_id);
+                       fs_unlock(dev, 0);
+                       return ret;
+               }
+       }
+       dev->data->rx_queue_state[rx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
+       fs_unlock(dev, 0);
+       return 0;
+}
+
+static int
+fs_tx_queue_stop(struct rte_eth_dev *dev, uint16_t tx_queue_id)
+{
+       struct sub_device *sdev;
+       uint8_t i;
+       int ret;
+       int err = 0;
+       bool failure = true;
+
+       fs_lock(dev, 0);
+       FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+               uint16_t port_id = ETH(sdev)->data->port_id;
+
+               ret = rte_eth_dev_tx_queue_stop(port_id, tx_queue_id);
+               ret = fs_err(sdev, ret);
+               if (ret) {
+                       ERROR("Tx queue stop failed for subdevice %d", i);
+                       err = ret;
+               } else {
+                       failure = false;
+               }
+       }
+       dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STOPPED;
+       fs_unlock(dev, 0);
+       /* Return 0 if at least one queue stop succeeded */
+       return (failure) ? err : 0;
+}
+
+static int
+fs_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
+{
+       struct sub_device *sdev;
+       uint8_t i;
+       int ret;
+
+       fs_lock(dev, 0);
+       FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+               uint16_t port_id = ETH(sdev)->data->port_id;
+
+               ret = rte_eth_dev_tx_queue_start(port_id, tx_queue_id);
+               ret = fs_err(sdev, ret);
+               if (ret) {
+                       ERROR("Tx queue start failed for subdevice %d", i);
+                       fs_tx_queue_stop(dev, tx_queue_id);
+                       fs_unlock(dev, 0);
+                       return ret;
+               }
+       }
+       dev->data->tx_queue_state[tx_queue_id] = RTE_ETH_QUEUE_STATE_STARTED;
+       fs_unlock(dev, 0);
+       return 0;
+}
+
 static void
 fs_rx_queue_release(void *queue)
 {
@@ -309,9 +455,13 @@ fs_rx_queue_release(void *queue)
        fs_lock(dev, 0);
        if (rxq->event_fd > 0)
                close(rxq->event_fd);
-       FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
-               SUBOPS(sdev, rx_queue_release)
-                       (ETH(sdev)->data->rx_queues[rxq->qid]);
+       FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+               if (ETH(sdev)->data->rx_queues != NULL &&
+                   ETH(sdev)->data->rx_queues[rxq->qid] != NULL) {
+                       SUBOPS(sdev, rx_queue_release)
+                               (ETH(sdev)->data->rx_queues[rxq->qid]);
+               }
+       }
        dev->data->rx_queues[rxq->qid] = NULL;
        rte_free(rxq);
        fs_unlock(dev, 0);
@@ -341,6 +491,16 @@ fs_rx_queue_setup(struct rte_eth_dev *dev,
        int ret;
 
        fs_lock(dev, 0);
+       if (rx_conf->rx_deferred_start) {
+               FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
+                       if (SUBOPS(sdev, rx_queue_start) == NULL) {
+                               ERROR("Rx queue deferred start is not "
+                                       "supported for subdevice %d", i);
+                               fs_unlock(dev, 0);
+                               return -EINVAL;
+                       }
+               }
+       }
        rxq = dev->data->rx_queues[rx_queue_id];
        if (rxq != NULL) {
                fs_rx_queue_release(rxq);
@@ -477,9 +637,13 @@ fs_tx_queue_release(void *queue)
        txq = queue;
        dev = txq->priv->dev;
        fs_lock(dev, 0);
-       FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
-               SUBOPS(sdev, tx_queue_release)
-                       (ETH(sdev)->data->tx_queues[txq->qid]);
+       FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+               if (ETH(sdev)->data->tx_queues != NULL &&
+                   ETH(sdev)->data->tx_queues[txq->qid] != NULL) {
+                       SUBOPS(sdev, tx_queue_release)
+                               (ETH(sdev)->data->tx_queues[txq->qid]);
+               }
+       }
        dev->data->tx_queues[txq->qid] = NULL;
        rte_free(txq);
        fs_unlock(dev, 0);
@@ -498,6 +662,16 @@ fs_tx_queue_setup(struct rte_eth_dev *dev,
        int ret;
 
        fs_lock(dev, 0);
+       if (tx_conf->tx_deferred_start) {
+               FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
+                       if (SUBOPS(sdev, tx_queue_start) == NULL) {
+                               ERROR("Tx queue deferred start is not "
+                                       "supported for subdevice %d", i);
+                               fs_unlock(dev, 0);
+                               return -EINVAL;
+                       }
+               }
+       }
        txq = dev->data->tx_queues[tx_queue_id];
        if (txq != NULL) {
                fs_tx_queue_release(txq);
@@ -716,6 +890,8 @@ fs_stats_reset(struct rte_eth_dev *dev)
  *      all sub_devices and the default capabilities.
  *      Uses a logical AND of TX capabilities among
  *      the active probed sub_device and the default capabilities.
+ *      Uses a logical AND of device capabilities among
+ *      all sub_devices and the default capabilities.
  *
  */
 static void
@@ -734,10 +910,12 @@ fs_dev_infos_get(struct rte_eth_dev *dev,
                uint64_t rx_offload_capa;
                uint64_t rxq_offload_capa;
                uint64_t rss_hf_offload_capa;
+               uint64_t dev_capa;
 
                rx_offload_capa = default_infos.rx_offload_capa;
                rxq_offload_capa = default_infos.rx_queue_offload_capa;
                rss_hf_offload_capa = default_infos.flow_type_rss_offloads;
+               dev_capa = default_infos.dev_capa;
                FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED) {
                        rte_eth_dev_info_get(PORT_ID(sdev),
                                        &PRIV(dev)->infos);
@@ -746,12 +924,14 @@ fs_dev_infos_get(struct rte_eth_dev *dev,
                                        PRIV(dev)->infos.rx_queue_offload_capa;
                        rss_hf_offload_capa &=
                                        PRIV(dev)->infos.flow_type_rss_offloads;
+                       dev_capa &= PRIV(dev)->infos.dev_capa;
                }
                sdev = TX_SUBDEV(dev);
                rte_eth_dev_info_get(PORT_ID(sdev), &PRIV(dev)->infos);
                PRIV(dev)->infos.rx_offload_capa = rx_offload_capa;
                PRIV(dev)->infos.rx_queue_offload_capa = rxq_offload_capa;
                PRIV(dev)->infos.flow_type_rss_offloads = rss_hf_offload_capa;
+               PRIV(dev)->infos.dev_capa = dev_capa;
                PRIV(dev)->infos.tx_offload_capa &=
                                        default_infos.tx_offload_capa;
                PRIV(dev)->infos.tx_queue_offload_capa &=
@@ -952,6 +1132,55 @@ fs_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
        return 0;
 }
 
+static int
+fs_set_mc_addr_list(struct rte_eth_dev *dev,
+                   struct ether_addr *mc_addr_set, uint32_t nb_mc_addr)
+{
+       struct sub_device *sdev;
+       uint8_t i;
+       int ret;
+       void *mcast_addrs;
+
+       fs_lock(dev, 0);
+
+       FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+               ret = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev),
+                                                  mc_addr_set, nb_mc_addr);
+               if (ret != 0) {
+                       ERROR("Operation rte_eth_dev_set_mc_addr_list failed for sub_device %d with error %d",
+                             i, ret);
+                       goto rollback;
+               }
+       }
+
+       mcast_addrs = rte_realloc(PRIV(dev)->mcast_addrs,
+               nb_mc_addr * sizeof(PRIV(dev)->mcast_addrs[0]), 0);
+       if (mcast_addrs == NULL && nb_mc_addr > 0) {
+               ret = -ENOMEM;
+               goto rollback;
+       }
+       rte_memcpy(mcast_addrs, mc_addr_set,
+                  nb_mc_addr * sizeof(PRIV(dev)->mcast_addrs[0]));
+       PRIV(dev)->nb_mcast_addr = nb_mc_addr;
+       PRIV(dev)->mcast_addrs = mcast_addrs;
+
+       fs_unlock(dev, 0);
+       return 0;
+
+rollback:
+       FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE) {
+               int rc = rte_eth_dev_set_mc_addr_list(PORT_ID(sdev),
+                       PRIV(dev)->mcast_addrs, PRIV(dev)->nb_mcast_addr);
+               if (rc != 0) {
+                       ERROR("Multicast MAC address list rollback for sub_device %d failed with error %d",
+                             i, rc);
+               }
+       }
+
+       fs_unlock(dev, 0);
+       return ret;
+}
+
 static int
 fs_rss_hash_update(struct rte_eth_dev *dev,
                        struct rte_eth_rss_conf *rss_conf)
@@ -1025,6 +1254,10 @@ const struct eth_dev_ops failsafe_ops = {
        .dev_supported_ptypes_get = fs_dev_supported_ptypes_get,
        .mtu_set = fs_mtu_set,
        .vlan_filter_set = fs_vlan_filter_set,
+       .rx_queue_start = fs_rx_queue_start,
+       .rx_queue_stop = fs_rx_queue_stop,
+       .tx_queue_start = fs_tx_queue_start,
+       .tx_queue_stop = fs_tx_queue_stop,
        .rx_queue_setup = fs_rx_queue_setup,
        .tx_queue_setup = fs_tx_queue_setup,
        .rx_queue_release = fs_rx_queue_release,
@@ -1036,6 +1269,7 @@ const struct eth_dev_ops failsafe_ops = {
        .mac_addr_remove = fs_mac_addr_remove,
        .mac_addr_add = fs_mac_addr_add,
        .mac_addr_set = fs_mac_addr_set,
+       .set_mc_addr_list = fs_set_mc_addr_list,
        .rss_hash_update = fs_rss_hash_update,
        .filter_ctrl = fs_filter_ctrl,
 };
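A short usage sketch for the new multicast list op from the application side;
the group addresses are illustrative, and passing zero addresses clears the
list on every sub-device:

    #include <rte_common.h>
    #include <rte_ethdev.h>

    /* Subscribe a port to two IPv4 multicast MAC addresses. */
    static int
    join_mcast_groups(uint16_t port_id)
    {
            struct ether_addr mc[2] = {
                    { .addr_bytes = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 } },
                    { .addr_bytes = { 0x01, 0x00, 0x5e, 0x01, 0x02, 0x03 } },
            };

            return rte_eth_dev_set_mc_addr_list(port_id, mc, RTE_DIM(mc));
    }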
index 886af86..7e31896 100644 (file)
@@ -6,6 +6,7 @@
 #ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
 #define _RTE_ETH_FAILSAFE_PRIVATE_H_
 
+#include <stdint.h>
 #include <sys/queue.h>
 #include <pthread.h>
 
@@ -13,6 +14,7 @@
 #include <rte_dev.h>
 #include <rte_ethdev_driver.h>
 #include <rte_devargs.h>
+#include <rte_flow.h>
 #include <rte_interrupts.h>
 
 #define FAILSAFE_DRIVER_NAME "Fail-safe PMD"
@@ -81,7 +83,8 @@ struct rte_flow {
        /* sub_flows */
        struct rte_flow *flows[FAILSAFE_MAX_ETHPORTS];
        /* flow description for synchronization */
-       struct rte_flow_desc *fd;
+       struct rte_flow_conv_rule rule;
+       uint8_t rule_data[];
 };
 
 enum dev_state {
@@ -143,6 +146,8 @@ struct fs_priv {
        uint32_t nb_mac_addr;
        struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
        uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
+       uint32_t nb_mcast_addr;
+       struct ether_addr *mcast_addrs;
        /* current capabilities */
        struct rte_eth_dev_info infos;
        struct rte_eth_dev_owner my_owner; /* Unique owner. */
@@ -188,7 +193,7 @@ int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);
 
 /* RX / TX */
 
-void set_burst_fn(struct rte_eth_dev *dev, int force_safe);
+void failsafe_set_burst_fn(struct rte_eth_dev *dev, int force_safe);
 
 uint16_t failsafe_rx_burst(void *rxq,
                struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
@@ -234,8 +239,8 @@ int failsafe_eth_new_event_callback(uint16_t port_id,
 extern const char pmd_failsafe_driver_name[];
 extern const struct eth_dev_ops failsafe_ops;
 extern const struct rte_flow_ops fs_flow_ops;
-extern uint64_t hotplug_poll;
-extern int mac_from_arg;
+extern uint64_t failsafe_hotplug_poll;
+extern int failsafe_mac_from_arg;
 
 /* HELPERS */
 
@@ -468,7 +473,7 @@ fs_switch_dev(struct rte_eth_dev *dev,
        } else {
                return;
        }
-       set_burst_fn(dev, 0);
+       failsafe_set_burst_fn(dev, 0);
        rte_wmb();
 }
 
index 7bd0f96..034f47b 100644
@@ -29,7 +29,7 @@ fs_tx_unsafe(struct sub_device *sdev)
 }
 
 void
-set_burst_fn(struct rte_eth_dev *dev, int force_safe)
+failsafe_set_burst_fn(struct rte_eth_dev *dev, int force_safe)
 {
        struct sub_device *sdev;
        uint8_t i;
index a8fc5fa..5525cdc 100644
@@ -15,6 +15,9 @@ error_cflags = ['-Wno-unused-parameter', '-Wno-unused-value',
        '-Wno-unused-variable', '-Wno-missing-field-initializers'
 ]
 c_args = cflags
+if allow_experimental_apis
+       c_args += '-DALLOW_EXPERIMENTAL_API'
+endif
 foreach flag: error_cflags
        if cc.has_argument(flag)
                c_args += flag
index 541a49b..c852022 100644
@@ -72,7 +72,7 @@ struct fm10k_xstats_name_off {
        unsigned offset;
 };
 
-struct fm10k_xstats_name_off fm10k_hw_stats_strings[] = {
+static const struct fm10k_xstats_name_off fm10k_hw_stats_strings[] = {
        {"completion_timeout_count", offsetof(struct fm10k_hw_stats, timeout)},
        {"unsupported_requests_count", offsetof(struct fm10k_hw_stats, ur)},
        {"completer_abort_count", offsetof(struct fm10k_hw_stats, ca)},
@@ -87,7 +87,7 @@ struct fm10k_xstats_name_off fm10k_hw_stats_strings[] = {
 #define FM10K_NB_HW_XSTATS (sizeof(fm10k_hw_stats_strings) / \
                sizeof(fm10k_hw_stats_strings[0]))
 
-struct fm10k_xstats_name_off fm10k_hw_stats_rx_q_strings[] = {
+static const struct fm10k_xstats_name_off fm10k_hw_stats_rx_q_strings[] = {
        {"packets", offsetof(struct fm10k_hw_stats_q, rx_packets)},
        {"bytes", offsetof(struct fm10k_hw_stats_q, rx_bytes)},
        {"dropped", offsetof(struct fm10k_hw_stats_q, rx_drops)},
@@ -96,7 +96,7 @@ struct fm10k_xstats_name_off fm10k_hw_stats_rx_q_strings[] = {
 #define FM10K_NB_RX_Q_XSTATS (sizeof(fm10k_hw_stats_rx_q_strings) / \
                sizeof(fm10k_hw_stats_rx_q_strings[0]))
 
-struct fm10k_xstats_name_off fm10k_hw_stats_tx_q_strings[] = {
+static const struct fm10k_xstats_name_off fm10k_hw_stats_tx_q_strings[] = {
        {"packets", offsetof(struct fm10k_hw_stats_q, tx_packets)},
        {"bytes", offsetof(struct fm10k_hw_stats_q, tx_bytes)},
 };
@@ -129,13 +129,13 @@ fm10k_mbx_unlock(struct fm10k_hw *hw)
 }
 
 /* Stubs needed for linkage when vPMD is disabled */
-int __attribute__((weak))
+__rte_weak int
 fm10k_rx_vec_condition_check(__rte_unused struct rte_eth_dev *dev)
 {
        return -1;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 fm10k_recv_pkts_vec(
        __rte_unused void *rx_queue,
        __rte_unused struct rte_mbuf **rx_pkts,
@@ -144,7 +144,7 @@ fm10k_recv_pkts_vec(
        return 0;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 fm10k_recv_scattered_pkts_vec(
                __rte_unused void *rx_queue,
                __rte_unused struct rte_mbuf **rx_pkts,
@@ -153,33 +153,33 @@ fm10k_recv_scattered_pkts_vec(
        return 0;
 }
 
-int __attribute__((weak))
+__rte_weak int
 fm10k_rxq_vec_setup(__rte_unused struct fm10k_rx_queue *rxq)
 {
        return -1;
 }
 
-void __attribute__((weak))
+__rte_weak void
 fm10k_rx_queue_release_mbufs_vec(
                __rte_unused struct fm10k_rx_queue *rxq)
 {
        return;
 }
 
-void __attribute__((weak))
+__rte_weak void
 fm10k_txq_vec_setup(__rte_unused struct fm10k_tx_queue *txq)
 {
        return;
 }
 
-int __attribute__((weak))
+__rte_weak int
 fm10k_tx_vec_condition_check(__rte_unused struct fm10k_tx_queue *txq)
 {
        return -1;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 fm10k_xmit_fixed_burst_vec(__rte_unused void *tx_queue,
                           __rte_unused struct rte_mbuf **tx_pkts,
                           __rte_unused uint16_t nb_pkts)
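
__rte_weak (from rte_common.h) is the portability wrapper for the raw attribute spelled out before. The mechanism, as a sketch with hypothetical names split over two files:

    /* pmd_generic.c: weak fallback, always compiled */
    #include <rte_common.h>

    __rte_weak int
    pmd_vec_condition_check(void)
    {
            return -1;      /* vector path unavailable */
    }

    /* pmd_vec.c: compiled only when the vector build is enabled; its
     * strong definition overrides the weak stub at link time. */
    int
    pmd_vec_condition_check(void)
    {
            return 0;       /* vector path usable */
    }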
@@ -451,12 +451,6 @@ fm10k_dev_configure(struct rte_eth_dev *dev)
 
        PMD_INIT_FUNC_TRACE();
 
-       /* KEEP_CRC offload flag is not supported by PMD
-        * can remove the below block when DEV_RX_OFFLOAD_CRC_STRIP removed
-        */
-       if (rte_eth_dev_must_keep_crc(dev->data->dev_conf.rxmode.offloads))
-               PMD_INIT_LOG(WARNING, "fm10k always strip CRC");
-
        /* multiple queue mode checking */
        ret  = fm10k_check_mq_mode(dev);
        if (ret != 0) {
@@ -1325,7 +1319,7 @@ fm10k_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats,
 static int
 fm10k_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 {
-       uint64_t ipackets, opackets, ibytes, obytes;
+       uint64_t ipackets, opackets, ibytes, obytes, imissed;
        struct fm10k_hw *hw =
                FM10K_DEV_PRIVATE_TO_HW(dev->data->dev_private);
        struct fm10k_hw_stats *hw_stats =
@@ -1336,22 +1330,25 @@ fm10k_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
        fm10k_update_hw_stats(hw, hw_stats);
 
-       ipackets = opackets = ibytes = obytes = 0;
+       ipackets = opackets = ibytes = obytes = imissed = 0;
        for (i = 0; (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) &&
                (i < hw->mac.max_queues); ++i) {
                stats->q_ipackets[i] = hw_stats->q[i].rx_packets.count;
                stats->q_opackets[i] = hw_stats->q[i].tx_packets.count;
                stats->q_ibytes[i]   = hw_stats->q[i].rx_bytes.count;
                stats->q_obytes[i]   = hw_stats->q[i].tx_bytes.count;
+               stats->q_errors[i]   = hw_stats->q[i].rx_drops.count;
                ipackets += stats->q_ipackets[i];
                opackets += stats->q_opackets[i];
                ibytes   += stats->q_ibytes[i];
                obytes   += stats->q_obytes[i];
+               imissed  += stats->q_errors[i];
        }
        stats->ipackets = ipackets;
        stats->opackets = opackets;
        stats->ibytes = ibytes;
        stats->obytes = obytes;
+       stats->imissed = imissed;
        return 0;
 }
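
With rx_drops now folded into q_errors[] and imissed, the counter is visible through the generic stats API. A minimal sketch:

    #include <inttypes.h>
    #include <stdio.h>
    #include <rte_ethdev.h>

    static void
    print_missed(uint16_t port_id)
    {
            struct rte_eth_stats st;

            if (rte_eth_stats_get(port_id, &st) == 0)
                    printf("port %u missed: %" PRIu64 "\n",
                           port_id, st.imissed);
    }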
 
@@ -1796,7 +1793,6 @@ static uint64_t fm10k_get_rx_port_offloads_capa(struct rte_eth_dev *dev)
                           DEV_RX_OFFLOAD_UDP_CKSUM   |
                           DEV_RX_OFFLOAD_TCP_CKSUM   |
                           DEV_RX_OFFLOAD_JUMBO_FRAME |
-                          DEV_RX_OFFLOAD_CRC_STRIP   |
                           DEV_RX_OFFLOAD_HEADER_SPLIT);
 }
 
@@ -1982,6 +1978,7 @@ static uint64_t fm10k_get_tx_port_offloads_capa(struct rte_eth_dev *dev)
        RTE_SET_USED(dev);
 
        return (uint64_t)(DEV_TX_OFFLOAD_VLAN_INSERT |
+                         DEV_TX_OFFLOAD_MULTI_SEGS  |
                          DEV_TX_OFFLOAD_IPV4_CKSUM  |
                          DEV_TX_OFFLOAD_UDP_CKSUM   |
                          DEV_TX_OFFLOAD_TCP_CKSUM   |
@@ -3237,14 +3234,6 @@ eth_fm10k_dev_uninit(struct rte_eth_dev *dev)
                        fm10k_dev_interrupt_handler_vf, (void *)dev);
        }
 
-       /* free mac memory */
-       if (dev->data->mac_addrs) {
-               rte_free(dev->data->mac_addrs);
-               dev->data->mac_addrs = NULL;
-       }
-
-       memset(hw, 0, sizeof(*hw));
-
        return 0;
 }
 
index 247ba11..84f191f 100644
@@ -34,7 +34,7 @@ Intel® I40E driver
 ==================
 
 This directory contains source code of FreeBSD i40e driver of version
-cid-i40e.2018.01.02.tar.gz released by the team which develops
+cid-i40e.2018.09.13.tar.gz released by the team which develops
 basic drivers for any i40e NIC. The directory of base/ contains the
 original source package.
 This driver is valid for the product(s) listed below
index 612be88..38214a3 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "i40e_status.h"
 #include "i40e_type.h"
@@ -126,6 +97,7 @@ enum i40e_status_code i40e_alloc_adminq_arq_ring(struct i40e_hw *hw)
  **/
 void i40e_free_adminq_asq(struct i40e_hw *hw)
 {
+       i40e_free_virt_mem(hw, &hw->aq.asq.cmd_buf);
        i40e_free_dma_mem(hw, &hw->aq.asq.desc_buf);
 }
 
@@ -433,7 +405,7 @@ enum i40e_status_code i40e_init_asq(struct i40e_hw *hw)
        /* initialize base registers */
        ret_code = i40e_config_asq_regs(hw);
        if (ret_code != I40E_SUCCESS)
-               goto init_adminq_free_rings;
+               goto init_config_regs;
 
        /* success! */
        hw->aq.asq.count = hw->aq.num_asq_entries;
@@ -441,6 +413,10 @@ enum i40e_status_code i40e_init_asq(struct i40e_hw *hw)
 
 init_adminq_free_rings:
        i40e_free_adminq_asq(hw);
+       return ret_code;
+
+init_config_regs:
+       i40e_free_asq_bufs(hw);
 
 init_adminq_exit:
        return ret_code;
@@ -692,6 +668,12 @@ enum i40e_status_code i40e_init_adminq(struct i40e_hw *hw)
            hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
            hw->aq.api_min_ver >= I40E_MINOR_VER_GET_LINK_INFO_XL710) {
                hw->flags |= I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE;
+               hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
+       }
+       if (hw->mac.type == I40E_MAC_X722 &&
+           hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR &&
+           hw->aq.api_min_ver >= I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722) {
+               hw->flags |= I40E_HW_FLAG_FW_LLDP_STOPPABLE;
        }
 
        /* Newer versions of firmware require lock when reading the NVM */
@@ -987,6 +969,8 @@ enum i40e_status_code i40e_asq_send_command(struct i40e_hw *hw,
                cmd_completed = true;
                if ((enum i40e_admin_queue_err)retval == I40E_AQ_RC_OK)
                        status = I40E_SUCCESS;
+               else if ((enum i40e_admin_queue_err)retval == I40E_AQ_RC_EBUSY)
+                       status = I40E_ERR_NOT_READY;
                else
                        status = I40E_ERR_ADMIN_QUEUE_ERROR;
                hw->aq.asq_last_status = (enum i40e_admin_queue_err)retval;
index de4ab3f..769d848 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _I40E_ADMINQ_H_
 #define _I40E_ADMINQ_H_
index 801c0ff..8306260 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _I40E_ADMINQ_CMD_H_
 #define _I40E_ADMINQ_CMD_H_
@@ -41,7 +12,7 @@ POSSIBILITY OF SUCH DAMAGE.
  */
 
 #define I40E_FW_API_VERSION_MAJOR      0x0001
-#define I40E_FW_API_VERSION_MINOR_X722 0x0005
+#define I40E_FW_API_VERSION_MINOR_X722 0x0006
 #define I40E_FW_API_VERSION_MINOR_X710 0x0007
 
 #define I40E_FW_MINOR_VERSION(_h) ((_h)->mac.type == I40E_MAC_XL710 ? \
@@ -50,6 +21,8 @@ POSSIBILITY OF SUCH DAMAGE.
 
 /* API version 1.7 implements additional link and PHY-specific APIs  */
 #define I40E_MINOR_VER_GET_LINK_INFO_XL710 0x0007
+/* API version 1.6 for X722 devices adds ability to stop FW LLDP agent */
+#define I40E_MINOR_VER_FW_LLDP_STOPPABLE_X722 0x0006
 
 struct i40e_aq_desc {
        __le16 flags;
@@ -804,7 +777,35 @@ struct i40e_aqc_set_switch_config {
         */
        __le16  first_tag;
        __le16  second_tag;
-       u8      reserved[6];
+       /* Next byte is split into the following fields:
+        * Bit 7    : 0: no action, 1: switch to the mode defined by bits 6:0
+        * Bit 6    : 0: destination port, 1: source port
+        * Bits 5:4 : L4 type
+        * 0: rsvd
+        * 1: TCP
+        * 2: UDP
+        * 3: both TCP and UDP
+        * Bits 3:0 : mode
+        * 0: default mode
+        * 1: L4 port only mode
+        * 2: non-tunneled mode
+        * 3: tunneled mode
+        */
+#define I40E_AQ_SET_SWITCH_BIT7_VALID          0x80
+
+#define I40E_AQ_SET_SWITCH_L4_SRC_PORT         0x40
+
+#define I40E_AQ_SET_SWITCH_L4_TYPE_RSVD                0x00
+#define I40E_AQ_SET_SWITCH_L4_TYPE_TCP         0x10
+#define I40E_AQ_SET_SWITCH_L4_TYPE_UDP         0x20
+#define I40E_AQ_SET_SWITCH_L4_TYPE_BOTH                0x30
+
+#define I40E_AQ_SET_SWITCH_MODE_DEFAULT                0x00
+#define I40E_AQ_SET_SWITCH_MODE_L4_PORT                0x01
+#define I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL     0x02
+#define I40E_AQ_SET_SWITCH_MODE_TUNNEL         0x03
+       u8      mode;
+       u8      rsvd5[5];
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_set_switch_config);
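
A hypothetical composition of the new mode byte from the layout above (not a value taken from the driver, and using the driver's u8 typedef): request a mode switch, match the L4 destination port, UDP only, L4-port-only cloud filter mode.

    u8 mode = I40E_AQ_SET_SWITCH_BIT7_VALID |       /* bit 7: apply mode */
              I40E_AQ_SET_SWITCH_L4_TYPE_UDP |      /* bits 5:4: UDP */
              I40E_AQ_SET_SWITCH_MODE_L4_PORT;      /* bits 3:0: L4 port */

Leaving bit 6 clear selects the destination port.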
@@ -1359,6 +1360,7 @@ struct i40e_aqc_add_remove_cloud_filters {
                                        I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT)
        u8      big_buffer_flag;
 #define I40E_AQC_ADD_REM_CLOUD_CMD_BIG_BUFFER  1
+#define I40E_AQC_ADD_CLOUD_CMD_BB              1
        u8      reserved2[3];
        __le32  addr_high;
        __le32  addr_low;
@@ -1366,7 +1368,7 @@ struct i40e_aqc_add_remove_cloud_filters {
 
 I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_cloud_filters);
 
-struct i40e_aqc_add_remove_cloud_filters_element_data {
+struct i40e_aqc_cloud_filters_element_data {
        u8      outer_mac[6];
        u8      inner_mac[6];
        __le16  inner_vlan;
@@ -1378,6 +1380,9 @@ struct i40e_aqc_add_remove_cloud_filters_element_data {
                struct {
                        u8 data[16];
                } v6;
+               struct {
+                       __le16 data[8];
+               } raw_v6;
        } ipaddr;
        __le16  flags;
 #define I40E_AQC_ADD_CLOUD_FILTER_SHIFT                        0
@@ -1397,6 +1402,9 @@ struct i40e_aqc_add_remove_cloud_filters_element_data {
 #define I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC     0x000B
 #define I40E_AQC_ADD_CLOUD_FILTER_IIP                  0x000C
 /* 0x0010 to 0x0017 is for custom filters */
+#define I40E_AQC_ADD_CLOUD_FILTER_IP_PORT              0x0010 /* Dest IP + L4 Port */
+#define I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT             0x0011 /* Dest MAC + L4 Port */
+#define I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT                0x0012 /* Dest MAC + VLAN + L4 Port */
 
 #define I40E_AQC_ADD_CLOUD_FLAGS_TO_QUEUE              0x0080
 #define I40E_AQC_ADD_CLOUD_VNK_SHIFT                   6
@@ -1436,7 +1444,7 @@ struct i40e_aqc_add_remove_cloud_filters_element_data {
  * DCR288
  */
 struct i40e_aqc_add_rm_cloud_filt_elem_ext {
-       struct i40e_aqc_add_remove_cloud_filters_element_data element;
+       struct i40e_aqc_cloud_filters_element_data element;
        u16     general_fields[32];
 #define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD0   0
 #define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD1   1
@@ -1471,6 +1479,49 @@ struct i40e_aqc_add_rm_cloud_filt_elem_ext {
 #define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD7   30
 };
 
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_cloud_filters_element_data);
+
+/* i40e_aqc_cloud_filters_element_bb is used when
+ * I40E_AQC_ADD_CLOUD_CMD_BB flag is set.
+ */
+struct i40e_aqc_cloud_filters_element_bb {
+       struct i40e_aqc_cloud_filters_element_data element;
+       u16     general_fields[32];
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD0   0
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD1   1
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X10_WORD2   2
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD0   3
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD1   4
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X11_WORD2   5
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD0   6
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD1   7
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X12_WORD2   8
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD0   9
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD1   10
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X13_WORD2   11
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD0   12
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD1   13
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X14_WORD2   14
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0   15
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD1   16
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD2   17
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD3   18
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD4   19
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD5   20
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD6   21
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD7   22
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD0   23
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD1   24
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD2   25
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD3   26
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD4   27
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD5   28
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD6   29
+#define I40E_AQC_ADD_CLOUD_FV_FLU_0X17_WORD7   30
+};
+
+I40E_CHECK_STRUCT_LEN(0x80, i40e_aqc_cloud_filters_element_bb);
+
 struct i40e_aqc_remove_cloud_filters_completion {
        __le16 perfect_ovlan_used;
        __le16 perfect_ovlan_free;
@@ -1491,6 +1542,8 @@ struct i40e_filter_data {
        u8 input[3];
 };
 
+I40E_CHECK_STRUCT_LEN(4, i40e_filter_data);
+
 struct i40e_aqc_replace_cloud_filters_cmd {
        u8      valid_flags;
 #define I40E_AQC_REPLACE_L1_FILTER             0x0
@@ -1501,11 +1554,14 @@ struct i40e_aqc_replace_cloud_filters_cmd {
        u8      old_filter_type;
        u8      new_filter_type;
        u8      tr_bit;
-       u8      reserved[4];
+       u8      tr_bit2;
+       u8      reserved[3];
        __le32 addr_high;
        __le32 addr_low;
 };
 
+I40E_CHECK_CMD_LENGTH(i40e_aqc_replace_cloud_filters_cmd);
+
 struct i40e_aqc_replace_cloud_filters_cmd_buf {
        u8      data[32];
 /* Filter type INPUT codes*/
@@ -1530,6 +1586,8 @@ struct i40e_aqc_replace_cloud_filters_cmd_buf {
        struct i40e_filter_data filters[8];
 };
 
+I40E_CHECK_STRUCT_LEN(0x40, i40e_aqc_replace_cloud_filters_cmd_buf);
+
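
The I40E_CHECK_CMD_LENGTH/I40E_CHECK_STRUCT_LEN lines added here pin the admin-queue wire formats at build time. A C11 equivalent of what such a check enforces (a sketch, not the driver's own macro, which achieves roughly the same by making a constant expression fail to compile on mismatch):

    _Static_assert(sizeof(struct i40e_filter_data) == 4,
                   "i40e_filter_data must stay 4 bytes on the wire");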
 /* Add Mirror Rule (indirect or direct 0x0260)
  * Delete Mirror Rule (indirect or direct 0x0261)
  * note: some rule types (4,5) do not use an external buffer.
@@ -1878,23 +1936,115 @@ enum i40e_aq_phy_type {
        I40E_PHY_TYPE_25GBASE_LR                = 0x22,
        I40E_PHY_TYPE_25GBASE_AOC               = 0x23,
        I40E_PHY_TYPE_25GBASE_ACC               = 0x24,
+#ifdef CARLSVILLE_HW
+       I40E_PHY_TYPE_2_5GBASE_T                = 0x30,
+       I40E_PHY_TYPE_5GBASE_T                  = 0x31,
+#endif
        I40E_PHY_TYPE_MAX,
        I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP   = 0xFD,
        I40E_PHY_TYPE_EMPTY                     = 0xFE,
        I40E_PHY_TYPE_DEFAULT                   = 0xFF,
 };
 
+#ifdef CARLSVILLE_HW
+#define I40E_PHY_TYPES_BITMASK (BIT_ULL(I40E_PHY_TYPE_SGMII) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_XAUI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XFI) | \
+                               BIT_ULL(I40E_PHY_TYPE_SFI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XLAUI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XLPPI) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_UNRECOGNIZED) | \
+                               BIT_ULL(I40E_PHY_TYPE_UNSUPPORTED) | \
+                               BIT_ULL(I40E_PHY_TYPE_100BASE_TX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_T) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_T) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) | \
+                               BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_KR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_CR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC) | \
+                               BIT_ULL(I40E_PHY_TYPE_2_5GBASE_T) | \
+                               BIT_ULL(I40E_PHY_TYPE_5GBASE_T))
+#else
+#define I40E_PHY_TYPES_BITMASK (BIT_ULL(I40E_PHY_TYPE_SGMII) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_KX) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_KR) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_XAUI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XFI) | \
+                               BIT_ULL(I40E_PHY_TYPE_SFI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XLAUI) | \
+                               BIT_ULL(I40E_PHY_TYPE_XLPPI) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_UNRECOGNIZED) | \
+                               BIT_ULL(I40E_PHY_TYPE_UNSUPPORTED) | \
+                               BIT_ULL(I40E_PHY_TYPE_100BASE_TX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_T) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_T) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_SR) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_LR) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU) | \
+                               BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_SX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_LX) | \
+                               BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL) | \
+                               BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_KR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_CR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_SR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_LR) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_AOC) | \
+                               BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC))
+#endif
+
+#ifdef CARLSVILLE_HW
+#define I40E_LINK_SPEED_2_5GB_SHIFT    0x0
+#endif
 #define I40E_LINK_SPEED_100MB_SHIFT    0x1
 #define I40E_LINK_SPEED_1000MB_SHIFT   0x2
 #define I40E_LINK_SPEED_10GB_SHIFT     0x3
 #define I40E_LINK_SPEED_40GB_SHIFT     0x4
 #define I40E_LINK_SPEED_20GB_SHIFT     0x5
 #define I40E_LINK_SPEED_25GB_SHIFT     0x6
+#ifdef CARLSVILLE_HW
+#define I40E_LINK_SPEED_5GB_SHIFT      0x7
+#endif
 
 enum i40e_aq_link_speed {
        I40E_LINK_SPEED_UNKNOWN = 0,
        I40E_LINK_SPEED_100MB   = (1 << I40E_LINK_SPEED_100MB_SHIFT),
        I40E_LINK_SPEED_1GB     = (1 << I40E_LINK_SPEED_1000MB_SHIFT),
+#ifdef CARLSVILLE_HW
+       I40E_LINK_SPEED_2_5GB   = (1 << I40E_LINK_SPEED_2_5GB_SHIFT),
+       I40E_LINK_SPEED_5GB     = (1 << I40E_LINK_SPEED_5GB_SHIFT),
+#endif
        I40E_LINK_SPEED_10GB    = (1 << I40E_LINK_SPEED_10GB_SHIFT),
        I40E_LINK_SPEED_40GB    = (1 << I40E_LINK_SPEED_40GB_SHIFT),
        I40E_LINK_SPEED_20GB    = (1 << I40E_LINK_SPEED_20GB_SHIFT),
@@ -1940,6 +2090,10 @@ struct i40e_aq_get_phy_abilities_resp {
 #define I40E_AQ_PHY_TYPE_EXT_25G_LR    0x08
 #define I40E_AQ_PHY_TYPE_EXT_25G_AOC   0x10
 #define I40E_AQ_PHY_TYPE_EXT_25G_ACC   0x20
+#ifdef CARLSVILLE_HW
+#define I40E_AQ_PHY_TYPE_EXT_2_5GBASE_T        0x40
+#define I40E_AQ_PHY_TYPE_EXT_5GBASE_T  0x80
+#endif
        u8      fec_cfg_curr_mod_ext_info;
 #define I40E_AQ_ENABLE_FEC_KR          0x01
 #define I40E_AQ_ENABLE_FEC_RS          0x02
@@ -2184,7 +2338,9 @@ struct i40e_aqc_phy_register_access {
 #define I40E_AQ_PHY_REG_ACCESS_EXTERNAL        1
 #define I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE 2
        u8      dev_addres;
-       u8      reserved1[2];
+       u8      cmd_flags;
+#define I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE   1
+       u8      reserved1;
        __le32  reg_address;
        __le32  reg_value;
        u8      reserved2[4];
@@ -2199,6 +2355,8 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_phy_register_access);
 struct i40e_aqc_nvm_update {
        u8      command_flags;
 #define I40E_AQ_NVM_LAST_CMD                   0x01
+#define I40E_AQ_NVM_REARRANGE_TO_FLAT          0x20
+#define I40E_AQ_NVM_REARRANGE_TO_STRUCT                0x40
 #define I40E_AQ_NVM_FLASH_ONLY                 0x80
 #define I40E_AQ_NVM_PRESERVATION_FLAGS_SHIFT   1
 #define I40E_AQ_NVM_PRESERVATION_FLAGS_MASK    0x03
index 38c2f65..4fc1860 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _I40E_ALLOC_H_
 #define _I40E_ALLOC_H_
index e0a5be1..8a98aff 100644 (file)
@@ -1,42 +1,12 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "i40e_type.h"
 #include "i40e_adminq.h"
 #include "i40e_prototype.h"
 #include "virtchnl.h"
 
-
 /**
  * i40e_set_mac_type - Sets MAC type
  * @hw: pointer to the HW structure
@@ -65,6 +35,9 @@ STATIC enum i40e_status_code i40e_set_mac_type(struct i40e_hw *hw)
                case I40E_DEV_ID_QSFP_C:
                case I40E_DEV_ID_10G_BASE_T:
                case I40E_DEV_ID_10G_BASE_T4:
+#ifdef CARLSVILLE_HW
+               case I40E_DEV_ID_10G_BASE_T_BC:
+#endif
                case I40E_DEV_ID_20G_KR2:
                case I40E_DEV_ID_20G_KR2_A:
                case I40E_DEV_ID_25G_B:
@@ -1290,6 +1263,10 @@ STATIC enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
                break;
        case I40E_PHY_TYPE_100BASE_TX:
        case I40E_PHY_TYPE_1000BASE_T:
+#ifdef CARLSVILLE_HW
+       case I40E_PHY_TYPE_2_5GBASE_T:
+       case I40E_PHY_TYPE_5GBASE_T:
+#endif
        case I40E_PHY_TYPE_10GBASE_T:
                media = I40E_MEDIA_TYPE_BASET;
                break;
@@ -1326,6 +1303,29 @@ STATIC enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
        return media;
 }
 
+/**
+ * i40e_poll_globr - Poll for Global Reset completion
+ * @hw: pointer to the hardware structure
+ * @retry_limit: how many times to retry before failure
+ **/
+STATIC enum i40e_status_code i40e_poll_globr(struct i40e_hw *hw,
+                                            u32 retry_limit)
+{
+       u32 cnt, reg = 0;
+
+       for (cnt = 0; cnt < retry_limit; cnt++) {
+               reg = rd32(hw, I40E_GLGEN_RSTAT);
+               if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK))
+                       return I40E_SUCCESS;
+               i40e_msec_delay(100);
+       }
+
+       DEBUGOUT("Global reset failed.\n");
+       DEBUGOUT1("I40E_GLGEN_RSTAT = 0x%x\n", reg);
+
+       return I40E_ERR_RESET_FAILED;
+}
+
 #define I40E_PF_RESET_WAIT_COUNT       200
 /**
  * i40e_pf_reset - Reset the PF
@@ -1349,7 +1349,7 @@ enum i40e_status_code i40e_pf_reset(struct i40e_hw *hw)
                        I40E_GLGEN_RSTCTL_GRSTDEL_MASK) >>
                        I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT;
 
-       grst_del = grst_del * 20;
+       grst_del = min(grst_del * 20, 160U);
 
        for (cnt = 0; cnt < grst_del; cnt++) {
                reg = rd32(hw, I40E_GLGEN_RSTAT);
@@ -1395,14 +1395,14 @@ enum i40e_status_code i40e_pf_reset(struct i40e_hw *hw)
                        if (!(reg & I40E_PFGEN_CTRL_PFSWR_MASK))
                                break;
                        reg2 = rd32(hw, I40E_GLGEN_RSTAT);
-                       if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK) {
-                               DEBUGOUT("Core reset upcoming. Skipping PF reset request.\n");
-                               DEBUGOUT1("I40E_GLGEN_RSTAT = 0x%x\n", reg2);
-                               return I40E_ERR_NOT_READY;
-                       }
+                       if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK)
+                               break;
                        i40e_msec_delay(1);
                }
-               if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) {
+               if (reg2 & I40E_GLGEN_RSTAT_DEVSTATE_MASK) {
+                       if (i40e_poll_globr(hw, grst_del) != I40E_SUCCESS)
+                               return I40E_ERR_RESET_FAILED;
+               } else if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) {
                        DEBUGOUT("PF reset polling failed to complete.\n");
                        return I40E_ERR_RESET_FAILED;
                }
@@ -1883,6 +1883,10 @@ enum i40e_status_code i40e_aq_set_mac_config(struct i40e_hw *hw,
        if (crc_en)
                cmd->params |= I40E_AQ_SET_MAC_CONFIG_CRC_EN;
 
+#define I40E_AQ_SET_MAC_CONFIG_FC_DEFAULT_THRESHOLD    0x7FFF
+       cmd->fc_refresh_threshold =
+               CPU_TO_LE16(I40E_AQ_SET_MAC_CONFIG_FC_DEFAULT_THRESHOLD);
+
        status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
 
        return status;
@@ -2711,13 +2715,14 @@ enum i40e_status_code i40e_aq_get_switch_config(struct i40e_hw *hw,
  * i40e_aq_set_switch_config
  * @hw: pointer to the hardware structure
  * @flags: bit flag values to set
+ * @mode: cloud filter mode
  * @valid_flags: which bit flags to set
  * @cmd_details: pointer to command details structure or NULL
  *
  * Set switch configuration bits
  **/
 enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw,
-                               u16 flags, u16 valid_flags,
+                               u16 flags, u16 valid_flags, u8 mode,
                                struct i40e_asq_cmd_details *cmd_details)
 {
        struct i40e_aq_desc desc;
@@ -2729,6 +2734,7 @@ enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw,
                                          i40e_aqc_opc_set_switch_config);
        scfg->flags = CPU_TO_LE16(flags);
        scfg->valid_flags = CPU_TO_LE16(valid_flags);
+       scfg->mode = mode;
        if (hw->flags & I40E_HW_FLAG_802_1AD_CAPABLE) {
                scfg->switch_tag = CPU_TO_LE16(hw->switch_tag);
                scfg->first_tag = CPU_TO_LE16(hw->first_tag);
@@ -3708,9 +3714,10 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
        u32 valid_functions, num_functions;
        u32 number, logical_id, phys_id;
        struct i40e_hw_capabilities *p;
+       enum i40e_status_code status;
+       u16 id, ocp_cfg_word0;
        u8 major_rev;
        u32 i = 0;
-       u16 id;
 
        cap = (struct i40e_aqc_list_capabilities_element_resp *) buff;
 
@@ -4002,6 +4009,26 @@ STATIC void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
                        hw->num_ports++;
        }
 
+       /* OCP cards case: if a mezz is removed, the ethernet port is in a
+        * disabled state in the PRTGEN_CNF register. An additional NVM read
+        * is needed in order to check if we are dealing with an OCP card.
+        * Those cards have 4 PFs at minimum, so using PRTGEN_CNF for counting
+        * physical ports results in wrong partition id calculation and thus
+        * not supporting WoL.
+        */
+       if (hw->mac.type == I40E_MAC_X722) {
+               if (i40e_acquire_nvm(hw, I40E_RESOURCE_READ) == I40E_SUCCESS) {
+                       status = i40e_aq_read_nvm(hw, I40E_SR_EMP_MODULE_PTR,
+                                                 2 * I40E_SR_OCP_CFG_WORD0,
+                                                 sizeof(ocp_cfg_word0),
+                                                 &ocp_cfg_word0, true, NULL);
+                       if (status == I40E_SUCCESS &&
+                           (ocp_cfg_word0 & I40E_SR_OCP_ENABLED))
+                               hw->num_ports = 4;
+                       i40e_release_nvm(hw);
+               }
+       }
+
        valid_functions = p->valid_functions;
        num_functions = 0;
        while (valid_functions) {
@@ -4131,6 +4158,43 @@ i40e_aq_update_nvm_exit:
        return status;
 }
 
+/**
+ * i40e_aq_rearrange_nvm
+ * @hw: pointer to the hw struct
+ * @rearrange_nvm: defines direction of rearrangement
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Rearrange NVM structure, available only for transition FW
+ **/
+enum i40e_status_code i40e_aq_rearrange_nvm(struct i40e_hw *hw,
+                               u8 rearrange_nvm,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aqc_nvm_update *cmd;
+       enum i40e_status_code status;
+       struct i40e_aq_desc desc;
+
+       DEBUGFUNC("i40e_aq_rearrange_nvm");
+
+       cmd = (struct i40e_aqc_nvm_update *)&desc.params.raw;
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_nvm_update);
+
+       rearrange_nvm &= (I40E_AQ_NVM_REARRANGE_TO_FLAT |
+                        I40E_AQ_NVM_REARRANGE_TO_STRUCT);
+
+       if (!rearrange_nvm) {
+               status = I40E_ERR_PARAM;
+               goto i40e_aq_rearrange_nvm_exit;
+       }
+
+       cmd->command_flags |= rearrange_nvm;
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+i40e_aq_rearrange_nvm_exit:
+       return status;
+}
+
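
A hypothetical call site, assuming hw points at an initialized device running transition firmware; I40E_AQ_NVM_REARRANGE_TO_STRUCT requests the reverse direction:

    enum i40e_status_code status;

    status = i40e_aq_rearrange_nvm(hw, I40E_AQ_NVM_REARRANGE_TO_FLAT, NULL);
    if (status != I40E_SUCCESS)
            DEBUGOUT("NVM rearrange to flat layout failed\n");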
 /**
  * i40e_aq_nvm_progress
  * @hw: pointer to the hw struct
@@ -4488,6 +4552,9 @@ i40e_aq_set_dcb_parameters(struct i40e_hw *hw, bool dcb_enable,
                (struct i40e_aqc_set_dcb_parameters *)&desc.params.raw;
        enum i40e_status_code status;
 
+       if (!(hw->flags & I40E_HW_FLAG_FW_LLDP_STOPPABLE))
+               return I40E_ERR_DEVICE_NOT_SUPPORTED;
+
        i40e_fill_default_direct_cmd_desc(&desc,
                                          i40e_aqc_opc_set_dcb_parameters);
 
@@ -5693,10 +5760,10 @@ void i40e_add_filter_to_drop_tx_flow_control_frames(struct i40e_hw *hw,
  * to be shifted 1 byte over from the VxLAN VNI
  **/
 STATIC void i40e_fix_up_geneve_vni(
-       struct i40e_aqc_add_remove_cloud_filters_element_data *filters,
+       struct i40e_aqc_cloud_filters_element_data *filters,
        u8 filter_count)
 {
-       struct i40e_aqc_add_remove_cloud_filters_element_data *f = filters;
+       struct i40e_aqc_cloud_filters_element_data *f = filters;
        int i;
 
        for (i = 0; i < filter_count; i++) {
@@ -5721,13 +5788,13 @@ STATIC void i40e_fix_up_geneve_vni(
  * @filter_count: number of filters contained in the buffer
  *
  * Set the cloud filters for a given VSI.  The contents of the
- * i40e_aqc_add_remove_cloud_filters_element_data are filled
+ * i40e_aqc_cloud_filters_element_data are filled
  * in by the caller of the function.
  *
  **/
 enum i40e_status_code i40e_aq_add_cloud_filters(struct i40e_hw *hw,
        u16 seid,
-       struct i40e_aqc_add_remove_cloud_filters_element_data *filters,
+       struct i40e_aqc_cloud_filters_element_data *filters,
        u8 filter_count)
 {
        struct i40e_aq_desc desc;
@@ -5753,21 +5820,21 @@ enum i40e_status_code i40e_aq_add_cloud_filters(struct i40e_hw *hw,
 }
 
 /**
- * i40e_aq_add_cloud_filters_big_buffer
+ * i40e_aq_add_cloud_filters_bb
  * @hw: pointer to the hardware structure
  * @seid: VSI seid to add cloud filters from
  * @filters: Buffer which contains the filters in big buffer to be added
  * @filter_count: number of filters contained in the buffer
  *
  * Set the cloud filters for a given VSI.  The contents of the
- * i40e_aqc_add_rm_cloud_filt_elem_ext are filled in by the caller of
+ * i40e_aqc_cloud_filters_element_bb are filled in by the caller of
  * the function.
  *
  **/
-enum i40e_status_code i40e_aq_add_cloud_filters_big_buffer(struct i40e_hw *hw,
-       u16 seid,
-       struct i40e_aqc_add_rm_cloud_filt_elem_ext *filters,
-       u8 filter_count)
+enum i40e_status_code
+i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+                            struct i40e_aqc_cloud_filters_element_bb *filters,
+                            u8 filter_count)
 {
        struct i40e_aq_desc desc;
        struct i40e_aqc_add_remove_cloud_filters *cmd =
@@ -5784,9 +5851,8 @@ enum i40e_status_code i40e_aq_add_cloud_filters_big_buffer(struct i40e_hw *hw,
        desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
        cmd->num_filters = filter_count;
        cmd->seid = CPU_TO_LE16(seid);
-       cmd->big_buffer_flag = I40E_AQC_ADD_REM_CLOUD_CMD_BIG_BUFFER;
+       cmd->big_buffer_flag = I40E_AQC_ADD_CLOUD_CMD_BB;
 
-       /* adjust Geneve VNI for HW issue */
        for (i = 0; i < filter_count; i++) {
                u16 tnl_type;
                u32 ti;
@@ -5794,6 +5860,11 @@ enum i40e_status_code i40e_aq_add_cloud_filters_big_buffer(struct i40e_hw *hw,
                tnl_type = (LE16_TO_CPU(filters[i].element.flags) &
                           I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >>
                           I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT;
+
+               /* Due to hardware eccentricities, the VNI for Geneve is shifted
+                * one byte further than the Tenant ID of other tunnel
+                * types.
+                */
                if (tnl_type == I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE) {
                        ti = LE32_TO_CPU(filters[i].element.tenant_id);
                        filters[i].element.tenant_id = CPU_TO_LE32(ti << 8);
@@ -5806,21 +5877,21 @@ enum i40e_status_code i40e_aq_add_cloud_filters_big_buffer(struct i40e_hw *hw,
 }
 
 /**
- * i40e_aq_remove_cloud_filters
+ * i40e_aq_rem_cloud_filters
  * @hw: pointer to the hardware structure
  * @seid: VSI seid to remove cloud filters from
  * @filters: Buffer which contains the filters to be removed
  * @filter_count: number of filters contained in the buffer
  *
  * Remove the cloud filters for a given VSI.  The contents of the
- * i40e_aqc_add_remove_cloud_filters_element_data are filled
- * in by the caller of the function.
+ * i40e_aqc_cloud_filters_element_data are filled in by the caller
+ * of the function.
  *
  **/
-enum i40e_status_code i40e_aq_remove_cloud_filters(struct i40e_hw *hw,
-       u16 seid,
-       struct i40e_aqc_add_remove_cloud_filters_element_data *filters,
-       u8 filter_count)
+enum i40e_status_code
+i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 seid,
+                         struct i40e_aqc_cloud_filters_element_data *filters,
+                         u8 filter_count)
 {
        struct i40e_aq_desc desc;
        struct i40e_aqc_add_remove_cloud_filters *cmd =
@@ -5845,22 +5916,21 @@ enum i40e_status_code i40e_aq_remove_cloud_filters(struct i40e_hw *hw,
 }
 
 /**
- * i40e_aq_remove_cloud_filters_big_buffer
+ * i40e_aq_rem_cloud_filters_bb
  * @hw: pointer to the hardware structure
  * @seid: VSI seid to remove cloud filters from
  * @filters: Buffer which contains the filters in big buffer to be removed
  * @filter_count: number of filters contained in the buffer
  *
- * Remove the cloud filters for a given VSI.  The contents of the
- * i40e_aqc_add_rm_cloud_filt_elem_ext are filled in by the caller of
- * the function.
+ * Remove the big buffer cloud filters for a given VSI.  The contents of the
+ * i40e_aqc_cloud_filters_element_bb are filled in by the caller of the
+ * function.
  *
  **/
-enum i40e_status_code i40e_aq_remove_cloud_filters_big_buffer(
-       struct i40e_hw *hw,
-       u16 seid,
-       struct i40e_aqc_add_rm_cloud_filt_elem_ext *filters,
-       u8 filter_count)
+enum i40e_status_code
+i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+                            struct i40e_aqc_cloud_filters_element_bb *filters,
+                            u8 filter_count)
 {
        struct i40e_aq_desc desc;
        struct i40e_aqc_add_remove_cloud_filters *cmd =
@@ -5877,9 +5947,8 @@ enum i40e_status_code i40e_aq_remove_cloud_filters_big_buffer(
        desc.flags |= CPU_TO_LE16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
        cmd->num_filters = filter_count;
        cmd->seid = CPU_TO_LE16(seid);
-       cmd->big_buffer_flag = I40E_AQC_ADD_REM_CLOUD_CMD_BIG_BUFFER;
+       cmd->big_buffer_flag = I40E_AQC_ADD_CLOUD_CMD_BB;
 
-       /* adjust Geneve VNI for HW issue */
        for (i = 0; i < filter_count; i++) {
                u16 tnl_type;
                u32 ti;
@@ -5887,6 +5956,11 @@ enum i40e_status_code i40e_aq_remove_cloud_filters_big_buffer(
                tnl_type = (LE16_TO_CPU(filters[i].element.flags) &
                           I40E_AQC_ADD_CLOUD_TNL_TYPE_MASK) >>
                           I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT;
+
+               /* Due to hardware eccentricities, the VNI for Geneve is shifted
+                * one byte further than the Tenant ID of other tunnel
+                * types.
+                */
                if (tnl_type == I40E_AQC_ADD_CLOUD_TNL_TYPE_GENEVE) {
                        ti = LE32_TO_CPU(filters[i].element.tenant_id);
                        filters[i].element.tenant_id = CPU_TO_LE32(ti << 8);
@@ -5916,6 +5990,14 @@ i40e_status_code i40e_aq_replace_cloud_filters(struct i40e_hw *hw,
        enum i40e_status_code status = I40E_SUCCESS;
        int i = 0;
 
+       /* X722 doesn't support this command */
+       if (hw->mac.type == I40E_MAC_X722)
+               return I40E_ERR_DEVICE_NOT_SUPPORTED;
+
+       /* need FW version greater than 6.00 */
+       if (hw->aq.fw_maj_ver < 6)
+               return I40E_NOT_SUPPORTED;
+
        i40e_fill_default_direct_cmd_desc(&desc,
                                          i40e_aqc_opc_replace_cloud_filters);
 
@@ -5925,6 +6007,7 @@ i40e_status_code i40e_aq_replace_cloud_filters(struct i40e_hw *hw,
        cmd->new_filter_type = filters->new_filter_type;
        cmd->valid_flags = filters->valid_flags;
        cmd->tr_bit = filters->tr_bit;
+       cmd->tr_bit2 = filters->tr_bit2;
 
        status = i40e_asq_send_command(hw, &desc, cmd_buf,
                sizeof(struct i40e_aqc_replace_cloud_filters_cmd_buf),  NULL);
@@ -6618,6 +6701,9 @@ enum i40e_status_code i40e_write_phy_register(struct i40e_hw *hw,
                break;
        case I40E_DEV_ID_10G_BASE_T:
        case I40E_DEV_ID_10G_BASE_T4:
+#ifdef CARLSVILLE_HW
+       case I40E_DEV_ID_10G_BASE_T_BC:
+#endif
        case I40E_DEV_ID_10G_BASE_T_X722:
        case I40E_DEV_ID_25G_B:
        case I40E_DEV_ID_25G_SFP28:
@@ -6773,7 +6859,7 @@ static enum i40e_status_code i40e_led_get_reg(struct i40e_hw *hw, u16 led_addr,
        if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
                status = i40e_aq_get_phy_register(hw,
                                                I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
-                                               I40E_PHY_COM_REG_PAGE,
+                                               I40E_PHY_COM_REG_PAGE, true,
                                                I40E_PHY_LED_PROV_REG_1,
                                                reg_val, NULL);
        } else {
@@ -6801,7 +6887,7 @@ static enum i40e_status_code i40e_led_set_reg(struct i40e_hw *hw, u16 led_addr,
        if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
                status = i40e_aq_set_phy_register(hw,
                                                I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
-                                               I40E_PHY_COM_REG_PAGE,
+                                               I40E_PHY_COM_REG_PAGE, true,
                                                I40E_PHY_LED_PROV_REG_1,
                                                reg_val, NULL);
        } else {
@@ -6835,7 +6921,7 @@ enum i40e_status_code i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
        if (hw->flags & I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE) {
                status = i40e_aq_get_phy_register(hw,
                                                I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
-                                               I40E_PHY_COM_REG_PAGE,
+                                               I40E_PHY_COM_REG_PAGE, true,
                                                I40E_PHY_LED_PROV_REG_1,
                                                &reg_val_aq, NULL);
                if (status == I40E_SUCCESS)
@@ -7036,11 +7122,13 @@ do_retry:
                wr32(hw, reg_addr, reg_val);
 }
 
+#ifdef PF_DRIVER
 /**
  * i40e_aq_set_phy_register
  * @hw: pointer to the hw struct
  * @phy_select: select which phy should be accessed
  * @dev_addr: PHY device address
+ * @page_change: enable auto page change
  * @reg_addr: PHY register address
  * @reg_val: new register value
  * @cmd_details: pointer to command details structure or NULL
@@ -7048,7 +7136,7 @@ do_retry:
  * Write the external PHY register.
  **/
 enum i40e_status_code i40e_aq_set_phy_register(struct i40e_hw *hw,
-                               u8 phy_select, u8 dev_addr,
+                               u8 phy_select, u8 dev_addr, bool page_change,
                                u32 reg_addr, u32 reg_val,
                                struct i40e_asq_cmd_details *cmd_details)
 {
@@ -7065,6 +7153,9 @@ enum i40e_status_code i40e_aq_set_phy_register(struct i40e_hw *hw,
        cmd->reg_address = CPU_TO_LE32(reg_addr);
        cmd->reg_value = CPU_TO_LE32(reg_val);
 
+       if (!page_change)
+               cmd->cmd_flags = I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE;
+
        status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
 
        return status;
@@ -7075,6 +7166,7 @@ enum i40e_status_code i40e_aq_set_phy_register(struct i40e_hw *hw,
  * @hw: pointer to the hw struct
  * @phy_select: select which phy should be accessed
  * @dev_addr: PHY device address
+ * @page_change: enable auto page change
  * @reg_addr: PHY register address
  * @reg_val: read register value
  * @cmd_details: pointer to command details structure or NULL
@@ -7082,7 +7174,7 @@ enum i40e_status_code i40e_aq_set_phy_register(struct i40e_hw *hw,
  * Read the external PHY register.
  **/
 enum i40e_status_code i40e_aq_get_phy_register(struct i40e_hw *hw,
-                               u8 phy_select, u8 dev_addr,
+                               u8 phy_select, u8 dev_addr, bool page_change,
                                u32 reg_addr, u32 *reg_val,
                                struct i40e_asq_cmd_details *cmd_details)
 {
@@ -7098,6 +7190,9 @@ enum i40e_status_code i40e_aq_get_phy_register(struct i40e_hw *hw,
        cmd->dev_addres = dev_addr;
        cmd->reg_address = CPU_TO_LE32(reg_addr);
 
+       if (!page_change)
+               cmd->cmd_flags = I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE;
+
        status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
        if (!status)
                *reg_val = LE32_TO_CPU(cmd->reg_value);
@@ -7105,6 +7200,7 @@ enum i40e_status_code i40e_aq_get_phy_register(struct i40e_hw *hw,
        return status;
 }
 
+#endif /* PF_DRIVER */
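
Both accessors now take a page_change flag; passing false sets I40E_AQ_PHY_REG_ACCESS_DONT_CHANGE_QSFP_PAGE so firmware leaves the QSFP page selection untouched. A sketch, assuming hw, dev_addr and reg_addr are in scope:

    u32 reg_val;
    enum i40e_status_code status;

    status = i40e_aq_get_phy_register(hw, I40E_AQ_PHY_REG_ACCESS_EXTERNAL,
                                      dev_addr, false, reg_addr,
                                      &reg_val, NULL);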
 #ifdef VF_DRIVER
 
 /**
index 7600c92..a26f82b 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "i40e_adminq.h"
 #include "i40e_prototype.h"
@@ -1291,18 +1262,20 @@ static enum i40e_status_code _i40e_read_lldp_cfg(struct i40e_hw *hw,
 {
        u32 address, offset = (2 * word_offset);
        enum i40e_status_code ret;
+       __le16 raw_mem;
        u16 mem;
 
        ret = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
        if (ret != I40E_SUCCESS)
                return ret;
 
-       ret = i40e_aq_read_nvm(hw, 0x0, module * 2, sizeof(mem), &mem, true,
-                              NULL);
+       ret = i40e_aq_read_nvm(hw, 0x0, module * 2, sizeof(raw_mem), &raw_mem,
+                              true, NULL);
        i40e_release_nvm(hw);
        if (ret != I40E_SUCCESS)
                return ret;
 
+       mem = LE16_TO_CPU(raw_mem);
        /* Check if this pointer needs to be read in word size or 4K sector
         * units.
         */
@@ -1315,12 +1288,13 @@ static enum i40e_status_code _i40e_read_lldp_cfg(struct i40e_hw *hw,
        if (ret != I40E_SUCCESS)
                goto err_lldp_cfg;
 
-       ret = i40e_aq_read_nvm(hw, module, offset, sizeof(mem), &mem, true,
-                              NULL);
+       ret = i40e_aq_read_nvm(hw, module, offset, sizeof(raw_mem), &raw_mem,
+                              true, NULL);
        i40e_release_nvm(hw);
        if (ret != I40E_SUCCESS)
                return ret;
 
+       mem = LE16_TO_CPU(raw_mem);
        offset = mem + word_offset;
        offset *= 2;
 
index 3b709ef..85b0eed 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _I40E_DCB_H_
 #define _I40E_DCB_H_
index 66ff1cc..8b667c2 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _I40E_DEVIDS_H_
 #define _I40E_DEVIDS_H_
@@ -51,6 +22,9 @@ POSSIBILITY OF SUCH DAMAGE.
 #define I40E_DEV_ID_10G_BASE_T4                0x1589
 #define I40E_DEV_ID_25G_B              0x158A
 #define I40E_DEV_ID_25G_SFP28          0x158B
+#ifdef CARLSVILLE_HW
+#define I40E_DEV_ID_10G_BASE_T_BC      0x15FF
+#endif
 #if defined(INTEGRATED_VF) || defined(VF_DRIVER) || defined(I40E_NDIS_SUPPORT)
 #define I40E_DEV_ID_VF                 0x154C
 #define I40E_DEV_ID_VF_HV              0x1571
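
Device IDs such as the new Carlsville 0x15FF end up in the PMD's PCI match table. A hedged sketch of the corresponding entry; RTE_PCI_DEVICE and I40E_INTEL_VENDOR_ID are the usual spellings from rte_bus_pci.h and i40e_devids.h, assumed here rather than shown by this hunk:

    #include <rte_bus_pci.h>

    /* Hypothetical match-table fragment for the new device ID. */
    static const struct rte_pci_id example_id_map[] = {
    #ifdef CARLSVILLE_HW
        { RTE_PCI_DEVICE(I40E_INTEL_VENDOR_ID, I40E_DEV_ID_10G_BASE_T_BC) },
    #endif
        { .vendor_id = 0 },             /* sentinel entry */
    };
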
index c3c76a0..3ccbea4 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "i40e_diag.h"
 #include "i40e_prototype.h"
@@ -84,7 +55,7 @@ static enum i40e_status_code i40e_diag_reg_pattern_test(struct i40e_hw *hw,
        return I40E_SUCCESS;
 }
 
-struct i40e_diag_reg_test_info i40e_reg_list[] = {
+static struct i40e_diag_reg_test_info i40e_reg_list[] = {
        /* offset               mask         elements   stride */
        {I40E_QTX_CTL(0),       0x0000FFBF, 1, I40E_QTX_CTL(1) - I40E_QTX_CTL(0)},
        {I40E_PFINT_ITR0(0),    0x00000FFF, 3, I40E_PFINT_ITR0(1) - I40E_PFINT_ITR0(0)},
index 105b119..4434fc9 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _I40E_DIAG_H_
 #define _I40E_DIAG_H_
@@ -50,8 +21,6 @@ struct i40e_diag_reg_test_info {
        u32 stride;     /* bytes between each element */
 };
 
-extern struct i40e_diag_reg_test_info i40e_reg_list[];
-
 enum i40e_status_code i40e_diag_set_loopback(struct i40e_hw *hw,
                                             enum i40e_lb_mode mode);
 enum i40e_status_code i40e_diag_fw_alive_test(struct i40e_hw *hw);
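
Making i40e_reg_list static is safe because the table is only walked inside i40e_diag.c. For orientation, a hypothetical sketch of how a {offset, mask, elements, stride} row drives the pattern test shown earlier (the real loop lives next to the table):

    static enum i40e_status_code
    example_walk_reg_table(struct i40e_hw *hw,
                           const struct i40e_diag_reg_test_info *tbl)
    {
        enum i40e_status_code ret = I40E_SUCCESS;
        u32 elem, reg;

        /* each row covers `elements` registers, `stride` bytes apart,
         * with the writable bits described by `mask`
         */
        for (; tbl->elements != 0 && ret == I40E_SUCCESS; tbl++)
            for (elem = 0; elem < tbl->elements; elem++) {
                reg = tbl->offset + elem * tbl->stride;
                ret = i40e_diag_reg_pattern_test(hw, reg, tbl->mask);
                if (ret != I40E_SUCCESS)
                    break;
            }
        return ret;
    }
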
index 502407b..11c9ae2 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "i40e_osdep.h"
 #include "i40e_register.h"
index 343b251..289264e 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _I40E_HMC_H_
 #define _I40E_HMC_H_
index f03f381..0afee49 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "i40e_osdep.h"
 #include "i40e_register.h"
@@ -143,7 +114,7 @@ enum i40e_status_code i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
                ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
                DEBUGOUT3("i40e_init_lan_hmc: Tx context: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
                          txq_num, obj->max_cnt, ret_code);
-               goto init_lan_hmc_out;
+               goto free_hmc_out;
        }
 
        /* aggregate values into the full LAN object for later */
@@ -166,7 +137,7 @@ enum i40e_status_code i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
                ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
                DEBUGOUT3("i40e_init_lan_hmc: Rx context: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
                          rxq_num, obj->max_cnt, ret_code);
-               goto init_lan_hmc_out;
+               goto free_hmc_out;
        }
 
        /* aggregate values into the full LAN object for later */
@@ -189,7 +160,7 @@ enum i40e_status_code i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
                ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
                DEBUGOUT3("i40e_init_lan_hmc: FCoE context: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
                          fcoe_cntx_num, obj->max_cnt, ret_code);
-               goto init_lan_hmc_out;
+               goto free_hmc_out;
        }
 
        /* aggregate values into the full LAN object for later */
@@ -212,7 +183,7 @@ enum i40e_status_code i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
                ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
                DEBUGOUT3("i40e_init_lan_hmc: FCoE filter: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
                          fcoe_filt_num, obj->max_cnt, ret_code);
-               goto init_lan_hmc_out;
+               goto free_hmc_out;
        }
 
        /* aggregate values into the full LAN object for later */
@@ -233,7 +204,7 @@ enum i40e_status_code i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
                                          (sizeof(struct i40e_hmc_sd_entry) *
                                          hw->hmc.sd_table.sd_cnt));
                if (ret_code)
-                       goto init_lan_hmc_out;
+                       goto free_hmc_out;
                hw->hmc.sd_table.sd_entry =
                        (struct i40e_hmc_sd_entry *)hw->hmc.sd_table.addr.va;
        }
@@ -241,6 +212,11 @@ enum i40e_status_code i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
        full_obj->size = l2fpm_size;
 
 init_lan_hmc_out:
+       return ret_code;
+free_hmc_out:
+       if (hw->hmc.hmc_obj_virt_mem.va)
+               i40e_free_virt_mem(hw, &hw->hmc.hmc_obj_virt_mem);
+
        return ret_code;
 }
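
The retargeted gotos above fix an error-path leak: previously the object-count failures returned through init_lan_hmc_out with hmc_obj_virt_mem still allocated. Reduced to its shape (hypothetical function; the alloc/free calls are the real ones used above):

    static enum i40e_status_code
    example_init_with_cleanup(struct i40e_hw *hw, u32 count)
    {
        enum i40e_status_code ret;

        ret = i40e_allocate_virt_mem(hw, &hw->hmc.hmc_obj_virt_mem,
                        count * sizeof(struct i40e_hmc_obj_info));
        if (ret != I40E_SUCCESS)
            return ret;                 /* nothing to undo yet */

        if (count == 0) {               /* stand-in for the count checks */
            ret = I40E_ERR_INVALID_HMC_OBJ_COUNT;
            goto free_hmc_out;          /* failure after the allocation */
        }
        return I40E_SUCCESS;            /* success path keeps the memory */

    free_hmc_out:
        if (hw->hmc.hmc_obj_virt_mem.va)
            i40e_free_virt_mem(hw, &hw->hmc.hmc_obj_virt_mem);
        return ret;
    }
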
 
index b2a4310..e531ec4 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _I40E_LAN_HMC_H_
 #define _I40E_LAN_HMC_H_
index c77dac0..6c8ca87 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "i40e_prototype.h"
 
index 8e5c593..8a2d82a 100644
@@ -1,34 +1,6 @@
-/******************************************************************************
-
-  Copyright (c) 2001-2015, Intel Corporation
-  All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions are met:
-
-   1. Redistributions of source code must retain the above copyright notice,
-      this list of conditions and the following disclaimer.
-
-   2. Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in the
-      documentation and/or other materials provided with the distribution.
-
-   3. Neither the name of the Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived from
-      this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-  POSSIBILITY OF SUCH DAMAGE.
-******************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _I40E_OSDEP_H_
 #define _I40E_OSDEP_H_
@@ -233,9 +205,9 @@ struct i40e_spinlock {
 #define i40e_memcpy(a, b, c, d) rte_memcpy((a), (b), (c))
 
 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
-#define DELAY(x) rte_delay_us(x)
-#define i40e_usec_delay(x) rte_delay_us(x)
-#define i40e_msec_delay(x) rte_delay_us(1000*(x))
+#define DELAY(x) rte_delay_us_sleep(x)
+#define i40e_usec_delay(x) DELAY(x)
+#define i40e_msec_delay(x) DELAY(1000 * (x))
 #define udelay(x) DELAY(x)
 #define msleep(x) DELAY(1000*(x))
 #define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000))
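
The delay rework above changes behavior, not just names: rte_delay_us busy-polls the timestamp counter, while rte_delay_us_sleep (introduced in this release and used by this patch) yields to the OS scheduler. Since these macros back admin-queue and link-management waits rather than datapath code, sleeping frees the core. Illustration only:

    #include <rte_cycles.h>

    static void example_delays(void)
    {
        rte_delay_us(100);        /* spins: core busy for the full 100 us */
        rte_delay_us_sleep(100);  /* sleeps: core available to other threads */
    }
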
index c6ec2d7..0cf006d 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _I40E_PROTOTYPE_H_
 #define _I40E_PROTOTYPE_H_
@@ -228,7 +199,7 @@ enum i40e_status_code i40e_aq_get_switch_config(struct i40e_hw *hw,
                                u16 buf_size, u16 *start_seid,
                                struct i40e_asq_cmd_details *cmd_details);
 enum i40e_status_code i40e_aq_set_switch_config(struct i40e_hw *hw,
-                               u16 flags, u16 valid_flags,
+                               u16 flags, u16 valid_flags, u8 mode,
                                struct i40e_asq_cmd_details *cmd_details);
 enum i40e_status_code i40e_aq_request_resource(struct i40e_hw *hw,
                                enum i40e_aq_resources_ids resource,
@@ -265,6 +236,9 @@ enum i40e_status_code i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
                                u32 offset, u16 length, void *data,
                                bool last_command, u8 preservation_flags,
                                struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code i40e_aq_rearrange_nvm(struct i40e_hw *hw,
+                               u8 rearrange_nvm,
+                               struct i40e_asq_cmd_details *cmd_details);
 enum i40e_status_code i40e_aq_nvm_progress(struct i40e_hw *hw, u8 *progress,
                                struct i40e_asq_cmd_details *cmd_details);
 enum i40e_status_code i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
@@ -404,24 +378,24 @@ enum i40e_status_code i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
                struct i40e_asq_cmd_details *cmd_details);
 enum i40e_status_code i40e_aq_resume_port_tx(struct i40e_hw *hw,
                                struct i40e_asq_cmd_details *cmd_details);
+enum i40e_status_code
+i40e_aq_add_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+                            struct i40e_aqc_cloud_filters_element_bb *filters,
+                            u8 filter_count);
+enum i40e_status_code
+i40e_aq_add_cloud_filters(struct i40e_hw *hw, u16 vsi,
+                         struct i40e_aqc_cloud_filters_element_data *filters,
+                         u8 filter_count);
+enum i40e_status_code
+i40e_aq_rem_cloud_filters(struct i40e_hw *hw, u16 vsi,
+                         struct i40e_aqc_cloud_filters_element_data *filters,
+                         u8 filter_count);
+enum i40e_status_code
+i40e_aq_rem_cloud_filters_bb(struct i40e_hw *hw, u16 seid,
+                            struct i40e_aqc_cloud_filters_element_bb *filters,
+                            u8 filter_count);
 enum i40e_status_code i40e_read_lldp_cfg(struct i40e_hw *hw,
                                        struct i40e_lldp_variables *lldp_cfg);
-enum i40e_status_code i40e_aq_add_cloud_filters(struct i40e_hw *hw,
-               u16 vsi,
-               struct i40e_aqc_add_remove_cloud_filters_element_data *filters,
-               u8 filter_count);
-enum i40e_status_code i40e_aq_add_cloud_filters_big_buffer(struct i40e_hw *hw,
-       u16 seid,
-       struct i40e_aqc_add_rm_cloud_filt_elem_ext *filters,
-       u8 filter_count);
-enum i40e_status_code i40e_aq_remove_cloud_filters(struct i40e_hw *hw,
-               u16 vsi,
-               struct i40e_aqc_add_remove_cloud_filters_element_data *filters,
-               u8 filter_count);
-enum i40e_status_code i40e_aq_remove_cloud_filters_big_buffer(
-       struct i40e_hw *hw, u16 seid,
-       struct i40e_aqc_add_rm_cloud_filt_elem_ext *filters,
-       u8 filter_count);
 enum i40e_status_code i40e_aq_replace_cloud_filters(struct i40e_hw *hw,
                struct i40e_aqc_replace_cloud_filters_cmd *filters,
                struct i40e_aqc_replace_cloud_filters_cmd_buf *cmd_buf);
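
The prototype block above tracks the share-code rename: *_big_buffer becomes *_bb, remove becomes rem, and the element types become i40e_aqc_cloud_filters_element_data / _bb. A hedged add/remove pair with the new names (hypothetical wrapper; note the non-_bb calls take the embedded element, as the i40e_ethdev.c hunks below also do):

    static enum i40e_status_code
    example_cloud_filter_cycle(struct i40e_hw *hw, u16 seid,
                               struct i40e_aqc_cloud_filters_element_bb *bb)
    {
        enum i40e_status_code ret;

        ret = i40e_aq_add_cloud_filters_bb(hw, seid, bb, 1);
        if (ret != I40E_SUCCESS)
            return ret;

        /* the plain variants operate on the smaller element_data */
        return i40e_aq_rem_cloud_filters(hw, seid, &bb->element, 1);
    }
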
@@ -574,11 +548,11 @@ enum i40e_status_code i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
                                struct i40e_asq_cmd_details *cmd_details);
 void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val);
 enum i40e_status_code i40e_aq_set_phy_register(struct i40e_hw *hw,
-                               u8 phy_select, u8 dev_addr,
+                               u8 phy_select, u8 dev_addr, bool page_change,
                                u32 reg_addr, u32 reg_val,
                                struct i40e_asq_cmd_details *cmd_details);
 enum i40e_status_code i40e_aq_get_phy_register(struct i40e_hw *hw,
-                               u8 phy_select, u8 dev_addr,
+                               u8 phy_select, u8 dev_addr, bool page_change,
                                u32 reg_addr, u32 *reg_val,
                                struct i40e_asq_cmd_details *cmd_details);
 
index df66e76..e93ec3f 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _I40E_REGISTER_H_
 #define _I40E_REGISTER_H_
index 49af2d9..1dad4f4 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _I40E_STATUS_H_
 #define _I40E_STATUS_H_
index 006a11a..77562f2 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _I40E_TYPE_H_
 #define _I40E_TYPE_H_
@@ -358,6 +329,14 @@ struct i40e_phy_info {
                                             I40E_PHY_TYPE_OFFSET)
 #define I40E_CAP_PHY_TYPE_25GBASE_ACC BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC + \
                                             I40E_PHY_TYPE_OFFSET)
+#ifdef CARLSVILLE_HW
+/* Offset for 2.5G/5G PHY Types value to bit number conversion */
+#define I40E_PHY_TYPE_OFFSET2 (-10)
+#define I40E_CAP_PHY_TYPE_2_5GBASE_T BIT_ULL(I40E_PHY_TYPE_2_5GBASE_T + \
+                                            I40E_PHY_TYPE_OFFSET2)
+#define I40E_CAP_PHY_TYPE_5GBASE_T BIT_ULL(I40E_PHY_TYPE_5GBASE_T + \
+                                            I40E_PHY_TYPE_OFFSET2)
+#endif
 #define I40E_HW_CAP_MAX_GPIO                   30
 #define I40E_HW_CAP_MDIO_PORT_MODE_MDIO                0
 #define I40E_HW_CAP_MDIO_PORT_MODE_I2C         1
@@ -378,6 +357,16 @@ struct i40e_hw_capabilities {
 #define I40E_NVM_IMAGE_TYPE_CLOUD      0x2
 #define I40E_NVM_IMAGE_TYPE_UDP_CLOUD  0x3
 
+       /* Cloud filter modes:
+        * Mode1: Filter on L4 port only
+        * Mode2: Filter for non-tunneled traffic
+        * Mode3: Filter for tunneled traffic
+        */
+#define I40E_CLOUD_FILTER_MODE1        0x6
+#define I40E_CLOUD_FILTER_MODE2        0x7
+#define I40E_CLOUD_FILTER_MODE3        0x8
+#define I40E_SWITCH_MODE_MASK  0xF
+
        u32  management_mode;
        u32  mng_protocols_over_mctp;
 #define I40E_MNG_PROTOCOL_PLDM         0x2
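
These mode values feed the new fourth argument of i40e_aq_set_switch_config (see the updated prototype earlier in this patch and the call in i40e_vlan_tpid_set below). A minimal hedged sketch that selects a mode while leaving other switch settings untouched:

    /* Hypothetical: select L4-port-only cloud filtering (Mode1). */
    static enum i40e_status_code
    example_set_cloud_mode(struct i40e_hw *hw)
    {
        /* flags = valid_flags = 0: change no other switch settings */
        return i40e_aq_set_switch_config(hw, 0, 0,
                                         I40E_CLOUD_FILTER_MODE1, NULL);
    }
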
@@ -731,6 +720,7 @@ struct i40e_hw {
 #define I40E_HW_FLAG_802_1AD_CAPABLE        BIT_ULL(1)
 #define I40E_HW_FLAG_AQ_PHY_ACCESS_CAPABLE  BIT_ULL(2)
 #define I40E_HW_FLAG_NVM_READ_REQUIRES_LOCK BIT_ULL(3)
+#define I40E_HW_FLAG_FW_LLDP_STOPPABLE     BIT_ULL(4)
        u64 flags;
 
        /* Used in set switch config AQ command */
@@ -1541,7 +1531,9 @@ struct i40e_hw_port_stats {
 #define I40E_SR_CONTROL_WORD_1_MASK    (0x03 << I40E_SR_CONTROL_WORD_1_SHIFT)
 #define I40E_SR_CONTROL_WORD_1_NVM_BANK_VALID  BIT(5)
 #define I40E_SR_NVM_MAP_STRUCTURE_TYPE         BIT(12)
-#define I40E_PTR_TYPE                           BIT(15)
+#define I40E_PTR_TYPE                          BIT(15)
+#define I40E_SR_OCP_CFG_WORD0                  0x2B
+#define I40E_SR_OCP_ENABLED                    BIT(15)
 
 /* Shadow RAM related */
 #define I40E_SR_SECTOR_SIZE_IN_WORDS   0x800
index 401a147..d4c8f87 100644
@@ -12,10 +12,13 @@ sources = [
 ]
 
 error_cflags = ['-Wno-sign-compare', '-Wno-unused-value',
-               '-Wno-format', '-Wno-unused-but-set-variable',
-               '-Wno-strict-aliasing'
+               '-Wno-format', '-Wno-error=format-security',
+               '-Wno-strict-aliasing', '-Wno-unused-but-set-variable'
 ]
 c_args = cflags
+if allow_experimental_apis
+       c_args += '-DALLOW_EXPERIMENTAL_API'
+endif
 foreach flag: error_cflags
        if cc.has_argument(flag)
                c_args += flag
index b2d5fe7..88096cb 100644
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2013 - 2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _VIRTCHNL_H_
 #define _VIRTCHNL_H_
index 85a6a86..1c77906 100644
@@ -44,6 +44,7 @@
 #define ETH_I40E_FLOATING_VEB_LIST_ARG "floating_veb_list"
 #define ETH_I40E_SUPPORT_MULTI_DRIVER  "support-multi-driver"
 #define ETH_I40E_QUEUE_NUM_PER_VF_ARG  "queue-num-per-vf"
+#define ETH_I40E_USE_LATEST_VEC        "use-latest-supported-vec"
 
 #define I40E_CLEAR_PXE_WAIT_MS     200
 
@@ -292,6 +293,7 @@ static void i40e_stat_update_48(struct i40e_hw *hw,
                               uint64_t *stat);
 static void i40e_pf_config_irq0(struct i40e_hw *hw, bool no_queue);
 static void i40e_dev_interrupt_handler(void *param);
+static void i40e_dev_alarm_handler(void *param);
 static int i40e_res_pool_init(struct i40e_res_pool_info *pool,
                                uint32_t base, uint32_t num);
 static void i40e_res_pool_destroy(struct i40e_res_pool_info *pool);
@@ -389,7 +391,7 @@ static int i40e_sw_ethertype_filter_insert(struct i40e_pf *pf,
                                   struct i40e_ethertype_filter *filter);
 
 static int i40e_tunnel_filter_convert(
-       struct i40e_aqc_add_rm_cloud_filt_elem_ext *cld_filter,
+       struct i40e_aqc_cloud_filters_element_bb *cld_filter,
        struct i40e_tunnel_filter *tunnel_filter);
 static int i40e_sw_tunnel_filter_insert(struct i40e_pf *pf,
                                struct i40e_tunnel_filter *tunnel_filter);
@@ -408,6 +410,7 @@ static const char *const valid_keys[] = {
        ETH_I40E_FLOATING_VEB_LIST_ARG,
        ETH_I40E_SUPPORT_MULTI_DRIVER,
        ETH_I40E_QUEUE_NUM_PER_VF_ARG,
+       ETH_I40E_USE_LATEST_VEC,
        NULL};
 
 static const struct rte_pci_id pci_id_i40e_map[] = {
@@ -1201,6 +1204,66 @@ i40e_aq_debug_write_global_register(struct i40e_hw *hw,
        return i40e_aq_debug_write_register(hw, reg_addr, reg_val, cmd_details);
 }
 
+static int
+i40e_parse_latest_vec_handler(__rte_unused const char *key,
+                               const char *value,
+                               void *opaque)
+{
+       struct i40e_adapter *ad;
+       int use_latest_vec;
+
+       ad = (struct i40e_adapter *)opaque;
+
+       use_latest_vec = atoi(value);
+
+       if (use_latest_vec != 0 && use_latest_vec != 1)
+               PMD_DRV_LOG(WARNING, "Value should be 0 or 1, set it as 1!");
+
+       ad->use_latest_vec = (uint8_t)use_latest_vec;
+
+       return 0;
+}
+
+static int
+i40e_use_latest_vec(struct rte_eth_dev *dev)
+{
+       struct i40e_adapter *ad =
+               I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+       struct rte_kvargs *kvlist;
+       int kvargs_count;
+
+       ad->use_latest_vec = false;
+
+       if (!dev->device->devargs)
+               return 0;
+
+       kvlist = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
+       if (!kvlist)
+               return -EINVAL;
+
+       kvargs_count = rte_kvargs_count(kvlist, ETH_I40E_USE_LATEST_VEC);
+       if (!kvargs_count) {
+               rte_kvargs_free(kvlist);
+               return 0;
+       }
+
+       if (kvargs_count > 1)
+               PMD_DRV_LOG(WARNING, "More than one argument \"%s\" and only "
+                           "the first invalid or last valid one is used !",
+                           ETH_I40E_USE_LATEST_VEC);
+
+       if (rte_kvargs_process(kvlist, ETH_I40E_USE_LATEST_VEC,
+                               i40e_parse_latest_vec_handler, ad) < 0) {
+               rte_kvargs_free(kvlist);
+               return -EINVAL;
+       }
+
+       rte_kvargs_free(kvlist);
+       return 0;
+}
+
+#define I40E_ALARM_INTERVAL 50000 /* us */
+
 static int
 eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
 {
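
For reference, the new devarg is passed the usual way, appended to a whitelisted PCI address; e.g. with testpmd (the device address is illustrative):

    ./testpmd -w 0000:02:00.0,use-latest-supported-vec=1 -- -i
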
@@ -1263,13 +1326,12 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
 
        /* Check if need to support multi-driver */
        i40e_support_multi_driver(dev);
+       /* Check if users want the latest supported vec path */
+       i40e_use_latest_vec(dev);
 
        /* Make sure all is clean before doing PF reset */
        i40e_clear_hw(hw);
 
-       /* Initialize the hardware */
-       i40e_hw_init(dev);
-
        /* Reset here to make sure all is clean for each PF */
        ret = i40e_pf_reset(hw);
        if (ret) {
@@ -1284,6 +1346,23 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
                return ret;
        }
 
+       /* Initialize the parameters for adminq */
+       i40e_init_adminq_parameter(hw);
+       ret = i40e_init_adminq(hw);
+       if (ret != I40E_SUCCESS) {
+               PMD_INIT_LOG(ERR, "Failed to init adminq: %d", ret);
+               return -EIO;
+       }
+       PMD_INIT_LOG(INFO, "FW %d.%d API %d.%d NVM %02d.%02d.%02d eetrack %04x",
+                    hw->aq.fw_maj_ver, hw->aq.fw_min_ver,
+                    hw->aq.api_maj_ver, hw->aq.api_min_ver,
+                    ((hw->nvm.version >> 12) & 0xf),
+                    ((hw->nvm.version >> 4) & 0xff),
+                    (hw->nvm.version & 0xf), hw->nvm.eetrack);
+
+       /* Initialize the hardware */
+       i40e_hw_init(dev);
+
        i40e_config_automask(pf);
 
        i40e_set_default_pctype_table(dev);
@@ -1299,20 +1378,6 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
        /* Initialize the input set for filters (hash and fd) to default value */
        i40e_filter_input_set_init(pf);
 
-       /* Initialize the parameters for adminq */
-       i40e_init_adminq_parameter(hw);
-       ret = i40e_init_adminq(hw);
-       if (ret != I40E_SUCCESS) {
-               PMD_INIT_LOG(ERR, "Failed to init adminq: %d", ret);
-               return -EIO;
-       }
-       PMD_INIT_LOG(INFO, "FW %d.%d API %d.%d NVM %02d.%02d.%02d eetrack %04x",
-                    hw->aq.fw_maj_ver, hw->aq.fw_min_ver,
-                    hw->aq.api_maj_ver, hw->aq.api_min_ver,
-                    ((hw->nvm.version >> 12) & 0xf),
-                    ((hw->nvm.version >> 4) & 0xff),
-                    (hw->nvm.version & 0xf), hw->nvm.eetrack);
-
        /* initialise the L3_MAP register */
        if (!pf->support_multi_driver) {
                ret = i40e_aq_debug_write_global_register(hw,
@@ -1663,9 +1728,6 @@ eth_i40e_dev_uninit(struct rte_eth_dev *dev)
        /* uninitialize pf host driver */
        i40e_pf_host_uninit(dev);
 
-       rte_free(dev->data->mac_addrs);
-       dev->data->mac_addrs = NULL;
-
        /* disable uio intr before callback unregister */
        rte_intr_disable(intr_handle);
 
@@ -1722,6 +1784,10 @@ i40e_dev_configure(struct rte_eth_dev *dev)
        ad->tx_simple_allowed = true;
        ad->tx_vec_allowed = true;
 
+       /* Only legacy filter API needs the following fdir config. So when the
+        * legacy filter API is deprecated, the following codes should also be
+        * removed.
+        */
        if (dev->data->dev_conf.fdir_conf.mode == RTE_FDIR_MODE_PERFECT) {
                ret = i40e_fdir_setup(pf);
                if (ret != I40E_SUCCESS) {
@@ -1779,7 +1845,11 @@ err_dcb:
        rte_free(pf->vmdq);
        pf->vmdq = NULL;
 err:
-       /* need to release fdir resource if exists */
+       /* Need to release fdir resource if exists.
+        * Only legacy filter API needs the following fdir config. So when the
+        * legacy filter API is deprecated, the following code should also be
+        * removed.
+        */
        i40e_fdir_teardown(pf);
        return ret;
 }
@@ -2293,8 +2363,13 @@ i40e_dev_start(struct rte_eth_dev *dev)
                i40e_dev_link_update(dev, 0);
        }
 
-       /* enable uio intr after callback register */
-       rte_intr_enable(intr_handle);
+       if (dev->data->dev_conf.intr_conf.rxq == 0) {
+               rte_eal_alarm_set(I40E_ALARM_INTERVAL,
+                                 i40e_dev_alarm_handler, dev);
+       } else {
+               /* enable uio intr after callback register */
+               rte_intr_enable(intr_handle);
+       }
 
        i40e_filter_restore(pf);
 
@@ -2324,6 +2399,12 @@ i40e_dev_stop(struct rte_eth_dev *dev)
 
        if (hw->adapter_stopped == 1)
                return;
+
+       if (dev->data->dev_conf.intr_conf.rxq == 0) {
+               rte_eal_alarm_cancel(i40e_dev_alarm_handler, dev);
+               rte_intr_enable(intr_handle);
+       }
+
        /* Disable all queues */
        i40e_dev_switch_queues(pf, FALSE);
 
@@ -2406,6 +2487,11 @@ i40e_dev_close(struct rte_eth_dev *dev)
        i40e_pf_disable_irq0(hw);
        rte_intr_disable(intr_handle);
 
+       /*
+        * Only legacy filter API needs the following fdir config. So when the
+        * legacy filter API is deprecated, the following code should also be
+        * removed.
+        */
        i40e_fdir_teardown(pf);
 
        /* shutdown and destroy the HMC */
@@ -2498,6 +2584,10 @@ i40e_dev_promiscuous_disable(struct rte_eth_dev *dev)
        if (status != I40E_SUCCESS)
                PMD_DRV_LOG(ERR, "Failed to disable unicast promiscuous");
 
+       /* must remain in all_multicast mode */
+       if (dev->data->all_multicast == 1)
+               return;
+
        status = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid,
                                                        false, NULL);
        if (status != I40E_SUCCESS)
@@ -3363,8 +3453,8 @@ i40e_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                DEV_RX_OFFLOAD_UDP_CKSUM |
                DEV_RX_OFFLOAD_TCP_CKSUM |
                DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
-               DEV_RX_OFFLOAD_CRC_STRIP |
                DEV_RX_OFFLOAD_KEEP_CRC |
+               DEV_RX_OFFLOAD_SCATTER |
                DEV_RX_OFFLOAD_VLAN_EXTEND |
                DEV_RX_OFFLOAD_VLAN_FILTER |
                DEV_RX_OFFLOAD_JUMBO_FRAME;
@@ -3577,7 +3667,7 @@ i40e_vlan_tpid_set(struct rte_eth_dev *dev,
                        if (vlan_type == ETH_VLAN_TYPE_OUTER)
                                hw->second_tag = rte_cpu_to_le_16(tpid);
                }
-               ret = i40e_aq_set_switch_config(hw, 0, 0, NULL);
+               ret = i40e_aq_set_switch_config(hw, 0, 0, 0, NULL);
                if (ret != I40E_SUCCESS) {
                        PMD_DRV_LOG(ERR,
                                    "Set switch config failed aq_err: %d",
@@ -5282,7 +5372,7 @@ i40e_enable_pf_lb(struct i40e_pf *pf)
        int ret;
 
        /* Use the FW API if FW >= v5.0 */
-       if (hw->aq.fw_maj_ver < 5) {
+       if (hw->aq.fw_maj_ver < 5 && hw->mac.type != I40E_MAC_X722) {
                PMD_INIT_LOG(ERR, "FW < v5.0, cannot enable loopback");
                return;
        }
@@ -5553,7 +5643,7 @@ i40e_vsi_setup(struct i40e_pf *pf,
                ctxt.flags = I40E_AQ_VSI_TYPE_VF;
 
                /* Use the VEB configuration if FW >= v5.0 */
-               if (hw->aq.fw_maj_ver >= 5) {
+               if (hw->aq.fw_maj_ver >= 5 || hw->mac.type == I40E_MAC_X722) {
                        /* Configure switch ID */
                        ctxt.info.valid_sections |=
                        rte_cpu_to_le_16(I40E_AQ_VSI_PROP_SWITCH_VALID);
@@ -6549,7 +6639,53 @@ i40e_dev_interrupt_handler(void *param)
 done:
        /* Enable interrupt */
        i40e_pf_enable_irq0(hw);
-       rte_intr_enable(dev->intr_handle);
+}
+
+static void
+i40e_dev_alarm_handler(void *param)
+{
+       struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
+       struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+       uint32_t icr0;
+
+       /* Disable interrupt */
+       i40e_pf_disable_irq0(hw);
+
+       /* read out interrupt causes */
+       icr0 = I40E_READ_REG(hw, I40E_PFINT_ICR0);
+
+       /* No interrupt event indicated */
+       if (!(icr0 & I40E_PFINT_ICR0_INTEVENT_MASK))
+               goto done;
+       if (icr0 & I40E_PFINT_ICR0_ECC_ERR_MASK)
+               PMD_DRV_LOG(ERR, "ICR0: unrecoverable ECC error");
+       if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK)
+               PMD_DRV_LOG(ERR, "ICR0: malicious programming detected");
+       if (icr0 & I40E_PFINT_ICR0_GRST_MASK)
+               PMD_DRV_LOG(INFO, "ICR0: global reset requested");
+       if (icr0 & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK)
+               PMD_DRV_LOG(INFO, "ICR0: PCI exception activated");
+       if (icr0 & I40E_PFINT_ICR0_STORM_DETECT_MASK)
+               PMD_DRV_LOG(INFO, "ICR0: a change in the storm control state");
+       if (icr0 & I40E_PFINT_ICR0_HMC_ERR_MASK)
+               PMD_DRV_LOG(ERR, "ICR0: HMC error");
+       if (icr0 & I40E_PFINT_ICR0_PE_CRITERR_MASK)
+               PMD_DRV_LOG(ERR, "ICR0: protocol engine critical error");
+
+       if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) {
+               PMD_DRV_LOG(INFO, "ICR0: VF reset detected");
+               i40e_dev_handle_vfr_event(dev);
+       }
+       if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) {
+               PMD_DRV_LOG(INFO, "ICR0: adminq event");
+               i40e_dev_handle_aq_msg(dev);
+       }
+
+done:
+       /* Enable interrupt */
+       i40e_pf_enable_irq0(hw);
+       rte_eal_alarm_set(I40E_ALARM_INTERVAL,
+                         i40e_dev_alarm_handler, dev);
 }
 
 int
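
When intr_conf.rxq == 0, the patch replaces interrupt-driven servicing with this self-rearming EAL alarm: i40e_dev_start arms it, the handler re-arms itself as its last step, and i40e_dev_stop cancels it. The lifecycle in isolation (hypothetical callback; rte_eal_alarm_set/rte_eal_alarm_cancel are the real EAL APIs):

    #include <rte_alarm.h>

    #define EXAMPLE_INTERVAL_US 50000   /* mirrors I40E_ALARM_INTERVAL */

    static void example_alarm_cb(void *arg)
    {
        /* ... service the device referenced by arg ... */

        /* last step: re-arm, turning a one-shot alarm into a poller */
        rte_eal_alarm_set(EXAMPLE_INTERVAL_US, example_alarm_cb, arg);
    }

    static void example_start(void *dev)
    {
        rte_eal_alarm_set(EXAMPLE_INTERVAL_US, example_alarm_cb, dev);
    }

    static void example_stop(void *dev)
    {
        /* cancels all pending alarms matching this (callback, arg) pair */
        rte_eal_alarm_cancel(example_alarm_cb, dev);
    }
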
@@ -7370,7 +7506,7 @@ i40e_dev_get_filter_type(uint16_t filter_type, uint16_t *flag)
 /* Convert tunnel filter structure */
 static int
 i40e_tunnel_filter_convert(
-       struct i40e_aqc_add_rm_cloud_filt_elem_ext *cld_filter,
+       struct i40e_aqc_cloud_filters_element_bb *cld_filter,
        struct i40e_tunnel_filter *tunnel_filter)
 {
        ether_addr_copy((struct ether_addr *)&cld_filter->element.outer_mac,
@@ -7468,8 +7604,8 @@ i40e_dev_tunnel_filter_set(struct i40e_pf *pf,
        int val, ret = 0;
        struct i40e_hw *hw = I40E_PF_TO_HW(pf);
        struct i40e_vsi *vsi = pf->main_vsi;
-       struct i40e_aqc_add_rm_cloud_filt_elem_ext *cld_filter;
-       struct i40e_aqc_add_rm_cloud_filt_elem_ext *pfilter;
+       struct i40e_aqc_cloud_filters_element_bb *cld_filter;
+       struct i40e_aqc_cloud_filters_element_bb *pfilter;
        struct i40e_tunnel_rule *tunnel_rule = &pf->tunnel;
        struct i40e_tunnel_filter *tunnel, *node;
        struct i40e_tunnel_filter check_filter; /* Check if filter exists */
@@ -7577,7 +7713,7 @@ i40e_dev_tunnel_filter_set(struct i40e_pf *pf,
                if (ret < 0)
                        rte_free(tunnel);
        } else {
-               ret = i40e_aq_remove_cloud_filters(hw, vsi->seid,
+               ret = i40e_aq_rem_cloud_filters(hw, vsi->seid,
                                                   &cld_filter->element, 1);
                if (ret < 0) {
                        PMD_DRV_LOG(ERR, "Failed to delete a tunnel filter.");
@@ -7910,8 +8046,8 @@ i40e_dev_consistent_tunnel_filter_set(struct i40e_pf *pf,
        struct i40e_pf_vf *vf = NULL;
        struct i40e_hw *hw = I40E_PF_TO_HW(pf);
        struct i40e_vsi *vsi;
-       struct i40e_aqc_add_rm_cloud_filt_elem_ext *cld_filter;
-       struct i40e_aqc_add_rm_cloud_filt_elem_ext *pfilter;
+       struct i40e_aqc_cloud_filters_element_bb *cld_filter;
+       struct i40e_aqc_cloud_filters_element_bb *pfilter;
        struct i40e_tunnel_rule *tunnel_rule = &pf->tunnel;
        struct i40e_tunnel_filter *tunnel, *node;
        struct i40e_tunnel_filter check_filter; /* Check if filter exists */
@@ -8114,7 +8250,7 @@ i40e_dev_consistent_tunnel_filter_set(struct i40e_pf *pf,
 
        if (add) {
                if (big_buffer)
-                       ret = i40e_aq_add_cloud_filters_big_buffer(hw,
+                       ret = i40e_aq_add_cloud_filters_bb(hw,
                                                   vsi->seid, cld_filter, 1);
                else
                        ret = i40e_aq_add_cloud_filters(hw,
@@ -8137,11 +8273,11 @@ i40e_dev_consistent_tunnel_filter_set(struct i40e_pf *pf,
                        rte_free(tunnel);
        } else {
                if (big_buffer)
-                       ret = i40e_aq_remove_cloud_filters_big_buffer(
+                       ret = i40e_aq_rem_cloud_filters_bb(
                                hw, vsi->seid, cld_filter, 1);
                else
-                       ret = i40e_aq_remove_cloud_filters(hw, vsi->seid,
-                                                  &cld_filter->element, 1);
+                       ret = i40e_aq_rem_cloud_filters(hw, vsi->seid,
+                                               &cld_filter->element, 1);
                if (ret < 0) {
                        PMD_DRV_LOG(ERR, "Failed to delete a tunnel filter.");
                        rte_free(cld_filter);
@@ -11249,6 +11385,16 @@ i40e_dcb_init_configure(struct rte_eth_dev *dev, bool sw_dcb)
         * LLDP MIB change event.
         */
        if (sw_dcb == TRUE) {
+               /* When using NVM 6.01 or later, the RX data path does
+                * not hang if the FW LLDP is stopped.
+                */
+               if (((hw->nvm.version >> 12) & 0xf) > 6 ||
+                   (((hw->nvm.version >> 12) & 0xf) == 6 &&
+                    ((hw->nvm.version >> 4) & 0xff) >= 1)) {
+                       ret = i40e_aq_stop_lldp(hw, TRUE, NULL);
+                       if (ret != I40E_SUCCESS)
+                               PMD_INIT_LOG(DEBUG, "Failed to stop lldp");
+               }
+
                ret = i40e_init_dcb(hw);
                /* If lldp agent is stopped, the return value from
                 * i40e_init_dcb we expect is failure with I40E_AQ_RC_EPERM
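
The version check added above decodes a packed NVM version word: bits 15:12 hold the major version and bits 11:4 the minor version. A small standalone sketch with a hypothetical sample value:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            uint16_t nvm_version = 0x6010; /* assumption: encodes NVM 6.01 */
            uint8_t major = (nvm_version >> 12) & 0xf;
            uint8_t minor = (nvm_version >> 4) & 0xff;

            printf("NVM %u.%02u\n", major, minor); /* prints "NVM 6.01" */
            return 0;
    }
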
@@ -11584,7 +11730,7 @@ static int i40e_get_module_info(struct rte_eth_dev *dev,
        case I40E_MODULE_TYPE_SFP:
                status = i40e_aq_get_phy_register(hw,
                                I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
-                               I40E_I2C_EEPROM_DEV_ADDR,
+                               I40E_I2C_EEPROM_DEV_ADDR, 1,
                                I40E_MODULE_SFF_8472_COMP,
                                &sff8472_comp, NULL);
                if (status)
@@ -11592,7 +11738,7 @@ static int i40e_get_module_info(struct rte_eth_dev *dev,
 
                status = i40e_aq_get_phy_register(hw,
                                I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
-                               I40E_I2C_EEPROM_DEV_ADDR,
+                               I40E_I2C_EEPROM_DEV_ADDR, 1,
                                I40E_MODULE_SFF_8472_SWAP,
                                &sff8472_swap, NULL);
                if (status)
@@ -11620,7 +11766,7 @@ static int i40e_get_module_info(struct rte_eth_dev *dev,
                /* Read from memory page 0. */
                status = i40e_aq_get_phy_register(hw,
                                I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
-                               0,
+                               0, 1,
                                I40E_MODULE_REVISION_ADDR,
                                &sff8636_rev, NULL);
                if (status)
@@ -11681,7 +11827,7 @@ static int i40e_get_module_eeprom(struct rte_eth_dev *dev,
                }
                status = i40e_aq_get_phy_register(hw,
                                I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
-                               addr, offset, &value, NULL);
+                               addr, offset, 1, &value, NULL);
                if (status)
                        return -EIO;
                data[i] = (uint8_t)value;
@@ -11812,7 +11958,7 @@ i40e_tunnel_filter_restore(struct i40e_pf *pf)
        struct i40e_tunnel_filter_list
                *tunnel_list = &pf->tunnel.tunnel_list;
        struct i40e_tunnel_filter *f;
-       struct i40e_aqc_add_rm_cloud_filt_elem_ext cld_filter;
+       struct i40e_aqc_cloud_filters_element_bb cld_filter;
        bool big_buffer = 0;
 
        TAILQ_FOREACH(f, tunnel_list, rules) {
@@ -11847,8 +11993,8 @@ i40e_tunnel_filter_restore(struct i40e_pf *pf)
                        big_buffer = 1;
 
                if (big_buffer)
-                       i40e_aq_add_cloud_filters_big_buffer(hw,
-                                            vsi->seid, &cld_filter, 1);
+                       i40e_aq_add_cloud_filters_bb(hw,
+                                       vsi->seid, &cld_filter, 1);
                else
                        i40e_aq_add_cloud_filters(hw, vsi->seid,
                                                  &cld_filter.element, 1);
@@ -12527,4 +12673,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_i40e,
                              ETH_I40E_FLOATING_VEB_ARG "=1"
                              ETH_I40E_FLOATING_VEB_LIST_ARG "=<string>"
                              ETH_I40E_QUEUE_NUM_PER_VF_ARG "=1|2|4|8|16"
-                             ETH_I40E_SUPPORT_MULTI_DRIVER "=1");
+                             ETH_I40E_SUPPORT_MULTI_DRIVER "=1"
+                             ETH_I40E_USE_LATEST_VEC "=0|1");
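
The devarg registered above is passed per device on the application command line. A hedged example, where <use-latest-vec-key> stands for the string behind ETH_I40E_USE_LATEST_VEC (defined elsewhere in the PMD) and the PCI address is a placeholder:

    testpmd -w 0000:02:00.0,<use-latest-vec-key>=1 -- -i
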
index 3fffe5a..11ecfc3 100644 (file)
@@ -1078,6 +1078,9 @@ struct i40e_adapter {
        uint64_t pctypes_tbl[I40E_FLOW_TYPE_MAX] __rte_cache_min_aligned;
        uint64_t flow_types_mask;
        uint64_t pctypes_mask;
+
+       /* For devargs */
+       uint8_t use_latest_vec;
 };
 
 /**
@@ -1393,6 +1396,8 @@ i40e_calc_itr_interval(bool is_pf, bool is_multi_drv)
        (((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_KR) || \
        ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_CR) || \
        ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_SR) || \
-       ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_LR))
+       ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_LR) || \
+       ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_AOC) || \
+       ((phy_type) & I40E_CAP_PHY_TYPE_25GBASE_ACC))
 
 #endif /* _I40E_ETHDEV_H_ */
index 001c301..ae55b9b 100644 (file)
@@ -1384,10 +1384,8 @@ i40evf_dev_alarm_handler(void *param)
        icr0 = I40E_READ_REG(hw, I40E_VFINT_ICR01);
 
        /* No interrupt event indicated */
-       if (!(icr0 & I40E_VFINT_ICR01_INTEVENT_MASK)) {
-               PMD_DRV_LOG(DEBUG, "No interrupt event, nothing to do");
+       if (!(icr0 & I40E_VFINT_ICR01_INTEVENT_MASK))
                goto done;
-       }
 
        if (icr0 & I40E_VFINT_ICR01_ADMINQ_MASK) {
                PMD_DRV_LOG(DEBUG, "ICR01_ADMINQ is reported");
@@ -1485,9 +1483,6 @@ i40evf_dev_uninit(struct rte_eth_dev *eth_dev)
                return -1;
        }
 
-       rte_free(eth_dev->data->mac_addrs);
-       eth_dev->data->mac_addrs = NULL;
-
        return 0;
 }
 
@@ -1522,8 +1517,6 @@ i40evf_dev_configure(struct rte_eth_dev *dev)
 {
        struct i40e_adapter *ad =
                I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
-       struct rte_eth_conf *conf = &dev->data->dev_conf;
-       struct i40e_vf *vf;
 
        /* Initialize to TRUE. If any of Rx queues doesn't meet the bulk
         * allocation or vector Rx preconditions we will reset it.
@@ -1533,19 +1526,6 @@ i40evf_dev_configure(struct rte_eth_dev *dev)
        ad->tx_simple_allowed = true;
        ad->tx_vec_allowed = true;
 
-       /* For non-DPDK PF drivers, VF has no ability to disable HW
-        * CRC strip, and is implicitly enabled by the PF.
-        */
-       if (rte_eth_dev_must_keep_crc(conf->rxmode.offloads)) {
-               vf = I40EVF_DEV_PRIVATE_TO_VF(dev->data->dev_private);
-               if ((vf->version_major == VIRTCHNL_VERSION_MAJOR) &&
-                   (vf->version_minor <= VIRTCHNL_VERSION_MINOR)) {
-                       /* Peer is running non-DPDK PF driver. */
-                       PMD_INIT_LOG(ERR, "VF can't disable HW CRC Strip");
-                       return -EINVAL;
-               }
-       }
-
        return i40evf_init_vlan(dev);
 }
 
@@ -2180,8 +2160,6 @@ i40evf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                DEV_RX_OFFLOAD_UDP_CKSUM |
                DEV_RX_OFFLOAD_TCP_CKSUM |
                DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
-               DEV_RX_OFFLOAD_CRC_STRIP |
-               DEV_RX_OFFLOAD_KEEP_CRC |
                DEV_RX_OFFLOAD_SCATTER |
                DEV_RX_OFFLOAD_JUMBO_FRAME |
                DEV_RX_OFFLOAD_VLAN_FILTER;
@@ -2268,7 +2246,6 @@ i40evf_dev_close(struct rte_eth_dev *dev)
 {
        struct i40e_hw *hw = I40E_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
-       rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev);
        i40evf_dev_stop(dev);
        i40e_dev_free_queues(dev);
        /*
@@ -2282,6 +2259,7 @@ i40evf_dev_close(struct rte_eth_dev *dev)
        i40evf_reset_vf(hw);
        i40e_shutdown_adminq(hw);
        i40evf_disable_irq0(hw);
+       rte_eal_alarm_cancel(i40evf_dev_alarm_handler, dev);
 }
 
 /*
index c67b264..3694df2 100644 (file)
@@ -133,8 +133,8 @@ const struct rte_flow_ops i40e_flow_ops = {
        .flush = i40e_flow_flush,
 };
 
-union i40e_filter_t cons_filter;
-enum rte_filter_type cons_filter_type = RTE_ETH_FILTER_NONE;
+static union i40e_filter_t cons_filter;
+static enum rte_filter_type cons_filter_type = RTE_ETH_FILTER_NONE;
 
 /* Pattern matched ethertype filter */
 static enum rte_flow_item_type pattern_ethertype[] = {
@@ -3127,6 +3127,7 @@ i40e_flow_parse_fdir_filter(struct rte_eth_dev *dev,
                            struct rte_flow_error *error,
                            union i40e_filter_t *filter)
 {
+       struct i40e_pf *pf = I40E_DEV_PRIVATE_TO_PF(dev->data->dev_private);
        struct i40e_fdir_filter_conf *fdir_filter =
                &filter->fdir_filter;
        int ret;
@@ -3148,14 +3149,29 @@ i40e_flow_parse_fdir_filter(struct rte_eth_dev *dev,
 
        if (dev->data->dev_conf.fdir_conf.mode !=
            RTE_FDIR_MODE_PERFECT) {
-               rte_flow_error_set(error, ENOTSUP,
-                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                                  NULL,
-                                  "Check the mode in fdir_conf.");
-               return -rte_errno;
+               /* Enable fdir when the first fdir flow is added. */
+               ret = i40e_fdir_setup(pf);
+               if (ret != I40E_SUCCESS) {
+                       rte_flow_error_set(error, ENOTSUP,
+                                          RTE_FLOW_ERROR_TYPE_HANDLE,
+                                          NULL, "Failed to setup fdir.");
+                       return -rte_errno;
+               }
+               ret = i40e_fdir_configure(dev);
+               if (ret < 0) {
+                       rte_flow_error_set(error, ENOTSUP,
+                                          RTE_FLOW_ERROR_TYPE_HANDLE,
+                                          NULL, "Failed to configure fdir.");
+                       goto err;
+               }
+
+               dev->data->dev_conf.fdir_conf.mode = RTE_FDIR_MODE_PERFECT;
        }
 
        return 0;
+err:
+       i40e_fdir_teardown(pf);
+       return -rte_errno;
 }
 
 /* Parse to get the action info of a tunnel filter
@@ -4708,6 +4724,13 @@ i40e_flow_destroy(struct rte_eth_dev *dev,
        case RTE_ETH_FILTER_FDIR:
                ret = i40e_flow_add_del_fdir_filter(dev,
                       &((struct i40e_fdir_filter *)flow->rule)->fdir, 0);
+
+               /* If the last flow is destroyed, disable fdir. */
+               if (!ret && TAILQ_EMPTY(&pf->fdir.fdir_list)) {
+                       i40e_fdir_teardown(pf);
+                       dev->data->dev_conf.fdir_conf.mode =
+                                  RTE_FDIR_MODE_NONE;
+               }
                break;
        case RTE_ETH_FILTER_HASH:
                ret = i40e_config_rss_filter_del(dev,
@@ -4773,7 +4796,7 @@ i40e_flow_destroy_tunnel_filter(struct i40e_pf *pf,
        struct i40e_hw *hw = I40E_PF_TO_HW(pf);
        struct i40e_vsi *vsi;
        struct i40e_pf_vf *vf;
-       struct i40e_aqc_add_rm_cloud_filt_elem_ext cld_filter;
+       struct i40e_aqc_cloud_filters_element_bb cld_filter;
        struct i40e_tunnel_rule *tunnel_rule = &pf->tunnel;
        struct i40e_tunnel_filter *node;
        bool big_buffer = 0;
@@ -4808,11 +4831,11 @@ i40e_flow_destroy_tunnel_filter(struct i40e_pf *pf,
                big_buffer = 1;
 
        if (big_buffer)
-               ret = i40e_aq_remove_cloud_filters_big_buffer(hw, vsi->seid,
-                                                             &cld_filter, 1);
+               ret = i40e_aq_rem_cloud_filters_bb(hw, vsi->seid,
+                                               &cld_filter, 1);
        else
-               ret = i40e_aq_remove_cloud_filters(hw, vsi->seid,
-                                                  &cld_filter.element, 1);
+               ret = i40e_aq_rem_cloud_filters(hw, vsi->seid,
+                                               &cld_filter.element, 1);
        if (ret < 0)
                return -ENOTSUP;
 
@@ -4900,6 +4923,8 @@ i40e_flow_flush_fdir_filter(struct i40e_pf *pf)
                        pf->fdir.inset_flag[pctype] = 0;
        }
 
+       i40e_fdir_teardown(pf);
+
        return ret;
 }
 
index 2a28ee3..8bfa251 100644 (file)
                PKT_TX_OUTER_IP_CKSUM)
 
 #define I40E_TX_OFFLOAD_MASK (  \
+               PKT_TX_OUTER_IPV4 |     \
+               PKT_TX_OUTER_IPV6 |     \
+               PKT_TX_IPV4 |           \
+               PKT_TX_IPV6 |           \
                PKT_TX_IP_CKSUM |       \
                PKT_TX_L4_MASK |        \
                PKT_TX_OUTER_IP_CKSUM | \
@@ -83,7 +87,8 @@ i40e_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union i40e_rx_desc *rxdp)
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
        if (rte_le_to_cpu_16(rxdp->wb.qword2.ext_status) &
                (1 << I40E_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT)) {
-               mb->ol_flags |= PKT_RX_QINQ_STRIPPED;
+               mb->ol_flags |= PKT_RX_QINQ_STRIPPED | PKT_RX_QINQ |
+                       PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN;
                mb->vlan_tci_outer = mb->vlan_tci;
                mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_2);
                PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u",
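
With the extra flags set above, a double-tagged frame exposes both tags to the application. A sketch of the consumer side, assuming an mbuf received on a port with QinQ stripping enabled (print_vlan_tags() is hypothetical):

    #include <stdio.h>
    #include <rte_mbuf.h>

    static void
    print_vlan_tags(const struct rte_mbuf *m)
    {
            if (m->ol_flags & PKT_RX_QINQ_STRIPPED)
                    /* The PMD moved the outer tag into vlan_tci_outer. */
                    printf("outer VLAN %u, inner VLAN %u\n",
                           m->vlan_tci_outer, m->vlan_tci);
            else if (m->ol_flags & PKT_RX_VLAN_STRIPPED)
                    printf("VLAN %u\n", m->vlan_tci);
    }
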
@@ -1828,7 +1833,7 @@ i40e_dev_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->queue_id = queue_idx;
        rxq->reg_idx = reg_idx;
        rxq->port_id = dev->data->port_id;
-       if (rte_eth_dev_must_keep_crc(dev->data->dev_conf.rxmode.offloads))
+       if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
                rxq->crc_len = ETHER_CRC_LEN;
        else
                rxq->crc_len = 0;
@@ -2909,6 +2914,35 @@ i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
        qinfo->conf.offloads = txq->offloads;
 }
 
+static eth_rx_burst_t
+i40e_get_latest_rx_vec(bool scatter)
+{
+#ifdef RTE_ARCH_X86
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+               return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
+                                i40e_recv_pkts_vec_avx2;
+#endif
+       return scatter ? i40e_recv_scattered_pkts_vec :
+                        i40e_recv_pkts_vec;
+}
+
+static eth_rx_burst_t
+i40e_get_recommend_rx_vec(bool scatter)
+{
+#ifdef RTE_ARCH_X86
+       /*
+        * Since the AVX frequency can differ from the base frequency,
+        * limit use of the AVX2 version to later platforms, not all
+        * those that could theoretically run it.
+        */
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+               return scatter ? i40e_recv_scattered_pkts_vec_avx2 :
+                                i40e_recv_pkts_vec_avx2;
+#endif
+       return scatter ? i40e_recv_scattered_pkts_vec :
+                        i40e_recv_pkts_vec;
+}
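
The two selectors above factor out a common runtime-dispatch idiom: probe CPU flags once and hand back a burst-function pointer. A standalone sketch, with scalar_rx_burst() and avx2_rx_burst() as hypothetical stand-ins for the i40e burst functions:

    #include <stdbool.h>
    #include <stdint.h>
    #include <rte_cpuflags.h>

    typedef uint16_t (*rx_burst_t)(void *rxq, void **pkts, uint16_t n);

    static uint16_t
    scalar_rx_burst(void *rxq, void **pkts, uint16_t n)
    {
            (void)rxq; (void)pkts; (void)n;
            return 0;
    }

    static uint16_t
    avx2_rx_burst(void *rxq, void **pkts, uint16_t n)
    {
            (void)rxq; (void)pkts; (void)n;
            return 0;
    }

    static rx_burst_t
    select_rx_burst(bool use_latest)
    {
    #ifdef RTE_ARCH_X86
            /* "Latest" keys off AVX2 presence alone; "recommended"
             * gates on AVX512F so AVX2 only runs where its frequency
             * cost is known to be small.
             */
            if (rte_cpu_get_flag_enabled(use_latest ?
                            RTE_CPUFLAG_AVX2 : RTE_CPUFLAG_AVX512F))
                    return avx2_rx_burst;
    #endif
            return scalar_rx_burst;
    }
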
+
 void __attribute__((cold))
 i40e_set_rx_function(struct rte_eth_dev *dev)
 {
@@ -2940,57 +2974,17 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
                }
        }
 
-       if (dev->data->scattered_rx) {
-               /* Set the non-LRO scattered callback: there are Vector and
-                * single allocation versions.
-                */
-               if (ad->rx_vec_allowed) {
-                       PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
-                                           "callback (port=%d).",
-                                    dev->data->port_id);
-
-                       dev->rx_pkt_burst = i40e_recv_scattered_pkts_vec;
-#ifdef RTE_ARCH_X86
-                       /*
-                        * since AVX frequency can be different to base
-                        * frequency, limit use of AVX2 version to later
-                        * plaforms, not all those that could theoretically
-                        * run it.
-                        */
-                       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
-                               dev->rx_pkt_burst =
-                                       i40e_recv_scattered_pkts_vec_avx2;
-#endif
-               } else {
-                       PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
-                                          "allocation callback (port=%d).",
-                                    dev->data->port_id);
-                       dev->rx_pkt_burst = i40e_recv_scattered_pkts;
-               }
-       /* If parameters allow we are going to choose between the following
-        * callbacks:
-        *    - Vector
-        *    - Bulk Allocation
-        *    - Single buffer allocation (the simplest one)
-        */
-       } else if (ad->rx_vec_allowed) {
-               PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
-                                   "burst size no less than %d (port=%d).",
-                            RTE_I40E_DESCS_PER_LOOP,
-                            dev->data->port_id);
-
-               dev->rx_pkt_burst = i40e_recv_pkts_vec;
-#ifdef RTE_ARCH_X86
-               /*
-                * since AVX frequency can be different to base
-                * frequency, limit use of AVX2 version to later
-                * plaforms, not all those that could theoretically
-                * run it.
-                */
-               if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
-                       dev->rx_pkt_burst = i40e_recv_pkts_vec_avx2;
-#endif
-       } else if (ad->rx_bulk_alloc_allowed) {
+       if (ad->rx_vec_allowed) {
+               /* Vec Rx path */
+               PMD_INIT_LOG(DEBUG, "Vector Rx path will be used on port=%d.",
+                               dev->data->port_id);
+               if (ad->use_latest_vec)
+                       dev->rx_pkt_burst =
+                       i40e_get_latest_rx_vec(dev->data->scattered_rx);
+               else
+                       dev->rx_pkt_burst =
+                       i40e_get_recommend_rx_vec(dev->data->scattered_rx);
+       } else if (!dev->data->scattered_rx && ad->rx_bulk_alloc_allowed) {
                PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
                                    "satisfied. Rx Burst Bulk Alloc function "
                                    "will be used on port=%d.",
@@ -2998,12 +2992,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
 
                dev->rx_pkt_burst = i40e_recv_pkts_bulk_alloc;
        } else {
-               PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
-                                   "satisfied, or Scattered Rx is requested "
-                                   "(port=%d).",
+               /* Simple Rx path. */
+               PMD_INIT_LOG(DEBUG, "Simple Rx path will be used on port=%d.",
                             dev->data->port_id);
-
-               dev->rx_pkt_burst = i40e_recv_pkts;
+               dev->rx_pkt_burst = dev->data->scattered_rx ?
+                                       i40e_recv_scattered_pkts :
+                                       i40e_recv_pkts;
        }
 
        /* Propagate information about RX function choice through all queues. */
@@ -3049,6 +3043,31 @@ i40e_set_tx_function_flag(struct rte_eth_dev *dev, struct i40e_tx_queue *txq)
                                txq->queue_id);
 }
 
+static eth_tx_burst_t
+i40e_get_latest_tx_vec(void)
+{
+#ifdef RTE_ARCH_X86
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
+               return i40e_xmit_pkts_vec_avx2;
+#endif
+       return i40e_xmit_pkts_vec;
+}
+
+static eth_tx_burst_t
+i40e_get_recommend_tx_vec(void)
+{
+#ifdef RTE_ARCH_X86
+       /*
+        * Since the AVX frequency can differ from the base frequency,
+        * limit use of the AVX2 version to later platforms, not all
+        * those that could theoretically run it.
+        */
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
+               return i40e_xmit_pkts_vec_avx2;
+#endif
+       return i40e_xmit_pkts_vec;
+}
+
 void __attribute__((cold))
 i40e_set_tx_function(struct rte_eth_dev *dev)
 {
@@ -3073,17 +3092,12 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
        if (ad->tx_simple_allowed) {
                if (ad->tx_vec_allowed) {
                        PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
-                       dev->tx_pkt_burst = i40e_xmit_pkts_vec;
-#ifdef RTE_ARCH_X86
-                       /*
-                        * since AVX frequency can be different to base
-                        * frequency, limit use of AVX2 version to later
-                        * plaforms, not all those that could theoretically
-                        * run it.
-                        */
-                       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
-                               dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx2;
-#endif
+                       if (ad->use_latest_vec)
+                               dev->tx_pkt_burst =
+                                       i40e_get_latest_tx_vec();
+                       else
+                               dev->tx_pkt_burst =
+                                       i40e_get_recommend_tx_vec();
                } else {
                        PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
                        dev->tx_pkt_burst = i40e_xmit_pkts_simple;
@@ -3166,13 +3180,13 @@ i40e_set_default_pctype_table(struct rte_eth_dev *dev)
 }
 
 /* Stubs needed for linkage when CONFIG_RTE_I40E_INC_VECTOR is set to 'n' */
-int __attribute__((weak))
+__rte_weak int
 i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
 {
        return -1;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 i40e_recv_pkts_vec(
        void __rte_unused *rx_queue,
        struct rte_mbuf __rte_unused **rx_pkts,
@@ -3181,7 +3195,7 @@ i40e_recv_pkts_vec(
        return 0;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 i40e_recv_scattered_pkts_vec(
        void __rte_unused *rx_queue,
        struct rte_mbuf __rte_unused **rx_pkts,
@@ -3190,7 +3204,7 @@ i40e_recv_scattered_pkts_vec(
        return 0;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 i40e_recv_pkts_vec_avx2(void __rte_unused *rx_queue,
                        struct rte_mbuf __rte_unused **rx_pkts,
                        uint16_t __rte_unused nb_pkts)
@@ -3198,7 +3212,7 @@ i40e_recv_pkts_vec_avx2(void __rte_unused *rx_queue,
        return 0;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 i40e_recv_scattered_pkts_vec_avx2(void __rte_unused *rx_queue,
                        struct rte_mbuf __rte_unused **rx_pkts,
                        uint16_t __rte_unused nb_pkts)
@@ -3206,25 +3220,25 @@ i40e_recv_scattered_pkts_vec_avx2(void __rte_unused *rx_queue,
        return 0;
 }
 
-int __attribute__((weak))
+__rte_weak int
 i40e_rxq_vec_setup(struct i40e_rx_queue __rte_unused *rxq)
 {
        return -1;
 }
 
-int __attribute__((weak))
+__rte_weak int
 i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused *txq)
 {
        return -1;
 }
 
-void __attribute__((weak))
+__rte_weak void
 i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue __rte_unused*rxq)
 {
        return;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 i40e_xmit_fixed_burst_vec(void __rte_unused * tx_queue,
                          struct rte_mbuf __rte_unused **tx_pkts,
                          uint16_t __rte_unused nb_pkts)
@@ -3232,7 +3246,7 @@ i40e_xmit_fixed_burst_vec(void __rte_unused * tx_queue,
        return 0;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 i40e_xmit_pkts_vec_avx2(void __rte_unused * tx_queue,
                          struct rte_mbuf __rte_unused **tx_pkts,
                          uint16_t __rte_unused nb_pkts)
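
The conversions in this file replace the GCC attribute spelling with the new __rte_weak macro; the mechanism is unchanged. A standalone sketch of the idiom, with my_recv_pkts_vec() as a hypothetical symbol: the scalar build supplies a weak no-op stub, and the vector object file, when compiled in, overrides it with a strong definition of the same name.

    #include <stdint.h>
    #include <rte_common.h>

    __rte_weak uint16_t
    my_recv_pkts_vec(void *rxq, void **pkts, uint16_t n)
    {
            RTE_SET_USED(rxq);
            RTE_SET_USED(pkts);
            RTE_SET_USED(n);
            return 0; /* linked only when no strong definition exists */
    }
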
index 63cb177..f00f6d6 100644 (file)
@@ -199,9 +199,7 @@ i40e_rx_vec_dev_conf_condition_check_default(struct rte_eth_dev *dev)
        if (fconf->mode != RTE_FDIR_MODE_NONE)
                return -1;
 
-        /* - no csum error report support
-        * - no header split support
-        */
+        /* no header split support */
        if (rxmode->offloads & DEV_RX_OFFLOAD_HEADER_SPLIT)
                return -1;
 
index f9f1316..45a15d3 100644 (file)
@@ -48,6 +48,7 @@ i40e_vf_representor_dev_infos_get(struct rte_eth_dev *ethdev,
                DEV_RX_OFFLOAD_UDP_CKSUM |
                DEV_RX_OFFLOAD_TCP_CKSUM;
        dev_info->tx_offload_capa =
+               DEV_TX_OFFLOAD_MULTI_SEGS  |
                DEV_TX_OFFLOAD_VLAN_INSERT |
                DEV_TX_OFFLOAD_QINQ_INSERT |
                DEV_TX_OFFLOAD_IPV4_CKSUM |
@@ -419,7 +420,7 @@ i40e_vf_representor_vlan_pvid_set(struct rte_eth_dev *ethdev, uint16_t vlan_id,
                representor->vf_id, vlan_id);
 }
 
-struct eth_dev_ops i40e_representor_dev_ops = {
+static const struct eth_dev_ops i40e_representor_dev_ops = {
        .dev_infos_get        = i40e_vf_representor_dev_infos_get,
 
        .dev_start            = i40e_vf_representor_dev_start,
@@ -486,9 +487,6 @@ i40e_vf_representor_init(struct rte_eth_dev *ethdev, void *init_params)
        if (representor->vf_id >= pf->vf_num)
                return -ENODEV;
 
-       /** representor shares the same driver as it's PF device */
-       ethdev->device->driver = representor->adapter->eth_dev->device->driver;
-
        /* Set representor device ops */
        ethdev->dev_ops = &i40e_representor_dev_ops;
 
@@ -506,6 +504,7 @@ i40e_vf_representor_init(struct rte_eth_dev *ethdev, void *init_params)
        }
 
        ethdev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;
+       ethdev->data->representor_id = representor->vf_id;
 
        /* Setting the number queues allocated to the VF */
        ethdev->data->nb_rx_queues = vf->vsi->nb_qps;
@@ -525,7 +524,10 @@ i40e_vf_representor_init(struct rte_eth_dev *ethdev, void *init_params)
 }
 
 int
-i40e_vf_representor_uninit(struct rte_eth_dev *ethdev __rte_unused)
+i40e_vf_representor_uninit(struct rte_eth_dev *ethdev)
 {
+       /* mac_addrs must not be freed because it is part of i40e_pf_vf */
+       ethdev->data->mac_addrs = NULL;
+
        return 0;
 }
index bba62b1..7ce5d02 100644 (file)
@@ -338,7 +338,7 @@ i40e_vsi_set_tx_loopback(struct i40e_vsi *vsi, uint8_t on)
        hw = I40E_VSI_TO_HW(vsi);
 
        /* Use the FW API if FW >= v5.0 */
-       if (hw->aq.fw_maj_ver < 5) {
+       if (hw->aq.fw_maj_ver < 5 && hw->mac.type != I40E_MAC_X722) {
                PMD_INIT_LOG(ERR, "FW < v5.0, cannot enable loopback");
                return -ENOTSUP;
        }
index 4b22d9e..3c0b2df 100644 (file)
@@ -249,7 +249,7 @@ ifcvf_hw_disable(struct ifcvf_hw *hw)
                IFCVF_WRITE_REG16(IFCVF_MSI_NO_VECTOR, &cfg->queue_msix_vector);
                ring_state = *(u32 *)(hw->lm_cfg + IFCVF_LM_RING_STATE_OFFSET +
                                (i / 2) * IFCVF_LM_CFG_SIZE + (i % 2) * 4);
-               hw->vring[i].last_avail_idx = (u16)ring_state;
+               hw->vring[i].last_avail_idx = (u16)(ring_state >> 16);
                hw->vring[i].last_used_idx = (u16)(ring_state >> 16);
        }
 }
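
A sketch of unpacking the 32-bit LM ring-state word read above, under the assumption implied by the fix: the used index lives in the high half, and once the queue is disabled the available and used indexes have converged, so both saved values are restored from it.

    #include <stdint.h>

    static inline void
    unpack_ring_state(uint32_t ring_state, uint16_t *last_avail,
                      uint16_t *last_used)
    {
            *last_used = (uint16_t)(ring_state >> 16);
            *last_avail = *last_used; /* converged after device stop */
    }
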
@@ -278,6 +278,37 @@ ifcvf_stop_hw(struct ifcvf_hw *hw)
        ifcvf_reset(hw);
 }
 
+void
+ifcvf_enable_logging(struct ifcvf_hw *hw, u64 log_base, u64 log_size)
+{
+       u8 *lm_cfg;
+
+       lm_cfg = hw->lm_cfg;
+
+       *(u32 *)(lm_cfg + IFCVF_LM_BASE_ADDR_LOW) =
+               log_base & IFCVF_32_BIT_MASK;
+
+       *(u32 *)(lm_cfg + IFCVF_LM_BASE_ADDR_HIGH) =
+               (log_base >> 32) & IFCVF_32_BIT_MASK;
+
+       *(u32 *)(lm_cfg + IFCVF_LM_END_ADDR_LOW) =
+               (log_base + log_size) & IFCVF_32_BIT_MASK;
+
+       *(u32 *)(lm_cfg + IFCVF_LM_END_ADDR_HIGH) =
+               ((log_base + log_size) >> 32) & IFCVF_32_BIT_MASK;
+
+       *(u32 *)(lm_cfg + IFCVF_LM_LOGGING_CTRL) = IFCVF_LM_ENABLE_VF;
+}
+
+void
+ifcvf_disable_logging(struct ifcvf_hw *hw)
+{
+       u8 *lm_cfg;
+
+       lm_cfg = hw->lm_cfg;
+       *(u32 *)(lm_cfg + IFCVF_LM_LOGGING_CTRL) = IFCVF_LM_DISABLE;
+}
+
 void
 ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid)
 {
index badacb6..f026c70 100644 (file)
@@ -49,6 +49,7 @@
 #define IFCVF_LM_DISABLE               0x0
 #define IFCVF_LM_ENABLE_VF             0x1
 #define IFCVF_LM_ENABLE_PF             0x3
+#define IFCVF_LOG_BASE                 0x100000000000
 
 #define IFCVF_32_BIT_MASK              0xffffffff
 
@@ -142,6 +143,12 @@ ifcvf_start_hw(struct ifcvf_hw *hw);
 void
 ifcvf_stop_hw(struct ifcvf_hw *hw);
 
+void
+ifcvf_enable_logging(struct ifcvf_hw *hw, u64 log_base, u64 log_size);
+
+void
+ifcvf_disable_logging(struct ifcvf_hw *hw);
+
 void
 ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid);
 
index cf151ef..6aef25e 100644 (file)
@@ -17,7 +17,7 @@
 #define DEBUGOUT(S, args...)    RTE_LOG(DEBUG, PMD, S, ##args)
 #define STATIC                  static
 
-#define msec_delay     rte_delay_ms
+#define msec_delay(x)  rte_delay_us_sleep(1000 * (x))
 
 #define IFCVF_READ_REG8(reg)           rte_read8(reg)
 #define IFCVF_WRITE_REG8(val, reg)     rte_write8((val), (reg))
index 88d8140..97a57f1 100644 (file)
@@ -7,6 +7,7 @@
 #include <fcntl.h>
 #include <sys/ioctl.h>
 #include <sys/epoll.h>
+#include <linux/virtio_net.h>
 
 #include <rte_malloc.h>
 #include <rte_memory.h>
@@ -111,7 +112,6 @@ ifcvf_vfio_setup(struct ifcvf_internal *internal)
        struct rte_pci_device *dev = internal->pdev;
        char devname[RTE_DEV_NAME_MAX_LEN] = {0};
        int iommu_group_num;
-       int ret = 0;
        int i;
 
        internal->vfio_dev_fd = -1;
@@ -145,9 +145,8 @@ ifcvf_vfio_setup(struct ifcvf_internal *internal)
                internal->hw.mem_resource[i].len =
                        internal->pdev->mem_resource[i].len;
        }
-       ret = ifcvf_init_hw(&internal->hw, internal->pdev);
 
-       return ret;
+       return 0;
 
 err:
        rte_vfio_container_destroy(internal->vfio_container_fd);
@@ -205,7 +204,7 @@ exit:
 }
 
 static uint64_t
-qva_to_gpa(int vid, uint64_t qva)
+hva_to_gpa(int vid, uint64_t hva)
 {
        struct rte_vhost_memory *mem = NULL;
        struct rte_vhost_mem_region *reg;
@@ -218,9 +217,9 @@ qva_to_gpa(int vid, uint64_t qva)
        for (i = 0; i < mem->nregions; i++) {
                reg = &mem->regions[i];
 
-               if (qva >= reg->host_user_addr &&
-                               qva < reg->host_user_addr + reg->size) {
-                       gpa = qva - reg->host_user_addr + reg->guest_phys_addr;
+               if (hva >= reg->host_user_addr &&
+                               hva < reg->host_user_addr + reg->size) {
+                       gpa = hva - reg->host_user_addr + reg->guest_phys_addr;
                        break;
                }
        }
@@ -246,21 +245,21 @@ vdpa_ifcvf_start(struct ifcvf_internal *internal)
 
        for (i = 0; i < nr_vring; i++) {
                rte_vhost_get_vhost_vring(vid, i, &vq);
-               gpa = qva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc);
+               gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.desc);
                if (gpa == 0) {
                        DRV_LOG(ERR, "Fail to get GPA for descriptor ring.");
                        return -1;
                }
                hw->vring[i].desc = gpa;
 
-               gpa = qva_to_gpa(vid, (uint64_t)(uintptr_t)vq.avail);
+               gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.avail);
                if (gpa == 0) {
                        DRV_LOG(ERR, "Fail to get GPA for available ring.");
                        return -1;
                }
                hw->vring[i].avail = gpa;
 
-               gpa = qva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
+               gpa = hva_to_gpa(vid, (uint64_t)(uintptr_t)vq.used);
                if (gpa == 0) {
                        DRV_LOG(ERR, "Fail to get GPA for used ring.");
                        return -1;
@@ -276,12 +275,30 @@ vdpa_ifcvf_start(struct ifcvf_internal *internal)
        return ifcvf_start_hw(&internal->hw);
 }
 
+static void
+ifcvf_used_ring_log(struct ifcvf_hw *hw, uint32_t queue, uint8_t *log_buf)
+{
+       uint32_t i, size;
+       uint64_t pfn;
+
+       pfn = hw->vring[queue].used / PAGE_SIZE;
+       size = hw->vring[queue].size * sizeof(struct vring_used_elem) +
+                       sizeof(uint16_t) * 3;
+
+       for (i = 0; i <= size / PAGE_SIZE; i++)
+               __sync_fetch_and_or_8(&log_buf[(pfn + i) / 8],
+                               1 << ((pfn + i) % 8));
+}
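
The loop above uses standard dirty-bitmap arithmetic: one bit per guest page, so page frame number pfn selects byte pfn / 8 and bit pfn % 8 of the log buffer. A minimal sketch, assuming 4 KiB pages (the driver uses the platform PAGE_SIZE):

    #include <stdint.h>

    #define LOG_PAGE_SIZE 4096 /* assumption: 4 KiB pages */

    static inline void
    log_dirty_page(uint8_t *log_buf, uint64_t gpa)
    {
            uint64_t pfn = gpa / LOG_PAGE_SIZE;

            /* Atomic OR so concurrent loggers do not lose bits. */
            __sync_fetch_and_or(&log_buf[pfn / 8],
                            (uint8_t)(1 << (pfn % 8)));
    }
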
+
 static void
 vdpa_ifcvf_stop(struct ifcvf_internal *internal)
 {
        struct ifcvf_hw *hw = &internal->hw;
        uint32_t i;
        int vid;
+       uint64_t features;
+       uint64_t log_base, log_size;
+       uint8_t *log_buf;
 
        vid = internal->vid;
        ifcvf_stop_hw(hw);
@@ -289,6 +306,21 @@ vdpa_ifcvf_stop(struct ifcvf_internal *internal)
        for (i = 0; i < hw->nr_vring; i++)
                rte_vhost_set_vring_base(vid, i, hw->vring[i].last_avail_idx,
                                hw->vring[i].last_used_idx);
+
+       rte_vhost_get_negotiated_features(vid, &features);
+       if (RTE_VHOST_NEED_LOG(features)) {
+               ifcvf_disable_logging(hw);
+               rte_vhost_get_log_base(internal->vid, &log_base, &log_size);
+               rte_vfio_container_dma_unmap(internal->vfio_container_fd,
+                               log_base, IFCVF_LOG_BASE, log_size);
+               /*
+                * IFCVF marks dirty memory pages only for the packet
+                * buffers; SW helps mark the used rings as dirty after
+                * the device stops.
+                */
+               log_buf = (uint8_t *)(uintptr_t)log_base;
+               for (i = 0; i < hw->nr_vring; i++)
+                       ifcvf_used_ring_log(hw, i, log_buf);
+       }
 }
 
 #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
@@ -469,11 +501,11 @@ update_datapath(struct ifcvf_internal *internal)
                if (ret)
                        goto err;
 
-               ret = setup_notify_relay(internal);
+               ret = vdpa_ifcvf_start(internal);
                if (ret)
                        goto err;
 
-               ret = vdpa_ifcvf_start(internal);
+               ret = setup_notify_relay(internal);
                if (ret)
                        goto err;
 
@@ -481,12 +513,12 @@ update_datapath(struct ifcvf_internal *internal)
        } else if (rte_atomic32_read(&internal->running) &&
                   (!rte_atomic32_read(&internal->started) ||
                    !rte_atomic32_read(&internal->dev_attached))) {
-               vdpa_ifcvf_stop(internal);
-
                ret = unset_notify_relay(internal);
                if (ret)
                        goto err;
 
+               vdpa_ifcvf_stop(internal);
+
                ret = vdpa_disable_vfio_intr(internal);
                if (ret)
                        goto err;
@@ -548,6 +580,35 @@ ifcvf_dev_close(int vid)
        return 0;
 }
 
+static int
+ifcvf_set_features(int vid)
+{
+       uint64_t features;
+       int did;
+       struct internal_list *list;
+       struct ifcvf_internal *internal;
+       uint64_t log_base, log_size;
+
+       did = rte_vhost_get_vdpa_device_id(vid);
+       list = find_internal_resource_by_did(did);
+       if (list == NULL) {
+               DRV_LOG(ERR, "Invalid device id: %d", did);
+               return -1;
+       }
+
+       internal = list->internal;
+       rte_vhost_get_negotiated_features(vid, &features);
+
+       if (RTE_VHOST_NEED_LOG(features)) {
+               rte_vhost_get_log_base(vid, &log_base, &log_size);
+               rte_vfio_container_dma_map(internal->vfio_container_fd,
+                               log_base, IFCVF_LOG_BASE, log_size);
+               ifcvf_enable_logging(&internal->hw, IFCVF_LOG_BASE, log_size);
+       }
+
+       return 0;
+}
+
 static int
 ifcvf_get_vfio_group_fd(int vid)
 {
@@ -657,14 +718,14 @@ ifcvf_get_protocol_features(int did __rte_unused, uint64_t *features)
        return 0;
 }
 
-struct rte_vdpa_dev_ops ifcvf_ops = {
+static struct rte_vdpa_dev_ops ifcvf_ops = {
        .get_queue_num = ifcvf_get_queue_num,
        .get_features = ifcvf_get_vdpa_features,
        .get_protocol_features = ifcvf_get_protocol_features,
        .dev_conf = ifcvf_dev_config,
        .dev_close = ifcvf_dev_close,
        .set_vring_state = NULL,
-       .set_features = NULL,
+       .set_features = ifcvf_set_features,
        .migration_done = NULL,
        .get_vfio_group_fd = ifcvf_get_vfio_group_fd,
        .get_vfio_device_fd = ifcvf_get_vfio_device_fd,
@@ -695,11 +756,18 @@ ifcvf_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        if (ifcvf_vfio_setup(internal) < 0)
                return -1;
 
+       if (ifcvf_init_hw(&internal->hw, internal->pdev) < 0)
+               return -1;
+
        internal->max_queues = IFCVF_MAX_QUEUES;
        features = ifcvf_get_features(&internal->hw);
        internal->features = (features &
                ~(1ULL << VIRTIO_F_IOMMU_PLATFORM)) |
-               (1ULL << VHOST_USER_F_PROTOCOL_FEATURES);
+               (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
+               (1ULL << VIRTIO_NET_F_CTRL_VQ) |
+               (1ULL << VIRTIO_NET_F_STATUS) |
+               (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) |
+               (1ULL << VHOST_F_LOG_ALL);
 
        internal->dev_addr.pci_addr = pci_dev->addr;
        internal->dev_addr.type = PCI_ADDR;
index 70fdfe7..431be02 100644 (file)
@@ -1,7 +1,7 @@
 ..
      BSD LICENSE
    
-     Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
+     Copyright(c) 2010-2018 Intel Corporation. All rights reserved.
      All rights reserved.
    
      Redistribution and use in source and binary forms, with or without
@@ -34,7 +34,7 @@ Intel® IXGBE driver
 ===================
 
 This directory contains source code of FreeBSD ixgbe driver of version
-cid-ixgbe.2018.01.02.tar.gz released by the team which develop
+cid-ixgbe.2018.08.28.tar.gz released by the team which develops
 basic drivers for any ixgbe NIC. The sub-directory of base/
 contains the original source package.
 This driver is valid for the product(s) listed below
index ee7ce2e..245ff75 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "ixgbe_type.h"
 #include "ixgbe_82598.h"
index 20aab9f..8013f49 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_82598_H_
 #define _IXGBE_82598_H_
index 2621721..7de753f 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "ixgbe_type.h"
 #include "ixgbe_82599.h"
@@ -87,9 +58,6 @@ void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw)
                mac->ops.setup_mac_link = ixgbe_setup_mac_link_82599;
                mac->ops.set_rate_select_speed =
                                               ixgbe_set_hard_rate_select_speed;
-               if (ixgbe_get_media_type(hw) == ixgbe_media_type_fiber_fixed)
-                       mac->ops.set_rate_select_speed =
-                                              ixgbe_set_soft_rate_select_speed;
        } else {
                if ((ixgbe_get_media_type(hw) == ixgbe_media_type_backplane) &&
                     (hw->phy.smart_speed == ixgbe_smart_speed_auto ||
@@ -561,16 +529,9 @@ enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw)
        case IXGBE_DEV_ID_82599_T3_LOM:
                media_type = ixgbe_media_type_copper;
                break;
-       case IXGBE_DEV_ID_82599_LS:
-               media_type = ixgbe_media_type_fiber_lco;
-               break;
        case IXGBE_DEV_ID_82599_QSFP_SF_QP:
                media_type = ixgbe_media_type_fiber_qsfp;
                break;
-       case IXGBE_DEV_ID_82599_BYPASS:
-               media_type = ixgbe_media_type_fiber_fixed;
-               hw->phy.multispeed_fiber = true;
-               break;
        default:
                media_type = ixgbe_media_type_unknown;
                break;
index d555dbc..a32eb1f 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_82599_H_
 #define _IXGBE_82599_H_
index e50c104..873c079 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "ixgbe_api.h"
 #include "ixgbe_common.h"
@@ -177,8 +148,6 @@ s32 ixgbe_set_mac_type(struct ixgbe_hw *hw)
        case IXGBE_DEV_ID_82599_QSFP_SF_QP:
        case IXGBE_DEV_ID_82599EN_SFP:
        case IXGBE_DEV_ID_82599_CX4:
-       case IXGBE_DEV_ID_82599_LS:
-       case IXGBE_DEV_ID_82599_BYPASS:
        case IXGBE_DEV_ID_82599_T3_LOM:
                hw->mac.type = ixgbe_mac_82599EB;
                break;
@@ -193,7 +162,6 @@ s32 ixgbe_set_mac_type(struct ixgbe_hw *hw)
                break;
        case IXGBE_DEV_ID_X540T:
        case IXGBE_DEV_ID_X540T1:
-       case IXGBE_DEV_ID_X540_BYPASS:
                hw->mac.type = ixgbe_mac_X540;
                hw->mvals = ixgbe_mvals_X540;
                break;
@@ -1359,6 +1327,18 @@ void ixgbe_restore_mdd_vf(struct ixgbe_hw *hw, u32 vf)
                hw->mac.ops.restore_mdd_vf(hw, vf);
 }
 
+/**
+ *  ixgbe_fw_recovery_mode - Check if in FW NVM recovery mode
+ *  @hw: pointer to hardware structure
+ *
+ **/
+bool ixgbe_fw_recovery_mode(struct ixgbe_hw *hw)
+{
+       if (hw->mac.ops.fw_recovery_mode)
+               return hw->mac.ops.fw_recovery_mode(hw);
+       return false;
+}
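
A sketch of a caller-side guard built on the new wrapper, assuming ixgbe_api.h is on the include path and hw points at an initialized struct ixgbe_hw; init_guard() is hypothetical. In recovery mode only NVM recovery traffic should be issued, so normal initialization bails out early.

    #include "ixgbe_api.h"

    static int
    init_guard(struct ixgbe_hw *hw)
    {
            if (ixgbe_fw_recovery_mode(hw))
                    return -1; /* defer all non-recovery configuration */
            return 0;
    }
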
+
 /**
  *  ixgbe_enter_lplu - Transition to low power states
  *  @hw: pointer to hardware structure
index 2f532aa..ff8f7b2 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_API_H_
 #define _IXGBE_API_H_
@@ -214,6 +185,7 @@ void ixgbe_disable_mdd(struct ixgbe_hw *hw);
 void ixgbe_enable_mdd(struct ixgbe_hw *hw);
 void ixgbe_mdd_event(struct ixgbe_hw *hw, u32 *vf_bitmap);
 void ixgbe_restore_mdd_vf(struct ixgbe_hw *hw, u32 vf);
+bool ixgbe_fw_recovery_mode(struct ixgbe_hw *hw);
 s32 ixgbe_enter_lplu(struct ixgbe_hw *hw);
 s32 ixgbe_handle_lasi(struct ixgbe_hw *hw);
 void ixgbe_set_rate_select_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed);
index e7e9256..21f973e 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "ixgbe_common.h"
 #include "ixgbe_phy.h"
@@ -167,7 +138,6 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
        DEBUGFUNC("ixgbe_device_supports_autoneg_fc");
 
        switch (hw->phy.media_type) {
-       case ixgbe_media_type_fiber_fixed:
        case ixgbe_media_type_fiber_qsfp:
        case ixgbe_media_type_fiber:
                /* flow control autoneg black list */
@@ -201,7 +171,6 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
                case IXGBE_DEV_ID_82599_T3_LOM:
                case IXGBE_DEV_ID_X540T:
                case IXGBE_DEV_ID_X540T1:
-               case IXGBE_DEV_ID_X540_BYPASS:
                case IXGBE_DEV_ID_X550T:
                case IXGBE_DEV_ID_X550T1:
                case IXGBE_DEV_ID_X550EM_X_10G_T:
@@ -267,7 +236,6 @@ s32 ixgbe_setup_fc_generic(struct ixgbe_hw *hw)
                        goto out;
 
                /* fall through - only backplane uses autoc */
-       case ixgbe_media_type_fiber_fixed:
        case ixgbe_media_type_fiber_qsfp:
        case ixgbe_media_type_fiber:
                reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA);
@@ -3127,7 +3095,6 @@ void ixgbe_fc_autoneg(struct ixgbe_hw *hw)
 
        switch (hw->phy.media_type) {
        /* Autoneg flow control on fiber adapters */
-       case ixgbe_media_type_fiber_fixed:
        case ixgbe_media_type_fiber_qsfp:
        case ixgbe_media_type_fiber:
                if (speed == IXGBE_LINK_SPEED_1GB_FULL)
@@ -5267,7 +5234,6 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
 
                /* Set the module link speed */
                switch (hw->phy.media_type) {
-               case ixgbe_media_type_fiber_fixed:
                case ixgbe_media_type_fiber:
                        ixgbe_set_rate_select_speed(hw,
                                                    IXGBE_LINK_SPEED_10GB_FULL);
@@ -5296,7 +5262,7 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
                 * Section 73.10.2, we may have to wait up to 500ms if KR is
                 * attempted.  82599 uses the same timing for 10g SFI.
                 */
-               for (i = 0; i < 5; i++) {
+               for (i = 0; i < 10; i++) {
                        /* Wait for the link partner to also set speed */
                        msec_delay(100);
 
@@ -5318,7 +5284,6 @@ s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw,
 
                /* Set the module link speed */
                switch (hw->phy.media_type) {
-               case ixgbe_media_type_fiber_fixed:
                case ixgbe_media_type_fiber:
                        ixgbe_set_rate_select_speed(hw,
                                                    IXGBE_LINK_SPEED_1GB_FULL);
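
The loop-bound change above (5 -> 10 iterations of a 100 ms delay) doubles the
wait for the link partner from roughly 500 ms to 1000 ms, consistent with the
Clause 73.10.2 comment in the surrounding context. A minimal sketch of the
bounded-poll pattern; link_is_up() is a placeholder, not an ixgbe API:

    /* Wait up to 10 x 100 ms for the partner to report link. */
    static bool wait_for_link(struct ixgbe_hw *hw)
    {
            int i;

            for (i = 0; i < 10; i++) {
                    msec_delay(100);        /* let the partner settle */
                    if (link_is_up(hw))     /* placeholder check */
                            return true;
            }
            return false;                   /* ~1 s elapsed, no link */
    }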
index fd35dcc..3bb2475 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_COMMON_H_
 #define _IXGBE_COMMON_H_
index 2877f22..a590e0e 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 
 #include "ixgbe_type.h"
index 4120804..503d060 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_DCB_H_
 #define _IXGBE_DCB_H_
index 3ed8337..d87cb58 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 
 #include "ixgbe_type.h"
index eb88b3d..1a14744 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_DCB_82598_H_
 #define _IXGBE_DCB_82598_H_
index 8f9e159..f4f0ff0 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 
 #include "ixgbe_type.h"
index dc0fb28..085ada2 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_DCB_82599_H_
 #define _IXGBE_DCB_82599_H_
index 40dad77..67a124d 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "ixgbe_vf.h"
 #include "ixgbe_hv_vf.h"
index 9119f29..9664f3b 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2016, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_HV_VF_H_
 #define _IXGBE_HV_VF_H_
index 2785bba..cb82942 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "ixgbe_type.h"
 #include "ixgbe_mbx.h"
index bde50a5..5d32cbc 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_MBX_H_
 #define _IXGBE_MBX_H_
index bb5dfd2..ea8dc1c 100644 (file)
@@ -1,36 +1,6 @@
-/******************************************************************************
-
-  Copyright (c) 2001-2015, Intel Corporation
-  All rights reserved.
-  
-  Redistribution and use in source and binary forms, with or without 
-  modification, are permitted provided that the following conditions are met:
-  
-   1. Redistributions of source code must retain the above copyright notice, 
-      this list of conditions and the following disclaimer.
-  
-   2. Redistributions in binary form must reproduce the above copyright 
-      notice, this list of conditions and the following disclaimer in the 
-      documentation and/or other materials provided with the distribution.
-  
-   3. Neither the name of the Intel Corporation nor the names of its 
-      contributors may be used to endorse or promote products derived from 
-      this software without specific prior written permission.
-  
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
-  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
-  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
-  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
-  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
-  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
-  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
-  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
-  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-  POSSIBILITY OF SUCH DAMAGE.
-
-******************************************************************************/
-/*$FreeBSD$*/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_OS_H_
 #define _IXGBE_OS_H_
@@ -51,7 +21,7 @@
 
 #define ASSERT(x) if(!(x)) rte_panic("IXGBE: x")
 
-#define DELAY(x) rte_delay_us(x)
+#define DELAY(x) rte_delay_us_sleep(x)
 #define usec_delay(x) DELAY(x)
 #define msec_delay(x) DELAY(1000*(x))
 
index 2df068e..6cdd8fb 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "ixgbe_api.h"
 #include "ixgbe_common.h"
@@ -2594,7 +2565,6 @@ STATIC bool ixgbe_get_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl)
 {
        u32 data_oe_bit = IXGBE_I2C_DATA_OE_N_EN_BY_MAC(hw);
        bool data;
-       UNREFERENCED_1PARAMETER(hw);
 
        DEBUGFUNC("ixgbe_get_i2c_data");
 
index cf8cadd..132fa54 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_PHY_H_
 #define _IXGBE_PHY_H_
index 6e03089..cee6ba2 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_TYPE_H_
 #define _IXGBE_TYPE_H_
@@ -46,8 +17,8 @@ POSSIBILITY OF SUCH DAMAGE.
  *
  * - IXGBE_ERROR_POLLING
  * This category is for errors related to polling/timeout issues and should be
- * used in any case where the timeout occured, or a failure to obtain a lock, or
- * failure to receive data within the time limit.
+ * used in any case where the timeout occurred, or a failure to obtain a lock,
+ * or failure to receive data within the time limit.
  *
  * - IXGBE_ERROR_CAUTION
  * This category should be used for reporting issues that may be the cause of
@@ -122,12 +93,9 @@ POSSIBILITY OF SUCH DAMAGE.
 #define IXGBE_DEV_ID_82599_T3_LOM              0x151C
 #define IXGBE_DEV_ID_82599_VF                  0x10ED
 #define IXGBE_DEV_ID_82599_VF_HV               0x152E
-#define IXGBE_DEV_ID_82599_LS                  0x154F
-#define IXGBE_DEV_ID_82599_BYPASS              0x155D
 #define IXGBE_DEV_ID_X540T                     0x1528
 #define IXGBE_DEV_ID_X540_VF                   0x1515
 #define IXGBE_DEV_ID_X540_VF_HV                        0x1530
-#define IXGBE_DEV_ID_X540_BYPASS               0x155C
 #define IXGBE_DEV_ID_X540T1                    0x1560
 #define IXGBE_DEV_ID_X550T                     0x1563
 #define IXGBE_DEV_ID_X550T1                    0x15D1
@@ -882,6 +850,10 @@ struct ixgbe_dmac_config {
 #define IXGBE_RTTDQSEL         0x04904
 #define IXGBE_RTTDT1C          0x04908
 #define IXGBE_RTTDT1S          0x0490C
+#define IXGBE_RTTQCNCR         0x08B00
+#define IXGBE_RTTQCNTG         0x04A90
+#define IXGBE_RTTBCNRD         0x0498C
+#define IXGBE_RTTQCNRR         0x0498C
 #define IXGBE_RTTDTECC         0x04990
 #define IXGBE_RTTDTECC_NO_BCN  0x00000100
 
@@ -892,6 +864,7 @@ struct ixgbe_dmac_config {
 #define IXGBE_RTTBCNRC_RF_INT_MASK \
        (IXGBE_RTTBCNRC_RF_DEC_MASK << IXGBE_RTTBCNRC_RF_INT_SHIFT)
 #define IXGBE_RTTBCNRM 0x04980
+#define IXGBE_RTTQCNRM 0x04980
 
 /* BCN (for DCB) Registers */
 #define IXGBE_RTTBCNRS 0x04988
@@ -1099,6 +1072,9 @@ struct ixgbe_dmac_config {
 #define IXGBE_FWSM_MODE_MASK   0xE
 #define IXGBE_FWSM_TS_ENABLED  0x1
 #define IXGBE_FWSM_FW_MODE_PT  0x4
+#define IXGBE_FWSM_FW_NVM_RECOVERY_MODE (1 << 5)
+#define IXGBE_FWSM_EXT_ERR_IND_MASK 0x01F80000
+#define IXGBE_FWSM_FW_VAL_BIT  (1 << 15)
 
 /* ARC Subsystem registers */
 #define IXGBE_HICR             0x15F00
@@ -3755,9 +3731,7 @@ enum ixgbe_sfp_type {
 enum ixgbe_media_type {
        ixgbe_media_type_unknown = 0,
        ixgbe_media_type_fiber,
-       ixgbe_media_type_fiber_fixed,
        ixgbe_media_type_fiber_qsfp,
-       ixgbe_media_type_fiber_lco,
        ixgbe_media_type_copper,
        ixgbe_media_type_backplane,
        ixgbe_media_type_cx4,
@@ -4050,6 +4024,7 @@ struct ixgbe_mac_operations {
        void (*enable_mdd)(struct ixgbe_hw *hw);
        void (*mdd_event)(struct ixgbe_hw *hw, u32 *vf_bitmap);
        void (*restore_mdd_vf)(struct ixgbe_hw *hw, u32 vf);
+       bool (*fw_recovery_mode)(struct ixgbe_hw *hw);
 };
 
 struct ixgbe_phy_operations {
index 5b25a6b..aac3782 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 
 #include "ixgbe_api.h"
index 3efffe8..dba643f 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_VF_H_
 #define _IXGBE_VF_H_
index 716664b..f00f0ea 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "ixgbe_x540.h"
 #include "ixgbe_type.h"
index 8a19ae2..231dfe5 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_X540_H_
 #define _IXGBE_X540_H_
index f66f540..f7b98af 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #include "ixgbe_x550.h"
 #include "ixgbe_x540.h"
@@ -82,6 +53,7 @@ s32 ixgbe_init_ops_X550(struct ixgbe_hw *hw)
        mac->ops.enable_mdd = ixgbe_enable_mdd_X550;
        mac->ops.mdd_event = ixgbe_mdd_event_X550;
        mac->ops.restore_mdd_vf = ixgbe_restore_mdd_vf_X550;
+       mac->ops.fw_recovery_mode = ixgbe_fw_recovery_mode_X550;
        mac->ops.disable_rx = ixgbe_disable_rx_x550;
        /* Manageability interface */
        mac->ops.set_fw_drv_ver = ixgbe_set_fw_drv_ver_x550;
@@ -349,7 +321,7 @@ STATIC s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw)
 
        switch (hw->device_id) {
        case IXGBE_DEV_ID_X550EM_A_SFP:
-               return ixgbe_identify_module_generic(hw);
+               return ixgbe_identify_sfp_module_X550em(hw);
        case IXGBE_DEV_ID_X550EM_X_SFP:
                /* set up for CS4227 usage */
                ixgbe_setup_mux_ctl(hw);
@@ -357,7 +329,7 @@ STATIC s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw)
                /* Fallthrough */
 
        case IXGBE_DEV_ID_X550EM_A_SFP_N:
-               return ixgbe_identify_module_generic(hw);
+               return ixgbe_identify_sfp_module_X550em(hw);
                break;
        case IXGBE_DEV_ID_X550EM_X_KX4:
                hw->phy.type = ixgbe_phy_x550em_kx4;
@@ -2808,9 +2780,9 @@ s32 ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw,
                                 (IXGBE_CS4227_EDC_MODE_SR << 1));
 
                if (setup_linear)
-                       reg_phy_ext = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 0x1;
+                       reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 0x1;
                else
-                       reg_phy_ext = (IXGBE_CS4227_EDC_MODE_SR << 1) | 0x1;
+                       reg_phy_ext |= (IXGBE_CS4227_EDC_MODE_SR << 1) | 0x1;
                ret_val = hw->phy.ops.write_reg(hw, reg_slice,
                                         IXGBE_MDIO_ZERO_DEV_TYPE, reg_phy_ext);
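
The two '=' -> '|=' changes above restore a read-modify-write: reg_phy_ext is
read and its EDC mode bits masked earlier in the function (context truncated
in this hunk), and the new mode must be merged into that image rather than
overwrite it. The general pattern, with placeholder names:

    val = read_reg(REG);               /* current register image   */
    val &= ~MODE_MASK;                 /* clear only the mode bits */
    val |= (new_mode << 1) | 0x1;      /* merge the new setting    */
    write_reg(REG, val);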
 
@@ -4661,3 +4633,18 @@ s32 ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min,
 
        return ret_val;
 }
+
+/**
+ * ixgbe_fw_recovery_mode_X550 - Check FW NVM recovery mode
+ * @hw: pointer to hardware structure
+ *
+ * Returns true if in FW NVM recovery mode.
+ **/
+bool ixgbe_fw_recovery_mode_X550(struct ixgbe_hw *hw)
+{
+       u32 fwsm;
+
+       fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM_BY_MAC(hw));
+
+       return !!(fwsm & IXGBE_FWSM_FW_NVM_RECOVERY_MODE);
+}
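
The helper reads the firmware status word for the current MAC and tests the
NVM-recovery flag added earlier in this diff (IXGBE_FWSM_FW_NVM_RECOVERY_MODE,
bit 5); '!!' folds the masked value to a proper bool. Callers go through the
optional mac.ops hook, wired up here only for X550, so the pointer is checked
first; a sketch mirroring the eth_ixgbe_dev_init change below:

    if (hw->mac.ops.fw_recovery_mode &&
        hw->mac.ops.fw_recovery_mode(hw)) {
            /* NVM recovery mode: refuse to initialize the port. */
            return -EIO;
    }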
index 6d18874..3bd98f2 100644 (file)
@@ -1,35 +1,6 @@
-/*******************************************************************************
-
-Copyright (c) 2001-2015, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- 1. Redistributions of source code must retain the above copyright notice,
-    this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
-    notice, this list of conditions and the following disclaimer in the
-    documentation and/or other materials provided with the distribution.
-
- 3. Neither the name of the Intel Corporation nor the names of its
-    contributors may be used to endorse or promote products derived from
-    this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGE.
-
-***************************************************************************/
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2001-2018
+ */
 
 #ifndef _IXGBE_X550_H_
 #define _IXGBE_X550_H_
@@ -121,4 +92,5 @@ s32 ixgbe_reset_phy_t_X550em(struct ixgbe_hw *hw);
 s32 ixgbe_identify_sfp_module_X550em(struct ixgbe_hw *hw);
 s32 ixgbe_led_on_t_X550em(struct ixgbe_hw *hw, u32 led_idx);
 s32 ixgbe_led_off_t_X550em(struct ixgbe_hw *hw, u32 led_idx);
+bool ixgbe_fw_recovery_mode_X550(struct ixgbe_hw *hw);
 #endif /* _IXGBE_X550_H_ */
index 3147e11..21ac64b 100644 (file)
@@ -20,6 +20,9 @@ sources = [
 error_cflags = ['-Wno-unused-value',
                '-Wno-unused-but-set-variable']
 c_args = cflags
+if allow_experimental_apis
+       c_args += '-DALLOW_EXPERIMENTAL_API'
+endif
 foreach flag: error_cflags
        if cc.has_argument(flag)
                c_args += flag
index 26b1927..269595b 100644 (file)
@@ -217,8 +217,7 @@ static int ixgbe_dev_lsc_interrupt_setup(struct rte_eth_dev *dev, uint8_t on);
 static int ixgbe_dev_macsec_interrupt_setup(struct rte_eth_dev *dev);
 static int ixgbe_dev_rxq_interrupt_setup(struct rte_eth_dev *dev);
 static int ixgbe_dev_interrupt_get_status(struct rte_eth_dev *dev);
-static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev,
-                                     struct rte_intr_handle *handle);
+static int ixgbe_dev_interrupt_action(struct rte_eth_dev *dev);
 static void ixgbe_dev_interrupt_handler(void *param);
 static void ixgbe_dev_interrupt_delayed_handler(void *param);
 static int ixgbe_add_rar(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
@@ -437,7 +436,6 @@ static const struct rte_pci_id pci_id_ixgbe_map[] = {
        { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP) },
        { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM) },
        { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM) },
-       { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_LS) },
        { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T) },
        { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1) },
        { RTE_PCI_DEVICE(IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP) },
@@ -1119,6 +1117,14 @@ eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev, void *init_params __rte_unused)
                return -EIO;
        }
 
+       if (hw->mac.ops.fw_recovery_mode && hw->mac.ops.fw_recovery_mode(hw)) {
+               PMD_INIT_LOG(ERR, "\nERROR: "
+                       "Firmware recovery mode detected. Limiting functionality.\n"
+                       "Refer to the Intel(R) Ethernet Adapters and Devices "
+                       "User Guide for details on firmware recovery mode.");
+               return -EIO;
+       }
+
        /* pick up the PCI bus settings for reporting later */
        ixgbe_get_bus_info(hw);
 
@@ -1331,12 +1337,6 @@ eth_ixgbe_dev_uninit(struct rte_eth_dev *eth_dev)
        /* uninitialize PF if max_vfs not zero */
        ixgbe_pf_host_uninit(eth_dev);
 
-       rte_free(eth_dev->data->mac_addrs);
-       eth_dev->data->mac_addrs = NULL;
-
-       rte_free(eth_dev->data->hash_mac_addrs);
-       eth_dev->data->hash_mac_addrs = NULL;
-
        /* remove all the fdir filters & hash */
        ixgbe_fdir_filter_uninit(eth_dev);
 
@@ -1619,7 +1619,12 @@ eth_ixgbevf_dev_init(struct rte_eth_dev *eth_dev)
         */
        if ((diag != IXGBE_SUCCESS) && (diag != IXGBE_ERR_INVALID_MAC_ADDR)) {
                PMD_INIT_LOG(ERR, "VF Initialization Failure: %d", diag);
-               return diag;
+               /*
+                * This error code will be propagated to the app by
+                * rte_eth_dev_reset, so use a public error code rather than
+                * the internal-only IXGBE_ERR_RESET_FAILED
+                */
+               return -EAGAIN;
        }
 
        /* negotiate mailbox API version to use with the PF. */
@@ -1711,9 +1716,6 @@ eth_ixgbevf_dev_uninit(struct rte_eth_dev *eth_dev)
        /* Disable the interrupts for VF */
        ixgbevf_intr_disable(eth_dev);
 
-       rte_free(eth_dev->data->mac_addrs);
-       eth_dev->data->mac_addrs = NULL;
-
        rte_intr_disable(intr_handle);
        rte_intr_callback_unregister(intr_handle,
                                     ixgbevf_dev_interrupt_handler, eth_dev);
@@ -4282,8 +4284,7 @@ ixgbe_dev_link_status_print(struct rte_eth_dev *dev)
  *  - On failure, a negative value.
  */
 static int
-ixgbe_dev_interrupt_action(struct rte_eth_dev *dev,
-                          struct rte_intr_handle *intr_handle)
+ixgbe_dev_interrupt_action(struct rte_eth_dev *dev)
 {
        struct ixgbe_interrupt *intr =
                IXGBE_DEV_PRIVATE_TO_INTR(dev->data->dev_private);
@@ -4334,7 +4335,6 @@ ixgbe_dev_interrupt_action(struct rte_eth_dev *dev,
 
        PMD_DRV_LOG(DEBUG, "enable intr immediately");
        ixgbe_enable_intr(dev);
-       rte_intr_enable(intr_handle);
 
        return 0;
 }
@@ -4417,7 +4417,7 @@ ixgbe_dev_interrupt_handler(void *param)
        struct rte_eth_dev *dev = (struct rte_eth_dev *)param;
 
        ixgbe_dev_interrupt_get_status(dev);
-       ixgbe_dev_interrupt_action(dev, dev->intr_handle);
+       ixgbe_dev_interrupt_action(dev);
 }
 
 static int
@@ -5008,14 +5008,14 @@ ixgbevf_dev_configure(struct rte_eth_dev *dev)
         * Keep the persistent behavior the same as Host PF
         */
 #ifndef RTE_LIBRTE_IXGBE_PF_DISABLE_STRIP_CRC
-       if (rte_eth_dev_must_keep_crc(conf->rxmode.offloads)) {
+       if (conf->rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
                PMD_INIT_LOG(NOTICE, "VF can't disable HW CRC Strip");
-               conf->rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
+               conf->rxmode.offloads &= ~DEV_RX_OFFLOAD_KEEP_CRC;
        }
 #else
-       if (!rte_eth_dev_must_keep_crc(conf->rxmode.offloads)) {
+       if (!(conf->rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)) {
                PMD_INIT_LOG(NOTICE, "VF can't enable HW CRC Strip");
-               conf->rxmode.offloads &= ~DEV_RX_OFFLOAD_CRC_STRIP;
+               conf->rxmode.offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
        }
 #endif
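
DEV_RX_OFFLOAD_CRC_STRIP is being retired in this release: stripping becomes
the default, and applications that want the CRC preserved opt in with
DEV_RX_OFFLOAD_KEEP_CRC, which is why the VF sanity checks invert as above. A
minimal application-side sketch under that model:

    struct rte_eth_conf conf = { 0 };

    /* Omitting KEEP_CRC now means "strip". Requesting it asks the PMD
     * to leave the CRC in place; in the default build the ixgbe VF
     * clears the flag again with a notice, since it always strips.
     */
    conf.rxmode.offloads |= DEV_RX_OFFLOAD_KEEP_CRC;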
 
index 1adf1b8..f0fafeb 100644 (file)
@@ -363,6 +363,17 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr,
                                item, "Not supported by ntuple filter");
                        return -rte_errno;
                }
+               if ((ipv4_mask->hdr.src_addr != 0 &&
+                       ipv4_mask->hdr.src_addr != UINT32_MAX) ||
+                       (ipv4_mask->hdr.dst_addr != 0 &&
+                       ipv4_mask->hdr.dst_addr != UINT32_MAX) ||
+                       (ipv4_mask->hdr.next_proto_id != UINT8_MAX &&
+                       ipv4_mask->hdr.next_proto_id != 0)) {
+                       rte_flow_error_set(error,
+                               EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by ntuple filter");
+                       return -rte_errno;
+               }
 
                filter->dst_ip_mask = ipv4_mask->hdr.dst_addr;
                filter->src_ip_mask = ipv4_mask->hdr.src_addr;
@@ -432,6 +443,15 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr,
                                item, "Not supported by ntuple filter");
                        return -rte_errno;
                }
+               if ((tcp_mask->hdr.src_port != 0 &&
+                       tcp_mask->hdr.src_port != UINT16_MAX) ||
+                       (tcp_mask->hdr.dst_port != 0 &&
+                       tcp_mask->hdr.dst_port != UINT16_MAX)) {
+                       rte_flow_error_set(error,
+                               EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by ntuple filter");
+                       return -rte_errno;
+               }
 
                filter->dst_port_mask  = tcp_mask->hdr.dst_port;
                filter->src_port_mask  = tcp_mask->hdr.src_port;
@@ -467,6 +487,15 @@ cons_parse_ntuple_filter(const struct rte_flow_attr *attr,
                                item, "Not supported by ntuple filter");
                        return -rte_errno;
                }
+               if ((udp_mask->hdr.src_port != 0 &&
+                       udp_mask->hdr.src_port != UINT16_MAX) ||
+                       (udp_mask->hdr.dst_port != 0 &&
+                       udp_mask->hdr.dst_port != UINT16_MAX)) {
+                       rte_flow_error_set(error,
+                               EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
+                               item, "Not supported by ntuple filter");
+                       return -rte_errno;
+               }
 
                filter->dst_port_mask = udp_mask->hdr.dst_port;
                filter->src_port_mask = udp_mask->hdr.src_port;
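
The three added checks enforce that the ntuple filter only supports
all-or-nothing matching per field: each mask must be 0 (wildcard) or all-ones
(exact match), and anything in between is now rejected with EINVAL before the
masks are copied into the filter. Illustrative mask values under that rule:

    struct rte_flow_item_ipv4 ipv4_mask = { 0 };

    ipv4_mask.hdr.src_addr = UINT32_MAX;              /* ok: exact    */
    ipv4_mask.hdr.dst_addr = 0;                       /* ok: wildcard */
    ipv4_mask.hdr.src_addr = rte_cpu_to_be_32(0xffffff00);
                                                      /* now rejected */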
index 08405f1..5a41688 100644 (file)
@@ -364,6 +364,7 @@ ixgbe_crypto_create_session(void *device,
                        conf->crypto_xform->aead.algo !=
                                        RTE_CRYPTO_AEAD_AES_GCM) {
                PMD_DRV_LOG(ERR, "Unsupported crypto transformation mode\n");
+               rte_mempool_put(mempool, (void *)ic_session);
                return -ENOTSUP;
        }
        aead_xform = &conf->crypto_xform->aead;
@@ -373,6 +374,7 @@ ixgbe_crypto_create_session(void *device,
                        ic_session->op = IXGBE_OP_AUTHENTICATED_DECRYPTION;
                } else {
                        PMD_DRV_LOG(ERR, "IPsec decryption not enabled\n");
+                       rte_mempool_put(mempool, (void *)ic_session);
                        return -ENOTSUP;
                }
        } else {
@@ -380,6 +382,7 @@ ixgbe_crypto_create_session(void *device,
                        ic_session->op = IXGBE_OP_AUTHENTICATED_ENCRYPTION;
                } else {
                        PMD_DRV_LOG(ERR, "IPsec encryption not enabled\n");
+                       rte_mempool_put(mempool, (void *)ic_session);
                        return -ENOTSUP;
                }
        }
@@ -395,6 +398,7 @@ ixgbe_crypto_create_session(void *device,
        if (ic_session->op == IXGBE_OP_AUTHENTICATED_ENCRYPTION) {
                if (ixgbe_crypto_add_sa(ic_session)) {
                        PMD_DRV_LOG(ERR, "Failed to add SA\n");
+                       rte_mempool_put(mempool, (void *)ic_session);
                        return -EPERM;
                }
        }
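All four insertions in ixgbe_crypto_create_session() fix the same leak: the session object came from a mempool but was never returned on the error paths. A hedged sketch of the underlying rule, with illustrative names:

#include <errno.h>
#include <rte_mempool.h>

/* Sketch: whatever rte_mempool_get() hands out must go back through
 * rte_mempool_put() on every early return, or the pool slowly drains. */
static int
checked_alloc(struct rte_mempool *mp, int supported)
{
	void *obj;

	if (rte_mempool_get(mp, &obj) < 0)
		return -ENOMEM;
	if (!supported) {
		rte_mempool_put(mp, obj); /* undo the get before bailing out */
		return -ENOTSUP;
	}
	return 0;
}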
@@ -609,7 +613,7 @@ ixgbe_crypto_enable_ipsec(struct rte_eth_dev *dev)
                PMD_DRV_LOG(ERR, "RSC and IPsec not supported");
                return -1;
        }
-       if (rte_eth_dev_must_keep_crc(rx_offloads)) {
+       if (rx_offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
                PMD_DRV_LOG(ERR, "HW CRC strip needs to be enabled for IPsec");
                return -1;
        }
index f82b74a..2f0262a 100644
 #endif
 /* Bit Mask to indicate what bits required for building TX context */
 #define IXGBE_TX_OFFLOAD_MASK (                         \
+               PKT_TX_OUTER_IPV6 |              \
+               PKT_TX_OUTER_IPV4 |              \
+               PKT_TX_IPV6 |                    \
+               PKT_TX_IPV4 |                    \
                PKT_TX_VLAN_PKT |                \
                PKT_TX_IP_CKSUM |                \
                PKT_TX_L4_MASK |                 \
@@ -2057,8 +2061,7 @@ next_desc:
                 * of the ixgbe PMD.
                 *
                 * TODO:
-                *    - Get rid of "volatile" crap and let the compiler do its
-                *      job.
+                *    - Get rid of "volatile" and let the compiler do its job.
                 *    - Use the proper memory barrier (rte_rmb()) to ensure the
                 *      memory ordering below.
                 */
@@ -2848,7 +2851,6 @@ ixgbe_get_rx_port_offloads(struct rte_eth_dev *dev)
        offloads = DEV_RX_OFFLOAD_IPV4_CKSUM  |
                   DEV_RX_OFFLOAD_UDP_CKSUM   |
                   DEV_RX_OFFLOAD_TCP_CKSUM   |
-                  DEV_RX_OFFLOAD_CRC_STRIP   |
                   DEV_RX_OFFLOAD_KEEP_CRC    |
                   DEV_RX_OFFLOAD_JUMBO_FRAME |
                   DEV_RX_OFFLOAD_SCATTER;
@@ -2936,7 +2938,7 @@ ixgbe_dev_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
                queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
        rxq->port_id = dev->data->port_id;
-       if (rte_eth_dev_must_keep_crc(dev->data->dev_conf.rxmode.offloads))
+       if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)
                rxq->crc_len = ETHER_CRC_LEN;
        else
                rxq->crc_len = 0;
@@ -4705,7 +4707,7 @@ ixgbe_set_rsc(struct rte_eth_dev *dev)
 
        /* RSC global configuration (chapter 4.6.7.2.1 of 82599 Spec) */
 
-       if (rte_eth_dev_must_keep_crc(rx_conf->offloads) &&
+       if ((rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC) &&
             (rx_conf->offloads & DEV_RX_OFFLOAD_TCP_LRO)) {
                /*
                 * According to chapter 4.6.7.2.1 of the Spec Rev.
@@ -4854,7 +4856,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
         * Configure CRC stripping, if any.
         */
        hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0);
-       if (rte_eth_dev_must_keep_crc(rx_conf->offloads))
+       if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
                hlreg0 &= ~IXGBE_HLREG0_RXCRCSTRP;
        else
                hlreg0 |= IXGBE_HLREG0_RXCRCSTRP;
@@ -4895,8 +4897,10 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
                 * Reset crc_len in case it was changed after queue setup by a
                 * call to configure.
                 */
-               rxq->crc_len = rte_eth_dev_must_keep_crc(rx_conf->offloads) ?
-                               ETHER_CRC_LEN : 0;
+               if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
+                       rxq->crc_len = ETHER_CRC_LEN;
+               else
+                       rxq->crc_len = 0;
 
                /* Setup the Base and Length of the Rx Descriptor Rings */
                bus_addr = rxq->rx_ring_phys_addr;
@@ -4965,7 +4969,7 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)
        if (hw->mac.type == ixgbe_mac_82599EB ||
            hw->mac.type == ixgbe_mac_X540) {
                rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
-               if (rte_eth_dev_must_keep_crc(rx_conf->offloads))
+               if (rx_conf->offloads & DEV_RX_OFFLOAD_KEEP_CRC)
                        rdrxctl &= ~IXGBE_RDRXCTL_CRCSTRIP;
                else
                        rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
@@ -5702,7 +5706,7 @@ ixgbe_config_rss_filter(struct rte_eth_dev *dev,
         */
        if ((rss_conf.rss_hf & IXGBE_RSS_OFFLOAD_ALL) == 0) {
                ixgbe_rss_disable(dev);
-               return -EINVAL;
+               return 0;
        }
        if (rss_conf.rss_key == NULL)
                rss_conf.rss_key = rss_intel_key; /* Default hash key */
@@ -5715,13 +5719,13 @@ ixgbe_config_rss_filter(struct rte_eth_dev *dev,
 }
 
 /* Stubs needed for linkage when CONFIG_RTE_IXGBE_INC_VECTOR is set to 'n' */
-int __attribute__((weak))
+__rte_weak int
 ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev __rte_unused *dev)
 {
        return -1;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 ixgbe_recv_pkts_vec(
        void __rte_unused *rx_queue,
        struct rte_mbuf __rte_unused **rx_pkts,
@@ -5730,7 +5734,7 @@ ixgbe_recv_pkts_vec(
        return 0;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 ixgbe_recv_scattered_pkts_vec(
        void __rte_unused *rx_queue,
        struct rte_mbuf __rte_unused **rx_pkts,
@@ -5739,7 +5743,7 @@ ixgbe_recv_scattered_pkts_vec(
        return 0;
 }
 
-int __attribute__((weak))
+__rte_weak int
 ixgbe_rxq_vec_setup(struct ixgbe_rx_queue __rte_unused *rxq)
 {
        return -1;
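The __attribute__((weak)) to __rte_weak conversions are part of a tree-wide cleanup; the macro lives in rte_common.h and keeps the same link-time override semantics. A minimal sketch with a hypothetical stub name:

#include <rte_common.h>

/* Sketch: this weak stub loses at link time to any strong symbol of the
 * same name, e.g. the vector implementation built when
 * CONFIG_RTE_IXGBE_INC_VECTOR is enabled. */
__rte_weak int
example_vec_setup(void)
{
	return -1; /* reached only when no vector implementation is linked */
}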
index db516d9..5d2e3e0 100644
@@ -65,7 +65,7 @@ ixgbe_vf_representor_dev_infos_get(struct rte_eth_dev *ethdev,
        dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT |
                DEV_TX_OFFLOAD_IPV4_CKSUM | DEV_TX_OFFLOAD_UDP_CKSUM |
                DEV_TX_OFFLOAD_TCP_CKSUM | DEV_TX_OFFLOAD_SCTP_CKSUM |
-               DEV_TX_OFFLOAD_TCP_TSO;
+               DEV_TX_OFFLOAD_TCP_TSO | DEV_TX_OFFLOAD_MULTI_SEGS;
        /**< Device TX offload capabilities. */
 
        dev_info->speed_capa =
@@ -135,7 +135,7 @@ ixgbe_vf_representor_vlan_strip_queue_set(struct rte_eth_dev *ethdev,
                representor->vf_id, on);
 }
 
-struct eth_dev_ops ixgbe_vf_representor_dev_ops = {
+static const struct eth_dev_ops ixgbe_vf_representor_dev_ops = {
        .dev_infos_get          = ixgbe_vf_representor_dev_infos_get,
 
        .dev_start              = ixgbe_vf_representor_dev_start,
@@ -192,6 +192,7 @@ ixgbe_vf_representor_init(struct rte_eth_dev *ethdev, void *init_params)
                return -ENODEV;
 
        ethdev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;
+       ethdev->data->representor_id = representor->vf_id;
 
        /* Set representor device ops */
        ethdev->dev_ops = &ixgbe_vf_representor_dev_ops;
@@ -225,7 +226,10 @@ ixgbe_vf_representor_init(struct rte_eth_dev *ethdev, void *init_params)
 }
 
 int
-ixgbe_vf_representor_uninit(struct rte_eth_dev *ethdev __rte_unused)
+ixgbe_vf_representor_uninit(struct rte_eth_dev *ethdev)
 {
+       /* mac_addrs must not be freed; it is part of ixgbe_vf_info */
+       ethdev->data->mac_addrs = NULL;
+
        return 0;
 }
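This uninit change and the similar hunks below (kni, lio, mlx4, mlx5) encode one ownership rule: when mac_addrs points into a larger private structure, it must be detached before rte_eth_dev_release_port(), which would otherwise rte_free() it and corrupt the parent allocation. A hedged sketch (rte_eth_dev_release_port() is declared in the driver-side rte_ethdev_driver.h):

#include <rte_ethdev_driver.h>

/* Sketch: detach the embedded MAC array so release_port() does not free
 * memory it does not own; the array is freed with its parent struct. */
static void
detach_and_release(struct rte_eth_dev *dev)
{
	dev->data->mac_addrs = NULL; /* owned by dev_private */
	rte_eth_dev_release_port(dev);
}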
index 02d5ef5..544a141 100644
@@ -5,10 +5,11 @@ version = 2
 
 cflags += ['-DRTE_LIBRTE_IXGBE_BYPASS']
 
+allow_experimental_apis = true
+
 subdir('base')
 objs = [base_objs]
 
-allow_experimental_apis = true
 sources = files(
        'ixgbe_82599_bypass.c',
        'ixgbe_bypass.c',
index 085bb84..a1e9970 100644
@@ -207,7 +207,6 @@ eth_kni_dev_info(struct rte_eth_dev *dev __rte_unused,
        dev_info->max_rx_queues = KNI_MAX_QUEUE_PER_PORT;
        dev_info->max_tx_queues = KNI_MAX_QUEUE_PER_PORT;
        dev_info->min_rx_bufsize = 0;
-       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_CRC_STRIP;
 }
 
 static int
@@ -411,8 +410,7 @@ eth_kni_probe(struct rte_vdev_device *vdev)
        params = rte_vdev_device_args(vdev);
        PMD_LOG(INFO, "Initializing eth_kni for %s", name);
 
-       if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
-           strlen(params) == 0) {
+       if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                eth_dev = rte_eth_dev_attach_secondary(name);
                if (!eth_dev) {
                        PMD_LOG(ERR, "Failed to probe %s", name);
@@ -465,13 +463,17 @@ eth_kni_remove(struct rte_vdev_device *vdev)
        if (eth_dev == NULL)
                return -1;
 
+       /* mac_addrs must not be freed separately; it is part of dev_private */
+       eth_dev->data->mac_addrs = NULL;
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return rte_eth_dev_release_port(eth_dev);
+
        eth_kni_dev_stop(eth_dev);
 
        internals = eth_dev->data->dev_private;
        rte_kni_release(internals->kni);
 
-       rte_free(internals);
-
        rte_eth_dev_release_port(eth_dev);
 
        is_kni_initialized--;
index 93e8900..d13ab06 100644
@@ -2038,14 +2038,11 @@ lio_eth_dev_uninit(struct rte_eth_dev *eth_dev)
        PMD_INIT_FUNC_TRACE();
 
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
-               return -EPERM;
+               return 0;
 
        /* lio_free_sc_buffer_pool */
        lio_free_sc_buffer_pool(lio_dev);
 
-       rte_free(eth_dev->data->mac_addrs);
-       eth_dev->data->mac_addrs = NULL;
-
        eth_dev->dev_ops = NULL;
        eth_dev->rx_pkt_burst = NULL;
        eth_dev->tx_pkt_burst = NULL;
index 9c28ed4..980eec2 100644
@@ -3,6 +3,8 @@
 
 drivers = ['af_packet',
        'ark',
+       'atlantic',
+       'avf',
        'avp',
        'axgbe', 'bonding',
        'bnx2x',
@@ -11,6 +13,7 @@ drivers = ['af_packet',
        'dpaa', 'dpaa2',
        'e1000',
        'ena',
+       'enetc',
        'enic',
        'failsafe',
        'fm10k', 'i40e',
@@ -18,16 +21,23 @@ drivers = ['af_packet',
        'ixgbe',
        'kni',
        'liquidio',
+       'mlx4',
+       'mlx5',
+       'mvneta',
        'mvpp2',
        'netvsc',
        'nfp',
-       'null', 'octeontx', 'pcap', 'ring',
+       'null', 'octeontx', 'pcap', 'qede', 'ring',
        'sfc',
        'softnic',
        'szedata2',
+       'tap',
        'thunderx',
+       'vdev_netvsc',
        'vhost',
-       'virtio']
+       'virtio',
+       'vmxnet3',
+]
 std_deps = ['ethdev', 'kvargs'] # 'ethdev' also pulls in mbuf, net, eal etc
 std_deps += ['bus_pci']         # very many PMDs depend on PCI, so make std
 std_deps += ['bus_vdev']        # same with vdev bus
diff --git a/drivers/net/mlx4/meson.build b/drivers/net/mlx4/meson.build
new file mode 100644
index 0000000..7de571e
--- /dev/null
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 6WIND S.A.
+# Copyright 2018 Mellanox Technologies, Ltd
+
+pmd_dlopen = get_option('enable_driver_mlx_glue')
+LIB_GLUE_BASE = 'librte_pmd_mlx4_glue.so'
+LIB_GLUE_VERSION = '18.02.0'
+LIB_GLUE = LIB_GLUE_BASE + '.' + LIB_GLUE_VERSION
+if pmd_dlopen
+       dpdk_conf.set('RTE_LIBRTE_MLX4_DLOPEN_DEPS', 1)
+       cflags += [
+               '-DMLX4_GLUE="@0@"'.format(LIB_GLUE),
+               '-DMLX4_GLUE_VERSION="@0@"'.format(LIB_GLUE_VERSION),
+       ]
+endif
+libs = [
+       cc.find_library('mnl', required:false),
+       cc.find_library('mlx4', required:false),
+       cc.find_library('ibverbs', required:false),
+]
+build = true
+foreach lib:libs
+       if not lib.found()
+               build = false
+       endif
+endforeach
+# Compile PMD
+if build
+       allow_experimental_apis = true
+       ext_deps += libs
+       sources = files(
+               'mlx4.c',
+               'mlx4_ethdev.c',
+               'mlx4_flow.c',
+               'mlx4_intr.c',
+               'mlx4_mr.c',
+               'mlx4_rxq.c',
+               'mlx4_rxtx.c',
+               'mlx4_txq.c',
+               'mlx4_utils.c',
+       )
+       if not pmd_dlopen
+               sources += files('mlx4_glue.c')
+       endif
+       cflags_options = [
+               '-Wextra',
+               '-std=c11',
+               '-Wno-strict-prototypes',
+               '-D_BSD_SOURCE',
+               '-D_DEFAULT_SOURCE',
+               '-D_XOPEN_SOURCE=600'
+       ]
+       foreach option:cflags_options
+               if cc.has_argument(option)
+                       cflags += option
+               endif
+       endforeach
+       if get_option('buildtype').contains('debug')
+               cflags += [ '-pedantic', '-UNDEBUG', '-DPEDANTIC' ]
+       else
+               cflags += [ '-DNDEBUG', '-UPEDANTIC' ]
+       endif
+       # To maintain compatibility with the make build system, the
+       # mlx4_autoconf.h file is still generated.
+       # Input array for meson member search:
+       # [ "MACRO to define if found", "header for the search",
+       #   "symbol to search", "struct member to search" ]
+       #
+       has_member_args = [
+               [ 'HAVE_IBV_MLX4_WQE_LSO_SEG', 'infiniband/mlx4dv.h',
+               'struct mlx4_wqe_lso_seg', 'mss_hdr_size' ],
+       ]
+       config = configuration_data()
+       foreach arg:has_member_args
+               file_prefix = '#include<' + arg[1] + '>'
+               config.set(arg[0], cc.has_member(arg[2], arg[3],
+                       prefix : file_prefix))
+       endforeach
+       configure_file(output : 'mlx4_autoconf.h', configuration : config)
+endif
+# Build Glue Library
+if pmd_dlopen and build
+       dlopen_name = 'mlx4_glue'
+       dlopen_lib_name = driver_name_fmt.format(dlopen_name)
+       dlopen_so_version = LIB_GLUE_VERSION
+       dlopen_sources = files('mlx4_glue.c')
+       dlopen_install_dir = [ eal_pmd_path + '-glue' ]
+       shared_lib = shared_library(
+               dlopen_lib_name,
+               dlopen_sources,
+               include_directories: global_inc,
+               c_args: cflags,
+               dependencies: libs,
+               link_args: [
+               '-Wl,-export-dynamic',
+               '-Wl,-h,@0@'.format(LIB_GLUE),
+               ],
+               soversion: dlopen_so_version,
+               install: true,
+               install_dir: dlopen_install_dir,
+       )
+endif
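With enable_driver_mlx_glue set, the PMD carries no hard link-time dependency on rdma-core and instead loads the versioned glue object built above at runtime. A hedged sketch of the mechanism (the real loader also searches the glue install directory configured above):

#include <dlfcn.h>
#include <stdio.h>

/* Sketch: dlopen() the soname produced by this meson file; all verbs
 * calls then go through the table of functions the glue exports. */
static void *
load_mlx4_glue(void)
{
	void *handle = dlopen("librte_pmd_mlx4_glue.so.18.02.0", RTLD_LAZY);

	if (handle == NULL)
		fprintf(stderr, "glue load failed: %s\n", dlerror());
	return handle;
}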
index defc0d4..7f07b8d 100644
@@ -734,7 +734,6 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                eth_dev->data->mac_addrs = priv->mac;
                eth_dev->device = &pci_dev->device;
                rte_eth_copy_pci_info(eth_dev, pci_dev);
-               eth_dev->device->driver = &mlx4_driver.driver;
                /* Initialize local interrupt handle for current port. */
                priv->intr_handle = (struct rte_intr_handle){
                        .fd = -1,
@@ -782,12 +781,17 @@ mlx4_pci_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
                continue;
 port_error:
                rte_free(priv);
+               if (eth_dev != NULL)
+                       eth_dev->data->dev_private = NULL;
                if (pd)
                        claim_zero(mlx4_glue->dealloc_pd(pd));
                if (ctx)
                        claim_zero(mlx4_glue->close_device(ctx));
-               if (eth_dev)
+               if (eth_dev != NULL) {
+                       /* mac_addrs must not be freed; it is part of dev_private */
+                       eth_dev->data->mac_addrs = NULL;
                        rte_eth_dev_release_port(eth_dev);
+               }
                break;
        }
        /*
index d23d3c6..bee8586 100644
@@ -289,6 +289,23 @@ mr_find_next_chunk(struct mlx4_mr *mr, struct mlx4_mr_cache *entry,
        uintptr_t end = 0;
        uint32_t idx = 0;
 
+       /* An MR for external memory has no memseg list. */
+       if (mr->msl == NULL) {
+               struct ibv_mr *ibv_mr = mr->ibv_mr;
+
+               assert(mr->ms_bmp_n == 1);
+               assert(mr->ms_n == 1);
+               assert(base_idx == 0);
+               /*
+                * It can't be looked up from the memseg list; take it
+                * directly from the verbs MR, as there is only one chunk.
+                */
+               entry->start = (uintptr_t)ibv_mr->addr;
+               entry->end = (uintptr_t)ibv_mr->addr + mr->ibv_mr->length;
+               entry->lkey = rte_cpu_to_be_32(mr->ibv_mr->lkey);
+               /* Returning 1 ends iteration. */
+               return 1;
+       }
        for (idx = base_idx; idx < mr->ms_bmp_n; ++idx) {
                if (rte_bitmap_get(mr->ms_bmp, idx)) {
                        const struct rte_memseg_list *msl;
@@ -809,6 +826,7 @@ mlx4_mr_mem_event_free_cb(struct rte_eth_dev *dev, const void *addr, size_t len)
                mr = mr_lookup_dev_list(dev, &entry, start);
                if (mr == NULL)
                        continue;
+               assert(mr->msl); /* Can't be external memory. */
                ms = rte_mem_virt2memseg((void *)start, msl);
                assert(ms != NULL);
                assert(msl->page_sz == ms->hugepage_sz);
@@ -1055,6 +1073,133 @@ mlx4_mr_flush_local_cache(struct mlx4_mr_ctrl *mr_ctrl)
              (void *)mr_ctrl, mr_ctrl->cur_gen);
 }
 
+/**
+ * Called during rte_mempool_mem_iter() by mlx4_mr_update_ext_mp().
+ *
+ * The externally allocated chunk is registered and an MR is created for it.
+ * The MR object is added to the global list. If the memseg list of an MR
+ * object (mr->msl) is null, the MR object covers externally allocated
+ * memory.
+ *
+ * Once external memory is registered, it must stay static. If the memory is
+ * freed and the virtual address range is later mapped to different physical
+ * memory, the stale translation entry may crash the device. The PMD cannot
+ * currently track free events for external memory.
+ */
+static void
+mlx4_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
+                        struct rte_mempool_memhdr *memhdr,
+                        unsigned mem_idx __rte_unused)
+{
+       struct mr_update_mp_data *data = opaque;
+       struct rte_eth_dev *dev = data->dev;
+       struct priv *priv = dev->data->dev_private;
+       struct mlx4_mr_ctrl *mr_ctrl = data->mr_ctrl;
+       struct mlx4_mr *mr = NULL;
+       uintptr_t addr = (uintptr_t)memhdr->addr;
+       size_t len = memhdr->len;
+       struct mlx4_mr_cache entry;
+       uint32_t lkey;
+
+       /* If the chunk is already registered, return immediately. */
+       rte_rwlock_read_lock(&priv->mr.rwlock);
+       lkey = mr_lookup_dev(dev, &entry, addr);
+       rte_rwlock_read_unlock(&priv->mr.rwlock);
+       if (lkey != UINT32_MAX)
+               return;
+       mr = rte_zmalloc_socket(NULL,
+                               RTE_ALIGN_CEIL(sizeof(*mr),
+                                              RTE_CACHE_LINE_SIZE),
+                               RTE_CACHE_LINE_SIZE, mp->socket_id);
+       if (mr == NULL) {
+               WARN("port %u unable to allocate memory for a new MR of"
+                    " mempool (%s).",
+                    dev->data->port_id, mp->name);
+               data->ret = -1;
+               return;
+       }
+       DEBUG("port %u register MR for chunk #%d of mempool (%s)",
+             dev->data->port_id, mem_idx, mp->name);
+       mr->ibv_mr = mlx4_glue->reg_mr(priv->pd, (void *)addr, len,
+                                      IBV_ACCESS_LOCAL_WRITE);
+       if (mr->ibv_mr == NULL) {
+               WARN("port %u fail to create a verbs MR for address (%p)",
+                    dev->data->port_id, (void *)addr);
+               rte_free(mr);
+               data->ret = -1;
+               return;
+       }
+       mr->msl = NULL; /* Mark it as external memory. */
+       mr->ms_bmp = NULL;
+       mr->ms_n = 1;
+       mr->ms_bmp_n = 1;
+       rte_rwlock_write_lock(&priv->mr.rwlock);
+       LIST_INSERT_HEAD(&priv->mr.mr_list, mr, mr);
+       DEBUG("port %u MR CREATED (%p) for external memory %p:\n"
+             "  [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
+             " lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
+             dev->data->port_id, (void *)mr, (void *)addr,
+             addr, addr + len, rte_cpu_to_be_32(mr->ibv_mr->lkey),
+             mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
+       /* Insert to the global cache table. */
+       mr_insert_dev_cache(dev, mr);
+       rte_rwlock_write_unlock(&priv->mr.rwlock);
+       /* Insert to the local cache table */
+       mlx4_mr_addr2mr_bh(dev, mr_ctrl, addr);
+}
+
+/**
+ * Register MRs for all memory chunks of a mempool backed by externally
+ * allocated memory, and fill the local cache.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param mr_ctrl
+ *   Pointer to per-queue MR control structure.
+ * @param mp
+ *   Pointer to the mempool being registered.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+static uint32_t
+mlx4_mr_update_ext_mp(struct rte_eth_dev *dev, struct mlx4_mr_ctrl *mr_ctrl,
+                     struct rte_mempool *mp)
+{
+       struct mr_update_mp_data data = {
+               .dev = dev,
+               .mr_ctrl = mr_ctrl,
+               .ret = 0,
+       };
+
+       rte_mempool_mem_iter(mp, mlx4_mr_update_ext_mp_cb, &data);
+       return data.ret;
+}
+
+/**
+ * Register MRs for all memory chunks of a mempool backed by externally
+ * allocated memory, then look up and return the LKey of the given address.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ * @param addr
+ *   Search key.
+ * @param mp
+ *   Pointer to the mempool to which addr belongs.
+ *
+ * @return
+ *   LKey for address on success, UINT32_MAX on failure.
+ */
+uint32_t
+mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t addr, struct rte_mempool *mp)
+{
+       struct mlx4_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
+       struct priv *priv = txq->priv;
+
+       mlx4_mr_update_ext_mp(priv->dev, mr_ctrl, mp);
+       return mlx4_tx_addr2mr_bh(txq, addr);
+}
+
 /* Called during rte_mempool_mem_iter() by mlx4_mr_update_mp(). */
 static void
 mlx4_mr_update_mp_cb(struct rte_mempool *mp __rte_unused, void *opaque,
@@ -1098,6 +1243,10 @@ mlx4_mr_update_mp(struct rte_eth_dev *dev, struct mlx4_mr_ctrl *mr_ctrl,
        };
 
        rte_mempool_mem_iter(mp, mlx4_mr_update_mp_cb, &data);
+       if (data.ret < 0 && rte_errno == ENXIO) {
+               /* Mempool may have externally allocated memory. */
+               return mlx4_mr_update_ext_mp(dev, mr_ctrl, mp);
+       }
        return data.ret;
 }
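The ENXIO fallback added here is what makes application-provided memory usable for DMA: a chunk absent from the DPDK memseg lists fails the normal lookup and is then registered directly through verbs as an external-memory MR (mr->msl == NULL). A hedged sketch of how such a mempool might be populated on the application side (names and the NULL free callback are illustrative assumptions):

#include <rte_mempool.h>

/* Sketch: chunks added this way live outside the memseg lists, so the
 * Tx LKey lookup sets rte_errno to ENXIO and the PMD falls back to
 * mlx4_mr_update_ext_mp(). The buffer must stay mapped for the
 * device's lifetime, as the callback comment above warns. */
static int
populate_external(struct rte_mempool *mp, char *buf, rte_iova_t iova,
		  size_t len)
{
	/* NULL free callback: the application keeps ownership of buf. */
	return rte_mempool_populate_iova(mp, buf, iova, len, NULL, NULL);
}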
 
index 9737da2..6804c63 100644
@@ -678,7 +678,6 @@ uint64_t
 mlx4_get_rx_queue_offloads(struct priv *priv)
 {
        uint64_t offloads = DEV_RX_OFFLOAD_SCATTER |
-                           DEV_RX_OFFLOAD_CRC_STRIP |
                            DEV_RX_OFFLOAD_KEEP_CRC |
                            DEV_RX_OFFLOAD_JUMBO_FRAME;
 
@@ -780,7 +779,7 @@ mlx4_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        }
        /* By default, FCS (CRC) is stripped by hardware. */
        crc_present = 0;
-       if (rte_eth_dev_must_keep_crc(offloads)) {
+       if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
                if (priv->hw_fcs_strip) {
                        crc_present = 1;
                } else {
index ffa8abf..1be060c 100644
@@ -163,6 +163,26 @@ void mlx4_tx_queue_release(void *dpdk_txq);
 void mlx4_mr_flush_local_cache(struct mlx4_mr_ctrl *mr_ctrl);
 uint32_t mlx4_rx_addr2mr_bh(struct rxq *rxq, uintptr_t addr);
 uint32_t mlx4_tx_addr2mr_bh(struct txq *txq, uintptr_t addr);
+uint32_t mlx4_tx_update_ext_mp(struct txq *txq, uintptr_t addr,
+                              struct rte_mempool *mp);
+
+/**
+ * Get the Memory Pool (MP) of an mbuf. If the mbuf is indirect, the pool of
+ * the underlying direct mbuf is returned instead.
+ *
+ * @param buf
+ *   Pointer to mbuf.
+ *
+ * @return
+ *   Memory pool where data is located for given mbuf.
+ */
+static struct rte_mempool *
+mlx4_mb2mp(struct rte_mbuf *buf)
+{
+       if (unlikely(RTE_MBUF_INDIRECT(buf)))
+               return rte_mbuf_from_indirect(buf)->pool;
+       return buf->pool;
+}
 
 /**
  * Query LKey from a packet buffer for Rx. No need to flush local caches for Rx
@@ -222,6 +242,19 @@ mlx4_tx_addr2mr(struct txq *txq, uintptr_t addr)
        return mlx4_tx_addr2mr_bh(txq, addr);
 }
 
-#define mlx4_tx_mb2mr(rxq, mb) mlx4_tx_addr2mr(rxq, (uintptr_t)((mb)->buf_addr))
+static __rte_always_inline uint32_t
+mlx4_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
+{
+       uintptr_t addr = (uintptr_t)mb->buf_addr;
+       uint32_t lkey = mlx4_tx_addr2mr(txq, addr);
+
+       if (likely(lkey != UINT32_MAX))
+               return lkey;
+       if (rte_errno == ENXIO) {
+               /* Mempool may have externally allocated memory. */
+               lkey = mlx4_tx_update_ext_mp(txq, addr, mlx4_mb2mp(mb));
+       }
+       return lkey;
+}
 
 #endif /* MLX4_RXTX_H_ */
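Replacing the mb2mr macro with a typed inline is what lets the Tx path reach the external-memory fallback: the lookup now needs the whole mbuf, not just buf_addr, so it can recover the owning mempool through mlx4_mb2mp() on an ENXIO miss. A hedged sketch of an illustrative caller:

#include <rte_mbuf.h>

/* Sketch: struct txq is the queue type from this header; UINT32_MAX
 * still means no MR covers the buffer. */
static inline int
tx_lkey_or_drop(struct txq *txq, struct rte_mbuf *mb, uint32_t *lkey)
{
	*lkey = mlx4_tx_mb2mr(txq, mb);
	return *lkey == UINT32_MAX ? -1 : 0; /* -1: drop the packet */
}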
index 2e70dec..fecb57c 100644
@@ -8,7 +8,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_pmd_mlx5.a
 LIB_GLUE = $(LIB_GLUE_BASE).$(LIB_GLUE_VERSION)
 LIB_GLUE_BASE = librte_pmd_mlx5_glue.so
-LIB_GLUE_VERSION = 18.05.0
+LIB_GLUE_VERSION = 18.11.0
 
 # Sources.
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c
@@ -31,9 +31,11 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_stats.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rss.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mr.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_dv.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_tcf.c
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_flow_verbs.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_socket.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl.c
-SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_nl_flow.c
 
 ifeq ($(CONFIG_RTE_LIBRTE_MLX5_DLOPEN_DEPS),y)
 INSTALL-$(CONFIG_RTE_LIBRTE_MLX5_PMD)-lib += $(LIB_GLUE)
@@ -134,6 +136,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
                infiniband/mlx5dv.h \
                enum MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP \
                $(AUTOCONF_OUTPUT)
+       $Q sh -- '$<' '$@' \
+               HAVE_IBV_FLOW_DV_SUPPORT \
+               infiniband/mlx5dv.h \
+               enum MLX5DV_FLOW_ACTION_TAG \
+               $(AUTOCONF_OUTPUT)
        $Q sh -- '$<' '$@' \
                HAVE_ETHTOOL_LINK_MODE_25G \
                /usr/include/linux/ethtool.h \
@@ -150,10 +157,15 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
                enum ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT \
                $(AUTOCONF_OUTPUT)
        $Q sh -- '$<' '$@' \
-               HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT \
+               HAVE_IBV_DEVICE_COUNTERS_SET_V42 \
                infiniband/verbs.h \
                type 'struct ibv_counter_set_init_attr' \
                $(AUTOCONF_OUTPUT)
+       $Q sh -- '$<' '$@' \
+               HAVE_IBV_DEVICE_COUNTERS_SET_V45 \
+               infiniband/verbs.h \
+               type 'struct ibv_counters_init_attr' \
+               $(AUTOCONF_OUTPUT)
        $Q sh -- '$<' '$@' \
                HAVE_RDMA_NL_NLDEV \
                rdma/rdma_netlink.h \
@@ -199,6 +211,11 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
                linux/if_link.h \
                enum IFLA_PHYS_PORT_NAME \
                $(AUTOCONF_OUTPUT)
+       $Q sh -- '$<' '$@' \
+               HAVE_TCA_CHAIN \
+               linux/rtnetlink.h \
+               enum TCA_CHAIN \
+               $(AUTOCONF_OUTPUT)
        $Q sh -- '$<' '$@' \
                HAVE_TCA_FLOWER_ACT \
                linux/pkt_cls.h \
@@ -334,11 +351,31 @@ mlx5_autoconf.h.new: $(RTE_SDK)/buildtools/auto-config-h.sh
                linux/pkt_cls.h \
                enum TCA_FLOWER_KEY_VLAN_ETH_TYPE \
                $(AUTOCONF_OUTPUT)
+       $Q sh -- '$<' '$@' \
+               HAVE_TCA_FLOWER_KEY_TCP_FLAGS \
+               linux/pkt_cls.h \
+               enum TCA_FLOWER_KEY_TCP_FLAGS \
+               $(AUTOCONF_OUTPUT)
+       $Q sh -- '$<' '$@' \
+               HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK \
+               linux/pkt_cls.h \
+               enum TCA_FLOWER_KEY_TCP_FLAGS_MASK \
+               $(AUTOCONF_OUTPUT)
+       $Q sh -- '$<' '$@' \
+               HAVE_TC_ACT_GOTO_CHAIN \
+               linux/pkt_cls.h \
+               define TC_ACT_GOTO_CHAIN \
+               $(AUTOCONF_OUTPUT)
        $Q sh -- '$<' '$@' \
                HAVE_TC_ACT_VLAN \
                linux/tc_act/tc_vlan.h \
                enum TCA_VLAN_PUSH_VLAN_PRIORITY \
                $(AUTOCONF_OUTPUT)
+       $Q sh -- '$<' '$@' \
+               HAVE_TC_ACT_PEDIT \
+               linux/tc_act/tc_pedit.h \
+               enum TCA_PEDIT_KEY_EX_HDR_TYPE_UDP \
+               $(AUTOCONF_OUTPUT)
        $Q sh -- '$<' '$@' \
                HAVE_SUPPORTED_40000baseKR4_Full \
                /usr/include/linux/ethtool.h \
diff --git a/drivers/net/mlx5/meson.build b/drivers/net/mlx5/meson.build
new file mode 100644
index 0000000..e8cbe3e
--- /dev/null
@@ -0,0 +1,244 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 6WIND S.A.
+# Copyright 2018 Mellanox Technologies, Ltd
+
+pmd_dlopen = get_option('enable_driver_mlx_glue')
+LIB_GLUE_BASE = 'librte_pmd_mlx5_glue.so'
+LIB_GLUE_VERSION = '18.11.0'
+LIB_GLUE = LIB_GLUE_BASE + '.' + LIB_GLUE_VERSION
+if pmd_dlopen
+       dpdk_conf.set('RTE_LIBRTE_MLX5_DLOPEN_DEPS', 1)
+       cflags += [
+               '-DMLX5_GLUE="@0@"'.format(LIB_GLUE),
+               '-DMLX5_GLUE_VERSION="@0@"'.format(LIB_GLUE_VERSION),
+       ]
+endif
+libs = [
+       cc.find_library('mnl', required:false),
+       cc.find_library('mlx5', required:false),
+       cc.find_library('ibverbs', required:false),
+]
+build = true
+foreach lib:libs
+       if not lib.found()
+               build = false
+       endif
+endforeach
+if build
+       allow_experimental_apis = true
+       ext_deps += libs
+       sources = files(
+               'mlx5.c',
+               'mlx5_ethdev.c',
+               'mlx5_flow.c',
+               'mlx5_flow_dv.c',
+               'mlx5_flow_tcf.c',
+               'mlx5_flow_verbs.c',
+               'mlx5_mac.c',
+               'mlx5_mr.c',
+               'mlx5_nl.c',
+               'mlx5_rss.c',
+               'mlx5_rxmode.c',
+               'mlx5_rxq.c',
+               'mlx5_rxtx.c',
+               'mlx5_socket.c',
+               'mlx5_stats.c',
+               'mlx5_trigger.c',
+               'mlx5_txq.c',
+               'mlx5_vlan.c',
+       )
+       if dpdk_conf.has('RTE_ARCH_X86_64') or dpdk_conf.has('RTE_ARCH_ARM64')
+               sources += files('mlx5_rxtx_vec.c')
+       endif
+       if not pmd_dlopen
+               sources += files('mlx5_glue.c')
+       endif
+       cflags_options = [
+               '-Wextra',
+               '-std=c11',
+               '-Wno-strict-prototypes',
+               '-D_BSD_SOURCE',
+               '-D_DEFAULT_SOURCE',
+               '-D_XOPEN_SOURCE=600'
+       ]
+       foreach option:cflags_options
+               if cc.has_argument(option)
+                       cflags += option
+               endif
+       endforeach
+       if get_option('buildtype').contains('debug')
+               cflags += [ '-pedantic', '-UNDEBUG', '-DPEDANTIC' ]
+       else
+               cflags += [ '-DNDEBUG', '-UPEDANTIC' ]
+       endif
+       # To maintain compatibility with the make build system, the
+       # mlx5_autoconf.h file is still generated.
+       # input array for meson member search:
+       # [ "MACRO to define if found", "header for the search",
+       #   "symbol to search", "struct member to search" ]
+       has_member_args = [
+               [ 'HAVE_IBV_MLX5_MOD_SWP', 'infiniband/mlx5dv.h',
+               'struct mlx5dv_sw_parsing_caps', 'sw_parsing_offloads' ],
+               [ 'HAVE_IBV_DEVICE_COUNTERS_SET_V42', 'infiniband/verbs.h',
+               'struct ibv_counter_set_init_attr', 'counter_set_id' ],
+               [ 'HAVE_IBV_DEVICE_COUNTERS_SET_V45', 'infiniband/verbs.h',
+               'struct ibv_counters_init_attr', 'comp_mask' ],
+       ]
+       # input array for meson symbol search:
+       # [ "MACRO to define if found", "header for the search",
+       #   "symbol to search" ]
+       has_sym_args = [
+               [ 'HAVE_IBV_DEVICE_STRIDING_RQ_SUPPORT', 'infiniband/mlx5dv.h',
+               'MLX5DV_CQE_RES_FORMAT_CSUM_STRIDX' ],
+               [ 'HAVE_IBV_DEVICE_TUNNEL_SUPPORT', 'infiniband/mlx5dv.h',
+               'MLX5DV_CONTEXT_MASK_TUNNEL_OFFLOADS' ],
+               [ 'HAVE_IBV_MLX5_MOD_MPW', 'infiniband/mlx5dv.h',
+               'MLX5DV_CONTEXT_FLAGS_MPW_ALLOWED' ],
+               [ 'HAVE_IBV_MLX5_MOD_CQE_128B_COMP', 'infiniband/mlx5dv.h',
+               'MLX5DV_CONTEXT_FLAGS_CQE_128B_COMP' ],
+               [ 'HAVE_IBV_FLOW_DV_SUPPORT', 'infiniband/mlx5dv.h',
+               'MLX5DV_FLOW_ACTION_TAG' ],
+               [ 'HAVE_IBV_DEVICE_MPLS_SUPPORT', 'infiniband/verbs.h',
+               'IBV_FLOW_SPEC_MPLS' ],
+               [ 'HAVE_IBV_WQ_FLAG_RX_END_PADDING', 'infiniband/verbs.h',
+               'IBV_WQ_FLAG_RX_END_PADDING' ],
+               [ 'HAVE_SUPPORTED_40000baseKR4_Full', 'linux/ethtool.h',
+               'SUPPORTED_40000baseKR4_Full' ],
+               [ 'HAVE_SUPPORTED_40000baseCR4_Full', 'linux/ethtool.h',
+               'SUPPORTED_40000baseCR4_Full' ],
+               [ 'HAVE_SUPPORTED_40000baseSR4_Full', 'linux/ethtool.h',
+               'SUPPORTED_40000baseSR4_Full' ],
+               [ 'HAVE_SUPPORTED_40000baseLR4_Full', 'linux/ethtool.h',
+               'SUPPORTED_40000baseLR4_Full' ],
+               [ 'HAVE_SUPPORTED_56000baseKR4_Full', 'linux/ethtool.h',
+               'SUPPORTED_56000baseKR4_Full' ],
+               [ 'HAVE_SUPPORTED_56000baseCR4_Full', 'linux/ethtool.h',
+               'SUPPORTED_56000baseCR4_Full' ],
+               [ 'HAVE_SUPPORTED_56000baseSR4_Full', 'linux/ethtool.h',
+               'SUPPORTED_56000baseSR4_Full' ],
+               [ 'HAVE_SUPPORTED_56000baseLR4_Full', 'linux/ethtool.h',
+               'SUPPORTED_56000baseLR4_Full' ],
+               [ 'HAVE_ETHTOOL_LINK_MODE_25G', 'linux/ethtool.h',
+               'ETHTOOL_LINK_MODE_25000baseCR_Full_BIT' ],
+               [ 'HAVE_ETHTOOL_LINK_MODE_50G', 'linux/ethtool.h',
+               'ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT' ],
+               [ 'HAVE_ETHTOOL_LINK_MODE_100G', 'linux/ethtool.h',
+               'ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT' ],
+               [ 'HAVE_IFLA_PHYS_SWITCH_ID', 'linux/if_link.h',
+               'IFLA_PHYS_SWITCH_ID' ],
+               [ 'HAVE_IFLA_PHYS_PORT_NAME', 'linux/if_link.h',
+               'IFLA_PHYS_PORT_NAME' ],
+               [ 'HAVE_TCA_CHAIN', 'linux/rtnetlink.h',
+               'TCA_CHAIN' ],
+               [ 'HAVE_TCA_FLOWER_ACT', 'linux/pkt_cls.h',
+               'TCA_FLOWER_ACT' ],
+               [ 'HAVE_TCA_FLOWER_FLAGS', 'linux/pkt_cls.h',
+               'TCA_FLOWER_FLAGS' ],
+               [ 'HAVE_TCA_FLOWER_KEY_ETH_TYPE', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_ETH_TYPE' ],
+               [ 'HAVE_TCA_FLOWER_KEY_ETH_DST', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_ETH_DST' ],
+               [ 'HAVE_TCA_FLOWER_KEY_ETH_DST_MASK', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_ETH_DST_MASK' ],
+               [ 'HAVE_TCA_FLOWER_KEY_ETH_SRC', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_ETH_SRC' ],
+               [ 'HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_ETH_SRC_MASK' ],
+               [ 'HAVE_TCA_FLOWER_KEY_IP_PROTO', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_IP_PROTO' ],
+               [ 'HAVE_TCA_FLOWER_KEY_IPV4_SRC', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_IPV4_SRC' ],
+               [ 'HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_IPV4_SRC_MASK' ],
+               [ 'HAVE_TCA_FLOWER_KEY_IPV4_DST', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_IPV4_DST' ],
+               [ 'HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_IPV4_DST_MASK' ],
+               [ 'HAVE_TCA_FLOWER_KEY_IPV6_SRC', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_IPV6_SRC' ],
+               [ 'HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_IPV6_SRC_MASK' ],
+               [ 'HAVE_TCA_FLOWER_KEY_IPV6_DST', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_IPV6_DST' ],
+               [ 'HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_IPV6_DST_MASK' ],
+               [ 'HAVE_TCA_FLOWER_KEY_TCP_SRC', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_TCP_SRC' ],
+               [ 'HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_TCP_SRC_MASK' ],
+               [ 'HAVE_TCA_FLOWER_KEY_TCP_DST', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_TCP_DST' ],
+               [ 'HAVE_TCA_FLOWER_KEY_TCP_DST_MASK', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_TCP_DST_MASK' ],
+               [ 'HAVE_TCA_FLOWER_KEY_UDP_SRC', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_UDP_SRC' ],
+               [ 'HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_UDP_SRC_MASK' ],
+               [ 'HAVE_TCA_FLOWER_KEY_UDP_DST', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_UDP_DST' ],
+               [ 'HAVE_TCA_FLOWER_KEY_UDP_DST_MASK', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_UDP_DST_MASK' ],
+               [ 'HAVE_TCA_FLOWER_KEY_VLAN_ID', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_VLAN_ID' ],
+               [ 'HAVE_TCA_FLOWER_KEY_VLAN_PRIO', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_VLAN_PRIO' ],
+               [ 'HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_VLAN_ETH_TYPE' ],
+               [ 'HAVE_TCA_FLOWER_KEY_TCP_FLAGS', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_TCP_FLAGS' ],
+               [ 'HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK', 'linux/pkt_cls.h',
+               'TCA_FLOWER_KEY_TCP_FLAGS_MASK' ],
+               [ 'HAVE_TC_ACT_GOTO_CHAIN', 'linux/pkt_cls.h',
+               'TC_ACT_GOTO_CHAIN' ],
+               [ 'HAVE_TC_ACT_VLAN', 'linux/tc_act/tc_vlan.h',
+               'TCA_VLAN_PUSH_VLAN_PRIORITY' ],
+               [ 'HAVE_TC_ACT_PEDIT', 'linux/tc_act/tc_pedit.h',
+               'TCA_PEDIT_KEY_EX_HDR_TYPE_UDP' ],
+               [ 'HAVE_RDMA_NL_NLDEV', 'rdma/rdma_netlink.h',
+               'RDMA_NL_NLDEV' ],
+               [ 'HAVE_RDMA_NLDEV_CMD_GET', 'rdma/rdma_netlink.h',
+               'RDMA_NLDEV_CMD_GET' ],
+               [ 'HAVE_RDMA_NLDEV_CMD_PORT_GET', 'rdma/rdma_netlink.h',
+               'RDMA_NLDEV_CMD_PORT_GET' ],
+               [ 'HAVE_RDMA_NLDEV_ATTR_DEV_INDEX', 'rdma/rdma_netlink.h',
+               'RDMA_NLDEV_ATTR_DEV_INDEX' ],
+               [ 'HAVE_RDMA_NLDEV_ATTR_DEV_NAME', 'rdma/rdma_netlink.h',
+               'RDMA_NLDEV_ATTR_DEV_NAME' ],
+               [ 'HAVE_RDMA_NLDEV_ATTR_PORT_INDEX', 'rdma/rdma_netlink.h',
+               'RDMA_NLDEV_ATTR_PORT_INDEX' ],
+               [ 'HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX', 'rdma/rdma_netlink.h',
+               'RDMA_NLDEV_ATTR_NDEV_INDEX' ],
+       ]
+       config = configuration_data()
+       foreach arg:has_sym_args
+               config.set(arg[0], cc.has_header_symbol(arg[1], arg[2]))
+       endforeach
+       foreach arg:has_member_args
+               file_prefix = '#include<' + arg[1] + '>'
+               config.set(arg[0], cc.has_member(arg[2], arg[3],
+                       prefix : file_prefix))
+       endforeach
+       configure_file(output : 'mlx5_autoconf.h', configuration : config)
+endif
+# Build Glue Library
+if pmd_dlopen and build
+       dlopen_name = 'mlx5_glue'
+       dlopen_lib_name = driver_name_fmt.format(dlopen_name)
+       dlopen_so_version = LIB_GLUE_VERSION
+       dlopen_sources = files('mlx5_glue.c')
+       dlopen_install_dir = [ eal_pmd_path + '-glue' ]
+       shared_lib = shared_library(
+               dlopen_lib_name,
+               dlopen_sources,
+               include_directories: global_inc,
+               c_args: cflags,
+               dependencies: libs,
+               link_args: [
+               '-Wl,-export-dynamic',
+               '-Wl,-h,@0@'.format(LIB_GLUE),
+               ],
+               soversion: dlopen_so_version,
+               install: true,
+               install_dir: dlopen_install_dir,
+       )
+endif
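Each has_sym_args/has_member_args probe lands in the generated mlx5_autoconf.h as a defined or undefined HAVE_* macro, mirroring what auto-config-h.sh produces for the make build. The C sources then guard optional kernel and rdma-core features on those macros; a hedged sketch of the pattern (the TCA_CHAIN fallback value follows linux/rtnetlink.h):

#include "mlx5_autoconf.h"

#ifdef HAVE_IBV_FLOW_DV_SUPPORT
/* compile the Direct Verbs flow engine paths */
#endif

#ifndef HAVE_TCA_CHAIN
#define TCA_CHAIN 11 /* fallback, value as in linux/rtnetlink.h */
#endif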
index ec63bc6..a277b57 100644
@@ -46,6 +46,7 @@
 #include "mlx5_defs.h"
 #include "mlx5_glue.h"
 #include "mlx5_mr.h"
+#include "mlx5_flow.h"
 
 /* Device parameter to enable RX completion queue compression. */
 #define MLX5_RXQ_CQE_COMP_EN "rxq_cqe_comp_en"
@@ -89,6 +90,9 @@
 /* Allow L3 VXLAN flow creation. */
 #define MLX5_L3_VXLAN_EN "l3_vxlan_en"
 
+/* Activate DV flow steering. */
+#define MLX5_DV_FLOW_EN "dv_flow_en"
+
 /* Activate Netlink support in VF mode. */
 #define MLX5_VF_NL_EN "vf_nl_en"
 
@@ -282,8 +286,8 @@ mlx5_dev_close(struct rte_eth_dev *dev)
                close(priv->nl_socket_route);
        if (priv->nl_socket_rdma >= 0)
                close(priv->nl_socket_rdma);
-       if (priv->mnl_socket)
-               mlx5_nl_flow_socket_destroy(priv->mnl_socket);
+       if (priv->tcf_context)
+               mlx5_flow_tcf_context_destroy(priv->tcf_context);
        ret = mlx5_hrxq_ibv_verify(dev);
        if (ret)
                DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -333,6 +337,17 @@ mlx5_dev_close(struct rte_eth_dev *dev)
        }
        memset(priv, 0, sizeof(*priv));
        priv->domain_id = RTE_ETH_DEV_SWITCH_DOMAIN_ID_INVALID;
+       /*
+        * Flag to rte_eth_dev_close() that it should release the port resources
+        * (calling rte_eth_dev_release_port()) in addition to closing it.
+        */
+       dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
+       /*
+        * Reset mac_addrs to NULL such that it is not freed as part of
+        * rte_eth_dev_release_port(). mac_addrs is part of dev_private so
+        * it is freed when dev_private is freed.
+        */
+       dev->data->mac_addrs = NULL;
 }
 
 const struct eth_dev_ops mlx5_dev_ops = {
@@ -477,7 +492,7 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
        } else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
                config->txqs_inline = tmp;
        } else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
-               config->mps = !!tmp ? config->mps : 0;
+               config->mps = !!tmp;
        } else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) {
                config->mpw_hdr_dseg = !!tmp;
        } else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) {
@@ -490,6 +505,8 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
                config->l3_vxlan_en = !!tmp;
        } else if (strcmp(MLX5_VF_NL_EN, key) == 0) {
                config->vf_nl_en = !!tmp;
+       } else if (strcmp(MLX5_DV_FLOW_EN, key) == 0) {
+               config->dv_flow_en = !!tmp;
        } else {
                DRV_LOG(WARNING, "%s: unknown parameter", key);
                rte_errno = EINVAL;
@@ -527,6 +544,7 @@ mlx5_args(struct mlx5_dev_config *config, struct rte_devargs *devargs)
                MLX5_RX_VEC_EN,
                MLX5_L3_VXLAN_EN,
                MLX5_VF_NL_EN,
+               MLX5_DV_FLOW_EN,
                MLX5_REPRESENTOR,
                NULL,
        };
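dv_flow_en joins the existing run-time parameters; like the others it is passed per PCI device as a devargs string and normalized with !!tmp in mlx5_args_check(). A hedged sketch of enabling it from an application (the PCI address is an illustrative assumption):

#include <rte_common.h>
#include <rte_eal.h>

/* Sketch: whitelist the device with the new parameter appended. */
static int
eal_init_with_dv_flow(void)
{
	char *argv[] = { "app", "-w", "0000:03:00.0,dv_flow_en=1" };

	return rte_eal_init(RTE_DIM(argv), argv);
}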
@@ -568,11 +586,13 @@ static struct rte_pci_driver mlx5_driver;
 static void *uar_base;
 
 static int
-find_lower_va_bound(const struct rte_memseg_list *msl __rte_unused,
+find_lower_va_bound(const struct rte_memseg_list *msl,
                const struct rte_memseg *ms, void *arg)
 {
        void **addr = arg;
 
+       if (msl->external)
+               return 0;
        if (*addr == NULL)
                *addr = ms->addr;
        else
@@ -685,9 +705,10 @@ mlx5_uar_init_secondary(struct rte_eth_dev *dev)
  *
  * @return
  *   A valid Ethernet device object on success, NULL otherwise and rte_errno
- *   is set. The following error is defined:
+ *   is set. The following errors are defined:
  *
  *   EBUSY: device is not supposed to be spawned.
+ *   EEXIST: device is already spawned.
  */
 static struct rte_eth_dev *
 mlx5_dev_spawn(struct rte_device *dpdk_dev,
@@ -702,6 +723,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        struct mlx5dv_context dv_attr = { .comp_mask = 0 };
        struct mlx5_dev_config config = {
                .vf = !!vf,
+               .mps = MLX5_ARG_UNSET,
                .tx_vec_en = 1,
                .rx_vec_en = 1,
                .mpw_hdr_dseg = 0,
@@ -729,12 +751,10 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        unsigned int mprq_max_stride_size_n = 0;
        unsigned int mprq_min_stride_num_n = 0;
        unsigned int mprq_max_stride_num_n = 0;
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-       struct ibv_counter_set_description cs_desc = { .counter_type = 0 };
-#endif
        struct ether_addr mac;
        char name[RTE_ETH_NAME_MAX_LEN];
        int own_domain_id = 0;
+       uint16_t port_id;
        unsigned int i;
 
        /* Determine if this port representor is supposed to be spawned. */
@@ -757,6 +777,17 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
                        return NULL;
                }
        }
+       /* Build device name. */
+       if (!switch_info->representor)
+               rte_strlcpy(name, dpdk_dev->name, sizeof(name));
+       else
+               snprintf(name, sizeof(name), "%s_representor_%u",
+                        dpdk_dev->name, switch_info->port_name);
+       /* Check if the device is already spawned. */
+       if (rte_eth_dev_get_port_by_name(name, &port_id) == 0) {
+               rte_errno = EEXIST;
+               return NULL;
+       }
        /* Prepare shared data between primary and secondary process. */
        mlx5_prepare_shared_data();
        errno = 0;
@@ -791,7 +822,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
                DRV_LOG(DEBUG, "MPW isn't supported");
                mps = MLX5_MPW_DISABLED;
        }
-       config.mps = mps;
 #ifdef HAVE_IBV_MLX5_MOD_SWP
        if (dv_attr.comp_mask & MLX5DV_CONTEXT_MASK_SWP)
                swp = dv_attr.sw_parsing_caps.sw_parsing_offloads;
@@ -864,11 +894,6 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
                DEBUG("ibv_query_device_ex() failed");
                goto error;
        }
-       if (!switch_info->representor)
-               rte_strlcpy(name, dpdk_dev->name, sizeof(name));
-       else
-               snprintf(name, sizeof(name), "%s_representor_%u",
-                        dpdk_dev->name, switch_info->port_name);
        DRV_LOG(DEBUG, "naming Ethernet device \"%s\"", name);
        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                eth_dev = rte_eth_dev_attach_secondary(name);
@@ -1000,12 +1025,15 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        config.hw_csum = !!(attr.device_cap_flags_ex & IBV_DEVICE_RAW_IP_CSUM);
        DRV_LOG(DEBUG, "checksum offloading is %ssupported",
                (config.hw_csum ? "" : "not "));
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-       config.flow_counter_en = !!attr.max_counter_sets;
-       mlx5_glue->describe_counter_set(ctx, 0, &cs_desc);
-       DRV_LOG(DEBUG, "counter type = %d, num of cs = %ld, attributes = %d",
-               cs_desc.counter_type, cs_desc.num_of_cs,
-               cs_desc.attributes);
+#if !defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) && \
+       !defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+       DRV_LOG(DEBUG, "counters are not supported");
+#endif
+#ifndef HAVE_IBV_FLOW_DV_SUPPORT
+       if (config.dv_flow_en) {
+               DRV_LOG(WARNING, "DV flow is not supported");
+               config.dv_flow_en = 0;
+       }
 #endif
        config.ind_table_max_size =
                attr.rss_caps.max_rwq_indirection_table_size;
@@ -1035,13 +1063,15 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
                       (1 << IBV_QPT_RAW_PACKET)));
        if (config.tso)
                config.tso_max_payload_sz = attr.tso_caps.max_tso;
-       if (config.mps && !mps) {
-               DRV_LOG(ERR,
-                       "multi-packet send not supported on this device"
-                       " (" MLX5_TXQ_MPW_EN ")");
-               err = ENOTSUP;
-               goto error;
-       }
+       /*
+        * MPW is disabled by default, while the Enhanced MPW is enabled
+        * by default.
+        */
+       if (config.mps == MLX5_ARG_UNSET)
+               config.mps = (mps == MLX5_MPW_ENHANCED) ? MLX5_MPW_ENHANCED :
+                                                         MLX5_MPW_DISABLED;
+       else
+               config.mps = config.mps ? mps : MLX5_MPW_DISABLED;
        DRV_LOG(INFO, "%sMPS is %s",
                config.mps == MLX5_MPW_ENHANCED ? "enhanced " : "",
                config.mps != MLX5_MPW_DISABLED ? "enabled" : "disabled");
@@ -1073,13 +1103,14 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
                err = ENOMEM;
                goto error;
        }
-       if (priv->representor)
+       if (priv->representor) {
                eth_dev->data->dev_flags |= RTE_ETH_DEV_REPRESENTOR;
+               eth_dev->data->representor_id = priv->representor_id;
+       }
        eth_dev->data->dev_private = priv;
        priv->dev_data = eth_dev->data;
        eth_dev->data->mac_addrs = priv->mac;
        eth_dev->device = dpdk_dev;
-       eth_dev->device->driver = &mlx5_driver.driver;
        err = mlx5_uar_init_primary(eth_dev);
        if (err) {
                err = rte_errno;
@@ -1128,8 +1159,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
        claim_zero(mlx5_mac_addr_add(eth_dev, &mac, 0, 0));
        if (vf && config.vf_nl_en)
                mlx5_nl_mac_addr_sync(eth_dev);
-       priv->mnl_socket = mlx5_nl_flow_socket_create();
-       if (!priv->mnl_socket) {
+       priv->tcf_context = mlx5_flow_tcf_context_create();
+       if (!priv->tcf_context) {
                err = -rte_errno;
                DRV_LOG(WARNING,
                        "flow rules relying on switch offloads will not be"
@@ -1144,16 +1175,16 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
                        error.message =
                                "cannot retrieve network interface index";
                } else {
-                       err = mlx5_nl_flow_init(priv->mnl_socket, ifindex,
-                                               &error);
+                       err = mlx5_flow_tcf_init(priv->tcf_context,
+                                                ifindex, &error);
                }
                if (err) {
                        DRV_LOG(WARNING,
                                "flow rules relying on switch offloads will"
                                " not be supported: %s: %s",
                                error.message, strerror(rte_errno));
-                       mlx5_nl_flow_socket_destroy(priv->mnl_socket);
-                       priv->mnl_socket = NULL;
+                       mlx5_flow_tcf_context_destroy(priv->tcf_context);
+                       priv->tcf_context = NULL;
                }
        }
        TAILQ_INIT(&priv->flows);
@@ -1208,16 +1239,21 @@ error:
                        close(priv->nl_socket_route);
                if (priv->nl_socket_rdma >= 0)
                        close(priv->nl_socket_rdma);
-               if (priv->mnl_socket)
-                       mlx5_nl_flow_socket_destroy(priv->mnl_socket);
+               if (priv->tcf_context)
+                       mlx5_flow_tcf_context_destroy(priv->tcf_context);
                if (own_domain_id)
                        claim_zero(rte_eth_switch_domain_free(priv->domain_id));
                rte_free(priv);
+               if (eth_dev != NULL)
+                       eth_dev->data->dev_private = NULL;
        }
        if (pd)
                claim_zero(mlx5_glue->dealloc_pd(pd));
-       if (eth_dev)
+       if (eth_dev != NULL) {
+               /* mac_addrs must not be freed separately; it is part of dev_private */
+               eth_dev->data->mac_addrs = NULL;
                rte_eth_dev_release_port(eth_dev);
+       }
        if (ctx)
                claim_zero(mlx5_glue->close_device(ctx));
        assert(err > 0);
@@ -1404,9 +1440,9 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                list[i].eth_dev = mlx5_dev_spawn
                        (&pci_dev->device, list[i].ibv_dev, vf, &list[i].info);
                if (!list[i].eth_dev) {
-                       if (rte_errno != EBUSY)
+                       if (rte_errno != EBUSY && rte_errno != EEXIST)
                                break;
-                       /* Device is disabled, ignore it. */
+                       /* Device is disabled or already spawned. Ignore it. */
                        continue;
                }
                restore = list[i].eth_dev->data->dev_flags;
@@ -1437,8 +1473,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
                        if (!list[i].eth_dev)
                                continue;
                        mlx5_dev_close(list[i].eth_dev);
-                       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-                               rte_free(list[i].eth_dev->data->dev_private);
+                       /* mac_addrs must not be freed; it is in dev_private */
+                       list[i].eth_dev->data->mac_addrs = NULL;
                        claim_zero(rte_eth_dev_release_port(list[i].eth_dev));
                }
                /* Restore original error. */
@@ -1449,6 +1485,32 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
        return ret;
 }
 
+/**
+ * DPDK callback to remove a PCI device.
+ *
+ * This function removes all Ethernet devices belonging to a given PCI device.
+ *
+ * @param[in] pci_dev
+ *   Pointer to the PCI device.
+ *
+ * @return
+ *   Always 0 (the function cannot fail).
+ */
+static int
+mlx5_pci_remove(struct rte_pci_device *pci_dev)
+{
+       uint16_t port_id;
+       struct rte_eth_dev *port;
+
+       for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) {
+               port = &rte_eth_devices[port_id];
+               if (port->state != RTE_ETH_DEV_UNUSED &&
+                               port->device == &pci_dev->device)
+                       rte_eth_dev_close(port_id);
+       }
+       return 0;
+}
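mlx5_pci_remove() pairs with the RTE_ETH_DEV_CLOSE_REMOVE flag set in mlx5_dev_close() above: closing a port now also releases it, so the remove callback only has to walk the port table. A minimal sketch of the equivalent application-side teardown (port_id illustrative):

#include <rte_ethdev.h>

/* Sketch: with RTE_ETH_DEV_CLOSE_REMOVE set by the driver, close alone
 * releases the port; no separate detach step is needed. */
static void
teardown_port(uint16_t port_id)
{
	rte_eth_dev_stop(port_id);
	rte_eth_dev_close(port_id); /* also releases the port data */
}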
+
 static const struct rte_pci_id mlx5_pci_id_map[] = {
        {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
@@ -1486,6 +1548,10 @@ static const struct rte_pci_id mlx5_pci_id_map[] = {
                RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
                               PCI_DEVICE_ID_MELLANOX_CONNECTX5BF)
        },
+       {
+               RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
+                              PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF)
+       },
        {
                .vendor_id = 0
        }
@@ -1497,7 +1563,9 @@ static struct rte_pci_driver mlx5_driver = {
        },
        .id_table = mlx5_pci_id_map,
        .probe = mlx5_pci_probe,
-       .drv_flags = RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV,
+       .remove = mlx5_pci_remove,
+       .drv_flags = (RTE_PCI_DRV_INTR_LSC | RTE_PCI_DRV_INTR_RMV |
+                     RTE_PCI_DRV_PROBE_AGAIN),
 };
 
 #ifdef RTE_LIBRTE_MLX5_DLOPEN_DEPS
index a3a34cf..74d87c0 100644
@@ -51,6 +51,7 @@ enum {
        PCI_DEVICE_ID_MELLANOX_CONNECTX5EX = 0x1019,
        PCI_DEVICE_ID_MELLANOX_CONNECTX5EXVF = 0x101a,
        PCI_DEVICE_ID_MELLANOX_CONNECTX5BF = 0xa2d2,
+       PCI_DEVICE_ID_MELLANOX_CONNECTX5BFVF = 0xa2d3,
 };
 
 /** Switch information returned by mlx5_nl_switch_info(). */
@@ -71,12 +72,23 @@ struct mlx5_shared_data {
 
 extern struct mlx5_shared_data *mlx5_shared_data;
 
+struct mlx5_counter_ctrl {
+       /* Name of the counter. */
+       char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
+       /* Name of the counter on the device table. */
+       char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
+       uint32_t ib:1; /**< Nonzero for IB counters. */
+};
+
 struct mlx5_xstats_ctrl {
        /* Number of device stats. */
        uint16_t stats_n;
+       /* Number of device stats identified by PMD. */
+       uint16_t mlx5_stats_n;
        /* Index in the device counters table. */
        uint16_t dev_table_idx[MLX5_MAX_XSTATS];
        uint64_t base[MLX5_MAX_XSTATS];
+       struct mlx5_counter_ctrl info[MLX5_MAX_XSTATS];
 };
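
Together, the new mlx5_counter_ctrl entries and the info[] array in mlx5_xstats_ctrl form a name-translation table between the counters exposed through xstats and the counters read from the device. A self-contained sketch of such a lookup; the table contents and sizes here are illustrative, not the PMD's actual counter list.

#include <stdio.h>
#include <string.h>

#define NAME_SIZE 64 /* stand-in for RTE_ETH_XSTATS_NAME_SIZE */

/* Reduced mirror of mlx5_counter_ctrl. */
struct counter_ctrl {
        char dpdk_name[NAME_SIZE]; /* name exposed through xstats */
        char ctr_name[NAME_SIZE];  /* name in the device counter table */
        unsigned int ib;           /* nonzero for IB counters */
};

/* Hypothetical subset of the counter table. */
static const struct counter_ctrl info[] = {
        { "rx_missed_errors", "rx_out_of_buffer", 1 },
        { "tx_errors_phy", "tx_errors_phy", 0 },
};

/* Translate a DPDK xstats name into the device counter name. */
static const char *ctr_name_of(const char *dpdk_name)
{
        unsigned int i;

        for (i = 0; i < sizeof(info) / sizeof(info[0]); ++i)
                if (!strcmp(info[i].dpdk_name, dpdk_name))
                        return info[i].ctr_name;
        return NULL;
}

int main(void)
{
        printf("rx_missed_errors -> %s\n", ctr_name_of("rx_missed_errors"));
        return 0;
}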
 
 /* Flow list. */
@@ -99,11 +111,9 @@ struct mlx5_dev_config {
        unsigned int hw_fcs_strip:1; /* FCS stripping is supported. */
        unsigned int hw_padding:1; /* End alignment padding is supported. */
        unsigned int vf:1; /* This is a VF. */
-       unsigned int mps:2; /* Multi-packet send supported mode. */
        unsigned int tunnel_en:1;
        /* Whether tunnel stateless offloads are supported. */
        unsigned int mpls_en:1; /* MPLS over GRE/UDP is enabled. */
-       unsigned int flow_counter_en:1; /* Whether flow counter is supported. */
        unsigned int cqe_comp:1; /* CQE compression is enabled. */
        unsigned int tso:1; /* Whether TSO is supported. */
        unsigned int tx_vec_en:1; /* Tx vector is enabled. */
@@ -111,6 +121,7 @@ struct mlx5_dev_config {
        unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
        unsigned int l3_vxlan_en:1; /* Enable L3 VXLAN flow creation. */
        unsigned int vf_nl_en:1; /* Enable Netlink requests in VF mode. */
+       unsigned int dv_flow_en:1; /* Enable DV flow. */
        unsigned int swp:1; /* Tx generic tunnel checksum and TSO offload. */
        struct {
                unsigned int enabled:1; /* Whether MPRQ is enabled. */
@@ -122,6 +133,7 @@ struct mlx5_dev_config {
                unsigned int min_rxqs_num;
                /* Rx queue count threshold to enable MPRQ. */
        } mprq; /* Configurations for Multi-Packet RQ. */
+       int mps; /* Multi-packet send supported mode. */
        unsigned int flow_prio; /* Number of flow priorities. */
        unsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */
        unsigned int ind_table_max_size; /* Maximum indirection table size. */
@@ -156,13 +168,7 @@ struct mlx5_drop {
        struct mlx5_rxq_ibv *rxq; /* Verbs Rx queue. */
 };
 
-/** DPDK port to network interface index (ifindex) conversion. */
-struct mlx5_nl_flow_ptoi {
-       uint16_t port_id; /**< DPDK port ID. */
-       unsigned int ifindex; /**< Network interface index. */
-};
-
-struct mnl_socket;
+struct mlx5_flow_tcf_context;
 
 struct priv {
        LIST_ENTRY(priv) mem_event_cb; /* Called by memory event callback. */
@@ -212,6 +218,7 @@ struct priv {
        LIST_HEAD(txqibv, mlx5_txq_ibv) txqsibv; /* Verbs Tx queues. */
        /* Verbs Indirection tables. */
        LIST_HEAD(ind_tables, mlx5_ind_table_ibv) ind_tbls;
+       LIST_HEAD(matchers, mlx5_flow_dv_matcher) matchers;
        uint32_t link_speed_capa; /* Link speed capabilities. */
        struct mlx5_xstats_ctrl xstats_ctrl; /* Extended stats control. */
        int primary_socket; /* Unix socket for primary process. */
@@ -228,7 +235,7 @@ struct priv {
        rte_spinlock_t uar_lock[MLX5_UAR_PAGE_NUM_MAX];
        /* UAR same-page access control required in 32bit implementations. */
 #endif
-       struct mnl_socket *mnl_socket; /* Libmnl socket. */
+       struct mlx5_flow_tcf_context *tcf_context; /* TC flower context. */
 };
 
 #define PORT_ID(priv) ((priv)->dev_data->port_id)
@@ -240,12 +247,9 @@ int mlx5_getenv_int(const char *);
 
 /* mlx5_ethdev.c */
 
-int mlx5_get_master_ifname(const struct rte_eth_dev *dev,
-                          char (*ifname)[IF_NAMESIZE]);
 int mlx5_get_ifname(const struct rte_eth_dev *dev, char (*ifname)[IF_NAMESIZE]);
 unsigned int mlx5_ifindex(const struct rte_eth_dev *dev);
-int mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr,
-              int master);
+int mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr);
 int mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu);
 int mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep,
                   unsigned int flags);
@@ -396,23 +400,4 @@ unsigned int mlx5_nl_ifindex(int nl, const char *name);
 int mlx5_nl_switch_info(int nl, unsigned int ifindex,
                        struct mlx5_switch_info *info);
 
-/* mlx5_nl_flow.c */
-
-int mlx5_nl_flow_transpose(void *buf,
-                          size_t size,
-                          const struct mlx5_nl_flow_ptoi *ptoi,
-                          const struct rte_flow_attr *attr,
-                          const struct rte_flow_item *pattern,
-                          const struct rte_flow_action *actions,
-                          struct rte_flow_error *error);
-void mlx5_nl_flow_brand(void *buf, uint32_t handle);
-int mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
-                       struct rte_flow_error *error);
-int mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
-                        struct rte_flow_error *error);
-int mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
-                     struct rte_flow_error *error);
-struct mnl_socket *mlx5_nl_flow_socket_create(void);
-void mlx5_nl_flow_socket_destroy(struct mnl_socket *nl);
-
 #endif /* RTE_PMD_MLX5_H_ */
index 34c5b95..d178ed6 100644
@@ -3,8 +3,6 @@
  * Copyright 2015 Mellanox Technologies, Ltd
  */
 
-#define _GNU_SOURCE
-
 #include <stddef.h>
 #include <assert.h>
 #include <inttypes.h>
@@ -129,7 +127,7 @@ struct ethtool_link_settings {
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-int
+static int
 mlx5_get_master_ifname(const struct rte_eth_dev *dev,
                       char (*ifname)[IF_NAMESIZE])
 {
@@ -270,16 +268,12 @@ mlx5_ifindex(const struct rte_eth_dev *dev)
  *   Request number to pass to ioctl().
  * @param[out] ifr
  *   Interface request structure output buffer.
- * @param master
- *   When device is a port representor, perform request on master device
- *   instead.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
 int
-mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr,
-          int master)
+mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr)
 {
        int sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
        int ret = 0;
@@ -288,10 +282,7 @@ mlx5_ifreq(const struct rte_eth_dev *dev, int req, struct ifreq *ifr,
                rte_errno = errno;
                return -rte_errno;
        }
-       if (master)
-               ret = mlx5_get_master_ifname(dev, &ifr->ifr_name);
-       else
-               ret = mlx5_get_ifname(dev, &ifr->ifr_name);
+       ret = mlx5_get_ifname(dev, &ifr->ifr_name);
        if (ret)
                goto error;
        ret = ioctl(sock, req, ifr);
@@ -321,7 +312,7 @@ int
 mlx5_get_mtu(struct rte_eth_dev *dev, uint16_t *mtu)
 {
        struct ifreq request;
-       int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request, 0);
+       int ret = mlx5_ifreq(dev, SIOCGIFMTU, &request);
 
        if (ret)
                return ret;
@@ -345,7 +336,7 @@ mlx5_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
 {
        struct ifreq request = { .ifr_mtu = mtu, };
 
-       return mlx5_ifreq(dev, SIOCSIFMTU, &request, 0);
+       return mlx5_ifreq(dev, SIOCSIFMTU, &request);
 }
 
 /**
@@ -365,13 +356,13 @@ int
 mlx5_set_flags(struct rte_eth_dev *dev, unsigned int keep, unsigned int flags)
 {
        struct ifreq request;
-       int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request, 0);
+       int ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &request);
 
        if (ret)
                return ret;
        request.ifr_flags &= keep;
        request.ifr_flags |= flags & ~keep;
-       return mlx5_ifreq(dev, SIOCSIFFLAGS, &request, 0);
+       return mlx5_ifreq(dev, SIOCSIFFLAGS, &request);
 }
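
mlx5_set_flags() above keeps the interface-flag bits selected by `keep` and takes every other bit from `flags`. A standalone demonstration of that bit arithmetic, with made-up flag values in place of the real IFF_* bits:

#include <stdio.h>

/* Same arithmetic as mlx5_set_flags(): bits set in `keep` retain their
 * current value, all other bits are taken from `flags`. */
static unsigned int update_flags(unsigned int cur, unsigned int keep,
                                 unsigned int flags)
{
        cur &= keep;
        cur |= flags & ~keep;
        return cur;
}

int main(void)
{
        /* Keep bit 0 as-is, force bit 1 on and bit 2 off (made-up bits). */
        unsigned int cur = 0x5; /* 0b101 */
        unsigned int next = update_flags(cur, 0x1 /* keep */, 0x2 /* flags */);

        printf("0x%x -> 0x%x\n", cur, next); /* prints 0x5 -> 0x3 */
        return 0;
}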
 
 /**
@@ -627,17 +618,20 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
        int link_speed = 0;
        int ret;
 
-       ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr, 1);
+       ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
        if (ret) {
                DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
                        dev->data->port_id, strerror(rte_errno));
                return ret;
        }
-       memset(&dev_link, 0, sizeof(dev_link));
-       dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
-                               (ifr.ifr_flags & IFF_RUNNING));
-       ifr.ifr_data = (void *)&edata;
-       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
+       dev_link = (struct rte_eth_link) {
+               .link_status = ((ifr.ifr_flags & IFF_UP) &&
+                               (ifr.ifr_flags & IFF_RUNNING)),
+       };
+       ifr = (struct ifreq) {
+               .ifr_data = (void *)&edata,
+       };
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
        if (ret) {
                DRV_LOG(WARNING,
                        "port %u ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
@@ -666,8 +660,8 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev,
                                ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
        dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
                        ETH_LINK_SPEED_FIXED);
-       if ((dev_link.link_speed && !dev_link.link_status) ||
-           (!dev_link.link_speed && dev_link.link_status)) {
+       if (((dev_link.link_speed && !dev_link.link_status) ||
+            (!dev_link.link_speed && dev_link.link_status))) {
                rte_errno = EAGAIN;
                return -rte_errno;
        }
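
The hunk above trades memset()-plus-assignments for a compound-literal assignment: any field not named in the initializer is zero-initialized by the language, so clearing and setting collapse into one statement. A minimal sketch with a hypothetical two-field struct (not the real rte_eth_link):

#include <stdio.h>
#include <string.h>

struct link { int status; int speed; }; /* hypothetical */

int main(void)
{
        struct link l = { .status = 0, .speed = 10000 };

        /* Old pattern: clear everything, then set fields one by one. */
        memset(&l, 0, sizeof(l));
        l.status = 1;
        /* New pattern: one compound-literal assignment does both; the
         * unnamed `speed` field is implicitly zero-initialized. */
        l = (struct link){ .status = 1 };
        printf("status=%d speed=%d\n", l.status, l.speed); /* status=1 speed=0 */
        return 0;
}
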
@@ -698,17 +692,20 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
        uint64_t sc;
        int ret;
 
-       ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr, 1);
+       ret = mlx5_ifreq(dev, SIOCGIFFLAGS, &ifr);
        if (ret) {
                DRV_LOG(WARNING, "port %u ioctl(SIOCGIFFLAGS) failed: %s",
                        dev->data->port_id, strerror(rte_errno));
                return ret;
        }
-       memset(&dev_link, 0, sizeof(dev_link));
-       dev_link.link_status = ((ifr.ifr_flags & IFF_UP) &&
-                               (ifr.ifr_flags & IFF_RUNNING));
-       ifr.ifr_data = (void *)&gcmd;
-       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
+       dev_link = (struct rte_eth_link) {
+               .link_status = ((ifr.ifr_flags & IFF_UP) &&
+                               (ifr.ifr_flags & IFF_RUNNING)),
+       };
+       ifr = (struct ifreq) {
+               .ifr_data = (void *)&gcmd,
+       };
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
        if (ret) {
                DRV_LOG(DEBUG,
                        "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
@@ -725,7 +722,7 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
 
        *ecmd = gcmd;
        ifr.ifr_data = (void *)ecmd;
-       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
        if (ret) {
                DRV_LOG(DEBUG,
                        "port %u ioctl(SIOCETHTOOL, ETHTOOL_GLINKSETTINGS)"
@@ -775,8 +772,8 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev,
                                ETH_LINK_HALF_DUPLEX : ETH_LINK_FULL_DUPLEX);
        dev_link.link_autoneg = !(dev->data->dev_conf.link_speeds &
                                  ETH_LINK_SPEED_FIXED);
-       if ((dev_link.link_speed && !dev_link.link_status) ||
-           (!dev_link.link_speed && dev_link.link_status)) {
+       if (((dev_link.link_speed && !dev_link.link_status) ||
+            (!dev_link.link_speed && dev_link.link_status))) {
                rte_errno = EAGAIN;
                return -rte_errno;
        }
@@ -888,7 +885,7 @@ mlx5_dev_get_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
        int ret;
 
        ifr.ifr_data = (void *)&ethpause;
-       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
        if (ret) {
                DRV_LOG(WARNING,
                        "port %u ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM) failed:"
@@ -941,7 +938,7 @@ mlx5_dev_set_flow_ctrl(struct rte_eth_dev *dev, struct rte_eth_fc_conf *fc_conf)
                ethpause.tx_pause = 1;
        else
                ethpause.tx_pause = 0;
-       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 0);
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
        if (ret) {
                DRV_LOG(WARNING,
                        "port %u ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
@@ -1306,10 +1303,7 @@ mlx5_dev_to_port_id(const struct rte_device *dev, uint16_t *port_list,
        RTE_ETH_FOREACH_DEV(id) {
                struct rte_eth_dev *ldev = &rte_eth_devices[id];
 
-               if (!ldev->device ||
-                   !ldev->device->driver ||
-                   strcmp(ldev->device->driver->name, MLX5_DRIVER_NAME) ||
-                   ldev->device != dev)
+               if (ldev->device != dev)
                        continue;
                if (n < port_list_n)
                        port_list[n] = id;
index ca4625b..280af0a 100644
@@ -3,6 +3,7 @@
  * Copyright 2016 Mellanox Technologies, Ltd
  */
 
+#include <netinet/in.h>
 #include <sys/queue.h>
 #include <stdalign.h>
 #include <stdint.h>
 #include "mlx5_defs.h"
 #include "mlx5_prm.h"
 #include "mlx5_glue.h"
+#include "mlx5_flow.h"
 
 /* Dev ops structure defined in mlx5.c */
 extern const struct eth_dev_ops mlx5_dev_ops;
 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
 
-/* Pattern outer Layer bits. */
-#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
-#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
-#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
-#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
-#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
-#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
-
-/* Pattern inner Layer bits. */
-#define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
-#define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
-#define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
-#define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
-#define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
-#define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)
-
-/* Pattern tunnel Layer bits. */
-#define MLX5_FLOW_LAYER_VXLAN (1u << 12)
-#define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
-#define MLX5_FLOW_LAYER_GRE (1u << 14)
-#define MLX5_FLOW_LAYER_MPLS (1u << 15)
-
-/* Outer Masks. */
-#define MLX5_FLOW_LAYER_OUTER_L3 \
-       (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
-#define MLX5_FLOW_LAYER_OUTER_L4 \
-       (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
-#define MLX5_FLOW_LAYER_OUTER \
-       (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
-        MLX5_FLOW_LAYER_OUTER_L4)
-
-/* Tunnel Masks. */
-#define MLX5_FLOW_LAYER_TUNNEL \
-       (MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
-        MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS)
-
-/* Inner Masks. */
-#define MLX5_FLOW_LAYER_INNER_L3 \
-       (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
-#define MLX5_FLOW_LAYER_INNER_L4 \
-       (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
-#define MLX5_FLOW_LAYER_INNER \
-       (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
-        MLX5_FLOW_LAYER_INNER_L4)
-
-/* Actions that modify the fate of matching traffic. */
-#define MLX5_FLOW_FATE_DROP (1u << 0)
-#define MLX5_FLOW_FATE_QUEUE (1u << 1)
-#define MLX5_FLOW_FATE_RSS (1u << 2)
-
-/* Modify a packet. */
-#define MLX5_FLOW_MOD_FLAG (1u << 0)
-#define MLX5_FLOW_MOD_MARK (1u << 1)
-#define MLX5_FLOW_MOD_COUNT (1u << 2)
-
-/* possible L3 layers protocols filtering. */
-#define MLX5_IP_PROTOCOL_TCP 6
-#define MLX5_IP_PROTOCOL_UDP 17
-#define MLX5_IP_PROTOCOL_GRE 47
-#define MLX5_IP_PROTOCOL_MPLS 147
-
-/* Priority reserved for default flows. */
-#define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
+/** Device flow drivers. */
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops;
+#endif
+extern const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops;
+extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
+
+const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
+
+const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
+       [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+       [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
+#endif
+       [MLX5_FLOW_TYPE_TCF] = &mlx5_flow_tcf_drv_ops,
+       [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
+       [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
+};
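
flow_drv_ops[] is a designated-initializer dispatch table: the flow type selects the backend, and both ends of the range fall back to the null ops so out-of-range types stay harmless. A reduced sketch of the pattern; struct drv_ops and the backend names are placeholders for the real mlx5_flow_driver_ops:

#include <stdio.h>

/* Simplified stand-in for mlx5_flow_driver_ops (the real structure
 * carries validate/translate/apply callbacks). */
struct drv_ops { const char *name; };

enum flow_type { TYPE_MIN, TYPE_DV, TYPE_TCF, TYPE_VERBS, TYPE_MAX };

static const struct drv_ops null_ops = { "null" };
static const struct drv_ops dv_ops = { "dv" };
static const struct drv_ops tcf_ops = { "tcf" };
static const struct drv_ops verbs_ops = { "verbs" };

/* Designated initializers keep the table in sync with the enum even if
 * entries are reordered; unsupported types resolve to the null ops. */
static const struct drv_ops *ops[] = {
        [TYPE_MIN] = &null_ops,
        [TYPE_DV] = &dv_ops,
        [TYPE_TCF] = &tcf_ops,
        [TYPE_VERBS] = &verbs_ops,
        [TYPE_MAX] = &null_ops,
};

int main(void)
{
        printf("dispatching to %s backend\n", ops[TYPE_VERBS]->name);
        return 0;
}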
 
 enum mlx5_expansion {
        MLX5_EXPANSION_ROOT,
@@ -270,53 +227,6 @@ static const struct rte_flow_expand_node mlx5_support_expansion[] = {
        },
 };
 
-/** Handles information leading to a drop fate. */
-struct mlx5_flow_verbs {
-       LIST_ENTRY(mlx5_flow_verbs) next;
-       unsigned int size; /**< Size of the attribute. */
-       struct {
-               struct ibv_flow_attr *attr;
-               /**< Pointer to the Specification buffer. */
-               uint8_t *specs; /**< Pointer to the specifications. */
-       };
-       struct ibv_flow *flow; /**< Verbs flow pointer. */
-       struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
-       uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
-};
-
-/* Counters information. */
-struct mlx5_flow_counter {
-       LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
-       uint32_t shared:1; /**< Share counter ID with other flow rules. */
-       uint32_t ref_cnt:31; /**< Reference counter. */
-       uint32_t id; /**< Counter ID. */
-       struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
-       uint64_t hits; /**< Number of packets matched by the rule. */
-       uint64_t bytes; /**< Number of bytes matched by the rule. */
-};
-
-/* Flow structure. */
-struct rte_flow {
-       TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
-       struct rte_flow_attr attributes; /**< User flow attribute. */
-       uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
-       uint32_t layers;
-       /**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
-       uint32_t modifier;
-       /**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */
-       uint32_t fate;
-       /**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
-       uint8_t l3_protocol; /**< valid when l3_protocol_en is set. */
-       LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
-       struct mlx5_flow_verbs *cur_verbs;
-       /**< Current Verbs flow structure being filled. */
-       struct mlx5_flow_counter *counter; /**< Holds Verbs flow counter. */
-       struct rte_flow_action_rss rss;/**< RSS context. */
-       uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
-       uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
-       void *nl_flow; /**< Netlink flow buffer if relevant. */
-};
-
 static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
@@ -352,23 +262,6 @@ struct mlx5_fdir {
        struct rte_flow_action_queue queue;
 };
 
-/* Verbs specification header. */
-struct ibv_spec_header {
-       enum ibv_flow_spec_type type;
-       uint16_t size;
-};
-
-/*
- * Number of sub priorities.
- * For each kind of pattern matching i.e. L2, L3, L4 to have a correct
- * matching on the NIC (firmware dependent) L4 most have the higher priority
- * followed by L3 and ending with L2.
- */
-#define MLX5_PRIORITY_MAP_L2 2
-#define MLX5_PRIORITY_MAP_L3 1
-#define MLX5_PRIORITY_MAP_L4 0
-#define MLX5_PRIORITY_MAP_MAX 3
-
 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
        { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
@@ -413,7 +306,7 @@ static struct mlx5_flow_tunnel_info tunnels_info[] = {
  * Discover the maximum number of priority available.
  *
  * @param[in] dev
- *   Pointer to Ethernet device.
+ *   Pointer to the Ethernet device structure.
  *
  * @return
  *   number of supported flow priority on success, a negative errno
@@ -478,160 +371,33 @@ mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
 }
 
 /**
- * Adjust flow priority.
+ * Adjust flow priority based on the highest layer and the requested priority.
  *
- * @param dev
- *   Pointer to Ethernet device.
- * @param flow
- *   Pointer to an rte flow.
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] priority
+ *   The rule base priority.
+ * @param[in] subpriority
+ *   The priority based on the items.
+ *
+ * @return
+ *   The new priority.
  */
-static void
-mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
+uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
+                                  uint32_t subpriority)
 {
+       uint32_t res = 0;
        struct priv *priv = dev->data->dev_private;
-       uint32_t priority = flow->attributes.priority;
-       uint32_t subpriority = flow->cur_verbs->attr->priority;
 
        switch (priv->config.flow_prio) {
        case RTE_DIM(priority_map_3):
-               priority = priority_map_3[priority][subpriority];
+               res = priority_map_3[priority][subpriority];
                break;
        case RTE_DIM(priority_map_5):
-               priority = priority_map_5[priority][subpriority];
+               res = priority_map_5[priority][subpriority];
                break;
        }
-       flow->cur_verbs->attr->priority = priority;
-}
-
-/**
- * Get a flow counter.
- *
- * @param[in] dev
- *   Pointer to Ethernet device.
- * @param[in] shared
- *   Indicate if this counter is shared with other flows.
- * @param[in] id
- *   Counter identifier.
- *
- * @return
- *   A pointer to the counter, NULL otherwise and rte_errno is set.
- */
-static struct mlx5_flow_counter *
-mlx5_flow_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
-{
-       struct priv *priv = dev->data->dev_private;
-       struct mlx5_flow_counter *cnt;
-
-       LIST_FOREACH(cnt, &priv->flow_counters, next) {
-               if (!cnt->shared || cnt->shared != shared)
-                       continue;
-               if (cnt->id != id)
-                       continue;
-               cnt->ref_cnt++;
-               return cnt;
-       }
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-
-       struct mlx5_flow_counter tmpl = {
-               .shared = shared,
-               .id = id,
-               .cs = mlx5_glue->create_counter_set
-                       (priv->ctx,
-                        &(struct ibv_counter_set_init_attr){
-                                .counter_set_id = id,
-                        }),
-               .hits = 0,
-               .bytes = 0,
-       };
-
-       if (!tmpl.cs) {
-               rte_errno = errno;
-               return NULL;
-       }
-       cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
-       if (!cnt) {
-               rte_errno = ENOMEM;
-               return NULL;
-       }
-       *cnt = tmpl;
-       LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
-       return cnt;
-#endif
-       rte_errno = ENOTSUP;
-       return NULL;
-}
-
-/**
- * Release a flow counter.
- *
- * @param[in] counter
- *   Pointer to the counter handler.
- */
-static void
-mlx5_flow_counter_release(struct mlx5_flow_counter *counter)
-{
-       if (--counter->ref_cnt == 0) {
-               claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
-               LIST_REMOVE(counter, next);
-               rte_free(counter);
-       }
-}
-
-/**
- * Verify the @p attributes will be correctly understood by the NIC and store
- * them in the @p flow if everything is correct.
- *
- * @param[in] dev
- *   Pointer to Ethernet device.
- * @param[in] attributes
- *   Pointer to flow attributes
- * @param[in, out] flow
- *   Pointer to the rte_flow structure.
- * @param[out] error
- *   Pointer to error structure.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_flow_attributes(struct rte_eth_dev *dev,
-                    const struct rte_flow_attr *attributes,
-                    struct rte_flow *flow,
-                    struct rte_flow_error *error)
-{
-       uint32_t priority_max =
-               ((struct priv *)dev->data->dev_private)->config.flow_prio - 1;
-
-       if (attributes->group)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
-                                         NULL,
-                                         "groups is not supported");
-       if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
-           attributes->priority >= priority_max)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
-                                         NULL,
-                                         "priority out of range");
-       if (attributes->egress)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
-                                         NULL,
-                                         "egress is not supported");
-       if (attributes->transfer)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
-                                         NULL,
-                                         "transfer is not supported");
-       if (!attributes->ingress)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
-                                         NULL,
-                                         "ingress attribute is mandatory");
-       flow->attributes = *attributes;
-       if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
-               flow->attributes.priority = priority_max;
-       return 0;
+       return res;
 }
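
A worked example of the lookup now performed by mlx5_flow_adjust_priority(), using the priority_map_3 table shown above (subpriority 2 corresponds to MLX5_PRIORITY_MAP_L2 in the definitions moved out of this file): rule priority 1 with an L2 subpriority maps to Verbs priority 4.

#include <stdio.h>
#include <stdint.h>

#define PRIORITY_MAP_MAX 3 /* stand-in for MLX5_PRIORITY_MAP_MAX */

/* Copied from this file: map of Verbs to flow priority, 8 priorities. */
static const uint32_t priority_map_3[][PRIORITY_MAP_MAX] = {
        { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Same lookup as mlx5_flow_adjust_priority() when config.flow_prio
 * equals RTE_DIM(priority_map_3). */
static uint32_t adjust_priority(int32_t priority, uint32_t subpriority)
{
        return priority_map_3[priority][subpriority];
}

int main(void)
{
        /* Rule priority 1, L2 subpriority (2) -> Verbs priority 4. */
        printf("%u\n", (unsigned int)adjust_priority(1, 2));
        return 0;
}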
 
 /**
@@ -652,7 +418,7 @@ mlx5_flow_attributes(struct rte_eth_dev *dev,
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
+int
 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
                          const uint8_t *mask,
                          const uint8_t *nic_mask,
@@ -671,8 +437,7 @@ mlx5_flow_item_acceptable(const struct rte_flow_item *item,
                                                  " bits");
        if (!item->spec && (item->mask || item->last))
                return rte_flow_error_set(error, EINVAL,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
                                          "mask/last without a spec is not"
                                          " supported");
        if (item->spec && item->last) {
@@ -687,2163 +452,1469 @@ mlx5_flow_item_acceptable(const struct rte_flow_item *item,
                }
                ret = memcmp(spec, last, size);
                if (ret != 0)
-                       return rte_flow_error_set(error, ENOTSUP,
+                       return rte_flow_error_set(error, EINVAL,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  item,
-                                                 "range is not supported");
+                                                 "range is not valid");
        }
        return 0;
 }
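
The visible tail of mlx5_flow_item_acceptable() refuses ranges: spec and last must compare equal before the item is accepted, otherwise EINVAL is returned. A standalone sketch of the check; the explicit masking step is an assumption here, since the diff does not show how spec and last are prepared before the memcmp:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <errno.h>

static int item_range_valid(const uint8_t *spec, const uint8_t *last,
                            const uint8_t *mask, size_t size)
{
        uint8_t s[16], l[16];
        size_t i;

        if (size > sizeof(s))
                return -EINVAL;
        /* Apply the mask before comparing, so "don't care" bits cannot
         * produce a spurious range (assumed to mirror the unshown
         * preparation in the real function). */
        for (i = 0; i < size; ++i) {
                s[i] = spec[i] & mask[i];
                l[i] = last[i] & mask[i];
        }
        return memcmp(s, l, size) ? -EINVAL : 0;
}

int main(void)
{
        const uint8_t spec[2] = { 0x10, 0x00 };
        const uint8_t last[2] = { 0x10, 0xff };
        const uint8_t mask[2] = { 0xff, 0x00 };

        /* Bytes covered by the mask match, so no real range: accepted. */
        printf("%d\n", item_range_valid(spec, last, mask, 2)); /* 0 */
        return 0;
}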
 
 /**
- * Add a verbs item specification into @p flow.
- *
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] src
- *   Create specification.
- * @param[in] size
- *   Size in bytes of the specification to copy.
- */
-static void
-mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
-{
-       struct mlx5_flow_verbs *verbs = flow->cur_verbs;
-
-       if (verbs->specs) {
-               void *dst;
-
-               dst = (void *)(verbs->specs + verbs->size);
-               memcpy(dst, src, size);
-               ++verbs->attr->num_of_specs;
-       }
-       verbs->size += size;
-}
-
-/**
- * Adjust verbs hash fields according to the @p flow information.
+ * Adjust the hash fields according to the @p flow information.
  *
- * @param[in, out] flow.
- *   Pointer to flow structure.
+ * @param[in] dev_flow
+ *   Pointer to the mlx5_flow.
  * @param[in] tunnel
  *   1 when the hash field is for a tunnel item.
  * @param[in] layer_types
  *   ETH_RSS_* types.
  * @param[in] hash_fields
  *   Item hash fields.
+ *
+ * @return
+ *   The hash fields that should be used.
  */
-static void
-mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
-                                 int tunnel __rte_unused,
-                                 uint32_t layer_types, uint64_t hash_fields)
+uint64_t
+mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow,
+                           int tunnel __rte_unused, uint64_t layer_types,
+                           uint64_t hash_fields)
 {
+       struct rte_flow *flow = dev_flow->flow;
 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
-       hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
-       if (flow->rss.level == 2 && !tunnel)
-               hash_fields = 0;
-       else if (flow->rss.level < 2 && tunnel)
-               hash_fields = 0;
+       int rss_request_inner = flow->rss.level >= 2;
+
+       /* Check RSS hash level for tunnel. */
+       if (tunnel && rss_request_inner)
+               hash_fields |= IBV_RX_HASH_INNER;
+       else if (tunnel || rss_request_inner)
+               return 0;
 #endif
+       /* Check if requested layer matches RSS hash fields. */
        if (!(flow->rss.types & layer_types))
-               hash_fields = 0;
-       flow->cur_verbs->hash_fields |= hash_fields;
+               return 0;
+       return hash_fields;
 }
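
mlx5_flow_hashfields_adjust() keeps inner hashing only when the item is a tunnel item and inner RSS (level >= 2) was requested, drops the fields entirely on any tunnel/level mismatch, and finally requires the layer to be part of the requested RSS types. A standalone sketch of that decision tree; the bit values are stand-ins for the IBV_RX_HASH_* and ETH_RSS_* constants:

#include <stdio.h>
#include <stdint.h>

#define HASH_INNER    (UINT64_C(1) << 63) /* stand-in for IBV_RX_HASH_INNER */
#define RSS_IPV4      (UINT64_C(1) << 0)  /* stand-in for ETH_RSS_IPV4 */
#define HASH_SRC_IPV4 (UINT64_C(1) << 1)
#define HASH_DST_IPV4 (UINT64_C(1) << 2)

/* Same decision tree as mlx5_flow_hashfields_adjust(). */
static uint64_t hashfields_adjust(uint64_t rss_types, int rss_level,
                                  int tunnel, uint64_t layer_types,
                                  uint64_t hash_fields)
{
        int rss_request_inner = rss_level >= 2;

        if (tunnel && rss_request_inner)
                hash_fields |= HASH_INNER;
        else if (tunnel || rss_request_inner)
                return 0;
        if (!(rss_types & layer_types))
                return 0;
        return hash_fields;
}

int main(void)
{
        uint64_t hf = HASH_SRC_IPV4 | HASH_DST_IPV4;

        /* Outer item, outer RSS requested: fields kept as-is. */
        printf("%#llx\n", (unsigned long long)
               hashfields_adjust(RSS_IPV4, 1, 0, RSS_IPV4, hf));
        /* Outer item, inner RSS requested: mismatch, fields dropped. */
        printf("%#llx\n", (unsigned long long)
               hashfields_adjust(RSS_IPV4, 2, 0, RSS_IPV4, hf));
        return 0;
}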
 
 /**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
- *
- * @param[in] item
- *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- * @param[out] error
- *   Pointer to error structure.
+ * Look up and set the ptype in the Rx queue data. Only a single ptype can be
+ * used; if several tunnel rules are used on this queue, the tunnel ptype is
+ * cleared.
  *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ * @param rxq_ctrl
+ *   Rx queue to update.
  */
-static int
-mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
-                  const size_t flow_size, struct rte_flow_error *error)
+static void
+flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
 {
-       const struct rte_flow_item_eth *spec = item->spec;
-       const struct rte_flow_item_eth *mask = item->mask;
-       const struct rte_flow_item_eth nic_mask = {
-               .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
-               .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
-               .type = RTE_BE16(0xffff),
-       };
-       const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
-       const unsigned int size = sizeof(struct ibv_flow_spec_eth);
-       struct ibv_flow_spec_eth eth = {
-               .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
-               .size = size,
-       };
-       int ret;
-
-       if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
-                           MLX5_FLOW_LAYER_OUTER_L2))
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "L2 layers already configured");
-       if (!mask)
-               mask = &rte_flow_item_eth_mask;
-       ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
-                                       (const uint8_t *)&nic_mask,
-                                       sizeof(struct rte_flow_item_eth),
-                                       error);
-       if (ret)
-               return ret;
-       flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
-               MLX5_FLOW_LAYER_OUTER_L2;
-       if (size > flow_size)
-               return size;
-       if (spec) {
-               unsigned int i;
+       unsigned int i;
+       uint32_t tunnel_ptype = 0;
 
-               memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
-               memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
-               eth.val.ether_type = spec->type;
-               memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
-               memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
-               eth.mask.ether_type = mask->type;
-               /* Remove unwanted bits from values. */
-               for (i = 0; i < ETHER_ADDR_LEN; ++i) {
-                       eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
-                       eth.val.src_mac[i] &= eth.mask.src_mac[i];
+       /* Look up the ptype to use. */
+       for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
+               if (!rxq_ctrl->flow_tunnels_n[i])
+                       continue;
+               if (!tunnel_ptype) {
+                       tunnel_ptype = tunnels_info[i].ptype;
+               } else {
+                       tunnel_ptype = 0;
+                       break;
                }
-               eth.val.ether_type &= eth.mask.ether_type;
        }
-       flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
-       mlx5_flow_spec_verbs_add(flow, &eth, size);
-       return size;
+       rxq_ctrl->rxq.tunnel = tunnel_ptype;
 }
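
flow_rxq_tunnel_ptype_update() resolves the queue ptype to a single tunnel kind: if exactly one tunnel type has active rules, its ptype wins; any mix clears the ptype. The same resolution as a standalone sketch, with hypothetical counters and ptype values:

#include <stdio.h>
#include <stdint.h>

#define NUM_TUNNELS 4 /* stand-in for MLX5_FLOW_TUNNEL */

/* Same loop as flow_rxq_tunnel_ptype_update(): the ptype is kept only
 * when exactly one tunnel kind has active rules. */
static uint32_t resolve_ptype(const unsigned int rules_n[NUM_TUNNELS],
                              const uint32_t ptypes[NUM_TUNNELS])
{
        uint32_t tunnel_ptype = 0;
        unsigned int i;

        for (i = 0; i != NUM_TUNNELS; ++i) {
                if (!rules_n[i])
                        continue;
                if (!tunnel_ptype) {
                        tunnel_ptype = ptypes[i];
                } else {
                        tunnel_ptype = 0;
                        break;
                }
        }
        return tunnel_ptype;
}

int main(void)
{
        const uint32_t ptypes[NUM_TUNNELS] = { 0x1, 0x2, 0x4, 0x8 };
        const unsigned int only_one[NUM_TUNNELS] = { 0, 3, 0, 0 };
        const unsigned int mixed[NUM_TUNNELS] = { 1, 3, 0, 0 };

        printf("%#x\n", (unsigned int)resolve_ptype(only_one, ptypes)); /* 0x2 */
        printf("%#x\n", (unsigned int)resolve_ptype(mixed, ptypes));    /* 0 */
        return 0;
}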
 
 /**
- * Update the VLAN tag in the Verbs Ethernet specification.
+ * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
+ * flow.
  *
- * @param[in, out] attr
- *   Pointer to Verbs attributes structure.
- * @param[in] eth
- *   Verbs structure containing the VLAN information to copy.
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] dev_flow
+ *   Pointer to device flow structure.
  */
 static void
-mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
-                          struct ibv_flow_spec_eth *eth)
+flow_drv_rxq_flags_set(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow)
 {
+       struct priv *priv = dev->data->dev_private;
+       struct rte_flow *flow = dev_flow->flow;
+       const int mark = !!(flow->actions &
+                           (MLX5_FLOW_ACTION_FLAG | MLX5_FLOW_ACTION_MARK));
+       const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
        unsigned int i;
-       const enum ibv_flow_spec_type search = eth->type;
-       struct ibv_spec_header *hdr = (struct ibv_spec_header *)
-               ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
-
-       for (i = 0; i != attr->num_of_specs; ++i) {
-               if (hdr->type == search) {
-                       struct ibv_flow_spec_eth *e =
-                               (struct ibv_flow_spec_eth *)hdr;
-
-                       e->val.vlan_tag = eth->val.vlan_tag;
-                       e->mask.vlan_tag = eth->mask.vlan_tag;
-                       e->val.ether_type = eth->val.ether_type;
-                       e->mask.ether_type = eth->mask.ether_type;
-                       break;
+
+       for (i = 0; i != flow->rss.queue_num; ++i) {
+               int idx = (*flow->queue)[i];
+               struct mlx5_rxq_ctrl *rxq_ctrl =
+                       container_of((*priv->rxqs)[idx],
+                                    struct mlx5_rxq_ctrl, rxq);
+
+               if (mark) {
+                       rxq_ctrl->rxq.mark = 1;
+                       rxq_ctrl->flow_mark_n++;
+               }
+               if (tunnel) {
+                       unsigned int j;
+
+                       /* Increase the counter matching the flow. */
+                       for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
+                               if ((tunnels_info[j].tunnel &
+                                    dev_flow->layers) ==
+                                   tunnels_info[j].tunnel) {
+                                       rxq_ctrl->flow_tunnels_n[j]++;
+                                       break;
+                               }
+                       }
+                       flow_rxq_tunnel_ptype_update(rxq_ctrl);
                }
-               hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
        }
 }
 
 /**
- * Convert the @p item into @p flow (or by updating the already present
- * Ethernet Verbs) specification after ensuring the NIC will understand and
- * process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow.
  *
- * @param[in] item
- *   Item specification.
- * @param[in, out] flow
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] flow
  *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- * @param[out] error
- *   Pointer to error structure.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
  */
-static int
-mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
-                   const size_t flow_size, struct rte_flow_error *error)
+static void
+flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
-       const struct rte_flow_item_vlan *spec = item->spec;
-       const struct rte_flow_item_vlan *mask = item->mask;
-       const struct rte_flow_item_vlan nic_mask = {
-               .tci = RTE_BE16(0x0fff),
-               .inner_type = RTE_BE16(0xffff),
-       };
-       unsigned int size = sizeof(struct ibv_flow_spec_eth);
-       const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
-       struct ibv_flow_spec_eth eth = {
-               .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
-               .size = size,
-       };
-       int ret;
-       const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
-                                       MLX5_FLOW_LAYER_INNER_L4) :
-               (MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4);
-       const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
-               MLX5_FLOW_LAYER_OUTER_VLAN;
-       const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
-               MLX5_FLOW_LAYER_OUTER_L2;
+       struct mlx5_flow *dev_flow;
 
-       if (flow->layers & vlanm)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "VLAN layer already configured");
-       else if ((flow->layers & l34m) != 0)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "L2 layer cannot follow L3/L4 layer");
-       if (!mask)
-               mask = &rte_flow_item_vlan_mask;
-       ret = mlx5_flow_item_acceptable
-               (item, (const uint8_t *)mask,
-                (const uint8_t *)&nic_mask,
-                sizeof(struct rte_flow_item_vlan), error);
-       if (ret)
-               return ret;
-       if (spec) {
-               eth.val.vlan_tag = spec->tci;
-               eth.mask.vlan_tag = mask->tci;
-               eth.val.vlan_tag &= eth.mask.vlan_tag;
-               eth.val.ether_type = spec->inner_type;
-               eth.mask.ether_type = mask->inner_type;
-               eth.val.ether_type &= eth.mask.ether_type;
-       }
-       /*
-        * From verbs perspective an empty VLAN is equivalent
-        * to a packet without VLAN layer.
-        */
-       if (!eth.mask.vlan_tag)
-               return rte_flow_error_set(error, EINVAL,
-                                         RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
-                                         item->spec,
-                                         "VLAN cannot be empty");
-       if (!(flow->layers & l2m)) {
-               if (size <= flow_size) {
-                       flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
-                       mlx5_flow_spec_verbs_add(flow, &eth, size);
-               }
-       } else {
-               if (flow->cur_verbs)
-                       mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
-                                                  &eth);
-               size = 0; /* Only an update is done in eth specification. */
-       }
-       flow->layers |= tunnel ?
-               (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
-               (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
-       return size;
+       LIST_FOREACH(dev_flow, &flow->dev_flows, next)
+               flow_drv_rxq_flags_set(dev, dev_flow);
 }
 
 /**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
- *
- * @param[in] item
- *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- * @param[out] error
- *   Pointer to error structure.
+ * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
+ * device flow if no other flow uses it with the same kind of request.
  *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param[in] dev_flow
+ *   Pointer to the device flow.
  */
-static int
-mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
-                   const size_t flow_size, struct rte_flow_error *error)
+static void
+flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow)
 {
-       const struct rte_flow_item_ipv4 *spec = item->spec;
-       const struct rte_flow_item_ipv4 *mask = item->mask;
-       const struct rte_flow_item_ipv4 nic_mask = {
-               .hdr = {
-                       .src_addr = RTE_BE32(0xffffffff),
-                       .dst_addr = RTE_BE32(0xffffffff),
-                       .type_of_service = 0xff,
-                       .next_proto_id = 0xff,
-               },
-       };
-       const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
-       unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
-       struct ibv_flow_spec_ipv4_ext ipv4 = {
-               .type = IBV_FLOW_SPEC_IPV4_EXT |
-                       (tunnel ? IBV_FLOW_SPEC_INNER : 0),
-               .size = size,
-       };
-       int ret;
+       struct priv *priv = dev->data->dev_private;
+       struct rte_flow *flow = dev_flow->flow;
+       const int mark = !!(flow->actions &
+                           (MLX5_FLOW_ACTION_FLAG | MLX5_FLOW_ACTION_MARK));
+       const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+       unsigned int i;
 
-       if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
-                           MLX5_FLOW_LAYER_OUTER_L3))
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "multiple L3 layers not supported");
-       else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
-                                MLX5_FLOW_LAYER_OUTER_L4))
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "L3 cannot follow an L4 layer.");
-       if (!mask)
-               mask = &rte_flow_item_ipv4_mask;
-       ret = mlx5_flow_item_acceptable
-               (item, (const uint8_t *)mask,
-                (const uint8_t *)&nic_mask,
-                sizeof(struct rte_flow_item_ipv4), error);
-       if (ret < 0)
-               return ret;
-       flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
-               MLX5_FLOW_LAYER_OUTER_L3_IPV4;
-       if (spec) {
-               ipv4.val = (struct ibv_flow_ipv4_ext_filter){
-                       .src_ip = spec->hdr.src_addr,
-                       .dst_ip = spec->hdr.dst_addr,
-                       .proto = spec->hdr.next_proto_id,
-                       .tos = spec->hdr.type_of_service,
-               };
-               ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
-                       .src_ip = mask->hdr.src_addr,
-                       .dst_ip = mask->hdr.dst_addr,
-                       .proto = mask->hdr.next_proto_id,
-                       .tos = mask->hdr.type_of_service,
-               };
-               /* Remove unwanted bits from values. */
-               ipv4.val.src_ip &= ipv4.mask.src_ip;
-               ipv4.val.dst_ip &= ipv4.mask.dst_ip;
-               ipv4.val.proto &= ipv4.mask.proto;
-               ipv4.val.tos &= ipv4.mask.tos;
-       }
-       flow->l3_protocol_en = !!ipv4.mask.proto;
-       flow->l3_protocol = ipv4.val.proto;
-       if (size <= flow_size) {
-               mlx5_flow_verbs_hashfields_adjust
-                       (flow, tunnel,
-                        (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
-                         ETH_RSS_NONFRAG_IPV4_OTHER),
-                        (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
-               flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
-               mlx5_flow_spec_verbs_add(flow, &ipv4, size);
+       assert(dev->data->dev_started);
+       for (i = 0; i != flow->rss.queue_num; ++i) {
+               int idx = (*flow->queue)[i];
+               struct mlx5_rxq_ctrl *rxq_ctrl =
+                       container_of((*priv->rxqs)[idx],
+                                    struct mlx5_rxq_ctrl, rxq);
+
+               if (mark) {
+                       rxq_ctrl->flow_mark_n--;
+                       rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
+               }
+               if (tunnel) {
+                       unsigned int j;
+
+                       /* Decrease the counter matching the flow. */
+                       for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
+                               if ((tunnels_info[j].tunnel &
+                                    dev_flow->layers) ==
+                                   tunnels_info[j].tunnel) {
+                                       rxq_ctrl->flow_tunnels_n[j]--;
+                                       break;
+                               }
+                       }
+                       flow_rxq_tunnel_ptype_update(rxq_ctrl);
+               }
        }
-       return size;
 }
 
 /**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
+ * @p flow if no other flow uses it with the same kind of request.
  *
- * @param[in] item
- *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param[in] flow
+ *   Pointer to the flow.
+ */
+static void
+flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+       struct mlx5_flow *dev_flow;
+
+       LIST_FOREACH(dev_flow, &flow->dev_flows, next)
+               flow_drv_rxq_flags_trim(dev, dev_flow);
+}
+
+/**
+ * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ */
+static void
+flow_rxq_flags_clear(struct rte_eth_dev *dev)
+{
+       struct priv *priv = dev->data->dev_private;
+       unsigned int i;
+
+       for (i = 0; i != priv->rxqs_n; ++i) {
+               struct mlx5_rxq_ctrl *rxq_ctrl;
+               unsigned int j;
+
+               if (!(*priv->rxqs)[i])
+                       continue;
+               rxq_ctrl = container_of((*priv->rxqs)[i],
+                                       struct mlx5_rxq_ctrl, rxq);
+               rxq_ctrl->flow_mark_n = 0;
+               rxq_ctrl->rxq.mark = 0;
+               for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
+                       rxq_ctrl->flow_tunnels_n[j] = 0;
+               rxq_ctrl->rxq.tunnel = 0;
+       }
+}
+
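flow_drv_rxq_flags_set()/_trim() above reference-count the per-queue mark flag: the queue keeps marking received packets as long as at least one flow with a MARK or FLAG action still uses it. A minimal sketch of that counting; struct rxq is a hypothetical reduction of mlx5_rxq_ctrl:

#include <stdio.h>

/* Hypothetical reduction of a queue's mark state; the real fields are
 * rxq_ctrl->flow_mark_n and rxq_ctrl->rxq.mark. */
struct rxq { unsigned int flow_mark_n; unsigned int mark; };

/* flow_drv_rxq_flags_set() path: a MARK/FLAG flow starts using the queue. */
static void mark_set(struct rxq *q)
{
        q->flow_mark_n++;
        q->mark = 1;
}

/* flow_drv_rxq_flags_trim() path: marking stops only when the last such
 * flow is gone. */
static void mark_trim(struct rxq *q)
{
        q->flow_mark_n--;
        q->mark = !!q->flow_mark_n;
}

int main(void)
{
        struct rxq q = { 0, 0 };

        mark_set(&q);
        mark_set(&q);
        mark_trim(&q);
        printf("mark=%u\n", q.mark); /* still 1: one marking flow remains */
        mark_trim(&q);
        printf("mark=%u\n", q.mark); /* 0: last marking flow removed */
        return 0;
}
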
+/**
+ * Validate the flag action.
+ *
+ * @param[in] action_flags
+ *   Bit-fields that holds the actions detected until now.
+ * @param[in] attr
+ *   Attributes of flow that includes this action.
  * @param[out] error
  *   Pointer to error structure.
  *
  * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
-                   const size_t flow_size, struct rte_flow_error *error)
+int
+mlx5_flow_validate_action_flag(uint64_t action_flags,
+                              const struct rte_flow_attr *attr,
+                              struct rte_flow_error *error)
 {
-       const struct rte_flow_item_ipv6 *spec = item->spec;
-       const struct rte_flow_item_ipv6 *mask = item->mask;
-       const struct rte_flow_item_ipv6 nic_mask = {
-               .hdr = {
-                       .src_addr =
-                               "\xff\xff\xff\xff\xff\xff\xff\xff"
-                               "\xff\xff\xff\xff\xff\xff\xff\xff",
-                       .dst_addr =
-                               "\xff\xff\xff\xff\xff\xff\xff\xff"
-                               "\xff\xff\xff\xff\xff\xff\xff\xff",
-                       .vtc_flow = RTE_BE32(0xffffffff),
-                       .proto = 0xff,
-                       .hop_limits = 0xff,
-               },
-       };
-       const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
-       unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
-       struct ibv_flow_spec_ipv6 ipv6 = {
-               .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
-               .size = size,
-       };
-       int ret;
 
-       if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
-                           MLX5_FLOW_LAYER_OUTER_L3))
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "multiple L3 layers not supported");
-       else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
-                                MLX5_FLOW_LAYER_OUTER_L4))
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "L3 cannot follow an L4 layer.");
-       /*
-        * IPv6 is not recognised by the NIC inside a GRE tunnel.
-        * Such support has to be disabled as the rule will be
-        * accepted.  Issue reproduced with Mellanox OFED 4.3-3.0.2.1 and
-        * Mellanox OFED 4.4-1.0.0.0.
-        */
-       if (tunnel && flow->layers & MLX5_FLOW_LAYER_GRE)
+       if (action_flags & MLX5_FLOW_ACTION_DROP)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                         "can't drop and flag in same flow");
+       if (action_flags & MLX5_FLOW_ACTION_MARK)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                         "can't mark and flag in same flow");
+       if (action_flags & MLX5_FLOW_ACTION_FLAG)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                         "can't have 2 flag"
+                                         " actions in same flow");
+       if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "IPv6 inside a GRE tunnel is"
-                                         " not recognised.");
-       if (!mask)
-               mask = &rte_flow_item_ipv6_mask;
-       ret = mlx5_flow_item_acceptable
-               (item, (const uint8_t *)mask,
-                (const uint8_t *)&nic_mask,
-                sizeof(struct rte_flow_item_ipv6), error);
-       if (ret < 0)
-               return ret;
-       flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
-               MLX5_FLOW_LAYER_OUTER_L3_IPV6;
-       if (spec) {
-               unsigned int i;
-               uint32_t vtc_flow_val;
-               uint32_t vtc_flow_mask;
-
-               memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
-                      RTE_DIM(ipv6.val.src_ip));
-               memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
-                      RTE_DIM(ipv6.val.dst_ip));
-               memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
-                      RTE_DIM(ipv6.mask.src_ip));
-               memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
-                      RTE_DIM(ipv6.mask.dst_ip));
-               vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
-               vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
-               ipv6.val.flow_label =
-                       rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
-                                        IPV6_HDR_FL_SHIFT);
-               ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
-                                        IPV6_HDR_TC_SHIFT;
-               ipv6.val.next_hdr = spec->hdr.proto;
-               ipv6.val.hop_limit = spec->hdr.hop_limits;
-               ipv6.mask.flow_label =
-                       rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
-                                        IPV6_HDR_FL_SHIFT);
-               ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
-                                         IPV6_HDR_TC_SHIFT;
-               ipv6.mask.next_hdr = mask->hdr.proto;
-               ipv6.mask.hop_limit = mask->hdr.hop_limits;
-               /* Remove unwanted bits from values. */
-               for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
-                       ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
-                       ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
-               }
-               ipv6.val.flow_label &= ipv6.mask.flow_label;
-               ipv6.val.traffic_class &= ipv6.mask.traffic_class;
-               ipv6.val.next_hdr &= ipv6.mask.next_hdr;
-               ipv6.val.hop_limit &= ipv6.mask.hop_limit;
-       }
-       flow->l3_protocol_en = !!ipv6.mask.next_hdr;
-       flow->l3_protocol = ipv6.val.next_hdr;
-       if (size <= flow_size) {
-               mlx5_flow_verbs_hashfields_adjust
-                       (flow, tunnel,
-                        (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER),
-                        (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
-               flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
-               mlx5_flow_spec_verbs_add(flow, &ipv6, size);
-       }
-       return size;
+                                         RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+                                         "flag action not supported for "
+                                         "egress");
+       return 0;
 }
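
Taken together, these checks allow at most one FLAG action per flow, never combined with DROP or MARK, and only on ingress. A minimal sketch of a rule that would pass this validation, built on the public rte_flow API rather than the internal helpers; port id and queue index are illustrative:

#include <rte_flow.h>

static int
validate_flag_rule(uint16_t port_id)
{
	struct rte_flow_attr attr = { .ingress = 1 }; /* egress is rejected */
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_FLAG },	/* at most one per flow */
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error err;

	return rte_flow_validate(port_id, &attr, pattern, actions, &err);
}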
 
-/**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+/**
+ * Validate the mark action.
  *
- * @param[in] item
- *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
+ * @param[in] action
+ *   Pointer to the mark action.
+ * @param[in] action_flags
+ *   Bit-fields that hold the actions detected until now.
+ * @param[in] attr
+ *   Attributes of the flow that includes this action.
  * @param[out] error
  *   Pointer to error structure.
  *
  * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
-                  const size_t flow_size, struct rte_flow_error *error)
+int
+mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
+                              uint64_t action_flags,
+                              const struct rte_flow_attr *attr,
+                              struct rte_flow_error *error)
 {
-       const struct rte_flow_item_udp *spec = item->spec;
-       const struct rte_flow_item_udp *mask = item->mask;
-       const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
-       unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
-       struct ibv_flow_spec_tcp_udp udp = {
-               .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
-               .size = size,
-       };
-       int ret;
+       const struct rte_flow_action_mark *mark = action->conf;
 
-       if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "protocol filtering not compatible"
-                                         " with UDP layer");
-       if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
-                             MLX5_FLOW_LAYER_OUTER_L3)))
+       if (!mark)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION,
+                                         action,
+                                         "configuration cannot be null");
+       if (mark->id >= MLX5_FLOW_MARK_MAX)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                         &mark->id,
+                                         "mark id must be in 0 <= id < "
+                                         RTE_STR(MLX5_FLOW_MARK_MAX));
+       if (action_flags & MLX5_FLOW_ACTION_DROP)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                         "can't drop and mark in same flow");
+       if (action_flags & MLX5_FLOW_ACTION_FLAG)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                         "can't flag and mark in same flow");
+       if (action_flags & MLX5_FLOW_ACTION_MARK)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                         "can't have 2 mark actions in same"
+                                         " flow");
+       if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "L3 is mandatory to filter"
-                                         " on L4");
-       if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
-                           MLX5_FLOW_LAYER_OUTER_L4))
+                                         RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+                                         "mark action not supported for "
+                                         "egress");
+       return 0;
+}
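
The same exclusivity rules apply to MARK, with the extra bound on the id. A fragment of a conforming action list; the id value is illustrative and must stay below MLX5_FLOW_MARK_MAX:

struct rte_flow_action_mark mark = { .id = 42 }; /* < MLX5_FLOW_MARK_MAX */
struct rte_flow_action_queue queue = { .index = 0 };
struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue }, /* any fate action except DROP */
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};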
+
+/**
+ * Validate the drop action.
+ *
+ * @param[in] action_flags
+ *   Bit-fields that hold the actions detected until now.
+ * @param[in] attr
+ *   Attributes of the flow that includes this action.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_validate_action_drop(uint64_t action_flags,
+                              const struct rte_flow_attr *attr,
+                              struct rte_flow_error *error)
+{
+       if (action_flags & MLX5_FLOW_ACTION_FLAG)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                         "can't drop and flag in same flow");
+       if (action_flags & MLX5_FLOW_ACTION_MARK)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                         "can't drop and mark in same flow");
+       if (action_flags & MLX5_FLOW_FATE_ACTIONS)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                         "can't have 2 fate actions in"
+                                         " same flow");
+       if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "L4 layer is already"
-                                         " present");
-       if (!mask)
-               mask = &rte_flow_item_udp_mask;
-       ret = mlx5_flow_item_acceptable
-               (item, (const uint8_t *)mask,
-                (const uint8_t *)&rte_flow_item_udp_mask,
-                sizeof(struct rte_flow_item_udp), error);
-       if (ret < 0)
-               return ret;
-       flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
-               MLX5_FLOW_LAYER_OUTER_L4_UDP;
-       if (spec) {
-               udp.val.dst_port = spec->hdr.dst_port;
-               udp.val.src_port = spec->hdr.src_port;
-               udp.mask.dst_port = mask->hdr.dst_port;
-               udp.mask.src_port = mask->hdr.src_port;
-               /* Remove unwanted bits from values. */
-               udp.val.src_port &= udp.mask.src_port;
-               udp.val.dst_port &= udp.mask.dst_port;
-       }
-       if (size <= flow_size) {
-               mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
-                                                 (IBV_RX_HASH_SRC_PORT_UDP |
-                                                  IBV_RX_HASH_DST_PORT_UDP));
-               flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
-               mlx5_flow_spec_verbs_add(flow, &udp, size);
-       }
-       return size;
+                                         RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+                                         "drop action not supported for "
+                                         "egress");
+       return 0;
 }
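
Conversely, DROP is itself a fate action and excludes FLAG and MARK, so a valid drop rule carries nothing besides the DROP action itself; a fragment:

struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_DROP },	/* no MARK/FLAG/QUEUE/RSS alongside it */
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};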
 
-/**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+/**
+ * Validate the queue action.
  *
- * @param[in] item
- *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
+ * @param[in] action
+ *   Pointer to the queue action.
+ * @param[in] action_flags
+ *   Bit-fields that hold the actions detected until now.
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] attr
+ *   Attributes of the flow that includes this action.
  * @param[out] error
  *   Pointer to error structure.
  *
  * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
-                  const size_t flow_size, struct rte_flow_error *error)
+int
+mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
+                               uint64_t action_flags,
+                               struct rte_eth_dev *dev,
+                               const struct rte_flow_attr *attr,
+                               struct rte_flow_error *error)
 {
-       const struct rte_flow_item_tcp *spec = item->spec;
-       const struct rte_flow_item_tcp *mask = item->mask;
-       const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
-       unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
-       struct ibv_flow_spec_tcp_udp tcp = {
-               .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
-               .size = size,
-       };
-       int ret;
+       struct priv *priv = dev->data->dev_private;
+       const struct rte_flow_action_queue *queue = action->conf;
 
-       if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "protocol filtering not compatible"
-                                         " with TCP layer");
-       if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
-                             MLX5_FLOW_LAYER_OUTER_L3)))
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "L3 is mandatory to filter on L4");
-       if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
-                           MLX5_FLOW_LAYER_OUTER_L4))
+       if (action_flags & MLX5_FLOW_FATE_ACTIONS)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                         "can't have 2 fate actions in"
+                                         " same flow");
+       if (queue->index >= priv->rxqs_n)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                         &queue->index,
+                                         "queue index out of range");
+       if (!(*priv->rxqs)[queue->index])
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                         &queue->index,
+                                         "queue is not configured");
+       if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "L4 layer is already present");
-       if (!mask)
-               mask = &rte_flow_item_tcp_mask;
-       ret = mlx5_flow_item_acceptable
-               (item, (const uint8_t *)mask,
-                (const uint8_t *)&rte_flow_item_tcp_mask,
-                sizeof(struct rte_flow_item_tcp), error);
-       if (ret < 0)
-               return ret;
-       flow->layers |=  tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
-               MLX5_FLOW_LAYER_OUTER_L4_TCP;
-       if (spec) {
-               tcp.val.dst_port = spec->hdr.dst_port;
-               tcp.val.src_port = spec->hdr.src_port;
-               tcp.mask.dst_port = mask->hdr.dst_port;
-               tcp.mask.src_port = mask->hdr.src_port;
-               /* Remove unwanted bits from values. */
-               tcp.val.src_port &= tcp.mask.src_port;
-               tcp.val.dst_port &= tcp.mask.dst_port;
-       }
-       if (size <= flow_size) {
-               mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP,
-                                                 (IBV_RX_HASH_SRC_PORT_TCP |
-                                                  IBV_RX_HASH_DST_PORT_TCP));
-               flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
-               mlx5_flow_spec_verbs_add(flow, &tcp, size);
-       }
-       return size;
+                                         RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+                                         "queue action not supported for "
+                                         "egress");
+       return 0;
 }
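
The queue index is validated against the runtime Rx queue setup, not just a static maximum, so the fragment below only passes if the port was actually configured with that queue (index illustrative):

struct rte_flow_action_queue queue = { .index = 3 }; /* must be < rxqs_n and set up */
struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};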
 
-/**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+/**
+ * Validate the rss action.
  *
- * @param[in] item
- *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
+ * @param[in] action
+ *   Pointer to the RSS action.
+ * @param[in] action_flags
+ *   Bit-fields that hold the actions detected until now.
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] attr
+ *   Attributes of the flow that includes this action.
  * @param[out] error
  *   Pointer to error structure.
  *
  * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
-                    const size_t flow_size, struct rte_flow_error *error)
+int
+mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
+                             uint64_t action_flags,
+                             struct rte_eth_dev *dev,
+                             const struct rte_flow_attr *attr,
+                             struct rte_flow_error *error)
 {
-       const struct rte_flow_item_vxlan *spec = item->spec;
-       const struct rte_flow_item_vxlan *mask = item->mask;
-       unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
-       struct ibv_flow_spec_tunnel vxlan = {
-               .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
-               .size = size,
-       };
-       int ret;
-       union vni {
-               uint32_t vlan_id;
-               uint8_t vni[4];
-       } id = { .vlan_id = 0, };
+       struct priv *priv = dev->data->dev_private;
+       const struct rte_flow_action_rss *rss = action->conf;
+       unsigned int i;
 
-       if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
+       if (action_flags & MLX5_FLOW_FATE_ACTIONS)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
+                                         "can't have 2 fate actions"
+                                         " in same flow");
+       if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
+           rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "a tunnel is already present");
-       /*
-        * Verify only UDPv4 is present as defined in
-        * https://tools.ietf.org/html/rfc7348
-        */
-       if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
+                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                         &rss->func,
+                                         "RSS hash function not supported");
+#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
+       if (rss->level > 2)
+#else
+       if (rss->level > 1)
+#endif
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "no outer UDP layer found");
-       if (!mask)
-               mask = &rte_flow_item_vxlan_mask;
-       ret = mlx5_flow_item_acceptable
-               (item, (const uint8_t *)mask,
-                (const uint8_t *)&rte_flow_item_vxlan_mask,
-                sizeof(struct rte_flow_item_vxlan), error);
-       if (ret < 0)
-               return ret;
-       if (spec) {
-               memcpy(&id.vni[1], spec->vni, 3);
-               vxlan.val.tunnel_id = id.vlan_id;
-               memcpy(&id.vni[1], mask->vni, 3);
-               vxlan.mask.tunnel_id = id.vlan_id;
-               /* Remove unwanted bits from values. */
-               vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
-       }
-       /*
-        * Tunnel id 0 is equivalent as not adding a VXLAN layer, if
-        * only this layer is defined in the Verbs specification it is
-        * interpreted as wildcard and all packets will match this
-        * rule, if it follows a full stack layer (ex: eth / ipv4 /
-        * udp), all packets matching the layers before will also
-        * match this rule.  To avoid such situation, VNI 0 is
-        * currently refused.
-        */
-       if (!vxlan.val.tunnel_id)
-               return rte_flow_error_set(error, EINVAL,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "VXLAN vni cannot be 0");
-       if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
-               return rte_flow_error_set(error, EINVAL,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "VXLAN tunnel must be fully defined");
-       if (size <= flow_size) {
-               mlx5_flow_spec_verbs_add(flow, &vxlan, size);
-               flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
+                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                         &rss->level,
+                                         "tunnel RSS is not supported");
+       if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                         &rss->key_len,
+                                         "RSS hash key too small");
+       if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                         &rss->key_len,
+                                         "RSS hash key too large");
+       if (rss->queue_num > priv->config.ind_table_max_size)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                         &rss->queue_num,
+                                         "number of queues too large");
+       if (rss->types & MLX5_RSS_HF_MASK)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                         &rss->types,
+                                         "some RSS protocols are not"
+                                         " supported");
+       for (i = 0; i != rss->queue_num; ++i) {
+               if (!(*priv->rxqs)[rss->queue[i]])
+                       return rte_flow_error_set
+                               (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                &rss->queue[i], "queue is not configured");
        }
-       flow->layers |= MLX5_FLOW_LAYER_VXLAN;
-       return size;
+       if (attr->egress)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+                                         "rss action not supported for "
+                                         "egress");
+       return 0;
 }
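
A sketch of an RSS configuration that satisfies all of the above: Toeplitz hashing, a key of exactly MLX5_RSS_HASH_KEY_LEN bytes (40 on this PMD, hence the array size below), outer-level hashing, and only configured queues. The key contents and queue list are illustrative:

static const uint8_t rss_key[40] = { 0x2c, /* ... MLX5_RSS_HASH_KEY_LEN bytes */ };
static const uint16_t rss_queues[] = { 0, 1, 2, 3 };	/* all must be configured */
struct rte_flow_action_rss rss = {
	.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
	.level = 1,		/* outer headers; level 2 needs tunnel support */
	.types = ETH_RSS_IP | ETH_RSS_UDP,	/* no bits outside the supported set */
	.key_len = sizeof(rss_key),
	.key = rss_key,
	.queue_num = RTE_DIM(rss_queues),	/* bounded by ind_table_max_size */
	.queue = rss_queues,
};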
 
-/**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+/**
+ * Validate the count action.
  *
- * @param dev
- *   Pointer to Ethernet device.
- * @param[in] item
- *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] attr
+ *   Attributes of the flow that includes this action.
  * @param[out] error
  *   Pointer to error structure.
  *
  * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev,
-                        const struct rte_flow_item *item,
-                        struct rte_flow *flow, const size_t flow_size,
-                        struct rte_flow_error *error)
+int
+mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
+                               const struct rte_flow_attr *attr,
+                               struct rte_flow_error *error)
 {
-       const struct rte_flow_item_vxlan_gpe *spec = item->spec;
-       const struct rte_flow_item_vxlan_gpe *mask = item->mask;
-       unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
-       struct ibv_flow_spec_tunnel vxlan_gpe = {
-               .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
-               .size = size,
-       };
-       int ret;
-       union vni {
-               uint32_t vlan_id;
-               uint8_t vni[4];
-       } id = { .vlan_id = 0, };
-
-       if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "L3 VXLAN is not enabled by device"
-                                         " parameter and/or not configured in"
-                                         " firmware");
-       if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "a tunnel is already present");
-       /*
-        * Verify only UDPv4 is present as defined in
-        * https://tools.ietf.org/html/rfc7348
-        */
-       if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
+       if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "no outer UDP layer found");
-       if (!mask)
-               mask = &rte_flow_item_vxlan_gpe_mask;
-       ret = mlx5_flow_item_acceptable
-               (item, (const uint8_t *)mask,
-                (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
-                sizeof(struct rte_flow_item_vxlan_gpe), error);
-       if (ret < 0)
-               return ret;
-       if (spec) {
-               memcpy(&id.vni[1], spec->vni, 3);
-               vxlan_gpe.val.tunnel_id = id.vlan_id;
-               memcpy(&id.vni[1], mask->vni, 3);
-               vxlan_gpe.mask.tunnel_id = id.vlan_id;
-               if (spec->protocol)
-                       return rte_flow_error_set
-                               (error, EINVAL,
-                                RTE_FLOW_ERROR_TYPE_ITEM,
-                                item,
-                                "VxLAN-GPE protocol not supported");
-               /* Remove unwanted bits from values. */
-               vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
-       }
-       /*
-        * Tunnel id 0 is equivalent as not adding a VXLAN layer, if only this
-        * layer is defined in the Verbs specification it is interpreted as
-        * wildcard and all packets will match this rule, if it follows a full
-        * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
-        * before will also match this rule.  To avoid such situation, VNI 0
-        * is currently refused.
-        */
-       if (!vxlan_gpe.val.tunnel_id)
-               return rte_flow_error_set(error, EINVAL,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "VXLAN-GPE vni cannot be 0");
-       if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
-               return rte_flow_error_set(error, EINVAL,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "VXLAN-GPE tunnel must be fully"
-                                         " defined");
-       if (size <= flow_size) {
-               mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size);
-               flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
-       }
-       flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE;
-       return size;
+                                         RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+                                         "count action not supported for "
+                                         "egress");
+       return 0;
 }
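
Once a rule carrying a COUNT action has been created, its statistics can be read back through rte_flow_query(); a fragment, assuming the flow handle, port id and stdio/inttypes headers exist:

struct rte_flow_action count_action = { .type = RTE_FLOW_ACTION_TYPE_COUNT };
struct rte_flow_query_count stats = { .reset = 1 };	/* clear counters on read */
struct rte_flow_error err;

if (rte_flow_query(port_id, flow, &count_action, &stats, &err) == 0)
	printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n", stats.hits, stats.bytes);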
 
 /**
- * Update the protocol in Verbs IPv4/IPv6 spec.
- *
- * @param[in, out] attr
- *   Pointer to Verbs attributes structure.
- * @param[in] search
- *   Specification type to search in order to update the IP protocol.
- * @param[in] protocol
- *   Protocol value to set if none is present in the specification.
+ * Verify that the @p attributes will be correctly understood by the NIC and
+ * are valid for the current device configuration.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] attributes
+ *   Pointer to flow attributes.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static void
-mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
-                                     enum ibv_flow_spec_type search,
-                                     uint8_t protocol)
+int
+mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
+                             const struct rte_flow_attr *attributes,
+                             struct rte_flow_error *error)
 {
-       unsigned int i;
-       struct ibv_spec_header *hdr = (struct ibv_spec_header *)
-               ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
-
-       if (!attr)
-               return;
-       for (i = 0; i != attr->num_of_specs; ++i) {
-               if (hdr->type == search) {
-                       union {
-                               struct ibv_flow_spec_ipv4_ext *ipv4;
-                               struct ibv_flow_spec_ipv6 *ipv6;
-                       } ip;
-
-                       switch (search) {
-                       case IBV_FLOW_SPEC_IPV4_EXT:
-                               ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
-                               if (!ip.ipv4->val.proto) {
-                                       ip.ipv4->val.proto = protocol;
-                                       ip.ipv4->mask.proto = 0xff;
-                               }
-                               break;
-                       case IBV_FLOW_SPEC_IPV6:
-                               ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
-                               if (!ip.ipv6->val.next_hdr) {
-                                       ip.ipv6->val.next_hdr = protocol;
-                                       ip.ipv6->mask.next_hdr = 0xff;
-                               }
-                               break;
-                       default:
-                               break;
-                       }
-                       break;
-               }
-               hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
-       }
+       struct priv *priv = dev->data->dev_private;
+       uint32_t priority_max = priv->config.flow_prio - 1;
+
+       if (attributes->group)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+                                         NULL, "groups are not supported");
+       if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
+           attributes->priority >= priority_max)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+                                         NULL, "priority out of range");
+       if (attributes->egress)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
+                                         "egress is not supported");
+       if (attributes->transfer)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
+                                         NULL, "transfer is not supported");
+       if (!attributes->ingress)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+                                         NULL,
+                                         "ingress attribute is mandatory");
+       return 0;
 }
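
In other words, only ingress rules in group 0 with an in-range (or reserved) priority are accepted at this stage; a conforming attribute set, with the priority value illustrative:

struct rte_flow_attr attr = {
	.group = 0,	/* non-zero groups are rejected */
	.priority = 0,	/* must stay below the device's flow_prio - 1 */
	.ingress = 1,	/* mandatory */
			/* .egress and .transfer must remain 0 */
};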
 
 /**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * It will also update the previous L3 layer with the protocol value matching
- * the GRE.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate Ethernet item.
  *
- * @param dev
- *   Pointer to Ethernet device.
  * @param[in] item
  *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
+ * @param[in] item_flags
+ *   Bit-fields that hold the items detected until now.
  * @param[out] error
  *   Pointer to error structure.
  *
  * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_item_gre(const struct rte_flow_item *item,
-                  struct rte_flow *flow, const size_t flow_size,
-                  struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
+                           uint64_t item_flags,
+                           struct rte_flow_error *error)
 {
-       struct mlx5_flow_verbs *verbs = flow->cur_verbs;
-       const struct rte_flow_item_gre *spec = item->spec;
-       const struct rte_flow_item_gre *mask = item->mask;
-#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
-       unsigned int size = sizeof(struct ibv_flow_spec_gre);
-       struct ibv_flow_spec_gre tunnel = {
-               .type = IBV_FLOW_SPEC_GRE,
-               .size = size,
-       };
-#else
-       unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
-       struct ibv_flow_spec_tunnel tunnel = {
-               .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
-               .size = size,
+       const struct rte_flow_item_eth *mask = item->mask;
+       const struct rte_flow_item_eth nic_mask = {
+               .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+               .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+               .type = RTE_BE16(0xffff),
        };
-#endif
        int ret;
+       const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
 
-       if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_GRE)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "protocol filtering not compatible"
-                                         " with this GRE layer");
-       if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
+       if (item_flags & MLX5_FLOW_LAYER_OUTER_L2)
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "a tunnel is already present");
-       if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "3 levels of L2 are not supported");
+       if ((item_flags & MLX5_FLOW_LAYER_INNER_L2) && !tunnel)
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "L3 Layer is missing");
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "2 L2 layers without tunnel are not supported");
        if (!mask)
-               mask = &rte_flow_item_gre_mask;
-       ret = mlx5_flow_item_acceptable
-               (item, (const uint8_t *)mask,
-                (const uint8_t *)&rte_flow_item_gre_mask,
-                sizeof(struct rte_flow_item_gre), error);
-       if (ret < 0)
-               return ret;
-#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
-       if (spec) {
-               tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
-               tunnel.val.protocol = spec->protocol;
-               tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
-               tunnel.mask.protocol = mask->protocol;
-               /* Remove unwanted bits from values. */
-               tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
-               tunnel.val.protocol &= tunnel.mask.protocol;
-               tunnel.val.key &= tunnel.mask.key;
-       }
-#else
-       if (spec && (spec->protocol & mask->protocol))
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "without MPLS support the"
-                                         " specification cannot be used for"
-                                         " filtering");
-#endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
-       if (size <= flow_size) {
-               if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
-                       mlx5_flow_item_gre_ip_protocol_update
-                               (verbs->attr, IBV_FLOW_SPEC_IPV4_EXT,
-                                MLX5_IP_PROTOCOL_GRE);
-               else
-                       mlx5_flow_item_gre_ip_protocol_update
-                               (verbs->attr, IBV_FLOW_SPEC_IPV6,
-                                MLX5_IP_PROTOCOL_GRE);
-               mlx5_flow_spec_verbs_add(flow, &tunnel, size);
-               flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
-       }
-       flow->layers |= MLX5_FLOW_LAYER_GRE;
-       return size;
+               mask = &rte_flow_item_eth_mask;
+       ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+                                       (const uint8_t *)&nic_mask,
+                                       sizeof(struct rte_flow_item_eth),
+                                       error);
+       return ret;
 }
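
A sketch of an Ethernet item accepted by this validator, matching one destination MAC and leaving everything else wildcarded; the address is illustrative:

struct rte_flow_item_eth eth_spec = {
	.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
};
struct rte_flow_item_eth eth_mask = {
	.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",	/* subset of nic_mask */
};
struct rte_flow_item item = {
	.type = RTE_FLOW_ITEM_TYPE_ETH,
	.spec = &eth_spec,
	.mask = &eth_mask,
};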
 
 /**
- * Convert the @p item into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate VLAN item.
  *
  * @param[in] item
  *   Item specification.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
+ * @param[in] item_flags
+ *   Bit-fields that hold the items detected until now.
  * @param[out] error
  *   Pointer to error structure.
  *
  * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p item has fully been converted,
- *   otherwise another call with this returned memory size should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused,
-                   struct rte_flow *flow __rte_unused,
-                   const size_t flow_size __rte_unused,
-                   struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
+                            uint64_t item_flags,
+                            struct rte_flow_error *error)
 {
-#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
-       const struct rte_flow_item_mpls *spec = item->spec;
-       const struct rte_flow_item_mpls *mask = item->mask;
-       unsigned int size = sizeof(struct ibv_flow_spec_mpls);
-       struct ibv_flow_spec_mpls mpls = {
-               .type = IBV_FLOW_SPEC_MPLS,
-               .size = size,
+       const struct rte_flow_item_vlan *spec = item->spec;
+       const struct rte_flow_item_vlan *mask = item->mask;
+       const struct rte_flow_item_vlan nic_mask = {
+               .tci = RTE_BE16(0x0fff),
+               .inner_type = RTE_BE16(0xffff),
        };
+       uint16_t vlan_tag = 0;
+       const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
        int ret;
+       const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
+                                       MLX5_FLOW_LAYER_INNER_L4) :
+                                      (MLX5_FLOW_LAYER_OUTER_L3 |
+                                       MLX5_FLOW_LAYER_OUTER_L4);
+       const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
+                                       MLX5_FLOW_LAYER_OUTER_VLAN;
 
-       if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_MPLS)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "protocol filtering not compatible"
-                                         " with MPLS layer");
-       /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
-       if (flow->layers & MLX5_FLOW_LAYER_TUNNEL &&
-           (flow->layers & MLX5_FLOW_LAYER_GRE) != MLX5_FLOW_LAYER_GRE)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ITEM,
-                                         item,
-                                         "a tunnel is already"
-                                         " present");
+       if (item_flags & vlanm)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "VLAN layer already configured");
+       else if ((item_flags & l34m) != 0)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "L2 layer cannot follow L3/L4 layer");
        if (!mask)
-               mask = &rte_flow_item_mpls_mask;
-       ret = mlx5_flow_item_acceptable
-               (item, (const uint8_t *)mask,
-                (const uint8_t *)&rte_flow_item_mpls_mask,
-                sizeof(struct rte_flow_item_mpls), error);
-       if (ret < 0)
+               mask = &rte_flow_item_vlan_mask;
+       ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+                                       (const uint8_t *)&nic_mask,
+                                       sizeof(struct rte_flow_item_vlan),
+                                       error);
+       if (ret)
                return ret;
        if (spec) {
-               memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
-               memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
-               /* Remove unwanted bits from values.  */
-               mpls.val.label &= mpls.mask.label;
-       }
-       if (size <= flow_size) {
-               mlx5_flow_spec_verbs_add(flow, &mpls, size);
-               flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
+               vlan_tag = spec->tci;
+               vlan_tag &= mask->tci;
        }
-       flow->layers |= MLX5_FLOW_LAYER_MPLS;
-       return size;
-#endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
-       return rte_flow_error_set(error, ENOTSUP,
-                                 RTE_FLOW_ERROR_TYPE_ITEM,
-                                 item,
-                                 "MPLS is not supported by Verbs, please"
-                                 " update.");
+       /*
+        * From the Verbs perspective, an empty VLAN is equivalent
+        * to a packet without a VLAN layer.
+        */
+       if (!vlan_tag)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+                                         item->spec,
+                                         "VLAN cannot be empty");
+       return 0;
 }
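
Because an all-zero TCI under the mask is refused, a VLAN item needs at least one significant bit set; e.g. matching VLAN ID 100 (value illustrative):

struct rte_flow_item_vlan vlan_spec = { .tci = RTE_BE16(100) };
struct rte_flow_item_vlan vlan_mask = { .tci = RTE_BE16(0x0fff) };	/* VID bits only */
struct rte_flow_item item = {
	.type = RTE_FLOW_ITEM_TYPE_VLAN,
	.spec = &vlan_spec,
	.mask = &vlan_mask,
};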
 
 /**
- * Convert the @p pattern into a Verbs specifications after ensuring the NIC
- * will understand and process it correctly.
- * The conversion is performed item per item, each of them is written into
- * the @p flow if its size is lesser or equal to @p flow_size.
- * Validation and memory consumption computation are still performed until the
- * end of @p pattern, unless an error is encountered.
- *
- * @param[in] pattern
- *   Flow pattern.
- * @param[in, out] flow
- *   Pointer to the rte_flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small some
- *   garbage may be present.
+ * Validate IPV4 item.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that hold the items detected until now.
  * @param[out] error
  *   Pointer to error structure.
  *
  * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @pattern  has fully been
- *   converted, otherwise another call with this returned memory size should
- *   be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_items(struct rte_eth_dev *dev,
-               const struct rte_flow_item pattern[],
-               struct rte_flow *flow, const size_t flow_size,
-               struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
+                            uint64_t item_flags,
+                            struct rte_flow_error *error)
 {
-       int remain = flow_size;
-       size_t size = 0;
-
-       for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
-               int ret = 0;
-
-               switch (pattern->type) {
-               case RTE_FLOW_ITEM_TYPE_VOID:
-                       break;
-               case RTE_FLOW_ITEM_TYPE_ETH:
-                       ret = mlx5_flow_item_eth(pattern, flow, remain, error);
-                       break;
-               case RTE_FLOW_ITEM_TYPE_VLAN:
-                       ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
-                       break;
-               case RTE_FLOW_ITEM_TYPE_IPV4:
-                       ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
-                       break;
-               case RTE_FLOW_ITEM_TYPE_IPV6:
-                       ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
-                       break;
-               case RTE_FLOW_ITEM_TYPE_UDP:
-                       ret = mlx5_flow_item_udp(pattern, flow, remain, error);
-                       break;
-               case RTE_FLOW_ITEM_TYPE_TCP:
-                       ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
-                       break;
-               case RTE_FLOW_ITEM_TYPE_VXLAN:
-                       ret = mlx5_flow_item_vxlan(pattern, flow, remain,
-                                                  error);
-                       break;
-               case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
-                       ret = mlx5_flow_item_vxlan_gpe(dev, pattern, flow,
-                                                      remain, error);
-                       break;
-               case RTE_FLOW_ITEM_TYPE_GRE:
-                       ret = mlx5_flow_item_gre(pattern, flow, remain, error);
-                       break;
-               case RTE_FLOW_ITEM_TYPE_MPLS:
-                       ret = mlx5_flow_item_mpls(pattern, flow, remain, error);
-                       break;
-               default:
-                       return rte_flow_error_set(error, ENOTSUP,
-                                                 RTE_FLOW_ERROR_TYPE_ITEM,
-                                                 pattern,
-                                                 "item not supported");
-               }
-               if (ret < 0)
-                       return ret;
-               if (remain > ret)
-                       remain -= ret;
-               else
-                       remain = 0;
-               size += ret;
-       }
-       if (!flow->layers) {
-               const struct rte_flow_item item = {
-                       .type = RTE_FLOW_ITEM_TYPE_ETH,
-               };
+       const struct rte_flow_item_ipv4 *mask = item->mask;
+       const struct rte_flow_item_ipv4 nic_mask = {
+               .hdr = {
+                       .src_addr = RTE_BE32(0xffffffff),
+                       .dst_addr = RTE_BE32(0xffffffff),
+                       .type_of_service = 0xff,
+                       .next_proto_id = 0xff,
+               },
+       };
+       const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+       int ret;
 
-               return mlx5_flow_item_eth(&item, flow, flow_size, error);
-       }
-       return size;
+       if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
+                                  MLX5_FLOW_LAYER_OUTER_L3))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "multiple L3 layers not supported");
+       else if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
+                                       MLX5_FLOW_LAYER_OUTER_L4))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "L3 cannot follow an L4 layer.");
+       if (!mask)
+               mask = &rte_flow_item_ipv4_mask;
+       ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+                                       (const uint8_t *)&nic_mask,
+                                       sizeof(struct rte_flow_item_ipv4),
+                                       error);
+       if (ret < 0)
+               return ret;
+       return 0;
 }
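
For illustration, an IPv4 spec/mask pair restricted to fields the nic_mask above covers; the address is hypothetical:

struct rte_flow_item_ipv4 ipv4_spec = {
	.hdr = { .dst_addr = RTE_BE32(0xc0a80001) },	/* 192.168.0.1 */
};
struct rte_flow_item_ipv4 ipv4_mask = {
	.hdr = { .dst_addr = RTE_BE32(0xffffffff) },
};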
 
 /**
- * Convert the @p action into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate IPV6 item.
  *
- * @param[in] action
- *   Action configuration.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that hold the items detected until now.
  * @param[out] error
  *   Pointer to error structure.
  *
  * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p action has fully been
- *   converted, otherwise another call with this returned memory size should
- *   be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_action_drop(const struct rte_flow_action *action,
-                     struct rte_flow *flow, const size_t flow_size,
-                     struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
+                            uint64_t item_flags,
+                            struct rte_flow_error *error)
 {
-       unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
-       struct ibv_flow_spec_action_drop drop = {
-                       .type = IBV_FLOW_SPEC_ACTION_DROP,
-                       .size = size,
+       const struct rte_flow_item_ipv6 *mask = item->mask;
+       const struct rte_flow_item_ipv6 nic_mask = {
+               .hdr = {
+                       .src_addr =
+                               "\xff\xff\xff\xff\xff\xff\xff\xff"
+                               "\xff\xff\xff\xff\xff\xff\xff\xff",
+                       .dst_addr =
+                               "\xff\xff\xff\xff\xff\xff\xff\xff"
+                               "\xff\xff\xff\xff\xff\xff\xff\xff",
+                       .vtc_flow = RTE_BE32(0xffffffff),
+                       .proto = 0xff,
+                       .hop_limits = 0xff,
+               },
        };
+       const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+       int ret;
 
-       if (flow->fate)
+       if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
+                                  MLX5_FLOW_LAYER_OUTER_L3))
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION,
-                                         action,
-                                         "multiple fate actions are not"
-                                         " supported");
-       if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "multiple L3 layers not supported");
+       else if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
+                                       MLX5_FLOW_LAYER_OUTER_L4))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "L3 cannot follow an L4 layer.");
+       /*
+        * IPv6 is not recognised by the NIC inside a GRE tunnel.
+        * Such support has to be disabled as the rule will be
+        * accepted.  Issue reproduced with Mellanox OFED 4.3-3.0.2.1 and
+        * Mellanox OFED 4.4-1.0.0.0.
+        */
+       if (tunnel && item_flags & MLX5_FLOW_LAYER_GRE)
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION,
-                                         action,
-                                         "drop is not compatible with"
-                                         " flag/mark action");
-       if (size < flow_size)
-               mlx5_flow_spec_verbs_add(flow, &drop, size);
-       flow->fate |= MLX5_FLOW_FATE_DROP;
-       return size;
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "IPv6 inside a GRE tunnel is"
+                                         " not recognised.");
+       if (!mask)
+               mask = &rte_flow_item_ipv6_mask;
+       ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+                                       (const uint8_t *)&nic_mask,
+                                       sizeof(struct rte_flow_item_ipv6),
+                                       error);
+       if (ret < 0)
+               return ret;
+       return 0;
 }
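
An outer IPv6 match therefore remains straightforward as long as no GRE tunnel precedes it; a fragment matching one destination address (value illustrative):

struct rte_flow_item_ipv6 ipv6_spec = {
	.hdr = {
		.dst_addr = "\x20\x01\x0d\xb8\x00\x00\x00\x00"
			    "\x00\x00\x00\x00\x00\x00\x00\x01",	/* 2001:db8::1 */
	},
};
struct rte_flow_item_ipv6 ipv6_mask = {
	.hdr = {
		.dst_addr = "\xff\xff\xff\xff\xff\xff\xff\xff"
			    "\xff\xff\xff\xff\xff\xff\xff\xff",
	},
};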
 
 /**
- * Convert the @p action into @p flow after ensuring the NIC will understand
- * and process it correctly.
+ * Validate UDP item.
  *
- * @param[in] dev
- *   Pointer to Ethernet device structure.
- * @param[in] action
- *   Action configuration.
- * @param[in, out] flow
- *   Pointer to flow structure.
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that hold the items detected until now.
+ * @param[in] target_protocol
+ *   The next protocol in the previous item.
  * @param[out] error
  *   Pointer to error structure.
  *
  * @return
  *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_action_queue(struct rte_eth_dev *dev,
-                      const struct rte_flow_action *action,
-                      struct rte_flow *flow,
-                      struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
+                           uint64_t item_flags,
+                           uint8_t target_protocol,
+                           struct rte_flow_error *error)
 {
-       struct priv *priv = dev->data->dev_private;
-       const struct rte_flow_action_queue *queue = action->conf;
+       const struct rte_flow_item_udp *mask = item->mask;
+       const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+       int ret;
 
-       if (flow->fate)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION,
-                                         action,
-                                         "multiple fate actions are not"
-                                         " supported");
-       if (queue->index >= priv->rxqs_n)
+       if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
                return rte_flow_error_set(error, EINVAL,
-                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                         &queue->index,
-                                         "queue index out of range");
-       if (!(*priv->rxqs)[queue->index])
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "protocol filtering not compatible"
+                                         " with UDP layer");
+       if (!(item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
+                                    MLX5_FLOW_LAYER_OUTER_L3)))
                return rte_flow_error_set(error, EINVAL,
-                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                         &queue->index,
-                                         "queue is not configured");
-       if (flow->queue)
-               (*flow->queue)[0] = queue->index;
-       flow->rss.queue_num = 1;
-       flow->fate |= MLX5_FLOW_FATE_QUEUE;
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "L3 is mandatory to filter on L4");
+       if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
+                                  MLX5_FLOW_LAYER_OUTER_L4))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "L4 layer is already present");
+       if (!mask)
+               mask = &rte_flow_item_udp_mask;
+       ret = mlx5_flow_item_acceptable
+               (item, (const uint8_t *)mask,
+                (const uint8_t *)&rte_flow_item_udp_mask,
+                sizeof(struct rte_flow_item_udp), error);
+       if (ret < 0)
+               return ret;
        return 0;
 }
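
/*
 * Editor's note: a hypothetical caller loop, illustrating how item_flags is
 * assumed to be accumulated while walking a pattern; this is what lets the
 * checks above enforce L3-before-L4 ordering.  The flag names are the
 * driver's, the loop itself is a sketch and assumes a caller-supplied
 * "pattern" item array and validator calls (shown as comments):
 */
uint64_t item_flags = 0;
const struct rte_flow_item *it;

for (it = pattern; it->type != RTE_FLOW_ITEM_TYPE_END; ++it) {
        switch (it->type) {
        case RTE_FLOW_ITEM_TYPE_IPV4:
                /* validate against the flags seen so far, then record L3. */
                item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
                break;
        case RTE_FLOW_ITEM_TYPE_UDP:
                /* mlx5_flow_validate_item_udp() rejects the item here
                 * unless an L3 flag is already present in item_flags. */
                item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
                break;
        default:
                break;
        }
}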
 
 /**
- * Ensure the @p action will be understood and used correctly by the  NIC.
+ * Validate TCP item.
  *
- * @param dev
- *   Pointer to Ethernet device structure.
- * @param action[in]
- *   Pointer to flow actions array.
- * @param flow[in, out]
- *   Pointer to the rte_flow structure.
- * @param error[in, out]
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that hold the items detected until now.
+ * @param[in] target_protocol
+ *   The next protocol in the previous item.
+ * @param[in] flow_mask
+ *   mlx5 flow-specific (TCF, DV, verbs, etc.) supported header fields mask.
+ * @param[out] error
  *   Pointer to error structure.
  *
  * @return
- *   On success @p flow->queue array and @p flow->rss are filled and valid.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_action_rss(struct rte_eth_dev *dev,
-                    const struct rte_flow_action *action,
-                    struct rte_flow *flow,
-                    struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
+                           uint64_t item_flags,
+                           uint8_t target_protocol,
+                           const struct rte_flow_item_tcp *flow_mask,
+                           struct rte_flow_error *error)
 {
-       struct priv *priv = dev->data->dev_private;
-       const struct rte_flow_action_rss *rss = action->conf;
-       unsigned int i;
+       const struct rte_flow_item_tcp *mask = item->mask;
+       const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+       int ret;
 
-       if (flow->fate)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION,
-                                         action,
-                                         "multiple fate actions are not"
-                                         " supported");
-       if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
-           rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                         &rss->func,
-                                         "RSS hash function not supported");
-#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
-       if (rss->level > 2)
-#else
-       if (rss->level > 1)
-#endif
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                         &rss->level,
-                                         "tunnel RSS is not supported");
-       if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                         &rss->key_len,
-                                         "RSS hash key too small");
-       if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                         &rss->key_len,
-                                         "RSS hash key too large");
-       if (!rss->queue_num)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                         rss,
-                                         "no queues were provided for RSS");
-       if (rss->queue_num > priv->config.ind_table_max_size)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                         &rss->queue_num,
-                                         "number of queues too large");
-       if (rss->types & MLX5_RSS_HF_MASK)
-               return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                         &rss->types,
-                                         "some RSS protocols are not"
-                                         " supported");
-       for (i = 0; i != rss->queue_num; ++i) {
-               if (rss->queue[i] >= priv->rxqs_n)
-                       return rte_flow_error_set
-                               (error, EINVAL,
-                                RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                rss,
-                                "queue index out of range");
-               if (!(*priv->rxqs)[rss->queue[i]])
-                       return rte_flow_error_set
-                               (error, EINVAL,
-                                RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                &rss->queue[i],
-                                "queue is not configured");
-       }
-       if (flow->queue)
-               memcpy((*flow->queue), rss->queue,
-                      rss->queue_num * sizeof(uint16_t));
-       flow->rss.queue_num = rss->queue_num;
-       memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
-       flow->rss.types = rss->types;
-       flow->rss.level = rss->level;
-       flow->fate |= MLX5_FLOW_FATE_RSS;
+       assert(flow_mask);
+       if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "protocol filtering not compatible"
+                                         " with TCP layer");
+       if (!(item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
+                                    MLX5_FLOW_LAYER_OUTER_L3)))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "L3 is mandatory to filter on L4");
+       if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
+                                  MLX5_FLOW_LAYER_OUTER_L4))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "L4 layer is already present");
+       if (!mask)
+               mask = &rte_flow_item_tcp_mask;
+       ret = mlx5_flow_item_acceptable
+               (item, (const uint8_t *)mask,
+                (const uint8_t *)flow_mask,
+                sizeof(struct rte_flow_item_tcp), error);
+       if (ret < 0)
+               return ret;
        return 0;
 }
 
 /**
- * Convert the @p action into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate VXLAN item.
  *
- * @param[in] action
- *   Action configuration.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that hold the items detected until now.
  * @param[out] error
  *   Pointer to error structure.
  *
  * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p action has fully been
- *   converted, otherwise another call with this returned memory size should
- *   be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_action_flag(const struct rte_flow_action *action,
-                     struct rte_flow *flow, const size_t flow_size,
-                     struct rte_flow_error *error)
-{
-       unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
-       struct ibv_flow_spec_action_tag tag = {
-               .type = IBV_FLOW_SPEC_ACTION_TAG,
-               .size = size,
-               .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
-       };
-       struct mlx5_flow_verbs *verbs = flow->cur_verbs;
+int
+mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
+                             uint64_t item_flags,
+                             struct rte_flow_error *error)
+{
+       const struct rte_flow_item_vxlan *spec = item->spec;
+       const struct rte_flow_item_vxlan *mask = item->mask;
+       int ret;
+       union vni {
+               uint32_t vlan_id;
+               uint8_t vni[4];
+       } id = { .vlan_id = 0, };
+       uint32_t vlan_id = 0;
 
-       if (flow->modifier & MLX5_FLOW_MOD_FLAG)
+       if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION,
-                                         action,
-                                         "flag action already present");
-       if (flow->fate & MLX5_FLOW_FATE_DROP)
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "a tunnel is already present");
+       /*
+        * Verify that the outer L4 layer is UDP, as required by
+        * https://tools.ietf.org/html/rfc7348.
+        */
+       if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "no outer UDP layer found");
+       if (!mask)
+               mask = &rte_flow_item_vxlan_mask;
+       ret = mlx5_flow_item_acceptable
+               (item, (const uint8_t *)mask,
+                (const uint8_t *)&rte_flow_item_vxlan_mask,
+                sizeof(struct rte_flow_item_vxlan),
+                error);
+       if (ret < 0)
+               return ret;
+       if (spec) {
+               memcpy(&id.vni[1], spec->vni, 3);
+               vlan_id = id.vlan_id;
+               memcpy(&id.vni[1], mask->vni, 3);
+               vlan_id &= id.vlan_id;
+       }
+       /*
+        * Tunnel id 0 is equivalent to not adding a VXLAN layer: if
+        * only this layer is defined in the Verbs specification, it
+        * is interpreted as a wildcard and all packets will match
+        * this rule; if it follows a full stack layer (e.g. eth /
+        * ipv4 / udp), all packets matching the preceding layers
+        * will also match this rule.  To avoid such a situation,
+        * VNI 0 is currently refused.
+        */
+       if (!vlan_id)
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION,
-                                         action,
-                                         "flag is not compatible with drop"
-                                         " action");
-       if (flow->modifier & MLX5_FLOW_MOD_MARK)
-               size = 0;
-       else if (size <= flow_size && verbs)
-               mlx5_flow_spec_verbs_add(flow, &tag, size);
-       flow->modifier |= MLX5_FLOW_MOD_FLAG;
-       return size;
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "VXLAN vni cannot be 0");
+       if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "VXLAN tunnel must be fully defined");
+       return 0;
 }
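
/*
 * Editor's note: a standalone illustration of the union trick above, which
 * reads the 24-bit VNI into a 32-bit word so it can be tested against zero
 * (the driver's local is named vlan_id although it actually carries the
 * VNI).  The numeric value depends on host endianness; only the
 * zero/non-zero test matters, exactly as in the validator:
 */
#include <stdint.h>
#include <string.h>

static uint32_t
example_vni_word(const uint8_t vni[3])
{
        union {
                uint32_t word;
                uint8_t bytes[4];
        } id = { .word = 0 };

        memcpy(&id.bytes[1], vni, 3); /* bytes 1..3 <- VNI, byte 0 stays 0 */
        return id.word;               /* non-zero iff any VNI byte is set */
}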
 
 /**
- * Update verbs specification to modify the flag to mark.
+ * Validate VXLAN_GPE item.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that hold the items detected until now.
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[out] error
+ *   Pointer to error structure.
  *
- * @param[in, out] verbs
- *   Pointer to the mlx5_flow_verbs structure.
- * @param[in] mark_id
- *   Mark identifier to replace the flag.
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static void
-mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
+int
+mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
+                                 uint64_t item_flags,
+                                 struct rte_eth_dev *dev,
+                                 struct rte_flow_error *error)
 {
-       struct ibv_spec_header *hdr;
-       int i;
+       struct priv *priv = dev->data->dev_private;
+       const struct rte_flow_item_vxlan_gpe *spec = item->spec;
+       const struct rte_flow_item_vxlan_gpe *mask = item->mask;
+       int ret;
+       union vni {
+               uint32_t vlan_id;
+               uint8_t vni[4];
+       } id = { .vlan_id = 0, };
+       uint32_t vlan_id = 0;
 
-       if (!verbs)
-               return;
-       /* Update Verbs specification. */
-       hdr = (struct ibv_spec_header *)verbs->specs;
-       if (!hdr)
-               return;
-       for (i = 0; i != verbs->attr->num_of_specs; ++i) {
-               if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
-                       struct ibv_flow_spec_action_tag *t =
-                               (struct ibv_flow_spec_action_tag *)hdr;
-
-                       t->tag_id = mlx5_flow_mark_set(mark_id);
-               }
-               hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
+       if (!priv->config.l3_vxlan_en)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "L3 VXLAN is not enabled by device"
+                                         " parameter and/or not configured in"
+                                         " firmware");
+       if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "a tunnel is already present");
+       /*
+        * Verify that the outer L4 layer is UDP, as required by
+        * https://tools.ietf.org/html/rfc7348.
+        */
+       if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "no outer UDP layer found");
+       if (!mask)
+               mask = &rte_flow_item_vxlan_gpe_mask;
+       ret = mlx5_flow_item_acceptable
+               (item, (const uint8_t *)mask,
+                (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
+                sizeof(struct rte_flow_item_vxlan_gpe),
+                error);
+       if (ret < 0)
+               return ret;
+       if (spec) {
+               if (spec->protocol)
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ITEM,
+                                                 item,
+                                                 "VxLAN-GPE protocol"
+                                                 " not supported");
+               memcpy(&id.vni[1], spec->vni, 3);
+               vlan_id = id.vlan_id;
+               memcpy(&id.vni[1], mask->vni, 3);
+               vlan_id &= id.vlan_id;
        }
+       /*
+        * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
+        * layer is defined in the Verbs specification, it is interpreted as a
+        * wildcard and all packets will match this rule; if it follows a full
+        * stack layer (e.g. eth / ipv4 / udp), all packets matching the
+        * preceding layers will also match this rule.  To avoid such a
+        * situation, VNI 0 is currently refused.
+        */
+       if (!vlan_id)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "VXLAN-GPE vni cannot be 0");
+       if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "VXLAN-GPE tunnel must be fully"
+                                         " defined");
+       return 0;
 }
 
 /**
- * Convert the @p action into @p flow (or by updating the already present
- * Flag Verbs specification) after ensuring the NIC will understand and
- * process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
+ * Validate GRE item.
  *
- * @param[in] action
- *   Action configuration.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that hold the items detected until now.
+ * @param[in] target_protocol
+ *   The next protocol in the previous item.
  * @param[out] error
  *   Pointer to error structure.
  *
  * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p action has fully been
- *   converted, otherwise another call with this returned memory size should
- *   be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_action_mark(const struct rte_flow_action *action,
-                     struct rte_flow *flow, const size_t flow_size,
-                     struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
+                           uint64_t item_flags,
+                           uint8_t target_protocol,
+                           struct rte_flow_error *error)
 {
-       const struct rte_flow_action_mark *mark = action->conf;
-       unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
-       struct ibv_flow_spec_action_tag tag = {
-               .type = IBV_FLOW_SPEC_ACTION_TAG,
-               .size = size,
-       };
-       struct mlx5_flow_verbs *verbs = flow->cur_verbs;
+       const struct rte_flow_item_gre *spec __rte_unused = item->spec;
+       const struct rte_flow_item_gre *mask = item->mask;
+       int ret;
 
-       if (!mark)
-               return rte_flow_error_set(error, EINVAL,
-                                         RTE_FLOW_ERROR_TYPE_ACTION,
-                                         action,
-                                         "configuration cannot be null");
-       if (mark->id >= MLX5_FLOW_MARK_MAX)
+       if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
                return rte_flow_error_set(error, EINVAL,
-                                         RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                         &mark->id,
-                                         "mark id must in 0 <= id < "
-                                         RTE_STR(MLX5_FLOW_MARK_MAX));
-       if (flow->modifier & MLX5_FLOW_MOD_MARK)
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "protocol filtering not compatible"
+                                         " with this GRE layer");
+       if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION,
-                                         action,
-                                         "mark action already present");
-       if (flow->fate & MLX5_FLOW_FATE_DROP)
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "a tunnel is already present");
+       if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION,
-                                         action,
-                                         "mark is not compatible with drop"
-                                         " action");
-       if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
-               mlx5_flow_verbs_mark_update(verbs, mark->id);
-               size = 0;
-       } else if (size <= flow_size) {
-               tag.tag_id = mlx5_flow_mark_set(mark->id);
-               mlx5_flow_spec_verbs_add(flow, &tag, size);
-       }
-       flow->modifier |= MLX5_FLOW_MOD_MARK;
-       return size;
-}
-
-/**
- * Convert the @p action into a Verbs specification after ensuring the NIC
- * will understand and process it correctly.
- * If the necessary size for the conversion is greater than the @p flow_size,
- * nothing is written in @p flow, the validation is still performed.
- *
- * @param action[in]
- *   Action configuration.
- * @param flow[in, out]
- *   Pointer to flow structure.
- * @param flow_size[in]
- *   Size in bytes of the available space in @p flow, if too small, nothing is
- *   written.
- * @param error[int, out]
- *   Pointer to error structure.
- *
- * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p action has fully been
- *   converted, otherwise another call with this returned memory size should
- *   be done.
- *   On error, a negative errno value is returned and rte_errno is set.
- */
-static int
-mlx5_flow_action_count(struct rte_eth_dev *dev,
-                      const struct rte_flow_action *action,
-                      struct rte_flow *flow,
-                      const size_t flow_size __rte_unused,
-                      struct rte_flow_error *error)
-{
-       const struct rte_flow_action_count *count = action->conf;
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-       unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
-       struct ibv_flow_spec_counter_action counter = {
-               .type = IBV_FLOW_SPEC_ACTION_COUNT,
-               .size = size,
-       };
-#endif
-
-       if (!flow->counter) {
-               flow->counter = mlx5_flow_counter_new(dev, count->shared,
-                                                     count->id);
-               if (!flow->counter)
-                       return rte_flow_error_set(error, ENOTSUP,
-                                                 RTE_FLOW_ERROR_TYPE_ACTION,
-                                                 action,
-                                                 "cannot get counter"
-                                                 " context.");
-       }
-       if (!((struct priv *)dev->data->dev_private)->config.flow_counter_en)
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "L3 layer is missing");
+       if (!mask)
+               mask = &rte_flow_item_gre_mask;
+       ret = mlx5_flow_item_acceptable
+               (item, (const uint8_t *)mask,
+                (const uint8_t *)&rte_flow_item_gre_mask,
+                sizeof(struct rte_flow_item_gre), error);
+       if (ret < 0)
+               return ret;
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
+       if (spec && (spec->protocol & mask->protocol))
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_ACTION,
-                                         action,
-                                         "flow counters are not supported.");
-       flow->modifier |= MLX5_FLOW_MOD_COUNT;
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-       counter.counter_set_handle = flow->counter->cs->handle;
-       if (size <= flow_size)
-               mlx5_flow_spec_verbs_add(flow, &counter, size);
-       return size;
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "without MPLS support the"
+                                         " specification cannot be used for"
+                                         " filtering");
 #endif
        return 0;
 }
 
 /**
- * Convert the @p action into @p flow after ensuring the NIC will understand
- * and process it correctly.
- * The conversion is performed action per action, each of them is written into
- * the @p flow if its size is lesser or equal to @p flow_size.
- * Validation and memory consumption computation are still performed until the
- * end of @p action, unless an error is encountered.
+ * Validate MPLS item.
  *
- * @param[in] dev
- *   Pointer to Ethernet device structure.
- * @param[in] actions
- *   Pointer to flow actions array.
- * @param[in, out] flow
- *   Pointer to the rte_flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small some
- *   garbage may be present.
+ * @param[in] item
+ *   Item specification.
+ * @param[in] item_flags
+ *   Bit-fields that hold the items detected until now.
+ * @param[in] target_protocol
+ *   The next protocol in the previous item.
  * @param[out] error
  *   Pointer to error structure.
  *
  * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the @p actions has fully been
- *   converted, otherwise another call with this returned memory size should
- *   be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_actions(struct rte_eth_dev *dev,
-                 const struct rte_flow_action actions[],
-                 struct rte_flow *flow, const size_t flow_size,
-                 struct rte_flow_error *error)
+int
+mlx5_flow_validate_item_mpls(const struct rte_flow_item *item __rte_unused,
+                            uint64_t item_flags __rte_unused,
+                            uint8_t target_protocol __rte_unused,
+                            struct rte_flow_error *error)
 {
-       size_t size = 0;
-       int remain = flow_size;
-       int ret = 0;
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+       const struct rte_flow_item_mpls *mask = item->mask;
+       int ret;
 
-       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
-               switch (actions->type) {
-               case RTE_FLOW_ACTION_TYPE_VOID:
-                       break;
-               case RTE_FLOW_ACTION_TYPE_FLAG:
-                       ret = mlx5_flow_action_flag(actions, flow, remain,
-                                                   error);
-                       break;
-               case RTE_FLOW_ACTION_TYPE_MARK:
-                       ret = mlx5_flow_action_mark(actions, flow, remain,
-                                                   error);
-                       break;
-               case RTE_FLOW_ACTION_TYPE_DROP:
-                       ret = mlx5_flow_action_drop(actions, flow, remain,
-                                                   error);
-                       break;
-               case RTE_FLOW_ACTION_TYPE_QUEUE:
-                       ret = mlx5_flow_action_queue(dev, actions, flow, error);
-                       break;
-               case RTE_FLOW_ACTION_TYPE_RSS:
-                       ret = mlx5_flow_action_rss(dev, actions, flow, error);
-                       break;
-               case RTE_FLOW_ACTION_TYPE_COUNT:
-                       ret = mlx5_flow_action_count(dev, actions, flow, remain,
-                                                    error);
-                       break;
-               default:
-                       return rte_flow_error_set(error, ENOTSUP,
-                                                 RTE_FLOW_ERROR_TYPE_ACTION,
-                                                 actions,
-                                                 "action not supported");
-               }
-               if (ret < 0)
-                       return ret;
-               if (remain > ret)
-                       remain -= ret;
-               else
-                       remain = 0;
-               size += ret;
-       }
-       if (!flow->fate)
+       if (target_protocol != 0xff && target_protocol != IPPROTO_MPLS)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "protocol filtering not compatible"
+                                         " with MPLS layer");
+       if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
                return rte_flow_error_set(error, ENOTSUP,
-                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                                         NULL,
-                                         "no fate action found");
-       return size;
+                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                         "a tunnel is already present");
+       if (!mask)
+               mask = &rte_flow_item_mpls_mask;
+       ret = mlx5_flow_item_acceptable
+               (item, (const uint8_t *)mask,
+                (const uint8_t *)&rte_flow_item_mpls_mask,
+                sizeof(struct rte_flow_item_mpls), error);
+       if (ret < 0)
+               return ret;
+       return 0;
+#endif
+       return rte_flow_error_set(error, ENOTSUP,
+                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                 "MPLS is not supported by Verbs, please"
+                                 " update the Verbs library.");
+}
+
+static int
+flow_null_validate(struct rte_eth_dev *dev __rte_unused,
+                  const struct rte_flow_attr *attr __rte_unused,
+                  const struct rte_flow_item items[] __rte_unused,
+                  const struct rte_flow_action actions[] __rte_unused,
+                  struct rte_flow_error *error __rte_unused)
+{
+       rte_errno = ENOTSUP;
+       return -rte_errno;
+}
+
+static struct mlx5_flow *
+flow_null_prepare(const struct rte_flow_attr *attr __rte_unused,
+                 const struct rte_flow_item items[] __rte_unused,
+                 const struct rte_flow_action actions[] __rte_unused,
+                 uint64_t *item_flags __rte_unused,
+                 uint64_t *action_flags __rte_unused,
+                 struct rte_flow_error *error __rte_unused)
+{
+       rte_errno = ENOTSUP;
+       return NULL;
+}
+
+static int
+flow_null_translate(struct rte_eth_dev *dev __rte_unused,
+                   struct mlx5_flow *dev_flow __rte_unused,
+                   const struct rte_flow_attr *attr __rte_unused,
+                   const struct rte_flow_item items[] __rte_unused,
+                   const struct rte_flow_action actions[] __rte_unused,
+                   struct rte_flow_error *error __rte_unused)
+{
+       rte_errno = ENOTSUP;
+       return -rte_errno;
+}
+
+static int
+flow_null_apply(struct rte_eth_dev *dev __rte_unused,
+               struct rte_flow *flow __rte_unused,
+               struct rte_flow_error *error __rte_unused)
+{
+       rte_errno = ENOTSUP;
+       return -rte_errno;
+}
+
+static void
+flow_null_remove(struct rte_eth_dev *dev __rte_unused,
+                struct rte_flow *flow __rte_unused)
+{
+}
+
+static void
+flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
+                 struct rte_flow *flow __rte_unused)
+{
+}
+
+static int
+flow_null_query(struct rte_eth_dev *dev __rte_unused,
+               struct rte_flow *flow __rte_unused,
+               const struct rte_flow_action *actions __rte_unused,
+               void *data __rte_unused,
+               struct rte_flow_error *error __rte_unused)
+{
+       rte_errno = ENOTSUP;
+       return -rte_errno;
 }
 
+/* Void driver to protect from null pointer reference. */
+const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
+       .validate = flow_null_validate,
+       .prepare = flow_null_prepare,
+       .translate = flow_null_translate,
+       .apply = flow_null_apply,
+       .remove = flow_null_remove,
+       .destroy = flow_null_destroy,
+       .query = flow_null_query,
+};
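
/*
 * Editor's note: the flow_get_drv_ops() macro defined further below indexes
 * a per-type dispatch table.  A hypothetical sketch of how such a table is
 * assumed to be laid out, with the null ops filling any slot whose backend
 * is compiled out so a lookup never yields a NULL pointer:
 */
static const struct mlx5_flow_driver_ops *flow_drv_ops_example[] = {
        [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
        [MLX5_FLOW_TYPE_DV] = &mlx5_flow_null_drv_ops,    /* or the DV ops */
        [MLX5_FLOW_TYPE_TCF] = &mlx5_flow_null_drv_ops,   /* or the TCF ops */
        [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_null_drv_ops, /* or the Verbs ops */
        [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops,
};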
+
 /**
- * Validate flow rule and fill flow structure accordingly.
+ * Select flow driver type according to flow attributes and device
+ * configuration.
  *
- * @param dev
- *   Pointer to Ethernet device.
- * @param[out] flow
- *   Pointer to flow structure.
- * @param flow_size
- *   Size of allocated space for @p flow.
+ * @param[in] dev
+ *   Pointer to the dev structure.
  * @param[in] attr
- *   Flow rule attributes.
- * @param[in] pattern
- *   Pattern specification (list terminated by the END pattern item).
- * @param[in] actions
- *   Associated actions (list terminated by the END action).
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
+ *   Pointer to the flow attributes.
  *
  * @return
- *   A positive value representing the size of the flow object in bytes
- *   regardless of @p flow_size on success, a negative errno value otherwise
- *   and rte_errno is set.
+ *   The selected flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
  */
-static int
-mlx5_flow_merge_switch(struct rte_eth_dev *dev,
-                      struct rte_flow *flow,
-                      size_t flow_size,
-                      const struct rte_flow_attr *attr,
-                      const struct rte_flow_item pattern[],
-                      const struct rte_flow_action actions[],
-                      struct rte_flow_error *error)
-{
-       unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
-       uint16_t port_id[!n + n];
-       struct mlx5_nl_flow_ptoi ptoi[!n + n + 1];
-       size_t off = RTE_ALIGN_CEIL(sizeof(*flow), alignof(max_align_t));
-       unsigned int i;
-       unsigned int own = 0;
-       int ret;
-
-       /* At least one port is needed when no switch domain is present. */
-       if (!n) {
-               n = 1;
-               port_id[0] = dev->data->port_id;
-       } else {
-               n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
-       }
-       for (i = 0; i != n; ++i) {
-               struct rte_eth_dev_info dev_info;
-
-               rte_eth_dev_info_get(port_id[i], &dev_info);
-               if (port_id[i] == dev->data->port_id)
-                       own = i;
-               ptoi[i].port_id = port_id[i];
-               ptoi[i].ifindex = dev_info.if_index;
-       }
-       /* Ensure first entry of ptoi[] is the current device. */
-       if (own) {
-               ptoi[n] = ptoi[0];
-               ptoi[0] = ptoi[own];
-               ptoi[own] = ptoi[n];
-       }
-       /* An entry with zero ifindex terminates ptoi[]. */
-       ptoi[n].port_id = 0;
-       ptoi[n].ifindex = 0;
-       if (flow_size < off)
-               flow_size = 0;
-       ret = mlx5_nl_flow_transpose((uint8_t *)flow + off,
-                                    flow_size ? flow_size - off : 0,
-                                    ptoi, attr, pattern, actions, error);
-       if (ret < 0)
-               return ret;
-       if (flow_size) {
-               *flow = (struct rte_flow){
-                       .attributes = *attr,
-                       .nl_flow = (uint8_t *)flow + off,
-               };
-               /*
-                * Generate a reasonably unique handle based on the address
-                * of the target buffer.
-                *
-                * This is straightforward on 32-bit systems where the flow
-                * pointer can be used directly. Otherwise, its least
-                * significant part is taken after shifting it by the
-                * previous power of two of the pointed buffer size.
-                */
-               if (sizeof(flow) <= 4)
-                       mlx5_nl_flow_brand(flow->nl_flow, (uintptr_t)flow);
-               else
-                       mlx5_nl_flow_brand
-                               (flow->nl_flow,
-                                (uintptr_t)flow >>
-                                rte_log2_u32(rte_align32prevpow2(flow_size)));
-       }
-       return off + ret;
-}
-
-static unsigned int
-mlx5_find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
+static enum mlx5_flow_drv_type
+flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
 {
-       const struct rte_flow_item *item;
-       unsigned int has_vlan = 0;
+       struct priv *priv = dev->data->dev_private;
+       enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX;
 
-       for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
-               if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
-                       has_vlan = 1;
-                       break;
-               }
-       }
-       if (has_vlan)
-               return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
-                                      MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
-       return rss_level < 2 ? MLX5_EXPANSION_ROOT :
-                              MLX5_EXPANSION_ROOT_OUTER;
+       if (attr->transfer)
+               type = MLX5_FLOW_TYPE_TCF;
+       else
+               type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
+                                                MLX5_FLOW_TYPE_VERBS;
+       return type;
 }
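
/*
 * Editor's note: assuming the dv_flow_en device parameter controls DV as
 * the code above suggests, the selection reduces to this mapping:
 *
 *   attr->transfer == 1                  -> MLX5_FLOW_TYPE_TCF (E-Switch/TC)
 *   attr->transfer == 0, dv_flow_en == 1 -> MLX5_FLOW_TYPE_DV
 *   attr->transfer == 0, dv_flow_en == 0 -> MLX5_FLOW_TYPE_VERBS
 */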
 
+#define flow_get_drv_ops(type) flow_drv_ops[type]
+
 /**
- * Convert the @p attributes, @p pattern, @p action, into an flow for the NIC
- * after ensuring the NIC will understand and process it correctly.
- * The conversion is only performed item/action per item/action, each of
- * them is written into the @p flow if its size is lesser or equal to @p
- * flow_size.
- * Validation and memory consumption computation are still performed until the
- * end, unless an error is encountered.
+ * Flow driver validation API. This abstracts calling driver specific functions.
+ * The type of flow driver is determined according to flow attributes.
  *
  * @param[in] dev
- *   Pointer to Ethernet device.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[in] flow_size
- *   Size in bytes of the available space in @p flow, if too small some
- *   garbage may be present.
- * @param[in] attributes
- *   Flow rule attributes.
- * @param[in] pattern
- *   Pattern specification (list terminated by the END pattern item).
+ *   Pointer to the dev structure.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
  * @param[in] actions
- *   Associated actions (list terminated by the END action).
+ *   Pointer to the list of actions.
  * @param[out] error
- *   Perform verbose error reporting if not NULL.
+ *   Pointer to the error structure.
  *
  * @return
- *   On success the number of bytes consumed/necessary, if the returned value
- *   is lesser or equal to @p flow_size, the flow has fully been converted and
- *   can be applied, otherwise another call with this returned memory size
- *   should be done.
- *   On error, a negative errno value is returned and rte_errno is set.
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static int
-mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
-               const size_t flow_size,
-               const struct rte_flow_attr *attributes,
-               const struct rte_flow_item pattern[],
-               const struct rte_flow_action actions[],
-               struct rte_flow_error *error)
+static inline int
+flow_drv_validate(struct rte_eth_dev *dev,
+                 const struct rte_flow_attr *attr,
+                 const struct rte_flow_item items[],
+                 const struct rte_flow_action actions[],
+                 struct rte_flow_error *error)
 {
-       struct rte_flow local_flow = { .layers = 0, };
-       size_t size = sizeof(*flow);
-       union {
-               struct rte_flow_expand_rss buf;
-               uint8_t buffer[2048];
-       } expand_buffer;
-       struct rte_flow_expand_rss *buf = &expand_buffer.buf;
-       struct mlx5_flow_verbs *original_verbs = NULL;
-       size_t original_verbs_size = 0;
-       uint32_t original_layers = 0;
-       int expanded_pattern_idx = 0;
-       int ret;
-       uint32_t i;
+       const struct mlx5_flow_driver_ops *fops;
+       enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
 
-       if (attributes->transfer)
-               return mlx5_flow_merge_switch(dev, flow, flow_size,
-                                             attributes, pattern,
-                                             actions, error);
-       if (size > flow_size)
-               flow = &local_flow;
-       ret = mlx5_flow_attributes(dev, attributes, flow, error);
-       if (ret < 0)
-               return ret;
-       ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
-       if (ret < 0)
-               return ret;
-       if (local_flow.rss.types) {
-               unsigned int graph_root;
+       fops = flow_get_drv_ops(type);
+       return fops->validate(dev, attr, items, actions, error);
+}
 
-               graph_root = mlx5_find_graph_root(pattern,
-                                                 local_flow.rss.level);
-               ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
-                                         pattern, local_flow.rss.types,
-                                         mlx5_support_expansion,
-                                         graph_root);
-               assert(ret > 0 &&
-                      (unsigned int)ret < sizeof(expand_buffer.buffer));
-       } else {
-               buf->entries = 1;
-               buf->entry[0].pattern = (void *)(uintptr_t)pattern;
-       }
-       size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
-                              sizeof(void *));
-       if (size <= flow_size)
-               flow->queue = (void *)(flow + 1);
-       LIST_INIT(&flow->verbs);
-       flow->layers = 0;
-       flow->modifier = 0;
-       flow->fate = 0;
-       for (i = 0; i != buf->entries; ++i) {
-               size_t off = size;
-               size_t off2;
-
-               flow->layers = original_layers;
-               size += sizeof(struct ibv_flow_attr) +
-                       sizeof(struct mlx5_flow_verbs);
-               off2 = size;
-               if (size < flow_size) {
-                       flow->cur_verbs = (void *)((uintptr_t)flow + off);
-                       flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
-                       flow->cur_verbs->specs =
-                               (void *)(flow->cur_verbs->attr + 1);
-               }
-               /* First iteration convert the pattern into Verbs. */
-               if (i == 0) {
-                       /* Actions don't need to be converted several time. */
-                       ret = mlx5_flow_actions(dev, actions, flow,
-                                               (size < flow_size) ?
-                                               flow_size - size : 0,
-                                               error);
-                       if (ret < 0)
-                               return ret;
-                       size += ret;
-               } else {
-                       /*
-                        * Next iteration means the pattern has already been
-                        * converted and an expansion is necessary to match
-                        * the user RSS request.  For that only the expanded
-                        * items will be converted, the common part with the
-                        * user pattern are just copied into the next buffer
-                        * zone.
-                        */
-                       size += original_verbs_size;
-                       if (size < flow_size) {
-                               rte_memcpy(flow->cur_verbs->attr,
-                                          original_verbs->attr,
-                                          original_verbs_size +
-                                          sizeof(struct ibv_flow_attr));
-                               flow->cur_verbs->size = original_verbs_size;
-                       }
-               }
-               ret = mlx5_flow_items
-                       (dev,
-                        (const struct rte_flow_item *)
-                        &buf->entry[i].pattern[expanded_pattern_idx],
-                        flow,
-                        (size < flow_size) ? flow_size - size : 0, error);
-               if (ret < 0)
-                       return ret;
-               size += ret;
-               if (size <= flow_size) {
-                       mlx5_flow_adjust_priority(dev, flow);
-                       LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
-               }
-               /*
-                * Keep a pointer of the first verbs conversion and the layers
-                * it has encountered.
-                */
-               if (i == 0) {
-                       original_verbs = flow->cur_verbs;
-                       original_verbs_size = size - off2;
-                       original_layers = flow->layers;
-                       /*
-                        * move the index of the expanded pattern to the
-                        * first item not addressed yet.
-                        */
-                       if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
-                               expanded_pattern_idx++;
-                       } else {
-                               const struct rte_flow_item *item = pattern;
-
-                               for (item = pattern;
-                                    item->type != RTE_FLOW_ITEM_TYPE_END;
-                                    ++item)
-                                       expanded_pattern_idx++;
-                       }
-               }
-       }
-       /* Restore the origin layers in the flow. */
-       flow->layers = original_layers;
-       return size;
+/**
+ * Flow driver preparation API. This abstracts calling driver specific
+ * functions. Parent flow (rte_flow) should have driver type (drv_type). It
+ * calculates the size of the memory required for the device flow, allocates
+ * the memory, initializes the device flow and returns a pointer to it.
+ *
+ * @param[in] flow
+ *   Pointer to the parent flow structure.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] item_flags
+ *   Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ *   Pointer to bit mask of all actions detected.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
+ */
+static inline struct mlx5_flow *
+flow_drv_prepare(struct rte_flow *flow,
+                const struct rte_flow_attr *attr,
+                const struct rte_flow_item items[],
+                const struct rte_flow_action actions[],
+                uint64_t *item_flags,
+                uint64_t *action_flags,
+                struct rte_flow_error *error)
+{
+       const struct mlx5_flow_driver_ops *fops;
+       enum mlx5_flow_drv_type type = flow->drv_type;
+
+       assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+       fops = flow_get_drv_ops(type);
+       return fops->prepare(attr, items, actions, item_flags, action_flags,
+                            error);
 }
 
 /**
- * Lookup and set the ptype in the data Rx part.  A single Ptype can be used,
- * if several tunnel rules are used on this queue, the tunnel ptype will be
- * cleared.
+ * Flow driver translation API. This abstracts calling driver specific
+ * functions. Parent flow (rte_flow) should have driver type (drv_type). It
+ * translates a generic flow into a driver flow. flow_drv_prepare() must
+ * precede.
  *
- * @param rxq_ctrl
- *   Rx queue to update.
+ * @param[in] dev
+ *   Pointer to the rte dev structure.
+ * @param[in, out] dev_flow
+ *   Pointer to the mlx5 flow.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static void
-mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
+static inline int
+flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
+                  const struct rte_flow_attr *attr,
+                  const struct rte_flow_item items[],
+                  const struct rte_flow_action actions[],
+                  struct rte_flow_error *error)
 {
-       unsigned int i;
-       uint32_t tunnel_ptype = 0;
+       const struct mlx5_flow_driver_ops *fops;
+       enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
 
-       /* Look up for the ptype to use. */
-       for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
-               if (!rxq_ctrl->flow_tunnels_n[i])
-                       continue;
-               if (!tunnel_ptype) {
-                       tunnel_ptype = tunnels_info[i].ptype;
-               } else {
-                       tunnel_ptype = 0;
-                       break;
-               }
-       }
-       rxq_ctrl->rxq.tunnel = tunnel_ptype;
+       assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+       fops = flow_get_drv_ops(type);
+       return fops->translate(dev, dev_flow, attr, items, actions, error);
 }
 
 /**
- * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow.
+ * Flow driver apply API. This abstracts calling driver specific functions.
+ * Parent flow (rte_flow) should have driver type (drv_type). It applies
+ * translated driver flows on to device. flow_drv_translate() must precede.
  *
  * @param[in] dev
- *   Pointer to Ethernet device.
- * @param[in] flow
+ *   Pointer to Ethernet device structure.
+ * @param[in, out] flow
  *   Pointer to flow structure.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
  */
-static void
-mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
+static inline int
+flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+              struct rte_flow_error *error)
 {
-       struct priv *priv = dev->data->dev_private;
-       const int mark = !!(flow->modifier &
-                           (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
-       const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
-       unsigned int i;
-
-       for (i = 0; i != flow->rss.queue_num; ++i) {
-               int idx = (*flow->queue)[i];
-               struct mlx5_rxq_ctrl *rxq_ctrl =
-                       container_of((*priv->rxqs)[idx],
-                                    struct mlx5_rxq_ctrl, rxq);
-
-               if (mark) {
-                       rxq_ctrl->rxq.mark = 1;
-                       rxq_ctrl->flow_mark_n++;
-               }
-               if (tunnel) {
-                       unsigned int j;
+       const struct mlx5_flow_driver_ops *fops;
+       enum mlx5_flow_drv_type type = flow->drv_type;
 
-                       /* Increase the counter matching the flow. */
-                       for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
-                               if ((tunnels_info[j].tunnel & flow->layers) ==
-                                   tunnels_info[j].tunnel) {
-                                       rxq_ctrl->flow_tunnels_n[j]++;
-                                       break;
-                               }
-                       }
-                       mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
-               }
-       }
+       assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+       fops = flow_get_drv_ops(type);
+       return fops->apply(dev, flow, error);
 }
 
 /**
- * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
- * @p flow if no other flow uses it with the same kind of request.
+ * Flow driver remove API. This abstracts calling driver specific functions.
+ * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
+ * on device. All the resources of the flow should be freed by calling
+ * flow_drv_destroy().
  *
- * @param dev
+ * @param[in] dev
  *   Pointer to Ethernet device.
- * @param[in] flow
- *   Pointer to the flow.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
  */
-static void
-mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
+static inline void
+flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
-       struct priv *priv = dev->data->dev_private;
-       const int mark = !!(flow->modifier &
-                           (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
-       const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
-       unsigned int i;
-
-       assert(dev->data->dev_started);
-       for (i = 0; i != flow->rss.queue_num; ++i) {
-               int idx = (*flow->queue)[i];
-               struct mlx5_rxq_ctrl *rxq_ctrl =
-                       container_of((*priv->rxqs)[idx],
-                                    struct mlx5_rxq_ctrl, rxq);
+       const struct mlx5_flow_driver_ops *fops;
+       enum mlx5_flow_drv_type type = flow->drv_type;
 
-               if (mark) {
-                       rxq_ctrl->flow_mark_n--;
-                       rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
-               }
-               if (tunnel) {
-                       unsigned int j;
-
-                       /* Decrease the counter matching the flow. */
-                       for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
-                               if ((tunnels_info[j].tunnel & flow->layers) ==
-                                   tunnels_info[j].tunnel) {
-                                       rxq_ctrl->flow_tunnels_n[j]--;
-                                       break;
-                               }
-                       }
-                       mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
-               }
-       }
+       assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+       fops = flow_get_drv_ops(type);
+       fops->remove(dev, flow);
 }
 
 /**
- * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
+ * Flow driver destroy API. This abstracts calling driver specific functions.
+ * Parent flow (rte_flow) should have driver type (drv_type). It removes the
+ * flow from the device and releases all of its resources.
  *
- * @param dev
+ * @param[in] dev
  *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
  */
-static void
-mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
+static inline void
+flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
 {
-       struct priv *priv = dev->data->dev_private;
-       unsigned int i;
+       const struct mlx5_flow_driver_ops *fops;
+       enum mlx5_flow_drv_type type = flow->drv_type;
 
-       for (i = 0; i != priv->rxqs_n; ++i) {
-               struct mlx5_rxq_ctrl *rxq_ctrl;
-               unsigned int j;
-
-               if (!(*priv->rxqs)[i])
-                       continue;
-               rxq_ctrl = container_of((*priv->rxqs)[i],
-                                       struct mlx5_rxq_ctrl, rxq);
-               rxq_ctrl->flow_mark_n = 0;
-               rxq_ctrl->rxq.mark = 0;
-               for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
-                       rxq_ctrl->flow_tunnels_n[j] = 0;
-               rxq_ctrl->rxq.tunnel = 0;
-       }
+       assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
+       fops = flow_get_drv_ops(type);
+       fops->destroy(dev, flow);
 }
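+
+/*
+ * Illustrative call order for the driver abstraction above (sketch only):
+ *
+ *   ret = flow_drv_validate(dev, attr, items, actions, error);
+ *   dev_flow = flow_drv_prepare(flow, attr, items, actions,
+ *                               &item_flags, &action_flags, error);
+ *   ret = flow_drv_translate(dev, dev_flow, attr, items, actions, error);
+ *   ret = flow_drv_apply(dev, flow, error);
+ *
+ * flow_drv_remove() detaches the flow from the device and
+ * flow_drv_destroy() additionally releases its resources.
+ */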
 
 /**
@@ -2859,134 +1930,55 @@ mlx5_flow_validate(struct rte_eth_dev *dev,
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error)
 {
-       int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
+       int ret;
 
+       ret = flow_drv_validate(dev, attr, items, actions, error);
        if (ret < 0)
                return ret;
        return 0;
 }
 
 /**
- * Remove the flow.
+ * Get RSS action from the action list.
  *
- * @param[in] dev
- *   Pointer to Ethernet device.
- * @param[in, out] flow
- *   Pointer to flow structure.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ *
+ * @return
+ *   Pointer to the RSS action if one exists, NULL otherwise.
  */
-static void
-mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+static const struct rte_flow_action_rss*
+flow_get_rss_action(const struct rte_flow_action actions[])
 {
-       struct priv *priv = dev->data->dev_private;
-       struct mlx5_flow_verbs *verbs;
-
-       if (flow->nl_flow && priv->mnl_socket)
-               mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL);
-       LIST_FOREACH(verbs, &flow->verbs, next) {
-               if (verbs->flow) {
-                       claim_zero(mlx5_glue->destroy_flow(verbs->flow));
-                       verbs->flow = NULL;
-               }
-               if (verbs->hrxq) {
-                       if (flow->fate & MLX5_FLOW_FATE_DROP)
-                               mlx5_hrxq_drop_release(dev);
-                       else
-                               mlx5_hrxq_release(dev, verbs->hrxq);
-                       verbs->hrxq = NULL;
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+               switch (actions->type) {
+               case RTE_FLOW_ACTION_TYPE_RSS:
+                       return (const struct rte_flow_action_rss *)
+                              actions->conf;
+               default:
+                       break;
                }
        }
-       if (flow->counter) {
-               mlx5_flow_counter_release(flow->counter);
-               flow->counter = NULL;
-       }
+       return NULL;
 }
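+
+/*
+ * Usage sketch for flow_get_rss_action() (illustrative): the returned
+ * pointer aliases actions->conf, so the caller must not free it.
+ *
+ *   const struct rte_flow_action_rss *rss = flow_get_rss_action(actions);
+ *   if (rss)
+ *           queue_num = rss->queue_num;
+ */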
 
-/**
- * Apply the flow.
- *
- * @param[in] dev
- *   Pointer to Ethernet device structure.
- * @param[in, out] flow
- *   Pointer to flow structure.
- * @param[out] error
- *   Pointer to error structure.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
-               struct rte_flow_error *error)
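+/**
+ * Get the expansion graph root for RSS pattern expansion. An RSS level of
+ * 2 or more (inner RSS) selects the MLX5_EXPANSION_ROOT_OUTER* entries.
+ *
+ * @param[in] pattern
+ *   Pointer to the list of items.
+ * @param[in] rss_level
+ *   RSS level from the RSS action.
+ *
+ * @return
+ *   Root entry index in the mlx5_support_expansion graph.
+ */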
+static unsigned int
+find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
 {
-       struct priv *priv = dev->data->dev_private;
-       struct mlx5_flow_verbs *verbs;
-       int err;
-
-       LIST_FOREACH(verbs, &flow->verbs, next) {
-               if (flow->fate & MLX5_FLOW_FATE_DROP) {
-                       verbs->hrxq = mlx5_hrxq_drop_new(dev);
-                       if (!verbs->hrxq) {
-                               rte_flow_error_set
-                                       (error, errno,
-                                        RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                                        NULL,
-                                        "cannot get drop hash queue");
-                               goto error;
-                       }
-               } else {
-                       struct mlx5_hrxq *hrxq;
-
-                       hrxq = mlx5_hrxq_get(dev, flow->key,
-                                            MLX5_RSS_HASH_KEY_LEN,
-                                            verbs->hash_fields,
-                                            (*flow->queue),
-                                            flow->rss.queue_num);
-                       if (!hrxq)
-                               hrxq = mlx5_hrxq_new(dev, flow->key,
-                                                    MLX5_RSS_HASH_KEY_LEN,
-                                                    verbs->hash_fields,
-                                                    (*flow->queue),
-                                                    flow->rss.queue_num,
-                                                    !!(flow->layers &
-                                                     MLX5_FLOW_LAYER_TUNNEL));
-                       if (!hrxq) {
-                               rte_flow_error_set
-                                       (error, rte_errno,
-                                        RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                                        NULL,
-                                        "cannot get hash queue");
-                               goto error;
-                       }
-                       verbs->hrxq = hrxq;
-               }
-               verbs->flow =
-                       mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
-               if (!verbs->flow) {
-                       rte_flow_error_set(error, errno,
-                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                                          NULL,
-                                          "hardware refuses to create flow");
-                       goto error;
-               }
-       }
-       if (flow->nl_flow &&
-           priv->mnl_socket &&
-           mlx5_nl_flow_create(priv->mnl_socket, flow->nl_flow, error))
-               goto error;
-       return 0;
-error:
-       err = rte_errno; /* Save rte_errno before cleanup. */
-       LIST_FOREACH(verbs, &flow->verbs, next) {
-               if (verbs->hrxq) {
-                       if (flow->fate & MLX5_FLOW_FATE_DROP)
-                               mlx5_hrxq_drop_release(dev);
-                       else
-                               mlx5_hrxq_release(dev, verbs->hrxq);
-                       verbs->hrxq = NULL;
+       const struct rte_flow_item *item;
+       unsigned int has_vlan = 0;
+
+       for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+               if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
+                       has_vlan = 1;
+                       break;
                }
        }
-       rte_errno = err; /* Restore rte_errno. */
-       return -rte_errno;
+       if (has_vlan)
+               return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
+                                      MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
+       return rss_level < 2 ? MLX5_EXPANSION_ROOT :
+                              MLX5_EXPANSION_ROOT_OUTER;
 }
 
 /**
@@ -3009,50 +2001,90 @@ error:
  *   A flow on success, NULL otherwise and rte_errno is set.
  */
 static struct rte_flow *
-mlx5_flow_list_create(struct rte_eth_dev *dev,
-                     struct mlx5_flows *list,
-                     const struct rte_flow_attr *attr,
-                     const struct rte_flow_item items[],
-                     const struct rte_flow_action actions[],
-                     struct rte_flow_error *error)
+flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list,
+                const struct rte_flow_attr *attr,
+                const struct rte_flow_item items[],
+                const struct rte_flow_action actions[],
+                struct rte_flow_error *error)
 {
        struct rte_flow *flow = NULL;
-       size_t size = 0;
+       struct mlx5_flow *dev_flow;
+       uint64_t action_flags = 0;
+       uint64_t item_flags = 0;
+       const struct rte_flow_action_rss *rss;
+       union {
+               struct rte_flow_expand_rss buf;
+               uint8_t buffer[2048];
+       } expand_buffer;
+       struct rte_flow_expand_rss *buf = &expand_buffer.buf;
        int ret;
+       uint32_t i;
+       uint32_t flow_size;
 
-       ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
+       ret = flow_drv_validate(dev, attr, items, actions, error);
        if (ret < 0)
                return NULL;
-       size = ret;
-       flow = rte_calloc(__func__, 1, size, 0);
-       if (!flow) {
-               rte_flow_error_set(error, ENOMEM,
-                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                                  NULL,
-                                  "not enough memory to create flow");
-               return NULL;
+       flow_size = sizeof(struct rte_flow);
+       rss = flow_get_rss_action(actions);
+       if (rss)
+               flow_size += RTE_ALIGN_CEIL(rss->queue_num * sizeof(uint16_t),
+                                           sizeof(void *));
+       else
+               flow_size += RTE_ALIGN_CEIL(sizeof(uint16_t), sizeof(void *));
+       flow = rte_calloc(__func__, 1, flow_size, 0);
+       if (!flow) {
+               rte_flow_error_set(error, ENOMEM,
+                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                  "not enough memory to create flow");
+               return NULL;
+       }
+       flow->drv_type = flow_get_drv_type(dev, attr);
+       assert(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
+              flow->drv_type < MLX5_FLOW_TYPE_MAX);
+       flow->queue = (void *)(flow + 1);
+       LIST_INIT(&flow->dev_flows);
+       if (rss && rss->types) {
+               unsigned int graph_root;
+
+               graph_root = find_graph_root(items, rss->level);
+               ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
+                                         items, rss->types,
+                                         mlx5_support_expansion,
+                                         graph_root);
+               assert(ret > 0 &&
+                      (unsigned int)ret < sizeof(expand_buffer.buffer));
+       } else {
+               buf->entries = 1;
+               buf->entry[0].pattern = (void *)(uintptr_t)items;
        }
-       ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
-       if (ret < 0) {
-               rte_free(flow);
-               return NULL;
+       for (i = 0; i < buf->entries; ++i) {
+               dev_flow = flow_drv_prepare(flow, attr, buf->entry[i].pattern,
+                                           actions, &item_flags, &action_flags,
+                                           error);
+               if (!dev_flow)
+                       goto error;
+               dev_flow->flow = flow;
+               dev_flow->layers = item_flags;
+               /* Store actions once as expanded flows have same actions. */
+               if (i == 0)
+                       flow->actions = action_flags;
+               assert(flow->actions == action_flags);
+               LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
+               ret = flow_drv_translate(dev, dev_flow, attr,
+                                        buf->entry[i].pattern,
+                                        actions, error);
+               if (ret < 0)
+                       goto error;
        }
-       assert((size_t)ret == size);
        if (dev->data->dev_started) {
-               ret = mlx5_flow_apply(dev, flow, error);
-               if (ret < 0) {
-                       ret = rte_errno; /* Save rte_errno before cleanup. */
-                       if (flow) {
-                               mlx5_flow_remove(dev, flow);
-                               rte_free(flow);
-                       }
-                       rte_errno = ret; /* Restore rte_errno. */
-                       return NULL;
-               }
+               ret = flow_drv_apply(dev, flow, error);
+               if (ret < 0)
+                       goto error;
        }
        TAILQ_INSERT_TAIL(list, flow, next);
-       mlx5_flow_rxq_flags_set(dev, flow);
+       flow_rxq_flags_set(dev, flow);
        return flow;
+error:
+       ret = rte_errno; /* Save rte_errno before cleanup. */
+       assert(flow);
+       flow_drv_destroy(dev, flow);
+       rte_free(flow);
+       rte_errno = ret; /* Restore rte_errno. */
+       return NULL;
 }
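+
+/*
+ * Sketch of the RSS expansion performed above (illustrative): with
+ * rss->types == ETH_RSS_IP and a bare ETH pattern, rte_flow_expand_rss()
+ * may yield the entries ETH, ETH/IPV4 and ETH/IPV6; one mlx5_flow
+ * (dev_flow) is then prepared, translated and linked for each entry.
+ */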
 
 /**
@@ -3068,9 +2100,9 @@ mlx5_flow_create(struct rte_eth_dev *dev,
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
 {
-       return mlx5_flow_list_create
-               (dev, &((struct priv *)dev->data->dev_private)->flows,
-                attr, items, actions, error);
+       return flow_list_create(dev,
+                               &((struct priv *)dev->data->dev_private)->flows,
+                               attr, items, actions, error);
 }
 
 /**
@@ -3084,17 +2116,17 @@ mlx5_flow_create(struct rte_eth_dev *dev,
  *   Flow to destroy.
  */
 static void
-mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
-                      struct rte_flow *flow)
+flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
+                 struct rte_flow *flow)
 {
-       mlx5_flow_remove(dev, flow);
+       flow_drv_destroy(dev, flow);
        TAILQ_REMOVE(list, flow, next);
        /*
         * Update RX queue flags only if port is started, otherwise it is
         * already clean.
         */
        if (dev->data->dev_started)
-               mlx5_flow_rxq_flags_trim(dev, flow);
+               flow_rxq_flags_trim(dev, flow);
        rte_free(flow);
 }
 
@@ -3113,7 +2145,7 @@ mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
                struct rte_flow *flow;
 
                flow = TAILQ_FIRST(list);
-               mlx5_flow_list_destroy(dev, list, flow);
+               flow_list_destroy(dev, list, flow);
        }
 }
 
@@ -3131,8 +2163,8 @@ mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
        struct rte_flow *flow;
 
        TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
-               mlx5_flow_remove(dev, flow);
-       mlx5_flow_rxq_flags_clear(dev);
+               flow_drv_remove(dev, flow);
+       flow_rxq_flags_clear(dev);
 }
 
 /**
@@ -3154,10 +2186,10 @@ mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
        int ret = 0;
 
        TAILQ_FOREACH(flow, list, next) {
-               ret = mlx5_flow_apply(dev, flow, &error);
+               ret = flow_drv_apply(dev, flow, &error);
                if (ret < 0)
                        goto error;
-               mlx5_flow_rxq_flags_set(dev, flow);
+               flow_rxq_flags_set(dev, flow);
        }
        return 0;
 error:
@@ -3228,7 +2260,7 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
                },
                {
                        .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
-                               RTE_FLOW_ITEM_TYPE_END,
+                                             RTE_FLOW_ITEM_TYPE_END,
                        .spec = vlan_spec,
                        .last = NULL,
                        .mask = vlan_mask,
@@ -3266,8 +2298,8 @@ mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
        }
        for (i = 0; i != priv->reta_idx_n; ++i)
                queue[i] = (*priv->reta_idx)[i];
-       flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
-                                    actions, &error);
+       flow = flow_list_create(dev, &priv->ctrl_flows,
+                               &attr, items, actions, &error);
        if (!flow)
                return -rte_errno;
        return 0;
@@ -3307,7 +2339,7 @@ mlx5_flow_destroy(struct rte_eth_dev *dev,
 {
        struct priv *priv = dev->data->dev_private;
 
-       mlx5_flow_list_destroy(dev, &priv->flows, flow);
+       flow_list_destroy(dev, &priv->flows, flow);
        return 0;
 }
 
@@ -3356,92 +2388,45 @@ mlx5_flow_isolate(struct rte_eth_dev *dev,
 }
 
 /**
- * Query flow counter.
- *
- * @param flow
- *   Pointer to the flow.
+ * Flow driver query API. This abstracts calling driver specific functions.
  *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
+ * @see rte_flow_query()
+ * @see rte_flow_ops
  */
 static int
-mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
-                     void *data __rte_unused,
-                     struct rte_flow_error *error)
-{
-#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
-       if (flow->modifier & MLX5_FLOW_MOD_COUNT) {
-               struct rte_flow_query_count *qc = data;
-               uint64_t counters[2] = {0, 0};
-               struct ibv_query_counter_set_attr query_cs_attr = {
-                       .cs = flow->counter->cs,
-                       .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
-               };
-               struct ibv_counter_set_data query_out = {
-                       .out = counters,
-                       .outlen = 2 * sizeof(uint64_t),
-               };
-               int err = mlx5_glue->query_counter_set(&query_cs_attr,
-                                                      &query_out);
+flow_drv_query(struct rte_eth_dev *dev,
+              struct rte_flow *flow,
+              const struct rte_flow_action *actions,
+              void *data,
+              struct rte_flow_error *error)
+{
+       const struct mlx5_flow_driver_ops *fops;
+       enum mlx5_flow_drv_type ftype = flow->drv_type;
 
-               if (err)
-                       return rte_flow_error_set
-                               (error, err,
-                                RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                                NULL,
-                                "cannot read counter");
-               qc->hits_set = 1;
-               qc->bytes_set = 1;
-               qc->hits = counters[0] - flow->counter->hits;
-               qc->bytes = counters[1] - flow->counter->bytes;
-               if (qc->reset) {
-                       flow->counter->hits = counters[0];
-                       flow->counter->bytes = counters[1];
-               }
-               return 0;
-       }
-       return rte_flow_error_set(error, ENOTSUP,
-                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                                 NULL,
-                                 "flow does not have counter");
-#endif
-       return rte_flow_error_set(error, ENOTSUP,
-                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                                 NULL,
-                                 "counters are not available");
+       assert(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
+       fops = flow_get_drv_ops(ftype);
+       return fops->query(dev, flow, actions, data, error);
 }
 
 /**
- * Query a flows.
+ * Query a flow.
  *
  * @see rte_flow_query()
  * @see rte_flow_ops
  */
 int
-mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
+mlx5_flow_query(struct rte_eth_dev *dev,
                struct rte_flow *flow,
                const struct rte_flow_action *actions,
                void *data,
                struct rte_flow_error *error)
 {
-       int ret = 0;
+       int ret;
 
-       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
-               switch (actions->type) {
-               case RTE_FLOW_ACTION_TYPE_VOID:
-                       break;
-               case RTE_FLOW_ACTION_TYPE_COUNT:
-                       ret = mlx5_flow_query_count(flow, data, error);
-                       break;
-               default:
-                       return rte_flow_error_set(error, ENOTSUP,
-                                                 RTE_FLOW_ERROR_TYPE_ACTION,
-                                                 actions,
-                                                 "action not supported");
-               }
-               if (ret < 0)
-                       return ret;
-       }
+       ret = flow_drv_query(dev, flow, actions, data, error);
+       if (ret < 0)
+               return ret;
        return 0;
 }
 
@@ -3511,7 +2496,6 @@ mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
                        .dst_addr = input->flow.ip4_flow.dst_ip,
                        .time_to_live = input->flow.ip4_flow.ttl,
                        .type_of_service = input->flow.ip4_flow.tos,
-                       .next_proto_id = input->flow.ip4_flow.proto,
                };
                attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
                        .src_addr = mask->ipv4_mask.src_ip,
@@ -3663,9 +2647,8 @@ mlx5_fdir_filter_add(struct rte_eth_dev *dev,
        ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
        if (ret)
                return ret;
-       flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
-                                    attributes.items, attributes.actions,
-                                    &error);
+       flow = flow_list_create(dev, &priv->flows, &attributes.attr,
+                               attributes.items, attributes.actions, &error);
        if (flow) {
                DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
                        (void *)flow);
diff --git a/drivers/net/mlx5/mlx5_flow.h b/drivers/net/mlx5/mlx5_flow.h
new file mode 100644 (file)
index 0000000..61299d6
--- /dev/null
@@ -0,0 +1,375 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#ifndef RTE_PMD_MLX5_FLOW_H_
+#define RTE_PMD_MLX5_FLOW_H_
+
+#include <netinet/in.h>
+#include <sys/queue.h>
+#include <stdalign.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+/* Pattern outer Layer bits. */
+#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
+#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
+#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
+#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
+#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
+#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
+
+/* Pattern inner Layer bits. */
+#define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
+#define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
+#define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
+#define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
+#define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
+#define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)
+
+/* Pattern tunnel Layer bits. */
+#define MLX5_FLOW_LAYER_VXLAN (1u << 12)
+#define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
+#define MLX5_FLOW_LAYER_GRE (1u << 14)
+#define MLX5_FLOW_LAYER_MPLS (1u << 15)
+
+/* General pattern items bits. */
+#define MLX5_FLOW_ITEM_METADATA (1u << 16)
+
+/* Outer Masks. */
+#define MLX5_FLOW_LAYER_OUTER_L3 \
+       (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
+#define MLX5_FLOW_LAYER_OUTER_L4 \
+       (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
+#define MLX5_FLOW_LAYER_OUTER \
+       (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
+        MLX5_FLOW_LAYER_OUTER_L4)
+
+/* Tunnel Masks. */
+#define MLX5_FLOW_LAYER_TUNNEL \
+       (MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
+        MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS)
+
+/* Inner Masks. */
+#define MLX5_FLOW_LAYER_INNER_L3 \
+       (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
+#define MLX5_FLOW_LAYER_INNER_L4 \
+       (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
+#define MLX5_FLOW_LAYER_INNER \
+       (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
+        MLX5_FLOW_LAYER_INNER_L4)
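+
+/*
+ * Example (illustrative): a VXLAN-encapsulated TCP flow accumulates
+ * MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
+ * MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_VXLAN |
+ * MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3_IPV4 |
+ * MLX5_FLOW_LAYER_INNER_L4_TCP in its layers bit-field.
+ */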
+
+/* Actions */
+#define MLX5_FLOW_ACTION_DROP (1u << 0)
+#define MLX5_FLOW_ACTION_QUEUE (1u << 1)
+#define MLX5_FLOW_ACTION_RSS (1u << 2)
+#define MLX5_FLOW_ACTION_FLAG (1u << 3)
+#define MLX5_FLOW_ACTION_MARK (1u << 4)
+#define MLX5_FLOW_ACTION_COUNT (1u << 5)
+#define MLX5_FLOW_ACTION_PORT_ID (1u << 6)
+#define MLX5_FLOW_ACTION_OF_POP_VLAN (1u << 7)
+#define MLX5_FLOW_ACTION_OF_PUSH_VLAN (1u << 8)
+#define MLX5_FLOW_ACTION_OF_SET_VLAN_VID (1u << 9)
+#define MLX5_FLOW_ACTION_OF_SET_VLAN_PCP (1u << 10)
+#define MLX5_FLOW_ACTION_SET_IPV4_SRC (1u << 11)
+#define MLX5_FLOW_ACTION_SET_IPV4_DST (1u << 12)
+#define MLX5_FLOW_ACTION_SET_IPV6_SRC (1u << 13)
+#define MLX5_FLOW_ACTION_SET_IPV6_DST (1u << 14)
+#define MLX5_FLOW_ACTION_SET_TP_SRC (1u << 15)
+#define MLX5_FLOW_ACTION_SET_TP_DST (1u << 16)
+#define MLX5_FLOW_ACTION_JUMP (1u << 17)
+#define MLX5_FLOW_ACTION_SET_TTL (1u << 18)
+#define MLX5_FLOW_ACTION_DEC_TTL (1u << 19)
+#define MLX5_FLOW_ACTION_SET_MAC_SRC (1u << 20)
+#define MLX5_FLOW_ACTION_SET_MAC_DST (1u << 21)
+
+#define MLX5_FLOW_FATE_ACTIONS \
+       (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS)
+
+#ifndef IPPROTO_MPLS
+#define IPPROTO_MPLS 137
+#endif
+
+/* UDP port numbers for VxLAN. */
+#define MLX5_UDP_PORT_VXLAN 4789
+#define MLX5_UDP_PORT_VXLAN_GPE 4790
+
+/* Priority reserved for default flows. */
+#define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
+
+/*
+ * Number of sub priorities.
+ * For each kind of pattern matching, i.e. L2, L3, L4, to have a correct
+ * match on the NIC (firmware dependent), L4 must have the highest priority,
+ * followed by L3 and ending with L2.
+ */
+#define MLX5_PRIORITY_MAP_L2 2
+#define MLX5_PRIORITY_MAP_L3 1
+#define MLX5_PRIORITY_MAP_L4 0
+#define MLX5_PRIORITY_MAP_MAX 3
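+
+/*
+ * Example (illustrative): two flows created at the same user priority but
+ * matching up to L4 and L2 respectively get sub-priorities 0 and 2, so the
+ * more specific L4 match is evaluated first by the NIC.
+ */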
+
+/* Valid layer type for IPV4 RSS. */
+#define MLX5_IPV4_LAYER_TYPES \
+       (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | \
+        ETH_RSS_NONFRAG_IPV4_TCP | ETH_RSS_NONFRAG_IPV4_UDP | \
+        ETH_RSS_NONFRAG_IPV4_OTHER)
+
+/* IBV hash source bits for IPV4. */
+#define MLX5_IPV4_IBV_RX_HASH (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4)
+
+/* Valid layer type for IPV6 RSS. */
+#define MLX5_IPV6_LAYER_TYPES \
+       (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 | ETH_RSS_NONFRAG_IPV6_TCP | \
+        ETH_RSS_NONFRAG_IPV6_UDP | ETH_RSS_IPV6_EX | ETH_RSS_IPV6_TCP_EX | \
+        ETH_RSS_IPV6_UDP_EX | ETH_RSS_NONFRAG_IPV6_OTHER)
+
+/* IBV hash source bits for IPV6. */
+#define MLX5_IPV6_IBV_RX_HASH (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6)
+
+/* Max number of actions per DV flow. */
+#define MLX5_DV_MAX_NUMBER_OF_ACTIONS 8
+
+enum mlx5_flow_drv_type {
+       MLX5_FLOW_TYPE_MIN,
+       MLX5_FLOW_TYPE_DV,
+       MLX5_FLOW_TYPE_TCF,
+       MLX5_FLOW_TYPE_VERBS,
+       MLX5_FLOW_TYPE_MAX,
+};
+
+/* Matcher PRM representation */
+struct mlx5_flow_dv_match_params {
+       size_t size;
+       /**< Size of match value. Do NOT split size and key! */
+       uint32_t buf[MLX5_ST_SZ_DW(fte_match_param)];
+       /**< Matcher value. This value is used as the mask or as a key. */
+};
+
+/* Matcher structure. */
+struct mlx5_flow_dv_matcher {
+       LIST_ENTRY(mlx5_flow_dv_matcher) next;
+       /* Pointer to the next element. */
+       rte_atomic32_t refcnt; /**< Reference counter. */
+       void *matcher_object; /**< Pointer to DV matcher. */
+       uint16_t crc; /**< CRC of key. */
+       uint16_t priority; /**< Priority of matcher. */
+       uint8_t egress; /**< Egress matcher. */
+       struct mlx5_flow_dv_match_params mask; /**< Matcher mask. */
+};
+
+/* DV flows structure. */
+struct mlx5_flow_dv {
+       uint64_t hash_fields; /**< Fields that participate in the hash. */
+       struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
+       /* Flow DV API. */
+       struct mlx5_flow_dv_matcher *matcher; /**< Cache to matcher. */
+       struct mlx5_flow_dv_match_params value;
+       /**< Holds the value that the packet is compared to. */
+       struct ibv_flow *flow; /**< Installed flow. */
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+       struct mlx5dv_flow_action_attr actions[MLX5_DV_MAX_NUMBER_OF_ACTIONS];
+       /**< Action list. */
+#endif
+       int actions_n; /**< Number of actions. */
+};
+
+/** Linux TC flower driver for E-Switch flow. */
+struct mlx5_flow_tcf {
+       struct nlmsghdr *nlh;
+       struct tcmsg *tcm;
+};
+
+/* Verbs specification header. */
+struct ibv_spec_header {
+       enum ibv_flow_spec_type type;
+       uint16_t size;
+};
+
+/** Verbs flow structure. */
+struct mlx5_flow_verbs {
+       LIST_ENTRY(mlx5_flow_verbs) next;
+       unsigned int size; /**< Size of the attribute. */
+       struct {
+               struct ibv_flow_attr *attr;
+               /**< Pointer to the Specification buffer. */
+               uint8_t *specs; /**< Pointer to the specifications. */
+       };
+       struct ibv_flow *flow; /**< Verbs flow pointer. */
+       struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
+       uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
+};
+
+/** Device flow structure. */
+struct mlx5_flow {
+       LIST_ENTRY(mlx5_flow) next;
+       struct rte_flow *flow; /**< Pointer to the main flow. */
+       uint64_t layers;
+       /**< Bit-fields of present layers, see MLX5_FLOW_LAYER_*. */
+       union {
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+               struct mlx5_flow_dv dv;
+#endif
+               struct mlx5_flow_tcf tcf;
+               struct mlx5_flow_verbs verbs;
+       };
+};
+
+/* Counters information. */
+struct mlx5_flow_counter {
+       LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
+       uint32_t shared:1; /**< Share counter ID with other flow rules. */
+       uint32_t ref_cnt:31; /**< Reference counter. */
+       uint32_t id; /**< Counter ID. */
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
+       struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
+#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+       struct ibv_counters *cs; /**< Holds the counters for the rule. */
+#endif
+       uint64_t hits; /**< Number of packets matched by the rule. */
+       uint64_t bytes; /**< Number of bytes matched by the rule. */
+};
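+
+/*
+ * Usage sketch (illustrative): hits and bytes hold the values sampled at
+ * flow creation or at the last reset, so a counter query reports deltas
+ * (hw_counters below names the freshly read device values):
+ *
+ *   qc->hits = hw_counters[0] - flow->counter->hits;
+ *   qc->bytes = hw_counters[1] - flow->counter->bytes;
+ */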
+
+/* Flow structure. */
+struct rte_flow {
+       TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
+       enum mlx5_flow_drv_type drv_type; /**< Driver type. */
+       struct mlx5_flow_counter *counter; /**< Holds flow counter. */
+       struct rte_flow_action_rss rss; /**< RSS context. */
+       uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
+       uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
+       LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
+       /**< Device flows that are part of the flow. */
+       uint64_t actions;
+       /**< Bit-fields of detected actions, see MLX5_FLOW_ACTION_*. */
+};
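+
+/*
+ * Layout note (illustrative): rte_flow is allocated with the RSS queue
+ * array appended to the same memory block, as done in flow_list_create():
+ *
+ *   flow = rte_calloc(__func__, 1, flow_size, 0);
+ *   flow->queue = (void *)(flow + 1);
+ */
+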
+typedef int (*mlx5_flow_validate_t)(struct rte_eth_dev *dev,
+                                   const struct rte_flow_attr *attr,
+                                   const struct rte_flow_item items[],
+                                   const struct rte_flow_action actions[],
+                                   struct rte_flow_error *error);
+typedef struct mlx5_flow *(*mlx5_flow_prepare_t)
+       (const struct rte_flow_attr *attr, const struct rte_flow_item items[],
+        const struct rte_flow_action actions[], uint64_t *item_flags,
+        uint64_t *action_flags, struct rte_flow_error *error);
+typedef int (*mlx5_flow_translate_t)(struct rte_eth_dev *dev,
+                                    struct mlx5_flow *dev_flow,
+                                    const struct rte_flow_attr *attr,
+                                    const struct rte_flow_item items[],
+                                    const struct rte_flow_action actions[],
+                                    struct rte_flow_error *error);
+typedef int (*mlx5_flow_apply_t)(struct rte_eth_dev *dev, struct rte_flow *flow,
+                                struct rte_flow_error *error);
+typedef void (*mlx5_flow_remove_t)(struct rte_eth_dev *dev,
+                                  struct rte_flow *flow);
+typedef void (*mlx5_flow_destroy_t)(struct rte_eth_dev *dev,
+                                   struct rte_flow *flow);
+typedef int (*mlx5_flow_query_t)(struct rte_eth_dev *dev,
+                                struct rte_flow *flow,
+                                const struct rte_flow_action *actions,
+                                void *data,
+                                struct rte_flow_error *error);
+struct mlx5_flow_driver_ops {
+       mlx5_flow_validate_t validate;
+       mlx5_flow_prepare_t prepare;
+       mlx5_flow_translate_t translate;
+       mlx5_flow_apply_t apply;
+       mlx5_flow_remove_t remove;
+       mlx5_flow_destroy_t destroy;
+       mlx5_flow_query_t query;
+};
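+
+/*
+ * Each flow backend provides one mlx5_flow_driver_ops instance; the table
+ * below is an illustrative sketch only (the real tables live in the
+ * per-driver files and their names may differ):
+ *
+ *   const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
+ *           .validate = flow_dv_validate,
+ *           .prepare = flow_dv_prepare,
+ *           ...
+ *   };
+ */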
+
+/* mlx5_flow.c */
+
+uint64_t mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow, int tunnel,
+                                    uint64_t layer_types,
+                                    uint64_t hash_fields);
+uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
+                                  uint32_t subpriority);
+int mlx5_flow_validate_action_count(struct rte_eth_dev *dev,
+                                   const struct rte_flow_attr *attr,
+                                   struct rte_flow_error *error);
+int mlx5_flow_validate_action_drop(uint64_t action_flags,
+                                  const struct rte_flow_attr *attr,
+                                  struct rte_flow_error *error);
+int mlx5_flow_validate_action_flag(uint64_t action_flags,
+                                  const struct rte_flow_attr *attr,
+                                  struct rte_flow_error *error);
+int mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
+                                  uint64_t action_flags,
+                                  const struct rte_flow_attr *attr,
+                                  struct rte_flow_error *error);
+int mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
+                                   uint64_t action_flags,
+                                   struct rte_eth_dev *dev,
+                                   const struct rte_flow_attr *attr,
+                                   struct rte_flow_error *error);
+int mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
+                                 uint64_t action_flags,
+                                 struct rte_eth_dev *dev,
+                                 const struct rte_flow_attr *attr,
+                                 struct rte_flow_error *error);
+int mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
+                                 const struct rte_flow_attr *attributes,
+                                 struct rte_flow_error *error);
+int mlx5_flow_item_acceptable(const struct rte_flow_item *item,
+                             const uint8_t *mask,
+                             const uint8_t *nic_mask,
+                             unsigned int size,
+                             struct rte_flow_error *error);
+int mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
+                               uint64_t item_flags,
+                               struct rte_flow_error *error);
+int mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
+                               uint64_t item_flags,
+                               uint8_t target_protocol,
+                               struct rte_flow_error *error);
+int mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
+                                int64_t item_flags,
+                                struct rte_flow_error *error);
+int mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
+                                uint64_t item_flags,
+                                struct rte_flow_error *error);
+int mlx5_flow_validate_item_mpls(const struct rte_flow_item *item,
+                                uint64_t item_flags,
+                                uint8_t target_protocol,
+                                struct rte_flow_error *error);
+int mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
+                               uint64_t item_flags,
+                               uint8_t target_protocol,
+                               const struct rte_flow_item_tcp *flow_mask,
+                               struct rte_flow_error *error);
+int mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
+                               uint64_t item_flags,
+                               uint8_t target_protocol,
+                               struct rte_flow_error *error);
+int mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
+                                int64_t item_flags,
+                                struct rte_flow_error *error);
+int mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
+                                 uint64_t item_flags,
+                                 struct rte_flow_error *error);
+int mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
+                                     uint64_t item_flags,
+                                     struct rte_eth_dev *dev,
+                                     struct rte_flow_error *error);
+
+/* mlx5_flow_tcf.c */
+
+int mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
+                      unsigned int ifindex, struct rte_flow_error *error);
+struct mlx5_flow_tcf_context *mlx5_flow_tcf_context_create(void);
+void mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx);
+
+#endif /* RTE_PMD_MLX5_FLOW_H_ */
diff --git a/drivers/net/mlx5/mlx5_flow_dv.c b/drivers/net/mlx5/mlx5_flow_dv.c
new file mode 100644 (file)
index 0000000..8f729f4
--- /dev/null
@@ -0,0 +1,1492 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <sys/queue.h>
+#include <stdalign.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_common.h>
+#include <rte_ether.h>
+#include <rte_eth_ctrl.h>
+#include <rte_ethdev_driver.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_malloc.h>
+#include <rte_ip.h>
+
+#include "mlx5.h"
+#include "mlx5_defs.h"
+#include "mlx5_prm.h"
+#include "mlx5_glue.h"
+#include "mlx5_flow.h"
+
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+
+/**
+ * Validate META item.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure.
+ * @param[in] item
+ *   Item specification.
+ * @param[in] attr
+ *   Attributes of flow that includes this item.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_item_meta(struct rte_eth_dev *dev,
+                          const struct rte_flow_item *item,
+                          const struct rte_flow_attr *attr,
+                          struct rte_flow_error *error)
+{
+       const struct rte_flow_item_meta *spec = item->spec;
+       const struct rte_flow_item_meta *mask = item->mask;
+       const struct rte_flow_item_meta nic_mask = {
+               .data = RTE_BE32(UINT32_MAX)
+       };
+       int ret;
+       uint64_t offloads = dev->data->dev_conf.txmode.offloads;
+
+       if (!(offloads & DEV_TX_OFFLOAD_MATCH_METADATA))
+               return rte_flow_error_set(error, EPERM,
+                                         RTE_FLOW_ERROR_TYPE_ITEM,
+                                         NULL,
+                                         "match on metadata offload "
+                                         "configuration is off for this port");
+       if (!spec)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+                                         item->spec,
+                                         "data cannot be empty");
+       if (!spec->data)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+                                         NULL,
+                                         "data cannot be zero");
+       if (!mask)
+               mask = &rte_flow_item_meta_mask;
+       ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
+                                       (const uint8_t *)&nic_mask,
+                                       sizeof(struct rte_flow_item_meta),
+                                       error);
+       if (ret < 0)
+               return ret;
+       if (attr->ingress)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+                                         NULL,
+                                         "pattern not supported for ingress");
+       return 0;
+}
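+
+/*
+ * Illustrative META item accepted by the check above (egress flows only,
+ * with DEV_TX_OFFLOAD_MATCH_METADATA enabled on the port):
+ *
+ *   struct rte_flow_item_meta spec = { .data = RTE_BE32(0xcafe) };
+ *   struct rte_flow_item item = {
+ *           .type = RTE_FLOW_ITEM_TYPE_META,
+ *           .spec = &spec,
+ *   };
+ */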
+
+/**
+ * Verify the @p attributes will be correctly understood by the NIC and store
+ * them in the @p flow if everything is correct.
+ *
+ * @param[in] dev
+ *   Pointer to dev struct.
+ * @param[in] attributes
+ *   Pointer to flow attributes
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate_attributes(struct rte_eth_dev *dev,
+                           const struct rte_flow_attr *attributes,
+                           struct rte_flow_error *error)
+{
+       struct priv *priv = dev->data->dev_private;
+       uint32_t priority_max = priv->config.flow_prio - 1;
+
+       if (attributes->group)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+                                         NULL,
+                                         "groups is not supported");
+       if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
+           attributes->priority >= priority_max)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+                                         NULL,
+                                         "priority out of range");
+       if (attributes->transfer)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
+                                         NULL,
+                                         "transfer is not supported");
+       if (!(attributes->egress ^ attributes->ingress))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR, NULL,
+                                         "must specify exactly one of "
+                                         "ingress or egress");
+       return 0;
+}
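+
+/*
+ * Examples for the checks above (illustrative): { .ingress = 1 } is
+ * accepted, while { .ingress = 1, .egress = 1 } or an empty direction is
+ * rejected because exactly one of ingress/egress must be set.
+ */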
+
+/**
+ * Internal validation function. For validating both actions and items.
+ *
+ * @param[in] dev
+ *   Pointer to the rte_eth_dev structure.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_validate(struct rte_eth_dev *dev, const struct rte_flow_attr *attr,
+                const struct rte_flow_item items[],
+                const struct rte_flow_action actions[],
+                struct rte_flow_error *error)
+{
+       int ret;
+       uint64_t action_flags = 0;
+       uint64_t item_flags = 0;
+       int tunnel = 0;
+       uint8_t next_protocol = 0xff;
+       int actions_n = 0;
+
+       if (items == NULL)
+               return -1;
+       ret = flow_dv_validate_attributes(dev, attr, error);
+       if (ret < 0)
+               return ret;
+       for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+               tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+               switch (items->type) {
+               case RTE_FLOW_ITEM_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ITEM_TYPE_ETH:
+                       ret = mlx5_flow_validate_item_eth(items, item_flags,
+                                                         error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+                                              MLX5_FLOW_LAYER_OUTER_L2;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VLAN:
+                       ret = mlx5_flow_validate_item_vlan(items, item_flags,
+                                                          error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
+                                              MLX5_FLOW_LAYER_OUTER_VLAN;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV4:
+                       ret = mlx5_flow_validate_item_ipv4(items, item_flags,
+                                                          error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+                                              MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+                       if (items->mask != NULL &&
+                           ((const struct rte_flow_item_ipv4 *)
+                            items->mask)->hdr.next_proto_id)
+                               next_protocol =
+                                       ((const struct rte_flow_item_ipv4 *)
+                                        (items->spec))->hdr.next_proto_id;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV6:
+                       ret = mlx5_flow_validate_item_ipv6(items, item_flags,
+                                                          error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+                                              MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+                       if (items->mask != NULL &&
+                           ((const struct rte_flow_item_ipv6 *)
+                            items->mask)->hdr.proto)
+                               next_protocol =
+                                       ((const struct rte_flow_item_ipv6 *)
+                                        items->spec)->hdr.proto;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_TCP:
+                       ret = mlx5_flow_validate_item_tcp
+                                               (items, item_flags,
+                                                next_protocol,
+                                                &rte_flow_item_tcp_mask,
+                                                error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
+                                              MLX5_FLOW_LAYER_OUTER_L4_TCP;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_UDP:
+                       ret = mlx5_flow_validate_item_udp(items, item_flags,
+                                                         next_protocol,
+                                                         error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
+                                              MLX5_FLOW_LAYER_OUTER_L4_UDP;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_GRE:
+               case RTE_FLOW_ITEM_TYPE_NVGRE:
+                       ret = mlx5_flow_validate_item_gre(items, item_flags,
+                                                         next_protocol, error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_GRE;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VXLAN:
+                       ret = mlx5_flow_validate_item_vxlan(items, item_flags,
+                                                           error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_VXLAN;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+                       ret = mlx5_flow_validate_item_vxlan_gpe(items,
+                                                               item_flags, dev,
+                                                               error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_META:
+                       ret = flow_dv_validate_item_meta(dev, items, attr,
+                                                        error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_ITEM_METADATA;
+                       break;
+               default:
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ITEM,
+                                                 NULL, "item not supported");
+               }
+       }
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+               if (actions_n == MLX5_DV_MAX_NUMBER_OF_ACTIONS)
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions, "too many actions");
+               switch (actions->type) {
+               case RTE_FLOW_ACTION_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ACTION_TYPE_FLAG:
+                       ret = mlx5_flow_validate_action_flag(action_flags,
+                                                            attr, error);
+                       if (ret < 0)
+                               return ret;
+                       action_flags |= MLX5_FLOW_ACTION_FLAG;
+                       ++actions_n;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_MARK:
+                       ret = mlx5_flow_validate_action_mark(actions,
+                                                            action_flags,
+                                                            attr, error);
+                       if (ret < 0)
+                               return ret;
+                       action_flags |= MLX5_FLOW_ACTION_MARK;
+                       ++actions_n;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_DROP:
+                       ret = mlx5_flow_validate_action_drop(action_flags,
+                                                            attr, error);
+                       if (ret < 0)
+                               return ret;
+                       action_flags |= MLX5_FLOW_ACTION_DROP;
+                       ++actions_n;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_QUEUE:
+                       ret = mlx5_flow_validate_action_queue(actions,
+                                                             action_flags, dev,
+                                                             attr, error);
+                       if (ret < 0)
+                               return ret;
+                       action_flags |= MLX5_FLOW_ACTION_QUEUE;
+                       ++actions_n;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_RSS:
+                       ret = mlx5_flow_validate_action_rss(actions,
+                                                           action_flags, dev,
+                                                           attr, error);
+                       if (ret < 0)
+                               return ret;
+                       action_flags |= MLX5_FLOW_ACTION_RSS;
+                       ++actions_n;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+                       ret = mlx5_flow_validate_action_count(dev, attr, error);
+                       if (ret < 0)
+                               return ret;
+                       action_flags |= MLX5_FLOW_ACTION_COUNT;
+                       ++actions_n;
+                       break;
+               default:
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "action not supported");
+               }
+       }
+       if (!(action_flags & MLX5_FLOW_FATE_ACTIONS) && attr->ingress)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, actions,
+                                         "no fate action is found");
+       return 0;
+}
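+
+/*
+ * Example of the fate-action rule above (illustrative): an ingress flow
+ * whose only action is COUNT fails with "no fate action is found"; adding
+ * QUEUE, RSS or DROP makes it pass validation.
+ */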
+
+/**
+ * Internal preparation function. Allocates the DV flow size,
+ * this size is constant.
+ *
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] item_flags
+ *   Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ *   Pointer to bit mask of all actions detected.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Pointer to mlx5_flow object on success,
+ *   otherwise NULL and rte_errno is set.
+ */
+static struct mlx5_flow *
+flow_dv_prepare(const struct rte_flow_attr *attr __rte_unused,
+               const struct rte_flow_item items[] __rte_unused,
+               const struct rte_flow_action actions[] __rte_unused,
+               uint64_t *item_flags __rte_unused,
+               uint64_t *action_flags __rte_unused,
+               struct rte_flow_error *error)
+{
+       uint32_t size = sizeof(struct mlx5_flow);
+       struct mlx5_flow *flow;
+
+       flow = rte_calloc(__func__, 1, size, 0);
+       if (!flow) {
+               rte_flow_error_set(error, ENOMEM,
+                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                  "not enough memory to create flow");
+               return NULL;
+       }
+       flow->dv.value.size = MLX5_ST_SZ_DB(fte_match_param);
+       return flow;
+}
+
+/**
+ * Add Ethernet item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_eth(void *matcher, void *key,
+                          const struct rte_flow_item *item, int inner)
+{
+       const struct rte_flow_item_eth *eth_m = item->mask;
+       const struct rte_flow_item_eth *eth_v = item->spec;
+       const struct rte_flow_item_eth nic_mask = {
+               .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+               .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+               .type = RTE_BE16(0xffff),
+       };
+       void *headers_m;
+       void *headers_v;
+       char *l24_v;
+       unsigned int i;
+
+       if (!eth_v)
+               return;
+       if (!eth_m)
+               eth_m = &nic_mask;
+       if (inner) {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        inner_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+       } else {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        outer_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+       }
+       memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m, dmac_47_16),
+              &eth_m->dst, sizeof(eth_m->dst));
+       /* The value must be in the range of the mask. */
+       l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dmac_47_16);
+       for (i = 0; i < sizeof(eth_m->dst); ++i)
+               l24_v[i] = eth_m->dst.addr_bytes[i] & eth_v->dst.addr_bytes[i];
+       memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m, smac_47_16),
+              &eth_m->src, sizeof(eth_m->src));
+       /* The value must be in the range of the mask. */
+       l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, smac_47_16);
+       for (i = 0; i < sizeof(eth_m->src); ++i)
+               l24_v[i] = eth_m->src.addr_bytes[i] & eth_v->src.addr_bytes[i];
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, ethertype,
+                rte_be_to_cpu_16(eth_m->type));
+       l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, ethertype);
+       *(uint16_t *)(l24_v) = eth_m->type & eth_v->type;
+}
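+
+/*
+ * Illustration (not part of the original code): masking the value into the
+ * mask's range, as done above, means a spec/mask pair such as
+ * dst = aa:bb:cc:dd:ee:ff with mask ff:ff:ff:00:00:00 writes
+ * aa:bb:cc:00:00:00 into the matcher value, so bits outside the mask can
+ * never make two otherwise identical matchers compare as different.
+ */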
+
+/**
+ * Add VLAN item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_vlan(void *matcher, void *key,
+                           const struct rte_flow_item *item,
+                           int inner)
+{
+       const struct rte_flow_item_vlan *vlan_m = item->mask;
+       const struct rte_flow_item_vlan *vlan_v = item->spec;
+       const struct rte_flow_item_vlan nic_mask = {
+               .tci = RTE_BE16(0x0fff),
+               .inner_type = RTE_BE16(0xffff),
+       };
+       void *headers_m;
+       void *headers_v;
+       uint16_t tci_m;
+       uint16_t tci_v;
+
+       if (!vlan_v)
+               return;
+       if (!vlan_m)
+               vlan_m = &nic_mask;
+       if (inner) {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        inner_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+       } else {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        outer_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+       }
+       tci_m = rte_be_to_cpu_16(vlan_m->tci);
+       tci_v = rte_be_to_cpu_16(vlan_m->tci & vlan_v->tci);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, cvlan_tag, 1);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_vid, tci_m);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, tci_v);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_cfi, tci_m >> 12);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_cfi, tci_v >> 12);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, first_prio, tci_m >> 13);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, tci_v >> 13);
+}
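+
+/*
+ * Illustration (not part of the original code): the 16-bit TCI packs
+ * PCP(3) | DEI(1) | VID(12), which the shifts above unpack. For
+ * tci == 0x3123:
+ *   first_vid  == tci & 0x0fff      == 0x123
+ *   first_cfi  == (tci >> 12) & 0x1 == 1
+ *   first_prio == tci >> 13         == 1
+ * (MLX5_SET() truncates each value to its field width.)
+ */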
+
+/**
+ * Add IPV4 item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_ipv4(void *matcher, void *key,
+                           const struct rte_flow_item *item,
+                           int inner)
+{
+       const struct rte_flow_item_ipv4 *ipv4_m = item->mask;
+       const struct rte_flow_item_ipv4 *ipv4_v = item->spec;
+       const struct rte_flow_item_ipv4 nic_mask = {
+               .hdr = {
+                       .src_addr = RTE_BE32(0xffffffff),
+                       .dst_addr = RTE_BE32(0xffffffff),
+                       .type_of_service = 0xff,
+                       .next_proto_id = 0xff,
+               },
+       };
+       void *headers_m;
+       void *headers_v;
+       char *l24_m;
+       char *l24_v;
+       uint8_t tos;
+
+       if (inner) {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        inner_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+       } else {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        outer_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+       }
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0xf);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 4);
+       if (!ipv4_v)
+               return;
+       if (!ipv4_m)
+               ipv4_m = &nic_mask;
+       l24_m = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m,
+                            dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+       l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+                            dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+       *(uint32_t *)l24_m = ipv4_m->hdr.dst_addr;
+       *(uint32_t *)l24_v = ipv4_m->hdr.dst_addr & ipv4_v->hdr.dst_addr;
+       l24_m = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m,
+                         src_ipv4_src_ipv6.ipv4_layout.ipv4);
+       l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+                         src_ipv4_src_ipv6.ipv4_layout.ipv4);
+       *(uint32_t *)l24_m = ipv4_m->hdr.src_addr;
+       *(uint32_t *)l24_v = ipv4_m->hdr.src_addr & ipv4_v->hdr.src_addr;
+       tos = ipv4_m->hdr.type_of_service & ipv4_v->hdr.type_of_service;
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_ecn,
+                ipv4_m->hdr.type_of_service);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, tos);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_dscp,
+                ipv4_m->hdr.type_of_service >> 2);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, tos >> 2);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol,
+                ipv4_m->hdr.next_proto_id);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+                ipv4_v->hdr.next_proto_id & ipv4_m->hdr.next_proto_id);
+}
+
+/**
+ * Add IPV6 item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_ipv6(void *matcher, void *key,
+                           const struct rte_flow_item *item,
+                           int inner)
+{
+       const struct rte_flow_item_ipv6 *ipv6_m = item->mask;
+       const struct rte_flow_item_ipv6 *ipv6_v = item->spec;
+       const struct rte_flow_item_ipv6 nic_mask = {
+               .hdr = {
+                       .src_addr =
+                               "\xff\xff\xff\xff\xff\xff\xff\xff"
+                               "\xff\xff\xff\xff\xff\xff\xff\xff",
+                       .dst_addr =
+                               "\xff\xff\xff\xff\xff\xff\xff\xff"
+                               "\xff\xff\xff\xff\xff\xff\xff\xff",
+                       .vtc_flow = RTE_BE32(0xffffffff),
+                       .proto = 0xff,
+                       .hop_limits = 0xff,
+               },
+       };
+       void *headers_m;
+       void *headers_v;
+       void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters);
+       void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
+       char *l24_m;
+       char *l24_v;
+       uint32_t vtc_m;
+       uint32_t vtc_v;
+       int i;
+       int size;
+
+       if (inner) {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        inner_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+       } else {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        outer_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+       }
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_version, 0xf);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version, 6);
+       if (!ipv6_v)
+               return;
+       if (!ipv6_m)
+               ipv6_m = &nic_mask;
+       size = sizeof(ipv6_m->hdr.dst_addr);
+       l24_m = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m,
+                            dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+       l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+                            dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
+       memcpy(l24_m, ipv6_m->hdr.dst_addr, size);
+       for (i = 0; i < size; ++i)
+               l24_v[i] = l24_m[i] & ipv6_v->hdr.dst_addr[i];
+       l24_m = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_m,
+                            src_ipv4_src_ipv6.ipv6_layout.ipv6);
+       l24_v = MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+                            src_ipv4_src_ipv6.ipv6_layout.ipv6);
+       memcpy(l24_m, ipv6_m->hdr.src_addr, size);
+       for (i = 0; i < size; ++i)
+               l24_v[i] = l24_m[i] & ipv6_v->hdr.src_addr[i];
+       /* TOS. */
+       vtc_m = rte_be_to_cpu_32(ipv6_m->hdr.vtc_flow);
+       vtc_v = rte_be_to_cpu_32(ipv6_m->hdr.vtc_flow & ipv6_v->hdr.vtc_flow);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_ecn, vtc_m >> 20);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, vtc_v >> 20);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_dscp, vtc_m >> 22);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, vtc_v >> 22);
+       /* Label. */
+       if (inner) {
+               MLX5_SET(fte_match_set_misc, misc_m, inner_ipv6_flow_label,
+                        vtc_m);
+               MLX5_SET(fte_match_set_misc, misc_v, inner_ipv6_flow_label,
+                        vtc_v);
+       } else {
+               MLX5_SET(fte_match_set_misc, misc_m, outer_ipv6_flow_label,
+                        vtc_m);
+               MLX5_SET(fte_match_set_misc, misc_v, outer_ipv6_flow_label,
+                        vtc_v);
+       }
+       /* Protocol. */
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol,
+                ipv6_m->hdr.proto);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+                ipv6_v->hdr.proto & ipv6_m->hdr.proto);
+}
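+
+/*
+ * Illustration (not part of the original code): vtc_flow packs
+ * version(4) | traffic class(8) | flow label(20). For
+ * vtc_flow == 0x60103456 the shifts above give
+ *   ip_ecn     == (vtc >> 20) & 0x3  == 1
+ *   ip_dscp    == (vtc >> 22) & 0x3f == 0
+ *   flow label == vtc & 0xfffff      == 0x03456
+ * with MLX5_SET() truncating each value to its field width.
+ */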
+
+/**
+ * Add TCP item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_tcp(void *matcher, void *key,
+                          const struct rte_flow_item *item,
+                          int inner)
+{
+       const struct rte_flow_item_tcp *tcp_m = item->mask;
+       const struct rte_flow_item_tcp *tcp_v = item->spec;
+       void *headers_m;
+       void *headers_v;
+
+       if (inner) {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        inner_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+       } else {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        outer_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+       }
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_TCP);
+       if (!tcp_v)
+               return;
+       if (!tcp_m)
+               tcp_m = &rte_flow_item_tcp_mask;
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, tcp_sport,
+                rte_be_to_cpu_16(tcp_m->hdr.src_port));
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
+                rte_be_to_cpu_16(tcp_v->hdr.src_port & tcp_m->hdr.src_port));
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, tcp_dport,
+                rte_be_to_cpu_16(tcp_m->hdr.dst_port));
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
+                rte_be_to_cpu_16(tcp_v->hdr.dst_port & tcp_m->hdr.dst_port));
+}
+
+/**
+ * Add UDP item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_udp(void *matcher, void *key,
+                          const struct rte_flow_item *item,
+                          int inner)
+{
+       const struct rte_flow_item_udp *udp_m = item->mask;
+       const struct rte_flow_item_udp *udp_v = item->spec;
+       void *headers_m;
+       void *headers_v;
+
+       if (inner) {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        inner_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+       } else {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        outer_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+       }
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+       if (!udp_v)
+               return;
+       if (!udp_m)
+               udp_m = &rte_flow_item_udp_mask;
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_sport,
+                rte_be_to_cpu_16(udp_m->hdr.src_port));
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
+                rte_be_to_cpu_16(udp_v->hdr.src_port & udp_m->hdr.src_port));
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_dport,
+                rte_be_to_cpu_16(udp_m->hdr.dst_port));
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
+                rte_be_to_cpu_16(udp_v->hdr.dst_port & udp_m->hdr.dst_port));
+}
+
+/**
+ * Add GRE item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_gre(void *matcher, void *key,
+                          const struct rte_flow_item *item,
+                          int inner)
+{
+       const struct rte_flow_item_gre *gre_m = item->mask;
+       const struct rte_flow_item_gre *gre_v = item->spec;
+       void *headers_m;
+       void *headers_v;
+       void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters);
+       void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
+
+       if (inner) {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        inner_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+       } else {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        outer_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+       }
+       MLX5_SET(fte_match_set_lyr_2_4, headers_m, ip_protocol, 0xff);
+       MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_GRE);
+       if (!gre_v)
+               return;
+       if (!gre_m)
+               gre_m = &rte_flow_item_gre_mask;
+       MLX5_SET(fte_match_set_misc, misc_m, gre_protocol,
+                rte_be_to_cpu_16(gre_m->protocol));
+       MLX5_SET(fte_match_set_misc, misc_v, gre_protocol,
+                rte_be_to_cpu_16(gre_v->protocol & gre_m->protocol));
+}
+
+/**
+ * Add NVGRE item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_nvgre(void *matcher, void *key,
+                            const struct rte_flow_item *item,
+                            int inner)
+{
+       const struct rte_flow_item_nvgre *nvgre_m = item->mask;
+       const struct rte_flow_item_nvgre *nvgre_v = item->spec;
+       void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters);
+       void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
+       const char *tni_flow_id_m = (const char *)nvgre_m->tni;
+       const char *tni_flow_id_v = (const char *)nvgre_v->tni;
+       char *gre_key_m;
+       char *gre_key_v;
+       int size;
+       int i;
+
+       flow_dv_translate_item_gre(matcher, key, item, inner);
+       if (!nvgre_v)
+               return;
+       if (!nvgre_m)
+               nvgre_m = &rte_flow_item_nvgre_mask;
+       size = sizeof(nvgre_m->tni) + sizeof(nvgre_m->flow_id);
+       gre_key_m = MLX5_ADDR_OF(fte_match_set_misc, misc_m, gre_key_h);
+       gre_key_v = MLX5_ADDR_OF(fte_match_set_misc, misc_v, gre_key_h);
+       memcpy(gre_key_m, tni_flow_id_m, size);
+       for (i = 0; i < size; ++i)
+               gre_key_v[i] = gre_key_m[i] & tni_flow_id_v[i];
+}
+
+/**
+ * Add VXLAN item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_translate_item_vxlan(void *matcher, void *key,
+                            const struct rte_flow_item *item,
+                            int inner)
+{
+       const struct rte_flow_item_vxlan *vxlan_m = item->mask;
+       const struct rte_flow_item_vxlan *vxlan_v = item->spec;
+       void *headers_m;
+       void *headers_v;
+       void *misc_m = MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters);
+       void *misc_v = MLX5_ADDR_OF(fte_match_param, key, misc_parameters);
+       char *vni_m;
+       char *vni_v;
+       uint16_t dport;
+       int size;
+       int i;
+
+       if (inner) {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        inner_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, inner_headers);
+       } else {
+               headers_m = MLX5_ADDR_OF(fte_match_param, matcher,
+                                        outer_headers);
+               headers_v = MLX5_ADDR_OF(fte_match_param, key, outer_headers);
+       }
+       dport = item->type == RTE_FLOW_ITEM_TYPE_VXLAN ?
+               MLX5_UDP_PORT_VXLAN : MLX5_UDP_PORT_VXLAN_GPE;
+       if (!MLX5_GET16(fte_match_set_lyr_2_4, headers_v, udp_dport)) {
+               MLX5_SET(fte_match_set_lyr_2_4, headers_m, udp_dport, 0xFFFF);
+               MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, dport);
+       }
+       if (!vxlan_v)
+               return;
+       if (!vxlan_m)
+               vxlan_m = &rte_flow_item_vxlan_mask;
+       size = sizeof(vxlan_m->vni);
+       vni_m = MLX5_ADDR_OF(fte_match_set_misc, misc_m, vxlan_vni);
+       vni_v = MLX5_ADDR_OF(fte_match_set_misc, misc_v, vxlan_vni);
+       memcpy(vni_m, vxlan_m->vni, size);
+       for (i = 0; i < size; ++i)
+               vni_v[i] = vni_m[i] & vxlan_v->vni[i];
+}
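+
+/*
+ * Note (assumption: MLX5_UDP_PORT_VXLAN/_GPE carry the conventional IANA
+ * values 4789/4790): when the pattern does not already constrain the UDP
+ * destination port, the code above pins it to the default port for the
+ * item type, so a bare VXLAN item still matches only the expected
+ * encapsulation.
+ */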
+
+/**
+ * Add META item to matcher and to the value.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ */
+static void
+flow_dv_translate_item_meta(void *matcher, void *key,
+                           const struct rte_flow_item *item)
+{
+       const struct rte_flow_item_meta *meta_m;
+       const struct rte_flow_item_meta *meta_v;
+       void *misc2_m =
+               MLX5_ADDR_OF(fte_match_param, matcher, misc_parameters_2);
+       void *misc2_v =
+               MLX5_ADDR_OF(fte_match_param, key, misc_parameters_2);
+
+       meta_m = (const void *)item->mask;
+       if (!meta_m)
+               meta_m = &rte_flow_item_meta_mask;
+       meta_v = (const void *)item->spec;
+       if (meta_v) {
+               MLX5_SET(fte_match_set_misc2, misc2_m, metadata_reg_a,
+                        rte_be_to_cpu_32(meta_m->data));
+               MLX5_SET(fte_match_set_misc2, misc2_v, metadata_reg_a,
+                        rte_be_to_cpu_32(meta_v->data & meta_m->data));
+       }
+}
+
+/**
+ * Update the matcher and the value based on the selected item.
+ *
+ * @param[in, out] matcher
+ *   Flow matcher.
+ * @param[in, out] key
+ *   Flow matcher value.
+ * @param[in] item
+ *   Flow pattern to translate.
+ * @param[in, out] dev_flow
+ *   Pointer to the mlx5_flow.
+ * @param[in] inner
+ *   Item is inner pattern.
+ */
+static void
+flow_dv_create_item(void *matcher, void *key,
+                   const struct rte_flow_item *item,
+                   struct mlx5_flow *dev_flow,
+                   int inner)
+{
+       struct mlx5_flow_dv_matcher *tmatcher = matcher;
+
+       switch (item->type) {
+       case RTE_FLOW_ITEM_TYPE_ETH:
+               flow_dv_translate_item_eth(tmatcher->mask.buf, key, item,
+                                          inner);
+               tmatcher->priority = MLX5_PRIORITY_MAP_L2;
+               break;
+       case RTE_FLOW_ITEM_TYPE_VLAN:
+               flow_dv_translate_item_vlan(tmatcher->mask.buf, key, item,
+                                           inner);
+               break;
+       case RTE_FLOW_ITEM_TYPE_IPV4:
+               flow_dv_translate_item_ipv4(tmatcher->mask.buf, key, item,
+                                           inner);
+               tmatcher->priority = MLX5_PRIORITY_MAP_L3;
+               dev_flow->dv.hash_fields |=
+                       mlx5_flow_hashfields_adjust(dev_flow, inner,
+                                                   MLX5_IPV4_LAYER_TYPES,
+                                                   MLX5_IPV4_IBV_RX_HASH);
+               break;
+       case RTE_FLOW_ITEM_TYPE_IPV6:
+               flow_dv_translate_item_ipv6(tmatcher->mask.buf, key, item,
+                                           inner);
+               tmatcher->priority = MLX5_PRIORITY_MAP_L3;
+               dev_flow->dv.hash_fields |=
+                       mlx5_flow_hashfields_adjust(dev_flow, inner,
+                                                   MLX5_IPV6_LAYER_TYPES,
+                                                   MLX5_IPV6_IBV_RX_HASH);
+               break;
+       case RTE_FLOW_ITEM_TYPE_TCP:
+               flow_dv_translate_item_tcp(tmatcher->mask.buf, key, item,
+                                          inner);
+               tmatcher->priority = MLX5_PRIORITY_MAP_L4;
+               dev_flow->dv.hash_fields |=
+                       mlx5_flow_hashfields_adjust(dev_flow, inner,
+                                                   ETH_RSS_TCP,
+                                                   (IBV_RX_HASH_SRC_PORT_TCP |
+                                                    IBV_RX_HASH_DST_PORT_TCP));
+               break;
+       case RTE_FLOW_ITEM_TYPE_UDP:
+               flow_dv_translate_item_udp(tmatcher->mask.buf, key, item,
+                                          inner);
+               tmatcher->priority = MLX5_PRIORITY_MAP_L4;
+               dev_flow->dv.hash_fields |=
+                       mlx5_flow_hashfields_adjust(dev_flow, inner,
+                                                   ETH_RSS_UDP,
+                                                   (IBV_RX_HASH_SRC_PORT_UDP |
+                                                    IBV_RX_HASH_DST_PORT_UDP));
+               break;
+       case RTE_FLOW_ITEM_TYPE_GRE:
+               flow_dv_translate_item_gre(tmatcher->mask.buf, key, item,
+                                          inner);
+               break;
+       case RTE_FLOW_ITEM_TYPE_NVGRE:
+               flow_dv_translate_item_nvgre(tmatcher->mask.buf, key, item,
+                                            inner);
+               break;
+       case RTE_FLOW_ITEM_TYPE_VXLAN:
+       case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+               flow_dv_translate_item_vxlan(tmatcher->mask.buf, key, item,
+                                            inner);
+               break;
+       case RTE_FLOW_ITEM_TYPE_META:
+               flow_dv_translate_item_meta(tmatcher->mask.buf, key, item);
+               break;
+       default:
+               break;
+       }
+}
+
+/**
+ * Store the requested action in the flow's DV action array.
+ *
+ * @param[in] action
+ *   Flow action to translate.
+ * @param[in, out] dev_flow
+ *   Pointer to the mlx5_flow.
+ */
+static void
+flow_dv_create_action(const struct rte_flow_action *action,
+                     struct mlx5_flow *dev_flow)
+{
+       const struct rte_flow_action_queue *queue;
+       const struct rte_flow_action_rss *rss;
+       int actions_n = dev_flow->dv.actions_n;
+       struct rte_flow *flow = dev_flow->flow;
+
+       switch (action->type) {
+       case RTE_FLOW_ACTION_TYPE_VOID:
+               break;
+       case RTE_FLOW_ACTION_TYPE_FLAG:
+               dev_flow->dv.actions[actions_n].type = MLX5DV_FLOW_ACTION_TAG;
+               dev_flow->dv.actions[actions_n].tag_value =
+                       mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT);
+               actions_n++;
+               flow->actions |= MLX5_FLOW_ACTION_FLAG;
+               break;
+       case RTE_FLOW_ACTION_TYPE_MARK:
+               dev_flow->dv.actions[actions_n].type = MLX5DV_FLOW_ACTION_TAG;
+               dev_flow->dv.actions[actions_n].tag_value =
+                       mlx5_flow_mark_set
+                       (((const struct rte_flow_action_mark *)
+                         (action->conf))->id);
+               flow->actions |= MLX5_FLOW_ACTION_MARK;
+               actions_n++;
+               break;
+       case RTE_FLOW_ACTION_TYPE_DROP:
+               dev_flow->dv.actions[actions_n].type = MLX5DV_FLOW_ACTION_DROP;
+               flow->actions |= MLX5_FLOW_ACTION_DROP;
+               break;
+       case RTE_FLOW_ACTION_TYPE_QUEUE:
+               queue = action->conf;
+               flow->rss.queue_num = 1;
+               (*flow->queue)[0] = queue->index;
+               flow->actions |= MLX5_FLOW_ACTION_QUEUE;
+               break;
+       case RTE_FLOW_ACTION_TYPE_RSS:
+               rss = action->conf;
+               if (flow->queue)
+                       memcpy((*flow->queue), rss->queue,
+                              rss->queue_num * sizeof(uint16_t));
+               flow->rss.queue_num = rss->queue_num;
+               memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
+               flow->rss.types = rss->types;
+               flow->rss.level = rss->level;
+               /* Added to the array only in apply since we need the QP. */
+               flow->actions |= MLX5_FLOW_ACTION_RSS;
+               break;
+       default:
+               break;
+       }
+       dev_flow->dv.actions_n = actions_n;
+}
+
+static uint32_t matcher_zero[MLX5_ST_SZ_DW(fte_match_param)] = { 0 };
+
+#define HEADER_IS_ZERO(match_criteria, headers)                                     \
+       !(memcmp(MLX5_ADDR_OF(fte_match_param, match_criteria, headers),     \
+                matcher_zero, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))
+
+/**
+ * Calculate flow matcher enable bitmap.
+ *
+ * @param match_criteria
+ *   Pointer to flow matcher criteria.
+ *
+ * @return
+ *   Bitmap of enabled fields.
+ */
+static uint8_t
+flow_dv_matcher_enable(uint32_t *match_criteria)
+{
+       uint8_t match_criteria_enable;
+
+       match_criteria_enable =
+               (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
+               MLX5_MATCH_CRITERIA_ENABLE_OUTER_BIT;
+       match_criteria_enable |=
+               (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
+               MLX5_MATCH_CRITERIA_ENABLE_MISC_BIT;
+       match_criteria_enable |=
+               (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
+               MLX5_MATCH_CRITERIA_ENABLE_INNER_BIT;
+       match_criteria_enable |=
+               (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
+               MLX5_MATCH_CRITERIA_ENABLE_MISC2_BIT;
+
+       return match_criteria_enable;
+}
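+
+/*
+ * Illustration (not part of the original code): for a matcher that sets
+ * only outer L2-L4 fields plus a tunnel field in misc_parameters, the
+ * checks above yield
+ *   enable = (1 << MLX5_MATCH_CRITERIA_ENABLE_OUTER_BIT) |
+ *            (1 << MLX5_MATCH_CRITERIA_ENABLE_MISC_BIT);
+ * the all-zero inner_headers and misc_parameters_2 blocks stay disabled.
+ */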
+
+/**
+ * Register the flow matcher.
+ *
+ * @param[in, out] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in, out] matcher
+ *   Pointer to flow matcher.
+ * @param[in, out] dev_flow
+ *   Pointer to the dev_flow.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, otherwise -errno and errno is set.
+ */
+static int
+flow_dv_matcher_register(struct rte_eth_dev *dev,
+                        struct mlx5_flow_dv_matcher *matcher,
+                        struct mlx5_flow *dev_flow,
+                        struct rte_flow_error *error)
+{
+       struct priv *priv = dev->data->dev_private;
+       struct mlx5_flow_dv_matcher *cache_matcher;
+       struct mlx5dv_flow_matcher_attr dv_attr = {
+               .type = IBV_FLOW_ATTR_NORMAL,
+               .match_mask = (void *)&matcher->mask,
+       };
+
+       /* Lookup from cache. */
+       LIST_FOREACH(cache_matcher, &priv->matchers, next) {
+               if (matcher->crc == cache_matcher->crc &&
+                   matcher->priority == cache_matcher->priority &&
+                   matcher->egress == cache_matcher->egress &&
+                   !memcmp((const void *)matcher->mask.buf,
+                           (const void *)cache_matcher->mask.buf,
+                           cache_matcher->mask.size)) {
+                       DRV_LOG(DEBUG,
+                               "priority %hd use %s matcher %p: refcnt %d++",
+                               cache_matcher->priority,
+                               cache_matcher->egress ? "tx" : "rx",
+                               (void *)cache_matcher,
+                               rte_atomic32_read(&cache_matcher->refcnt));
+                       rte_atomic32_inc(&cache_matcher->refcnt);
+                       dev_flow->dv.matcher = cache_matcher;
+                       return 0;
+               }
+       }
+       /* Register new matcher. */
+       cache_matcher = rte_calloc(__func__, 1, sizeof(*cache_matcher), 0);
+       if (!cache_matcher)
+               return rte_flow_error_set(error, ENOMEM,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                         "cannot allocate matcher memory");
+       *cache_matcher = *matcher;
+       dv_attr.match_criteria_enable =
+               flow_dv_matcher_enable(cache_matcher->mask.buf);
+       dv_attr.priority = matcher->priority;
+       if (matcher->egress)
+               dv_attr.flags |= IBV_FLOW_ATTR_FLAGS_EGRESS;
+       cache_matcher->matcher_object =
+               mlx5_glue->dv_create_flow_matcher(priv->ctx, &dv_attr);
+       if (!cache_matcher->matcher_object)
+               return rte_flow_error_set(error, ENOMEM,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                         NULL, "cannot create matcher");
+       rte_atomic32_inc(&cache_matcher->refcnt);
+       LIST_INSERT_HEAD(&priv->matchers, cache_matcher, next);
+       dev_flow->dv.matcher = cache_matcher;
+       DRV_LOG(DEBUG, "priority %hd new %s matcher %p: refcnt %d",
+               cache_matcher->priority,
+               cache_matcher->egress ? "tx" : "rx", (void *)cache_matcher,
+               rte_atomic32_read(&cache_matcher->refcnt));
+       return 0;
+}
+
+/**
+ * Fill the flow with DV spec.
+ *
+ * @param[in] dev
+ *   Pointer to rte_eth_dev structure.
+ * @param[in, out] dev_flow
+ *   Pointer to the sub flow.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_translate(struct rte_eth_dev *dev,
+                 struct mlx5_flow *dev_flow,
+                 const struct rte_flow_attr *attr,
+                 const struct rte_flow_item items[],
+                 const struct rte_flow_action actions[] __rte_unused,
+                 struct rte_flow_error *error)
+{
+       struct priv *priv = dev->data->dev_private;
+       uint64_t priority = attr->priority;
+       struct mlx5_flow_dv_matcher matcher = {
+               .mask = {
+                       .size = sizeof(matcher.mask.buf),
+               },
+       };
+       void *match_value = dev_flow->dv.value.buf;
+       int tunnel = 0;
+
+       if (priority == MLX5_FLOW_PRIO_RSVD)
+               priority = priv->config.flow_prio - 1;
+       for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+               tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+               flow_dv_create_item(&matcher, match_value, items, dev_flow,
+                                   tunnel);
+       }
+       matcher.crc = rte_raw_cksum((const void *)matcher.mask.buf,
+                                    matcher.mask.size);
+       matcher.priority = mlx5_flow_adjust_priority(dev, priority,
+                                                    matcher.priority);
+       matcher.egress = attr->egress;
+       if (flow_dv_matcher_register(dev, &matcher, dev_flow, error))
+               return -rte_errno;
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
+               flow_dv_create_action(actions, dev_flow);
+       return 0;
+}
+
+/**
+ * Apply the flow to the NIC.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_dv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+             struct rte_flow_error *error)
+{
+       struct mlx5_flow_dv *dv;
+       struct mlx5_flow *dev_flow;
+       int n;
+       int err;
+
+       LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+               dv = &dev_flow->dv;
+               n = dv->actions_n;
+               if (flow->actions & MLX5_FLOW_ACTION_DROP) {
+                       dv->hrxq = mlx5_hrxq_drop_new(dev);
+                       if (!dv->hrxq) {
+                               rte_flow_error_set
+                                       (error, errno,
+                                        RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                        "cannot get drop hash queue");
+                               goto error;
+                       }
+                       dv->actions[n].type = MLX5DV_FLOW_ACTION_DEST_IBV_QP;
+                       dv->actions[n].qp = dv->hrxq->qp;
+                       n++;
+               } else if (flow->actions &
+                          (MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS)) {
+                       struct mlx5_hrxq *hrxq;
+                       hrxq = mlx5_hrxq_get(dev, flow->key,
+                                            MLX5_RSS_HASH_KEY_LEN,
+                                            dv->hash_fields,
+                                            (*flow->queue),
+                                            flow->rss.queue_num);
+                       if (!hrxq)
+                               hrxq = mlx5_hrxq_new
+                                       (dev, flow->key, MLX5_RSS_HASH_KEY_LEN,
+                                        dv->hash_fields, (*flow->queue),
+                                        flow->rss.queue_num,
+                                        !!(dev_flow->layers &
+                                           MLX5_FLOW_LAYER_TUNNEL));
+                       if (!hrxq) {
+                               rte_flow_error_set
+                                       (error, rte_errno,
+                                        RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                        "cannot get hash queue");
+                               goto error;
+                       }
+                       dv->hrxq = hrxq;
+                       dv->actions[n].type = MLX5DV_FLOW_ACTION_DEST_IBV_QP;
+                       dv->actions[n].qp = hrxq->qp;
+                       n++;
+               }
+               dv->flow =
+                       mlx5_glue->dv_create_flow(dv->matcher->matcher_object,
+                                                 (void *)&dv->value, n,
+                                                 dv->actions);
+               if (!dv->flow) {
+                       rte_flow_error_set(error, errno,
+                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                          NULL,
+                                          "hardware refuses to create flow");
+                       goto error;
+               }
+       }
+       return 0;
+error:
+       err = rte_errno; /* Save rte_errno before cleanup. */
+       LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+               struct mlx5_flow_dv *dv = &dev_flow->dv;
+               if (dv->hrxq) {
+                       if (flow->actions & MLX5_FLOW_ACTION_DROP)
+                               mlx5_hrxq_drop_release(dev);
+                       else
+                               mlx5_hrxq_release(dev, dv->hrxq);
+                       dv->hrxq = NULL;
+               }
+       }
+       rte_errno = err; /* Restore rte_errno. */
+       return -rte_errno;
+}
+
+/**
+ * Release the flow matcher.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param flow
+ *   Pointer to mlx5_flow.
+ *
+ * @return
+ *   1 while a reference on it exists, 0 when freed.
+ */
+static int
+flow_dv_matcher_release(struct rte_eth_dev *dev,
+                       struct mlx5_flow *flow)
+{
+       struct mlx5_flow_dv_matcher *matcher = flow->dv.matcher;
+
+       assert(matcher->matcher_object);
+       DRV_LOG(DEBUG, "port %u matcher %p: refcnt %d--",
+               dev->data->port_id, (void *)matcher,
+               rte_atomic32_read(&matcher->refcnt));
+       if (rte_atomic32_dec_and_test(&matcher->refcnt)) {
+               claim_zero(mlx5_glue->dv_destroy_flow_matcher
+                          (matcher->matcher_object));
+               LIST_REMOVE(matcher, next);
+               rte_free(matcher);
+               DRV_LOG(DEBUG, "port %u matcher %p: removed",
+                       dev->data->port_id, (void *)matcher);
+               return 0;
+       }
+       return 1;
+}
+
+/**
+ * Remove the flow from the NIC but keeps it in memory.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ */
+static void
+flow_dv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+       struct mlx5_flow_dv *dv;
+       struct mlx5_flow *dev_flow;
+
+       if (!flow)
+               return;
+       LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+               dv = &dev_flow->dv;
+               if (dv->flow) {
+                       claim_zero(mlx5_glue->destroy_flow(dv->flow));
+                       dv->flow = NULL;
+               }
+               if (dv->hrxq) {
+                       if (flow->actions & MLX5_FLOW_ACTION_DROP)
+                               mlx5_hrxq_drop_release(dev);
+                       else
+                               mlx5_hrxq_release(dev, dv->hrxq);
+                       dv->hrxq = NULL;
+               }
+       }
+       flow->counter = NULL;
+}
+
+/**
+ * Remove the flow from the NIC and the memory.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ */
+static void
+flow_dv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+       struct mlx5_flow *dev_flow;
+
+       if (!flow)
+               return;
+       flow_dv_remove(dev, flow);
+       while (!LIST_EMPTY(&flow->dev_flows)) {
+               dev_flow = LIST_FIRST(&flow->dev_flows);
+               LIST_REMOVE(dev_flow, next);
+               if (dev_flow->dv.matcher)
+                       flow_dv_matcher_release(dev, dev_flow);
+               rte_free(dev_flow);
+       }
+}
+
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_dv_query(struct rte_eth_dev *dev __rte_unused,
+             struct rte_flow *flow __rte_unused,
+             const struct rte_flow_action *actions __rte_unused,
+             void *data __rte_unused,
+             struct rte_flow_error *error __rte_unused)
+{
+       rte_errno = ENOTSUP;
+       return -rte_errno;
+}
+
+const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops = {
+       .validate = flow_dv_validate,
+       .prepare = flow_dv_prepare,
+       .translate = flow_dv_translate,
+       .apply = flow_dv_apply,
+       .remove = flow_dv_remove,
+       .destroy = flow_dv_destroy,
+       .query = flow_dv_query,
+};
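+
+/*
+ * Note (assumption about the caller, not shown in this patch): the generic
+ * mlx5 flow layer is expected to drive these callbacks as
+ * validate -> prepare -> translate -> apply on flow creation, and
+ * remove/destroy on teardown, with query available at any time.
+ */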
+
+#endif /* HAVE_IBV_FLOW_DV_SUPPORT */
diff --git a/drivers/net/mlx5/mlx5_flow_tcf.c b/drivers/net/mlx5/mlx5_flow_tcf.c
new file mode 100644 (file)
index 0000000..719fb10
--- /dev/null
@@ -0,0 +1,2913 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 6WIND S.A.
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <libmnl/libmnl.h>
+#include <linux/gen_stats.h>
+#include <linux/if_ether.h>
+#include <linux/netlink.h>
+#include <linux/pkt_cls.h>
+#include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <linux/tc_act/tc_gact.h>
+#include <linux/tc_act/tc_mirred.h>
+#include <netinet/in.h>
+#include <stdalign.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+
+#include <rte_byteorder.h>
+#include <rte_errno.h>
+#include <rte_ether.h>
+#include <rte_flow.h>
+#include <rte_malloc.h>
+#include <rte_common.h>
+
+#include "mlx5.h"
+#include "mlx5_flow.h"
+#include "mlx5_autoconf.h"
+
+#ifdef HAVE_TC_ACT_VLAN
+
+#include <linux/tc_act/tc_vlan.h>
+
+#else /* HAVE_TC_ACT_VLAN */
+
+#define TCA_VLAN_ACT_POP 1
+#define TCA_VLAN_ACT_PUSH 2
+#define TCA_VLAN_ACT_MODIFY 3
+#define TCA_VLAN_PARMS 2
+#define TCA_VLAN_PUSH_VLAN_ID 3
+#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
+#define TCA_VLAN_PAD 5
+#define TCA_VLAN_PUSH_VLAN_PRIORITY 6
+
+struct tc_vlan {
+       tc_gen;
+       int v_action;
+};
+
+#endif /* HAVE_TC_ACT_VLAN */
+
+#ifdef HAVE_TC_ACT_PEDIT
+
+#include <linux/tc_act/tc_pedit.h>
+
+#else /* HAVE_TC_ACT_PEDIT */
+
+enum {
+       TCA_PEDIT_UNSPEC,
+       TCA_PEDIT_TM,
+       TCA_PEDIT_PARMS,
+       TCA_PEDIT_PAD,
+       TCA_PEDIT_PARMS_EX,
+       TCA_PEDIT_KEYS_EX,
+       TCA_PEDIT_KEY_EX,
+       __TCA_PEDIT_MAX
+};
+
+enum {
+       TCA_PEDIT_KEY_EX_HTYPE = 1,
+       TCA_PEDIT_KEY_EX_CMD = 2,
+       __TCA_PEDIT_KEY_EX_MAX
+};
+
+enum pedit_header_type {
+       TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK = 0,
+       TCA_PEDIT_KEY_EX_HDR_TYPE_ETH = 1,
+       TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 = 2,
+       TCA_PEDIT_KEY_EX_HDR_TYPE_IP6 = 3,
+       TCA_PEDIT_KEY_EX_HDR_TYPE_TCP = 4,
+       TCA_PEDIT_KEY_EX_HDR_TYPE_UDP = 5,
+       __PEDIT_HDR_TYPE_MAX,
+};
+
+enum pedit_cmd {
+       TCA_PEDIT_KEY_EX_CMD_SET = 0,
+       TCA_PEDIT_KEY_EX_CMD_ADD = 1,
+       __PEDIT_CMD_MAX,
+};
+
+struct tc_pedit_key {
+       __u32 mask; /* AND */
+       __u32 val; /* XOR */
+       __u32 off; /* offset */
+       __u32 at;
+       __u32 offmask;
+       __u32 shift;
+};
+
+__extension__
+struct tc_pedit_sel {
+       tc_gen;
+       unsigned char nkeys;
+       unsigned char flags;
+       struct tc_pedit_key keys[0];
+};
+
+#endif /* HAVE_TC_ACT_PEDIT */
+
+/* Normally found in linux/netlink.h. */
+#ifndef NETLINK_CAP_ACK
+#define NETLINK_CAP_ACK 10
+#endif
+
+/* Normally found in linux/pkt_sched.h. */
+#ifndef TC_H_MIN_INGRESS
+#define TC_H_MIN_INGRESS 0xfff2u
+#endif
+
+/* Normally found in linux/pkt_cls.h. */
+#ifndef TCA_CLS_FLAGS_SKIP_SW
+#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
+#endif
+#ifndef HAVE_TCA_CHAIN
+#define TCA_CHAIN 11
+#endif
+#ifndef HAVE_TCA_FLOWER_ACT
+#define TCA_FLOWER_ACT 3
+#endif
+#ifndef HAVE_TCA_FLOWER_FLAGS
+#define TCA_FLOWER_FLAGS 22
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
+#define TCA_FLOWER_KEY_ETH_TYPE 8
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
+#define TCA_FLOWER_KEY_ETH_DST 4
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
+#define TCA_FLOWER_KEY_ETH_DST_MASK 5
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
+#define TCA_FLOWER_KEY_ETH_SRC 6
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
+#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
+#define TCA_FLOWER_KEY_IP_PROTO 9
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
+#define TCA_FLOWER_KEY_IPV4_SRC 10
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
+#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
+#define TCA_FLOWER_KEY_IPV4_DST 12
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
+#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
+#define TCA_FLOWER_KEY_IPV6_SRC 14
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
+#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
+#define TCA_FLOWER_KEY_IPV6_DST 16
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
+#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
+#define TCA_FLOWER_KEY_TCP_SRC 18
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
+#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
+#define TCA_FLOWER_KEY_TCP_DST 19
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
+#define TCA_FLOWER_KEY_TCP_DST_MASK 36
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
+#define TCA_FLOWER_KEY_UDP_SRC 20
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
+#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
+#define TCA_FLOWER_KEY_UDP_DST 21
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
+#define TCA_FLOWER_KEY_UDP_DST_MASK 38
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
+#define TCA_FLOWER_KEY_VLAN_ID 23
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
+#define TCA_FLOWER_KEY_VLAN_PRIO 24
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
+#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS
+#define TCA_FLOWER_KEY_TCP_FLAGS 71
+#endif
+#ifndef HAVE_TCA_FLOWER_KEY_TCP_FLAGS_MASK
+#define TCA_FLOWER_KEY_TCP_FLAGS_MASK 72
+#endif
+#ifndef HAVE_TC_ACT_GOTO_CHAIN
+#define TC_ACT_GOTO_CHAIN 0x20000000
+#endif
+
+#ifndef IPV6_ADDR_LEN
+#define IPV6_ADDR_LEN 16
+#endif
+
+#ifndef IPV4_ADDR_LEN
+#define IPV4_ADDR_LEN 4
+#endif
+
+#ifndef TP_PORT_LEN
+#define TP_PORT_LEN 2 /* Transport Port (UDP/TCP) Length */
+#endif
+
+#ifndef TTL_LEN
+#define TTL_LEN 1
+#endif
+
+#ifndef TCA_ACT_MAX_PRIO
+#define TCA_ACT_MAX_PRIO 32
+#endif
+
+/**
+ * Structure for holding netlink context.
+ * Note the size of the message buffer, which is MNL_SOCKET_BUFFER_SIZE:
+ * using this buffer size (up to 8KB) ensures that netlink messages are
+ * never truncated.
+ */
+struct mlx5_flow_tcf_context {
+       struct mnl_socket *nl; /* NETLINK_ROUTE libmnl socket. */
+       uint32_t seq; /* Message sequence number. */
+       uint32_t buf_size; /* Message buffer size. */
+       uint8_t *buf; /* Message buffer. */
+};
+
+/** Structure used when extracting the values of flow counters
+ * from a netlink message.
+ */
+struct flow_tcf_stats_basic {
+       bool valid;
+       struct gnet_stats_basic counters;
+};
+
+/** Empty masks for known item types. */
+static const union {
+       struct rte_flow_item_port_id port_id;
+       struct rte_flow_item_eth eth;
+       struct rte_flow_item_vlan vlan;
+       struct rte_flow_item_ipv4 ipv4;
+       struct rte_flow_item_ipv6 ipv6;
+       struct rte_flow_item_tcp tcp;
+       struct rte_flow_item_udp udp;
+} flow_tcf_mask_empty;
+
+/** Supported masks for known item types. */
+static const struct {
+       struct rte_flow_item_port_id port_id;
+       struct rte_flow_item_eth eth;
+       struct rte_flow_item_vlan vlan;
+       struct rte_flow_item_ipv4 ipv4;
+       struct rte_flow_item_ipv6 ipv6;
+       struct rte_flow_item_tcp tcp;
+       struct rte_flow_item_udp udp;
+} flow_tcf_mask_supported = {
+       .port_id = {
+               .id = 0xffffffff,
+       },
+       .eth = {
+               .type = RTE_BE16(0xffff),
+               .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+               .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
+       },
+       .vlan = {
+               /* PCP and VID only, no DEI. */
+               .tci = RTE_BE16(0xefff),
+               .inner_type = RTE_BE16(0xffff),
+       },
+       .ipv4.hdr = {
+               .next_proto_id = 0xff,
+               .src_addr = RTE_BE32(0xffffffff),
+               .dst_addr = RTE_BE32(0xffffffff),
+       },
+       .ipv6.hdr = {
+               .proto = 0xff,
+               .src_addr =
+                       "\xff\xff\xff\xff\xff\xff\xff\xff"
+                       "\xff\xff\xff\xff\xff\xff\xff\xff",
+               .dst_addr =
+                       "\xff\xff\xff\xff\xff\xff\xff\xff"
+                       "\xff\xff\xff\xff\xff\xff\xff\xff",
+       },
+       .tcp.hdr = {
+               .src_port = RTE_BE16(0xffff),
+               .dst_port = RTE_BE16(0xffff),
+               .tcp_flags = 0xff,
+       },
+       .udp.hdr = {
+               .src_port = RTE_BE16(0xffff),
+               .dst_port = RTE_BE16(0xffff),
+       },
+};
+
+#define SZ_NLATTR_HDR MNL_ALIGN(sizeof(struct nlattr))
+#define SZ_NLATTR_NEST SZ_NLATTR_HDR
+#define SZ_NLATTR_DATA_OF(len) MNL_ALIGN(SZ_NLATTR_HDR + (len))
+#define SZ_NLATTR_TYPE_OF(typ) SZ_NLATTR_DATA_OF(sizeof(typ))
+#define SZ_NLATTR_STRZ_OF(str) SZ_NLATTR_DATA_OF(strlen(str) + 1)
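+
+/*
+ * Illustration (assuming the usual 4-byte netlink alignment, so
+ * SZ_NLATTR_HDR == 4):
+ *   SZ_NLATTR_TYPE_OF(uint16_t) == MNL_ALIGN(4 + 2) == 8
+ *   SZ_NLATTR_STRZ_OF("flower") == MNL_ALIGN(4 + 7) == 12
+ * These let later code precompute netlink attribute sizes when building
+ * messages.
+ */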
+
+#define PTOI_TABLE_SZ_MAX(dev) (mlx5_dev_to_port_id((dev)->device, NULL, 0) + 2)
+
+/** DPDK port to network interface index (ifindex) conversion. */
+struct flow_tcf_ptoi {
+       uint16_t port_id; /**< DPDK port ID. */
+       unsigned int ifindex; /**< Network interface index. */
+};
+
+/* Due to a limitation in the driver/FW. */
+#define MLX5_TCF_GROUP_ID_MAX 3
+#define MLX5_TCF_GROUP_PRIORITY_MAX 14
+
+#define MLX5_TCF_FATE_ACTIONS \
+       (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_PORT_ID | \
+        MLX5_FLOW_ACTION_JUMP)
+
+#define MLX5_TCF_VLAN_ACTIONS \
+       (MLX5_FLOW_ACTION_OF_POP_VLAN | MLX5_FLOW_ACTION_OF_PUSH_VLAN | \
+        MLX5_FLOW_ACTION_OF_SET_VLAN_VID | MLX5_FLOW_ACTION_OF_SET_VLAN_PCP)
+
+#define MLX5_TCF_PEDIT_ACTIONS \
+       (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST | \
+        MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST | \
+        MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST | \
+        MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL | \
+        MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)
+
+#define MLX5_TCF_CONFIG_ACTIONS \
+       (MLX5_FLOW_ACTION_PORT_ID | MLX5_FLOW_ACTION_JUMP | \
+        MLX5_FLOW_ACTION_OF_PUSH_VLAN | MLX5_FLOW_ACTION_OF_SET_VLAN_VID | \
+        MLX5_FLOW_ACTION_OF_SET_VLAN_PCP | \
+        (MLX5_TCF_PEDIT_ACTIONS & ~MLX5_FLOW_ACTION_DEC_TTL))
+
+#define MAX_PEDIT_KEYS 128
+#define SZ_PEDIT_KEY_VAL 4
+
+#define NUM_OF_PEDIT_KEYS(sz) \
+       (((sz) / SZ_PEDIT_KEY_VAL) + (((sz) % SZ_PEDIT_KEY_VAL) ? 1 : 0))
+
+struct pedit_key_ex {
+       enum pedit_header_type htype;
+       enum pedit_cmd cmd;
+};
+
+struct pedit_parser {
+       struct tc_pedit_sel sel;
+       struct tc_pedit_key keys[MAX_PEDIT_KEYS];
+       struct pedit_key_ex keys_ex[MAX_PEDIT_KEYS];
+};
+
+/**
+ * Create space for using the implicitly created TC flow counter.
+ *
+ * @return
+ *   A pointer to the counter data structure, NULL otherwise and
+ *   rte_errno is set.
+ */
+static struct mlx5_flow_counter *
+flow_tcf_counter_new(void)
+{
+       struct mlx5_flow_counter *cnt;
+
+       /*
+        * E-Switch counters cannot be shared and their IDs are unknown,
+        * so all counters are currently returned with ID 0. Switching
+        * to unique numbers may be better in the future.
+        */
+       struct mlx5_flow_counter tmpl = {
+               .ref_cnt = 1,
+       };
+       cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+       if (!cnt) {
+               rte_errno = ENOMEM;
+               return NULL;
+       }
+       *cnt = tmpl;
+       /* Implicit counter, do not add to list. */
+       return cnt;
+}
+
+/**
+ * Set pedit key of MAC address.
+ *
+ * @param[in] actions
+ *   Pointer to action specification.
+ * @param[in,out] p_parser
+ *   Pointer to pedit_parser.
+ */
+static void
+flow_tcf_pedit_key_set_mac(const struct rte_flow_action *actions,
+                          struct pedit_parser *p_parser)
+{
+       int idx = p_parser->sel.nkeys;
+       uint32_t off = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ?
+                                       offsetof(struct ether_hdr, s_addr) :
+                                       offsetof(struct ether_hdr, d_addr);
+       const struct rte_flow_action_set_mac *conf =
+               (const struct rte_flow_action_set_mac *)actions->conf;
+
+       p_parser->keys[idx].off = off;
+       p_parser->keys[idx].mask = ~UINT32_MAX;
+       p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
+       p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+       memcpy(&p_parser->keys[idx].val,
+               conf->mac_addr, SZ_PEDIT_KEY_VAL);
+       idx++;
+       p_parser->keys[idx].off = off + SZ_PEDIT_KEY_VAL;
+       p_parser->keys[idx].mask = 0xFFFF0000;
+       p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_ETH;
+       p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+       memcpy(&p_parser->keys[idx].val,
+               conf->mac_addr + SZ_PEDIT_KEY_VAL,
+               ETHER_ADDR_LEN - SZ_PEDIT_KEY_VAL);
+       p_parser->sel.nkeys = (++idx);
+}
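+
+/*
+ * Illustration (not part of the original code): the 6-byte MAC address
+ * spans two 4-byte pedit words. The first key rewrites a full word
+ * (AND mask ~UINT32_MAX == 0), while the second key's mask preserves the
+ * two bytes that follow the address, so only the remaining two address
+ * bytes are rewritten.
+ */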
+
+/**
+ * Set pedit key of decrease/set TTL.
+ *
+ * @param[in] actions
+ *   Pointer to action specification.
+ * @param[in,out] p_parser
+ *   Pointer to pedit_parser.
+ * @param[in] item_flags
+ *   Flags of all items present.
+ */
+static void
+flow_tcf_pedit_key_set_dec_ttl(const struct rte_flow_action *actions,
+                               struct pedit_parser *p_parser,
+                               uint64_t item_flags)
+{
+       int idx = p_parser->sel.nkeys;
+
+       p_parser->keys[idx].mask = 0xFFFFFF00;
+       if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4) {
+               p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
+               p_parser->keys[idx].off =
+                       offsetof(struct ipv4_hdr, time_to_live);
+       }
+       if (item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6) {
+               p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
+               p_parser->keys[idx].off =
+                       offsetof(struct ipv6_hdr, hop_limits);
+       }
+       if (actions->type == RTE_FLOW_ACTION_TYPE_DEC_TTL) {
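+               /*
+                * pedit has no explicit decrement command: adding 0xFF
+                * to the 8-bit TTL/hop-limit field wraps it around to
+                * value - 1, while the mask keeps the other bytes.
+                */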
+               p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_ADD;
+               p_parser->keys[idx].val = 0x000000FF;
+       } else {
+               p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+               p_parser->keys[idx].val =
+                       (__u32)((const struct rte_flow_action_set_ttl *)
+                        actions->conf)->ttl_value;
+       }
+       p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Set pedit key for the transport (TCP/UDP) port value.
+ *
+ * @param[in] actions
+ *   Pointer to action specification.
+ * @param[in,out] p_parser
+ *   Pointer to pedit_parser.
+ * @param[in] item_flags
+ *   Flags of all items present.
+ */
+static void
+flow_tcf_pedit_key_set_tp_port(const struct rte_flow_action *actions,
+                               struct pedit_parser *p_parser,
+                               uint64_t item_flags)
+{
+       int idx = p_parser->sel.nkeys;
+
+       if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
+               p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_UDP;
+       if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
+               p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_TCP;
+       p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+       /* The src/dst port offset is the same for TCP and UDP. */
+       p_parser->keys[idx].off =
+               actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ?
+               offsetof(struct tcp_hdr, src_port) :
+               offsetof(struct tcp_hdr, dst_port);
+       p_parser->keys[idx].mask = 0xFFFF0000;
+       p_parser->keys[idx].val =
+               (__u32)((const struct rte_flow_action_set_tp *)
+                               actions->conf)->port;
+       p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Set pedit key for an IPv6 address rewrite action.
+ *
+ * @param[in] actions
+ *   Pointer to action specification.
+ * @param[in,out] p_parser
+ *   Pointer to pedit_parser.
+ */
+static void
+flow_tcf_pedit_key_set_ipv6_addr(const struct rte_flow_action *actions,
+                                struct pedit_parser *p_parser)
+{
+       int idx = p_parser->sel.nkeys;
+       int keys = NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+       int off_base =
+               actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC ?
+               offsetof(struct ipv6_hdr, src_addr) :
+               offsetof(struct ipv6_hdr, dst_addr);
+       const struct rte_flow_action_set_ipv6 *conf =
+               (const struct rte_flow_action_set_ipv6 *)actions->conf;
+
+       for (int i = 0; i < keys; i++, idx++) {
+               p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP6;
+               p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+               p_parser->keys[idx].off = off_base + i * SZ_PEDIT_KEY_VAL;
+               p_parser->keys[idx].mask = ~UINT32_MAX;
+               memcpy(&p_parser->keys[idx].val,
+                       conf->ipv6_addr + i *  SZ_PEDIT_KEY_VAL,
+                       SZ_PEDIT_KEY_VAL);
+       }
+       p_parser->sel.nkeys += keys;
+}
+
+/**
+ * Set pedit key for an IPv4 address rewrite action.
+ *
+ * @param[in] actions
+ *   Pointer to action specification.
+ * @param[in,out] p_parser
+ *   Pointer to pedit_parser.
+ */
+static void
+flow_tcf_pedit_key_set_ipv4_addr(const struct rte_flow_action *actions,
+                                struct pedit_parser *p_parser)
+{
+       int idx = p_parser->sel.nkeys;
+
+       p_parser->keys_ex[idx].htype = TCA_PEDIT_KEY_EX_HDR_TYPE_IP4;
+       p_parser->keys_ex[idx].cmd = TCA_PEDIT_KEY_EX_CMD_SET;
+       p_parser->keys[idx].off =
+               actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ?
+               offsetof(struct ipv4_hdr, src_addr) :
+               offsetof(struct ipv4_hdr, dst_addr);
+       p_parser->keys[idx].mask = ~UINT32_MAX;
+       p_parser->keys[idx].val =
+               ((const struct rte_flow_action_set_ipv4 *)
+                actions->conf)->ipv4_addr;
+       p_parser->sel.nkeys = (++idx);
+}
+
+/**
+ * Create the pedit's netlink attributes in a pre-allocated
+ * Netlink message buffer.
+ *
+ * @param[in,out] nl
+ *   Pointer to the pre-allocated Netlink message buffer.
+ * @param[in,out] actions
+ *   Pointer to pointer of actions specification.
+ * @param[in] item_flags
+ *   Flags of all items present.
+ */
+static void
+flow_tcf_create_pedit_mnl_msg(struct nlmsghdr *nl,
+                             const struct rte_flow_action **actions,
+                             uint64_t item_flags)
+{
+       struct pedit_parser p_parser;
+       struct nlattr *na_act_options;
+       struct nlattr *na_pedit_keys;
+
+       memset(&p_parser, 0, sizeof(p_parser));
+       mnl_attr_put_strz(nl, TCA_ACT_KIND, "pedit");
+       na_act_options = mnl_attr_nest_start(nl, TCA_ACT_OPTIONS);
+       /* All modify-header actions must be gathered into one tc-pedit action. */
+       for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+               switch ((*actions)->type) {
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+                       flow_tcf_pedit_key_set_ipv4_addr(*actions, &p_parser);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+                       flow_tcf_pedit_key_set_ipv6_addr(*actions, &p_parser);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+                       flow_tcf_pedit_key_set_tp_port(*actions,
+                                                       &p_parser, item_flags);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+                       flow_tcf_pedit_key_set_dec_ttl(*actions,
+                                                       &p_parser, item_flags);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+                       flow_tcf_pedit_key_set_mac(*actions, &p_parser);
+                       break;
+               default:
+                       goto pedit_mnl_msg_done;
+               }
+       }
+pedit_mnl_msg_done:
+       p_parser.sel.action = TC_ACT_PIPE;
+       mnl_attr_put(nl, TCA_PEDIT_PARMS_EX,
+                    sizeof(p_parser.sel) +
+                    p_parser.sel.nkeys * sizeof(struct tc_pedit_key),
+                    &p_parser);
+       na_pedit_keys =
+               mnl_attr_nest_start(nl, TCA_PEDIT_KEYS_EX | NLA_F_NESTED);
+       for (int i = 0; i < p_parser.sel.nkeys; i++) {
+               struct nlattr *na_pedit_key =
+                       mnl_attr_nest_start(nl,
+                                           TCA_PEDIT_KEY_EX | NLA_F_NESTED);
+               mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_HTYPE,
+                                p_parser.keys_ex[i].htype);
+               mnl_attr_put_u16(nl, TCA_PEDIT_KEY_EX_CMD,
+                                p_parser.keys_ex[i].cmd);
+               mnl_attr_nest_end(nl, na_pedit_key);
+       }
+       mnl_attr_nest_end(nl, na_pedit_keys);
+       mnl_attr_nest_end(nl, na_act_options);
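+       /*
+        * Step back one action: the caller's loop increment will then
+        * resume at the first action not consumed by this routine.
+        */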
+       (*actions)--;
+}
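+
+/*
+ * A sketch of the attribute layout produced above:
+ *
+ *   TCA_ACT_KIND = "pedit"
+ *   TCA_ACT_OPTIONS (nested)
+ *     TCA_PEDIT_PARMS_EX = tc_pedit_sel + nkeys * tc_pedit_key
+ *     TCA_PEDIT_KEYS_EX (nested)
+ *       TCA_PEDIT_KEY_EX (nested) = { HTYPE, CMD }   (one per key)
+ */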
+
+/**
+ * Calculate the maximum memory size of one TC-pedit action.
+ * One TC-pedit action can contain a set of keys, each defining
+ * a rewrite element (rte_flow action).
+ *
+ * @param[in,out] actions
+ *   Actions specification.
+ * @param[in,out] action_flags
+ *   Actions flags.
+ *
+ * @return
+ *   Maximum memory size of one TC-pedit action.
+ */
+static int
+flow_tcf_get_pedit_actions_size(const struct rte_flow_action **actions,
+                               uint64_t *action_flags)
+{
+       int pedit_size = 0;
+       int keys = 0;
+       uint64_t flags = 0;
+
+       pedit_size += SZ_NLATTR_NEST + /* na_act_index. */
+                     SZ_NLATTR_STRZ_OF("pedit") +
+                     SZ_NLATTR_NEST; /* TCA_ACT_OPTIONS. */
+       for (; (*actions)->type != RTE_FLOW_ACTION_TYPE_END; (*actions)++) {
+               switch ((*actions)->type) {
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+                       keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_IPV4_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+                       keys += NUM_OF_PEDIT_KEYS(IPV4_ADDR_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_IPV4_DST;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+                       keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_IPV6_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+                       keys += NUM_OF_PEDIT_KEYS(IPV6_ADDR_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_IPV6_DST;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+                       /* TCP layout is the same as UDP. */
+                       keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_TP_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+                       /* TCP layout is the same as UDP. */
+                       keys += NUM_OF_PEDIT_KEYS(TP_PORT_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_TP_DST;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+                       keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_TTL;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+                       keys += NUM_OF_PEDIT_KEYS(TTL_LEN);
+                       flags |= MLX5_FLOW_ACTION_DEC_TTL;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+                       keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_MAC_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+                       keys += NUM_OF_PEDIT_KEYS(ETHER_ADDR_LEN);
+                       flags |= MLX5_FLOW_ACTION_SET_MAC_DST;
+                       break;
+               default:
+                       goto get_pedit_action_size_done;
+               }
+       }
+get_pedit_action_size_done:
+       /* TCA_PEDIT_PARMS_EX */
+       pedit_size +=
+               SZ_NLATTR_DATA_OF(sizeof(struct tc_pedit_sel) +
+                                 keys * sizeof(struct tc_pedit_key));
+       pedit_size += SZ_NLATTR_NEST; /* TCA_PEDIT_KEYS */
+       pedit_size += keys *
+                     /* TCA_PEDIT_KEY_EX + HTYPE + CMD */
+                     (SZ_NLATTR_NEST + SZ_NLATTR_DATA_OF(2) +
+                      SZ_NLATTR_DATA_OF(2));
+       (*action_flags) |= flags;
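+       /*
+        * As in flow_tcf_create_pedit_mnl_msg(), step back so the
+        * caller's loop revisits the first unconsumed action.
+        */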
+       (*actions)--;
+       return pedit_size;
+}
+
+/**
+ * Retrieve mask for pattern item.
+ *
+ * This function does basic sanity checks on a pattern item in order to
+ * return the most appropriate mask for it.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in] mask_default
+ *   Default mask for pattern item as specified by the flow API.
+ * @param[in] mask_supported
+ *   Mask fields supported by the implementation.
+ * @param[in] mask_empty
+ *   Empty mask to return when there is no specification.
+ * @param[in] mask_size
+ *   Size of the mask, in bytes.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   Either @p item->mask or one of the mask parameters on success, NULL
+ *   otherwise and rte_errno is set.
+ */
+static const void *
+flow_tcf_item_mask(const struct rte_flow_item *item, const void *mask_default,
+                  const void *mask_supported, const void *mask_empty,
+                  size_t mask_size, struct rte_flow_error *error)
+{
+       const uint8_t *mask;
+       size_t i;
+
+       /* item->last and item->mask cannot exist without item->spec. */
+       if (!item->spec && (item->mask || item->last)) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ITEM, item,
+                                  "\"mask\" or \"last\" field provided without"
+                                  " a corresponding \"spec\"");
+               return NULL;
+       }
+       /* No spec, no mask, no problem. */
+       if (!item->spec)
+               return mask_empty;
+       mask = item->mask ? item->mask : mask_default;
+       assert(mask);
+       /*
+        * Single-pass check to make sure that:
+        * - Mask is supported, no bits are set outside mask_supported.
+        * - Both item->spec and item->last are included in mask.
+        */
+       for (i = 0; i != mask_size; ++i) {
+               if (!mask[i])
+                       continue;
+               if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
+                   ((const uint8_t *)mask_supported)[i]) {
+                       rte_flow_error_set(error, ENOTSUP,
+                                          RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
+                                          "unsupported field found"
+                                          " in \"mask\"");
+                       return NULL;
+               }
+               if (item->last &&
+                   (((const uint8_t *)item->spec)[i] & mask[i]) !=
+                   (((const uint8_t *)item->last)[i] & mask[i])) {
+                       rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ITEM_LAST,
+                                          item->last,
+                                          "range between \"spec\" and \"last\""
+                                          " not comprised in \"mask\"");
+                       return NULL;
+               }
+       }
+       return mask;
+}
+
+/**
+ * Build a conversion table between port ID and ifindex.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[out] ptoi
+ *   Pointer to ptoi table.
+ * @param[in] len
+ *   Size of ptoi table provided.
+ *
+ * @return
+ *   Size of ptoi table filled.
+ */
+static unsigned int
+flow_tcf_build_ptoi_table(struct rte_eth_dev *dev, struct flow_tcf_ptoi *ptoi,
+                         unsigned int len)
+{
+       unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
+       uint16_t port_id[n + 1];
+       unsigned int i;
+       unsigned int own = 0;
+
+       /* At least one port is needed when no switch domain is present. */
+       if (!n) {
+               n = 1;
+               port_id[0] = dev->data->port_id;
+       } else {
+               n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
+       }
+       if (n > len)
+               return 0;
+       for (i = 0; i != n; ++i) {
+               struct rte_eth_dev_info dev_info;
+
+               rte_eth_dev_info_get(port_id[i], &dev_info);
+               if (port_id[i] == dev->data->port_id)
+                       own = i;
+               ptoi[i].port_id = port_id[i];
+               ptoi[i].ifindex = dev_info.if_index;
+       }
+       /* Ensure first entry of ptoi[] is the current device. */
+       if (own) {
+               ptoi[n] = ptoi[0];
+               ptoi[0] = ptoi[own];
+               ptoi[own] = ptoi[n];
+       }
+       /* An entry with zero ifindex terminates ptoi[]. */
+       ptoi[n].port_id = 0;
+       ptoi[n].ifindex = 0;
+       return n;
+}
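+
+/*
+ * Hypothetical example: for a device whose switch domain also contains
+ * port 0, a call on port 1 could fill ptoi[] as
+ *   { {1, ifindex1}, {0, ifindex0}, {0, 0} }
+ * with the caller's own port swapped into ptoi[0] and a zeroed entry
+ * terminating the table.
+ */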
+
+/**
+ * Verify the @p attr will be correctly understood by the E-switch.
+ *
+ * @param[in] attr
+ *   Pointer to flow attributes
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_validate_attributes(const struct rte_flow_attr *attr,
+                            struct rte_flow_error *error)
+{
+       /*
+        * Supported attributes: groups, some priorities and ingress only.
+        * Groups are supported only if the kernel supports chains. The
+        * transfer attribute is not checked here; that is the caller's
+        * responsibility.
+        */
+       if (attr->group > MLX5_TCF_GROUP_ID_MAX)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
+                                         "group ID larger than "
+                                         RTE_STR(MLX5_TCF_GROUP_ID_MAX)
+                                         " isn't supported");
+       else if (attr->group > 0 &&
+                attr->priority > MLX5_TCF_GROUP_PRIORITY_MAX)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+                                         attr,
+                                         "lowest priority level is "
+                                         RTE_STR(MLX5_TCF_GROUP_PRIORITY_MAX)
+                                         " when group is configured");
+       else if (attr->priority > 0xfffe)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
+                                         attr,
+                                         "lowest priority level is 0xfffe");
+       if (!attr->ingress)
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+                                         attr, "only ingress is supported");
+       if (attr->egress)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+                                         attr, "egress is not supported");
+       return 0;
+}
+
+/**
+ * Validate flow for E-Switch.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_validate(struct rte_eth_dev *dev,
+                 const struct rte_flow_attr *attr,
+                 const struct rte_flow_item items[],
+                 const struct rte_flow_action actions[],
+                 struct rte_flow_error *error)
+{
+       union {
+               const struct rte_flow_item_port_id *port_id;
+               const struct rte_flow_item_eth *eth;
+               const struct rte_flow_item_vlan *vlan;
+               const struct rte_flow_item_ipv4 *ipv4;
+               const struct rte_flow_item_ipv6 *ipv6;
+               const struct rte_flow_item_tcp *tcp;
+               const struct rte_flow_item_udp *udp;
+       } spec, mask;
+       union {
+               const struct rte_flow_action_port_id *port_id;
+               const struct rte_flow_action_jump *jump;
+               const struct rte_flow_action_of_push_vlan *of_push_vlan;
+               const struct rte_flow_action_of_set_vlan_vid *
+                       of_set_vlan_vid;
+               const struct rte_flow_action_of_set_vlan_pcp *
+                       of_set_vlan_pcp;
+               const struct rte_flow_action_set_ipv4 *set_ipv4;
+               const struct rte_flow_action_set_ipv6 *set_ipv6;
+       } conf;
+       uint64_t item_flags = 0;
+       uint64_t action_flags = 0;
+       uint8_t next_protocol = -1;
+       unsigned int tcm_ifindex = 0;
+       uint8_t pedit_validated = 0;
+       struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
+       struct rte_eth_dev *port_id_dev = NULL;
+       bool in_port_id_set = false;
+       int ret;
+
+       claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
+                                               PTOI_TABLE_SZ_MAX(dev)));
+       ret = flow_tcf_validate_attributes(attr, error);
+       if (ret < 0)
+               return ret;
+       for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+               unsigned int i;
+
+               switch (items->type) {
+               case RTE_FLOW_ITEM_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ITEM_TYPE_PORT_ID:
+                       mask.port_id = flow_tcf_item_mask
+                               (items, &rte_flow_item_port_id_mask,
+                                &flow_tcf_mask_supported.port_id,
+                                &flow_tcf_mask_empty.port_id,
+                                sizeof(flow_tcf_mask_supported.port_id),
+                                error);
+                       if (!mask.port_id)
+                               return -rte_errno;
+                       if (mask.port_id == &flow_tcf_mask_empty.port_id) {
+                               in_port_id_set = 1;
+                               break;
+                       }
+                       spec.port_id = items->spec;
+                       if (mask.port_id->id && mask.port_id->id != 0xffffffff)
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+                                        mask.port_id,
+                                        "no support for partial mask on"
+                                        " \"id\" field");
+                       if (!mask.port_id->id)
+                               i = 0;
+                       else
+                               for (i = 0; ptoi[i].ifindex; ++i)
+                                       if (ptoi[i].port_id == spec.port_id->id)
+                                               break;
+                       if (!ptoi[i].ifindex)
+                               return rte_flow_error_set
+                                       (error, ENODEV,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+                                        spec.port_id,
+                                        "missing data to convert port ID to"
+                                        " ifindex");
+                       if (in_port_id_set && ptoi[i].ifindex != tcm_ifindex)
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
+                                        spec.port_id,
+                                        "cannot match traffic for"
+                                        " several port IDs through"
+                                        " a single flow rule");
+                       tcm_ifindex = ptoi[i].ifindex;
+                       in_port_id_set = 1;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_ETH:
+                       ret = mlx5_flow_validate_item_eth(items, item_flags,
+                                                         error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
+                       /* TODO:
+                        * Redundant check due to different supported mask.
+                        * Same for the rest of items.
+                        */
+                       mask.eth = flow_tcf_item_mask
+                               (items, &rte_flow_item_eth_mask,
+                                &flow_tcf_mask_supported.eth,
+                                &flow_tcf_mask_empty.eth,
+                                sizeof(flow_tcf_mask_supported.eth),
+                                error);
+                       if (!mask.eth)
+                               return -rte_errno;
+                       if (mask.eth->type && mask.eth->type !=
+                           RTE_BE16(0xffff))
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+                                        mask.eth,
+                                        "no support for partial mask on"
+                                        " \"type\" field");
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VLAN:
+                       ret = mlx5_flow_validate_item_vlan(items, item_flags,
+                                                          error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
+                       mask.vlan = flow_tcf_item_mask
+                               (items, &rte_flow_item_vlan_mask,
+                                &flow_tcf_mask_supported.vlan,
+                                &flow_tcf_mask_empty.vlan,
+                                sizeof(flow_tcf_mask_supported.vlan),
+                                error);
+                       if (!mask.vlan)
+                               return -rte_errno;
+                       if ((mask.vlan->tci & RTE_BE16(0xe000) &&
+                            (mask.vlan->tci & RTE_BE16(0xe000)) !=
+                             RTE_BE16(0xe000)) ||
+                           (mask.vlan->tci & RTE_BE16(0x0fff) &&
+                            (mask.vlan->tci & RTE_BE16(0x0fff)) !=
+                             RTE_BE16(0x0fff)) ||
+                           (mask.vlan->inner_type &&
+                            mask.vlan->inner_type != RTE_BE16(0xffff)))
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+                                        mask.vlan,
+                                        "no support for partial masks on"
+                                        " \"tci\" (PCP and VID parts) and"
+                                        " \"inner_type\" fields");
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV4:
+                       ret = mlx5_flow_validate_item_ipv4(items, item_flags,
+                                                          error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+                       mask.ipv4 = flow_tcf_item_mask
+                               (items, &rte_flow_item_ipv4_mask,
+                                &flow_tcf_mask_supported.ipv4,
+                                &flow_tcf_mask_empty.ipv4,
+                                sizeof(flow_tcf_mask_supported.ipv4),
+                                error);
+                       if (!mask.ipv4)
+                               return -rte_errno;
+                       if (mask.ipv4->hdr.next_proto_id &&
+                           mask.ipv4->hdr.next_proto_id != 0xff)
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+                                        mask.ipv4,
+                                        "no support for partial mask on"
+                                        " \"hdr.next_proto_id\" field");
+                       else if (mask.ipv4->hdr.next_proto_id)
+                               next_protocol =
+                                       ((const struct rte_flow_item_ipv4 *)
+                                        (items->spec))->hdr.next_proto_id;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV6:
+                       ret = mlx5_flow_validate_item_ipv6(items, item_flags,
+                                                          error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+                       mask.ipv6 = flow_tcf_item_mask
+                               (items, &rte_flow_item_ipv6_mask,
+                                &flow_tcf_mask_supported.ipv6,
+                                &flow_tcf_mask_empty.ipv6,
+                                sizeof(flow_tcf_mask_supported.ipv6),
+                                error);
+                       if (!mask.ipv6)
+                               return -rte_errno;
+                       if (mask.ipv6->hdr.proto &&
+                           mask.ipv6->hdr.proto != 0xff)
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ITEM_MASK,
+                                        mask.ipv6,
+                                        "no support for partial mask on"
+                                        " \"hdr.proto\" field");
+                       else if (mask.ipv6->hdr.proto)
+                               next_protocol =
+                                       ((const struct rte_flow_item_ipv6 *)
+                                        (items->spec))->hdr.proto;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_UDP:
+                       ret = mlx5_flow_validate_item_udp(items, item_flags,
+                                                         next_protocol, error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+                       mask.udp = flow_tcf_item_mask
+                               (items, &rte_flow_item_udp_mask,
+                                &flow_tcf_mask_supported.udp,
+                                &flow_tcf_mask_empty.udp,
+                                sizeof(flow_tcf_mask_supported.udp),
+                                error);
+                       if (!mask.udp)
+                               return -rte_errno;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_TCP:
+                       ret = mlx5_flow_validate_item_tcp
+                                            (items, item_flags,
+                                             next_protocol,
+                                             &flow_tcf_mask_supported.tcp,
+                                             error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+                       mask.tcp = flow_tcf_item_mask
+                               (items, &rte_flow_item_tcp_mask,
+                                &flow_tcf_mask_supported.tcp,
+                                &flow_tcf_mask_empty.tcp,
+                                sizeof(flow_tcf_mask_supported.tcp),
+                                error);
+                       if (!mask.tcp)
+                               return -rte_errno;
+                       break;
+               default:
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ITEM,
+                                                 NULL, "item not supported");
+               }
+       }
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+               unsigned int i;
+               uint64_t current_action_flag = 0;
+
+               switch (actions->type) {
+               case RTE_FLOW_ACTION_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ACTION_TYPE_PORT_ID:
+                       current_action_flag = MLX5_FLOW_ACTION_PORT_ID;
+                       if (!actions->conf)
+                               break;
+                       conf.port_id = actions->conf;
+                       if (conf.port_id->original)
+                               i = 0;
+                       else
+                               for (i = 0; ptoi[i].ifindex; ++i)
+                                       if (ptoi[i].port_id == conf.port_id->id)
+                                               break;
+                       if (!ptoi[i].ifindex)
+                               return rte_flow_error_set
+                                       (error, ENODEV,
+                                        RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                        conf.port_id,
+                                        "missing data to convert port ID to"
+                                        " ifindex");
+                       port_id_dev = &rte_eth_devices[conf.port_id->id];
+                       break;
+               case RTE_FLOW_ACTION_TYPE_JUMP:
+                       current_action_flag = MLX5_FLOW_ACTION_JUMP;
+                       if (!actions->conf)
+                               break;
+                       conf.jump = actions->conf;
+                       if (attr->group >= conf.jump->group)
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ACTION,
+                                        actions,
+                                        "can only jump to a group with"
+                                        " a higher ID");
+                       break;
+               case RTE_FLOW_ACTION_TYPE_DROP:
+                       current_action_flag = MLX5_FLOW_ACTION_DROP;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+                       break;
+               case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+                       current_action_flag = MLX5_FLOW_ACTION_OF_POP_VLAN;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+                       current_action_flag = MLX5_FLOW_ACTION_OF_PUSH_VLAN;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+                       if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ACTION, actions,
+                                        "vlan modify is not supported,"
+                                        " set action must follow push action");
+                       current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
+                       if (!(action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN))
+                               return rte_flow_error_set
+                                       (error, ENOTSUP,
+                                        RTE_FLOW_ERROR_TYPE_ACTION, actions,
+                                        "vlan modify is not supported,"
+                                        " set action must follow push action");
+                       current_action_flag = MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_IPV4_DST;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_IPV6_DST;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_TP_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_TP_DST;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_TTL;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+                       current_action_flag = MLX5_FLOW_ACTION_DEC_TTL;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_MAC_SRC;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+                       current_action_flag = MLX5_FLOW_ACTION_SET_MAC_DST;
+                       break;
+               default:
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "action not supported");
+               }
+               if (current_action_flag & MLX5_TCF_CONFIG_ACTIONS) {
+                       if (!actions->conf)
+                               return rte_flow_error_set(error, EINVAL,
+                                               RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                               actions,
+                                               "action configuration not set");
+               }
+               if ((current_action_flag & MLX5_TCF_PEDIT_ACTIONS) &&
+                   pedit_validated)
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "set actions should be "
+                                                 "listed successively");
+               if ((current_action_flag & ~MLX5_TCF_PEDIT_ACTIONS) &&
+                   (action_flags & MLX5_TCF_PEDIT_ACTIONS))
+                       pedit_validated = 1;
+               if ((current_action_flag & MLX5_TCF_FATE_ACTIONS) &&
+                   (action_flags & MLX5_TCF_FATE_ACTIONS))
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "can't have multiple fate"
+                                                 " actions");
+               action_flags |= current_action_flag;
+       }
+       if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
+           (action_flags & MLX5_FLOW_ACTION_DROP))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION,
+                                         actions,
+                                         "set action is not compatible with "
+                                         "drop action");
+       if ((action_flags & MLX5_TCF_PEDIT_ACTIONS) &&
+           !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION,
+                                         actions,
+                                         "set action must be followed by "
+                                         "port_id action");
+       if (action_flags &
+          (MLX5_FLOW_ACTION_SET_IPV4_SRC | MLX5_FLOW_ACTION_SET_IPV4_DST)) {
+               if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4))
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "no ipv4 item found in"
+                                                 " pattern");
+       }
+       if (action_flags &
+          (MLX5_FLOW_ACTION_SET_IPV6_SRC | MLX5_FLOW_ACTION_SET_IPV6_DST)) {
+               if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV6))
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "no ipv6 item found in"
+                                                 " pattern");
+       }
+       if (action_flags &
+          (MLX5_FLOW_ACTION_SET_TP_SRC | MLX5_FLOW_ACTION_SET_TP_DST)) {
+               if (!(item_flags &
+                    (MLX5_FLOW_LAYER_OUTER_L4_UDP |
+                     MLX5_FLOW_LAYER_OUTER_L4_TCP)))
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "no TCP/UDP item found in"
+                                                 " pattern");
+       }
+       /*
+        * FW syndrome (0xA9C090):
+        *     set_flow_table_entry: push vlan action fte in fdb can ONLY be
+        *     forward to the uplink.
+        */
+       if ((action_flags & MLX5_FLOW_ACTION_OF_PUSH_VLAN) &&
+           (action_flags & MLX5_FLOW_ACTION_PORT_ID) &&
+           ((struct priv *)port_id_dev->data->dev_private)->representor)
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, actions,
+                                         "vlan push can only be applied"
+                                         " when forwarding to uplink port");
+       /*
+        * FW syndrome (0x294609):
+        *     set_flow_table_entry: modify/pop/push actions in fdb flow table
+        *     are supported only while forwarding to vport.
+        */
+       if ((action_flags & MLX5_TCF_VLAN_ACTIONS) &&
+           !(action_flags & MLX5_FLOW_ACTION_PORT_ID))
+               return rte_flow_error_set(error, ENOTSUP,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, actions,
+                                         "vlan actions are supported"
+                                         " only with port_id action");
+       if (!(action_flags & MLX5_TCF_FATE_ACTIONS))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, actions,
+                                         "no fate action is found");
+       if (action_flags &
+          (MLX5_FLOW_ACTION_SET_TTL | MLX5_FLOW_ACTION_DEC_TTL)) {
+               if (!(item_flags &
+                    (MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
+                     MLX5_FLOW_LAYER_OUTER_L3_IPV6)))
+                       return rte_flow_error_set(error, EINVAL,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "no IP found in pattern");
+       }
+       if (action_flags &
+           (MLX5_FLOW_ACTION_SET_MAC_SRC | MLX5_FLOW_ACTION_SET_MAC_DST)) {
+               if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L2))
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "no ethernet found in"
+                                                 " pattern");
+       }
+       return 0;
+}
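+
+/*
+ * A minimal sketch (hypothetical values) of a rule passing this
+ * validation, expressed as rte_flow arrays:
+ *
+ *   struct rte_flow_attr attr = { .ingress = 1 };
+ *   struct rte_flow_item pattern[] = {
+ *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
+ *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
+ *           { .type = RTE_FLOW_ITEM_TYPE_END },
+ *   };
+ *   struct rte_flow_action actions[] = {
+ *           { .type = RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC,
+ *             .conf = &(struct rte_flow_action_set_ipv4){
+ *                     .ipv4_addr = RTE_BE32(IPv4(10, 0, 0, 1)) } },
+ *           { .type = RTE_FLOW_ACTION_TYPE_PORT_ID,
+ *             .conf = &(struct rte_flow_action_port_id){ .id = 1 } },
+ *           { .type = RTE_FLOW_ACTION_TYPE_END },
+ *   };
+ *
+ * The set (pedit) action is followed by the required port_id fate
+ * action and the rewritten IPv4 layer is present in the pattern.
+ */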
+
+/**
+ * Calculate maximum size of memory for flow items of Linux TC flower and
+ * extract specified items.
+ *
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[out] item_flags
+ *   Pointer to the detected items.
+ *
+ * @return
+ *   Maximum size of memory for items.
+ */
+static int
+flow_tcf_get_items_and_size(const struct rte_flow_attr *attr,
+                           const struct rte_flow_item items[],
+                           uint64_t *item_flags)
+{
+       int size = 0;
+       uint64_t flags = 0;
+
+       size += SZ_NLATTR_STRZ_OF("flower") +
+               SZ_NLATTR_NEST + /* TCA_OPTIONS. */
+               SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CLS_FLAGS_SKIP_SW. */
+       if (attr->group > 0)
+               size += SZ_NLATTR_TYPE_OF(uint32_t); /* TCA_CHAIN. */
+       for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+               switch (items->type) {
+               case RTE_FLOW_ITEM_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ITEM_TYPE_PORT_ID:
+                       break;
+               case RTE_FLOW_ITEM_TYPE_ETH:
+                       size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+                               SZ_NLATTR_DATA_OF(ETHER_ADDR_LEN) * 4;
+                               /* dst/src MAC addr and mask. */
+                       flags |= MLX5_FLOW_LAYER_OUTER_L2;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VLAN:
+                       size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+                               SZ_NLATTR_TYPE_OF(uint16_t) +
+                               /* VLAN Ether type. */
+                               SZ_NLATTR_TYPE_OF(uint8_t) + /* VLAN prio. */
+                               SZ_NLATTR_TYPE_OF(uint16_t); /* VLAN ID. */
+                       flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV4:
+                       size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+                               SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+                               SZ_NLATTR_TYPE_OF(uint32_t) * 4;
+                               /* dst/src IP addr and mask. */
+                       flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV6:
+                       size += SZ_NLATTR_TYPE_OF(uint16_t) + /* Ether type. */
+                               SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+                               SZ_NLATTR_TYPE_OF(IPV6_ADDR_LEN) * 4;
+                               /* dst/src IP addr and mask. */
+                       flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_UDP:
+                       size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+                               SZ_NLATTR_TYPE_OF(uint16_t) * 4;
+                               /* dst/src port and mask. */
+                       flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_TCP:
+                       size += SZ_NLATTR_TYPE_OF(uint8_t) + /* IP proto. */
+                               SZ_NLATTR_TYPE_OF(uint16_t) * 4;
+                               /* dst/src port and mask. */
+                       flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+                       break;
+               default:
+                       DRV_LOG(WARNING,
+                               "unsupported item %p type %d,"
+                               " items must be validated before flow creation",
+                               (const void *)items, items->type);
+                       break;
+               }
+       }
+       *item_flags = flags;
+       return size;
+}
+
+/**
+ * Calculate maximum size of memory for flow actions of Linux TC flower and
+ * extract specified actions.
+ *
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] action_flags
+ *   Pointer to the detected actions.
+ *
+ * @return
+ *   Maximum size of memory for actions.
+ */
+static int
+flow_tcf_get_actions_and_size(const struct rte_flow_action actions[],
+                             uint64_t *action_flags)
+{
+       int size = 0;
+       uint64_t flags = 0;
+
+       size += SZ_NLATTR_NEST; /* TCA_FLOWER_ACT. */
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+               switch (actions->type) {
+               case RTE_FLOW_ACTION_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ACTION_TYPE_PORT_ID:
+                       size += SZ_NLATTR_NEST + /* na_act_index. */
+                               SZ_NLATTR_STRZ_OF("mirred") +
+                               SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+                               SZ_NLATTR_TYPE_OF(struct tc_mirred);
+                       flags |= MLX5_FLOW_ACTION_PORT_ID;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_JUMP:
+                       size += SZ_NLATTR_NEST + /* na_act_index. */
+                               SZ_NLATTR_STRZ_OF("gact") +
+                               SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+                               SZ_NLATTR_TYPE_OF(struct tc_gact);
+                       flags |= MLX5_FLOW_ACTION_JUMP;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_DROP:
+                       size += SZ_NLATTR_NEST + /* na_act_index. */
+                               SZ_NLATTR_STRZ_OF("gact") +
+                               SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+                               SZ_NLATTR_TYPE_OF(struct tc_gact);
+                       flags |= MLX5_FLOW_ACTION_DROP;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+                       break;
+               case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+                       flags |= MLX5_FLOW_ACTION_OF_POP_VLAN;
+                       goto action_of_vlan;
+               case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+                       flags |= MLX5_FLOW_ACTION_OF_PUSH_VLAN;
+                       goto action_of_vlan;
+               case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+                       flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_VID;
+                       goto action_of_vlan;
+               case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
+                       flags |= MLX5_FLOW_ACTION_OF_SET_VLAN_PCP;
+                       goto action_of_vlan;
+action_of_vlan:
+                       size += SZ_NLATTR_NEST + /* na_act_index. */
+                               SZ_NLATTR_STRZ_OF("vlan") +
+                               SZ_NLATTR_NEST + /* TCA_ACT_OPTIONS. */
+                               SZ_NLATTR_TYPE_OF(struct tc_vlan) +
+                               SZ_NLATTR_TYPE_OF(uint16_t) +
+                               /* VLAN protocol. */
+                               SZ_NLATTR_TYPE_OF(uint16_t) + /* VLAN ID. */
+                               SZ_NLATTR_TYPE_OF(uint8_t); /* VLAN prio. */
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+                       size += flow_tcf_get_pedit_actions_size(&actions,
+                                                               &flags);
+                       break;
+               default:
+                       DRV_LOG(WARNING,
+                               "unsupported action %p type %d,"
+                               " actions must be validated before flow creation",
+                               (const void *)actions, actions->type);
+                       break;
+               }
+       }
+       *action_flags = flags;
+       return size;
+}
+
+/**
+ * Brand rtnetlink buffer with unique handle.
+ *
+ * This handle should be unique for a given network interface to avoid
+ * collisions.
+ *
+ * @param nlh
+ *   Pointer to Netlink message.
+ * @param handle
+ *   Unique 32-bit handle to use.
+ */
+static void
+flow_tcf_nl_brand(struct nlmsghdr *nlh, uint32_t handle)
+{
+       struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);
+
+       tcm->tcm_handle = handle;
+       DRV_LOG(DEBUG, "Netlink msg %p is branded with handle %x",
+               (void *)nlh, handle);
+}
+
+/**
+ * Prepare a flow object for Linux TC flower. It calculates the maximum size of
+ * memory required, allocates the memory, initializes Netlink message headers
+ * and sets a unique TC message handle.
+ *
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] item_flags
+ *   Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ *   Pointer to bit mask of all actions detected.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Pointer to mlx5_flow object on success,
+ *   otherwise NULL and rte_errno is set.
+ */
+static struct mlx5_flow *
+flow_tcf_prepare(const struct rte_flow_attr *attr,
+                const struct rte_flow_item items[],
+                const struct rte_flow_action actions[],
+                uint64_t *item_flags, uint64_t *action_flags,
+                struct rte_flow_error *error)
+{
+       size_t size = sizeof(struct mlx5_flow) +
+                     MNL_ALIGN(sizeof(struct nlmsghdr)) +
+                     MNL_ALIGN(sizeof(struct tcmsg));
+       struct mlx5_flow *dev_flow;
+       struct nlmsghdr *nlh;
+       struct tcmsg *tcm;
+
+       size += flow_tcf_get_items_and_size(attr, items, item_flags);
+       size += flow_tcf_get_actions_and_size(actions, action_flags);
+       dev_flow = rte_zmalloc(__func__, size, MNL_ALIGNTO);
+       if (!dev_flow) {
+               rte_flow_error_set(error, ENOMEM,
+                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                  "not enough memory to create E-Switch flow");
+               return NULL;
+       }
+       nlh = mnl_nlmsg_put_header((void *)(dev_flow + 1));
+       tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+       *dev_flow = (struct mlx5_flow){
+               .tcf = (struct mlx5_flow_tcf){
+                       .nlh = nlh,
+                       .tcm = tcm,
+               },
+       };
+       /*
+        * Generate a reasonably unique handle based on the address of the
+        * target buffer.
+        *
+        * This is straightforward on 32-bit systems where the flow pointer
+        * can be used directly. Otherwise its least significant part is
+        * taken after shifting the pointer right by the log2 of the
+        * previous power of two of the buffer size.
+        */
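+       /*
+        * Hypothetical example: for a buffer size rounded down to 512
+        * bytes, a 64-bit pointer is shifted right by log2(512) = 9
+        * bits, so handles of adjacent allocations still differ.
+        */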
+       if (sizeof(dev_flow) <= 4)
+               flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow);
+       else
+               flow_tcf_nl_brand(nlh, (uintptr_t)dev_flow >>
+                                      rte_log2_u32(rte_align32prevpow2(size)));
+       return dev_flow;
+}
+
+/**
+ * Make adjustments for supporting count actions.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_translate_action_count(struct rte_eth_dev *dev __rte_unused,
+                                 struct mlx5_flow *dev_flow,
+                                 struct rte_flow_error *error)
+{
+       struct rte_flow *flow = dev_flow->flow;
+
+       if (!flow->counter) {
+               flow->counter = flow_tcf_counter_new();
+               if (!flow->counter)
+                       return rte_flow_error_set(error, rte_errno,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 NULL,
+                                                 "cannot get counter"
+                                                 " context.");
+       }
+       return 0;
+}
+
+/**
+ * Translate flow for Linux TC flower and construct Netlink message.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] dev_flow
+ *   Pointer to the sub flow.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
+                  const struct rte_flow_attr *attr,
+                  const struct rte_flow_item items[],
+                  const struct rte_flow_action actions[],
+                  struct rte_flow_error *error)
+{
+       union {
+               const struct rte_flow_item_port_id *port_id;
+               const struct rte_flow_item_eth *eth;
+               const struct rte_flow_item_vlan *vlan;
+               const struct rte_flow_item_ipv4 *ipv4;
+               const struct rte_flow_item_ipv6 *ipv6;
+               const struct rte_flow_item_tcp *tcp;
+               const struct rte_flow_item_udp *udp;
+       } spec, mask;
+       union {
+               const struct rte_flow_action_port_id *port_id;
+               const struct rte_flow_action_jump *jump;
+               const struct rte_flow_action_of_push_vlan *of_push_vlan;
+               const struct rte_flow_action_of_set_vlan_vid *
+                       of_set_vlan_vid;
+               const struct rte_flow_action_of_set_vlan_pcp *
+                       of_set_vlan_pcp;
+       } conf;
+       struct flow_tcf_ptoi ptoi[PTOI_TABLE_SZ_MAX(dev)];
+       struct nlmsghdr *nlh = dev_flow->tcf.nlh;
+       struct tcmsg *tcm = dev_flow->tcf.tcm;
+       uint32_t na_act_index_cur;
+       bool eth_type_set = 0;
+       bool vlan_present = 0;
+       bool vlan_eth_type_set = 0;
+       bool ip_proto_set = 0;
+       struct nlattr *na_flower;
+       struct nlattr *na_flower_act;
+       struct nlattr *na_vlan_id = NULL;
+       struct nlattr *na_vlan_priority = NULL;
+       uint64_t item_flags = 0;
+       int ret;
+
+       claim_nonzero(flow_tcf_build_ptoi_table(dev, ptoi,
+                                               PTOI_TABLE_SZ_MAX(dev)));
+       /* Prepare API must have been called beforehand. */
+       assert(nlh != NULL && tcm != NULL);
+       tcm->tcm_family = AF_UNSPEC;
+       tcm->tcm_ifindex = ptoi[0].ifindex;
+       tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
+       /*
+        * Priority cannot be zero to prevent the kernel from picking one
+        * automatically.
+        */
+       tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
+                                 RTE_BE16(ETH_P_ALL));
+       if (attr->group > 0)
+               mnl_attr_put_u32(nlh, TCA_CHAIN, attr->group);
+       mnl_attr_put_strz(nlh, TCA_KIND, "flower");
+       na_flower = mnl_attr_nest_start(nlh, TCA_OPTIONS);
+       mnl_attr_put_u32(nlh, TCA_FLOWER_FLAGS, TCA_CLS_FLAGS_SKIP_SW);
+       for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+               unsigned int i;
+
+               switch (items->type) {
+               case RTE_FLOW_ITEM_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ITEM_TYPE_PORT_ID:
+                       mask.port_id = flow_tcf_item_mask
+                               (items, &rte_flow_item_port_id_mask,
+                                &flow_tcf_mask_supported.port_id,
+                                &flow_tcf_mask_empty.port_id,
+                                sizeof(flow_tcf_mask_supported.port_id),
+                                error);
+                       assert(mask.port_id);
+                       if (mask.port_id == &flow_tcf_mask_empty.port_id)
+                               break;
+                       spec.port_id = items->spec;
+                       if (!mask.port_id->id)
+                               i = 0;
+                       else
+                               for (i = 0; ptoi[i].ifindex; ++i)
+                                       if (ptoi[i].port_id == spec.port_id->id)
+                                               break;
+                       assert(ptoi[i].ifindex);
+                       tcm->tcm_ifindex = ptoi[i].ifindex;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_ETH:
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L2;
+                       mask.eth = flow_tcf_item_mask
+                               (items, &rte_flow_item_eth_mask,
+                                &flow_tcf_mask_supported.eth,
+                                &flow_tcf_mask_empty.eth,
+                                sizeof(flow_tcf_mask_supported.eth),
+                                error);
+                       assert(mask.eth);
+                       if (mask.eth == &flow_tcf_mask_empty.eth)
+                               break;
+                       spec.eth = items->spec;
+                       if (mask.eth->type) {
+                               mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
+                                                spec.eth->type);
+                               eth_type_set = 1;
+                       }
+                       if (!is_zero_ether_addr(&mask.eth->dst)) {
+                               mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST,
+                                            ETHER_ADDR_LEN,
+                                            spec.eth->dst.addr_bytes);
+                               mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_DST_MASK,
+                                            ETHER_ADDR_LEN,
+                                            mask.eth->dst.addr_bytes);
+                       }
+                       if (!is_zero_ether_addr(&mask.eth->src)) {
+                               mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC,
+                                            ETHER_ADDR_LEN,
+                                            spec.eth->src.addr_bytes);
+                               mnl_attr_put(nlh, TCA_FLOWER_KEY_ETH_SRC_MASK,
+                                            ETHER_ADDR_LEN,
+                                            mask.eth->src.addr_bytes);
+                       }
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VLAN:
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_VLAN;
+                       mask.vlan = flow_tcf_item_mask
+                               (items, &rte_flow_item_vlan_mask,
+                                &flow_tcf_mask_supported.vlan,
+                                &flow_tcf_mask_empty.vlan,
+                                sizeof(flow_tcf_mask_supported.vlan),
+                                error);
+                       assert(mask.vlan);
+                       if (!eth_type_set)
+                               mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_ETH_TYPE,
+                                                RTE_BE16(ETH_P_8021Q));
+                       eth_type_set = 1;
+                       vlan_present = 1;
+                       if (mask.vlan == &flow_tcf_mask_empty.vlan)
+                               break;
+                       spec.vlan = items->spec;
+                       if (mask.vlan->inner_type) {
+                               mnl_attr_put_u16(nlh,
+                                                TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+                                                spec.vlan->inner_type);
+                               vlan_eth_type_set = 1;
+                       }
+                       if (mask.vlan->tci & RTE_BE16(0xe000))
+                               mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_VLAN_PRIO,
+                                               (rte_be_to_cpu_16
+                                                (spec.vlan->tci) >> 13) & 0x7);
+                       if (mask.vlan->tci & RTE_BE16(0x0fff))
+                               mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_VLAN_ID,
+                                                rte_be_to_cpu_16
+                                                (spec.vlan->tci &
+                                                 RTE_BE16(0x0fff)));
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV4:
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+                       mask.ipv4 = flow_tcf_item_mask
+                               (items, &rte_flow_item_ipv4_mask,
+                                &flow_tcf_mask_supported.ipv4,
+                                &flow_tcf_mask_empty.ipv4,
+                                sizeof(flow_tcf_mask_supported.ipv4),
+                                error);
+                       assert(mask.ipv4);
+                       if (!eth_type_set || !vlan_eth_type_set)
+                               mnl_attr_put_u16(nlh,
+                                                vlan_present ?
+                                                TCA_FLOWER_KEY_VLAN_ETH_TYPE :
+                                                TCA_FLOWER_KEY_ETH_TYPE,
+                                                RTE_BE16(ETH_P_IP));
+                       eth_type_set = 1;
+                       vlan_eth_type_set = 1;
+                       if (mask.ipv4 == &flow_tcf_mask_empty.ipv4)
+                               break;
+                       spec.ipv4 = items->spec;
+                       if (mask.ipv4->hdr.next_proto_id) {
+                               mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+                                               spec.ipv4->hdr.next_proto_id);
+                               ip_proto_set = 1;
+                       }
+                       if (mask.ipv4->hdr.src_addr) {
+                               mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_SRC,
+                                                spec.ipv4->hdr.src_addr);
+                               mnl_attr_put_u32(nlh,
+                                                TCA_FLOWER_KEY_IPV4_SRC_MASK,
+                                                mask.ipv4->hdr.src_addr);
+                       }
+                       if (mask.ipv4->hdr.dst_addr) {
+                               mnl_attr_put_u32(nlh, TCA_FLOWER_KEY_IPV4_DST,
+                                                spec.ipv4->hdr.dst_addr);
+                               mnl_attr_put_u32(nlh,
+                                                TCA_FLOWER_KEY_IPV4_DST_MASK,
+                                                mask.ipv4->hdr.dst_addr);
+                       }
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV6:
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+                       mask.ipv6 = flow_tcf_item_mask
+                               (items, &rte_flow_item_ipv6_mask,
+                                &flow_tcf_mask_supported.ipv6,
+                                &flow_tcf_mask_empty.ipv6,
+                                sizeof(flow_tcf_mask_supported.ipv6),
+                                error);
+                       assert(mask.ipv6);
+                       if (!eth_type_set || !vlan_eth_type_set)
+                               mnl_attr_put_u16(nlh,
+                                                vlan_present ?
+                                                TCA_FLOWER_KEY_VLAN_ETH_TYPE :
+                                                TCA_FLOWER_KEY_ETH_TYPE,
+                                                RTE_BE16(ETH_P_IPV6));
+                       eth_type_set = 1;
+                       vlan_eth_type_set = 1;
+                       if (mask.ipv6 == &flow_tcf_mask_empty.ipv6)
+                               break;
+                       spec.ipv6 = items->spec;
+                       if (mask.ipv6->hdr.proto) {
+                               mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+                                               spec.ipv6->hdr.proto);
+                               ip_proto_set = 1;
+                       }
+                       if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr)) {
+                               mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC,
+                                            sizeof(spec.ipv6->hdr.src_addr),
+                                            spec.ipv6->hdr.src_addr);
+                               mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+                                            sizeof(mask.ipv6->hdr.src_addr),
+                                            mask.ipv6->hdr.src_addr);
+                       }
+                       if (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr)) {
+                               mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST,
+                                            sizeof(spec.ipv6->hdr.dst_addr),
+                                            spec.ipv6->hdr.dst_addr);
+                               mnl_attr_put(nlh, TCA_FLOWER_KEY_IPV6_DST_MASK,
+                                            sizeof(mask.ipv6->hdr.dst_addr),
+                                            mask.ipv6->hdr.dst_addr);
+                       }
+                       break;
+               case RTE_FLOW_ITEM_TYPE_UDP:
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
+                       mask.udp = flow_tcf_item_mask
+                               (items, &rte_flow_item_udp_mask,
+                                &flow_tcf_mask_supported.udp,
+                                &flow_tcf_mask_empty.udp,
+                                sizeof(flow_tcf_mask_supported.udp),
+                                error);
+                       assert(mask.udp);
+                       if (!ip_proto_set)
+                               mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+                                               IPPROTO_UDP);
+                       if (mask.udp == &flow_tcf_mask_empty.udp)
+                               break;
+                       spec.udp = items->spec;
+                       if (mask.udp->hdr.src_port) {
+                               mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_SRC,
+                                                spec.udp->hdr.src_port);
+                               mnl_attr_put_u16(nlh,
+                                                TCA_FLOWER_KEY_UDP_SRC_MASK,
+                                                mask.udp->hdr.src_port);
+                       }
+                       if (mask.udp->hdr.dst_port) {
+                               mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_UDP_DST,
+                                                spec.udp->hdr.dst_port);
+                               mnl_attr_put_u16(nlh,
+                                                TCA_FLOWER_KEY_UDP_DST_MASK,
+                                                mask.udp->hdr.dst_port);
+                       }
+                       break;
+               case RTE_FLOW_ITEM_TYPE_TCP:
+                       item_flags |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
+                       mask.tcp = flow_tcf_item_mask
+                               (items, &rte_flow_item_tcp_mask,
+                                &flow_tcf_mask_supported.tcp,
+                                &flow_tcf_mask_empty.tcp,
+                                sizeof(flow_tcf_mask_supported.tcp),
+                                error);
+                       assert(mask.tcp);
+                       if (!ip_proto_set)
+                               mnl_attr_put_u8(nlh, TCA_FLOWER_KEY_IP_PROTO,
+                                               IPPROTO_TCP);
+                       if (mask.tcp == &flow_tcf_mask_empty.tcp)
+                               break;
+                       spec.tcp = items->spec;
+                       if (mask.tcp->hdr.src_port) {
+                               mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_SRC,
+                                                spec.tcp->hdr.src_port);
+                               mnl_attr_put_u16(nlh,
+                                                TCA_FLOWER_KEY_TCP_SRC_MASK,
+                                                mask.tcp->hdr.src_port);
+                       }
+                       if (mask.tcp->hdr.dst_port) {
+                               mnl_attr_put_u16(nlh, TCA_FLOWER_KEY_TCP_DST,
+                                                spec.tcp->hdr.dst_port);
+                               mnl_attr_put_u16(nlh,
+                                                TCA_FLOWER_KEY_TCP_DST_MASK,
+                                                mask.tcp->hdr.dst_port);
+                       }
+                       if (mask.tcp->hdr.tcp_flags) {
+                               mnl_attr_put_u16
+                                       (nlh,
+                                        TCA_FLOWER_KEY_TCP_FLAGS,
+                                        rte_cpu_to_be_16
+                                               (spec.tcp->hdr.tcp_flags));
+                               mnl_attr_put_u16
+                                       (nlh,
+                                        TCA_FLOWER_KEY_TCP_FLAGS_MASK,
+                                        rte_cpu_to_be_16
+                                               (mask.tcp->hdr.tcp_flags));
+                       }
+                       break;
+               default:
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ITEM,
+                                                 NULL, "item not supported");
+               }
+       }
+       na_flower_act = mnl_attr_nest_start(nlh, TCA_FLOWER_ACT);
+       na_act_index_cur = 1;
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+               struct nlattr *na_act_index;
+               struct nlattr *na_act;
+               unsigned int vlan_act;
+               unsigned int i;
+
+               switch (actions->type) {
+               case RTE_FLOW_ACTION_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ACTION_TYPE_PORT_ID:
+                       conf.port_id = actions->conf;
+                       if (conf.port_id->original)
+                               i = 0;
+                       else
+                               for (i = 0; ptoi[i].ifindex; ++i)
+                                       if (ptoi[i].port_id == conf.port_id->id)
+                                               break;
+                       assert(ptoi[i].ifindex);
+                       na_act_index =
+                               mnl_attr_nest_start(nlh, na_act_index_cur++);
+                       assert(na_act_index);
+                       mnl_attr_put_strz(nlh, TCA_ACT_KIND, "mirred");
+                       na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+                       assert(na_act);
+                       mnl_attr_put(nlh, TCA_MIRRED_PARMS,
+                                    sizeof(struct tc_mirred),
+                                    &(struct tc_mirred){
+                                       .action = TC_ACT_STOLEN,
+                                       .eaction = TCA_EGRESS_REDIR,
+                                       .ifindex = ptoi[i].ifindex,
+                                    });
+                       mnl_attr_nest_end(nlh, na_act);
+                       mnl_attr_nest_end(nlh, na_act_index);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_JUMP:
+                       conf.jump = actions->conf;
+                       na_act_index =
+                               mnl_attr_nest_start(nlh, na_act_index_cur++);
+                       assert(na_act_index);
+                       mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
+                       na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+                       assert(na_act);
+                       mnl_attr_put(nlh, TCA_GACT_PARMS,
+                                    sizeof(struct tc_gact),
+                                    &(struct tc_gact){
+                                       .action = TC_ACT_GOTO_CHAIN |
+                                                 conf.jump->group,
+                                    });
+                       mnl_attr_nest_end(nlh, na_act);
+                       mnl_attr_nest_end(nlh, na_act_index);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_DROP:
+                       na_act_index =
+                               mnl_attr_nest_start(nlh, na_act_index_cur++);
+                       assert(na_act_index);
+                       mnl_attr_put_strz(nlh, TCA_ACT_KIND, "gact");
+                       na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+                       assert(na_act);
+                       mnl_attr_put(nlh, TCA_GACT_PARMS,
+                                    sizeof(struct tc_gact),
+                                    &(struct tc_gact){
+                                       .action = TC_ACT_SHOT,
+                                    });
+                       mnl_attr_nest_end(nlh, na_act);
+                       mnl_attr_nest_end(nlh, na_act_index);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+                       /*
+                        * Driver adds the count action implicitly for
+                        * each rule it creates.
+                        */
+                       ret = flow_tcf_translate_action_count(dev,
+                                                             dev_flow, error);
+                       if (ret < 0)
+                               return ret;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
+                       conf.of_push_vlan = NULL;
+                       vlan_act = TCA_VLAN_ACT_POP;
+                       goto action_of_vlan;
+               case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
+                       conf.of_push_vlan = actions->conf;
+                       vlan_act = TCA_VLAN_ACT_PUSH;
+                       goto action_of_vlan;
+               case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
+                       conf.of_set_vlan_vid = actions->conf;
+                       if (na_vlan_id)
+                               goto override_na_vlan_id;
+                       vlan_act = TCA_VLAN_ACT_MODIFY;
+                       goto action_of_vlan;
+               case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
+                       conf.of_set_vlan_pcp = actions->conf;
+                       if (na_vlan_priority)
+                               goto override_na_vlan_priority;
+                       vlan_act = TCA_VLAN_ACT_MODIFY;
+                       goto action_of_vlan;
+action_of_vlan:
+                       na_act_index =
+                               mnl_attr_nest_start(nlh, na_act_index_cur++);
+                       assert(na_act_index);
+                       mnl_attr_put_strz(nlh, TCA_ACT_KIND, "vlan");
+                       na_act = mnl_attr_nest_start(nlh, TCA_ACT_OPTIONS);
+                       assert(na_act);
+                       mnl_attr_put(nlh, TCA_VLAN_PARMS,
+                                    sizeof(struct tc_vlan),
+                                    &(struct tc_vlan){
+                                       .action = TC_ACT_PIPE,
+                                       .v_action = vlan_act,
+                                    });
+                       if (vlan_act == TCA_VLAN_ACT_POP) {
+                               mnl_attr_nest_end(nlh, na_act);
+                               mnl_attr_nest_end(nlh, na_act_index);
+                               break;
+                       }
+                       if (vlan_act == TCA_VLAN_ACT_PUSH)
+                               mnl_attr_put_u16(nlh,
+                                                TCA_VLAN_PUSH_VLAN_PROTOCOL,
+                                                conf.of_push_vlan->ethertype);
+                       na_vlan_id = mnl_nlmsg_get_payload_tail(nlh);
+                       mnl_attr_put_u16(nlh, TCA_VLAN_PAD, 0);
+                       na_vlan_priority = mnl_nlmsg_get_payload_tail(nlh);
+                       mnl_attr_put_u8(nlh, TCA_VLAN_PAD, 0);
+                       mnl_attr_nest_end(nlh, na_act);
+                       mnl_attr_nest_end(nlh, na_act_index);
+                       if (actions->type ==
+                           RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
+override_na_vlan_id:
+                               na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
+                               *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
+                                       rte_be_to_cpu_16
+                                       (conf.of_set_vlan_vid->vlan_vid);
+                       } else if (actions->type ==
+                                  RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
+override_na_vlan_priority:
+                               na_vlan_priority->nla_type =
+                                       TCA_VLAN_PUSH_VLAN_PRIORITY;
+                               *(uint8_t *)mnl_attr_get_payload
+                                       (na_vlan_priority) =
+                                       conf.of_set_vlan_pcp->vlan_pcp;
+                       }
+                       break;
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
+               case RTE_FLOW_ACTION_TYPE_SET_TTL:
+               case RTE_FLOW_ACTION_TYPE_DEC_TTL:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
+               case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
+                       na_act_index =
+                               mnl_attr_nest_start(nlh, na_act_index_cur++);
+                       flow_tcf_create_pedit_mnl_msg(nlh,
+                                                     &actions, item_flags);
+                       mnl_attr_nest_end(nlh, na_act_index);
+                       break;
+               default:
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "action not supported");
+               }
+       }
+       assert(na_flower);
+       assert(na_flower_act);
+       mnl_attr_nest_end(nlh, na_flower_act);
+       mnl_attr_nest_end(nlh, na_flower);
+       return 0;
+}
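+
+/*
+ * Editor's sketch (illustrative only, not part of the driver): for a rule
+ * matching IPv4 destination 1.2.3.4 with a drop action, the message built
+ * by flow_tcf_translate() is roughly equivalent to:
+ *
+ *     tc filter add dev <ifname> ingress prio 1 protocol all \
+ *         flower skip_sw dst_ip 1.2.3.4 action drop
+ *
+ * where skip_sw comes from TCA_CLS_FLAGS_SKIP_SW and the priority is
+ * attr->priority + 1 because zero would let the kernel pick one.
+ */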
+
+/**
+ * Send Netlink message with acknowledgment.
+ *
+ * @param ctx
+ *   Flow context to use.
+ * @param nlh
+ *   Message to send. This function always raises the NLM_F_ACK flag before
+ *   sending.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_nl_ack(struct mlx5_flow_tcf_context *ctx, struct nlmsghdr *nlh)
+{
+       alignas(struct nlmsghdr)
+       uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
+                   nlh->nlmsg_len - sizeof(*nlh)];
+       uint32_t seq = ctx->seq++;
+       struct mnl_socket *nl = ctx->nl;
+       int ret;
+
+       nlh->nlmsg_flags |= NLM_F_ACK;
+       nlh->nlmsg_seq = seq;
+       ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
+       if (ret != -1)
+               ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
+       if (ret != -1)
+               ret = mnl_cb_run
+                       (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
+       if (ret > 0)
+               return 0;
+       rte_errno = errno;
+       return -rte_errno;
+}
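+
+/*
+ * Editor's note: the reply buffer in flow_tcf_nl_ack() is sized for a
+ * struct nlmsgerr followed, in the worst case, by an echo of the request
+ * payload, which the kernel may append to the acknowledgment when
+ * reporting an error.
+ */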
+
+/**
+ * Apply flow to E-Switch by sending Netlink message.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to the sub flow.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+              struct rte_flow_error *error)
+{
+       struct priv *priv = dev->data->dev_private;
+       struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
+       struct mlx5_flow *dev_flow;
+       struct nlmsghdr *nlh;
+
+       dev_flow = LIST_FIRST(&flow->dev_flows);
+       /* E-Switch flow can't be expanded. */
+       assert(!LIST_NEXT(dev_flow, next));
+       nlh = dev_flow->tcf.nlh;
+       nlh->nlmsg_type = RTM_NEWTFILTER;
+       nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+       if (!flow_tcf_nl_ack(ctx, nlh))
+               return 0;
+       return rte_flow_error_set(error, rte_errno,
+                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                 "netlink: failed to create TC flow rule");
+}
+
+/**
+ * Remove flow from E-Switch by sending Netlink message.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to the sub flow.
+ */
+static void
+flow_tcf_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+       struct priv *priv = dev->data->dev_private;
+       struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
+       struct mlx5_flow *dev_flow;
+       struct nlmsghdr *nlh;
+
+       if (!flow)
+               return;
+       if (flow->counter) {
+               if (--flow->counter->ref_cnt == 0) {
+                       rte_free(flow->counter);
+                       flow->counter = NULL;
+               }
+       }
+       dev_flow = LIST_FIRST(&flow->dev_flows);
+       if (!dev_flow)
+               return;
+       /* E-Switch flow can't be expanded. */
+       assert(!LIST_NEXT(dev_flow, next));
+       nlh = dev_flow->tcf.nlh;
+       nlh->nlmsg_type = RTM_DELTFILTER;
+       nlh->nlmsg_flags = NLM_F_REQUEST;
+       flow_tcf_nl_ack(ctx, nlh);
+}
+
+/**
+ * Remove flow from E-Switch and release resources of the device flow.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] flow
+ *   Pointer to the sub flow.
+ */
+static void
+flow_tcf_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+       struct mlx5_flow *dev_flow;
+
+       if (!flow)
+               return;
+       flow_tcf_remove(dev, flow);
+       dev_flow = LIST_FIRST(&flow->dev_flows);
+       if (!dev_flow)
+               return;
+       /* E-Switch flow can't be expanded. */
+       assert(!LIST_NEXT(dev_flow, next));
+       LIST_REMOVE(dev_flow, next);
+       rte_free(dev_flow);
+}
+
+/**
+ * Helper routine to figure out the space required for a parse buffer.
+ *
+ * @param array
+ *   Array of values to use.
+ * @param idx
+ *   Current location in array.
+ * @param value
+ *   Value to compare with.
+ *
+ * @return
+ *   The maximum between the given value and the array value on index.
+ */
+static uint16_t
+flow_tcf_arr_val_max(uint16_t array[], int idx, uint16_t value)
+{
+       return idx < 0 ? value : RTE_MAX(array[idx], value);
+}
+
+/**
+ * Parse rtnetlink message attributes filling the attribute table with the info
+ * retrieved.
+ *
+ * @param tb
+ *   Attribute table to be filled.
+ * @param max
+ *   Maximum entry in the attribute table.
+ * @param rta
+ *   The attributes section in the message to be parsed.
+ * @param len
+ *   The length of the attributes section in the message.
+ */
+static void
+flow_tcf_nl_parse_rtattr(struct rtattr *tb[], int max,
+                        struct rtattr *rta, int len)
+{
+       unsigned short type;
+       memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+       while (RTA_OK(rta, len)) {
+               type = rta->rta_type;
+               if (type <= max && !tb[type])
+                       tb[type] = rta;
+               rta = RTA_NEXT(rta, len);
+       }
+}
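+
+/*
+ * Usage sketch (editor's illustration; tca_max, t and len are
+ * placeholders for a caller-chosen bound and a received message):
+ *
+ *     struct rtattr *tb[tca_max + 1];
+ *
+ *     flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
+ *     if (tb[TCA_KIND])
+ *             DRV_LOG(DEBUG, "kind: %s", (char *)RTA_DATA(tb[TCA_KIND]));
+ */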
+
+/**
+ * Extract flow counters from flower action.
+ *
+ * @param rta
+ *   Flower action stats properties in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   Data holding the count statistics of the rte_flow retrieved from
+ *   the message.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_action_stats_parse_and_get(struct rtattr *rta,
+                                      uint16_t rta_type[], int idx,
+                                      struct gnet_stats_basic *data)
+{
+       int tca_stats_max = flow_tcf_arr_val_max(rta_type, idx,
+                                                TCA_STATS_BASIC);
+       struct rtattr *tbs[tca_stats_max + 1];
+
+       if (rta == NULL || idx < 0)
+               return -1;
+       flow_tcf_nl_parse_rtattr(tbs, tca_stats_max,
+                                RTA_DATA(rta), RTA_PAYLOAD(rta));
+       switch (rta_type[idx]) {
+       case TCA_STATS_BASIC:
+               if (tbs[TCA_STATS_BASIC]) {
+                       memcpy(data, RTA_DATA(tbs[TCA_STATS_BASIC]),
+                              RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
+                              sizeof(*data)));
+                       return 0;
+               }
+               break;
+       default:
+               break;
+       }
+       return -1;
+}
+
+/**
+ * Parse a single flower action, retrieving the requested action
+ * attribute if found.
+ *
+ * @param arg
+ *   Flower action properties in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   Count statistics retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_parse_one_action_and_get(struct rtattr *arg,
+                                    uint16_t rta_type[], int idx, void *data)
+{
+       int tca_act_max = flow_tcf_arr_val_max(rta_type, idx, TCA_ACT_STATS);
+       struct rtattr *tb[tca_act_max + 1];
+
+       if (arg == NULL || idx < 0)
+               return -1;
+       flow_tcf_nl_parse_rtattr(tb, tca_act_max,
+                                RTA_DATA(arg), RTA_PAYLOAD(arg));
+       if (tb[TCA_ACT_KIND] == NULL)
+               return -1;
+       switch (rta_type[idx]) {
+       case TCA_ACT_STATS:
+               if (tb[TCA_ACT_STATS])
+                       return flow_tcf_nl_action_stats_parse_and_get
+                                       (tb[TCA_ACT_STATS],
+                                        rta_type, --idx,
+                                        (struct gnet_stats_basic *)data);
+               break;
+       default:
+               break;
+       }
+       return -1;
+}
+
+/**
+ * Parse the flower action section in the message, retrieving the
+ * requested attribute from the first action that provides it.
+ *
+ * @param arg
+ *   Flower actions section in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   Data retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_action_parse_and_get(struct rtattr *arg,
+                                uint16_t rta_type[], int idx, void *data)
+{
+       struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
+       int i;
+
+       if (arg == NULL || idx < 0)
+               return -1;
+       flow_tcf_nl_parse_rtattr(tb, TCA_ACT_MAX_PRIO,
+                                RTA_DATA(arg), RTA_PAYLOAD(arg));
+       switch (rta_type[idx]) {
+       /*
+        * flow counters are stored in the actions defined by the flow
+        * and not in the flow itself, therefore we need to traverse the
+        * flower chain of actions in search for them.
+        *
+        * Note that the index is not decremented here.
+        */
+       case TCA_ACT_STATS:
+               for (i = 0; i <= TCA_ACT_MAX_PRIO; i++) {
+                       if (tb[i] &&
+                       !flow_tcf_nl_parse_one_action_and_get(tb[i],
+                                                             rta_type,
+                                                             idx, data))
+                               return 0;
+               }
+               break;
+       default:
+               break;
+       }
+       return -1;
+}
+
+/**
+ * Parse flower classifier options in the message, retrieving the requested
+ * attribute if found.
+ *
+ * @param opt
+ *   Flower section in the Netlink message received.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   Data retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_opts_parse_and_get(struct rtattr *opt,
+                              uint16_t rta_type[], int idx, void *data)
+{
+       int tca_flower_max = flow_tcf_arr_val_max(rta_type, idx,
+                                                 TCA_FLOWER_ACT);
+       struct rtattr *tb[tca_flower_max + 1];
+
+       if (!opt || idx < 0)
+               return -1;
+       flow_tcf_nl_parse_rtattr(tb, tca_flower_max,
+                                RTA_DATA(opt), RTA_PAYLOAD(opt));
+       switch (rta_type[idx]) {
+       case TCA_FLOWER_ACT:
+               if (tb[TCA_FLOWER_ACT])
+                       return flow_tcf_nl_action_parse_and_get
+                                                       (tb[TCA_FLOWER_ACT],
+                                                        rta_type, --idx, data);
+               break;
+       default:
+               break;
+       }
+       return -1;
+}
+
+/**
+ * Parse Netlink reply on filter query, retrieving the flow counters.
+ *
+ * @param cnlh
+ *   Message received from Netlink.
+ * @param rta_type
+ *   The backward sequence of rta_types, as written in the attribute table,
+ *   we need to traverse in order to get to the requested object.
+ * @param idx
+ *   Current location in rta_type table.
+ * @param[out] data
+ *   Data retrieved from the message query.
+ *
+ * @return
+ *   0 if data was found and retrieved, -1 otherwise.
+ */
+static int
+flow_tcf_nl_filter_parse_and_get(struct nlmsghdr *cnlh,
+                                uint16_t rta_type[], int idx, void *data)
+{
+       struct nlmsghdr *nlh = cnlh;
+       struct tcmsg *t = NLMSG_DATA(nlh);
+       int len = nlh->nlmsg_len;
+       int tca_max = flow_tcf_arr_val_max(rta_type, idx, TCA_OPTIONS);
+       struct rtattr *tb[tca_max + 1];
+
+       if (idx < 0)
+               return -1;
+       if (nlh->nlmsg_type != RTM_NEWTFILTER &&
+           nlh->nlmsg_type != RTM_GETTFILTER &&
+           nlh->nlmsg_type != RTM_DELTFILTER)
+               return -1;
+       len -= NLMSG_LENGTH(sizeof(*t));
+       if (len < 0)
+               return -1;
+       flow_tcf_nl_parse_rtattr(tb, tca_max, TCA_RTA(t), len);
+       /* Not a TC flower flow - bail out */
+       if (!tb[TCA_KIND] ||
+           strcmp(RTA_DATA(tb[TCA_KIND]), "flower"))
+               return -1;
+       switch (rta_type[idx]) {
+       case TCA_OPTIONS:
+               if (tb[TCA_OPTIONS])
+                       return flow_tcf_nl_opts_parse_and_get(tb[TCA_OPTIONS],
+                                                             rta_type,
+                                                             --idx, data);
+               break;
+       default:
+               break;
+       }
+       return -1;
+}
+
+/**
+ * A callback to parse Netlink reply on TC flower query.
+ *
+ * @param nlh
+ *   Message received from Netlink.
+ * @param[out] data
+ *   Pointer to the data area to be filled by the parsing routine,
+ *   assumed to be a pointer to struct flow_tcf_stats_basic.
+ *
+ * @return
+ *   MNL_CB_OK value.
+ */
+static int
+flow_tcf_nl_message_get_stats_basic(const struct nlmsghdr *nlh, void *data)
+{
+       /*
+        * The backward sequence of rta_types to pass in order to get
+        *  to the counters.
+        */
+       uint16_t rta_type[] = { TCA_STATS_BASIC, TCA_ACT_STATS,
+                               TCA_FLOWER_ACT, TCA_OPTIONS };
+       struct flow_tcf_stats_basic *sb_data = data;
+       union {
+               const struct nlmsghdr *c;
+               struct nlmsghdr *nc;
+       } tnlh = { .c = nlh };
+
+       if (!flow_tcf_nl_filter_parse_and_get(tnlh.nc, rta_type,
+                                             RTE_DIM(rta_type) - 1,
+                                             (void *)&sb_data->counters))
+               sb_data->valid = true;
+       return MNL_CB_OK;
+}
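+
+/*
+ * Editor's note: the rta_type[] array above is traversed from its last
+ * entry backwards, i.e. TCA_OPTIONS -> TCA_FLOWER_ACT -> TCA_ACT_STATS ->
+ * TCA_STATS_BASIC, with idx decremented at each level except the action
+ * list, which keeps idx while scanning up to TCA_ACT_MAX_PRIO actions.
+ */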
+
+/**
+ * Query a TC flower rule for its statistics via Netlink.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] flow
+ *   Pointer to the sub flow.
+ * @param[out] data
+ *   Data retrieved by the query.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_tcf_query_count(struct rte_eth_dev *dev,
+                         struct rte_flow *flow,
+                         void *data,
+                         struct rte_flow_error *error)
+{
+       struct flow_tcf_stats_basic sb_data = { 0 };
+       struct rte_flow_query_count *qc = data;
+       struct priv *priv = dev->data->dev_private;
+       struct mlx5_flow_tcf_context *ctx = priv->tcf_context;
+       struct mnl_socket *nl = ctx->nl;
+       struct mlx5_flow *dev_flow;
+       struct nlmsghdr *nlh;
+       uint32_t seq = priv->tcf_context->seq++;
+       ssize_t ret;
+       assert(qc);
+
+       dev_flow = LIST_FIRST(&flow->dev_flows);
+       /* E-Switch flow can't be expanded. */
+       assert(!LIST_NEXT(dev_flow, next));
+       if (!dev_flow->flow->counter)
+               goto notsup_exit;
+       nlh = dev_flow->tcf.nlh;
+       nlh->nlmsg_type = RTM_GETTFILTER;
+       nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
+       nlh->nlmsg_seq = seq;
+       if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
+               goto error_exit;
+       do {
+               ret = mnl_socket_recvfrom(nl, ctx->buf, ctx->buf_size);
+               if (ret <= 0)
+                       break;
+               ret = mnl_cb_run(ctx->buf, ret, seq,
+                                mnl_socket_get_portid(nl),
+                                flow_tcf_nl_message_get_stats_basic,
+                                (void *)&sb_data);
+       } while (ret > 0);
+       if (sb_data.valid) {
+               /* Return the delta from last reset. */
+               qc->hits_set = 1;
+               qc->bytes_set = 1;
+               qc->hits = sb_data.counters.packets - flow->counter->hits;
+               qc->bytes = sb_data.counters.bytes - flow->counter->bytes;
+               if (qc->reset) {
+                       flow->counter->hits = sb_data.counters.packets;
+                       flow->counter->bytes = sb_data.counters.bytes;
+               }
+               return 0;
+       }
+       return rte_flow_error_set(error, EINVAL,
+                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                 NULL,
+                                 "flow does not have counter");
+error_exit:
+       return rte_flow_error_set
+                       (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                        NULL, "netlink: failed to read flow rule counters");
+notsup_exit:
+       return rte_flow_error_set
+                       (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                        NULL, "counters are not available.");
+}
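+
+/*
+ * Worked example (editor's illustration): if the kernel reports
+ * packets = 100 while flow->counter->hits = 40 from the last reset,
+ * the query returns qc->hits = 60; with qc->reset set, the stored
+ * baseline becomes 100 so the next query starts counting from zero.
+ */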
+
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_tcf_query(struct rte_eth_dev *dev,
+              struct rte_flow *flow,
+              const struct rte_flow_action *actions,
+              void *data,
+              struct rte_flow_error *error)
+{
+       int ret = -EINVAL;
+
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+               switch (actions->type) {
+               case RTE_FLOW_ACTION_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+                       ret = flow_tcf_query_count(dev, flow, data, error);
+                       break;
+               default:
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "action not supported");
+               }
+       }
+       return ret;
+}
+
+const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
+       .validate = flow_tcf_validate,
+       .prepare = flow_tcf_prepare,
+       .translate = flow_tcf_translate,
+       .apply = flow_tcf_apply,
+       .remove = flow_tcf_remove,
+       .destroy = flow_tcf_destroy,
+       .query = flow_tcf_query,
+};
+
+/**
+ * Create and configure a libmnl socket for Netlink flow rules.
+ *
+ * @return
+ *   A valid libmnl socket object pointer on success, NULL otherwise and
+ *   rte_errno is set.
+ */
+static struct mnl_socket *
+flow_tcf_mnl_socket_create(void)
+{
+       struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
+
+       if (nl) {
+               mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
+                                     sizeof(int));
+               if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
+                       return nl;
+       }
+       rte_errno = errno;
+       if (nl)
+               mnl_socket_close(nl);
+       return NULL;
+}
+
+/**
+ * Destroy a libmnl socket.
+ *
+ * @param nl
+ *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ */
+static void
+flow_tcf_mnl_socket_destroy(struct mnl_socket *nl)
+{
+       if (nl)
+               mnl_socket_close(nl);
+}
+
+/**
+ * Initialize ingress qdisc of a given network interface.
+ *
+ * @param ctx
+ *   Pointer to tc-flower context to use.
+ * @param ifindex
+ *   Index of network interface to initialize.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+int
+mlx5_flow_tcf_init(struct mlx5_flow_tcf_context *ctx,
+                  unsigned int ifindex, struct rte_flow_error *error)
+{
+       struct nlmsghdr *nlh;
+       struct tcmsg *tcm;
+       alignas(struct nlmsghdr)
+       uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
+
+       /* Destroy existing ingress qdisc and everything attached to it. */
+       nlh = mnl_nlmsg_put_header(buf);
+       nlh->nlmsg_type = RTM_DELQDISC;
+       nlh->nlmsg_flags = NLM_F_REQUEST;
+       tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+       tcm->tcm_family = AF_UNSPEC;
+       tcm->tcm_ifindex = ifindex;
+       tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+       tcm->tcm_parent = TC_H_INGRESS;
+       /* Ignore errors when qdisc is already absent. */
+       if (flow_tcf_nl_ack(ctx, nlh) &&
+           rte_errno != EINVAL && rte_errno != ENOENT)
+               return rte_flow_error_set(error, rte_errno,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                         "netlink: failed to remove ingress"
+                                         " qdisc");
+       /* Create fresh ingress qdisc. */
+       nlh = mnl_nlmsg_put_header(buf);
+       nlh->nlmsg_type = RTM_NEWQDISC;
+       nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+       tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
+       tcm->tcm_family = AF_UNSPEC;
+       tcm->tcm_ifindex = ifindex;
+       tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
+       tcm->tcm_parent = TC_H_INGRESS;
+       mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
+       if (flow_tcf_nl_ack(ctx, nlh))
+               return rte_flow_error_set(error, rte_errno,
+                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                         "netlink: failed to create ingress"
+                                         " qdisc");
+       return 0;
+}
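+
+/*
+ * Editor's note: the two messages above are the Netlink equivalent of
+ *
+ *     tc qdisc del dev <ifname> ingress
+ *     tc qdisc add dev <ifname> ingress
+ *
+ * run back to back, with EINVAL/ENOENT from the delete ignored because
+ * the qdisc may not exist yet.
+ */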
+
+/**
+ * Create libmnl context for Netlink flow rules.
+ *
+ * @return
+ *   A valid libmnl socket object pointer on success, NULL otherwise and
+ *   rte_errno is set.
+ */
+struct mlx5_flow_tcf_context *
+mlx5_flow_tcf_context_create(void)
+{
+       struct mlx5_flow_tcf_context *ctx = rte_zmalloc(__func__,
+                                                       sizeof(*ctx),
+                                                       sizeof(uint32_t));
+       if (!ctx)
+               goto error;
+       ctx->nl = flow_tcf_mnl_socket_create();
+       if (!ctx->nl)
+               goto error;
+       ctx->buf_size = MNL_SOCKET_BUFFER_SIZE;
+       ctx->buf = rte_zmalloc(__func__,
+                              ctx->buf_size, sizeof(uint32_t));
+       if (!ctx->buf)
+               goto error;
+       ctx->seq = random();
+       return ctx;
+error:
+       mlx5_flow_tcf_context_destroy(ctx);
+       return NULL;
+}
+
+/**
+ * Destroy a libmnl context.
+ *
+ * @param ctx
+ *   Libmnl socket of the @p NETLINK_ROUTE kind.
+ */
+void
+mlx5_flow_tcf_context_destroy(struct mlx5_flow_tcf_context *ctx)
+{
+       if (!ctx)
+               return;
+       flow_tcf_mnl_socket_destroy(ctx->nl);
+       rte_free(ctx->buf);
+       rte_free(ctx);
+}
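+
+/*
+ * Lifecycle sketch (editor's illustration; error handling elided and
+ * ifindex assumed to be known):
+ *
+ *     struct rte_flow_error error;
+ *     struct mlx5_flow_tcf_context *ctx = mlx5_flow_tcf_context_create();
+ *
+ *     if (ctx && !mlx5_flow_tcf_init(ctx, ifindex, &error)) {
+ *             ... install rules through mlx5_flow_tcf_drv_ops ...
+ *     }
+ *     mlx5_flow_tcf_context_destroy(ctx);
+ */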
diff --git a/drivers/net/mlx5/mlx5_flow_verbs.c b/drivers/net/mlx5/mlx5_flow_verbs.c
new file mode 100644 (file)
index 0000000..81bc39f
--- /dev/null
@@ -0,0 +1,1825 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2018 Mellanox Technologies, Ltd
+ */
+
+#include <netinet/in.h>
+#include <sys/queue.h>
+#include <stdalign.h>
+#include <stdint.h>
+#include <string.h>
+
+/* Verbs header. */
+/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
+#ifdef PEDANTIC
+#pragma GCC diagnostic ignored "-Wpedantic"
+#endif
+#include <infiniband/verbs.h>
+#ifdef PEDANTIC
+#pragma GCC diagnostic error "-Wpedantic"
+#endif
+
+#include <rte_common.h>
+#include <rte_ether.h>
+#include <rte_eth_ctrl.h>
+#include <rte_ethdev_driver.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_malloc.h>
+#include <rte_ip.h>
+
+#include "mlx5.h"
+#include "mlx5_defs.h"
+#include "mlx5_prm.h"
+#include "mlx5_glue.h"
+#include "mlx5_flow.h"
+
+/**
+ * Create Verbs flow counter with Verbs library.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] counter
+ *   mlx5 flow counter object; it contains the counter ID, and the
+ *   handle of the created Verbs flow counter is returned in its cs
+ *   field (if counters are supported).
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_verbs_counter_create(struct rte_eth_dev *dev,
+                         struct mlx5_flow_counter *counter)
+{
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
+       struct priv *priv = dev->data->dev_private;
+       struct ibv_counter_set_init_attr init = {
+                        .counter_set_id = counter->id};
+
+       counter->cs = mlx5_glue->create_counter_set(priv->ctx, &init);
+       if (!counter->cs) {
+               rte_errno = ENOTSUP;
+               return -ENOTSUP;
+       }
+       return 0;
+#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+       struct priv *priv = dev->data->dev_private;
+       struct ibv_counters_init_attr init = {0};
+       struct ibv_counter_attach_attr attach = {0};
+       int ret;
+
+       counter->cs = mlx5_glue->create_counters(priv->ctx, &init);
+       if (!counter->cs) {
+               rte_errno = ENOTSUP;
+               return -ENOTSUP;
+       }
+       attach.counter_desc = IBV_COUNTER_PACKETS;
+       attach.index = 0;
+       ret = mlx5_glue->attach_counters(counter->cs, &attach, NULL);
+       if (!ret) {
+               attach.counter_desc = IBV_COUNTER_BYTES;
+               attach.index = 1;
+               ret = mlx5_glue->attach_counters
+                                       (counter->cs, &attach, NULL);
+       }
+       if (ret) {
+               claim_zero(mlx5_glue->destroy_counters(counter->cs));
+               counter->cs = NULL;
+               rte_errno = ret;
+               return -ret;
+       }
+       return 0;
+#else
+       (void)dev;
+       (void)counter;
+       rte_errno = ENOTSUP;
+       return -ENOTSUP;
+#endif
+}
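+
+/*
+ * Editor's note: two Verbs counter APIs are covered above. The V42
+ * counter-set interface takes the counter ID at creation time, while the
+ * V45 interface creates an empty ibv_counters object and attaches a
+ * packet counter at index 0 and a byte counter at index 1, matching the
+ * read order used by flow_verbs_counter_query() further below.
+ */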
+
+/**
+ * Get a flow counter.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] shared
+ *   Indicate if this counter is shared with other flows.
+ * @param[in] id
+ *   Counter identifier.
+ *
+ * @return
+ *   A pointer to the counter, NULL otherwise and rte_errno is set.
+ */
+static struct mlx5_flow_counter *
+flow_verbs_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
+{
+       struct priv *priv = dev->data->dev_private;
+       struct mlx5_flow_counter *cnt;
+       int ret;
+
+       LIST_FOREACH(cnt, &priv->flow_counters, next) {
+               if (!cnt->shared || cnt->shared != shared)
+                       continue;
+               if (cnt->id != id)
+                       continue;
+               cnt->ref_cnt++;
+               return cnt;
+       }
+       cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
+       if (!cnt) {
+               rte_errno = ENOMEM;
+               return NULL;
+       }
+       cnt->id = id;
+       cnt->shared = shared;
+       cnt->ref_cnt = 1;
+       cnt->hits = 0;
+       cnt->bytes = 0;
+       /* Create counter with Verbs. */
+       ret = flow_verbs_counter_create(dev, cnt);
+       if (!ret) {
+               LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
+               return cnt;
+       }
+       /* Some error occurred in Verbs library. */
+       rte_free(cnt);
+       rte_errno = -ret;
+       return NULL;
+}
+
+/**
+ * Release a flow counter.
+ *
+ * @param[in] counter
+ *   Pointer to the counter handler.
+ */
+static void
+flow_verbs_counter_release(struct mlx5_flow_counter *counter)
+{
+       if (--counter->ref_cnt == 0) {
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
+               claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
+#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+               claim_zero(mlx5_glue->destroy_counters(counter->cs));
+#endif
+               LIST_REMOVE(counter, next);
+               rte_free(counter);
+       }
+}
+
+/**
+ * Query a flow counter via Verbs library call.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_verbs_counter_query(struct rte_eth_dev *dev __rte_unused,
+                        struct rte_flow *flow, void *data,
+                        struct rte_flow_error *error)
+{
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
+       defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+       if (flow->actions & MLX5_FLOW_ACTION_COUNT) {
+               struct rte_flow_query_count *qc = data;
+               uint64_t counters[2] = {0, 0};
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
+               struct ibv_query_counter_set_attr query_cs_attr = {
+                       .cs = flow->counter->cs,
+                       .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
+               };
+               struct ibv_counter_set_data query_out = {
+                       .out = counters,
+                       .outlen = 2 * sizeof(uint64_t),
+               };
+               int err = mlx5_glue->query_counter_set(&query_cs_attr,
+                                                      &query_out);
+#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+               int err = mlx5_glue->query_counters
+                              (flow->counter->cs, counters,
+                               RTE_DIM(counters),
+                               IBV_READ_COUNTERS_ATTR_PREFER_CACHED);
+#endif
+               if (err)
+                       return rte_flow_error_set
+                               (error, err,
+                                RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                NULL,
+                                "cannot read counter");
+               qc->hits_set = 1;
+               qc->bytes_set = 1;
+               qc->hits = counters[0] - flow->counter->hits;
+               qc->bytes = counters[1] - flow->counter->bytes;
+               if (qc->reset) {
+                       flow->counter->hits = counters[0];
+                       flow->counter->bytes = counters[1];
+               }
+               return 0;
+       }
+       return rte_flow_error_set(error, EINVAL,
+                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                 NULL,
+                                 "flow does not have counter");
+#else
+       (void)flow;
+       (void)data;
+       return rte_flow_error_set(error, ENOTSUP,
+                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                 NULL,
+                                 "counters are not available");
+#endif
+}
+
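+/*
+ * Editor's note: the query above reports deltas against the baseline stored
+ * in flow->counter, illustrated here with hypothetical numbers:
+ *
+ *	// hardware counter reads 100; baseline (counter->hits) is 0
+ *	// query with qc->reset == 1 -> qc->hits == 100, baseline becomes 100
+ *	// hardware counter later reads 250
+ *	// next query -> qc->hits == 150 (250 - 100)
+ */
+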
+/**
+ * Add a verbs item specification into @p flow.
+ *
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ * @param[in] src
+ *   Pointer to the specification to copy.
+ * @param[in] size
+ *   Size in bytes of the specification to copy.
+ */
+static void
+flow_verbs_spec_add(struct mlx5_flow *flow, void *src, unsigned int size)
+{
+       struct mlx5_flow_verbs *verbs = &flow->verbs;
+
+       if (verbs->specs) {
+               void *dst;
+
+               dst = (void *)(verbs->specs + verbs->size);
+               memcpy(dst, src, size);
+               ++verbs->attr->num_of_specs;
+       }
+       verbs->size += size;
+}
+
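+/*
+ * Editor's note: illustrative layout of the buffer flow_verbs_spec_add()
+ * appends into. verbs->specs points just past the ibv_flow_attr header and
+ * verbs->size is the running offset, so successive calls pack the
+ * specifications back to back:
+ *
+ *	[ struct ibv_flow_attr ][ spec 0 ][ spec 1 ] ... [ spec N-1 ]
+ *	^ verbs->attr           ^ verbs->specs           ^ specs + size
+ *
+ * When verbs->specs is NULL, only verbs->size is accumulated.
+ */
+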
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit field with all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_eth(const struct rte_flow_item *item,
+                             uint64_t *item_flags,
+                             struct mlx5_flow *dev_flow)
+{
+       const struct rte_flow_item_eth *spec = item->spec;
+       const struct rte_flow_item_eth *mask = item->mask;
+       const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
+       const unsigned int size = sizeof(struct ibv_flow_spec_eth);
+       struct ibv_flow_spec_eth eth = {
+               .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+               .size = size,
+       };
+
+       if (!mask)
+               mask = &rte_flow_item_eth_mask;
+       if (spec) {
+               unsigned int i;
+
+               memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
+               memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
+               eth.val.ether_type = spec->type;
+               memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
+               memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
+               eth.mask.ether_type = mask->type;
+               /* Remove unwanted bits from values. */
+               for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+                       eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
+                       eth.val.src_mac[i] &= eth.mask.src_mac[i];
+               }
+               eth.val.ether_type &= eth.mask.ether_type;
+               dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+       }
+       flow_verbs_spec_add(dev_flow, &eth, size);
+       *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+                               MLX5_FLOW_LAYER_OUTER_L2;
+}
+
+/**
+ * Update the VLAN tag in the Verbs Ethernet specification.
+ * This function assumes that the input is valid and there is space to add
+ * the requested item.
+ *
+ * @param[in, out] attr
+ *   Pointer to Verbs attributes structure.
+ * @param[in] eth
+ *   Verbs structure containing the VLAN information to copy.
+ */
+static void
+flow_verbs_item_vlan_update(struct ibv_flow_attr *attr,
+                           struct ibv_flow_spec_eth *eth)
+{
+       unsigned int i;
+       const enum ibv_flow_spec_type search = eth->type;
+       struct ibv_spec_header *hdr = (struct ibv_spec_header *)
+               ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
+
+       for (i = 0; i != attr->num_of_specs; ++i) {
+               if (hdr->type == search) {
+                       struct ibv_flow_spec_eth *e =
+                               (struct ibv_flow_spec_eth *)hdr;
+
+                       e->val.vlan_tag = eth->val.vlan_tag;
+                       e->mask.vlan_tag = eth->mask.vlan_tag;
+                       e->val.ether_type = eth->val.ether_type;
+                       e->mask.ether_type = eth->mask.ether_type;
+                       break;
+               }
+               hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
+       }
+}
+
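+/*
+ * Editor's note: sketch of the traversal pattern used above (and by the GRE
+ * and mark updates below). Every Verbs specification begins with a
+ * struct ibv_spec_header carrying its type and size, so the list packed in
+ * the attribute buffer can be walked without knowing the concrete types
+ * ("first_spec" is a hypothetical name for the first packed spec):
+ *
+ *	struct ibv_spec_header *hdr = first_spec;
+ *	unsigned int i;
+ *
+ *	for (i = 0; i != attr->num_of_specs; ++i) {
+ *		// inspect hdr->type, cast to the concrete spec on a match
+ *		hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
+ *	}
+ */
+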
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that holds all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_vlan(const struct rte_flow_item *item,
+                              uint64_t *item_flags,
+                              struct mlx5_flow *dev_flow)
+{
+       const struct rte_flow_item_vlan *spec = item->spec;
+       const struct rte_flow_item_vlan *mask = item->mask;
+       unsigned int size = sizeof(struct ibv_flow_spec_eth);
+       const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
+       struct ibv_flow_spec_eth eth = {
+               .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+               .size = size,
+       };
+       const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+                                     MLX5_FLOW_LAYER_OUTER_L2;
+
+       if (!mask)
+               mask = &rte_flow_item_vlan_mask;
+       if (spec) {
+               eth.val.vlan_tag = spec->tci;
+               eth.mask.vlan_tag = mask->tci;
+               eth.val.vlan_tag &= eth.mask.vlan_tag;
+               eth.val.ether_type = spec->inner_type;
+               eth.mask.ether_type = mask->inner_type;
+               eth.val.ether_type &= eth.mask.ether_type;
+       }
+       if (!(*item_flags & l2m)) {
+               dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+               flow_verbs_spec_add(dev_flow, &eth, size);
+       } else {
+               flow_verbs_item_vlan_update(dev_flow->verbs.attr, &eth);
+               size = 0; /* Only an update is done in the eth specification. */
+       }
+       *item_flags |= tunnel ?
+                      (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
+                      (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_ipv4(const struct rte_flow_item *item,
+                              uint64_t *item_flags,
+                              struct mlx5_flow *dev_flow)
+{
+       const struct rte_flow_item_ipv4 *spec = item->spec;
+       const struct rte_flow_item_ipv4 *mask = item->mask;
+       const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
+       unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
+       struct ibv_flow_spec_ipv4_ext ipv4 = {
+               .type = IBV_FLOW_SPEC_IPV4_EXT |
+                       (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+               .size = size,
+       };
+
+       if (!mask)
+               mask = &rte_flow_item_ipv4_mask;
+       *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+                               MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+       if (spec) {
+               ipv4.val = (struct ibv_flow_ipv4_ext_filter){
+                       .src_ip = spec->hdr.src_addr,
+                       .dst_ip = spec->hdr.dst_addr,
+                       .proto = spec->hdr.next_proto_id,
+                       .tos = spec->hdr.type_of_service,
+               };
+               ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
+                       .src_ip = mask->hdr.src_addr,
+                       .dst_ip = mask->hdr.dst_addr,
+                       .proto = mask->hdr.next_proto_id,
+                       .tos = mask->hdr.type_of_service,
+               };
+               /* Remove unwanted bits from values. */
+               ipv4.val.src_ip &= ipv4.mask.src_ip;
+               ipv4.val.dst_ip &= ipv4.mask.dst_ip;
+               ipv4.val.proto &= ipv4.mask.proto;
+               ipv4.val.tos &= ipv4.mask.tos;
+       }
+       dev_flow->verbs.hash_fields |=
+               mlx5_flow_hashfields_adjust(dev_flow, tunnel,
+                                           MLX5_IPV4_LAYER_TYPES,
+                                           MLX5_IPV4_IBV_RX_HASH);
+       dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
+       flow_verbs_spec_add(dev_flow, &ipv4, size);
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_ipv6(const struct rte_flow_item *item,
+                              uint64_t *item_flags,
+                              struct mlx5_flow *dev_flow)
+{
+       const struct rte_flow_item_ipv6 *spec = item->spec;
+       const struct rte_flow_item_ipv6 *mask = item->mask;
+       const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+       unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
+       struct ibv_flow_spec_ipv6 ipv6 = {
+               .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+               .size = size,
+       };
+
+       if (!mask)
+               mask = &rte_flow_item_ipv6_mask;
+       *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+                               MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+       if (spec) {
+               unsigned int i;
+               uint32_t vtc_flow_val;
+               uint32_t vtc_flow_mask;
+
+               memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
+                      RTE_DIM(ipv6.val.src_ip));
+               memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
+                      RTE_DIM(ipv6.val.dst_ip));
+               memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
+                      RTE_DIM(ipv6.mask.src_ip));
+               memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
+                      RTE_DIM(ipv6.mask.dst_ip));
+               vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
+               vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
+               ipv6.val.flow_label =
+                       rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
+                                        IPV6_HDR_FL_SHIFT);
+               ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
+                                        IPV6_HDR_TC_SHIFT;
+               ipv6.val.next_hdr = spec->hdr.proto;
+               ipv6.val.hop_limit = spec->hdr.hop_limits;
+               ipv6.mask.flow_label =
+                       rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
+                                        IPV6_HDR_FL_SHIFT);
+               ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
+                                         IPV6_HDR_TC_SHIFT;
+               ipv6.mask.next_hdr = mask->hdr.proto;
+               ipv6.mask.hop_limit = mask->hdr.hop_limits;
+               /* Remove unwanted bits from values. */
+               for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
+                       ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
+                       ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
+               }
+               ipv6.val.flow_label &= ipv6.mask.flow_label;
+               ipv6.val.traffic_class &= ipv6.mask.traffic_class;
+               ipv6.val.next_hdr &= ipv6.mask.next_hdr;
+               ipv6.val.hop_limit &= ipv6.mask.hop_limit;
+       }
+       dev_flow->verbs.hash_fields |=
+               mlx5_flow_hashfields_adjust(dev_flow, tunnel,
+                                           MLX5_IPV6_LAYER_TYPES,
+                                           MLX5_IPV6_IBV_RX_HASH);
+       dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L3;
+       flow_verbs_spec_add(dev_flow, &ipv6, size);
+}
+
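+/*
+ * Editor's note: a worked example for the vtc_flow handling above, assuming
+ * the standard IPv6 first-word layout version(4) | traffic class(8) |
+ * flow label(20); the value is hypothetical:
+ *
+ *	vtc_flow_val = 0x6abcdef0;	// version 6, TC 0xab, label 0xcdef0
+ *	(vtc_flow_val & IPV6_HDR_TC_MASK) >> IPV6_HDR_TC_SHIFT	// -> 0xab
+ *	(vtc_flow_val & IPV6_HDR_FL_MASK) >> IPV6_HDR_FL_SHIFT	// -> 0xcdef0
+ */
+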
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_udp(const struct rte_flow_item *item,
+                             uint64_t *item_flags,
+                             struct mlx5_flow *dev_flow)
+{
+       const struct rte_flow_item_udp *spec = item->spec;
+       const struct rte_flow_item_udp *mask = item->mask;
+       const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
+       unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
+       struct ibv_flow_spec_tcp_udp udp = {
+               .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+               .size = size,
+       };
+
+       if (!mask)
+               mask = &rte_flow_item_udp_mask;
+       *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
+                               MLX5_FLOW_LAYER_OUTER_L4_UDP;
+       if (spec) {
+               udp.val.dst_port = spec->hdr.dst_port;
+               udp.val.src_port = spec->hdr.src_port;
+               udp.mask.dst_port = mask->hdr.dst_port;
+               udp.mask.src_port = mask->hdr.src_port;
+               /* Remove unwanted bits from values. */
+               udp.val.src_port &= udp.mask.src_port;
+               udp.val.dst_port &= udp.mask.dst_port;
+       }
+       dev_flow->verbs.hash_fields |=
+               mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_UDP,
+                                           (IBV_RX_HASH_SRC_PORT_UDP |
+                                            IBV_RX_HASH_DST_PORT_UDP));
+       dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
+       flow_verbs_spec_add(dev_flow, &udp, size);
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_tcp(const struct rte_flow_item *item,
+                             uint64_t *item_flags,
+                             struct mlx5_flow *dev_flow)
+{
+       const struct rte_flow_item_tcp *spec = item->spec;
+       const struct rte_flow_item_tcp *mask = item->mask;
+       const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
+       unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
+       struct ibv_flow_spec_tcp_udp tcp = {
+               .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
+               .size = size,
+       };
+
+       if (!mask)
+               mask = &rte_flow_item_tcp_mask;
+       *item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
+                               MLX5_FLOW_LAYER_OUTER_L4_TCP;
+       if (spec) {
+               tcp.val.dst_port = spec->hdr.dst_port;
+               tcp.val.src_port = spec->hdr.src_port;
+               tcp.mask.dst_port = mask->hdr.dst_port;
+               tcp.mask.src_port = mask->hdr.src_port;
+               /* Remove unwanted bits from values. */
+               tcp.val.src_port &= tcp.mask.src_port;
+               tcp.val.dst_port &= tcp.mask.dst_port;
+       }
+       dev_flow->verbs.hash_fields |=
+               mlx5_flow_hashfields_adjust(dev_flow, tunnel, ETH_RSS_TCP,
+                                           (IBV_RX_HASH_SRC_PORT_TCP |
+                                            IBV_RX_HASH_DST_PORT_TCP));
+       dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L4;
+       flow_verbs_spec_add(dev_flow, &tcp, size);
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_vxlan(const struct rte_flow_item *item,
+                               uint64_t *item_flags,
+                               struct mlx5_flow *dev_flow)
+{
+       const struct rte_flow_item_vxlan *spec = item->spec;
+       const struct rte_flow_item_vxlan *mask = item->mask;
+       unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+       struct ibv_flow_spec_tunnel vxlan = {
+               .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
+               .size = size,
+       };
+       union vni {
+               uint32_t vlan_id;
+               uint8_t vni[4];
+       } id = { .vlan_id = 0, };
+
+       if (!mask)
+               mask = &rte_flow_item_vxlan_mask;
+       if (spec) {
+               memcpy(&id.vni[1], spec->vni, 3);
+               vxlan.val.tunnel_id = id.vlan_id;
+               memcpy(&id.vni[1], mask->vni, 3);
+               vxlan.mask.tunnel_id = id.vlan_id;
+               /* Remove unwanted bits from values. */
+               vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
+       }
+       flow_verbs_spec_add(dev_flow, &vxlan, size);
+       dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+       *item_flags |= MLX5_FLOW_LAYER_VXLAN;
+}
+
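+/*
+ * Editor's note: illustrative view of the vni union above. The 24-bit VNI
+ * from the rte_flow item is copied into bytes 1..3 of a 32-bit word, so the
+ * word's memory layout is the VNI in network byte order with a leading zero
+ * byte, as expected for the Verbs tunnel_id (VNI value hypothetical):
+ *
+ *	uint8_t vni[3] = { 0x12, 0x34, 0x56 };	// VNI 0x123456
+ *	union vni id = { .vlan_id = 0, };
+ *
+ *	memcpy(&id.vni[1], vni, 3);
+ *	// bytes of id.vlan_id are now 0x00 0x12 0x34 0x56
+ */
+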
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_vxlan_gpe(const struct rte_flow_item *item,
+                                   uint64_t *item_flags,
+                                   struct mlx5_flow *dev_flow)
+{
+       const struct rte_flow_item_vxlan_gpe *spec = item->spec;
+       const struct rte_flow_item_vxlan_gpe *mask = item->mask;
+       unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+       struct ibv_flow_spec_tunnel vxlan_gpe = {
+               .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
+               .size = size,
+       };
+       union vni {
+               uint32_t vlan_id;
+               uint8_t vni[4];
+       } id = { .vlan_id = 0, };
+
+       if (!mask)
+               mask = &rte_flow_item_vxlan_gpe_mask;
+       if (spec) {
+               memcpy(&id.vni[1], spec->vni, 3);
+               vxlan_gpe.val.tunnel_id = id.vlan_id;
+               memcpy(&id.vni[1], mask->vni, 3);
+               vxlan_gpe.mask.tunnel_id = id.vlan_id;
+               /* Remove unwanted bits from values. */
+               vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
+       }
+       flow_verbs_spec_add(dev_flow, &vxlan_gpe, size);
+       dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+       *item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
+}
+
+/**
+ * Update the protocol in Verbs IPv4/IPv6 spec.
+ *
+ * @param[in, out] attr
+ *   Pointer to Verbs attributes structure.
+ * @param[in] search
+ *   Specification type to search in order to update the IP protocol.
+ * @param[in] protocol
+ *   Protocol value to set if none is present in the specification.
+ */
+static void
+flow_verbs_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
+                                      enum ibv_flow_spec_type search,
+                                      uint8_t protocol)
+{
+       unsigned int i;
+       struct ibv_spec_header *hdr;
+
+       if (!attr)
+               return;
+       hdr = (struct ibv_spec_header *)
+               ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
+       for (i = 0; i != attr->num_of_specs; ++i) {
+               if (hdr->type == search) {
+                       union {
+                               struct ibv_flow_spec_ipv4_ext *ipv4;
+                               struct ibv_flow_spec_ipv6 *ipv6;
+                       } ip;
+
+                       switch (search) {
+                       case IBV_FLOW_SPEC_IPV4_EXT:
+                               ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
+                               if (!ip.ipv4->val.proto) {
+                                       ip.ipv4->val.proto = protocol;
+                                       ip.ipv4->mask.proto = 0xff;
+                               }
+                               break;
+                       case IBV_FLOW_SPEC_IPV6:
+                               ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
+                               if (!ip.ipv6->val.next_hdr) {
+                                       ip.ipv6->val.next_hdr = protocol;
+                                       ip.ipv6->mask.next_hdr = 0xff;
+                               }
+                               break;
+                       default:
+                               break;
+                       }
+                       break;
+               }
+               hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
+       }
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_gre(const struct rte_flow_item *item __rte_unused,
+                             uint64_t *item_flags,
+                             struct mlx5_flow *dev_flow)
+{
+       struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
+#ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
+       unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
+       struct ibv_flow_spec_tunnel tunnel = {
+               .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
+               .size = size,
+       };
+#else
+       const struct rte_flow_item_gre *spec = item->spec;
+       const struct rte_flow_item_gre *mask = item->mask;
+       unsigned int size = sizeof(struct ibv_flow_spec_gre);
+       struct ibv_flow_spec_gre tunnel = {
+               .type = IBV_FLOW_SPEC_GRE,
+               .size = size,
+       };
+
+       if (!mask)
+               mask = &rte_flow_item_gre_mask;
+       if (spec) {
+               tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
+               tunnel.val.protocol = spec->protocol;
+               tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
+               tunnel.mask.protocol = mask->protocol;
+               /* Remove unwanted bits from values. */
+               tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
+               tunnel.val.protocol &= tunnel.mask.protocol;
+               tunnel.val.key &= tunnel.mask.key;
+       }
+#endif
+       if (*item_flags & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
+               flow_verbs_item_gre_ip_protocol_update(verbs->attr,
+                                                      IBV_FLOW_SPEC_IPV4_EXT,
+                                                      IPPROTO_GRE);
+       else
+               flow_verbs_item_gre_ip_protocol_update(verbs->attr,
+                                                      IBV_FLOW_SPEC_IPV6,
+                                                      IPPROTO_GRE);
+       flow_verbs_spec_add(dev_flow, &tunnel, size);
+       verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
+       *item_flags |= MLX5_FLOW_LAYER_GRE;
+}
+
+/**
+ * Convert the @p item into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested item
+ * into the flow.
+ *
+ * @param[in] item
+ *   Item specification.
+ * @param[in, out] item_flags
+ *   Bit mask that marks all detected items.
+ * @param[in, out] dev_flow
+ *   Pointer to dev_flow structure.
+ */
+static void
+flow_verbs_translate_item_mpls(const struct rte_flow_item *item __rte_unused,
+                              uint64_t *item_flags __rte_unused,
+                              struct mlx5_flow *dev_flow __rte_unused)
+{
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+       const struct rte_flow_item_mpls *spec = item->spec;
+       const struct rte_flow_item_mpls *mask = item->mask;
+       unsigned int size = sizeof(struct ibv_flow_spec_mpls);
+       struct ibv_flow_spec_mpls mpls = {
+               .type = IBV_FLOW_SPEC_MPLS,
+               .size = size,
+       };
+
+       if (!mask)
+               mask = &rte_flow_item_mpls_mask;
+       if (spec) {
+               memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
+               memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
+               /* Remove unwanted bits from values.  */
+               mpls.val.label &= mpls.mask.label;
+       }
+       flow_verbs_spec_add(dev_flow, &mpls, size);
+       dev_flow->verbs.attr->priority = MLX5_PRIORITY_MAP_L2;
+       *item_flags |= MLX5_FLOW_LAYER_MPLS;
+#endif
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. The detected action is also recorded in @p action_flags.
+ *
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_drop(uint64_t *action_flags,
+                                struct mlx5_flow *dev_flow)
+{
+       unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
+       struct ibv_flow_spec_action_drop drop = {
+                       .type = IBV_FLOW_SPEC_ACTION_DROP,
+                       .size = size,
+       };
+
+       flow_verbs_spec_add(dev_flow, &drop, size);
+       *action_flags |= MLX5_FLOW_ACTION_DROP;
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. The detected action is also recorded in @p action_flags.
+ *
+ * @param[in] action
+ *   Action configuration.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_queue(const struct rte_flow_action *action,
+                                 uint64_t *action_flags,
+                                 struct mlx5_flow *dev_flow)
+{
+       const struct rte_flow_action_queue *queue = action->conf;
+       struct rte_flow *flow = dev_flow->flow;
+
+       if (flow->queue)
+               (*flow->queue)[0] = queue->index;
+       flow->rss.queue_num = 1;
+       *action_flags |= MLX5_FLOW_ACTION_QUEUE;
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. The detected action is also recorded in @p action_flags.
+ *
+ * @param[in] action
+ *   Action configuration.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_rss(const struct rte_flow_action *action,
+                               uint64_t *action_flags,
+                               struct mlx5_flow *dev_flow)
+{
+       const struct rte_flow_action_rss *rss = action->conf;
+       struct rte_flow *flow = dev_flow->flow;
+
+       if (flow->queue)
+               memcpy((*flow->queue), rss->queue,
+                      rss->queue_num * sizeof(uint16_t));
+       flow->rss.queue_num = rss->queue_num;
+       memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
+       flow->rss.types = rss->types;
+       flow->rss.level = rss->level;
+       *action_flags |= MLX5_FLOW_ACTION_RSS;
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. The detected action is also recorded in @p action_flags.
+ *
+ * @param[in] action
+ *   Action configuration.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_flag
+                       (const struct rte_flow_action *action __rte_unused,
+                        uint64_t *action_flags,
+                        struct mlx5_flow *dev_flow)
+{
+       unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
+       struct ibv_flow_spec_action_tag tag = {
+               .type = IBV_FLOW_SPEC_ACTION_TAG,
+               .size = size,
+               .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
+       };
+       *action_flags |= MLX5_FLOW_ACTION_MARK;
+       flow_verbs_spec_add(dev_flow, &tag, size);
+}
+
+/**
+ * Update verbs specification to modify the flag to mark.
+ *
+ * @param[in, out] verbs
+ *   Pointer to the mlx5_flow_verbs structure.
+ * @param[in] mark_id
+ *   Mark identifier to replace the flag.
+ */
+static void
+flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
+{
+       struct ibv_spec_header *hdr;
+       int i;
+
+       if (!verbs)
+               return;
+       /* Update Verbs specification. */
+       hdr = (struct ibv_spec_header *)verbs->specs;
+       if (!hdr)
+               return;
+       for (i = 0; i != verbs->attr->num_of_specs; ++i) {
+               if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
+                       struct ibv_flow_spec_action_tag *t =
+                               (struct ibv_flow_spec_action_tag *)hdr;
+
+                       t->tag_id = mlx5_flow_mark_set(mark_id);
+               }
+               hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
+       }
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. The detected action is also recorded in @p action_flags.
+ *
+ * @param[in] action
+ *   Action configuration.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ */
+static void
+flow_verbs_translate_action_mark(const struct rte_flow_action *action,
+                                uint64_t *action_flags,
+                                struct mlx5_flow *dev_flow)
+{
+       const struct rte_flow_action_mark *mark = action->conf;
+       unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
+       struct ibv_flow_spec_action_tag tag = {
+               .type = IBV_FLOW_SPEC_ACTION_TAG,
+               .size = size,
+       };
+       struct mlx5_flow_verbs *verbs = &dev_flow->verbs;
+
+       if (*action_flags & MLX5_FLOW_ACTION_FLAG) {
+               flow_verbs_mark_update(verbs, mark->id);
+               size = 0;
+       } else {
+               tag.tag_id = mlx5_flow_mark_set(mark->id);
+               flow_verbs_spec_add(dev_flow, &tag, size);
+       }
+       *action_flags |= MLX5_FLOW_ACTION_MARK;
+}
+
+/**
+ * Convert the @p action into a Verbs specification. This function assumes that
+ * the input is valid and that there is space to insert the requested action
+ * into the flow. The detected action is also recorded in @p action_flags.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] action
+ *   Action configuration.
+ * @param[in, out] action_flags
+ *   Pointer to the detected actions.
+ * @param[in] dev_flow
+ *   Pointer to mlx5_flow.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_verbs_translate_action_count(struct rte_eth_dev *dev,
+                                 const struct rte_flow_action *action,
+                                 uint64_t *action_flags,
+                                 struct mlx5_flow *dev_flow,
+                                 struct rte_flow_error *error)
+{
+       const struct rte_flow_action_count *count = action->conf;
+       struct rte_flow *flow = dev_flow->flow;
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
+       defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+       unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
+       struct ibv_flow_spec_counter_action counter = {
+               .type = IBV_FLOW_SPEC_ACTION_COUNT,
+               .size = size,
+       };
+#endif
+
+       if (!flow->counter) {
+               flow->counter = flow_verbs_counter_new(dev, count->shared,
+                                                      count->id);
+               if (!flow->counter)
+                       return rte_flow_error_set(error, rte_errno,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 action,
+                                                 "cannot get counter"
+                                                 " context.");
+       }
+       *action_flags |= MLX5_FLOW_ACTION_COUNT;
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42)
+       counter.counter_set_handle = flow->counter->cs->handle;
+       flow_verbs_spec_add(dev_flow, &counter, size);
+#elif defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+       counter.counters = flow->counter->cs;
+       flow_verbs_spec_add(dev_flow, &counter, size);
+#endif
+       return 0;
+}
+
+/**
+ * Internal validation function, used to validate both actions and items.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_verbs_validate(struct rte_eth_dev *dev,
+                   const struct rte_flow_attr *attr,
+                   const struct rte_flow_item items[],
+                   const struct rte_flow_action actions[],
+                   struct rte_flow_error *error)
+{
+       int ret;
+       uint64_t action_flags = 0;
+       uint64_t item_flags = 0;
+       int tunnel = 0;
+       uint8_t next_protocol = 0xff;
+
+       if (items == NULL)
+               return -1;
+       ret = mlx5_flow_validate_attributes(dev, attr, error);
+       if (ret < 0)
+               return ret;
+       for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+               int ret = 0;
+
+               tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
+               switch (items->type) {
+               case RTE_FLOW_ITEM_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ITEM_TYPE_ETH:
+                       ret = mlx5_flow_validate_item_eth(items, item_flags,
+                                                         error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+                                              MLX5_FLOW_LAYER_OUTER_L2;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VLAN:
+                       ret = mlx5_flow_validate_item_vlan(items, item_flags,
+                                                          error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
+                                              MLX5_FLOW_LAYER_OUTER_VLAN;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV4:
+                       ret = mlx5_flow_validate_item_ipv4(items, item_flags,
+                                                          error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+                                              MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+                       if (items->mask != NULL &&
+                           ((const struct rte_flow_item_ipv4 *)
+                            items->mask)->hdr.next_proto_id)
+                               next_protocol =
+                                       ((const struct rte_flow_item_ipv4 *)
+                                        (items->spec))->hdr.next_proto_id;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV6:
+                       ret = mlx5_flow_validate_item_ipv6(items, item_flags,
+                                                          error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+                                              MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+                       if (items->mask != NULL &&
+                           ((const struct rte_flow_item_ipv6 *)
+                            items->mask)->hdr.proto)
+                               next_protocol =
+                                       ((const struct rte_flow_item_ipv6 *)
+                                        items->spec)->hdr.proto;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_UDP:
+                       ret = mlx5_flow_validate_item_udp(items, item_flags,
+                                                         next_protocol,
+                                                         error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
+                                              MLX5_FLOW_LAYER_OUTER_L4_UDP;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_TCP:
+                       ret = mlx5_flow_validate_item_tcp
+                                               (items, item_flags,
+                                                next_protocol,
+                                                &rte_flow_item_tcp_mask,
+                                                error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
+                                              MLX5_FLOW_LAYER_OUTER_L4_TCP;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VXLAN:
+                       ret = mlx5_flow_validate_item_vxlan(items, item_flags,
+                                                           error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_VXLAN;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+                       ret = mlx5_flow_validate_item_vxlan_gpe(items,
+                                                               item_flags,
+                                                               dev, error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_GRE:
+                       ret = mlx5_flow_validate_item_gre(items, item_flags,
+                                                         next_protocol, error);
+                       if (ret < 0)
+                               return ret;
+                       item_flags |= MLX5_FLOW_LAYER_GRE;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_MPLS:
+                       ret = mlx5_flow_validate_item_mpls(items, item_flags,
+                                                          next_protocol,
+                                                          error);
+                       if (ret < 0)
+                               return ret;
+                       if (next_protocol != 0xff &&
+                           next_protocol != IPPROTO_MPLS)
+                               return rte_flow_error_set
+                                       (error, EINVAL,
+                                        RTE_FLOW_ERROR_TYPE_ITEM, items,
+                                        "protocol filtering not compatible"
+                                        " with MPLS layer");
+                       item_flags |= MLX5_FLOW_LAYER_MPLS;
+                       break;
+               default:
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ITEM,
+                                                 NULL, "item not supported");
+               }
+       }
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+               switch (actions->type) {
+               case RTE_FLOW_ACTION_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ACTION_TYPE_FLAG:
+                       ret = mlx5_flow_validate_action_flag(action_flags,
+                                                            attr,
+                                                            error);
+                       if (ret < 0)
+                               return ret;
+                       action_flags |= MLX5_FLOW_ACTION_FLAG;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_MARK:
+                       ret = mlx5_flow_validate_action_mark(actions,
+                                                            action_flags,
+                                                            attr,
+                                                            error);
+                       if (ret < 0)
+                               return ret;
+                       action_flags |= MLX5_FLOW_ACTION_MARK;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_DROP:
+                       ret = mlx5_flow_validate_action_drop(action_flags,
+                                                            attr,
+                                                            error);
+                       if (ret < 0)
+                               return ret;
+                       action_flags |= MLX5_FLOW_ACTION_DROP;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_QUEUE:
+                       ret = mlx5_flow_validate_action_queue(actions,
+                                                             action_flags, dev,
+                                                             attr,
+                                                             error);
+                       if (ret < 0)
+                               return ret;
+                       action_flags |= MLX5_FLOW_ACTION_QUEUE;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_RSS:
+                       ret = mlx5_flow_validate_action_rss(actions,
+                                                           action_flags, dev,
+                                                           attr,
+                                                           error);
+                       if (ret < 0)
+                               return ret;
+                       action_flags |= MLX5_FLOW_ACTION_RSS;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+                       ret = mlx5_flow_validate_action_count(dev, attr, error);
+                       if (ret < 0)
+                               return ret;
+                       action_flags |= MLX5_FLOW_ACTION_COUNT;
+                       break;
+               default:
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "action not supported");
+               }
+       }
+       if (!(action_flags & MLX5_FLOW_FATE_ACTIONS))
+               return rte_flow_error_set(error, EINVAL,
+                                         RTE_FLOW_ERROR_TYPE_ACTION, actions,
+                                         "no fate action is found");
+       return 0;
+}
+
+/**
+ * Calculate the amount of memory needed for the action part of the Verbs
+ * flow. In addition, it returns a bit-field with all the detected actions,
+ * to avoid another iteration over the actions.
+ *
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] action_flags
+ *   Pointer to the detected actions.
+ *
+ * @return
+ *   The size of the memory needed for all actions.
+ */
+static int
+flow_verbs_get_actions_and_size(const struct rte_flow_action actions[],
+                               uint64_t *action_flags)
+{
+       int size = 0;
+       uint64_t detected_actions = 0;
+
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+               switch (actions->type) {
+               case RTE_FLOW_ACTION_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ACTION_TYPE_FLAG:
+                       size += sizeof(struct ibv_flow_spec_action_tag);
+                       detected_actions |= MLX5_FLOW_ACTION_FLAG;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_MARK:
+                       size += sizeof(struct ibv_flow_spec_action_tag);
+                       detected_actions |= MLX5_FLOW_ACTION_MARK;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_DROP:
+                       size += sizeof(struct ibv_flow_spec_action_drop);
+                       detected_actions |= MLX5_FLOW_ACTION_DROP;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_QUEUE:
+                       detected_actions |= MLX5_FLOW_ACTION_QUEUE;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_RSS:
+                       detected_actions |= MLX5_FLOW_ACTION_RSS;
+                       break;
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+#if defined(HAVE_IBV_DEVICE_COUNTERS_SET_V42) || \
+       defined(HAVE_IBV_DEVICE_COUNTERS_SET_V45)
+                       size += sizeof(struct ibv_flow_spec_counter_action);
+#endif
+                       detected_actions |= MLX5_FLOW_ACTION_COUNT;
+                       break;
+               default:
+                       break;
+               }
+       }
+       *action_flags = detected_actions;
+       return size;
+}
+
+/**
+ * Calculate the amount of memory needed for the item part of the Verbs
+ * flow. In addition, it returns a bit-field with all the detected items,
+ * to avoid another iteration over the items.
+ *
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[out] item_flags
+ *   Pointer to the detected items.
+ *
+ * @return
+ *   The size of the memory needed for all items.
+ */
+static int
+flow_verbs_get_items_and_size(const struct rte_flow_item items[],
+                             uint64_t *item_flags)
+{
+       int size = 0;
+       uint64_t detected_items = 0;
+
+       for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+               int tunnel = !!(detected_items & MLX5_FLOW_LAYER_TUNNEL);
+
+               switch (items->type) {
+               case RTE_FLOW_ITEM_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ITEM_TYPE_ETH:
+                       size += sizeof(struct ibv_flow_spec_eth);
+                       detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
+                                                  MLX5_FLOW_LAYER_OUTER_L2;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VLAN:
+                       size += sizeof(struct ibv_flow_spec_eth);
+                       detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
+                                                  MLX5_FLOW_LAYER_OUTER_VLAN;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV4:
+                       size += sizeof(struct ibv_flow_spec_ipv4_ext);
+                       detected_items |= tunnel ?
+                                         MLX5_FLOW_LAYER_INNER_L3_IPV4 :
+                                         MLX5_FLOW_LAYER_OUTER_L3_IPV4;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV6:
+                       size += sizeof(struct ibv_flow_spec_ipv6);
+                       detected_items |= tunnel ?
+                                         MLX5_FLOW_LAYER_INNER_L3_IPV6 :
+                                         MLX5_FLOW_LAYER_OUTER_L3_IPV6;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_UDP:
+                       size += sizeof(struct ibv_flow_spec_tcp_udp);
+                       detected_items |= tunnel ?
+                                         MLX5_FLOW_LAYER_INNER_L4_UDP :
+                                         MLX5_FLOW_LAYER_OUTER_L4_UDP;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_TCP:
+                       size += sizeof(struct ibv_flow_spec_tcp_udp);
+                       detected_items |= tunnel ?
+                                         MLX5_FLOW_LAYER_INNER_L4_TCP :
+                                         MLX5_FLOW_LAYER_OUTER_L4_TCP;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VXLAN:
+                       size += sizeof(struct ibv_flow_spec_tunnel);
+                       detected_items |= MLX5_FLOW_LAYER_VXLAN;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+                       size += sizeof(struct ibv_flow_spec_tunnel);
+                       detected_items |= MLX5_FLOW_LAYER_VXLAN_GPE;
+                       break;
+#ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
+               case RTE_FLOW_ITEM_TYPE_GRE:
+                       size += sizeof(struct ibv_flow_spec_gre);
+                       detected_items |= MLX5_FLOW_LAYER_GRE;
+                       break;
+               case RTE_FLOW_ITEM_TYPE_MPLS:
+                       size += sizeof(struct ibv_flow_spec_mpls);
+                       detected_items |= MLX5_FLOW_LAYER_MPLS;
+                       break;
+#else
+               case RTE_FLOW_ITEM_TYPE_GRE:
+                       size += sizeof(struct ibv_flow_spec_tunnel);
+                       detected_items |= MLX5_FLOW_LAYER_TUNNEL;
+                       break;
+#endif
+               default:
+                       break;
+               }
+       }
+       *item_flags = detected_items;
+       return size;
+}
+
+/**
+ * Internal preparation function. Allocate mlx5_flow with the required size.
+ * The required size is calculated based on the actions and items. This function
+ * also returns the detected actions and items for later use.
+ *
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] item_flags
+ *   Pointer to bit mask of all items detected.
+ * @param[out] action_flags
+ *   Pointer to bit mask of all actions detected.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
+ *   is set.
+ */
+static struct mlx5_flow *
+flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
+                  const struct rte_flow_item items[],
+                  const struct rte_flow_action actions[],
+                  uint64_t *item_flags,
+                  uint64_t *action_flags,
+                  struct rte_flow_error *error)
+{
+       uint32_t size = sizeof(struct mlx5_flow) + sizeof(struct ibv_flow_attr);
+       struct mlx5_flow *flow;
+
+       size += flow_verbs_get_actions_and_size(actions, action_flags);
+       size += flow_verbs_get_items_and_size(items, item_flags);
+       flow = rte_calloc(__func__, 1, size, 0);
+       if (!flow) {
+               rte_flow_error_set(error, ENOMEM,
+                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                  "not enough memory to create flow");
+               return NULL;
+       }
+       flow->verbs.attr = (void *)(flow + 1);
+       flow->verbs.specs =
+               (uint8_t *)(flow + 1) + sizeof(struct ibv_flow_attr);
+       return flow;
+}
+
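+/*
+ * Editor's note: sketch of the single allocation laid out by
+ * flow_verbs_prepare() above. The size pre-computed from the action and item
+ * lists places everything in one rte_calloc() block:
+ *
+ *	[ struct mlx5_flow ][ struct ibv_flow_attr ][ spec | spec | ... ]
+ *	^ flow              ^ flow->verbs.attr      ^ flow->verbs.specs
+ *
+ * flow_verbs_spec_add() then fills the trailing spec area during
+ * flow_verbs_translate().
+ */
+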
+/**
+ * Fill the flow with Verbs specifications.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in, out] dev_flow
+ *   Pointer to the mlx5 flow.
+ * @param[in] attr
+ *   Pointer to the flow attributes.
+ * @param[in] items
+ *   Pointer to the list of items.
+ * @param[in] actions
+ *   Pointer to the list of actions.
+ * @param[out] error
+ *   Pointer to the error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_verbs_translate(struct rte_eth_dev *dev,
+                    struct mlx5_flow *dev_flow,
+                    const struct rte_flow_attr *attr,
+                    const struct rte_flow_item items[],
+                    const struct rte_flow_action actions[],
+                    struct rte_flow_error *error)
+{
+       uint64_t action_flags = 0;
+       uint64_t item_flags = 0;
+       uint64_t priority = attr->priority;
+       struct priv *priv = dev->data->dev_private;
+
+       if (priority == MLX5_FLOW_PRIO_RSVD)
+               priority = priv->config.flow_prio - 1;
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+               int ret;
+
+               switch (actions->type) {
+               case RTE_FLOW_ACTION_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ACTION_TYPE_FLAG:
+                       flow_verbs_translate_action_flag(actions,
+                                                        &action_flags,
+                                                        dev_flow);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_MARK:
+                       flow_verbs_translate_action_mark(actions,
+                                                        &action_flags,
+                                                        dev_flow);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_DROP:
+                       flow_verbs_translate_action_drop(&action_flags,
+                                                        dev_flow);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_QUEUE:
+                       flow_verbs_translate_action_queue(actions,
+                                                         &action_flags,
+                                                         dev_flow);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_RSS:
+                       flow_verbs_translate_action_rss(actions,
+                                                       &action_flags,
+                                                       dev_flow);
+                       break;
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+                       ret = flow_verbs_translate_action_count(dev,
+                                                               actions,
+                                                               &action_flags,
+                                                               dev_flow,
+                                                               error);
+                       if (ret < 0)
+                               return ret;
+                       break;
+               default:
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "action not supported");
+               }
+       }
+       /* Device flow should have action flags by flow_drv_prepare(). */
+       assert(dev_flow->flow->actions == action_flags);
+       for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
+               switch (items->type) {
+               case RTE_FLOW_ITEM_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ITEM_TYPE_ETH:
+                       flow_verbs_translate_item_eth(items, &item_flags,
+                                                     dev_flow);
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VLAN:
+                       flow_verbs_translate_item_vlan(items, &item_flags,
+                                                      dev_flow);
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV4:
+                       flow_verbs_translate_item_ipv4(items, &item_flags,
+                                                      dev_flow);
+                       break;
+               case RTE_FLOW_ITEM_TYPE_IPV6:
+                       flow_verbs_translate_item_ipv6(items, &item_flags,
+                                                      dev_flow);
+                       break;
+               case RTE_FLOW_ITEM_TYPE_UDP:
+                       flow_verbs_translate_item_udp(items, &item_flags,
+                                                     dev_flow);
+                       break;
+               case RTE_FLOW_ITEM_TYPE_TCP:
+                       flow_verbs_translate_item_tcp(items, &item_flags,
+                                                     dev_flow);
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VXLAN:
+                       flow_verbs_translate_item_vxlan(items, &item_flags,
+                                                       dev_flow);
+                       break;
+               case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+                       flow_verbs_translate_item_vxlan_gpe(items, &item_flags,
+                                                           dev_flow);
+                       break;
+               case RTE_FLOW_ITEM_TYPE_GRE:
+                       flow_verbs_translate_item_gre(items, &item_flags,
+                                                     dev_flow);
+                       break;
+               case RTE_FLOW_ITEM_TYPE_MPLS:
+                       flow_verbs_translate_item_mpls(items, &item_flags,
+                                                      dev_flow);
+                       break;
+               default:
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ITEM,
+                                                 NULL,
+                                                 "item not supported");
+               }
+       }
+       dev_flow->verbs.attr->priority =
+               mlx5_flow_adjust_priority(dev, priority,
+                                         dev_flow->verbs.attr->priority);
+       return 0;
+}
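
For context, a caller-side sketch of a pattern and action list that the translate loops above would walk; these are public rte_flow structures, with arbitrary example values for the mark ID and queue index:

struct rte_flow_action_mark mark = { .id = 42 };
struct rte_flow_action_queue queue = { .index = 0 };
const struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
const struct rte_flow_item items[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{ .type = RTE_FLOW_ITEM_TYPE_UDP },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};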
+
+/**
+ * Remove the flow from the NIC but keep it in memory.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ */
+static void
+flow_verbs_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+       struct mlx5_flow_verbs *verbs;
+       struct mlx5_flow *dev_flow;
+
+       if (!flow)
+               return;
+       LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+               verbs = &dev_flow->verbs;
+               if (verbs->flow) {
+                       claim_zero(mlx5_glue->destroy_flow(verbs->flow));
+                       verbs->flow = NULL;
+               }
+               if (verbs->hrxq) {
+                       if (flow->actions & MLX5_FLOW_ACTION_DROP)
+                               mlx5_hrxq_drop_release(dev);
+                       else
+                               mlx5_hrxq_release(dev, verbs->hrxq);
+                       verbs->hrxq = NULL;
+               }
+       }
+       if (flow->counter) {
+               flow_verbs_counter_release(flow->counter);
+               flow->counter = NULL;
+       }
+}
+
+/**
+ * Remove the flow from the NIC and the memory.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ */
+static void
+flow_verbs_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
+{
+       struct mlx5_flow *dev_flow;
+
+       if (!flow)
+               return;
+       flow_verbs_remove(dev, flow);
+       while (!LIST_EMPTY(&flow->dev_flows)) {
+               dev_flow = LIST_FIRST(&flow->dev_flows);
+               LIST_REMOVE(dev_flow, next);
+               rte_free(dev_flow);
+       }
+}
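
flow_verbs_destroy() drains the list with LIST_FIRST()/LIST_REMOVE() instead of LIST_FOREACH(), which would read each node again after rte_free() to advance. The same <sys/queue.h> drain idiom in a self-contained sketch (node and head names are hypothetical):

#include <sys/queue.h>
#include <stdlib.h>

struct node { LIST_ENTRY(node) entry; };
LIST_HEAD(node_list, node);

static void
drain(struct node_list *head)
{
	while (!LIST_EMPTY(head)) {
		struct node *n = LIST_FIRST(head);

		LIST_REMOVE(n, entry);
		free(n); /* Safe: n is already unlinked. */
	}
}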
+
+/**
+ * Apply the flow to the NIC.
+ *
+ * @param[in] dev
+ *   Pointer to the Ethernet device structure.
+ * @param[in, out] flow
+ *   Pointer to flow structure.
+ * @param[out] error
+ *   Pointer to error structure.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+flow_verbs_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
+                struct rte_flow_error *error)
+{
+       struct mlx5_flow_verbs *verbs;
+       struct mlx5_flow *dev_flow;
+       int err;
+
+       LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+               verbs = &dev_flow->verbs;
+               if (flow->actions & MLX5_FLOW_ACTION_DROP) {
+                       verbs->hrxq = mlx5_hrxq_drop_new(dev);
+                       if (!verbs->hrxq) {
+                               rte_flow_error_set
+                                       (error, errno,
+                                        RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                        "cannot get drop hash queue");
+                               goto error;
+                       }
+               } else {
+                       struct mlx5_hrxq *hrxq;
+
+                       hrxq = mlx5_hrxq_get(dev, flow->key,
+                                            MLX5_RSS_HASH_KEY_LEN,
+                                            verbs->hash_fields,
+                                            (*flow->queue),
+                                            flow->rss.queue_num);
+                       if (!hrxq)
+                               hrxq = mlx5_hrxq_new(dev, flow->key,
+                                                    MLX5_RSS_HASH_KEY_LEN,
+                                                    verbs->hash_fields,
+                                                    (*flow->queue),
+                                                    flow->rss.queue_num,
+                                                    !!(dev_flow->layers &
+                                                     MLX5_FLOW_LAYER_TUNNEL));
+                       if (!hrxq) {
+                               rte_flow_error_set
+                                       (error, rte_errno,
+                                        RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                        "cannot get hash queue");
+                               goto error;
+                       }
+                       verbs->hrxq = hrxq;
+               }
+               verbs->flow = mlx5_glue->create_flow(verbs->hrxq->qp,
+                                                    verbs->attr);
+               if (!verbs->flow) {
+                       rte_flow_error_set(error, errno,
+                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                          NULL,
+                                          "hardware refuses to create flow");
+                       goto error;
+               }
+       }
+       return 0;
+error:
+       err = rte_errno; /* Save rte_errno before cleanup. */
+       LIST_FOREACH(dev_flow, &flow->dev_flows, next) {
+               verbs = &dev_flow->verbs;
+               if (verbs->hrxq) {
+                       if (flow->actions & MLX5_FLOW_ACTION_DROP)
+                               mlx5_hrxq_drop_release(dev);
+                       else
+                               mlx5_hrxq_release(dev, verbs->hrxq);
+                       verbs->hrxq = NULL;
+               }
+       }
+       rte_errno = err; /* Restore rte_errno. */
+       return -rte_errno;
+}
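
The error path above uses a common DPDK idiom: rte_errno is saved before cleanup because the release helpers can overwrite it, then restored so the caller sees the original cause. Reduced to its essence (cleanup() stands in for the hrxq release calls):

	err = rte_errno;   /* Save the original failure cause. */
	cleanup();         /* May clobber rte_errno internally. */
	rte_errno = err;   /* Restore it for the caller. */
	return -rte_errno; /* DPDK negative-errno convention. */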
+
+/**
+ * Query a flow.
+ *
+ * @see rte_flow_query()
+ * @see rte_flow_ops
+ */
+static int
+flow_verbs_query(struct rte_eth_dev *dev,
+                struct rte_flow *flow,
+                const struct rte_flow_action *actions,
+                void *data,
+                struct rte_flow_error *error)
+{
+       int ret = -EINVAL;
+
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+               switch (actions->type) {
+               case RTE_FLOW_ACTION_TYPE_VOID:
+                       break;
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+                       ret = flow_verbs_counter_query(dev, flow, data, error);
+                       break;
+               default:
+                       return rte_flow_error_set(error, ENOTSUP,
+                                                 RTE_FLOW_ERROR_TYPE_ACTION,
+                                                 actions,
+                                                 "action not supported");
+               }
+       }
+       return ret;
+}
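
From an application, this handler is reached through the public rte_flow_query() entry point with a COUNT action; a usage sketch, assuming port_id and flow already exist and <stdio.h>/<inttypes.h> are included:

struct rte_flow_query_count count = { .reset = 1 };
const struct rte_flow_action count_action = {
	.type = RTE_FLOW_ACTION_TYPE_COUNT,
};
struct rte_flow_error flow_err;

if (rte_flow_query(port_id, flow, &count_action, &count, &flow_err) == 0 &&
    count.hits_set)
	printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n", count.hits, count.bytes);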
+
+const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops = {
+       .validate = flow_verbs_validate,
+       .prepare = flow_verbs_prepare,
+       .translate = flow_verbs_translate,
+       .apply = flow_verbs_apply,
+       .remove = flow_verbs_remove,
+       .destroy = flow_verbs_destroy,
+       .query = flow_verbs_query,
+};
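
This ops table is what the generic mlx5 flow layer dispatches through when the Verbs backend is selected. A sketch of such a dispatch (the validate prototype is inferred from its use here; the real driver also links dev_flow into the rte_flow before translating, which the assert in flow_verbs_translate() relies on):

static int
flow_backend_sketch(struct rte_eth_dev *dev,
		    const struct rte_flow_attr *attr,
		    const struct rte_flow_item items[],
		    const struct rte_flow_action actions[],
		    struct rte_flow_error *error)
{
	const struct mlx5_flow_driver_ops *fops = &mlx5_flow_verbs_drv_ops;
	uint64_t item_flags = 0;
	uint64_t action_flags = 0;
	struct mlx5_flow *dev_flow;
	int ret;

	ret = fops->validate(dev, attr, items, actions, error);
	if (ret)
		return ret;
	dev_flow = fops->prepare(attr, items, actions,
				 &item_flags, &action_flags, error);
	if (!dev_flow)
		return -rte_errno;
	return fops->translate(dev, dev_flow, attr, items, actions, error);
}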
index 84f9492..1afb114 100644
@@ -215,7 +215,7 @@ static struct ibv_counter_set *
 mlx5_glue_create_counter_set(struct ibv_context *context,
                             struct ibv_counter_set_init_attr *init_attr)
 {
-#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V42
        (void)context;
        (void)init_attr;
        return NULL;
@@ -227,7 +227,7 @@ mlx5_glue_create_counter_set(struct ibv_context *context,
 static int
 mlx5_glue_destroy_counter_set(struct ibv_counter_set *cs)
 {
-#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V42
        (void)cs;
        return ENOTSUP;
 #else
@@ -240,7 +240,7 @@ mlx5_glue_describe_counter_set(struct ibv_context *context,
                               uint16_t counter_set_id,
                               struct ibv_counter_set_description *cs_desc)
 {
-#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V42
        (void)context;
        (void)counter_set_id;
        (void)cs_desc;
@@ -254,7 +254,7 @@ static int
 mlx5_glue_query_counter_set(struct ibv_query_counter_set_attr *query_attr,
                            struct ibv_counter_set_data *cs_data)
 {
-#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V42
        (void)query_attr;
        (void)cs_data;
        return ENOTSUP;
@@ -263,6 +263,62 @@ mlx5_glue_query_counter_set(struct ibv_query_counter_set_attr *query_attr,
 #endif
 }
 
+static struct ibv_counters *
+mlx5_glue_create_counters(struct ibv_context *context,
+                         struct ibv_counters_init_attr *init_attr)
+{
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V45
+       (void)context;
+       (void)init_attr;
+       return NULL;
+#else
+       return ibv_create_counters(context, init_attr);
+#endif
+}
+
+static int
+mlx5_glue_destroy_counters(struct ibv_counters *counters)
+{
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V45
+       (void)counters;
+       return ENOTSUP;
+#else
+       return ibv_destroy_counters(counters);
+#endif
+}
+
+static int
+mlx5_glue_attach_counters(struct ibv_counters *counters,
+                         struct ibv_counter_attach_attr *attr,
+                         struct ibv_flow *flow)
+{
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V45
+       (void)counters;
+       (void)attr;
+       (void)flow;
+       return ENOTSUP;
+#else
+       return ibv_attach_counters_point_flow(counters, attr, flow);
+#endif
+}
+
+static int
+mlx5_glue_query_counters(struct ibv_counters *counters,
+                        uint64_t *counters_value,
+                        uint32_t ncounters,
+                        uint32_t flags)
+{
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V45
+       (void)counters;
+       (void)counters_value;
+       (void)ncounters;
+       (void)flags;
+       return ENOTSUP;
+#else
+       return ibv_read_counters(counters, counters_value, ncounters, flags);
+#endif
+}
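
As with the rest of the file, these wrappers are only ever reached through the mlx5_glue function table, keeping libibverbs symbols out of the PMD proper so the glue can live in a dlopen'ed library. A call-site sketch, assuming counters was obtained from mlx5_glue->create_counters():

uint64_t value = 0;
int ret;

/* Read one counter back through the glue table; no flags requested. */
ret = mlx5_glue->query_counters(counters, &value, 1, 0);
if (ret)
	/* ENOTSUP means the glue was built without ibv_read_counters(). */
	rte_errno = ret;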
+
 static void
 mlx5_glue_ack_async_event(struct ibv_async_event *event)
 {
@@ -346,6 +402,48 @@ mlx5_glue_dv_create_qp(struct ibv_context *context,
 #endif
 }
 
+static struct mlx5dv_flow_matcher *
+mlx5_glue_dv_create_flow_matcher(struct ibv_context *context,
+                                struct mlx5dv_flow_matcher_attr *matcher_attr)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+       return mlx5dv_create_flow_matcher(context, matcher_attr);
+#else
+       (void)context;
+       (void)matcher_attr;
+       return NULL;
+#endif
+}
+
+static struct ibv_flow *
+mlx5_glue_dv_create_flow(struct mlx5dv_flow_matcher *matcher,
+                        struct mlx5dv_flow_match_parameters *match_value,
+                        size_t num_actions,
+                        struct mlx5dv_flow_action_attr *actions_attr)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+       return mlx5dv_create_flow(matcher, match_value,
+                                 num_actions, actions_attr);
+#else
+       (void)matcher;
+       (void)match_value;
+       (void)num_actions;
+       (void)actions_attr;
+       return NULL;
+#endif
+}
+
+static int
+mlx5_glue_dv_destroy_flow_matcher(struct mlx5dv_flow_matcher *matcher)
+{
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+       return mlx5dv_destroy_flow_matcher(matcher);
+#else
+       (void)matcher;
+       return 0;
+#endif
+}
+
 alignas(RTE_CACHE_LINE_SIZE)
 const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
        .version = MLX5_GLUE_VERSION,
@@ -382,6 +480,10 @@ const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
        .destroy_counter_set = mlx5_glue_destroy_counter_set,
        .describe_counter_set = mlx5_glue_describe_counter_set,
        .query_counter_set = mlx5_glue_query_counter_set,
+       .create_counters = mlx5_glue_create_counters,
+       .destroy_counters = mlx5_glue_destroy_counters,
+       .attach_counters = mlx5_glue_attach_counters,
+       .query_counters = mlx5_glue_query_counters,
        .ack_async_event = mlx5_glue_ack_async_event,
        .get_async_event = mlx5_glue_get_async_event,
        .port_state_str = mlx5_glue_port_state_str,
@@ -392,4 +494,7 @@ const struct mlx5_glue *mlx5_glue = &(const struct mlx5_glue){
        .dv_set_context_attr = mlx5_glue_dv_set_context_attr,
        .dv_init_obj = mlx5_glue_dv_init_obj,
        .dv_create_qp = mlx5_glue_dv_create_qp,
+       .dv_create_flow_matcher = mlx5_glue_dv_create_flow_matcher,
+       .dv_destroy_flow_matcher = mlx5_glue_dv_destroy_flow_matcher,
+       .dv_create_flow = mlx5_glue_dv_create_flow,
 };
index e584d36..44bfefe 100644
@@ -23,7 +23,7 @@
 #define MLX5_GLUE_VERSION ""
 #endif
 
-#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V42
 struct ibv_counter_set;
 struct ibv_counter_set_data;
 struct ibv_counter_set_description;
@@ -31,6 +31,12 @@ struct ibv_counter_set_init_attr;
 struct ibv_query_counter_set_attr;
 #endif
 
+#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_V45
+struct ibv_counters;
+struct ibv_counters_init_attr;
+struct ibv_counter_attach_attr;
+#endif
+
 #ifndef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
 struct mlx5dv_qp_init_attr;
 #endif
@@ -39,6 +45,13 @@ struct mlx5dv_qp_init_attr;
 struct mlx5dv_wq_init_attr;
 #endif
 
+#ifndef HAVE_IBV_FLOW_DV_SUPPORT
+struct mlx5dv_flow_matcher;
+struct mlx5dv_flow_matcher_attr;
+struct mlx5dv_flow_action_attr;
+struct mlx5dv_flow_match_parameters;
+#endif
+
 /* LIB_GLUE_VERSION must be updated every time this structure is modified. */
 struct mlx5_glue {
        const char *version;
@@ -99,6 +112,17 @@ struct mlx5_glue {
                 struct ibv_counter_set_description *cs_desc);
        int (*query_counter_set)(struct ibv_query_counter_set_attr *query_attr,
                                 struct ibv_counter_set_data *cs_data);
+       struct ibv_counters *(*create_counters)
+               (struct ibv_context *context,
+                struct ibv_counters_init_attr *init_attr);
+       int (*destroy_counters)(struct ibv_counters *counters);
+       int (*attach_counters)(struct ibv_counters *counters,
+                              struct ibv_counter_attach_attr *attr,
+                              struct ibv_flow *flow);
+       int (*query_counters)(struct ibv_counters *counters,
+                             uint64_t *counters_value,
+                             uint32_t ncounters,
+                             uint32_t flags);
        void (*ack_async_event)(struct ibv_async_event *event);
        int (*get_async_event)(struct ibv_context *context,
                               struct ibv_async_event *event);
@@ -122,6 +146,14 @@ struct mlx5_glue {
                (struct ibv_context *context,
                 struct ibv_qp_init_attr_ex *qp_init_attr_ex,
                 struct mlx5dv_qp_init_attr *dv_qp_init_attr);
+       struct mlx5dv_flow_matcher *(*dv_create_flow_matcher)
+               (struct ibv_context *context,
+                struct mlx5dv_flow_matcher_attr *matcher_attr);
+       int (*dv_destroy_flow_matcher)(struct mlx5dv_flow_matcher *matcher);
+       struct ibv_flow *(*dv_create_flow)(struct mlx5dv_flow_matcher *matcher,
+                         struct mlx5dv_flow_match_parameters *match_value,
+                         size_t num_actions,
+                         struct mlx5dv_flow_action_attr *actions_attr);
 };
 
 const struct mlx5_glue *mlx5_glue;
index 12ee37f..672a476 100644
@@ -49,7 +49,7 @@ mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[ETHER_ADDR_LEN])
        struct ifreq request;
        int ret;
 
-       ret = mlx5_ifreq(dev, SIOCGIFHWADDR, &request, 0);
+       ret = mlx5_ifreq(dev, SIOCGIFHWADDR, &request);
        if (ret)
                return ret;
        memcpy(mac, request.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
index 1d1bcb5..f4b15d3 100644
@@ -277,6 +277,23 @@ mr_find_next_chunk(struct mlx5_mr *mr, struct mlx5_mr_cache *entry,
        uintptr_t end = 0;
        uint32_t idx = 0;
 
+       /* MR for external memory doesn't have memseg list. */
+       if (mr->msl == NULL) {
+               struct ibv_mr *ibv_mr = mr->ibv_mr;
+
+               assert(mr->ms_bmp_n == 1);
+               assert(mr->ms_n == 1);
+               assert(base_idx == 0);
+       /*
+        * The chunk can't be looked up in the memseg list; take it directly
+        * from the Verbs MR, as there is only one chunk.
+        */
+               entry->start = (uintptr_t)ibv_mr->addr;
+               entry->end = (uintptr_t)ibv_mr->addr + mr->ibv_mr->length;
+               entry->lkey = rte_cpu_to_be_32(mr->ibv_mr->lkey);
+               /* Returning 1 ends iteration. */
+               return 1;
+       }
        for (idx = base_idx; idx < mr->ms_bmp_n; ++idx) {
                if (rte_bitmap_get(mr->ms_bmp, idx)) {
                        const struct rte_memseg_list *msl;
@@ -811,6 +828,7 @@ mlx5_mr_mem_event_free_cb(struct rte_eth_dev *dev, const void *addr, size_t len)
                mr = mr_lookup_dev_list(dev, &entry, start);
                if (mr == NULL)
                        continue;
+               assert(mr->msl); /* Can't be external memory. */
                ms = rte_mem_virt2memseg((void *)start, msl);
                assert(ms != NULL);
                assert(msl->page_sz == ms->hugepage_sz);
@@ -1061,6 +1079,139 @@ mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl)
                (void *)mr_ctrl, mr_ctrl->cur_gen);
 }
 
+/**
+ * Called during rte_mempool_mem_iter() by mlx5_mr_update_ext_mp().
+ *
+ * An externally allocated chunk is registered and an MR is created for it.
+ * The MR object is added to the global list. If the memseg list of an MR
+ * object (mr->msl) is null, the MR covers externally allocated memory.
+ *
+ * Once external memory is registered, it must remain static. If the memory is
+ * freed and the virtual address range is later mapped to different physical
+ * memory, the stale translation entry may crash the device. The PMD can't
+ * track free events on external memory for now.
+ */
+static void
+mlx5_mr_update_ext_mp_cb(struct rte_mempool *mp, void *opaque,
+                        struct rte_mempool_memhdr *memhdr,
+                        unsigned mem_idx __rte_unused)
+{
+       struct mr_update_mp_data *data = opaque;
+       struct rte_eth_dev *dev = data->dev;
+       struct priv *priv = dev->data->dev_private;
+       struct mlx5_mr_ctrl *mr_ctrl = data->mr_ctrl;
+       struct mlx5_mr *mr = NULL;
+       uintptr_t addr = (uintptr_t)memhdr->addr;
+       size_t len = memhdr->len;
+       struct mlx5_mr_cache entry;
+       uint32_t lkey;
+
+       /* If the address is already registered, there is nothing to do. */
+       rte_rwlock_read_lock(&priv->mr.rwlock);
+       lkey = mr_lookup_dev(dev, &entry, addr);
+       rte_rwlock_read_unlock(&priv->mr.rwlock);
+       if (lkey != UINT32_MAX)
+               return;
+       mr = rte_zmalloc_socket(NULL,
+                               RTE_ALIGN_CEIL(sizeof(*mr),
+                                              RTE_CACHE_LINE_SIZE),
+                               RTE_CACHE_LINE_SIZE, mp->socket_id);
+       if (mr == NULL) {
+               DRV_LOG(WARNING,
+                       "port %u unable to allocate memory for a new MR of"
+                       " mempool (%s).",
+                       dev->data->port_id, mp->name);
+               data->ret = -1;
+               return;
+       }
+       DRV_LOG(DEBUG, "port %u register MR for chunk #%d of mempool (%s)",
+               dev->data->port_id, mem_idx, mp->name);
+       mr->ibv_mr = mlx5_glue->reg_mr(priv->pd, (void *)addr, len,
+                                      IBV_ACCESS_LOCAL_WRITE);
+       if (mr->ibv_mr == NULL) {
+               DRV_LOG(WARNING,
+                       "port %u fail to create a verbs MR for address (%p)",
+                       dev->data->port_id, (void *)addr);
+               rte_free(mr);
+               data->ret = -1;
+               return;
+       }
+       mr->msl = NULL; /* Mark it as external memory. */
+       mr->ms_bmp = NULL;
+       mr->ms_n = 1;
+       mr->ms_bmp_n = 1;
+       rte_rwlock_write_lock(&priv->mr.rwlock);
+       LIST_INSERT_HEAD(&priv->mr.mr_list, mr, mr);
+       DRV_LOG(DEBUG,
+               "port %u MR CREATED (%p) for external memory %p:\n"
+               "  [0x%" PRIxPTR ", 0x%" PRIxPTR "),"
+               " lkey=0x%x base_idx=%u ms_n=%u, ms_bmp_n=%u",
+               dev->data->port_id, (void *)mr, (void *)addr,
+               addr, addr + len, rte_cpu_to_be_32(mr->ibv_mr->lkey),
+               mr->ms_base_idx, mr->ms_n, mr->ms_bmp_n);
+       /* Insert into the global cache table. */
+       mr_insert_dev_cache(dev, mr);
+       rte_rwlock_write_unlock(&priv->mr.rwlock);
+       /* Insert into the local cache table. */
+       mlx5_mr_addr2mr_bh(dev, mr_ctrl, addr);
+}
+
+/**
+ * Register an MR for each memory chunk of a mempool with externally allocated
+ * memory and fill the local cache.
+ *
+ * @param dev
+ *   Pointer to Ethernet device.
+ * @param mr_ctrl
+ *   Pointer to per-queue MR control structure.
+ * @param mp
+ *   Pointer to registering Mempool.
+ *
+ * @return
+ *   0 on success, -1 on failure.
+ */
+static uint32_t
+mlx5_mr_update_ext_mp(struct rte_eth_dev *dev, struct mlx5_mr_ctrl *mr_ctrl,
+                     struct rte_mempool *mp)
+{
+       struct mr_update_mp_data data = {
+               .dev = dev,
+               .mr_ctrl = mr_ctrl,
+               .ret = 0,
+       };
+
+       rte_mempool_mem_iter(mp, mlx5_mr_update_ext_mp_cb, &data);
+       return data.ret;
+}
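
rte_mempool_mem_iter() invokes the callback once per memory chunk and has no failure return of its own, which is why errors are carried out through the ret field of the opaque struct. The same pattern in a self-contained sketch with hypothetical names:

struct walk_data { int ret; };

static void
chunk_cb(struct rte_mempool *mp __rte_unused, void *opaque,
	 struct rte_mempool_memhdr *memhdr, unsigned int mem_idx __rte_unused)
{
	struct walk_data *wd = opaque;

	if (memhdr->len == 0)
		wd->ret = -1; /* Record failure; the walk itself continues. */
}

static int
walk_mempool(struct rte_mempool *mp)
{
	struct walk_data wd = { .ret = 0 };

	rte_mempool_mem_iter(mp, chunk_cb, &wd);
	return wd.ret;
}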
+
+/**
+ * Register MRs for all memory chunks of a mempool with externally allocated
+ * memory and look up the LKey of the given address.
+ *
+ * @param txq
+ *   Pointer to Tx queue structure.
+ * @param addr
+ *   Search key.
+ * @param mp
+ *   Pointer to registering Mempool where addr belongs.
+ *
+ * @return
+ *   LKey for address on success, UINT32_MAX on failure.
+ */
+uint32_t
+mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr,
+                     struct rte_mempool *mp)
+{
+       struct mlx5_txq_ctrl *txq_ctrl =
+               container_of(txq, struct mlx5_txq_ctrl, txq);
+       struct mlx5_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
+       struct priv *priv = txq_ctrl->priv;
+
+       mlx5_mr_update_ext_mp(ETH_DEV(priv), mr_ctrl, mp);
+       return mlx5_tx_addr2mr_bh(txq, addr);
+}
+
 /* Called during rte_mempool_mem_iter() by mlx5_mr_update_mp(). */
 static void
 mlx5_mr_update_mp_cb(struct rte_mempool *mp __rte_unused, void *opaque,
@@ -1104,6 +1255,10 @@ mlx5_mr_update_mp(struct rte_eth_dev *dev, struct mlx5_mr_ctrl *mr_ctrl,
        };
 
        rte_mempool_mem_iter(mp, mlx5_mr_update_mp_cb, &data);
+       if (data.ret < 0 && rte_errno == ENXIO) {
+               /* Mempool may have externally allocated memory. */
+               return mlx5_mr_update_ext_mp(dev, mr_ctrl, mp);
+       }
        return data.ret;
 }
 
diff --git a/drivers/net/mlx5/mlx5_nl_flow.c b/drivers/net/mlx5/mlx5_nl_flow.c
deleted file mode 100644
index a1c8c34..0000000
+++ /dev/null
@@ -1,1248 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright 2018 6WIND S.A.
- * Copyright 2018 Mellanox Technologies, Ltd
- */
-
-#include <assert.h>
-#include <errno.h>
-#include <libmnl/libmnl.h>
-#include <linux/if_ether.h>
-#include <linux/netlink.h>
-#include <linux/pkt_cls.h>
-#include <linux/pkt_sched.h>
-#include <linux/rtnetlink.h>
-#include <linux/tc_act/tc_gact.h>
-#include <linux/tc_act/tc_mirred.h>
-#include <netinet/in.h>
-#include <stdalign.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-
-#include <rte_byteorder.h>
-#include <rte_errno.h>
-#include <rte_ether.h>
-#include <rte_flow.h>
-
-#include "mlx5.h"
-#include "mlx5_autoconf.h"
-
-#ifdef HAVE_TC_ACT_VLAN
-
-#include <linux/tc_act/tc_vlan.h>
-
-#else /* HAVE_TC_ACT_VLAN */
-
-#define TCA_VLAN_ACT_POP 1
-#define TCA_VLAN_ACT_PUSH 2
-#define TCA_VLAN_ACT_MODIFY 3
-#define TCA_VLAN_PARMS 2
-#define TCA_VLAN_PUSH_VLAN_ID 3
-#define TCA_VLAN_PUSH_VLAN_PROTOCOL 4
-#define TCA_VLAN_PAD 5
-#define TCA_VLAN_PUSH_VLAN_PRIORITY 6
-
-struct tc_vlan {
-       tc_gen;
-       int v_action;
-};
-
-#endif /* HAVE_TC_ACT_VLAN */
-
-/* Normally found in linux/netlink.h. */
-#ifndef NETLINK_CAP_ACK
-#define NETLINK_CAP_ACK 10
-#endif
-
-/* Normally found in linux/pkt_sched.h. */
-#ifndef TC_H_MIN_INGRESS
-#define TC_H_MIN_INGRESS 0xfff2u
-#endif
-
-/* Normally found in linux/pkt_cls.h. */
-#ifndef TCA_CLS_FLAGS_SKIP_SW
-#define TCA_CLS_FLAGS_SKIP_SW (1 << 1)
-#endif
-#ifndef HAVE_TCA_FLOWER_ACT
-#define TCA_FLOWER_ACT 3
-#endif
-#ifndef HAVE_TCA_FLOWER_FLAGS
-#define TCA_FLOWER_FLAGS 22
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_TYPE
-#define TCA_FLOWER_KEY_ETH_TYPE 8
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST
-#define TCA_FLOWER_KEY_ETH_DST 4
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_DST_MASK
-#define TCA_FLOWER_KEY_ETH_DST_MASK 5
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC
-#define TCA_FLOWER_KEY_ETH_SRC 6
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_ETH_SRC_MASK
-#define TCA_FLOWER_KEY_ETH_SRC_MASK 7
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IP_PROTO
-#define TCA_FLOWER_KEY_IP_PROTO 9
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC
-#define TCA_FLOWER_KEY_IPV4_SRC 10
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_SRC_MASK
-#define TCA_FLOWER_KEY_IPV4_SRC_MASK 11
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST
-#define TCA_FLOWER_KEY_IPV4_DST 12
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV4_DST_MASK
-#define TCA_FLOWER_KEY_IPV4_DST_MASK 13
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC
-#define TCA_FLOWER_KEY_IPV6_SRC 14
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_SRC_MASK
-#define TCA_FLOWER_KEY_IPV6_SRC_MASK 15
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST
-#define TCA_FLOWER_KEY_IPV6_DST 16
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_IPV6_DST_MASK
-#define TCA_FLOWER_KEY_IPV6_DST_MASK 17
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC
-#define TCA_FLOWER_KEY_TCP_SRC 18
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_SRC_MASK
-#define TCA_FLOWER_KEY_TCP_SRC_MASK 35
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST
-#define TCA_FLOWER_KEY_TCP_DST 19
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_TCP_DST_MASK
-#define TCA_FLOWER_KEY_TCP_DST_MASK 36
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC
-#define TCA_FLOWER_KEY_UDP_SRC 20
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_SRC_MASK
-#define TCA_FLOWER_KEY_UDP_SRC_MASK 37
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST
-#define TCA_FLOWER_KEY_UDP_DST 21
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_UDP_DST_MASK
-#define TCA_FLOWER_KEY_UDP_DST_MASK 38
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ID
-#define TCA_FLOWER_KEY_VLAN_ID 23
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_VLAN_PRIO
-#define TCA_FLOWER_KEY_VLAN_PRIO 24
-#endif
-#ifndef HAVE_TCA_FLOWER_KEY_VLAN_ETH_TYPE
-#define TCA_FLOWER_KEY_VLAN_ETH_TYPE 25
-#endif
-
-/** Parser state definitions for mlx5_nl_flow_trans[]. */
-enum mlx5_nl_flow_trans {
-       INVALID,
-       BACK,
-       ATTR,
-       PATTERN,
-       ITEM_VOID,
-       ITEM_PORT_ID,
-       ITEM_ETH,
-       ITEM_VLAN,
-       ITEM_IPV4,
-       ITEM_IPV6,
-       ITEM_TCP,
-       ITEM_UDP,
-       ACTIONS,
-       ACTION_VOID,
-       ACTION_PORT_ID,
-       ACTION_DROP,
-       ACTION_OF_POP_VLAN,
-       ACTION_OF_PUSH_VLAN,
-       ACTION_OF_SET_VLAN_VID,
-       ACTION_OF_SET_VLAN_PCP,
-       END,
-};
-
-#define TRANS(...) (const enum mlx5_nl_flow_trans []){ __VA_ARGS__, INVALID, }
-
-#define PATTERN_COMMON \
-       ITEM_VOID, ITEM_PORT_ID, ACTIONS
-#define ACTIONS_COMMON \
-       ACTION_VOID, ACTION_OF_POP_VLAN, ACTION_OF_PUSH_VLAN, \
-       ACTION_OF_SET_VLAN_VID, ACTION_OF_SET_VLAN_PCP
-#define ACTIONS_FATE \
-       ACTION_PORT_ID, ACTION_DROP
-
-/** Parser state transitions used by mlx5_nl_flow_transpose(). */
-static const enum mlx5_nl_flow_trans *const mlx5_nl_flow_trans[] = {
-       [INVALID] = NULL,
-       [BACK] = NULL,
-       [ATTR] = TRANS(PATTERN),
-       [PATTERN] = TRANS(ITEM_ETH, PATTERN_COMMON),
-       [ITEM_VOID] = TRANS(BACK),
-       [ITEM_PORT_ID] = TRANS(BACK),
-       [ITEM_ETH] = TRANS(ITEM_IPV4, ITEM_IPV6, ITEM_VLAN, PATTERN_COMMON),
-       [ITEM_VLAN] = TRANS(ITEM_IPV4, ITEM_IPV6, PATTERN_COMMON),
-       [ITEM_IPV4] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
-       [ITEM_IPV6] = TRANS(ITEM_TCP, ITEM_UDP, PATTERN_COMMON),
-       [ITEM_TCP] = TRANS(PATTERN_COMMON),
-       [ITEM_UDP] = TRANS(PATTERN_COMMON),
-       [ACTIONS] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-       [ACTION_VOID] = TRANS(BACK),
-       [ACTION_PORT_ID] = TRANS(ACTION_VOID, END),
-       [ACTION_DROP] = TRANS(ACTION_VOID, END),
-       [ACTION_OF_POP_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-       [ACTION_OF_PUSH_VLAN] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-       [ACTION_OF_SET_VLAN_VID] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-       [ACTION_OF_SET_VLAN_PCP] = TRANS(ACTIONS_FATE, ACTIONS_COMMON),
-       [END] = NULL,
-};
-
-/** Empty masks for known item types. */
-static const union {
-       struct rte_flow_item_port_id port_id;
-       struct rte_flow_item_eth eth;
-       struct rte_flow_item_vlan vlan;
-       struct rte_flow_item_ipv4 ipv4;
-       struct rte_flow_item_ipv6 ipv6;
-       struct rte_flow_item_tcp tcp;
-       struct rte_flow_item_udp udp;
-} mlx5_nl_flow_mask_empty;
-
-/** Supported masks for known item types. */
-static const struct {
-       struct rte_flow_item_port_id port_id;
-       struct rte_flow_item_eth eth;
-       struct rte_flow_item_vlan vlan;
-       struct rte_flow_item_ipv4 ipv4;
-       struct rte_flow_item_ipv6 ipv6;
-       struct rte_flow_item_tcp tcp;
-       struct rte_flow_item_udp udp;
-} mlx5_nl_flow_mask_supported = {
-       .port_id = {
-               .id = 0xffffffff,
-       },
-       .eth = {
-               .type = RTE_BE16(0xffff),
-               .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
-               .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
-       },
-       .vlan = {
-               /* PCP and VID only, no DEI. */
-               .tci = RTE_BE16(0xefff),
-               .inner_type = RTE_BE16(0xffff),
-       },
-       .ipv4.hdr = {
-               .next_proto_id = 0xff,
-               .src_addr = RTE_BE32(0xffffffff),
-               .dst_addr = RTE_BE32(0xffffffff),
-       },
-       .ipv6.hdr = {
-               .proto = 0xff,
-               .src_addr =
-                       "\xff\xff\xff\xff\xff\xff\xff\xff"
-                       "\xff\xff\xff\xff\xff\xff\xff\xff",
-               .dst_addr =
-                       "\xff\xff\xff\xff\xff\xff\xff\xff"
-                       "\xff\xff\xff\xff\xff\xff\xff\xff",
-       },
-       .tcp.hdr = {
-               .src_port = RTE_BE16(0xffff),
-               .dst_port = RTE_BE16(0xffff),
-       },
-       .udp.hdr = {
-               .src_port = RTE_BE16(0xffff),
-               .dst_port = RTE_BE16(0xffff),
-       },
-};
-
-/**
- * Retrieve mask for pattern item.
- *
- * This function does basic sanity checks on a pattern item in order to
- * return the most appropriate mask for it.
- *
- * @param[in] item
- *   Item specification.
- * @param[in] mask_default
- *   Default mask for pattern item as specified by the flow API.
- * @param[in] mask_supported
- *   Mask fields supported by the implementation.
- * @param[in] mask_empty
- *   Empty mask to return when there is no specification.
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   Either @p item->mask or one of the mask parameters on success, NULL
- *   otherwise and rte_errno is set.
- */
-static const void *
-mlx5_nl_flow_item_mask(const struct rte_flow_item *item,
-                      const void *mask_default,
-                      const void *mask_supported,
-                      const void *mask_empty,
-                      size_t mask_size,
-                      struct rte_flow_error *error)
-{
-       const uint8_t *mask;
-       size_t i;
-
-       /* item->last and item->mask cannot exist without item->spec. */
-       if (!item->spec && (item->mask || item->last)) {
-               rte_flow_error_set
-                       (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
-                        "\"mask\" or \"last\" field provided without a"
-                        " corresponding \"spec\"");
-               return NULL;
-       }
-       /* No spec, no mask, no problem. */
-       if (!item->spec)
-               return mask_empty;
-       mask = item->mask ? item->mask : mask_default;
-       assert(mask);
-       /*
-        * Single-pass check to make sure that:
-        * - Mask is supported, no bits are set outside mask_supported.
-        * - Both item->spec and item->last are included in mask.
-        */
-       for (i = 0; i != mask_size; ++i) {
-               if (!mask[i])
-                       continue;
-               if ((mask[i] | ((const uint8_t *)mask_supported)[i]) !=
-                   ((const uint8_t *)mask_supported)[i]) {
-                       rte_flow_error_set
-                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-                                mask, "unsupported field found in \"mask\"");
-                       return NULL;
-               }
-               if (item->last &&
-                   (((const uint8_t *)item->spec)[i] & mask[i]) !=
-                   (((const uint8_t *)item->last)[i] & mask[i])) {
-                       rte_flow_error_set
-                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_LAST,
-                                item->last,
-                                "range between \"spec\" and \"last\" not"
-                                " comprised in \"mask\"");
-                       return NULL;
-               }
-       }
-       return mask;
-}
-
-/**
- * Transpose flow rule description to rtnetlink message.
- *
- * This function transposes a flow rule description to a traffic control
- * (TC) filter creation message ready to be sent over Netlink.
- *
- * Target interface is specified as the first entry of the @p ptoi table.
- * Subsequent entries enable this function to resolve other DPDK port IDs
- * found in the flow rule.
- *
- * @param[out] buf
- *   Output message buffer. May be NULL when @p size is 0.
- * @param size
- *   Size of @p buf. Message may be truncated if not large enough.
- * @param[in] ptoi
- *   DPDK port ID to network interface index translation table. This table
- *   is terminated by an entry with a zero ifindex value.
- * @param[in] attr
- *   Flow rule attributes.
- * @param[in] pattern
- *   Pattern specification.
- * @param[in] actions
- *   Associated actions.
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   A positive value representing the exact size of the message in bytes
- *   regardless of the @p size parameter on success, a negative errno value
- *   otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_transpose(void *buf,
-                      size_t size,
-                      const struct mlx5_nl_flow_ptoi *ptoi,
-                      const struct rte_flow_attr *attr,
-                      const struct rte_flow_item *pattern,
-                      const struct rte_flow_action *actions,
-                      struct rte_flow_error *error)
-{
-       alignas(struct nlmsghdr)
-       uint8_t buf_tmp[mnl_nlmsg_size(sizeof(struct tcmsg) + 1024)];
-       const struct rte_flow_item *item;
-       const struct rte_flow_action *action;
-       unsigned int n;
-       uint32_t act_index_cur;
-       bool in_port_id_set;
-       bool eth_type_set;
-       bool vlan_present;
-       bool vlan_eth_type_set;
-       bool ip_proto_set;
-       struct nlattr *na_flower;
-       struct nlattr *na_flower_act;
-       struct nlattr *na_vlan_id;
-       struct nlattr *na_vlan_priority;
-       const enum mlx5_nl_flow_trans *trans;
-       const enum mlx5_nl_flow_trans *back;
-
-       if (!size)
-               goto error_nobufs;
-init:
-       item = pattern;
-       action = actions;
-       n = 0;
-       act_index_cur = 0;
-       in_port_id_set = false;
-       eth_type_set = false;
-       vlan_present = false;
-       vlan_eth_type_set = false;
-       ip_proto_set = false;
-       na_flower = NULL;
-       na_flower_act = NULL;
-       na_vlan_id = NULL;
-       na_vlan_priority = NULL;
-       trans = TRANS(ATTR);
-       back = trans;
-trans:
-       switch (trans[n++]) {
-               union {
-                       const struct rte_flow_item_port_id *port_id;
-                       const struct rte_flow_item_eth *eth;
-                       const struct rte_flow_item_vlan *vlan;
-                       const struct rte_flow_item_ipv4 *ipv4;
-                       const struct rte_flow_item_ipv6 *ipv6;
-                       const struct rte_flow_item_tcp *tcp;
-                       const struct rte_flow_item_udp *udp;
-               } spec, mask;
-               union {
-                       const struct rte_flow_action_port_id *port_id;
-                       const struct rte_flow_action_of_push_vlan *of_push_vlan;
-                       const struct rte_flow_action_of_set_vlan_vid *
-                               of_set_vlan_vid;
-                       const struct rte_flow_action_of_set_vlan_pcp *
-                               of_set_vlan_pcp;
-               } conf;
-               struct nlmsghdr *nlh;
-               struct tcmsg *tcm;
-               struct nlattr *act_index;
-               struct nlattr *act;
-               unsigned int i;
-
-       case INVALID:
-               if (item->type)
-                       return rte_flow_error_set
-                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
-                                item, "unsupported pattern item combination");
-               else if (action->type)
-                       return rte_flow_error_set
-                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
-                                action, "unsupported action combination");
-               return rte_flow_error_set
-                       (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-                        "flow rule lacks some kind of fate action");
-       case BACK:
-               trans = back;
-               n = 0;
-               goto trans;
-       case ATTR:
-               /*
-                * Supported attributes: no groups, some priorities and
-                * ingress only. Don't care about transfer as it is the
-                * caller's problem.
-                */
-               if (attr->group)
-                       return rte_flow_error_set
-                               (error, ENOTSUP,
-                                RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
-                                attr, "groups are not supported");
-               if (attr->priority > 0xfffe)
-                       return rte_flow_error_set
-                               (error, ENOTSUP,
-                                RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
-                                attr, "lowest priority level is 0xfffe");
-               if (!attr->ingress)
-                       return rte_flow_error_set
-                               (error, ENOTSUP,
-                                RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
-                                attr, "only ingress is supported");
-               if (attr->egress)
-                       return rte_flow_error_set
-                               (error, ENOTSUP,
-                                RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
-                                attr, "egress is not supported");
-               if (size < mnl_nlmsg_size(sizeof(*tcm)))
-                       goto error_nobufs;
-               nlh = mnl_nlmsg_put_header(buf);
-               nlh->nlmsg_type = 0;
-               nlh->nlmsg_flags = 0;
-               nlh->nlmsg_seq = 0;
-               tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
-               tcm->tcm_family = AF_UNSPEC;
-               tcm->tcm_ifindex = ptoi[0].ifindex;
-               /*
-                * Let kernel pick a handle by default. A predictable handle
-                * can be set by the caller on the resulting buffer through
-                * mlx5_nl_flow_brand().
-                */
-               tcm->tcm_handle = 0;
-               tcm->tcm_parent = TC_H_MAKE(TC_H_INGRESS, TC_H_MIN_INGRESS);
-               /*
-                * Priority cannot be zero to prevent the kernel from
-                * picking one automatically.
-                */
-               tcm->tcm_info = TC_H_MAKE((attr->priority + 1) << 16,
-                                         RTE_BE16(ETH_P_ALL));
-               break;
-       case PATTERN:
-               if (!mnl_attr_put_strz_check(buf, size, TCA_KIND, "flower"))
-                       goto error_nobufs;
-               na_flower = mnl_attr_nest_start_check(buf, size, TCA_OPTIONS);
-               if (!na_flower)
-                       goto error_nobufs;
-               if (!mnl_attr_put_u32_check(buf, size, TCA_FLOWER_FLAGS,
-                                           TCA_CLS_FLAGS_SKIP_SW))
-                       goto error_nobufs;
-               break;
-       case ITEM_VOID:
-               if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
-                       goto trans;
-               ++item;
-               break;
-       case ITEM_PORT_ID:
-               if (item->type != RTE_FLOW_ITEM_TYPE_PORT_ID)
-                       goto trans;
-               mask.port_id = mlx5_nl_flow_item_mask
-                       (item, &rte_flow_item_port_id_mask,
-                        &mlx5_nl_flow_mask_supported.port_id,
-                        &mlx5_nl_flow_mask_empty.port_id,
-                        sizeof(mlx5_nl_flow_mask_supported.port_id), error);
-               if (!mask.port_id)
-                       return -rte_errno;
-               if (mask.port_id == &mlx5_nl_flow_mask_empty.port_id) {
-                       in_port_id_set = 1;
-                       ++item;
-                       break;
-               }
-               spec.port_id = item->spec;
-               if (mask.port_id->id && mask.port_id->id != 0xffffffff)
-                       return rte_flow_error_set
-                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-                                mask.port_id,
-                                "no support for partial mask on"
-                                " \"id\" field");
-               if (!mask.port_id->id)
-                       i = 0;
-               else
-                       for (i = 0; ptoi[i].ifindex; ++i)
-                               if (ptoi[i].port_id == spec.port_id->id)
-                                       break;
-               if (!ptoi[i].ifindex)
-                       return rte_flow_error_set
-                               (error, ENODEV, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
-                                spec.port_id,
-                                "missing data to convert port ID to ifindex");
-               tcm = mnl_nlmsg_get_payload(buf);
-               if (in_port_id_set &&
-                   ptoi[i].ifindex != (unsigned int)tcm->tcm_ifindex)
-                       return rte_flow_error_set
-                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
-                                spec.port_id,
-                                "cannot match traffic for several port IDs"
-                                " through a single flow rule");
-               tcm->tcm_ifindex = ptoi[i].ifindex;
-               in_port_id_set = 1;
-               ++item;
-               break;
-       case ITEM_ETH:
-               if (item->type != RTE_FLOW_ITEM_TYPE_ETH)
-                       goto trans;
-               mask.eth = mlx5_nl_flow_item_mask
-                       (item, &rte_flow_item_eth_mask,
-                        &mlx5_nl_flow_mask_supported.eth,
-                        &mlx5_nl_flow_mask_empty.eth,
-                        sizeof(mlx5_nl_flow_mask_supported.eth), error);
-               if (!mask.eth)
-                       return -rte_errno;
-               if (mask.eth == &mlx5_nl_flow_mask_empty.eth) {
-                       ++item;
-                       break;
-               }
-               spec.eth = item->spec;
-               if (mask.eth->type && mask.eth->type != RTE_BE16(0xffff))
-                       return rte_flow_error_set
-                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-                                mask.eth,
-                                "no support for partial mask on"
-                                " \"type\" field");
-               if (mask.eth->type) {
-                       if (!mnl_attr_put_u16_check(buf, size,
-                                                   TCA_FLOWER_KEY_ETH_TYPE,
-                                                   spec.eth->type))
-                               goto error_nobufs;
-                       eth_type_set = 1;
-               }
-               if ((!is_zero_ether_addr(&mask.eth->dst) &&
-                    (!mnl_attr_put_check(buf, size,
-                                         TCA_FLOWER_KEY_ETH_DST,
-                                         ETHER_ADDR_LEN,
-                                         spec.eth->dst.addr_bytes) ||
-                     !mnl_attr_put_check(buf, size,
-                                         TCA_FLOWER_KEY_ETH_DST_MASK,
-                                         ETHER_ADDR_LEN,
-                                         mask.eth->dst.addr_bytes))) ||
-                   (!is_zero_ether_addr(&mask.eth->src) &&
-                    (!mnl_attr_put_check(buf, size,
-                                         TCA_FLOWER_KEY_ETH_SRC,
-                                         ETHER_ADDR_LEN,
-                                         spec.eth->src.addr_bytes) ||
-                     !mnl_attr_put_check(buf, size,
-                                         TCA_FLOWER_KEY_ETH_SRC_MASK,
-                                         ETHER_ADDR_LEN,
-                                         mask.eth->src.addr_bytes))))
-                       goto error_nobufs;
-               ++item;
-               break;
-       case ITEM_VLAN:
-               if (item->type != RTE_FLOW_ITEM_TYPE_VLAN)
-                       goto trans;
-               mask.vlan = mlx5_nl_flow_item_mask
-                       (item, &rte_flow_item_vlan_mask,
-                        &mlx5_nl_flow_mask_supported.vlan,
-                        &mlx5_nl_flow_mask_empty.vlan,
-                        sizeof(mlx5_nl_flow_mask_supported.vlan), error);
-               if (!mask.vlan)
-                       return -rte_errno;
-               if (!eth_type_set &&
-                   !mnl_attr_put_u16_check(buf, size,
-                                           TCA_FLOWER_KEY_ETH_TYPE,
-                                           RTE_BE16(ETH_P_8021Q)))
-                       goto error_nobufs;
-               eth_type_set = 1;
-               vlan_present = 1;
-               if (mask.vlan == &mlx5_nl_flow_mask_empty.vlan) {
-                       ++item;
-                       break;
-               }
-               spec.vlan = item->spec;
-               if ((mask.vlan->tci & RTE_BE16(0xe000) &&
-                    (mask.vlan->tci & RTE_BE16(0xe000)) != RTE_BE16(0xe000)) ||
-                   (mask.vlan->tci & RTE_BE16(0x0fff) &&
-                    (mask.vlan->tci & RTE_BE16(0x0fff)) != RTE_BE16(0x0fff)) ||
-                   (mask.vlan->inner_type &&
-                    mask.vlan->inner_type != RTE_BE16(0xffff)))
-                       return rte_flow_error_set
-                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-                                mask.vlan,
-                                "no support for partial masks on"
-                                " \"tci\" (PCP and VID parts) and"
-                                " \"inner_type\" fields");
-               if (mask.vlan->inner_type) {
-                       if (!mnl_attr_put_u16_check
-                           (buf, size, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
-                            spec.vlan->inner_type))
-                               goto error_nobufs;
-                       vlan_eth_type_set = 1;
-               }
-               if ((mask.vlan->tci & RTE_BE16(0xe000) &&
-                    !mnl_attr_put_u8_check
-                    (buf, size, TCA_FLOWER_KEY_VLAN_PRIO,
-                     (rte_be_to_cpu_16(spec.vlan->tci) >> 13) & 0x7)) ||
-                   (mask.vlan->tci & RTE_BE16(0x0fff) &&
-                    !mnl_attr_put_u16_check
-                    (buf, size, TCA_FLOWER_KEY_VLAN_ID,
-                     rte_be_to_cpu_16(spec.vlan->tci & RTE_BE16(0x0fff)))))
-                       goto error_nobufs;
-               ++item;
-               break;
-       case ITEM_IPV4:
-               if (item->type != RTE_FLOW_ITEM_TYPE_IPV4)
-                       goto trans;
-               mask.ipv4 = mlx5_nl_flow_item_mask
-                       (item, &rte_flow_item_ipv4_mask,
-                        &mlx5_nl_flow_mask_supported.ipv4,
-                        &mlx5_nl_flow_mask_empty.ipv4,
-                        sizeof(mlx5_nl_flow_mask_supported.ipv4), error);
-               if (!mask.ipv4)
-                       return -rte_errno;
-               if ((!eth_type_set || !vlan_eth_type_set) &&
-                   !mnl_attr_put_u16_check(buf, size,
-                                           vlan_present ?
-                                           TCA_FLOWER_KEY_VLAN_ETH_TYPE :
-                                           TCA_FLOWER_KEY_ETH_TYPE,
-                                           RTE_BE16(ETH_P_IP)))
-                       goto error_nobufs;
-               eth_type_set = 1;
-               vlan_eth_type_set = 1;
-               if (mask.ipv4 == &mlx5_nl_flow_mask_empty.ipv4) {
-                       ++item;
-                       break;
-               }
-               spec.ipv4 = item->spec;
-               if (mask.ipv4->hdr.next_proto_id &&
-                   mask.ipv4->hdr.next_proto_id != 0xff)
-                       return rte_flow_error_set
-                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-                                mask.ipv4,
-                                "no support for partial mask on"
-                                " \"hdr.next_proto_id\" field");
-               if (mask.ipv4->hdr.next_proto_id) {
-                       if (!mnl_attr_put_u8_check
-                           (buf, size, TCA_FLOWER_KEY_IP_PROTO,
-                            spec.ipv4->hdr.next_proto_id))
-                               goto error_nobufs;
-                       ip_proto_set = 1;
-               }
-               if ((mask.ipv4->hdr.src_addr &&
-                    (!mnl_attr_put_u32_check(buf, size,
-                                             TCA_FLOWER_KEY_IPV4_SRC,
-                                             spec.ipv4->hdr.src_addr) ||
-                     !mnl_attr_put_u32_check(buf, size,
-                                             TCA_FLOWER_KEY_IPV4_SRC_MASK,
-                                             mask.ipv4->hdr.src_addr))) ||
-                   (mask.ipv4->hdr.dst_addr &&
-                    (!mnl_attr_put_u32_check(buf, size,
-                                             TCA_FLOWER_KEY_IPV4_DST,
-                                             spec.ipv4->hdr.dst_addr) ||
-                     !mnl_attr_put_u32_check(buf, size,
-                                             TCA_FLOWER_KEY_IPV4_DST_MASK,
-                                             mask.ipv4->hdr.dst_addr))))
-                       goto error_nobufs;
-               ++item;
-               break;
-       case ITEM_IPV6:
-               if (item->type != RTE_FLOW_ITEM_TYPE_IPV6)
-                       goto trans;
-               mask.ipv6 = mlx5_nl_flow_item_mask
-                       (item, &rte_flow_item_ipv6_mask,
-                        &mlx5_nl_flow_mask_supported.ipv6,
-                        &mlx5_nl_flow_mask_empty.ipv6,
-                        sizeof(mlx5_nl_flow_mask_supported.ipv6), error);
-               if (!mask.ipv6)
-                       return -rte_errno;
-               if ((!eth_type_set || !vlan_eth_type_set) &&
-                   !mnl_attr_put_u16_check(buf, size,
-                                           vlan_present ?
-                                           TCA_FLOWER_KEY_VLAN_ETH_TYPE :
-                                           TCA_FLOWER_KEY_ETH_TYPE,
-                                           RTE_BE16(ETH_P_IPV6)))
-                       goto error_nobufs;
-               eth_type_set = 1;
-               vlan_eth_type_set = 1;
-               if (mask.ipv6 == &mlx5_nl_flow_mask_empty.ipv6) {
-                       ++item;
-                       break;
-               }
-               spec.ipv6 = item->spec;
-               if (mask.ipv6->hdr.proto && mask.ipv6->hdr.proto != 0xff)
-                       return rte_flow_error_set
-                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-                                mask.ipv6,
-                                "no support for partial mask on"
-                                " \"hdr.proto\" field");
-               if (mask.ipv6->hdr.proto) {
-                       if (!mnl_attr_put_u8_check
-                           (buf, size, TCA_FLOWER_KEY_IP_PROTO,
-                            spec.ipv6->hdr.proto))
-                               goto error_nobufs;
-                       ip_proto_set = 1;
-               }
-               if ((!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.src_addr) &&
-                    (!mnl_attr_put_check(buf, size,
-                                         TCA_FLOWER_KEY_IPV6_SRC,
-                                         sizeof(spec.ipv6->hdr.src_addr),
-                                         spec.ipv6->hdr.src_addr) ||
-                     !mnl_attr_put_check(buf, size,
-                                         TCA_FLOWER_KEY_IPV6_SRC_MASK,
-                                         sizeof(mask.ipv6->hdr.src_addr),
-                                         mask.ipv6->hdr.src_addr))) ||
-                   (!IN6_IS_ADDR_UNSPECIFIED(mask.ipv6->hdr.dst_addr) &&
-                    (!mnl_attr_put_check(buf, size,
-                                         TCA_FLOWER_KEY_IPV6_DST,
-                                         sizeof(spec.ipv6->hdr.dst_addr),
-                                         spec.ipv6->hdr.dst_addr) ||
-                     !mnl_attr_put_check(buf, size,
-                                         TCA_FLOWER_KEY_IPV6_DST_MASK,
-                                         sizeof(mask.ipv6->hdr.dst_addr),
-                                         mask.ipv6->hdr.dst_addr))))
-                       goto error_nobufs;
-               ++item;
-               break;
-       case ITEM_TCP:
-               if (item->type != RTE_FLOW_ITEM_TYPE_TCP)
-                       goto trans;
-               mask.tcp = mlx5_nl_flow_item_mask
-                       (item, &rte_flow_item_tcp_mask,
-                        &mlx5_nl_flow_mask_supported.tcp,
-                        &mlx5_nl_flow_mask_empty.tcp,
-                        sizeof(mlx5_nl_flow_mask_supported.tcp), error);
-               if (!mask.tcp)
-                       return -rte_errno;
-               if (!ip_proto_set &&
-                   !mnl_attr_put_u8_check(buf, size,
-                                          TCA_FLOWER_KEY_IP_PROTO,
-                                          IPPROTO_TCP))
-                       goto error_nobufs;
-               if (mask.tcp == &mlx5_nl_flow_mask_empty.tcp) {
-                       ++item;
-                       break;
-               }
-               spec.tcp = item->spec;
-               if ((mask.tcp->hdr.src_port &&
-                    mask.tcp->hdr.src_port != RTE_BE16(0xffff)) ||
-                   (mask.tcp->hdr.dst_port &&
-                    mask.tcp->hdr.dst_port != RTE_BE16(0xffff)))
-                       return rte_flow_error_set
-                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-                                mask.tcp,
-                                "no support for partial masks on"
-                                " \"hdr.src_port\" and \"hdr.dst_port\""
-                                " fields");
-               if ((mask.tcp->hdr.src_port &&
-                    (!mnl_attr_put_u16_check(buf, size,
-                                             TCA_FLOWER_KEY_TCP_SRC,
-                                             spec.tcp->hdr.src_port) ||
-                     !mnl_attr_put_u16_check(buf, size,
-                                             TCA_FLOWER_KEY_TCP_SRC_MASK,
-                                             mask.tcp->hdr.src_port))) ||
-                   (mask.tcp->hdr.dst_port &&
-                    (!mnl_attr_put_u16_check(buf, size,
-                                             TCA_FLOWER_KEY_TCP_DST,
-                                             spec.tcp->hdr.dst_port) ||
-                     !mnl_attr_put_u16_check(buf, size,
-                                             TCA_FLOWER_KEY_TCP_DST_MASK,
-                                             mask.tcp->hdr.dst_port))))
-                       goto error_nobufs;
-               ++item;
-               break;
-       case ITEM_UDP:
-               if (item->type != RTE_FLOW_ITEM_TYPE_UDP)
-                       goto trans;
-               mask.udp = mlx5_nl_flow_item_mask
-                       (item, &rte_flow_item_udp_mask,
-                        &mlx5_nl_flow_mask_supported.udp,
-                        &mlx5_nl_flow_mask_empty.udp,
-                        sizeof(mlx5_nl_flow_mask_supported.udp), error);
-               if (!mask.udp)
-                       return -rte_errno;
-               if (!ip_proto_set &&
-                   !mnl_attr_put_u8_check(buf, size,
-                                          TCA_FLOWER_KEY_IP_PROTO,
-                                          IPPROTO_UDP))
-                       goto error_nobufs;
-               if (mask.udp == &mlx5_nl_flow_mask_empty.udp) {
-                       ++item;
-                       break;
-               }
-               spec.udp = item->spec;
-               if ((mask.udp->hdr.src_port &&
-                    mask.udp->hdr.src_port != RTE_BE16(0xffff)) ||
-                   (mask.udp->hdr.dst_port &&
-                    mask.udp->hdr.dst_port != RTE_BE16(0xffff)))
-                       return rte_flow_error_set
-                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM_MASK,
-                                mask.udp,
-                                "no support for partial masks on"
-                                " \"hdr.src_port\" and \"hdr.dst_port\""
-                                " fields");
-               if ((mask.udp->hdr.src_port &&
-                    (!mnl_attr_put_u16_check(buf, size,
-                                             TCA_FLOWER_KEY_UDP_SRC,
-                                             spec.udp->hdr.src_port) ||
-                     !mnl_attr_put_u16_check(buf, size,
-                                             TCA_FLOWER_KEY_UDP_SRC_MASK,
-                                             mask.udp->hdr.src_port))) ||
-                   (mask.udp->hdr.dst_port &&
-                    (!mnl_attr_put_u16_check(buf, size,
-                                             TCA_FLOWER_KEY_UDP_DST,
-                                             spec.udp->hdr.dst_port) ||
-                     !mnl_attr_put_u16_check(buf, size,
-                                             TCA_FLOWER_KEY_UDP_DST_MASK,
-                                             mask.udp->hdr.dst_port))))
-                       goto error_nobufs;
-               ++item;
-               break;
-       case ACTIONS:
-               if (item->type != RTE_FLOW_ITEM_TYPE_END)
-                       goto trans;
-               assert(na_flower);
-               assert(!na_flower_act);
-               na_flower_act =
-                       mnl_attr_nest_start_check(buf, size, TCA_FLOWER_ACT);
-               if (!na_flower_act)
-                       goto error_nobufs;
-               act_index_cur = 1;
-               break;
-       case ACTION_VOID:
-               if (action->type != RTE_FLOW_ACTION_TYPE_VOID)
-                       goto trans;
-               ++action;
-               break;
-       case ACTION_PORT_ID:
-               if (action->type != RTE_FLOW_ACTION_TYPE_PORT_ID)
-                       goto trans;
-               conf.port_id = action->conf;
-               if (conf.port_id->original)
-                       i = 0;
-               else
-                       for (i = 0; ptoi[i].ifindex; ++i)
-                               if (ptoi[i].port_id == conf.port_id->id)
-                                       break;
-               if (!ptoi[i].ifindex)
-                       return rte_flow_error_set
-                               (error, ENODEV, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
-                                conf.port_id,
-                                "missing data to convert port ID to ifindex");
-               act_index =
-                       mnl_attr_nest_start_check(buf, size, act_index_cur++);
-               if (!act_index ||
-                   !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "mirred"))
-                       goto error_nobufs;
-               act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
-               if (!act)
-                       goto error_nobufs;
-               if (!mnl_attr_put_check(buf, size, TCA_MIRRED_PARMS,
-                                       sizeof(struct tc_mirred),
-                                       &(struct tc_mirred){
-                                               .action = TC_ACT_STOLEN,
-                                               .eaction = TCA_EGRESS_REDIR,
-                                               .ifindex = ptoi[i].ifindex,
-                                       }))
-                       goto error_nobufs;
-               mnl_attr_nest_end(buf, act);
-               mnl_attr_nest_end(buf, act_index);
-               ++action;
-               break;
-       case ACTION_DROP:
-               if (action->type != RTE_FLOW_ACTION_TYPE_DROP)
-                       goto trans;
-               act_index =
-                       mnl_attr_nest_start_check(buf, size, act_index_cur++);
-               if (!act_index ||
-                   !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "gact"))
-                       goto error_nobufs;
-               act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
-               if (!act)
-                       goto error_nobufs;
-               if (!mnl_attr_put_check(buf, size, TCA_GACT_PARMS,
-                                       sizeof(struct tc_gact),
-                                       &(struct tc_gact){
-                                               .action = TC_ACT_SHOT,
-                                       }))
-                       goto error_nobufs;
-               mnl_attr_nest_end(buf, act);
-               mnl_attr_nest_end(buf, act_index);
-               ++action;
-               break;
-       case ACTION_OF_POP_VLAN:
-               if (action->type != RTE_FLOW_ACTION_TYPE_OF_POP_VLAN)
-                       goto trans;
-               conf.of_push_vlan = NULL;
-               i = TCA_VLAN_ACT_POP;
-               goto action_of_vlan;
-       case ACTION_OF_PUSH_VLAN:
-               if (action->type != RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN)
-                       goto trans;
-               conf.of_push_vlan = action->conf;
-               i = TCA_VLAN_ACT_PUSH;
-               goto action_of_vlan;
-       case ACTION_OF_SET_VLAN_VID:
-               if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID)
-                       goto trans;
-               conf.of_set_vlan_vid = action->conf;
-               if (na_vlan_id)
-                       goto override_na_vlan_id;
-               i = TCA_VLAN_ACT_MODIFY;
-               goto action_of_vlan;
-       case ACTION_OF_SET_VLAN_PCP:
-               if (action->type != RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP)
-                       goto trans;
-               conf.of_set_vlan_pcp = action->conf;
-               if (na_vlan_priority)
-                       goto override_na_vlan_priority;
-               i = TCA_VLAN_ACT_MODIFY;
-               goto action_of_vlan;
-action_of_vlan:
-               act_index =
-                       mnl_attr_nest_start_check(buf, size, act_index_cur++);
-               if (!act_index ||
-                   !mnl_attr_put_strz_check(buf, size, TCA_ACT_KIND, "vlan"))
-                       goto error_nobufs;
-               act = mnl_attr_nest_start_check(buf, size, TCA_ACT_OPTIONS);
-               if (!act)
-                       goto error_nobufs;
-               if (!mnl_attr_put_check(buf, size, TCA_VLAN_PARMS,
-                                       sizeof(struct tc_vlan),
-                                       &(struct tc_vlan){
-                                               .action = TC_ACT_PIPE,
-                                               .v_action = i,
-                                       }))
-                       goto error_nobufs;
-               if (i == TCA_VLAN_ACT_POP) {
-                       mnl_attr_nest_end(buf, act);
-                       mnl_attr_nest_end(buf, act_index);
-                       ++action;
-                       break;
-               }
-               if (i == TCA_VLAN_ACT_PUSH &&
-                   !mnl_attr_put_u16_check(buf, size,
-                                           TCA_VLAN_PUSH_VLAN_PROTOCOL,
-                                           conf.of_push_vlan->ethertype))
-                       goto error_nobufs;
-               na_vlan_id = mnl_nlmsg_get_payload_tail(buf);
-               if (!mnl_attr_put_u16_check(buf, size, TCA_VLAN_PAD, 0))
-                       goto error_nobufs;
-               na_vlan_priority = mnl_nlmsg_get_payload_tail(buf);
-               if (!mnl_attr_put_u8_check(buf, size, TCA_VLAN_PAD, 0))
-                       goto error_nobufs;
-               mnl_attr_nest_end(buf, act);
-               mnl_attr_nest_end(buf, act_index);
-               if (action->type == RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID) {
-override_na_vlan_id:
-                       na_vlan_id->nla_type = TCA_VLAN_PUSH_VLAN_ID;
-                       *(uint16_t *)mnl_attr_get_payload(na_vlan_id) =
-                               rte_be_to_cpu_16
-                               (conf.of_set_vlan_vid->vlan_vid);
-               } else if (action->type ==
-                          RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP) {
-override_na_vlan_priority:
-                       na_vlan_priority->nla_type =
-                               TCA_VLAN_PUSH_VLAN_PRIORITY;
-                       *(uint8_t *)mnl_attr_get_payload(na_vlan_priority) =
-                               conf.of_set_vlan_pcp->vlan_pcp;
-               }
-               ++action;
-               break;
-       case END:
-               if (item->type != RTE_FLOW_ITEM_TYPE_END ||
-                   action->type != RTE_FLOW_ACTION_TYPE_END)
-                       goto trans;
-               if (na_flower_act)
-                       mnl_attr_nest_end(buf, na_flower_act);
-               if (na_flower)
-                       mnl_attr_nest_end(buf, na_flower);
-               nlh = buf;
-               return nlh->nlmsg_len;
-       }
-       back = trans;
-       trans = mlx5_nl_flow_trans[trans[n - 1]];
-       n = 0;
-       goto trans;
-error_nobufs:
-       if (buf != buf_tmp) {
-               buf = buf_tmp;
-               size = sizeof(buf_tmp);
-               goto init;
-       }
-       return rte_flow_error_set
-               (error, ENOBUFS, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-                "generated TC message is too large");
-}
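
The error_nobufs path above implements two-pass sizing: a failed write into the caller's buffer triggers a single retry against the local scratch buffer (buf_tmp), whose result is only used to learn the required length, and only a second overflow is reported as ENOBUFS. A minimal sketch of the same pattern with hypothetical names, where serialize() stands in for the Netlink message builder:

    #include <stddef.h>
    #include <string.h>

    /* Stand-in message builder: returns bytes written, 0 on overflow. */
    static size_t serialize(void *buf, size_t size)
    {
            static const char msg[] = "example payload";

            if (size < sizeof(msg))
                    return 0;
            memcpy(buf, msg, sizeof(msg));
            return sizeof(msg);
    }

    /* Two-pass sizing: try the caller's buffer first, then retry once
     * against scratch space so the caller can learn the required size. */
    static size_t serialize_sized(void *buf, size_t size)
    {
            char tmp[4096];
            void *cur = buf;
            size_t cur_size = size;
            size_t ret;

    init:
            ret = serialize(cur, cur_size);
            if (ret)
                    return ret;
            if (cur != tmp) {
                    cur = tmp;
                    cur_size = sizeof(tmp);
                    goto init;
            }
            return 0; /* Even scratch space is too small: give up. */
    }
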
-
-/**
- * Brand rtnetlink buffer with unique handle.
- *
- * This handle should be unique for a given network interface to avoid
- * collisions.
- *
- * @param buf
- *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
- * @param handle
- *   Unique 32-bit handle to use.
- */
-void
-mlx5_nl_flow_brand(void *buf, uint32_t handle)
-{
-       struct tcmsg *tcm = mnl_nlmsg_get_payload(buf);
-
-       tcm->tcm_handle = handle;
-}
-
-/**
- * Send Netlink message with acknowledgment.
- *
- * @param nl
- *   Libmnl socket to use.
- * @param nlh
- *   Message to send. This function always raises the NLM_F_ACK flag before
- *   sending.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-static int
-mlx5_nl_flow_nl_ack(struct mnl_socket *nl, struct nlmsghdr *nlh)
-{
-       alignas(struct nlmsghdr)
-       uint8_t ans[mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
-                   nlh->nlmsg_len - sizeof(*nlh)];
-       uint32_t seq = random();
-       int ret;
-
-       nlh->nlmsg_flags |= NLM_F_ACK;
-       nlh->nlmsg_seq = seq;
-       ret = mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
-       if (ret != -1)
-               ret = mnl_socket_recvfrom(nl, ans, sizeof(ans));
-       if (ret != -1)
-               ret = mnl_cb_run
-                       (ans, ret, seq, mnl_socket_get_portid(nl), NULL, NULL);
-       if (!ret)
-               return 0;
-       rte_errno = errno;
-       return -rte_errno;
-}
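
The stack buffer ans above is sized for the worst-case NLM_F_ACK reply: the kernel answers with an nlmsgerr that embeds the echoed request header and, on failure, may echo the request payload as well. A self-contained sketch of that bound, using only libmnl calls that appear in this file:

    #include <stddef.h>
    #include <linux/netlink.h>
    #include <libmnl/libmnl.h>

    /* Worst-case length of the kernel's reply to request @nlh when
     * NLM_F_ACK is set: netlink header plus nlmsgerr (which already
     * contains the echoed request header), plus the request payload
     * that a failure report may append. */
    static size_t ack_reply_size(const struct nlmsghdr *nlh)
    {
            return mnl_nlmsg_size(sizeof(struct nlmsgerr)) +
                   nlh->nlmsg_len - sizeof(*nlh);
    }
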
-
-/**
- * Create a Netlink flow rule.
- *
- * @param nl
- *   Libmnl socket to use.
- * @param buf
- *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_create(struct mnl_socket *nl, void *buf,
-                   struct rte_flow_error *error)
-{
-       struct nlmsghdr *nlh = buf;
-
-       nlh->nlmsg_type = RTM_NEWTFILTER;
-       nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-       if (!mlx5_nl_flow_nl_ack(nl, nlh))
-               return 0;
-       return rte_flow_error_set
-               (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-                "netlink: failed to create TC flow rule");
-}
-
-/**
- * Destroy a Netlink flow rule.
- *
- * @param nl
- *   Libmnl socket to use.
- * @param buf
- *   Flow rule buffer previously initialized by mlx5_nl_flow_transpose().
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_destroy(struct mnl_socket *nl, void *buf,
-                    struct rte_flow_error *error)
-{
-       struct nlmsghdr *nlh = buf;
-
-       nlh->nlmsg_type = RTM_DELTFILTER;
-       nlh->nlmsg_flags = NLM_F_REQUEST;
-       if (!mlx5_nl_flow_nl_ack(nl, nlh))
-               return 0;
-       return rte_flow_error_set
-               (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
-                "netlink: failed to destroy TC flow rule");
-}
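
Since mlx5_nl_flow_create() and mlx5_nl_flow_destroy() only rewrite nlmsg_type and nlmsg_flags, the buffer produced by mlx5_nl_flow_transpose() can be reused verbatim in both directions. A hypothetical caller, assuming the declarations above are in scope:

    /* Sketch: add a rule, then remove it with the very same message. */
    static int add_then_remove(struct mnl_socket *nl, void *buf,
                               struct rte_flow_error *error)
    {
            int ret = mlx5_nl_flow_create(nl, buf, error); /* RTM_NEWTFILTER */

            if (ret)
                    return ret;
            /* ... traffic is steered by the rule here ... */
            return mlx5_nl_flow_destroy(nl, buf, error); /* RTM_DELTFILTER */
    }
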
-
-/**
- * Initialize ingress qdisc of a given network interface.
- *
- * @param nl
- *   Libmnl socket of the @p NETLINK_ROUTE kind.
- * @param ifindex
- *   Index of network interface to initialize.
- * @param[out] error
- *   Perform verbose error reporting if not NULL.
- *
- * @return
- *   0 on success, a negative errno value otherwise and rte_errno is set.
- */
-int
-mlx5_nl_flow_init(struct mnl_socket *nl, unsigned int ifindex,
-                 struct rte_flow_error *error)
-{
-       struct nlmsghdr *nlh;
-       struct tcmsg *tcm;
-       alignas(struct nlmsghdr)
-       uint8_t buf[mnl_nlmsg_size(sizeof(*tcm) + 128)];
-
-       /* Destroy existing ingress qdisc and everything attached to it. */
-       nlh = mnl_nlmsg_put_header(buf);
-       nlh->nlmsg_type = RTM_DELQDISC;
-       nlh->nlmsg_flags = NLM_F_REQUEST;
-       tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
-       tcm->tcm_family = AF_UNSPEC;
-       tcm->tcm_ifindex = ifindex;
-       tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
-       tcm->tcm_parent = TC_H_INGRESS;
-       /* Ignore errors when qdisc is already absent. */
-       if (mlx5_nl_flow_nl_ack(nl, nlh) &&
-           rte_errno != EINVAL && rte_errno != ENOENT)
-               return rte_flow_error_set
-                       (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                        NULL, "netlink: failed to remove ingress qdisc");
-       /* Create fresh ingress qdisc. */
-       nlh = mnl_nlmsg_put_header(buf);
-       nlh->nlmsg_type = RTM_NEWQDISC;
-       nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
-       tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
-       tcm->tcm_family = AF_UNSPEC;
-       tcm->tcm_ifindex = ifindex;
-       tcm->tcm_handle = TC_H_MAKE(TC_H_INGRESS, 0);
-       tcm->tcm_parent = TC_H_INGRESS;
-       mnl_attr_put_strz_check(nlh, sizeof(buf), TCA_KIND, "ingress");
-       if (mlx5_nl_flow_nl_ack(nl, nlh))
-               return rte_flow_error_set
-                       (error, rte_errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                        NULL, "netlink: failed to create ingress qdisc");
-       return 0;
-}
-
-/**
- * Create and configure a libmnl socket for Netlink flow rules.
- *
- * @return
- *   A valid libmnl socket object pointer on success, NULL otherwise and
- *   rte_errno is set.
- */
-struct mnl_socket *
-mlx5_nl_flow_socket_create(void)
-{
-       struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
-
-       if (nl) {
-               mnl_socket_setsockopt(nl, NETLINK_CAP_ACK, &(int){ 1 },
-                                     sizeof(int));
-               if (!mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID))
-                       return nl;
-       }
-       rte_errno = errno;
-       if (nl)
-               mnl_socket_close(nl);
-       return NULL;
-}
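
Putting the pieces together, the expected call sequence is: open a socket, (re)create the ingress qdisc, brand the transposed rule with a unique handle, then send it. A hedged end-to-end sketch, assuming buf was already filled by mlx5_nl_flow_transpose() and that handle 1 is unused on this interface:

    static int install_rule(unsigned int ifindex, void *buf,
                            struct rte_flow_error *error)
    {
            struct mnl_socket *nl = mlx5_nl_flow_socket_create();
            int ret;

            if (!nl)
                    return -rte_errno;
            ret = mlx5_nl_flow_init(nl, ifindex, error); /* ingress qdisc */
            if (!ret) {
                    mlx5_nl_flow_brand(buf, 1); /* unique per interface */
                    ret = mlx5_nl_flow_create(nl, buf, error);
            }
            mlx5_nl_flow_socket_destroy(nl);
            return ret;
    }

mlx5_nl_flow_init() is roughly equivalent to "tc qdisc del dev DEV ingress" followed by "tc qdisc add dev DEV ingress", which is why EINVAL and ENOENT from the delete step are tolerated.
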
-
-/**
- * Destroy a libmnl socket.
- */
-void
-mlx5_nl_flow_socket_destroy(struct mnl_socket *nl)
-{
-       mnl_socket_close(nl);
-}
index 0870d32..29742b1 100644
@@ -159,7 +159,7 @@ struct mlx5_wqe_eth_seg_small {
        uint8_t cs_flags;
        uint8_t rsvd1;
        uint16_t mss;
-       uint32_t rsvd2;
+       uint32_t flow_table_metadata;
        uint16_t inline_hdr_sz;
        uint8_t inline_hdr[2];
 } __rte_aligned(MLX5_WQE_DWORD_SIZE);
@@ -280,6 +280,226 @@ struct mlx5_cqe {
 /* CQE format value. */
 #define MLX5_COMPRESSED 0x3
 
+/* The field of a packet to be modified. */
+enum mlx5_modification_field {
+       MLX5_MODI_OUT_SMAC_47_16 = 1,
+       MLX5_MODI_OUT_SMAC_15_0,
+       MLX5_MODI_OUT_ETHERTYPE,
+       MLX5_MODI_OUT_DMAC_47_16,
+       MLX5_MODI_OUT_DMAC_15_0,
+       MLX5_MODI_OUT_IP_DSCP,
+       MLX5_MODI_OUT_TCP_FLAGS,
+       MLX5_MODI_OUT_TCP_SPORT,
+       MLX5_MODI_OUT_TCP_DPORT,
+       MLX5_MODI_OUT_IPV4_TTL,
+       MLX5_MODI_OUT_UDP_SPORT,
+       MLX5_MODI_OUT_UDP_DPORT,
+       MLX5_MODI_OUT_SIPV6_127_96,
+       MLX5_MODI_OUT_SIPV6_95_64,
+       MLX5_MODI_OUT_SIPV6_63_32,
+       MLX5_MODI_OUT_SIPV6_31_0,
+       MLX5_MODI_OUT_DIPV6_127_96,
+       MLX5_MODI_OUT_DIPV6_95_64,
+       MLX5_MODI_OUT_DIPV6_63_32,
+       MLX5_MODI_OUT_DIPV6_31_0,
+       MLX5_MODI_OUT_SIPV4,
+       MLX5_MODI_OUT_DIPV4,
+       MLX5_MODI_IN_SMAC_47_16 = 0x31,
+       MLX5_MODI_IN_SMAC_15_0,
+       MLX5_MODI_IN_ETHERTYPE,
+       MLX5_MODI_IN_DMAC_47_16,
+       MLX5_MODI_IN_DMAC_15_0,
+       MLX5_MODI_IN_IP_DSCP,
+       MLX5_MODI_IN_TCP_FLAGS,
+       MLX5_MODI_IN_TCP_SPORT,
+       MLX5_MODI_IN_TCP_DPORT,
+       MLX5_MODI_IN_IPV4_TTL,
+       MLX5_MODI_IN_UDP_SPORT,
+       MLX5_MODI_IN_UDP_DPORT,
+       MLX5_MODI_IN_SIPV6_127_96,
+       MLX5_MODI_IN_SIPV6_95_64,
+       MLX5_MODI_IN_SIPV6_63_32,
+       MLX5_MODI_IN_SIPV6_31_0,
+       MLX5_MODI_IN_DIPV6_127_96,
+       MLX5_MODI_IN_DIPV6_95_64,
+       MLX5_MODI_IN_DIPV6_63_32,
+       MLX5_MODI_IN_DIPV6_31_0,
+       MLX5_MODI_IN_SIPV4,
+       MLX5_MODI_IN_DIPV4,
+       MLX5_MODI_OUT_IPV6_HOPLIMIT,
+       MLX5_MODI_IN_IPV6_HOPLIMIT,
+       MLX5_MODI_META_DATA_REG_A,
+       MLX5_MODI_META_DATA_REG_B = 0x50,
+};
+
+/* Modification sub-command. */
+struct mlx5_modification_cmd {
+       union {
+               uint32_t data0;
+               struct {
+                       unsigned int bits:5;
+                       unsigned int rsvd0:3;
+                       unsigned int src_offset:5; /* Start bit offset. */
+                       unsigned int rsvd1:3;
+                       unsigned int src_field:12;
+                       unsigned int type:4;
+               };
+       };
+       union {
+               uint32_t data1;
+               uint8_t data[4];
+               struct {
+                       unsigned int rsvd2:8;
+                       unsigned int dst_offset:8;
+                       unsigned int dst_field:12;
+                       unsigned int rsvd3:4;
+               };
+       };
+};
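
For illustration, one such sub-command could be filled as below before being byte-swapped into the device format. The type value (1 for a plain set), the length encoding in bits, and the placement of the immediate in data1 are assumptions made for this sketch, not definitions taken from this header:

    #include <stdint.h>
    #include <rte_byteorder.h>

    /* Hypothetical "set outer UDP destination port" sub-command. */
    static struct mlx5_modification_cmd
    set_udp_dport(uint16_t port)
    {
            struct mlx5_modification_cmd cmd = {
                    .type = 1,                            /* assumed: set */
                    .src_field = MLX5_MODI_OUT_UDP_DPORT,
                    .src_offset = 0,
                    .bits = 16,                           /* field width */
            };

            /* Commands travel big-endian on the wire. */
            cmd.data0 = rte_cpu_to_be_32(cmd.data0);
            cmd.data1 = rte_cpu_to_be_32((uint32_t)port << 16); /* assumed */
            return cmd;
    }
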
+
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+
+#define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0)
+#define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld)
+#define __mlx5_bit_off(typ, fld) ((unsigned int)(unsigned long) \
+                                 (&(__mlx5_nullp(typ)->fld)))
+#define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - \
+                                   (__mlx5_bit_off(typ, fld) & 0x1f))
+#define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32)
+#define __mlx5_dw_mask(typ, fld) (__mlx5_mask(typ, fld) << \
+                                 __mlx5_dw_bit_off(typ, fld))
+#define __mlx5_mask(typ, fld) ((u32)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
+#define __mlx5_16_off(typ, fld) (__mlx5_bit_off(typ, fld) / 16)
+#define __mlx5_16_bit_off(typ, fld) (16 - __mlx5_bit_sz(typ, fld) - \
+                                   (__mlx5_bit_off(typ, fld) & 0xf))
+#define __mlx5_mask16(typ, fld) ((u16)((1ull << __mlx5_bit_sz(typ, fld)) - 1))
+#define MLX5_ST_SZ_DW(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 32)
+#define MLX5_ST_SZ_DB(typ) (sizeof(struct mlx5_ifc_##typ##_bits) / 8)
+#define MLX5_BYTE_OFF(typ, fld) (__mlx5_bit_off(typ, fld) / 8)
+#define MLX5_ADDR_OF(typ, p, fld) ((char *)(p) + MLX5_BYTE_OFF(typ, fld))
+
+/* Insert a value into a struct. */
+#define MLX5_SET(typ, p, fld, v) \
+       do { \
+               u32 _v = v; \
+               *((__be32 *)(p) + __mlx5_dw_off(typ, fld)) = \
+               rte_cpu_to_be_32((rte_be_to_cpu_32(*((u32 *)(p) + \
+                                 __mlx5_dw_off(typ, fld))) & \
+                                 (~__mlx5_dw_mask(typ, fld))) | \
+                                (((_v) & __mlx5_mask(typ, fld)) << \
+                                  __mlx5_dw_bit_off(typ, fld))); \
+       } while (0)
+#define MLX5_GET16(typ, p, fld) \
+       ((rte_be_to_cpu_16(*((__be16 *)(p) + \
+         __mlx5_16_off(typ, fld))) >> __mlx5_16_bit_off(typ, fld)) & \
+        __mlx5_mask16(typ, fld))
+#define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8)
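
These macros follow the mlx5 "ifc" convention: the _bits structs are never instantiated, each u8 array element stands for a single bit, so sizeof() yields sizes in bits and a member's byte offset doubles as its bit offset, while data buffers are treated as arrays of big-endian dwords. A self-contained demonstration, assuming the macros and the u8 typedef above are in scope:

    #include <stdint.h>
    #include <stdio.h>

    /* Phantom 32-bit layout: two 16-bit fields. */
    struct mlx5_ifc_demo_bits {
            u8 hi[0x10]; /* bits 0..15 of the big-endian dword */
            u8 lo[0x10]; /* bits 16..31 */
    };

    int main(void)
    {
            uint32_t buf[MLX5_ST_SZ_DW(demo)] = { 0 }; /* one BE dword */

            MLX5_SET(demo, buf, lo, 0x1234);
            /* buf now holds bytes 00 00 12 34: the BE dword 0x00001234. */
            printf("0x%x\n", MLX5_GET16(demo, buf, lo)); /* prints 0x1234 */
            return 0;
    }
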
+
+struct mlx5_ifc_fte_match_set_misc_bits {
+       u8 reserved_at_0[0x8];
+       u8 source_sqn[0x18];
+       u8 reserved_at_20[0x10];
+       u8 source_port[0x10];
+       u8 outer_second_prio[0x3];
+       u8 outer_second_cfi[0x1];
+       u8 outer_second_vid[0xc];
+       u8 inner_second_prio[0x3];
+       u8 inner_second_cfi[0x1];
+       u8 inner_second_vid[0xc];
+       u8 outer_second_cvlan_tag[0x1];
+       u8 inner_second_cvlan_tag[0x1];
+       u8 outer_second_svlan_tag[0x1];
+       u8 inner_second_svlan_tag[0x1];
+       u8 reserved_at_64[0xc];
+       u8 gre_protocol[0x10];
+       u8 gre_key_h[0x18];
+       u8 gre_key_l[0x8];
+       u8 vxlan_vni[0x18];
+       u8 reserved_at_b8[0x8];
+       u8 reserved_at_c0[0x20];
+       u8 reserved_at_e0[0xc];
+       u8 outer_ipv6_flow_label[0x14];
+       u8 reserved_at_100[0xc];
+       u8 inner_ipv6_flow_label[0x14];
+       u8 reserved_at_120[0xe0];
+};
+
+struct mlx5_ifc_ipv4_layout_bits {
+       u8 reserved_at_0[0x60];
+       u8 ipv4[0x20];
+};
+
+struct mlx5_ifc_ipv6_layout_bits {
+       u8 ipv6[16][0x8];
+};
+
+union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits {
+       struct mlx5_ifc_ipv6_layout_bits ipv6_layout;
+       struct mlx5_ifc_ipv4_layout_bits ipv4_layout;
+       u8 reserved_at_0[0x80];
+};
+
+struct mlx5_ifc_fte_match_set_lyr_2_4_bits {
+       u8 smac_47_16[0x20];
+       u8 smac_15_0[0x10];
+       u8 ethertype[0x10];
+       u8 dmac_47_16[0x20];
+       u8 dmac_15_0[0x10];
+       u8 first_prio[0x3];
+       u8 first_cfi[0x1];
+       u8 first_vid[0xc];
+       u8 ip_protocol[0x8];
+       u8 ip_dscp[0x6];
+       u8 ip_ecn[0x2];
+       u8 cvlan_tag[0x1];
+       u8 svlan_tag[0x1];
+       u8 frag[0x1];
+       u8 ip_version[0x4];
+       u8 tcp_flags[0x9];
+       u8 tcp_sport[0x10];
+       u8 tcp_dport[0x10];
+       u8 reserved_at_c0[0x20];
+       u8 udp_sport[0x10];
+       u8 udp_dport[0x10];
+       union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits src_ipv4_src_ipv6;
+       union mlx5_ifc_ipv6_layout_ipv4_layout_auto_bits dst_ipv4_dst_ipv6;
+};
+
+struct mlx5_ifc_fte_match_mpls_bits {
+       u8 mpls_label[0x14];
+       u8 mpls_exp[0x3];
+       u8 mpls_s_bos[0x1];
+       u8 mpls_ttl[0x8];
+};
+
+struct mlx5_ifc_fte_match_set_misc2_bits {
+       struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls;
+       struct mlx5_ifc_fte_match_mpls_bits inner_first_mpls;
+       struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls_over_gre;
+       struct mlx5_ifc_fte_match_mpls_bits outer_first_mpls_over_udp;
+       u8 reserved_at_80[0x100];
+       u8 metadata_reg_a[0x20];
+       u8 reserved_at_1a0[0x60];
+};
+
+/* Flow matcher. */
+struct mlx5_ifc_fte_match_param_bits {
+       struct mlx5_ifc_fte_match_set_lyr_2_4_bits outer_headers;
+       struct mlx5_ifc_fte_match_set_misc_bits misc_parameters;
+       struct mlx5_ifc_fte_match_set_lyr_2_4_bits inner_headers;
+       struct mlx5_ifc_fte_match_set_misc2_bits misc_parameters_2;
+       u8 reserved_at_800[0x800];
+};
+
+enum {
+       MLX5_MATCH_CRITERIA_ENABLE_OUTER_BIT,
+       MLX5_MATCH_CRITERIA_ENABLE_MISC_BIT,
+       MLX5_MATCH_CRITERIA_ENABLE_INNER_BIT,
+       MLX5_MATCH_CRITERIA_ENABLE_MISC2_BIT
+};
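
Usage ties the pieces together: MLX5_ADDR_OF() locates a sub-struct inside a zeroed match buffer, MLX5_SET() writes individual fields, and the criteria-enable bits above tell the device which sub-structs carry valid keys. A minimal sketch under those assumptions:

    #include <stdint.h>
    #include <netinet/in.h>

    /* Match outer IPv4/TCP with destination port 80 (illustrative). */
    static void demo_match(uint32_t match[MLX5_ST_SZ_DW(fte_match_param)],
                           uint8_t *criteria_enable)
    {
            void *hdrs = MLX5_ADDR_OF(fte_match_param, match, outer_headers);

            MLX5_SET(fte_match_set_lyr_2_4, hdrs, ethertype, 0x0800);
            MLX5_SET(fte_match_set_lyr_2_4, hdrs, ip_protocol, IPPROTO_TCP);
            MLX5_SET(fte_match_set_lyr_2_4, hdrs, tcp_dport, 80);
            *criteria_enable |= 1 << MLX5_MATCH_CRITERIA_ENABLE_OUTER_BIT;
    }
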
+
 /* CQE format mask. */
 #define MLX5E_CQE_FORMAT_MASK 0xc
 
index 1f7bfd4..ed993ea 100644
@@ -388,7 +388,6 @@ mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev)
                             DEV_RX_OFFLOAD_TIMESTAMP |
                             DEV_RX_OFFLOAD_JUMBO_FRAME);
 
-       offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
        if (config->hw_fcs_strip)
                offloads |= DEV_RX_OFFLOAD_KEEP_CRC;
 
@@ -1438,7 +1437,7 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
        tmpl->rxq.vlan_strip = !!(offloads & DEV_RX_OFFLOAD_VLAN_STRIP);
        /* By default, FCS (CRC) is stripped by hardware. */
        tmpl->rxq.crc_present = 0;
-       if (rte_eth_dev_must_keep_crc(offloads)) {
+       if (offloads & DEV_RX_OFFLOAD_KEEP_CRC) {
                if (config->hw_fcs_strip) {
                        tmpl->rxq.crc_present = 1;
                } else {
index 2d14f8a..24a054d 100644
@@ -523,6 +523,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                uint8_t tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
                uint32_t swp_offsets = 0;
                uint8_t swp_types = 0;
+               rte_be32_t metadata;
                uint16_t tso_segsz = 0;
 #ifdef MLX5_PMD_SOFT_COUNTERS
                uint32_t total_length = 0;
@@ -566,6 +567,9 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                cs_flags = txq_ol_cksum_to_cs(buf);
                txq_mbuf_to_swp(txq, buf, (uint8_t *)&swp_offsets, &swp_types);
                raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
+               /* Copy metadata from mbuf if valid. */
+               metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+                                                            0;
                /* Replace the Ethernet type by the VLAN if necessary. */
                if (buf->ol_flags & PKT_TX_VLAN_PKT) {
                        uint32_t vlan = rte_cpu_to_be_32(0x81000000 |
@@ -781,7 +785,7 @@ next_pkt:
                                swp_offsets,
                                cs_flags | (swp_types << 8) |
                                (rte_cpu_to_be_16(tso_segsz) << 16),
-                               0,
+                               metadata,
                                (ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
                        };
                } else {
@@ -795,7 +799,7 @@ next_pkt:
                        wqe->eseg = (rte_v128u32_t){
                                swp_offsets,
                                cs_flags | (swp_types << 8),
-                               0,
+                               metadata,
                                (ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
                        };
                }
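
The new metadata latch mirrors the Tx metadata API introduced in this release: an application marks an mbuf with PKT_TX_METADATA and fills tx_metadata, and the PMD copies the value into the WQE's flow_table_metadata dword so hardware flow rules can match on it. A minimal application-side sketch; the big-endian conversion follows the rte_flow META item convention and is an assumption here:

    #include <rte_mbuf.h>
    #include <rte_byteorder.h>

    /* Tag an mbuf so the PMD writes "tag" into its Tx descriptor. */
    static void tag_for_tx(struct rte_mbuf *m, uint32_t tag)
    {
            m->tx_metadata = rte_cpu_to_be_32(tag);
            m->ol_flags |= PKT_TX_METADATA;
    }
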
@@ -861,7 +865,7 @@ mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length)
        mpw->wqe->eseg.inline_hdr_sz = 0;
        mpw->wqe->eseg.rsvd0 = 0;
        mpw->wqe->eseg.rsvd1 = 0;
-       mpw->wqe->eseg.rsvd2 = 0;
+       mpw->wqe->eseg.flow_table_metadata = 0;
        mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
                                             (txq->wqe_ci << 8) |
                                             MLX5_OPCODE_TSO);
@@ -948,6 +952,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                uint32_t length;
                unsigned int segs_n = buf->nb_segs;
                uint32_t cs_flags;
+               rte_be32_t metadata;
 
                /*
                 * Make sure there is enough room to store this packet and
@@ -964,6 +969,9 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                max_elts -= segs_n;
                --pkts_n;
                cs_flags = txq_ol_cksum_to_cs(buf);
+               /* Copy metadata from mbuf if valid. */
+               metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+                                                            0;
                /* Retrieve packet information. */
                length = PKT_LEN(buf);
                assert(length);
@@ -971,6 +979,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
                    ((mpw.len != length) ||
                     (segs_n != 1) ||
+                    (mpw.wqe->eseg.flow_table_metadata != metadata) ||
                     (mpw.wqe->eseg.cs_flags != cs_flags)))
                        mlx5_mpw_close(txq, &mpw);
                if (mpw.state == MLX5_MPW_STATE_CLOSED) {
@@ -984,6 +993,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                        max_wqe -= 2;
                        mlx5_mpw_new(txq, &mpw, length);
                        mpw.wqe->eseg.cs_flags = cs_flags;
+                       mpw.wqe->eseg.flow_table_metadata = metadata;
                }
                /* Multi-segment packets must be alone in their MPW. */
                assert((segs_n == 1) || (mpw.pkts_n == 0));
@@ -1082,7 +1092,7 @@ mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw,
        mpw->wqe->eseg.cs_flags = 0;
        mpw->wqe->eseg.rsvd0 = 0;
        mpw->wqe->eseg.rsvd1 = 0;
-       mpw->wqe->eseg.rsvd2 = 0;
+       mpw->wqe->eseg.flow_table_metadata = 0;
        inl = (struct mlx5_wqe_inl_small *)
                (((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
        mpw->data.raw = (uint8_t *)&inl->raw;
@@ -1172,6 +1182,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                uint32_t length;
                unsigned int segs_n = buf->nb_segs;
                uint8_t cs_flags;
+               rte_be32_t metadata;
 
                /*
                 * Make sure there is enough room to store this packet and
@@ -1193,18 +1204,23 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                 */
                max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
                cs_flags = txq_ol_cksum_to_cs(buf);
+               /* Copy metadata from mbuf if valid. */
+               metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+                                                            0;
                /* Retrieve packet information. */
                length = PKT_LEN(buf);
                /* Start new session if packet differs. */
                if (mpw.state == MLX5_MPW_STATE_OPENED) {
                        if ((mpw.len != length) ||
                            (segs_n != 1) ||
+                           (mpw.wqe->eseg.flow_table_metadata != metadata) ||
                            (mpw.wqe->eseg.cs_flags != cs_flags))
                                mlx5_mpw_close(txq, &mpw);
                } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
                        if ((mpw.len != length) ||
                            (segs_n != 1) ||
                            (length > inline_room) ||
+                           (mpw.wqe->eseg.flow_table_metadata != metadata) ||
                            (mpw.wqe->eseg.cs_flags != cs_flags)) {
                                mlx5_mpw_inline_close(txq, &mpw);
                                inline_room =
@@ -1224,12 +1240,14 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
                                max_wqe -= 2;
                                mlx5_mpw_new(txq, &mpw, length);
                                mpw.wqe->eseg.cs_flags = cs_flags;
+                               mpw.wqe->eseg.flow_table_metadata = metadata;
                        } else {
                                if (unlikely(max_wqe < wqe_inl_n))
                                        break;
                                max_wqe -= wqe_inl_n;
                                mlx5_mpw_inline_new(txq, &mpw, length);
                                mpw.wqe->eseg.cs_flags = cs_flags;
+                               mpw.wqe->eseg.flow_table_metadata = metadata;
                        }
                }
                /* Multi-segment packets must be alone in their MPW. */
@@ -1461,6 +1479,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
                unsigned int do_inline = 0; /* Whether inline is possible. */
                uint32_t length;
                uint8_t cs_flags;
+               rte_be32_t metadata;
 
                /* Multi-segmented packet is handled in slow-path outside. */
                assert(NB_SEGS(buf) == 1);
@@ -1468,6 +1487,9 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
                if (max_elts - j == 0)
                        break;
                cs_flags = txq_ol_cksum_to_cs(buf);
+               /* Copy metadata from mbuf if valid. */
+               metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
+                                                            0;
                /* Retrieve packet information. */
                length = PKT_LEN(buf);
                /* Start new session if:
@@ -1482,6 +1504,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
                            (length <= txq->inline_max_packet_sz &&
                             inl_pad + sizeof(inl_hdr) + length >
                             mpw_room) ||
+                            (mpw.wqe->eseg.flow_table_metadata != metadata) ||
                            (mpw.wqe->eseg.cs_flags != cs_flags))
                                max_wqe -= mlx5_empw_close(txq, &mpw);
                }
@@ -1505,6 +1528,7 @@ txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
                                    sizeof(inl_hdr) + length <= mpw_room &&
                                    !txq->mpw_hdr_dseg;
                        mpw.wqe->eseg.cs_flags = cs_flags;
+                       mpw.wqe->eseg.flow_table_metadata = metadata;
                } else {
                        /* Evaluate whether the next packet can be inlined.
                         * Inlining is possible when:
@@ -2097,7 +2121,7 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
        const unsigned int wq_mask = (1 << rxq->elts_n) - 1;
        volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
        unsigned int i = 0;
-       uint16_t rq_ci = rxq->rq_ci;
+       uint32_t rq_ci = rxq->rq_ci;
        uint16_t consumed_strd = rxq->consumed_strd;
        struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
 
@@ -2324,7 +2348,7 @@ removed_rx_burst(void *dpdk_txq __rte_unused,
  * (e.g.  mlx5_rxtx_vec_sse.c for x86).
  */
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused,
                      struct rte_mbuf **pkts __rte_unused,
                      uint16_t pkts_n __rte_unused)
@@ -2332,7 +2356,7 @@ mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused,
        return 0;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 mlx5_tx_burst_vec(void *dpdk_txq __rte_unused,
                  struct rte_mbuf **pkts __rte_unused,
                  uint16_t pkts_n __rte_unused)
@@ -2340,7 +2364,7 @@ mlx5_tx_burst_vec(void *dpdk_txq __rte_unused,
        return 0;
 }
 
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 mlx5_rx_burst_vec(void *dpdk_txq __rte_unused,
                  struct rte_mbuf **pkts __rte_unused,
                  uint16_t pkts_n __rte_unused)
@@ -2348,25 +2372,25 @@ mlx5_rx_burst_vec(void *dpdk_txq __rte_unused,
        return 0;
 }
 
-int __attribute__((weak))
+__rte_weak int
 mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
 {
        return -ENOTSUP;
 }
 
-int __attribute__((weak))
+__rte_weak int
 mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
 {
        return -ENOTSUP;
 }
 
-int __attribute__((weak))
+__rte_weak int
 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
 {
        return -ENOTSUP;
 }
 
-int __attribute__((weak))
+__rte_weak int
 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)
 {
        return -ENOTSUP;
index 48ed2b2..1db468c 100644
@@ -97,10 +97,10 @@ struct mlx5_rxq_data {
        volatile uint32_t *rq_db;
        volatile uint32_t *cq_db;
        uint16_t port_id;
-       uint16_t rq_ci;
+       uint32_t rq_ci;
        uint16_t consumed_strd; /* Number of consumed strides in WQE. */
-       uint16_t rq_pi;
-       uint16_t cq_ci;
+       uint32_t rq_pi;
+       uint32_t cq_ci;
        struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
        uint16_t mprq_max_memcpy_len; /* Maximum size of packet to memcpy. */
        volatile void *wqes;
@@ -363,6 +363,8 @@ uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
 void mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl);
 uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr);
 uint32_t mlx5_tx_addr2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr);
+uint32_t mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr,
+                              struct rte_mempool *mp);
 
 /**
  * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and
@@ -606,6 +608,24 @@ mlx5_tx_complete(struct mlx5_txq_data *txq)
        *txq->cq_db = rte_cpu_to_be_32(cq_ci);
 }
 
+/**
+ * Get the Memory Pool (MP) from an mbuf. If the mbuf is indirect, the pool
+ * of the direct mbuf it was cloned from is returned instead.
+ *
+ * @param buf
+ *   Pointer to mbuf.
+ *
+ * @return
+ *   Memory pool where data is located for given mbuf.
+ */
+static struct rte_mempool *
+mlx5_mb2mp(struct rte_mbuf *buf)
+{
+       if (unlikely(RTE_MBUF_INDIRECT(buf)))
+               return rte_mbuf_from_indirect(buf)->pool;
+       return buf->pool;
+}
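
The indirect case matters because an attached (cloned) mbuf carries no data of its own: DMA addresses must be resolved against the mempool that owns the underlying buffer. Illustrative usage with standard mbuf APIs:

    #include <rte_mbuf.h>

    /* After attaching, the clone's data lives in the direct mbuf's
     * pool, which is what mlx5_mb2mp() resolves for the MR lookup. */
    static struct rte_mempool *
    clone_data_pool(struct rte_mbuf *clone, struct rte_mbuf *direct)
    {
            rte_pktmbuf_attach(clone, direct);
            return rte_mbuf_from_indirect(clone)->pool; /* == direct->pool */
    }
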
+
 /**
  * Query LKey from a packet buffer for Rx. No need to flush local caches for Rx
  * as mempool is pre-configured and static.
@@ -664,7 +684,20 @@ mlx5_tx_addr2mr(struct mlx5_txq_data *txq, uintptr_t addr)
        return mlx5_tx_addr2mr_bh(txq, addr);
 }
 
-#define mlx5_tx_mb2mr(rxq, mb) mlx5_tx_addr2mr(rxq, (uintptr_t)((mb)->buf_addr))
+static __rte_always_inline uint32_t
+mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
+{
+       uintptr_t addr = (uintptr_t)mb->buf_addr;
+       uint32_t lkey = mlx5_tx_addr2mr(txq, addr);
+
+       if (likely(lkey != UINT32_MAX))
+               return lkey;
+       if (rte_errno == ENXIO) {
+               /* Mempool may have externally allocated memory. */
+               lkey = mlx5_tx_update_ext_mp(txq, addr, mlx5_mb2mp(mb));
+       }
+       return lkey;
+}
 
 /**
  * Ring TX queue doorbell and flush the update if requested.
index 0a4aed8..1453f4f 100644
@@ -40,7 +40,8 @@
 #endif
 
 /**
- * Count the number of packets having same ol_flags and calculate cs_flags.
+ * Count the number of packets having the same ol_flags and the same
+ * metadata (if PKT_TX_METADATA is set in ol_flags), and calculate cs_flags.
  *
  * @param pkts
 *   Pointer to array of packets.
 * @param pkts_n
 *   Number of packets.
  * @param cs_flags
  *   Pointer of flags to be returned.
+ * @param metadata
+ *   Pointer of metadata to be returned.
+ * @param txq_offloads
+ *   Offloads enabled on the Tx queue.
  *
  * @return
- *   Number of packets having same ol_flags.
+ *   Number of packets having same ol_flags and metadata, if relevant.
  */
 static inline unsigned int
-txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags)
+txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags,
+                rte_be32_t *metadata, const uint64_t txq_offloads)
 {
        unsigned int pos;
-       const uint64_t ol_mask =
+       const uint64_t cksum_ol_mask =
                PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
                PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
                PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
+       rte_be32_t p0_metadata, pn_metadata;
 
        if (!pkts_n)
                return 0;
-       /* Count the number of packets having same ol_flags. */
-       for (pos = 1; pos < pkts_n; ++pos)
-               if ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & ol_mask)
+       p0_metadata = pkts[0]->ol_flags & PKT_TX_METADATA ?
+                       pkts[0]->tx_metadata : 0;
+       /* Count the number of packets having same offload parameters. */
+       for (pos = 1; pos < pkts_n; ++pos) {
+               /* Check if packet has same checksum flags. */
+               if ((txq_offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP) &&
+                   ((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & cksum_ol_mask))
                        break;
+               /* Check if packet has same metadata. */
+               if (txq_offloads & DEV_TX_OFFLOAD_MATCH_METADATA) {
+                       pn_metadata = pkts[pos]->ol_flags & PKT_TX_METADATA ?
+                                       pkts[pos]->tx_metadata : 0;
+                       if (pn_metadata != p0_metadata)
+                               break;
+               }
+       }
        *cs_flags = txq_ol_cksum_to_cs(pkts[0]);
+       *metadata = p0_metadata;
        return pos;
 }
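
The return value is the length of the leading run of packets whose checksum flags, and (when metadata matching is enabled) metadata, are identical; the vectorized burst then emits that run with a single ESEG template before recomputing. A simplified standalone analogue of the cut-point logic:

    #include <stdint.h>

    /* Length of the leading run with identical per-packet state. */
    static unsigned int leading_run(const uint32_t *state, unsigned int n)
    {
            unsigned int pos;

            if (!n)
                    return 0;
            for (pos = 1; pos < n; ++pos)
                    if (state[pos] != state[0])
                            break;
            return pos;
    }

    /* leading_run((const uint32_t []){ 7, 7, 9, 7 }, 4) == 2: the burst
     * is cut at the first packet whose state differs, as above. */
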
 
@@ -96,7 +116,7 @@ mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
                uint16_t ret;
 
                n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
-               ret = txq_burst_v(txq, &pkts[nb_tx], n, 0);
+               ret = txq_burst_v(txq, &pkts[nb_tx], n, 0, 0);
                nb_tx += ret;
                if (!ret)
                        break;
@@ -127,6 +147,7 @@ mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                uint8_t cs_flags = 0;
                uint16_t n;
                uint16_t ret;
+               rte_be32_t metadata = 0;
 
                /* Transmit multi-seg packets in the head of pkts list. */
                if ((txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) &&
@@ -137,9 +158,12 @@ mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
                n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
                if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
                        n = txq_count_contig_single_seg(&pkts[nb_tx], n);
-               if (txq->offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP)
-                       n = txq_calc_offload(&pkts[nb_tx], n, &cs_flags);
-               ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags);
+               if (txq->offloads & (MLX5_VEC_TX_CKSUM_OFFLOAD_CAP |
+                                    DEV_TX_OFFLOAD_MATCH_METADATA))
+                       n = txq_calc_offload(&pkts[nb_tx], n,
+                                            &cs_flags, &metadata,
+                                            txq->offloads);
+               ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags, metadata);
                nb_tx += ret;
                if (!ret)
                        break;
index fb884f9..fda7004 100644
@@ -22,6 +22,7 @@
 /* HW offload capabilities of vectorized Tx. */
 #define MLX5_VEC_TX_OFFLOAD_CAP \
        (MLX5_VEC_TX_CKSUM_OFFLOAD_CAP | \
+        DEV_TX_OFFLOAD_MATCH_METADATA | \
         DEV_TX_OFFLOAD_MULTI_SEGS)
 
 /*
index b37b738..0b729f1 100644
@@ -201,13 +201,15 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
  *   Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
  * @param cs_flags
  *   Checksum offload flags to be written in the descriptor.
+ * @param metadata
+ *   Metadata value to be written in the descriptor.
  *
  * @return
  *   Number of packets successfully transmitted (<= pkts_n).
  */
 static inline uint16_t
 txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
-           uint8_t cs_flags)
+           uint8_t cs_flags, rte_be32_t metadata)
 {
        struct rte_mbuf **elts;
        uint16_t elts_head = txq->elts_head;
@@ -293,11 +295,8 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
        ctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);
        vst1q_u8((void *)t_wqe, ctrl);
        /* Fill ESEG in the header. */
-       vst1q_u8((void *)(t_wqe + 1),
-                ((uint8x16_t) { 0, 0, 0, 0,
-                                cs_flags, 0, 0, 0,
-                                0, 0, 0, 0,
-                                0, 0, 0, 0 }));
+       vst1q_u32((void *)(t_wqe + 1),
+                ((uint32x4_t) { 0, cs_flags, metadata, 0 }));
 #ifdef MLX5_PMD_SOFT_COUNTERS
        txq->stats.opackets += pkts_n;
 #endif
index 54b3783..e0f95f9 100644
@@ -202,13 +202,15 @@ txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
  *   Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
  * @param cs_flags
  *   Checksum offload flags to be written in the descriptor.
+ * @param metadata
+ *   Metadata value to be written in the descriptor.
  *
  * @return
  *   Number of packets successfully transmitted (<= pkts_n).
  */
 static inline uint16_t
 txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
-           uint8_t cs_flags)
+           uint8_t cs_flags, rte_be32_t metadata)
 {
        struct rte_mbuf **elts;
        uint16_t elts_head = txq->elts_head;
@@ -292,11 +294,7 @@ txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
        ctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);
        _mm_store_si128(t_wqe, ctrl);
        /* Fill ESEG in the header. */
-       _mm_store_si128(t_wqe + 1,
-                       _mm_set_epi8(0, 0, 0, 0,
-                                    0, 0, 0, 0,
-                                    0, 0, 0, cs_flags,
-                                    0, 0, 0, 0));
+       _mm_store_si128(t_wqe + 1, _mm_set_epi32(0, metadata, cs_flags, 0));
 #ifdef MLX5_PMD_SOFT_COUNTERS
        txq->stats.opackets += pkts_n;
 #endif
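
Note the lane order: _mm_set_epi32() lists arguments from the most significant lane down, so the store writes dword0 = 0 (SWP offsets), dword1 = cs_flags, dword2 = metadata, dword3 = 0, the same dword order as the rte_v128u32_t ESEG initializer in the scalar Tx path. A quick standalone sanity check (SSE2 only):

    #include <stdint.h>
    #include <emmintrin.h>

    static void eseg_lane_order(uint32_t cs_flags, uint32_t metadata)
    {
            uint32_t dw[4];

            _mm_storeu_si128((__m128i *)dw,
                             _mm_set_epi32(0, metadata, cs_flags, 0));
            /* dw[0] == 0, dw[1] == cs_flags, dw[2] == metadata,
             * dw[3] == 0: lanes land in reverse argument order. */
    }
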
index a3a5229..0010617 100644
@@ -3,8 +3,6 @@
  * Copyright 2016 Mellanox Technologies, Ltd
  */
 
-#define _GNU_SOURCE
-
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/un.h>
index 91f3d47..a14d1e4 100644
 #include "mlx5_rxtx.h"
 #include "mlx5_defs.h"
 
-struct mlx5_counter_ctrl {
-       /* Name of the counter. */
-       char dpdk_name[RTE_ETH_XSTATS_NAME_SIZE];
-       /* Name of the counter on the device table. */
-       char ctr_name[RTE_ETH_XSTATS_NAME_SIZE];
-       uint32_t ib:1; /**< Nonzero for IB counters. */
-};
-
 static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
        {
                .dpdk_name = "rx_port_unicast_bytes",
@@ -115,6 +107,23 @@ static const struct mlx5_counter_ctrl mlx5_counters_init[] = {
                .dpdk_name = "rx_bytes_phy",
                .ctr_name = "rx_bytes_phy",
        },
+       /* Representor only. */
+       {
+               .dpdk_name = "rx_packets",
+               .ctr_name = "vport_rx_packets",
+       },
+       {
+               .dpdk_name = "rx_bytes",
+               .ctr_name = "vport_rx_bytes",
+       },
+       {
+               .dpdk_name = "tx_packets",
+               .ctr_name = "vport_tx_packets",
+       },
+       {
+               .dpdk_name = "tx_bytes",
+               .ctr_name = "vport_tx_bytes",
+       },
 };
 
 static const unsigned int xstats_n = RTE_DIM(mlx5_counters_init);
@@ -146,19 +155,19 @@ mlx5_read_dev_counters(struct rte_eth_dev *dev, uint64_t *stats)
        et_stats->cmd = ETHTOOL_GSTATS;
        et_stats->n_stats = xstats_ctrl->stats_n;
        ifr.ifr_data = (caddr_t)et_stats;
-       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
        if (ret) {
                DRV_LOG(WARNING,
                        "port %u unable to read statistic values from device",
                        dev->data->port_id);
                return ret;
        }
-       for (i = 0; i != xstats_n; ++i) {
-               if (mlx5_counters_init[i].ib) {
+       for (i = 0; i != xstats_ctrl->mlx5_stats_n; ++i) {
+               if (xstats_ctrl->info[i].ib) {
                        FILE *file;
                        MKSTR(path, "%s/ports/1/hw_counters/%s",
                              priv->ibdev_path,
-                             mlx5_counters_init[i].ctr_name);
+                             xstats_ctrl->info[i].ctr_name);
 
                        file = fopen(path, "rb");
                        if (file) {
@@ -194,7 +203,7 @@ mlx5_ethtool_get_stats_n(struct rte_eth_dev *dev) {
 
        drvinfo.cmd = ETHTOOL_GDRVINFO;
        ifr.ifr_data = (caddr_t)&drvinfo;
-       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
        if (ret) {
                DRV_LOG(WARNING, "port %u unable to query number of statistics",
                        dev->data->port_id);
@@ -222,6 +231,8 @@ mlx5_xstats_init(struct rte_eth_dev *dev)
        unsigned int str_sz;
        int ret;
 
+       /* Reset so that values do not accumulate across re-inits. */
+       xstats_ctrl->mlx5_stats_n = 0;
        ret = mlx5_ethtool_get_stats_n(dev);
        if (ret < 0) {
                DRV_LOG(WARNING, "port %u no extended statistics available",
@@ -229,7 +240,6 @@ mlx5_xstats_init(struct rte_eth_dev *dev)
                return;
        }
        dev_stats_n = ret;
-       xstats_ctrl->stats_n = dev_stats_n;
        /* Allocate memory to grab stat names and values. */
        str_sz = dev_stats_n * ETH_GSTRING_LEN;
        strings = (struct ethtool_gstrings *)
@@ -244,14 +254,12 @@ mlx5_xstats_init(struct rte_eth_dev *dev)
        strings->string_set = ETH_SS_STATS;
        strings->len = dev_stats_n;
        ifr.ifr_data = (caddr_t)strings;
-       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr, 1);
+       ret = mlx5_ifreq(dev, SIOCETHTOOL, &ifr);
        if (ret) {
                DRV_LOG(WARNING, "port %u unable to get statistic names",
                        dev->data->port_id);
                goto free;
        }
-       for (j = 0; j != xstats_n; ++j)
-               xstats_ctrl->dev_table_idx[j] = dev_stats_n;
        for (i = 0; i != dev_stats_n; ++i) {
                const char *curr_string = (const char *)
                        &strings->data[i * ETH_GSTRING_LEN];
@@ -259,24 +267,25 @@ mlx5_xstats_init(struct rte_eth_dev *dev)
                for (j = 0; j != xstats_n; ++j) {
                        if (!strcmp(mlx5_counters_init[j].ctr_name,
                                    curr_string)) {
-                               xstats_ctrl->dev_table_idx[j] = i;
+                               unsigned int idx = xstats_ctrl->mlx5_stats_n++;
+
+                               xstats_ctrl->dev_table_idx[idx] = i;
+                               xstats_ctrl->info[idx] = mlx5_counters_init[j];
                                break;
                        }
                }
        }
-       for (j = 0; j != xstats_n; ++j) {
-               if (mlx5_counters_init[j].ib)
-                       continue;
-               if (xstats_ctrl->dev_table_idx[j] >= dev_stats_n) {
-                       DRV_LOG(WARNING,
-                               "port %u counter \"%s\" is not recognized",
-                               dev->data->port_id,
-                               mlx5_counters_init[j].dpdk_name);
-                       goto free;
+       /* Add IB counters. */
+       for (i = 0; i != xstats_n; ++i) {
+               if (mlx5_counters_init[i].ib) {
+                       unsigned int idx = xstats_ctrl->mlx5_stats_n++;
+
+                       xstats_ctrl->info[idx] = mlx5_counters_init[i];
                }
        }
+       assert(xstats_ctrl->mlx5_stats_n <= MLX5_MAX_XSTATS);
+       xstats_ctrl->stats_n = dev_stats_n;
        /* Copy to base at first time. */
-       assert(xstats_n <= MLX5_MAX_XSTATS);
        ret = mlx5_read_dev_counters(dev, xstats_ctrl->base);
        if (ret)
                DRV_LOG(ERR, "port %u cannot read device counters: %s",
@@ -306,9 +315,10 @@ mlx5_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *stats,
        struct priv *priv = dev->data->dev_private;
        unsigned int i;
        uint64_t counters[n];
+       struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+       uint16_t mlx5_stats_n = xstats_ctrl->mlx5_stats_n;
 
-       if (n >= xstats_n && stats) {
-               struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+       if (n >= mlx5_stats_n && stats) {
                int stats_n;
                int ret;
 
@@ -320,12 +330,12 @@ mlx5_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *stats,
                ret = mlx5_read_dev_counters(dev, counters);
                if (ret)
                        return ret;
-               for (i = 0; i != xstats_n; ++i) {
+               for (i = 0; i != mlx5_stats_n; ++i) {
                        stats[i].id = i;
                        stats[i].value = (counters[i] - xstats_ctrl->base[i]);
                }
        }
-       return xstats_n;
+       return mlx5_stats_n;
 }
 
 /**
@@ -441,7 +451,7 @@ mlx5_xstats_reset(struct rte_eth_dev *dev)
        struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
        int stats_n;
        unsigned int i;
-       unsigned int n = xstats_n;
+       unsigned int n = xstats_ctrl->mlx5_stats_n;
        uint64_t counters[n];
        int ret;
 
@@ -481,14 +491,17 @@ mlx5_xstats_get_names(struct rte_eth_dev *dev __rte_unused,
                      struct rte_eth_xstat_name *xstats_names, unsigned int n)
 {
        unsigned int i;
+       struct priv *priv = dev->data->dev_private;
+       struct mlx5_xstats_ctrl *xstats_ctrl = &priv->xstats_ctrl;
+       unsigned int mlx5_xstats_n = xstats_ctrl->mlx5_stats_n;
 
-       if (n >= xstats_n && xstats_names) {
-               for (i = 0; i != xstats_n; ++i) {
+       if (n >= mlx5_xstats_n && xstats_names) {
+               for (i = 0; i != mlx5_xstats_n; ++i) {
                        strncpy(xstats_names[i].name,
-                               mlx5_counters_init[i].dpdk_name,
+                               xstats_ctrl->info[i].dpdk_name,
                                RTE_ETH_XSTATS_NAME_SIZE);
                        xstats_names[i].name[RTE_ETH_XSTATS_NAME_SIZE - 1] = 0;
                }
        }
-       return xstats_n;
+       return mlx5_xstats_n;
 }
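
/*
 * Illustrative consumer sketch (assumes an already configured port_id;
 * VLA sizing is for brevity): after this rework the xstats count comes
 * from the per-port xstats_ctrl->mlx5_stats_n rather than the static
 * counter table, but the public ethdev flow is unchanged:
 *
 *   int n = rte_eth_xstats_get_names(port_id, NULL, 0);
 *   struct rte_eth_xstat_name names[n];
 *   struct rte_eth_xstat values[n];
 *
 *   rte_eth_xstats_get_names(port_id, names, n);
 *   rte_eth_xstats_get(port_id, values, n);
 */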
index f9bc473..b01bd67 100644 (file)
@@ -120,7 +120,6 @@ mlx5_get_tx_port_offloads(struct rte_eth_dev *dev)
                        offloads |= (DEV_TX_OFFLOAD_IP_TNL_TSO |
                                     DEV_TX_OFFLOAD_UDP_TNL_TSO);
        }
-
        if (config->tunnel_en) {
                if (config->hw_csum)
                        offloads |= DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM;
@@ -128,6 +127,10 @@ mlx5_get_tx_port_offloads(struct rte_eth_dev *dev)
                        offloads |= (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
                                     DEV_TX_OFFLOAD_GRE_TNL_TSO);
        }
+#ifdef HAVE_IBV_FLOW_DV_SUPPORT
+       if (config->dv_flow_en)
+               offloads |= DEV_TX_OFFLOAD_MATCH_METADATA;
+#endif
        return offloads;
 }
 
diff --git a/drivers/net/mvneta/Makefile b/drivers/net/mvneta/Makefile
new file mode 100644 (file)
index 0000000..05a0487
--- /dev/null
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Marvell International Ltd.
+# Copyright(c) 2018 Semihalf.
+# All rights reserved.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(MAKECMDGOALS),clean)
+ifneq ($(MAKECMDGOALS),config)
+ifeq ($(LIBMUSDK_PATH),)
+$(error "Please define LIBMUSDK_PATH environment variable")
+endif
+endif
+endif
+
+# library name
+LIB = librte_pmd_mvneta.a
+
+# library version
+LIBABIVER := 1
+
+# versioning export map
+EXPORT_MAP := rte_pmd_mvneta_version.map
+
+# external library dependencies
+CFLAGS += -I$(RTE_SDK)/drivers/common/mvep
+CFLAGS += -I$(LIBMUSDK_PATH)/include
+CFLAGS += -DMVCONF_TYPES_PUBLIC
+CFLAGS += -DMVCONF_DMA_PHYS_ADDR_T_PUBLIC
+CFLAGS += -DMVCONF_DMA_PHYS_ADDR_T_SIZE=64
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -O3
+LDLIBS += -L$(LIBMUSDK_PATH)/lib
+LDLIBS += -lmusdk
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
+LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_cfgfile
+LDLIBS += -lrte_bus_vdev -lrte_common_mvep
+
+# library source files
+SRCS-$(CONFIG_RTE_LIBRTE_MVNETA_PMD) += mvneta_ethdev.c mvneta_rxtx.c
+
+include $(RTE_SDK)/mk/rte.lib.mk
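+
+# Illustrative build steps (a sketch; the path is hypothetical and the PMD
+# must be enabled with CONFIG_RTE_LIBRTE_MVNETA_PMD=y in the build config):
+#   export LIBMUSDK_PATH=/opt/musdk
+#   make config T=arm64-armv8a-linuxapp-gcc
+#   make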
diff --git a/drivers/net/mvneta/meson.build b/drivers/net/mvneta/meson.build
new file mode 100644 (file)
index 0000000..c0b1bce
--- /dev/null
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Marvell International Ltd.
+# Copyright(c) 2018 Semihalf.
+# All rights reserved.
+
+path = get_option('lib_musdk_dir')
+lib_dir = path + '/lib'
+inc_dir = path + '/include'
+
+lib = cc.find_library('libmusdk', dirs : [lib_dir], required: false)
+if not lib.found()
+       build = false
+else
+       ext_deps += lib
+       includes += include_directories(inc_dir)
+       cflags += [
+         '-DMVCONF_TYPES_PUBLIC',
+         '-DMVCONF_DMA_PHYS_ADDR_T_PUBLIC',
+         '-DMVCONF_DMA_PHYS_ADDR_T_SIZE=64'
+       ]
+endif
+
+sources = files(
+       'mvneta_ethdev.c',
+       'mvneta_rxtx.c'
+)
+
+deps += ['cfgfile', 'common_mvep']
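+
+# Illustrative configure step (a sketch; the MUSDK path is hypothetical),
+# using the 'lib_musdk_dir' option consumed above:
+#   meson build -Dlib_musdk_dir=/opt/musdk
+#   ninja -C build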
diff --git a/drivers/net/mvneta/mvneta_ethdev.c b/drivers/net/mvneta/mvneta_ethdev.c
new file mode 100644 (file)
index 0000000..2d76664
--- /dev/null
@@ -0,0 +1,987 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Marvell International Ltd.
+ * Copyright(c) 2018 Semihalf.
+ * All rights reserved.
+ */
+
+#include <rte_ethdev_driver.h>
+#include <rte_kvargs.h>
+#include <rte_bus_vdev.h>
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <rte_mvep_common.h>
+
+#include "mvneta_rxtx.h"
+
+#define MVNETA_IFACE_NAME_ARG "iface"
+
+#define MVNETA_RX_OFFLOADS (DEV_RX_OFFLOAD_JUMBO_FRAME | \
+                         DEV_RX_OFFLOAD_CHECKSUM)
+
+/** Port Tx offloads capabilities */
+#define MVNETA_TX_OFFLOADS (DEV_TX_OFFLOAD_IPV4_CKSUM | \
+                         DEV_TX_OFFLOAD_UDP_CKSUM | \
+                         DEV_TX_OFFLOAD_TCP_CKSUM | \
+                         DEV_TX_OFFLOAD_MULTI_SEGS)
+
+#define MVNETA_PKT_SIZE_MAX (16382 - MV_MH_SIZE)
+#define MVNETA_DEFAULT_MTU 1500
+
+#define MVNETA_MAC_ADDRS_MAX 256 /* 16 UC, 256 IP, 256 MC/BC */
+/** Maximum length of a match string */
+#define MVNETA_MATCH_LEN 16
+
+int mvneta_logtype;
+
+static const char * const valid_args[] = {
+       MVNETA_IFACE_NAME_ARG,
+       NULL
+};
+
+struct mvneta_ifnames {
+       const char *names[NETA_NUM_ETH_PPIO];
+       int idx;
+};
+
+static int mvneta_dev_num;
+
+/**
+ * Deinitialize packet processor.
+ */
+static void
+mvneta_neta_deinit(void)
+{
+       neta_deinit();
+}
+
+/**
+ * Initialize packet processor.
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+static int
+mvneta_neta_init(void)
+{
+       return neta_init();
+}
+
+/**
+ * Callback used by rte_kvargs_process() during argument parsing.
+ *
+ * @param key
+ *   Pointer to the parsed key (unused).
+ * @param value
+ *   Pointer to the parsed value.
+ * @param extra_args
+ *   Pointer to the extra arguments which contains address of the
+ *   table of pointers to parsed interface names.
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mvneta_ifnames_get(const char *key __rte_unused, const char *value,
+                void *extra_args)
+{
+       struct mvneta_ifnames *ifnames = extra_args;
+
+       ifnames->names[ifnames->idx++] = value;
+
+       return 0;
+}
+
+/**
+ * Ethernet device configuration.
+ *
+ * Prepare the driver for a given number of TX and RX queues and
+ * configure RSS if supported.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+static int
+mvneta_dev_configure(struct rte_eth_dev *dev)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+       struct neta_ppio_params *ppio_params;
+
+       if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_NONE) {
+               MVNETA_LOG(INFO, "Unsupported RSS and rx multi queue mode %d",
+                       dev->data->dev_conf.rxmode.mq_mode);
+               if (dev->data->nb_rx_queues > 1)
+                       return -EINVAL;
+       }
+
+       if (dev->data->dev_conf.rxmode.split_hdr_size) {
+               MVNETA_LOG(INFO, "Split headers not supported");
+               return -EINVAL;
+       }
+
+       if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME)
+               dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
+                                MRVL_NETA_ETH_HDRS_LEN;
+
+       if (dev->data->dev_conf.txmode.offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+               priv->multiseg = 1;
+
+       ppio_params = &priv->ppio_params;
+       ppio_params->outqs_params.num_outqs = dev->data->nb_tx_queues;
+       /* Default: 1 TC, no QoS supported. */
+       ppio_params->inqs_params.num_tcs = 1;
+       ppio_params->inqs_params.tcs_params[0].pkt_offset = MRVL_NETA_PKT_OFFS;
+       priv->ppio_id = dev->data->port_id;
+
+       return 0;
+}
+
+/**
+ * DPDK callback to get information about the device.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure (unused).
+ * @param info
+ *   Info structure output buffer.
+ */
+static void
+mvneta_dev_infos_get(struct rte_eth_dev *dev __rte_unused,
+                  struct rte_eth_dev_info *info)
+{
+       info->speed_capa = ETH_LINK_SPEED_10M |
+                          ETH_LINK_SPEED_100M |
+                          ETH_LINK_SPEED_1G |
+                          ETH_LINK_SPEED_2_5G;
+
+       info->max_rx_queues = MRVL_NETA_RXQ_MAX;
+       info->max_tx_queues = MRVL_NETA_TXQ_MAX;
+       info->max_mac_addrs = MVNETA_MAC_ADDRS_MAX;
+
+       info->rx_desc_lim.nb_max = MRVL_NETA_RXD_MAX;
+       info->rx_desc_lim.nb_min = MRVL_NETA_RXD_MIN;
+       info->rx_desc_lim.nb_align = MRVL_NETA_RXD_ALIGN;
+
+       info->tx_desc_lim.nb_max = MRVL_NETA_TXD_MAX;
+       info->tx_desc_lim.nb_min = MRVL_NETA_TXD_MIN;
+       info->tx_desc_lim.nb_align = MRVL_NETA_TXD_ALIGN;
+
+       info->rx_offload_capa = MVNETA_RX_OFFLOADS;
+       info->rx_queue_offload_capa = MVNETA_RX_OFFLOADS;
+
+       info->tx_offload_capa = MVNETA_TX_OFFLOADS;
+       info->tx_queue_offload_capa = MVNETA_TX_OFFLOADS;
+
+       /* By default packets are dropped if no descriptors are available */
+       info->default_rxconf.rx_drop_en = 1;
+       /* Deferred tx queue start is not supported */
+       info->default_txconf.tx_deferred_start = 0;
+       info->default_txconf.offloads = 0;
+
+       info->max_rx_pktlen = MVNETA_PKT_SIZE_MAX;
+}
+
+/**
+ * Return supported packet types.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure (unused).
+ *
+ * @return
+ *   Const pointer to the table with supported packet types.
+ */
+static const uint32_t *
+mvneta_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused)
+{
+       static const uint32_t ptypes[] = {
+               RTE_PTYPE_L2_ETHER,
+               RTE_PTYPE_L2_ETHER_VLAN,
+               RTE_PTYPE_L3_IPV4,
+               RTE_PTYPE_L3_IPV6,
+               RTE_PTYPE_L4_TCP,
+               RTE_PTYPE_L4_UDP
+       };
+
+       return ptypes;
+}
+
+/**
+ * DPDK callback to change the MTU.
+ *
+ * Setting the MTU affects hardware MRU (packets larger than the MRU
+ * will be dropped).
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param mtu
+ *   New MTU.
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+static int
+mvneta_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+       uint16_t mbuf_data_size = 0; /* SW buffer size */
+       uint16_t mru;
+       int ret;
+
+       mru = MRVL_NETA_MTU_TO_MRU(mtu);
+       /*
+        * min_rx_buf_size is equal to the mbuf data size
+        * if the PMD did not set it differently.
+        */
+       mbuf_data_size = dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
+       /* Prevent PMD from:
+        * - setting mru greater than the mbuf size resulting in
+        * hw and sw buffer size mismatch
+        * - setting mtu that requires the support of scattered packets
+        * when this feature has not been enabled/supported so far.
+        */
+       if (!dev->data->scattered_rx &&
+           (mru + MRVL_NETA_PKT_OFFS > mbuf_data_size)) {
+               mru = mbuf_data_size - MRVL_NETA_PKT_OFFS;
+               mtu = MRVL_NETA_MRU_TO_MTU(mru);
+               MVNETA_LOG(WARNING, "MTU too big, max possible MTU limited by"
+                       " current mbuf size: %u. Set MTU to %u, MRU to %u",
+                       mbuf_data_size, mtu, mru);
+       }
+
+       if (mtu < ETHER_MIN_MTU || mru > MVNETA_PKT_SIZE_MAX) {
+               MVNETA_LOG(ERR, "Invalid MTU [%u] or MRU [%u]", mtu, mru);
+               return -EINVAL;
+       }
+
+       dev->data->mtu = mtu;
+       dev->data->dev_conf.rxmode.max_rx_pkt_len = mru - MV_MH_SIZE;
+
+       if (!priv->ppio)
+               /* It is OK. New MTU will be set later on mvneta_dev_start */
+               return 0;
+
+       ret = neta_ppio_set_mru(priv->ppio, mru);
+       if (ret) {
+               MVNETA_LOG(ERR, "Failed to change MRU");
+               return ret;
+       }
+
+       ret = neta_ppio_set_mtu(priv->ppio, mtu);
+       if (ret) {
+               MVNETA_LOG(ERR, "Failed to change MTU");
+               return ret;
+       }
+       MVNETA_LOG(INFO, "MTU changed to %u, MRU = %u", mtu, mru);
+
+       return 0;
+}
+
+/**
+ * DPDK callback to bring the link up.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+static int
+mvneta_dev_set_link_up(struct rte_eth_dev *dev)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+
+       if (!priv->ppio)
+               return 0;
+
+       return neta_ppio_enable(priv->ppio);
+}
+
+/**
+ * DPDK callback to bring the link down.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+static int
+mvneta_dev_set_link_down(struct rte_eth_dev *dev)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+
+       if (!priv->ppio)
+               return 0;
+
+       return neta_ppio_disable(priv->ppio);
+}
+
+/**
+ * DPDK callback to start the device.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ *
+ * @return
+ *   0 on success, negative errno value on failure.
+ */
+static int
+mvneta_dev_start(struct rte_eth_dev *dev)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+       char match[MVNETA_MATCH_LEN];
+       int ret = 0, i;
+
+       if (priv->ppio)
+               return mvneta_dev_set_link_up(dev);
+
+       snprintf(match, sizeof(match), "%s", dev->data->name);
+       priv->ppio_params.match = match;
+       priv->ppio_params.inqs_params.mtu = dev->data->mtu;
+
+       ret = neta_ppio_init(&priv->ppio_params, &priv->ppio);
+       if (ret) {
+               MVNETA_LOG(ERR, "Failed to init ppio");
+               return ret;
+       }
+       priv->ppio_id = priv->ppio->port_id;
+
+       /*
+        * In case there are some stale uc/mc mac addresses flush them
+        * here. It cannot be done during mvneta_dev_close() as port information
+        * is already gone at that point (due to neta_ppio_deinit() in
+        * mvneta_dev_stop()).
+        */
+       if (!priv->uc_mc_flushed) {
+               ret = neta_ppio_flush_mac_addrs(priv->ppio, 0, 1);
+               if (ret) {
+                       MVNETA_LOG(ERR,
+                               "Failed to flush uc/mc filter list");
+                       goto out;
+               }
+               priv->uc_mc_flushed = 1;
+       }
+
+       ret = mvneta_alloc_rx_bufs(dev);
+       if (ret)
+               goto out;
+
+       ret = mvneta_mtu_set(dev, dev->data->mtu);
+       if (ret) {
+               MVNETA_LOG(ERR, "Failed to set MTU %d", dev->data->mtu);
+               goto out;
+       }
+
+       ret = mvneta_dev_set_link_up(dev);
+       if (ret) {
+               MVNETA_LOG(ERR, "Failed to set link up");
+               goto out;
+       }
+
+       /* start tx queues */
+       for (i = 0; i < dev->data->nb_tx_queues; i++)
+               dev->data->tx_queue_state[i] = RTE_ETH_QUEUE_STATE_STARTED;
+
+       mvneta_set_tx_function(dev);
+
+       return 0;
+
+out:
+       MVNETA_LOG(ERR, "Failed to start device");
+       neta_ppio_deinit(priv->ppio);
+       return ret;
+}
+
+/**
+ * DPDK callback to stop the device.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mvneta_dev_stop(struct rte_eth_dev *dev)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+
+       if (!priv->ppio)
+               return;
+
+       mvneta_dev_set_link_down(dev);
+       mvneta_flush_queues(dev);
+       neta_ppio_deinit(priv->ppio);
+
+       priv->ppio = NULL;
+}
+
+/**
+ * DPDK callback to close the device.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mvneta_dev_close(struct rte_eth_dev *dev)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+       int i;
+
+       if (priv->ppio)
+               mvneta_dev_stop(dev);
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               mvneta_rx_queue_release(dev->data->rx_queues[i]);
+               dev->data->rx_queues[i] = NULL;
+       }
+
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               mvneta_tx_queue_release(dev->data->tx_queues[i]);
+               dev->data->tx_queues[i] = NULL;
+       }
+}
+
+/**
+ * DPDK callback to retrieve physical link information.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param wait_to_complete
+ *   Wait for request completion (ignored).
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+static int
+mvneta_link_update(struct rte_eth_dev *dev, int wait_to_complete __rte_unused)
+{
+       /*
+        * TODO
+        * once MUSDK provides necessary API use it here
+        */
+       struct mvneta_priv *priv = dev->data->dev_private;
+       struct ethtool_cmd edata;
+       struct ifreq req;
+       int ret, fd, link_up;
+
+       if (!priv->ppio)
+               return -EPERM;
+
+       edata.cmd = ETHTOOL_GSET;
+
+       strcpy(req.ifr_name, dev->data->name);
+       req.ifr_data = (void *)&edata;
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd == -1)
+               return -EFAULT;
+       ret = ioctl(fd, SIOCETHTOOL, &req);
+       if (ret == -1) {
+               close(fd);
+               return -EFAULT;
+       }
+
+       close(fd);
+
+       switch (ethtool_cmd_speed(&edata)) {
+       case SPEED_10:
+               dev->data->dev_link.link_speed = ETH_SPEED_NUM_10M;
+               break;
+       case SPEED_100:
+               dev->data->dev_link.link_speed = ETH_SPEED_NUM_100M;
+               break;
+       case SPEED_1000:
+               dev->data->dev_link.link_speed = ETH_SPEED_NUM_1G;
+               break;
+       case SPEED_2500:
+               dev->data->dev_link.link_speed = ETH_SPEED_NUM_2_5G;
+               break;
+       default:
+               dev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
+       }
+
+       dev->data->dev_link.link_duplex = edata.duplex ? ETH_LINK_FULL_DUPLEX :
+                                                        ETH_LINK_HALF_DUPLEX;
+       dev->data->dev_link.link_autoneg = edata.autoneg ? ETH_LINK_AUTONEG :
+                                                          ETH_LINK_FIXED;
+
+       neta_ppio_get_link_state(priv->ppio, &link_up);
+       dev->data->dev_link.link_status = link_up ? ETH_LINK_UP : ETH_LINK_DOWN;
+
+       return 0;
+}
+
+/**
+ * DPDK callback to enable promiscuous mode.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mvneta_promiscuous_enable(struct rte_eth_dev *dev)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+       int ret, en;
+
+       if (!priv->ppio)
+               return;
+
+       neta_ppio_get_promisc(priv->ppio, &en);
+       if (en) {
+               MVNETA_LOG(INFO, "Promiscuous already enabled");
+               return;
+       }
+
+       ret = neta_ppio_set_promisc(priv->ppio, 1);
+       if (ret)
+               MVNETA_LOG(ERR, "Failed to enable promiscuous mode");
+}
+
+/**
+ * DPDK callback to disable promiscuous mode.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mvneta_promiscuous_disable(struct rte_eth_dev *dev)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+       int ret, en;
+
+       if (!priv->ppio)
+               return;
+
+       neta_ppio_get_promisc(priv->ppio, &en);
+       if (!en) {
+               MVNETA_LOG(INFO, "Promiscuous already disabled");
+               return;
+       }
+
+       ret = neta_ppio_set_promisc(priv->ppio, 0);
+       if (ret)
+               MVNETA_LOG(ERR, "Failed to disable promiscuous mode");
+}
+
+/**
+ * DPDK callback to remove a MAC address.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param index
+ *   MAC address index.
+ */
+static void
+mvneta_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+       char buf[ETHER_ADDR_FMT_SIZE];
+       int ret;
+
+       if (!priv->ppio)
+               return;
+
+       ret = neta_ppio_remove_mac_addr(priv->ppio,
+                                      dev->data->mac_addrs[index].addr_bytes);
+       if (ret) {
+               ether_format_addr(buf, sizeof(buf),
+                                 &dev->data->mac_addrs[index]);
+               MVNETA_LOG(ERR, "Failed to remove mac %s", buf);
+       }
+}
+
+/**
+ * DPDK callback to add a MAC address.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param mac_addr
+ *   MAC address to register.
+ * @param index
+ *   MAC address index.
+ * @param vmdq
+ *   VMDq pool index to associate address with (unused).
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+static int
+mvneta_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
+                 uint32_t index, uint32_t vmdq __rte_unused)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+       char buf[ETHER_ADDR_FMT_SIZE];
+       int ret;
+
+       if (index == 0)
+               /* For setting index 0, mvneta_mac_addr_set() should be used. */
+               return -1;
+
+       if (!priv->ppio)
+               return 0;
+
+       ret = neta_ppio_add_mac_addr(priv->ppio, mac_addr->addr_bytes);
+       if (ret) {
+               ether_format_addr(buf, sizeof(buf), mac_addr);
+               MVNETA_LOG(ERR, "Failed to add mac %s", buf);
+               return -1;
+       }
+
+       return 0;
+}
+
+/**
+ * DPDK callback to set the primary MAC address.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param mac_addr
+ *   MAC address to register.
+ */
+static int
+mvneta_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+       int ret;
+
+       if (!priv->ppio)
+               return -EINVAL;
+
+       ret = neta_ppio_set_mac_addr(priv->ppio, mac_addr->addr_bytes);
+       if (ret) {
+               char buf[ETHER_ADDR_FMT_SIZE];
+               ether_format_addr(buf, sizeof(buf), mac_addr);
+               MVNETA_LOG(ERR, "Failed to set mac to %s", buf);
+       }
+       return ret;
+}
+
+/**
+ * DPDK callback to get device statistics.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param stats
+ *   Stats structure output buffer.
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+static int
+mvneta_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+       struct neta_ppio_statistics ppio_stats;
+       int ret;
+
+       if (!priv->ppio)
+               return -EPERM;
+
+       ret = neta_ppio_get_statistics(priv->ppio, &ppio_stats);
+       if (unlikely(ret)) {
+               MVNETA_LOG(ERR, "Failed to update port statistics");
+               return ret;
+       }
+
+       stats->ipackets += ppio_stats.rx_packets +
+                       ppio_stats.rx_broadcast_packets +
+                       ppio_stats.rx_multicast_packets -
+                       priv->prev_stats.ipackets;
+       stats->opackets += ppio_stats.tx_packets +
+                       ppio_stats.tx_broadcast_packets +
+                       ppio_stats.tx_multicast_packets -
+                       priv->prev_stats.opackets;
+       stats->ibytes += ppio_stats.rx_bytes - priv->prev_stats.ibytes;
+       stats->obytes += ppio_stats.tx_bytes - priv->prev_stats.obytes;
+       stats->imissed += ppio_stats.rx_discard +
+                         ppio_stats.rx_overrun -
+                         priv->prev_stats.imissed;
+
+       stats->ierrors = ppio_stats.rx_packets_err +
+                       ppio_stats.rx_errors +
+                       ppio_stats.rx_crc_error -
+                       priv->prev_stats.ierrors;
+       stats->oerrors = ppio_stats.tx_errors - priv->prev_stats.oerrors;
+
+       return 0;
+}
+
+/**
+ * DPDK callback to clear device statistics.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mvneta_stats_reset(struct rte_eth_dev *dev)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+       int ret;
+
+       if (!priv->ppio)
+               return;
+
+       ret = mvneta_stats_get(dev, &priv->prev_stats);
+       if (unlikely(ret))
+               MVNETA_LOG(ERR, "Failed to reset port statistics");
+}
+
+static const struct eth_dev_ops mvneta_ops = {
+       .dev_configure = mvneta_dev_configure,
+       .dev_start = mvneta_dev_start,
+       .dev_stop = mvneta_dev_stop,
+       .dev_set_link_up = mvneta_dev_set_link_up,
+       .dev_set_link_down = mvneta_dev_set_link_down,
+       .dev_close = mvneta_dev_close,
+       .link_update = mvneta_link_update,
+       .promiscuous_enable = mvneta_promiscuous_enable,
+       .promiscuous_disable = mvneta_promiscuous_disable,
+       .mac_addr_remove = mvneta_mac_addr_remove,
+       .mac_addr_add = mvneta_mac_addr_add,
+       .mac_addr_set = mvneta_mac_addr_set,
+       .mtu_set = mvneta_mtu_set,
+       .stats_get = mvneta_stats_get,
+       .stats_reset = mvneta_stats_reset,
+       .dev_infos_get = mvneta_dev_infos_get,
+       .dev_supported_ptypes_get = mvneta_dev_supported_ptypes_get,
+       .rxq_info_get = mvneta_rxq_info_get,
+       .txq_info_get = mvneta_txq_info_get,
+       .rx_queue_setup = mvneta_rx_queue_setup,
+       .rx_queue_release = mvneta_rx_queue_release,
+       .tx_queue_setup = mvneta_tx_queue_setup,
+       .tx_queue_release = mvneta_tx_queue_release,
+};
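+
+/*
+ * Dispatch sketch (standard ethdev behaviour, nothing driver-specific):
+ * an application call such as rte_eth_dev_start(port_id) resolves through
+ * rte_eth_devices[port_id].dev_ops->dev_start, i.e. mvneta_dev_start()
+ * above, and likewise for every callback registered in this table.
+ */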
+
+/**
+ * Create device representing Ethernet port.
+ *
+ * @param name
+ *   Pointer to the port's name.
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+static int
+mvneta_eth_dev_create(struct rte_vdev_device *vdev, const char *name)
+{
+       int ret, fd = socket(AF_INET, SOCK_DGRAM, 0);
+       struct rte_eth_dev *eth_dev;
+       struct mvneta_priv *priv;
+       struct ifreq req;
+
+       eth_dev = rte_eth_dev_allocate(name);
+       if (!eth_dev)
+               return -ENOMEM;
+
+       priv = rte_zmalloc_socket(name, sizeof(*priv), 0, rte_socket_id());
+       if (!priv) {
+               ret = -ENOMEM;
+               goto out_free;
+       }
+       eth_dev->data->dev_private = priv;
+
+       eth_dev->data->mac_addrs =
+               rte_zmalloc("mac_addrs",
+                           ETHER_ADDR_LEN * MVNETA_MAC_ADDRS_MAX, 0);
+       if (!eth_dev->data->mac_addrs) {
+               MVNETA_LOG(ERR, "Failed to allocate space for eth addrs");
+               ret = -ENOMEM;
+               goto out_free;
+       }
+
+       memset(&req, 0, sizeof(req));
+       strcpy(req.ifr_name, name);
+       ret = ioctl(fd, SIOCGIFHWADDR, &req);
+       close(fd); /* the socket is only needed for this one query */
+       if (ret)
+               goto out_free;
+
+       memcpy(eth_dev->data->mac_addrs[0].addr_bytes,
+              req.ifr_addr.sa_data, ETHER_ADDR_LEN);
+
+       eth_dev->data->kdrv = RTE_KDRV_NONE;
+       eth_dev->device = &vdev->device;
+       eth_dev->rx_pkt_burst = mvneta_rx_pkt_burst;
+       mvneta_set_tx_function(eth_dev);
+       eth_dev->dev_ops = &mvneta_ops;
+
+       rte_eth_dev_probing_finish(eth_dev);
+       return 0;
+out_free:
+       rte_eth_dev_release_port(eth_dev);
+
+       return ret;
+}
+
+/**
+ * Cleanup previously created device representing Ethernet port.
+ *
+ * @param eth_dev
+ *   Pointer to the corresponding rte_eth_dev structure.
+ */
+static void
+mvneta_eth_dev_destroy(struct rte_eth_dev *eth_dev)
+{
+       rte_eth_dev_release_port(eth_dev);
+}
+
+/**
+ * Cleanup previously created device representing Ethernet port.
+ *
+ * @param name
+ *   Pointer to the port name.
+ */
+static void
+mvneta_eth_dev_destroy_name(const char *name)
+{
+       struct rte_eth_dev *eth_dev;
+
+       eth_dev = rte_eth_dev_allocated(name);
+       if (!eth_dev)
+               return;
+
+       mvneta_eth_dev_destroy(eth_dev);
+}
+
+/**
+ * DPDK callback to register the virtual device.
+ *
+ * @param vdev
+ *   Pointer to the virtual device.
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+static int
+rte_pmd_mvneta_probe(struct rte_vdev_device *vdev)
+{
+       struct rte_kvargs *kvlist;
+       struct mvneta_ifnames ifnames;
+       int ret = -EINVAL;
+       uint32_t i, ifnum;
+       const char *params;
+
+       params = rte_vdev_device_args(vdev);
+       if (!params)
+               return -EINVAL;
+
+       kvlist = rte_kvargs_parse(params, valid_args);
+       if (!kvlist)
+               return -EINVAL;
+
+       ifnum = rte_kvargs_count(kvlist, MVNETA_IFACE_NAME_ARG);
+       if (ifnum > RTE_DIM(ifnames.names))
+               goto out_free_kvlist;
+
+       ifnames.idx = 0;
+       rte_kvargs_process(kvlist, MVNETA_IFACE_NAME_ARG,
+                          mvneta_ifnames_get, &ifnames);
+
+       /*
+        * The system initialization below must be done only once,
+        * when the first device is probed.
+        */
+       if (mvneta_dev_num)
+               goto init_devices;
+
+       MVNETA_LOG(INFO, "Perform MUSDK initializations");
+
+       ret = rte_mvep_init(MVEP_MOD_T_NETA, kvlist);
+       if (ret)
+               goto out_free_kvlist;
+
+       ret = mvneta_neta_init();
+       if (ret) {
+               MVNETA_LOG(ERR, "Failed to init NETA!");
+               rte_mvep_deinit(MVEP_MOD_T_NETA);
+               goto out_free_kvlist;
+       }
+
+init_devices:
+       for (i = 0; i < ifnum; i++) {
+               MVNETA_LOG(INFO, "Creating %s", ifnames.names[i]);
+               ret = mvneta_eth_dev_create(vdev, ifnames.names[i]);
+               if (ret)
+                       goto out_cleanup;
+       }
+       mvneta_dev_num += ifnum;
+
+       rte_kvargs_free(kvlist);
+
+       return 0;
+out_cleanup:
+       for (; i > 0; i--)
+               mvneta_eth_dev_destroy_name(ifnames.names[i - 1]);
+
+       if (mvneta_dev_num == 0) {
+               mvneta_neta_deinit();
+               rte_mvep_deinit(MVEP_MOD_T_NETA);
+       }
+out_free_kvlist:
+       rte_kvargs_free(kvlist);
+
+       return ret;
+}
+
+/**
+ * DPDK callback to remove virtual device.
+ *
+ * @param vdev
+ *   Pointer to the removed virtual device.
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+static int
+rte_pmd_mvneta_remove(struct rte_vdev_device *vdev)
+{
+       int i;
+       const char *name;
+
+       name = rte_vdev_device_name(vdev);
+       if (!name)
+               return -EINVAL;
+
+       MVNETA_LOG(INFO, "Removing %s", name);
+
+       RTE_ETH_FOREACH_DEV(i) {
+               if (rte_eth_devices[i].device != &vdev->device)
+                       continue;
+
+               mvneta_eth_dev_destroy(&rte_eth_devices[i]);
+               mvneta_dev_num--;
+       }
+
+       if (mvneta_dev_num == 0) {
+               MVNETA_LOG(INFO, "Perform MUSDK deinit");
+               mvneta_neta_deinit();
+               rte_mvep_deinit(MVEP_MOD_T_NETA);
+       }
+
+       return 0;
+}
+
+static struct rte_vdev_driver pmd_mvneta_drv = {
+       .probe = rte_pmd_mvneta_probe,
+       .remove = rte_pmd_mvneta_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_mvneta, pmd_mvneta_drv);
+RTE_PMD_REGISTER_PARAM_STRING(net_mvneta, "iface=<ifc>");
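+
+/*
+ * Illustrative invocation (a sketch; eth0/eth1 are hypothetical interface
+ * names): each "iface" kvarg becomes one Ethernet port, parsed by
+ * mvneta_ifnames_get() above, e.g.
+ *   testpmd --vdev=net_mvneta,iface=eth0,iface=eth1 -- -i
+ */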
+
+RTE_INIT(mvneta_init_log)
+{
+       mvneta_logtype = rte_log_register("pmd.net.mvneta");
+       if (mvneta_logtype >= 0)
+               rte_log_set_level(mvneta_logtype, RTE_LOG_NOTICE);
+}
diff --git a/drivers/net/mvneta/mvneta_ethdev.h b/drivers/net/mvneta/mvneta_ethdev.h
new file mode 100644 (file)
index 0000000..101b0a8
--- /dev/null
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Marvell International Ltd.
+ * Copyright(c) 2018 Semihalf.
+ * All rights reserved.
+ */
+
+#ifndef _MVNETA_ETHDEV_H_
+#define _MVNETA_ETHDEV_H_
+
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_log.h>
+
+/*
+ * container_of is defined by both DPDK and MUSDK,
+ * we'll declare only one version.
+ *
+ * Note that it is not used in this PMD anyway.
+ */
+#ifdef container_of
+#undef container_of
+#endif
+
+#include <drivers/mv_neta.h>
+#include <drivers/mv_neta_ppio.h>
+
+/** Packet offset inside RX buffer. */
+#define MRVL_NETA_PKT_OFFS 64
+
+/** Maximum number of rx/tx queues per port */
+#define MRVL_NETA_RXQ_MAX 8
+#define MRVL_NETA_TXQ_MAX 8
+
+/** Minimum/maximum number of descriptors in tx queue */
+#define MRVL_NETA_TXD_MIN 16
+#define MRVL_NETA_TXD_MAX 2048
+
+/** Tx queue descriptors alignment in B */
+#define MRVL_NETA_TXD_ALIGN 32
+
+/** Minimum/maximum number of descriptors in rx queue */
+#define MRVL_NETA_RXD_MIN 16
+#define MRVL_NETA_RXD_MAX 2048
+
+/** Rx queue descriptors alignment in B */
+#define MRVL_NETA_RXD_ALIGN 32
+
+#define MRVL_NETA_VLAN_TAG_LEN         4
+#define MRVL_NETA_ETH_HDRS_LEN         (ETHER_HDR_LEN + ETHER_CRC_LEN + \
+                                       MRVL_NETA_VLAN_TAG_LEN)
+
+#define MRVL_NETA_HDRS_LEN             (MV_MH_SIZE + MRVL_NETA_ETH_HDRS_LEN)
+#define MRVL_NETA_MTU_TO_MRU(mtu)      ((mtu) + MRVL_NETA_HDRS_LEN)
+#define MRVL_NETA_MRU_TO_MTU(mru)      ((mru) - MRVL_NETA_HDRS_LEN)
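+
+/*
+ * Worked example (a sketch; assumes ETHER_HDR_LEN = 14, ETHER_CRC_LEN = 4
+ * and a 2-byte Marvell header for MV_MH_SIZE):
+ *   MRVL_NETA_ETH_HDRS_LEN = 14 + 4 + 4 = 22
+ *   MRVL_NETA_HDRS_LEN     = 2 + 22    = 24
+ *   MRVL_NETA_MTU_TO_MRU(1500) = 1500 + 24 = 1524
+ *   MRVL_NETA_MRU_TO_MTU(1524) = 1524 - 24 = 1500
+ */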
+
+struct mvneta_priv {
+       /* Hot fields, used in fast path. */
+       struct neta_ppio        *ppio;    /**< Port handler pointer */
+
+       uint8_t pp_id;
+       uint8_t ppio_id;        /* ppio port id */
+       uint8_t uc_mc_flushed;
+       uint8_t multiseg;
+
+       struct neta_ppio_params ppio_params;
+
+       uint64_t rate_max;
+       struct rte_eth_stats prev_stats;
+};
+
+/** Current log type. */
+extern int mvneta_logtype;
+
+#define MVNETA_LOG(level, fmt, args...) \
+       rte_log(RTE_LOG_ ## level, mvneta_logtype, "%s(): " fmt "\n", \
+               __func__, ##args)
+
+#endif /* _MVNETA_ETHDEV_H_ */
diff --git a/drivers/net/mvneta/mvneta_rxtx.c b/drivers/net/mvneta/mvneta_rxtx.c
new file mode 100644 (file)
index 0000000..62caa68
--- /dev/null
@@ -0,0 +1,1030 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Marvell International Ltd.
+ * Copyright(c) 2018 Semihalf.
+ * All rights reserved.
+ */
+
+#include "mvneta_rxtx.h"
+
+#define MVNETA_PKT_EFFEC_OFFS (MRVL_NETA_PKT_OFFS + MV_MH_SIZE)
+
+#define MRVL_NETA_DEFAULT_TC 0
+
+/** Maximum number of descriptors in shadow queue. Must be power of 2 */
+#define MRVL_NETA_TX_SHADOWQ_SIZE MRVL_NETA_TXD_MAX
+
+/** Shadow queue size mask (since shadow queue size is power of 2) */
+#define MRVL_NETA_TX_SHADOWQ_MASK (MRVL_NETA_TX_SHADOWQ_SIZE - 1)
+
+/** Minimum number of sent buffers to release from shadow queue to BM */
+#define MRVL_NETA_BUF_RELEASE_BURST_SIZE_MIN   16
+
+/** Maximum number of sent buffers to release from shadow queue to BM */
+#define MRVL_NETA_BUF_RELEASE_BURST_SIZE_MAX   64
+
+#define MVNETA_COOKIE_ADDR_INVALID ~0ULL
+#define MVNETA_COOKIE_HIGH_ADDR_SHIFT  (sizeof(neta_cookie_t) * 8)
+#define MVNETA_COOKIE_HIGH_ADDR_MASK   (~0ULL << MVNETA_COOKIE_HIGH_ADDR_SHIFT)
+
+#define MVNETA_SET_COOKIE_HIGH_ADDR(addr) {                            \
+       if (unlikely(cookie_addr_high == MVNETA_COOKIE_ADDR_INVALID))   \
+               cookie_addr_high =                                      \
+                       (uint64_t)(addr) & MVNETA_COOKIE_HIGH_ADDR_MASK;\
+}
+
+#define MVNETA_CHECK_COOKIE_HIGH_ADDR(addr)            \
+       ((likely(cookie_addr_high ==                    \
+       ((uint64_t)(addr) & MVNETA_COOKIE_HIGH_ADDR_MASK))) ? 1 : 0)
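+
+/*
+ * Sketch of the cookie scheme (assumes neta_cookie_t is 32 bits wide):
+ * only the low 32 bits of an mbuf virtual address travel through the HW
+ * cookie; the high 32 bits, assumed identical for all mbufs handed to the
+ * port, are latched once in cookie_addr_high. E.g. for an mbuf at
+ * 0x00007f5a12345678:
+ *   cookie_addr_high = 0x00007f5a00000000   (latched on first refill)
+ *   cookie           =         0x12345678   (stored in the descriptor)
+ *   mbuf             = cookie_addr_high | cookie
+ */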
+
+struct mvneta_rxq {
+       struct mvneta_priv *priv;
+       struct rte_mempool *mp;
+       int queue_id;
+       int port_id;
+       int size;
+       int cksum_enabled;
+       uint64_t bytes_recv;
+       uint64_t drop_mac;
+       uint64_t pkts_processed;
+};
+
+/*
+ * A shadow queue structure is used for bookkeeping of buffers handed to
+ * the hardware, so that they can be harvested and released back to the
+ * mempool once transmission is complete.
+ */
+struct mvneta_shadow_txq {
+       int head;           /* write index - used when sending buffers */
+       int tail;           /* read index - used when releasing buffers */
+       u16 size;           /* queue occupied size */
+       struct neta_buff_inf ent[MRVL_NETA_TX_SHADOWQ_SIZE]; /* q entries */
+};
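+
+/*
+ * Index arithmetic sketch: the shadow queue is a power-of-two ring
+ * (MRVL_NETA_TX_SHADOWQ_SIZE = 2048, mask = 2047), so wrap-around is a
+ * mask operation, e.g.
+ *   advance:  head = (2047 + 1) & 2047 = 0
+ *   rollback: head = (2048 + 0 - 1) & 2047 = 2047
+ * The rollback form is used in the tx burst functions below to un-queue
+ * packets the hardware refused to accept.
+ */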
+
+struct mvneta_txq {
+       struct mvneta_priv *priv;
+       int queue_id;
+       int port_id;
+       uint64_t bytes_sent;
+       struct mvneta_shadow_txq shadow_txq;
+       int tx_deferred_start;
+};
+
+static uint64_t cookie_addr_high = MVNETA_COOKIE_ADDR_INVALID;
+static uint16_t rx_desc_free_thresh = MRVL_NETA_BUF_RELEASE_BURST_SIZE_MIN;
+
+static inline int
+mvneta_buffs_refill(struct mvneta_priv *priv, struct mvneta_rxq *rxq, u16 *num)
+{
+       struct rte_mbuf *mbufs[MRVL_NETA_BUF_RELEASE_BURST_SIZE_MAX];
+       struct neta_buff_inf entries[MRVL_NETA_BUF_RELEASE_BURST_SIZE_MAX];
+       int i, ret;
+       uint16_t nb_desc = *num;
+
+       ret = rte_pktmbuf_alloc_bulk(rxq->mp, mbufs, nb_desc);
+       if (ret) {
+               MVNETA_LOG(ERR, "Failed to allocate %u mbufs.", nb_desc);
+               *num = 0;
+               return -1;
+       }
+
+       MVNETA_SET_COOKIE_HIGH_ADDR(mbufs[0]);
+
+       for (i = 0; i < nb_desc; i++) {
+               if (unlikely(!MVNETA_CHECK_COOKIE_HIGH_ADDR(mbufs[i]))) {
+                       MVNETA_LOG(ERR,
+                               "mbuf virt high addr 0x%lx out of range 0x%lx",
+                               (uint64_t)mbufs[i] >> 32,
+                               cookie_addr_high >> 32);
+                       *num = 0;
+                       goto out;
+               }
+               entries[i].addr = rte_mbuf_data_iova_default(mbufs[i]);
+               entries[i].cookie = (neta_cookie_t)(uint64_t)mbufs[i];
+       }
+       neta_ppio_inq_put_buffs(priv->ppio, rxq->queue_id, entries, num);
+
+out:
+       for (i = *num; i < nb_desc; i++)
+               rte_pktmbuf_free(mbufs[i]);
+
+       return 0;
+}
+
+/**
+ * Allocate buffers from mempool
+ * and store addresses in rx descriptors.
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+static inline int
+mvneta_buffs_alloc(struct mvneta_priv *priv, struct mvneta_rxq *rxq, int *num)
+{
+       uint16_t nb_desc, nb_desc_burst, sent = 0;
+       int ret = 0;
+
+       nb_desc = *num;
+
+       do {
+               nb_desc_burst =
+                       (nb_desc < MRVL_NETA_BUF_RELEASE_BURST_SIZE_MAX) ?
+                       nb_desc : MRVL_NETA_BUF_RELEASE_BURST_SIZE_MAX;
+
+               ret = mvneta_buffs_refill(priv, rxq, &nb_desc_burst);
+               if (unlikely(ret || !nb_desc_burst))
+                       break;
+
+               sent += nb_desc_burst;
+               nb_desc -= nb_desc_burst;
+
+       } while (nb_desc);
+
+       *num = sent;
+
+       return ret;
+}
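+
+/*
+ * Worked example (a sketch): a refill request for 100 descriptors is
+ * split into bursts of 64 and 36, since
+ * MRVL_NETA_BUF_RELEASE_BURST_SIZE_MAX is 64; the loop stops early if the
+ * mempool cannot supply a full burst.
+ */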
+
+static inline void
+mvneta_fill_shadowq(struct mvneta_shadow_txq *sq, struct rte_mbuf *buf)
+{
+       sq->ent[sq->head].cookie = (uint64_t)buf;
+       sq->ent[sq->head].addr = buf ?
+               rte_mbuf_data_iova_default(buf) : 0;
+
+       sq->head = (sq->head + 1) & MRVL_NETA_TX_SHADOWQ_MASK;
+       sq->size++;
+}
+
+static inline void
+mvneta_fill_desc(struct neta_ppio_desc *desc, struct rte_mbuf *buf)
+{
+       neta_ppio_outq_desc_reset(desc);
+       neta_ppio_outq_desc_set_phys_addr(desc, rte_pktmbuf_iova(buf));
+       neta_ppio_outq_desc_set_pkt_offset(desc, 0);
+       neta_ppio_outq_desc_set_pkt_len(desc, rte_pktmbuf_data_len(buf));
+}
+
+/**
+ * Release already sent buffers to mempool.
+ *
+ * @param ppio
+ *   Pointer to the port structure.
+ * @param sq
+ *   Pointer to the shadow queue.
+ * @param qid
+ *   Queue id number.
+ */
+static inline void
+mvneta_sent_buffers_free(struct neta_ppio *ppio,
+                        struct mvneta_shadow_txq *sq, int qid)
+{
+       struct neta_buff_inf *entry;
+       uint16_t nb_done = 0;
+       int i;
+       int tail = sq->tail;
+
+       neta_ppio_get_num_outq_done(ppio, qid, &nb_done);
+
+       if (nb_done > sq->size) {
+               MVNETA_LOG(ERR, "nb_done: %d, sq->size %d",
+                          nb_done, sq->size);
+               return;
+       }
+
+       for (i = 0; i < nb_done; i++) {
+               entry = &sq->ent[tail];
+
+               if (unlikely(!entry->addr)) {
+                       MVNETA_LOG(DEBUG,
+                               "Shadow memory @%d: cookie(%lx), pa(%lx)!",
+                               tail, (u64)entry->cookie,
+                               (u64)entry->addr);
+                       tail = (tail + 1) & MRVL_NETA_TX_SHADOWQ_MASK;
+                       continue;
+               }
+
+               struct rte_mbuf *mbuf;
+
+               mbuf = (struct rte_mbuf *)
+                          (cookie_addr_high | entry->cookie);
+               rte_pktmbuf_free(mbuf);
+               tail = (tail + 1) & MRVL_NETA_TX_SHADOWQ_MASK;
+       }
+
+       sq->tail = tail;
+       sq->size -= nb_done;
+}
+
+/**
+ * Return packet type information and l3/l4 offsets.
+ *
+ * @param desc
+ *   Pointer to the received packet descriptor.
+ * @param l3_offset
+ *   l3 packet offset.
+ * @param l4_offset
+ *   l4 packet offset.
+ *
+ * @return
+ *   Packet type information.
+ */
+static inline uint64_t
+mvneta_desc_to_packet_type_and_offset(struct neta_ppio_desc *desc,
+                                   uint8_t *l3_offset, uint8_t *l4_offset)
+{
+       enum neta_inq_l3_type l3_type;
+       enum neta_inq_l4_type l4_type;
+       uint64_t packet_type;
+
+       neta_ppio_inq_desc_get_l3_info(desc, &l3_type, l3_offset);
+       neta_ppio_inq_desc_get_l4_info(desc, &l4_type, l4_offset);
+
+       packet_type = RTE_PTYPE_L2_ETHER;
+
+       if (NETA_RXD_GET_VLAN_INFO(desc))
+               packet_type |= RTE_PTYPE_L2_ETHER_VLAN;
+
+       switch (l3_type) {
+       case NETA_INQ_L3_TYPE_IPV4_BAD:
+       case NETA_INQ_L3_TYPE_IPV4_OK:
+               packet_type |= RTE_PTYPE_L3_IPV4;
+               break;
+       case NETA_INQ_L3_TYPE_IPV6:
+               packet_type |= RTE_PTYPE_L3_IPV6;
+               break;
+       default:
+               packet_type |= RTE_PTYPE_UNKNOWN;
+               MVNETA_LOG(DEBUG, "Failed to recognize l3 packet type");
+               break;
+       }
+
+       switch (l4_type) {
+       case NETA_INQ_L4_TYPE_TCP:
+               packet_type |= RTE_PTYPE_L4_TCP;
+               break;
+       case NETA_INQ_L4_TYPE_UDP:
+               packet_type |= RTE_PTYPE_L4_UDP;
+               break;
+       default:
+               packet_type |= RTE_PTYPE_UNKNOWN;
+               MVNETA_LOG(DEBUG, "Failed to recognize l4 packet type");
+               break;
+       }
+
+       return packet_type;
+}
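+
+/*
+ * Illustrative result (a sketch): for a plain untagged TCP/IPv4 frame the
+ * return value is RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
+ * with *l3_offset = 14 (after the Ethernet header) and *l4_offset = 34
+ * (14 + a 20-byte IPv4 header), so the caller derives l2_len = 14 and
+ * l3_len = 20.
+ */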
+
+/**
+ * Prepare offload information.
+ *
+ * @param ol_flags
+ *   Offload flags.
+ * @param packet_type
+ *   Packet type bitfield.
+ * @param l3_type
+ *   Pointer to the neta_outq_l3_type value to be filled.
+ * @param l4_type
+ *   Pointer to the neta_outq_l4_type value to be filled.
+ * @param gen_l3_cksum
+ *   Will be set to 1 in case l3 checksum is computed.
+ * @param l4_cksum
+ *   Will be set to 1 in case l4 checksum is computed.
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+static inline int
+mvneta_prepare_proto_info(uint64_t ol_flags, uint32_t packet_type,
+                       enum neta_outq_l3_type *l3_type,
+                       enum neta_outq_l4_type *l4_type,
+                       int *gen_l3_cksum,
+                       int *gen_l4_cksum)
+{
+       /*
+        * Based on ol_flags prepare information
+        * for neta_ppio_outq_desc_set_proto_info() which setups descriptor
+        * for offloading.
+        */
+       if (ol_flags & PKT_TX_IPV4) {
+               *l3_type = NETA_OUTQ_L3_TYPE_IPV4;
+               *gen_l3_cksum = ol_flags & PKT_TX_IP_CKSUM ? 1 : 0;
+       } else if (ol_flags & PKT_TX_IPV6) {
+               *l3_type = NETA_OUTQ_L3_TYPE_IPV6;
+               /* no checksum for ipv6 header */
+               *gen_l3_cksum = 0;
+       } else {
+               /* if it is neither IPv4 nor IPv6, stop processing */
+               return -1;
+       }
+
+       ol_flags &= PKT_TX_L4_MASK;
+       if ((packet_type & RTE_PTYPE_L4_TCP) &&
+           ol_flags == PKT_TX_TCP_CKSUM) {
+               *l4_type = NETA_OUTQ_L4_TYPE_TCP;
+               *gen_l4_cksum = 1;
+       } else if ((packet_type & RTE_PTYPE_L4_UDP) &&
+                  ol_flags == PKT_TX_UDP_CKSUM) {
+               *l4_type = NETA_OUTQ_L4_TYPE_UDP;
+               *gen_l4_cksum = 1;
+       } else {
+               *l4_type = NETA_OUTQ_L4_TYPE_OTHER;
+               /* no checksum for other type */
+               *gen_l4_cksum = 0;
+       }
+
+       return 0;
+}
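+
+/*
+ * Worked example (a sketch): an mbuf with
+ *   ol_flags = PKT_TX_IPV4 | PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM
+ * and RTE_PTYPE_L4_TCP set in packet_type yields
+ *   *l3_type = NETA_OUTQ_L3_TYPE_IPV4, *gen_l3_cksum = 1,
+ *   *l4_type = NETA_OUTQ_L4_TYPE_TCP,  *gen_l4_cksum = 1;
+ * an IPv6 UDP packet with PKT_TX_UDP_CKSUM gets *gen_l3_cksum = 0, since
+ * IPv6 headers carry no checksum.
+ */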
+
+/**
+ * Get offload information from the received packet descriptor.
+ *
+ * @param desc
+ *   Pointer to the received packet descriptor.
+ *
+ * @return
+ *   Mbuf offload flags.
+ */
+static inline uint64_t
+mvneta_desc_to_ol_flags(struct neta_ppio_desc *desc)
+{
+       uint64_t flags;
+       enum neta_inq_desc_status status;
+
+       status = neta_ppio_inq_desc_get_l3_pkt_error(desc);
+       if (unlikely(status != NETA_DESC_ERR_OK))
+               flags = PKT_RX_IP_CKSUM_BAD;
+       else
+               flags = PKT_RX_IP_CKSUM_GOOD;
+
+       status = neta_ppio_inq_desc_get_l4_pkt_error(desc);
+       if (unlikely(status != NETA_DESC_ERR_OK))
+               flags |= PKT_RX_L4_CKSUM_BAD;
+       else
+               flags |= PKT_RX_L4_CKSUM_GOOD;
+
+       return flags;
+}
+
+/**
+ * DPDK callback for transmit.
+ *
+ * @param txq
+ *   Generic pointer to the transmit queue.
+ * @param tx_pkts
+ *   Packets to transmit.
+ * @param nb_pkts
+ *   Number of packets in array.
+ *
+ * @return
+ *   Number of packets successfully transmitted.
+ */
+static uint16_t
+mvneta_tx_pkt_burst(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+       struct mvneta_txq *q = txq;
+       struct mvneta_shadow_txq *sq;
+       struct neta_ppio_desc descs[nb_pkts];
+
+       int i, ret, bytes_sent = 0;
+       uint16_t num, sq_free_size;
+       uint64_t addr;
+
+       sq = &q->shadow_txq;
+       if (unlikely(!nb_pkts || !q->priv->ppio))
+               return 0;
+
+       if (sq->size)
+               mvneta_sent_buffers_free(q->priv->ppio,
+                                        sq, q->queue_id);
+
+       sq_free_size = MRVL_NETA_TX_SHADOWQ_SIZE - sq->size - 1;
+       if (unlikely(nb_pkts > sq_free_size)) {
+               MVNETA_LOG(DEBUG,
+                       "No room in shadow queue for %d packets! %d packets will be sent.",
+                       nb_pkts, sq_free_size);
+               nb_pkts = sq_free_size;
+       }
+
+       for (i = 0; i < nb_pkts; i++) {
+               struct rte_mbuf *mbuf = tx_pkts[i];
+               int gen_l3_cksum, gen_l4_cksum;
+               enum neta_outq_l3_type l3_type;
+               enum neta_outq_l4_type l4_type;
+
+               /* Fill first mbuf info in shadow queue */
+               mvneta_fill_shadowq(sq, mbuf);
+               mvneta_fill_desc(&descs[i], mbuf);
+
+               bytes_sent += rte_pktmbuf_pkt_len(mbuf);
+
+               ret = mvneta_prepare_proto_info(mbuf->ol_flags,
+                                               mbuf->packet_type,
+                                               &l3_type, &l4_type,
+                                               &gen_l3_cksum,
+                                               &gen_l4_cksum);
+               if (unlikely(ret))
+                       continue;
+
+               neta_ppio_outq_desc_set_proto_info(&descs[i], l3_type, l4_type,
+                                                  mbuf->l2_len,
+                                                  mbuf->l2_len + mbuf->l3_len,
+                                                  gen_l3_cksum, gen_l4_cksum);
+       }
+       num = nb_pkts;
+       neta_ppio_send(q->priv->ppio, q->queue_id, descs, &nb_pkts);
+
+       /* number of packets that were not sent */
+       if (unlikely(num > nb_pkts)) {
+               for (i = nb_pkts; i < num; i++) {
+                       sq->head = (MRVL_NETA_TX_SHADOWQ_SIZE + sq->head - 1) &
+                               MRVL_NETA_TX_SHADOWQ_MASK;
+                       addr = cookie_addr_high | sq->ent[sq->head].cookie;
+                       bytes_sent -=
+                               rte_pktmbuf_pkt_len((struct rte_mbuf *)addr);
+               }
+               sq->size -= num - nb_pkts;
+       }
+
+       q->bytes_sent += bytes_sent;
+
+       return nb_pkts;
+}
+
+/** DPDK callback for S/G transmit.
+ *
+ * @param txq
+ *   Generic pointer to the transmit queue.
+ * @param tx_pkts
+ *   Packets to transmit.
+ * @param nb_pkts
+ *   Number of packets in array.
+ *
+ * @return
+ *   Number of packets successfully transmitted.
+ */
+static uint16_t
+mvneta_tx_sg_pkt_burst(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+       struct mvneta_txq *q = txq;
+       struct mvneta_shadow_txq *sq;
+       struct neta_ppio_desc descs[nb_pkts * NETA_PPIO_DESC_NUM_FRAGS];
+       struct neta_ppio_sg_pkts pkts;
+       uint8_t frags[nb_pkts];
+       int i, j, ret, bytes_sent = 0;
+       int tail, tail_first;
+       uint16_t num, sq_free_size;
+       uint16_t nb_segs, total_descs = 0;
+       uint64_t addr;
+
+       sq = &q->shadow_txq;
+       pkts.frags = frags;
+       pkts.num = 0;
+
+       if (unlikely(!q->priv->ppio))
+               return 0;
+
+       if (sq->size)
+               mvneta_sent_buffers_free(q->priv->ppio,
+                                        sq, q->queue_id);
+       /* Save shadow queue free size */
+       sq_free_size = MRVL_NETA_TX_SHADOWQ_SIZE - sq->size - 1;
+
+       tail = 0;
+       for (i = 0; i < nb_pkts; i++) {
+               struct rte_mbuf *mbuf = tx_pkts[i];
+               struct rte_mbuf *seg = NULL;
+               int gen_l3_cksum, gen_l4_cksum;
+               enum neta_outq_l3_type l3_type;
+               enum neta_outq_l4_type l4_type;
+
+               nb_segs = mbuf->nb_segs;
+               total_descs += nb_segs;
+
+               /*
+                * Check if total_descs does not exceed
+                * shadow queue free size
+                */
+               if (unlikely(total_descs > sq_free_size)) {
+                       total_descs -= nb_segs;
+                       MVNETA_LOG(DEBUG,
+                               "No room in shadow queue for %d packets! "
+                               "%d packets will be sent.",
+                               nb_pkts, i);
+                       break;
+               }
+
+               /* Check if nb_segs does not exceed the max nb of desc per
+                * fragmented packet
+                */
+               if (unlikely(nb_segs > NETA_PPIO_DESC_NUM_FRAGS)) {
+                       total_descs -= nb_segs;
+                       MVNETA_LOG(ERR,
+                               "Too many segments. Packet won't be sent.");
+                       break;
+               }
+
+               pkts.frags[pkts.num] = nb_segs;
+               pkts.num++;
+               tail_first = tail;
+
+               seg = mbuf;
+               for (j = 0; j < nb_segs - 1; j++) {
+                       /* For the subsequent segments, set shadow queue
+                        * buffer to NULL
+                        */
+                       mvneta_fill_shadowq(sq, NULL);
+                       mvneta_fill_desc(&descs[tail], seg);
+
+                       tail++;
+                       seg = seg->next;
+               }
+               /* Put first mbuf info in last shadow queue entry */
+               mvneta_fill_shadowq(sq, mbuf);
+               /* Update descriptor with last segment */
+               mvneta_fill_desc(&descs[tail++], seg);
+
+               bytes_sent += rte_pktmbuf_pkt_len(mbuf);
+
+               ret = mvneta_prepare_proto_info(mbuf->ol_flags,
+                                               mbuf->packet_type,
+                                               &l3_type, &l4_type,
+                                               &gen_l3_cksum,
+                                               &gen_l4_cksum);
+               if (unlikely(ret))
+                       continue;
+
+               neta_ppio_outq_desc_set_proto_info(&descs[tail_first],
+                                                  l3_type, l4_type,
+                                                  mbuf->l2_len,
+                                                  mbuf->l2_len + mbuf->l3_len,
+                                                  gen_l3_cksum, gen_l4_cksum);
+       }
+       num = total_descs;
+       neta_ppio_send_sg(q->priv->ppio, q->queue_id, descs, &total_descs,
+                         &pkts);
+
+       /* number of packets that were not sent */
+       if (unlikely(num > total_descs)) {
+               for (i = total_descs; i < num; i++) {
+                       sq->head = (MRVL_NETA_TX_SHADOWQ_SIZE +
+                                       sq->head - 1) &
+                                       MRVL_NETA_TX_SHADOWQ_MASK;
+                       addr = sq->ent[sq->head].cookie;
+                       if (addr) {
+                               struct rte_mbuf *mbuf;
+
+                               mbuf = (struct rte_mbuf *)
+                                               (cookie_addr_high | addr);
+                               bytes_sent -= rte_pktmbuf_pkt_len(mbuf);
+                       }
+               }
+               sq->size -= num - total_descs;
+               nb_pkts = pkts.num;
+       }
+
+       q->bytes_sent += bytes_sent;
+
+       return nb_pkts;
+}
+
+/**
+ * Set tx burst function according to offload flag
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+void
+mvneta_set_tx_function(struct rte_eth_dev *dev)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+
+       /* Use a simple Tx queue (no offloads, no multi segs) if possible */
+       if (priv->multiseg) {
+               MVNETA_LOG(INFO, "Using multi-segment tx callback");
+               dev->tx_pkt_burst = mvneta_tx_sg_pkt_burst;
+       } else {
+               MVNETA_LOG(INFO, "Using single-segment tx callback");
+               dev->tx_pkt_burst = mvneta_tx_pkt_burst;
+       }
+}
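
For context, an application reaches this multi-segment callback by requesting
the Tx offload before the port is configured. A hedged sketch against the
generic ethdev API; the port id and queue counts are placeholders:

	#include <string.h>
	#include <rte_ethdev.h>

	static int enable_multiseg_tx(uint16_t port_id)
	{
		struct rte_eth_conf conf;

		memset(&conf, 0, sizeof(conf));
		/* the PMD sees this in dev_configure and picks the S/G path */
		conf.txmode.offloads = DEV_TX_OFFLOAD_MULTI_SEGS;

		return rte_eth_dev_configure(port_id, 1, 1, &conf);
	}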
+
+/**
+ * DPDK callback for receive.
+ *
+ * @param rxq
+ *   Generic pointer to the receive queue.
+ * @param rx_pkts
+ *   Array to store received packets.
+ * @param nb_pkts
+ *   Maximum number of packets in array.
+ *
+ * @return
+ *   Number of packets successfully received.
+ */
+uint16_t
+mvneta_rx_pkt_burst(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+       struct mvneta_rxq *q = rxq;
+       struct neta_ppio_desc descs[nb_pkts];
+       int i, ret, rx_done = 0, rx_dropped = 0;
+
+       if (unlikely(!q || !q->priv->ppio))
+               return 0;
+
+       ret = neta_ppio_recv(q->priv->ppio, q->queue_id,
+                       descs, &nb_pkts);
+
+       if (unlikely(ret < 0)) {
+               MVNETA_LOG(ERR, "Failed to receive packets");
+               return 0;
+       }
+
+       for (i = 0; i < nb_pkts; i++) {
+               struct rte_mbuf *mbuf;
+               uint8_t l3_offset, l4_offset;
+               enum neta_inq_desc_status status;
+               uint64_t addr;
+
+               addr = cookie_addr_high |
+                       neta_ppio_inq_desc_get_cookie(&descs[i]);
+               mbuf = (struct rte_mbuf *)addr;
+
+               rte_pktmbuf_reset(mbuf);
+
+               /* Drop packet in case of MAC, overrun or resource error */
+               status = neta_ppio_inq_desc_get_l2_pkt_error(&descs[i]);
+               if (unlikely(status != NETA_DESC_ERR_OK)) {
+                       /* Release the mbuf to the mempool since
+                        * it won't be transferred to the tx path
+                        */
+                       rte_pktmbuf_free(mbuf);
+                       q->drop_mac++;
+                       rx_dropped++;
+                       continue;
+               }
+
+               mbuf->data_off += MVNETA_PKT_EFFEC_OFFS;
+               mbuf->pkt_len = neta_ppio_inq_desc_get_pkt_len(&descs[i]);
+               mbuf->data_len = mbuf->pkt_len;
+               mbuf->port = q->port_id;
+               mbuf->packet_type =
+                       mvneta_desc_to_packet_type_and_offset(&descs[i],
+                                                               &l3_offset,
+                                                               &l4_offset);
+               mbuf->l2_len = l3_offset;
+               mbuf->l3_len = l4_offset - l3_offset;
+
+               if (likely(q->cksum_enabled))
+                       mbuf->ol_flags = mvneta_desc_to_ol_flags(&descs[i]);
+
+               rx_pkts[rx_done++] = mbuf;
+               q->bytes_recv += mbuf->pkt_len;
+       }
+       q->pkts_processed += rx_done + rx_dropped;
+
+       if (q->pkts_processed > rx_desc_free_thresh) {
+               int buf_to_refill = rx_desc_free_thresh;
+
+               ret = mvneta_buffs_alloc(q->priv, q, &buf_to_refill);
+               if (ret)
+                       MVNETA_LOG(ERR, "Refill failed");
+               q->pkts_processed -= buf_to_refill;
+       }
+
+       return rx_done;
+}
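
This receive callback is what rte_eth_rx_burst() dispatches to once the PMD
is bound to the port. A minimal application-side polling sketch (port and
queue ids are placeholders; real code would process the mbufs instead of
freeing them):

	#include <rte_ethdev.h>
	#include <rte_mbuf.h>

	#define BURST_SIZE 32

	static void poll_rx_once(uint16_t port_id)
	{
		struct rte_mbuf *pkts[BURST_SIZE];
		uint16_t i, nb;

		nb = rte_eth_rx_burst(port_id, 0, pkts, BURST_SIZE);
		for (i = 0; i < nb; i++)
			rte_pktmbuf_free(pkts[i]);
	}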
+
+/**
+ * DPDK callback to configure the receive queue.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param idx
+ *   RX queue index.
+ * @param desc
+ *   Number of descriptors to configure in queue.
+ * @param socket
+ *   NUMA socket on which memory must be allocated.
+ * @param conf
+ *   Thresholds parameters (unused).
+ * @param mp
+ *   Memory pool for buffer allocations.
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+int
+mvneta_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+                     unsigned int socket,
+                     const struct rte_eth_rxconf *conf __rte_unused,
+                     struct rte_mempool *mp)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+       struct mvneta_rxq *rxq;
+       uint32_t frame_size, buf_size = rte_pktmbuf_data_room_size(mp);
+       uint32_t max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
+
+       frame_size = buf_size - RTE_PKTMBUF_HEADROOM - MVNETA_PKT_EFFEC_OFFS;
+
+       if (frame_size < max_rx_pkt_len) {
+               MVNETA_LOG(WARNING,
+                       "Mbuf size must be increased to %u bytes to hold up "
+                       "to %u bytes of data.",
+                       buf_size + max_rx_pkt_len - frame_size,
+                       max_rx_pkt_len);
+               dev->data->dev_conf.rxmode.max_rx_pkt_len = frame_size;
+               MVNETA_LOG(INFO, "Setting max rx pkt len to %u",
+                       dev->data->dev_conf.rxmode.max_rx_pkt_len);
+       }
+
+       if (dev->data->rx_queues[idx]) {
+               rte_free(dev->data->rx_queues[idx]);
+               dev->data->rx_queues[idx] = NULL;
+       }
+
+       rxq = rte_zmalloc_socket("rxq", sizeof(*rxq), 0, socket);
+       if (!rxq)
+               return -ENOMEM;
+
+       rxq->priv = priv;
+       rxq->mp = mp;
+       rxq->cksum_enabled = dev->data->dev_conf.rxmode.offloads &
+                            DEV_RX_OFFLOAD_IPV4_CKSUM;
+       rxq->queue_id = idx;
+       rxq->port_id = dev->data->port_id;
+       rxq->size = desc;
+       rx_desc_free_thresh = RTE_MIN(rx_desc_free_thresh, (desc / 2));
+       priv->ppio_params.inqs_params.tcs_params[MRVL_NETA_DEFAULT_TC].size =
+               desc;
+
+       dev->data->rx_queues[idx] = rxq;
+
+       return 0;
+}
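
The frame_size check above means each mempool element must cover the headroom
and the driver's packet offset on top of the largest expected frame. A sketch
of sizing the pool so the warning never triggers; the mbuf count and cache
size are placeholders, and MVNETA_PKT_EFFEC_OFFS is assumed to come from
mvneta_ethdev.h:

	#include <rte_mbuf.h>

	static struct rte_mempool *
	create_rx_pool(uint32_t max_rx_pkt_len, int socket)
	{
		/* data room = headroom + driver offset + largest frame */
		uint32_t room = RTE_PKTMBUF_HEADROOM + MVNETA_PKT_EFFEC_OFFS +
				max_rx_pkt_len;

		return rte_pktmbuf_pool_create("rx_pool", 4096, 256, 0,
					       (uint16_t)room, socket);
	}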
+
+/**
+ * DPDK callback to configure the transmit queue.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param idx
+ *   Transmit queue index.
+ * @param desc
+ *   Number of descriptors to configure in the queue.
+ * @param socket
+ *   NUMA socket on which memory must be allocated.
+ * @param conf
+ *   Tx queue configuration parameters.
+ *
+ * @return
+ *   0 on success, negative error value otherwise.
+ */
+int
+mvneta_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+                     unsigned int socket, const struct rte_eth_txconf *conf)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+       struct mvneta_txq *txq;
+
+       if (dev->data->tx_queues[idx]) {
+               rte_free(dev->data->tx_queues[idx]);
+               dev->data->tx_queues[idx] = NULL;
+       }
+
+       txq = rte_zmalloc_socket("txq", sizeof(*txq), 0, socket);
+       if (!txq)
+               return -ENOMEM;
+
+       txq->priv = priv;
+       txq->queue_id = idx;
+       txq->port_id = dev->data->port_id;
+       txq->tx_deferred_start = conf->tx_deferred_start;
+       dev->data->tx_queues[idx] = txq;
+
+       priv->ppio_params.outqs_params.outqs_params[idx].size = desc;
+       priv->ppio_params.outqs_params.outqs_params[idx].weight = 1;
+
+       return 0;
+}
+
+/**
+ * DPDK callback to release the transmit queue.
+ *
+ * @param txq
+ *   Generic transmit queue pointer.
+ */
+void
+mvneta_tx_queue_release(void *txq)
+{
+       struct mvneta_txq *q = txq;
+
+       if (!q)
+               return;
+
+       rte_free(q);
+}
+
+/**
+ * Return mbufs to mempool.
+ *
+ * @param desc
+ *   Array of rx descriptors.
+ * @param num
+ *   Number of descriptors in the array.
+ */
+static void
+mvneta_recv_buffs_free(struct neta_ppio_desc *desc, uint16_t num)
+{
+       uint64_t addr;
+       uint8_t i;
+
+       for (i = 0; i < num; i++) {
+               if (desc) {
+                       addr = cookie_addr_high |
+                                       neta_ppio_inq_desc_get_cookie(desc);
+                       if (addr)
+                               rte_pktmbuf_free((struct rte_mbuf *)addr);
+                       desc++;
+               }
+       }
+}
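
Descriptors carry only the low bits of an mbuf's virtual address as a cookie;
the constant upper bits are stored once in cookie_addr_high and OR-ed back
in, as above. A standalone sketch of that split, assuming a 40-bit cookie
(the driver's actual mask is defined in the ethdev code):

	#include <stdint.h>
	#include <stdio.h>

	#define COOKIE_LOW_BITS	40
	#define COOKIE_LOW_MASK	((1ULL << COOKIE_LOW_BITS) - 1)

	int main(void)
	{
		uint64_t ptr = 0x0000ffb012345678ULL;	/* fake mbuf address */
		uint64_t cookie = ptr & COOKIE_LOW_MASK; /* kept in the desc */
		uint64_t high = ptr & ~COOKIE_LOW_MASK;	/* cookie_addr_high */

		printf("restored ok: %d\n", (high | cookie) == ptr);
		return 0;
	}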
+
+int
+mvneta_alloc_rx_bufs(struct rte_eth_dev *dev)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+       int ret = 0, i;
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               struct mvneta_rxq *rxq = dev->data->rx_queues[i];
+               int num = rxq->size;
+
+               ret = mvneta_buffs_alloc(priv, rxq, &num);
+               if (ret || num != rxq->size) {
+                       rte_free(rxq);
+                       /* don't leave a dangling pointer in dev data */
+                       dev->data->rx_queues[i] = NULL;
+                       return ret ? ret : -ENOMEM;
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * Flush single receive queue.
+ *
+ * @param rxq
+ *   Pointer to rx queue structure.
+ */
+static void
+mvneta_rx_queue_flush(struct mvneta_rxq *rxq)
+{
+       struct neta_ppio_desc *descs;
+       struct neta_buff_inf *bufs;
+       uint16_t num;
+       int ret, i;
+
+       descs = rte_malloc("rxdesc", MRVL_NETA_RXD_MAX * sizeof(*descs), 0);
+       bufs = rte_malloc("buffs", MRVL_NETA_RXD_MAX * sizeof(*bufs), 0);
+       if (!descs || !bufs) {
+               MVNETA_LOG(ERR, "Failed to allocate flush buffers");
+               goto out;
+       }
+
+       do {
+               num = MRVL_NETA_RXD_MAX;
+               ret = neta_ppio_recv(rxq->priv->ppio,
+                                    rxq->queue_id,
+                                    descs, &num);
+               mvneta_recv_buffs_free(descs, num);
+       } while (ret == 0 && num);
+
+       rxq->pkts_processed = 0;
+
+       num = MRVL_NETA_RXD_MAX;
+
+       neta_ppio_inq_get_all_buffs(rxq->priv->ppio, rxq->queue_id, bufs, &num);
+       MVNETA_LOG(INFO, "Freeing %u unused buffers", num);
+
+       for (i = 0; i < num; i++) {
+               uint64_t addr;
+               if (bufs[i].cookie) {
+                       addr = cookie_addr_high | bufs[i].cookie;
+                       rte_pktmbuf_free((struct rte_mbuf *)addr);
+               }
+       }
+
+out:
+       rte_free(descs);
+       rte_free(bufs);
+}
+
+/**
+ * Flush single transmit queue.
+ *
+ * @param txq
+ *   Pointer to tx queue structure.
+ */
+static void
+mvneta_tx_queue_flush(struct mvneta_txq *txq)
+{
+       struct mvneta_shadow_txq *sq = &txq->shadow_txq;
+
+       if (sq->size)
+               mvneta_sent_buffers_free(txq->priv->ppio, sq,
+                                        txq->queue_id);
+
+       /* free the remaining buffers */
+       while (sq->tail != sq->head) {
+               uint64_t addr = cookie_addr_high |
+                       sq->ent[sq->tail].cookie;
+               rte_pktmbuf_free((struct rte_mbuf *)addr);
+               sq->tail = (sq->tail + 1) & MRVL_NETA_TX_SHADOWQ_MASK;
+       }
+       memset(sq, 0, sizeof(*sq));
+}
+
+void
+mvneta_flush_queues(struct rte_eth_dev *dev)
+{
+       int i;
+
+       MVNETA_LOG(INFO, "Flushing rx queues");
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               struct mvneta_rxq *rxq = dev->data->rx_queues[i];
+
+               mvneta_rx_queue_flush(rxq);
+       }
+
+       MVNETA_LOG(INFO, "Flushing tx queues");
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               struct mvneta_txq *txq = dev->data->tx_queues[i];
+
+               mvneta_tx_queue_flush(txq);
+       }
+}
+
+/**
+ * DPDK callback to release the receive queue.
+ *
+ * @param rxq
+ *   Generic receive queue pointer.
+ */
+void
+mvneta_rx_queue_release(void *rxq)
+{
+       struct mvneta_rxq *q = rxq;
+
+       if (!q)
+               return;
+
+       /* If dev_stop was called already, mbufs are already
+        * returned to mempool and ppio is deinitialized.
+        * Skip this step.
+        */
+
+       if (q->priv->ppio)
+               mvneta_rx_queue_flush(q);
+
+       rte_free(rxq);
+}
+
+/**
+ * DPDK callback to get information about specific receive queue.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param rx_queue_id
+ *   Receive queue index.
+ * @param qinfo
+ *   Receive queue information structure.
+ */
+void
+mvneta_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+                   struct rte_eth_rxq_info *qinfo)
+{
+       struct mvneta_rxq *q = dev->data->rx_queues[rx_queue_id];
+
+       qinfo->mp = q->mp;
+       qinfo->nb_desc = q->size;
+}
+
+/**
+ * DPDK callback to get information about specific transmit queue.
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ * @param tx_queue_id
+ *   Transmit queue index.
+ * @param qinfo
+ *   Transmit queue information structure.
+ */
+void
+mvneta_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+                   struct rte_eth_txq_info *qinfo)
+{
+       struct mvneta_priv *priv = dev->data->dev_private;
+
+       qinfo->nb_desc =
+               priv->ppio_params.outqs_params.outqs_params[tx_queue_id].size;
+}
diff --git a/drivers/net/mvneta/mvneta_rxtx.h b/drivers/net/mvneta/mvneta_rxtx.h
new file mode 100644 (file)
index 0000000..cc29190
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Marvell International Ltd.
+ * Copyright(c) 2018 Semihalf.
+ * All rights reserved.
+ */
+
+#ifndef _MVNETA_RXTX_H_
+#define _MVNETA_RXTX_H_
+
+#include "mvneta_ethdev.h"
+
+int mvneta_alloc_rx_bufs(struct rte_eth_dev *dev);
+
+void mvneta_flush_queues(struct rte_eth_dev *dev);
+
+void mvneta_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+                        struct rte_eth_rxq_info *qinfo);
+void mvneta_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+                        struct rte_eth_txq_info *qinfo);
+
+void mvneta_set_tx_function(struct rte_eth_dev *dev);
+
+uint16_t
+mvneta_rx_pkt_burst(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+
+int
+mvneta_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+                     unsigned int socket,
+                     const struct rte_eth_rxconf *conf __rte_unused,
+                     struct rte_mempool *mp);
+int
+mvneta_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
+                     unsigned int socket, const struct rte_eth_txconf *conf);
+
+void mvneta_rx_queue_release(void *rxq);
+void mvneta_tx_queue_release(void *txq);
+
+#endif /* _MVNETA_RXTX_H_ */
diff --git a/drivers/net/mvneta/rte_pmd_mvneta_version.map b/drivers/net/mvneta/rte_pmd_mvneta_version.map
new file mode 100644 (file)
index 0000000..24bd5cd
--- /dev/null
@@ -0,0 +1,3 @@
+DPDK_18.11 {
+       local: *;
+};
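
The new map file exports no driver-specific symbols: "local: *;" hides
everything, which is the usual shape for a PMD consumed only through the
generic ethdev API. If the driver ever grew a public helper, the script
would gain a global stanza; the symbol below is purely hypothetical:

	DPDK_18.11 {
		global:
		rte_pmd_mvneta_example_fn;

		local: *;
	};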
index 492aef9..661d2cd 100644 (file)
@@ -23,6 +23,7 @@ LIBABIVER := 1
 EXPORT_MAP := rte_pmd_mvpp2_version.map
 
 # external library dependencies
+CFLAGS += -I$(RTE_SDK)/drivers/common/mvep
 CFLAGS += -I$(LIBMUSDK_PATH)/include
 CFLAGS += -DMVCONF_TYPES_PUBLIC
 CFLAGS += -DMVCONF_DMA_PHYS_ADDR_T_PUBLIC
@@ -32,11 +33,13 @@ LDLIBS += -L$(LIBMUSDK_PATH)/lib
 LDLIBS += -lmusdk
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_cfgfile
-LDLIBS += -lrte_bus_vdev
+LDLIBS += -lrte_bus_vdev -lrte_common_mvep
 
 # library source files
 SRCS-$(CONFIG_RTE_LIBRTE_MVPP2_PMD) += mrvl_ethdev.c
 SRCS-$(CONFIG_RTE_LIBRTE_MVPP2_PMD) += mrvl_qos.c
 SRCS-$(CONFIG_RTE_LIBRTE_MVPP2_PMD) += mrvl_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_MVPP2_PMD) += mrvl_mtr.c
+SRCS-$(CONFIG_RTE_LIBRTE_MVPP2_PMD) += mrvl_tm.c
 
 include $(RTE_SDK)/mk/rte.lib.mk
index e139889..70ef2d6 100644 (file)
@@ -19,7 +19,9 @@ endif
 sources = files(
        'mrvl_ethdev.c',
        'mrvl_flow.c',
-       'mrvl_qos.c'
+       'mrvl_qos.c',
+       'mrvl_mtr.c',
+       'mrvl_tm.c'
 )
 
-deps += ['cfgfile']
+deps += ['cfgfile', 'common_mvep']
index a2d0576..ab4c14e 100644 (file)
 #include <rte_malloc.h>
 #include <rte_bus_vdev.h>
 
-/* Unluckily, container_of is defined by both DPDK and MUSDK,
- * we'll declare only one version.
- *
- * Note that it is not used in this PMD anyway.
- */
-#ifdef container_of
-#undef container_of
-#endif
-
 #include <fcntl.h>
 #include <linux/ethtool.h>
 #include <linux/sockios.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 
+#include <rte_mvep_common.h>
 #include "mrvl_ethdev.h"
 #include "mrvl_qos.h"
+#include "mrvl_flow.h"
+#include "mrvl_mtr.h"
+#include "mrvl_tm.h"
 
 /* bitmask with reserved hifs */
 #define MRVL_MUSDK_HIFS_RESERVED 0x0F
 #define MRVL_ARP_LENGTH 28
 
 #define MRVL_COOKIE_ADDR_INVALID ~0ULL
-
-#define MRVL_COOKIE_HIGH_ADDR_SHIFT    (sizeof(pp2_cookie_t) * 8)
-#define MRVL_COOKIE_HIGH_ADDR_MASK     (~0ULL << MRVL_COOKIE_HIGH_ADDR_SHIFT)
-
-/* Memory size (in bytes) for MUSDK dma buffers */
-#define MRVL_MUSDK_DMA_MEMSIZE 41943040
+#define MRVL_COOKIE_HIGH_ADDR_MASK 0xffffff0000000000
 
 /** Port Rx offload capabilities */
 #define MRVL_RX_OFFLOADS (DEV_RX_OFFLOAD_VLAN_FILTER | \
                          DEV_RX_OFFLOAD_JUMBO_FRAME | \
-                         DEV_RX_OFFLOAD_CRC_STRIP | \
                          DEV_RX_OFFLOAD_CHECKSUM)
 
 /** Port Tx offloads capabilities */
 #define MRVL_TX_OFFLOADS (DEV_TX_OFFLOAD_IPV4_CKSUM | \
                          DEV_TX_OFFLOAD_UDP_CKSUM | \
-                         DEV_TX_OFFLOAD_TCP_CKSUM)
+                         DEV_TX_OFFLOAD_TCP_CKSUM | \
+                         DEV_TX_OFFLOAD_MULTI_SEGS)
 
 static const char * const valid_args[] = {
        MRVL_IFACE_NAME_ARG,
@@ -86,13 +76,12 @@ static const char * const valid_args[] = {
 static int used_hifs = MRVL_MUSDK_HIFS_RESERVED;
 static struct pp2_hif *hifs[RTE_MAX_LCORE];
 static int used_bpools[PP2_NUM_PKT_PROC] = {
-       MRVL_MUSDK_BPOOLS_RESERVED,
-       MRVL_MUSDK_BPOOLS_RESERVED
+       [0 ... PP2_NUM_PKT_PROC - 1] = MRVL_MUSDK_BPOOLS_RESERVED
 };
 
-struct pp2_bpool *mrvl_port_to_bpool_lookup[RTE_MAX_ETHPORTS];
-int mrvl_port_bpool_size[PP2_NUM_PKT_PROC][PP2_BPOOL_NUM_POOLS][RTE_MAX_LCORE];
-uint64_t cookie_addr_high = MRVL_COOKIE_ADDR_INVALID;
+static struct pp2_bpool *mrvl_port_to_bpool_lookup[RTE_MAX_ETHPORTS];
+static int mrvl_port_bpool_size[PP2_NUM_PKT_PROC][PP2_BPOOL_NUM_POOLS][RTE_MAX_LCORE];
+static uint64_t cookie_addr_high = MRVL_COOKIE_ADDR_INVALID;
 
 int mrvl_logtype;
 
@@ -116,7 +105,9 @@ struct mrvl_shadow_txq {
        int head;           /* write index - used when sending buffers */
        int tail;           /* read index - used when releasing buffers */
        u16 size;           /* queue occupied size */
-       u16 num_to_release; /* number of buffers sent, that can be released */
+       u16 num_to_release; /* number of descriptors sent that can be
+                            * released
+                            */
        struct buff_release_entry ent[MRVL_PP2_TX_SHADOWQ_SIZE]; /* q entries */
 };
 
@@ -148,6 +139,12 @@ static inline void mrvl_free_sent_buffers(struct pp2_ppio *ppio,
                        struct pp2_hif *hif, unsigned int core_id,
                        struct mrvl_shadow_txq *sq, int qid, int force);
 
+static uint16_t mrvl_tx_pkt_burst(void *txq, struct rte_mbuf **tx_pkts,
+                                 uint16_t nb_pkts);
+static uint16_t mrvl_tx_sg_pkt_burst(void *txq, struct rte_mbuf **tx_pkts,
+                                    uint16_t nb_pkts);
+
 #define MRVL_XSTATS_TBL_ENTRY(name) { \
        #name, offsetof(struct pp2_ppio_statistics, name),      \
        sizeof(((struct pp2_ppio_statistics *)0)->name)         \
@@ -174,6 +171,31 @@ static struct {
        MRVL_XSTATS_TBL_ENTRY(tx_errors)
 };
 
+static inline void
+mrvl_fill_shadowq(struct mrvl_shadow_txq *sq, struct rte_mbuf *buf)
+{
+       sq->ent[sq->head].buff.cookie = (uint64_t)buf;
+       sq->ent[sq->head].buff.addr = buf ?
+               rte_mbuf_data_iova_default(buf) : 0;
+
+       sq->ent[sq->head].bpool =
+               (unlikely(!buf || buf->port >= RTE_MAX_ETHPORTS ||
+                buf->refcnt > 1)) ? NULL :
+                mrvl_port_to_bpool_lookup[buf->port];
+
+       sq->head = (sq->head + 1) & MRVL_PP2_TX_SHADOWQ_MASK;
+       sq->size++;
+}
+
+static inline void
+mrvl_fill_desc(struct pp2_ppio_desc *desc, struct rte_mbuf *buf)
+{
+       pp2_ppio_outq_desc_reset(desc);
+       pp2_ppio_outq_desc_set_phys_addr(desc, rte_pktmbuf_iova(buf));
+       pp2_ppio_outq_desc_set_pkt_offset(desc, 0);
+       pp2_ppio_outq_desc_set_pkt_len(desc, rte_pktmbuf_data_len(buf));
+}
+
 static inline int
 mrvl_get_bpool_size(int pp2_id, int pool_id)
 {
@@ -252,6 +274,27 @@ out:
        return hifs[core_id];
 }
 
+/**
+ * Set tx burst function according to offload flag
+ *
+ * @param dev
+ *   Pointer to Ethernet device structure.
+ */
+static void
+mrvl_set_tx_function(struct rte_eth_dev *dev)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+
+       /* Use a simple Tx queue (no offloads, no multi segs) if possible */
+       if (priv->multiseg) {
+               RTE_LOG(INFO, PMD, "Using multi-segment tx callback\n");
+               dev->tx_pkt_burst = mrvl_tx_sg_pkt_burst;
+       } else {
+               RTE_LOG(INFO, PMD, "Using single-segment tx callback\n");
+               dev->tx_pkt_burst = mrvl_tx_pkt_burst;
+       }
+}
+
 /**
  * Configure rss based on dpdk rss configuration.
  *
@@ -307,6 +350,11 @@ mrvl_dev_configure(struct rte_eth_dev *dev)
        struct mrvl_priv *priv = dev->data->dev_private;
        int ret;
 
+       if (priv->ppio) {
+               MRVL_LOG(INFO, "Device reconfiguration is not supported");
+               return -EINVAL;
+       }
+
        if (dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_NONE &&
            dev->data->dev_conf.rxmode.mq_mode != ETH_MQ_RX_RSS) {
                MRVL_LOG(INFO, "Unsupported rx multi queue mode %d",
@@ -314,14 +362,6 @@ mrvl_dev_configure(struct rte_eth_dev *dev)
                return -EINVAL;
        }
 
-       /* KEEP_CRC offload flag is not supported by PMD
-        * can remove the below block when DEV_RX_OFFLOAD_CRC_STRIP removed
-        */
-       if (rte_eth_dev_must_keep_crc(dev->data->dev_conf.rxmode.offloads)) {
-               MRVL_LOG(INFO, "L2 CRC stripping is always enabled in hw");
-               dev->data->dev_conf.rxmode.offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
-       }
-
        if (dev->data->dev_conf.rxmode.split_hdr_size) {
                MRVL_LOG(INFO, "Split headers not supported");
                return -EINVAL;
@@ -329,7 +369,10 @@ mrvl_dev_configure(struct rte_eth_dev *dev)
 
        if (dev->data->dev_conf.rxmode.offloads & DEV_RX_OFFLOAD_JUMBO_FRAME)
                dev->data->mtu = dev->data->dev_conf.rxmode.max_rx_pkt_len -
-                                ETHER_HDR_LEN - ETHER_CRC_LEN;
+                                MRVL_PP2_ETH_HDRS_LEN;
+
+       if (dev->data->dev_conf.txmode.offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
+               priv->multiseg = 1;
 
        ret = mrvl_configure_rxqs(priv, dev->data->port_id,
                                  dev->data->nb_rx_queues);
@@ -345,6 +388,10 @@ mrvl_dev_configure(struct rte_eth_dev *dev)
        priv->ppio_params.maintain_stats = 1;
        priv->nb_rx_queues = dev->data->nb_rx_queues;
 
+       ret = mrvl_tm_init(dev);
+       if (ret < 0)
+               return ret;
+
        if (dev->data->nb_rx_queues == 1 &&
            dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_RSS) {
                MRVL_LOG(WARNING, "Disabling hash for 1 rx queue");
@@ -375,21 +422,55 @@ static int
 mrvl_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
 {
        struct mrvl_priv *priv = dev->data->dev_private;
-       /* extra MV_MH_SIZE bytes are required for Marvell tag */
-       uint16_t mru = mtu + MV_MH_SIZE + ETHER_HDR_LEN + ETHER_CRC_LEN;
+       uint16_t mru;
+       uint16_t mbuf_data_size = 0; /* SW buffer size */
        int ret;
 
-       if (mtu < ETHER_MIN_MTU || mru > MRVL_PKT_SIZE_MAX)
+       mru = MRVL_PP2_MTU_TO_MRU(mtu);
+       /*
+        * min_rx_buf_size is equal to the mbuf data size
+        * if the PMD did not set it differently
+        */
+       mbuf_data_size = dev->data->min_rx_buf_size - RTE_PKTMBUF_HEADROOM;
+       /* Prevent PMD from:
+        * - setting mru greater than the mbuf size resulting in
+        * hw and sw buffer size mismatch
+        * - setting mtu that requires the support of scattered packets
+        * when this feature has not been enabled/supported so far
+        * (TODO check scattered_rx flag here once scattered RX is supported).
+        */
+       if (mru + MRVL_PKT_OFFS > mbuf_data_size) {
+               mru = mbuf_data_size - MRVL_PKT_OFFS;
+               mtu = MRVL_PP2_MRU_TO_MTU(mru);
+               MRVL_LOG(WARNING, "MTU too big, max MTU possible limited "
+                       "by current mbuf size: %u. Set MTU to %u, MRU to %u",
+                       mbuf_data_size, mtu, mru);
+       }
+
+       if (mtu < ETHER_MIN_MTU || mru > MRVL_PKT_SIZE_MAX) {
+               MRVL_LOG(ERR, "Invalid MTU [%u] or MRU [%u]", mtu, mru);
                return -EINVAL;
+       }
+
+       dev->data->mtu = mtu;
+       dev->data->dev_conf.rxmode.max_rx_pkt_len = mru - MV_MH_SIZE;
 
        if (!priv->ppio)
                return 0;
 
        ret = pp2_ppio_set_mru(priv->ppio, mru);
-       if (ret)
+       if (ret) {
+               MRVL_LOG(ERR, "Failed to change MRU");
                return ret;
+       }
 
-       return pp2_ppio_set_mtu(priv->ppio, mtu);
+       ret = pp2_ppio_set_mtu(priv->ppio, mtu);
+       if (ret) {
+               MRVL_LOG(ERR, "Failed to change MTU");
+               return ret;
+       }
+
+       return 0;
 }
 
 /**
@@ -528,6 +609,9 @@ mrvl_dev_start(struct rte_eth_dev *dev)
        char match[MRVL_MATCH_LEN];
        int ret = 0, i, def_init_size;
 
+       if (priv->ppio)
+               return mrvl_dev_set_link_up(dev);
+
        snprintf(match, sizeof(match), "ppio-%d:%d",
                 priv->pp_id, priv->ppio_id);
        priv->ppio_params.match = match;
@@ -597,9 +681,13 @@ mrvl_dev_start(struct rte_eth_dev *dev)
                }
                priv->vlan_flushed = 1;
        }
+       ret = mrvl_mtu_set(dev, dev->data->mtu);
+       if (ret)
+               MRVL_LOG(ERR, "Failed to set MTU to %d", dev->data->mtu);
 
        /* For default QoS config, don't start classifier. */
-       if (mrvl_qos_cfg) {
+       if (mrvl_qos_cfg &&
+           mrvl_qos_cfg->port[dev->data->port_id].use_global_defaults == 0) {
                ret = mrvl_start_qos_mapping(priv);
                if (ret) {
                        MRVL_LOG(ERR, "Failed to setup QoS mapping");
@@ -631,6 +719,10 @@ mrvl_dev_start(struct rte_eth_dev *dev)
                        goto out;
        }
 
+       mrvl_flow_init(dev);
+       mrvl_mtr_init(dev);
+       mrvl_set_tx_function(dev);
+
        return 0;
 out:
        MRVL_LOG(ERR, "Failed to start device");
@@ -752,28 +844,7 @@ mrvl_flush_bpool(struct rte_eth_dev *dev)
 static void
 mrvl_dev_stop(struct rte_eth_dev *dev)
 {
-       struct mrvl_priv *priv = dev->data->dev_private;
-
        mrvl_dev_set_link_down(dev);
-       mrvl_flush_rx_queues(dev);
-       mrvl_flush_tx_shadow_queues(dev);
-       if (priv->cls_tbl) {
-               pp2_cls_tbl_deinit(priv->cls_tbl);
-               priv->cls_tbl = NULL;
-       }
-       if (priv->qos_tbl) {
-               pp2_cls_qos_tbl_deinit(priv->qos_tbl);
-               priv->qos_tbl = NULL;
-       }
-       if (priv->ppio)
-               pp2_ppio_deinit(priv->ppio);
-       priv->ppio = NULL;
-
-       /* policer must be released after ppio deinitialization */
-       if (priv->policer) {
-               pp2_cls_plcr_deinit(priv->policer);
-               priv->policer = NULL;
-       }
 }
 
 /**
@@ -788,6 +859,11 @@ mrvl_dev_close(struct rte_eth_dev *dev)
        struct mrvl_priv *priv = dev->data->dev_private;
        size_t i;
 
+       mrvl_flush_rx_queues(dev);
+       mrvl_flush_tx_shadow_queues(dev);
+       mrvl_flow_deinit(dev);
+       mrvl_mtr_deinit(dev);
+
        for (i = 0; i < priv->ppio_params.inqs_params.num_tcs; ++i) {
                struct pp2_ppio_tc_params *tc_params =
                        &priv->ppio_params.inqs_params.tcs_params[i];
@@ -798,7 +874,29 @@ mrvl_dev_close(struct rte_eth_dev *dev)
                }
        }
 
+       if (priv->cls_tbl) {
+               pp2_cls_tbl_deinit(priv->cls_tbl);
+               priv->cls_tbl = NULL;
+       }
+
+       if (priv->qos_tbl) {
+               pp2_cls_qos_tbl_deinit(priv->qos_tbl);
+               priv->qos_tbl = NULL;
+       }
+
        mrvl_flush_bpool(dev);
+       mrvl_tm_deinit(dev);
+
+       if (priv->ppio) {
+               pp2_ppio_deinit(priv->ppio);
+               priv->ppio = NULL;
+       }
+
+       /* policer must be released after ppio deinitialization */
+       if (priv->default_policer) {
+               pp2_cls_plcr_deinit(priv->default_policer);
+               priv->default_policer = NULL;
+       }
 }
 
 /**
@@ -1337,7 +1435,6 @@ mrvl_dev_infos_get(struct rte_eth_dev *dev __rte_unused,
 
        /* By default packets are dropped if no descriptors are available */
        info->default_rxconf.rx_drop_en = 1;
-       info->default_rxconf.offloads = DEV_RX_OFFLOAD_CRC_STRIP;
 
        info->max_rx_pktlen = MRVL_PKT_SIZE_MAX;
 }
@@ -1356,6 +1453,8 @@ mrvl_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused)
 {
        static const uint32_t ptypes[] = {
                RTE_PTYPE_L2_ETHER,
+               RTE_PTYPE_L2_ETHER_VLAN,
+               RTE_PTYPE_L2_ETHER_QINQ,
                RTE_PTYPE_L3_IPV4,
                RTE_PTYPE_L3_IPV4_EXT,
                RTE_PTYPE_L3_IPV4_EXT_UNKNOWN,
@@ -1492,7 +1591,7 @@ mrvl_fill_bpool(struct mrvl_rxq *rxq, int num)
 
                entries[i].buff.addr =
                        rte_mbuf_data_iova_default(mbufs[i]);
-               entries[i].buff.cookie = (pp2_cookie_t)(uint64_t)mbufs[i];
+               entries[i].buff.cookie = (uint64_t)mbufs[i];
                entries[i].bpool = bpool;
        }
 
@@ -1537,8 +1636,8 @@ mrvl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 {
        struct mrvl_priv *priv = dev->data->dev_private;
        struct mrvl_rxq *rxq;
-       uint32_t min_size,
-                max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
+       uint32_t frame_size, buf_size = rte_pktmbuf_data_room_size(mp);
+       uint32_t max_rx_pkt_len = dev->data->dev_conf.rxmode.max_rx_pkt_len;
        int ret, tc, inq;
        uint64_t offloads;
 
@@ -1553,15 +1652,16 @@ mrvl_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
                return -EFAULT;
        }
 
-       min_size = rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM -
-                  MRVL_PKT_EFFEC_OFFS;
-       if (min_size < max_rx_pkt_len) {
-               MRVL_LOG(ERR,
-                       "Mbuf size must be increased to %u bytes to hold up to %u bytes of data.",
-                       max_rx_pkt_len + RTE_PKTMBUF_HEADROOM +
-                       MRVL_PKT_EFFEC_OFFS,
+       frame_size = buf_size - RTE_PKTMBUF_HEADROOM - MRVL_PKT_EFFEC_OFFS;
+       if (frame_size < max_rx_pkt_len) {
+               MRVL_LOG(WARNING,
+                       "Mbuf size must be increased to %u bytes to hold up "
+                       "to %u bytes of data.",
+                       buf_size + max_rx_pkt_len - frame_size,
                        max_rx_pkt_len);
-               return -EINVAL;
+               dev->data->dev_conf.rxmode.max_rx_pkt_len = frame_size;
+               MRVL_LOG(INFO, "Setting max rx pkt len to %u",
+                       dev->data->dev_conf.rxmode.max_rx_pkt_len);
        }
 
        if (dev->data->rx_queues[idx]) {
@@ -1867,6 +1967,44 @@ mrvl_eth_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
        }
 }
 
+/**
+ * DPDK callback to get rte_mtr callbacks.
+ *
+ * @param dev
+ *   Pointer to the device structure.
+ * @param ops
+ *   Pointer to pass the mtr ops.
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mrvl_mtr_ops_get(struct rte_eth_dev *dev __rte_unused, void *ops)
+{
+       *(const void **)ops = &mrvl_mtr_ops;
+
+       return 0;
+}
+
+/**
+ * DPDK callback to get rte_tm callbacks.
+ *
+ * @param dev
+ *   Pointer to the device structure.
+ * @param ops
+ *   Pointer to pass the tm ops.
+ *
+ * @return
+ *   Always 0.
+ */
+static int
+mrvl_tm_ops_get(struct rte_eth_dev *dev __rte_unused, void *ops)
+{
+       *(const void **)ops = &mrvl_tm_ops;
+
+       return 0;
+}
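
The two *_ops_get callbacks above are how the generic rte_mtr/rte_tm layers
discover this PMD's implementations. A hedged sketch of creating one sRTCM
meter through that path; the ids and rates are placeholder values:

	#include <string.h>
	#include <rte_mtr.h>

	static int create_meter(uint16_t port_id)
	{
		struct rte_mtr_meter_profile prof;
		struct rte_mtr_params params;
		struct rte_mtr_error err;
		int ret;

		memset(&prof, 0, sizeof(prof));
		prof.alg = RTE_MTR_SRTCM_RFC2697;
		prof.srtcm_rfc2697.cir = 1000000;	/* bytes/s */
		prof.srtcm_rfc2697.cbs = 2048;
		prof.srtcm_rfc2697.ebs = 2048;

		ret = rte_mtr_meter_profile_add(port_id, 0, &prof, &err);
		if (ret)
			return ret;

		memset(&params, 0, sizeof(params));
		params.meter_profile_id = 0;
		params.meter_enable = 1;

		return rte_mtr_create(port_id, 0, &params, 1 /* shared */,
				      &err);
	}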
+
 static const struct eth_dev_ops mrvl_ops = {
        .dev_configure = mrvl_dev_configure,
        .dev_start = mrvl_dev_start,
@@ -1904,6 +2042,8 @@ static const struct eth_dev_ops mrvl_ops = {
        .rss_hash_update = mrvl_rss_hash_update,
        .rss_hash_conf_get = mrvl_rss_hash_conf_get,
        .filter_ctrl = mrvl_eth_filter_ctrl,
+       .mtr_ops_get = mrvl_mtr_ops_get,
+       .tm_ops_get = mrvl_tm_ops_get,
 };
 
 /**
@@ -1925,13 +2065,27 @@ mrvl_desc_to_packet_type_and_offset(struct pp2_ppio_desc *desc,
 {
        enum pp2_inq_l3_type l3_type;
        enum pp2_inq_l4_type l4_type;
+       enum pp2_inq_vlan_tag vlan_tag;
        uint64_t packet_type;
 
        pp2_ppio_inq_desc_get_l3_info(desc, &l3_type, l3_offset);
        pp2_ppio_inq_desc_get_l4_info(desc, &l4_type, l4_offset);
+       pp2_ppio_inq_desc_get_vlan_tag(desc, &vlan_tag);
 
        packet_type = RTE_PTYPE_L2_ETHER;
 
+       switch (vlan_tag) {
+       case PP2_INQ_VLAN_TAG_SINGLE:
+               packet_type |= RTE_PTYPE_L2_ETHER_VLAN;
+               break;
+       case PP2_INQ_VLAN_TAG_DOUBLE:
+       case PP2_INQ_VLAN_TAG_TRIPLE:
+               packet_type |= RTE_PTYPE_L2_ETHER_QINQ;
+               break;
+       default:
+               break;
+       }
+
        switch (l3_type) {
        case PP2_INQ_L3_TYPE_IPV4_NO_OPTS:
                packet_type |= RTE_PTYPE_L3_IPV4;
@@ -2073,7 +2227,7 @@ mrvl_rx_pkt_burst(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
                if (unlikely(status != PP2_DESC_ERR_OK)) {
                        struct pp2_buff_inf binf = {
                                .addr = rte_mbuf_data_iova_default(mbuf),
-                               .cookie = (pp2_cookie_t)(uint64_t)mbuf,
+                               .cookie = (uint64_t)mbuf,
                        };
 
                        pp2_bpool_put_buff(hif, bpool, &binf);
@@ -2334,22 +2488,8 @@ mrvl_tx_pkt_burst(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                        rte_mbuf_prefetch_part2(pref_pkt_hdr);
                }
 
-               sq->ent[sq->head].buff.cookie = (pp2_cookie_t)(uint64_t)mbuf;
-               sq->ent[sq->head].buff.addr =
-                       rte_mbuf_data_iova_default(mbuf);
-               sq->ent[sq->head].bpool =
-                       (unlikely(mbuf->port >= RTE_MAX_ETHPORTS ||
-                        mbuf->refcnt > 1)) ? NULL :
-                        mrvl_port_to_bpool_lookup[mbuf->port];
-               sq->head = (sq->head + 1) & MRVL_PP2_TX_SHADOWQ_MASK;
-               sq->size++;
-
-               pp2_ppio_outq_desc_reset(&descs[i]);
-               pp2_ppio_outq_desc_set_phys_addr(&descs[i],
-                                                rte_pktmbuf_iova(mbuf));
-               pp2_ppio_outq_desc_set_pkt_offset(&descs[i], 0);
-               pp2_ppio_outq_desc_set_pkt_len(&descs[i],
-                                              rte_pktmbuf_pkt_len(mbuf));
+               mrvl_fill_shadowq(sq, mbuf);
+               mrvl_fill_desc(&descs[i], mbuf);
 
                bytes_sent += rte_pktmbuf_pkt_len(mbuf);
                /*
@@ -2387,6 +2527,152 @@ mrvl_tx_pkt_burst(void *txq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
        return nb_pkts;
 }
 
+/**
+ * DPDK callback for S/G transmit.
+ *
+ * @param txq
+ *   Generic pointer to the transmit queue.
+ * @param tx_pkts
+ *   Packets to transmit.
+ * @param nb_pkts
+ *   Number of packets in array.
+ *
+ * @return
+ *   Number of packets successfully transmitted.
+ */
+static uint16_t
+mrvl_tx_sg_pkt_burst(void *txq, struct rte_mbuf **tx_pkts,
+                    uint16_t nb_pkts)
+{
+       struct mrvl_txq *q = txq;
+       struct mrvl_shadow_txq *sq;
+       struct pp2_hif *hif;
+       struct pp2_ppio_desc descs[nb_pkts * PP2_PPIO_DESC_NUM_FRAGS];
+       struct pp2_ppio_sg_pkts pkts;
+       uint8_t frags[nb_pkts];
+       unsigned int core_id = rte_lcore_id();
+       int i, j, ret, bytes_sent = 0;
+       int tail, tail_first;
+       uint16_t num, sq_free_size;
+       uint16_t nb_segs, total_descs = 0;
+       uint64_t addr;
+
+       hif = mrvl_get_hif(q->priv, core_id);
+       sq = &q->shadow_txqs[core_id];
+       pkts.frags = frags;
+       pkts.num = 0;
+
+       if (unlikely(!q->priv->ppio || !hif))
+               return 0;
+
+       if (sq->size)
+               mrvl_free_sent_buffers(q->priv->ppio, hif, core_id,
+                                      sq, q->queue_id, 0);
+
+       /* Save shadow queue free size */
+       sq_free_size = MRVL_PP2_TX_SHADOWQ_SIZE - sq->size - 1;
+
+       tail = 0;
+       for (i = 0; i < nb_pkts; i++) {
+               struct rte_mbuf *mbuf = tx_pkts[i];
+               struct rte_mbuf *seg = NULL;
+               int gen_l3_cksum, gen_l4_cksum;
+               enum pp2_outq_l3_type l3_type;
+               enum pp2_outq_l4_type l4_type;
+
+               nb_segs = mbuf->nb_segs;
+               tail_first = tail;
+               total_descs += nb_segs;
+
+               /*
+                * Check if total_descs does not exceed
+                * shadow queue free size
+                */
+               if (unlikely(total_descs > sq_free_size)) {
+                       total_descs -= nb_segs;
+                       RTE_LOG(DEBUG, PMD,
+                               "No room in shadow queue for %d packets! "
+                               "%d packets will be sent.\n",
+                               nb_pkts, i);
+                       break;
+               }
+
+               /* Check that nb_segs does not exceed the max number
+                * of descriptors per fragmented packet
+                */
+               if (nb_segs > PP2_PPIO_DESC_NUM_FRAGS) {
+                       total_descs -= nb_segs;
+                       RTE_LOG(ERR, PMD,
+                               "Too many segments. Packet won't be sent.\n");
+                       break;
+               }
+
+               if (likely(nb_pkts - i > MRVL_MUSDK_PREFETCH_SHIFT)) {
+                       struct rte_mbuf *pref_pkt_hdr;
+
+                       pref_pkt_hdr = tx_pkts[i + MRVL_MUSDK_PREFETCH_SHIFT];
+                       rte_mbuf_prefetch_part1(pref_pkt_hdr);
+                       rte_mbuf_prefetch_part2(pref_pkt_hdr);
+               }
+
+               pkts.frags[pkts.num] = nb_segs;
+               pkts.num++;
+
+               seg = mbuf;
+               for (j = 0; j < nb_segs - 1; j++) {
+                       /* For the subsequent segments, set shadow queue
+                        * buffer to NULL
+                        */
+                       mrvl_fill_shadowq(sq, NULL);
+                       mrvl_fill_desc(&descs[tail], seg);
+
+                       tail++;
+                       seg = seg->next;
+               }
+               /* Put first mbuf info in last shadow queue entry */
+               mrvl_fill_shadowq(sq, mbuf);
+               /* Update descriptor with last segment */
+               mrvl_fill_desc(&descs[tail++], seg);
+
+               bytes_sent += rte_pktmbuf_pkt_len(mbuf);
+               /* In case unsupported ol_flags were passed,
+                * do not update descriptor offload information
+                */
+               ret = mrvl_prepare_proto_info(mbuf->ol_flags, mbuf->packet_type,
+                                             &l3_type, &l4_type, &gen_l3_cksum,
+                                             &gen_l4_cksum);
+               if (unlikely(ret))
+                       continue;
+
+               pp2_ppio_outq_desc_set_proto_info(&descs[tail_first], l3_type,
+                                                 l4_type, mbuf->l2_len,
+                                                 mbuf->l2_len + mbuf->l3_len,
+                                                 gen_l3_cksum, gen_l4_cksum);
+       }
+
+       num = total_descs;
+       pp2_ppio_send_sg(q->priv->ppio, hif, q->queue_id, descs,
+                        &total_descs, &pkts);
+       /* Roll back shadow queue entries for packets that were not sent */
+       if (unlikely(num > total_descs)) {
+               for (i = total_descs; i < num; i++) {
+                       sq->head = (MRVL_PP2_TX_SHADOWQ_SIZE + sq->head - 1) &
+                               MRVL_PP2_TX_SHADOWQ_MASK;
+
+                       addr = sq->ent[sq->head].buff.cookie;
+                       if (addr)
+                               bytes_sent -=
+                                       rte_pktmbuf_pkt_len((struct rte_mbuf *)
+                                               (cookie_addr_high | addr));
+               }
+               sq->size -= num - total_descs;
+               nb_pkts = pkts.num;
+       }
+
+       q->bytes_sent += bytes_sent;
+
+       return nb_pkts;
+}
+
 /**
  * Initialize packet processor.
  *
@@ -2494,8 +2780,9 @@ mrvl_eth_dev_create(struct rte_vdev_device *vdev, const char *name)
        priv = mrvl_priv_create(name);
        if (!priv) {
                ret = -ENOMEM;
-               goto out_free_dev;
+               goto out_free;
        }
+       eth_dev->data->dev_private = priv;
 
        eth_dev->data->mac_addrs =
                rte_zmalloc("mac_addrs",
@@ -2503,33 +2790,28 @@ mrvl_eth_dev_create(struct rte_vdev_device *vdev, const char *name)
        if (!eth_dev->data->mac_addrs) {
                MRVL_LOG(ERR, "Failed to allocate space for eth addrs");
                ret = -ENOMEM;
-               goto out_free_priv;
+               goto out_free;
        }
 
        memset(&req, 0, sizeof(req));
        strcpy(req.ifr_name, name);
        ret = ioctl(fd, SIOCGIFHWADDR, &req);
        if (ret)
-               goto out_free_mac;
+               goto out_free;
 
        memcpy(eth_dev->data->mac_addrs[0].addr_bytes,
               req.ifr_addr.sa_data, ETHER_ADDR_LEN);
 
-       eth_dev->rx_pkt_burst = mrvl_rx_pkt_burst;
-       eth_dev->tx_pkt_burst = mrvl_tx_pkt_burst;
        eth_dev->data->kdrv = RTE_KDRV_NONE;
-       eth_dev->data->dev_private = priv;
        eth_dev->device = &vdev->device;
+       eth_dev->rx_pkt_burst = mrvl_rx_pkt_burst;
+       mrvl_set_tx_function(eth_dev);
        eth_dev->dev_ops = &mrvl_ops;
 
        rte_eth_dev_probing_finish(eth_dev);
        return 0;
-out_free_mac:
-       rte_free(eth_dev->data->mac_addrs);
-out_free_dev:
+out_free:
        rte_eth_dev_release_port(eth_dev);
-out_free_priv:
-       rte_free(priv);
 
        return ret;
 }
@@ -2553,8 +2835,6 @@ mrvl_eth_dev_destroy(const char *name)
        priv = eth_dev->data->dev_private;
        pp2_bpool_deinit(priv->bpool);
        used_bpools[priv->pp_id] &= ~(1 << priv->bpool_bit);
-       rte_free(priv);
-       rte_free(eth_dev->data->mac_addrs);
        rte_eth_dev_release_port(eth_dev);
 }
 
@@ -2654,23 +2934,16 @@ rte_pmd_mrvl_probe(struct rte_vdev_device *vdev)
                goto init_devices;
 
        MRVL_LOG(INFO, "Perform MUSDK initializations");
-       /*
-        * ret == -EEXIST is correct, it means DMA
-        * has been already initialized (by another PMD).
-        */
-       ret = mv_sys_dma_mem_init(MRVL_MUSDK_DMA_MEMSIZE);
-       if (ret < 0) {
-               if (ret != -EEXIST)
-                       goto out_free_kvlist;
-               else
-                       MRVL_LOG(INFO,
-                               "DMA memory has been already initialized by a different driver.");
-       }
+
+       ret = rte_mvep_init(MVEP_MOD_T_PP2, kvlist);
+       if (ret)
+               goto out_free_kvlist;
 
        ret = mrvl_init_pp2();
        if (ret) {
                MRVL_LOG(ERR, "Failed to init PP!");
-               goto out_deinit_dma;
+               rte_mvep_deinit(MVEP_MOD_T_PP2);
+               goto out_free_kvlist;
        }
 
        memset(mrvl_port_bpool_size, 0, sizeof(mrvl_port_bpool_size));
@@ -2695,11 +2968,10 @@ out_cleanup:
        for (; i > 0; i--)
                mrvl_eth_dev_destroy(ifnames.names[i]);
 
-       if (mrvl_dev_num == 0)
+       if (mrvl_dev_num == 0) {
                mrvl_deinit_pp2();
-out_deinit_dma:
-       if (mrvl_dev_num == 0)
-               mv_sys_dma_mem_destroy();
+               rte_mvep_deinit(MVEP_MOD_T_PP2);
+       }
 out_free_kvlist:
        rte_kvargs_free(kvlist);
 
@@ -2739,7 +3011,7 @@ rte_pmd_mrvl_remove(struct rte_vdev_device *vdev)
                MRVL_LOG(INFO, "Perform MUSDK deinit");
                mrvl_deinit_hifs();
                mrvl_deinit_pp2();
-               mv_sys_dma_mem_destroy();
+               rte_mvep_deinit(MVEP_MOD_T_PP2);
        }
 
        return 0;
index 3726f78..0120b9e 100644 (file)
@@ -9,6 +9,18 @@
 
 #include <rte_spinlock.h>
 #include <rte_flow_driver.h>
+#include <rte_mtr_driver.h>
+#include <rte_tm_driver.h>
+
+/*
+ * container_of is defined by both DPDK and MUSDK,
+ * we'll declare only one version.
+ *
+ * Note that it is not used in this PMD anyway.
+ */
+#ifdef container_of
+#undef container_of
+#endif
 
 #include <env/mv_autogen_comp_flags.h>
 #include <drivers/mv_pp2.h>
@@ -16,6 +28,7 @@
 #include <drivers/mv_pp2_cls.h>
 #include <drivers/mv_pp2_hif.h>
 #include <drivers/mv_pp2_ppio.h>
+#include "env/mv_common.h" /* for BIT() */
 
 /** Maximum number of rx queues per port */
 #define MRVL_PP2_RXQ_MAX 32
 /** Minimum number of sent buffers to release from shadow queue to BM */
 #define MRVL_PP2_BUF_RELEASE_BURST_SIZE        64
 
+#define MRVL_PP2_VLAN_TAG_LEN          4
+#define MRVL_PP2_ETH_HDRS_LEN          (ETHER_HDR_LEN + ETHER_CRC_LEN + \
+                                       (2 * MRVL_PP2_VLAN_TAG_LEN))
+#define MRVL_PP2_HDRS_LEN              (MV_MH_SIZE + MRVL_PP2_ETH_HDRS_LEN)
+#define MRVL_PP2_MTU_TO_MRU(mtu)       ((mtu) + MRVL_PP2_HDRS_LEN)
+#define MRVL_PP2_MRU_TO_MTU(mru)       ((mru) - MRVL_PP2_HDRS_LEN)
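
Plugging in the usual constants (14-byte Ethernet header, 4-byte CRC, two
4-byte VLAN tags, and a 2-byte Marvell header assumed for MV_MH_SIZE) gives
MRVL_PP2_HDRS_LEN = 28, so the conversion is a fixed offset:

	/* worked example of the macros above, constants spelled out */
	#include <stdio.h>

	#define HDRS_LEN (14 + 4 + 2 * 4 + 2)	/* = 28 */

	int main(void)
	{
		printf("MTU 1500 -> MRU %d\n", 1500 + HDRS_LEN); /* 1528 */
		return 0;
	}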
+
+/** Maximum length of a match string */
+#define MRVL_MATCH_LEN 16
+
+/** Parsed fields in processed rte_flow_item. */
+enum mrvl_parsed_fields {
+       /* eth flags */
+       F_DMAC =         BIT(0),
+       F_SMAC =         BIT(1),
+       F_TYPE =         BIT(2),
+       /* vlan flags */
+       F_VLAN_PRI =     BIT(3),
+       F_VLAN_ID =      BIT(4),
+       F_VLAN_TCI =     BIT(5), /* not supported by MUSDK yet */
+       /* ip4 flags */
+       F_IP4_TOS =      BIT(6),
+       F_IP4_SIP =      BIT(7),
+       F_IP4_DIP =      BIT(8),
+       F_IP4_PROTO =    BIT(9),
+       /* ip6 flags */
+       F_IP6_TC =       BIT(10), /* not supported by MUSDK yet */
+       F_IP6_SIP =      BIT(11),
+       F_IP6_DIP =      BIT(12),
+       F_IP6_FLOW =     BIT(13),
+       F_IP6_NEXT_HDR = BIT(14),
+       /* tcp flags */
+       F_TCP_SPORT =    BIT(15),
+       F_TCP_DPORT =    BIT(16),
+       /* udp flags */
+       F_UDP_SPORT =    BIT(17),
+       F_UDP_DPORT =    BIT(18),
+};
+
+/** PMD-specific definition of a flow rule handle. */
+struct mrvl_mtr;
+struct rte_flow {
+       LIST_ENTRY(rte_flow) next;
+       struct mrvl_mtr *mtr;
+
+       enum mrvl_parsed_fields pattern;
+
+       struct pp2_cls_tbl_rule rule;
+       struct pp2_cls_cos_desc cos;
+       struct pp2_cls_tbl_action action;
+};
+
+struct mrvl_mtr_profile {
+       LIST_ENTRY(mrvl_mtr_profile) next;
+       uint32_t profile_id;
+       int refcnt;
+       struct rte_mtr_meter_profile profile;
+};
+
+struct mrvl_mtr {
+       LIST_ENTRY(mrvl_mtr) next;
+       uint32_t mtr_id;
+       int refcnt;
+       int shared;
+       int enabled;
+       int plcr_bit;
+       struct mrvl_mtr_profile *profile;
+       struct pp2_cls_plcr *plcr;
+};
+
+struct mrvl_tm_shaper_profile {
+       LIST_ENTRY(mrvl_tm_shaper_profile) next;
+       uint32_t id;
+       int refcnt;
+       struct rte_tm_shaper_params params;
+};
+
+enum {
+       MRVL_NODE_PORT,
+       MRVL_NODE_QUEUE,
+};
+
+struct mrvl_tm_node {
+       LIST_ENTRY(mrvl_tm_node) next;
+       uint32_t id;
+       uint32_t type;
+       int refcnt;
+       struct mrvl_tm_node *parent;
+       struct mrvl_tm_shaper_profile *profile;
+       uint8_t weight;
+       uint64_t stats_mask;
+};
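
These shaper-profile and node structures back the rte_tm hierarchy that
mrvl_tm_ops exposes. A hedged sketch of shaping a whole port through the
generic API; node/profile ids, the rate, and the burst size are placeholders:

	#include <string.h>
	#include <rte_tm.h>

	static int shape_port(uint16_t port_id)
	{
		struct rte_tm_shaper_params sp;
		struct rte_tm_node_params np;
		struct rte_tm_error err;
		int ret;

		memset(&sp, 0, sizeof(sp));
		sp.peak.rate = 100 * 1000 * 1000 / 8;	/* ~100 Mbps in B/s */
		sp.peak.size = 12 * 1024;		/* burst, bytes */

		ret = rte_tm_shaper_profile_add(port_id, 0, &sp, &err);
		if (ret)
			return ret;

		memset(&np, 0, sizeof(np));
		np.shaper_profile_id = 0;

		/* root (port-level) node, then commit the hierarchy */
		ret = rte_tm_node_add(port_id, 0, RTE_TM_NODE_ID_NULL,
				      0, 1, 0, &np, &err);
		if (ret)
			return ret;

		return rte_tm_hierarchy_commit(port_id, 1, &err);
	}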
+
 struct mrvl_priv {
        /* Hot fields, used in fast path. */
        struct pp2_bpool *bpool;  /**< BPool pointer */
@@ -82,6 +188,7 @@ struct mrvl_priv {
        uint8_t uc_mc_flushed;
        uint8_t vlan_flushed;
        uint8_t isolated;
+       uint8_t multiseg;
 
        struct pp2_ppio_params ppio_params;
        struct pp2_cls_qos_tbl_params qos_tbl_params;
@@ -93,12 +200,26 @@ struct mrvl_priv {
        uint32_t cls_tbl_pattern;
        LIST_HEAD(mrvl_flows, rte_flow) flows;
 
-       struct pp2_cls_plcr *policer;
+       struct pp2_cls_plcr *default_policer;
+
+       LIST_HEAD(profiles, mrvl_mtr_profile) profiles;
+       LIST_HEAD(mtrs, mrvl_mtr) mtrs;
+       uint32_t used_plcrs;
+
+       LIST_HEAD(shaper_profiles, mrvl_tm_shaper_profile) shaper_profiles;
+       LIST_HEAD(nodes, mrvl_tm_node) nodes;
+       uint64_t rate_max;
 };
 
 /** Flow operations forward declaration. */
 extern const struct rte_flow_ops mrvl_flow_ops;
 
+/** Meter operations forward declaration. */
+extern const struct rte_mtr_ops mrvl_mtr_ops;
+
+/** Traffic manager operations forward declaration. */
+extern const struct rte_tm_ops mrvl_tm_ops;
+
 /** Current log type. */
 extern int mrvl_logtype;
 
index ecc3419..ffd1dab 100644 (file)
 
 #include <arpa/inet.h>
 
-#ifdef container_of
-#undef container_of
-#endif
-
-#include "mrvl_ethdev.h"
+#include "mrvl_flow.h"
 #include "mrvl_qos.h"
-#include "env/mv_common.h" /* for BIT() */
 
 /** Number of rules in the classifier table. */
 #define MRVL_CLS_MAX_NUM_RULES 20
 /** Size of the classifier key and mask strings. */
 #define MRVL_CLS_STR_SIZE_MAX 40
 
-/** Parsed fields in processed rte_flow_item. */
-enum mrvl_parsed_fields {
-       /* eth flags */
-       F_DMAC =         BIT(0),
-       F_SMAC =         BIT(1),
-       F_TYPE =         BIT(2),
-       /* vlan flags */
-       F_VLAN_ID =      BIT(3),
-       F_VLAN_PRI =     BIT(4),
-       F_VLAN_TCI =     BIT(5), /* not supported by MUSDK yet */
-       /* ip4 flags */
-       F_IP4_TOS =      BIT(6),
-       F_IP4_SIP =      BIT(7),
-       F_IP4_DIP =      BIT(8),
-       F_IP4_PROTO =    BIT(9),
-       /* ip6 flags */
-       F_IP6_TC =       BIT(10), /* not supported by MUSDK yet */
-       F_IP6_SIP =      BIT(11),
-       F_IP6_DIP =      BIT(12),
-       F_IP6_FLOW =     BIT(13),
-       F_IP6_NEXT_HDR = BIT(14),
-       /* tcp flags */
-       F_TCP_SPORT =    BIT(15),
-       F_TCP_DPORT =    BIT(16),
-       /* udp flags */
-       F_UDP_SPORT =    BIT(17),
-       F_UDP_DPORT =    BIT(18),
-};
-
-/** PMD-specific definition of a flow rule handle. */
-struct rte_flow {
-       LIST_ENTRY(rte_flow) next;
-
-       enum mrvl_parsed_fields pattern;
-
-       struct pp2_cls_tbl_rule rule;
-       struct pp2_cls_cos_desc cos;
-       struct pp2_cls_tbl_action action;
-};
-
 static const enum rte_flow_item_type pattern_eth[] = {
        RTE_FLOW_ITEM_TYPE_ETH,
        RTE_FLOW_ITEM_TYPE_END
@@ -394,7 +349,8 @@ mrvl_parse_init(const struct rte_flow_item *item,
  *
  * @param spec Pointer to the specific flow item.
  * @param mask Pointer to the specific flow item's mask.
- * @param mask Pointer to the flow.
+ * @param parse_dst Parse either destination or source mac address.
+ * @param flow Pointer to the flow.
  * @return 0 in case of success, negative error value otherwise.
  */
 static int
@@ -613,6 +569,7 @@ mrvl_parse_ip4_dscp(const struct rte_flow_item_ipv4 *spec,
  *
  * @param spec Pointer to the specific flow item.
  * @param mask Pointer to the specific flow item's mask.
+ * @param parse_dst Parse either destination or source ip address.
  * @param flow Pointer to the flow.
  * @return 0 in case of success, negative error value otherwise.
  */
@@ -726,6 +683,7 @@ mrvl_parse_ip4_proto(const struct rte_flow_item_ipv4 *spec,
  *
  * @param spec Pointer to the specific flow item.
  * @param mask Pointer to the specific flow item's mask.
+ * @param parse_dst Parse either destination or source ipv6 address.
  * @param flow Pointer to the flow.
  * @return 0 in case of success, negative error value otherwise.
  */
@@ -874,6 +832,7 @@ mrvl_parse_ip6_next_hdr(const struct rte_flow_item_ipv6 *spec,
  *
  * @param spec Pointer to the specific flow item.
  * @param mask Pointer to the specific flow item's mask.
+ * @param parse_dst Parse either destination or source port.
  * @param flow Pointer to the flow.
  * @return 0 in case of success, negative error value otherwise.
  */
@@ -949,6 +908,7 @@ mrvl_parse_tcp_dport(const struct rte_flow_item_tcp *spec,
  *
  * @param spec Pointer to the specific flow item.
  * @param mask Pointer to the specific flow item's mask.
+ * @param parse_dst Parse either destination or source port.
  * @param flow Pointer to the flow.
  * @return 0 in case of success, negative error value otherwise.
  */
@@ -1022,7 +982,6 @@ mrvl_parse_udp_dport(const struct rte_flow_item_udp *spec,
  * @param item Pointer to the flow item.
  * @param flow Pointer to the flow.
  * @param error Pointer to the flow error.
- * @param fields Pointer to the parsed parsed fields enum.
  * @returns 0 on success, negative value otherwise.
  */
 static int
@@ -1073,7 +1032,6 @@ out:
  * @param item Pointer to the flow item.
  * @param flow Pointer to the flow.
  * @param error Pointer to the flow error.
- * @param fields Pointer to the parsed parsed fields enum.
  * @returns 0 on success, negative value otherwise.
  */
 static int
@@ -1139,7 +1097,6 @@ out:
  * @param item Pointer to the flow item.
  * @param flow Pointer to the flow.
  * @param error Pointer to the flow error.
- * @param fields Pointer to the parsed parsed fields enum.
  * @returns 0 on success, negative value otherwise.
  */
 static int
@@ -1205,7 +1162,6 @@ out:
  * @param item Pointer to the flow item.
  * @param flow Pointer to the flow.
  * @param error Pointer to the flow error.
- * @param fields Pointer to the parsed parsed fields enum.
  * @returns 0 on success, negative value otherwise.
  */
 static int
@@ -1276,7 +1232,6 @@ out:
  * @param item Pointer to the flow item.
  * @param flow Pointer to the flow.
  * @param error Pointer to the flow error.
- * @param fields Pointer to the parsed parsed fields enum.
  * @returns 0 on success, negative value otherwise.
  */
 static int
@@ -1332,7 +1287,6 @@ out:
  * @param item Pointer to the flow item.
  * @param flow Pointer to the flow.
  * @param error Pointer to the flow error.
- * @param fields Pointer to the parsed parsed fields enum.
  * @returns 0 on success, negative value otherwise.
  */
 static int
@@ -1981,6 +1935,7 @@ mrvl_parse_pattern_ip6_tcp(const struct rte_flow_item pattern[],
  * @param pattern Pointer to the flow pattern table.
  * @param flow Pointer to the flow.
  * @param error Pointer to the flow error.
+ * @param ip6 1 to parse ip6 item, 0 to parse ip4 item.
  * @returns 0 in case of success, negative value otherwise.
  */
 static int
@@ -2300,19 +2255,59 @@ mrvl_flow_parse_actions(struct mrvl_priv *priv,
                        flow->action.type = PP2_CLS_TBL_ACT_DONE;
                        flow->action.cos = &flow->cos;
                        specified++;
+               } else if (action->type == RTE_FLOW_ACTION_TYPE_METER) {
+                       const struct rte_flow_action_meter *meter;
+                       struct mrvl_mtr *mtr;
+
+                       meter = action->conf;
+                       if (!meter)
+                               return -rte_flow_error_set(error, EINVAL,
+                                               RTE_FLOW_ERROR_TYPE_ACTION,
+                                               NULL, "Invalid meter\n");
+
+                       LIST_FOREACH(mtr, &priv->mtrs, next)
+                               if (mtr->mtr_id == meter->mtr_id)
+                                       break;
+
+                       if (!mtr)
+                               return -rte_flow_error_set(error, EINVAL,
+                                               RTE_FLOW_ERROR_TYPE_ACTION,
+                                               NULL,
+                                               "Meter id does not exist\n");
+
+                       if (!mtr->shared && mtr->refcnt)
+                               return -rte_flow_error_set(error, EPERM,
+                                               RTE_FLOW_ERROR_TYPE_ACTION,
+                                               NULL,
+                                               "Meter cannot be shared\n");
+
+                       /*
+                        * In case cos has already been set
+                        * do not modify it.
+                        */
+                       if (!flow->cos.ppio) {
+                               flow->cos.ppio = priv->ppio;
+                               flow->cos.tc = 0;
+                       }
+
+                       flow->action.type = PP2_CLS_TBL_ACT_DONE;
+                       flow->action.cos = &flow->cos;
+                       flow->action.plcr = mtr->enabled ? mtr->plcr : NULL;
+                       flow->mtr = mtr;
+                       mtr->refcnt++;
+                       specified++;
                } else {
                        rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
                                           "Action not supported");
                        return -rte_errno;
                }
-
        }
 
        if (!specified) {
                rte_flow_error_set(error, EINVAL,
-                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
-                                  NULL, "Action not specified");
+                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                  "Action not specified");
                return -rte_errno;
        }
 
@@ -2350,6 +2345,12 @@ mrvl_flow_parse(struct mrvl_priv *priv, const struct rte_flow_attr *attr,
        return mrvl_flow_parse_actions(priv, actions, flow, error);
 }
 
+/**
+ * Get engine type for the given flow.
+ *
+ * @param flow Pointer to the flow.
+ * @returns The type of the engine.
+ */
 static inline enum pp2_cls_tbl_type
 mrvl_engine_type(const struct rte_flow *flow)
 {
@@ -2369,6 +2370,13 @@ mrvl_engine_type(const struct rte_flow *flow)
        return PP2_CLS_TBL_MASKABLE;
 }
 
+/**
+ * Create classifier table.
+ *
+ * @param dev Pointer to the device.
+ * @param first_flow Pointer to the very first flow.
+ * @returns 0 in case of success, negative value otherwise.
+ */
 static int
 mrvl_create_cls_table(struct rte_eth_dev *dev, struct rte_flow *first_flow)
 {
@@ -2429,7 +2437,8 @@ mrvl_create_cls_table(struct rte_eth_dev *dev, struct rte_flow *first_flow)
 
        if (first_flow->pattern & F_IP4_TOS) {
                key->proto_field[key->num_fields].proto = MV_NET_PROTO_IP4;
-               key->proto_field[key->num_fields].field.ipv4 = MV_NET_IP4_F_TOS;
+               key->proto_field[key->num_fields].field.ipv4 =
+                                                       MV_NET_IP4_F_DSCP;
                key->key_size += 1;
                key->num_fields += 1;
        }
@@ -2649,13 +2658,18 @@ mrvl_flow_remove(struct mrvl_priv *priv, struct rte_flow *flow,
 
        mrvl_free_all_key_mask(&flow->rule);
 
+       if (flow->mtr) {
+               flow->mtr->refcnt--;
+               flow->mtr = NULL;
+       }
+
        return 0;
 }
 
 /**
  * DPDK flow destroy callback called when flow is to be removed.
  *
- * @param priv Pointer to the port's private data.
+ * @param dev Pointer to the device.
  * @param flow Pointer to the flow.
  * @param error Pointer to the flow error.
  * @returns 0 in case of success, negative value otherwise.
@@ -2777,3 +2791,34 @@ const struct rte_flow_ops mrvl_flow_ops = {
        .flush = mrvl_flow_flush,
        .isolate = mrvl_flow_isolate
 };
+
+/**
+ * Initialize flow resources.
+ *
+ * @param dev Pointer to the device.
+ */
+void
+mrvl_flow_init(struct rte_eth_dev *dev)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+
+       LIST_INIT(&priv->flows);
+}
+
+/**
+ * Cleanup flow resources.
+ *
+ * @param dev Pointer to the device.
+ */
+void
+mrvl_flow_deinit(struct rte_eth_dev *dev)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+
+       mrvl_flow_flush(dev, NULL);
+
+       if (priv->cls_tbl) {
+               pp2_cls_tbl_deinit(priv->cls_tbl);
+               priv->cls_tbl = NULL;
+       }
+}
diff --git a/drivers/net/mvpp2/mrvl_flow.h b/drivers/net/mvpp2/mrvl_flow.h
new file mode 100644 (file)
index 0000000..f63747c
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Marvell International Ltd.
+ * Copyright(c) 2018 Semihalf.
+ * All rights reserved.
+ */
+
+#ifndef _MRVL_FLOW_H_
+#define _MRVL_FLOW_H_
+
+#include "mrvl_ethdev.h"
+
+void mrvl_flow_init(struct rte_eth_dev *dev);
+void mrvl_flow_deinit(struct rte_eth_dev *dev);
+
+#endif /* _MRVL_FLOW_H_ */
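The METER action handling added to mrvl_flow_parse_actions() above ties a
flow rule to a meter object created through the generic rte_mtr API (the
mvpp2 implementation follows in mrvl_mtr.c). A minimal application-side
sketch of that pairing, assuming a started mvpp2 port; the ids, rates and
the trivial ETH-only pattern are illustrative, not taken from this patch:

    /* Sketch: create an srTCM meter and reference it from a flow rule. */
    #include <rte_flow.h>
    #include <rte_mtr.h>

    static int attach_meter(uint16_t port_id)
    {
            struct rte_mtr_meter_profile prof = {
                    .alg = RTE_MTR_SRTCM_RFC2697,
                    .srtcm_rfc2697 = { .cir = 1000000, .cbs = 2048, .ebs = 2048 },
            };
            struct rte_mtr_params mp = {
                    .meter_profile_id = 1,
                    .meter_enable = 1,
            };
            struct rte_flow_action_meter meter = { .mtr_id = 1 };
            struct rte_flow_attr attr = { .ingress = 1 };
            struct rte_flow_item pattern[] = {
                    { .type = RTE_FLOW_ITEM_TYPE_ETH },
                    { .type = RTE_FLOW_ITEM_TYPE_END },
            };
            struct rte_flow_action actions[] = {
                    { .type = RTE_FLOW_ACTION_TYPE_METER, .conf = &meter },
                    { .type = RTE_FLOW_ACTION_TYPE_END },
            };
            struct rte_mtr_error merr;
            struct rte_flow_error ferr;

            if (rte_mtr_meter_profile_add(port_id, 1, &prof, &merr) ||
                rte_mtr_create(port_id, 1, &mp, 0 /* not shared */, &merr))
                    return -1;

            /* mrvl_flow_parse_actions() resolves mtr_id 1 to the meter above */
            return rte_flow_create(port_id, &attr, pattern, actions, &ferr) ?
                   0 : -1;
    }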
diff --git a/drivers/net/mvpp2/mrvl_mtr.c b/drivers/net/mvpp2/mrvl_mtr.c
new file mode 100644 (file)
index 0000000..9cd53be
--- /dev/null
@@ -0,0 +1,512 @@
+/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Marvell International Ltd.
+ * Copyright(c) 2018 Semihalf.
+ * All rights reserved.
+ */
+
+#include <rte_log.h>
+#include <rte_malloc.h>
+
+#include "mrvl_mtr.h"
+
+/** Maximum meter rate */
+#define MRVL_SRTCM_RFC2697_CIR_MAX 1023000
+
+/** Invalid plcr bit */
+#define MRVL_PLCR_BIT_INVALID -1
+
+/**
+ * Return meter object capabilities.
+ *
+ * @param dev Pointer to the device (unused).
+ * @param cap Pointer to the meter object capabilities.
+ * @param error Pointer to the error (unused).
+ * @returns 0 always.
+ */
+static int
+mrvl_capabilities_get(struct rte_eth_dev *dev __rte_unused,
+                         struct rte_mtr_capabilities *cap,
+                         struct rte_mtr_error *error __rte_unused)
+{
+       struct rte_mtr_capabilities capa = {
+               .n_max = PP2_CLS_PLCR_NUM,
+               .n_shared_max = PP2_CLS_PLCR_NUM,
+               .shared_n_flows_per_mtr_max = -1,
+               .meter_srtcm_rfc2697_n_max = PP2_CLS_PLCR_NUM,
+               .meter_rate_max = MRVL_SRTCM_RFC2697_CIR_MAX,
+       };
+
+       memcpy(cap, &capa, sizeof(capa));
+
+       return 0;
+}
+
+/**
+ * Get profile using its id.
+ *
+ * @param priv Pointer to the port's private data.
+ * @param meter_profile_id Profile id used by the meter.
+ * @returns Pointer to the profile if exists, NULL otherwise.
+ */
+static struct mrvl_mtr_profile *
+mrvl_mtr_profile_from_id(struct mrvl_priv *priv, uint32_t meter_profile_id)
+{
+       struct mrvl_mtr_profile *profile = NULL;
+
+       LIST_FOREACH(profile, &priv->profiles, next)
+               if (profile->profile_id == meter_profile_id)
+                       break;
+
+       return profile;
+}
+
+/**
+ * Add profile to the list of profiles.
+ *
+ * @param dev Pointer to the device.
+ * @param meter_profile_id Id of the new profile.
+ * @param profile Pointer to the profile configuration.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_meter_profile_add(struct rte_eth_dev *dev, uint32_t meter_profile_id,
+                      struct rte_mtr_meter_profile *profile,
+                      struct rte_mtr_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_mtr_profile *prof;
+
+       if (!profile)
+               return -rte_mtr_error_set(error, EINVAL,
+                                         RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                                         NULL, NULL);
+
+       if (profile->alg != RTE_MTR_SRTCM_RFC2697)
+               return -rte_mtr_error_set(error, EINVAL,
+                                         RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                                         NULL,
+                                         "Only srTCM RFC 2697 is supported\n");
+
+       prof = mrvl_mtr_profile_from_id(priv, meter_profile_id);
+       if (prof)
+               return -rte_mtr_error_set(error, EEXIST,
+                                         RTE_MTR_ERROR_TYPE_METER_PROFILE_ID,
+                                         NULL, "Profile id already exists\n");
+
+       prof = rte_zmalloc_socket(NULL, sizeof(*prof), 0, rte_socket_id());
+       if (!prof)
+               return -rte_mtr_error_set(error, ENOMEM,
+                                         RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                                         NULL, NULL);
+
+       prof->profile_id = meter_profile_id;
+       memcpy(&prof->profile, profile, sizeof(*profile));
+
+       LIST_INSERT_HEAD(&priv->profiles, prof, next);
+
+       return 0;
+}
+
+/**
+ * Remove profile from the list of profiles.
+ *
+ * @param dev Pointer to the device.
+ * @param meter_profile_id Id of the profile to remove.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_meter_profile_delete(struct rte_eth_dev *dev,
+                             uint32_t meter_profile_id,
+                             struct rte_mtr_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_mtr_profile *profile;
+
+       profile = mrvl_mtr_profile_from_id(priv, meter_profile_id);
+       if (!profile)
+               return -rte_mtr_error_set(error, ENODEV,
+                                         RTE_MTR_ERROR_TYPE_METER_PROFILE_ID,
+                                         NULL, "Profile id does not exist\n");
+
+       if (profile->refcnt)
+               return -rte_mtr_error_set(error, EPERM,
+                                         RTE_MTR_ERROR_TYPE_METER_PROFILE_ID,
+                                         NULL, "Profile is used\n");
+
+       LIST_REMOVE(profile, next);
+       rte_free(profile);
+
+       return 0;
+}
+
+/**
+ * Get meter using its id.
+ *
+ * @param priv Pointer to port's private data.
+ * @param mtr_id Id of the meter.
+ * @returns Pointer to the meter if exists, NULL otherwise.
+ */
+static struct mrvl_mtr *
+mrvl_mtr_from_id(struct mrvl_priv *priv, uint32_t mtr_id)
+{
+       struct mrvl_mtr *mtr = NULL;
+
+       LIST_FOREACH(mtr, &priv->mtrs, next)
+               if (mtr->mtr_id == mtr_id)
+                       break;
+
+       return mtr;
+}
+
+/**
+ * Reserve a policer bit in a bitmap.
+ *
+ * @param plcrs Pointer to the policers bitmap.
+ * @returns Reserved bit number on success, negative value otherwise.
+ */
+static int
+mrvl_reserve_plcr(uint32_t *plcrs)
+{
+       uint32_t i, num;
+
+       num = PP2_CLS_PLCR_NUM;
+       if (num > sizeof(uint32_t) * 8) {
+               num = sizeof(uint32_t) * 8;
+               MRVL_LOG(WARNING, "Number of policers was limited to 32.");
+       }
+
+       for (i = 0; i < num; i++) {
+               uint32_t bit = BIT(i);
+
+               if (!(*plcrs & bit)) {
+                       *plcrs |= bit;
+
+                       return i;
+               }
+       }
+
+       return -1;
+}
+
+/**
+ * Enable meter object.
+ *
+ * @param dev Pointer to the device.
+ * @param mtr_id Id of the meter.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_meter_enable(struct rte_eth_dev *dev, uint32_t mtr_id,
+                 struct rte_mtr_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_mtr *mtr = mrvl_mtr_from_id(priv, mtr_id);
+       struct pp2_cls_plcr_params params;
+       char match[MRVL_MATCH_LEN];
+       struct rte_flow *flow;
+       int ret;
+
+       if (!priv->ppio)
+               return -rte_mtr_error_set(error, EPERM,
+                                         RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                                         NULL, "Port is uninitialized\n");
+
+       if (!mtr)
+               return -rte_mtr_error_set(error, ENODEV,
+                                         RTE_MTR_ERROR_TYPE_MTR_ID, NULL,
+                                         "Meter id does not exist\n");
+
+       if (mtr->plcr)
+               goto skip;
+
+       mtr->plcr_bit = mrvl_reserve_plcr(&priv->used_plcrs);
+       if (mtr->plcr_bit < 0)
+               return -rte_mtr_error_set(error, ENOSPC,
+                                         RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                                         NULL,
+                                         "Failed to reserve plcr entry\n");
+
+       memset(&params, 0, sizeof(params));
+       snprintf(match, sizeof(match), "policer-%d:%d", priv->pp_id,
+                mtr->plcr_bit);
+       params.match = match;
+       params.token_unit = PP2_CLS_PLCR_BYTES_TOKEN_UNIT;
+       params.color_mode = PP2_CLS_PLCR_COLOR_BLIND_MODE;
+       params.cir = mtr->profile->profile.srtcm_rfc2697.cir;
+       params.cbs = mtr->profile->profile.srtcm_rfc2697.cbs;
+       params.ebs = mtr->profile->profile.srtcm_rfc2697.ebs;
+
+       ret = pp2_cls_plcr_init(&params, &mtr->plcr);
+       if (ret) {
+               priv->used_plcrs &= ~BIT(mtr->plcr_bit);
+               mtr->plcr_bit = MRVL_PLCR_BIT_INVALID;
+
+               return -rte_mtr_error_set(error, -ret,
+                                         RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                                         NULL, "Failed to setup policer\n");
+       }
+
+       mtr->enabled = 1;
+skip:
+       /* iterate over flows that have this mtr attached */
+       LIST_FOREACH(flow, &priv->flows, next) {
+               if (flow->mtr != mtr)
+                       continue;
+
+               flow->action.plcr = mtr->plcr;
+
+               ret = pp2_cls_tbl_modify_rule(priv->cls_tbl, &flow->rule,
+                                             &flow->action);
+               if (ret)
+                       return -rte_mtr_error_set(error, -ret,
+                                         RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                                         NULL, "Failed to update cls rule\n");
+       }
+
+       return 0;
+}
+
+/**
+ * Disable meter object.
+ *
+ * @param dev Pointer to the device.
+ * @param mtr_id Id of the meter.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_meter_disable(struct rte_eth_dev *dev, uint32_t mtr_id,
+                      struct rte_mtr_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_mtr *mtr = mrvl_mtr_from_id(priv, mtr_id);
+       struct rte_flow *flow;
+       int ret;
+
+       if (!mtr)
+               return -rte_mtr_error_set(error, ENODEV,
+                                         RTE_MTR_ERROR_TYPE_MTR_ID, NULL,
+                                         "Meter id does not exist\n");
+
+       LIST_FOREACH(flow, &priv->flows, next) {
+               if (flow->mtr != mtr)
+                       continue;
+
+               flow->action.plcr = NULL;
+
+               ret = pp2_cls_tbl_modify_rule(priv->cls_tbl, &flow->rule,
+                                             &flow->action);
+               if (ret)
+                       return -rte_mtr_error_set(error, -ret,
+                                       RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                                       NULL, "Failed to disable meter\n");
+       }
+
+       mtr->enabled = 0;
+
+       return 0;
+}
+
+/**
+ * Create new meter.
+ *
+ * @param dev Pointer to the device.
+ * @param mtr_id Id of the meter.
+ * @param params Pointer to the meter parameters.
+ * @param shared Flag indicating whether the meter is shared.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_create(struct rte_eth_dev *dev, uint32_t mtr_id,
+           struct rte_mtr_params *params, int shared,
+           struct rte_mtr_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_mtr_profile *profile;
+       struct mrvl_mtr *mtr;
+
+       mtr = mrvl_mtr_from_id(priv, mtr_id);
+       if (mtr)
+               return -rte_mtr_error_set(error, EEXIST,
+                                         RTE_MTR_ERROR_TYPE_MTR_ID, NULL,
+                                         "Meter id already exists\n");
+
+       /*
+        * Look up the profile before allocating the meter so that an
+        * unknown profile id does not leak the allocation.
+        */
+       profile = mrvl_mtr_profile_from_id(priv, params->meter_profile_id);
+       if (!profile)
+               return -rte_mtr_error_set(error, EINVAL,
+                                         RTE_MTR_ERROR_TYPE_METER_PROFILE_ID,
+                                         NULL, "Profile id does not exist\n");
+
+       mtr = rte_zmalloc_socket(NULL, sizeof(*mtr), 0, rte_socket_id());
+       if (!mtr)
+               return -rte_mtr_error_set(error, ENOMEM,
+                                         RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                                         NULL, NULL);
+
+       mtr->shared = shared;
+       mtr->mtr_id = mtr_id;
+       mtr->plcr_bit = MRVL_PLCR_BIT_INVALID;
+       mtr->profile = profile;
+       profile->refcnt++;
+       LIST_INSERT_HEAD(&priv->mtrs, mtr, next);
+
+       if (params->meter_enable)
+               return mrvl_meter_enable(dev, mtr_id, error);
+
+       return 0;
+}
+
+/**
+ * Destroy meter object.
+ *
+ * @param dev Pointer to the device.
+ * @param mtr_id Id of the meter object.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_destroy(struct rte_eth_dev *dev, uint32_t mtr_id,
+                struct rte_mtr_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_mtr *mtr;
+
+       if (!priv->ppio)
+               return -rte_mtr_error_set(error, EPERM,
+                                         RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                                         NULL, "Port is uninitialized\n");
+
+       mtr = mrvl_mtr_from_id(priv, mtr_id);
+       if (!mtr)
+               return -rte_mtr_error_set(error, ENODEV,
+                                         RTE_MTR_ERROR_TYPE_MTR_ID, NULL,
+                                         "Meter id does not exist\n");
+
+       if (mtr->refcnt)
+               return -rte_mtr_error_set(error, EPERM,
+                                         RTE_MTR_ERROR_TYPE_MTR_ID, NULL,
+                                         "Meter is used\n");
+
+       LIST_REMOVE(mtr, next);
+       mtr->profile->refcnt--;
+
+       if (mtr->plcr_bit != MRVL_PLCR_BIT_INVALID)
+               priv->used_plcrs &= ~BIT(mtr->plcr_bit);
+
+       if (mtr->plcr)
+               pp2_cls_plcr_deinit(mtr->plcr);
+
+       rte_free(mtr);
+
+       return 0;
+}
+
+/**
+ * Update profile used by the meter.
+ *
+ * @param dev Pointer to the device.
+ * @param mtr_id Id of the meter object.
+ * @param meter_profile_id Id of the new profile.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_meter_profile_update(struct rte_eth_dev *dev, uint32_t mtr_id,
+                         uint32_t meter_profile_id,
+                         struct rte_mtr_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_mtr_profile *profile;
+       struct mrvl_mtr *mtr;
+       int ret, enabled = 0;
+
+       if (!priv->ppio)
+               return -rte_mtr_error_set(error, EPERM,
+                                         RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                                         NULL, "Port is uninitialized\n");
+
+       mtr = mrvl_mtr_from_id(priv, mtr_id);
+       if (!mtr)
+               return -rte_mtr_error_set(error, ENODEV,
+                                         RTE_MTR_ERROR_TYPE_MTR_ID, NULL,
+                                         "Meter id does not exist\n");
+
+       profile = mrvl_mtr_profile_from_id(priv, meter_profile_id);
+       if (!profile)
+               return -rte_mtr_error_set(error, EINVAL,
+                                         RTE_MTR_ERROR_TYPE_METER_PROFILE_ID,
+                                         NULL, "Profile id does not exist\n");
+
+       ret = mrvl_meter_disable(dev, mtr_id, error);
+       if (ret)
+               /* mrvl_meter_disable() has already set the error */
+               return ret;
+
+       if (mtr->plcr) {
+               enabled = 1;
+               pp2_cls_plcr_deinit(mtr->plcr);
+               mtr->plcr = NULL;
+       }
+
+       mtr->profile->refcnt--;
+       mtr->profile = profile;
+       profile->refcnt++;
+
+       if (enabled)
+               return mrvl_meter_enable(dev, mtr_id, error);
+
+       return 0;
+}
+
+const struct rte_mtr_ops mrvl_mtr_ops = {
+       .capabilities_get = mrvl_capabilities_get,
+       .meter_profile_add = mrvl_meter_profile_add,
+       .meter_profile_delete = mrvl_meter_profile_delete,
+       .create = mrvl_create,
+       .destroy = mrvl_destroy,
+       .meter_enable = mrvl_meter_enable,
+       .meter_disable = mrvl_meter_disable,
+       .meter_profile_update = mrvl_meter_profile_update,
+};
+
+/**
+ * Initialize metering resources.
+ *
+ * @param dev Pointer to the device.
+ */
+void
+mrvl_mtr_init(struct rte_eth_dev *dev)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+
+       LIST_INIT(&priv->profiles);
+       LIST_INIT(&priv->mtrs);
+}
+
+/**
+ * Cleanup metering resources.
+ *
+ * @param dev Pointer to the device.
+ */
+void
+mrvl_mtr_deinit(struct rte_eth_dev *dev)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_mtr_profile *profile, *tmp_profile;
+       struct mrvl_mtr *mtr, *tmp_mtr;
+
+       for (mtr = LIST_FIRST(&priv->mtrs);
+            mtr && (tmp_mtr = LIST_NEXT(mtr, next), 1);
+            mtr = tmp_mtr)
+               mrvl_destroy(dev, mtr->mtr_id, NULL);
+
+       for (profile = LIST_FIRST(&priv->profiles);
+            profile && (tmp_profile = LIST_NEXT(profile, next), 1);
+            profile = tmp_profile)
+               mrvl_meter_profile_delete(dev, profile->profile_id, NULL);
+}
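A note on the two teardown loops in mrvl_mtr_deinit() above: LIST_FOREACH
cannot be used when the loop body frees the current element, so each loop
captures the successor before destroying, with the "(tmp = LIST_NEXT(...), 1)"
comma expression keeping the loop condition true. The same idiom reduced to
its essentials (illustrative only, not part of the patch):

    /* Safely drain a <sys/queue.h> LIST whose elements are freed in-loop. */
    #include <sys/queue.h>
    #include <stdlib.h>

    struct elem {
            LIST_ENTRY(elem) link;
    };
    LIST_HEAD(elem_list, elem);

    static void drain(struct elem_list *head)
    {
            struct elem *cur, *next;

            for (cur = LIST_FIRST(head);
                 cur && (next = LIST_NEXT(cur, link), 1); /* grab next first */
                 cur = next) {
                    LIST_REMOVE(cur, link);
                    free(cur);
            }
    }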
diff --git a/drivers/net/mvpp2/mrvl_mtr.h b/drivers/net/mvpp2/mrvl_mtr.h
new file mode 100644 (file)
index 0000000..302a20f
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Marvell International Ltd.
+ * Copyright(c) 2018 Semihalf.
+ * All rights reserved.
+ */
+
+#ifndef _MRVL_MTR_H_
+#define _MRVL_MTR_H_
+
+#include "mrvl_ethdev.h"
+
+void mrvl_mtr_init(struct rte_eth_dev *dev);
+void mrvl_mtr_deinit(struct rte_eth_dev *dev);
+
+#endif /* _MRVL_MTR_H_ */
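mrvl_reserve_plcr() in mrvl_mtr.c above hands out hardware policer slots
first-fit from a single uint32_t occupancy bitmap (hence the cap at 32
entries), and mrvl_destroy() releases a slot by clearing its bit. The
allocator reduced to a standalone sketch, assuming BIT(n) expands to
1u << (n) as the driver uses it:

    /* First-fit slot allocator over a 32-bit occupancy bitmap. */
    #include <stdint.h>

    #define BIT(n) (1u << (n))

    static int reserve_slot(uint32_t *bitmap, unsigned int num_slots)
    {
            unsigned int i;

            if (num_slots > 32)
                    num_slots = 32; /* a uint32_t map tracks at most 32 slots */

            for (i = 0; i < num_slots; i++) {
                    if (!(*bitmap & BIT(i))) {
                            *bitmap |= BIT(i); /* mark slot busy */
                            return i;
                    }
            }

            return -1; /* all slots busy */
    }

    static void release_slot(uint32_t *bitmap, int slot)
    {
            *bitmap &= ~BIT(slot); /* mark slot free again */
    }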
index 71856c1..7fd9703 100644 (file)
 #include <rte_malloc.h>
 #include <rte_string_fns.h>
 
-/* Unluckily, container_of is defined by both DPDK and MUSDK,
- * we'll declare only one version.
- *
- * Note that it is not used in this PMD anyway.
- */
-#ifdef container_of
-#undef container_of
-#endif
-
 #include "mrvl_qos.h"
 
 /* Parsing tokens. Defined conveniently, so that any correction is easy. */
@@ -51,7 +42,8 @@
 #define MRVL_TOK_WRR_WEIGHT "wrr_weight"
 
 /* policer specific configuration tokens */
-#define MRVL_TOK_PLCR_ENABLE "policer_enable"
+#define MRVL_TOK_PLCR "policer"
+#define MRVL_TOK_PLCR_DEFAULT "default_policer"
 #define MRVL_TOK_PLCR_UNIT "token_unit"
 #define MRVL_TOK_PLCR_UNIT_BYTES "bytes"
 #define MRVL_TOK_PLCR_UNIT_PACKETS "packets"
@@ -332,6 +324,7 @@ parse_tc_cfg(struct rte_cfgfile *file, int port, int tc,
        if (rte_cfgfile_num_sections(file, sec_name, strlen(sec_name)) <= 0)
                return 0;
 
+       cfg->port[port].use_global_defaults = 0;
        entry = rte_cfgfile_get_entry(file, sec_name, MRVL_TOK_RXQ);
        if (entry) {
                n = get_entry_values(entry,
@@ -377,6 +370,9 @@ parse_tc_cfg(struct rte_cfgfile *file, int port, int tc,
                cfg->port[port].tc[tc].dscps = n;
        }
 
+       if (!cfg->port[port].setup_policer)
+               return 0;
+
        entry = rte_cfgfile_get_entry(file, sec_name,
                        MRVL_TOK_PLCR_DEFAULT_COLOR);
        if (entry) {
@@ -398,6 +394,85 @@ parse_tc_cfg(struct rte_cfgfile *file, int port, int tc,
        return 0;
 }
 
+/**
+ * Parse a default port policer section.
+ *
+ * @param file Config file handle.
+ * @param port Port number.
+ * @param sec_name Name of the section with the policer configuration.
+ * @param[out] cfg Parsing results.
+ * @returns 0 in case of success, negative value otherwise.
+ */
+static int
+parse_policer(struct rte_cfgfile *file, int port, const char *sec_name,
+               struct mrvl_qos_cfg *cfg)
+{
+       const char *entry;
+       uint32_t val;
+
+       /* Read policer token unit */
+       entry = rte_cfgfile_get_entry(file, sec_name, MRVL_TOK_PLCR_UNIT);
+       if (entry) {
+               if (!strncmp(entry, MRVL_TOK_PLCR_UNIT_BYTES,
+                                       sizeof(MRVL_TOK_PLCR_UNIT_BYTES))) {
+                       cfg->port[port].policer_params.token_unit =
+                               PP2_CLS_PLCR_BYTES_TOKEN_UNIT;
+               } else if (!strncmp(entry, MRVL_TOK_PLCR_UNIT_PACKETS,
+                                       sizeof(MRVL_TOK_PLCR_UNIT_PACKETS))) {
+                       cfg->port[port].policer_params.token_unit =
+                               PP2_CLS_PLCR_PACKETS_TOKEN_UNIT;
+               } else {
+                       MRVL_LOG(ERR, "Unknown token: %s", entry);
+                       return -1;
+               }
+       }
+
+       /* Read policer color mode */
+       entry = rte_cfgfile_get_entry(file, sec_name, MRVL_TOK_PLCR_COLOR);
+       if (entry) {
+               if (!strncmp(entry, MRVL_TOK_PLCR_COLOR_BLIND,
+                                       sizeof(MRVL_TOK_PLCR_COLOR_BLIND))) {
+                       cfg->port[port].policer_params.color_mode =
+                               PP2_CLS_PLCR_COLOR_BLIND_MODE;
+               } else if (!strncmp(entry, MRVL_TOK_PLCR_COLOR_AWARE,
+                                       sizeof(MRVL_TOK_PLCR_COLOR_AWARE))) {
+                       cfg->port[port].policer_params.color_mode =
+                               PP2_CLS_PLCR_COLOR_AWARE_MODE;
+               } else {
+                       MRVL_LOG(ERR, "Error in parsing: %s", entry);
+                       return -1;
+               }
+       }
+
+       /* Read policer cir */
+       entry = rte_cfgfile_get_entry(file, sec_name, MRVL_TOK_PLCR_CIR);
+       if (entry) {
+               if (get_val_securely(entry, &val) < 0)
+                       return -1;
+               cfg->port[port].policer_params.cir = val;
+       }
+
+       /* Read policer cbs */
+       entry = rte_cfgfile_get_entry(file, sec_name, MRVL_TOK_PLCR_CBS);
+       if (entry) {
+               if (get_val_securely(entry, &val) < 0)
+                       return -1;
+               cfg->port[port].policer_params.cbs = val;
+       }
+
+       /* Read policer ebs */
+       entry = rte_cfgfile_get_entry(file, sec_name, MRVL_TOK_PLCR_EBS);
+       if (entry) {
+               if (get_val_securely(entry, &val) < 0)
+                       return -1;
+               cfg->port[port].policer_params.ebs = val;
+       }
+
+       cfg->port[port].setup_policer = 1;
+
+       return 0;
+}
+
 /**
  * Parse QoS configuration - rte_kvargs_process handler.
  *
@@ -444,110 +519,15 @@ mrvl_get_qoscfg(const char *key __rte_unused, const char *path,
                snprintf(sec_name, sizeof(sec_name), "%s %d %s",
                        MRVL_TOK_PORT, n, MRVL_TOK_DEFAULT);
 
+               /* Use global defaults, unless an override occurs */
+               (*cfg)->port[n].use_global_defaults = 1;
+
                /* Skip ports non-existing in configuration. */
                if (rte_cfgfile_num_sections(file, sec_name,
                                strlen(sec_name)) <= 0) {
-                       (*cfg)->port[n].use_global_defaults = 1;
-                       (*cfg)->port[n].mapping_priority =
-                               PP2_CLS_QOS_TBL_VLAN_IP_PRI;
                        continue;
                }
 
-               entry = rte_cfgfile_get_entry(file, sec_name,
-                               MRVL_TOK_DEFAULT_TC);
-               if (entry) {
-                       if (get_val_securely(entry, &val) < 0 ||
-                               val > USHRT_MAX)
-                               return -1;
-                       (*cfg)->port[n].default_tc = (uint8_t)val;
-               } else {
-                       MRVL_LOG(ERR,
-                               "Default Traffic Class required in custom configuration!");
-                       return -1;
-               }
-
-               entry = rte_cfgfile_get_entry(file, sec_name,
-                               MRVL_TOK_PLCR_ENABLE);
-               if (entry) {
-                       if (get_val_securely(entry, &val) < 0)
-                               return -1;
-                       (*cfg)->port[n].policer_enable = val;
-               }
-
-               if ((*cfg)->port[n].policer_enable) {
-                       enum pp2_cls_plcr_token_unit unit;
-
-                       /* Read policer token unit */
-                       entry = rte_cfgfile_get_entry(file, sec_name,
-                                       MRVL_TOK_PLCR_UNIT);
-                       if (entry) {
-                               if (!strncmp(entry, MRVL_TOK_PLCR_UNIT_BYTES,
-                                       sizeof(MRVL_TOK_PLCR_UNIT_BYTES))) {
-                                       unit = PP2_CLS_PLCR_BYTES_TOKEN_UNIT;
-                               } else if (!strncmp(entry,
-                                               MRVL_TOK_PLCR_UNIT_PACKETS,
-                                       sizeof(MRVL_TOK_PLCR_UNIT_PACKETS))) {
-                                       unit = PP2_CLS_PLCR_PACKETS_TOKEN_UNIT;
-                               } else {
-                                       MRVL_LOG(ERR, "Unknown token: %s",
-                                               entry);
-                                       return -1;
-                               }
-                               (*cfg)->port[n].policer_params.token_unit =
-                                       unit;
-                       }
-
-                       /* Read policer color mode */
-                       entry = rte_cfgfile_get_entry(file, sec_name,
-                                       MRVL_TOK_PLCR_COLOR);
-                       if (entry) {
-                               enum pp2_cls_plcr_color_mode mode;
-
-                               if (!strncmp(entry, MRVL_TOK_PLCR_COLOR_BLIND,
-                                       sizeof(MRVL_TOK_PLCR_COLOR_BLIND))) {
-                                       mode = PP2_CLS_PLCR_COLOR_BLIND_MODE;
-                               } else if (!strncmp(entry,
-                                               MRVL_TOK_PLCR_COLOR_AWARE,
-                                       sizeof(MRVL_TOK_PLCR_COLOR_AWARE))) {
-                                       mode = PP2_CLS_PLCR_COLOR_AWARE_MODE;
-                               } else {
-                                       MRVL_LOG(ERR,
-                                               "Error in parsing: %s",
-                                               entry);
-                                       return -1;
-                               }
-                               (*cfg)->port[n].policer_params.color_mode =
-                                       mode;
-                       }
-
-                       /* Read policer cir */
-                       entry = rte_cfgfile_get_entry(file, sec_name,
-                                       MRVL_TOK_PLCR_CIR);
-                       if (entry) {
-                               if (get_val_securely(entry, &val) < 0)
-                                       return -1;
-                               (*cfg)->port[n].policer_params.cir = val;
-                       }
-
-                       /* Read policer cbs */
-                       entry = rte_cfgfile_get_entry(file, sec_name,
-                                       MRVL_TOK_PLCR_CBS);
-                       if (entry) {
-                               if (get_val_securely(entry, &val) < 0)
-                                       return -1;
-                               (*cfg)->port[n].policer_params.cbs = val;
-                       }
-
-                       /* Read policer ebs */
-                       entry = rte_cfgfile_get_entry(file, sec_name,
-                                       MRVL_TOK_PLCR_EBS);
-                       if (entry) {
-                               if (get_val_securely(entry, &val) < 0)
-                                       return -1;
-                               (*cfg)->port[n].policer_params.ebs = val;
-                       }
-               }
-
                /*
                 * Read per-port rate limiting. Setting that will
                 * disable per-queue rate limiting.
@@ -581,6 +561,7 @@ mrvl_get_qoscfg(const char *key __rte_unused, const char *path,
                entry = rte_cfgfile_get_entry(file, sec_name,
                                MRVL_TOK_MAPPING_PRIORITY);
                if (entry) {
+                       (*cfg)->port[n].use_global_defaults = 0;
                        if (!strncmp(entry, MRVL_TOK_VLAN_IP,
                                sizeof(MRVL_TOK_VLAN_IP)))
                                (*cfg)->port[n].mapping_priority =
@@ -606,6 +587,21 @@ mrvl_get_qoscfg(const char *key __rte_unused, const char *path,
                                PP2_CLS_QOS_TBL_VLAN_IP_PRI;
                }
 
+               /* Parse policer configuration (if any) */
+               entry = rte_cfgfile_get_entry(file, sec_name,
+                               MRVL_TOK_PLCR_DEFAULT);
+               if (entry) {
+                       char plcr_name[32];
+
+                       (*cfg)->port[n].use_global_defaults = 0;
+                       if (get_val_securely(entry, &val) < 0)
+                               return -1;
+
+                       /*
+                        * Use a scratch buffer so that sec_name keeps
+                        * naming the port section; it is read again
+                        * below for MRVL_TOK_DEFAULT_TC.
+                        */
+                       snprintf(plcr_name, sizeof(plcr_name), "%s %d",
+                                       MRVL_TOK_PLCR, val);
+                       ret = parse_policer(file, n, plcr_name, *cfg);
+                       if (ret)
+                               return -1;
+               }
+
                for (i = 0; i < MRVL_PP2_RXQ_MAX; ++i) {
                        ret = get_outq_cfg(file, n, i, *cfg);
                        if (ret < 0)
@@ -621,6 +617,21 @@ mrvl_get_qoscfg(const char *key __rte_unused, const char *path,
                                        "Error %d parsing port %d tc %d!\n",
                                        ret, n, i);
                }
+
+               entry = rte_cfgfile_get_entry(file, sec_name,
+                                             MRVL_TOK_DEFAULT_TC);
+               if (entry) {
+                       if (get_val_securely(entry, &val) < 0 ||
+                           val > USHRT_MAX)
+                               return -1;
+                       (*cfg)->port[n].default_tc = (uint8_t)val;
+               } else {
+                       if ((*cfg)->port[n].use_global_defaults == 0) {
+                               MRVL_LOG(ERR,
+                                        "Default Traffic Class required in custom configuration!");
+                               return -1;
+                       }
+               }
        }
 
        return 0;
@@ -643,7 +654,7 @@ setup_tc(struct pp2_ppio_tc_params *param, uint8_t inqs,
        struct pp2_ppio_inq_params *inq_params;
 
        param->pkt_offset = MRVL_PKT_OFFS;
-       param->pools[0] = bpool;
+       param->pools[0][0] = bpool;
        param->default_color = color;
 
        inq_params = rte_zmalloc_socket("inq_params",
@@ -668,6 +679,7 @@ setup_tc(struct pp2_ppio_tc_params *param, uint8_t inqs,
  *
  * @param priv Port's private data.
  * @param params Pointer to the policer's configuration.
+ * @param plcr_id Policer id.
  * @returns 0 in case of success, negative values otherwise.
  */
 static int
@@ -676,17 +688,23 @@ setup_policer(struct mrvl_priv *priv, struct pp2_cls_plcr_params *params)
        char match[16];
        int ret;
 
-       snprintf(match, sizeof(match), "policer-%d:%d\n",
-                       priv->pp_id, priv->ppio_id);
+       /*
+        * At this point no other policers are in use, which means
+        * any policer can be picked up and used as the default one.
+        *
+        * Let's use the 0th one, then. Build the match string the same
+        * way mrvl_mtr.c does, without a trailing newline.
+        */
+       snprintf(match, sizeof(match), "policer-%d:%d", priv->pp_id, 0);
        params->match = match;
 
-       ret = pp2_cls_plcr_init(params, &priv->policer);
+       ret = pp2_cls_plcr_init(params, &priv->default_policer);
        if (ret) {
                MRVL_LOG(ERR, "Failed to setup %s", match);
                return -1;
        }
 
-       priv->ppio_params.inqs_params.plcr = priv->policer;
+       priv->ppio_params.inqs_params.plcr = priv->default_policer;
+       priv->used_plcrs = BIT(0);
 
        return 0;
 }
@@ -818,7 +836,7 @@ mrvl_configure_rxqs(struct mrvl_priv *priv, uint16_t portid,
 
        priv->ppio_params.inqs_params.num_tcs = i;
 
-       if (port_cfg->policer_enable)
+       if (port_cfg->setup_policer)
                return setup_policer(priv, &port_cfg->policer_params);
 
        return 0;
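The qos changes above drop the per-port policer_enable switch in favour of
named policer sections: a port opts in by pointing default_policer at a
section number, and parse_policer() then reads that section. A config-file
sketch follows; only the token strings visible in this hunk (policer,
default_policer, token_unit, bytes/packets) are confirmed, while the
remaining keys and values (cir, cbs, ebs, the color-mode key) and the
"port 0 default" header are assumed from the MRVL_TOK_* macro names:

    [policer 0]
    token_unit = bytes
    color_mode = blind          ; assumed key and value strings
    cir = 10000                 ; illustrative values
    cbs = 64
    ebs = 64

    [port 0 default]
    default_tc = 0              ; required once a custom section exists
    default_policer = 0         ; attach policer section 0 to this port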
index fa9ddec..f03e773 100644 (file)
@@ -43,7 +43,7 @@ struct mrvl_qos_cfg {
                uint8_t default_tc;
                uint8_t use_global_defaults;
                struct pp2_cls_plcr_params policer_params;
-               uint8_t policer_enable;
+               uint8_t setup_policer;
        } port[RTE_MAX_ETHPORTS];
 };
 
diff --git a/drivers/net/mvpp2/mrvl_tm.c b/drivers/net/mvpp2/mrvl_tm.c
new file mode 100644 (file)
index 0000000..3de8997
--- /dev/null
@@ -0,0 +1,1009 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Marvell International Ltd.
+ * Copyright(c) 2018 Semihalf.
+ * All rights reserved.
+ */
+
+#include <rte_malloc.h>
+
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+
+#include "mrvl_tm.h"
+
+/** Minimum rate value in Bytes/s */
+#define MRVL_RATE_MIN (PP2_PPIO_MIN_CIR * 1000 / 8)
+
+/** Minimum burst size in Bytes */
+#define MRVL_BURST_MIN (PP2_PPIO_MIN_CBS * 1000)
+
+/** Maximum burst size in Bytes */
+#define MRVL_BURST_MAX 256000000
+
+/** Maximum WRR weight */
+#define MRVL_WEIGHT_MAX 255
+
+/**
+ * Get maximum port rate in Bytes/s.
+ *
+ * @param dev Pointer to the device.
+ * @param rate Pointer to the rate.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_get_max_rate(struct rte_eth_dev *dev, uint64_t *rate)
+{
+       struct ethtool_cmd edata;
+       struct ifreq req;
+       int ret, fd;
+
+       memset(&edata, 0, sizeof(edata));
+       memset(&req, 0, sizeof(req));
+       edata.cmd = ETHTOOL_GSET;
+       strncpy(req.ifr_name, dev->data->name, sizeof(req.ifr_name) - 1);
+       req.ifr_data = (void *)&edata;
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd == -1)
+               return -1;
+
+       ret = ioctl(fd, SIOCETHTOOL, &req);
+       if (ret == -1) {
+               close(fd);
+               return -1;
+       }
+
+       close(fd);
+
+       /* speed is reported in Mb/s; widen before scaling to B/s */
+       *rate = (uint64_t)ethtool_cmd_speed(&edata) * 1000 * 1000 / 8;
+
+       return 0;
+}
+
+/**
+ * Initialize traffic manager related data.
+ *
+ * @param dev Pointer to the device.
+ * @returns 0 on success, negative value otherwise.
+ */
+int
+mrvl_tm_init(struct rte_eth_dev *dev)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+
+       LIST_INIT(&priv->shaper_profiles);
+       LIST_INIT(&priv->nodes);
+
+       if (priv->rate_max)
+               return 0;
+
+       return mrvl_get_max_rate(dev, &priv->rate_max);
+}
+
+/**
+ * Cleanup traffic manager related data.
+ *
+ * @param dev Pointer to the device.
+ */
+void
+mrvl_tm_deinit(struct rte_eth_dev *dev)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_tm_shaper_profile *profile =
+               LIST_FIRST(&priv->shaper_profiles);
+       struct mrvl_tm_node *node = LIST_FIRST(&priv->nodes);
+
+       while (profile) {
+               struct mrvl_tm_shaper_profile *next = LIST_NEXT(profile, next);
+
+               LIST_REMOVE(profile, next);
+               rte_free(profile);
+               profile = next;
+       }
+
+       while (node) {
+               struct mrvl_tm_node *next = LIST_NEXT(node, next);
+
+               LIST_REMOVE(node, next);
+               rte_free(node);
+               node = next;
+       }
+}
+
+/**
+ * Get node using its id.
+ *
+ * @param priv Pointer to the port's private data.
+ * @param node_id Id used by this node.
+ * @returns Pointer to the node if exists, NULL otherwise.
+ */
+static struct mrvl_tm_node *
+mrvl_node_from_id(struct mrvl_priv *priv, uint32_t node_id)
+{
+       struct mrvl_tm_node *node;
+
+       LIST_FOREACH(node, &priv->nodes, next)
+               if (node->id == node_id)
+                       return node;
+
+       return NULL;
+}
+
+/**
+ * Check whether node is leaf or root.
+ *
+ * @param dev Pointer to the device.
+ * @param node_id Id used by this node.
+ * @param is_leaf Pointer to flag indicating whether node is a leaf.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_node_type_get(struct rte_eth_dev *dev, uint32_t node_id, int *is_leaf,
+                  struct rte_tm_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_tm_node *node;
+
+       if (!is_leaf)
+               return -rte_tm_error_set(error, EINVAL,
+                                        RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                        NULL, NULL);
+
+       node = mrvl_node_from_id(priv, node_id);
+       if (!node)
+               return -rte_tm_error_set(error, ENODEV,
+                                        RTE_TM_ERROR_TYPE_NODE_ID,
+                                        NULL, "Node id does not exist\n");
+
+       *is_leaf = node->type == MRVL_NODE_QUEUE ? 1 : 0;
+
+       return 0;
+}
+
+/**
+ * Get traffic manager capabilities.
+ *
+ * @param dev Pointer to the device (unused).
+ * @param cap Pointer to the capabilities.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_capabilities_get(struct rte_eth_dev *dev,
+                     struct rte_tm_capabilities *cap,
+                     struct rte_tm_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+
+       if (!cap)
+               return -rte_tm_error_set(error, EINVAL,
+                                        RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                        NULL, "Capabilities are missing\n");
+
+       memset(cap, 0, sizeof(*cap));
+
+       cap->n_nodes_max = 1 + dev->data->nb_tx_queues; /* port + txqs number */
+       cap->n_levels_max = 2; /* port level + txqs level */
+       cap->non_leaf_nodes_identical = 1;
+       cap->leaf_nodes_identical = 1;
+
+       cap->shaper_n_max = cap->n_nodes_max;
+       cap->shaper_private_n_max = cap->shaper_n_max;
+       cap->shaper_private_rate_min = MRVL_RATE_MIN;
+       cap->shaper_private_rate_max = priv->rate_max;
+
+       cap->sched_n_children_max = dev->data->nb_tx_queues;
+       cap->sched_sp_n_priorities_max = dev->data->nb_tx_queues;
+       cap->sched_wfq_n_children_per_group_max = dev->data->nb_tx_queues;
+       cap->sched_wfq_n_groups_max = 1;
+       cap->sched_wfq_weight_max = MRVL_WEIGHT_MAX;
+
+       cap->dynamic_update_mask = RTE_TM_UPDATE_NODE_SUSPEND_RESUME |
+                                  RTE_TM_UPDATE_NODE_STATS;
+       cap->stats_mask = RTE_TM_STATS_N_PKTS | RTE_TM_STATS_N_BYTES;
+
+       return 0;
+}
+
+/**
+ * Get traffic manager hierarchy level capabilities.
+ *
+ * @param dev Pointer to the device.
+ * @param level_id Id of the level.
+ * @param cap Pointer to the level capabilities.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_level_capabilities_get(struct rte_eth_dev *dev,
+                           uint32_t level_id,
+                           struct rte_tm_level_capabilities *cap,
+                           struct rte_tm_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+
+       if (!cap)
+               return -rte_tm_error_set(error, EINVAL,
+                                        RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                        NULL, NULL);
+
+       memset(cap, 0, sizeof(*cap));
+
+       if (level_id != MRVL_NODE_PORT && level_id != MRVL_NODE_QUEUE)
+               return -rte_tm_error_set(error, EINVAL,
+                                        RTE_TM_ERROR_TYPE_LEVEL_ID,
+                                        NULL, "Wrong level id\n");
+
+       if (level_id == MRVL_NODE_PORT) {
+               cap->n_nodes_max = 1;
+               cap->n_nodes_nonleaf_max = 1;
+               cap->non_leaf_nodes_identical = 1;
+
+               cap->nonleaf.shaper_private_supported = 1;
+               cap->nonleaf.shaper_private_rate_min = MRVL_RATE_MIN;
+               cap->nonleaf.shaper_private_rate_max = priv->rate_max;
+
+               cap->nonleaf.sched_n_children_max = dev->data->nb_tx_queues;
+               cap->nonleaf.sched_sp_n_priorities_max = 1;
+               cap->nonleaf.sched_wfq_n_children_per_group_max =
+                       dev->data->nb_tx_queues;
+               cap->nonleaf.sched_wfq_n_groups_max = 1;
+               cap->nonleaf.sched_wfq_weight_max = MRVL_WEIGHT_MAX;
+               cap->nonleaf.stats_mask = RTE_TM_STATS_N_PKTS |
+                                         RTE_TM_STATS_N_BYTES;
+       } else { /* level_id == MRVL_NODE_QUEUE */
+               cap->n_nodes_max = dev->data->nb_tx_queues;
+               cap->n_nodes_leaf_max = dev->data->nb_tx_queues;
+               cap->leaf_nodes_identical = 1;
+
+               cap->leaf.shaper_private_supported = 1;
+               cap->leaf.shaper_private_rate_min = MRVL_RATE_MIN;
+               cap->leaf.shaper_private_rate_max = priv->rate_max;
+               cap->leaf.stats_mask = RTE_TM_STATS_N_PKTS;
+       }
+
+       return 0;
+}
+
+/**
+ * Get node capabilities.
+ *
+ * @param dev Pointer to the device.
+ * @param node_id Id of the node.
+ * @param cap Pointer to the capabilities.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_node_capabilities_get(struct rte_eth_dev *dev, uint32_t node_id,
+                          struct rte_tm_node_capabilities *cap,
+                          struct rte_tm_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_tm_node *node;
+
+       if (!cap)
+               return -rte_tm_error_set(error, EINVAL,
+                                        RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                        NULL, NULL);
+
+       memset(cap, 0, sizeof(*cap));
+
+       node = mrvl_node_from_id(priv, node_id);
+       if (!node)
+               return -rte_tm_error_set(error, ENODEV,
+                                        RTE_TM_ERROR_TYPE_NODE_ID,
+                                        NULL, "Node id does not exist\n");
+
+       cap->shaper_private_supported = 1;
+       cap->shaper_private_rate_min = MRVL_RATE_MIN;
+       cap->shaper_private_rate_max = priv->rate_max;
+
+       if (node->type == MRVL_NODE_PORT) {
+               cap->nonleaf.sched_n_children_max = dev->data->nb_tx_queues;
+               cap->nonleaf.sched_sp_n_priorities_max = 1;
+               cap->nonleaf.sched_wfq_n_children_per_group_max =
+                       dev->data->nb_tx_queues;
+               cap->nonleaf.sched_wfq_n_groups_max = 1;
+               cap->nonleaf.sched_wfq_weight_max = MRVL_WEIGHT_MAX;
+               cap->stats_mask = RTE_TM_STATS_N_PKTS | RTE_TM_STATS_N_BYTES;
+       } else {
+               cap->stats_mask = RTE_TM_STATS_N_PKTS;
+       }
+
+       return 0;
+}
+
+/**
+ * Get shaper profile using its id.
+ *
+ * @param priv Pointer to the port's private data.
+ * @param shaper_profile_id Id used by the shaper.
+ * @returns Pointer to the shaper profile if exists, NULL otherwise.
+ */
+static struct mrvl_tm_shaper_profile *
+mrvl_shaper_profile_from_id(struct mrvl_priv *priv, uint32_t shaper_profile_id)
+{
+       struct mrvl_tm_shaper_profile *profile;
+
+       LIST_FOREACH(profile, &priv->shaper_profiles, next)
+               if (profile->id == shaper_profile_id)
+                       return profile;
+
+       return NULL;
+}
+
+/**
+ * Add a new shaper profile.
+ *
+ * @param dev Pointer to the device.
+ * @param shaper_profile_id Id of the new profile.
+ * @param params Pointer to the shaper profile parameters.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_shaper_profile_add(struct rte_eth_dev *dev, uint32_t shaper_profile_id,
+                       struct rte_tm_shaper_params *params,
+                       struct rte_tm_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_tm_shaper_profile *profile;
+
+       if (!params)
+               return -rte_tm_error_set(error, EINVAL,
+                                        RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                        NULL, NULL);
+
+       if (params->committed.rate)
+               return -rte_tm_error_set(error, EINVAL,
+                               RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_RATE,
+                               NULL, "Committed rate not supported\n");
+
+       if (params->committed.size)
+               return -rte_tm_error_set(error, EINVAL,
+                               RTE_TM_ERROR_TYPE_SHAPER_PROFILE_COMMITTED_SIZE,
+                               NULL, "Committed bucket size not supported\n");
+
+       if (params->peak.rate < MRVL_RATE_MIN ||
+           params->peak.rate > priv->rate_max)
+               return -rte_tm_error_set(error, EINVAL,
+                               RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_RATE,
+                               NULL, "Peak rate is out of range\n");
+
+       if (params->peak.size < MRVL_BURST_MIN ||
+           params->peak.size > MRVL_BURST_MAX)
+               return -rte_tm_error_set(error, EINVAL,
+                               RTE_TM_ERROR_TYPE_SHAPER_PROFILE_PEAK_SIZE,
+                               NULL, "Peak size is out of range\n");
+
+       if (shaper_profile_id == RTE_TM_SHAPER_PROFILE_ID_NONE)
+               return -rte_tm_error_set(error, EINVAL,
+                                        RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID,
+                                        NULL, "Wrong shaper profile id\n");
+
+       profile = mrvl_shaper_profile_from_id(priv, shaper_profile_id);
+       if (profile)
+               return -rte_tm_error_set(error, EEXIST,
+                                        RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID,
+                                        NULL, "Profile id already exists\n");
+
+       profile = rte_zmalloc_socket(NULL, sizeof(*profile), 0,
+                                    rte_socket_id());
+       if (!profile)
+               return -rte_tm_error_set(error, ENOMEM,
+                                        RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                        NULL, NULL);
+
+       profile->id = shaper_profile_id;
+       rte_memcpy(&profile->params, params, sizeof(profile->params));
+
+       LIST_INSERT_HEAD(&priv->shaper_profiles, profile, next);
+
+       return 0;
+}
+
+/**
+ * Remove a shaper profile.
+ *
+ * @param dev Pointer to the device.
+ * @param shaper_profile_id Id of the shaper profile.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_shaper_profile_delete(struct rte_eth_dev *dev, uint32_t shaper_profile_id,
+                          struct rte_tm_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_tm_shaper_profile *profile;
+
+       profile = mrvl_shaper_profile_from_id(priv, shaper_profile_id);
+       if (!profile)
+               return -rte_tm_error_set(error, ENODEV,
+                                        RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID,
+                                        NULL, "Profile id does not exist\n");
+
+       if (profile->refcnt)
+               return -rte_tm_error_set(error, EPERM,
+                                        RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID,
+                                        NULL, "Profile is used\n");
+
+       LIST_REMOVE(profile, next);
+       rte_free(profile);
+
+       return 0;
+}
+
+/**
+ * Check node parameters.
+ *
+ * @param dev Pointer to the device.
+ * @param node_id Id used by the node.
+ * @param priority Priority value.
+ * @param weight Weight value.
+ * @param level_id Id of the level.
+ * @param params Pointer to the node parameters.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_node_check_params(struct rte_eth_dev *dev, uint32_t node_id,
+                      uint32_t priority, uint32_t weight, uint32_t level_id,
+                      struct rte_tm_node_params *params,
+                      struct rte_tm_error *error)
+{
+       if (node_id == RTE_TM_NODE_ID_NULL)
+               return -rte_tm_error_set(error, EINVAL,
+                                        RTE_TM_ERROR_TYPE_NODE_ID,
+                                        NULL, "Node id is invalid\n");
+
+       if (priority)
+               return -rte_tm_error_set(error, EINVAL,
+                                        RTE_TM_ERROR_TYPE_NODE_PRIORITY,
+                                        NULL, "Priority should be 0\n");
+
+       if (weight > MRVL_WEIGHT_MAX)
+               return -rte_tm_error_set(error, EINVAL,
+                                        RTE_TM_ERROR_TYPE_NODE_WEIGHT,
+                                        NULL, "Weight is out of range\n");
+
+       if (level_id != MRVL_NODE_PORT && level_id != MRVL_NODE_QUEUE)
+               return -rte_tm_error_set(error, EINVAL,
+                                        RTE_TM_ERROR_TYPE_LEVEL_ID,
+                                        NULL, "Wrong level id\n");
+
+       if (!params)
+               return -rte_tm_error_set(error, EINVAL,
+                                        RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                        NULL, NULL);
+
+       if (params->shared_shaper_id)
+               return -rte_tm_error_set(error, EINVAL,
+                               RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_SHAPER_ID,
+                               NULL, "Shared shaper is not supported\n");
+
+       if (params->n_shared_shapers)
+               return -rte_tm_error_set(error, EINVAL,
+                               RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_SHAPERS,
+                               NULL, "Shared shaper is not supported\n");
+
+       /* verify port (root node) settings */
+       if (node_id >= dev->data->nb_tx_queues) {
+               if (params->nonleaf.wfq_weight_mode)
+                       return -rte_tm_error_set(error, EINVAL,
+                               RTE_TM_ERROR_TYPE_NODE_PARAMS_WFQ_WEIGHT_MODE,
+                               NULL, "WFQ is not supported\n");
+
+               if (params->nonleaf.n_sp_priorities != 1)
+                       return -rte_tm_error_set(error, EINVAL,
+                               RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SP_PRIORITIES,
+                               NULL, "SP is not supported\n");
+
+               if (params->stats_mask & ~(RTE_TM_STATS_N_PKTS |
+                                          RTE_TM_STATS_N_BYTES))
+                       return -rte_tm_error_set(error, EINVAL,
+                               RTE_TM_ERROR_TYPE_NODE_PARAMS_STATS,
+                               NULL,
+                               "Requested port stats are not supported\n");
+
+               return 0;
+       }
+
+       /* verify txq (leaf node) settings */
+       if (params->leaf.cman)
+               return -rte_tm_error_set(error, EINVAL,
+                                        RTE_TM_ERROR_TYPE_NODE_PARAMS_CMAN,
+                                        NULL,
+                                        "Congestion mngmt is not supported\n");
+
+       if (params->leaf.wred.wred_profile_id)
+               return -rte_tm_error_set(error, EINVAL,
+                               RTE_TM_ERROR_TYPE_NODE_PARAMS_WRED_PROFILE_ID,
+                               NULL, "WRED is not supported\n");
+
+       if (params->leaf.wred.shared_wred_context_id)
+               return -rte_tm_error_set(error, EINVAL,
+                       RTE_TM_ERROR_TYPE_NODE_PARAMS_SHARED_WRED_CONTEXT_ID,
+                       NULL, "WRED is not supported\n");
+
+       if (params->leaf.wred.n_shared_wred_contexts)
+               return -rte_tm_error_set(error, EINVAL,
+                       RTE_TM_ERROR_TYPE_NODE_PARAMS_N_SHARED_WRED_CONTEXTS,
+                       NULL, "WRED is not supported\n");
+
+       if (params->stats_mask & ~RTE_TM_STATS_N_PKTS)
+               return -rte_tm_error_set(error, EINVAL,
+                       RTE_TM_ERROR_TYPE_NODE_PARAMS_STATS,
+                       NULL,
+                       "Requested txq stats are not supported\n");
+
+       return 0;
+}
+
+/**
+ * Add a new node.
+ *
+ * @param dev Pointer to the device.
+ * @param node_id Id of the node.
+ * @param parent_node_id Id of the parent node.
+ * @param priority Priority value.
+ * @param weight Weight value.
+ * @param level_id Id of the level.
+ * @param params Pointer to the node parameters.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_node_add(struct rte_eth_dev *dev, uint32_t node_id,
+             uint32_t parent_node_id, uint32_t priority, uint32_t weight,
+             uint32_t level_id, struct rte_tm_node_params *params,
+             struct rte_tm_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_tm_shaper_profile *profile = NULL;
+       struct mrvl_tm_node *node, *parent = NULL;
+       int ret;
+
+       if (priv->ppio)
+               return -rte_tm_error_set(error, EPERM,
+                                        RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                        NULL, "Port is already started\n");
+
+       ret = mrvl_node_check_params(dev, node_id, priority, weight, level_id,
+                                    params, error);
+       if (ret)
+               return ret;
+
+       if (params->shaper_profile_id != RTE_TM_SHAPER_PROFILE_ID_NONE) {
+               profile = mrvl_shaper_profile_from_id(priv,
+                                                params->shaper_profile_id);
+               if (!profile)
+                       return -rte_tm_error_set(error, ENODEV,
+                                       RTE_TM_ERROR_TYPE_SHAPER_PROFILE_ID,
+                                       NULL, "Shaper id does not exist\n");
+       }
+
+       if (parent_node_id == RTE_TM_NODE_ID_NULL) {
+               LIST_FOREACH(node, &priv->nodes, next) {
+                       if (node->type != MRVL_NODE_PORT)
+                               continue;
+
+                       return -rte_tm_error_set(error, EINVAL,
+                                                RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                                NULL, "Root node exists\n");
+               }
+       } else {
+               parent = mrvl_node_from_id(priv, parent_node_id);
+               if (!parent)
+                       return -rte_tm_error_set(error, EINVAL,
+                                       RTE_TM_ERROR_TYPE_NODE_PARENT_NODE_ID,
+                                       NULL, "Node id does not exist\n");
+       }
+
+       node = mrvl_node_from_id(priv, node_id);
+       if (node)
+               return -rte_tm_error_set(error, ENODEV,
+                                        RTE_TM_ERROR_TYPE_NODE_ID,
+                                        NULL, "Node id already exists\n");
+
+       node = rte_zmalloc_socket(NULL, sizeof(*node), 0, rte_socket_id());
+       if (!node)
+               return -rte_tm_error_set(error, ENOMEM,
+                                        RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                        NULL, NULL);
+
+       node->id = node_id;
+       node->type = parent_node_id == RTE_TM_NODE_ID_NULL ? MRVL_NODE_PORT :
+                                                            MRVL_NODE_QUEUE;
+
+       if (parent) {
+               node->parent = parent;
+               parent->refcnt++;
+       }
+
+       if (profile) {
+               node->profile = profile;
+               profile->refcnt++;
+       }
+
+       node->weight = weight;
+       node->stats_mask = params->stats_mask;
+
+       LIST_INSERT_HEAD(&priv->nodes, node, next);
+
+       return 0;
+}
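
For reference, a minimal sketch of the only hierarchy these checks admit, driven
through the public rte_tm API (an editor's illustration, not part of the patch).
Assumptions: the port is configured but stopped, leaf node ids equal Tx queue
ids, and level ids 0 (port) and 1 (queue) satisfy the level check at the top of
mrvl_node_check_params.

#include <string.h>
#include <rte_tm.h>

/* Build one root (port) node plus one leaf node per Tx queue. */
static int
build_mrvl_tm_hierarchy(uint16_t port_id, uint16_t nb_txq)
{
	struct rte_tm_node_params np;
	struct rte_tm_error err;
	uint32_t root_id = nb_txq;	/* any id >= nb_tx_queues is the root */
	uint16_t q;
	int ret;

	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;
	np.nonleaf.n_sp_priorities = 1;	/* the only value accepted above */
	np.stats_mask = RTE_TM_STATS_N_PKTS | RTE_TM_STATS_N_BYTES;

	/* parent RTE_TM_NODE_ID_NULL makes this the MRVL_NODE_PORT node */
	ret = rte_tm_node_add(port_id, root_id, RTE_TM_NODE_ID_NULL,
			      0, 1, 0, &np, &err);
	if (ret)
		return ret;

	memset(&np, 0, sizeof(np));
	np.shaper_profile_id = RTE_TM_SHAPER_PROFILE_ID_NONE;
	np.stats_mask = RTE_TM_STATS_N_PKTS;	/* all a txq supports */

	for (q = 0; q < nb_txq; q++) {
		/* leaf node id must be a valid txq id (< nb_tx_queues) */
		ret = rte_tm_node_add(port_id, q, root_id, 0, 1, 1,
				      &np, &err);
		if (ret)
			return ret;
	}

	return rte_tm_hierarchy_commit(port_id, 1 /* clear_on_fail */, &err);
}
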
+
+/**
+ * Delete a node.
+ *
+ * @param dev Pointer to the device.
+ * @param node_id Id of the node.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_node_delete(struct rte_eth_dev *dev, uint32_t node_id,
+                struct rte_tm_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_tm_node *node;
+
+       if (priv->ppio) {
+               return -rte_tm_error_set(error, EPERM,
+                                        RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                        NULL, "Port is already started\n");
+       }
+
+       node = mrvl_node_from_id(priv, node_id);
+       if (!node)
+               return -rte_tm_error_set(error, ENODEV,
+                                        RTE_TM_ERROR_TYPE_NODE_ID,
+                                        NULL, "Node id does not exist\n");
+
+       if (node->refcnt)
+               return -rte_tm_error_set(error, EPERM,
+                                        RTE_TM_ERROR_TYPE_NODE_ID,
+                                        NULL, "Node id is used\n");
+
+       if (node->parent)
+               node->parent->refcnt--;
+
+       if (node->profile)
+               node->profile->refcnt--;
+
+       LIST_REMOVE(node, next);
+       rte_free(node);
+
+       return 0;
+}
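
A consequence of the refcnt checks above, sketched against the same
illustrative hierarchy: leaves must be deleted before their parent, since a
root with live children is rejected with EPERM.

struct rte_tm_error err;

rte_tm_node_delete(port_id, 0, &err);        /* leaf (txq 0) first */
rte_tm_node_delete(port_id, root_id, &err);  /* root last, refcnt now 0 */
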
+
+/**
+ * Helper for suspending a specific Tx queue.
+ *
+ * @param dev Pointer to the device.
+ * @param node_id Id of the node (Tx queue id).
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int mrvl_node_suspend_one(struct rte_eth_dev *dev, uint32_t node_id,
+                                struct rte_tm_error *error)
+{
+       int ret = dev->dev_ops->tx_queue_stop(dev, node_id);
+       if (ret)
+               return -rte_tm_error_set(error, ret,
+                                        RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                        NULL, "Failed to suspend a txq\n");
+
+       return 0;
+}
+
+/**
+ * Suspend a node.
+ *
+ * @param dev Pointer to the device.
+ * @param node_id Id of the node.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_node_suspend(struct rte_eth_dev *dev, uint32_t node_id,
+                 struct rte_tm_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_tm_node *node, *tmp;
+       int ret;
+
+       node = mrvl_node_from_id(priv, node_id);
+       if (!node)
+               return -rte_tm_error_set(error, ENODEV,
+                                        RTE_TM_ERROR_TYPE_NODE_ID,
+                                        NULL, "Node id does not exist\n");
+
+       if (!node->parent) {
+               LIST_FOREACH(tmp, &priv->nodes, next) {
+                       if (!tmp->parent)
+                               continue;
+
+                       if (node != tmp->parent)
+                               continue;
+
+                       ret = mrvl_node_suspend_one(dev, tmp->id, error);
+                       if (ret)
+                               return ret;
+               }
+
+               return 0;
+       }
+
+       return mrvl_node_suspend_one(dev, node_id, error);
+}
+
+/**
+ * Resume a node.
+ *
+ * @param dev Pointer to the device.
+ * @param node_id Id of the node.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_node_resume(struct rte_eth_dev *dev, uint32_t node_id,
+                struct rte_tm_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_tm_node *node;
+       int ret;
+
+       node = mrvl_node_from_id(priv, node_id);
+       if (!node)
+               return -rte_tm_error_set(error, ENODEV,
+                                        RTE_TM_ERROR_TYPE_NODE_ID,
+                                        NULL, "Node id does not exist\n");
+
+       if (!node->parent)
+               return -rte_tm_error_set(error, EPERM,
+                                        RTE_TM_ERROR_TYPE_NODE_ID,
+                                        NULL, "Cannot resume a port\n");
+
+       ret = dev->dev_ops->tx_queue_start(dev, node_id);
+       if (ret)
+               return -rte_tm_error_set(error, ret,
+                                        RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                        NULL, "Failed to resume a txq\n");
+       return 0;
+}
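
Usage sketch with the same illustrative ids: suspending the root walks every
leaf it feeds and stops the corresponding Tx queue, while resume is accepted
only on leaves, mirroring the two checks above.

struct rte_tm_error err;

rte_tm_node_suspend(port_id, root_id, &err); /* stops all child txqs */
rte_tm_node_resume(port_id, 0, &err);        /* restarts txq 0 only */
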
+
+/**
+ * Apply traffic manager hierarchy.
+ *
+ * @param dev Pointer to the device.
+ * @param clear_on_fail Flag indicating whether to do cleanup on failure.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_hierarchy_commit(struct rte_eth_dev *dev, int clear_on_fail,
+                     struct rte_tm_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_tm_node *node;
+       int ret;
+
+       if (priv->ppio) {
+               ret = -rte_tm_error_set(error, EPERM,
+                                       RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                       NULL, "Port is already started\n");
+               goto out;
+       }
+
+       LIST_FOREACH(node, &priv->nodes, next) {
+               struct pp2_ppio_outq_params *p;
+
+               if (node->type == MRVL_NODE_PORT) {
+                       if (!node->profile)
+                               continue;
+
+                       priv->ppio_params.rate_limit_enable = 1;
+                       priv->ppio_params.rate_limit_params.cir =
+                               node->profile->params.peak.rate * 8 / 1000;
+                       priv->ppio_params.rate_limit_params.cbs =
+                               node->profile->params.peak.size / 1000;
+
+                       MRVL_LOG(INFO,
+                               "Port rate limit overrides txq rate limits");
+
+                       continue;
+               }
+
+               if (node->id >= dev->data->nb_tx_queues) {
+                       ret = -rte_tm_error_set(error, EINVAL,
+                                       RTE_TM_ERROR_TYPE_NODE_ID, NULL,
+                                       "Not enough txqs are configured\n");
+                       goto out;
+               }
+
+               p = &priv->ppio_params.outqs_params.outqs_params[node->id];
+
+               if (node->weight) {
+                       p->sched_mode = PP2_PPIO_SCHED_M_WRR;
+                       p->weight = node->weight;
+               } else {
+                       p->sched_mode = PP2_PPIO_SCHED_M_SP;
+                       p->weight = 0;
+               }
+
+               if (node->profile) {
+                       p->rate_limit_enable = 1;
+                       /* convert Bytes/s to kilo bits/s */
+                       p->rate_limit_params.cir =
+                               node->profile->params.peak.rate * 8 / 1000;
+                       /* convert bytes to kilobytes */
+                       p->rate_limit_params.cbs =
+                               node->profile->params.peak.size / 1000;
+               } else {
+                       p->rate_limit_enable = 0;
+                       p->rate_limit_params.cir = 0;
+                       p->rate_limit_params.cbs = 0;
+               }
+       }
+
+       /* reset to defaults in case the applied TM hierarchy is empty */
+       if (LIST_EMPTY(&priv->nodes)) {
+               int i;
+
+               for (i = 0; i < priv->ppio_params.outqs_params.num_outqs; i++) {
+                       struct pp2_ppio_outq_params *p =
+                               &priv->ppio_params.outqs_params.outqs_params[i];
+
+                       p->sched_mode = PP2_PPIO_SCHED_M_WRR;
+                       p->weight = 0;
+                       p->rate_limit_enable = 0;
+                       p->rate_limit_params.cir = 0;
+                       p->rate_limit_params.cbs = 0;
+               }
+       }
+
+       return 0;
+out:
+       if (clear_on_fail) {
+               mrvl_tm_deinit(dev);
+               mrvl_tm_init(dev);
+       }
+
+       return ret;
+}
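
A worked example of the unit conversion above (values illustrative): rte_tm
expresses the shaper peak in bytes per second and bytes, while the MUSDK rate
limiter takes kilobits per second and kilobytes.

peak.rate = 12500000 B/s  ->  cir = 12500000 * 8 / 1000 = 100000 kbps (100 Mbit/s)
peak.size = 64000 B       ->  cbs = 64000 / 1000        = 64 kB
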
+
+/**
+ * Read statistics counters for current node.
+ *
+ * @param dev Pointer to the device.
+ * @param node_id Id of the node.
+ * @param stats Pointer to the statistics counters.
+ * @param stats_mask Pointer to mask of enabled statistics counters
+ *                   that are retrieved.
+ * @param clear Flag indicating whether to clear statistics.
+ *              Non-zero value clears statistics.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_node_stats_read(struct rte_eth_dev *dev, uint32_t node_id,
+                    struct rte_tm_node_stats *stats, uint64_t *stats_mask,
+                    int clear, struct rte_tm_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_tm_node *node;
+       int ret;
+
+       if (!priv->ppio) {
+               return -rte_tm_error_set(error, EPERM,
+                                        RTE_TM_ERROR_TYPE_UNSPECIFIED,
+                                        NULL, "Port is not started\n");
+       }
+
+       node = mrvl_node_from_id(priv, node_id);
+       if (!node)
+               return -rte_tm_error_set(error, ENODEV,
+                                        RTE_TM_ERROR_TYPE_NODE_ID,
+                                        NULL, "Node id does not exist\n");
+
+       if (stats_mask)
+               *stats_mask = node->stats_mask;
+
+       if (!stats)
+               return 0;
+
+       memset(stats, 0, sizeof(*stats));
+
+       if (!node->parent) {
+               struct pp2_ppio_statistics s;
+
+               memset(&s, 0, sizeof(s));
+               ret = pp2_ppio_get_statistics(priv->ppio, &s, clear);
+               if (ret)
+                       return -rte_tm_error_set(error, -ret,
+                                       RTE_TM_ERROR_TYPE_UNSPECIFIED, NULL,
+                                       "Failed to read port statistics\n");
+
+               if (node->stats_mask & RTE_TM_STATS_N_PKTS)
+                       stats->n_pkts = s.tx_packets;
+
+               if (node->stats_mask & RTE_TM_STATS_N_BYTES)
+                       stats->n_bytes = s.tx_bytes;
+       } else {
+               struct pp2_ppio_outq_statistics s;
+
+               memset(&s, 0, sizeof(s));
+               ret = pp2_ppio_outq_get_statistics(priv->ppio, node_id, &s,
+                                                  clear);
+               if (ret)
+                       return -rte_tm_error_set(error, -ret,
+                                       RTE_TM_ERROR_TYPE_UNSPECIFIED, NULL,
+                                       "Failed to read txq statistics\n");
+
+               if (node->stats_mask & RTE_TM_STATS_N_PKTS)
+                       stats->n_pkts = s.deq_desc;
+       }
+
+       return 0;
+}
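
Polling sketch for the reader above (editor's illustration, assuming the
earlier hierarchy and a started port); only RTE_TM_STATS_N_PKTS is meaningful
for a txq node here.

#include <inttypes.h>
#include <stdio.h>
#include <rte_tm.h>

static void
poll_txq0_stats(uint16_t port_id)
{
	struct rte_tm_node_stats st;
	struct rte_tm_error err;
	uint64_t mask;

	if (rte_tm_node_stats_read(port_id, 0 /* txq 0 */, &st, &mask,
				   0 /* don't clear */, &err) == 0)
		printf("txq0 pkts: %" PRIu64 "\n", st.n_pkts);
}
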
+
+/**
+ * Update node statistics.
+ *
+ * @param dev Pointer to the device.
+ * @param node_id Id of the node.
+ * @param stats_mask Bitmask of statistics counters to be enabled.
+ * @param error Pointer to the error.
+ * @returns 0 on success, negative value otherwise.
+ */
+static int
+mrvl_node_stats_update(struct rte_eth_dev *dev, uint32_t node_id,
+                      uint64_t stats_mask, struct rte_tm_error *error)
+{
+       struct mrvl_priv *priv = dev->data->dev_private;
+       struct mrvl_tm_node *node;
+
+       node = mrvl_node_from_id(priv, node_id);
+       if (!node)
+               return -rte_tm_error_set(error, ENODEV,
+                                        RTE_TM_ERROR_TYPE_NODE_ID,
+                                        NULL, "Node id does not exist\n");
+
+       if (!node->parent) {
+               if (stats_mask & ~(RTE_TM_STATS_N_PKTS | RTE_TM_STATS_N_BYTES))
+                       return -rte_tm_error_set(error, EINVAL,
+                               RTE_TM_ERROR_TYPE_NODE_PARAMS_STATS,
+                               NULL,
+                               "Requested port stats are not supported\n");
+       } else {
+               if (stats_mask & ~RTE_TM_STATS_N_PKTS)
+                       return -rte_tm_error_set(error, EINVAL,
+                               RTE_TM_ERROR_TYPE_NODE_PARAMS_STATS,
+                               NULL,
+                               "Requested txq stats are not supported\n");
+       }
+
+       node->stats_mask = stats_mask;
+
+       return 0;
+}
+
+const struct rte_tm_ops mrvl_tm_ops = {
+       .node_type_get = mrvl_node_type_get,
+       .capabilities_get = mrvl_capabilities_get,
+       .level_capabilities_get = mrvl_level_capabilities_get,
+       .node_capabilities_get = mrvl_node_capabilities_get,
+       .shaper_profile_add = mrvl_shaper_profile_add,
+       .shaper_profile_delete = mrvl_shaper_profile_delete,
+       .node_add = mrvl_node_add,
+       .node_delete = mrvl_node_delete,
+       .node_suspend = mrvl_node_suspend,
+       .node_resume = mrvl_node_resume,
+       .hierarchy_commit = mrvl_hierarchy_commit,
+       .node_stats_update = mrvl_node_stats_update,
+       .node_stats_read = mrvl_node_stats_read,
+};
diff --git a/drivers/net/mvpp2/mrvl_tm.h b/drivers/net/mvpp2/mrvl_tm.h
new file mode 100644 (file)
index 0000000..9d81ede
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Marvell International Ltd.
+ * Copyright(c) 2018 Semihalf.
+ * All rights reserved.
+ */
+
+#ifndef _MRVL_TM_H_
+#define _MRVL_TM_H_
+
+#include "mrvl_ethdev.h"
+
+int mrvl_tm_init(struct rte_eth_dev *dev);
+void mrvl_tm_deinit(struct rte_eth_dev *dev);
+
+#endif /* _MRVL_TM_H_ */
index 3c713af..7148259 100644 (file)
@@ -15,6 +15,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_ethdev.c
 SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rxtx.c
 SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rndis.c
 SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_nvs.c
+SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_vf.c
 
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs
index 78b842b..aa38ee7 100644 (file)
@@ -14,7 +14,9 @@
 #include <rte_memcpy.h>
 #include <rte_string_fns.h>
 #include <rte_memzone.h>
+#include <rte_devargs.h>
 #include <rte_malloc.h>
+#include <rte_kvargs.h>
 #include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_ether.h>
@@ -40,8 +42,7 @@
                            DEV_TX_OFFLOAD_VLAN_INSERT)
 
 #define HN_RX_OFFLOAD_CAPS (DEV_RX_OFFLOAD_CHECKSUM | \
-                           DEV_RX_OFFLOAD_VLAN_STRIP | \
-                           DEV_RX_OFFLOAD_CRC_STRIP)
+                           DEV_RX_OFFLOAD_VLAN_STRIP)
 
 int hn_logtype_init;
 int hn_logtype_driver;
@@ -55,7 +56,7 @@ static const struct hn_xstats_name_off hn_stat_strings[] = {
        { "good_packets",           offsetof(struct hn_stats, packets) },
        { "good_bytes",             offsetof(struct hn_stats, bytes) },
        { "errors",                 offsetof(struct hn_stats, errors) },
-       { "allocation_failed",      offsetof(struct hn_stats, nomemory) },
+       { "ring_full",              offsetof(struct hn_stats, ring_full) },
        { "multicast_packets",      offsetof(struct hn_stats, multicast) },
        { "broadcast_packets",      offsetof(struct hn_stats, broadcast) },
        { "undersize_packets",      offsetof(struct hn_stats, size_bins[0]) },
@@ -105,6 +106,10 @@ eth_dev_vmbus_allocate(struct rte_vmbus_device *dev, size_t private_data_size)
        }
 
        eth_dev->device = &dev->device;
+
+       /* interrupt is simulated */
+       dev->intr_handle.type = RTE_INTR_HANDLE_EXT;
+       eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
        eth_dev->intr_handle = &dev->intr_handle;
 
        return eth_dev;
@@ -113,22 +118,66 @@ eth_dev_vmbus_allocate(struct rte_vmbus_device *dev, size_t private_data_size)
 static void
 eth_dev_vmbus_release(struct rte_eth_dev *eth_dev)
 {
+       /* mac_addrs must not be freed alone because it is part of dev_private */
+       eth_dev->data->mac_addrs = NULL;
        /* free ether device */
        rte_eth_dev_release_port(eth_dev);
 
-       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-               rte_free(eth_dev->data->dev_private);
+       eth_dev->device = NULL;
+       eth_dev->intr_handle = NULL;
+}
 
-       eth_dev->data->dev_private = NULL;
+/* handle "latency=X" from devargs */
+static int hn_set_latency(const char *key, const char *value, void *opaque)
+{
+       struct hn_data *hv = opaque;
+       char *endp = NULL;
+       unsigned long lat;
 
-       /*
-        * Secondary process will check the name to attach.
-        * Clear this field to avoid attaching a released ports.
-        */
-       eth_dev->data->name[0] = '\0';
+       errno = 0;
+       lat = strtoul(value, &endp, 0);
 
-       eth_dev->device = NULL;
-       eth_dev->intr_handle = NULL;
+       if (*value == '\0' || *endp != '\0') {
+               PMD_DRV_LOG(ERR, "invalid parameter %s=%s", key, value);
+               return -EINVAL;
+       }
+
+       PMD_DRV_LOG(DEBUG, "set latency %lu usec", lat);
+
+       hv->latency = lat * 1000;       /* usec to nsec */
+       return 0;
+}
+
+/* Parse device arguments */
+static int hn_parse_args(const struct rte_eth_dev *dev)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct rte_devargs *devargs = dev->device->devargs;
+       static const char * const valid_keys[] = {
+               "latency",
+               NULL
+       };
+       struct rte_kvargs *kvlist;
+       int ret;
+
+       if (!devargs)
+               return 0;
+
+       PMD_INIT_LOG(DEBUG, "device args %s %s",
+                    devargs->name, devargs->args);
+
+       kvlist = rte_kvargs_parse(devargs->args, valid_keys);
+       if (!kvlist) {
+               PMD_DRV_LOG(NOTICE, "invalid parameters");
+               return -EINVAL;
+       }
+
+       ret = rte_kvargs_process(kvlist, "latency", hn_set_latency, hv);
+       if (ret)
+               PMD_DRV_LOG(ERR, "Unable to process latency arg");
+
+       rte_kvargs_free(kvlist);
+       return ret;
 }
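
A standalone sketch of the path above (editor's illustration; hv is the
adapter private data as elsewhere in the patch). The devargs string
"latency=50" invokes hn_set_latency("latency", "50", hv), storing
50 * 1000 = 50000 ns, which equals the HN_CHAN_LATENCY_NS default defined
later in hn_var.h.

static const char * const keys[] = { "latency", NULL };
struct rte_kvargs *kv = rte_kvargs_parse("latency=50", keys);

if (kv) {
	rte_kvargs_process(kv, "latency", hn_set_latency, hv);
	rte_kvargs_free(kv);
}
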
 
 /* Update link status.
@@ -136,9 +185,9 @@ eth_dev_vmbus_release(struct rte_eth_dev *eth_dev)
  *   means block this call until link is up.
  *   which is not worth supporting.
  */
-static int
+int
 hn_dev_link_update(struct rte_eth_dev *dev,
-                  __rte_unused int wait_to_complete)
+                  int wait_to_complete)
 {
        struct hn_data *hv = dev->data->dev_private;
        struct rte_eth_link link, old;
@@ -152,6 +201,8 @@ hn_dev_link_update(struct rte_eth_dev *dev,
 
        hn_rndis_get_linkspeed(hv);
 
+       hn_vf_link_update(dev, wait_to_complete);
+
        link = (struct rte_eth_link) {
                .link_duplex = ETH_LINK_FULL_DUPLEX,
                .link_autoneg = ETH_LINK_SPEED_FIXED,
@@ -190,6 +241,7 @@ static void hn_dev_info_get(struct rte_eth_dev *dev,
        dev_info->max_tx_queues = hv->max_queues;
 
        hn_rndis_get_offload(hv, dev_info);
+       hn_vf_info_get(hv, dev_info);
 }
 
 static void
@@ -198,6 +250,7 @@ hn_dev_promiscuous_enable(struct rte_eth_dev *dev)
        struct hn_data *hv = dev->data->dev_private;
 
        hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_PROMISCUOUS);
+       hn_vf_promiscuous_enable(dev);
 }
 
 static void
@@ -210,6 +263,7 @@ hn_dev_promiscuous_disable(struct rte_eth_dev *dev)
        if (dev->data->all_multicast)
                filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
        hn_rndis_set_rxfilter(hv, filter);
+       hn_vf_promiscuous_disable(dev);
 }
 
 static void
@@ -220,6 +274,7 @@ hn_dev_allmulticast_enable(struct rte_eth_dev *dev)
        hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_DIRECTED |
                              NDIS_PACKET_TYPE_ALL_MULTICAST |
                        NDIS_PACKET_TYPE_BROADCAST);
+       hn_vf_allmulticast_enable(dev);
 }
 
 static void
@@ -229,6 +284,16 @@ hn_dev_allmulticast_disable(struct rte_eth_dev *dev)
 
        hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_DIRECTED |
                             NDIS_PACKET_TYPE_BROADCAST);
+       hn_vf_allmulticast_disable(dev);
+}
+
+static int
+hn_dev_mc_addr_list(struct rte_eth_dev *dev,
+                    struct ether_addr *mc_addr_set,
+                    uint32_t nb_mc_addr)
+{
+       /* No filtering on the synthetic path, but can do it on VF */
+       return hn_vf_mc_addr_list(dev, mc_addr_set, nb_mc_addr);
 }
 
 /* Setup shared rx/tx queue data */
@@ -264,6 +329,8 @@ static int hn_subchan_configure(struct hn_data *hv,
                        return err;
                }
 
+               rte_vmbus_set_latency(hv->vmbus, new_sc, hv->latency);
+
                retry = 0;
                chn_index = rte_vmbus_sub_channel_index(new_sc);
                if (chn_index == 0 || chn_index > hv->max_queues) {
@@ -338,7 +405,7 @@ static int hn_dev_configure(struct rte_eth_dev *dev)
                }
        }
 
-       return 0;
+       return hn_vf_configure(dev, dev_conf);
 }
 
 static int hn_dev_stats_get(struct rte_eth_dev *dev,
@@ -346,6 +413,8 @@ static int hn_dev_stats_get(struct rte_eth_dev *dev,
 {
        unsigned int i;
 
+       hn_vf_stats_get(dev, stats);
+
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
                const struct hn_tx_queue *txq = dev->data->tx_queues[i];
 
@@ -354,7 +423,7 @@ static int hn_dev_stats_get(struct rte_eth_dev *dev,
 
                stats->opackets += txq->stats.packets;
                stats->obytes += txq->stats.bytes;
-               stats->oerrors += txq->stats.errors + txq->stats.nomemory;
+               stats->oerrors += txq->stats.errors;
 
                if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
                        stats->q_opackets[i] = txq->stats.packets;
@@ -371,7 +440,7 @@ static int hn_dev_stats_get(struct rte_eth_dev *dev,
                stats->ipackets += rxq->stats.packets;
                stats->ibytes += rxq->stats.bytes;
                stats->ierrors += rxq->stats.errors;
-               stats->imissed += rxq->ring_full;
+               stats->imissed += rxq->stats.ring_full;
 
                if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) {
                        stats->q_ipackets[i] = rxq->stats.packets;
@@ -405,22 +474,41 @@ hn_dev_stats_reset(struct rte_eth_dev *dev)
                        continue;
 
                memset(&rxq->stats, 0, sizeof(struct hn_stats));
-               rxq->ring_full = 0;
        }
 }
 
+static void
+hn_dev_xstats_reset(struct rte_eth_dev *dev)
+{
+       hn_dev_stats_reset(dev);
+       hn_vf_xstats_reset(dev);
+}
+
+static int
+hn_dev_xstats_count(struct rte_eth_dev *dev)
+{
+       int ret, count;
+
+       count = dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings);
+       count += dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);
+
+       ret = hn_vf_xstats_get_names(dev, NULL, 0);
+       if (ret < 0)
+               return ret;
+
+       return count + ret;
+}
+
 static int
 hn_dev_xstats_get_names(struct rte_eth_dev *dev,
                        struct rte_eth_xstat_name *xstats_names,
-                       __rte_unused unsigned int limit)
+                       unsigned int limit)
 {
        unsigned int i, t, count = 0;
-
-       PMD_INIT_FUNC_TRACE();
+       int ret;
 
        if (!xstats_names)
-               return dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings)
-                       + dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);
+               return hn_dev_xstats_count(dev);
 
        /* Note: limit checked in rte_eth_xstats_names() */
        for (i = 0; i < dev->data->nb_tx_queues; i++) {
@@ -429,6 +517,9 @@ hn_dev_xstats_get_names(struct rte_eth_dev *dev,
                if (!txq)
                        continue;
 
+               if (count >= limit)
+                       break;
+
                for (t = 0; t < RTE_DIM(hn_stat_strings); t++)
                        snprintf(xstats_names[count++].name,
                                 RTE_ETH_XSTATS_NAME_SIZE,
@@ -441,6 +532,9 @@ hn_dev_xstats_get_names(struct rte_eth_dev *dev,
                if (!rxq)
                        continue;
 
+               if (count >= limit)
+                       break;
+
                for (t = 0; t < RTE_DIM(hn_stat_strings); t++)
                        snprintf(xstats_names[count++].name,
                                 RTE_ETH_XSTATS_NAME_SIZE,
@@ -448,7 +542,12 @@ hn_dev_xstats_get_names(struct rte_eth_dev *dev,
                                 hn_stat_strings[t].name);
        }
 
-       return count;
+       ret = hn_vf_xstats_get_names(dev, xstats_names + count,
+                                    limit - count);
+       if (ret < 0)
+               return ret;
+
+       return count + ret;
 }
 
 static int
@@ -457,11 +556,9 @@ hn_dev_xstats_get(struct rte_eth_dev *dev,
                  unsigned int n)
 {
        unsigned int i, t, count = 0;
-
-       const unsigned int nstats =
-               dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings)
-               + dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);
+       const unsigned int nstats = hn_dev_xstats_count(dev);
        const char *stats;
+       int ret;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -492,26 +589,33 @@ hn_dev_xstats_get(struct rte_eth_dev *dev,
                                (stats + hn_stat_strings[t].offset);
        }
 
-       return count;
+       ret = hn_vf_xstats_get(dev, xstats + count, n - count);
+       if (ret < 0)
+               return ret;
+
+       return count + ret;
 }
 
 static int
 hn_dev_start(struct rte_eth_dev *dev)
 {
        struct hn_data *hv = dev->data->dev_private;
+       int error;
 
        PMD_INIT_FUNC_TRACE();
 
-       /* check if lsc interrupt feature is enabled */
-       if (dev->data->dev_conf.intr_conf.lsc) {
-               PMD_DRV_LOG(ERR, "link status not supported yet");
-               return -ENOTSUP;
-       }
+       error = hn_rndis_set_rxfilter(hv,
+                                     NDIS_PACKET_TYPE_BROADCAST |
+                                     NDIS_PACKET_TYPE_ALL_MULTICAST |
+                                     NDIS_PACKET_TYPE_DIRECTED);
+       if (error)
+               return error;
+
+       error = hn_vf_start(dev);
+       if (error)
+               hn_rndis_set_rxfilter(hv, 0);
 
-       return hn_rndis_set_rxfilter(hv,
-                                    NDIS_PACKET_TYPE_BROADCAST |
-                                    NDIS_PACKET_TYPE_ALL_MULTICAST |
-                                    NDIS_PACKET_TYPE_DIRECTED);
+       return error;
 }
 
 static void
@@ -522,12 +626,15 @@ hn_dev_stop(struct rte_eth_dev *dev)
        PMD_INIT_FUNC_TRACE();
 
        hn_rndis_set_rxfilter(hv, 0);
+       hn_vf_stop(dev);
 }
 
 static void
 hn_dev_close(struct rte_eth_dev *dev __rte_unused)
 {
        PMD_INIT_LOG(DEBUG, "close");
+
+       hn_vf_close(dev);
 }
 
 static const struct eth_dev_ops hn_eth_dev_ops = {
@@ -536,22 +643,23 @@ static const struct eth_dev_ops hn_eth_dev_ops = {
        .dev_stop               = hn_dev_stop,
        .dev_close              = hn_dev_close,
        .dev_infos_get          = hn_dev_info_get,
-       .txq_info_get           = hn_dev_tx_queue_info,
-       .rxq_info_get           = hn_dev_rx_queue_info,
+       .dev_supported_ptypes_get = hn_vf_supported_ptypes,
        .promiscuous_enable     = hn_dev_promiscuous_enable,
        .promiscuous_disable    = hn_dev_promiscuous_disable,
        .allmulticast_enable    = hn_dev_allmulticast_enable,
        .allmulticast_disable   = hn_dev_allmulticast_disable,
+       .set_mc_addr_list       = hn_dev_mc_addr_list,
        .tx_queue_setup         = hn_dev_tx_queue_setup,
        .tx_queue_release       = hn_dev_tx_queue_release,
+       .tx_done_cleanup        = hn_dev_tx_done_cleanup,
        .rx_queue_setup         = hn_dev_rx_queue_setup,
        .rx_queue_release       = hn_dev_rx_queue_release,
        .link_update            = hn_dev_link_update,
        .stats_get              = hn_dev_stats_get,
+       .stats_reset            = hn_dev_stats_reset,
        .xstats_get             = hn_dev_xstats_get,
        .xstats_get_names       = hn_dev_xstats_get_names,
-       .stats_reset            = hn_dev_stats_reset,
-       .xstats_reset           = hn_dev_stats_reset,
+       .xstats_reset           = hn_dev_xstats_reset,
 };
 
 /*
@@ -623,12 +731,27 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
        hv->rxbuf_res = &vmbus->resource[HV_RECV_BUF_MAP];
        hv->chim_res  = &vmbus->resource[HV_SEND_BUF_MAP];
        hv->port_id = eth_dev->data->port_id;
+       hv->latency = HN_CHAN_LATENCY_NS;
+
+       err = hn_parse_args(eth_dev);
+       if (err)
+               return err;
+
+       strlcpy(hv->owner.name, eth_dev->device->name,
+               RTE_ETH_MAX_OWNER_NAME_LEN);
+       err = rte_eth_dev_owner_new(&hv->owner.id);
+       if (err) {
+               PMD_INIT_LOG(ERR, "Cannot get owner id");
+               return err;
+       }
 
        /* Initialize primary channel input for control operations */
        err = rte_vmbus_chan_open(vmbus, &hv->channels[0]);
        if (err)
                return err;
 
+       rte_vmbus_set_latency(hv->vmbus, hv->channels[0], hv->latency);
+
        hv->primary = hn_rx_queue_alloc(hv, 0,
                                        eth_dev->device->numa_node);
 
@@ -657,6 +780,15 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
 
        hv->max_queues = RTE_MIN(rxr_cnt, (unsigned int)max_chan);
 
+       /* If VF was reported but not added, do it now */
+       if (hv->vf_present && !hv->vf_dev) {
+               PMD_INIT_LOG(DEBUG, "Adding VF device");
+
+               err = hn_vf_add(eth_dev, hv);
+               if (err)
+                       goto failed;
+       }
+
        return 0;
 
 failed:
@@ -686,8 +818,7 @@ eth_hn_dev_uninit(struct rte_eth_dev *eth_dev)
        hn_detach(hv);
        rte_vmbus_chan_close(hv->primary->chan);
        rte_free(hv->primary);
-
-       eth_dev->data->mac_addrs = NULL;
+       rte_eth_dev_owner_delete(hv->owner.id);
 
        return 0;
 }
index 77d3b83..9690c5f 100644 (file)
@@ -279,14 +279,13 @@ hn_nvs_conn_chim(struct hn_data *hv)
                               NVS_TYPE_CHIM_CONNRESP);
        if (error) {
                PMD_DRV_LOG(ERR, "exec nvs chim conn failed");
-               goto cleanup;
+               return error;
        }
 
        if (resp.status != NVS_STATUS_OK) {
                PMD_DRV_LOG(ERR, "nvs chim conn failed: %x",
                            resp.status);
-               error = -EIO;
-               goto cleanup;
+               return -EIO;
        }
 
        sectsz = resp.sectsz;
@@ -295,7 +294,8 @@ hn_nvs_conn_chim(struct hn_data *hv)
                PMD_DRV_LOG(NOTICE,
                            "invalid chimney sending buffer section size: %u",
                            sectsz);
-               return 0;
+               error = -EINVAL;
+               goto cleanup;
        }
 
        hv->chim_szmax = sectsz;
@@ -304,11 +304,6 @@ hn_nvs_conn_chim(struct hn_data *hv)
        PMD_DRV_LOG(INFO, "send buffer %lu section size:%u, count:%u",
                    len, hv->chim_szmax, hv->chim_cnt);
 
-       if (len % hv->chim_szmax != 0) {
-               PMD_DRV_LOG(NOTICE,
-                           "chimney sending sections are not properly aligned");
-       }
-
        /* Done! */
        return 0;
 
@@ -537,10 +532,19 @@ void
 hn_nvs_set_datapath(struct hn_data *hv, uint32_t path)
 {
        struct hn_nvs_datapath dp;
+       int error;
+
+       PMD_DRV_LOG(DEBUG, "set datapath %s",
+                   path ? "VF" : "Synthetic");
 
        memset(&dp, 0, sizeof(dp));
        dp.type = NVS_TYPE_SET_DATAPATH;
        dp.active_path = path;
 
-       hn_nvs_req_send(hv, &dp, sizeof(dp));
+       error = hn_nvs_req_send(hv, &dp, sizeof(dp));
+       if (error) {
+               PMD_DRV_LOG(ERR,
+                           "send set datapath failed: %d",
+                           error);
+       }
 }
index 984a9c1..2563fd8 100644 (file)
@@ -105,6 +105,12 @@ struct hn_nvs_ndis_init {
        uint8_t         rsvd[28];
 } __rte_packed;
 
+struct hn_nvs_vf_association {
+       uint32_t        type;   /* NVS_TYPE_VFASSOC_NOTE */
+       uint32_t        allocated;
+       uint32_t        serial;
+} __rte_packed;
+
 #define NVS_DATAPATH_SYNTHETIC 0
 #define NVS_DATAPATH_VF                1
 
@@ -207,6 +213,9 @@ void        hn_nvs_detach(struct hn_data *hv);
 void   hn_nvs_ack_rxbuf(struct vmbus_channel *chan, uint64_t tid);
 int    hn_nvs_alloc_subchans(struct hn_data *hv, uint32_t *nsubch);
 void   hn_nvs_set_datapath(struct hn_data *hv, uint32_t path);
+void   hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
+                             const struct vmbus_chanpkt_hdr *hdr,
+                             const void *data);
 
 static inline int
 hn_nvs_send(struct vmbus_channel *chan, uint16_t flags,
index bde3396..0134ecb 100644 (file)
@@ -11,6 +11,7 @@
 #include <errno.h>
 #include <unistd.h>
 
+#include <rte_ethdev_driver.h>
 #include <rte_ethdev.h>
 #include <rte_string_fns.h>
 #include <rte_memzone.h>
@@ -281,7 +282,7 @@ static int hn_nvs_send_rndis_ctrl(struct vmbus_channel *chan,
                                  &nvs_rndis, sizeof(nvs_rndis), 0U, NULL);
 }
 
-void hn_rndis_link_status(struct hn_data *hv __rte_unused, const void *msg)
+void hn_rndis_link_status(struct rte_eth_dev *dev, const void *msg)
 {
        const struct rndis_status_msg *indicate = msg;
 
@@ -290,15 +291,19 @@ void hn_rndis_link_status(struct hn_data *hv __rte_unused, const void *msg)
        PMD_DRV_LOG(DEBUG, "link status %#x", indicate->status);
 
        switch (indicate->status) {
-       case RNDIS_STATUS_LINK_SPEED_CHANGE:
        case RNDIS_STATUS_NETWORK_CHANGE:
        case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
                /* ignore not in DPDK API */
                break;
 
+       case RNDIS_STATUS_LINK_SPEED_CHANGE:
        case RNDIS_STATUS_MEDIA_CONNECT:
        case RNDIS_STATUS_MEDIA_DISCONNECT:
-               /* TODO handle as LSC interrupt  */
+               if (dev->data->dev_conf.intr_conf.lsc &&
+                   hn_dev_link_update(dev, 0) == 0)
+                       _rte_eth_dev_callback_process(dev,
+                                                     RTE_ETH_EVENT_INTR_LSC,
+                                                     NULL);
                break;
        default:
                PMD_DRV_LOG(NOTICE, "unknown RNDIS indication: %#x",
@@ -382,7 +387,7 @@ static int hn_rndis_exec1(struct hn_data *hv,
        if (comp) {
                /* Poll primary channel until response received */
                while (hv->rndis_pending == rid)
-                       hn_process_events(hv, 0);
+                       hn_process_events(hv, 0, 1);
 
                memcpy(comp, hv->rndis_resp, comp_len);
        }
@@ -892,8 +897,7 @@ int hn_rndis_get_offload(struct hn_data *hv,
            == HN_NDIS_LSOV2_CAP_IP6)
                dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO;
 
-       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP |
-                                   DEV_RX_OFFLOAD_CRC_STRIP;
+       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
 
        if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4)
                dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_IPV4_CKSUM;
@@ -909,6 +913,37 @@ int hn_rndis_get_offload(struct hn_data *hv,
        return 0;
 }
 
+uint32_t
+hn_rndis_get_ptypes(struct hn_data *hv)
+{
+       struct ndis_offload hwcaps;
+       uint32_t ptypes;
+       int error;
+
+       memset(&hwcaps, 0, sizeof(hwcaps));
+
+       error = hn_rndis_query_hwcaps(hv, &hwcaps);
+       if (error) {
+               PMD_DRV_LOG(ERR, "hwcaps query failed: %d", error);
+               return RTE_PTYPE_L2_ETHER;
+       }
+
+       ptypes = RTE_PTYPE_L2_ETHER;
+
+       if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4)
+               ptypes |= RTE_PTYPE_L3_IPV4;
+
+       if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) ||
+           (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6))
+               ptypes |= RTE_PTYPE_L4_TCP;
+
+       if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) ||
+           (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6))
+               ptypes |= RTE_PTYPE_L4_UDP;
+
+       return ptypes;
+}
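
Sketch of consuming the mask this helper returns (editor's illustration;
hn_ptypes_from_mask is a hypothetical name): translating the bitmask into the
zero-terminated array a dev_supported_ptypes_get callback hands back.

static const uint32_t *
hn_ptypes_from_mask(struct hn_data *hv)
{
	static uint32_t ptypes[5];
	uint32_t mask = hn_rndis_get_ptypes(hv);
	unsigned int n = 0;

	ptypes[n++] = RTE_PTYPE_L2_ETHER;
	if (mask & RTE_PTYPE_L3_IPV4)
		ptypes[n++] = RTE_PTYPE_L3_IPV4;
	if (mask & RTE_PTYPE_L4_TCP)
		ptypes[n++] = RTE_PTYPE_L4_TCP;
	if (mask & RTE_PTYPE_L4_UDP)
		ptypes[n++] = RTE_PTYPE_L4_UDP;
	ptypes[n] = RTE_PTYPE_UNKNOWN;	/* array terminator */

	return ptypes;
}
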
+
 int
 hn_rndis_set_rxfilter(struct hn_data *hv, uint32_t filter)
 {
index 89e2e6b..319b497 100644 (file)
@@ -6,7 +6,7 @@ struct hn_data;
 
 void hn_rndis_receive_response(struct hn_data *hv,
                              const void *data, uint32_t len);
-void   hn_rndis_link_status(struct hn_data *hv, const void *data);
+void   hn_rndis_link_status(struct rte_eth_dev *dev, const void *msg);
 int    hn_rndis_attach(struct hn_data *hv);
 void   hn_rndis_detach(struct hn_data *hv);
 int    hn_rndis_get_eaddr(struct hn_data *hv, uint8_t *eaddr);
@@ -24,6 +24,7 @@ int   hn_rndis_query_rsscaps(struct hn_data *hv,
                               unsigned int *rxr_cnt0);
 int    hn_rndis_conf_rss(struct hn_data *hv,
                          const struct rte_eth_rss_conf *rss_conf);
+uint32_t hn_rndis_get_ptypes(struct hn_data *hv);
 
 #ifdef RTE_LIBRTE_NETVSC_DEBUG_DUMP
 void hn_rndis_dump(const void *buf);
index 02ef27e..f4a3664 100644 (file)
@@ -10,6 +10,7 @@
 #include <errno.h>
 #include <unistd.h>
 #include <strings.h>
+#include <malloc.h>
 
 #include <rte_ethdev.h>
 #include <rte_memcpy.h>
@@ -216,6 +217,7 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
        struct hn_data *hv = dev->data->dev_private;
        struct hn_tx_queue *txq;
        uint32_t tx_free_thresh;
+       int err;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -245,8 +247,14 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
 
        hn_reset_txagg(txq);
 
-       dev->data->tx_queues[queue_idx] = txq;
+       err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
+                                    socket_id, tx_conf);
+       if (err) {
+               rte_free(txq);
+               return err;
+       }
 
+       dev->data->tx_queues[queue_idx] = txq;
        return 0;
 }
 
@@ -269,17 +277,6 @@ hn_dev_tx_queue_release(void *arg)
        rte_free(txq);
 }
 
-void
-hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
-                    struct rte_eth_txq_info *qinfo)
-{
-       struct hn_data *hv = dev->data->dev_private;
-       struct hn_tx_queue *txq = dev->data->rx_queues[queue_idx];
-
-       qinfo->conf.tx_free_thresh = txq->free_thresh;
-       qinfo->nb_desc = hv->tx_pool->size;
-}
-
 static void
 hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
                      unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
@@ -533,7 +530,7 @@ static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb,
        hn_update_packet_stats(&rxq->stats, m);
 
        if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) {
-               ++rxq->ring_full;
+               ++rxq->stats.ring_full;
                rte_pktmbuf_free(m);
        }
 }
@@ -600,7 +597,7 @@ error:
 }
 
 static void
-hn_rndis_receive(const struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
+hn_rndis_receive(struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
                 struct hn_rx_bufinfo *rxb, void *buf, uint32_t len)
 {
        const struct rndis_msghdr *hdr = buf;
@@ -612,7 +609,7 @@ hn_rndis_receive(const struct rte_eth_dev *dev, struct hn_rx_queue *rxq,
                break;
 
        case RNDIS_INDICATE_STATUS_MSG:
-               hn_rndis_link_status(rxq->hv, buf);
+               hn_rndis_link_status(dev, buf);
                break;
 
        case RNDIS_INITIALIZE_CMPLT:
@@ -712,22 +709,59 @@ hn_nvs_handle_rxbuf(struct rte_eth_dev *dev,
        hn_rx_buf_release(rxb);
 }
 
+/*
+ * Called when NVS inband events are received.
+ * Dispatches the message to the matching handler based on the
+ * NVS type (VF association, transmit indirection table update).
+ */
+static void hn_nvs_handle_notify(struct rte_eth_dev *dev,
+                                const struct vmbus_chanpkt_hdr *pkt,
+                                const void *data)
+{
+       const struct hn_nvs_hdr *hdr = data;
+
+       switch (hdr->type) {
+       case NVS_TYPE_TXTBL_NOTE:
+               /* Transmit indirection table has locking problems
+                * in DPDK and is therefore not implemented
+                */
+               PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table");
+               break;
+
+       case NVS_TYPE_VFASSOC_NOTE:
+               hn_nvs_handle_vfassoc(dev, pkt, data);
+               break;
+
+       default:
+               PMD_DRV_LOG(INFO,
+                           "got notify, nvs type %u", hdr->type);
+       }
+}
+
 struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
                                      uint16_t queue_id,
                                      unsigned int socket_id)
 {
        struct hn_rx_queue *rxq;
 
-       rxq = rte_zmalloc_socket("HN_RXQ",
-                                sizeof(*rxq) + HN_RXQ_EVENT_DEFAULT,
+       rxq = rte_zmalloc_socket("HN_RXQ", sizeof(*rxq),
                                 RTE_CACHE_LINE_SIZE, socket_id);
-       if (rxq) {
-               rxq->hv = hv;
-               rxq->chan = hv->channels[queue_id];
-               rte_spinlock_init(&rxq->ring_lock);
-               rxq->port_id = hv->port_id;
-               rxq->queue_id = queue_id;
+       if (!rxq)
+               return NULL;
+
+       rxq->hv = hv;
+       rxq->chan = hv->channels[queue_id];
+       rte_spinlock_init(&rxq->ring_lock);
+       rxq->port_id = hv->port_id;
+       rxq->queue_id = queue_id;
+       rxq->event_sz = HN_RXQ_EVENT_DEFAULT;
+       rxq->event_buf = rte_malloc_socket("HN_EVENTS", HN_RXQ_EVENT_DEFAULT,
+                                          RTE_CACHE_LINE_SIZE, socket_id);
+       if (!rxq->event_buf) {
+               rte_free(rxq);
+               return NULL;
        }
+
        return rxq;
 }
 
@@ -735,13 +769,14 @@ int
 hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
                      uint16_t queue_idx, uint16_t nb_desc,
                      unsigned int socket_id,
-                     const struct rte_eth_rxconf *rx_conf __rte_unused,
+                     const struct rte_eth_rxconf *rx_conf,
                      struct rte_mempool *mp)
 {
        struct hn_data *hv = dev->data->dev_private;
        char ring_name[RTE_RING_NAMESIZE];
        struct hn_rx_queue *rxq;
        unsigned int count;
+       int error = -ENOMEM;
 
        PMD_INIT_FUNC_TRACE();
 
@@ -771,6 +806,11 @@ hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
        if (!rxq->rx_ring)
                goto fail;
 
+       error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc,
+                                    socket_id, rx_conf, mp);
+       if (error)
+               goto fail;
+
        dev->data->rx_queues[queue_idx] = rxq;
        return 0;
 
@@ -778,7 +818,7 @@ fail:
        rte_ring_free(rxq->rx_ring);
        rte_free(rxq->event_buf);
        rte_free(rxq);
-       return -ENOMEM;
+       return error;
 }
 
 void
@@ -795,77 +835,79 @@ hn_dev_rx_queue_release(void *arg)
        rxq->rx_ring = NULL;
        rxq->mb_pool = NULL;
 
+       hn_vf_rx_queue_release(rxq->hv, rxq->queue_id);
+
+       /* Keep primary queue to allow for control operations */
        if (rxq != rxq->hv->primary) {
                rte_free(rxq->event_buf);
                rte_free(rxq);
        }
 }
 
-void
-hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
-                    struct rte_eth_rxq_info *qinfo)
-{
-       struct hn_rx_queue *rxq = dev->data->rx_queues[queue_idx];
-
-       qinfo->mp = rxq->mb_pool;
-       qinfo->scattered_rx = 1;
-       qinfo->nb_desc = rte_ring_get_capacity(rxq->rx_ring);
-}
-
-static void
-hn_nvs_handle_notify(const struct vmbus_chanpkt_hdr *pkthdr,
-                    const void *data)
+int
+hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt)
 {
-       const struct hn_nvs_hdr *hdr = data;
-
-       if (unlikely(vmbus_chanpkt_datalen(pkthdr) < sizeof(*hdr))) {
-               PMD_DRV_LOG(ERR, "invalid nvs notify");
-               return;
-       }
+       struct hn_tx_queue *txq = arg;
 
-       PMD_DRV_LOG(INFO,
-                   "got notify, nvs type %u", hdr->type);
+       return hn_process_events(txq->hv, txq->queue_id, free_cnt);
 }
 
 /*
  * Process pending events on the channel.
  * Called from both Rx queue poll and Tx cleanup
  */
-void hn_process_events(struct hn_data *hv, uint16_t queue_id)
+uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
+                          uint32_t tx_limit)
 {
        struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id];
        struct hn_rx_queue *rxq;
        uint32_t bytes_read = 0;
+       uint32_t tx_done = 0;
        int ret = 0;
 
        rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id];
 
        /* If no pending data then nothing to do */
        if (rte_vmbus_chan_rx_empty(rxq->chan))
-               return;
+               return 0;
 
        /*
         * Since channel is shared between Rx and TX queue need to have a lock
         * since DPDK does not force same CPU to be used for Rx/Tx.
         */
        if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock)))
-               return;
+               return 0;
 
        for (;;) {
                const struct vmbus_chanpkt_hdr *pkt;
-               uint32_t len = HN_RXQ_EVENT_DEFAULT;
+               uint32_t len = rxq->event_sz;
                const void *data;
 
+retry:
                ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len);
                if (ret == -EAGAIN)
                        break;  /* ring is empty */
 
-               else if (ret == -ENOBUFS)
-                       rte_exit(EXIT_FAILURE, "event buffer not big enough (%u < %u)",
-                                HN_RXQ_EVENT_DEFAULT, len);
-               else if (ret <= 0)
+               if (unlikely(ret == -ENOBUFS)) {
+                       /* event buffer not large enough to read ring */
+                       void *event_buf;
+
+                       PMD_DRV_LOG(DEBUG,
+                                   "event buffer expansion (need %u)", len);
+                       rxq->event_sz = len + len / 4;
+                       event_buf = rte_realloc(rxq->event_buf, rxq->event_sz,
+                                               RTE_CACHE_LINE_SIZE);
+                       if (event_buf) {
+                               rxq->event_buf = event_buf;
+                               goto retry;
+                       }
+                       /* out of memory: keep the old buffer but stop
+                        * processing events until the next poll
+                        */
+                       rxq->event_sz = 0;
+                       break;
+               }
+
+               if (unlikely(ret <= 0)) {
+                       /* This indicates a failure to communicate (or worse) */
                        rte_exit(EXIT_FAILURE,
                                 "vmbus ring buffer error: %d", ret);
+               }
 
                bytes_read += ret;
                pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf;
@@ -873,6 +915,7 @@ void hn_process_events(struct hn_data *hv, uint16_t queue_id)
 
                switch (pkt->type) {
                case VMBUS_CHANPKT_TYPE_COMP:
+                       ++tx_done;
                        hn_nvs_handle_comp(dev, queue_id, pkt, data);
                        break;
 
@@ -881,7 +924,7 @@ void hn_process_events(struct hn_data *hv, uint16_t queue_id)
                        break;
 
                case VMBUS_CHANPKT_TYPE_INBAND:
-                       hn_nvs_handle_notify(pkt, data);
+                       hn_nvs_handle_notify(dev, pkt, data);
                        break;
 
                default:
@@ -889,6 +932,9 @@ void hn_process_events(struct hn_data *hv, uint16_t queue_id)
                        break;
                }
 
+               if (tx_limit && tx_done >= tx_limit)
+                       break;
+
                if (rxq->rx_ring && rte_ring_full(rxq->rx_ring))
                        break;
        }
@@ -897,6 +943,8 @@ void hn_process_events(struct hn_data *hv, uint16_t queue_id)
                rte_vmbus_chan_signal_read(rxq->chan, bytes_read);
 
        rte_spinlock_unlock(&rxq->ring_lock);
+
+       return tx_done;
 }
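
With hn_dev_tx_done_cleanup (above) forwarding into this function, an
application can reclaim completed Tx mbufs explicitly through the standard
ethdev call; the free_cnt argument becomes tx_limit, so 0 drains without bound
given the `tx_limit &&` test above. A sketch (port and queue ids illustrative):

rte_eth_tx_done_cleanup(port_id, 0 /* queue */, 32 /* at most 32 completions */);
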
 
 static void hn_append_to_chim(struct hn_tx_queue *txq,
@@ -967,7 +1015,7 @@ static struct hn_txdesc *hn_new_txd(struct hn_data *hv,
        struct hn_txdesc *txd;
 
        if (rte_mempool_get(hv->tx_pool, (void **)&txd)) {
-               ++txq->stats.nomemory;
+               ++txq->stats.ring_full;
                PMD_TX_LOG(DEBUG, "tx pool exhausted!");
                return NULL;
        }
@@ -1235,7 +1283,9 @@ uint16_t
 hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
        struct hn_tx_queue *txq = ptxq;
+       uint16_t queue_id = txq->queue_id;
        struct hn_data *hv = txq->hv;
+       struct rte_eth_dev *vf_dev;
        bool need_sig = false;
        uint16_t nb_tx;
        int ret;
@@ -1243,8 +1293,17 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
        if (unlikely(hv->closed))
                return 0;
 
+       /* Transmit over VF if present and up */
+       vf_dev = hv->vf_dev;
+       rte_compiler_barrier();
+       if (vf_dev && vf_dev->data->dev_started) {
+               void *sub_q = vf_dev->data->tx_queues[queue_id];
+
+               return (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
+       }
+
        if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh)
-               hn_process_events(hv, txq->queue_id);
+               hn_process_events(hv, txq->queue_id, 0);
 
        for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
                struct rte_mbuf *m = tx_pkts[nb_tx];
@@ -1264,7 +1323,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                        if (unlikely(!pkt))
                                break;
 
-                       hn_encap(pkt, txq->queue_id, m);
+                       hn_encap(pkt, queue_id, m);
                        hn_append_to_chim(txq, pkt, m);
 
                        rte_pktmbuf_free(m);
@@ -1291,7 +1350,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                        txd->data_size += m->pkt_len;
                        ++txd->packets;
 
-                       hn_encap(pkt, txq->queue_id, m);
+                       hn_encap(pkt, queue_id, m);
 
                        ret = hn_xmit_sg(txq, txd, m, &need_sig);
                        if (unlikely(ret != 0)) {
@@ -1320,15 +1379,36 @@ hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
        struct hn_rx_queue *rxq = prxq;
        struct hn_data *hv = rxq->hv;
+       struct rte_eth_dev *vf_dev;
+       uint16_t nb_rcv;
 
        if (unlikely(hv->closed))
                return 0;
 
-       /* If ring is empty then process more */
-       if (rte_ring_count(rxq->rx_ring) < nb_pkts)
-               hn_process_events(hv, rxq->queue_id);
+       vf_dev = hv->vf_dev;
+       rte_compiler_barrier();
+
+       if (vf_dev && vf_dev->data->dev_started) {
+               /* Normally, with SR-IOV the ring buffer will be empty */
+               hn_process_events(hv, rxq->queue_id, 0);
+
+               /* Get some mbufs off the staging ring */
+               nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
+                                                  (void **)rx_pkts,
+                                                  nb_pkts / 2, NULL);
+               /* And the rest from the VF */
+               nb_rcv += rte_eth_rx_burst(vf_dev->data->port_id,
+                                          rxq->queue_id,
+                                          rx_pkts + nb_rcv, nb_pkts - nb_rcv);
+       } else {
+               /* If the ring holds fewer mbufs than requested, poll for more */
+               if (rte_ring_count(rxq->rx_ring) < nb_pkts)
+                       hn_process_events(hv, rxq->queue_id, 0);
+
+               nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
+                                                  (void **)rx_pkts,
+                                                  nb_pkts, NULL);
+       }
 
-       /* Get mbufs off staging ring */
-       return rte_ring_sc_dequeue_burst(rxq->rx_ring, (void **)rx_pkts,
-                                        nb_pkts, NULL);
+       return nb_rcv;
 }
index f7ff858..e1072c7 100644 (file)
@@ -20,6 +20,9 @@
 /* Retry interval */
 #define HN_CHAN_INTERVAL_US    100
 
+/* Host monitor interval */
+#define HN_CHAN_LATENCY_NS     50000
+
 /* Buffers need to be aligned */
 #ifndef PAGE_SIZE
 #define PAGE_SIZE 4096
@@ -36,7 +39,7 @@ struct hn_stats {
        uint64_t        packets;
        uint64_t        bytes;
        uint64_t        errors;
-       uint64_t        nomemory;
+       uint64_t        ring_full;
        uint64_t        multicast;
        uint64_t        broadcast;
        /* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */
@@ -75,9 +78,8 @@ struct hn_rx_queue {
        uint16_t port_id;
        uint16_t queue_id;
        struct hn_stats stats;
-       uint64_t ring_full;
 
-       uint8_t event_buf[];
+       void *event_buf;
 };
 
 
@@ -92,8 +94,11 @@ struct hn_rx_bufinfo {
 struct hn_data {
        struct rte_vmbus_device *vmbus;
        struct hn_rx_queue *primary;
+       struct rte_eth_dev *vf_dev;             /* Subordinate device */
+       rte_spinlock_t  vf_lock;
        uint16_t        port_id;
        bool            closed;
+       bool            vf_present;
        uint32_t        link_status;
        uint32_t        link_speed;
 
@@ -110,6 +115,7 @@ struct hn_data {
        uint32_t        chim_szmax;             /* Max size per buffer */
        uint32_t        chim_cnt;               /* Max packets per buffer */
 
+       uint32_t        latency;
        uint32_t        nvs_ver;
        uint32_t        ndis_ver;
        uint32_t        rndis_agg_size;
@@ -121,6 +127,10 @@ struct hn_data {
        uint8_t         rndis_resp[256];
 
        struct ether_addr mac_addr;
+
+       struct rte_eth_dev_owner owner;
+       struct rte_intr_handle vf_intr;
+
        struct vmbus_channel *channels[HN_MAX_CHANNELS];
 };
 
@@ -130,7 +140,8 @@ hn_primary_chan(const struct hn_data *hv)
        return hv->channels[0];
 }
 
-void hn_process_events(struct hn_data *hv, uint16_t queue_id);
+uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
+                      uint32_t tx_limit);
 
 uint16_t hn_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
                      uint16_t nb_pkts);
@@ -138,12 +149,14 @@ uint16_t hn_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
                      uint16_t nb_pkts);
 
 int    hn_tx_pool_init(struct rte_eth_dev *dev);
+int    hn_dev_link_update(struct rte_eth_dev *dev, int wait);
 int    hn_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                              uint16_t nb_desc, unsigned int socket_id,
                              const struct rte_eth_txconf *tx_conf);
 void   hn_dev_tx_queue_release(void *arg);
 void   hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
                             struct rte_eth_txq_info *qinfo);
+int    hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt);
 
 struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
                                      uint16_t queue_id,
@@ -154,5 +167,46 @@ int        hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
                              const struct rte_eth_rxconf *rx_conf,
                              struct rte_mempool *mp);
 void   hn_dev_rx_queue_release(void *arg);
-void   hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
-                            struct rte_eth_rxq_info *qinfo);
+
+void   hn_vf_info_get(struct hn_data *hv,
+                      struct rte_eth_dev_info *info);
+int    hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv);
+int    hn_vf_configure(struct rte_eth_dev *dev,
+                       const struct rte_eth_conf *dev_conf);
+const uint32_t *hn_vf_supported_ptypes(struct rte_eth_dev *dev);
+int    hn_vf_start(struct rte_eth_dev *dev);
+void   hn_vf_reset(struct rte_eth_dev *dev);
+void   hn_vf_stop(struct rte_eth_dev *dev);
+void   hn_vf_close(struct rte_eth_dev *dev);
+
+void   hn_vf_allmulticast_enable(struct rte_eth_dev *dev);
+void   hn_vf_allmulticast_disable(struct rte_eth_dev *dev);
+void   hn_vf_promiscuous_enable(struct rte_eth_dev *dev);
+void   hn_vf_promiscuous_disable(struct rte_eth_dev *dev);
+int    hn_vf_mc_addr_list(struct rte_eth_dev *dev,
+                          struct ether_addr *mc_addr_set,
+                          uint32_t nb_mc_addr);
+
+int    hn_vf_link_update(struct rte_eth_dev *dev,
+                         int wait_to_complete);
+int    hn_vf_tx_queue_setup(struct rte_eth_dev *dev,
+                            uint16_t queue_idx, uint16_t nb_desc,
+                            unsigned int socket_id,
+                            const struct rte_eth_txconf *tx_conf);
+void   hn_vf_tx_queue_release(struct hn_data *hv, uint16_t queue_id);
+int    hn_vf_rx_queue_setup(struct rte_eth_dev *dev,
+                            uint16_t queue_idx, uint16_t nb_desc,
+                            unsigned int socket_id,
+                            const struct rte_eth_rxconf *rx_conf,
+                            struct rte_mempool *mp);
+void   hn_vf_rx_queue_release(struct hn_data *hv, uint16_t queue_id);
+
+int    hn_vf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
+void   hn_vf_stats_reset(struct rte_eth_dev *dev);
+int    hn_vf_xstats_get_names(struct rte_eth_dev *dev,
+                              struct rte_eth_xstat_name *xstats_names,
+                              unsigned int size);
+int    hn_vf_xstats_get(struct rte_eth_dev *dev,
+                        struct rte_eth_xstat *xstats,
+                        unsigned int n);
+void   hn_vf_xstats_reset(struct rte_eth_dev *dev);
diff --git a/drivers/net/netvsc/hn_vf.c b/drivers/net/netvsc/hn_vf.c
new file mode 100644 (file)
index 0000000..7a84ad8
--- /dev/null
@@ -0,0 +1,549 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2018 Microsoft Corp.
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <sys/types.h>
+#include <sys/fcntl.h>
+#include <sys/uio.h>
+
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ethdev_driver.h>
+#include <rte_lcore.h>
+#include <rte_memory.h>
+#include <rte_bus_vmbus.h>
+#include <rte_pci.h>
+#include <rte_bus_pci.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+
+#include "hn_logs.h"
+#include "hn_var.h"
+#include "hn_nvs.h"
+
+/* Search for VF with matching MAC address, return port id */
+static int hn_vf_match(const struct rte_eth_dev *dev)
+{
+       const struct ether_addr *mac = dev->data->mac_addrs;
+       char buf[32];
+       int i;
+
+       ether_format_addr(buf, sizeof(buf), mac);
+       RTE_ETH_FOREACH_DEV(i) {
+               const struct rte_eth_dev *vf_dev = &rte_eth_devices[i];
+               const struct ether_addr *vf_mac = vf_dev->data->mac_addrs;
+
+               if (vf_dev == dev)
+                       continue;
+
+               ether_format_addr(buf, sizeof(buf), vf_mac);
+               if (is_same_ether_addr(mac, vf_mac))
+                       return i;
+       }
+       return -ENOENT;
+}
+
+/*
+ * Attach new PCI VF device and return the port_id
+ */
+static int hn_vf_attach(struct hn_data *hv, uint16_t port_id,
+                       struct rte_eth_dev **vf_dev)
+{
+       struct rte_eth_dev_owner owner = { .id = RTE_ETH_DEV_NO_OWNER };
+       int ret;
+
+       ret = rte_eth_dev_owner_get(port_id, &owner);
+       if (ret < 0) {
+               PMD_DRV_LOG(ERR, "Cannot find owner for port %d", port_id);
+               return ret;
+       }
+
+       if (owner.id != RTE_ETH_DEV_NO_OWNER) {
+               PMD_DRV_LOG(ERR, "Port %u already owned by other device %s",
+                           port_id, owner.name);
+               return -EBUSY;
+       }
+
+       ret = rte_eth_dev_owner_set(port_id, &hv->owner);
+       if (ret < 0) {
+               PMD_DRV_LOG(ERR, "Cannot set owner for port %d", port_id);
+               return ret;
+       }
+
+       PMD_DRV_LOG(DEBUG, "Attach VF device %u", port_id);
+       rte_smp_wmb();
+       *vf_dev = &rte_eth_devices[port_id];
+       return 0;
+}
+
+/* Add new VF device to synthetic device */
+int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
+{
+       int port, err;
+
+       port = hn_vf_match(dev);
+       if (port < 0) {
+               PMD_DRV_LOG(NOTICE, "No matching MAC found");
+               return port;
+       }
+
+       rte_spinlock_lock(&hv->vf_lock);
+       if (hv->vf_dev) {
+               PMD_DRV_LOG(ERR, "VF already attached");
+               err = -EBUSY;
+       } else {
+               err = hn_vf_attach(hv, port, &hv->vf_dev);
+       }
+
+       if (err == 0) {
+               dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
+               hv->vf_intr = (struct rte_intr_handle) {
+                       .fd = -1,
+                       .type = RTE_INTR_HANDLE_EXT,
+               };
+               dev->intr_handle = &hv->vf_intr;
+               hn_nvs_set_datapath(hv, NVS_DATAPATH_VF);
+       }
+       rte_spinlock_unlock(&hv->vf_lock);
+
+       return err;
+}
+
+/* Remove VF device */
+static void hn_vf_remove(struct hn_data *hv)
+{
+       struct rte_eth_dev *vf_dev;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (!vf_dev) {
+               PMD_DRV_LOG(ERR, "VF path not active");
+               rte_spinlock_unlock(&hv->vf_lock);
+               return;
+       }
+
+       /* Stop incoming packets from arriving on VF */
+       hn_nvs_set_datapath(hv, NVS_DATAPATH_SYNTHETIC);
+       hv->vf_dev = NULL;
+
+       /* Give back ownership */
+       rte_eth_dev_owner_unset(vf_dev->data->port_id, hv->owner.id);
+       rte_spinlock_unlock(&hv->vf_lock);
+}
+
+/* Handle VF association message from host */
+void
+hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
+                     const struct vmbus_chanpkt_hdr *hdr,
+                     const void *data)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       const struct hn_nvs_vf_association *vf_assoc = data;
+
+       if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*vf_assoc))) {
+               PMD_DRV_LOG(ERR, "invalid vf association NVS");
+               return;
+       }
+
+       PMD_DRV_LOG(DEBUG, "VF serial %u %s port %u",
+                   vf_assoc->serial,
+                   vf_assoc->allocated ? "add to" : "remove from",
+                   dev->data->port_id);
+
+       hv->vf_present = vf_assoc->allocated;
+
+       if (dev->state != RTE_ETH_DEV_ATTACHED)
+               return;
+
+       if (vf_assoc->allocated)
+               hn_vf_add(dev, hv);
+       else
+               hn_vf_remove(hv);
+}
+
+/*
+ * Merge the info from the VF and synthetic path:
+ * use the default config of the VF,
+ * and the minimum number of queues and buffer sizes.
+ */
+static void hn_vf_info_merge(struct rte_eth_dev *vf_dev,
+                            struct rte_eth_dev_info *info)
+{
+       struct rte_eth_dev_info vf_info;
+
+       rte_eth_dev_info_get(vf_dev->data->port_id, &vf_info);
+
+       info->speed_capa = vf_info.speed_capa;
+       info->default_rxportconf = vf_info.default_rxportconf;
+       info->default_txportconf = vf_info.default_txportconf;
+
+       info->max_rx_queues = RTE_MIN(vf_info.max_rx_queues,
+                                     info->max_rx_queues);
+       info->rx_offload_capa &= vf_info.rx_offload_capa;
+       info->rx_queue_offload_capa &= vf_info.rx_queue_offload_capa;
+       info->flow_type_rss_offloads &= vf_info.flow_type_rss_offloads;
+
+       info->max_tx_queues = RTE_MIN(vf_info.max_tx_queues,
+                                     info->max_tx_queues);
+       info->tx_offload_capa &= vf_info.tx_offload_capa;
+       info->tx_queue_offload_capa &= vf_info.tx_queue_offload_capa;
+
+       info->min_rx_bufsize = RTE_MAX(vf_info.min_rx_bufsize,
+                                      info->min_rx_bufsize);
+       info->max_rx_pktlen  = RTE_MAX(vf_info.max_rx_pktlen,
+                                      info->max_rx_pktlen);
+}
+
+void hn_vf_info_get(struct hn_data *hv, struct rte_eth_dev_info *info)
+{
+       struct rte_eth_dev *vf_dev;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev)
+               hn_vf_info_merge(vf_dev, info);
+       rte_spinlock_unlock(&hv->vf_lock);
+}
+
+int hn_vf_link_update(struct rte_eth_dev *dev,
+                     int wait_to_complete)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct rte_eth_dev *vf_dev;
+       int ret = 0;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev && vf_dev->dev_ops->link_update)
+               ret = (*vf_dev->dev_ops->link_update)(vf_dev, wait_to_complete);
+       rte_spinlock_unlock(&hv->vf_lock);
+
+       return ret;
+}
+
+/* called when VF has link state interrupts enabled */
+static int hn_vf_lsc_event(uint16_t port_id __rte_unused,
+                          enum rte_eth_event_type event,
+                          void *cb_arg, void *out __rte_unused)
+{
+       struct rte_eth_dev *dev = cb_arg;
+
+       if (event != RTE_ETH_EVENT_INTR_LSC)
+               return 0;
+
+       /* If the link state changed, propagate the event to applications */
+       if (hn_dev_link_update(dev, 0) == 0)
+               return 0; /* no change */
+
+       return _rte_eth_dev_callback_process(dev,
+                                            RTE_ETH_EVENT_INTR_LSC,
+                                            NULL);
+}
+
+static int _hn_vf_configure(struct rte_eth_dev *dev,
+                           struct rte_eth_dev *vf_dev,
+                           const struct rte_eth_conf *dev_conf)
+{
+       struct rte_eth_conf vf_conf = *dev_conf;
+       uint16_t vf_port = vf_dev->data->port_id;
+       int ret;
+
+       if (dev_conf->intr_conf.lsc &&
+           (vf_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
+               PMD_DRV_LOG(DEBUG, "enabling LSC for VF %u",
+                           vf_port);
+               vf_conf.intr_conf.lsc = 1;
+       } else {
+               PMD_DRV_LOG(DEBUG, "disabling LSC for VF %u",
+                           vf_port);
+               vf_conf.intr_conf.lsc = 0;
+       }
+
+       ret = rte_eth_dev_configure(vf_port,
+                                   dev->data->nb_rx_queues,
+                                   dev->data->nb_tx_queues,
+                                   &vf_conf);
+       if (ret) {
+               PMD_DRV_LOG(ERR,
+                           "VF configuration failed: %d", ret);
+       } else if (vf_conf.intr_conf.lsc) {
+               ret = rte_eth_dev_callback_register(vf_port,
+                                                   RTE_ETH_EVENT_INTR_LSC,
+                                                   hn_vf_lsc_event, dev);
+               if (ret)
+                       PMD_DRV_LOG(ERR,
+                                   "Failed to register LSC callback for VF %u",
+                                   vf_port);
+       }
+       return ret;
+}
+
+/*
+ * Configure VF if present.
+ * Force VF to have same number of queues as synthetic device
+ */
+int hn_vf_configure(struct rte_eth_dev *dev,
+                   const struct rte_eth_conf *dev_conf)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct rte_eth_dev *vf_dev;
+       int ret = 0;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev)
+               ret = _hn_vf_configure(dev, vf_dev, dev_conf);
+       rte_spinlock_unlock(&hv->vf_lock);
+       return ret;
+}
+
+const uint32_t *hn_vf_supported_ptypes(struct rte_eth_dev *dev)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct rte_eth_dev *vf_dev;
+       const uint32_t *ptypes = NULL;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev && vf_dev->dev_ops->dev_supported_ptypes_get)
+               ptypes = (*vf_dev->dev_ops->dev_supported_ptypes_get)(vf_dev);
+       rte_spinlock_unlock(&hv->vf_lock);
+
+       return ptypes;
+}
+
+int hn_vf_start(struct rte_eth_dev *dev)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct rte_eth_dev *vf_dev;
+       int ret = 0;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev)
+               ret = rte_eth_dev_start(vf_dev->data->port_id);
+       rte_spinlock_unlock(&hv->vf_lock);
+       return ret;
+}
+
+void hn_vf_stop(struct rte_eth_dev *dev)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct rte_eth_dev *vf_dev;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev)
+               rte_eth_dev_stop(vf_dev->data->port_id);
+       rte_spinlock_unlock(&hv->vf_lock);
+}
+
+/* If VF is present, then cascade configuration down */
+#define VF_ETHDEV_FUNC(dev, func)                              \
+       {                                                       \
+               struct hn_data *hv = (dev)->data->dev_private;  \
+               struct rte_eth_dev *vf_dev;                     \
+               rte_spinlock_lock(&hv->vf_lock);                \
+               vf_dev = hv->vf_dev;                            \
+               if (vf_dev)                                     \
+                       func(vf_dev->data->port_id);            \
+               rte_spinlock_unlock(&hv->vf_lock);              \
+       }
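
For illustration, the call VF_ETHDEV_FUNC(dev, rte_eth_dev_reset) in hn_vf_reset() below expands to the following locked delegation (a sketch of the preprocessor output, not extra driver code):

    {
            struct hn_data *hv = dev->data->dev_private;
            struct rte_eth_dev *vf_dev;
            rte_spinlock_lock(&hv->vf_lock);
            vf_dev = hv->vf_dev;
            if (vf_dev)
                    rte_eth_dev_reset(vf_dev->data->port_id);
            rte_spinlock_unlock(&hv->vf_lock);
    }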
+
+void hn_vf_reset(struct rte_eth_dev *dev)
+{
+       VF_ETHDEV_FUNC(dev, rte_eth_dev_reset);
+}
+
+void hn_vf_close(struct rte_eth_dev *dev)
+{
+       VF_ETHDEV_FUNC(dev, rte_eth_dev_close);
+}
+
+void hn_vf_stats_reset(struct rte_eth_dev *dev)
+{
+       VF_ETHDEV_FUNC(dev, rte_eth_stats_reset);
+}
+
+void hn_vf_allmulticast_enable(struct rte_eth_dev *dev)
+{
+       VF_ETHDEV_FUNC(dev, rte_eth_allmulticast_enable);
+}
+
+void hn_vf_allmulticast_disable(struct rte_eth_dev *dev)
+{
+       VF_ETHDEV_FUNC(dev, rte_eth_allmulticast_disable);
+}
+
+void hn_vf_promiscuous_enable(struct rte_eth_dev *dev)
+{
+       VF_ETHDEV_FUNC(dev, rte_eth_promiscuous_enable);
+}
+
+void hn_vf_promiscuous_disable(struct rte_eth_dev *dev)
+{
+       VF_ETHDEV_FUNC(dev, rte_eth_promiscuous_disable);
+}
+
+int hn_vf_mc_addr_list(struct rte_eth_dev *dev,
+                       struct ether_addr *mc_addr_set,
+                       uint32_t nb_mc_addr)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct rte_eth_dev *vf_dev;
+       int ret = 0;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev)
+               ret = rte_eth_dev_set_mc_addr_list(vf_dev->data->port_id,
+                                                  mc_addr_set, nb_mc_addr);
+       rte_spinlock_unlock(&hv->vf_lock);
+       return ret;
+}
+
+int hn_vf_tx_queue_setup(struct rte_eth_dev *dev,
+                        uint16_t queue_idx, uint16_t nb_desc,
+                        unsigned int socket_id,
+                        const struct rte_eth_txconf *tx_conf)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct rte_eth_dev *vf_dev;
+       int ret = 0;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev)
+               ret = rte_eth_tx_queue_setup(vf_dev->data->port_id,
+                                            queue_idx, nb_desc,
+                                            socket_id, tx_conf);
+       rte_spinlock_unlock(&hv->vf_lock);
+       return ret;
+}
+
+void hn_vf_tx_queue_release(struct hn_data *hv, uint16_t queue_id)
+{
+       struct rte_eth_dev *vf_dev;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev && vf_dev->dev_ops->tx_queue_release) {
+               void *subq = vf_dev->data->tx_queues[queue_id];
+
+               (*vf_dev->dev_ops->tx_queue_release)(subq);
+       }
+
+       rte_spinlock_unlock(&hv->vf_lock);
+}
+
+int hn_vf_rx_queue_setup(struct rte_eth_dev *dev,
+                        uint16_t queue_idx, uint16_t nb_desc,
+                        unsigned int socket_id,
+                        const struct rte_eth_rxconf *rx_conf,
+                        struct rte_mempool *mp)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct rte_eth_dev *vf_dev;
+       int ret = 0;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev)
+               ret = rte_eth_rx_queue_setup(vf_dev->data->port_id,
+                                            queue_idx, nb_desc,
+                                            socket_id, rx_conf, mp);
+       rte_spinlock_unlock(&hv->vf_lock);
+       return ret;
+}
+
+void hn_vf_rx_queue_release(struct hn_data *hv, uint16_t queue_id)
+{
+       struct rte_eth_dev *vf_dev;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev && vf_dev->dev_ops->rx_queue_release) {
+               void *subq = vf_dev->data->rx_queues[queue_id];
+
+               (*vf_dev->dev_ops->rx_queue_release)(subq);
+       }
+       rte_spinlock_unlock(&hv->vf_lock);
+}
+
+int hn_vf_stats_get(struct rte_eth_dev *dev,
+                   struct rte_eth_stats *stats)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct rte_eth_dev *vf_dev;
+       int ret = 0;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev)
+               ret = rte_eth_stats_get(vf_dev->data->port_id, stats);
+       rte_spinlock_unlock(&hv->vf_lock);
+       return ret;
+}
+
+int hn_vf_xstats_get_names(struct rte_eth_dev *dev,
+                          struct rte_eth_xstat_name *names,
+                          unsigned int n)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct rte_eth_dev *vf_dev;
+       int i, count = 0;
+       char tmp[RTE_ETH_XSTATS_NAME_SIZE];
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev && vf_dev->dev_ops->xstats_get_names)
+               count = vf_dev->dev_ops->xstats_get_names(vf_dev, names, n);
+       rte_spinlock_unlock(&hv->vf_lock);
+
+       /* add vf_ prefix to xstat names */
+       if (names) {
+               for (i = 0; i < count; i++) {
+                       snprintf(tmp, sizeof(tmp), "vf_%s", names[i].name);
+                       strlcpy(names[i].name, tmp, sizeof(names[i].name));
+               }
+       }
+
+       return count;
+}
+
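Usage note: the wrapper above reports the VF's own extended statistics under the synthetic port, with each name prefixed by "vf_"; for example, a VF counter named "rx_good_packets" (an illustrative name; the exact set depends on the VF driver) is exposed as "vf_rx_good_packets".
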
+int hn_vf_xstats_get(struct rte_eth_dev *dev,
+                    struct rte_eth_xstat *xstats,
+                    unsigned int n)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct rte_eth_dev *vf_dev;
+       int count = 0;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev && vf_dev->dev_ops->xstats_get)
+               count = vf_dev->dev_ops->xstats_get(vf_dev, xstats, n);
+       rte_spinlock_unlock(&hv->vf_lock);
+
+       return count;
+}
+
+void hn_vf_xstats_reset(struct rte_eth_dev *dev)
+{
+       struct hn_data *hv = dev->data->dev_private;
+       struct rte_eth_dev *vf_dev;
+
+       rte_spinlock_lock(&hv->vf_lock);
+       vf_dev = hv->vf_dev;
+       if (vf_dev && vf_dev->dev_ops->xstats_reset)
+               vf_dev->dev_ops->xstats_reset(vf_dev);
+       rte_spinlock_unlock(&hv->vf_lock);
+}
index a717cdd..c842697 100644 (file)
@@ -3,7 +3,7 @@
 
 build = dpdk_conf.has('RTE_LIBRTE_VMBUS_BUS')
 version = 2
-sources = files('hn_ethdev.c', 'hn_rxtx.c', 'hn_rndis.c', 'hn_nvs.c')
+sources = files('hn_ethdev.c', 'hn_rxtx.c', 'hn_rndis.c', 'hn_nvs.c', 'hn_vf.c')
 
 deps += ['bus_vmbus' ]
 
index ab4e0a7..d3fa569 100644 (file)
@@ -10,6 +10,7 @@ LIB = librte_pmd_nfp.a
 
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 
 LDLIBS += -lm
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
index 3ba37e2..ba6a22e 100644 (file)
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2018 Intel Corporation
 
+if host_machine.system() != 'linux'
+        build = false
+endif
 sources = files('nfpcore/nfp_cpp_pcie_ops.c',
        'nfpcore/nfp_nsp.c',
        'nfpcore/nfp_cppcore.c',
@@ -14,3 +17,5 @@ sources = files('nfpcore/nfp_cpp_pcie_ops.c',
        'nfpcore/nfp_nsp_eth.c',
        'nfpcore/nfp_hwinfo.c',
        'nfp_net.c')
+
+allow_experimental_apis = true
index 6e5e305..bab1f68 100644 (file)
@@ -411,12 +411,6 @@ nfp_net_configure(struct rte_eth_dev *dev)
                return -EINVAL;
        }
 
-       /* KEEP_CRC offload flag is not supported by PMD
-        * can remove the below block when DEV_RX_OFFLOAD_CRC_STRIP removed
-        */
-       if (rte_eth_dev_must_keep_crc(rxmode->offloads))
-               PMD_INIT_LOG(INFO, "HW does strip CRC. No configurable!");
-
        return 0;
 }
 
@@ -1168,8 +1162,7 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                                             DEV_RX_OFFLOAD_UDP_CKSUM |
                                             DEV_RX_OFFLOAD_TCP_CKSUM;
 
-       dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME |
-                                    DEV_RX_OFFLOAD_KEEP_CRC;
+       dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_JUMBO_FRAME;
 
        if (hw->cap & NFP_NET_CFG_CTRL_TXVLAN)
                dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
@@ -1205,8 +1198,10 @@ nfp_net_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                .tx_rs_thresh = DEFAULT_TX_RSBIT_THRESH,
        };
 
-       dev_info->flow_type_rss_offloads = ETH_RSS_NONFRAG_IPV4_TCP |
+       dev_info->flow_type_rss_offloads = ETH_RSS_IPV4 |
+                                          ETH_RSS_NONFRAG_IPV4_TCP |
                                           ETH_RSS_NONFRAG_IPV4_UDP |
+                                          ETH_RSS_IPV6 |
                                           ETH_RSS_NONFRAG_IPV6_TCP |
                                           ETH_RSS_NONFRAG_IPV6_UDP;
 
@@ -1786,21 +1781,20 @@ nfp_net_rx_cksum(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd,
                return;
 
        /* If IPv4 and IP checksum error, fail */
-       if ((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) &&
-           !(rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK))
+       if (unlikely((rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM) &&
+           !(rxd->rxd.flags & PCIE_DESC_RX_IP4_CSUM_OK)))
                mb->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+       else
+               mb->ol_flags |= PKT_RX_IP_CKSUM_GOOD;
 
        /* If neither UDP nor TCP return */
        if (!(rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) &&
            !(rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM))
                return;
 
-       if ((rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM) &&
-           !(rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK))
-               mb->ol_flags |= PKT_RX_L4_CKSUM_BAD;
-
-       if ((rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM) &&
-           !(rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK))
+       if (likely(rxd->rxd.flags & PCIE_DESC_RX_L4_CSUM_OK))
+               mb->ol_flags |= PKT_RX_L4_CKSUM_GOOD;
+       else
                mb->ol_flags |= PKT_RX_L4_CKSUM_BAD;
 }
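
Applications consume the result through the standard mbuf offload flags rather than the NFP descriptor bits; a minimal sketch of the check, assuming only the public mbuf API:

    #include <rte_mbuf.h>

    /* Returns non-zero when the NIC reported a valid L4 checksum */
    static inline int rx_l4_csum_ok(const struct rte_mbuf *m)
    {
            return (m->ol_flags & PKT_RX_L4_CKSUM_MASK) ==
                    PKT_RX_L4_CKSUM_GOOD;
    }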
 
@@ -1884,6 +1878,18 @@ nfp_net_set_hash(struct nfp_net_rxq *rxq, struct nfp_net_rx_desc *rxd,
        case NFP_NET_RSS_IPV6_EX:
                mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
                break;
+       case NFP_NET_RSS_IPV4_TCP:
+               mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
+               break;
+       case NFP_NET_RSS_IPV6_TCP:
+               mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
+               break;
+       case NFP_NET_RSS_IPV4_UDP:
+               mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
+               break;
+       case NFP_NET_RSS_IPV6_UDP:
+               mbuf->packet_type |= RTE_PTYPE_INNER_L3_IPV6_EXT;
+               break;
        default:
                mbuf->packet_type |= RTE_PTYPE_INNER_L4_MASK;
        }
@@ -2465,14 +2471,22 @@ nfp_net_rss_hash_write(struct rte_eth_dev *dev,
        rss_hf = rss_conf->rss_hf;
 
        if (rss_hf & ETH_RSS_IPV4)
-               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4 |
-                               NFP_NET_CFG_RSS_IPV4_TCP |
-                               NFP_NET_CFG_RSS_IPV4_UDP;
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4;
+
+       if (rss_hf & ETH_RSS_NONFRAG_IPV4_TCP)
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4_TCP;
+
+       if (rss_hf & ETH_RSS_NONFRAG_IPV4_UDP)
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV4_UDP;
 
        if (rss_hf & ETH_RSS_IPV6)
-               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6 |
-                               NFP_NET_CFG_RSS_IPV6_TCP |
-                               NFP_NET_CFG_RSS_IPV6_UDP;
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6;
+
+       if (rss_hf & ETH_RSS_NONFRAG_IPV6_TCP)
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6_TCP;
+
+       if (rss_hf & ETH_RSS_NONFRAG_IPV6_UDP)
+               cfg_rss_ctrl |= NFP_NET_CFG_RSS_IPV6_UDP;
 
        cfg_rss_ctrl |= NFP_NET_CFG_RSS_MASK;
        cfg_rss_ctrl |= NFP_NET_CFG_RSS_TOEPLITZ;
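
With the per-protocol split above, an application can now request, say, IPv4 TCP/UDP hashing without implicitly enabling the IPv6 variants; a sketch of the relevant fragment of a port configuration:

    static const struct rte_eth_conf port_conf = {
            .rxmode = { .mq_mode = ETH_MQ_RX_RSS },
            .rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IPV4 |
                                           ETH_RSS_NONFRAG_IPV4_TCP |
                                           ETH_RSS_NONFRAG_IPV4_UDP,
    };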
@@ -2688,6 +2702,14 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
 
        pci_dev = RTE_ETH_DEV_TO_PCI(eth_dev);
 
+       /* NFP cannot handle DMA addresses requiring more than 40 bits */
+       if (rte_eal_check_dma_mask(40)) {
+               RTE_LOG(ERR, PMD, "device %s cannot be used:",
+                                  pci_dev->device.name);
+               RTE_LOG(ERR, PMD, "\trestricted dma mask to 40 bits!\n");
+               return -ENODEV;
+       }
+
        if ((pci_dev->id.device_id == PCI_DEVICE_ID_NFP4000_PF_NIC) ||
            (pci_dev->id.device_id == PCI_DEVICE_ID_NFP6000_PF_NIC)) {
                port = get_pf_port_number(eth_dev->data->name);
@@ -2886,6 +2908,9 @@ nfp_net_init(struct rte_eth_dev *eth_dev)
        ether_addr_copy((struct ether_addr *)hw->mac_addr,
                        &eth_dev->data->mac_addrs[0]);
 
+       if (!(hw->cap & NFP_NET_CFG_CTRL_LIVE_ADDR))
+               eth_dev->data->dev_flags |= RTE_ETH_DEV_NOLIVE_MAC_ADDR;
+
        PMD_INIT_LOG(INFO, "port %d VendorID=0x%x DeviceID=0x%x "
                     "mac=%02x:%02x:%02x:%02x:%02x:%02x",
                     eth_dev->data->port_id, pci_dev->id.vendor_id,
@@ -3265,14 +3290,16 @@ static int eth_nfp_pci_remove(struct rte_pci_device *pci_dev)
 
 static struct rte_pci_driver rte_nfp_net_pf_pmd = {
        .id_table = pci_id_nfp_pf_net_map,
-       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+                    RTE_PCI_DRV_IOVA_AS_VA,
        .probe = nfp_pf_pci_probe,
        .remove = eth_nfp_pci_remove,
 };
 
 static struct rte_pci_driver rte_nfp_net_vf_pmd = {
        .id_table = pci_id_nfp_vf_net_map,
-       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC,
+       .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC |
+                    RTE_PCI_DRV_IOVA_AS_VA,
        .probe = eth_nfp_pci_probe,
        .remove = eth_nfp_pci_remove,
 };
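
Taken together with the rte_eal_check_dma_mask(40) probe-time check added earlier in nfp_net_init(), the new RTE_PCI_DRV_IOVA_AS_VA flag lets these PMDs run in IOVA-as-VA mode while guaranteeing that every mapped address fits the device's 40-bit DMA limit; probing now fails cleanly with -ENODEV instead of risking mis-addressed DMA.
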
index c1b044e..b01036d 100644 (file)
@@ -293,6 +293,8 @@ struct nfp_net_txq {
 #define PCIE_DESC_RX_UDP_CSUM_OK        (1 <<  1)
 #define PCIE_DESC_RX_VLAN               (1 <<  0)
 
+#define PCIE_DESC_RX_L4_CSUM_OK         (PCIE_DESC_RX_TCP_CSUM_OK | \
+                                        PCIE_DESC_RX_UDP_CSUM_OK)
 struct nfp_net_rx_desc {
        union {
                /* Freelist descriptor */
index 244f865..159c1c1 100644 (file)
@@ -305,7 +305,6 @@ eth_dev_info(struct rte_eth_dev *dev,
        dev_info->min_rx_bufsize = 0;
        dev_info->reta_size = internals->reta_size;
        dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
-       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_CRC_STRIP;
 }
 
 static int
@@ -615,8 +614,7 @@ rte_pmd_null_probe(struct rte_vdev_device *dev)
        params = rte_vdev_device_args(dev);
        PMD_LOG(INFO, "Initializing pmd_null for %s", name);
 
-       if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
-           strlen(params) == 0) {
+       if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                eth_dev = rte_eth_dev_attach_secondary(name);
                if (!eth_dev) {
                        PMD_LOG(ERR, "Failed to probe %s", name);
@@ -681,7 +679,9 @@ rte_pmd_null_remove(struct rte_vdev_device *dev)
        if (eth_dev == NULL)
                return -1;
 
-       rte_free(eth_dev->data->dev_private);
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
+               /* mac_addrs must not be freed alone; it is part of dev_private */
+               eth_dev->data->mac_addrs = NULL;
 
        rte_eth_dev_release_port(eth_dev);
 
index 09f657a..a06a2c8 100644 (file)
@@ -13,8 +13,12 @@ foreach d: depends
        static_objs += [get_variable('static_rte_' + d)]
 endforeach
 
+c_args = cflags
+if allow_experimental_apis
+       c_args += '-DALLOW_EXPERIMENTAL_API'
+endif
 base_lib = static_library('octeontx_base', sources,
-       c_args: cflags,
+       c_args: c_args,
        dependencies: static_objs,
 )
 
index d51ded2..04b9ce1 100644 (file)
@@ -10,7 +10,7 @@
 
 #include <rte_io.h>
 
-/* In Cavium OcteonTX SoC, all accesses to the device registers are
+/* In Cavium OCTEON TX SoC, all accesses to the device registers are
  * implicitly strongly ordered. So, the relaxed version of an IO operation is
  * safe to use without any IO memory barriers.
  */
index 0f3d5d6..0681486 100644 (file)
@@ -281,14 +281,6 @@ octeontx_dev_configure(struct rte_eth_dev *dev)
                return -EINVAL;
        }
 
-       /* KEEP_CRC offload flag is not supported by PMD
-        * can remove the below block when DEV_RX_OFFLOAD_CRC_STRIP removed
-        */
-       if (rte_eth_dev_must_keep_crc(rxmode->offloads)) {
-               PMD_INIT_LOG(NOTICE, "can't disable hw crc strip");
-               rxmode->offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
-       }
-
        if (!(txmode->offloads & DEV_TX_OFFLOAD_MT_LOCKFREE)) {
                PMD_INIT_LOG(NOTICE, "can't disable lockfree tx");
                txmode->offloads |= DEV_TX_OFFLOAD_MT_LOCKFREE;
@@ -1023,12 +1015,22 @@ octeontx_create(struct rte_vdev_device *dev, int port, uint8_t evdev,
                return 0;
        }
 
+       /* Reserve an ethdev entry */
+       eth_dev = rte_eth_dev_allocate(octtx_name);
+       if (eth_dev == NULL) {
+               octeontx_log_err("failed to allocate rte_eth_dev");
+               res = -ENOMEM;
+               goto err;
+       }
+       data = eth_dev->data;
+
        nic = rte_zmalloc_socket(octtx_name, sizeof(*nic), 0, socket_id);
        if (nic == NULL) {
                octeontx_log_err("failed to allocate nic structure");
                res = -ENOMEM;
                goto err;
        }
+       data->dev_private = nic;
 
        nic->port_id = port;
        nic->evdev = evdev;
@@ -1045,21 +1047,11 @@ octeontx_create(struct rte_vdev_device *dev, int port, uint8_t evdev,
                goto err;
        }
 
-       /* Reserve an ethdev entry */
-       eth_dev = rte_eth_dev_allocate(octtx_name);
-       if (eth_dev == NULL) {
-               octeontx_log_err("failed to allocate rte_eth_dev");
-               res = -ENOMEM;
-               goto err;
-       }
-
        eth_dev->device = &dev->device;
        eth_dev->intr_handle = NULL;
        eth_dev->data->kdrv = RTE_KDRV_NONE;
        eth_dev->data->numa_node = dev->device.numa_node;
 
-       data = eth_dev->data;
-       data->dev_private = nic;
        data->port_id = eth_dev->data->port_id;
 
        nic->ev_queues = 1;
@@ -1111,12 +1103,7 @@ err:
        if (nic)
                octeontx_port_close(nic);
 
-       if (eth_dev != NULL) {
-               rte_free(eth_dev->data->mac_addrs);
-               rte_free(data);
-               rte_free(nic);
-               rte_eth_dev_release_port(eth_dev);
-       }
+       rte_eth_dev_release_port(eth_dev);
 
        return res;
 }
@@ -1141,16 +1128,22 @@ octeontx_remove(struct rte_vdev_device *dev)
                if (eth_dev == NULL)
                        return -ENODEV;
 
+               if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+                       rte_eth_dev_release_port(eth_dev);
+                       continue;
+               }
+
                nic = octeontx_pmd_priv(eth_dev);
                rte_event_dev_stop(nic->evdev);
                PMD_INIT_LOG(INFO, "Closing octeontx device %s", octtx_name);
 
-               rte_free(eth_dev->data->mac_addrs);
-               rte_free(eth_dev->data->dev_private);
                rte_eth_dev_release_port(eth_dev);
                rte_event_dev_close(nic->evdev);
        }
 
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return 0;
+
        /* Free FC resource */
        octeontx_pko_fc_free();
 
index 14f1696..920f6f8 100644 (file)
@@ -28,8 +28,7 @@
 #define OCTEONTX_MAX_BGX_PORTS                 4
 #define OCTEONTX_MAX_LMAC_PER_BGX              4
 
-#define OCTEONTX_RX_OFFLOADS                   (DEV_RX_OFFLOAD_CRC_STRIP \
-                                               | DEV_RX_OFFLOAD_CHECKSUM)
+#define OCTEONTX_RX_OFFLOADS                   DEV_RX_OFFLOAD_CHECKSUM
 #define OCTEONTX_TX_OFFLOADS                   DEV_TX_OFFLOAD_MT_LOCKFREE
 
 static inline struct octeontx_nic *
index a9149b4..1e201f3 100644 (file)
 #include "octeontx_rxtx.h"
 #include "octeontx_logs.h"
 
-
-static __rte_always_inline uint16_t __hot
-__octeontx_xmit_pkts(void *lmtline_va, void *ioreg_va, int64_t *fc_status_va,
-                       struct rte_mbuf *tx_pkt)
-{
-       uint64_t cmd_buf[4];
-       uint16_t gaura_id;
-
-       if (unlikely(*((volatile int64_t *)fc_status_va) < 0))
-               return -ENOSPC;
-
-       /* Get the gaura Id */
-       gaura_id = octeontx_fpa_bufpool_gaura((uintptr_t)tx_pkt->pool->pool_id);
-
-       /* Setup PKO_SEND_HDR_S */
-       cmd_buf[0] = tx_pkt->data_len & 0xffff;
-       cmd_buf[1] = 0x0;
-
-       /* Set don't free bit if reference count > 1 */
-       if (rte_mbuf_refcnt_read(tx_pkt) > 1)
-               cmd_buf[0] |= (1ULL << 58); /* SET DF */
-
-       /* Setup PKO_SEND_GATHER_S */
-       cmd_buf[(1 << 1) | 1] = rte_mbuf_data_iova(tx_pkt);
-       cmd_buf[(1 << 1) | 0] = PKO_SEND_GATHER_SUBDC |
-                               PKO_SEND_GATHER_LDTYPE(0x1ull) |
-                               PKO_SEND_GATHER_GAUAR((long)gaura_id) |
-                               tx_pkt->data_len;
-
-       octeontx_reg_lmtst(lmtline_va, ioreg_va, cmd_buf, PKO_CMD_SZ);
-
-       return 0;
-}
-
 uint16_t __hot
 octeontx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
@@ -63,6 +29,7 @@ octeontx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
        count = 0;
 
+       rte_cio_wmb();
        while (count < nb_pkts) {
                res = __octeontx_xmit_pkts(dq->lmtline_va, dq->ioreg_va,
                                           dq->fc_status_va,
index fe3e5cc..d0d73b3 100644 (file)
@@ -100,6 +100,39 @@ ptype_table[PTYPE_SIZE][PTYPE_SIZE][PTYPE_SIZE] = {
 
 };
 
+static __rte_always_inline int
+__octeontx_xmit_pkts(void *lmtline_va, void *ioreg_va, int64_t *fc_status_va,
+                       struct rte_mbuf *tx_pkt)
+{
+       uint64_t cmd_buf[4] __rte_cache_aligned;
+       uint16_t gaura_id;
+
+       if (unlikely(*((volatile int64_t *)fc_status_va) < 0))
+               return -ENOSPC;
+
+       /* Get the gaura Id */
+       gaura_id = octeontx_fpa_bufpool_gpool((uintptr_t)tx_pkt->pool->pool_id);
+
+       /* Setup PKO_SEND_HDR_S */
+       cmd_buf[0] = tx_pkt->data_len & 0xffff;
+       cmd_buf[1] = 0x0;
+
+       /* Set don't free bit if reference count > 1 */
+       if (rte_mbuf_refcnt_read(tx_pkt) > 1)
+               cmd_buf[0] |= (1ULL << 58); /* SET DF */
+
+       /* Setup PKO_SEND_GATHER_S */
+       cmd_buf[(1 << 1) | 1] = rte_mbuf_data_iova(tx_pkt);
+       cmd_buf[(1 << 1) | 0] = PKO_SEND_GATHER_SUBDC |
+                               PKO_SEND_GATHER_LDTYPE(0x1ull) |
+                               PKO_SEND_GATHER_GAUAR((long)gaura_id) |
+                               tx_pkt->data_len;
+
+       octeontx_reg_lmtst(lmtline_va, ioreg_va, cmd_buf, PKO_CMD_SZ);
+
+       return 0;
+}
+
 uint16_t
 octeontx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 
index e8810a1..7bbe72e 100644 (file)
@@ -7,6 +7,14 @@
 #include <time.h>
 
 #include <net/if.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#if defined(RTE_EXEC_ENV_BSDAPP)
+#include <sys/sysctl.h>
+#include <net/if_dl.h>
+#endif
 
 #include <pcap.h>
 
@@ -17,6 +25,7 @@
 #include <rte_malloc.h>
 #include <rte_mbuf.h>
 #include <rte_bus_vdev.h>
+#include <rte_string_fns.h>
 
 #define RTE_ETH_PCAP_SNAPSHOT_LEN 65535
 #define RTE_ETH_PCAP_SNAPLEN ETHER_MAX_JUMBO_FRAME_LEN
@@ -29,6 +38,7 @@
 #define ETH_PCAP_RX_IFACE_IN_ARG "rx_iface_in"
 #define ETH_PCAP_TX_IFACE_ARG "tx_iface"
 #define ETH_PCAP_IFACE_ARG    "iface"
+#define ETH_PCAP_PHY_MAC_ARG  "phy_mac"
 
 #define ETH_PCAP_ARG_MAXLEN    64
 
@@ -39,6 +49,7 @@ static unsigned char tx_pcap_data[RTE_ETH_PCAP_SNAPLEN];
 static struct timeval start_time;
 static uint64_t start_cycles;
 static uint64_t hz;
+static uint8_t iface_idx;
 
 struct queue_stat {
        volatile unsigned long pkts;
@@ -66,8 +77,10 @@ struct pcap_tx_queue {
 struct pmd_internals {
        struct pcap_rx_queue rx_queue[RTE_PMD_PCAP_MAX_QUEUES];
        struct pcap_tx_queue tx_queue[RTE_PMD_PCAP_MAX_QUEUES];
+       struct ether_addr eth_addr;
        int if_index;
        int single_iface;
+       int phy_mac;
 };
 
 struct pmd_devargs {
@@ -78,6 +91,7 @@ struct pmd_devargs {
                const char *name;
                const char *type;
        } queue[RTE_PMD_PCAP_MAX_QUEUES];
+       int phy_mac;
 };
 
 static const char *valid_arguments[] = {
@@ -87,13 +101,10 @@ static const char *valid_arguments[] = {
        ETH_PCAP_RX_IFACE_IN_ARG,
        ETH_PCAP_TX_IFACE_ARG,
        ETH_PCAP_IFACE_ARG,
+       ETH_PCAP_PHY_MAC_ARG,
        NULL
 };
 
-static struct ether_addr eth_addr = {
-       .addr_bytes = { 0, 0, 0, 0x1, 0x2, 0x3 }
-};
-
 static struct rte_eth_link pmd_link = {
                .link_speed = ETH_SPEED_NUM_10G,
                .link_duplex = ETH_LINK_FULL_DUPLEX,
@@ -553,7 +564,6 @@ eth_dev_info(struct rte_eth_dev *dev,
        dev_info->max_rx_queues = dev->data->nb_rx_queues;
        dev_info->max_tx_queues = dev->data->nb_tx_queues;
        dev_info->min_rx_bufsize = 0;
-       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_CRC_STRIP;
 }
 
 static int
@@ -862,6 +872,20 @@ open_tx_iface(const char *key, const char *value, void *extra_args)
        return open_iface(key, value, extra_args);
 }
 
+static int
+select_phy_mac(const char *key __rte_unused, const char *value,
+               void *extra_args)
+{
+       if (extra_args) {
+               const int phy_mac = atoi(value);
+               int *enable_phy_mac = extra_args;
+
+               if (phy_mac)
+                       *enable_phy_mac = 1;
+       }
+       return 0;
+}
+
 static struct rte_vdev_driver pmd_pcap_drv;
 
 static int
@@ -889,11 +913,20 @@ pmd_init_internals(struct rte_vdev_device *vdev,
         * - and point eth_dev structure to new eth_dev_data structure
         */
        *internals = (*eth_dev)->data->dev_private;
+       /*
+        * Interface MAC = 02:70:63:61:70:<iface_idx>
+        * derived from: 'locally administered':'p':'c':'a':'p':'iface_idx'
+        * where the middle 4 characters are converted to hex.
+        */
+       (*internals)->eth_addr = (struct ether_addr) {
+               .addr_bytes = { 0x02, 0x70, 0x63, 0x61, 0x70, iface_idx++ }
+       };
+       (*internals)->phy_mac = 0;
        data = (*eth_dev)->data;
        data->nb_rx_queues = (uint16_t)nb_rx_queues;
        data->nb_tx_queues = (uint16_t)nb_tx_queues;
        data->dev_link = pmd_link;
-       data->mac_addrs = &eth_addr;
+       data->mac_addrs = &(*internals)->eth_addr;
 
        /*
         * NOTE: we'll replace the data element, of originally allocated
@@ -904,15 +937,96 @@ pmd_init_internals(struct rte_vdev_device *vdev,
        return 0;
 }
 
+static int
+eth_pcap_update_mac(const char *if_name, struct rte_eth_dev *eth_dev,
+               const unsigned int numa_node)
+{
+#if defined(RTE_EXEC_ENV_LINUXAPP)
+       void *mac_addrs;
+       struct ifreq ifr;
+       int if_fd = socket(AF_INET, SOCK_DGRAM, 0);
+
+       if (if_fd == -1)
+               return -1;
+
+       rte_strscpy(ifr.ifr_name, if_name, sizeof(ifr.ifr_name));
+       if (ioctl(if_fd, SIOCGIFHWADDR, &ifr)) {
+               close(if_fd);
+               return -1;
+       }
+
+       mac_addrs = rte_zmalloc_socket(NULL, ETHER_ADDR_LEN, 0, numa_node);
+       if (!mac_addrs) {
+               close(if_fd);
+               return -1;
+       }
+
+       PMD_LOG(INFO, "Setting phy MAC for %s", if_name);
+       eth_dev->data->mac_addrs = mac_addrs;
+       rte_memcpy(eth_dev->data->mac_addrs[0].addr_bytes,
+                       ifr.ifr_hwaddr.sa_data, ETHER_ADDR_LEN);
+
+       close(if_fd);
+
+       return 0;
+
+#elif defined(RTE_EXEC_ENV_BSDAPP)
+       void *mac_addrs;
+       struct if_msghdr *ifm;
+       struct sockaddr_dl *sdl;
+       int mib[6];
+       size_t len = 0;
+       char *buf;
+
+       mib[0] = CTL_NET;
+       mib[1] = AF_ROUTE;
+       mib[2] = 0;
+       mib[3] = AF_LINK;
+       mib[4] = NET_RT_IFLIST;
+       mib[5] = if_nametoindex(if_name);
+
+       if (sysctl(mib, 6, NULL, &len, NULL, 0) < 0)
+               return -1;
+
+       if (len == 0)
+               return -1;
+
+       buf = rte_malloc(NULL, len, 0);
+       if (!buf)
+               return -1;
+
+       if (sysctl(mib, 6, buf, &len, NULL, 0) < 0) {
+               rte_free(buf);
+               return -1;
+       }
+       ifm = (struct if_msghdr *)buf;
+       sdl = (struct sockaddr_dl *)(ifm + 1);
+
+       mac_addrs = rte_zmalloc_socket(NULL, ETHER_ADDR_LEN, 0, numa_node);
+       if (!mac_addrs) {
+               rte_free(buf);
+               return -1;
+       }
+
+       PMD_LOG(INFO, "Setting phy MAC for %s", if_name);
+       eth_dev->data->mac_addrs = mac_addrs;
+       rte_memcpy(eth_dev->data->mac_addrs[0].addr_bytes,
+                       LLADDR(sdl), ETHER_ADDR_LEN);
+
+       rte_free(buf);
+
+       return 0;
+#else
+       return -1;
+#endif
+}
+
 static int
 eth_from_pcaps_common(struct rte_vdev_device *vdev,
                struct pmd_devargs *rx_queues, const unsigned int nb_rx_queues,
                struct pmd_devargs *tx_queues, const unsigned int nb_tx_queues,
-               struct rte_kvargs *kvlist, struct pmd_internals **internals,
-               struct rte_eth_dev **eth_dev)
+               struct pmd_internals **internals, struct rte_eth_dev **eth_dev)
 {
-       struct rte_kvargs_pair *pair = NULL;
-       unsigned int k_idx;
        unsigned int i;
 
        /* do some parameter checking */
@@ -944,17 +1058,6 @@ eth_from_pcaps_common(struct rte_vdev_device *vdev,
                snprintf(tx->type, sizeof(tx->type), "%s", queue->type);
        }
 
-       for (k_idx = 0; k_idx < kvlist->count; k_idx++) {
-               pair = &kvlist->pairs[k_idx];
-               if (strstr(pair->key, ETH_PCAP_IFACE_ARG) != NULL)
-                       break;
-       }
-
-       if (pair == NULL)
-               (*internals)->if_index = 0;
-       else
-               (*internals)->if_index = if_nametoindex(pair->value);
-
        return 0;
 }
 
@@ -962,15 +1065,14 @@ static int
 eth_from_pcaps(struct rte_vdev_device *vdev,
                struct pmd_devargs *rx_queues, const unsigned int nb_rx_queues,
                struct pmd_devargs *tx_queues, const unsigned int nb_tx_queues,
-               struct rte_kvargs *kvlist, int single_iface,
-               unsigned int using_dumpers)
+               int single_iface, unsigned int using_dumpers)
 {
        struct pmd_internals *internals = NULL;
        struct rte_eth_dev *eth_dev = NULL;
        int ret;
 
        ret = eth_from_pcaps_common(vdev, rx_queues, nb_rx_queues,
-               tx_queues, nb_tx_queues, kvlist, &internals, &eth_dev);
+               tx_queues, nb_tx_queues, &internals, &eth_dev);
 
        if (ret < 0)
                return ret;
@@ -978,6 +1080,18 @@ eth_from_pcaps(struct rte_vdev_device *vdev,
        /* store whether we are using a single interface for rx/tx or not */
        internals->single_iface = single_iface;
 
+       if (single_iface) {
+               internals->if_index = if_nametoindex(rx_queues->queue[0].name);
+
+               /* phy_mac arg is applied only if "iface" devarg is provided */
+               if (rx_queues->phy_mac) {
+                       int ret = eth_pcap_update_mac(rx_queues->queue[0].name,
+                                       eth_dev, vdev->device.numa_node);
+                       if (ret == 0)
+                               internals->phy_mac = 1;
+               }
+       }
+
        eth_dev->rx_pkt_burst = eth_pcap_rx;
 
        if (using_dumpers)
@@ -1008,8 +1122,7 @@ pmd_pcap_probe(struct rte_vdev_device *dev)
        start_cycles = rte_get_timer_cycles();
        hz = rte_get_timer_hz();
 
-       if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
-           strlen(rte_vdev_device_args(dev)) == 0) {
+       if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                eth_dev = rte_eth_dev_attach_secondary(name);
                if (!eth_dev) {
                        PMD_LOG(ERR, "Failed to probe %s", name);
@@ -1034,12 +1147,18 @@ pmd_pcap_probe(struct rte_vdev_device *dev)
 
                ret = rte_kvargs_process(kvlist, ETH_PCAP_IFACE_ARG,
                                &open_rx_tx_iface, &pcaps);
-
                if (ret < 0)
                        goto free_kvlist;
 
                dumpers.queue[0] = pcaps.queue[0];
 
+               ret = rte_kvargs_process(kvlist, ETH_PCAP_PHY_MAC_ARG,
+                               &select_phy_mac, &pcaps.phy_mac);
+               if (ret < 0)
+                       goto free_kvlist;
+
+               dumpers.phy_mac = pcaps.phy_mac;
+
                single_iface = 1;
                pcaps.num_of_queue = 1;
                dumpers.num_of_queue = 1;
@@ -1084,7 +1203,7 @@ pmd_pcap_probe(struct rte_vdev_device *dev)
 
 create_eth:
        ret = eth_from_pcaps(dev, &pcaps, pcaps.num_of_queue, &dumpers,
-               dumpers.num_of_queue, kvlist, single_iface, is_tx_pcap);
+               dumpers.num_of_queue, single_iface, is_tx_pcap);
 
 free_kvlist:
        rte_kvargs_free(kvlist);
@@ -1095,6 +1214,7 @@ free_kvlist:
 static int
 pmd_pcap_remove(struct rte_vdev_device *dev)
 {
+       struct pmd_internals *internals = NULL;
        struct rte_eth_dev *eth_dev = NULL;
 
        PMD_LOG(INFO, "Closing pcap ethdev on numa socket %d",
@@ -1108,7 +1228,12 @@ pmd_pcap_remove(struct rte_vdev_device *dev)
        if (eth_dev == NULL)
                return -1;
 
-       rte_free(eth_dev->data->dev_private);
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               internals = eth_dev->data->dev_private;
+               if (internals != NULL && internals->phy_mac == 0)
+                       /* not dynamically allocated, must not be freed */
+                       eth_dev->data->mac_addrs = NULL;
+       }
 
        rte_eth_dev_release_port(eth_dev);
 
@@ -1128,7 +1253,8 @@ RTE_PMD_REGISTER_PARAM_STRING(net_pcap,
        ETH_PCAP_RX_IFACE_ARG "=<ifc> "
        ETH_PCAP_RX_IFACE_IN_ARG "=<ifc> "
        ETH_PCAP_TX_IFACE_ARG "=<ifc> "
-       ETH_PCAP_IFACE_ARG "=<ifc>");
+       ETH_PCAP_IFACE_ARG "=<ifc> "
+       ETH_PCAP_PHY_MAC_ARG "=<int>");
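
Usage sketch for the new devarg (the interface name is an example): binding a pcap vdev to a host interface and inheriting its MAC address can be done with testpmd -l 0-1 --vdev 'net_pcap0,iface=eth0,phy_mac=1' -- -i; without phy_mac=1, the locally administered 02:70:63:61:70:xx address derived above is used instead.
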
 
 RTE_INIT(eth_pcap_init_log)
 {
index 488ca1d..2ecbd8d 100644 (file)
@@ -105,6 +105,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += ecore_vf.c
 SRCS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede_ethdev.c
 SRCS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede_main.c
 SRCS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede_rxtx.c
-SRCS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede_fdir.c
+SRCS-$(CONFIG_RTE_LIBRTE_QEDE_PMD) += qede_filter.c
 
 include $(RTE_SDK)/mk/rte.lib.mk
index d5d6f8e..693328f 100644 (file)
@@ -17,7 +17,7 @@
 /* Array of memzone pointers */
 static const struct rte_memzone *ecore_mz_mapping[RTE_MAX_MEMZONE];
 /* Counter to track current memzone allocated */
-uint16_t ecore_mz_count;
+static uint16_t ecore_mz_count;
 
 unsigned long qede_log2_align(unsigned long n)
 {
index 630867f..1abf44f 100644 (file)
@@ -447,10 +447,13 @@ u32 qede_crc32(u32 crc, u8 *ptr, u32 length);
 #define OSAL_CRC8(table, pdata, nbytes, crc) 0
 #define OSAL_MFW_TLV_REQ(p_hwfn) nothing
 #define OSAL_MFW_FILL_TLV_DATA(type, buf, data) (0)
+#define OSAL_HW_INFO_CHANGE(p_hwfn, change) nothing
 #define OSAL_MFW_CMD_PREEMPT(p_hwfn) nothing
 #define OSAL_PF_VALIDATE_MODIFY_TUNN_CONFIG(p_hwfn, mask, b_update, tunn) 0
 
 #define OSAL_DIV_S64(a, b)     ((a) / (b))
 #define OSAL_LLDP_RX_TLVS(p_hwfn, tlv_buf, tlv_size) nothing
+#define OSAL_DBG_ALLOC_USER_DATA(p_hwfn, user_data_ptr) (0)
+#define OSAL_DB_REC_OCCURRED(p_hwfn) nothing
 
 #endif /* __BCM_OSAL_H */
index ca8e59d..2aaf298 100644 (file)
@@ -95,8 +95,8 @@
 
 
 #define FW_MAJOR_VERSION        8
-#define FW_MINOR_VERSION        33
-#define FW_REVISION_VERSION     12
+#define FW_MINOR_VERSION        37
+#define FW_REVISION_VERSION     7
 #define FW_ENGINEERING_VERSION  0
 
 /***********************/
@@ -1033,13 +1033,14 @@ struct db_rdma_dpm_params {
 #define DB_RDMA_DPM_PARAMS_WQE_SIZE_SHIFT           16
 #define DB_RDMA_DPM_PARAMS_RESERVED0_MASK           0x1
 #define DB_RDMA_DPM_PARAMS_RESERVED0_SHIFT          27
-/* RoCE completion flag */
-#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_MASK      0x1
-#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_SHIFT     28
+/* RoCE ack request (will be set 1) */
+#define DB_RDMA_DPM_PARAMS_ACK_REQUEST_MASK         0x1
+#define DB_RDMA_DPM_PARAMS_ACK_REQUEST_SHIFT        28
 #define DB_RDMA_DPM_PARAMS_S_FLG_MASK               0x1 /* RoCE S flag */
 #define DB_RDMA_DPM_PARAMS_S_FLG_SHIFT              29
-#define DB_RDMA_DPM_PARAMS_RESERVED1_MASK           0x1
-#define DB_RDMA_DPM_PARAMS_RESERVED1_SHIFT          30
+/* RoCE completion flag for FW use */
+#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_MASK      0x1
+#define DB_RDMA_DPM_PARAMS_COMPLETION_FLG_SHIFT     30
 /* Connection type is iWARP */
 #define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_MASK  0x1
 #define DB_RDMA_DPM_PARAMS_CONN_TYPE_IS_IWARP_SHIFT 31
index 5d79fdf..524a1dd 100644 (file)
@@ -19,6 +19,7 @@
 #include <zlib.h>
 #endif
 
+#include "ecore_status.h"
 #include "ecore_hsi_common.h"
 #include "ecore_hsi_debug_tools.h"
 #include "ecore_hsi_init_func.h"
@@ -27,8 +28,8 @@
 #include "mcp_public.h"
 
 #define ECORE_MAJOR_VERSION            8
-#define ECORE_MINOR_VERSION            30
-#define ECORE_REVISION_VERSION         8
+#define ECORE_MINOR_VERSION            37
+#define ECORE_REVISION_VERSION         20
 #define ECORE_ENGINEERING_VERSION      0
 
 #define ECORE_VERSION                                                  \
@@ -207,6 +208,7 @@ struct ecore_l2_info;
 struct ecore_igu_info;
 struct ecore_mcp_info;
 struct ecore_dcbx_info;
+struct ecore_llh_info;
 
 struct ecore_rt_data {
        u32     *init_val;
@@ -543,6 +545,9 @@ enum ecore_mf_mode_bit {
 
        /* Use stag for steering */
        ECORE_MF_8021AD_TAGGING,
+
+       /* Allow FIP discovery fallback */
+       ECORE_MF_FIP_SPECIAL,
 };
 
 enum ecore_ufp_mode {
@@ -660,6 +665,7 @@ struct ecore_hwfn {
 #endif
 
        struct dbg_tools_data           dbg_info;
+       void                            *dbg_user_info;
 
        struct z_stream_s               *stream;
 
@@ -739,6 +745,7 @@ struct ecore_dev {
 #endif
 #define ECORE_IS_AH(dev)       ((dev)->type == ECORE_DEV_TYPE_AH)
 #define ECORE_IS_K2(dev)       ECORE_IS_AH(dev)
+#define ECORE_IS_E4(dev)       (ECORE_IS_BB(dev) || ECORE_IS_AH(dev))
 
        u16 vendor_id;
        u16 device_id;
@@ -833,8 +840,26 @@ struct ecore_dev {
        /* HW functions */
        u8                              num_hwfns;
        struct ecore_hwfn               hwfns[MAX_HWFNS_PER_DEVICE];
+#define ECORE_LEADING_HWFN(dev)                (&dev->hwfns[0])
 #define ECORE_IS_CMT(dev)              ((dev)->num_hwfns > 1)
 
+       /* Engine affinity */
+       u8                              l2_affin_hint;
+       u8                              fir_affin;
+       u8                              iwarp_affin;
+       /* Macro for getting the engine-affinitized hwfn for FCoE/iSCSI/RoCE */
+#define ECORE_FIR_AFFIN_HWFN(dev)      (&dev->hwfns[dev->fir_affin])
+       /* Macro for getting the engine-affinitized hwfn for iWARP */
+#define ECORE_IWARP_AFFIN_HWFN(dev)    (&dev->hwfns[dev->iwarp_affin])
+       /* Generic macro for getting the engine-affinitized hwfn */
+#define ECORE_AFFIN_HWFN(dev) \
+       (ECORE_IS_IWARP_PERSONALITY(ECORE_LEADING_HWFN(dev)) ? \
+        ECORE_IWARP_AFFIN_HWFN(dev) : \
+        ECORE_FIR_AFFIN_HWFN(dev))
+       /* Macro for getting the index (0/1) of the engine-affinitized hwfn */
+#define ECORE_AFFIN_HWFN_IDX(dev) \
+       (IS_LEAD_HWFN(ECORE_AFFIN_HWFN(dev)) ? 0 : 1)
+
        /* SRIOV */
        struct ecore_hw_sriov_info      *p_iov_info;
 #define IS_ECORE_SRIOV(p_dev)          (!!(p_dev)->p_iov_info)
@@ -869,6 +894,12 @@ struct ecore_dev {
 #ifndef ASIC_ONLY
        bool                            b_is_emul_full;
 #endif
+       /* LLH info */
+       u8                              ppfid_bitmap;
+       struct ecore_llh_info           *p_llh_info;
+
+       /* Indicates whether this PF serves a storage target */
+       bool                            b_is_target;
 
 #ifdef CONFIG_ECORE_BINARY_FW /* @DPDK */
        void                            *firmware;
@@ -958,6 +989,8 @@ void ecore_db_recovery_dp(struct ecore_hwfn *p_hwfn);
 void ecore_db_recovery_execute(struct ecore_hwfn *p_hwfn,
                               enum ecore_db_rec_exec);
 
+bool ecore_edpm_enabled(struct ecore_hwfn *p_hwfn);
+
 /* amount of resources used in qm init */
 u8 ecore_init_qm_get_num_tcs(struct ecore_hwfn *p_hwfn);
 u16 ecore_init_qm_get_num_vfs(struct ecore_hwfn *p_hwfn);
@@ -965,6 +998,29 @@ u16 ecore_init_qm_get_num_pf_rls(struct ecore_hwfn *p_hwfn);
 u16 ecore_init_qm_get_num_vports(struct ecore_hwfn *p_hwfn);
 u16 ecore_init_qm_get_num_pqs(struct ecore_hwfn *p_hwfn);
 
-#define ECORE_LEADING_HWFN(dev)        (&dev->hwfns[0])
+#define MFW_PORT(_p_hwfn)      ((_p_hwfn)->abs_pf_id % \
+                                ecore_device_num_ports((_p_hwfn)->p_dev))
+
+/* The PFID<->PPFID calculation is based on the relative index of a PF on its
+ * port. In BB there is a bug in the LLH in which the PPFID is actually
+ * engine-based, and thus it equals the PFID.
+ */
+#define ECORE_PFID_BY_PPFID(_p_hwfn, abs_ppfid) \
+       (ECORE_IS_BB((_p_hwfn)->p_dev) ? \
+        (abs_ppfid) : \
+        (abs_ppfid) * (_p_hwfn)->p_dev->num_ports_in_engine + \
+        MFW_PORT(_p_hwfn))
+#define ECORE_PPFID_BY_PFID(_p_hwfn) \
+       (ECORE_IS_BB((_p_hwfn)->p_dev) ? \
+        (_p_hwfn)->rel_pf_id : \
+        (_p_hwfn)->rel_pf_id / (_p_hwfn)->p_dev->num_ports_in_engine)
+
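To make the PFID<->PPFID relation concrete, here is a minimal standalone sketch of the same arithmetic, assuming a hypothetical AH device with two ports per engine (all numbers are made up):

#include <stdio.h>
#include <stdint.h>

/* Mirrors the macro logic for a non-BB (AH) device, where the PPFID is
 * port-relative: pfid = ppfid * ports_in_engine + port, and conversely
 * ppfid = pfid / ports_in_engine. On BB the PPFID simply equals the PFID.
 */
int main(void)
{
        uint8_t ports_in_engine = 2;    /* hypothetical configuration */
        uint8_t mfw_port = 1;           /* hypothetical port of this PF */
        uint8_t abs_ppfid = 3;

        uint8_t pfid = abs_ppfid * ports_in_engine + mfw_port;
        printf("ppfid %u -> pfid %u\n", abs_ppfid, pfid);               /* 3 -> 7 */
        printf("pfid %u -> ppfid %u\n", pfid, pfid / ports_in_engine);  /* 7 -> 3 */
        return 0;
}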
+enum _ecore_status_t ecore_all_ppfids_wr(struct ecore_hwfn *p_hwfn,
+                                        struct ecore_ptt *p_ptt, u32 addr,
+                                        u32 val);
+
+/* Utility functions for dumping the content of the NIG LLH filters */
+enum _ecore_status_t ecore_llh_dump_ppfid(struct ecore_dev *p_dev, u8 ppfid);
+enum _ecore_status_t ecore_llh_dump_all(struct ecore_dev *p_dev);
 
 #endif /* __ECORE_H */
index bf36ce5..5c3370e 100644 (file)
@@ -1133,6 +1133,9 @@ enum _ecore_status_t ecore_cxt_mngr_alloc(struct ecore_hwfn *p_hwfn)
                return ECORE_NOMEM;
        }
 
+       /* Set the cxt mngr pointer prior to further allocations */
+       p_hwfn->p_cxt_mngr = p_mngr;
+
        /* Initialize ILT client registers */
        clients = p_mngr->clients;
        clients[ILT_CLI_CDUC].first.reg = ILT_CFG_REG(CDUC, FIRST_ILT);
@@ -1174,13 +1177,13 @@ enum _ecore_status_t ecore_cxt_mngr_alloc(struct ecore_hwfn *p_hwfn)
 
        /* Initialize the dynamic ILT allocation mutex */
 #ifdef CONFIG_ECORE_LOCK_ALLOC
-       OSAL_MUTEX_ALLOC(p_hwfn, &p_mngr->mutex);
+       if (OSAL_MUTEX_ALLOC(p_hwfn, &p_mngr->mutex)) {
+               DP_NOTICE(p_hwfn, false, "Failed to alloc p_mngr->mutex\n");
+               return ECORE_NOMEM;
+       }
 #endif
        OSAL_MUTEX_INIT(&p_mngr->mutex);
 
-       /* Set the cxt mangr pointer priori to further allocations */
-       p_hwfn->p_cxt_mngr = p_mngr;
-
        return ECORE_SUCCESS;
 }
 
@@ -2111,7 +2114,7 @@ ecore_cxt_dynamic_ilt_alloc(struct ecore_hwfn *p_hwfn,
 
        ecore_dmae_host2grc(p_hwfn, p_ptt, (u64)(osal_uintptr_t)&ilt_hw_entry,
                            reg_offset, sizeof(ilt_hw_entry) / sizeof(u32),
-                           0 /* no flags */);
+                           OSAL_NULL /* default parameters */);
 
        if (elem_type == ECORE_ELEM_CXT) {
                u32 last_cid_allocated = (1 + (iid / elems_per_p)) *
@@ -2218,7 +2221,7 @@ ecore_cxt_free_ilt_range(struct ecore_hwfn *p_hwfn,
                                    (u64)(osal_uintptr_t)&ilt_hw_entry,
                                    reg_offset,
                                    sizeof(ilt_hw_entry) / sizeof(u32),
-                                   0 /* no flags */);
+                                   OSAL_NULL /* default parameters */);
        }
 
        ecore_ptt_release(p_hwfn, p_ptt);
index 9667874..cbc69cd 100644 (file)
@@ -129,7 +129,7 @@ u8 ecore_dcbx_get_dscp_value(struct ecore_hwfn *p_hwfn, u8 pri)
 
 static void
 ecore_dcbx_set_params(struct ecore_dcbx_results *p_data,
-                     struct ecore_hwfn *p_hwfn,
+                     struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
                      bool enable, u8 prio, u8 tc,
                      enum dcbx_protocol_type type,
                      enum ecore_pci_personality personality)
@@ -154,12 +154,19 @@ ecore_dcbx_set_params(struct ecore_dcbx_results *p_data,
        /* QM reconf data */
        if (p_hwfn->hw_info.personality == personality)
                p_hwfn->hw_info.offload_tc = tc;
+
+       /* Configure the DCBX VLAN priority in the doorbell block for RoCE EDPM */
+       if (OSAL_TEST_BIT(ECORE_MF_UFP_SPECIFIC, &p_hwfn->p_dev->mf_bits) &&
+           (type == DCBX_PROTOCOL_ROCE)) {
+               ecore_wr(p_hwfn, p_ptt, DORQ_REG_TAG1_OVRD_MODE, 1);
+               ecore_wr(p_hwfn, p_ptt, DORQ_REG_PF_PCP_BB_K2, prio << 1);
+       }
 }
 
 /* Update app protocol data and hw_info fields with the TLV info */
 static void
 ecore_dcbx_update_app_info(struct ecore_dcbx_results *p_data,
-                          struct ecore_hwfn *p_hwfn,
+                          struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
                           bool enable, u8 prio, u8 tc,
                           enum dcbx_protocol_type type)
 {
@@ -175,7 +182,7 @@ ecore_dcbx_update_app_info(struct ecore_dcbx_results *p_data,
 
                personality = ecore_dcbx_app_update[i].personality;
 
-               ecore_dcbx_set_params(p_data, p_hwfn, enable,
+               ecore_dcbx_set_params(p_data, p_hwfn, p_ptt, enable,
                                      prio, tc, type, personality);
        }
 }
@@ -231,7 +238,7 @@ ecore_dcbx_get_app_protocol_type(struct ecore_hwfn *p_hwfn,
  * reconfiguring QM. Get protocol specific data for PF update ramrod command.
  */
 static enum _ecore_status_t
-ecore_dcbx_process_tlv(struct ecore_hwfn *p_hwfn,
+ecore_dcbx_process_tlv(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
                       struct ecore_dcbx_results *p_data,
                       struct dcbx_app_priority_entry *p_tbl, u32 pri_tc_tbl,
                       int count, u8 dcbx_version)
@@ -280,8 +287,8 @@ ecore_dcbx_process_tlv(struct ecore_hwfn *p_hwfn,
                                enable = true;
                        }
 
-                       ecore_dcbx_update_app_info(p_data, p_hwfn, enable,
-                                                  priority, tc, type);
+                       ecore_dcbx_update_app_info(p_data, p_hwfn, p_ptt,
+                                                  enable, priority, tc, type);
                }
        }
 
@@ -302,8 +309,8 @@ ecore_dcbx_process_tlv(struct ecore_hwfn *p_hwfn,
                if (p_data->arr[type].update)
                        continue;
 
-               enable = (type == DCBX_PROTOCOL_ETH) ? false : !!dcbx_version;
-               ecore_dcbx_update_app_info(p_data, p_hwfn, enable,
+               /* If no APP TLV was present, don't override it in FW */
+               ecore_dcbx_update_app_info(p_data, p_hwfn, p_ptt, false,
                                           priority, tc, type);
        }
 
@@ -314,11 +321,11 @@ ecore_dcbx_process_tlv(struct ecore_hwfn *p_hwfn,
  * reconfiguring QM. Get protocol specific data for PF update ramrod command.
  */
 static enum _ecore_status_t
-ecore_dcbx_process_mib_info(struct ecore_hwfn *p_hwfn)
+ecore_dcbx_process_mib_info(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
 {
        struct dcbx_app_priority_feature *p_app;
        struct dcbx_app_priority_entry *p_tbl;
-       struct ecore_dcbx_results data = { 0 };
+       struct ecore_dcbx_results data;
        struct dcbx_ets_feature *p_ets;
        struct ecore_hw_info *p_info;
        u32 pri_tc_tbl, flags;
@@ -338,7 +345,8 @@ ecore_dcbx_process_mib_info(struct ecore_hwfn *p_hwfn)
        p_info = &p_hwfn->hw_info;
        num_entries = GET_MFW_FIELD(p_app->flags, DCBX_APP_NUM_ENTRIES);
 
-       rc = ecore_dcbx_process_tlv(p_hwfn, &data, p_tbl, pri_tc_tbl,
+       OSAL_MEMSET(&data, 0, sizeof(struct ecore_dcbx_results));
+       rc = ecore_dcbx_process_tlv(p_hwfn, p_ptt, &data, p_tbl, pri_tc_tbl,
                                    num_entries, dcbx_version);
        if (rc != ECORE_SUCCESS)
                return rc;
@@ -879,7 +887,7 @@ ecore_dcbx_mib_update_event(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
        if (type == ECORE_DCBX_OPERATIONAL_MIB) {
                ecore_dcbx_get_dscp_params(p_hwfn, &p_hwfn->p_dcbx_info->get);
 
-               rc = ecore_dcbx_process_mib_info(p_hwfn);
+               rc = ecore_dcbx_process_mib_info(p_hwfn, p_ptt);
                if (!rc) {
                        /* reconfigure tcs of QM queues according
                         * to negotiation results
@@ -893,12 +901,19 @@ ecore_dcbx_mib_update_event(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
 
        ecore_dcbx_get_params(p_hwfn, &p_hwfn->p_dcbx_info->get, type);
 
-       /* Update the DSCP to TC mapping bit if required */
+       /* Update the DSCP to TC mapping enable bit if required */
        if ((type == ECORE_DCBX_OPERATIONAL_MIB) &&
            p_hwfn->p_dcbx_info->dscp_nig_update) {
                u8 val = !!p_hwfn->p_dcbx_info->get.dscp.enabled;
+               u32 addr = NIG_REG_DSCP_TO_TC_MAP_ENABLE;
+
+               rc = ecore_all_ppfids_wr(p_hwfn, p_ptt, addr, val);
+               if (rc != ECORE_SUCCESS) {
+                       DP_NOTICE(p_hwfn, false,
+                                 "Failed to update the DSCP to TC mapping enable bit\n");
+                       return rc;
+               }
 
-               ecore_wr(p_hwfn, p_ptt, NIG_REG_DSCP_TO_TC_MAP_ENABLE, val);
                p_hwfn->p_dcbx_info->dscp_nig_update = false;
        }
 
@@ -1533,3 +1548,59 @@ ecore_lldp_set_system_tlvs(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
 
        return rc;
 }
+
+enum _ecore_status_t
+ecore_dcbx_get_dscp_priority(struct ecore_hwfn *p_hwfn,
+                            u8 dscp_index, u8 *p_dscp_pri)
+{
+       struct ecore_dcbx_get *p_dcbx_info;
+       enum _ecore_status_t rc;
+
+       if (dscp_index >= ECORE_DCBX_DSCP_SIZE) {
+               DP_ERR(p_hwfn, "Invalid dscp index %d\n", dscp_index);
+               return ECORE_INVAL;
+       }
+
+       p_dcbx_info = OSAL_ALLOC(p_hwfn->p_dev, GFP_KERNEL,
+                                sizeof(*p_dcbx_info));
+       if (!p_dcbx_info)
+               return ECORE_NOMEM;
+
+       OSAL_MEMSET(p_dcbx_info, 0, sizeof(*p_dcbx_info));
+       rc = ecore_dcbx_query_params(p_hwfn, p_dcbx_info,
+                                    ECORE_DCBX_OPERATIONAL_MIB);
+       if (rc) {
+               OSAL_FREE(p_hwfn->p_dev, p_dcbx_info);
+               return rc;
+       }
+
+       *p_dscp_pri = p_dcbx_info->dscp.dscp_pri_map[dscp_index];
+       OSAL_FREE(p_hwfn->p_dev, p_dcbx_info);
+
+       return ECORE_SUCCESS;
+}
+
+enum _ecore_status_t
+ecore_dcbx_set_dscp_priority(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                            u8 dscp_index, u8 pri_val)
+{
+       struct ecore_dcbx_set dcbx_set;
+       enum _ecore_status_t rc;
+
+       if (dscp_index >= ECORE_DCBX_DSCP_SIZE ||
+           pri_val >= ECORE_MAX_PFC_PRIORITIES) {
+               DP_ERR(p_hwfn, "Invalid dscp params: index = %d pri = %d\n",
+                      dscp_index, pri_val);
+               return ECORE_INVAL;
+       }
+
+       OSAL_MEMSET(&dcbx_set, 0, sizeof(dcbx_set));
+       rc = ecore_dcbx_get_config_params(p_hwfn, &dcbx_set);
+       if (rc)
+               return rc;
+
+       dcbx_set.override_flags = ECORE_DCBX_OVERRIDE_DSCP_CFG;
+       dcbx_set.dscp.dscp_pri_map[dscp_index] = pri_val;
+
+       return ecore_dcbx_config_params(p_hwfn, p_ptt, &dcbx_set, 1);
+}
index eaf8e08..6fad2ec 100644 (file)
@@ -228,6 +228,16 @@ enum _ecore_status_t
 ecore_lldp_set_system_tlvs(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
                           struct ecore_lldp_sys_tlvs *p_params);
 
+/* Returns the priority value for a given DSCP index */
+enum _ecore_status_t
+ecore_dcbx_get_dscp_priority(struct ecore_hwfn *p_hwfn,
+                            u8 dscp_index, u8 *p_dscp_pri);
+
+/* Sets the priority value for a given DSCP index */
+enum _ecore_status_t
+ecore_dcbx_set_dscp_priority(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                            u8 dscp_index, u8 pri_val);
+
 static const struct ecore_dcbx_app_metadata ecore_dcbx_app_update[] = {
        {DCBX_PROTOCOL_ISCSI, "ISCSI", ECORE_PCI_ISCSI},
        {DCBX_PROTOCOL_FCOE, "FCOE", ECORE_PCI_FCOE},
index 31f1f3e..cf454b1 100644 (file)
@@ -352,6 +352,1189 @@ void ecore_db_recovery_execute(struct ecore_hwfn *p_hwfn,
 }
 /******************** Doorbell Recovery end ****************/
 
+/********************************** NIG LLH ***********************************/
+
+enum ecore_llh_filter_type {
+       ECORE_LLH_FILTER_TYPE_MAC,
+       ECORE_LLH_FILTER_TYPE_PROTOCOL,
+};
+
+struct ecore_llh_mac_filter {
+       u8 addr[ETH_ALEN];
+};
+
+struct ecore_llh_protocol_filter {
+       enum ecore_llh_prot_filter_type_t type;
+       u16 source_port_or_eth_type;
+       u16 dest_port;
+};
+
+union ecore_llh_filter {
+       struct ecore_llh_mac_filter mac;
+       struct ecore_llh_protocol_filter protocol;
+};
+
+struct ecore_llh_filter_info {
+       bool b_enabled;
+       u32 ref_cnt;
+       enum ecore_llh_filter_type type;
+       union ecore_llh_filter filter;
+};
+
+struct ecore_llh_info {
+       /* Number of LLH filter banks */
+       u8 num_ppfid;
+
+#define MAX_NUM_PPFID  8
+       u8 ppfid_array[MAX_NUM_PPFID];
+
+       /* Array of filter arrays:
+        * "num_ppfid" elements of filter banks, where each is an array of
+        * "NIG_REG_LLH_FUNC_FILTER_EN_SIZE" filters.
+        */
+       struct ecore_llh_filter_info **pp_filters;
+};
+
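A minimal sketch of the two-level shadow layout described above (one bank of filters per ppfid, indexed as pp_filters[ppfid][filter_idx]). The bank size of 16 is an assumption standing in for NIG_REG_LLH_FUNC_FILTER_EN_SIZE; allocation error handling is omitted for brevity:

#include <stdio.h>
#include <stdlib.h>

#define BANK_SIZE 16    /* assumed value of NIG_REG_LLH_FUNC_FILTER_EN_SIZE */

struct filter { int enabled; unsigned ref_cnt; };

int main(void)
{
        unsigned num_ppfid = 4; /* hypothetical number of banks */
        struct filter **pp = calloc(num_ppfid, sizeof(*pp));
        unsigned i;

        for (i = 0; i < num_ppfid; i++)
                pp[i] = calloc(BANK_SIZE, sizeof(**pp));

        pp[2][5].enabled = 1;   /* filter 5 of ppfid 2 */
        printf("pp[2][5].enabled = %d\n", pp[2][5].enabled);

        for (i = 0; i < num_ppfid; i++)
                free(pp[i]);
        free(pp);
        return 0;
}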
+static void ecore_llh_free(struct ecore_dev *p_dev)
+{
+       struct ecore_llh_info *p_llh_info = p_dev->p_llh_info;
+       u32 i;
+
+       if (p_llh_info != OSAL_NULL) {
+               if (p_llh_info->pp_filters != OSAL_NULL) {
+                       for (i = 0; i < p_llh_info->num_ppfid; i++)
+                               OSAL_FREE(p_dev, p_llh_info->pp_filters[i]);
+               }
+
+               OSAL_FREE(p_dev, p_llh_info->pp_filters);
+       }
+
+       OSAL_FREE(p_dev, p_llh_info);
+       p_dev->p_llh_info = OSAL_NULL;
+}
+
+static enum _ecore_status_t ecore_llh_alloc(struct ecore_dev *p_dev)
+{
+       struct ecore_llh_info *p_llh_info;
+       u32 size;
+       u8 i;
+
+       p_llh_info = OSAL_ZALLOC(p_dev, GFP_KERNEL, sizeof(*p_llh_info));
+       if (!p_llh_info)
+               return ECORE_NOMEM;
+       p_dev->p_llh_info = p_llh_info;
+
+       for (i = 0; i < MAX_NUM_PPFID; i++) {
+               if (!(p_dev->ppfid_bitmap & (0x1 << i)))
+                       continue;
+
+               p_llh_info->ppfid_array[p_llh_info->num_ppfid] = i;
+               DP_VERBOSE(p_dev, ECORE_MSG_SP, "ppfid_array[%d] = %hhd\n",
+                          p_llh_info->num_ppfid, i);
+               p_llh_info->num_ppfid++;
+       }
+
+       size = p_llh_info->num_ppfid * sizeof(*p_llh_info->pp_filters);
+       p_llh_info->pp_filters = OSAL_ZALLOC(p_dev, GFP_KERNEL, size);
+       if (!p_llh_info->pp_filters)
+               return ECORE_NOMEM;
+
+       size = NIG_REG_LLH_FUNC_FILTER_EN_SIZE *
+              sizeof(**p_llh_info->pp_filters);
+       for (i = 0; i < p_llh_info->num_ppfid; i++) {
+               p_llh_info->pp_filters[i] = OSAL_ZALLOC(p_dev, GFP_KERNEL,
+                                                       size);
+               if (!p_llh_info->pp_filters[i])
+                       return ECORE_NOMEM;
+       }
+
+       return ECORE_SUCCESS;
+}
+
+static enum _ecore_status_t ecore_llh_shadow_sanity(struct ecore_dev *p_dev,
+                                                   u8 ppfid, u8 filter_idx,
+                                                   const char *action)
+{
+       struct ecore_llh_info *p_llh_info = p_dev->p_llh_info;
+
+       if (ppfid >= p_llh_info->num_ppfid) {
+               DP_NOTICE(p_dev, false,
+                         "LLH shadow [%s]: using ppfid %d while only %d ppfids are available\n",
+                         action, ppfid, p_llh_info->num_ppfid);
+               return ECORE_INVAL;
+       }
+
+       if (filter_idx >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) {
+               DP_NOTICE(p_dev, false,
+                         "LLH shadow [%s]: using filter_idx %d while only %d filters are available\n",
+                         action, filter_idx, NIG_REG_LLH_FUNC_FILTER_EN_SIZE);
+               return ECORE_INVAL;
+       }
+
+       return ECORE_SUCCESS;
+}
+
+#define ECORE_LLH_INVALID_FILTER_IDX   0xff
+
+static enum _ecore_status_t
+ecore_llh_shadow_search_filter(struct ecore_dev *p_dev, u8 ppfid,
+                              union ecore_llh_filter *p_filter,
+                              u8 *p_filter_idx)
+{
+       struct ecore_llh_info *p_llh_info = p_dev->p_llh_info;
+       struct ecore_llh_filter_info *p_filters;
+       enum _ecore_status_t rc;
+       u8 i;
+
+       rc = ecore_llh_shadow_sanity(p_dev, ppfid, 0, "search");
+       if (rc != ECORE_SUCCESS)
+               return rc;
+
+       *p_filter_idx = ECORE_LLH_INVALID_FILTER_IDX;
+
+       p_filters = p_llh_info->pp_filters[ppfid];
+       for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
+               if (!OSAL_MEMCMP(p_filter, &p_filters[i].filter,
+                                sizeof(*p_filter))) {
+                       *p_filter_idx = i;
+                       break;
+               }
+       }
+
+       return ECORE_SUCCESS;
+}
+
+static enum _ecore_status_t
+ecore_llh_shadow_get_free_idx(struct ecore_dev *p_dev, u8 ppfid,
+                             u8 *p_filter_idx)
+{
+       struct ecore_llh_info *p_llh_info = p_dev->p_llh_info;
+       struct ecore_llh_filter_info *p_filters;
+       enum _ecore_status_t rc;
+       u8 i;
+
+       rc = ecore_llh_shadow_sanity(p_dev, ppfid, 0, "get_free_idx");
+       if (rc != ECORE_SUCCESS)
+               return rc;
+
+       *p_filter_idx = ECORE_LLH_INVALID_FILTER_IDX;
+
+       p_filters = p_llh_info->pp_filters[ppfid];
+       for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
+               if (!p_filters[i].b_enabled) {
+                       *p_filter_idx = i;
+                       break;
+               }
+       }
+
+       return ECORE_SUCCESS;
+}
+
+static enum _ecore_status_t
+__ecore_llh_shadow_add_filter(struct ecore_dev *p_dev, u8 ppfid, u8 filter_idx,
+                             enum ecore_llh_filter_type type,
+                             union ecore_llh_filter *p_filter, u32 *p_ref_cnt)
+{
+       struct ecore_llh_info *p_llh_info = p_dev->p_llh_info;
+       struct ecore_llh_filter_info *p_filters;
+       enum _ecore_status_t rc;
+
+       rc = ecore_llh_shadow_sanity(p_dev, ppfid, filter_idx, "add");
+       if (rc != ECORE_SUCCESS)
+               return rc;
+
+       p_filters = p_llh_info->pp_filters[ppfid];
+       if (!p_filters[filter_idx].ref_cnt) {
+               p_filters[filter_idx].b_enabled = true;
+               p_filters[filter_idx].type = type;
+               OSAL_MEMCPY(&p_filters[filter_idx].filter, p_filter,
+                           sizeof(p_filters[filter_idx].filter));
+       }
+
+       *p_ref_cnt = ++p_filters[filter_idx].ref_cnt;
+
+       return ECORE_SUCCESS;
+}
+
+static enum _ecore_status_t
+ecore_llh_shadow_add_filter(struct ecore_dev *p_dev, u8 ppfid,
+                           enum ecore_llh_filter_type type,
+                           union ecore_llh_filter *p_filter,
+                           u8 *p_filter_idx, u32 *p_ref_cnt)
+{
+       enum _ecore_status_t rc;
+
+       /* Check if the same filter already exists */
+       rc = ecore_llh_shadow_search_filter(p_dev, ppfid, p_filter,
+                                           p_filter_idx);
+       if (rc != ECORE_SUCCESS)
+               return rc;
+
+       /* Find a new entry in case of a new filter */
+       if (*p_filter_idx == ECORE_LLH_INVALID_FILTER_IDX) {
+               rc = ecore_llh_shadow_get_free_idx(p_dev, ppfid, p_filter_idx);
+               if (rc != ECORE_SUCCESS)
+                       return rc;
+       }
+
+       /* No free entry was found */
+       if (*p_filter_idx == ECORE_LLH_INVALID_FILTER_IDX) {
+               DP_NOTICE(p_dev, false,
+                         "Failed to find an empty LLH filter to utilize [ppfid %d]\n",
+                         ppfid);
+               return ECORE_NORESOURCES;
+       }
+
+       return __ecore_llh_shadow_add_filter(p_dev, ppfid, *p_filter_idx, type,
+                                            p_filter, p_ref_cnt);
+}
+
+static enum _ecore_status_t
+__ecore_llh_shadow_remove_filter(struct ecore_dev *p_dev, u8 ppfid,
+                                u8 filter_idx, u32 *p_ref_cnt)
+{
+       struct ecore_llh_info *p_llh_info = p_dev->p_llh_info;
+       struct ecore_llh_filter_info *p_filters;
+       enum _ecore_status_t rc;
+
+       rc = ecore_llh_shadow_sanity(p_dev, ppfid, filter_idx, "remove");
+       if (rc != ECORE_SUCCESS)
+               return rc;
+
+       p_filters = p_llh_info->pp_filters[ppfid];
+       if (!p_filters[filter_idx].ref_cnt) {
+               DP_NOTICE(p_dev, false,
+                         "LLH shadow: trying to remove a filter with ref_cnt=0\n");
+               return ECORE_INVAL;
+       }
+
+       *p_ref_cnt = --p_filters[filter_idx].ref_cnt;
+       if (!p_filters[filter_idx].ref_cnt)
+               OSAL_MEM_ZERO(&p_filters[filter_idx],
+                             sizeof(p_filters[filter_idx]));
+
+       return ECORE_SUCCESS;
+}
+
+static enum _ecore_status_t
+ecore_llh_shadow_remove_filter(struct ecore_dev *p_dev, u8 ppfid,
+                              union ecore_llh_filter *p_filter,
+                              u8 *p_filter_idx, u32 *p_ref_cnt)
+{
+       enum _ecore_status_t rc;
+
+       rc = ecore_llh_shadow_search_filter(p_dev, ppfid, p_filter,
+                                           p_filter_idx);
+       if (rc != ECORE_SUCCESS)
+               return rc;
+
+       /* No matching filter was found */
+       if (*p_filter_idx == ECORE_LLH_INVALID_FILTER_IDX) {
+               DP_NOTICE(p_dev, false,
+                         "Failed to find a filter in the LLH shadow\n");
+               return ECORE_INVAL;
+       }
+
+       return __ecore_llh_shadow_remove_filter(p_dev, ppfid, *p_filter_idx,
+                                               p_ref_cnt);
+}
+
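The add/remove helpers above implement per-slot reference counting: the hardware filter is written only on the 0 -> 1 transition and cleared only on the 1 -> 0 transition. Below is a standalone model of just that behavior, where the print statements stand in for the hardware accesses:

#include <stdio.h>

static unsigned ref_cnt;        /* one shadow slot */

static void add(void)
{
        if (++ref_cnt == 1)
                printf("write filter to hardware\n");
}

static void remove_one(void)
{
        if (ref_cnt && --ref_cnt == 0)
                printf("clear filter in hardware\n");
}

int main(void)
{
        add();          /* writes to HW */
        add();          /* bumps ref_cnt only */
        remove_one();   /* drops ref_cnt only */
        remove_one();   /* clears HW */
        return 0;
}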
+static enum _ecore_status_t
+ecore_llh_shadow_remove_all_filters(struct ecore_dev *p_dev, u8 ppfid)
+{
+       struct ecore_llh_info *p_llh_info = p_dev->p_llh_info;
+       struct ecore_llh_filter_info *p_filters;
+       enum _ecore_status_t rc;
+
+       rc = ecore_llh_shadow_sanity(p_dev, ppfid, 0, "remove_all");
+       if (rc != ECORE_SUCCESS)
+               return rc;
+
+       p_filters = p_llh_info->pp_filters[ppfid];
+       OSAL_MEM_ZERO(p_filters,
+                     NIG_REG_LLH_FUNC_FILTER_EN_SIZE * sizeof(*p_filters));
+
+       return ECORE_SUCCESS;
+}
+
+static enum _ecore_status_t ecore_abs_ppfid(struct ecore_dev *p_dev,
+                                           u8 rel_ppfid, u8 *p_abs_ppfid)
+{
+       struct ecore_llh_info *p_llh_info = p_dev->p_llh_info;
+       u8 ppfids = p_llh_info->num_ppfid - 1;
+
+       if (rel_ppfid >= p_llh_info->num_ppfid) {
+               DP_NOTICE(p_dev, false,
+                         "rel_ppfid %d is not valid, available indices are 0..%hhd\n",
+                         rel_ppfid, ppfids);
+               return ECORE_INVAL;
+       }
+
+       *p_abs_ppfid = p_llh_info->ppfid_array[rel_ppfid];
+
+       return ECORE_SUCCESS;
+}
+
+static enum _ecore_status_t
+__ecore_llh_set_engine_affin(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
+{
+       struct ecore_dev *p_dev = p_hwfn->p_dev;
+       enum ecore_eng eng;
+       u8 ppfid;
+       enum _ecore_status_t rc;
+
+       rc = ecore_mcp_get_engine_config(p_hwfn, p_ptt);
+       if (rc != ECORE_SUCCESS && rc != ECORE_NOTIMPL) {
+               DP_NOTICE(p_hwfn, false,
+                         "Failed to get the engine affinity configuration\n");
+               return rc;
+       }
+
+       /* RoCE PF is bound to a single engine */
+       if (ECORE_IS_ROCE_PERSONALITY(p_hwfn)) {
+               eng = p_dev->fir_affin ? ECORE_ENG1 : ECORE_ENG0;
+               rc = ecore_llh_set_roce_affinity(p_dev, eng);
+               if (rc != ECORE_SUCCESS) {
+                       DP_NOTICE(p_dev, false,
+                                 "Failed to set the RoCE engine affinity\n");
+                       return rc;
+               }
+
+               DP_VERBOSE(p_dev, ECORE_MSG_SP,
+                          "LLH: Set the engine affinity of RoCE packets as %d\n",
+                          eng);
+       }
+
+       /* Storage PF is bound to a single engine while L2 PF uses both */
+       if (ECORE_IS_FCOE_PERSONALITY(p_hwfn) ||
+           ECORE_IS_ISCSI_PERSONALITY(p_hwfn))
+               eng = p_dev->fir_affin ? ECORE_ENG1 : ECORE_ENG0;
+       else /* L2_PERSONALITY */
+               eng = ECORE_BOTH_ENG;
+
+       for (ppfid = 0; ppfid < p_dev->p_llh_info->num_ppfid; ppfid++) {
+               rc = ecore_llh_set_ppfid_affinity(p_dev, ppfid, eng);
+               if (rc != ECORE_SUCCESS) {
+                       DP_NOTICE(p_dev, false,
+                                 "Failed to set the engine affinity of ppfid %d\n",
+                                 ppfid);
+                       return rc;
+               }
+       }
+
+       DP_VERBOSE(p_dev, ECORE_MSG_SP,
+                  "LLH: Set the engine affinity of non-RoCE packets as %d\n",
+                  eng);
+
+       return ECORE_SUCCESS;
+}
+
+static enum _ecore_status_t
+ecore_llh_set_engine_affin(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                          bool avoid_eng_affin)
+{
+       struct ecore_dev *p_dev = p_hwfn->p_dev;
+       enum _ecore_status_t rc;
+
+       /* Backwards compatible mode:
+        * - RoCE packets     - Use engine 0.
+        * - Non-RoCE packets - Use connection based classification for L2 PFs,
+        *                      and engine 0 otherwise.
+        */
+       if (avoid_eng_affin) {
+               enum ecore_eng eng;
+               u8 ppfid;
+
+               if (ECORE_IS_ROCE_PERSONALITY(p_hwfn)) {
+                       eng = ECORE_ENG0;
+                       rc = ecore_llh_set_roce_affinity(p_dev, eng);
+                       if (rc != ECORE_SUCCESS) {
+                               DP_NOTICE(p_dev, false,
+                                         "Failed to set the RoCE engine affinity\n");
+                               return rc;
+                       }
+
+                       DP_VERBOSE(p_dev, ECORE_MSG_SP,
+                                  "LLH [backwards compatible mode]: Set the engine affinity of RoCE packets as %d\n",
+                                  eng);
+               }
+
+               eng = (ECORE_IS_FCOE_PERSONALITY(p_hwfn) ||
+                      ECORE_IS_ISCSI_PERSONALITY(p_hwfn)) ? ECORE_ENG0
+                                                          : ECORE_BOTH_ENG;
+               for (ppfid = 0; ppfid < p_dev->p_llh_info->num_ppfid; ppfid++) {
+                       rc = ecore_llh_set_ppfid_affinity(p_dev, ppfid, eng);
+                       if (rc != ECORE_SUCCESS) {
+                               DP_NOTICE(p_dev, false,
+                                         "Failed to set the engine affinity of ppfid %d\n",
+                                         ppfid);
+                               return rc;
+                       }
+               }
+
+               DP_VERBOSE(p_dev, ECORE_MSG_SP,
+                          "LLH [backwards compatible mode]: Set the engine affinity of non-RoCE packets as %d\n",
+                          eng);
+
+               return ECORE_SUCCESS;
+       }
+
+       return __ecore_llh_set_engine_affin(p_hwfn, p_ptt);
+}
+
+static enum _ecore_status_t ecore_llh_hw_init_pf(struct ecore_hwfn *p_hwfn,
+                                                struct ecore_ptt *p_ptt,
+                                                bool avoid_eng_affin)
+{
+       struct ecore_dev *p_dev = p_hwfn->p_dev;
+       u8 ppfid, abs_ppfid;
+       enum _ecore_status_t rc;
+
+       for (ppfid = 0; ppfid < p_dev->p_llh_info->num_ppfid; ppfid++) {
+               u32 addr;
+
+               rc = ecore_abs_ppfid(p_dev, ppfid, &abs_ppfid);
+               if (rc != ECORE_SUCCESS)
+                       return rc;
+
+               addr = NIG_REG_LLH_PPFID2PFID_TBL_0 + abs_ppfid * 0x4;
+               ecore_wr(p_hwfn, p_ptt, addr, p_hwfn->rel_pf_id);
+       }
+
+       if (OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits) &&
+           !ECORE_IS_FCOE_PERSONALITY(p_hwfn)) {
+               rc = ecore_llh_add_mac_filter(p_dev, 0,
+                                             p_hwfn->hw_info.hw_mac_addr);
+               if (rc != ECORE_SUCCESS)
+                       DP_NOTICE(p_dev, false,
+                                 "Failed to add an LLH filter with the primary MAC\n");
+       }
+
+       if (ECORE_IS_CMT(p_dev)) {
+               rc = ecore_llh_set_engine_affin(p_hwfn, p_ptt, avoid_eng_affin);
+               if (rc != ECORE_SUCCESS)
+                       return rc;
+       }
+
+       return ECORE_SUCCESS;
+}
+
+u8 ecore_llh_get_num_ppfid(struct ecore_dev *p_dev)
+{
+       return p_dev->p_llh_info->num_ppfid;
+}
+
+enum ecore_eng ecore_llh_get_l2_affinity_hint(struct ecore_dev *p_dev)
+{
+       return p_dev->l2_affin_hint ? ECORE_ENG1 : ECORE_ENG0;
+}
+
+/* TBD - should be removed when these definitions are available in reg_addr.h */
+#define NIG_REG_PPF_TO_ENGINE_SEL_ROCE_MASK            0x3
+#define NIG_REG_PPF_TO_ENGINE_SEL_ROCE_SHIFT           0
+#define NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE_MASK                0x3
+#define NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE_SHIFT       2
+
+enum _ecore_status_t ecore_llh_set_ppfid_affinity(struct ecore_dev *p_dev,
+                                                 u8 ppfid, enum ecore_eng eng)
+{
+       struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
+       struct ecore_ptt *p_ptt = ecore_ptt_acquire(p_hwfn);
+       u32 addr, val, eng_sel;
+       enum _ecore_status_t rc = ECORE_SUCCESS;
+       u8 abs_ppfid;
+
+       if (p_ptt == OSAL_NULL)
+               return ECORE_AGAIN;
+
+       if (!ECORE_IS_CMT(p_dev))
+               goto out;
+
+       rc = ecore_abs_ppfid(p_dev, ppfid, &abs_ppfid);
+       if (rc != ECORE_SUCCESS)
+               goto out;
+
+       switch (eng) {
+       case ECORE_ENG0:
+               eng_sel = 0;
+               break;
+       case ECORE_ENG1:
+               eng_sel = 1;
+               break;
+       case ECORE_BOTH_ENG:
+               eng_sel = 2;
+               break;
+       default:
+               DP_NOTICE(p_dev, false,
+                         "Invalid affinity value for ppfid [%d]\n", eng);
+               rc = ECORE_INVAL;
+               goto out;
+       }
+
+       addr = NIG_REG_PPF_TO_ENGINE_SEL + abs_ppfid * 0x4;
+       val = ecore_rd(p_hwfn, p_ptt, addr);
+       SET_FIELD(val, NIG_REG_PPF_TO_ENGINE_SEL_NON_ROCE, eng_sel);
+       ecore_wr(p_hwfn, p_ptt, addr, val);
+
+       /* The iWARP affinity is set as the affinity of ppfid 0 */
+       if (!ppfid && ECORE_IS_IWARP_PERSONALITY(p_hwfn))
+               p_dev->iwarp_affin = (eng == ECORE_ENG1) ? 1 : 0;
+out:
+       ecore_ptt_release(p_hwfn, p_ptt);
+
+       return rc;
+}
+
+enum _ecore_status_t ecore_llh_set_roce_affinity(struct ecore_dev *p_dev,
+                                                enum ecore_eng eng)
+{
+       struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
+       struct ecore_ptt *p_ptt = ecore_ptt_acquire(p_hwfn);
+       u32 addr, val, eng_sel;
+       enum _ecore_status_t rc = ECORE_SUCCESS;
+       u8 ppfid, abs_ppfid;
+
+       if (p_ptt == OSAL_NULL)
+               return ECORE_AGAIN;
+
+       if (!ECORE_IS_CMT(p_dev))
+               goto out;
+
+       switch (eng) {
+       case ECORE_ENG0:
+               eng_sel = 0;
+               break;
+       case ECORE_ENG1:
+               eng_sel = 1;
+               break;
+       case ECORE_BOTH_ENG:
+               eng_sel = 2;
+               ecore_wr(p_hwfn, p_ptt, NIG_REG_LLH_ENG_CLS_ROCE_QP_SEL,
+                        0xf /* QP bit 15 */);
+               break;
+       default:
+               DP_NOTICE(p_dev, false,
+                         "Invalid affinity value for RoCE [%d]\n", eng);
+               rc = ECORE_INVAL;
+               goto out;
+       }
+
+       for (ppfid = 0; ppfid < p_dev->p_llh_info->num_ppfid; ppfid++) {
+               rc = ecore_abs_ppfid(p_dev, ppfid, &abs_ppfid);
+               if (rc != ECORE_SUCCESS)
+                       goto out;
+
+               addr = NIG_REG_PPF_TO_ENGINE_SEL + abs_ppfid * 0x4;
+               val = ecore_rd(p_hwfn, p_ptt, addr);
+               SET_FIELD(val, NIG_REG_PPF_TO_ENGINE_SEL_ROCE, eng_sel);
+               ecore_wr(p_hwfn, p_ptt, addr, val);
+       }
+out:
+       ecore_ptt_release(p_hwfn, p_ptt);
+
+       return rc;
+}
+
+struct ecore_llh_filter_e4_details {
+       u64 value;
+       u32 mode;
+       u32 protocol_type;
+       u32 hdr_sel;
+       u32 enable;
+};
+
+static enum _ecore_status_t
+ecore_llh_access_filter_e4(struct ecore_hwfn *p_hwfn,
+                          struct ecore_ptt *p_ptt, u8 abs_ppfid, u8 filter_idx,
+                          struct ecore_llh_filter_e4_details *p_details,
+                          bool b_write_access)
+{
+       u8 pfid = ECORE_PFID_BY_PPFID(p_hwfn, abs_ppfid);
+       struct ecore_dmae_params params;
+       enum _ecore_status_t rc;
+       u32 addr;
+
+       /* The NIG/LLH registers that are accessed in this function have only
+        * 16 rows which are exposed to a PF, i.e. only the 16 filters of its
+        * default ppfid.
+        * Accessing the filters of other ppfids requires pretending to be
+        * other PFs, hence the use of the ecore_ppfid_rd/wr() functions.
+        */
+
+       /* Filter enable - should be done first when removing a filter */
+       if (b_write_access && !p_details->enable) {
+               addr = NIG_REG_LLH_FUNC_FILTER_EN_BB_K2 + filter_idx * 0x4;
+               ecore_ppfid_wr(p_hwfn, p_ptt, abs_ppfid, addr,
+                              p_details->enable);
+       }
+
+       /* Filter value */
+       addr = NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 + 2 * filter_idx * 0x4;
+       OSAL_MEMSET(&params, 0, sizeof(params));
+
+       if (b_write_access) {
+               params.flags = ECORE_DMAE_FLAG_PF_DST;
+               params.dst_pfid = pfid;
+               rc = ecore_dmae_host2grc(p_hwfn, p_ptt,
+                                        (u64)(osal_uintptr_t)&p_details->value,
+                                        addr, 2 /* size_in_dwords */, &params);
+       } else {
+               params.flags = ECORE_DMAE_FLAG_PF_SRC |
+                              ECORE_DMAE_FLAG_COMPLETION_DST;
+               params.src_pfid = pfid;
+               rc = ecore_dmae_grc2host(p_hwfn, p_ptt, addr,
+                                        (u64)(osal_uintptr_t)&p_details->value,
+                                        2 /* size_in_dwords */, &params);
+       }
+
+       if (rc != ECORE_SUCCESS)
+               return rc;
+
+       /* Filter mode */
+       addr = NIG_REG_LLH_FUNC_FILTER_MODE_BB_K2 + filter_idx * 0x4;
+       if (b_write_access)
+               ecore_ppfid_wr(p_hwfn, p_ptt, abs_ppfid, addr, p_details->mode);
+       else
+               p_details->mode = ecore_ppfid_rd(p_hwfn, p_ptt, abs_ppfid,
+                                                addr);
+
+       /* Filter protocol type */
+       addr = NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_BB_K2 + filter_idx * 0x4;
+       if (b_write_access)
+               ecore_ppfid_wr(p_hwfn, p_ptt, abs_ppfid, addr,
+                              p_details->protocol_type);
+       else
+               p_details->protocol_type = ecore_ppfid_rd(p_hwfn, p_ptt,
+                                                         abs_ppfid, addr);
+
+       /* Filter header select */
+       addr = NIG_REG_LLH_FUNC_FILTER_HDR_SEL_BB_K2 + filter_idx * 0x4;
+       if (b_write_access)
+               ecore_ppfid_wr(p_hwfn, p_ptt, abs_ppfid, addr,
+                              p_details->hdr_sel);
+       else
+               p_details->hdr_sel = ecore_ppfid_rd(p_hwfn, p_ptt, abs_ppfid,
+                                                   addr);
+
+       /* Filter enable - should be done last when adding a filter */
+       if (!b_write_access || p_details->enable) {
+               addr = NIG_REG_LLH_FUNC_FILTER_EN_BB_K2 + filter_idx * 0x4;
+               if (b_write_access)
+                       ecore_ppfid_wr(p_hwfn, p_ptt, abs_ppfid, addr,
+                                      p_details->enable);
+               else
+                       p_details->enable = ecore_ppfid_rd(p_hwfn, p_ptt,
+                                                          abs_ppfid, addr);
+       }
+
+       return ECORE_SUCCESS;
+}
+
+static enum _ecore_status_t
+ecore_llh_add_filter_e4(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                       u8 abs_ppfid, u8 filter_idx, u8 filter_prot_type,
+                       u32 high, u32 low)
+{
+       struct ecore_llh_filter_e4_details filter_details;
+
+       filter_details.enable = 1;
+       filter_details.value = ((u64)high << 32) | low;
+       filter_details.hdr_sel =
+               OSAL_TEST_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits) ?
+               1 : /* inner/encapsulated header */
+               0;  /* outer/tunnel header */
+       filter_details.protocol_type = filter_prot_type;
+       filter_details.mode = filter_prot_type ?
+                             1 : /* protocol-based classification */
+                             0;  /* MAC-address based classification */
+
+       return ecore_llh_access_filter_e4(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+                                         &filter_details,
+                                         true /* write access */);
+}
+
+static enum _ecore_status_t
+ecore_llh_remove_filter_e4(struct ecore_hwfn *p_hwfn,
+                          struct ecore_ptt *p_ptt, u8 abs_ppfid, u8 filter_idx)
+{
+       struct ecore_llh_filter_e4_details filter_details;
+
+       OSAL_MEMSET(&filter_details, 0, sizeof(filter_details));
+
+       return ecore_llh_access_filter_e4(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+                                         &filter_details,
+                                         true /* write access */);
+}
+
+static enum _ecore_status_t
+ecore_llh_add_filter(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                    u8 abs_ppfid, u8 filter_idx, u8 filter_prot_type, u32 high,
+                    u32 low)
+{
+       return ecore_llh_add_filter_e4(p_hwfn, p_ptt, abs_ppfid,
+                                      filter_idx, filter_prot_type,
+                                      high, low);
+}
+
+static enum _ecore_status_t
+ecore_llh_remove_filter(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                       u8 abs_ppfid, u8 filter_idx)
+{
+       return ecore_llh_remove_filter_e4(p_hwfn, p_ptt, abs_ppfid,
+                                         filter_idx);
+}
+
+enum _ecore_status_t ecore_llh_add_mac_filter(struct ecore_dev *p_dev, u8 ppfid,
+                                             u8 mac_addr[ETH_ALEN])
+{
+       struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
+       struct ecore_ptt *p_ptt = ecore_ptt_acquire(p_hwfn);
+       union ecore_llh_filter filter;
+       u8 filter_idx, abs_ppfid;
+       u32 high, low, ref_cnt;
+       enum _ecore_status_t rc = ECORE_SUCCESS;
+
+       if (p_ptt == OSAL_NULL)
+               return ECORE_AGAIN;
+
+       if (!OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
+               goto out;
+
+       OSAL_MEM_ZERO(&filter, sizeof(filter));
+       OSAL_MEMCPY(filter.mac.addr, mac_addr, ETH_ALEN);
+       rc = ecore_llh_shadow_add_filter(p_dev, ppfid,
+                                        ECORE_LLH_FILTER_TYPE_MAC,
+                                        &filter, &filter_idx, &ref_cnt);
+       if (rc != ECORE_SUCCESS)
+               goto err;
+
+       rc = ecore_abs_ppfid(p_dev, ppfid, &abs_ppfid);
+       if (rc != ECORE_SUCCESS)
+               goto err;
+
+       /* Configure the LLH only in case of a new filter */
+       if (ref_cnt == 1) {
+               high = mac_addr[1] | (mac_addr[0] << 8);
+               low = mac_addr[5] | (mac_addr[4] << 8) | (mac_addr[3] << 16) |
+                     (mac_addr[2] << 24);
+               rc = ecore_llh_add_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+                                         0, high, low);
+               if (rc != ECORE_SUCCESS)
+                       goto err;
+       }
+
+       DP_VERBOSE(p_dev, ECORE_MSG_SP,
+                  "LLH: Added MAC filter [%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx] to ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+                  mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3],
+                  mac_addr[4], mac_addr[5], ppfid, abs_ppfid, filter_idx,
+                  ref_cnt);
+
+       goto out;
+
+err:
+       DP_NOTICE(p_dev, false,
+                 "LLH: Failed to add MAC filter [%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx] to ppfid %hhd\n",
+                 mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3],
+                 mac_addr[4], mac_addr[5], ppfid);
+out:
+       ecore_ptt_release(p_hwfn, p_ptt);
+
+       return rc;
+}
+
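The high/low split used when programming the MAC filter above places the first two address bytes in the high word and the remaining four in the low word. A standalone sketch of the same packing with an arbitrary example address:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint8_t mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
        uint32_t high = mac[1] | (mac[0] << 8);
        uint32_t low  = mac[5] | (mac[4] << 8) | (mac[3] << 16) |
                        ((uint32_t)mac[2] << 24);

        printf("high 0x%08x low 0x%08x\n", high, low); /* 0x00000011 0x22334455 */
        return 0;
}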
+static enum _ecore_status_t
+ecore_llh_protocol_filter_stringify(struct ecore_dev *p_dev,
+                                   enum ecore_llh_prot_filter_type_t type,
+                                   u16 source_port_or_eth_type, u16 dest_port,
+                                   char *str, osal_size_t str_len)
+{
+       switch (type) {
+       case ECORE_LLH_FILTER_ETHERTYPE:
+               OSAL_SNPRINTF(str, str_len, "Ethertype 0x%04x",
+                             source_port_or_eth_type);
+               break;
+       case ECORE_LLH_FILTER_TCP_SRC_PORT:
+               OSAL_SNPRINTF(str, str_len, "TCP src port 0x%04x",
+                             source_port_or_eth_type);
+               break;
+       case ECORE_LLH_FILTER_UDP_SRC_PORT:
+               OSAL_SNPRINTF(str, str_len, "UDP src port 0x%04x",
+                             source_port_or_eth_type);
+               break;
+       case ECORE_LLH_FILTER_TCP_DEST_PORT:
+               OSAL_SNPRINTF(str, str_len, "TCP dst port 0x%04x", dest_port);
+               break;
+       case ECORE_LLH_FILTER_UDP_DEST_PORT:
+               OSAL_SNPRINTF(str, str_len, "UDP dst port 0x%04x", dest_port);
+               break;
+       case ECORE_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
+               OSAL_SNPRINTF(str, str_len, "TCP src/dst ports 0x%04x/0x%04x",
+                             source_port_or_eth_type, dest_port);
+               break;
+       case ECORE_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
+               OSAL_SNPRINTF(str, str_len, "UDP src/dst ports 0x%04x/0x%04x",
+                             source_port_or_eth_type, dest_port);
+               break;
+       default:
+               DP_NOTICE(p_dev, true,
+                         "Invalid LLH protocol filter type %d\n", type);
+               return ECORE_INVAL;
+       }
+
+       return ECORE_SUCCESS;
+}
+
+static enum _ecore_status_t
+ecore_llh_protocol_filter_to_hilo(struct ecore_dev *p_dev,
+                                 enum ecore_llh_prot_filter_type_t type,
+                                 u16 source_port_or_eth_type, u16 dest_port,
+                                 u32 *p_high, u32 *p_low)
+{
+       *p_high = 0;
+       *p_low = 0;
+
+       switch (type) {
+       case ECORE_LLH_FILTER_ETHERTYPE:
+               *p_high = source_port_or_eth_type;
+               break;
+       case ECORE_LLH_FILTER_TCP_SRC_PORT:
+       case ECORE_LLH_FILTER_UDP_SRC_PORT:
+               *p_low = source_port_or_eth_type << 16;
+               break;
+       case ECORE_LLH_FILTER_TCP_DEST_PORT:
+       case ECORE_LLH_FILTER_UDP_DEST_PORT:
+               *p_low = dest_port;
+               break;
+       case ECORE_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
+       case ECORE_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
+               *p_low = (source_port_or_eth_type << 16) | dest_port;
+               break;
+       default:
+               DP_NOTICE(p_dev, true,
+                         "Invalid LLH protocol filter type %d\n", type);
+               return ECORE_INVAL;
+       }
+
+       return ECORE_SUCCESS;
+}
+
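Likewise for protocol filters: an Ethertype lands in the high word, while port-based filters pack the source and/or destination port into the low word. A small standalone illustration with arbitrary example values (0x8906 is the FCoE Ethertype):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint16_t src_port = 0x1234, dst_port = 0x5678;

        /* TCP/UDP src+dst filter: src in the upper 16 bits, dst in the lower */
        uint32_t low = ((uint32_t)src_port << 16) | dst_port;
        printf("src/dst ports -> low 0x%08x\n", low);   /* 0x12345678 */

        /* Ethertype filter: the value goes in the high word instead */
        uint32_t high = 0x8906;
        printf("ethertype -> high 0x%08x\n", high);
        return 0;
}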
+enum _ecore_status_t
+ecore_llh_add_protocol_filter(struct ecore_dev *p_dev, u8 ppfid,
+                             enum ecore_llh_prot_filter_type_t type,
+                             u16 source_port_or_eth_type, u16 dest_port)
+{
+       struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
+       struct ecore_ptt *p_ptt = ecore_ptt_acquire(p_hwfn);
+       u8 filter_idx, abs_ppfid, type_bitmap;
+       char str[32];
+       union ecore_llh_filter filter;
+       u32 high, low, ref_cnt;
+       enum _ecore_status_t rc = ECORE_SUCCESS;
+
+       if (p_ptt == OSAL_NULL)
+               return ECORE_AGAIN;
+
+       if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits))
+               goto out;
+
+       rc = ecore_llh_protocol_filter_stringify(p_dev, type,
+                                                source_port_or_eth_type,
+                                                dest_port, str, sizeof(str));
+       if (rc != ECORE_SUCCESS)
+               goto err;
+
+       OSAL_MEM_ZERO(&filter, sizeof(filter));
+       filter.protocol.type = type;
+       filter.protocol.source_port_or_eth_type = source_port_or_eth_type;
+       filter.protocol.dest_port = dest_port;
+       rc = ecore_llh_shadow_add_filter(p_dev, ppfid,
+                                        ECORE_LLH_FILTER_TYPE_PROTOCOL,
+                                        &filter, &filter_idx, &ref_cnt);
+       if (rc != ECORE_SUCCESS)
+               goto err;
+
+       rc = ecore_abs_ppfid(p_dev, ppfid, &abs_ppfid);
+       if (rc != ECORE_SUCCESS)
+               goto err;
+
+       /* Configure the LLH only in case of a new filter */
+       if (ref_cnt == 1) {
+               rc = ecore_llh_protocol_filter_to_hilo(p_dev, type,
+                                                      source_port_or_eth_type,
+                                                      dest_port, &high, &low);
+               if (rc != ECORE_SUCCESS)
+                       goto err;
+
+               type_bitmap = 0x1 << type;
+               rc = ecore_llh_add_filter(p_hwfn, p_ptt, abs_ppfid, filter_idx,
+                                         type_bitmap, high, low);
+               if (rc != ECORE_SUCCESS)
+                       goto err;
+       }
+
+       DP_VERBOSE(p_dev, ECORE_MSG_SP,
+                  "LLH: Added protocol filter [%s] to ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+                  str, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+       goto out;
+
+err:
+       DP_NOTICE(p_hwfn, false,
+                 "LLH: Failed to add protocol filter [%s] to ppfid %hhd\n",
+                 str, ppfid);
+out:
+       ecore_ptt_release(p_hwfn, p_ptt);
+
+       return rc;
+}
+
+void ecore_llh_remove_mac_filter(struct ecore_dev *p_dev, u8 ppfid,
+                                u8 mac_addr[ETH_ALEN])
+{
+       struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
+       struct ecore_ptt *p_ptt = ecore_ptt_acquire(p_hwfn);
+       union ecore_llh_filter filter;
+       u8 filter_idx, abs_ppfid;
+       enum _ecore_status_t rc = ECORE_SUCCESS;
+       u32 ref_cnt;
+
+       if (p_ptt == OSAL_NULL)
+               return;
+
+       if (!OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
+               goto out;
+
+       OSAL_MEM_ZERO(&filter, sizeof(filter));
+       OSAL_MEMCPY(filter.mac.addr, mac_addr, ETH_ALEN);
+       rc = ecore_llh_shadow_remove_filter(p_dev, ppfid, &filter, &filter_idx,
+                                           &ref_cnt);
+       if (rc != ECORE_SUCCESS)
+               goto err;
+
+       rc = ecore_abs_ppfid(p_dev, ppfid, &abs_ppfid);
+       if (rc != ECORE_SUCCESS)
+               goto err;
+
+       /* Remove from the LLH in case the filter is not in use */
+       if (!ref_cnt) {
+               rc = ecore_llh_remove_filter(p_hwfn, p_ptt, abs_ppfid,
+                                            filter_idx);
+               if (rc != ECORE_SUCCESS)
+                       goto err;
+       }
+
+       DP_VERBOSE(p_dev, ECORE_MSG_SP,
+                  "LLH: Removed MAC filter [%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx] from ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+                  mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3],
+                  mac_addr[4], mac_addr[5], ppfid, abs_ppfid, filter_idx,
+                  ref_cnt);
+
+       goto out;
+
+err:
+       DP_NOTICE(p_dev, false,
+                 "LLH: Failed to remove MAC filter [%02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx] from ppfid %hhd\n",
+                 mac_addr[0], mac_addr[1], mac_addr[2], mac_addr[3],
+                 mac_addr[4], mac_addr[5], ppfid);
+out:
+       ecore_ptt_release(p_hwfn, p_ptt);
+}
+
+void ecore_llh_remove_protocol_filter(struct ecore_dev *p_dev, u8 ppfid,
+                                     enum ecore_llh_prot_filter_type_t type,
+                                     u16 source_port_or_eth_type,
+                                     u16 dest_port)
+{
+       struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
+       struct ecore_ptt *p_ptt = ecore_ptt_acquire(p_hwfn);
+       u8 filter_idx, abs_ppfid;
+       char str[32];
+       union ecore_llh_filter filter;
+       enum _ecore_status_t rc = ECORE_SUCCESS;
+       u32 ref_cnt;
+
+       if (p_ptt == OSAL_NULL)
+               return;
+
+       if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits))
+               goto out;
+
+       rc = ecore_llh_protocol_filter_stringify(p_dev, type,
+                                                source_port_or_eth_type,
+                                                dest_port, str, sizeof(str));
+       if (rc != ECORE_SUCCESS)
+               goto err;
+
+       OSAL_MEM_ZERO(&filter, sizeof(filter));
+       filter.protocol.type = type;
+       filter.protocol.source_port_or_eth_type = source_port_or_eth_type;
+       filter.protocol.dest_port = dest_port;
+       rc = ecore_llh_shadow_remove_filter(p_dev, ppfid, &filter, &filter_idx,
+                                           &ref_cnt);
+       if (rc != ECORE_SUCCESS)
+               goto err;
+
+       rc = ecore_abs_ppfid(p_dev, ppfid, &abs_ppfid);
+       if (rc != ECORE_SUCCESS)
+               goto err;
+
+       /* Remove from the LLH in case the filter is not in use */
+       if (!ref_cnt) {
+               rc = ecore_llh_remove_filter(p_hwfn, p_ptt, abs_ppfid,
+                                            filter_idx);
+               if (rc != ECORE_SUCCESS)
+                       goto err;
+       }
+
+       DP_VERBOSE(p_dev, ECORE_MSG_SP,
+                  "LLH: Removed protocol filter [%s] from ppfid %hhd [abs %hhd] at idx %hhd [ref_cnt %d]\n",
+                  str, ppfid, abs_ppfid, filter_idx, ref_cnt);
+
+       goto out;
+
+err:
+       DP_NOTICE(p_dev, false,
+                 "LLH: Failed to remove protocol filter [%s] from ppfid %hhd\n",
+                 str, ppfid);
+out:
+       ecore_ptt_release(p_hwfn, p_ptt);
+}
+
+void ecore_llh_clear_ppfid_filters(struct ecore_dev *p_dev, u8 ppfid)
+{
+       struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
+       struct ecore_ptt *p_ptt = ecore_ptt_acquire(p_hwfn);
+       u8 filter_idx, abs_ppfid;
+       enum _ecore_status_t rc = ECORE_SUCCESS;
+
+       if (p_ptt == OSAL_NULL)
+               return;
+
+       if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits) &&
+           !OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
+               goto out;
+
+       rc = ecore_abs_ppfid(p_dev, ppfid, &abs_ppfid);
+       if (rc != ECORE_SUCCESS)
+               goto out;
+
+       rc = ecore_llh_shadow_remove_all_filters(p_dev, ppfid);
+       if (rc != ECORE_SUCCESS)
+               goto out;
+
+       for (filter_idx = 0; filter_idx < NIG_REG_LLH_FUNC_FILTER_EN_SIZE;
+            filter_idx++) {
+               rc = ecore_llh_remove_filter_e4(p_hwfn, p_ptt,
+                                               abs_ppfid, filter_idx);
+               if (rc != ECORE_SUCCESS)
+                       goto out;
+       }
+out:
+       ecore_ptt_release(p_hwfn, p_ptt);
+}
+
+void ecore_llh_clear_all_filters(struct ecore_dev *p_dev)
+{
+       u8 ppfid;
+
+       if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS, &p_dev->mf_bits) &&
+           !OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits))
+               return;
+
+       for (ppfid = 0; ppfid < p_dev->p_llh_info->num_ppfid; ppfid++)
+               ecore_llh_clear_ppfid_filters(p_dev, ppfid);
+}
+
+enum _ecore_status_t ecore_all_ppfids_wr(struct ecore_hwfn *p_hwfn,
+                                        struct ecore_ptt *p_ptt, u32 addr,
+                                        u32 val)
+{
+       struct ecore_dev *p_dev = p_hwfn->p_dev;
+       u8 ppfid, abs_ppfid;
+       enum _ecore_status_t rc;
+
+       for (ppfid = 0; ppfid < p_dev->p_llh_info->num_ppfid; ppfid++) {
+               rc = ecore_abs_ppfid(p_dev, ppfid, &abs_ppfid);
+               if (rc != ECORE_SUCCESS)
+                       return rc;
+
+               ecore_ppfid_wr(p_hwfn, p_ptt, abs_ppfid, addr, val);
+       }
+
+       return ECORE_SUCCESS;
+}
+
+static enum _ecore_status_t
+ecore_llh_dump_ppfid_e4(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                       u8 ppfid)
+{
+       struct ecore_llh_filter_e4_details filter_details;
+       u8 abs_ppfid, filter_idx;
+       u32 addr;
+       enum _ecore_status_t rc;
+
+       rc = ecore_abs_ppfid(p_hwfn->p_dev, ppfid, &abs_ppfid);
+       if (rc != ECORE_SUCCESS)
+               return rc;
+
+       addr = NIG_REG_PPF_TO_ENGINE_SEL + abs_ppfid * 0x4;
+       DP_NOTICE(p_hwfn, false,
+                 "[rel_pf_id %hhd, ppfid={rel %hhd, abs %hhd}, engine_sel 0x%x]\n",
+                 p_hwfn->rel_pf_id, ppfid, abs_ppfid,
+                 ecore_rd(p_hwfn, p_ptt, addr));
+
+       for (filter_idx = 0; filter_idx < NIG_REG_LLH_FUNC_FILTER_EN_SIZE;
+            filter_idx++) {
+               OSAL_MEMSET(&filter_details, 0, sizeof(filter_details));
+               rc =  ecore_llh_access_filter_e4(p_hwfn, p_ptt, abs_ppfid,
+                                                filter_idx, &filter_details,
+                                                false /* read access */);
+               if (rc != ECORE_SUCCESS)
+                       return rc;
+
+               DP_NOTICE(p_hwfn, false,
+                         "filter %2hhd: enable %d, value 0x%016lx, mode %d, protocol_type 0x%x, hdr_sel 0x%x\n",
+                         filter_idx, filter_details.enable,
+                         (unsigned long)filter_details.value,
+                         filter_details.mode,
+                         filter_details.protocol_type, filter_details.hdr_sel);
+       }
+
+       return ECORE_SUCCESS;
+}
+
+enum _ecore_status_t ecore_llh_dump_ppfid(struct ecore_dev *p_dev, u8 ppfid)
+{
+       struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
+       struct ecore_ptt *p_ptt = ecore_ptt_acquire(p_hwfn);
+       enum _ecore_status_t rc;
+
+       if (p_ptt == OSAL_NULL)
+               return ECORE_AGAIN;
+
+       rc = ecore_llh_dump_ppfid_e4(p_hwfn, p_ptt, ppfid);
+
+       ecore_ptt_release(p_hwfn, p_ptt);
+
+       return rc;
+}
+
+enum _ecore_status_t ecore_llh_dump_all(struct ecore_dev *p_dev)
+{
+       u8 ppfid;
+       enum _ecore_status_t rc;
+
+       for (ppfid = 0; ppfid < p_dev->p_llh_info->num_ppfid; ppfid++) {
+               rc = ecore_llh_dump_ppfid(p_dev, ppfid);
+               if (rc != ECORE_SUCCESS)
+                       return rc;
+       }
+
+       return ECORE_SUCCESS;
+}
+
+/******************************* NIG LLH - End ********************************/
+
 /* Configurable */
 #define ECORE_MIN_DPIS         (4)     /* The minimal num of DPIs required to
                                         * load the driver. The number was
@@ -456,6 +1639,12 @@ static void ecore_qm_info_free(struct ecore_hwfn *p_hwfn)
        OSAL_FREE(p_hwfn->p_dev, qm_info->wfq_data);
 }
 
+static void ecore_dbg_user_data_free(struct ecore_hwfn *p_hwfn)
+{
+       OSAL_FREE(p_hwfn->p_dev, p_hwfn->dbg_user_info);
+       p_hwfn->dbg_user_info = OSAL_NULL;
+}
+
 void ecore_resc_free(struct ecore_dev *p_dev)
 {
        int i;
@@ -470,6 +1659,8 @@ void ecore_resc_free(struct ecore_dev *p_dev)
 
        OSAL_FREE(p_dev, p_dev->reset_stats);
 
+       ecore_llh_free(p_dev);
+
        for_each_hwfn(p_dev, i) {
                struct ecore_hwfn *p_hwfn = &p_dev->hwfns[i];
 
@@ -483,6 +1674,7 @@ void ecore_resc_free(struct ecore_dev *p_dev)
                ecore_l2_free(p_hwfn);
                ecore_dmae_info_free(p_hwfn);
                ecore_dcbx_info_free(p_hwfn);
+               ecore_dbg_user_data_free(p_hwfn);
                /* @@@TBD Flush work-queue ? */
 
                /* destroy doorbell recovery mechanism */
@@ -562,12 +1754,11 @@ u16 ecore_init_qm_get_num_pf_rls(struct ecore_hwfn *p_hwfn)
 {
        u16 num_pf_rls, num_vfs = ecore_init_qm_get_num_vfs(p_hwfn);
 
-       /* @DPDK */
        /* num RLs can't exceed resource amount of rls or vports or the
         * dcqcn qps
         */
        num_pf_rls = (u16)OSAL_MIN_T(u32, RESC_NUM(p_hwfn, ECORE_RL),
-                                    (u16)RESC_NUM(p_hwfn, ECORE_VPORT));
+                                    RESC_NUM(p_hwfn, ECORE_VPORT));
 
        /* make sure after we reserve the default and VF rls we'll have
         * something left
@@ -828,7 +2019,7 @@ u16 ecore_get_cm_pq_idx_mcos(struct ecore_hwfn *p_hwfn, u8 tc)
        if (tc > max_tc)
                DP_ERR(p_hwfn, "tc %d must be smaller than %d\n", tc, max_tc);
 
-       return ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_MCOS) + tc;
+       return ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_MCOS) + (tc % max_tc);
 }
 
 u16 ecore_get_cm_pq_idx_vf(struct ecore_hwfn *p_hwfn, u16 vf)
@@ -838,17 +2029,17 @@ u16 ecore_get_cm_pq_idx_vf(struct ecore_hwfn *p_hwfn, u16 vf)
        if (vf > max_vf)
                DP_ERR(p_hwfn, "vf %d must be smaller than %d\n", vf, max_vf);
 
-       return ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_VFS) + vf;
+       return ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_VFS) + (vf % max_vf);
 }
 
 u16 ecore_get_cm_pq_idx_rl(struct ecore_hwfn *p_hwfn, u16 rl)
 {
        u16 max_rl = ecore_init_qm_get_num_pf_rls(p_hwfn);
 
-       if (rl > max_rl)
-               DP_ERR(p_hwfn, "rl %d must be smaller than %d\n", rl, max_rl);
-
-       return ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_RLS) + rl;
+       /* for rate limiters, it is okay to use the modulo behavior - no
+        * DP_ERR
+        */
+       return ecore_get_cm_pq_idx(p_hwfn, PQ_FLAGS_RLS) + (rl % max_rl);
 }
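
The three helpers above now clamp out-of-range ids by wrapping modulo the pool size instead of indexing past the reserved PQ range. A minimal standalone sketch of that wrap-around behavior (the base value of 100 is hypothetical, not taken from the driver):

    #include <stdio.h>

    /* Sketch only: mirrors the (id % max_id) wrap used by the
     * ecore_get_cm_pq_idx_*() helpers; 'base' stands in for the value
     * returned by ecore_get_cm_pq_idx().
     */
    static unsigned short pq_idx_sketch(unsigned short base,
                                        unsigned short id,
                                        unsigned short max_id)
    {
            return base + (id % max_id);
    }

    int main(void)
    {
            /* With base 100 and a pool of 4, id 5 wraps to 101. */
            printf("%u\n", pq_idx_sketch(100, 5, 4));
            return 0;
    }
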
 
 u16 ecore_get_qm_vport_idx_rl(struct ecore_hwfn *p_hwfn, u16 rl)
@@ -1334,6 +2525,20 @@ enum _ecore_status_t ecore_resc_alloc(struct ecore_dev *p_dev)
                                  "Failed to allocate memory for dcbx structure\n");
                        goto alloc_err;
                }
+
+               rc = OSAL_DBG_ALLOC_USER_DATA(p_hwfn, &p_hwfn->dbg_user_info);
+               if (rc) {
+                       DP_NOTICE(p_hwfn, false,
+                                 "Failed to allocate dbg user info structure\n");
+                       goto alloc_err;
+               }
+       } /* hwfn loop */
+
+       rc = ecore_llh_alloc(p_dev);
+       if (rc != ECORE_SUCCESS) {
+               DP_NOTICE(p_dev, true,
+                         "Failed to allocate memory for the llh_info structure\n");
+               goto alloc_err;
        }
 
        p_dev->reset_stats = OSAL_ZALLOC(p_dev, GFP_KERNEL,
@@ -1476,8 +2681,7 @@ static enum _ecore_status_t ecore_calc_hw_mode(struct ecore_hwfn *p_hwfn)
                return ECORE_INVAL;
        }
 
-       if (OSAL_TEST_BIT(ECORE_MF_OVLAN_CLSS,
-                         &p_hwfn->p_dev->mf_bits))
+       if (OSAL_TEST_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits))
                hw_mode |= 1 << MODE_MF_SD;
        else
                hw_mode |= 1 << MODE_MF_SI;
@@ -1960,6 +3164,14 @@ enum ECORE_ROCE_EDPM_MODE {
        ECORE_ROCE_EDPM_MODE_DISABLE = 2,
 };
 
+bool ecore_edpm_enabled(struct ecore_hwfn *p_hwfn)
+{
+       if (p_hwfn->dcbx_no_edpm || p_hwfn->db_bar_no_edpm)
+               return false;
+
+       return true;
+}
+
 static enum _ecore_status_t
 ecore_hw_init_pf_doorbell_bar(struct ecore_hwfn *p_hwfn,
                              struct ecore_ptt *p_ptt)
@@ -2047,7 +3259,7 @@ ecore_hw_init_pf_doorbell_bar(struct ecore_hwfn *p_hwfn,
        DP_INFO(p_hwfn,
                " dpi_size=%d, dpi_count=%d, roce_edpm=%s\n",
                p_hwfn->dpi_size, p_hwfn->dpi_count,
-               ((p_hwfn->dcbx_no_edpm) || (p_hwfn->db_bar_no_edpm)) ?
+               (!ecore_edpm_enabled(p_hwfn)) ?
                "disabled" : "enabled");
 
        /* Check return codes from above calls */
@@ -2073,17 +3285,7 @@ static enum _ecore_status_t ecore_hw_init_port(struct ecore_hwfn *p_hwfn,
                                               struct ecore_ptt *p_ptt,
                                               int hw_mode)
 {
-       u32 ppf_to_eng_sel[NIG_REG_PPF_TO_ENGINE_SEL_RT_SIZE];
-       u32 val;
        enum _ecore_status_t rc = ECORE_SUCCESS;
-       u8 i;
-
-       /* In CMT for non-RoCE packets - use connection based classification */
-       val = ECORE_IS_CMT(p_hwfn->p_dev) ? 0x8 : 0x0;
-       for (i = 0; i < NIG_REG_PPF_TO_ENGINE_SEL_RT_SIZE; i++)
-               ppf_to_eng_sel[i] = val;
-       STORE_RT_REG_AGG(p_hwfn, NIG_REG_PPF_TO_ENGINE_SEL_RT_OFFSET,
-                        ppf_to_eng_sel);
 
        /* In CMT the gate should be cleared by the 2nd hwfn */
        if (!ECORE_IS_CMT(p_hwfn->p_dev) || !IS_LEAD_HWFN(p_hwfn))
@@ -2135,12 +3337,8 @@ static enum _ecore_status_t ecore_hw_init_port(struct ecore_hwfn *p_hwfn,
 }
 
 static enum _ecore_status_t
-ecore_hw_init_pf(struct ecore_hwfn *p_hwfn,
-                struct ecore_ptt *p_ptt,
-                struct ecore_tunnel_info *p_tunn,
-                int hw_mode,
-                bool b_hw_start,
-                enum ecore_int_mode int_mode, bool allow_npar_tx_switch)
+ecore_hw_init_pf(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                int hw_mode, struct ecore_hw_init_params *p_params)
 {
        u8 rel_pf_id = p_hwfn->rel_pf_id;
        u32 prs_reg;
@@ -2228,17 +3426,18 @@ ecore_hw_init_pf(struct ecore_hwfn *p_hwfn,
        */
 
        rc = ecore_hw_init_pf_doorbell_bar(p_hwfn, p_ptt);
-       if (rc)
+       if (rc != ECORE_SUCCESS)
                return rc;
-       if (b_hw_start) {
+
+       if (p_params->b_hw_start) {
                /* enable interrupts */
-               rc = ecore_int_igu_enable(p_hwfn, p_ptt, int_mode);
+               rc = ecore_int_igu_enable(p_hwfn, p_ptt, p_params->int_mode);
                if (rc != ECORE_SUCCESS)
                        return rc;
 
                /* send function start command */
-               rc = ecore_sp_pf_start(p_hwfn, p_ptt, p_tunn,
-                                      allow_npar_tx_switch);
+               rc = ecore_sp_pf_start(p_hwfn, p_ptt, p_params->p_tunn,
+                                      p_params->allow_npar_tx_switch);
                if (rc) {
                        DP_NOTICE(p_hwfn, true,
                                  "Function start ramrod failed\n");
@@ -2410,6 +3609,7 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
        bool b_default_mtu = true;
        struct ecore_hwfn *p_hwfn;
        enum _ecore_status_t rc = ECORE_SUCCESS;
+       u16 ether_type;
        int i;
 
        if ((p_params->int_mode == ECORE_INT_MODE_MSI) && ECORE_IS_CMT(p_dev)) {
@@ -2442,6 +3642,25 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
                if (rc != ECORE_SUCCESS)
                        return rc;
 
+               if (IS_PF(p_dev) && (OSAL_TEST_BIT(ECORE_MF_8021Q_TAGGING,
+                                                  &p_dev->mf_bits) ||
+                                    OSAL_TEST_BIT(ECORE_MF_8021AD_TAGGING,
+                                                  &p_dev->mf_bits))) {
+                       if (OSAL_TEST_BIT(ECORE_MF_8021Q_TAGGING,
+                                         &p_dev->mf_bits))
+                               ether_type = ETHER_TYPE_VLAN;
+                       else
+                               ether_type = ETHER_TYPE_QINQ;
+                       STORE_RT_REG(p_hwfn, PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET,
+                                    ether_type);
+                       STORE_RT_REG(p_hwfn, NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET,
+                                    ether_type);
+                       STORE_RT_REG(p_hwfn, PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET,
+                                    ether_type);
+                       STORE_RT_REG(p_hwfn, DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET,
+                                    ether_type);
+               }
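
The tag-classification setup above picks a single outer-tag ethertype and programs it into the PRS/NIG/PBF/DORQ runtime registers. A small sketch of the selection, assuming the conventional TPID values behind ETHER_TYPE_VLAN and ETHER_TYPE_QINQ (0x8100 and 0x88A8; the real macros come from the driver headers):

    #include <stdbool.h>
    #include <stdint.h>

    /* Assumed TPID values; the driver takes them from its own headers. */
    #define SKETCH_ETHER_TYPE_VLAN  0x8100 /* 802.1Q */
    #define SKETCH_ETHER_TYPE_QINQ  0x88A8 /* 802.1ad */

    static uint16_t select_tag_ethertype(bool mf_8021q_tagging)
    {
            /* 802.1Q tagging uses the VLAN TPID; otherwise QinQ is
             * assumed, matching the if/else above.
             */
            return mf_8021q_tagging ? SKETCH_ETHER_TYPE_VLAN
                                    : SKETCH_ETHER_TYPE_QINQ;
    }
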
+
                ecore_set_spq_block_timeout(p_hwfn, p_params->spq_timeout_ms);
 
                rc = ecore_fill_load_req_params(p_hwfn, &load_req_params,
@@ -2542,11 +3761,8 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
                        /* Fall into */
                case FW_MSG_CODE_DRV_LOAD_FUNCTION:
                        rc = ecore_hw_init_pf(p_hwfn, p_hwfn->p_main_ptt,
-                                             p_params->p_tunn,
                                              p_hwfn->hw_info.hw_mode,
-                                             p_params->b_hw_start,
-                                             p_params->int_mode,
-                                             p_params->allow_npar_tx_switch);
+                                             p_params);
                        break;
                default:
                        DP_NOTICE(p_hwfn, false,
@@ -2590,6 +3806,20 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
                p_hwfn->hw_init_done = true;
        }
 
+       if (IS_PF(p_dev)) {
+               /* Get pre-negotiated values for stag, bandwidth etc. */
+               p_hwfn = ECORE_LEADING_HWFN(p_dev);
+               DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ,
+                          "Sending GET_OEM_UPDATES command to trigger stag/bandwidth attention handling\n");
+               rc = ecore_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
+                                  DRV_MSG_CODE_GET_OEM_UPDATES,
+                                  1 << DRV_MB_PARAM_DUMMY_OEM_UPDATES_OFFSET,
+                                  &resp, &param);
+               if (rc != ECORE_SUCCESS)
+                       DP_NOTICE(p_hwfn, false,
+                                 "Failed to send GET_OEM_UPDATES attention request\n");
+       }
+
        if (IS_PF(p_dev)) {
                p_hwfn = ECORE_LEADING_HWFN(p_dev);
                drv_mb_param = STORM_FW_VERSION;
@@ -2599,17 +3843,23 @@ enum _ecore_status_t ecore_hw_init(struct ecore_dev *p_dev,
                if (rc != ECORE_SUCCESS)
                        DP_INFO(p_hwfn, "Failed to update firmware version\n");
 
-               if (!b_default_mtu)
+               if (!b_default_mtu) {
                        rc = ecore_mcp_ov_update_mtu(p_hwfn, p_hwfn->p_main_ptt,
                                                      p_hwfn->hw_info.mtu);
-               if (rc != ECORE_SUCCESS)
-                       DP_INFO(p_hwfn, "Failed to update default mtu\n");
+                       if (rc != ECORE_SUCCESS)
+                               DP_INFO(p_hwfn, "Failed to update default mtu\n");
+               }
 
                rc = ecore_mcp_ov_update_driver_state(p_hwfn,
                                                      p_hwfn->p_main_ptt,
                                                ECORE_OV_DRIVER_STATE_DISABLED);
                if (rc != ECORE_SUCCESS)
                        DP_INFO(p_hwfn, "Failed to update driver state\n");
+
+               rc = ecore_mcp_ov_update_eswitch(p_hwfn, p_hwfn->p_main_ptt,
+                                                ECORE_OV_ESWITCH_NONE);
+               if (rc != ECORE_SUCCESS)
+                       DP_INFO(p_hwfn, "Failed to update eswitch mode\n");
        }
 
        return rc;
@@ -2742,6 +3992,12 @@ enum _ecore_status_t ecore_hw_stop(struct ecore_dev *p_dev)
                        rc2 = ECORE_UNKNOWN_ERROR;
                }
 
+               OSAL_DPC_SYNC(p_hwfn);
+
+               /* After this point we don't expect the FW to send us async
+                * events
+                */
+
                /* perform debug action after PF stop was sent */
                OSAL_AFTER_PF_STOP((void *)p_dev, p_hwfn->my_id);
 
@@ -2778,6 +4034,12 @@ enum _ecore_status_t ecore_hw_stop(struct ecore_dev *p_dev)
                /* Need to wait 1ms to guarantee SBs are cleared */
                OSAL_MSLEEP(1);
 
+               if (IS_LEAD_HWFN(p_hwfn) &&
+                   OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS, &p_dev->mf_bits) &&
+                   !ECORE_IS_FCOE_PERSONALITY(p_hwfn))
+                       ecore_llh_remove_mac_filter(p_dev, 0,
+                                                  p_hwfn->hw_info.hw_mac_addr);
+
                if (!p_dev->recov_in_prog) {
                        ecore_verify_reg_val(p_hwfn, p_ptt,
                                             QM_REG_USG_CNT_PF_TX, 0);
@@ -2987,16 +4249,31 @@ static void ecore_hw_set_feat(struct ecore_hwfn *p_hwfn)
                                   FEAT_NUM(p_hwfn, ECORE_VF_L2_QUE));
        }
 
-       if (ECORE_IS_FCOE_PERSONALITY(p_hwfn))
-               feat_num[ECORE_FCOE_CQ] =
-                       OSAL_MIN_T(u32, sb_cnt.cnt, RESC_NUM(p_hwfn,
-                                                            ECORE_CMDQS_CQS));
-
-       if (ECORE_IS_ISCSI_PERSONALITY(p_hwfn))
-               feat_num[ECORE_ISCSI_CQ] =
-                       OSAL_MIN_T(u32, sb_cnt.cnt, RESC_NUM(p_hwfn,
-                                                            ECORE_CMDQS_CQS));
-
+       if (ECORE_IS_FCOE_PERSONALITY(p_hwfn) ||
+           ECORE_IS_ISCSI_PERSONALITY(p_hwfn)) {
+               u32 *p_storage_feat = ECORE_IS_FCOE_PERSONALITY(p_hwfn) ?
+                                     &feat_num[ECORE_FCOE_CQ] :
+                                     &feat_num[ECORE_ISCSI_CQ];
+               u32 limit = sb_cnt.cnt;
+
+               /* The number of queues should not exceed the number of FP SBs.
+                * In storage target, the queues are divided into pairs of a CQ
+                * and a CmdQ, and each pair uses a single SB. The limit in
+                * this case should allow a max ratio of 2:1 instead of 1:1.
+                */
+               if (p_hwfn->p_dev->b_is_target)
+                       limit *= 2;
+               *p_storage_feat = OSAL_MIN_T(u32, limit,
+                                            RESC_NUM(p_hwfn, ECORE_CMDQS_CQS));
+
+               /* @DPDK */
+               /* The size of "cq_cmdq_sb_num_arr" in the fcoe/iscsi init
+                * ramrod is limited to "NUM_OF_GLOBAL_QUEUES / 2".
+                */
+               *p_storage_feat = OSAL_MIN_T(u32, *p_storage_feat,
+                                            (NUM_OF_GLOBAL_QUEUES / 2));
+       }
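
A worked sketch of the storage CQ sizing above, with assumed numbers (64 fastpath SBs, a NUM_OF_GLOBAL_QUEUES of 128, ample CMDQS_CQS resources); the 2:1 doubling applies only in storage-target mode:

    /* Sketch, not driver code: returns the storage CQ count for the
     * given inputs, following the two caps applied above.
     */
    static unsigned int storage_cq_limit(unsigned int fp_sbs,
                                         unsigned int cmdqs_cqs,
                                         unsigned int num_global_queues,
                                         int is_target)
    {
            unsigned int limit = is_target ? fp_sbs * 2 : fp_sbs;

            if (limit > cmdqs_cqs)
                    limit = cmdqs_cqs;
            /* fcoe/iscsi init ramrod array bound */
            if (limit > num_global_queues / 2)
                    limit = num_global_queues / 2;
            return limit; /* e.g. (64, 256, 128, 1) -> 128 -> 64 */
    }
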
+
        DP_VERBOSE(p_hwfn, ECORE_MSG_PROBE,
                   "#PF_L2_QUEUE=%d VF_L2_QUEUES=%d #ROCE_CNQ=%d #FCOE_CQ=%d #ISCSI_CQ=%d #SB=%d\n",
                   (int)FEAT_NUM(p_hwfn, ECORE_PF_L2_QUE),
@@ -3276,6 +4553,59 @@ static enum _ecore_status_t ecore_hw_set_resc_info(struct ecore_hwfn *p_hwfn,
        return ECORE_SUCCESS;
 }
 
+#define ECORE_NONUSED_PPFID_MASK_BB_4P_LO_PORTS        0xaa
+#define ECORE_NONUSED_PPFID_MASK_BB_4P_HI_PORTS        0x55
+#define ECORE_NONUSED_PPFID_MASK_AH_4P         0xf0
+
+static enum _ecore_status_t ecore_hw_get_ppfid_bitmap(struct ecore_hwfn *p_hwfn,
+                                                     struct ecore_ptt *p_ptt)
+{
+       u8 native_ppfid_idx = ECORE_PPFID_BY_PFID(p_hwfn), new_bitmap;
+       struct ecore_dev *p_dev = p_hwfn->p_dev;
+       enum _ecore_status_t rc;
+
+       rc = ecore_mcp_get_ppfid_bitmap(p_hwfn, p_ptt);
+       if (rc != ECORE_SUCCESS && rc != ECORE_NOTIMPL)
+               return rc;
+       else if (rc == ECORE_NOTIMPL)
+               p_dev->ppfid_bitmap = 0x1 << native_ppfid_idx;
+
+       /* 4-port mode has limitations that should be enforced:
+        * - BB: the MFW can access only PPFIDs whose corresponding PFIDs
+        *       belong to this port.
+        * - AH/E5: only 4 PPFIDs per port are available.
+        */
+       if (ecore_device_num_ports(p_dev) == 4) {
+               u8 mask;
+
+               if (ECORE_IS_BB(p_dev))
+                       mask = MFW_PORT(p_hwfn) > 1 ?
+                              ECORE_NONUSED_PPFID_MASK_BB_4P_HI_PORTS :
+                              ECORE_NONUSED_PPFID_MASK_BB_4P_LO_PORTS;
+               else
+                       mask = ECORE_NONUSED_PPFID_MASK_AH_4P;
+
+               if (p_dev->ppfid_bitmap & mask) {
+                       new_bitmap = p_dev->ppfid_bitmap & ~mask;
+                       DP_INFO(p_hwfn,
+                               "Fix the PPFID bitmap for 4-port mode: 0x%hhx -> 0x%hhx\n",
+                               p_dev->ppfid_bitmap, new_bitmap);
+                       p_dev->ppfid_bitmap = new_bitmap;
+               }
+       }
+
+       /* The native PPFID is expected to be part of the allocated bitmap */
+       if (!(p_dev->ppfid_bitmap & (0x1 << native_ppfid_idx))) {
+               new_bitmap = 0x1 << native_ppfid_idx;
+               DP_INFO(p_hwfn,
+                       "Fix the PPFID bitmap to include the native PPFID: 0x%hhx -> 0x%hhx\n",
+                       p_dev->ppfid_bitmap, new_bitmap);
+               p_dev->ppfid_bitmap = new_bitmap;
+       }
+
+       return ECORE_SUCCESS;
+}
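
The non-used masks above encode which PPFIDs must be cleared in 4-port mode: 0xaa/0x55 keep only one PPFID parity on BB depending on the port pair, and 0xf0 keeps the low four on AH/E5. A sketch of the fix-up with an assumed input (an MFW-reported bitmap of 0xff on a BB high port):

    #include <stdio.h>

    int main(void)
    {
            unsigned char bitmap = 0xff;            /* assumed MFW answer */
            unsigned char nonused_hi_ports = 0x55;  /* BB, MFW_PORT > 1 */

            if (bitmap & nonused_hi_ports)
                    bitmap &= (unsigned char)~nonused_hi_ports;
            printf("0x%02x\n", bitmap);             /* prints 0xaa */
            return 0;
    }
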
+
 static enum _ecore_status_t ecore_hw_get_resc(struct ecore_hwfn *p_hwfn,
                                              struct ecore_ptt *p_ptt,
                                              bool drv_resc_alloc)
@@ -3350,6 +4680,13 @@ static enum _ecore_status_t ecore_hw_get_resc(struct ecore_hwfn *p_hwfn,
                                "Failed to release the resource lock for the resource allocation commands\n");
        }
 
+       /* PPFID bitmap */
+       if (IS_LEAD_HWFN(p_hwfn)) {
+               rc = ecore_hw_get_ppfid_bitmap(p_hwfn, p_ptt);
+               if (rc != ECORE_SUCCESS)
+                       return rc;
+       }
+
 #ifndef ASIC_ONLY
        if (CHIP_REV_IS_SLOW(p_hwfn->p_dev)) {
                /* Reduced build contains less PQs */
@@ -3621,7 +4958,8 @@ ecore_hw_get_nvm_info(struct ecore_hwfn *p_hwfn,
        case NVM_CFG1_GLOB_MF_MODE_BD:
                p_hwfn->p_dev->mf_bits = 1 << ECORE_MF_OVLAN_CLSS |
                                         1 << ECORE_MF_LLH_PROTO_CLSS |
-                                        1 << ECORE_MF_8021AD_TAGGING;
+                                        1 << ECORE_MF_8021AD_TAGGING |
+                                        1 << ECORE_MF_FIP_SPECIAL;
                break;
        case NVM_CFG1_GLOB_MF_MODE_NPAR1_0:
                p_hwfn->p_dev->mf_bits = 1 << ECORE_MF_LLH_MAC_CLSS |
@@ -4139,9 +5477,8 @@ void ecore_prepare_hibernate(struct ecore_dev *p_dev)
 #endif
 
 static enum _ecore_status_t
-ecore_hw_prepare_single(struct ecore_hwfn *p_hwfn,
-                       void OSAL_IOMEM * p_regview,
-                       void OSAL_IOMEM * p_doorbells,
+ecore_hw_prepare_single(struct ecore_hwfn *p_hwfn, void OSAL_IOMEM *p_regview,
+                       void OSAL_IOMEM *p_doorbells, u64 db_phys_addr,
                        struct ecore_hw_prepare_params *p_params)
 {
        struct ecore_mdump_retain_data mdump_retain;
@@ -4152,6 +5489,7 @@ ecore_hw_prepare_single(struct ecore_hwfn *p_hwfn,
        /* Split PCI bars evenly between hwfns */
        p_hwfn->regview = p_regview;
        p_hwfn->doorbells = p_doorbells;
+       p_hwfn->db_phys_addr = db_phys_addr;
 
        if (IS_VF(p_dev))
                return ecore_vf_hw_prepare(p_hwfn);
@@ -4217,6 +5555,13 @@ ecore_hw_prepare_single(struct ecore_hwfn *p_hwfn,
                rc = ecore_mcp_initiate_pf_flr(p_hwfn, p_hwfn->p_main_ptt);
                if (rc != ECORE_SUCCESS)
                        DP_NOTICE(p_hwfn, false, "Failed to initiate PF FLR\n");
+
+               /* Workaround for MFW issue where PF FLR does not clean up
+                * the IGU block
+                */
+               if (!(p_hwfn->mcp_info->capabilities &
+                     FW_MB_PARAM_FEATURE_SUPPORT_IGU_CLEANUP))
+                       ecore_pf_flr_igu_cleanup(p_hwfn);
        }
 
        /* Check if mdump logs/data are present and update the epoch value */
@@ -4287,6 +5632,7 @@ enum _ecore_status_t ecore_hw_prepare(struct ecore_dev *p_dev,
        p_dev->chk_reg_fifo = p_params->chk_reg_fifo;
        p_dev->allow_mdump = p_params->allow_mdump;
        p_hwfn->b_en_pacing = p_params->b_en_pacing;
+       p_dev->b_is_target = p_params->b_is_target;
 
        if (p_params->b_relaxed_probe)
                p_params->p_relaxed_res = ECORE_HW_PREPARE_SUCCESS;
@@ -4296,9 +5642,9 @@ enum _ecore_status_t ecore_hw_prepare(struct ecore_dev *p_dev,
                ecore_init_iro_array(p_dev);
 
        /* Initialize the first hwfn - will learn number of hwfns */
-       rc = ecore_hw_prepare_single(p_hwfn,
-                                    p_dev->regview,
-                                    p_dev->doorbells, p_params);
+       rc = ecore_hw_prepare_single(p_hwfn, p_dev->regview,
+                                    p_dev->doorbells, p_dev->db_phys_addr,
+                                    p_params);
        if (rc != ECORE_SUCCESS)
                return rc;
 
@@ -4308,24 +5654,26 @@ enum _ecore_status_t ecore_hw_prepare(struct ecore_dev *p_dev,
        if (ECORE_IS_CMT(p_dev)) {
                void OSAL_IOMEM *p_regview, *p_doorbell;
                u8 OSAL_IOMEM *addr;
+               u64 db_phys_addr;
+               u32 offset;
 
                /* adjust bar offset for second engine */
-               addr = (u8 OSAL_IOMEM *)p_dev->regview +
-                                       ecore_hw_bar_size(p_hwfn,
-                                                         p_hwfn->p_main_ptt,
-                                                         BAR_ID_0) / 2;
+               offset = ecore_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
+                                          BAR_ID_0) / 2;
+               addr = (u8 OSAL_IOMEM *)p_dev->regview + offset;
                p_regview = (void OSAL_IOMEM *)addr;
 
-               addr = (u8 OSAL_IOMEM *)p_dev->doorbells +
-                                       ecore_hw_bar_size(p_hwfn,
-                                                         p_hwfn->p_main_ptt,
-                                                         BAR_ID_1) / 2;
+               offset = ecore_hw_bar_size(p_hwfn, p_hwfn->p_main_ptt,
+                                          BAR_ID_1) / 2;
+               addr = (u8 OSAL_IOMEM *)p_dev->doorbells + offset;
                p_doorbell = (void OSAL_IOMEM *)addr;
+               db_phys_addr = p_dev->db_phys_addr + offset;
 
                p_dev->hwfns[1].b_en_pacing = p_params->b_en_pacing;
                /* prepare second hw function */
                rc = ecore_hw_prepare_single(&p_dev->hwfns[1], p_regview,
-                                            p_doorbell, p_params);
+                                            p_doorbell, db_phys_addr,
+                                            p_params);
 
                /* in case of error, need to free the previously
                 * initialized hwfn 0.
@@ -4722,419 +6070,6 @@ enum _ecore_status_t ecore_fw_rss_eng(struct ecore_hwfn *p_hwfn,
        return ECORE_SUCCESS;
 }
 
-static enum _ecore_status_t
-ecore_llh_add_mac_filter_bb_ah(struct ecore_hwfn *p_hwfn,
-                              struct ecore_ptt *p_ptt, u32 high, u32 low,
-                              u32 *p_entry_num)
-{
-       u32 en;
-       int i;
-
-       /* Find a free entry and utilize it */
-       for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
-               en = ecore_rd(p_hwfn, p_ptt,
-                             NIG_REG_LLH_FUNC_FILTER_EN_BB_K2 +
-                             i * sizeof(u32));
-               if (en)
-                       continue;
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 +
-                        2 * i * sizeof(u32), low);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 +
-                        (2 * i + 1) * sizeof(u32), high);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_MODE_BB_K2 +
-                        i * sizeof(u32), 0);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_BB_K2 +
-                        i * sizeof(u32), 0);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_EN_BB_K2 +
-                        i * sizeof(u32), 1);
-               break;
-       }
-
-       if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE)
-               return ECORE_NORESOURCES;
-
-       *p_entry_num = i;
-
-       return ECORE_SUCCESS;
-}
-
-enum _ecore_status_t ecore_llh_add_mac_filter(struct ecore_hwfn *p_hwfn,
-                                         struct ecore_ptt *p_ptt, u8 *p_filter)
-{
-       u32 high, low, entry_num;
-       enum _ecore_status_t rc = ECORE_SUCCESS;
-
-       if (!OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS,
-                          &p_hwfn->p_dev->mf_bits))
-               return ECORE_SUCCESS;
-
-       high = p_filter[1] | (p_filter[0] << 8);
-       low = p_filter[5] | (p_filter[4] << 8) |
-             (p_filter[3] << 16) | (p_filter[2] << 24);
-
-       if (ECORE_IS_BB(p_hwfn->p_dev) || ECORE_IS_AH(p_hwfn->p_dev))
-               rc = ecore_llh_add_mac_filter_bb_ah(p_hwfn, p_ptt, high, low,
-                                                   &entry_num);
-       if (rc != ECORE_SUCCESS) {
-               DP_NOTICE(p_hwfn, false,
-                         "Failed to find an empty LLH filter to utilize\n");
-               return rc;
-       }
-
-       DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
-                  "MAC: %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx is added at %d\n",
-                  p_filter[0], p_filter[1], p_filter[2], p_filter[3],
-                  p_filter[4], p_filter[5], entry_num);
-
-       return rc;
-}
-
-static enum _ecore_status_t
-ecore_llh_remove_mac_filter_bb_ah(struct ecore_hwfn *p_hwfn,
-                                 struct ecore_ptt *p_ptt, u32 high, u32 low,
-                                 u32 *p_entry_num)
-{
-       int i;
-
-       /* Find the entry and clean it */
-       for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
-               if (ecore_rd(p_hwfn, p_ptt,
-                            NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 +
-                            2 * i * sizeof(u32)) != low)
-                       continue;
-               if (ecore_rd(p_hwfn, p_ptt,
-                            NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 +
-                            (2 * i + 1) * sizeof(u32)) != high)
-                       continue;
-
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_EN_BB_K2 + i * sizeof(u32), 0);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 +
-                        2 * i * sizeof(u32), 0);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 +
-                        (2 * i + 1) * sizeof(u32), 0);
-               break;
-       }
-
-       if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE)
-               return ECORE_INVAL;
-
-       *p_entry_num = i;
-
-       return ECORE_SUCCESS;
-}
-
-void ecore_llh_remove_mac_filter(struct ecore_hwfn *p_hwfn,
-                            struct ecore_ptt *p_ptt, u8 *p_filter)
-{
-       u32 high, low, entry_num;
-       enum _ecore_status_t rc = ECORE_SUCCESS;
-
-       if (!OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS,
-                          &p_hwfn->p_dev->mf_bits))
-               return;
-
-       high = p_filter[1] | (p_filter[0] << 8);
-       low = p_filter[5] | (p_filter[4] << 8) |
-             (p_filter[3] << 16) | (p_filter[2] << 24);
-
-       if (ECORE_IS_BB(p_hwfn->p_dev) || ECORE_IS_AH(p_hwfn->p_dev))
-               rc = ecore_llh_remove_mac_filter_bb_ah(p_hwfn, p_ptt, high,
-                                                      low, &entry_num);
-       if (rc != ECORE_SUCCESS) {
-               DP_NOTICE(p_hwfn, false,
-                         "Tried to remove a non-configured filter\n");
-               return;
-       }
-
-
-       DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
-                  "MAC: %02hhx:%02hhx:%02hhx:%02hhx:%02hhx:%02hhx was removed from %d\n",
-                  p_filter[0], p_filter[1], p_filter[2], p_filter[3],
-                  p_filter[4], p_filter[5], entry_num);
-}
-
-static enum _ecore_status_t
-ecore_llh_add_protocol_filter_bb_ah(struct ecore_hwfn *p_hwfn,
-                                   struct ecore_ptt *p_ptt,
-                                   enum ecore_llh_port_filter_type_t type,
-                                   u32 high, u32 low, u32 *p_entry_num)
-{
-       u32 en;
-       int i;
-
-       /* Find a free entry and utilize it */
-       for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
-               en = ecore_rd(p_hwfn, p_ptt,
-                             NIG_REG_LLH_FUNC_FILTER_EN_BB_K2 +
-                             i * sizeof(u32));
-               if (en)
-                       continue;
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 +
-                        2 * i * sizeof(u32), low);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 +
-                        (2 * i + 1) * sizeof(u32), high);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_MODE_BB_K2 +
-                        i * sizeof(u32), 1);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_BB_K2 +
-                        i * sizeof(u32), 1 << type);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_EN_BB_K2 + i * sizeof(u32), 1);
-               break;
-       }
-
-       if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE)
-               return ECORE_NORESOURCES;
-
-       *p_entry_num = i;
-
-       return ECORE_SUCCESS;
-}
-
-enum _ecore_status_t
-ecore_llh_add_protocol_filter(struct ecore_hwfn *p_hwfn,
-                             struct ecore_ptt *p_ptt,
-                             u16 source_port_or_eth_type,
-                             u16 dest_port,
-                             enum ecore_llh_port_filter_type_t type)
-{
-       u32 high, low, entry_num;
-       enum _ecore_status_t rc = ECORE_SUCCESS;
-
-       if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS,
-                          &p_hwfn->p_dev->mf_bits))
-               return rc;
-
-       high = 0;
-       low = 0;
-
-       switch (type) {
-       case ECORE_LLH_FILTER_ETHERTYPE:
-               high = source_port_or_eth_type;
-               break;
-       case ECORE_LLH_FILTER_TCP_SRC_PORT:
-       case ECORE_LLH_FILTER_UDP_SRC_PORT:
-               low = source_port_or_eth_type << 16;
-               break;
-       case ECORE_LLH_FILTER_TCP_DEST_PORT:
-       case ECORE_LLH_FILTER_UDP_DEST_PORT:
-               low = dest_port;
-               break;
-       case ECORE_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
-       case ECORE_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
-               low = (source_port_or_eth_type << 16) | dest_port;
-               break;
-       default:
-               DP_NOTICE(p_hwfn, true,
-                         "Non valid LLH protocol filter type %d\n", type);
-               return ECORE_INVAL;
-       }
-
-       if (ECORE_IS_BB(p_hwfn->p_dev) || ECORE_IS_AH(p_hwfn->p_dev))
-               rc = ecore_llh_add_protocol_filter_bb_ah(p_hwfn, p_ptt, type,
-                                                        high, low, &entry_num);
-       if (rc != ECORE_SUCCESS) {
-               DP_NOTICE(p_hwfn, false,
-                         "Failed to find an empty LLH filter to utilize\n");
-               return rc;
-       }
-       switch (type) {
-       case ECORE_LLH_FILTER_ETHERTYPE:
-               DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
-                          "ETH type %x is added at %d\n",
-                          source_port_or_eth_type, entry_num);
-               break;
-       case ECORE_LLH_FILTER_TCP_SRC_PORT:
-               DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
-                          "TCP src port %x is added at %d\n",
-                          source_port_or_eth_type, entry_num);
-               break;
-       case ECORE_LLH_FILTER_UDP_SRC_PORT:
-               DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
-                          "UDP src port %x is added at %d\n",
-                          source_port_or_eth_type, entry_num);
-               break;
-       case ECORE_LLH_FILTER_TCP_DEST_PORT:
-               DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
-                          "TCP dst port %x is added at %d\n", dest_port,
-                          entry_num);
-               break;
-       case ECORE_LLH_FILTER_UDP_DEST_PORT:
-               DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
-                          "UDP dst port %x is added at %d\n", dest_port,
-                          entry_num);
-               break;
-       case ECORE_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
-               DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
-                          "TCP src/dst ports %x/%x are added at %d\n",
-                          source_port_or_eth_type, dest_port, entry_num);
-               break;
-       case ECORE_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
-               DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
-                          "UDP src/dst ports %x/%x are added at %d\n",
-                          source_port_or_eth_type, dest_port, entry_num);
-               break;
-       }
-
-       return rc;
-}
-
-static enum _ecore_status_t
-ecore_llh_remove_protocol_filter_bb_ah(struct ecore_hwfn *p_hwfn,
-                                      struct ecore_ptt *p_ptt,
-                                      enum ecore_llh_port_filter_type_t type,
-                                      u32 high, u32 low, u32 *p_entry_num)
-{
-       int i;
-
-       /* Find the entry and clean it */
-       for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
-               if (!ecore_rd(p_hwfn, p_ptt,
-                             NIG_REG_LLH_FUNC_FILTER_EN_BB_K2 +
-                             i * sizeof(u32)))
-                       continue;
-               if (!ecore_rd(p_hwfn, p_ptt,
-                             NIG_REG_LLH_FUNC_FILTER_MODE_BB_K2 +
-                             i * sizeof(u32)))
-                       continue;
-               if (!(ecore_rd(p_hwfn, p_ptt,
-                              NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_BB_K2 +
-                              i * sizeof(u32)) & (1 << type)))
-                       continue;
-               if (ecore_rd(p_hwfn, p_ptt,
-                            NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 +
-                            2 * i * sizeof(u32)) != low)
-                       continue;
-               if (ecore_rd(p_hwfn, p_ptt,
-                            NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 +
-                            (2 * i + 1) * sizeof(u32)) != high)
-                       continue;
-
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_EN_BB_K2 + i * sizeof(u32), 0);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_MODE_BB_K2 +
-                        i * sizeof(u32), 0);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_BB_K2 +
-                        i * sizeof(u32), 0);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 +
-                        2 * i * sizeof(u32), 0);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 +
-                        (2 * i + 1) * sizeof(u32), 0);
-               break;
-       }
-
-       if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE)
-               return ECORE_INVAL;
-
-       *p_entry_num = i;
-
-       return ECORE_SUCCESS;
-}
-
-void
-ecore_llh_remove_protocol_filter(struct ecore_hwfn *p_hwfn,
-                                struct ecore_ptt *p_ptt,
-                                u16 source_port_or_eth_type,
-                                u16 dest_port,
-                                enum ecore_llh_port_filter_type_t type)
-{
-       u32 high, low, entry_num;
-       enum _ecore_status_t rc = ECORE_SUCCESS;
-
-       if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS,
-                          &p_hwfn->p_dev->mf_bits))
-               return;
-
-       high = 0;
-       low = 0;
-
-       switch (type) {
-       case ECORE_LLH_FILTER_ETHERTYPE:
-               high = source_port_or_eth_type;
-               break;
-       case ECORE_LLH_FILTER_TCP_SRC_PORT:
-       case ECORE_LLH_FILTER_UDP_SRC_PORT:
-               low = source_port_or_eth_type << 16;
-               break;
-       case ECORE_LLH_FILTER_TCP_DEST_PORT:
-       case ECORE_LLH_FILTER_UDP_DEST_PORT:
-               low = dest_port;
-               break;
-       case ECORE_LLH_FILTER_TCP_SRC_AND_DEST_PORT:
-       case ECORE_LLH_FILTER_UDP_SRC_AND_DEST_PORT:
-               low = (source_port_or_eth_type << 16) | dest_port;
-               break;
-       default:
-               DP_NOTICE(p_hwfn, true,
-                         "Non valid LLH protocol filter type %d\n", type);
-               return;
-       }
-
-       if (ECORE_IS_BB(p_hwfn->p_dev) || ECORE_IS_AH(p_hwfn->p_dev))
-               rc = ecore_llh_remove_protocol_filter_bb_ah(p_hwfn, p_ptt, type,
-                                                           high, low,
-                                                           &entry_num);
-       if (rc != ECORE_SUCCESS) {
-               DP_NOTICE(p_hwfn, false,
-                         "Tried to remove a non-configured filter [type %d, source_port_or_eth_type 0x%x, dest_port 0x%x]\n",
-                         type, source_port_or_eth_type, dest_port);
-               return;
-       }
-
-       DP_VERBOSE(p_hwfn, ECORE_MSG_HW,
-                  "Protocol filter [type %d, source_port_or_eth_type 0x%x, dest_port 0x%x] was removed from %d\n",
-                  type, source_port_or_eth_type, dest_port, entry_num);
-}
-
-static void ecore_llh_clear_all_filters_bb_ah(struct ecore_hwfn *p_hwfn,
-                                             struct ecore_ptt *p_ptt)
-{
-       int i;
-
-       if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn)))
-               return;
-
-       for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) {
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_EN_BB_K2  +
-                        i * sizeof(u32), 0);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 +
-                        2 * i * sizeof(u32), 0);
-               ecore_wr(p_hwfn, p_ptt,
-                        NIG_REG_LLH_FUNC_FILTER_VALUE_BB_K2 +
-                        (2 * i + 1) * sizeof(u32), 0);
-       }
-}
-
-void ecore_llh_clear_all_filters(struct ecore_hwfn *p_hwfn,
-                            struct ecore_ptt *p_ptt)
-{
-       if (!OSAL_TEST_BIT(ECORE_MF_LLH_PROTO_CLSS,
-                          &p_hwfn->p_dev->mf_bits) &&
-           !OSAL_TEST_BIT(ECORE_MF_LLH_MAC_CLSS,
-                          &p_hwfn->p_dev->mf_bits))
-               return;
-
-       if (ECORE_IS_BB(p_hwfn->p_dev) || ECORE_IS_AH(p_hwfn->p_dev))
-               ecore_llh_clear_all_filters_bb_ah(p_hwfn, p_ptt);
-}
-
 enum _ecore_status_t
 ecore_llh_set_function_as_default(struct ecore_hwfn *p_hwfn,
                                  struct ecore_ptt *p_ptt)
@@ -5713,3 +6648,8 @@ void ecore_set_fw_mac_addr(__le16 *fw_msb,
        ((u8 *)fw_lsb)[0] = mac[5];
        ((u8 *)fw_lsb)[1] = mac[4];
 }
+
+bool ecore_is_mf_fip_special(struct ecore_dev *p_dev)
+{
+       return !!OSAL_TEST_BIT(ECORE_MF_FIP_SPECIAL, &p_dev->mf_bits);
+}
index 02bacc2..7308063 100644 (file)
@@ -114,6 +114,9 @@ struct ecore_hw_init_params {
        /* Driver load parameters */
        struct ecore_drv_load_params *p_drv_load_params;
 
+       /* Avoid engine affinity for RoCE/storage in case of CMT mode */
+       bool avoid_eng_affin;
+
        /* SPQ block timeout in msec */
        u32 spq_timeout_ms;
 };
@@ -271,6 +274,9 @@ struct ecore_hw_prepare_params {
 
        /* Enable/disable request by ecore client for pacing */
        bool b_en_pacing;
+
+       /* Indicates whether this PF serves a storage target */
+       bool b_is_target;
 };
 
 /**
@@ -425,11 +431,17 @@ enum ecore_dmae_address_type_t {
 #define ECORE_DMAE_FLAG_VF_SRC         0x00000002
 #define ECORE_DMAE_FLAG_VF_DST         0x00000004
 #define ECORE_DMAE_FLAG_COMPLETION_DST 0x00000008
+#define ECORE_DMAE_FLAG_PORT           0x00000010
+#define ECORE_DMAE_FLAG_PF_SRC         0x00000020
+#define ECORE_DMAE_FLAG_PF_DST         0x00000040
 
 struct ecore_dmae_params {
        u32 flags; /* consists of ECORE_DMAE_FLAG_* values */
        u8 src_vfid;
        u8 dst_vfid;
+       u8 port_id;
+       u8 src_pfid;
+       u8 dst_pfid;
 };
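
With the raw flags argument replaced by this structure (see the prototype changes below), a caller that needs port-based routing fills the structure instead of OR-ing flags into a u32. A hedged sketch; 'p_hwfn', 'p_ptt', 'src_addr', 'grc_addr' and 'size_in_dwords' are placeholders for values the caller already holds:

    struct ecore_dmae_params params = { 0 };
    enum _ecore_status_t rc;

    params.flags = ECORE_DMAE_FLAG_PORT;  /* honor params.port_id */
    params.port_id = 1;

    /* Passing OSAL_NULL instead of &params selects the defaults. */
    rc = ecore_dmae_host2grc(p_hwfn, p_ptt, src_addr, grc_addr,
                             size_in_dwords, &params);
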
 
 /**
@@ -441,7 +453,9 @@ struct ecore_dmae_params {
  * @param source_addr
  * @param grc_addr (dmae_data_offset)
  * @param size_in_dwords
- * @param flags (one of the flags defined above)
+ * @param p_params (default parameters will be used in case of OSAL_NULL)
+ *
+ * @return enum _ecore_status_t
  */
 enum _ecore_status_t
 ecore_dmae_host2grc(struct ecore_hwfn *p_hwfn,
@@ -449,7 +463,7 @@ ecore_dmae_host2grc(struct ecore_hwfn *p_hwfn,
                    u64 source_addr,
                    u32 grc_addr,
                    u32 size_in_dwords,
-                   u32 flags);
+                   struct ecore_dmae_params *p_params);
 
 /**
  * @brief ecore_dmae_grc2host - Read data from dmae data offset
@@ -459,7 +473,9 @@ ecore_dmae_host2grc(struct ecore_hwfn *p_hwfn,
  * @param grc_addr (dmae_data_offset)
  * @param dest_addr
  * @param size_in_dwords
- * @param flags - one of the flags defined above
+ * @param p_params (default parameters will be used in case of OSAL_NULL)
+ *
+ * @return enum _ecore_status_t
  */
 enum _ecore_status_t
 ecore_dmae_grc2host(struct ecore_hwfn *p_hwfn,
@@ -467,7 +483,7 @@ ecore_dmae_grc2host(struct ecore_hwfn *p_hwfn,
                    u32 grc_addr,
                    dma_addr_t dest_addr,
                    u32 size_in_dwords,
-                   u32 flags);
+                   struct ecore_dmae_params *p_params);
 
 /**
  * @brief ecore_dmae_host2host - copy data from source address to destination address
@@ -478,7 +494,9 @@ ecore_dmae_grc2host(struct ecore_hwfn *p_hwfn,
  * @param source_addr
  * @param dest_addr
  * @param size_in_dwords
- * @param params
+ * @param p_params (default parameters will be used in case of OSAL_NULL)
+ *
+ * @return enum _ecore_status_t
  */
 enum _ecore_status_t
 ecore_dmae_host2host(struct ecore_hwfn *p_hwfn,
@@ -559,28 +577,79 @@ enum _ecore_status_t ecore_fw_rss_eng(struct ecore_hwfn *p_hwfn,
                                      u8 *dst_id);
 
 /**
- * @brief ecore_llh_add_mac_filter - configures a MAC filter in llh
+ * @brief ecore_llh_get_num_ppfid - Return the number of LLH filter banks
+ *     allocated to the PF.
  *
- * @param p_hwfn
- * @param p_ptt
- * @param p_filter - MAC to add
+ * @param p_dev
+ *
+ * @return u8 - Number of LLH filter banks
  */
-enum _ecore_status_t ecore_llh_add_mac_filter(struct ecore_hwfn *p_hwfn,
-                                         struct ecore_ptt *p_ptt,
-                                         u8 *p_filter);
+u8 ecore_llh_get_num_ppfid(struct ecore_dev *p_dev);
+
+enum ecore_eng {
+       ECORE_ENG0,
+       ECORE_ENG1,
+       ECORE_BOTH_ENG,
+};
 
 /**
- * @brief ecore_llh_remove_mac_filter - removes a MAC filtre from llh
+ * @brief ecore_llh_get_l2_affinity_hint - Return the hint for the L2 affinity
  *
- * @param p_hwfn
- * @param p_ptt
- * @param p_filter - MAC to remove
+ * @param p_dev
+ *
+ * @return enum ecore_eng - L2 affinity hint
+ */
+enum ecore_eng ecore_llh_get_l2_affinity_hint(struct ecore_dev *p_dev);
+
+/**
+ * @brief ecore_llh_set_ppfid_affinity - Set the engine affinity for the given
+ *     LLH filter bank.
+ *
+ * @param p_dev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param eng
+ *
+ * @return enum _ecore_status_t
+ */
+enum _ecore_status_t ecore_llh_set_ppfid_affinity(struct ecore_dev *p_dev,
+                                                 u8 ppfid, enum ecore_eng eng);
+
+/**
+ * @brief ecore_llh_set_roce_affinity - Set the RoCE engine affinity
+ *
+ * @param p_dev
+ * @param eng
+ *
+ * @return enum _ecore_status_t
+ */
+enum _ecore_status_t ecore_llh_set_roce_affinity(struct ecore_dev *p_dev,
+                                                enum ecore_eng eng);
+
+/**
+ * @brief ecore_llh_add_mac_filter - Add a LLH MAC filter into the given filter
+ *     bank.
+ *
+ * @param p_dev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param mac_addr - MAC to add
+ *
+ * @return enum _ecore_status_t
+ */
+enum _ecore_status_t ecore_llh_add_mac_filter(struct ecore_dev *p_dev, u8 ppfid,
+                                             u8 mac_addr[ETH_ALEN]);
+
+/**
+ * @brief ecore_llh_remove_mac_filter - Remove a LLH MAC filter from the given
+ *     filter bank.
+ *
+ * @param p_dev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param mac_addr - MAC to remove
  */
-void ecore_llh_remove_mac_filter(struct ecore_hwfn *p_hwfn,
-                            struct ecore_ptt *p_ptt,
-                            u8 *p_filter);
+void ecore_llh_remove_mac_filter(struct ecore_dev *p_dev, u8 ppfid,
+                                u8 mac_addr[ETH_ALEN]);
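
A hedged usage sketch of the reworked MAC filter API: filters are now keyed by device and filter bank (ppfid) rather than by (p_hwfn, p_ptt); 'p_dev' is a placeholder and the MAC value is arbitrary:

    u8 mac[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
    enum _ecore_status_t rc;

    /* ppfid 0 is the default bank, per the parameter comments above. */
    rc = ecore_llh_add_mac_filter(p_dev, 0, mac);
    if (rc == ECORE_SUCCESS)
            ecore_llh_remove_mac_filter(p_dev, 0, mac);
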
 
-enum ecore_llh_port_filter_type_t {
+enum ecore_llh_prot_filter_type_t {
        ECORE_LLH_FILTER_ETHERTYPE,
        ECORE_LLH_FILTER_TCP_SRC_PORT,
        ECORE_LLH_FILTER_TCP_DEST_PORT,
@@ -591,45 +660,52 @@ enum ecore_llh_port_filter_type_t {
 };
 
 /**
- * @brief ecore_llh_add_protocol_filter - configures a protocol filter in llh
+ * @brief ecore_llh_add_protocol_filter - Add a LLH protocol filter into the
+ *     given filter bank.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @param p_dev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param type - filter type and comparison mode
  * @param source_port_or_eth_type - source port or ethertype to add
  * @param dest_port - destination port to add
- * @param type - type of filters and comparing
+ *
+ * @return enum _ecore_status_t
  */
 enum _ecore_status_t
-ecore_llh_add_protocol_filter(struct ecore_hwfn *p_hwfn,
-                             struct ecore_ptt *p_ptt,
-                             u16 source_port_or_eth_type,
-                             u16 dest_port,
-                             enum ecore_llh_port_filter_type_t type);
+ecore_llh_add_protocol_filter(struct ecore_dev *p_dev, u8 ppfid,
+                             enum ecore_llh_prot_filter_type_t type,
+                             u16 source_port_or_eth_type, u16 dest_port);
 
 /**
- * @brief ecore_llh_remove_protocol_filter - remove a protocol filter in llh
+ * @brief ecore_llh_remove_protocol_filter - Remove a LLH protocol filter from
+ *     the given filter bank.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @param p_dev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
+ * @param type - filter type and comparison mode
  * @param source_port_or_eth_type - source port or ethertype to remove
  * @param dest_port - destination port to remove
- * @param type - type of filters and comparing
  */
-void
-ecore_llh_remove_protocol_filter(struct ecore_hwfn *p_hwfn,
-                                struct ecore_ptt *p_ptt,
-                                u16 source_port_or_eth_type,
-                                u16 dest_port,
-                                enum ecore_llh_port_filter_type_t type);
+void ecore_llh_remove_protocol_filter(struct ecore_dev *p_dev, u8 ppfid,
+                                     enum ecore_llh_prot_filter_type_t type,
+                                     u16 source_port_or_eth_type,
+                                     u16 dest_port);
 
 /**
- * @brief ecore_llh_clear_all_filters - removes all MAC filters from llh
+ * @brief ecore_llh_clear_ppfid_filters - Remove all LLH filters from the given
+ *     filter bank.
  *
- * @param p_hwfn
- * @param p_ptt
+ * @param p_dev
+ * @param ppfid - relative within the allocated ppfids ('0' is the default one).
  */
-void ecore_llh_clear_all_filters(struct ecore_hwfn *p_hwfn,
-                            struct ecore_ptt *p_ptt);
+void ecore_llh_clear_ppfid_filters(struct ecore_dev *p_dev, u8 ppfid);
+
+/**
+ * @brief ecore_llh_clear_all_filters - Remove all LLH filters
+ *
+ * @param p_dev
+ */
+void ecore_llh_clear_all_filters(struct ecore_dev *p_dev);
 
 /**
  * @brief ecore_llh_set_function_as_default - set function as default per port
@@ -701,4 +777,13 @@ ecore_set_queue_coalesce(struct ecore_hwfn *p_hwfn, u16 rx_coal,
 enum _ecore_status_t ecore_pglueb_set_pfid_enable(struct ecore_hwfn *p_hwfn,
                                                  struct ecore_ptt *p_ptt,
                                                  bool b_enable);
+
+/**
+ * @brief Whether FIP discovery fallback special mode is enabled or not.
+ *
+ * @param p_dev
+ *
+ * @return true if device is in FIP special mode, false otherwise.
+ */
+bool ecore_is_mf_fip_special(struct ecore_dev *p_dev);
 #endif
index 2d761b9..6d4a4dd 100644 (file)
@@ -922,7 +922,11 @@ struct core_rx_start_ramrod_data {
        struct core_rx_action_on_error action_on_error;
 /* set when in GSI offload mode on ROCE connection */
        u8 gsi_offload_flag;
-       u8 reserved[6];
+/* If set, the inner vlan (802.1q tag) priority that is written to the cqe
+ * will be zeroed out, used for TenantDcb
+ */
+       u8 wipe_inner_vlan_pri_en;
+       u8 reserved[5];
 };
 
 
@@ -1044,7 +1048,11 @@ struct core_tx_start_ramrod_data {
        __le16 qm_pq_id /* QM PQ ID */;
 /* set when in GSI offload mode on ROCE connection */
        u8 gsi_offload_flag;
-       u8 resrved[3];
+/* vport id of the current connection, used to access non_rdma_in_to_in_pri_map
+ * which is per vport
+ */
+       u8 vport_id;
+       u8 resrved[2];
 };
 
 
@@ -1171,6 +1179,25 @@ struct eth_rx_rate_limit {
 };
 
 
+/* Update RSS indirection table entry command. One outstanding command supported
+ * per PF.
+ */
+struct eth_tstorm_rss_update_data {
+/* Valid flag. Driver must set this flag; FW clears it when ready for a new
+ * RSS update command.
+ */
+       u8 valid;
+/* Global VPORT ID. If RSS is disabled for the VPORT, the RSS update
+ * command will be ignored.
+ */
+       u8 vport_id;
+       u8 ind_table_index /* RSS indirect table index that will be updated. */;
+       u8 reserved;
+       __le16 ind_table_value /* RSS indirect table new value. */;
+       __le16 reserved1 /* reserved. */;
+};
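
Since only one command may be outstanding per PF, a driver is expected to fill the fields and raise 'valid' last, then wait for firmware to clear it before posting another update. A sketch under that assumption ('tbl' points at the shared structure; the byte-order helper is assumed to be the OSAL one used throughout this codebase):

    /* Sketch: post one indirection-table update and let FW consume it. */
    tbl->vport_id = vport_id;                    /* global VPORT ID */
    tbl->ind_table_index = entry;                /* which entry to change */
    tbl->ind_table_value = OSAL_CPU_TO_LE16(new_value);
    tbl->valid = 1;                              /* FW clears when done */
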
+
+
 struct eth_ustorm_per_pf_stat {
 /* number of total ucast bytes received on loopback port without errors */
        struct regpair rcv_lb_ucast_bytes;
@@ -1463,6 +1490,10 @@ struct pf_start_tunnel_config {
  * FW will use a default port
  */
        u8 set_geneve_udp_port_flg;
+/* Set no-inner-L2 VXLAN tunnel UDP destination port to
+ * no_inner_l2_vxlan_udp_port. If not set - FW will use a default port
+ */
+       u8 set_no_inner_l2_vxlan_udp_port_flg;
        u8 tunnel_clss_vxlan /* Rx classification scheme for VXLAN tunnel. */;
 /* Rx classification scheme for l2 GENEVE tunnel. */
        u8 tunnel_clss_l2geneve;
@@ -1470,11 +1501,15 @@ struct pf_start_tunnel_config {
        u8 tunnel_clss_ipgeneve;
        u8 tunnel_clss_l2gre /* Rx classification scheme for l2 GRE tunnel. */;
        u8 tunnel_clss_ipgre /* Rx classification scheme for ip GRE tunnel. */;
-       u8 reserved;
 /* VXLAN tunnel UDP destination port. Valid if set_vxlan_udp_port_flg=1 */
        __le16 vxlan_udp_port;
 /* GENEVE tunnel UDP destination port. Valid if set_geneve_udp_port_flg=1 */
        __le16 geneve_udp_port;
+/* no-inner-L2 VXLAN tunnel UDP destination port. Valid if
+ * set_no_inner_l2_vxlan_udp_port_flg=1
+ */
+       __le16 no_inner_l2_vxlan_udp_port;
+       __le16 reserved[3];
 };
 
 /*
@@ -1547,6 +1582,8 @@ struct pf_update_tunnel_config {
        u8 set_vxlan_udp_port_flg;
 /* Update GENEVE tunnel UDP destination port. */
        u8 set_geneve_udp_port_flg;
+/* Update no-inner-L2 VXLAN tunnel UDP destination port. */
+       u8 set_no_inner_l2_vxlan_udp_port_flg;
        u8 tunnel_clss_vxlan /* Classification scheme for VXLAN tunnel. */;
 /* Classification scheme for l2 GENEVE tunnel. */
        u8 tunnel_clss_l2geneve;
@@ -1554,9 +1591,12 @@ struct pf_update_tunnel_config {
        u8 tunnel_clss_ipgeneve;
        u8 tunnel_clss_l2gre /* Classification scheme for l2 GRE tunnel. */;
        u8 tunnel_clss_ipgre /* Classification scheme for ip GRE tunnel. */;
+       u8 reserved;
        __le16 vxlan_udp_port /* VXLAN tunnel UDP destination port. */;
        __le16 geneve_udp_port /* GENEVE tunnel UDP destination port. */;
-       __le16 reserved;
+/* no-inner-L2 VXLAN tunnel UDP destination port. */
+       __le16 no_inner_l2_vxlan_udp_port;
+       __le16 reserved1[3];
 };
 
 /*
@@ -1686,6 +1726,13 @@ struct rl_update_ramrod_data {
 /* ID of last RL that will be updated. If clear, a single RL will be updated. */
        u8 rl_id_last;
        u8 rl_dc_qcn_flg /* If set, RL will used for DCQCN. */;
+/* If set, alpha will be reset to 1 when the state machine is idle. */
+       u8 dcqcn_reset_alpha_on_idle;
+/* Byte counter threshold to change rate increase stage. */
+       u8 rl_bc_stage_th;
+/* Timer threshold to change rate increase stage. */
+       u8 rl_timer_stage_th;
+       u8 reserved1;
        __le32 rl_bc_rate /* Byte Counter Limit. */;
        __le16 rl_max_rate /* Maximum rate in 1.6 Mbps resolution. */;
        __le16 rl_r_ai /* Active increase rate. */;
@@ -1694,7 +1741,7 @@ struct rl_update_ramrod_data {
        __le32 dcqcn_k_us /* DCQCN Alpha update interval. */;
        __le32 dcqcn_timeuot_us /* DCQCN timeout. */;
        __le32 qcn_timeuot_us /* QCN timeout. */;
-       __le32 reserved[2];
+       __le32 reserved2;
 };
 
 
index bf54872..085af0a 100644 (file)
@@ -1090,6 +1090,15 @@ struct idle_chk_data {
        u16 reserved2;
 };
 
+/*
+ * Pretend parameters
+ */
+struct pretend_params {
+       u8 split_type /* Pretend split type (from enum init_split_types) */;
+       u8 reserved;
+       u16 split_id /* Pretend split ID (within the pretend split type) */;
+};
+
 /*
  * Debug Tools data (per HW function)
  */
@@ -1102,11 +1111,17 @@ struct dbg_tools_data {
        u8 block_in_reset[88];
        u8 chip_id /* Chip ID (from enum chip_ids) */;
        u8 platform_id /* Platform ID */;
+       u8 num_ports /* Number of ports in the chip */;
+       u8 num_pfs_per_port /* Number of PFs in each port */;
+       u8 num_vfs /* Number of VFs in the chip */;
        u8 initialized /* Indicates if the data was initialized */;
        u8 use_dmae /* Indicates if DMAE should be used */;
+       u8 reserved;
+       struct pretend_params pretend /* Current pretend parameters */;
 /* Numbers of registers that were read since last log */
        u32 num_regs_read;
 };
 
 
+
 #endif /* __ECORE_HSI_DEBUG_TOOLS__ */
index 6b51230..158ca67 100644 (file)
@@ -831,6 +831,26 @@ enum eth_filter_type {
 };
 
 
+/*
+ * inner to inner vlan priority translation configurations
+ */
+struct eth_in_to_in_pri_map_cfg {
+/* If set, non_rdma_in_to_in_pri_map or rdma_in_to_in_pri_map will be used for
+ * inner to inner priority mapping depending on protocol type
+ */
+       u8 inner_vlan_pri_remap_en;
+       u8 reserved[7];
+/* Map for inner to inner vlan priority translation for non-RDMA protocols,
+ * used for TenantDcb. Set inner_vlan_pri_remap_en when initializing the map.
+ */
+       u8 non_rdma_in_to_in_pri_map[8];
+/* Map for inner to inner vlan priority translation for RDMA protocols, used
+ * for TenantDcb. Set inner_vlan_pri_remap_en when initializing the map.
+ */
+       u8 rdma_in_to_in_pri_map[8];
+};
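
The two 8-entry tables above are presumably indexed by the original inner
VLAN priority (0-7) and hold the priority to substitute. A minimal sketch of
filling the structure, assuming an identity mapping; the helper name is
illustrative, not part of the patch:

static void example_init_pri_map(struct eth_in_to_in_pri_map_cfg *cfg)
{
	u8 pri;

	OSAL_MEMSET(cfg, 0, sizeof(*cfg));
	cfg->inner_vlan_pri_remap_en = 1;	/* required when a map is used */
	for (pri = 0; pri < 8; pri++) {
		cfg->non_rdma_in_to_in_pri_map[pri] = pri;	/* identity */
		cfg->rdma_in_to_in_pri_map[pri] = pri;
	}
}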
+
+
 /*
  * eth IPv4 Fragment Type
  */
@@ -1030,8 +1050,11 @@ struct eth_vport_rx_mode {
 /* accept all broadcast packets (subject to vlan) */
 #define ETH_VPORT_RX_MODE_BCAST_ACCEPT_ALL_MASK        0x1
 #define ETH_VPORT_RX_MODE_BCAST_ACCEPT_ALL_SHIFT       5
-#define ETH_VPORT_RX_MODE_RESERVED1_MASK               0x3FF
-#define ETH_VPORT_RX_MODE_RESERVED1_SHIFT              6
+/* accept any VNI in tunnel VNI classification. Used for default queue. */
+#define ETH_VPORT_RX_MODE_ACCEPT_ANY_VNI_MASK          0x1
+#define ETH_VPORT_RX_MODE_ACCEPT_ANY_VNI_SHIFT         6
+#define ETH_VPORT_RX_MODE_RESERVED1_MASK               0x1FF
+#define ETH_VPORT_RX_MODE_RESERVED1_SHIFT              7
 };
 
 
@@ -1357,6 +1380,20 @@ struct tx_queue_update_ramrod_data {
 };
 
 
+/*
+ * Inner to Inner VLAN priority map update mode
+ */
+enum update_in_to_in_pri_map_mode_enum {
+/* Inner to Inner VLAN priority map update Disabled */
+       ETH_IN_TO_IN_PRI_MAP_UPDATE_DISABLED,
+/* Update Inner to Inner VLAN priority map for non RDMA protocols */
+       ETH_IN_TO_IN_PRI_MAP_UPDATE_NON_RDMA_TBL,
+/* Update Inner to Inner VLAN priority map for RDMA protocols */
+       ETH_IN_TO_IN_PRI_MAP_UPDATE_RDMA_TBL,
+       MAX_UPDATE_IN_TO_IN_PRI_MAP_MODE_ENUM
+};
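
A sketch of requesting a non-RDMA table refresh through the vport update
path, using the fields added to vport_update_ramrod_data_cmn further below;
the helper name and identity mapping are illustrative:

static void
example_request_pri_map_update(struct vport_update_ramrod_data_cmn *p_cmn)
{
	u8 pri;

	p_cmn->update_in_to_in_pri_map_mode =
		ETH_IN_TO_IN_PRI_MAP_UPDATE_NON_RDMA_TBL;
	for (pri = 0; pri < 8; pri++)
		p_cmn->in_to_in_pri_map[pri] = pri;	/* identity mapping */
}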
+
+
 
 /*
  * Ramrod data for vport update ramrod
@@ -1405,7 +1442,12 @@ struct vport_start_ramrod_data {
        u8 ctl_frame_mac_check_en;
 /* If set, control frames will be filtered according to ethtype check. */
        u8 ctl_frame_ethtype_check_en;
-       u8 reserved[1];
+/* If set, the inner vlan (802.1q tag) priority that is written to the cqe
+ * will be zeroed out, used for TenantDcb
+ */
+       u8 wipe_inner_vlan_pri_en;
+/* inner to inner vlan priority translation configurations */
+       struct eth_in_to_in_pri_map_cfg in_to_in_vlan_pri_map_cfg;
 };
 
 
@@ -1473,7 +1515,14 @@ struct vport_update_ramrod_data_cmn {
        u8 ctl_frame_mac_check_en;
 /* If set, control frames will be filtered according to ethtype check. */
        u8 ctl_frame_ethtype_check_en;
-       u8 reserved[15];
+/* Indicates to update RDMA or NON-RDMA vlan remapping priority table according
+ * to update_in_to_in_pri_map_mode_enum, used for TenantDcb (use enum
+ * update_in_to_in_pri_map_mode_enum)
+ */
+       u8 update_in_to_in_pri_map_mode;
+/* Map for inner to inner vlan priority translation, used for TenantDcb. */
+       u8 in_to_in_pri_map[8];
+       u8 reserved[6];
 };
 
 struct vport_update_ramrod_mcast {
index 51bba27..72cd7e9 100644 (file)
@@ -407,6 +407,30 @@ void ecore_port_unpretend(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
                        *(u32 *)&p_ptt->pxp.pretend);
 }
 
+void ecore_port_fid_pretend(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                           u8 port_id, u16 fid)
+{
+       u16 control = 0;
+
+       SET_FIELD(control, PXP_PRETEND_CMD_PORT, port_id);
+       SET_FIELD(control, PXP_PRETEND_CMD_USE_PORT, 1);
+       SET_FIELD(control, PXP_PRETEND_CMD_PRETEND_PORT, 1);
+
+       SET_FIELD(control, PXP_PRETEND_CMD_IS_CONCRETE, 1);
+       SET_FIELD(control, PXP_PRETEND_CMD_PRETEND_FUNCTION, 1);
+
+       if (!GET_FIELD(fid, PXP_CONCRETE_FID_VFVALID))
+               fid = GET_FIELD(fid, PXP_CONCRETE_FID_PFID);
+
+       p_ptt->pxp.pretend.control = OSAL_CPU_TO_LE16(control);
+       p_ptt->pxp.pretend.fid.concrete_fid.fid = OSAL_CPU_TO_LE16(fid);
+
+       REG_WR(p_hwfn,
+              ecore_ptt_config_addr(p_ptt) +
+              OFFSETOF(struct pxp_ptt_entry, pretend),
+              *(u32 *)&p_ptt->pxp.pretend);
+}
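
A sketch of a read performed on behalf of another port/function. The restore
sequence shown is an assumption; the patch leaves undoing the pretend to the
caller:

static u32 example_read_as_other_fn(struct ecore_hwfn *p_hwfn,
				    struct ecore_ptt *p_ptt,
				    u8 port_id, u16 fid, u32 hw_addr)
{
	u32 val;

	ecore_port_fid_pretend(p_hwfn, p_ptt, port_id, fid);
	val = ecore_rd(p_hwfn, p_ptt, hw_addr);

	/* assumed restore: drop the port pretend, then pretend back to
	 * our own PF's fid
	 */
	ecore_port_unpretend(p_hwfn, p_ptt);
	ecore_fid_pretend(p_hwfn, p_ptt,
			  p_hwfn->rel_pf_id <<
			  PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
	return val;
}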
+
 u32 ecore_vfid_to_concrete(struct ecore_hwfn *p_hwfn, u8 vfid)
 {
        u32 concrete_fid = 0;
@@ -426,14 +450,17 @@ u32 ecore_vfid_to_concrete(struct ecore_hwfn *p_hwfn, u8 vfid)
  * If this changes, this needs to be revisited.
  */
 
-/* Ecore DMAE
- * =============
- */
+/* DMAE */
+
+#define ECORE_DMAE_FLAGS_IS_SET(params, flag)  \
+       ((params) != OSAL_NULL && ((params)->flags & ECORE_DMAE_FLAG_##flag))
+
 static void ecore_dmae_opcode(struct ecore_hwfn *p_hwfn,
                              const u8 is_src_type_grc,
                              const u8 is_dst_type_grc,
                              struct ecore_dmae_params *p_params)
 {
+       u8 src_pfid, dst_pfid, port_id;
        u16 opcode_b = 0;
        u32 opcode = 0;
 
@@ -443,16 +470,20 @@ static void ecore_dmae_opcode(struct ecore_hwfn *p_hwfn,
         */
        opcode |= (is_src_type_grc ? DMAE_CMD_SRC_MASK_GRC
                   : DMAE_CMD_SRC_MASK_PCIE) << DMAE_CMD_SRC_SHIFT;
-       opcode |= (p_hwfn->rel_pf_id & DMAE_CMD_SRC_PF_ID_MASK) <<
-           DMAE_CMD_SRC_PF_ID_SHIFT;
+       src_pfid = ECORE_DMAE_FLAGS_IS_SET(p_params, PF_SRC) ?
+                  p_params->src_pfid : p_hwfn->rel_pf_id;
+       opcode |= (src_pfid & DMAE_CMD_SRC_PF_ID_MASK) <<
+                 DMAE_CMD_SRC_PF_ID_SHIFT;
 
        /* The destination of the DMA can be: 0-None 1-PCIe 2-GRC 3-None */
        opcode |= (is_dst_type_grc ? DMAE_CMD_DST_MASK_GRC
                   : DMAE_CMD_DST_MASK_PCIE) << DMAE_CMD_DST_SHIFT;
-       opcode |= (p_hwfn->rel_pf_id & DMAE_CMD_DST_PF_ID_MASK) <<
-           DMAE_CMD_DST_PF_ID_SHIFT;
+       dst_pfid = ECORE_DMAE_FLAGS_IS_SET(p_params, PF_DST) ?
+                  p_params->dst_pfid : p_hwfn->rel_pf_id;
+       opcode |= (dst_pfid & DMAE_CMD_DST_PF_ID_MASK) <<
+                 DMAE_CMD_DST_PF_ID_SHIFT;
 
-       /* DMAE_E4_TODO need to check which value to specifiy here. */
+       /* DMAE_E4_TODO need to check which value to specify here. */
        /* opcode |= (!b_complete_to_host)<< DMAE_CMD_C_DST_SHIFT; */
 
        /* Whether to write a completion word to the completion destination:
@@ -462,7 +493,7 @@ static void ecore_dmae_opcode(struct ecore_hwfn *p_hwfn,
        opcode |= DMAE_CMD_COMP_WORD_EN_MASK << DMAE_CMD_COMP_WORD_EN_SHIFT;
        opcode |= DMAE_CMD_SRC_ADDR_RESET_MASK << DMAE_CMD_SRC_ADDR_RESET_SHIFT;
 
-       if (p_params->flags & ECORE_DMAE_FLAG_COMPLETION_DST)
+       if (ECORE_DMAE_FLAGS_IS_SET(p_params, COMPLETION_DST))
                opcode |= 1 << DMAE_CMD_COMP_FUNC_SHIFT;
 
        /* swapping mode 3 - big endian there should be a define ifdefed in
@@ -470,7 +501,9 @@ static void ecore_dmae_opcode(struct ecore_hwfn *p_hwfn,
         */
        opcode |= DMAE_CMD_ENDIANITY << DMAE_CMD_ENDIANITY_MODE_SHIFT;
 
-       opcode |= p_hwfn->port_id << DMAE_CMD_PORT_ID_SHIFT;
+       port_id = (ECORE_DMAE_FLAGS_IS_SET(p_params, PORT)) ?
+                 p_params->port_id : p_hwfn->port_id;
+       opcode |= port_id << DMAE_CMD_PORT_ID_SHIFT;
 
        /* reset source address in next go */
        opcode |= DMAE_CMD_SRC_ADDR_RESET_MASK << DMAE_CMD_SRC_ADDR_RESET_SHIFT;
@@ -479,14 +512,14 @@ static void ecore_dmae_opcode(struct ecore_hwfn *p_hwfn,
        opcode |= DMAE_CMD_DST_ADDR_RESET_MASK << DMAE_CMD_DST_ADDR_RESET_SHIFT;
 
        /* SRC/DST VFID: all 1's - pf, otherwise VF id */
-       if (p_params->flags & ECORE_DMAE_FLAG_VF_SRC) {
+       if (ECORE_DMAE_FLAGS_IS_SET(p_params, VF_SRC)) {
                opcode |= (1 << DMAE_CMD_SRC_VF_ID_VALID_SHIFT);
                opcode_b |= (p_params->src_vfid << DMAE_CMD_SRC_VF_ID_SHIFT);
        } else {
                opcode_b |= (DMAE_CMD_SRC_VF_ID_MASK <<
                             DMAE_CMD_SRC_VF_ID_SHIFT);
        }
-       if (p_params->flags & ECORE_DMAE_FLAG_VF_DST) {
+       if (ECORE_DMAE_FLAGS_IS_SET(p_params, VF_DST)) {
                opcode |= 1 << DMAE_CMD_DST_VF_ID_VALID_SHIFT;
                opcode_b |= p_params->dst_vfid << DMAE_CMD_DST_VF_ID_SHIFT;
        } else {
@@ -831,7 +864,7 @@ ecore_dmae_execute_command(struct ecore_hwfn *p_hwfn,
        for (i = 0; i <= cnt_split; i++) {
                offset = length_limit * i;
 
-               if (!(p_params->flags & ECORE_DMAE_FLAG_RW_REPL_SRC)) {
+               if (!ECORE_DMAE_FLAGS_IS_SET(p_params, RW_REPL_SRC)) {
                        if (src_type == ECORE_DMAE_ADDRESS_GRC)
                                src_addr_split = src_addr + offset;
                        else
@@ -872,51 +905,45 @@ ecore_dmae_execute_command(struct ecore_hwfn *p_hwfn,
        return ecore_status;
 }
 
-enum _ecore_status_t
-ecore_dmae_host2grc(struct ecore_hwfn *p_hwfn,
-                   struct ecore_ptt *p_ptt,
-                   u64 source_addr,
-                   u32 grc_addr, u32 size_in_dwords, u32 flags)
+enum _ecore_status_t ecore_dmae_host2grc(struct ecore_hwfn *p_hwfn,
+                                        struct ecore_ptt *p_ptt,
+                                        u64 source_addr,
+                                        u32 grc_addr,
+                                        u32 size_in_dwords,
+                                        struct ecore_dmae_params *p_params)
 {
        u32 grc_addr_in_dw = grc_addr / sizeof(u32);
-       struct ecore_dmae_params params;
        enum _ecore_status_t rc;
 
-       OSAL_MEMSET(&params, 0, sizeof(struct ecore_dmae_params));
-       params.flags = flags;
-
        OSAL_SPIN_LOCK(&p_hwfn->dmae_info.lock);
 
        rc = ecore_dmae_execute_command(p_hwfn, p_ptt, source_addr,
                                        grc_addr_in_dw,
                                        ECORE_DMAE_ADDRESS_HOST_VIRT,
                                        ECORE_DMAE_ADDRESS_GRC,
-                                       size_in_dwords, &params);
+                                       size_in_dwords, p_params);
 
        OSAL_SPIN_UNLOCK(&p_hwfn->dmae_info.lock);
 
        return rc;
 }
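
The visible effect of the signature change: the raw flags word is gone, and
OSAL_NULL now stands for default parameters. A sketch of the two calling
styles (example_* is a hypothetical name; the flagged variant mirrors
ecore_init_fill_dmae later in this patch):

static enum _ecore_status_t example_dmae_calls(struct ecore_hwfn *p_hwfn,
					       struct ecore_ptt *p_ptt,
					       u64 src, u32 grc_addr, u32 len)
{
	struct ecore_dmae_params params;
	enum _ecore_status_t rc;

	/* default behaviour: no params structure needed */
	rc = ecore_dmae_host2grc(p_hwfn, p_ptt, src, grc_addr, len,
				 OSAL_NULL /* default parameters */);
	if (rc != ECORE_SUCCESS)
		return rc;

	/* non-default behaviour: build params and set the needed flag */
	OSAL_MEMSET(&params, 0, sizeof(params));
	params.flags = ECORE_DMAE_FLAG_RW_REPL_SRC;
	return ecore_dmae_host2grc(p_hwfn, p_ptt, src, grc_addr, len, &params);
}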
 
-enum _ecore_status_t
-ecore_dmae_grc2host(struct ecore_hwfn *p_hwfn,
-                   struct ecore_ptt *p_ptt,
-                   u32 grc_addr,
-                   dma_addr_t dest_addr, u32 size_in_dwords, u32 flags)
+enum _ecore_status_t ecore_dmae_grc2host(struct ecore_hwfn *p_hwfn,
+                                        struct ecore_ptt *p_ptt,
+                                        u32 grc_addr,
+                                        dma_addr_t dest_addr,
+                                        u32 size_in_dwords,
+                                        struct ecore_dmae_params *p_params)
 {
        u32 grc_addr_in_dw = grc_addr / sizeof(u32);
-       struct ecore_dmae_params params;
        enum _ecore_status_t rc;
 
-       OSAL_MEMSET(&params, 0, sizeof(struct ecore_dmae_params));
-       params.flags = flags;
-
        OSAL_SPIN_LOCK(&p_hwfn->dmae_info.lock);
 
        rc = ecore_dmae_execute_command(p_hwfn, p_ptt, grc_addr_in_dw,
                                        dest_addr, ECORE_DMAE_ADDRESS_GRC,
                                        ECORE_DMAE_ADDRESS_HOST_VIRT,
-                                       size_in_dwords, &params);
+                                       size_in_dwords, p_params);
 
        OSAL_SPIN_UNLOCK(&p_hwfn->dmae_info.lock);
 
@@ -965,7 +992,6 @@ enum _ecore_status_t ecore_dmae_sanity(struct ecore_hwfn *p_hwfn,
                                       const char *phase)
 {
        u32 size = OSAL_PAGE_SIZE / 2, val;
-       struct ecore_dmae_params params;
        enum _ecore_status_t rc = ECORE_SUCCESS;
        dma_addr_t p_phys;
        void *p_virt;
@@ -997,9 +1023,9 @@ enum _ecore_status_t ecore_dmae_sanity(struct ecore_hwfn *p_hwfn,
                   (unsigned long)(p_phys + size),
                   (u8 *)p_virt + size, size);
 
-       OSAL_MEMSET(&params, 0, sizeof(params));
        rc = ecore_dmae_host2host(p_hwfn, p_ptt, p_phys, p_phys + size,
-                                 size / 4 /* size_in_dwords */, &params);
+                                 size / 4 /* size_in_dwords */,
+                                 OSAL_NULL /* default parameters */);
        if (rc != ECORE_SUCCESS) {
                DP_NOTICE(p_hwfn, false,
                          "DMAE sanity [%s]: ecore_dmae_host2host() failed. rc = %d.\n",
@@ -1030,3 +1056,32 @@ out:
        OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_virt, p_phys, 2 * size);
        return rc;
 }
+
+void ecore_ppfid_wr(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                   u8 abs_ppfid, u32 hw_addr, u32 val)
+{
+       u8 pfid = ECORE_PFID_BY_PPFID(p_hwfn, abs_ppfid);
+
+       ecore_fid_pretend(p_hwfn, p_ptt,
+                         pfid << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+       ecore_wr(p_hwfn, p_ptt, hw_addr, val);
+       ecore_fid_pretend(p_hwfn, p_ptt,
+                         p_hwfn->rel_pf_id <<
+                         PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+}
+
+u32 ecore_ppfid_rd(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                  u8 abs_ppfid, u32 hw_addr)
+{
+       u8 pfid = ECORE_PFID_BY_PPFID(p_hwfn, abs_ppfid);
+       u32 val;
+
+       ecore_fid_pretend(p_hwfn, p_ptt,
+                         pfid << PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+       val = ecore_rd(p_hwfn, p_ptt, hw_addr);
+       ecore_fid_pretend(p_hwfn, p_ptt,
+                         p_hwfn->rel_pf_id <<
+                         PXP_PRETEND_CONCRETE_FID_PFID_SHIFT);
+
+       return val;
+}
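
Both helpers pretend to the PF behind the given PPFID for the duration of a
single access and pretend back before returning, so they compose naturally.
A sketch of a read-modify-write built on top of them (hypothetical helper
name):

static void example_ppfid_set_bits(struct ecore_hwfn *p_hwfn,
				   struct ecore_ptt *p_ptt,
				   u8 abs_ppfid, u32 hw_addr, u32 bits)
{
	u32 val = ecore_ppfid_rd(p_hwfn, p_ptt, abs_ppfid, hw_addr);

	ecore_ppfid_wr(p_hwfn, p_ptt, abs_ppfid, hw_addr, val | bits);
}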
index 394207e..0b5b40c 100644 (file)
@@ -134,8 +134,8 @@ struct ecore_ptt *ecore_get_reserved_ptt(struct ecore_hwfn  *p_hwfn,
  *
  * @param p_hwfn
  * @param p_ptt
- * @param val
  * @param hw_addr
+ * @param val
  */
 void ecore_wr(struct ecore_hwfn        *p_hwfn,
              struct ecore_ptt  *p_ptt,
@@ -147,7 +147,6 @@ void ecore_wr(struct ecore_hwfn     *p_hwfn,
  *
  * @param p_hwfn
  * @param p_ptt
- * @param val
  * @param hw_addr
  */
 u32 ecore_rd(struct ecore_hwfn *p_hwfn,
@@ -222,6 +221,18 @@ void ecore_port_pretend(struct ecore_hwfn  *p_hwfn,
 void ecore_port_unpretend(struct ecore_hwfn    *p_hwfn,
                          struct ecore_ptt      *p_ptt);
 
+/**
+ * @brief ecore_port_fid_pretend - pretend to another port and another function
+ *        when accessing the ptt window
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param port_id - the port to pretend to
+ * @param fid - fid field of pxp_pretend structure. Can hold a PF or VF FID.
+ */
+void ecore_port_fid_pretend(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                           u8 port_id, u16 fid);
+
 /**
  * @brief ecore_vfid_to_concrete - build a concrete FID for a
  *        given VF ID
@@ -257,4 +268,29 @@ enum _ecore_status_t ecore_dmae_sanity(struct ecore_hwfn *p_hwfn,
                                       struct ecore_ptt *p_ptt,
                                       const char *phase);
 
+/**
+ * @brief ecore_ppfid_wr - Write value to BAR using the given ptt while
+ *     pretending to a PF to which the given PPFID pertains.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param abs_ppfid
+ * @param hw_addr
+ * @param val
+ */
+void ecore_ppfid_wr(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                   u8 abs_ppfid, u32 hw_addr, u32 val);
+
+/**
+ * @brief ecore_ppfid_rd - Read value from BAR using the given ptt while
+ *      pretending to a PF to which the given PPFID pertains.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param abs_ppfid
+ * @param hw_addr
+ */
+u32 ecore_ppfid_rd(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                  u8 abs_ppfid, u32 hw_addr);
+
 #endif /* __ECORE_HW_H__ */
index b8496cb..cfc1156 100644 (file)
@@ -1665,7 +1665,7 @@ void ecore_gft_config(struct ecore_hwfn *p_hwfn,
                               bool ipv6,
                               enum gft_profile_type profile_type)
 {
-       u32 reg_val, cam_line, ram_line_lo, ram_line_hi;
+       u32 reg_val, cam_line, ram_line_lo, ram_line_hi, search_non_ip_as_gft;
 
        if (!ipv6 && !ipv4)
                DP_NOTICE(p_hwfn, true, "gft_config: must accept at least on of - ipv4 or ipv6'\n");
@@ -1729,6 +1729,9 @@ void ecore_gft_config(struct ecore_hwfn *p_hwfn,
        ram_line_lo = 0;
        ram_line_hi = 0;
 
+       /* Search non-IP as GFT */
+       search_non_ip_as_gft = 0;
+
        /* Tunnel type */
        SET_FIELD(ram_line_lo, GFT_RAM_LINE_TUNNEL_DST_PORT, 1);
        SET_FIELD(ram_line_lo, GFT_RAM_LINE_TUNNEL_OVER_IP_PROTOCOL, 1);
@@ -1752,8 +1755,13 @@ void ecore_gft_config(struct ecore_hwfn *p_hwfn,
                SET_FIELD(ram_line_lo, GFT_RAM_LINE_ETHERTYPE, 1);
        } else if (profile_type == GFT_PROFILE_TYPE_TUNNEL_TYPE) {
                SET_FIELD(ram_line_lo, GFT_RAM_LINE_TUNNEL_ETHERTYPE, 1);
+
+               /* Allow tunneled traffic without inner IP */
+               search_non_ip_as_gft = 1;
        }
 
+       ecore_wr(p_hwfn, p_ptt, PRS_REG_SEARCH_NON_IP_AS_GFT,
+                search_non_ip_as_gft);
        ecore_wr(p_hwfn, p_ptt,
                 PRS_REG_GFT_PROFILE_MASK_RAM + RAM_LINE_SIZE * pf_id,
                 ram_line_lo);
@@ -1996,52 +2004,49 @@ void ecore_enable_context_validation(struct ecore_hwfn *p_hwfn,
        ecore_wr(p_hwfn, p_ptt, CDU_REG_TCFC_CTX_VALID0, ctx_validation);
 }
 
-#define RSS_IND_TABLE_BASE_ADDR       4112
-#define RSS_IND_TABLE_VPORT_SIZE      16
-#define RSS_IND_TABLE_ENTRY_PER_LINE  8
 
-/* Update RSS indirection table entry. */
-void ecore_update_eth_rss_ind_table_entry(struct ecore_hwfn *p_hwfn,
-                                         struct ecore_ptt *p_ptt,
-                                         u8 rss_id,
-                                         u8 ind_table_index,
-                                         u16 ind_table_value)
+/*******************************************************************************
+ * File name : rdma_init.c
+ * Author    : Michael Shteinbok
+ *******************************************************************************
+ *******************************************************************************
+ * Description:
+ * RDMA HSI functions
+ *
+ *******************************************************************************
+ * Notes: This is the input to the auto generated file drv_init_fw_funcs.c
+ *
+ *******************************************************************************
+ */
+static u32 ecore_get_rdma_assert_ram_addr(struct ecore_hwfn *p_hwfn,
+                                         u8 storm_id)
 {
-       u32 cnt, rss_addr;
-       u32 *reg_val;
-       u16 rss_ind_entry[RSS_IND_TABLE_ENTRY_PER_LINE];
-       u16 rss_ind_mask[RSS_IND_TABLE_ENTRY_PER_LINE];
-
-       /* get entry address */
-       rss_addr =  RSS_IND_TABLE_BASE_ADDR +
-                   RSS_IND_TABLE_VPORT_SIZE * rss_id +
-                   ind_table_index / RSS_IND_TABLE_ENTRY_PER_LINE;
-
-       /* prepare update command */
-       ind_table_index %= RSS_IND_TABLE_ENTRY_PER_LINE;
-
-       for (cnt = 0; cnt < RSS_IND_TABLE_ENTRY_PER_LINE; cnt++) {
-               if (cnt == ind_table_index) {
-                       rss_ind_entry[cnt] = ind_table_value;
-                       rss_ind_mask[cnt]  = 0xFFFF;
-               } else {
-                       rss_ind_entry[cnt] = 0;
-                       rss_ind_mask[cnt]  = 0;
-               }
+       switch (storm_id) {
+       case 0: return TSEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM +
+                      TSTORM_RDMA_ASSERT_LEVEL_OFFSET(p_hwfn->rel_pf_id);
+       case 1: return MSEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM +
+                      MSTORM_RDMA_ASSERT_LEVEL_OFFSET(p_hwfn->rel_pf_id);
+       case 2: return USEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM +
+                      USTORM_RDMA_ASSERT_LEVEL_OFFSET(p_hwfn->rel_pf_id);
+       case 3: return XSEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM +
+                      XSTORM_RDMA_ASSERT_LEVEL_OFFSET(p_hwfn->rel_pf_id);
+       case 4: return YSEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM +
+                      YSTORM_RDMA_ASSERT_LEVEL_OFFSET(p_hwfn->rel_pf_id);
+       case 5: return PSEM_REG_FAST_MEMORY + SEM_FAST_REG_INT_RAM +
+                      PSTORM_RDMA_ASSERT_LEVEL_OFFSET(p_hwfn->rel_pf_id);
+
+       default: return 0;
        }
+}
 
-       /* Update entry in HW*/
-       ecore_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_ADDR, rss_addr);
-
-       reg_val = (u32 *)rss_ind_mask;
-       ecore_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_MASK, reg_val[0]);
-       ecore_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_MASK + 4, reg_val[1]);
-       ecore_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_MASK + 8, reg_val[2]);
-       ecore_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_MASK + 12, reg_val[3]);
+void ecore_set_rdma_error_level(struct ecore_hwfn *p_hwfn,
+                               struct ecore_ptt *p_ptt,
+                               u8 assert_level[NUM_STORMS])
+{
+       u8 storm_id;
+       for (storm_id = 0; storm_id < NUM_STORMS; storm_id++) {
+               u32 ram_addr = ecore_get_rdma_assert_ram_addr(p_hwfn, storm_id);
 
-       reg_val = (u32 *)rss_ind_entry;
-       ecore_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_DATA, reg_val[0]);
-       ecore_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_DATA + 4, reg_val[1]);
-       ecore_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_DATA + 8, reg_val[2]);
-       ecore_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_DATA + 12, reg_val[3]);
+               ecore_wr(p_hwfn, p_ptt, ram_addr, assert_level[storm_id]);
+       }
 }
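
A sketch of driving the new API, assuming NUM_STORMS (6) from the header
change below, and that a lower threshold makes the FW assert on more errors,
per the header comment:

static void example_set_rdma_asserts(struct ecore_hwfn *p_hwfn,
				     struct ecore_ptt *p_ptt)
{
	u8 levels[NUM_STORMS];

	/* program the same threshold for all six storms */
	OSAL_MEMSET(levels, 0, sizeof(levels));
	ecore_set_rdma_error_level(p_hwfn, p_ptt, levels);
}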
index 1024bb2..3503a90 100644 (file)
@@ -472,21 +472,35 @@ void ecore_memset_task_ctx(void *p_ctx_mem,
                           u32 ctx_size,
                           u8 ctx_type);
 
-/**
- * @brief ecore_update_eth_rss_ind_table_entry - Update RSS indirection table
- * entry.
- * The function must run in exclusive mode to prevent wrong RSS configuration.
+
+/*******************************************************************************
+ * File name : rdma_init.h
+ * Author    : Michael Shteinbok
+ *******************************************************************************
+ *******************************************************************************
+ * Description:
+ * RDMA HSI functions header
+ *
+ *******************************************************************************
+ * Notes: This is the input to the auto generated file drv_init_fw_funcs.h
  *
- * @param p_hwfn    - HW device data
- * @param p_ptt  - ptt window used for writing the registers.
- * @param rss_id - RSS engine ID.
- * @param ind_table_index -  RSS indirect table index.
- * @param ind_table_value -  RSS indirect table new value.
+ *******************************************************************************
  */
-void ecore_update_eth_rss_ind_table_entry(struct ecore_hwfn *p_hwfn,
-                                         struct ecore_ptt *p_ptt,
-                                         u8 rss_id,
-                                         u8 ind_table_index,
-                                         u16 ind_table_value);
+#define NUM_STORMS 6
+
+
+
+/**
+ * @brief ecore_set_rdma_error_level - Sets the RDMA assert level.
+ *                                     If the severity of an error is above
+ *                                     this level, the FW will assert.
+ * @param p_hwfn       - HW device data
+ * @param p_ptt        - ptt window used for writing the registers
+ * @param assert_level - An array of assert levels, one per storm.
+ */
+void ecore_set_rdma_error_level(struct ecore_hwfn *p_hwfn,
+                               struct ecore_ptt *p_ptt,
+                               u8 assert_level[NUM_STORMS]);
+
 
 #endif
index b7636f3..044308b 100644 (file)
@@ -101,7 +101,8 @@ static enum _ecore_status_t ecore_init_rt(struct ecore_hwfn *p_hwfn,
 
                rc = ecore_dmae_host2grc(p_hwfn, p_ptt,
                                         (osal_uintptr_t)(p_init_val + i),
-                                        addr + (i << 2), segment, 0);
+                                        addr + (i << 2), segment,
+                                        OSAL_NULL /* default parameters */);
                if (rc != ECORE_SUCCESS)
                        return rc;
 
@@ -165,8 +166,9 @@ static enum _ecore_status_t ecore_init_array_dmae(struct ecore_hwfn *p_hwfn,
        } else {
                rc = ecore_dmae_host2grc(p_hwfn, p_ptt,
                                         (osal_uintptr_t)(p_buf +
-                                                          dmae_data_offset),
-                                        addr, size, 0);
+                                                         dmae_data_offset),
+                                        addr, size,
+                                        OSAL_NULL /* default parameters */);
        }
 
        return rc;
@@ -177,13 +179,15 @@ static enum _ecore_status_t ecore_init_fill_dmae(struct ecore_hwfn *p_hwfn,
                                                 u32 addr, u32 fill_count)
 {
        static u32 zero_buffer[DMAE_MAX_RW_SIZE];
+       struct ecore_dmae_params params;
 
        OSAL_MEMSET(zero_buffer, 0, sizeof(u32) * DMAE_MAX_RW_SIZE);
 
+       OSAL_MEMSET(&params, 0, sizeof(params));
+       params.flags = ECORE_DMAE_FLAG_RW_REPL_SRC;
        return ecore_dmae_host2grc(p_hwfn, p_ptt,
                                   (osal_uintptr_t)&zero_buffer[0],
-                                  addr, fill_count,
-                                  ECORE_DMAE_FLAG_RW_REPL_SRC);
+                                  addr, fill_count, &params);
 }
 
 static void ecore_init_fill(struct ecore_hwfn *p_hwfn,
@@ -416,11 +420,11 @@ static u8 ecore_init_cmd_mode_match(struct ecore_hwfn *p_hwfn,
                                    u16 *p_offset, int modes)
 {
        struct ecore_dev *p_dev = p_hwfn->p_dev;
-       const u8 *modes_tree_buf;
        u8 arg1, arg2, tree_val;
+       const u8 *modes_tree;
 
-       modes_tree_buf = p_dev->fw_data->modes_tree_buf;
-       tree_val = modes_tree_buf[(*p_offset)++];
+       modes_tree = p_dev->fw_data->modes_tree_buf;
+       tree_val = modes_tree[(*p_offset)++];
        switch (tree_val) {
        case INIT_MODE_OP_NOT:
                return ecore_init_cmd_mode_match(p_hwfn, p_offset, modes) ^ 1;
@@ -470,12 +474,12 @@ enum _ecore_status_t ecore_init_run(struct ecore_hwfn *p_hwfn,
 {
        struct ecore_dev *p_dev = p_hwfn->p_dev;
        u32 cmd_num, num_init_ops;
-       union init_op *init_ops;
+       union init_op *init;
        bool b_dmae = false;
        enum _ecore_status_t rc = ECORE_SUCCESS;
 
        num_init_ops = p_dev->fw_data->init_ops_size;
-       init_ops = p_dev->fw_data->init_ops;
+       init = p_dev->fw_data->init_ops;
 
 #ifdef CONFIG_ECORE_ZIPPED_FW
        p_hwfn->unzip_buf = OSAL_ZALLOC(p_hwfn->p_dev, GFP_ATOMIC,
@@ -487,7 +491,7 @@ enum _ecore_status_t ecore_init_run(struct ecore_hwfn *p_hwfn,
 #endif
 
        for (cmd_num = 0; cmd_num < num_init_ops; cmd_num++) {
-               union init_op *cmd = &init_ops[cmd_num];
+               union init_op *cmd = &init[cmd_num];
                u32 data = OSAL_LE32_TO_CPU(cmd->raw.op_data);
 
                switch (GET_FIELD(data, INIT_CALLBACK_OP_OP)) {
index 4c271d3..7368d55 100644 (file)
@@ -428,14 +428,13 @@ ecore_general_attention_35(struct ecore_hwfn *p_hwfn)
 #define ECORE_DORQ_ATTENTION_SIZE_MASK         (0x7f)
 #define ECORE_DORQ_ATTENTION_SIZE_SHIFT                (16)
 
-#define ECORE_DB_REC_COUNT                     10
+#define ECORE_DB_REC_COUNT                     1000
 #define ECORE_DB_REC_INTERVAL                  100
 
-/* assumes sticky overflow indication was set for this PF */
-static enum _ecore_status_t ecore_db_rec_attn(struct ecore_hwfn *p_hwfn,
-                                             struct ecore_ptt *p_ptt)
+static enum _ecore_status_t ecore_db_rec_flush_queue(struct ecore_hwfn *p_hwfn,
+                                                    struct ecore_ptt *p_ptt)
 {
-       u8 count = ECORE_DB_REC_COUNT;
+       u32 count = ECORE_DB_REC_COUNT;
        u32 usage = 1;
 
        /* wait for usage to zero or count to run out. This is necessary since
@@ -461,6 +460,28 @@ static enum _ecore_status_t ecore_db_rec_attn(struct ecore_hwfn *p_hwfn,
                return ECORE_TIMEOUT;
        }
 
+       return ECORE_SUCCESS;
+}
+
+enum _ecore_status_t ecore_db_rec_handler(struct ecore_hwfn *p_hwfn,
+                                         struct ecore_ptt *p_ptt)
+{
+       u32 overflow;
+       enum _ecore_status_t rc;
+
+       overflow = ecore_rd(p_hwfn, p_ptt, DORQ_REG_PF_OVFL_STICKY);
+       DP_NOTICE(p_hwfn, false, "PF Overflow sticky 0x%x\n", overflow);
+       if (!overflow) {
+               ecore_db_recovery_execute(p_hwfn, DB_REC_ONCE);
+               return ECORE_SUCCESS;
+       }
+
+       if (ecore_edpm_enabled(p_hwfn)) {
+               rc = ecore_db_rec_flush_queue(p_hwfn, p_ptt);
+               if (rc != ECORE_SUCCESS)
+                       return rc;
+       }
+
        /* flush any pending (e)dpm as they may never arrive */
        ecore_wr(p_hwfn, p_ptt, DORQ_REG_DPM_FORCE_ABORT, 0x1);
 
@@ -477,8 +498,7 @@ static enum _ecore_status_t ecore_db_rec_attn(struct ecore_hwfn *p_hwfn,
 
 static enum _ecore_status_t ecore_dorq_attn_cb(struct ecore_hwfn *p_hwfn)
 {
-       u32 int_sts, first_drop_reason, details, address, overflow,
-               all_drops_reason;
+       u32 int_sts, first_drop_reason, details, address, all_drops_reason;
        struct ecore_ptt *p_ptt = p_hwfn->p_dpc_ptt;
        enum _ecore_status_t rc;
 
@@ -504,8 +524,6 @@ static enum _ecore_status_t ecore_dorq_attn_cb(struct ecore_hwfn *p_hwfn)
                                   DORQ_REG_DB_DROP_DETAILS);
                address = ecore_rd(p_hwfn, p_ptt,
                                   DORQ_REG_DB_DROP_DETAILS_ADDRESS);
-               overflow = ecore_rd(p_hwfn, p_ptt,
-                                   DORQ_REG_PF_OVFL_STICKY);
                all_drops_reason = ecore_rd(p_hwfn, p_ptt,
                                            DORQ_REG_DB_DROP_DETAILS_REASON);
 
@@ -516,19 +534,16 @@ static enum _ecore_status_t ecore_dorq_attn_cb(struct ecore_hwfn *p_hwfn)
                          "FID\t\t0x%04x\t\t(Opaque FID)\n"
                          "Size\t\t0x%04x\t\t(in bytes)\n"
                          "1st drop reason\t0x%08x\t(details on first drop since last handling)\n"
-                         "Sticky reasons\t0x%08x\t(all drop reasons since last handling)\n"
-                         "Overflow\t0x%x\t\t(a per PF indication)\n",
+                         "Sticky reasons\t0x%08x\t(all drop reasons since last handling)\n",
                          address,
                          GET_FIELD(details, ECORE_DORQ_ATTENTION_OPAQUE),
                          GET_FIELD(details, ECORE_DORQ_ATTENTION_SIZE) * 4,
-                         first_drop_reason, all_drops_reason, overflow);
+                         first_drop_reason, all_drops_reason);
 
-               /* if this PF caused overflow, initiate recovery */
-               if (overflow) {
-                       rc = ecore_db_rec_attn(p_hwfn, p_ptt);
-                       if (rc != ECORE_SUCCESS)
-                               return rc;
-               }
+               rc = ecore_db_rec_handler(p_hwfn, p_ptt);
+               OSAL_DB_REC_OCCURRED(p_hwfn);
+               if (rc != ECORE_SUCCESS)
+                       return rc;
 
                /* clear the doorbell drop details and prepare for next drop */
                ecore_wr(p_hwfn, p_ptt, DORQ_REG_DB_DROP_DETAILS_REL, 0);
@@ -1209,8 +1224,9 @@ static enum _ecore_status_t ecore_int_attentions(struct ecore_hwfn *p_hwfn)
 static void ecore_sb_ack_attn(struct ecore_hwfn *p_hwfn,
                              void OSAL_IOMEM *igu_addr, u32 ack_cons)
 {
-       struct igu_prod_cons_update igu_ack = { 0 };
+       struct igu_prod_cons_update igu_ack;
 
+       OSAL_MEMSET(&igu_ack, 0, sizeof(struct igu_prod_cons_update));
        igu_ack.sb_id_and_flags =
            ((ack_cons << IGU_PROD_CONS_UPDATE_SB_INDEX_SHIFT) |
             (1 << IGU_PROD_CONS_UPDATE_UPDATE_FLAG_SHIFT) |
@@ -1546,11 +1562,13 @@ void ecore_int_cau_conf_sb(struct ecore_hwfn *p_hwfn,
                ecore_dmae_host2grc(p_hwfn, p_ptt,
                                    (u64)(osal_uintptr_t)&phys_addr,
                                    CAU_REG_SB_ADDR_MEMORY +
-                                   igu_sb_id * sizeof(u64), 2, 0);
+                                   igu_sb_id * sizeof(u64), 2,
+                                   OSAL_NULL /* default parameters */);
                ecore_dmae_host2grc(p_hwfn, p_ptt,
                                    (u64)(osal_uintptr_t)&sb_entry,
                                    CAU_REG_SB_VAR_MEMORY +
-                                   igu_sb_id * sizeof(u64), 2, 0);
+                                   igu_sb_id * sizeof(u64), 2,
+                                   OSAL_NULL /* default parameters */);
        } else {
                /* Initialize Status Block Address */
                STORE_RT_REG_AGG(p_hwfn,
@@ -2631,7 +2649,8 @@ enum _ecore_status_t ecore_int_set_timer_res(struct ecore_hwfn *p_hwfn,
 
        rc = ecore_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
                                 sb_id * sizeof(u64),
-                                (u64)(osal_uintptr_t)&sb_entry, 2, 0);
+                                (u64)(osal_uintptr_t)&sb_entry, 2,
+                                OSAL_NULL /* default parameters */);
        if (rc != ECORE_SUCCESS) {
                DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
                return rc;
@@ -2644,8 +2663,8 @@ enum _ecore_status_t ecore_int_set_timer_res(struct ecore_hwfn *p_hwfn,
 
        rc = ecore_dmae_host2grc(p_hwfn, p_ptt,
                                 (u64)(osal_uintptr_t)&sb_entry,
-                                CAU_REG_SB_VAR_MEMORY +
-                                sb_id * sizeof(u64), 2, 0);
+                                CAU_REG_SB_VAR_MEMORY + sb_id * sizeof(u64), 2,
+                                OSAL_NULL /* default parameters */);
        if (rc != ECORE_SUCCESS) {
                DP_ERR(p_hwfn, "dmae_host2grc failed %d\n", rc);
                return rc;
@@ -2681,3 +2700,35 @@ enum _ecore_status_t ecore_int_get_sb_dbg(struct ecore_hwfn *p_hwfn,
 
        return ECORE_SUCCESS;
 }
+
+void ecore_pf_flr_igu_cleanup(struct ecore_hwfn *p_hwfn)
+{
+       struct ecore_ptt *p_ptt = p_hwfn->p_main_ptt;
+       struct ecore_ptt *p_dpc_ptt = ecore_get_reserved_ptt(p_hwfn,
+                                                            RESERVED_PTT_DPC);
+       int i;
+
+       /* Do not reorder the following cleanup sequence */
+       /* Ack all attentions */
+       ecore_wr(p_hwfn, p_ptt, IGU_REG_ATTENTION_ACK_BITS, 0xfff);
+
+       /* Clear driver attention */
+       ecore_wr(p_hwfn, p_dpc_ptt,
+               ((p_hwfn->rel_pf_id << 3) + MISC_REG_AEU_GENERAL_ATTN_0), 0);
+
+       /* Clear per-PF IGU registers to restore them as if the IGU
+        * was reset for this PF
+        */
+       ecore_wr(p_hwfn, p_ptt, IGU_REG_LEADING_EDGE_LATCH, 0);
+       ecore_wr(p_hwfn, p_ptt, IGU_REG_TRAILING_EDGE_LATCH, 0);
+       ecore_wr(p_hwfn, p_ptt, IGU_REG_PF_CONFIGURATION, 0);
+
+       /* Execute IGU clean up */
+       ecore_wr(p_hwfn, p_ptt, IGU_REG_PF_FUNCTIONAL_CLEANUP, 1);
+
+       /* Clear Stats */
+       ecore_wr(p_hwfn, p_ptt, IGU_REG_STATISTIC_NUM_OF_INTA_ASSERTED, 0);
+
+       for (i = 0; i < IGU_REG_PBA_STS_PF_SIZE; i++)
+               ecore_wr(p_hwfn, p_ptt, IGU_REG_PBA_STS_PF + i * 4, 0);
+}
index 041240d..ff2310c 100644 (file)
@@ -256,5 +256,6 @@ enum _ecore_status_t ecore_int_set_timer_res(struct ecore_hwfn *p_hwfn,
 enum _ecore_status_t ecore_pglueb_rbc_attn_handler(struct ecore_hwfn *p_hwfn,
                                                   struct ecore_ptt *p_ptt,
                                                   bool is_hw_init);
+void ecore_pf_flr_igu_cleanup(struct ecore_hwfn *p_hwfn);
 
 #endif /* __ECORE_INT_H__ */
index aeaf469..42538a4 100644 (file)
@@ -92,8 +92,9 @@ static OSAL_INLINE u16 ecore_sb_update_sb_idx(struct ecore_sb_info *sb_info)
 static OSAL_INLINE void ecore_sb_ack(struct ecore_sb_info *sb_info,
                                     enum igu_int_cmd int_cmd, u8 upd_flg)
 {
-       struct igu_prod_cons_update igu_ack = { 0 };
+       struct igu_prod_cons_update igu_ack;
 
+       OSAL_MEMSET(&igu_ack, 0, sizeof(struct igu_prod_cons_update));
        igu_ack.sb_id_and_flags =
            ((sb_info->sb_ack << IGU_PROD_CONS_UPDATE_SB_INDEX_SHIFT) |
             (upd_flg << IGU_PROD_CONS_UPDATE_UPDATE_FLAG_SHIFT) |
@@ -343,4 +344,15 @@ enum _ecore_status_t ecore_int_get_sb_dbg(struct ecore_hwfn *p_hwfn,
 enum _ecore_status_t
 ecore_int_igu_relocate_sb(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
                          u16 sb_id, bool b_to_vf);
+
+/**
+ * @brief - Doorbell Recovery handler.
+ *          Run DB_REC_REAL_DEAL doorbell recovery in case of PF overflow
+ *          (and flush DORQ if needed), otherwise run DB_REC_ONCE.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+enum _ecore_status_t ecore_db_rec_handler(struct ecore_hwfn *p_hwfn,
+                                         struct ecore_ptt *p_ptt);
 #endif
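
A sketch of an invocation site; the handler reads DORQ_REG_PF_OVFL_STICKY
itself, so callers only supply a ptt window (the wrapper name is
illustrative):

static void example_run_db_recovery(struct ecore_hwfn *p_hwfn,
				    struct ecore_ptt *p_ptt)
{
	if (ecore_db_rec_handler(p_hwfn, p_ptt) != ECORE_SUCCESS)
		DP_NOTICE(p_hwfn, false, "doorbell recovery failed\n");
}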
index 29001d7..55de708 100644 (file)
@@ -84,6 +84,13 @@ struct ecore_public_vf_info {
         */
        u8 forced_mac[ETH_ALEN];
        u16 forced_vlan;
+
+       /* Trusted VFs can configure promiscuous mode and
+        * set a MAC address even if the PF has set a forced MAC.
+        * Also store shadow promisc configuration if needed.
+        */
+       bool is_trusted_configured;
+       bool is_trusted_request;
 };
 
 struct ecore_iov_vf_init_params {
@@ -695,6 +702,16 @@ bool ecore_iov_is_vf_started(struct ecore_hwfn *p_hwfn,
  */
 int ecore_iov_get_vf_min_rate(struct ecore_hwfn *p_hwfn, int vfid);
 
+/**
+ * @brief - Configure min rate for VF's vport.
+ * @param p_dev
+ * @param vfid
+ * @param rate - rate in Mbps
+ *
+ * @return
+ */
+enum _ecore_status_t ecore_iov_configure_min_tx_rate(struct ecore_dev *p_dev,
+                                                    int vfid, u32 rate);
 #endif
 
 /**
index 0569302..12d45c1 100644 (file)
 /* Tstorm Eth limit Rx rate */
 #define ETH_RX_RATE_LIMIT_OFFSET(pf_id) (IRO[29].base + ((pf_id) * IRO[29].m1))
 #define ETH_RX_RATE_LIMIT_SIZE (IRO[29].size)
+/* RSS indirection table entry update command per PF offset in TSTORM PF BAR0.
+ * Use eth_tstorm_rss_update_data for update.
+ */
+#define TSTORM_ETH_RSS_UPDATE_OFFSET(pf_id) (IRO[30].base + \
+       ((pf_id) * IRO[30].m1))
+#define TSTORM_ETH_RSS_UPDATE_SIZE (IRO[30].size)
 /* Xstorm queue zone */
-#define XSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) (IRO[30].base + \
-       ((queue_id) * IRO[30].m1))
-#define XSTORM_ETH_QUEUE_ZONE_SIZE (IRO[30].size)
+#define XSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) (IRO[31].base + \
+       ((queue_id) * IRO[31].m1))
+#define XSTORM_ETH_QUEUE_ZONE_SIZE (IRO[31].size)
 /* Ystorm cqe producer */
-#define YSTORM_TOE_CQ_PROD_OFFSET(rss_id) (IRO[31].base + \
-       ((rss_id) * IRO[31].m1))
-#define YSTORM_TOE_CQ_PROD_SIZE (IRO[31].size)
-/* Ustorm cqe producer */
-#define USTORM_TOE_CQ_PROD_OFFSET(rss_id) (IRO[32].base + \
+#define YSTORM_TOE_CQ_PROD_OFFSET(rss_id) (IRO[32].base + \
        ((rss_id) * IRO[32].m1))
-#define USTORM_TOE_CQ_PROD_SIZE (IRO[32].size)
+#define YSTORM_TOE_CQ_PROD_SIZE (IRO[32].size)
+/* Ustorm cqe producer */
+#define USTORM_TOE_CQ_PROD_OFFSET(rss_id) (IRO[33].base + \
+       ((rss_id) * IRO[33].m1))
+#define USTORM_TOE_CQ_PROD_SIZE (IRO[33].size)
 /* Ustorm grq producer */
-#define USTORM_TOE_GRQ_PROD_OFFSET(pf_id) (IRO[33].base + \
-       ((pf_id) * IRO[33].m1))
-#define USTORM_TOE_GRQ_PROD_SIZE (IRO[33].size)
+#define USTORM_TOE_GRQ_PROD_OFFSET(pf_id) (IRO[34].base + \
+       ((pf_id) * IRO[34].m1))
+#define USTORM_TOE_GRQ_PROD_SIZE (IRO[34].size)
 /* Tstorm cmdq-cons of given command queue-id */
-#define TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) (IRO[34].base + \
-       ((cmdq_queue_id) * IRO[34].m1))
-#define TSTORM_SCSI_CMDQ_CONS_SIZE (IRO[34].size)
+#define TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) (IRO[35].base + \
+       ((cmdq_queue_id) * IRO[35].m1))
+#define TSTORM_SCSI_CMDQ_CONS_SIZE (IRO[35].size)
 /* Tstorm (reflects M-Storm) bdq-external-producer of given function ID,
  * BDqueue-id
  */
-#define TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) (IRO[35].base + \
-       ((func_id) * IRO[35].m1) + ((bdq_id) * IRO[35].m2))
-#define TSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[35].size)
-/* Mstorm bdq-external-producer of given BDQ resource ID, BDqueue-id */
-#define MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) (IRO[36].base + \
+#define TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) (IRO[36].base + \
        ((func_id) * IRO[36].m1) + ((bdq_id) * IRO[36].m2))
-#define MSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[36].size)
+#define TSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[36].size)
+/* Mstorm bdq-external-producer of given BDQ resource ID, BDqueue-id */
+#define MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) (IRO[37].base + \
+       ((func_id) * IRO[37].m1) + ((bdq_id) * IRO[37].m2))
+#define MSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[37].size)
 /* Tstorm iSCSI RX stats */
-#define TSTORM_ISCSI_RX_STATS_OFFSET(pf_id) (IRO[37].base + \
-       ((pf_id) * IRO[37].m1))
-#define TSTORM_ISCSI_RX_STATS_SIZE (IRO[37].size)
-/* Mstorm iSCSI RX stats */
-#define MSTORM_ISCSI_RX_STATS_OFFSET(pf_id) (IRO[38].base + \
+#define TSTORM_ISCSI_RX_STATS_OFFSET(pf_id) (IRO[38].base + \
        ((pf_id) * IRO[38].m1))
-#define MSTORM_ISCSI_RX_STATS_SIZE (IRO[38].size)
-/* Ustorm iSCSI RX stats */
-#define USTORM_ISCSI_RX_STATS_OFFSET(pf_id) (IRO[39].base + \
+#define TSTORM_ISCSI_RX_STATS_SIZE (IRO[38].size)
+/* Mstorm iSCSI RX stats */
+#define MSTORM_ISCSI_RX_STATS_OFFSET(pf_id) (IRO[39].base + \
        ((pf_id) * IRO[39].m1))
-#define USTORM_ISCSI_RX_STATS_SIZE (IRO[39].size)
-/* Xstorm iSCSI TX stats */
-#define XSTORM_ISCSI_TX_STATS_OFFSET(pf_id) (IRO[40].base + \
+#define MSTORM_ISCSI_RX_STATS_SIZE (IRO[39].size)
+/* Ustorm iSCSI RX stats */
+#define USTORM_ISCSI_RX_STATS_OFFSET(pf_id) (IRO[40].base + \
        ((pf_id) * IRO[40].m1))
-#define XSTORM_ISCSI_TX_STATS_SIZE (IRO[40].size)
-/* Ystorm iSCSI TX stats */
-#define YSTORM_ISCSI_TX_STATS_OFFSET(pf_id) (IRO[41].base + \
+#define USTORM_ISCSI_RX_STATS_SIZE (IRO[40].size)
+/* Xstorm iSCSI TX stats */
+#define XSTORM_ISCSI_TX_STATS_OFFSET(pf_id) (IRO[41].base + \
        ((pf_id) * IRO[41].m1))
-#define YSTORM_ISCSI_TX_STATS_SIZE (IRO[41].size)
-/* Pstorm iSCSI TX stats */
-#define PSTORM_ISCSI_TX_STATS_OFFSET(pf_id) (IRO[42].base + \
+#define XSTORM_ISCSI_TX_STATS_SIZE (IRO[41].size)
+/* Ystorm iSCSI TX stats */
+#define YSTORM_ISCSI_TX_STATS_OFFSET(pf_id) (IRO[42].base + \
        ((pf_id) * IRO[42].m1))
-#define PSTORM_ISCSI_TX_STATS_SIZE (IRO[42].size)
-/* Tstorm FCoE RX stats */
-#define TSTORM_FCOE_RX_STATS_OFFSET(pf_id) (IRO[43].base + \
+#define YSTORM_ISCSI_TX_STATS_SIZE (IRO[42].size)
+/* Pstorm iSCSI TX stats */
+#define PSTORM_ISCSI_TX_STATS_OFFSET(pf_id) (IRO[43].base + \
        ((pf_id) * IRO[43].m1))
-#define TSTORM_FCOE_RX_STATS_SIZE (IRO[43].size)
-/* Pstorm FCoE TX stats */
-#define PSTORM_FCOE_TX_STATS_OFFSET(pf_id) (IRO[44].base + \
+#define PSTORM_ISCSI_TX_STATS_SIZE (IRO[43].size)
+/* Tstorm FCoE RX stats */
+#define TSTORM_FCOE_RX_STATS_OFFSET(pf_id) (IRO[44].base + \
        ((pf_id) * IRO[44].m1))
-#define PSTORM_FCOE_TX_STATS_SIZE (IRO[44].size)
+#define TSTORM_FCOE_RX_STATS_SIZE (IRO[44].size)
+/* Pstorm FCoE TX stats */
+#define PSTORM_FCOE_TX_STATS_OFFSET(pf_id) (IRO[45].base + \
+       ((pf_id) * IRO[45].m1))
+#define PSTORM_FCOE_TX_STATS_SIZE (IRO[45].size)
 /* Pstorm RDMA queue statistics */
-#define PSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \
-       (IRO[45].base + ((rdma_stat_counter_id) * IRO[45].m1))
-#define PSTORM_RDMA_QUEUE_STAT_SIZE (IRO[45].size)
-/* Tstorm RDMA queue statistics */
-#define TSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) (IRO[46].base + \
+#define PSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) (IRO[46].base + \
        ((rdma_stat_counter_id) * IRO[46].m1))
-#define TSTORM_RDMA_QUEUE_STAT_SIZE (IRO[46].size)
+#define PSTORM_RDMA_QUEUE_STAT_SIZE (IRO[46].size)
+/* Tstorm RDMA queue statistics */
+#define TSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) (IRO[47].base + \
+       ((rdma_stat_counter_id) * IRO[47].m1))
+#define TSTORM_RDMA_QUEUE_STAT_SIZE (IRO[47].size)
+/* Xstorm error level for assert */
+#define XSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) (IRO[48].base + \
+       ((pf_id) * IRO[48].m1))
+#define XSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[48].size)
+/* Ystorm error level for assert */
+#define YSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) (IRO[49].base + \
+       ((pf_id) * IRO[49].m1))
+#define YSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[49].size)
+/* Pstorm error level for assert */
+#define PSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) (IRO[50].base + \
+       ((pf_id) * IRO[50].m1))
+#define PSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[50].size)
+/* Tstorm error level for assert */
+#define TSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) (IRO[51].base + \
+       ((pf_id) * IRO[51].m1))
+#define TSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[51].size)
+/* Mstorm error level for assert */
+#define MSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) (IRO[52].base + \
+       ((pf_id) * IRO[52].m1))
+#define MSTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[52].size)
+/* Ustorm error level for assert */
+#define USTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) (IRO[53].base + \
+       ((pf_id) * IRO[53].m1))
+#define USTORM_RDMA_ASSERT_LEVEL_SIZE (IRO[53].size)
 /* Xstorm iWARP rxmit stats */
-#define XSTORM_IWARP_RXMIT_STATS_OFFSET(pf_id) (IRO[47].base + \
-       ((pf_id) * IRO[47].m1))
-#define XSTORM_IWARP_RXMIT_STATS_SIZE (IRO[47].size)
+#define XSTORM_IWARP_RXMIT_STATS_OFFSET(pf_id) (IRO[54].base + \
+       ((pf_id) * IRO[54].m1))
+#define XSTORM_IWARP_RXMIT_STATS_SIZE (IRO[54].size)
 /* Tstorm RoCE Event Statistics */
-#define TSTORM_ROCE_EVENTS_STAT_OFFSET(roce_pf_id) (IRO[48].base + \
-       ((roce_pf_id) * IRO[48].m1))
-#define TSTORM_ROCE_EVENTS_STAT_SIZE (IRO[48].size)
+#define TSTORM_ROCE_EVENTS_STAT_OFFSET(roce_pf_id) (IRO[55].base + \
+       ((roce_pf_id) * IRO[55].m1))
+#define TSTORM_ROCE_EVENTS_STAT_SIZE (IRO[55].size)
 /* DCQCN Received Statistics */
-#define YSTORM_ROCE_DCQCN_RECEIVED_STATS_OFFSET(roce_pf_id) (IRO[49].base + \
-       ((roce_pf_id) * IRO[49].m1))
-#define YSTORM_ROCE_DCQCN_RECEIVED_STATS_SIZE (IRO[49].size)
+#define YSTORM_ROCE_DCQCN_RECEIVED_STATS_OFFSET(roce_pf_id) (IRO[56].base + \
+       ((roce_pf_id) * IRO[56].m1))
+#define YSTORM_ROCE_DCQCN_RECEIVED_STATS_SIZE (IRO[56].size)
+/* RoCE Error Statistics */
+#define YSTORM_ROCE_ERROR_STATS_OFFSET(roce_pf_id) (IRO[57].base + \
+       ((roce_pf_id) * IRO[57].m1))
+#define YSTORM_ROCE_ERROR_STATS_SIZE (IRO[57].size)
 /* DCQCN Sent Statistics */
-#define PSTORM_ROCE_DCQCN_SENT_STATS_OFFSET(roce_pf_id) (IRO[50].base + \
-       ((roce_pf_id) * IRO[50].m1))
-#define PSTORM_ROCE_DCQCN_SENT_STATS_SIZE (IRO[50].size)
+#define PSTORM_ROCE_DCQCN_SENT_STATS_OFFSET(roce_pf_id) (IRO[58].base + \
+       ((roce_pf_id) * IRO[58].m1))
+#define PSTORM_ROCE_DCQCN_SENT_STATS_SIZE (IRO[58].size)
+/* RoCE CQEs Statistics */
+#define USTORM_ROCE_CQE_STATS_OFFSET(roce_pf_id) (IRO[59].base + \
+       ((roce_pf_id) * IRO[59].m1))
+#define USTORM_ROCE_CQE_STATS_SIZE (IRO[59].size)
 
 #endif /* __IRO_H__ */
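
To see what the renumbering buys, take the new entry as a worked example. In
iro_values.h below, iro_arr[30] is { 0xc00, 0x8, 0x0, 0x0, 0x8 }, read as
(base, m1, m2, m3, size) assuming the usual struct iro layout, so for
pf_id = 3:

    TSTORM_ETH_RSS_UPDATE_OFFSET(3) = IRO[30].base + 3 * IRO[30].m1
                                    = 0xc00 + 3 * 0x8
                                    = 0xc18
    TSTORM_ETH_RSS_UPDATE_SIZE      = IRO[30].size = 0x8 bytes

Inserting this entry at index 30 is what forces every later macro and every
later iro_arr slot to shift by one in the rest of this hunk.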
index 685fa2e..30e632c 100644 (file)
@@ -7,7 +7,7 @@
 #ifndef __IRO_VALUES_H__
 #define __IRO_VALUES_H__
 
-static const struct iro iro_arr[51] = {
+static const struct iro iro_arr[60] = {
 /* YSTORM_FLOW_CONTROL_MODE_OFFSET */
        {      0x0,      0x0,      0x0,      0x0,      0x8},
 /* TSTORM_PORT_STAT_OFFSET(port_id) */
@@ -29,7 +29,7 @@ static const struct iro iro_arr[51] = {
 /* YSTORM_INTEG_TEST_DATA_OFFSET */
        {   0x3e38,      0x0,      0x0,      0x0,     0x78},
 /* PSTORM_INTEG_TEST_DATA_OFFSET */
-       {   0x2b78,      0x0,      0x0,      0x0,     0x78},
+       {   0x3ef8,      0x0,      0x0,      0x0,     0x78},
 /* TSTORM_INTEG_TEST_DATA_OFFSET */
        {   0x4c40,      0x0,      0x0,      0x0,     0x78},
 /* MSTORM_INTEG_TEST_DATA_OFFSET */
@@ -43,7 +43,7 @@ static const struct iro iro_arr[51] = {
 /* CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) */
        {   0xb820,     0x30,      0x0,      0x0,     0x30},
 /* CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(core_tx_stats_id) */
-       {   0x96c0,     0x30,      0x0,      0x0,     0x30},
+       {   0xa990,     0x30,      0x0,      0x0,     0x30},
 /* MSTORM_QUEUE_STAT_OFFSET(stat_counter_id) */
        {   0x4b68,     0x80,      0x0,      0x0,     0x40},
 /* MSTORM_ETH_PF_PRODS_OFFSET(queue_id) */
@@ -59,15 +59,17 @@ static const struct iro iro_arr[51] = {
 /* USTORM_ETH_PF_STAT_OFFSET(pf_id) */
        {   0xe770,     0x60,      0x0,      0x0,     0x60},
 /* PSTORM_QUEUE_STAT_OFFSET(stat_counter_id) */
-       {   0x2d10,     0x80,      0x0,      0x0,     0x38},
+       {   0x4090,     0x80,      0x0,      0x0,     0x38},
 /* PSTORM_ETH_PF_STAT_OFFSET(pf_id) */
-       {   0xf2b8,     0x78,      0x0,      0x0,     0x78},
+       {   0xfea8,     0x78,      0x0,      0x0,     0x78},
 /* PSTORM_CTL_FRAME_ETHTYPE_OFFSET(ethType_id) */
        {    0x1f8,      0x4,      0x0,      0x0,      0x4},
 /* TSTORM_ETH_PRS_INPUT_OFFSET */
        {   0xaf20,      0x0,      0x0,      0x0,     0xf0},
 /* ETH_RX_RATE_LIMIT_OFFSET(pf_id) */
        {   0xb010,      0x8,      0x0,      0x0,      0x8},
+/* TSTORM_ETH_RSS_UPDATE_OFFSET(pf_id) */
+       {    0xc00,      0x8,      0x0,      0x0,      0x8},
 /* XSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) */
        {    0x1f8,      0x8,      0x0,      0x0,      0x8},
 /* YSTORM_TOE_CQ_PROD_OFFSET(rss_id) */
@@ -91,25 +93,41 @@ static const struct iro iro_arr[51] = {
 /* XSTORM_ISCSI_TX_STATS_OFFSET(pf_id) */
        {   0xa588,     0x50,      0x0,      0x0,     0x20},
 /* YSTORM_ISCSI_TX_STATS_OFFSET(pf_id) */
-       {   0x8700,     0x40,      0x0,      0x0,     0x28},
+       {   0x8f00,     0x40,      0x0,      0x0,     0x28},
 /* PSTORM_ISCSI_TX_STATS_OFFSET(pf_id) */
-       {  0x10300,     0x18,      0x0,      0x0,     0x10},
+       {  0x10e30,     0x18,      0x0,      0x0,     0x10},
 /* TSTORM_FCOE_RX_STATS_OFFSET(pf_id) */
        {   0xde48,     0x48,      0x0,      0x0,     0x38},
 /* PSTORM_FCOE_TX_STATS_OFFSET(pf_id) */
-       {  0x10768,     0x20,      0x0,      0x0,     0x20},
+       {  0x11298,     0x20,      0x0,      0x0,     0x20},
 /* PSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) */
-       {   0x2d48,     0x80,      0x0,      0x0,     0x10},
+       {   0x40c8,     0x80,      0x0,      0x0,     0x10},
 /* TSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) */
        {   0x5048,     0x10,      0x0,      0x0,     0x10},
+/* XSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) */
+       {   0xa928,      0x8,      0x0,      0x0,      0x1},
+/* YSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) */
+       {   0xa128,      0x8,      0x0,      0x0,      0x1},
+/* PSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) */
+       {  0x11a30,      0x8,      0x0,      0x0,      0x1},
+/* TSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) */
+       {   0xf030,      0x8,      0x0,      0x0,      0x1},
+/* MSTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) */
+       {  0x13028,      0x8,      0x0,      0x0,      0x1},
+/* USTORM_RDMA_ASSERT_LEVEL_OFFSET(pf_id) */
+       {  0x12c58,      0x8,      0x0,      0x0,      0x1},
 /* XSTORM_IWARP_RXMIT_STATS_OFFSET(pf_id) */
        {   0xc9b8,     0x30,      0x0,      0x0,     0x10},
 /* TSTORM_ROCE_EVENTS_STAT_OFFSET(roce_pf_id) */
-       {   0xed90,     0x10,      0x0,      0x0,     0x10},
+       {   0xed90,     0x28,      0x0,      0x0,     0x28},
 /* YSTORM_ROCE_DCQCN_RECEIVED_STATS_OFFSET(roce_pf_id) */
-       {   0xa520,     0x10,      0x0,      0x0,     0x10},
+       {   0xad20,     0x18,      0x0,      0x0,     0x18},
+/* YSTORM_ROCE_ERROR_STATS_OFFSET(roce_pf_id) */
+       {   0xaea0,      0x8,      0x0,      0x0,      0x8},
 /* PSTORM_ROCE_DCQCN_SENT_STATS_OFFSET(roce_pf_id) */
-       {  0x13108,      0x8,      0x0,      0x0,      0x8},
+       {  0x13c38,      0x8,      0x0,      0x0,      0x8},
+/* USTORM_ROCE_CQE_STATS_OFFSET(roce_pf_id) */
+       {  0x13c50,     0x18,      0x0,      0x0,     0x18},
 };
 
 #endif /* __IRO_VALUES_H__ */
index d71f461..8b9817e 100644 (file)
@@ -608,6 +608,9 @@ ecore_sp_update_accept_mode(struct ecore_hwfn *p_hwfn,
                SET_FIELD(state, ETH_VPORT_RX_MODE_BCAST_ACCEPT_ALL,
                          !!(accept_filter & ECORE_ACCEPT_BCAST));
 
+               SET_FIELD(state, ETH_VPORT_RX_MODE_ACCEPT_ANY_VNI,
+                         !!(accept_filter & ECORE_ACCEPT_ANY_VNI));
+
                p_ramrod->rx_mode.state = OSAL_CPU_TO_LE16(state);
                DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
                           "vport[%02x] p_ramrod->rx_mode.state = 0x%x\n",
@@ -783,6 +786,11 @@ ecore_sp_vport_update(struct ecore_hwfn *p_hwfn,
                return rc;
        }
 
+       if (p_params->update_ctl_frame_check) {
+               p_cmn->ctl_frame_mac_check_en = p_params->mac_chk_en;
+               p_cmn->ctl_frame_ethtype_check_en = p_params->ethtype_chk_en;
+       }
+
        /* Update mcast bins for VFs, PF doesn't use this functionality */
        ecore_sp_update_mcast_bin(p_ramrod, p_params);
 
@@ -2084,6 +2092,24 @@ void ecore_reset_vport_stats(struct ecore_dev *p_dev)
        }
 }
 
+static enum gft_profile_type
+ecore_arfs_mode_to_hsi(enum ecore_filter_config_mode mode)
+{
+       if (mode == ECORE_FILTER_CONFIG_MODE_5_TUPLE)
+               return GFT_PROFILE_TYPE_4_TUPLE;
+
+       if (mode == ECORE_FILTER_CONFIG_MODE_IP_DEST)
+               return GFT_PROFILE_TYPE_IP_DST_ADDR;
+
+       if (mode == ECORE_FILTER_CONFIG_MODE_TUNN_TYPE)
+               return GFT_PROFILE_TYPE_TUNNEL_TYPE;
+
+       if (mode == ECORE_FILTER_CONFIG_MODE_IP_SRC)
+               return GFT_PROFILE_TYPE_IP_SRC_ADDR;
+
+       return GFT_PROFILE_TYPE_L4_DST_PORT;
+}
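
With the boolean arfs_enable replaced by a mode, a caller now selects a GFT
profile like this (a sketch; assumes the remaining ecore_arfs_config_params
fields keep their existing meaning):

static void example_enable_arfs_5tuple(struct ecore_hwfn *p_hwfn,
				       struct ecore_ptt *p_ptt)
{
	struct ecore_arfs_config_params cfg;

	OSAL_MEMSET(&cfg, 0, sizeof(cfg));
	cfg.tcp = true;
	cfg.udp = true;
	cfg.ipv4 = true;
	cfg.ipv6 = true;
	cfg.mode = ECORE_FILTER_CONFIG_MODE_5_TUPLE; /* 4-tuple GFT profile */
	ecore_arfs_mode_configure(p_hwfn, p_ptt, &cfg);
}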
+
 void ecore_arfs_mode_configure(struct ecore_hwfn *p_hwfn,
                               struct ecore_ptt *p_ptt,
                               struct ecore_arfs_config_params *p_cfg_params)
@@ -2091,13 +2117,13 @@ void ecore_arfs_mode_configure(struct ecore_hwfn *p_hwfn,
        if (OSAL_TEST_BIT(ECORE_MF_DISABLE_ARFS, &p_hwfn->p_dev->mf_bits))
                return;
 
-       if (p_cfg_params->arfs_enable) {
+       if (p_cfg_params->mode != ECORE_FILTER_CONFIG_MODE_DISABLE) {
                ecore_gft_config(p_hwfn, p_ptt, p_hwfn->rel_pf_id,
                                 p_cfg_params->tcp,
                                 p_cfg_params->udp,
                                 p_cfg_params->ipv4,
                                 p_cfg_params->ipv6,
-                                GFT_PROFILE_TYPE_4_TUPLE);
+                                ecore_arfs_mode_to_hsi(p_cfg_params->mode));
                DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
                           "tcp = %s, udp = %s, ipv4 = %s, ipv6 =%s\n",
                           p_cfg_params->tcp ? "Enable" : "Disable",
@@ -2107,8 +2133,8 @@ void ecore_arfs_mode_configure(struct ecore_hwfn *p_hwfn,
        } else {
                ecore_gft_disable(p_hwfn, p_ptt, p_hwfn->rel_pf_id);
        }
-       DP_VERBOSE(p_hwfn, ECORE_MSG_SP, "Configured ARFS mode : %s\n",
-                  p_cfg_params->arfs_enable ? "Enable" : "Disable");
+       DP_VERBOSE(p_hwfn, ECORE_MSG_SP, "Configured ARFS mode : %d\n",
+                  (int)p_cfg_params->mode);
 }
 
 enum _ecore_status_t
@@ -2179,10 +2205,10 @@ ecore_configure_rfs_ntuple_filter(struct ecore_hwfn *p_hwfn,
        return ecore_spq_post(p_hwfn, p_ent, OSAL_NULL);
 }
 
-int ecore_get_rxq_coalesce(struct ecore_hwfn *p_hwfn,
-                          struct ecore_ptt *p_ptt,
-                          struct ecore_queue_cid *p_cid,
-                          u16 *p_rx_coal)
+enum _ecore_status_t ecore_get_rxq_coalesce(struct ecore_hwfn *p_hwfn,
+                                           struct ecore_ptt *p_ptt,
+                                           struct ecore_queue_cid *p_cid,
+                                           u16 *p_rx_coal)
 {
        u32 coalesce, address, is_valid;
        struct cau_sb_entry sb_entry;
@@ -2191,7 +2217,8 @@ int ecore_get_rxq_coalesce(struct ecore_hwfn *p_hwfn,
 
        rc = ecore_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
                                 p_cid->sb_igu_id * sizeof(u64),
-                                (u64)(osal_uintptr_t)&sb_entry, 2, 0);
+                                (u64)(osal_uintptr_t)&sb_entry, 2,
+                                OSAL_NULL /* default parameters */);
        if (rc != ECORE_SUCCESS) {
                DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
                return rc;
@@ -2213,10 +2240,10 @@ int ecore_get_rxq_coalesce(struct ecore_hwfn *p_hwfn,
        return ECORE_SUCCESS;
 }
 
-int ecore_get_txq_coalesce(struct ecore_hwfn *p_hwfn,
-                          struct ecore_ptt *p_ptt,
-                          struct ecore_queue_cid *p_cid,
-                          u16 *p_tx_coal)
+enum _ecore_status_t ecore_get_txq_coalesce(struct ecore_hwfn *p_hwfn,
+                                           struct ecore_ptt *p_ptt,
+                                           struct ecore_queue_cid *p_cid,
+                                           u16 *p_tx_coal)
 {
        u32 coalesce, address, is_valid;
        struct cau_sb_entry sb_entry;
@@ -2225,7 +2252,8 @@ int ecore_get_txq_coalesce(struct ecore_hwfn *p_hwfn,
 
        rc = ecore_dmae_grc2host(p_hwfn, p_ptt, CAU_REG_SB_VAR_MEMORY +
                                 p_cid->sb_igu_id * sizeof(u64),
-                                (u64)(osal_uintptr_t)&sb_entry, 2, 0);
+                                (u64)(osal_uintptr_t)&sb_entry, 2,
+                                OSAL_NULL /* default parameters */);
        if (rc != ECORE_SUCCESS) {
                DP_ERR(p_hwfn, "dmae_grc2host failed %d\n", rc);
                return rc;
@@ -2302,3 +2330,55 @@ ecore_eth_tx_queue_maxrate(struct ecore_hwfn *p_hwfn,
        return ecore_init_vport_rl(p_hwfn, p_ptt, vport, rate,
                                   p_link->speed);
 }
+
+#define RSS_TSTORM_UPDATE_STATUS_MAX_POLL_COUNT    100
+#define RSS_TSTORM_UPDATE_STATUS_POLL_PERIOD_US    1
+
+enum _ecore_status_t
+ecore_update_eth_rss_ind_table_entry(struct ecore_hwfn *p_hwfn,
+                                    u8 vport_id,
+                                    u8 ind_table_index,
+                                    u16 ind_table_value)
+{
+       struct eth_tstorm_rss_update_data update_data = { 0 };
+       void OSAL_IOMEM *addr = OSAL_NULL;
+       enum _ecore_status_t rc;
+       u8 abs_vport_id;
+       u32 cnt = 0;
+
+       OSAL_BUILD_BUG_ON(sizeof(update_data) != sizeof(u64));
+
+       rc = ecore_fw_vport(p_hwfn, vport_id, &abs_vport_id);
+       if (rc != ECORE_SUCCESS)
+               return rc;
+
+       addr = (u8 OSAL_IOMEM *)p_hwfn->regview +
+              GTT_BAR0_MAP_REG_TSDM_RAM +
+              TSTORM_ETH_RSS_UPDATE_OFFSET(p_hwfn->rel_pf_id);
+
+       *(u64 *)(&update_data) = DIRECT_REG_RD64(p_hwfn, addr);
+
+       for (cnt = 0; update_data.valid &&
+            cnt < RSS_TSTORM_UPDATE_STATUS_MAX_POLL_COUNT; cnt++) {
+               OSAL_UDELAY(RSS_TSTORM_UPDATE_STATUS_POLL_PERIOD_US);
+               *(u64 *)(&update_data) = DIRECT_REG_RD64(p_hwfn, addr);
+       }
+
+       if (update_data.valid) {
+               DP_NOTICE(p_hwfn, true,
+                         "RSS update valid status is not clear! valid=0x%x vport id=%d ind_table_idx=%d ind_table_value=%d.\n",
+                         update_data.valid, vport_id, ind_table_index,
+                         ind_table_value);
+
+               return ECORE_AGAIN;
+       }
+
+       update_data.valid           = 1;
+       update_data.ind_table_index = ind_table_index;
+       update_data.ind_table_value = ind_table_value;
+       update_data.vport_id        = abs_vport_id;
+
+       DIRECT_REG_WR64(p_hwfn, addr, *(u64 *)(&update_data));
+
+       return ECORE_SUCCESS;
+}
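A hedged usage sketch for the new RAM-based indirection-table update. The
ecore_mcp_ind_table_lock()/unlock() names come from the doc comment added to
the header below; their exact signatures, and the retry-on-ECORE_AGAIN policy,
are assumptions rather than part of this patch.

/* Sketch: update one RSS indirection table entry, serializing with the
 * per-engine lock as the API notice requires.
 */
static enum _ecore_status_t
example_set_rss_entry(struct ecore_hwfn *p_hwfn, u8 vport_id,
		      u8 idx, u16 queue_val)
{
	enum _ecore_status_t rc;

	ecore_mcp_ind_table_lock(p_hwfn);	/* signature assumed */
	rc = ecore_update_eth_rss_ind_table_entry(p_hwfn, vport_id,
						  idx, queue_val);
	ecore_mcp_ind_table_unlock(p_hwfn);

	/* ECORE_AGAIN means a previous update is still marked valid;
	 * whether and how to retry is a caller policy decision.
	 */
	return rc;
}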
index 575b9e3..004fb61 100644 (file)
@@ -137,6 +137,16 @@ struct ecore_filter_accept_flags {
 #define ECORE_ACCEPT_MCAST_MATCHED     0x08
 #define ECORE_ACCEPT_MCAST_UNMATCHED   0x10
 #define ECORE_ACCEPT_BCAST             0x20
+#define ECORE_ACCEPT_ANY_VNI           0x40
+};
+
+enum ecore_filter_config_mode {
+       ECORE_FILTER_CONFIG_MODE_DISABLE,
+       ECORE_FILTER_CONFIG_MODE_5_TUPLE,
+       ECORE_FILTER_CONFIG_MODE_L4_PORT,
+       ECORE_FILTER_CONFIG_MODE_IP_DEST,
+       ECORE_FILTER_CONFIG_MODE_TUNN_TYPE,
+       ECORE_FILTER_CONFIG_MODE_IP_SRC,
 };
 
 struct ecore_arfs_config_params {
@@ -144,7 +154,7 @@ struct ecore_arfs_config_params {
        bool udp;
        bool ipv4;
        bool ipv6;
-       bool arfs_enable;       /* Enable or disable arfs mode */
+       enum ecore_filter_config_mode mode;
 };
 
 /* Add / remove / move / remove-all unicast MAC-VLAN filters.
@@ -337,7 +347,10 @@ struct ecore_sp_vport_update_params {
        /* MTU change - notice this requires the vport to be disabled.
         * If non-zero, value would be used.
         */
-       u16 mtu;
+       u16                     mtu;
+       u8                      update_ctl_frame_check;
+       u8                      mac_chk_en;
+       u8                      ethtype_chk_en;
 };
 
 /**
@@ -460,4 +473,28 @@ ecore_configure_rfs_ntuple_filter(struct ecore_hwfn *p_hwfn,
                                  dma_addr_t p_addr, u16 length,
                                  u16 qid, u8 vport_id,
                                  bool b_is_add);
+
+/**
+ * @brief - ecore_update_eth_rss_ind_table_entry
+ *
+ * This function is used to update an RSS indirection table entry in FW RAM,
+ * instead of using the SP vport update ramrod with RSS params.
+ *
+ * Notice:
+ * This function supports only one outstanding command per engine. Ecore
+ * clients that use this function should call ecore_mcp_ind_table_lock()
+ * before it and ecore_mcp_ind_table_unlock() after it.
+ *
+ * @param p_hwfn
+ * @param vport_id
+ * @param ind_table_index
+ * @param ind_table_value
+ *
+ * @return enum _ecore_status_t
+ */
+enum _ecore_status_t
+ecore_update_eth_rss_ind_table_entry(struct ecore_hwfn *p_hwfn,
+                                    u8 vport_id,
+                                    u8 ind_table_index,
+                                    u16 ind_table_value);
 #endif
index ea14c17..6c65606 100644 (file)
@@ -177,10 +177,16 @@ enum _ecore_status_t ecore_mcp_free(struct ecore_hwfn *p_hwfn)
        return ECORE_SUCCESS;
 }
 
+/* Maximum of 1 sec to wait for the SHMEM ready indication */
+#define ECORE_MCP_SHMEM_RDY_MAX_RETRIES        20
+#define ECORE_MCP_SHMEM_RDY_ITER_MS    50
+
 static enum _ecore_status_t ecore_load_mcp_offsets(struct ecore_hwfn *p_hwfn,
                                                   struct ecore_ptt *p_ptt)
 {
        struct ecore_mcp_info *p_info = p_hwfn->mcp_info;
+       u8 cnt = ECORE_MCP_SHMEM_RDY_MAX_RETRIES;
+       u8 msec = ECORE_MCP_SHMEM_RDY_ITER_MS;
        u32 drv_mb_offsize, mfw_mb_offsize;
        u32 mcp_pf_id = MCP_PF_ID(p_hwfn);
 
@@ -198,6 +204,35 @@ static enum _ecore_status_t ecore_load_mcp_offsets(struct ecore_hwfn *p_hwfn,
 
        p_info->public_base |= GRCBASE_MCP;
 
+       /* Get the MFW MB address and number of supported messages */
+       mfw_mb_offsize = ecore_rd(p_hwfn, p_ptt,
+                                 SECTION_OFFSIZE_ADDR(p_info->public_base,
+                                 PUBLIC_MFW_MB));
+       p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id);
+       p_info->mfw_mb_length = (u16)ecore_rd(p_hwfn, p_ptt,
+                                             p_info->mfw_mb_addr);
+
+       /* @@@TBD:
+        * The driver can notify that there was an MCP reset, and read the SHMEM
+        * values before the MFW has completed initializing them.
+        * As a temporary solution, the "sup_msgs" field is used as a data ready
+        * indication.
+        * This should be replaced with an actual indication when it is provided
+        * by the MFW.
+        */
+       while (!p_info->mfw_mb_length && cnt--) {
+               OSAL_MSLEEP(msec);
+               p_info->mfw_mb_length = (u16)ecore_rd(p_hwfn, p_ptt,
+                                                     p_info->mfw_mb_addr);
+       }
+
+       if (!cnt) {
+               DP_NOTICE(p_hwfn, false,
+                         "Failed to get the SHMEM ready notification after %d msec\n",
+                         ECORE_MCP_SHMEM_RDY_MAX_RETRIES * msec);
+               return ECORE_TIMEOUT;
+       }
+
        /* Calculate the driver and MFW mailbox address */
        drv_mb_offsize = ecore_rd(p_hwfn, p_ptt,
                                  SECTION_OFFSIZE_ADDR(p_info->public_base,
@@ -208,14 +243,6 @@ static enum _ecore_status_t ecore_load_mcp_offsets(struct ecore_hwfn *p_hwfn,
                   " mcp_pf_id = 0x%x\n",
                   drv_mb_offsize, p_info->drv_mb_addr, mcp_pf_id);
 
-       /* Set the MFW MB address */
-       mfw_mb_offsize = ecore_rd(p_hwfn, p_ptt,
-                                 SECTION_OFFSIZE_ADDR(p_info->public_base,
-                                                      PUBLIC_MFW_MB));
-       p_info->mfw_mb_addr = SECTION_ADDR(mfw_mb_offsize, mcp_pf_id);
-       p_info->mfw_mb_length = (u16)ecore_rd(p_hwfn, p_ptt,
-                                              p_info->mfw_mb_addr);
-
        /* Get the current driver mailbox sequence before sending
         * the first command
         */
@@ -1656,6 +1683,49 @@ ecore_mcp_update_bw(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
                      &param);
 }
 
+static void ecore_mcp_update_stag(struct ecore_hwfn *p_hwfn,
+                                 struct ecore_ptt *p_ptt)
+{
+       struct public_func shmem_info;
+       u32 resp = 0, param = 0;
+
+       ecore_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
+                                MCP_PF_ID(p_hwfn));
+
+       p_hwfn->mcp_info->func_info.ovlan = (u16)shmem_info.ovlan_stag &
+                                                FUNC_MF_CFG_OV_STAG_MASK;
+       p_hwfn->hw_info.ovlan = p_hwfn->mcp_info->func_info.ovlan;
+       if (OSAL_TEST_BIT(ECORE_MF_OVLAN_CLSS, &p_hwfn->p_dev->mf_bits)) {
+               if (p_hwfn->hw_info.ovlan != ECORE_MCP_VLAN_UNSET) {
+                       ecore_wr(p_hwfn, p_ptt, NIG_REG_LLH_FUNC_TAG_VALUE,
+                                p_hwfn->hw_info.ovlan);
+                       ecore_wr(p_hwfn, p_ptt, NIG_REG_LLH_FUNC_TAG_EN, 1);
+
+                       /* Configure DB to add external vlan to EDPM packets */
+                       ecore_wr(p_hwfn, p_ptt, DORQ_REG_TAG1_OVRD_MODE, 1);
+                       ecore_wr(p_hwfn, p_ptt, DORQ_REG_PF_EXT_VID_BB_K2,
+                                p_hwfn->hw_info.ovlan);
+               } else {
+                       ecore_wr(p_hwfn, p_ptt, NIG_REG_LLH_FUNC_TAG_EN, 0);
+                       ecore_wr(p_hwfn, p_ptt, NIG_REG_LLH_FUNC_TAG_VALUE, 0);
+
+                       /* Configure DB to add external vlan to EDPM packets */
+                       ecore_wr(p_hwfn, p_ptt, DORQ_REG_TAG1_OVRD_MODE, 0);
+                       ecore_wr(p_hwfn, p_ptt, DORQ_REG_PF_EXT_VID_BB_K2, 0);
+               }
+
+               ecore_sp_pf_update_stag(p_hwfn);
+       }
+
+       DP_VERBOSE(p_hwfn, ECORE_MSG_SP, "ovlan  = %d hw_mode = 0x%x\n",
+                  p_hwfn->mcp_info->func_info.ovlan, p_hwfn->hw_info.hw_mode);
+       OSAL_HW_INFO_CHANGE(p_hwfn, ECORE_HW_INFO_CHANGE_OVLAN);
+
+       /* Acknowledge the MFW */
+       ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_S_TAG_UPDATE_ACK, 0,
+                     &resp, &param);
+}
+
 static void ecore_mcp_handle_fan_failure(struct ecore_hwfn *p_hwfn)
 {
        /* A single notification should be sent to upper driver in CMT mode */
@@ -1946,7 +2016,7 @@ ecore_mcp_read_ufp_config(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt)
                DP_NOTICE(p_hwfn, false, "Unknown Host priority control %d\n",
                          val);
 
-       DP_VERBOSE(p_hwfn, ECORE_MSG_DCB,
+       DP_VERBOSE(p_hwfn, ECORE_MSG_SP,
                   "UFP shmem config: mode = %d tc = %d pri_type = %d\n",
                   p_hwfn->ufp_info.mode, p_hwfn->ufp_info.tc,
                   p_hwfn->ufp_info.pri_type);
@@ -2041,6 +2111,9 @@ enum _ecore_status_t ecore_mcp_handle_events(struct ecore_hwfn *p_hwfn,
                case MFW_DRV_MSG_BW_UPDATE:
                        ecore_mcp_update_bw(p_hwfn, p_ptt);
                        break;
+               case MFW_DRV_MSG_S_TAG_UPDATE:
+                       ecore_mcp_update_stag(p_hwfn, p_ptt);
+                       break;
                case MFW_DRV_MSG_FAILURE_DETECTED:
                        ecore_mcp_handle_fan_failure(p_hwfn);
                        break;
@@ -2155,8 +2228,10 @@ enum _ecore_status_t ecore_mcp_get_media_type(struct ecore_hwfn *p_hwfn,
 
 enum _ecore_status_t ecore_mcp_get_transceiver_data(struct ecore_hwfn *p_hwfn,
                                                    struct ecore_ptt *p_ptt,
-                                                   u32 *p_tranceiver_type)
+                                                   u32 *p_transceiver_state,
+                                                   u32 *p_transceiver_type)
 {
+       u32 transceiver_info;
        enum _ecore_status_t rc = ECORE_SUCCESS;
 
        /* TODO - Add support for VFs */
@@ -2167,14 +2242,23 @@ enum _ecore_status_t ecore_mcp_get_transceiver_data(struct ecore_hwfn *p_hwfn,
                DP_NOTICE(p_hwfn, false, "MFW is not initialized!\n");
                return ECORE_BUSY;
        }
-       if (!p_ptt) {
-               *p_tranceiver_type = ETH_TRANSCEIVER_TYPE_NONE;
-               rc = ECORE_INVAL;
+
+       *p_transceiver_type = ETH_TRANSCEIVER_TYPE_NONE;
+       *p_transceiver_state = ETH_TRANSCEIVER_STATE_UPDATING;
+
+       transceiver_info = ecore_rd(p_hwfn, p_ptt,
+                                   p_hwfn->mcp_info->port_addr +
+                                   offsetof(struct public_port,
+                                   transceiver_data));
+
+       *p_transceiver_state = GET_MFW_FIELD(transceiver_info,
+                                            ETH_TRANSCEIVER_STATE);
+
+       if (*p_transceiver_state == ETH_TRANSCEIVER_STATE_PRESENT) {
+               *p_transceiver_type = GET_MFW_FIELD(transceiver_info,
+                                           ETH_TRANSCEIVER_TYPE);
        } else {
-               *p_tranceiver_type = ecore_rd(p_hwfn, p_ptt,
-                               p_hwfn->mcp_info->port_addr +
-                               offsetof(struct public_port,
-                                       transceiver_data));
+               *p_transceiver_type = ETH_TRANSCEIVER_TYPE_UNKNOWN;
        }
 
        return rc;
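Illustrative only: a caller consuming the split state/type output of the
reworked ecore_mcp_get_transceiver_data(). ETH_TRANSCEIVER_STATE_PRESENT is
the same MFW constant the implementation above tests.

/* Sketch: report whether a transceiver module is plugged in. */
static bool example_transceiver_present(struct ecore_hwfn *p_hwfn,
					struct ecore_ptt *p_ptt)
{
	u32 state = 0, type = 0;

	if (ecore_mcp_get_transceiver_data(p_hwfn, p_ptt,
					   &state, &type) != ECORE_SUCCESS)
		return false;

	return state == ETH_TRANSCEIVER_STATE_PRESENT;
}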
@@ -2194,15 +2278,11 @@ enum _ecore_status_t ecore_mcp_trans_speed_mask(struct ecore_hwfn *p_hwfn,
                                                struct ecore_ptt *p_ptt,
                                                u32 *p_speed_mask)
 {
-       u32 transceiver_data, transceiver_type, transceiver_state;
+       u32 transceiver_type, transceiver_state;
 
-       ecore_mcp_get_transceiver_data(p_hwfn, p_ptt, &transceiver_data);
+       ecore_mcp_get_transceiver_data(p_hwfn, p_ptt, &transceiver_state,
+                                      &transceiver_type);
 
-       transceiver_state = GET_MFW_FIELD(transceiver_data,
-                           ETH_TRANSCEIVER_STATE);
-
-       transceiver_type = GET_MFW_FIELD(transceiver_data,
-                          ETH_TRANSCEIVER_TYPE);
 
        if (is_transceiver_ready(transceiver_state, transceiver_type) == 0)
                return ECORE_INVAL;
@@ -2823,10 +2903,72 @@ ecore_mcp_ov_get_fc_npiv(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
 }
 
 enum _ecore_status_t
-ecore_mcp_ov_update_mtu(struct ecore_hwfn *p_hwfn,
-                       struct ecore_ptt *p_ptt, u16 mtu)
+ecore_mcp_ov_update_mtu(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                       u16 mtu)
 {
-       return 0;
+       u32 resp = 0, param = 0, drv_mb_param = 0;
+       enum _ecore_status_t rc;
+
+       SET_MFW_FIELD(drv_mb_param, DRV_MB_PARAM_OV_MTU_SIZE, (u32)mtu);
+       rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_MTU,
+                          drv_mb_param, &resp, &param);
+       if (rc != ECORE_SUCCESS)
+               DP_ERR(p_hwfn, "Failed to send mtu value, rc = %d\n", rc);
+
+       return rc;
+}
+
+enum _ecore_status_t
+ecore_mcp_ov_update_mac(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                       u8 *mac)
+{
+       struct ecore_mcp_mb_params mb_params;
+       union drv_union_data union_data;
+       enum _ecore_status_t rc;
+
+       OSAL_MEM_ZERO(&mb_params, sizeof(mb_params));
+       mb_params.cmd = DRV_MSG_CODE_SET_VMAC;
+       SET_MFW_FIELD(mb_params.param, DRV_MSG_CODE_VMAC_TYPE,
+                     DRV_MSG_CODE_VMAC_TYPE_MAC);
+       mb_params.param |= MCP_PF_ID(p_hwfn);
+       OSAL_MEMCPY(&union_data.raw_data, mac, ETH_ALEN);
+       mb_params.p_data_src = &union_data;
+       rc = ecore_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+       if (rc != ECORE_SUCCESS)
+               DP_ERR(p_hwfn, "Failed to send mac address, rc = %d\n", rc);
+
+       return rc;
+}
+
+enum _ecore_status_t
+ecore_mcp_ov_update_eswitch(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                           enum ecore_ov_eswitch eswitch)
+{
+       enum _ecore_status_t rc;
+       u32 resp = 0, param = 0;
+       u32 drv_mb_param;
+
+       switch (eswitch) {
+       case ECORE_OV_ESWITCH_NONE:
+               drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_NONE;
+               break;
+       case ECORE_OV_ESWITCH_VEB:
+               drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_VEB;
+               break;
+       case ECORE_OV_ESWITCH_VEPA:
+               drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_VEPA;
+               break;
+       default:
+               DP_ERR(p_hwfn, "Invalid eswitch mode %d\n", eswitch);
+               return ECORE_INVAL;
+       }
+
+       rc = ecore_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE,
+                          drv_mb_param, &resp, &param);
+       if (rc != ECORE_SUCCESS)
+               DP_ERR(p_hwfn, "Failed to send eswitch mode, rc = %d\n", rc);
+
+       return rc;
 }
 
 enum _ecore_status_t ecore_mcp_set_led(struct ecore_hwfn *p_hwfn,
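Not from the patch: a minimal sketch of pushing OV state to the MFW with the
three helpers implemented above. The MAC buffer, MTU value, and the choice of
VEB mode are caller-supplied placeholders.

/* Sketch: sync driver-visible MTU, MAC, and eswitch mode to the MFW. */
static void example_ov_sync(struct ecore_hwfn *p_hwfn,
			    struct ecore_ptt *p_ptt,
			    u8 *dev_mac, u16 dev_mtu)
{
	/* Each helper logs its own DP_ERR on failure, so return codes
	 * are ignored here unless the caller wants to act on them.
	 */
	(void)ecore_mcp_ov_update_mtu(p_hwfn, p_ptt, dev_mtu);
	(void)ecore_mcp_ov_update_mac(p_hwfn, p_ptt, dev_mac);
	(void)ecore_mcp_ov_update_eswitch(p_hwfn, p_ptt,
					  ECORE_OV_ESWITCH_VEB);
}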
@@ -2938,11 +3080,11 @@ enum _ecore_status_t ecore_mcp_nvm_read(struct ecore_dev *p_dev, u32 addr,
 }
 
 enum _ecore_status_t ecore_mcp_phy_read(struct ecore_dev *p_dev, u32 cmd,
-                                       u32 addr, u8 *p_buf, u32 len)
+                                       u32 addr, u8 *p_buf, u32 *p_len)
 {
        struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
        struct ecore_ptt *p_ptt;
-       u32 resp, param;
+       u32 resp = 0, param;
        enum _ecore_status_t rc;
 
        p_ptt = ecore_ptt_acquire(p_hwfn);
@@ -2953,7 +3095,7 @@ enum _ecore_status_t ecore_mcp_phy_read(struct ecore_dev *p_dev, u32 cmd,
                                  (cmd == ECORE_PHY_CORE_READ) ?
                                  DRV_MSG_CODE_PHY_CORE_READ :
                                  DRV_MSG_CODE_PHY_RAW_READ,
-                                 addr, &resp, &param, &len, (u32 *)p_buf);
+                                 addr, &resp, &param, p_len, (u32 *)p_buf);
        if (rc != ECORE_SUCCESS)
                DP_NOTICE(p_dev, false, "MCP command rc = %d\n", rc);
 
@@ -2982,7 +3124,7 @@ enum _ecore_status_t ecore_mcp_nvm_del_file(struct ecore_dev *p_dev, u32 addr)
 {
        struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
        struct ecore_ptt *p_ptt;
-       u32 resp, param;
+       u32 resp = 0, param;
        enum _ecore_status_t rc;
 
        p_ptt = ecore_ptt_acquire(p_hwfn);
@@ -3001,7 +3143,7 @@ enum _ecore_status_t ecore_mcp_nvm_put_file_begin(struct ecore_dev *p_dev,
 {
        struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
        struct ecore_ptt *p_ptt;
-       u32 resp, param;
+       u32 resp = 0, param;
        enum _ecore_status_t rc;
 
        p_ptt = ecore_ptt_acquire(p_hwfn);
@@ -3095,8 +3237,8 @@ enum _ecore_status_t ecore_mcp_phy_write(struct ecore_dev *p_dev, u32 cmd,
                                         u32 addr, u8 *p_buf, u32 len)
 {
        struct ecore_hwfn *p_hwfn = ECORE_LEADING_HWFN(p_dev);
+       u32 resp = 0, param, nvm_cmd;
        struct ecore_ptt *p_ptt;
-       u32 resp, param, nvm_cmd;
        enum _ecore_status_t rc;
 
        p_ptt = ecore_ptt_acquire(p_hwfn);
@@ -4002,13 +4144,83 @@ ecore_mcp_drv_attribute(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
        return ECORE_SUCCESS;
 }
 
+enum _ecore_status_t ecore_mcp_get_engine_config(struct ecore_hwfn *p_hwfn,
+                                                struct ecore_ptt *p_ptt)
+{
+       struct ecore_dev *p_dev = p_hwfn->p_dev;
+       struct ecore_mcp_mb_params mb_params;
+       u8 fir_valid, l2_valid;
+       enum _ecore_status_t rc;
+
+       OSAL_MEM_ZERO(&mb_params, sizeof(mb_params));
+       mb_params.cmd = DRV_MSG_CODE_GET_ENGINE_CONFIG;
+       rc = ecore_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+       if (rc != ECORE_SUCCESS)
+               return rc;
+
+       if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+               DP_INFO(p_hwfn,
+                       "The get_engine_config command is unsupported by the MFW\n");
+               return ECORE_NOTIMPL;
+       }
+
+       fir_valid = GET_MFW_FIELD(mb_params.mcp_param,
+                                 FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID);
+       if (fir_valid)
+               p_dev->fir_affin =
+                       GET_MFW_FIELD(mb_params.mcp_param,
+                                     FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE);
+
+       l2_valid = GET_MFW_FIELD(mb_params.mcp_param,
+                                FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID);
+       if (l2_valid)
+               p_dev->l2_affin_hint =
+                       GET_MFW_FIELD(mb_params.mcp_param,
+                                     FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE);
+
+       DP_INFO(p_hwfn,
+               "Engine affinity config: FIR={valid %hhd, value %hhd}, L2_hint={valid %hhd, value %hhd}\n",
+               fir_valid, p_dev->fir_affin, l2_valid, p_dev->l2_affin_hint);
+
+       return ECORE_SUCCESS;
+}
+
+enum _ecore_status_t ecore_mcp_get_ppfid_bitmap(struct ecore_hwfn *p_hwfn,
+                                               struct ecore_ptt *p_ptt)
+{
+       struct ecore_dev *p_dev = p_hwfn->p_dev;
+       struct ecore_mcp_mb_params mb_params;
+       enum _ecore_status_t rc;
+
+       OSAL_MEM_ZERO(&mb_params, sizeof(mb_params));
+       mb_params.cmd = DRV_MSG_CODE_GET_PPFID_BITMAP;
+       rc = ecore_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+       if (rc != ECORE_SUCCESS)
+               return rc;
+
+       if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+               DP_INFO(p_hwfn,
+                       "The get_ppfid_bitmap command is unsupported by the MFW\n");
+               return ECORE_NOTIMPL;
+       }
+
+       p_dev->ppfid_bitmap = GET_MFW_FIELD(mb_params.mcp_param,
+                                           FW_MB_PARAM_PPFID_BITMAP);
+
+       DP_VERBOSE(p_hwfn, ECORE_MSG_SP, "PPFID bitmap 0x%hhx\n",
+                  p_dev->ppfid_bitmap);
+
+       return ECORE_SUCCESS;
+}
+
 void ecore_mcp_wol_wr(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
                      u32 offset, u32 val)
 {
-       struct ecore_mcp_mb_params mb_params = {0};
        enum _ecore_status_t       rc = ECORE_SUCCESS;
        u32                        dword = val;
+       struct ecore_mcp_mb_params mb_params;
 
+       OSAL_MEMSET(&mb_params, 0, sizeof(struct ecore_mcp_mb_params));
        mb_params.cmd = DRV_MSG_CODE_WRITE_WOL_REG;
        mb_params.param = offset;
        mb_params.p_data_src = &dword;
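A hedged init-time sketch for the two new query commands added above. The
placement in the init flow is an assumption; ECORE_NOTIMPL simply means the
running MFW predates the command.

/* Sketch: query optional engine-affinity and PPFID info during init. */
static void example_read_engine_info(struct ecore_hwfn *p_hwfn,
				     struct ecore_ptt *p_ptt)
{
	if (ecore_mcp_get_engine_config(p_hwfn, p_ptt) == ECORE_NOTIMPL)
		DP_INFO(p_hwfn, "engine config not exposed by MFW\n");

	if (ecore_mcp_get_ppfid_bitmap(p_hwfn, p_ptt) == ECORE_NOTIMPL)
		DP_INFO(p_hwfn, "PPFID bitmap not exposed by MFW\n");

	/* On success the results land in p_hwfn->p_dev (fir_affin,
	 * l2_affin_hint, ppfid_bitmap), as the implementations above show.
	 */
}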
index 8e12531..2c052b7 100644 (file)
@@ -25,9 +25,6 @@
                                             rel_pfid)
 #define MCP_PF_ID(p_hwfn)      MCP_PF_ID_BY_REL(p_hwfn, (p_hwfn)->rel_pf_id)
 
-#define MFW_PORT(_p_hwfn)      ((_p_hwfn)->abs_pf_id % \
-                                ecore_device_num_ports((_p_hwfn)->p_dev))
-
 struct ecore_mcp_info {
        /* List for mailbox commands which were sent and wait for a response */
        osal_list_t cmd_list;
@@ -566,4 +563,22 @@ ecore_mcp_read_ufp_config(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt);
 void ecore_mcp_wol_wr(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
                      u32 offset, u32 val);
 
+/**
+ * @brief Get the engine affinity configuration.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+enum _ecore_status_t ecore_mcp_get_engine_config(struct ecore_hwfn *p_hwfn,
+                                                struct ecore_ptt *p_ptt);
+
+/**
+ * @brief Get the PPFID bitmap.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ */
+enum _ecore_status_t ecore_mcp_get_ppfid_bitmap(struct ecore_hwfn *p_hwfn,
+                                               struct ecore_ptt *p_ptt);
+
 #endif /* __ECORE_MCP_H__ */
index cfb9f99..7327074 100644 (file)
@@ -185,6 +185,12 @@ enum ecore_ov_driver_state {
        ECORE_OV_DRIVER_STATE_ACTIVE
 };
 
+enum ecore_ov_eswitch {
+       ECORE_OV_ESWITCH_NONE,
+       ECORE_OV_ESWITCH_VEB,
+       ECORE_OV_ESWITCH_VEPA
+};
+
 #define ECORE_MAX_NPIV_ENTRIES 128
 #define ECORE_WWN_SIZE 8
 struct ecore_fc_npiv_tbl {
@@ -521,6 +527,10 @@ union ecore_mfw_tlv_data {
        struct ecore_mfw_tlv_iscsi iscsi;
 };
 
+enum ecore_hw_info_change {
+       ECORE_HW_INFO_CHANGE_OVLAN,
+};
+
 /**
  * @brief - returns the link params of the hw function
  *
@@ -597,6 +607,7 @@ enum _ecore_status_t ecore_mcp_get_media_type(struct ecore_hwfn *p_hwfn,
  *
  * @param p_dev      - ecore dev pointer
  * @param p_ptt
+ * @param p_transceiver_state - transceiver state.
  * @param p_transceiver_type - media type value
  *
  * @return enum _ecore_status_t -
@@ -605,6 +616,7 @@ enum _ecore_status_t ecore_mcp_get_media_type(struct ecore_hwfn *p_hwfn,
  */
 enum _ecore_status_t ecore_mcp_get_transceiver_data(struct ecore_hwfn *p_hwfn,
                                                    struct ecore_ptt *p_ptt,
+                                                   u32 *p_transceiver_state,
                                                    u32 *p_tranceiver_type);
 
 /**
@@ -809,6 +821,32 @@ ecore_mcp_ov_get_fc_npiv(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
 enum _ecore_status_t ecore_mcp_ov_update_mtu(struct ecore_hwfn *p_hwfn,
                                             struct ecore_ptt *p_ptt, u16 mtu);
 
+/**
+ * @brief Send MAC address to MFW
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param mac - MAC address
+ *
+ * @return enum _ecore_status_t - ECORE_SUCCESS - operation was successful.
+ */
+enum _ecore_status_t
+ecore_mcp_ov_update_mac(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                       u8 *mac);
+
+/**
+ * @brief Send eswitch mode to MFW
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param eswitch - eswitch mode
+ *
+ * @return enum _ecore_status_t - ECORE_SUCCESS - operation was successful.
+ */
+enum _ecore_status_t
+ecore_mcp_ov_update_eswitch(struct ecore_hwfn *p_hwfn, struct ecore_ptt *p_ptt,
+                           enum ecore_ov_eswitch eswitch);
+
 /**
  * @brief Set LED status
  *
@@ -905,7 +943,7 @@ enum _ecore_status_t ecore_mcp_nvm_resp(struct ecore_dev *p_dev, u8 *p_buf);
  * @return enum _ecore_status_t - ECORE_SUCCESS - operation was successful.
  */
 enum _ecore_status_t ecore_mcp_phy_read(struct ecore_dev *p_dev, u32 cmd,
-                                       u32 addr, u8 *p_buf, u32 len);
+                                       u32 addr, u8 *p_buf, u32 *p_len);
 
 /**
  * @brief Read from nvm
index 721b8c1..3860e1a 100644 (file)
 #define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET                   39769
 #define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE                     16
 #define NIG_REG_TX_EDPM_CTRL_RT_OFFSET                              39785
-#define NIG_REG_ROCE_DUPLICATE_TO_HOST_RT_OFFSET                    39786
-#define NIG_REG_PPF_TO_ENGINE_SEL_RT_OFFSET                         39787
+#define NIG_REG_PPF_TO_ENGINE_SEL_RT_OFFSET                         39786
 #define NIG_REG_PPF_TO_ENGINE_SEL_RT_SIZE                           8
-#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_VALUE_RT_OFFSET              39795
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_VALUE_RT_OFFSET              39794
 #define NIG_REG_LLH_PF_CLS_FUNC_FILTER_VALUE_RT_SIZE                1024
-#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_EN_RT_OFFSET                 40819
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_EN_RT_OFFSET                 40818
 #define NIG_REG_LLH_PF_CLS_FUNC_FILTER_EN_RT_SIZE                   512
-#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_MODE_RT_OFFSET               41331
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_MODE_RT_OFFSET               41330
 #define NIG_REG_LLH_PF_CLS_FUNC_FILTER_MODE_RT_SIZE                 512
-#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET      41843
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET      41842
 #define NIG_REG_LLH_PF_CLS_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE        512
-#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_HDR_SEL_RT_OFFSET            42355
+#define NIG_REG_LLH_PF_CLS_FUNC_FILTER_HDR_SEL_RT_OFFSET            42354
 #define NIG_REG_LLH_PF_CLS_FUNC_FILTER_HDR_SEL_RT_SIZE              512
-#define NIG_REG_LLH_PF_CLS_FILTERS_MAP_RT_OFFSET                    42867
+#define NIG_REG_LLH_PF_CLS_FILTERS_MAP_RT_OFFSET                    42866
 #define NIG_REG_LLH_PF_CLS_FILTERS_MAP_RT_SIZE                      32
-#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET                           42899
-#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET                           42900
-#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET                           42901
-#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET                       42902
-#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET                       42903
-#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET                       42904
-#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET                       42905
-#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET                    42906
-#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET                    42907
-#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET                    42908
-#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET                    42909
-#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET                        42910
-#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET                     42911
-#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET                           42912
-#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET                      42913
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET                    42914
-#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET                       42915
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET                42916
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET                    42917
-#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET                       42918
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET                42919
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET                    42920
-#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET                       42921
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET                42922
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET                    42923
-#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET                       42924
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET                42925
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET                    42926
-#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET                       42927
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET                42928
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET                    42929
-#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET                       42930
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET                42931
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET                    42932
-#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET                       42933
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET                42934
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET                    42935
-#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET                       42936
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET                42937
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET                    42938
-#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET                       42939
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET                42940
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET                    42941
-#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET                       42942
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET                42943
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET                   42944
-#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET                      42945
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET               42946
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET                   42947
-#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET                      42948
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET               42949
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET                   42950
-#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET                      42951
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET               42952
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET                   42953
-#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET                      42954
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET               42955
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET                   42956
-#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET                      42957
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET               42958
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET                   42959
-#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET                      42960
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET               42961
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET                   42962
-#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET                      42963
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET               42964
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET                   42965
-#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET                      42966
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET               42967
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET                   42968
-#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET                      42969
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET               42970
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET                   42971
-#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET                      42972
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET               42973
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ20_RT_OFFSET                   42974
-#define PBF_REG_BTB_GUARANTEED_VOQ20_RT_OFFSET                      42975
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ20_RT_OFFSET               42976
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ21_RT_OFFSET                   42977
-#define PBF_REG_BTB_GUARANTEED_VOQ21_RT_OFFSET                      42978
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ21_RT_OFFSET               42979
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ22_RT_OFFSET                   42980
-#define PBF_REG_BTB_GUARANTEED_VOQ22_RT_OFFSET                      42981
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ22_RT_OFFSET               42982
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ23_RT_OFFSET                   42983
-#define PBF_REG_BTB_GUARANTEED_VOQ23_RT_OFFSET                      42984
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ23_RT_OFFSET               42985
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ24_RT_OFFSET                   42986
-#define PBF_REG_BTB_GUARANTEED_VOQ24_RT_OFFSET                      42987
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ24_RT_OFFSET               42988
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ25_RT_OFFSET                   42989
-#define PBF_REG_BTB_GUARANTEED_VOQ25_RT_OFFSET                      42990
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ25_RT_OFFSET               42991
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ26_RT_OFFSET                   42992
-#define PBF_REG_BTB_GUARANTEED_VOQ26_RT_OFFSET                      42993
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ26_RT_OFFSET               42994
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ27_RT_OFFSET                   42995
-#define PBF_REG_BTB_GUARANTEED_VOQ27_RT_OFFSET                      42996
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ27_RT_OFFSET               42997
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ28_RT_OFFSET                   42998
-#define PBF_REG_BTB_GUARANTEED_VOQ28_RT_OFFSET                      42999
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ28_RT_OFFSET               43000
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ29_RT_OFFSET                   43001
-#define PBF_REG_BTB_GUARANTEED_VOQ29_RT_OFFSET                      43002
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ29_RT_OFFSET               43003
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ30_RT_OFFSET                   43004
-#define PBF_REG_BTB_GUARANTEED_VOQ30_RT_OFFSET                      43005
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ30_RT_OFFSET               43006
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ31_RT_OFFSET                   43007
-#define PBF_REG_BTB_GUARANTEED_VOQ31_RT_OFFSET                      43008
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ31_RT_OFFSET               43009
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ32_RT_OFFSET                   43010
-#define PBF_REG_BTB_GUARANTEED_VOQ32_RT_OFFSET                      43011
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ32_RT_OFFSET               43012
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ33_RT_OFFSET                   43013
-#define PBF_REG_BTB_GUARANTEED_VOQ33_RT_OFFSET                      43014
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ33_RT_OFFSET               43015
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ34_RT_OFFSET                   43016
-#define PBF_REG_BTB_GUARANTEED_VOQ34_RT_OFFSET                      43017
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ34_RT_OFFSET               43018
-#define PBF_REG_YCMD_QS_NUM_LINES_VOQ35_RT_OFFSET                   43019
-#define PBF_REG_BTB_GUARANTEED_VOQ35_RT_OFFSET                      43020
-#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ35_RT_OFFSET               43021
-#define XCM_REG_CON_PHY_Q3_RT_OFFSET                                43022
+#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET                           42898
+#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET                           42899
+#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET                           42900
+#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET                       42901
+#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET                       42902
+#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET                       42903
+#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET                       42904
+#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET                    42905
+#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET                    42906
+#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET                    42907
+#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET                    42908
+#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET                        42909
+#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET                     42910
+#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET                           42911
+#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET                      42912
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET                    42913
+#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET                       42914
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET                42915
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET                    42916
+#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET                       42917
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET                42918
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET                    42919
+#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET                       42920
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET                42921
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET                    42922
+#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET                       42923
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET                42924
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET                    42925
+#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET                       42926
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET                42927
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET                    42928
+#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET                       42929
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET                42930
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET                    42931
+#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET                       42932
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET                42933
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET                    42934
+#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET                       42935
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET                42936
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET                    42937
+#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET                       42938
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET                42939
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET                    42940
+#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET                       42941
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET                42942
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET                   42943
+#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET                      42944
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET               42945
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET                   42946
+#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET                      42947
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET               42948
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET                   42949
+#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET                      42950
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET               42951
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET                   42952
+#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET                      42953
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET               42954
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET                   42955
+#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET                      42956
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET               42957
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET                   42958
+#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET                      42959
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET               42960
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET                   42961
+#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET                      42962
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET               42963
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET                   42964
+#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET                      42965
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET               42966
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET                   42967
+#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET                      42968
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET               42969
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET                   42970
+#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET                      42971
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET               42972
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ20_RT_OFFSET                   42973
+#define PBF_REG_BTB_GUARANTEED_VOQ20_RT_OFFSET                      42974
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ20_RT_OFFSET               42975
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ21_RT_OFFSET                   42976
+#define PBF_REG_BTB_GUARANTEED_VOQ21_RT_OFFSET                      42977
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ21_RT_OFFSET               42978
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ22_RT_OFFSET                   42979
+#define PBF_REG_BTB_GUARANTEED_VOQ22_RT_OFFSET                      42980
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ22_RT_OFFSET               42981
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ23_RT_OFFSET                   42982
+#define PBF_REG_BTB_GUARANTEED_VOQ23_RT_OFFSET                      42983
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ23_RT_OFFSET               42984
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ24_RT_OFFSET                   42985
+#define PBF_REG_BTB_GUARANTEED_VOQ24_RT_OFFSET                      42986
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ24_RT_OFFSET               42987
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ25_RT_OFFSET                   42988
+#define PBF_REG_BTB_GUARANTEED_VOQ25_RT_OFFSET                      42989
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ25_RT_OFFSET               42990
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ26_RT_OFFSET                   42991
+#define PBF_REG_BTB_GUARANTEED_VOQ26_RT_OFFSET                      42992
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ26_RT_OFFSET               42993
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ27_RT_OFFSET                   42994
+#define PBF_REG_BTB_GUARANTEED_VOQ27_RT_OFFSET                      42995
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ27_RT_OFFSET               42996
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ28_RT_OFFSET                   42997
+#define PBF_REG_BTB_GUARANTEED_VOQ28_RT_OFFSET                      42998
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ28_RT_OFFSET               42999
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ29_RT_OFFSET                   43000
+#define PBF_REG_BTB_GUARANTEED_VOQ29_RT_OFFSET                      43001
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ29_RT_OFFSET               43002
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ30_RT_OFFSET                   43003
+#define PBF_REG_BTB_GUARANTEED_VOQ30_RT_OFFSET                      43004
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ30_RT_OFFSET               43005
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ31_RT_OFFSET                   43006
+#define PBF_REG_BTB_GUARANTEED_VOQ31_RT_OFFSET                      43007
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ31_RT_OFFSET               43008
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ32_RT_OFFSET                   43009
+#define PBF_REG_BTB_GUARANTEED_VOQ32_RT_OFFSET                      43010
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ32_RT_OFFSET               43011
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ33_RT_OFFSET                   43012
+#define PBF_REG_BTB_GUARANTEED_VOQ33_RT_OFFSET                      43013
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ33_RT_OFFSET               43014
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ34_RT_OFFSET                   43015
+#define PBF_REG_BTB_GUARANTEED_VOQ34_RT_OFFSET                      43016
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ34_RT_OFFSET               43017
+#define PBF_REG_YCMD_QS_NUM_LINES_VOQ35_RT_OFFSET                   43018
+#define PBF_REG_BTB_GUARANTEED_VOQ35_RT_OFFSET                      43019
+#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ35_RT_OFFSET               43020
+#define XCM_REG_CON_PHY_Q3_RT_OFFSET                                43021
 
-#define RUNTIME_ARRAY_SIZE 43023
+#define RUNTIME_ARRAY_SIZE 43022
 
 /* Init Callbacks */
 #define DMAE_READY_CB                                               0
index b43baf9..49a5ff5 100644 (file)
@@ -515,6 +515,10 @@ enum _ecore_status_t ecore_sp_rl_update(struct ecore_hwfn *p_hwfn,
        rl_update->rl_id_first = params->rl_id_first;
        rl_update->rl_id_last = params->rl_id_last;
        rl_update->rl_dc_qcn_flg = params->rl_dc_qcn_flg;
+       rl_update->dcqcn_reset_alpha_on_idle =
+               params->dcqcn_reset_alpha_on_idle;
+       rl_update->rl_bc_stage_th = params->rl_bc_stage_th;
+       rl_update->rl_timer_stage_th = params->rl_timer_stage_th;
        rl_update->rl_bc_rate = OSAL_CPU_TO_LE32(params->rl_bc_rate);
        rl_update->rl_max_rate =
                OSAL_CPU_TO_LE16(ecore_sp_rl_mb_to_qm(params->rl_max_rate));
@@ -529,12 +533,14 @@ enum _ecore_status_t ecore_sp_rl_update(struct ecore_hwfn *p_hwfn,
                OSAL_CPU_TO_LE32(params->dcqcn_timeuot_us);
        rl_update->qcn_timeuot_us = OSAL_CPU_TO_LE32(params->qcn_timeuot_us);
 
-       DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ, "rl_params: qcn_update_param_flg %x, dcqcn_update_param_flg %x, rl_init_flg %x, rl_start_flg %x, rl_stop_flg %x, rl_id_first %x, rl_id_last %x, rl_dc_qcn_flg %x, rl_bc_rate %x, rl_max_rate %x, rl_r_ai %x, rl_r_hai %x, dcqcn_g %x, dcqcn_k_us %x, dcqcn_timeuot_us %x, qcn_timeuot_us %x\n",
+       DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ, "rl_params: qcn_update_param_flg %x, dcqcn_update_param_flg %x, rl_init_flg %x, rl_start_flg %x, rl_stop_flg %x, rl_id_first %x, rl_id_last %x, rl_dc_qcn_flg %x, dcqcn_reset_alpha_on_idle %x, rl_bc_stage_th %x, rl_timer_stage_th %x, rl_bc_rate %x, rl_max_rate %x, rl_r_ai %x, rl_r_hai %x, dcqcn_g %x, dcqcn_k_us %x, dcqcn_timeuot_us %x, qcn_timeuot_us %x\n",
                   rl_update->qcn_update_param_flg,
                   rl_update->dcqcn_update_param_flg,
                   rl_update->rl_init_flg, rl_update->rl_start_flg,
                   rl_update->rl_stop_flg, rl_update->rl_id_first,
                   rl_update->rl_id_last, rl_update->rl_dc_qcn_flg,
+                  rl_update->dcqcn_reset_alpha_on_idle,
+                  rl_update->rl_bc_stage_th, rl_update->rl_timer_stage_th,
                   rl_update->rl_bc_rate, rl_update->rl_max_rate,
                   rl_update->rl_r_ai, rl_update->rl_r_hai,
                   rl_update->dcqcn_g, rl_update->dcqcn_k_us,
index e57414c..524fe57 100644 (file)
@@ -119,6 +119,9 @@ struct ecore_rl_update_params {
        u8 rl_stop_flg;
        u8 rl_id_first;
        u8 rl_id_last;
+       u8 dcqcn_reset_alpha_on_idle;
+       u8 rl_bc_stage_th;
+       u8 rl_timer_stage_th;
        u8 rl_dc_qcn_flg; /* If set, RL will used for DCQCN */
        u32 rl_bc_rate; /* Byte Counter Limit */
        u32 rl_max_rate; /* Maximum rate in Mbps resolution */
index 776c86f..88ad961 100644 (file)
@@ -282,20 +282,30 @@ ecore_async_event_completion(struct ecore_hwfn *p_hwfn,
                             struct event_ring_entry *p_eqe)
 {
        ecore_spq_async_comp_cb cb;
+       enum _ecore_status_t rc;
 
-       if (!p_hwfn->p_spq || (p_eqe->protocol_id >= MAX_PROTOCOL_TYPE))
+       if (p_eqe->protocol_id >= MAX_PROTOCOL_TYPE) {
+               DP_ERR(p_hwfn, "Wrong protocol: %d\n", p_eqe->protocol_id);
                return ECORE_INVAL;
+       }
 
        cb = p_hwfn->p_spq->async_comp_cb[p_eqe->protocol_id];
-       if (cb) {
-               return cb(p_hwfn, p_eqe->opcode, p_eqe->echo,
-                         &p_eqe->data, p_eqe->fw_return_code);
-       } else {
+       if (!cb) {
                DP_NOTICE(p_hwfn,
                          true, "Unknown Async completion for protocol: %d\n",
                          p_eqe->protocol_id);
                return ECORE_INVAL;
        }
+
+       rc = cb(p_hwfn, p_eqe->opcode, p_eqe->echo,
+               &p_eqe->data, p_eqe->fw_return_code);
+       if (rc != ECORE_SUCCESS)
+               DP_NOTICE(p_hwfn, true,
+                         "Async completion callback failed, rc = %d [opcode %x, echo %x, fw_return_code %x]\n",
+                         rc, p_eqe->opcode, p_eqe->echo,
+                         p_eqe->fw_return_code);
+
+       return rc;
 }
 
 enum _ecore_status_t
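For context only: the shape of a protocol async-completion callback matching
the dispatch above. The callback parameter types mirror the
ecore_sriov_eqe_event prototype later in this patch, and
ecore_spq_register_async_cb() is assumed from the surrounding ecore code, not
shown here.

/* Sketch: an async EQ callback; a non-ECORE_SUCCESS return now triggers
 * the DP_NOTICE added in ecore_async_event_completion().
 */
static enum _ecore_status_t
example_async_cb(struct ecore_hwfn *p_hwfn, u8 opcode, __le16 echo,
		 union event_ring_data *data, u8 fw_return_code)
{
	DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ,
		   "async event: opcode %x echo %x fw_ret %x\n",
		   opcode, OSAL_LE16_TO_CPU(echo), fw_return_code);
	return ECORE_SUCCESS;
}

/* Registration (assumed helper):
 * ecore_spq_register_async_cb(p_hwfn, PROTOCOLID_COMMON, example_async_cb);
 */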
@@ -339,10 +349,16 @@ enum _ecore_status_t ecore_eq_completion(struct ecore_hwfn *p_hwfn,
 {
        struct ecore_eq *p_eq = cookie;
        struct ecore_chain *p_chain = &p_eq->chain;
-       enum _ecore_status_t rc = 0;
+       u16 fw_cons_idx             = 0;
+       enum _ecore_status_t rc = ECORE_SUCCESS;
+
+       if (!p_hwfn->p_spq) {
+               DP_ERR(p_hwfn, "Unexpected NULL p_spq\n");
+               return ECORE_INVAL;
+       }
 
        /* take a snapshot of the FW consumer */
-       u16 fw_cons_idx = OSAL_LE16_TO_CPU(*p_eq->p_fw_cons);
+       fw_cons_idx = OSAL_LE16_TO_CPU(*p_eq->p_fw_cons);
 
        DP_VERBOSE(p_hwfn, ECORE_MSG_SPQ, "fw_cons_idx %x\n", fw_cons_idx);
 
@@ -358,7 +374,8 @@ enum _ecore_status_t ecore_eq_completion(struct ecore_hwfn *p_hwfn,
        while (fw_cons_idx != ecore_chain_get_cons_idx(p_chain)) {
                struct event_ring_entry *p_eqe = ecore_chain_consume(p_chain);
                if (!p_eqe) {
-                       rc = ECORE_INVAL;
+                       DP_ERR(p_hwfn,
+                              "Unexpected NULL chain consumer entry\n");
                        break;
                }
 
@@ -374,15 +391,13 @@ enum _ecore_status_t ecore_eq_completion(struct ecore_hwfn *p_hwfn,
                                                      */
                           p_eqe->flags);
 
-               if (GET_FIELD(p_eqe->flags, EVENT_RING_ENTRY_ASYNC)) {
-                       if (ecore_async_event_completion(p_hwfn, p_eqe))
-                               rc = ECORE_INVAL;
-               } else if (ecore_spq_completion(p_hwfn,
-                                               p_eqe->echo,
-                                               p_eqe->fw_return_code,
-                                               &p_eqe->data)) {
-                       rc = ECORE_INVAL;
-               }
+               if (GET_FIELD(p_eqe->flags, EVENT_RING_ENTRY_ASYNC))
+                       ecore_async_event_completion(p_hwfn, p_eqe);
+               else
+                       ecore_spq_completion(p_hwfn,
+                                            p_eqe->echo,
+                                            p_eqe->fw_return_code,
+                                            &p_eqe->data);
 
                ecore_chain_recycle_consumed(p_chain);
        }
@@ -928,12 +943,11 @@ enum _ecore_status_t ecore_spq_completion(struct ecore_hwfn *p_hwfn,
        struct ecore_spq_entry *found = OSAL_NULL;
        enum _ecore_status_t rc;
 
-       if (!p_hwfn)
-               return ECORE_INVAL;
-
        p_spq = p_hwfn->p_spq;
-       if (!p_spq)
+       if (!p_spq) {
+               DP_ERR(p_hwfn, "Unexpected NULL p_spq\n");
                return ECORE_INVAL;
+       }
 
        OSAL_SPIN_LOCK(&p_spq->lock);
        OSAL_LIST_FOR_EACH_ENTRY_SAFE(p_ent,
index f7ebf7a..7d73ef9 100644 (file)
@@ -31,7 +31,7 @@ static enum _ecore_status_t ecore_sriov_eqe_event(struct ecore_hwfn *p_hwfn,
                                                  union event_ring_data *data,
                                                  u8 fw_return_code);
 
-const char *ecore_channel_tlvs_string[] = {
+const char *qede_ecore_channel_tlvs_string[] = {
        "CHANNEL_TLV_NONE",     /* ends tlv sequence */
        "CHANNEL_TLV_ACQUIRE",
        "CHANNEL_TLV_VPORT_START",
@@ -218,7 +218,7 @@ struct ecore_vf_info *ecore_iov_get_vf_info(struct ecore_hwfn *p_hwfn,
 static struct ecore_queue_cid *
 ecore_iov_get_vf_rx_queue_cid(struct ecore_vf_queue *p_queue)
 {
-       int i;
+       u32 i;
 
        for (i = 0; i < MAX_QUEUES_PER_QZONE; i++) {
                if (p_queue->cids[i].p_cid &&
@@ -240,7 +240,7 @@ static bool ecore_iov_validate_queue_mode(struct ecore_vf_info *p_vf,
                                          enum ecore_iov_validate_q_mode mode,
                                          bool b_is_tx)
 {
-       int i;
+       u32 i;
 
        if (mode == ECORE_IOV_VALIDATE_Q_NA)
                return true;
@@ -979,10 +979,12 @@ static u8 ecore_iov_alloc_vf_igu_sbs(struct ecore_hwfn *p_hwfn,
                ecore_init_cau_sb_entry(p_hwfn, &sb_entry,
                                        p_hwfn->rel_pf_id,
                                        vf->abs_vf_id, 1);
+
                ecore_dmae_host2grc(p_hwfn, p_ptt,
                                    (u64)(osal_uintptr_t)&sb_entry,
                                    CAU_REG_SB_VAR_MEMORY +
-                                   p_block->igu_sb_id * sizeof(u64), 2, 0);
+                                   p_block->igu_sb_id * sizeof(u64), 2,
+                                   OSAL_NULL /* default parameters */);
        }
 
        vf->num_sbs = (u8)num_rx_queues;
@@ -1278,7 +1280,7 @@ static void ecore_iov_lock_vf_pf_channel(struct ecore_hwfn *p_hwfn,
                           ECORE_MSG_IOV,
                           "VF[%d]: vf pf channel locked by %s\n",
                           vf->abs_vf_id,
-                          ecore_channel_tlvs_string[tlv]);
+                          qede_ecore_channel_tlvs_string[tlv]);
        else
                DP_VERBOSE(p_hwfn,
                           ECORE_MSG_IOV,
@@ -1296,7 +1298,7 @@ static void ecore_iov_unlock_vf_pf_channel(struct ecore_hwfn *p_hwfn,
                           ECORE_MSG_IOV,
                           "VF[%d]: vf pf channel unlocked by %s\n",
                           vf->abs_vf_id,
-                          ecore_channel_tlvs_string[expected_tlv]);
+                          qede_ecore_channel_tlvs_string[expected_tlv]);
        else
                DP_VERBOSE(p_hwfn,
                           ECORE_MSG_IOV,
@@ -1336,7 +1338,7 @@ void ecore_dp_tlv_list(struct ecore_hwfn *p_hwfn, void *tlvs_list)
                if (ecore_iov_tlv_supported(tlv->type))
                        DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
                                   "TLV number %d: type %s, length %d\n",
-                                  i, ecore_channel_tlvs_string[tlv->type],
+                                  i, qede_ecore_channel_tlvs_string[tlv->type],
                                   tlv->length);
                else
                        DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
@@ -1968,7 +1970,8 @@ ecore_iov_configure_vport_forced(struct ecore_hwfn *p_hwfn,
                return ECORE_INVAL;
 
        if ((events & (1 << MAC_ADDR_FORCED)) ||
-           p_hwfn->pf_params.eth_pf_params.allow_vf_mac_change) {
+           p_hwfn->pf_params.eth_pf_params.allow_vf_mac_change ||
+           p_vf->p_vf_info.is_trusted_configured) {
                /* Since there's no way [currently] of removing the MAC,
                 * we can always assume this means we need to force it.
                 */
@@ -1989,7 +1992,8 @@ ecore_iov_configure_vport_forced(struct ecore_hwfn *p_hwfn,
                        return rc;
                }
 
-               if (p_hwfn->pf_params.eth_pf_params.allow_vf_mac_change)
+               if (p_hwfn->pf_params.eth_pf_params.allow_vf_mac_change ||
+                   p_vf->p_vf_info.is_trusted_configured)
                        p_vf->configured_features |=
                                1 << VFPF_BULLETIN_MAC_ADDR;
                else
@@ -2085,8 +2089,8 @@ static void ecore_iov_vf_mbx_start_vport(struct ecore_hwfn *p_hwfn,
                                         struct ecore_ptt *p_ptt,
                                         struct ecore_vf_info *vf)
 {
-       struct ecore_sp_vport_start_params params = { 0 };
        struct ecore_iov_vf_mbx *mbx = &vf->vf_mbx;
+       struct ecore_sp_vport_start_params params;
        struct vfpf_vport_start_tlv *start;
        u8 status = PFVF_STATUS_SUCCESS;
        struct ecore_vf_info *vf_info;
@@ -2137,6 +2141,7 @@ static void ecore_iov_vf_mbx_start_vport(struct ecore_hwfn *p_hwfn,
                *p_bitmap |= 1 << VFPF_BULLETIN_UNTAGGED_DEFAULT;
        }
 
+       OSAL_MEMSET(&params, 0, sizeof(struct ecore_sp_vport_start_params));
        params.tpa_mode = start->tpa_mode;
        params.remove_inner_vlan = start->inner_vlan_removal;
        params.tx_switching = true;
@@ -2156,7 +2161,9 @@ static void ecore_iov_vf_mbx_start_vport(struct ecore_hwfn *p_hwfn,
        params.vport_id = vf->vport_id;
        params.max_buffers_per_cqe = start->max_buffers_per_cqe;
        params.mtu = vf->mtu;
-       params.check_mac = true;
+
+       /* Non-trusted VFs should enable control frame filtering */
+       params.check_mac = !vf->p_vf_info.is_trusted_configured;
 
        rc = ecore_sp_eth_vport_start(p_hwfn, &params);
        if (rc != ECORE_SUCCESS) {
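
Replacing the `= { 0 }` declaration initializer with an OSAL_MEMSET just before the struct is filled in clears every byte, padding included, whereas a brace initializer is only guaranteed to zero the named members; it also keeps the clear adjacent to its use, after the early-exit checks. The commit itself does not state the motivation, but the semantic difference is easy to show:

    #include <string.h>
    #include <stdio.h>

    struct params { char mode; /* padding likely here */ long flags; };

    int main(void)
    {
            struct params a = { 0 };   /* members zeroed; padding bytes unspecified */
            struct params b;

            memset(&b, 0, sizeof(b));  /* every byte zeroed, padding included */
            printf("%d %ld\n", a.mode, b.flags);
            return 0;
    }
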
@@ -2912,7 +2919,7 @@ void *ecore_iov_search_list_tlvs(struct ecore_hwfn *p_hwfn,
                if (p_tlv->type == req_type) {
                        DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
                                   "Extended tlv type %s, length %d found\n",
-                                  ecore_channel_tlvs_string[p_tlv->type],
+                                  qede_ecore_channel_tlvs_string[p_tlv->type],
                                   p_tlv->length);
                        return p_tlv;
                }
@@ -3351,6 +3358,15 @@ ecore_iov_vf_update_mac_shadow(struct ecore_hwfn *p_hwfn,
        if (p_vf->bulletin.p_virt->valid_bitmap & (1 << MAC_ADDR_FORCED))
                return ECORE_SUCCESS;
 
+       /* Since we don't have the implementation of the logic for removing
+        * a forced MAC and restoring shadow MAC, let's not worry about
+        * processing shadow copies of MAC as long as VF trust mode is ON,
+        * to keep things simple.
+        */
+       if (p_hwfn->pf_params.eth_pf_params.allow_vf_mac_change ||
+           p_vf->p_vf_info.is_trusted_configured)
+               return ECORE_SUCCESS;
+
        /* First remove entries and then add new ones */
        if (p_params->opcode == ECORE_FILTER_REMOVE) {
                for (i = 0; i < ECORE_ETH_VF_NUM_MAC_FILTERS; i++) {
@@ -3653,7 +3669,7 @@ static void ecore_iov_vf_pf_set_coalesce(struct ecore_hwfn *p_hwfn,
        struct ecore_queue_cid *p_cid;
        u16 rx_coal, tx_coal;
        u16 qid;
-       int i;
+       u32 i;
 
        req = &mbx->req_virt->update_coalesce;
 
@@ -3733,7 +3749,8 @@ ecore_iov_pf_configure_vf_queue_coalesce(struct ecore_hwfn *p_hwfn,
        struct ecore_queue_cid *p_cid;
        struct ecore_vf_info *vf;
        struct ecore_ptt *p_ptt;
-       int i, rc = 0;
+       int rc = 0;
+       u32 i;
 
        if (!ecore_iov_is_valid_vfid(p_hwfn, vf_id, true, true)) {
                DP_NOTICE(p_hwfn, true,
@@ -4415,17 +4432,23 @@ void ecore_iov_bulletin_set_forced_mac(struct ecore_hwfn *p_hwfn,
                return;
        }
 
-       if (p_hwfn->pf_params.eth_pf_params.allow_vf_mac_change)
+       if (p_hwfn->pf_params.eth_pf_params.allow_vf_mac_change ||
+           vf_info->p_vf_info.is_trusted_configured) {
                feature = 1 << VFPF_BULLETIN_MAC_ADDR;
-       else
+               /* Trust mode will disable Forced MAC */
+               vf_info->bulletin.p_virt->valid_bitmap &=
+                       ~(1 << MAC_ADDR_FORCED);
+       } else {
                feature = 1 << MAC_ADDR_FORCED;
+               /* Forced MAC will disable MAC_ADDR */
+               vf_info->bulletin.p_virt->valid_bitmap &=
+                       ~(1 << VFPF_BULLETIN_MAC_ADDR);
+       }
 
-       OSAL_MEMCPY(vf_info->bulletin.p_virt->mac, mac, ETH_ALEN);
+       OSAL_MEMCPY(vf_info->bulletin.p_virt->mac,
+                   mac, ETH_ALEN);
 
        vf_info->bulletin.p_virt->valid_bitmap |= feature;
-       /* Forced MAC will disable MAC_ADDR */
-       vf_info->bulletin.p_virt->valid_bitmap &=
-           ~(1 << VFPF_BULLETIN_MAC_ADDR);
 
        ecore_iov_configure_vport_forced(p_hwfn, vf_info, feature);
 }
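
The rewritten ecore_iov_bulletin_set_forced_mac() makes the two bulletin MAC bits mutually exclusive in both directions: trust mode clears MAC_ADDR_FORCED, and forced mode clears VFPF_BULLETIN_MAC_ADDR. A toy rendition of that bitmap handling (the bit positions here are made up):

    #include <stdio.h>

    #define MAC_ADDR_FORCED        0   /* hypothetical bit numbers */
    #define VFPF_BULLETIN_MAC_ADDR 1

    int main(void)
    {
            unsigned long bitmap = 0;
            int trusted = 1;
            unsigned long feature;

            if (trusted) {
                    feature = 1UL << VFPF_BULLETIN_MAC_ADDR;
                    bitmap &= ~(1UL << MAC_ADDR_FORCED);        /* trust disables forced */
            } else {
                    feature = 1UL << MAC_ADDR_FORCED;
                    bitmap &= ~(1UL << VFPF_BULLETIN_MAC_ADDR); /* forced disables plain */
            }
            bitmap |= feature;
            printf("bitmap = 0x%lx\n", bitmap);
            return 0;
    }
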
@@ -4460,7 +4483,8 @@ enum _ecore_status_t ecore_iov_bulletin_set_mac(struct ecore_hwfn *p_hwfn,
 
        vf_info->bulletin.p_virt->valid_bitmap |= feature;
 
-       if (p_hwfn->pf_params.eth_pf_params.allow_vf_mac_change)
+       if (p_hwfn->pf_params.eth_pf_params.allow_vf_mac_change ||
+           vf_info->p_vf_info.is_trusted_configured)
                ecore_iov_configure_vport_forced(p_hwfn, vf_info, feature);
 
        return ECORE_SUCCESS;
@@ -4780,6 +4804,32 @@ enum _ecore_status_t ecore_iov_configure_tx_rate(struct ecore_hwfn *p_hwfn,
                                   p_link->speed);
 }
 
+enum _ecore_status_t ecore_iov_configure_min_tx_rate(struct ecore_dev *p_dev,
+                                                    int vfid, u32 rate)
+{
+       struct ecore_vf_info *vf;
+       int i;
+
+       for_each_hwfn(p_dev, i) {
+               struct ecore_hwfn *p_hwfn = &p_dev->hwfns[i];
+
+               if (!ecore_iov_pf_sanity_check(p_hwfn, vfid)) {
+                       DP_NOTICE(p_hwfn, true,
+                                 "SR-IOV sanity check failed, can't set min rate\n");
+                       return ECORE_INVAL;
+               }
+       }
+
+       vf = ecore_iov_get_vf_info(ECORE_LEADING_HWFN(p_dev), (u16)vfid, true);
+       if (!vf) {
+               DP_NOTICE(p_dev, true,
+                         "Getting vf info failed, can't set min rate\n");
+               return ECORE_INVAL;
+       }
+
+       return ecore_configure_vport_wfq(p_dev, vf->vport_id, rate);
+}
+
 enum _ecore_status_t ecore_iov_get_vf_stats(struct ecore_hwfn *p_hwfn,
                                            struct ecore_ptt *p_ptt,
                                            int vfid,
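
The new ecore_iov_configure_min_tx_rate() follows the usual multi-engine shape: validate the VF on every hwfn first, then apply the WFQ rate once through the leading hwfn's vport. The skeleton below keeps only that shape; every name in it is illustrative, not the ecore API:

    #include <stdio.h>

    struct hwfn { int vf_ok; };

    static int set_min_rate(struct hwfn *hwfns, int n, int vfid, unsigned int rate)
    {
            int i;

            for (i = 0; i < n; i++)       /* sanity-check all engines first */
                    if (!hwfns[i].vf_ok)
                            return -1;

            printf("vf %d: min tx rate -> %u\n", vfid, rate);  /* act once */
            return 0;
    }

    int main(void)
    {
            struct hwfn engines[2] = { {1}, {1} };

            return set_min_rate(engines, 2, 0, 100) ? 1 : 0;
    }
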
@@ -4890,7 +4940,7 @@ bool ecore_iov_is_vf_started(struct ecore_hwfn *p_hwfn,
        return (p_vf->state != VF_FREE && p_vf->state != VF_STOPPED);
 }
 
-enum _ecore_status_t
+int
 ecore_iov_get_vf_min_rate(struct ecore_hwfn *p_hwfn, int vfid)
 {
        struct ecore_wfq_data *vf_vp_wfq;
index d2213f7..3ba6a0c 100644
@@ -32,7 +32,7 @@ static void *ecore_vf_pf_prep(struct ecore_hwfn *p_hwfn, u16 type, u16 length)
 
        DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
                   "preparing to send %s tlv over vf pf channel\n",
-                  ecore_channel_tlvs_string[type]);
+                  qede_ecore_channel_tlvs_string[type]);
 
        /* Reset Request offset */
        p_iov->offset = (u8 *)(p_iov->vf2pf_request);
@@ -565,13 +565,20 @@ enum _ecore_status_t ecore_vf_hw_prepare(struct ecore_hwfn *p_hwfn)
                                                           phys,
                                                           p_iov->bulletin.
                                                           size);
+       if (!p_iov->bulletin.p_virt) {
+               DP_NOTICE(p_hwfn, false, "Failed to alloc bulletin memory\n");
+               goto free_pf2vf_reply;
+       }
        DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
                   "VF's bulletin Board [%p virt 0x%lx phys 0x%08x bytes]\n",
                   p_iov->bulletin.p_virt, (unsigned long)p_iov->bulletin.phys,
                   p_iov->bulletin.size);
 
 #ifdef CONFIG_ECORE_LOCK_ALLOC
-       OSAL_MUTEX_ALLOC(p_hwfn, &p_iov->mutex);
+       if (OSAL_MUTEX_ALLOC(p_hwfn, &p_iov->mutex)) {
+               DP_NOTICE(p_hwfn, false, "Failed to allocate p_iov->mutex\n");
+               goto free_bulletin_mem;
+       }
 #endif
        OSAL_MUTEX_INIT(&p_iov->mutex);
 
@@ -609,6 +616,16 @@ enum _ecore_status_t ecore_vf_hw_prepare(struct ecore_hwfn *p_hwfn)
 
        return rc;
 
+#ifdef CONFIG_ECORE_LOCK_ALLOC
+free_bulletin_mem:
+       OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_iov->bulletin.p_virt,
+                              p_iov->bulletin.phys,
+                              p_iov->bulletin.size);
+#endif
+free_pf2vf_reply:
+       OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_iov->pf2vf_reply,
+                              p_iov->pf2vf_reply_phys,
+                              sizeof(union pfvf_tlvs));
 free_vf2pf_request:
        OSAL_DMA_FREE_COHERENT(p_hwfn->p_dev, p_iov->vf2pf_request,
                               p_iov->vf2pf_request_phys,
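
The new labels give ecore_vf_hw_prepare() a reverse-order unwind: each allocation that can fail jumps to a label that releases everything allocated before it, so nothing leaks on a partial failure. The same idiom in plain C:

    #include <stdlib.h>

    struct ctx { void *req, *reply, *bulletin; };

    static int prepare(struct ctx *c)
    {
            c->req = malloc(64);
            if (!c->req)
                    goto err;
            c->reply = malloc(64);
            if (!c->reply)
                    goto free_req;
            c->bulletin = malloc(64);
            if (!c->bulletin)
                    goto free_reply;
            return 0;                    /* success: caller owns all three */

    free_reply:
            free(c->reply);
    free_req:
            free(c->req);
    err:
            return -1;
    }

    int main(void)
    {
            struct ctx c = { 0 };

            if (prepare(&c))
                    return 1;
            free(c.bulletin);
            free(c.reply);
            free(c.req);
            return 0;
    }
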
@@ -1167,7 +1184,7 @@ ecore_vf_handle_vp_update_is_needed(struct ecore_hwfn *p_hwfn,
                return !!p_data->sge_tpa_params;
        default:
                DP_INFO(p_hwfn, "Unexpected vport-update TLV[%d] %s\n",
-                       tlv, ecore_channel_tlvs_string[tlv]);
+                       tlv, qede_ecore_channel_tlvs_string[tlv]);
                return false;
        }
 }
@@ -1191,7 +1208,7 @@ ecore_vf_handle_vp_update_tlvs_resp(struct ecore_hwfn *p_hwfn,
                if (p_resp && p_resp->hdr.status)
                        DP_VERBOSE(p_hwfn, ECORE_MSG_IOV,
                                   "TLV[%d] type %s Configuration %s\n",
-                                  tlv, ecore_channel_tlvs_string[tlv],
+                                  tlv, qede_ecore_channel_tlvs_string[tlv],
                                   (p_resp && p_resp->hdr.status) ? "succeeded"
                                                                  : "failed");
        }
index c30677a..c7ecb01 100644 (file)
@@ -698,6 +698,6 @@ enum {
 
 /*!!!!! Make sure to update STRINGS structure accordingly !!!!!*/
 };
-extern const char *ecore_channel_tlvs_string[];
+extern const char *qede_ecore_channel_tlvs_string[];
 
 #endif /* __ECORE_VF_PF_IF_H__ */
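
The ecore_channel_tlvs_string rename above is part of a wider pass prefixing the driver's externally visible globals with qede_, so that a binary statically linking several PMDs cannot end up with two definitions of the same generic symbol name. In miniature (the array content is just a sample):

    #include <stdio.h>

    /* An extern array named 'channel_tlvs_string' could collide with an
     * identically named symbol from another driver; the qede_ prefix makes
     * the external symbol unique.
     */
    const char *qede_demo_tlv_names[] = {
            "CHANNEL_TLV_NONE",
            "CHANNEL_TLV_ACQUIRE",
    };

    int main(void)
    {
            printf("%s\n", qede_demo_tlv_names[1]);
            return 0;
    }
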
index abfa685..9a401ed 100644
@@ -178,6 +178,11 @@ struct eth_tx_1st_bd_flags {
 #define ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT    6
 /* Recalculate Tunnel UDP/GRE Checksum (Depending on Tunnel Type) */
 #define ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK     0x1
+/* Recalculate Tunnel UDP/GRE Checksum (Depending on Tunnel Type). In case of
+ * GRE tunnel, this flag means GRE CSO, and in this case GRE checksum field
+ * Must be present.
+ */
+#define ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK     0x1
 #define ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT    7
 };
 
index 81aa88e..13c2e2d 100644
@@ -1258,6 +1258,17 @@ struct public_drv_mb {
  */
 #define DRV_MSG_GET_RESOURCE_ALLOC_MSG         0x34000000
 #define DRV_MSG_SET_RESOURCE_VALUE_MSG         0x35000000
+#define DRV_MSG_CODE_OV_UPDATE_WOL             0x38000000
+#define DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE    0x39000000
+#define DRV_MSG_CODE_S_TAG_UPDATE_ACK          0x3b000000
+#define DRV_MSG_CODE_OEM_UPDATE_FCOE_CVID      0x3c000000
+#define DRV_MSG_CODE_OEM_UPDATE_FCOE_FABRIC_NAME       0x3d000000
+#define DRV_MSG_CODE_OEM_UPDATE_BOOT_CFG       0x3e000000
+#define DRV_MSG_CODE_OEM_RESET_TO_DEFAULT      0x3f000000
+#define DRV_MSG_CODE_OV_GET_CURR_CFG           0x40000000
+#define DRV_MSG_CODE_GET_OEM_UPDATES           0x41000000
+/* params [31:8] - reserved, [7:0] - bitmap */
+#define DRV_MSG_CODE_GET_PPFID_BITMAP          0x43000000
 
 /*deprecated don't use*/
 #define DRV_MSG_CODE_INITIATE_FLR_DEPRECATED    0x02000000
@@ -1467,6 +1478,7 @@ struct public_drv_mb {
 
 /* Param: Password len. Union: Plain Password */
 #define DRV_MSG_CODE_ENCRYPT_PASSWORD          0x00360000
+#define DRV_MSG_CODE_GET_ENGINE_CONFIG         0x00370000 /* Param: None */
 
 #define DRV_MSG_SEQ_NUMBER_MASK                 0x0000ffff
 
@@ -1582,6 +1594,16 @@ struct public_drv_mb {
 #define DRV_MB_PARAM_OV_MTU_SIZE_OFFSET                0
 #define DRV_MB_PARAM_OV_MTU_SIZE_MASK          0xFFFFFFFF
 
+#define DRV_MB_PARAM_ESWITCH_MODE_MASK  (DRV_MB_PARAM_ESWITCH_MODE_NONE | \
+                                        DRV_MB_PARAM_ESWITCH_MODE_VEB |   \
+                                        DRV_MB_PARAM_ESWITCH_MODE_VEPA)
+#define DRV_MB_PARAM_ESWITCH_MODE_NONE  0x0
+#define DRV_MB_PARAM_ESWITCH_MODE_VEB   0x1
+#define DRV_MB_PARAM_ESWITCH_MODE_VEPA  0x2
+
+#define DRV_MB_PARAM_DUMMY_OEM_UPDATES_MASK     0x1
+#define DRV_MB_PARAM_DUMMY_OEM_UPDATES_OFFSET   0
+
 #define DRV_MB_PARAM_SET_LED_MODE_OPER         0x0
 #define DRV_MB_PARAM_SET_LED_MODE_ON           0x1
 #define DRV_MB_PARAM_SET_LED_MODE_OFF          0x2
@@ -1677,6 +1699,8 @@ struct public_drv_mb {
 #define FW_MSG_CODE_RESOURCE_ALLOC_UNKNOWN      0x35000000
 #define FW_MSG_CODE_RESOURCE_ALLOC_DEPRECATED   0x36000000
 #define FW_MSG_CODE_RESOURCE_ALLOC_GEN_ERR      0x37000000
+#define FW_MSG_CODE_GET_OEM_UPDATES_DONE       0x41000000
+
 #define FW_MSG_CODE_NIG_DRAIN_DONE              0x30000000
 #define FW_MSG_CODE_VF_DISABLED_DONE            0xb0000000
 #define FW_MSG_CODE_DRV_CFG_VF_MSIX_DONE        0xb0010000
@@ -1778,11 +1802,31 @@ struct public_drv_mb {
 #define FW_MB_PARAM_FEATURE_SUPPORT_EEE         0x00000002
 /* MFW supports DRV_LOAD Timeout */
 #define FW_MB_PARAM_FEATURE_SUPPORT_DRV_LOAD_TO  0x00000004
+/* MFW support complete IGU cleanup upon FLR */
+#define FW_MB_PARAM_FEATURE_SUPPORT_IGU_CLEANUP        0x00000080
 /* MFW supports virtual link */
 #define FW_MB_PARAM_FEATURE_SUPPORT_VLINK       0x00010000
 
 #define FW_MB_PARAM_LOAD_DONE_DID_EFUSE_ERROR  (1 << 0)
 
+#define FW_MB_PARAM_OEM_UPDATE_MASK            0xFF
+#define FW_MB_PARAM_OEM_UPDATE_OFFSET          0
+#define FW_MB_PARAM_OEM_UPDATE_BW              0x01
+#define FW_MB_PARAM_OEM_UPDATE_S_TAG           0x02
+#define FW_MB_PARAM_OEM_UPDATE_CFG             0x04
+
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID_MASK   0x00000001
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALID_OFFSET 0
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE_MASK   0x00000002
+#define FW_MB_PARAM_ENG_CFG_FIR_AFFIN_VALUE_OFFSET 1
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID_MASK    0x00000004
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALID_OFFSET  2
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE_MASK    0x00000008
+#define FW_MB_PARAM_ENG_CFG_L2_AFFIN_VALUE_OFFSET  3
+
+#define FW_MB_PARAM_PPFID_BITMAP_MASK   0xFF
+#define FW_MB_PARAM_PPFID_BITMAP_OFFSET    0
+
        u32 drv_pulse_mb;
 #define DRV_PULSE_SEQ_MASK                      0x00007fff
 #define DRV_PULSE_SYSTEM_TIME_MASK              0xffff0000
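
Each new mailbox parameter above arrives as a MASK/OFFSET pair, consumed by a token-pasting accessor in the ecore code (GET_MFW_FIELD style). The macro below is spelled out locally as an assumption rather than quoted from the headers:

    #include <stdio.h>

    #define FW_MB_PARAM_PPFID_BITMAP_MASK   0xFF
    #define FW_MB_PARAM_PPFID_BITMAP_OFFSET 0

    #define GET_FIELD32(val, name) \
            (((val) & name##_MASK) >> name##_OFFSET)

    int main(void)
    {
            unsigned int fw_param = 0x0000003a;  /* sample mailbox word */

            printf("ppfid bitmap = 0x%x\n",
                   GET_FIELD32(fw_param, FW_MB_PARAM_PPFID_BITMAP));
            return 0;
    }
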
diff --git a/drivers/net/qede/base/meson.build b/drivers/net/qede/base/meson.build
new file mode 100644
index 0000000..71b8973
--- /dev/null
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+sources = [
+       'bcm_osal.c',
+       'ecore_cxt.c',
+       'ecore_dcbx.c',
+       'ecore_dev.c',
+       'ecore_hw.c',
+       'ecore_init_fw_funcs.c',
+       'ecore_init_ops.c',
+       'ecore_int.c',
+       'ecore_l2.c',
+       'ecore_mcp.c',
+       'ecore_sp_commands.c',
+       'ecore_spq.c',
+       'ecore_sriov.c',
+       'ecore_vf.c',
+]
+
+
+error_cflags = [
+       '-Wno-unused-parameter',
+       '-Wno-sign-compare',
+       '-Wno-missing-prototypes',
+       '-Wno-cast-qual',
+       '-Wno-unused-function',
+       '-Wno-unused-variable',
+       '-Wno-strict-aliasing',
+       '-Wno-unused-value',
+       '-Wno-format-nonliteral',
+       '-Wno-shift-negative-value',
+       '-Wno-unused-but-set-variable',
+       '-Wno-missing-declarations',
+       '-Wno-maybe-uninitialized',
+       '-Wno-strict-prototypes',
+       '-Wno-implicit-fallthrough',
+       '-Wno-format-extra-args',
+       '-Wno-visibility',
+       '-Wno-empty-body',
+       '-Wno-invalid-source-encoding',
+       '-Wno-sometimes-uninitialized',
+       '-Wno-pointer-bool-conversion',
+]
+c_args = cflags
+if allow_experimental_apis
+       c_args += '-DALLOW_EXPERIMENTAL_API'
+endif
+foreach flag: error_cflags
+        if cc.has_argument(flag)
+                c_args += flag
+        endif
+endforeach
+
+base_lib = static_library('qede_base', sources,
+       dependencies: static_rte_net,
+       c_args: c_args)
+base_objs = base_lib.extract_all_objects()
index 402f620..be59f77 100644
@@ -8,13 +8,13 @@
        0
 
 #define  CDU_REG_CID_ADDR_PARAMS_CONTEXT_SIZE          ( \
-               0xfff << 0)
+               0xfffUL << 0)
 
 #define  CDU_REG_CID_ADDR_PARAMS_BLOCK_WASTE_SHIFT \
        12
 
 #define  CDU_REG_CID_ADDR_PARAMS_BLOCK_WASTE           ( \
-               0xfff << 12)
+               0xfffUL << 12)
 
 #define  CDU_REG_CID_ADDR_PARAMS_NCIB_SHIFT \
        24
 #define  IGU_REG_ATTN_MSG_ADDR_L       \
        0x180820UL
 #define  IGU_REG_ATTN_MSG_ADDR_H       \
        0x180824UL
+#define IGU_REG_LEADING_EDGE_LATCH \
+       0x18082cUL
+#define IGU_REG_TRAILING_EDGE_LATCH \
+       0x180830UL
+#define IGU_REG_ATTENTION_ACK_BITS \
+       0x180838UL
+#define IGU_REG_PBA_STS_PF \
+       0x180d20UL
+#define IGU_REG_PF_FUNCTIONAL_CLEANUP \
+       0x181210UL
+#define IGU_REG_STATISTIC_NUM_OF_INTA_ASSERTED \
+       0x18042cUL
+#define IGU_REG_PBA_STS_PF_SIZE 5
+#define IGU_REG_PBA_STS_PF \
+       0x180d20UL
 #define  MISC_REG_AEU_GENERAL_ATTN_0 \
        0x008400UL
 #define  CAU_REG_SB_ADDR_MEMORY \
 #define  IGU_REG_COMMAND_REG_CTRL \
        0x180848UL
 #define  IGU_REG_BLOCK_CONFIGURATION_VF_CLEANUP_EN     ( \
-               0x1 << 1)
+               0x1UL << 1)
 #define  IGU_REG_BLOCK_CONFIGURATION_PXP_TPH_INTERFACE_EN      ( \
-               0x1 << 0)
+               0x1UL << 0)
 #define  IGU_REG_MAPPING_MEMORY \
        0x184000UL
 #define  MISCS_REG_GENERIC_POR_0       \
 #define  MCP_REG_NVM_CFG4 \
        0xe0642cUL
 #define  MCP_REG_NVM_CFG4_FLASH_SIZE   ( \
-               0x7 << 0)
+               0x7UL << 0)
 #define  MCP_REG_NVM_CFG4_FLASH_SIZE_SHIFT \
        0
 #define CCFC_REG_STRONG_ENABLE_VF 0x2e070cUL
 #define XMAC_REG_TX_CTRL_LO 0x210020UL
 #define XMAC_REG_CTRL 0x210000UL
 #define XMAC_REG_RX_CTRL 0x210030UL
-#define XMAC_REG_RX_CTRL_PROCESS_VARIABLE_PREAMBLE (0x1 << 12)
+#define XMAC_REG_RX_CTRL_PROCESS_VARIABLE_PREAMBLE (0x1UL << 12)
 #define MISC_REG_CLK_100G_MODE 0x008c10UL
 #define MISC_REG_OPTE_MODE 0x008c0cUL
 #define NIG_REG_LLH_ENG_CLS_TCP_4_TUPLE_SEARCH 0x501b84UL
 #define NIG_REG_LLH_FUNC_FILTER_EN 0x501a80UL
 #define NIG_REG_LLH_FUNC_FILTER_EN_SIZE 16
 #define NIG_REG_LLH_FUNC_FILTER_VALUE 0x501a00UL
-#define XMAC_REG_CTRL_TX_EN (0x1 << 0)
-#define XMAC_REG_CTRL_RX_EN (0x1 << 1)
+#define XMAC_REG_CTRL_TX_EN (0x1UL << 0)
+#define XMAC_REG_CTRL_RX_EN (0x1UL << 1)
 #define CDU_REG_SEGMENT0_PARAMS_T0_TID_SIZE (0xffUL << 24) /* @DPDK */
-#define CDU_REG_SEGMENT0_PARAMS_T0_TID_BLOCK_WASTE (0xff << 16)
+#define CDU_REG_SEGMENT0_PARAMS_T0_TID_BLOCK_WASTE (0xffUL << 16)
 #define CDU_REG_SEGMENT0_PARAMS_T0_TID_BLOCK_WASTE_SHIFT 16
-#define CDU_REG_SEGMENT1_PARAMS_T1_TID_BLOCK_WASTE (0xff << 16)
+#define CDU_REG_SEGMENT1_PARAMS_T1_TID_BLOCK_WASTE (0xffUL << 16)
 #define CDU_REG_SEGMENT1_PARAMS_T1_TID_SIZE (0xffUL << 24) /* @DPDK */
-#define CDU_REG_SEGMENT1_PARAMS_T1_NUM_TIDS_IN_BLOCK (0xfff << 0)
+#define CDU_REG_SEGMENT1_PARAMS_T1_NUM_TIDS_IN_BLOCK (0xfffUL << 0)
 #define CDU_REG_SEGMENT1_PARAMS_T1_NUM_TIDS_IN_BLOCK_SHIFT 0
-#define CDU_REG_SEGMENT0_PARAMS_T0_NUM_TIDS_IN_BLOCK (0xfff << 0)
+#define CDU_REG_SEGMENT0_PARAMS_T0_NUM_TIDS_IN_BLOCK (0xfffUL << 0)
 #define CDU_REG_SEGMENT0_PARAMS_T0_NUM_TIDS_IN_BLOCK_SHIFT 0
 #define PSWRQ2_REG_ILT_MEMORY 0x260000UL
 #define QM_REG_WFQPFWEIGHT 0x2f4e80UL
 #define MISC_REG_AEU_GENERAL_ATTN_35 0x00848cUL
 #define MCP_REG_CPU_STATE 0xe05004UL
 #define MCP_REG_CPU_MODE 0xe05000UL
-#define MCP_REG_CPU_MODE_SOFT_HALT (0x1 << 10)
+#define MCP_REG_CPU_MODE_SOFT_HALT (0x1UL << 10)
 #define MCP_REG_CPU_EVENT_MASK 0xe05008UL
 #define PSWHST_REG_VF_DISABLED_ERROR_VALID 0x2a0060UL
 #define PSWHST_REG_VF_DISABLED_ERROR_ADDRESS 0x2a0064UL
 #define PGLUE_B_REG_VF_ILT_ERR_ADD_63_32 0x2aae78UL
 #define PGLUE_B_REG_VF_ILT_ERR_DETAILS 0x2aae7cUL
 #define PGLUE_B_REG_LATCHED_ERRORS_CLR 0x2aa3bcUL
-#define NIG_REG_INT_MASK_3_P0_LB_TC1_PAUSE_TOO_LONG_INT (0x1 << 10)
+#define NIG_REG_INT_MASK_3_P0_LB_TC1_PAUSE_TOO_LONG_INT (0x1UL << 10)
 #define DORQ_REG_DB_DROP_REASON 0x100a2cUL
 #define DORQ_REG_DB_DROP_DETAILS 0x100a24UL
 #define TM_REG_INT_STS_1 0x2c0190UL
-#define TM_REG_INT_STS_1_PEND_TASK_SCAN (0x1 << 6)
-#define TM_REG_INT_STS_1_PEND_CONN_SCAN (0x1 << 5)
+#define TM_REG_INT_STS_1_PEND_TASK_SCAN (0x1UL << 6)
+#define TM_REG_INT_STS_1_PEND_CONN_SCAN (0x1UL << 5)
 #define TM_REG_INT_MASK_1 0x2c0194UL
-#define TM_REG_INT_MASK_1_PEND_CONN_SCAN (0x1 << 5)
-#define TM_REG_INT_MASK_1_PEND_TASK_SCAN (0x1 << 6)
+#define TM_REG_INT_MASK_1_PEND_CONN_SCAN (0x1UL << 5)
+#define TM_REG_INT_MASK_1_PEND_TASK_SCAN (0x1UL << 6)
 #define MISC_REG_AEU_AFTER_INVERT_1_IGU 0x0087b4UL
 #define MISC_REG_AEU_ENABLE4_IGU_OUT_0 0x0084a8UL
 #define MISC_REG_AEU_ENABLE3_IGU_OUT_0 0x0084a4UL
 #define XMAC_REG_RX_MAX_SIZE_BB  0x210040UL
 #define XMAC_REG_TX_CTRL_LO_BB 0x210020UL
 #define XMAC_REG_CTRL_BB 0x210000UL
-#define XMAC_REG_CTRL_TX_EN_BB (0x1 << 0)
-#define XMAC_REG_CTRL_RX_EN_BB (0x1 << 1)
+#define XMAC_REG_CTRL_TX_EN_BB (0x1UL << 0)
+#define XMAC_REG_CTRL_RX_EN_BB (0x1UL << 1)
 #define XMAC_REG_RX_CTRL_BB 0x210030UL
-#define XMAC_REG_RX_CTRL_PROCESS_VARIABLE_PREAMBLE_BB (0x1 << 12)
+#define XMAC_REG_RX_CTRL_PROCESS_VARIABLE_PREAMBLE_BB (0x1UL << 12)
 
 #define PGLUE_B_REG_PGL_ADDR_E8_F0_K2_E5 0x2aaf98UL
 #define PGLUE_B_REG_PGL_ADDR_EC_F0_K2_E5 0x2aaf9cUL
 #define DORQ_REG_DPM_FORCE_ABORT 0x1009d8UL
 #define DORQ_REG_PF_OVFL_STICKY 0x1009d0UL
 #define DORQ_REG_INT_STS 0x100180UL
-  #define DORQ_REG_INT_STS_DB_DROP (0x1 << 1)
-  #define DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR (0x1 << 2)
-  #define DORQ_REG_INT_STS_DORQ_FIFO_AFULL (0x1 << 3)
+  #define DORQ_REG_INT_STS_DB_DROP (0x1UL << 1)
+  #define DORQ_REG_INT_STS_DORQ_FIFO_OVFL_ERR (0x1UL << 2)
+  #define DORQ_REG_INT_STS_DORQ_FIFO_AFULL (0x1UL << 3)
 #define DORQ_REG_DB_DROP_DETAILS_REL 0x100a28UL
 #define DORQ_REG_INT_STS_WR 0x100188UL
 #define DORQ_REG_DB_DROP_DETAILS_REASON 0x100a20UL
 #define MCP_REG_CPU_PROGRAM_COUNTER 0xe0501cUL
-  #define MCP_REG_CPU_STATE_SOFT_HALTED (0x1 << 10)
+  #define MCP_REG_CPU_STATE_SOFT_HALTED (0x1UL << 10)
 #define PRS_REG_SEARCH_TENANT_ID 0x1f044cUL
 #define PGLUE_B_REG_VF_BAR1_SIZE 0x2aae68UL
 
 #define RSS_REG_RSS_RAM_MASK 0x238c10UL
+
+#define NIG_REG_LLH_FUNC_TAG_EN 0x5019b0UL
+#define NIG_REG_LLH_FUNC_TAG_VALUE 0x5019d0UL
+#define DORQ_REG_TAG1_OVRD_MODE 0x1008b4UL
+#define DORQ_REG_PF_PCP_BB_K2 0x1008c4UL
+#define DORQ_REG_PF_EXT_VID_BB_K2 0x1008c8UL
+#define PRS_REG_SEARCH_NON_IP_AS_GFT 0x1f11c0UL
+#define NIG_REG_LLH_PPFID2PFID_TBL_0 0x501970UL
+#define NIG_REG_PPF_TO_ENGINE_SEL 0x508900UL
+#define NIG_REG_LLH_ENG_CLS_ROCE_QP_SEL 0x501b98UL
+#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_BB_K2 0x501b40UL
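
The blanket 0x1 to 0x1UL conversions in this register header come down to integer promotion: a bare hex literal is a signed int, so shifting it into bit 31, or widening a multi-bit field past bit 30, overflows signed arithmetic, which is undefined behavior. The UL suffix keeps the whole mask expression unsigned; it is applied uniformly here even where the shift happens to be safe. Compare:

    #include <stdio.h>

    int main(void)
    {
            unsigned long ok = 0x1UL << 31;      /* well defined */
            /* int bad = 0x1 << 31; */           /* signed overflow: undefined */
            unsigned long field = 0xfffUL << 20; /* safe even past bit 30 */

            printf("0x%lx 0x%lx\n", ok, field);
            return 0;
    }
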
diff --git a/drivers/net/qede/meson.build b/drivers/net/qede/meson.build
new file mode 100644
index 0000000..12388a6
--- /dev/null
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+subdir('base')
+objs = [base_objs]
+
+sources = files(
+       'qede_ethdev.c',
+       'qede_filter.c',
+       'qede_main.c',
+       'qede_rxtx.c',
+)
index df52ea9..518673d 100644
@@ -14,112 +14,10 @@ int qede_logtype_init;
 int qede_logtype_driver;
 
 static const struct qed_eth_ops *qed_ops;
-#define QEDE_SP_TIMER_PERIOD   10000 /* 100ms */
+static int qede_eth_dev_uninit(struct rte_eth_dev *eth_dev);
+static int qede_eth_dev_init(struct rte_eth_dev *eth_dev);
 
-/* VXLAN tunnel classification mapping */
-const struct _qede_udp_tunn_types {
-       uint16_t rte_filter_type;
-       enum ecore_filter_ucast_type qede_type;
-       enum ecore_tunn_clss qede_tunn_clss;
-       const char *string;
-} qede_tunn_types[] = {
-       {
-               ETH_TUNNEL_FILTER_OMAC,
-               ECORE_FILTER_MAC,
-               ECORE_TUNN_CLSS_MAC_VLAN,
-               "outer-mac"
-       },
-       {
-               ETH_TUNNEL_FILTER_TENID,
-               ECORE_FILTER_VNI,
-               ECORE_TUNN_CLSS_MAC_VNI,
-               "vni"
-       },
-       {
-               ETH_TUNNEL_FILTER_IMAC,
-               ECORE_FILTER_INNER_MAC,
-               ECORE_TUNN_CLSS_INNER_MAC_VLAN,
-               "inner-mac"
-       },
-       {
-               ETH_TUNNEL_FILTER_IVLAN,
-               ECORE_FILTER_INNER_VLAN,
-               ECORE_TUNN_CLSS_INNER_MAC_VLAN,
-               "inner-vlan"
-       },
-       {
-               ETH_TUNNEL_FILTER_OMAC | ETH_TUNNEL_FILTER_TENID,
-               ECORE_FILTER_MAC_VNI_PAIR,
-               ECORE_TUNN_CLSS_MAC_VNI,
-               "outer-mac and vni"
-       },
-       {
-               ETH_TUNNEL_FILTER_OMAC | ETH_TUNNEL_FILTER_IMAC,
-               ECORE_FILTER_UNUSED,
-               MAX_ECORE_TUNN_CLSS,
-               "outer-mac and inner-mac"
-       },
-       {
-               ETH_TUNNEL_FILTER_OMAC | ETH_TUNNEL_FILTER_IVLAN,
-               ECORE_FILTER_UNUSED,
-               MAX_ECORE_TUNN_CLSS,
-               "outer-mac and inner-vlan"
-       },
-       {
-               ETH_TUNNEL_FILTER_TENID | ETH_TUNNEL_FILTER_IMAC,
-               ECORE_FILTER_INNER_MAC_VNI_PAIR,
-               ECORE_TUNN_CLSS_INNER_MAC_VNI,
-               "vni and inner-mac",
-       },
-       {
-               ETH_TUNNEL_FILTER_TENID | ETH_TUNNEL_FILTER_IVLAN,
-               ECORE_FILTER_UNUSED,
-               MAX_ECORE_TUNN_CLSS,
-               "vni and inner-vlan",
-       },
-       {
-               ETH_TUNNEL_FILTER_IMAC | ETH_TUNNEL_FILTER_IVLAN,
-               ECORE_FILTER_INNER_PAIR,
-               ECORE_TUNN_CLSS_INNER_MAC_VLAN,
-               "inner-mac and inner-vlan",
-       },
-       {
-               ETH_TUNNEL_FILTER_OIP,
-               ECORE_FILTER_UNUSED,
-               MAX_ECORE_TUNN_CLSS,
-               "outer-IP"
-       },
-       {
-               ETH_TUNNEL_FILTER_IIP,
-               ECORE_FILTER_UNUSED,
-               MAX_ECORE_TUNN_CLSS,
-               "inner-IP"
-       },
-       {
-               RTE_TUNNEL_FILTER_IMAC_IVLAN,
-               ECORE_FILTER_UNUSED,
-               MAX_ECORE_TUNN_CLSS,
-               "IMAC_IVLAN"
-       },
-       {
-               RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID,
-               ECORE_FILTER_UNUSED,
-               MAX_ECORE_TUNN_CLSS,
-               "IMAC_IVLAN_TENID"
-       },
-       {
-               RTE_TUNNEL_FILTER_IMAC_TENID,
-               ECORE_FILTER_UNUSED,
-               MAX_ECORE_TUNN_CLSS,
-               "IMAC_TENID"
-       },
-       {
-               RTE_TUNNEL_FILTER_OMAC_TENID_IMAC,
-               ECORE_FILTER_UNUSED,
-               MAX_ECORE_TUNN_CLSS,
-               "OMAC_TENID_IMAC"
-       },
-};
+#define QEDE_SP_TIMER_PERIOD   10000 /* 100ms */
 
 struct rte_qede_xstats_name_off {
        char name[RTE_ETH_XSTATS_NAME_SIZE];
@@ -399,7 +297,7 @@ static void qede_print_adapter_info(struct qede_dev *qdev)
                (info->mfw_rev >> 16) & 0xff,
                (info->mfw_rev >> 8) & 0xff, (info->mfw_rev) & 0xff);
        DP_INFO(edev, " Management Firmware version : %s\n", ver_str);
-       DP_INFO(edev, " Firmware file : %s\n", fw_file);
+       DP_INFO(edev, " Firmware file : %s\n", qede_fw_file);
        DP_INFO(edev, "*********************************\n");
 }
 
@@ -614,14 +512,6 @@ int qede_enable_tpa(struct rte_eth_dev *eth_dev, bool flg)
        return 0;
 }
 
-static void qede_set_ucast_cmn_params(struct ecore_filter_ucast *ucast)
-{
-       memset(ucast, 0, sizeof(struct ecore_filter_ucast));
-       ucast->is_rx_filter = true;
-       ucast->is_tx_filter = true;
-       /* ucast->assert_on_error = true; - For debug */
-}
-
 static int
 qed_configure_filter_rx_mode(struct rte_eth_dev *eth_dev,
                             enum qed_filter_rx_mode_type type)
@@ -660,167 +550,7 @@ qed_configure_filter_rx_mode(struct rte_eth_dev *eth_dev,
                        ECORE_SPQ_MODE_CB, NULL);
 }
 
-static int
-qede_tunnel_update(struct qede_dev *qdev,
-                  struct ecore_tunnel_info *tunn_info)
-{
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       enum _ecore_status_t rc = ECORE_INVAL;
-       struct ecore_hwfn *p_hwfn;
-       struct ecore_ptt *p_ptt;
-       int i;
-
-       for_each_hwfn(edev, i) {
-               p_hwfn = &edev->hwfns[i];
-               if (IS_PF(edev)) {
-                       p_ptt = ecore_ptt_acquire(p_hwfn);
-                       if (!p_ptt) {
-                               DP_ERR(p_hwfn, "Can't acquire PTT\n");
-                               return -EAGAIN;
-                       }
-               } else {
-                       p_ptt = NULL;
-               }
-
-               rc = ecore_sp_pf_update_tunn_cfg(p_hwfn, p_ptt,
-                               tunn_info, ECORE_SPQ_MODE_CB, NULL);
-               if (IS_PF(edev))
-                       ecore_ptt_release(p_hwfn, p_ptt);
-
-               if (rc != ECORE_SUCCESS)
-                       break;
-       }
-
-       return rc;
-}
-
-static int
-qede_vxlan_enable(struct rte_eth_dev *eth_dev, uint8_t clss,
-                 bool enable)
-{
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       enum _ecore_status_t rc = ECORE_INVAL;
-       struct ecore_tunnel_info tunn;
-
-       if (qdev->vxlan.enable == enable)
-               return ECORE_SUCCESS;
-
-       memset(&tunn, 0, sizeof(struct ecore_tunnel_info));
-       tunn.vxlan.b_update_mode = true;
-       tunn.vxlan.b_mode_enabled = enable;
-       tunn.b_update_rx_cls = true;
-       tunn.b_update_tx_cls = true;
-       tunn.vxlan.tun_cls = clss;
-
-       tunn.vxlan_port.b_update_port = true;
-       tunn.vxlan_port.port = enable ? QEDE_VXLAN_DEF_PORT : 0;
-
-       rc = qede_tunnel_update(qdev, &tunn);
-       if (rc == ECORE_SUCCESS) {
-               qdev->vxlan.enable = enable;
-               qdev->vxlan.udp_port = (enable) ? QEDE_VXLAN_DEF_PORT : 0;
-               DP_INFO(edev, "vxlan is %s, UDP port = %d\n",
-                       enable ? "enabled" : "disabled", qdev->vxlan.udp_port);
-       } else {
-               DP_ERR(edev, "Failed to update tunn_clss %u\n",
-                      tunn.vxlan.tun_cls);
-       }
-
-       return rc;
-}
-
-static int
-qede_geneve_enable(struct rte_eth_dev *eth_dev, uint8_t clss,
-                 bool enable)
-{
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       enum _ecore_status_t rc = ECORE_INVAL;
-       struct ecore_tunnel_info tunn;
-
-       memset(&tunn, 0, sizeof(struct ecore_tunnel_info));
-       tunn.l2_geneve.b_update_mode = true;
-       tunn.l2_geneve.b_mode_enabled = enable;
-       tunn.ip_geneve.b_update_mode = true;
-       tunn.ip_geneve.b_mode_enabled = enable;
-       tunn.l2_geneve.tun_cls = clss;
-       tunn.ip_geneve.tun_cls = clss;
-       tunn.b_update_rx_cls = true;
-       tunn.b_update_tx_cls = true;
-
-       tunn.geneve_port.b_update_port = true;
-       tunn.geneve_port.port = enable ? QEDE_GENEVE_DEF_PORT : 0;
-
-       rc = qede_tunnel_update(qdev, &tunn);
-       if (rc == ECORE_SUCCESS) {
-               qdev->geneve.enable = enable;
-               qdev->geneve.udp_port = (enable) ? QEDE_GENEVE_DEF_PORT : 0;
-               DP_INFO(edev, "GENEVE is %s, UDP port = %d\n",
-                       enable ? "enabled" : "disabled", qdev->geneve.udp_port);
-       } else {
-               DP_ERR(edev, "Failed to update tunn_clss %u\n",
-                      clss);
-       }
-
-       return rc;
-}
-
-static int
-qede_ipgre_enable(struct rte_eth_dev *eth_dev, uint8_t clss,
-                 bool enable)
-{
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       enum _ecore_status_t rc = ECORE_INVAL;
-       struct ecore_tunnel_info tunn;
-
-       memset(&tunn, 0, sizeof(struct ecore_tunnel_info));
-       tunn.ip_gre.b_update_mode = true;
-       tunn.ip_gre.b_mode_enabled = enable;
-       tunn.ip_gre.tun_cls = clss;
-       tunn.ip_gre.tun_cls = clss;
-       tunn.b_update_rx_cls = true;
-       tunn.b_update_tx_cls = true;
-
-       rc = qede_tunnel_update(qdev, &tunn);
-       if (rc == ECORE_SUCCESS) {
-               qdev->ipgre.enable = enable;
-               DP_INFO(edev, "IPGRE is %s\n",
-                       enable ? "enabled" : "disabled");
-       } else {
-               DP_ERR(edev, "Failed to update tunn_clss %u\n",
-                      clss);
-       }
-
-       return rc;
-}
-
-static int
-qede_tunn_enable(struct rte_eth_dev *eth_dev, uint8_t clss,
-                enum rte_eth_tunnel_type tunn_type, bool enable)
-{
-       int rc = -EINVAL;
-
-       switch (tunn_type) {
-       case RTE_TUNNEL_TYPE_VXLAN:
-               rc = qede_vxlan_enable(eth_dev, clss, enable);
-               break;
-       case RTE_TUNNEL_TYPE_GENEVE:
-               rc = qede_geneve_enable(eth_dev, clss, enable);
-               break;
-       case RTE_TUNNEL_TYPE_IP_IN_GRE:
-               rc = qede_ipgre_enable(eth_dev, clss, enable);
-               break;
-       default:
-               rc = -EINVAL;
-               break;
-       }
-
-       return rc;
-}
-
-static int
+int
 qede_ucast_filter(struct rte_eth_dev *eth_dev, struct ecore_filter_ucast *ucast,
                  bool add)
 {
@@ -941,7 +671,7 @@ static int qede_del_mcast_filters(struct rte_eth_dev *eth_dev)
        return 0;
 }
 
-static enum _ecore_status_t
+enum _ecore_status_t
 qede_mac_int_ops(struct rte_eth_dev *eth_dev, struct ecore_filter_ucast *ucast,
                 bool add)
 {
@@ -1033,7 +763,7 @@ qede_mac_addr_set(struct rte_eth_dev *eth_dev, struct ether_addr *mac_addr)
        return qede_mac_addr_add(eth_dev, mac_addr, 0, 0);
 }
 
-static void qede_config_accept_any_vlan(struct qede_dev *qdev, bool flg)
+void qede_config_accept_any_vlan(struct qede_dev *qdev, bool flg)
 {
        struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
        struct ecore_sp_vport_update_params params;
@@ -1359,7 +1089,7 @@ static void qede_dev_stop(struct rte_eth_dev *eth_dev)
        DP_INFO(edev, "Device is stopped\n");
 }
 
-const char *valid_args[] = {
+static const char * const valid_args[] = {
        QEDE_NPAR_TX_SWITCHING,
        QEDE_VF_TX_SWITCHING,
        NULL,
@@ -1483,7 +1213,7 @@ static int qede_dev_configure(struct rte_eth_dev *eth_dev)
        if (rxmode->offloads & DEV_RX_OFFLOAD_JUMBO_FRAME)
                eth_dev->data->mtu =
                        eth_dev->data->dev_conf.rxmode.max_rx_pkt_len -
-                       ETHER_HDR_LEN - ETHER_CRC_LEN;
+                       ETHER_HDR_LEN - QEDE_ETH_OVERHEAD;
 
        if (rxmode->offloads & DEV_RX_OFFLOAD_SCATTER)
                eth_dev->data->scattered_rx = 1;
@@ -1554,7 +1284,6 @@ qede_dev_info_get(struct rte_eth_dev *eth_dev,
                                     DEV_RX_OFFLOAD_TCP_CKSUM   |
                                     DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM |
                                     DEV_RX_OFFLOAD_TCP_LRO     |
-                                    DEV_RX_OFFLOAD_CRC_STRIP   |
                                     DEV_RX_OFFLOAD_KEEP_CRC    |
                                     DEV_RX_OFFLOAD_SCATTER     |
                                     DEV_RX_OFFLOAD_JUMBO_FRAME |
@@ -1651,14 +1380,11 @@ qede_link_update(struct rte_eth_dev *eth_dev, __rte_unused int wait_to_complete)
 
 static void qede_promiscuous_enable(struct rte_eth_dev *eth_dev)
 {
-#ifdef RTE_LIBRTE_QEDE_DEBUG_INIT
        struct qede_dev *qdev = eth_dev->data->dev_private;
        struct ecore_dev *edev = &qdev->edev;
+       enum qed_filter_rx_mode_type type = QED_FILTER_RX_MODE_TYPE_PROMISC;
 
        PMD_INIT_FUNC_TRACE(edev);
-#endif
-
-       enum qed_filter_rx_mode_type type = QED_FILTER_RX_MODE_TYPE_PROMISC;
 
        if (rte_eth_allmulticast_get(eth_dev->data->port_id) == 1)
                type |= QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC;
@@ -1668,12 +1394,10 @@ static void qede_promiscuous_enable(struct rte_eth_dev *eth_dev)
 
 static void qede_promiscuous_disable(struct rte_eth_dev *eth_dev)
 {
-#ifdef RTE_LIBRTE_QEDE_DEBUG_INIT
        struct qede_dev *qdev = eth_dev->data->dev_private;
        struct ecore_dev *edev = &qdev->edev;
 
        PMD_INIT_FUNC_TRACE(edev);
-#endif
 
        if (rte_eth_allmulticast_get(eth_dev->data->port_id) == 1)
                qed_configure_filter_rx_mode(eth_dev,
@@ -2499,19 +2223,18 @@ static int qede_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
        struct qede_fastpath *fp;
        uint32_t max_rx_pkt_len;
        uint32_t frame_size;
-       uint16_t rx_buf_size;
        uint16_t bufsz;
        bool restart = false;
-       int i;
+       int i, rc;
 
        PMD_INIT_FUNC_TRACE(edev);
        qede_dev_info_get(dev, &dev_info);
-       max_rx_pkt_len = mtu + ETHER_HDR_LEN + ETHER_CRC_LEN;
-       frame_size = max_rx_pkt_len + QEDE_ETH_OVERHEAD;
+       max_rx_pkt_len = mtu + QEDE_MAX_ETHER_HDR_LEN;
+       frame_size = max_rx_pkt_len;
        if ((mtu < ETHER_MIN_MTU) || (frame_size > dev_info.max_rx_pktlen)) {
                DP_ERR(edev, "MTU %u out of range, %u is maximum allowable\n",
                       mtu, dev_info.max_rx_pktlen - ETHER_HDR_LEN -
-                       ETHER_CRC_LEN - QEDE_ETH_OVERHEAD);
+                      QEDE_ETH_OVERHEAD);
                return -EINVAL;
        }
        if (!dev->data->scattered_rx &&
@@ -2539,14 +2262,15 @@ static int qede_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
                if (fp->rxq != NULL) {
                        bufsz = (uint16_t)rte_pktmbuf_data_room_size(
                                fp->rxq->mb_pool) - RTE_PKTMBUF_HEADROOM;
-                       if (dev->data->scattered_rx)
-                               rx_buf_size = bufsz + ETHER_HDR_LEN +
-                                             ETHER_CRC_LEN + QEDE_ETH_OVERHEAD;
-                       else
-                               rx_buf_size = frame_size;
-                       rx_buf_size = QEDE_CEIL_TO_CACHE_LINE_SIZE(rx_buf_size);
-                       fp->rxq->rx_buf_size = rx_buf_size;
-                       DP_INFO(edev, "RX buffer size %u\n", rx_buf_size);
+                       /* cache align the mbuf size to simplify rx_buf_size
+                        * calculation
+                        */
+                       bufsz = QEDE_FLOOR_TO_CACHE_LINE_SIZE(bufsz);
+                       rc = qede_calc_rx_buf_size(dev, bufsz, frame_size);
+                       if (rc < 0)
+                               return rc;
+
+                       fp->rxq->rx_buf_size = rc;
                }
        }
        if (max_rx_pkt_len > ETHER_MAX_LEN)
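
qede_set_mtu() now floors the mbuf data-room size to a cache line and hands the final sizing to qede_calc_rx_buf_size(), so the derived Rx buffer size stays within what an mbuf can actually hold. The macro below mirrors what QEDE_FLOOR_TO_CACHE_LINE_SIZE is assumed to expand to; the numbers are illustrative:

    #include <stdio.h>

    #define CACHE_LINE 64u
    #define FLOOR_TO_CACHE_LINE(n) ((n) & ~(CACHE_LINE - 1u))

    int main(void)
    {
            unsigned int data_room = 1518u;  /* mbuf room minus headroom, say */
            unsigned int bufsz = FLOOR_TO_CACHE_LINE(data_room);

            printf("bufsz = %u (floored from %u)\n", bufsz, data_room);
            return 0;
    }
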
@@ -2569,411 +2293,15 @@ static int qede_set_mtu(struct rte_eth_dev *dev, uint16_t mtu)
 }
 
 static int
-qede_udp_dst_port_del(struct rte_eth_dev *eth_dev,
-                     struct rte_eth_udp_tunnel *tunnel_udp)
-{
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       struct ecore_tunnel_info tunn; /* @DPDK */
-       uint16_t udp_port;
-       int rc;
-
-       PMD_INIT_FUNC_TRACE(edev);
-
-       memset(&tunn, 0, sizeof(tunn));
-
-       switch (tunnel_udp->prot_type) {
-       case RTE_TUNNEL_TYPE_VXLAN:
-               if (qdev->vxlan.udp_port != tunnel_udp->udp_port) {
-                       DP_ERR(edev, "UDP port %u doesn't exist\n",
-                               tunnel_udp->udp_port);
-                       return ECORE_INVAL;
-               }
-               udp_port = 0;
-
-               tunn.vxlan_port.b_update_port = true;
-               tunn.vxlan_port.port = udp_port;
-
-               rc = qede_tunnel_update(qdev, &tunn);
-               if (rc != ECORE_SUCCESS) {
-                       DP_ERR(edev, "Unable to config UDP port %u\n",
-                              tunn.vxlan_port.port);
-                       return rc;
-               }
-
-               qdev->vxlan.udp_port = udp_port;
-               /* If the request is to delete UDP port and if the number of
-                * VXLAN filters have reached 0 then VxLAN offload can be be
-                * disabled.
-                */
-               if (qdev->vxlan.enable && qdev->vxlan.num_filters == 0)
-                       return qede_vxlan_enable(eth_dev,
-                                       ECORE_TUNN_CLSS_MAC_VLAN, false);
-
-               break;
-       case RTE_TUNNEL_TYPE_GENEVE:
-               if (qdev->geneve.udp_port != tunnel_udp->udp_port) {
-                       DP_ERR(edev, "UDP port %u doesn't exist\n",
-                               tunnel_udp->udp_port);
-                       return ECORE_INVAL;
-               }
-
-               udp_port = 0;
-
-               tunn.geneve_port.b_update_port = true;
-               tunn.geneve_port.port = udp_port;
-
-               rc = qede_tunnel_update(qdev, &tunn);
-               if (rc != ECORE_SUCCESS) {
-                       DP_ERR(edev, "Unable to config UDP port %u\n",
-                              tunn.vxlan_port.port);
-                       return rc;
-               }
-
-               qdev->vxlan.udp_port = udp_port;
-               /* If the request is to delete UDP port and if the number of
-                * GENEVE filters have reached 0 then GENEVE offload can be be
-                * disabled.
-                */
-               if (qdev->geneve.enable && qdev->geneve.num_filters == 0)
-                       return qede_geneve_enable(eth_dev,
-                                       ECORE_TUNN_CLSS_MAC_VLAN, false);
-
-               break;
-
-       default:
-               return ECORE_INVAL;
-       }
-
-       return 0;
-
-}
-static int
-qede_udp_dst_port_add(struct rte_eth_dev *eth_dev,
-                     struct rte_eth_udp_tunnel *tunnel_udp)
-{
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       struct ecore_tunnel_info tunn; /* @DPDK */
-       uint16_t udp_port;
-       int rc;
-
-       PMD_INIT_FUNC_TRACE(edev);
-
-       memset(&tunn, 0, sizeof(tunn));
-
-       switch (tunnel_udp->prot_type) {
-       case RTE_TUNNEL_TYPE_VXLAN:
-               if (qdev->vxlan.udp_port == tunnel_udp->udp_port) {
-                       DP_INFO(edev,
-                               "UDP port %u for VXLAN was already configured\n",
-                               tunnel_udp->udp_port);
-                       return ECORE_SUCCESS;
-               }
-
-               /* Enable VxLAN tunnel with default MAC/VLAN classification if
-                * it was not enabled while adding VXLAN filter before UDP port
-                * update.
-                */
-               if (!qdev->vxlan.enable) {
-                       rc = qede_vxlan_enable(eth_dev,
-                               ECORE_TUNN_CLSS_MAC_VLAN, true);
-                       if (rc != ECORE_SUCCESS) {
-                               DP_ERR(edev, "Failed to enable VXLAN "
-                                       "prior to updating UDP port\n");
-                               return rc;
-                       }
-               }
-               udp_port = tunnel_udp->udp_port;
-
-               tunn.vxlan_port.b_update_port = true;
-               tunn.vxlan_port.port = udp_port;
-
-               rc = qede_tunnel_update(qdev, &tunn);
-               if (rc != ECORE_SUCCESS) {
-                       DP_ERR(edev, "Unable to config UDP port %u for VXLAN\n",
-                              udp_port);
-                       return rc;
-               }
-
-               DP_INFO(edev, "Updated UDP port %u for VXLAN\n", udp_port);
-
-               qdev->vxlan.udp_port = udp_port;
-               break;
-       case RTE_TUNNEL_TYPE_GENEVE:
-               if (qdev->geneve.udp_port == tunnel_udp->udp_port) {
-                       DP_INFO(edev,
-                               "UDP port %u for GENEVE was already configured\n",
-                               tunnel_udp->udp_port);
-                       return ECORE_SUCCESS;
-               }
-
-               /* Enable GENEVE tunnel with default MAC/VLAN classification if
-                * it was not enabled while adding GENEVE filter before UDP port
-                * update.
-                */
-               if (!qdev->geneve.enable) {
-                       rc = qede_geneve_enable(eth_dev,
-                               ECORE_TUNN_CLSS_MAC_VLAN, true);
-                       if (rc != ECORE_SUCCESS) {
-                               DP_ERR(edev, "Failed to enable GENEVE "
-                                       "prior to updating UDP port\n");
-                               return rc;
-                       }
-               }
-               udp_port = tunnel_udp->udp_port;
-
-               tunn.geneve_port.b_update_port = true;
-               tunn.geneve_port.port = udp_port;
-
-               rc = qede_tunnel_update(qdev, &tunn);
-               if (rc != ECORE_SUCCESS) {
-                       DP_ERR(edev, "Unable to config UDP port %u for GENEVE\n",
-                              udp_port);
-                       return rc;
-               }
-
-               DP_INFO(edev, "Updated UDP port %u for GENEVE\n", udp_port);
-
-               qdev->geneve.udp_port = udp_port;
-               break;
-       default:
-               return ECORE_INVAL;
-       }
-
-       return 0;
-}
-
-static void qede_get_ecore_tunn_params(uint32_t filter, uint32_t *type,
-                                      uint32_t *clss, char *str)
-{
-       uint16_t j;
-       *clss = MAX_ECORE_TUNN_CLSS;
-
-       for (j = 0; j < RTE_DIM(qede_tunn_types); j++) {
-               if (filter == qede_tunn_types[j].rte_filter_type) {
-                       *type = qede_tunn_types[j].qede_type;
-                       *clss = qede_tunn_types[j].qede_tunn_clss;
-                       strcpy(str, qede_tunn_types[j].string);
-                       return;
-               }
-       }
-}
-
-static int
-qede_set_ucast_tunn_cmn_param(struct ecore_filter_ucast *ucast,
-                             const struct rte_eth_tunnel_filter_conf *conf,
-                             uint32_t type)
-{
-       /* Init commmon ucast params first */
-       qede_set_ucast_cmn_params(ucast);
-
-       /* Copy out the required fields based on classification type */
-       ucast->type = type;
-
-       switch (type) {
-       case ECORE_FILTER_VNI:
-               ucast->vni = conf->tenant_id;
-       break;
-       case ECORE_FILTER_INNER_VLAN:
-               ucast->vlan = conf->inner_vlan;
-       break;
-       case ECORE_FILTER_MAC:
-               memcpy(ucast->mac, conf->outer_mac.addr_bytes,
-                      ETHER_ADDR_LEN);
-       break;
-       case ECORE_FILTER_INNER_MAC:
-               memcpy(ucast->mac, conf->inner_mac.addr_bytes,
-                      ETHER_ADDR_LEN);
-       break;
-       case ECORE_FILTER_MAC_VNI_PAIR:
-               memcpy(ucast->mac, conf->outer_mac.addr_bytes,
-                       ETHER_ADDR_LEN);
-               ucast->vni = conf->tenant_id;
-       break;
-       case ECORE_FILTER_INNER_MAC_VNI_PAIR:
-               memcpy(ucast->mac, conf->inner_mac.addr_bytes,
-                       ETHER_ADDR_LEN);
-               ucast->vni = conf->tenant_id;
-       break;
-       case ECORE_FILTER_INNER_PAIR:
-               memcpy(ucast->mac, conf->inner_mac.addr_bytes,
-                       ETHER_ADDR_LEN);
-               ucast->vlan = conf->inner_vlan;
-       break;
-       default:
-               return -EINVAL;
-       }
-
-       return ECORE_SUCCESS;
-}
-
-static int
-_qede_tunn_filter_config(struct rte_eth_dev *eth_dev,
-                        const struct rte_eth_tunnel_filter_conf *conf,
-                        __attribute__((unused)) enum rte_filter_op filter_op,
-                        enum ecore_tunn_clss *clss,
-                        bool add)
-{
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       struct ecore_filter_ucast ucast = {0};
-       enum ecore_filter_ucast_type type;
-       uint16_t filter_type = 0;
-       char str[80];
-       int rc;
-
-       filter_type = conf->filter_type;
-       /* Determine if the given filter classification is supported */
-       qede_get_ecore_tunn_params(filter_type, &type, clss, str);
-       if (*clss == MAX_ECORE_TUNN_CLSS) {
-               DP_ERR(edev, "Unsupported filter type\n");
-               return -EINVAL;
-       }
-       /* Init tunnel ucast params */
-       rc = qede_set_ucast_tunn_cmn_param(&ucast, conf, type);
-       if (rc != ECORE_SUCCESS) {
-               DP_ERR(edev, "Unsupported Tunnel filter type 0x%x\n",
-               conf->filter_type);
-               return rc;
-       }
-       DP_INFO(edev, "Rule: \"%s\", op %d, type 0x%x\n",
-               str, filter_op, ucast.type);
-
-       ucast.opcode = add ? ECORE_FILTER_ADD : ECORE_FILTER_REMOVE;
-
-       /* Skip MAC/VLAN if filter is based on VNI */
-       if (!(filter_type & ETH_TUNNEL_FILTER_TENID)) {
-               rc = qede_mac_int_ops(eth_dev, &ucast, add);
-               if ((rc == 0) && add) {
-                       /* Enable accept anyvlan */
-                       qede_config_accept_any_vlan(qdev, true);
-               }
-       } else {
-               rc = qede_ucast_filter(eth_dev, &ucast, add);
-               if (rc == 0)
-                       rc = ecore_filter_ucast_cmd(edev, &ucast,
-                                           ECORE_SPQ_MODE_CB, NULL);
-       }
-
-       return rc;
-}
-
-static int
-qede_tunn_filter_config(struct rte_eth_dev *eth_dev,
-                       enum rte_filter_op filter_op,
-                       const struct rte_eth_tunnel_filter_conf *conf)
+qede_dev_reset(struct rte_eth_dev *dev)
 {
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       enum ecore_tunn_clss clss = MAX_ECORE_TUNN_CLSS;
-       bool add;
-       int rc;
-
-       PMD_INIT_FUNC_TRACE(edev);
-
-       switch (filter_op) {
-       case RTE_ETH_FILTER_ADD:
-               add = true;
-               break;
-       case RTE_ETH_FILTER_DELETE:
-               add = false;
-               break;
-       default:
-               DP_ERR(edev, "Unsupported operation %d\n", filter_op);
-               return -EINVAL;
-       }
-
-       if (IS_VF(edev))
-               return qede_tunn_enable(eth_dev,
-                                       ECORE_TUNN_CLSS_MAC_VLAN,
-                                       conf->tunnel_type, add);
-
-       rc = _qede_tunn_filter_config(eth_dev, conf, filter_op, &clss, add);
-       if (rc != ECORE_SUCCESS)
-               return rc;
-
-       if (add) {
-               if (conf->tunnel_type == RTE_TUNNEL_TYPE_VXLAN) {
-                       qdev->vxlan.num_filters++;
-                       qdev->vxlan.filter_type = conf->filter_type;
-               } else { /* GENEVE */
-                       qdev->geneve.num_filters++;
-                       qdev->geneve.filter_type = conf->filter_type;
-               }
-
-               if (!qdev->vxlan.enable || !qdev->geneve.enable ||
-                   !qdev->ipgre.enable)
-                       return qede_tunn_enable(eth_dev, clss,
-                                               conf->tunnel_type,
-                                               true);
-       } else {
-               if (conf->tunnel_type == RTE_TUNNEL_TYPE_VXLAN)
-                       qdev->vxlan.num_filters--;
-               else /*GENEVE*/
-                       qdev->geneve.num_filters--;
-
-               /* Disable VXLAN if VXLAN filters become 0 */
-               if ((qdev->vxlan.num_filters == 0) ||
-                   (qdev->geneve.num_filters == 0))
-                       return qede_tunn_enable(eth_dev, clss,
-                                               conf->tunnel_type,
-                                               false);
-       }
-
-       return 0;
-}
+       int ret;
 
-int qede_dev_filter_ctrl(struct rte_eth_dev *eth_dev,
-                        enum rte_filter_type filter_type,
-                        enum rte_filter_op filter_op,
-                        void *arg)
-{
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       struct rte_eth_tunnel_filter_conf *filter_conf =
-                       (struct rte_eth_tunnel_filter_conf *)arg;
-
-       switch (filter_type) {
-       case RTE_ETH_FILTER_TUNNEL:
-               switch (filter_conf->tunnel_type) {
-               case RTE_TUNNEL_TYPE_VXLAN:
-               case RTE_TUNNEL_TYPE_GENEVE:
-               case RTE_TUNNEL_TYPE_IP_IN_GRE:
-                       DP_INFO(edev,
-                               "Packet steering to the specified Rx queue"
-                               " is not supported with UDP tunneling");
-                       return(qede_tunn_filter_config(eth_dev, filter_op,
-                                                     filter_conf));
-               case RTE_TUNNEL_TYPE_TEREDO:
-               case RTE_TUNNEL_TYPE_NVGRE:
-               case RTE_L2_TUNNEL_TYPE_E_TAG:
-                       DP_ERR(edev, "Unsupported tunnel type %d\n",
-                               filter_conf->tunnel_type);
-                       return -EINVAL;
-               case RTE_TUNNEL_TYPE_NONE:
-               default:
-                       return 0;
-               }
-               break;
-       case RTE_ETH_FILTER_FDIR:
-               return qede_fdir_filter_conf(eth_dev, filter_op, arg);
-       case RTE_ETH_FILTER_NTUPLE:
-               return qede_ntuple_filter_conf(eth_dev, filter_op, arg);
-       case RTE_ETH_FILTER_MACVLAN:
-       case RTE_ETH_FILTER_ETHERTYPE:
-       case RTE_ETH_FILTER_FLEXIBLE:
-       case RTE_ETH_FILTER_SYN:
-       case RTE_ETH_FILTER_HASH:
-       case RTE_ETH_FILTER_L2_TUNNEL:
-       case RTE_ETH_FILTER_MAX:
-       default:
-               DP_ERR(edev, "Unsupported filter type %d\n",
-                       filter_type);
-               return -EINVAL;
-       }
+       ret = qede_eth_dev_uninit(dev);
+       if (ret)
+               return ret;
 
-       return 0;
+       return qede_eth_dev_init(dev);
 }
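
The new dev_reset hook above simply tears the port down (qede_eth_dev_uninit) and probes it again (qede_eth_dev_init). Applications reach it through the generic rte_eth_dev_reset() ethdev call. Below is a minimal recovery sketch, not part of this patch: port_id, dev_conf, nb_rxq and nb_txq are assumed caller-saved state, and per-queue re-setup is left out for brevity.

    #include <rte_ethdev.h>

    static int
    recover_port(uint16_t port_id, const struct rte_eth_conf *dev_conf,
                 uint16_t nb_rxq, uint16_t nb_txq)
    {
            int ret;

            rte_eth_dev_stop(port_id);        /* quiesce traffic first */

            ret = rte_eth_dev_reset(port_id); /* lands in qede_dev_reset() */
            if (ret != 0)
                    return ret;

            /* Reset leaves the port unconfigured; reapply saved settings.
             * rte_eth_rx/tx_queue_setup() for each queue is omitted here.
             */
            ret = rte_eth_dev_configure(port_id, nb_rxq, nb_txq, dev_conf);
            if (ret != 0)
                    return ret;

            return rte_eth_dev_start(port_id);
    }
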
 
 static const struct eth_dev_ops qede_eth_dev_ops = {
@@ -2981,9 +2309,11 @@ static const struct eth_dev_ops qede_eth_dev_ops = {
        .dev_infos_get = qede_dev_info_get,
        .rx_queue_setup = qede_rx_queue_setup,
        .rx_queue_release = qede_rx_queue_release,
+       .rx_descriptor_status = qede_rx_descriptor_status,
        .tx_queue_setup = qede_tx_queue_setup,
        .tx_queue_release = qede_tx_queue_release,
        .dev_start = qede_dev_start,
+       .dev_reset = qede_dev_reset,
        .dev_set_link_up = qede_dev_set_link_up,
        .dev_set_link_down = qede_dev_set_link_down,
        .link_update = qede_link_update,
@@ -3022,9 +2352,11 @@ static const struct eth_dev_ops qede_eth_vf_dev_ops = {
        .dev_infos_get = qede_dev_info_get,
        .rx_queue_setup = qede_rx_queue_setup,
        .rx_queue_release = qede_rx_queue_release,
+       .rx_descriptor_status = qede_rx_descriptor_status,
        .tx_queue_setup = qede_tx_queue_setup,
        .tx_queue_release = qede_tx_queue_release,
        .dev_start = qede_dev_start,
+       .dev_reset = qede_dev_reset,
        .dev_set_link_up = qede_dev_set_link_up,
        .dev_set_link_down = qede_dev_set_link_down,
        .link_update = qede_link_update,
@@ -3257,7 +2589,7 @@ static int qede_common_dev_init(struct rte_eth_dev *eth_dev, bool is_vf)
 
        adapter->num_tx_queues = 0;
        adapter->num_rx_queues = 0;
-       SLIST_INIT(&adapter->fdir_info.fdir_list_head);
+       SLIST_INIT(&adapter->arfs_info.arfs_list_head);
        SLIST_INIT(&adapter->vlan_list_head);
        SLIST_INIT(&adapter->uc_list_head);
        SLIST_INIT(&adapter->mc_list_head);
@@ -3311,12 +2643,10 @@ static int qede_eth_dev_init(struct rte_eth_dev *eth_dev)
 
 static int qede_dev_common_uninit(struct rte_eth_dev *eth_dev)
 {
-#ifdef RTE_LIBRTE_QEDE_DEBUG_INIT
        struct qede_dev *qdev = eth_dev->data->dev_private;
        struct ecore_dev *edev = &qdev->edev;
 
        PMD_INIT_FUNC_TRACE(edev);
-#endif
 
        /* only uninitialize in the primary process */
        if (rte_eal_process_type() != RTE_PROC_PRIMARY)
@@ -3329,11 +2659,6 @@ static int qede_dev_common_uninit(struct rte_eth_dev *eth_dev)
        eth_dev->rx_pkt_burst = NULL;
        eth_dev->tx_pkt_burst = NULL;
 
-       if (eth_dev->data->mac_addrs)
-               rte_free(eth_dev->data->mac_addrs);
-
-       eth_dev->data->mac_addrs = NULL;
-
        return 0;
 }
 
diff --git a/drivers/net/qede/qede_ethdev.h b/drivers/net/qede/qede_ethdev.h
index 6e9a5b4..c06274d 100644
@@ -44,7 +44,7 @@
 /* Driver versions */
 #define QEDE_PMD_VER_PREFIX            "QEDE PMD"
 #define QEDE_PMD_VERSION_MAJOR         2
-#define QEDE_PMD_VERSION_MINOR         9
+#define QEDE_PMD_VERSION_MINOR         10
 #define QEDE_PMD_VERSION_REVISION       0
 #define QEDE_PMD_VERSION_PATCH         1
 
 
 
 
-extern char fw_file[];
+extern char qede_fw_file[];
 
 /* Number of PF connections - 32 RX + 32 TX */
 #define QEDE_PF_NUM_CONNS              (64)
@@ -151,18 +151,48 @@ struct qede_ucast_entry {
        SLIST_ENTRY(qede_ucast_entry) list;
 };
 
-struct qede_fdir_entry {
+#ifndef IPV6_ADDR_LEN
+#define IPV6_ADDR_LEN                          (16)
+#endif
+
+struct qede_arfs_tuple {
+       union {
+               uint32_t src_ipv4;
+               uint8_t src_ipv6[IPV6_ADDR_LEN];
+       };
+
+       union {
+               uint32_t dst_ipv4;
+               uint8_t dst_ipv6[IPV6_ADDR_LEN];
+       };
+
+       uint16_t        src_port;
+       uint16_t        dst_port;
+       uint16_t        eth_proto;
+       uint8_t         ip_proto;
+
+       /* Describe filtering mode needed for this kind of filter */
+       enum ecore_filter_config_mode mode;
+};
+
+struct qede_arfs_entry {
        uint32_t soft_id; /* unused for now */
        uint16_t pkt_len; /* actual packet length to match */
        uint16_t rx_queue; /* queue to be steered to */
        const struct rte_memzone *mz; /* mz used to hold L2 frame */
-       SLIST_ENTRY(qede_fdir_entry) list;
+       struct qede_arfs_tuple tuple;
+       SLIST_ENTRY(qede_arfs_entry) list;
 };
 
-struct qede_fdir_info {
+/* Opaque handle for rte flow managed by PMD */
+struct rte_flow {
+       struct qede_arfs_entry entry;
+};
+
+struct qede_arfs_info {
        struct ecore_arfs_config_params arfs;
        uint16_t filter_count;
-       SLIST_HEAD(fdir_list_head, qede_fdir_entry)fdir_list_head;
+       SLIST_HEAD(arfs_list_head, qede_arfs_entry) arfs_list_head;
 };
 
 /* IANA assigned default UDP ports for encapsulation protocols */
@@ -207,7 +237,7 @@ struct qede_dev {
        struct qede_tunn_params vxlan;
        struct qede_tunn_params geneve;
        struct qede_tunn_params ipgre;
-       struct qede_fdir_info fdir_info;
+       struct qede_arfs_info arfs_info;
        bool vlan_strip_flg;
        char drv_ver[QEDE_PMD_DRV_VER_STR_SIZE];
        bool vport_started;
@@ -215,6 +245,15 @@ struct qede_dev {
        void *ethdev;
 };
 
+static inline void qede_set_ucast_cmn_params(struct ecore_filter_ucast *ucast)
+{
+       memset(ucast, 0, sizeof(struct ecore_filter_ucast));
+       ucast->is_rx_filter = true;
+       ucast->is_tx_filter = true;
+       /* ucast->assert_on_error = true; - For debug */
+}
+
+
 /* Non-static functions */
 int qede_config_rss(struct rte_eth_dev *eth_dev);
 
@@ -235,9 +274,6 @@ int qede_link_update(struct rte_eth_dev *eth_dev,
 int qede_dev_filter_ctrl(struct rte_eth_dev *dev, enum rte_filter_type type,
                         enum rte_filter_op op, void *arg);
 
-int qede_fdir_filter_conf(struct rte_eth_dev *eth_dev,
-                         enum rte_filter_op filter_op, void *arg);
-
 int qede_ntuple_filter_conf(struct rte_eth_dev *eth_dev,
                            enum rte_filter_op filter_op, void *arg);
 
@@ -255,5 +291,16 @@ int qede_activate_vport(struct rte_eth_dev *eth_dev, bool flg);
 int qede_update_mtu(struct rte_eth_dev *eth_dev, uint16_t mtu);
 
 int qede_enable_tpa(struct rte_eth_dev *eth_dev, bool flg);
-
+int qede_udp_dst_port_del(struct rte_eth_dev *eth_dev,
+                         struct rte_eth_udp_tunnel *tunnel_udp);
+int qede_udp_dst_port_add(struct rte_eth_dev *eth_dev,
+                         struct rte_eth_udp_tunnel *tunnel_udp);
+
+enum _ecore_status_t
+qede_mac_int_ops(struct rte_eth_dev *eth_dev, struct ecore_filter_ucast *ucast,
+                bool add);
+void qede_config_accept_any_vlan(struct qede_dev *qdev, bool flg);
+int qede_ucast_filter(struct rte_eth_dev *eth_dev,
+                     struct ecore_filter_ucast *ucast,
+                     bool add);
 #endif /* _QEDE_ETHDEV_H_ */
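
The struct rte_flow introduced above wraps a single qede_arfs_entry; it is the opaque handle the generic rte_flow API returns for this PMD. As an illustration only (not code from this patch), a 5-tuple TCP/IPv4 steering rule that such a handle could represent might be built as follows; every address, port and the queue index is an invented example value.

    #include <rte_byteorder.h>
    #include <rte_flow.h>

    /* Steer TCP 10.0.0.1:80 -> 10.0.0.2:8080 to Rx queue 2 (sample values). */
    static struct rte_flow *
    add_tcp_steering_rule(uint16_t port_id, struct rte_flow_error *error)
    {
            struct rte_flow_attr attr = { .ingress = 1 };
            struct rte_flow_item_ipv4 ip = {
                    .hdr = {
                            .src_addr = rte_cpu_to_be_32(0x0a000001),
                            .dst_addr = rte_cpu_to_be_32(0x0a000002),
                    },
            };
            struct rte_flow_item_tcp tcp = {
                    .hdr = {
                            .src_port = rte_cpu_to_be_16(80),
                            .dst_port = rte_cpu_to_be_16(8080),
                    },
            };
            struct rte_flow_item pattern[] = {
                    { .type = RTE_FLOW_ITEM_TYPE_ETH },
                    { .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ip },
                    { .type = RTE_FLOW_ITEM_TYPE_TCP, .spec = &tcp },
                    { .type = RTE_FLOW_ITEM_TYPE_END },
            };
            struct rte_flow_action_queue queue = { .index = 2 };
            struct rte_flow_action actions[] = {
                    { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
                    { .type = RTE_FLOW_ACTION_TYPE_END },
            };

            return rte_flow_create(port_id, &attr, pattern, actions, error);
    }
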
diff --git a/drivers/net/qede/qede_fdir.c b/drivers/net/qede/qede_fdir.c
deleted file mode 100644
index 83580d0..0000000
--- a/drivers/net/qede/qede_fdir.c
+++ /dev/null
@@ -1,470 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright (c) 2017 Cavium Inc.
- * All rights reserved.
- * www.cavium.com
- */
-
-#include <rte_udp.h>
-#include <rte_tcp.h>
-#include <rte_sctp.h>
-#include <rte_errno.h>
-
-#include "qede_ethdev.h"
-
-#define IP_VERSION                             (0x40)
-#define IP_HDRLEN                              (0x5)
-#define QEDE_FDIR_IP_DEFAULT_VERSION_IHL       (IP_VERSION | IP_HDRLEN)
-#define QEDE_FDIR_TCP_DEFAULT_DATAOFF          (0x50)
-#define QEDE_FDIR_IPV4_DEF_TTL                 (64)
-
-/* Sum of length of header types of L2, L3, L4.
- * L2 : ether_hdr + vlan_hdr + vxlan_hdr
- * L3 : ipv6_hdr
- * L4 : tcp_hdr
- */
-#define QEDE_MAX_FDIR_PKT_LEN                  (86)
-
-#ifndef IPV6_ADDR_LEN
-#define IPV6_ADDR_LEN                          (16)
-#endif
-
-#define QEDE_VALID_FLOW(flow_type) \
-       ((flow_type) == RTE_ETH_FLOW_NONFRAG_IPV4_TCP   || \
-       (flow_type) == RTE_ETH_FLOW_NONFRAG_IPV4_UDP    || \
-       (flow_type) == RTE_ETH_FLOW_NONFRAG_IPV6_TCP    || \
-       (flow_type) == RTE_ETH_FLOW_NONFRAG_IPV6_UDP)
-
-/* Note: Flowdir support is only partial.
- * For ex: drop_queue, FDIR masks, flex_conf are not supported.
- * Parameters like pballoc/status fields are irrelevant here.
- */
-int qede_check_fdir_support(struct rte_eth_dev *eth_dev)
-{
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       struct rte_fdir_conf *fdir = &eth_dev->data->dev_conf.fdir_conf;
-
-       /* check FDIR modes */
-       switch (fdir->mode) {
-       case RTE_FDIR_MODE_NONE:
-               qdev->fdir_info.arfs.arfs_enable = false;
-               DP_INFO(edev, "flowdir is disabled\n");
-       break;
-       case RTE_FDIR_MODE_PERFECT:
-               if (ECORE_IS_CMT(edev)) {
-                       DP_ERR(edev, "flowdir is not supported in 100G mode\n");
-                       qdev->fdir_info.arfs.arfs_enable = false;
-                       return -ENOTSUP;
-               }
-               qdev->fdir_info.arfs.arfs_enable = true;
-               DP_INFO(edev, "flowdir is enabled\n");
-       break;
-       case RTE_FDIR_MODE_PERFECT_TUNNEL:
-       case RTE_FDIR_MODE_SIGNATURE:
-       case RTE_FDIR_MODE_PERFECT_MAC_VLAN:
-               DP_ERR(edev, "Unsupported flowdir mode %d\n", fdir->mode);
-               return -ENOTSUP;
-       }
-
-       return 0;
-}
-
-void qede_fdir_dealloc_resc(struct rte_eth_dev *eth_dev)
-{
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct qede_fdir_entry *tmp = NULL;
-
-       SLIST_FOREACH(tmp, &qdev->fdir_info.fdir_list_head, list) {
-               if (tmp) {
-                       if (tmp->mz)
-                               rte_memzone_free(tmp->mz);
-                       SLIST_REMOVE(&qdev->fdir_info.fdir_list_head, tmp,
-                                    qede_fdir_entry, list);
-                       rte_free(tmp);
-               }
-       }
-}
-
-static int
-qede_config_cmn_fdir_filter(struct rte_eth_dev *eth_dev,
-                           struct rte_eth_fdir_filter *fdir_filter,
-                           bool add)
-{
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       char mz_name[RTE_MEMZONE_NAMESIZE] = {0};
-       struct qede_fdir_entry *tmp = NULL;
-       struct qede_fdir_entry *fdir = NULL;
-       const struct rte_memzone *mz;
-       struct ecore_hwfn *p_hwfn;
-       enum _ecore_status_t rc;
-       uint16_t pkt_len;
-       void *pkt;
-
-       if (add) {
-               if (qdev->fdir_info.filter_count == QEDE_RFS_MAX_FLTR - 1) {
-                       DP_ERR(edev, "Reached max flowdir filter limit\n");
-                       return -EINVAL;
-               }
-               fdir = rte_malloc(NULL, sizeof(struct qede_fdir_entry),
-                                 RTE_CACHE_LINE_SIZE);
-               if (!fdir) {
-                       DP_ERR(edev, "Did not allocate memory for fdir\n");
-                       return -ENOMEM;
-               }
-       }
-       /* soft_id could have been used as memzone string, but soft_id is
-        * not currently used so it has no significance.
-        */
-       snprintf(mz_name, sizeof(mz_name) - 1, "%lx",
-                (unsigned long)rte_get_timer_cycles());
-       mz = rte_memzone_reserve_aligned(mz_name, QEDE_MAX_FDIR_PKT_LEN,
-                                        SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
-       if (!mz) {
-               DP_ERR(edev, "Failed to allocate memzone for fdir, err = %s\n",
-                      rte_strerror(rte_errno));
-               rc = -rte_errno;
-               goto err1;
-       }
-
-       pkt = mz->addr;
-       memset(pkt, 0, QEDE_MAX_FDIR_PKT_LEN);
-       pkt_len = qede_fdir_construct_pkt(eth_dev, fdir_filter, pkt,
-                                         &qdev->fdir_info.arfs);
-       if (pkt_len == 0) {
-               rc = -EINVAL;
-               goto err2;
-       }
-       DP_INFO(edev, "pkt_len = %u memzone = %s\n", pkt_len, mz_name);
-       if (add) {
-               SLIST_FOREACH(tmp, &qdev->fdir_info.fdir_list_head, list) {
-                       if (memcmp(tmp->mz->addr, pkt, pkt_len) == 0) {
-                               DP_INFO(edev, "flowdir filter exist\n");
-                               rc = 0;
-                               goto err2;
-                       }
-               }
-       } else {
-               SLIST_FOREACH(tmp, &qdev->fdir_info.fdir_list_head, list) {
-                       if (memcmp(tmp->mz->addr, pkt, pkt_len) == 0)
-                               break;
-               }
-               if (!tmp) {
-                       DP_ERR(edev, "flowdir filter does not exist\n");
-                       rc = -EEXIST;
-                       goto err2;
-               }
-       }
-       p_hwfn = ECORE_LEADING_HWFN(edev);
-       if (add) {
-               if (!qdev->fdir_info.arfs.arfs_enable) {
-                       /* Force update */
-                       eth_dev->data->dev_conf.fdir_conf.mode =
-                                               RTE_FDIR_MODE_PERFECT;
-                       qdev->fdir_info.arfs.arfs_enable = true;
-                       DP_INFO(edev, "Force enable flowdir in perfect mode\n");
-               }
-               /* Enable ARFS searcher with updated flow_types */
-               ecore_arfs_mode_configure(p_hwfn, p_hwfn->p_arfs_ptt,
-                                         &qdev->fdir_info.arfs);
-       }
-       /* configure filter with ECORE_SPQ_MODE_EBLOCK */
-       rc = ecore_configure_rfs_ntuple_filter(p_hwfn, NULL,
-                                              (dma_addr_t)mz->iova,
-                                              pkt_len,
-                                              fdir_filter->action.rx_queue,
-                                              0, add);
-       if (rc == ECORE_SUCCESS) {
-               if (add) {
-                       fdir->rx_queue = fdir_filter->action.rx_queue;
-                       fdir->pkt_len = pkt_len;
-                       fdir->mz = mz;
-                       SLIST_INSERT_HEAD(&qdev->fdir_info.fdir_list_head,
-                                         fdir, list);
-                       qdev->fdir_info.filter_count++;
-                       DP_INFO(edev, "flowdir filter added, count = %d\n",
-                               qdev->fdir_info.filter_count);
-               } else {
-                       rte_memzone_free(tmp->mz);
-                       SLIST_REMOVE(&qdev->fdir_info.fdir_list_head, tmp,
-                                    qede_fdir_entry, list);
-                       rte_free(tmp); /* the node deleted */
-                       rte_memzone_free(mz); /* temp node allocated */
-                       qdev->fdir_info.filter_count--;
-                       DP_INFO(edev, "Fdir filter deleted, count = %d\n",
-                               qdev->fdir_info.filter_count);
-               }
-       } else {
-               DP_ERR(edev, "flowdir filter failed, rc=%d filter_count=%d\n",
-                      rc, qdev->fdir_info.filter_count);
-       }
-
-       /* Disable ARFS searcher if there are no more filters */
-       if (qdev->fdir_info.filter_count == 0) {
-               memset(&qdev->fdir_info.arfs, 0,
-                      sizeof(struct ecore_arfs_config_params));
-               DP_INFO(edev, "Disabling flowdir\n");
-               qdev->fdir_info.arfs.arfs_enable = false;
-               ecore_arfs_mode_configure(p_hwfn, p_hwfn->p_arfs_ptt,
-                                         &qdev->fdir_info.arfs);
-       }
-       return 0;
-
-err2:
-       rte_memzone_free(mz);
-err1:
-       if (add)
-               rte_free(fdir);
-       return rc;
-}
-
-static int
-qede_fdir_filter_add(struct rte_eth_dev *eth_dev,
-                    struct rte_eth_fdir_filter *fdir,
-                    bool add)
-{
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-
-       if (!QEDE_VALID_FLOW(fdir->input.flow_type)) {
-               DP_ERR(edev, "invalid flow_type input\n");
-               return -EINVAL;
-       }
-
-       if (fdir->action.rx_queue >= QEDE_RSS_COUNT(qdev)) {
-               DP_ERR(edev, "invalid queue number %u\n",
-                      fdir->action.rx_queue);
-               return -EINVAL;
-       }
-
-       if (fdir->input.flow_ext.is_vf) {
-               DP_ERR(edev, "flowdir is not supported over VF\n");
-               return -EINVAL;
-       }
-
-       return qede_config_cmn_fdir_filter(eth_dev, fdir, add);
-}
-
-/* Fills the L3/L4 headers and returns the actual length  of flowdir packet */
-uint16_t
-qede_fdir_construct_pkt(struct rte_eth_dev *eth_dev,
-                       struct rte_eth_fdir_filter *fdir,
-                       void *buff,
-                       struct ecore_arfs_config_params *params)
-
-{
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       uint16_t *ether_type;
-       uint8_t *raw_pkt;
-       struct rte_eth_fdir_input *input;
-       static uint8_t vlan_frame[] = {0x81, 0, 0, 0};
-       struct ipv4_hdr *ip;
-       struct ipv6_hdr *ip6;
-       struct udp_hdr *udp;
-       struct tcp_hdr *tcp;
-       uint16_t len;
-       static const uint8_t next_proto[] = {
-               [RTE_ETH_FLOW_NONFRAG_IPV4_TCP] = IPPROTO_TCP,
-               [RTE_ETH_FLOW_NONFRAG_IPV4_UDP] = IPPROTO_UDP,
-               [RTE_ETH_FLOW_NONFRAG_IPV6_TCP] = IPPROTO_TCP,
-               [RTE_ETH_FLOW_NONFRAG_IPV6_UDP] = IPPROTO_UDP,
-       };
-       raw_pkt = (uint8_t *)buff;
-       input = &fdir->input;
-       DP_INFO(edev, "flow_type %d\n", input->flow_type);
-
-       len =  2 * sizeof(struct ether_addr);
-       raw_pkt += 2 * sizeof(struct ether_addr);
-       if (input->flow_ext.vlan_tci) {
-               DP_INFO(edev, "adding VLAN header\n");
-               rte_memcpy(raw_pkt, vlan_frame, sizeof(vlan_frame));
-               rte_memcpy(raw_pkt + sizeof(uint16_t),
-                          &input->flow_ext.vlan_tci,
-                          sizeof(uint16_t));
-               raw_pkt += sizeof(vlan_frame);
-               len += sizeof(vlan_frame);
-       }
-       ether_type = (uint16_t *)raw_pkt;
-       raw_pkt += sizeof(uint16_t);
-       len += sizeof(uint16_t);
-
-       switch (input->flow_type) {
-       case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
-       case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
-               /* fill the common ip header */
-               ip = (struct ipv4_hdr *)raw_pkt;
-               *ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
-               ip->version_ihl = QEDE_FDIR_IP_DEFAULT_VERSION_IHL;
-               ip->total_length = sizeof(struct ipv4_hdr);
-               ip->next_proto_id = input->flow.ip4_flow.proto ?
-                                   input->flow.ip4_flow.proto :
-                                   next_proto[input->flow_type];
-               ip->time_to_live = input->flow.ip4_flow.ttl ?
-                                  input->flow.ip4_flow.ttl :
-                                  QEDE_FDIR_IPV4_DEF_TTL;
-               ip->type_of_service = input->flow.ip4_flow.tos;
-               ip->dst_addr = input->flow.ip4_flow.dst_ip;
-               ip->src_addr = input->flow.ip4_flow.src_ip;
-               len += sizeof(struct ipv4_hdr);
-               params->ipv4 = true;
-
-               raw_pkt = (uint8_t *)buff;
-               /* UDP */
-               if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_UDP) {
-                       udp = (struct udp_hdr *)(raw_pkt + len);
-                       udp->dst_port = input->flow.udp4_flow.dst_port;
-                       udp->src_port = input->flow.udp4_flow.src_port;
-                       udp->dgram_len = sizeof(struct udp_hdr);
-                       len += sizeof(struct udp_hdr);
-                       /* adjust ip total_length */
-                       ip->total_length += sizeof(struct udp_hdr);
-                       params->udp = true;
-               } else { /* TCP */
-                       tcp = (struct tcp_hdr *)(raw_pkt + len);
-                       tcp->src_port = input->flow.tcp4_flow.src_port;
-                       tcp->dst_port = input->flow.tcp4_flow.dst_port;
-                       tcp->data_off = QEDE_FDIR_TCP_DEFAULT_DATAOFF;
-                       len += sizeof(struct tcp_hdr);
-                       /* adjust ip total_length */
-                       ip->total_length += sizeof(struct tcp_hdr);
-                       params->tcp = true;
-               }
-               break;
-       case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
-       case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
-               ip6 = (struct ipv6_hdr *)raw_pkt;
-               *ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv6);
-               ip6->proto = input->flow.ipv6_flow.proto ?
-                                       input->flow.ipv6_flow.proto :
-                                       next_proto[input->flow_type];
-               rte_memcpy(&ip6->src_addr, &input->flow.ipv6_flow.dst_ip,
-                          IPV6_ADDR_LEN);
-               rte_memcpy(&ip6->dst_addr, &input->flow.ipv6_flow.src_ip,
-                          IPV6_ADDR_LEN);
-               len += sizeof(struct ipv6_hdr);
-
-               raw_pkt = (uint8_t *)buff;
-               /* UDP */
-               if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV6_UDP) {
-                       udp = (struct udp_hdr *)(raw_pkt + len);
-                       udp->src_port = input->flow.udp6_flow.dst_port;
-                       udp->dst_port = input->flow.udp6_flow.src_port;
-                       len += sizeof(struct udp_hdr);
-                       params->udp = true;
-               } else { /* TCP */
-                       tcp = (struct tcp_hdr *)(raw_pkt + len);
-                       tcp->src_port = input->flow.tcp4_flow.src_port;
-                       tcp->dst_port = input->flow.tcp4_flow.dst_port;
-                       tcp->data_off = QEDE_FDIR_TCP_DEFAULT_DATAOFF;
-                       len += sizeof(struct tcp_hdr);
-                       params->tcp = true;
-               }
-               break;
-       default:
-               DP_ERR(edev, "Unsupported flow_type %u\n",
-                      input->flow_type);
-               return 0;
-       }
-
-       return len;
-}
-
-int
-qede_fdir_filter_conf(struct rte_eth_dev *eth_dev,
-                     enum rte_filter_op filter_op,
-                     void *arg)
-{
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       struct rte_eth_fdir_filter *fdir;
-       int ret;
-
-       fdir = (struct rte_eth_fdir_filter *)arg;
-       switch (filter_op) {
-       case RTE_ETH_FILTER_NOP:
-               /* Typically used to query flowdir support */
-               if (ECORE_IS_CMT(edev)) {
-                       DP_ERR(edev, "flowdir is not supported in 100G mode\n");
-                       return -ENOTSUP;
-               }
-               return 0; /* means supported */
-       case RTE_ETH_FILTER_ADD:
-               ret = qede_fdir_filter_add(eth_dev, fdir, 1);
-       break;
-       case RTE_ETH_FILTER_DELETE:
-               ret = qede_fdir_filter_add(eth_dev, fdir, 0);
-       break;
-       case RTE_ETH_FILTER_FLUSH:
-       case RTE_ETH_FILTER_UPDATE:
-       case RTE_ETH_FILTER_INFO:
-               return -ENOTSUP;
-       break;
-       default:
-               DP_ERR(edev, "unknown operation %u", filter_op);
-               ret = -EINVAL;
-       }
-
-       return ret;
-}
-
-int qede_ntuple_filter_conf(struct rte_eth_dev *eth_dev,
-                           enum rte_filter_op filter_op,
-                           void *arg)
-{
-       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
-       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
-       struct rte_eth_ntuple_filter *ntuple;
-       struct rte_eth_fdir_filter fdir_entry;
-       struct rte_eth_tcpv4_flow *tcpv4_flow;
-       struct rte_eth_udpv4_flow *udpv4_flow;
-       bool add = false;
-
-       switch (filter_op) {
-       case RTE_ETH_FILTER_NOP:
-               /* Typically used to query fdir support */
-               if (ECORE_IS_CMT(edev)) {
-                       DP_ERR(edev, "flowdir is not supported in 100G mode\n");
-                       return -ENOTSUP;
-               }
-               return 0; /* means supported */
-       case RTE_ETH_FILTER_ADD:
-               add = true;
-       break;
-       case RTE_ETH_FILTER_DELETE:
-       break;
-       case RTE_ETH_FILTER_INFO:
-       case RTE_ETH_FILTER_GET:
-       case RTE_ETH_FILTER_UPDATE:
-       case RTE_ETH_FILTER_FLUSH:
-       case RTE_ETH_FILTER_SET:
-       case RTE_ETH_FILTER_STATS:
-       case RTE_ETH_FILTER_OP_MAX:
-               DP_ERR(edev, "Unsupported filter_op %d\n", filter_op);
-               return -ENOTSUP;
-       }
-       ntuple = (struct rte_eth_ntuple_filter *)arg;
-       /* Internally convert ntuple to fdir entry */
-       memset(&fdir_entry, 0, sizeof(fdir_entry));
-       if (ntuple->proto == IPPROTO_TCP) {
-               fdir_entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_TCP;
-               tcpv4_flow = &fdir_entry.input.flow.tcp4_flow;
-               tcpv4_flow->ip.src_ip = ntuple->src_ip;
-               tcpv4_flow->ip.dst_ip = ntuple->dst_ip;
-               tcpv4_flow->ip.proto = IPPROTO_TCP;
-               tcpv4_flow->src_port = ntuple->src_port;
-               tcpv4_flow->dst_port = ntuple->dst_port;
-       } else {
-               fdir_entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
-               udpv4_flow = &fdir_entry.input.flow.udp4_flow;
-               udpv4_flow->ip.src_ip = ntuple->src_ip;
-               udpv4_flow->ip.dst_ip = ntuple->dst_ip;
-               udpv4_flow->ip.proto = IPPROTO_TCP;
-               udpv4_flow->src_port = ntuple->src_port;
-               udpv4_flow->dst_port = ntuple->dst_port;
-       }
-
-       fdir_entry.action.rx_queue = ntuple->queue;
-
-       return qede_config_cmn_fdir_filter(eth_dev, &fdir_entry, add);
-}
diff --git a/drivers/net/qede/qede_filter.c b/drivers/net/qede/qede_filter.c
new file mode 100644
index 0000000..5e6571c
--- /dev/null
+++ b/drivers/net/qede/qede_filter.c
@@ -0,0 +1,1546 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2017 Cavium Inc.
+ * All rights reserved.
+ * www.cavium.com
+ */
+
+#include <rte_udp.h>
+#include <rte_tcp.h>
+#include <rte_sctp.h>
+#include <rte_errno.h>
+#include <rte_flow_driver.h>
+
+#include "qede_ethdev.h"
+
+/* UDP tunnel classification mapping */
+const struct _qede_udp_tunn_types {
+       uint16_t rte_filter_type;
+       enum ecore_filter_ucast_type qede_type;
+       enum ecore_tunn_clss qede_tunn_clss;
+       const char *string;
+} qede_tunn_types[] = {
+       {
+               ETH_TUNNEL_FILTER_OMAC,
+               ECORE_FILTER_MAC,
+               ECORE_TUNN_CLSS_MAC_VLAN,
+               "outer-mac"
+       },
+       {
+               ETH_TUNNEL_FILTER_TENID,
+               ECORE_FILTER_VNI,
+               ECORE_TUNN_CLSS_MAC_VNI,
+               "vni"
+       },
+       {
+               ETH_TUNNEL_FILTER_IMAC,
+               ECORE_FILTER_INNER_MAC,
+               ECORE_TUNN_CLSS_INNER_MAC_VLAN,
+               "inner-mac"
+       },
+       {
+               ETH_TUNNEL_FILTER_IVLAN,
+               ECORE_FILTER_INNER_VLAN,
+               ECORE_TUNN_CLSS_INNER_MAC_VLAN,
+               "inner-vlan"
+       },
+       {
+               ETH_TUNNEL_FILTER_OMAC | ETH_TUNNEL_FILTER_TENID,
+               ECORE_FILTER_MAC_VNI_PAIR,
+               ECORE_TUNN_CLSS_MAC_VNI,
+               "outer-mac and vni"
+       },
+       {
+               ETH_TUNNEL_FILTER_OMAC | ETH_TUNNEL_FILTER_IMAC,
+               ECORE_FILTER_UNUSED,
+               MAX_ECORE_TUNN_CLSS,
+               "outer-mac and inner-mac"
+       },
+       {
+               ETH_TUNNEL_FILTER_OMAC | ETH_TUNNEL_FILTER_IVLAN,
+               ECORE_FILTER_UNUSED,
+               MAX_ECORE_TUNN_CLSS,
+               "outer-mac and inner-vlan"
+       },
+       {
+               ETH_TUNNEL_FILTER_TENID | ETH_TUNNEL_FILTER_IMAC,
+               ECORE_FILTER_INNER_MAC_VNI_PAIR,
+               ECORE_TUNN_CLSS_INNER_MAC_VNI,
+               "vni and inner-mac",
+       },
+       {
+               ETH_TUNNEL_FILTER_TENID | ETH_TUNNEL_FILTER_IVLAN,
+               ECORE_FILTER_UNUSED,
+               MAX_ECORE_TUNN_CLSS,
+               "vni and inner-vlan",
+       },
+       {
+               ETH_TUNNEL_FILTER_IMAC | ETH_TUNNEL_FILTER_IVLAN,
+               ECORE_FILTER_INNER_PAIR,
+               ECORE_TUNN_CLSS_INNER_MAC_VLAN,
+               "inner-mac and inner-vlan",
+       },
+       {
+               ETH_TUNNEL_FILTER_OIP,
+               ECORE_FILTER_UNUSED,
+               MAX_ECORE_TUNN_CLSS,
+               "outer-IP"
+       },
+       {
+               ETH_TUNNEL_FILTER_IIP,
+               ECORE_FILTER_UNUSED,
+               MAX_ECORE_TUNN_CLSS,
+               "inner-IP"
+       },
+       {
+               RTE_TUNNEL_FILTER_IMAC_IVLAN,
+               ECORE_FILTER_UNUSED,
+               MAX_ECORE_TUNN_CLSS,
+               "IMAC_IVLAN"
+       },
+       {
+               RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID,
+               ECORE_FILTER_UNUSED,
+               MAX_ECORE_TUNN_CLSS,
+               "IMAC_IVLAN_TENID"
+       },
+       {
+               RTE_TUNNEL_FILTER_IMAC_TENID,
+               ECORE_FILTER_UNUSED,
+               MAX_ECORE_TUNN_CLSS,
+               "IMAC_TENID"
+       },
+       {
+               RTE_TUNNEL_FILTER_OMAC_TENID_IMAC,
+               ECORE_FILTER_UNUSED,
+               MAX_ECORE_TUNN_CLSS,
+               "OMAC_TENID_IMAC"
+       },
+};
+
+#define IP_VERSION                             (0x40)
+#define IP_HDRLEN                              (0x5)
+#define QEDE_FDIR_IP_DEFAULT_VERSION_IHL       (IP_VERSION | IP_HDRLEN)
+#define QEDE_FDIR_TCP_DEFAULT_DATAOFF          (0x50)
+#define QEDE_FDIR_IPV4_DEF_TTL                 (64)
+#define QEDE_FDIR_IPV6_DEFAULT_VTC_FLOW                (0x60000000)
+/* Sum of length of header types of L2, L3, L4.
+ * L2 : ether_hdr + vlan_hdr + vxlan_hdr
+ * L3 : ipv6_hdr
+ * L4 : tcp_hdr
+ */
+#define QEDE_MAX_FDIR_PKT_LEN                  (86)
+
+static inline bool qede_valid_flow(uint16_t flow_type)
+{
+       return ((flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_TCP) ||
+                (flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_UDP) ||
+                (flow_type == RTE_ETH_FLOW_NONFRAG_IPV6_TCP) ||
+                (flow_type == RTE_ETH_FLOW_NONFRAG_IPV6_UDP));
+}
+
+static uint16_t
+qede_arfs_construct_pkt(struct rte_eth_dev *eth_dev,
+                       struct qede_arfs_entry *arfs,
+                       void *buff,
+                       struct ecore_arfs_config_params *params);
+
+/* Note: Flowdir support is only partial.
+ * For example, drop_queue, FDIR masks and flex_conf are not supported.
+ * Parameters like pballoc/status fields are irrelevant here.
+ */
+int qede_check_fdir_support(struct rte_eth_dev *eth_dev)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       struct rte_fdir_conf *fdir = &eth_dev->data->dev_conf.fdir_conf;
+
+       /* check FDIR modes */
+       switch (fdir->mode) {
+       case RTE_FDIR_MODE_NONE:
+               qdev->arfs_info.arfs.mode = ECORE_FILTER_CONFIG_MODE_DISABLE;
+               DP_INFO(edev, "flowdir is disabled\n");
+       break;
+       case RTE_FDIR_MODE_PERFECT:
+               if (ECORE_IS_CMT(edev)) {
+                       DP_ERR(edev, "flowdir is not supported in 100G mode\n");
+                       qdev->arfs_info.arfs.mode =
+                               ECORE_FILTER_CONFIG_MODE_DISABLE;
+                       return -ENOTSUP;
+               }
+               qdev->arfs_info.arfs.mode =
+                               ECORE_FILTER_CONFIG_MODE_5_TUPLE;
+               DP_INFO(edev, "flowdir is enabled (5 Tuple mode)\n");
+       break;
+       case RTE_FDIR_MODE_PERFECT_TUNNEL:
+       case RTE_FDIR_MODE_SIGNATURE:
+       case RTE_FDIR_MODE_PERFECT_MAC_VLAN:
+               DP_ERR(edev, "Unsupported flowdir mode %d\n", fdir->mode);
+               return -ENOTSUP;
+       }
+
+       return 0;
+}
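
qede_check_fdir_support() is driven entirely by the fdir_conf that the application hands to rte_eth_dev_configure(). A minimal sketch (assuming a probed port_id and caller-chosen queue counts) of requesting the perfect-match mode that maps to ECORE_FILTER_CONFIG_MODE_5_TUPLE above:

    #include <rte_ethdev.h>

    static int
    configure_with_fdir(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
    {
            /* Perfect-match flow director; qede rejects this on 100G (CMT)
             * devices, as the function above shows.
             */
            struct rte_eth_conf conf = {
                    .fdir_conf = { .mode = RTE_FDIR_MODE_PERFECT },
            };

            return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
    }
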
+
+void qede_fdir_dealloc_resc(struct rte_eth_dev *eth_dev)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct qede_arfs_entry *tmp = NULL;
+
+       SLIST_FOREACH(tmp, &qdev->arfs_info.arfs_list_head, list) {
+               if (tmp) {
+                       if (tmp->mz)
+                               rte_memzone_free(tmp->mz);
+                       SLIST_REMOVE(&qdev->arfs_info.arfs_list_head, tmp,
+                                    qede_arfs_entry, list);
+                       rte_free(tmp);
+               }
+       }
+}
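
One caveat with qede_fdir_dealloc_resc(): it frees list nodes from inside SLIST_FOREACH(), whose loop step reads the element that was just freed. A drain-style rewrite, sketched with the same types, sidesteps that:

    void qede_fdir_dealloc_resc(struct rte_eth_dev *eth_dev)
    {
            struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
            struct qede_arfs_entry *tmp;

            /* Always detach the head first, then free it. */
            while (!SLIST_EMPTY(&qdev->arfs_info.arfs_list_head)) {
                    tmp = SLIST_FIRST(&qdev->arfs_info.arfs_list_head);
                    SLIST_REMOVE_HEAD(&qdev->arfs_info.arfs_list_head, list);
                    if (tmp->mz)
                            rte_memzone_free(tmp->mz);
                    rte_free(tmp);
            }
    }
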
+
+static int
+qede_fdir_to_arfs_filter(struct rte_eth_dev *eth_dev,
+                        struct rte_eth_fdir_filter *fdir,
+                        struct qede_arfs_entry *arfs)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       struct rte_eth_fdir_input *input;
+
+       static const uint8_t next_proto[] = {
+               [RTE_ETH_FLOW_NONFRAG_IPV4_TCP] = IPPROTO_TCP,
+               [RTE_ETH_FLOW_NONFRAG_IPV4_UDP] = IPPROTO_UDP,
+               [RTE_ETH_FLOW_NONFRAG_IPV6_TCP] = IPPROTO_TCP,
+               [RTE_ETH_FLOW_NONFRAG_IPV6_UDP] = IPPROTO_UDP,
+       };
+
+       input = &fdir->input;
+
+       DP_INFO(edev, "flow_type %d\n", input->flow_type);
+
+       switch (input->flow_type) {
+       case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
+       case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
+               /* fill the common ip header */
+               arfs->tuple.eth_proto = ETHER_TYPE_IPv4;
+               arfs->tuple.dst_ipv4 = input->flow.ip4_flow.dst_ip;
+               arfs->tuple.src_ipv4 = input->flow.ip4_flow.src_ip;
+               arfs->tuple.ip_proto = next_proto[input->flow_type];
+
+               /* UDP */
+               if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV4_UDP) {
+                       arfs->tuple.dst_port = input->flow.udp4_flow.dst_port;
+                       arfs->tuple.src_port = input->flow.udp4_flow.src_port;
+               } else { /* TCP */
+                       arfs->tuple.dst_port = input->flow.tcp4_flow.dst_port;
+                       arfs->tuple.src_port = input->flow.tcp4_flow.src_port;
+               }
+               break;
+       case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
+       case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
+               arfs->tuple.eth_proto = ETHER_TYPE_IPv6;
+               arfs->tuple.ip_proto = next_proto[input->flow_type];
+               rte_memcpy(arfs->tuple.dst_ipv6,
+                          &input->flow.ipv6_flow.dst_ip,
+                          IPV6_ADDR_LEN);
+               rte_memcpy(arfs->tuple.src_ipv6,
+                          &input->flow.ipv6_flow.src_ip,
+                          IPV6_ADDR_LEN);
+
+               /* UDP */
+               if (input->flow_type == RTE_ETH_FLOW_NONFRAG_IPV6_UDP) {
+                       arfs->tuple.dst_port = input->flow.udp6_flow.dst_port;
+                       arfs->tuple.src_port = input->flow.udp6_flow.src_port;
+               } else { /* TCP */
+                       arfs->tuple.dst_port = input->flow.tcp6_flow.dst_port;
+                       arfs->tuple.src_port = input->flow.tcp6_flow.src_port;
+               }
+               break;
+       default:
+               DP_ERR(edev, "Unsupported flow_type %u\n",
+                      input->flow_type);
+               return -ENOTSUP;
+       }
+
+       arfs->rx_queue = fdir->action.rx_queue;
+       return 0;
+}
+
+static int
+qede_config_arfs_filter(struct rte_eth_dev *eth_dev,
+                       struct qede_arfs_entry *arfs,
+                       bool add)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       char mz_name[RTE_MEMZONE_NAMESIZE] = {0};
+       struct qede_arfs_entry *tmp = NULL;
+       const struct rte_memzone *mz;
+       struct ecore_hwfn *p_hwfn;
+       enum _ecore_status_t rc;
+       uint16_t pkt_len;
+       void *pkt;
+
+       if (add) {
+               if (qdev->arfs_info.filter_count == QEDE_RFS_MAX_FLTR - 1) {
+                       DP_ERR(edev, "Reached max flowdir filter limit\n");
+                       return -EINVAL;
+               }
+       }
+
+       /* soft_id could have been used as memzone string, but soft_id is
+        * not currently used so it has no significance.
+        */
+       snprintf(mz_name, sizeof(mz_name) - 1, "%lx",
+                (unsigned long)rte_get_timer_cycles());
+       mz = rte_memzone_reserve_aligned(mz_name, QEDE_MAX_FDIR_PKT_LEN,
+                                        SOCKET_ID_ANY, 0, RTE_CACHE_LINE_SIZE);
+       if (!mz) {
+               DP_ERR(edev, "Failed to allocate memzone for fdir, err = %s\n",
+                      rte_strerror(rte_errno));
+               return -rte_errno;
+       }
+
+       pkt = mz->addr;
+       memset(pkt, 0, QEDE_MAX_FDIR_PKT_LEN);
+       pkt_len = qede_arfs_construct_pkt(eth_dev, arfs, pkt,
+                                         &qdev->arfs_info.arfs);
+       if (pkt_len == 0) {
+               rc = -EINVAL;
+               goto err1;
+       }
+
+       DP_INFO(edev, "pkt_len = %u memzone = %s\n", pkt_len, mz_name);
+       if (add) {
+               SLIST_FOREACH(tmp, &qdev->arfs_info.arfs_list_head, list) {
+                       if (memcmp(tmp->mz->addr, pkt, pkt_len) == 0) {
+                               DP_INFO(edev, "flowdir filter already exists\n");
+                               rc = -EEXIST;
+                               goto err1;
+                       }
+               }
+       } else {
+               SLIST_FOREACH(tmp, &qdev->arfs_info.arfs_list_head, list) {
+                       if (memcmp(tmp->mz->addr, pkt, pkt_len) == 0)
+                               break;
+               }
+               if (!tmp) {
+                       DP_ERR(edev, "flowdir filter does not exist\n");
+                       rc = -ENOENT;
+                       goto err1;
+               }
+       }
+       p_hwfn = ECORE_LEADING_HWFN(edev);
+       if (add) {
+               if (qdev->arfs_info.arfs.mode ==
+                       ECORE_FILTER_CONFIG_MODE_DISABLE) {
+                       /* Force update */
+                       eth_dev->data->dev_conf.fdir_conf.mode =
+                                               RTE_FDIR_MODE_PERFECT;
+                       qdev->arfs_info.arfs.mode =
+                                       ECORE_FILTER_CONFIG_MODE_5_TUPLE;
+                       DP_INFO(edev, "Force enable flowdir in perfect mode\n");
+               }
+               /* Enable ARFS searcher with updated flow_types */
+               ecore_arfs_mode_configure(p_hwfn, p_hwfn->p_arfs_ptt,
+                                         &qdev->arfs_info.arfs);
+       }
+       /* configure filter with ECORE_SPQ_MODE_EBLOCK */
+       rc = ecore_configure_rfs_ntuple_filter(p_hwfn, NULL,
+                                              (dma_addr_t)mz->iova,
+                                              pkt_len,
+                                              arfs->rx_queue,
+                                              0, add);
+       if (rc == ECORE_SUCCESS) {
+               if (add) {
+                       arfs->pkt_len = pkt_len;
+                       arfs->mz = mz;
+                       SLIST_INSERT_HEAD(&qdev->arfs_info.arfs_list_head,
+                                         arfs, list);
+                       qdev->arfs_info.filter_count++;
+                       DP_INFO(edev, "flowdir filter added, count = %d\n",
+                               qdev->arfs_info.filter_count);
+               } else {
+                       rte_memzone_free(tmp->mz);
+                       SLIST_REMOVE(&qdev->arfs_info.arfs_list_head, tmp,
+                                    qede_arfs_entry, list);
+                       rte_free(tmp); /* the node deleted */
+                       rte_memzone_free(mz); /* temp node allocated */
+                       qdev->arfs_info.filter_count--;
+                       DP_INFO(edev, "Fdir filter deleted, count = %d\n",
+                               qdev->arfs_info.filter_count);
+               }
+       } else {
+               DP_ERR(edev, "flowdir filter failed, rc=%d filter_count=%d\n",
+                      rc, qdev->arfs_info.filter_count);
+       }
+
+       /* Disable ARFS searcher if there are no more filters */
+       if (qdev->arfs_info.filter_count == 0) {
+               memset(&qdev->arfs_info.arfs, 0,
+                      sizeof(struct ecore_arfs_config_params));
+               DP_INFO(edev, "Disabling flowdir\n");
+               qdev->arfs_info.arfs.mode = ECORE_FILTER_CONFIG_MODE_DISABLE;
+               ecore_arfs_mode_configure(p_hwfn, p_hwfn->p_arfs_ptt,
+                                         &qdev->arfs_info.arfs);
+       }
+       return 0;
+
+err1:
+       rte_memzone_free(mz);
+       return rc;
+}
+
+static int
+qede_config_cmn_fdir_filter(struct rte_eth_dev *eth_dev,
+                           struct rte_eth_fdir_filter *fdir_filter,
+                           bool add)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       struct qede_arfs_entry *arfs = NULL;
+       int rc = 0;
+
+       arfs = rte_malloc(NULL, sizeof(struct qede_arfs_entry),
+                                 RTE_CACHE_LINE_SIZE);
+       if (!arfs) {
+               DP_ERR(edev, "Did not allocate memory for arfs\n");
+               return -ENOMEM;
+       }
+
+       rc = qede_fdir_to_arfs_filter(eth_dev, fdir_filter, arfs);
+       if (rc < 0)
+               return rc;
+
+       rc = qede_config_arfs_filter(eth_dev, arfs, add);
+       if (rc < 0)
+               rte_free(arfs);
+
+       return rc;
+}
+
+static int
+qede_fdir_filter_add(struct rte_eth_dev *eth_dev,
+                    struct rte_eth_fdir_filter *fdir,
+                    bool add)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+
+       if (!qede_valid_flow(fdir->input.flow_type)) {
+               DP_ERR(edev, "invalid flow_type input\n");
+               return -EINVAL;
+       }
+
+       if (fdir->action.rx_queue >= QEDE_RSS_COUNT(qdev)) {
+               DP_ERR(edev, "invalid queue number %u\n",
+                      fdir->action.rx_queue);
+               return -EINVAL;
+       }
+
+       if (fdir->input.flow_ext.is_vf) {
+               DP_ERR(edev, "flowdir is not supported over VF\n");
+               return -EINVAL;
+       }
+
+       return qede_config_cmn_fdir_filter(eth_dev, fdir, add);
+}
+
+/* Fills the L3/L4 headers and returns the actual length of the flowdir packet */
+static uint16_t
+qede_arfs_construct_pkt(struct rte_eth_dev *eth_dev,
+                       struct qede_arfs_entry *arfs,
+                       void *buff,
+                       struct ecore_arfs_config_params *params)
+
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       uint16_t *ether_type;
+       uint8_t *raw_pkt;
+       struct ipv4_hdr *ip;
+       struct ipv6_hdr *ip6;
+       struct udp_hdr *udp;
+       struct tcp_hdr *tcp;
+       uint16_t len;
+
+       raw_pkt = (uint8_t *)buff;
+
+       len = 2 * sizeof(struct ether_addr);
+       raw_pkt += 2 * sizeof(struct ether_addr);
+       ether_type = (uint16_t *)raw_pkt;
+       raw_pkt += sizeof(uint16_t);
+       len += sizeof(uint16_t);
+
+       *ether_type = rte_cpu_to_be_16(arfs->tuple.eth_proto);
+       switch (arfs->tuple.eth_proto) {
+       case ETHER_TYPE_IPv4:
+               ip = (struct ipv4_hdr *)raw_pkt;
+               ip->version_ihl = QEDE_FDIR_IP_DEFAULT_VERSION_IHL;
+               ip->total_length = sizeof(struct ipv4_hdr);
+               ip->next_proto_id = arfs->tuple.ip_proto;
+               ip->time_to_live = QEDE_FDIR_IPV4_DEF_TTL;
+               ip->dst_addr = arfs->tuple.dst_ipv4;
+               ip->src_addr = arfs->tuple.src_ipv4;
+               len += sizeof(struct ipv4_hdr);
+               params->ipv4 = true;
+
+               raw_pkt = (uint8_t *)buff;
+               /* UDP */
+               if (arfs->tuple.ip_proto == IPPROTO_UDP) {
+                       udp = (struct udp_hdr *)(raw_pkt + len);
+                       udp->dst_port = arfs->tuple.dst_port;
+                       udp->src_port = arfs->tuple.src_port;
+                       udp->dgram_len = sizeof(struct udp_hdr);
+                       len += sizeof(struct udp_hdr);
+                       /* adjust ip total_length */
+                       ip->total_length += sizeof(struct udp_hdr);
+                       params->udp = true;
+               } else { /* TCP */
+                       tcp = (struct tcp_hdr *)(raw_pkt + len);
+                       tcp->src_port = arfs->tuple.src_port;
+                       tcp->dst_port = arfs->tuple.dst_port;
+                       tcp->data_off = QEDE_FDIR_TCP_DEFAULT_DATAOFF;
+                       len += sizeof(struct tcp_hdr);
+                       /* adjust ip total_length */
+                       ip->total_length += sizeof(struct tcp_hdr);
+                       params->tcp = true;
+               }
+               break;
+       case ETHER_TYPE_IPv6:
+               ip6 = (struct ipv6_hdr *)raw_pkt;
+               ip6->proto = arfs->tuple.ip_proto;
+               ip6->vtc_flow =
+                       rte_cpu_to_be_32(QEDE_FDIR_IPV6_DEFAULT_VTC_FLOW);
+
+               rte_memcpy(&ip6->src_addr, arfs->tuple.src_ipv6,
+                          IPV6_ADDR_LEN);
+               rte_memcpy(&ip6->dst_addr, arfs->tuple.dst_ipv6,
+                          IPV6_ADDR_LEN);
+               len += sizeof(struct ipv6_hdr);
+               params->ipv6 = true;
+
+               raw_pkt = (uint8_t *)buff;
+               /* UDP */
+               if (arfs->tuple.ip_proto == IPPROTO_UDP) {
+                       udp = (struct udp_hdr *)(raw_pkt + len);
+                       udp->src_port = arfs->tuple.src_port;
+                       udp->dst_port = arfs->tuple.dst_port;
+                       len += sizeof(struct udp_hdr);
+                       params->udp = true;
+               } else { /* TCP */
+                       tcp = (struct tcp_hdr *)(raw_pkt + len);
+                       tcp->src_port = arfs->tuple.src_port;
+                       tcp->dst_port = arfs->tuple.dst_port;
+                       tcp->data_off = QEDE_FDIR_TCP_DEFAULT_DATAOFF;
+                       len += sizeof(struct tcp_hdr);
+                       params->tcp = true;
+               }
+               break;
+       default:
+               DP_ERR(edev, "Unsupported eth_proto %u\n",
+                      arfs->tuple.eth_proto);
+               return 0;
+       }
+
+       return len;
+}
+
+static int
+qede_fdir_filter_conf(struct rte_eth_dev *eth_dev,
+                     enum rte_filter_op filter_op,
+                     void *arg)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       struct rte_eth_fdir_filter *fdir;
+       int ret;
+
+       fdir = (struct rte_eth_fdir_filter *)arg;
+       switch (filter_op) {
+       case RTE_ETH_FILTER_NOP:
+               /* Typically used to query flowdir support */
+               if (ECORE_IS_CMT(edev)) {
+                       DP_ERR(edev, "flowdir is not supported in 100G mode\n");
+                       return -ENOTSUP;
+               }
+               return 0; /* means supported */
+       case RTE_ETH_FILTER_ADD:
+               ret = qede_fdir_filter_add(eth_dev, fdir, 1);
+       break;
+       case RTE_ETH_FILTER_DELETE:
+               ret = qede_fdir_filter_add(eth_dev, fdir, 0);
+       break;
+       case RTE_ETH_FILTER_FLUSH:
+       case RTE_ETH_FILTER_UPDATE:
+       case RTE_ETH_FILTER_INFO:
+               return -ENOTSUP;
+       default:
+               DP_ERR(edev, "unknown operation %u", filter_op);
+               ret = -EINVAL;
+       }
+
+       return ret;
+}
+
+int qede_ntuple_filter_conf(struct rte_eth_dev *eth_dev,
+                           enum rte_filter_op filter_op,
+                           void *arg)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       struct rte_eth_ntuple_filter *ntuple;
+       struct rte_eth_fdir_filter fdir_entry;
+       struct rte_eth_tcpv4_flow *tcpv4_flow;
+       struct rte_eth_udpv4_flow *udpv4_flow;
+       bool add = false;
+
+       switch (filter_op) {
+       case RTE_ETH_FILTER_NOP:
+               /* Typically used to query fdir support */
+               if (ECORE_IS_CMT(edev)) {
+                       DP_ERR(edev, "flowdir is not supported in 100G mode\n");
+                       return -ENOTSUP;
+               }
+               return 0; /* means supported */
+       case RTE_ETH_FILTER_ADD:
+               add = true;
+       break;
+       case RTE_ETH_FILTER_DELETE:
+       break;
+       case RTE_ETH_FILTER_INFO:
+       case RTE_ETH_FILTER_GET:
+       case RTE_ETH_FILTER_UPDATE:
+       case RTE_ETH_FILTER_FLUSH:
+       case RTE_ETH_FILTER_SET:
+       case RTE_ETH_FILTER_STATS:
+       case RTE_ETH_FILTER_OP_MAX:
+               DP_ERR(edev, "Unsupported filter_op %d\n", filter_op);
+               return -ENOTSUP;
+       }
+       ntuple = (struct rte_eth_ntuple_filter *)arg;
+       /* Internally convert ntuple to fdir entry */
+       memset(&fdir_entry, 0, sizeof(fdir_entry));
+       if (ntuple->proto == IPPROTO_TCP) {
+               fdir_entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_TCP;
+               tcpv4_flow = &fdir_entry.input.flow.tcp4_flow;
+               tcpv4_flow->ip.src_ip = ntuple->src_ip;
+               tcpv4_flow->ip.dst_ip = ntuple->dst_ip;
+               tcpv4_flow->ip.proto = IPPROTO_TCP;
+               tcpv4_flow->src_port = ntuple->src_port;
+               tcpv4_flow->dst_port = ntuple->dst_port;
+       } else {
+               fdir_entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
+               udpv4_flow = &fdir_entry.input.flow.udp4_flow;
+               udpv4_flow->ip.src_ip = ntuple->src_ip;
+               udpv4_flow->ip.dst_ip = ntuple->dst_ip;
+               udpv4_flow->ip.proto = IPPROTO_UDP;
+               udpv4_flow->src_port = ntuple->src_port;
+               udpv4_flow->dst_port = ntuple->dst_port;
+       }
+
+       fdir_entry.action.rx_queue = ntuple->queue;
+
+       return qede_config_cmn_fdir_filter(eth_dev, &fdir_entry, add);
+}
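
qede_ntuple_filter_conf() accepts only the classic TCP/UDP 5-tuple shape and rewrites it into an fdir entry before reusing the common path above. A usage sketch through the legacy filter-ctrl API; every address, port and queue value below is invented for illustration:

    #include <netinet/in.h>
    #include <rte_byteorder.h>
    #include <rte_ethdev.h>

    /* Steer TCP 1.2.3.4:1000 -> 5.6.7.8:2000 to Rx queue 1 (sample values). */
    static int
    add_ntuple_rule(uint16_t port_id)
    {
            struct rte_eth_ntuple_filter filt = {
                    .flags = RTE_5TUPLE_FLAGS,
                    .proto = IPPROTO_TCP,
                    .src_ip = rte_cpu_to_be_32(0x01020304),
                    .dst_ip = rte_cpu_to_be_32(0x05060708),
                    .src_port = rte_cpu_to_be_16(1000),
                    .dst_port = rte_cpu_to_be_16(2000),
                    .queue = 1,
            };

            return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_NTUPLE,
                                           RTE_ETH_FILTER_ADD, &filt);
    }
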
+
+static int
+qede_tunnel_update(struct qede_dev *qdev,
+                  struct ecore_tunnel_info *tunn_info)
+{
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       enum _ecore_status_t rc = ECORE_INVAL;
+       struct ecore_hwfn *p_hwfn;
+       struct ecore_ptt *p_ptt;
+       int i;
+
+       for_each_hwfn(edev, i) {
+               p_hwfn = &edev->hwfns[i];
+               if (IS_PF(edev)) {
+                       p_ptt = ecore_ptt_acquire(p_hwfn);
+                       if (!p_ptt) {
+                               DP_ERR(p_hwfn, "Can't acquire PTT\n");
+                               return -EAGAIN;
+                       }
+               } else {
+                       p_ptt = NULL;
+               }
+
+               rc = ecore_sp_pf_update_tunn_cfg(p_hwfn, p_ptt,
+                               tunn_info, ECORE_SPQ_MODE_CB, NULL);
+               if (IS_PF(edev))
+                       ecore_ptt_release(p_hwfn, p_ptt);
+
+               if (rc != ECORE_SUCCESS)
+                       break;
+       }
+
+       return rc;
+}
+
+static int
+qede_vxlan_enable(struct rte_eth_dev *eth_dev, uint8_t clss,
+                 bool enable)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       enum _ecore_status_t rc = ECORE_INVAL;
+       struct ecore_tunnel_info tunn;
+
+       if (qdev->vxlan.enable == enable)
+               return ECORE_SUCCESS;
+
+       memset(&tunn, 0, sizeof(struct ecore_tunnel_info));
+       tunn.vxlan.b_update_mode = true;
+       tunn.vxlan.b_mode_enabled = enable;
+       tunn.b_update_rx_cls = true;
+       tunn.b_update_tx_cls = true;
+       tunn.vxlan.tun_cls = clss;
+
+       tunn.vxlan_port.b_update_port = true;
+       tunn.vxlan_port.port = enable ? QEDE_VXLAN_DEF_PORT : 0;
+
+       rc = qede_tunnel_update(qdev, &tunn);
+       if (rc == ECORE_SUCCESS) {
+               qdev->vxlan.enable = enable;
+               qdev->vxlan.udp_port = (enable) ? QEDE_VXLAN_DEF_PORT : 0;
+               DP_INFO(edev, "vxlan is %s, UDP port = %d\n",
+                       enable ? "enabled" : "disabled", qdev->vxlan.udp_port);
+       } else {
+               DP_ERR(edev, "Failed to update tunn_clss %u\n",
+                      tunn.vxlan.tun_cls);
+       }
+
+       return rc;
+}
+
+static int
+qede_geneve_enable(struct rte_eth_dev *eth_dev, uint8_t clss,
+                 bool enable)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       enum _ecore_status_t rc = ECORE_INVAL;
+       struct ecore_tunnel_info tunn;
+
+       memset(&tunn, 0, sizeof(struct ecore_tunnel_info));
+       tunn.l2_geneve.b_update_mode = true;
+       tunn.l2_geneve.b_mode_enabled = enable;
+       tunn.ip_geneve.b_update_mode = true;
+       tunn.ip_geneve.b_mode_enabled = enable;
+       tunn.l2_geneve.tun_cls = clss;
+       tunn.ip_geneve.tun_cls = clss;
+       tunn.b_update_rx_cls = true;
+       tunn.b_update_tx_cls = true;
+
+       tunn.geneve_port.b_update_port = true;
+       tunn.geneve_port.port = enable ? QEDE_GENEVE_DEF_PORT : 0;
+
+       rc = qede_tunnel_update(qdev, &tunn);
+       if (rc == ECORE_SUCCESS) {
+               qdev->geneve.enable = enable;
+               qdev->geneve.udp_port = (enable) ? QEDE_GENEVE_DEF_PORT : 0;
+               DP_INFO(edev, "GENEVE is %s, UDP port = %d\n",
+                       enable ? "enabled" : "disabled", qdev->geneve.udp_port);
+       } else {
+               DP_ERR(edev, "Failed to update tunn_clss %u\n",
+                      clss);
+       }
+
+       return rc;
+}
+
+static int
+qede_ipgre_enable(struct rte_eth_dev *eth_dev, uint8_t clss,
+                 bool enable)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       enum _ecore_status_t rc = ECORE_INVAL;
+       struct ecore_tunnel_info tunn;
+
+       memset(&tunn, 0, sizeof(struct ecore_tunnel_info));
+       tunn.ip_gre.b_update_mode = true;
+       tunn.ip_gre.b_mode_enabled = enable;
+       tunn.ip_gre.tun_cls = clss;
+       tunn.b_update_rx_cls = true;
+       tunn.b_update_tx_cls = true;
+
+       rc = qede_tunnel_update(qdev, &tunn);
+       if (rc == ECORE_SUCCESS) {
+               qdev->ipgre.enable = enable;
+               DP_INFO(edev, "IPGRE is %s\n",
+                       enable ? "enabled" : "disabled");
+       } else {
+               DP_ERR(edev, "Failed to update tunn_clss %u\n",
+                      clss);
+       }
+
+       return rc;
+}
+
+int
+qede_udp_dst_port_del(struct rte_eth_dev *eth_dev,
+                     struct rte_eth_udp_tunnel *tunnel_udp)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       struct ecore_tunnel_info tunn; /* @DPDK */
+       uint16_t udp_port;
+       int rc;
+
+       PMD_INIT_FUNC_TRACE(edev);
+
+       memset(&tunn, 0, sizeof(tunn));
+
+       switch (tunnel_udp->prot_type) {
+       case RTE_TUNNEL_TYPE_VXLAN:
+               if (qdev->vxlan.udp_port != tunnel_udp->udp_port) {
+                       DP_ERR(edev, "UDP port %u doesn't exist\n",
+                               tunnel_udp->udp_port);
+                       return ECORE_INVAL;
+               }
+               udp_port = 0;
+
+               tunn.vxlan_port.b_update_port = true;
+               tunn.vxlan_port.port = udp_port;
+
+               rc = qede_tunnel_update(qdev, &tunn);
+               if (rc != ECORE_SUCCESS) {
+                       DP_ERR(edev, "Unable to config UDP port %u\n",
+                              tunn.vxlan_port.port);
+                       return rc;
+               }
+
+               qdev->vxlan.udp_port = udp_port;
+               /* If the request is to delete the UDP port and the number of
+                * VXLAN filters has reached 0, then VXLAN offload can be
+                * disabled.
+                */
+               if (qdev->vxlan.enable && qdev->vxlan.num_filters == 0)
+                       return qede_vxlan_enable(eth_dev,
+                                       ECORE_TUNN_CLSS_MAC_VLAN, false);
+
+               break;
+       case RTE_TUNNEL_TYPE_GENEVE:
+               if (qdev->geneve.udp_port != tunnel_udp->udp_port) {
+                       DP_ERR(edev, "UDP port %u doesn't exist\n",
+                               tunnel_udp->udp_port);
+                       return ECORE_INVAL;
+               }
+
+               udp_port = 0;
+
+               tunn.geneve_port.b_update_port = true;
+               tunn.geneve_port.port = udp_port;
+
+               rc = qede_tunnel_update(qdev, &tunn);
+               if (rc != ECORE_SUCCESS) {
+                       DP_ERR(edev, "Unable to config UDP port %u\n",
+                              tunn.vxlan_port.port);
+                       return rc;
+               }
+
+               qdev->geneve.udp_port = udp_port;
+               /* If the request is to delete the UDP port and the number of
+                * GENEVE filters has reached 0, then GENEVE offload can be
+                * disabled.
+                */
+               if (qdev->geneve.enable && qdev->geneve.num_filters == 0)
+                       return qede_geneve_enable(eth_dev,
+                                       ECORE_TUNN_CLSS_MAC_VLAN, false);
+
+               break;
+
+       default:
+               return ECORE_INVAL;
+       }
+
+       return 0;
+}
+
+int
+qede_udp_dst_port_add(struct rte_eth_dev *eth_dev,
+                     struct rte_eth_udp_tunnel *tunnel_udp)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       struct ecore_tunnel_info tunn; /* @DPDK */
+       uint16_t udp_port;
+       int rc;
+
+       PMD_INIT_FUNC_TRACE(edev);
+
+       memset(&tunn, 0, sizeof(tunn));
+
+       switch (tunnel_udp->prot_type) {
+       case RTE_TUNNEL_TYPE_VXLAN:
+               if (qdev->vxlan.udp_port == tunnel_udp->udp_port) {
+                       DP_INFO(edev,
+                               "UDP port %u for VXLAN was already configured\n",
+                               tunnel_udp->udp_port);
+                       return ECORE_SUCCESS;
+               }
+
+               /* Enable VXLAN tunnel with default MAC/VLAN classification if
+                * it was not already enabled while adding a VXLAN filter
+                * before the UDP port update.
+                */
+               if (!qdev->vxlan.enable) {
+                       rc = qede_vxlan_enable(eth_dev,
+                               ECORE_TUNN_CLSS_MAC_VLAN, true);
+                       if (rc != ECORE_SUCCESS) {
+                               DP_ERR(edev, "Failed to enable VXLAN "
+                                       "prior to updating UDP port\n");
+                               return rc;
+                       }
+               }
+               udp_port = tunnel_udp->udp_port;
+
+               tunn.vxlan_port.b_update_port = true;
+               tunn.vxlan_port.port = udp_port;
+
+               rc = qede_tunnel_update(qdev, &tunn);
+               if (rc != ECORE_SUCCESS) {
+                       DP_ERR(edev, "Unable to config UDP port %u for VXLAN\n",
+                              udp_port);
+                       return rc;
+               }
+
+               DP_INFO(edev, "Updated UDP port %u for VXLAN\n", udp_port);
+
+               qdev->vxlan.udp_port = udp_port;
+               break;
+       case RTE_TUNNEL_TYPE_GENEVE:
+               if (qdev->geneve.udp_port == tunnel_udp->udp_port) {
+                       DP_INFO(edev,
+                               "UDP port %u for GENEVE was already configured\n",
+                               tunnel_udp->udp_port);
+                       return ECORE_SUCCESS;
+               }
+
+               /* Enable GENEVE tunnel with default MAC/VLAN classification if
+                * it was not already enabled while adding a GENEVE filter
+                * before the UDP port update.
+                */
+               if (!qdev->geneve.enable) {
+                       rc = qede_geneve_enable(eth_dev,
+                               ECORE_TUNN_CLSS_MAC_VLAN, true);
+                       if (rc != ECORE_SUCCESS) {
+                               DP_ERR(edev, "Failed to enable GENEVE "
+                                       "prior to updating UDP port\n");
+                               return rc;
+                       }
+               }
+               udp_port = tunnel_udp->udp_port;
+
+               tunn.geneve_port.b_update_port = true;
+               tunn.geneve_port.port = udp_port;
+
+               rc = qede_tunnel_update(qdev, &tunn);
+               if (rc != ECORE_SUCCESS) {
+                       DP_ERR(edev, "Unable to config UDP port %u for GENEVE\n",
+                              udp_port);
+                       return rc;
+               }
+
+               DP_INFO(edev, "Updated UDP port %u for GENEVE\n", udp_port);
+
+               qdev->geneve.udp_port = udp_port;
+               break;
+       default:
+               return ECORE_INVAL;
+       }
+
+       return 0;
+}
+
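A minimal application-side sketch of how the two UDP tunnel port handlers
above are reached through the generic ethdev API; the port id and the
VXLAN UDP port number (4790) are illustrative assumptions:

    #include <rte_ethdev.h>

    static int
    add_vxlan_udp_port(uint16_t port_id)
    {
            struct rte_eth_udp_tunnel tunnel_udp = {
                    .udp_port = 4790, /* non-default VXLAN port */
                    .prot_type = RTE_TUNNEL_TYPE_VXLAN,
            };

            /* Dispatches to qede_udp_dst_port_add() on a qede port */
            return rte_eth_dev_udp_tunnel_port_add(port_id, &tunnel_udp);
    }
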
+static void qede_get_ecore_tunn_params(uint32_t filter, uint32_t *type,
+                                      uint32_t *clss, char *str)
+{
+       uint16_t j;
+       *clss = MAX_ECORE_TUNN_CLSS;
+
+       for (j = 0; j < RTE_DIM(qede_tunn_types); j++) {
+               if (filter == qede_tunn_types[j].rte_filter_type) {
+                       *type = qede_tunn_types[j].qede_type;
+                       *clss = qede_tunn_types[j].qede_tunn_clss;
+                       strcpy(str, qede_tunn_types[j].string);
+                       return;
+               }
+       }
+}
+
+static int
+qede_set_ucast_tunn_cmn_param(struct ecore_filter_ucast *ucast,
+                             const struct rte_eth_tunnel_filter_conf *conf,
+                             uint32_t type)
+{
+       /* Init common ucast params first */
+       qede_set_ucast_cmn_params(ucast);
+
+       /* Copy out the required fields based on classification type */
+       ucast->type = type;
+
+       switch (type) {
+       case ECORE_FILTER_VNI:
+               ucast->vni = conf->tenant_id;
+       break;
+       case ECORE_FILTER_INNER_VLAN:
+               ucast->vlan = conf->inner_vlan;
+       break;
+       case ECORE_FILTER_MAC:
+               memcpy(ucast->mac, conf->outer_mac.addr_bytes,
+                      ETHER_ADDR_LEN);
+       break;
+       case ECORE_FILTER_INNER_MAC:
+               memcpy(ucast->mac, conf->inner_mac.addr_bytes,
+                      ETHER_ADDR_LEN);
+       break;
+       case ECORE_FILTER_MAC_VNI_PAIR:
+               memcpy(ucast->mac, conf->outer_mac.addr_bytes,
+                       ETHER_ADDR_LEN);
+               ucast->vni = conf->tenant_id;
+       break;
+       case ECORE_FILTER_INNER_MAC_VNI_PAIR:
+               memcpy(ucast->mac, conf->inner_mac.addr_bytes,
+                       ETHER_ADDR_LEN);
+               ucast->vni = conf->tenant_id;
+       break;
+       case ECORE_FILTER_INNER_PAIR:
+               memcpy(ucast->mac, conf->inner_mac.addr_bytes,
+                       ETHER_ADDR_LEN);
+               ucast->vlan = conf->inner_vlan;
+       break;
+       default:
+               return -EINVAL;
+       }
+
+       return ECORE_SUCCESS;
+}
+
+static int
+_qede_tunn_filter_config(struct rte_eth_dev *eth_dev,
+                        const struct rte_eth_tunnel_filter_conf *conf,
+                        __attribute__((unused)) enum rte_filter_op filter_op,
+                        enum ecore_tunn_clss *clss,
+                        bool add)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       struct ecore_filter_ucast ucast = {0};
+       enum ecore_filter_ucast_type type;
+       uint16_t filter_type = 0;
+       char str[80];
+       int rc;
+
+       filter_type = conf->filter_type;
+       /* Determine if the given filter classification is supported */
+       qede_get_ecore_tunn_params(filter_type, &type, clss, str);
+       if (*clss == MAX_ECORE_TUNN_CLSS) {
+               DP_ERR(edev, "Unsupported filter type\n");
+               return -EINVAL;
+       }
+       /* Init tunnel ucast params */
+       rc = qede_set_ucast_tunn_cmn_param(&ucast, conf, type);
+       if (rc != ECORE_SUCCESS) {
+               DP_ERR(edev, "Unsupported Tunnel filter type 0x%x\n",
+               conf->filter_type);
+               return rc;
+       }
+       DP_INFO(edev, "Rule: \"%s\", op %d, type 0x%x\n",
+               str, filter_op, ucast.type);
+
+       ucast.opcode = add ? ECORE_FILTER_ADD : ECORE_FILTER_REMOVE;
+
+       /* Skip MAC/VLAN if filter is based on VNI */
+       if (!(filter_type & ETH_TUNNEL_FILTER_TENID)) {
+               rc = qede_mac_int_ops(eth_dev, &ucast, add);
+               if (rc == 0 && add) {
+                       /* Enable accept anyvlan */
+                       qede_config_accept_any_vlan(qdev, true);
+               }
+       } else {
+               rc = qede_ucast_filter(eth_dev, &ucast, add);
+               if (rc == 0)
+                       rc = ecore_filter_ucast_cmd(edev, &ucast,
+                                           ECORE_SPQ_MODE_CB, NULL);
+       }
+
+       return rc;
+}
+
+static int
+qede_tunn_enable(struct rte_eth_dev *eth_dev, uint8_t clss,
+                enum rte_eth_tunnel_type tunn_type, bool enable)
+{
+       int rc = -EINVAL;
+
+       switch (tunn_type) {
+       case RTE_TUNNEL_TYPE_VXLAN:
+               rc = qede_vxlan_enable(eth_dev, clss, enable);
+               break;
+       case RTE_TUNNEL_TYPE_GENEVE:
+               rc = qede_geneve_enable(eth_dev, clss, enable);
+               break;
+       case RTE_TUNNEL_TYPE_IP_IN_GRE:
+               rc = qede_ipgre_enable(eth_dev, clss, enable);
+               break;
+       default:
+               rc = -EINVAL;
+               break;
+       }
+
+       return rc;
+}
+
+static int
+qede_tunn_filter_config(struct rte_eth_dev *eth_dev,
+                       enum rte_filter_op filter_op,
+                       const struct rte_eth_tunnel_filter_conf *conf)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       enum ecore_tunn_clss clss = MAX_ECORE_TUNN_CLSS;
+       bool add;
+       int rc;
+
+       PMD_INIT_FUNC_TRACE(edev);
+
+       switch (filter_op) {
+       case RTE_ETH_FILTER_ADD:
+               add = true;
+               break;
+       case RTE_ETH_FILTER_DELETE:
+               add = false;
+               break;
+       default:
+               DP_ERR(edev, "Unsupported operation %d\n", filter_op);
+               return -EINVAL;
+       }
+
+       if (IS_VF(edev))
+               return qede_tunn_enable(eth_dev,
+                                       ECORE_TUNN_CLSS_MAC_VLAN,
+                                       conf->tunnel_type, add);
+
+       rc = _qede_tunn_filter_config(eth_dev, conf, filter_op, &clss, add);
+       if (rc != ECORE_SUCCESS)
+               return rc;
+
+       if (add) {
+               if (conf->tunnel_type == RTE_TUNNEL_TYPE_VXLAN) {
+                       qdev->vxlan.num_filters++;
+                       qdev->vxlan.filter_type = conf->filter_type;
+               } else { /* GENEVE */
+                       qdev->geneve.num_filters++;
+                       qdev->geneve.filter_type = conf->filter_type;
+               }
+
+               if (!qdev->vxlan.enable || !qdev->geneve.enable ||
+                   !qdev->ipgre.enable)
+                       return qede_tunn_enable(eth_dev, clss,
+                                               conf->tunnel_type,
+                                               true);
+       } else {
+               if (conf->tunnel_type == RTE_TUNNEL_TYPE_VXLAN)
+                       qdev->vxlan.num_filters--;
+               else /*GENEVE*/
+                       qdev->geneve.num_filters--;
+
+               /* Disable tunnel offload if its filter count drops to 0 */
+               if (qdev->vxlan.num_filters == 0 ||
+                   qdev->geneve.num_filters == 0)
+                       return qede_tunn_enable(eth_dev, clss,
+                                               conf->tunnel_type,
+                                               false);
+       }
+
+       return 0;
+}
+
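A hedged sketch of reaching qede_tunn_filter_config() through the legacy
filter-ctrl API, assuming the qede_tunn_types table maps
ETH_TUNNEL_FILTER_TENID to a VNI-based classification; field values are
illustrative:

    #include <rte_ethdev.h>

    static int
    add_vxlan_vni_filter(uint16_t port_id, uint32_t vni)
    {
            struct rte_eth_tunnel_filter_conf conf = {
                    .tunnel_type = RTE_TUNNEL_TYPE_VXLAN,
                    .filter_type = ETH_TUNNEL_FILTER_TENID, /* match on VNI */
                    .tenant_id = vni,
            };

            return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_TUNNEL,
                                           RTE_ETH_FILTER_ADD, &conf);
    }
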
+static int
+qede_flow_validate_attr(__attribute__((unused)) struct rte_eth_dev *dev,
+                       const struct rte_flow_attr *attr,
+                       struct rte_flow_error *error)
+{
+       if (attr == NULL) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ATTR, NULL,
+                                  "NULL attribute");
+               return -rte_errno;
+       }
+
+       if (attr->group != 0) {
+               rte_flow_error_set(error, ENOTSUP,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, attr,
+                                  "Groups are not supported");
+               return -rte_errno;
+       }
+
+       if (attr->priority != 0) {
+               rte_flow_error_set(error, ENOTSUP,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY, attr,
+                                  "Priorities are not supported");
+               return -rte_errno;
+       }
+
+       if (attr->egress != 0) {
+               rte_flow_error_set(error, ENOTSUP,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, attr,
+                                  "Egress is not supported");
+               return -rte_errno;
+       }
+
+       if (attr->transfer != 0) {
+               rte_flow_error_set(error, ENOTSUP,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER, attr,
+                                  "Transfer is not supported");
+               return -rte_errno;
+       }
+
+       if (attr->ingress == 0) {
+               rte_flow_error_set(error, ENOTSUP,
+                                  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS, attr,
+                                  "Only ingress is supported");
+               return -rte_errno;
+       }
+
+       return 0;
+}
+
+static int
+qede_flow_parse_pattern(__attribute__((unused)) struct rte_eth_dev *dev,
+                       const struct rte_flow_item pattern[],
+                       struct rte_flow_error *error,
+                       struct rte_flow *flow)
+{
+       bool l3 = false, l4 = false;
+
+       if (pattern == NULL) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ITEM_NUM, NULL,
+                                  "NULL pattern");
+               return -rte_errno;
+       }
+
+       for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
+               if (!pattern->spec) {
+                       rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ITEM,
+                                          pattern,
+                                          "Item spec not defined");
+                       return -rte_errno;
+               }
+
+               if (pattern->last) {
+                       rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ITEM,
+                                          pattern,
+                                          "Item last not supported");
+                       return -rte_errno;
+               }
+
+               if (pattern->mask) {
+                       rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ITEM,
+                                          pattern,
+                                          "Item mask not supported");
+                       return -rte_errno;
+               }
+
+               /* The validation below is only for a 4-tuple flow
+                * (GFT_PROFILE_TYPE_4_TUPLE):
+                * - src and dst L3 address (IPv4 or IPv6)
+                * - src and dst L4 port (TCP or UDP)
+                */
+
+               switch (pattern->type) {
+               case RTE_FLOW_ITEM_TYPE_IPV4:
+                       l3 = true;
+
+                       if (flow) {
+                               const struct rte_flow_item_ipv4 *spec;
+
+                               spec = pattern->spec;
+                               flow->entry.tuple.src_ipv4 = spec->hdr.src_addr;
+                               flow->entry.tuple.dst_ipv4 = spec->hdr.dst_addr;
+                               flow->entry.tuple.eth_proto = ETHER_TYPE_IPv4;
+                       }
+                       break;
+
+               case RTE_FLOW_ITEM_TYPE_IPV6:
+                       l3 = true;
+
+                       if (flow) {
+                               const struct rte_flow_item_ipv6 *spec;
+
+                               spec = pattern->spec;
+                               rte_memcpy(flow->entry.tuple.src_ipv6,
+                                          spec->hdr.src_addr,
+                                          IPV6_ADDR_LEN);
+                               rte_memcpy(flow->entry.tuple.dst_ipv6,
+                                          spec->hdr.dst_addr,
+                                          IPV6_ADDR_LEN);
+                               flow->entry.tuple.eth_proto = ETHER_TYPE_IPv6;
+                       }
+                       break;
+
+               case RTE_FLOW_ITEM_TYPE_UDP:
+                       l4 = true;
+
+                       if (flow) {
+                               const struct rte_flow_item_udp *spec;
+
+                               spec = pattern->spec;
+                               flow->entry.tuple.src_port =
+                                               spec->hdr.src_port;
+                               flow->entry.tuple.dst_port =
+                                               spec->hdr.dst_port;
+                               flow->entry.tuple.ip_proto = IPPROTO_UDP;
+                       }
+                       break;
+
+               case RTE_FLOW_ITEM_TYPE_TCP:
+                       l4 = true;
+
+                       if (flow) {
+                               const struct rte_flow_item_tcp *spec;
+
+                               spec = pattern->spec;
+                               flow->entry.tuple.src_port =
+                                               spec->hdr.src_port;
+                               flow->entry.tuple.dst_port =
+                                               spec->hdr.dst_port;
+                               flow->entry.tuple.ip_proto = IPPROTO_TCP;
+                       }
+
+                       break;
+               default:
+                       rte_flow_error_set(error, EINVAL,
+                                          RTE_FLOW_ERROR_TYPE_ITEM,
+                                          pattern,
+                                          "Only 4 tuple (IPV4, IPV6, UDP and TCP) item types supported");
+                       return -rte_errno;
+               }
+       }
+
+       if (!(l3 && l4)) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ITEM,
+                                  pattern,
+                                  "Item types need to have both L3 and L4 protocols");
+               return -rte_errno;
+       }
+
+       return 0;
+}
+
+static int
+qede_flow_parse_actions(struct rte_eth_dev *dev,
+                       const struct rte_flow_action actions[],
+                       struct rte_flow_error *error,
+                       struct rte_flow *flow)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(dev);
+       const struct rte_flow_action_queue *queue;
+
+       if (actions == NULL) {
+               rte_flow_error_set(error, EINVAL,
+                                  RTE_FLOW_ERROR_TYPE_ACTION_NUM, NULL,
+                                  "NULL actions");
+               return -rte_errno;
+       }
+
+       for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
+               switch (actions->type) {
+               case RTE_FLOW_ACTION_TYPE_QUEUE:
+                       queue = actions->conf;
+
+                       if (queue->index >= QEDE_RSS_COUNT(qdev)) {
+                               rte_flow_error_set(error, EINVAL,
+                                                  RTE_FLOW_ERROR_TYPE_ACTION,
+                                                  actions,
+                                                  "Bad QUEUE action");
+                               return -rte_errno;
+                       }
+
+                       if (flow)
+                               flow->entry.rx_queue = queue->index;
+
+                       break;
+
+               default:
+                       rte_flow_error_set(error, ENOTSUP,
+                                          RTE_FLOW_ERROR_TYPE_ACTION,
+                                          actions,
+                                          "Action is not supported - only ACTION_TYPE_QUEUE supported");
+                       return -rte_errno;
+               }
+       }
+
+       return 0;
+}
+
+static int
+qede_flow_parse(struct rte_eth_dev *dev,
+               const struct rte_flow_attr *attr,
+               const struct rte_flow_item patterns[],
+               const struct rte_flow_action actions[],
+               struct rte_flow_error *error,
+               struct rte_flow *flow)
+
+{
+       int rc = 0;
+
+       rc = qede_flow_validate_attr(dev, attr, error);
+       if (rc)
+               return rc;
+
+       /* Parse and validate the item pattern and actions.
+        * The given item list and actions will be translated to a qede
+        * PMD specific aRFS structure.
+        */
+       rc = qede_flow_parse_pattern(dev, patterns, error, flow);
+       if (rc)
+               return rc;
+
+       rc = qede_flow_parse_actions(dev, actions, error, flow);
+
+       return rc;
+}
+
+static int
+qede_flow_validate(struct rte_eth_dev *dev,
+                  const struct rte_flow_attr *attr,
+                  const struct rte_flow_item patterns[],
+                  const struct rte_flow_action actions[],
+                  struct rte_flow_error *error)
+{
+       return qede_flow_parse(dev, attr, patterns, actions, error, NULL);
+}
+
+static struct rte_flow *
+qede_flow_create(struct rte_eth_dev *dev,
+                const struct rte_flow_attr *attr,
+                const struct rte_flow_item pattern[],
+                const struct rte_flow_action actions[],
+                struct rte_flow_error *error)
+{
+       struct rte_flow *flow = NULL;
+       int rc;
+
+       flow = rte_zmalloc("qede_rte_flow", sizeof(*flow), 0);
+       if (flow == NULL) {
+               rte_flow_error_set(error, ENOMEM,
+                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                                  "Failed to allocate memory");
+               return NULL;
+       }
+
+       rc = qede_flow_parse(dev, attr, pattern, actions, error, flow);
+       if (rc < 0) {
+               rte_free(flow);
+               return NULL;
+       }
+
+       rc = qede_config_arfs_filter(dev, &flow->entry, true);
+       if (rc < 0) {
+               rte_flow_error_set(error, rc,
+                                  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+                                  "Failed to configure flow filter");
+               rte_free(flow);
+               return NULL;
+       }
+
+       return flow;
+}
+
+static int
+qede_flow_destroy(struct rte_eth_dev *eth_dev,
+                 struct rte_flow *flow,
+                 struct rte_flow_error *error)
+{
+       int rc = 0;
+
+       rc = qede_config_arfs_filter(eth_dev, &flow->entry, false);
+       if (rc < 0) {
+               rte_flow_error_set(error, rc,
+                                  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
+                                  "Failed to delete flow filter");
+               rte_free(flow);
+       }
+
+       return rc;
+}
+
+const struct rte_flow_ops qede_flow_ops = {
+       .validate = qede_flow_validate,
+       .create = qede_flow_create,
+       .destroy = qede_flow_destroy,
+};
+
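An illustrative rte_flow rule that satisfies the constraints enforced by
qede_flow_parse_pattern()/qede_flow_parse_actions() above: ingress-only
attributes, group/priority 0, items with a spec but no mask or last, an
L3 plus an L4 item, and a single QUEUE action. Addresses, ports and the
queue index are assumptions:

    #include <rte_byteorder.h>
    #include <rte_flow.h>

    static struct rte_flow *
    create_4tuple_flow(uint16_t port_id, uint16_t rx_queue)
    {
            struct rte_flow_attr attr = { .ingress = 1 };
            struct rte_flow_item_ipv4 ip_spec = {
                    .hdr.src_addr = rte_cpu_to_be_32(0xc0a80001), /* 192.168.0.1 */
                    .hdr.dst_addr = rte_cpu_to_be_32(0xc0a80002), /* 192.168.0.2 */
            };
            struct rte_flow_item_udp udp_spec = {
                    .hdr.src_port = rte_cpu_to_be_16(4000),
                    .hdr.dst_port = rte_cpu_to_be_16(5000),
            };
            struct rte_flow_item pattern[] = {
                    { .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ip_spec },
                    { .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp_spec },
                    { .type = RTE_FLOW_ITEM_TYPE_END },
            };
            struct rte_flow_action_queue queue = { .index = rx_queue };
            struct rte_flow_action actions[] = {
                    { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
                    { .type = RTE_FLOW_ACTION_TYPE_END },
            };
            struct rte_flow_error error;

            return rte_flow_create(port_id, &attr, pattern, actions, &error);
    }
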
+int qede_dev_filter_ctrl(struct rte_eth_dev *eth_dev,
+                        enum rte_filter_type filter_type,
+                        enum rte_filter_op filter_op,
+                        void *arg)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(eth_dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       struct rte_eth_tunnel_filter_conf *filter_conf =
+                       (struct rte_eth_tunnel_filter_conf *)arg;
+
+       switch (filter_type) {
+       case RTE_ETH_FILTER_TUNNEL:
+               switch (filter_conf->tunnel_type) {
+               case RTE_TUNNEL_TYPE_VXLAN:
+               case RTE_TUNNEL_TYPE_GENEVE:
+               case RTE_TUNNEL_TYPE_IP_IN_GRE:
+                       DP_INFO(edev,
+                               "Packet steering to the specified Rx queue"
+                               " is not supported with UDP tunneling\n");
+                       return qede_tunn_filter_config(eth_dev, filter_op,
+                                                      filter_conf);
+               case RTE_TUNNEL_TYPE_TEREDO:
+               case RTE_TUNNEL_TYPE_NVGRE:
+               case RTE_L2_TUNNEL_TYPE_E_TAG:
+                       DP_ERR(edev, "Unsupported tunnel type %d\n",
+                               filter_conf->tunnel_type);
+                       return -EINVAL;
+               case RTE_TUNNEL_TYPE_NONE:
+               default:
+                       return 0;
+               }
+               break;
+       case RTE_ETH_FILTER_FDIR:
+               return qede_fdir_filter_conf(eth_dev, filter_op, arg);
+       case RTE_ETH_FILTER_NTUPLE:
+               return qede_ntuple_filter_conf(eth_dev, filter_op, arg);
+       case RTE_ETH_FILTER_GENERIC:
+               if (ECORE_IS_CMT(edev)) {
+                       DP_ERR(edev, "flowdir is not supported in 100G mode\n");
+                       return -ENOTSUP;
+               }
+
+               if (filter_op != RTE_ETH_FILTER_GET)
+                       return -EINVAL;
+
+               *(const void **)arg = &qede_flow_ops;
+               return 0;
+       case RTE_ETH_FILTER_MACVLAN:
+       case RTE_ETH_FILTER_ETHERTYPE:
+       case RTE_ETH_FILTER_FLEXIBLE:
+       case RTE_ETH_FILTER_SYN:
+       case RTE_ETH_FILTER_HASH:
+       case RTE_ETH_FILTER_L2_TUNNEL:
+       case RTE_ETH_FILTER_MAX:
+       default:
+               DP_ERR(edev, "Unsupported filter type %d\n",
+                       filter_type);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
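The RTE_ETH_FILTER_GENERIC/RTE_ETH_FILTER_GET branch above is how the
rte_flow layer discovers qede_flow_ops; a simplified sketch of that
lookup (error handling trimmed):

    #include <rte_ethdev.h>
    #include <rte_flow.h>

    static const struct rte_flow_ops *
    get_flow_ops(uint16_t port_id)
    {
            const struct rte_flow_ops *ops = NULL;

            if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
                                        RTE_ETH_FILTER_GET, &ops) != 0)
                    return NULL;
            return ops; /* &qede_flow_ops on a qede port */
    }
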
index 46fa837..df83666 100644 (file)
 #define QEDE_ALARM_TIMEOUT_US 100000
 
 /* Global variable to hold absolute path of fw file */
-char fw_file[PATH_MAX];
+char qede_fw_file[PATH_MAX];
 
-const char *QEDE_DEFAULT_FIRMWARE =
-       "/lib/firmware/qed/qed_init_values-8.33.12.0.bin";
+static const char * const QEDE_DEFAULT_FIRMWARE =
+       "/lib/firmware/qed/qed_init_values-8.37.7.0.bin";
 
 static void
 qed_update_pf_params(struct ecore_dev *edev, struct ecore_pf_params *params)
@@ -126,11 +126,11 @@ static int qed_load_firmware_data(struct ecore_dev *edev)
        const char *fw = RTE_LIBRTE_QEDE_FW;
 
        if (strcmp(fw, "") == 0)
-               strcpy(fw_file, QEDE_DEFAULT_FIRMWARE);
+               strcpy(qede_fw_file, QEDE_DEFAULT_FIRMWARE);
        else
-               strcpy(fw_file, fw);
+               strcpy(qede_fw_file, fw);
 
-       fd = open(fw_file, O_RDONLY);
+       fd = open(qede_fw_file, O_RDONLY);
        if (fd < 0) {
                DP_ERR(edev, "Can't open firmware file\n");
                return -ENOENT;
@@ -234,7 +234,8 @@ static int qed_slowpath_start(struct ecore_dev *edev,
 #ifdef CONFIG_ECORE_BINARY_FW
                rc = qed_load_firmware_data(edev);
                if (rc) {
-                       DP_ERR(edev, "Failed to find fw file %s\n", fw_file);
+                       DP_ERR(edev, "Failed to find fw file %s\n",
+                               qede_fw_file);
                        goto err;
                }
 #endif
index 0f157de..8a4772f 100644 (file)
@@ -35,6 +35,49 @@ static inline int qede_alloc_rx_buffer(struct qede_rx_queue *rxq)
        return 0;
 }
 
+/* Criteria for calculating Rx buffer size -
+ * 1) rx_buf_size should not exceed the size of the mbuf
+ * 2) In scattered_rx mode - minimum rx_buf_size should be
+ *    (MTU + Maximum L2 Header Size + 2) / ETH_RX_MAX_BUFF_PER_PKT
+ * 3) In regular mode - minimum rx_buf_size should be
+ *    (MTU + Maximum L2 Header Size + 2)
+ *    In the above cases the +2 corresponds to 2 bytes of padding in
+ *    front of the L2 header.
+ * 4) rx_buf_size should be cacheline-size aligned. So considering
+ *    criterion 1, we need to adjust the size down (floor) instead of
+ *    up (ceil), so that we don't exceed the mbuf size while aligning
+ *    rx_buf_size.
+ */
+int
+qede_calc_rx_buf_size(struct rte_eth_dev *dev, uint16_t mbufsz,
+                     uint16_t max_frame_size)
+{
+       struct qede_dev *qdev = QEDE_INIT_QDEV(dev);
+       struct ecore_dev *edev = QEDE_INIT_EDEV(qdev);
+       int rx_buf_size;
+
+       if (dev->data->scattered_rx) {
+               /* Per HW limitation, only ETH_RX_MAX_BUFF_PER_PKT buffers
+                * can be used for a single packet, so make sure the mbuf
+                * size is sufficient for this.
+                */
+               if ((mbufsz * ETH_RX_MAX_BUFF_PER_PKT) <
+                    (max_frame_size + QEDE_ETH_OVERHEAD)) {
+                       DP_ERR(edev, "mbuf %d size is not enough to hold max fragments (%d) for max rx packet length (%d)\n",
+                              mbufsz, ETH_RX_MAX_BUFF_PER_PKT, max_frame_size);
+                       return -EINVAL;
+               }
+
+               rx_buf_size = RTE_MAX(mbufsz,
+                                     (max_frame_size + QEDE_ETH_OVERHEAD) /
+                                      ETH_RX_MAX_BUFF_PER_PKT);
+       } else {
+               rx_buf_size = max_frame_size + QEDE_ETH_OVERHEAD;
+       }
+
+       /* Align to cache-line size if needed */
+       return QEDE_FLOOR_TO_CACHE_LINE_SIZE(rx_buf_size);
+}
+
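A worked example of the calculation above, under assumed values (a
64-byte cache line, QEDE_VLAN_TAG_SIZE = 4, QEDE_LLC_SNAP_HDR_LEN = 8,
ETH_RX_MAX_BUFF_PER_PKT = 5): QEDE_ETH_OVERHEAD = 2 * 4 + 8 + 2 = 18, so
for max_frame_size = 1518 the regular-mode buffer is 1518 + 18 = 1536,
already a multiple of 64. Had the sum been, say, 1540, flooring would
yield 1536 rather than ceiling up to 1600, which is what keeps
rx_buf_size from exceeding the mbuf data room (criterion 1).
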
 int
 qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                    uint16_t nb_desc, unsigned int socket_id,
@@ -85,6 +128,8 @@ qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 
        /* Fix up RX buffer size */
        bufsz = (uint16_t)rte_pktmbuf_data_room_size(mp) - RTE_PKTMBUF_HEADROOM;
+       /* Cache-align the mbuf size to simplify the rx_buf_size calculation */
+       bufsz = QEDE_FLOOR_TO_CACHE_LINE_SIZE(bufsz);
        if ((rxmode->offloads & DEV_RX_OFFLOAD_SCATTER) ||
            (max_rx_pkt_len + QEDE_ETH_OVERHEAD) > bufsz) {
                if (!dev->data->scattered_rx) {
@@ -93,13 +138,13 @@ qede_rx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
                }
        }
 
-       if (dev->data->scattered_rx)
-               rxq->rx_buf_size = bufsz + ETHER_HDR_LEN +
-                                  ETHER_CRC_LEN + QEDE_ETH_OVERHEAD;
-       else
-               rxq->rx_buf_size = max_rx_pkt_len + QEDE_ETH_OVERHEAD;
-       /* Align to cache-line size if needed */
-       rxq->rx_buf_size = QEDE_CEIL_TO_CACHE_LINE_SIZE(rxq->rx_buf_size);
+       rc = qede_calc_rx_buf_size(dev, bufsz, max_rx_pkt_len);
+       if (rc < 0) {
+               rte_free(rxq);
+               return rc;
+       }
+
+       rxq->rx_buf_size = rc;
 
        DP_INFO(edev, "mtu %u mbufsz %u bd_max_bytes %u scatter_mode %d\n",
                qdev->mtu, bufsz, rxq->rx_buf_size, dev->data->scattered_rx);
@@ -2106,3 +2151,84 @@ qede_rxtx_pkts_dummy(__rte_unused void *p_rxq,
 {
        return 0;
 }
+
+
+/* This function does a fake walk through the completion queue to
+ * calculate the number of BDs used by HW.
+ * At the end, it restores the state of the completion queue.
+ */
+static uint16_t
+qede_parse_fp_cqe(struct qede_rx_queue *rxq)
+{
+       uint16_t hw_comp_cons, sw_comp_cons, bd_count = 0;
+       union eth_rx_cqe *cqe, *orig_cqe = NULL;
+
+       hw_comp_cons = rte_le_to_cpu_16(*rxq->hw_cons_ptr);
+       sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
+
+       if (hw_comp_cons == sw_comp_cons)
+               return 0;
+
+       /* Get the CQE from the completion ring */
+       cqe = (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring);
+       orig_cqe = cqe;
+
+       while (sw_comp_cons != hw_comp_cons) {
+               switch (cqe->fast_path_regular.type) {
+               case ETH_RX_CQE_TYPE_REGULAR:
+                       bd_count += cqe->fast_path_regular.bd_num;
+                       break;
+               case ETH_RX_CQE_TYPE_TPA_END:
+                       bd_count += cqe->fast_path_tpa_end.num_of_bds;
+                       break;
+               default:
+                       break;
+               }
+
+               cqe =
+               (union eth_rx_cqe *)ecore_chain_consume(&rxq->rx_comp_ring);
+               sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring);
+       }
+
+       /* revert comp_ring to original state */
+       ecore_chain_set_cons(&rxq->rx_comp_ring, sw_comp_cons, orig_cqe);
+
+       return bd_count;
+}
+
+int
+qede_rx_descriptor_status(void *p_rxq, uint16_t offset)
+{
+       uint16_t hw_bd_cons, sw_bd_cons, sw_bd_prod;
+       uint16_t produced, consumed;
+       struct qede_rx_queue *rxq = p_rxq;
+
+       if (offset > rxq->nb_rx_desc)
+               return -EINVAL;
+
+       sw_bd_cons = ecore_chain_get_cons_idx(&rxq->rx_bd_ring);
+       sw_bd_prod = ecore_chain_get_prod_idx(&rxq->rx_bd_ring);
+
+       /* find BDs used by HW from completion queue elements */
+       hw_bd_cons = sw_bd_cons + qede_parse_fp_cqe(rxq);
+
+       if (hw_bd_cons < sw_bd_cons)
+               /* wraparound case */
+               consumed = (0xffff - sw_bd_cons) + hw_bd_cons;
+       else
+               consumed = hw_bd_cons - sw_bd_cons;
+
+       if (offset <= consumed)
+               return RTE_ETH_RX_DESC_DONE;
+
+       if (sw_bd_prod < sw_bd_cons)
+               /* wraparound case */
+               produced = (0xffff - sw_bd_cons) + sw_bd_prod;
+       else
+               produced = sw_bd_prod - sw_bd_cons;
+
+       if (offset <= produced)
+               return RTE_ETH_RX_DESC_AVAIL;
+
+       return RTE_ETH_RX_DESC_UNAVAIL;
+}
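An application-side sketch of polling descriptor state through the
generic API, which lands in qede_rx_descriptor_status() above; port and
queue ids are illustrative:

    #include <rte_ethdev.h>

    static void
    check_rx_desc(uint16_t port_id, uint16_t queue_id, uint16_t offset)
    {
            switch (rte_eth_rx_descriptor_status(port_id, queue_id, offset)) {
            case RTE_ETH_RX_DESC_DONE:
                    /* HW has filled this descriptor; a packet is ready */
                    break;
            case RTE_ETH_RX_DESC_AVAIL:
                    /* Posted to HW but not yet used */
                    break;
            default:
                    /* RTE_ETH_RX_DESC_UNAVAIL or a negative error */
                    break;
            }
    }
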
index e710fba..d3a41e9 100644 (file)
 #define QEDE_FW_RX_ALIGN_END   (1UL << QEDE_RX_ALIGN_SHIFT)
 #define QEDE_CEIL_TO_CACHE_LINE_SIZE(n) (((n) + (QEDE_FW_RX_ALIGN_END - 1)) & \
                                        ~(QEDE_FW_RX_ALIGN_END - 1))
-/* Note: QEDE_LLC_SNAP_HDR_LEN is optional */
-#define QEDE_ETH_OVERHEAD      (((2 * QEDE_VLAN_TAG_SIZE)) - (ETHER_CRC_LEN) \
-                               + (QEDE_LLC_SNAP_HDR_LEN))
+#define QEDE_FLOOR_TO_CACHE_LINE_SIZE(n) RTE_ALIGN_FLOOR(n, \
+                                                        QEDE_FW_RX_ALIGN_END)
+
+/* Note: QEDE_LLC_SNAP_HDR_LEN is optional,
+ * +2 is for padding in front of L2 header
+ */
+#define QEDE_ETH_OVERHEAD      (((2 * QEDE_VLAN_TAG_SIZE)) \
+                                + (QEDE_LLC_SNAP_HDR_LEN) + 2)
+
+#define QEDE_MAX_ETHER_HDR_LEN (ETHER_HDR_LEN + QEDE_ETH_OVERHEAD)
 
 #define QEDE_RSS_OFFLOAD_ALL    (ETH_RSS_IPV4                  |\
                                 ETH_RSS_NONFRAG_IPV4_TCP       |\
@@ -267,6 +274,10 @@ uint16_t qede_rxtx_pkts_dummy(void *p_rxq,
 int qede_start_queues(struct rte_eth_dev *eth_dev);
 
 void qede_stop_queues(struct rte_eth_dev *eth_dev);
+int qede_calc_rx_buf_size(struct rte_eth_dev *dev, uint16_t mbufsz,
+                         uint16_t max_frame_size);
+int qede_rx_descriptor_status(void *rxq, uint16_t offset);
 
 /* Fastpath resource alloc/dealloc helpers */
 int qede_alloc_fp_resc(struct qede_dev *qdev);
index 791deb0..aeb48f5 100644 (file)
@@ -164,7 +164,6 @@ eth_dev_info(struct rte_eth_dev *dev,
        dev_info->max_rx_queues = (uint16_t)internals->max_rx_queues;
        dev_info->max_tx_queues = (uint16_t)internals->max_tx_queues;
        dev_info->min_rx_bufsize = 0;
-       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_CRC_STRIP;
 }
 
 static int
@@ -667,10 +666,8 @@ rte_pmd_ring_remove(struct rte_vdev_device *dev)
                }
        }
 
-       rte_free(eth_dev->data->rx_queues);
-       rte_free(eth_dev->data->tx_queues);
-       rte_free(eth_dev->data->dev_private);
-
+       /* mac_addrs must not be freed alone because part of dev_private */
+       eth_dev->data->mac_addrs = NULL;
        rte_eth_dev_release_port(eth_dev);
        return 0;
 }
index 7f89a7b..cdf835f 100644 (file)
@@ -73,11 +73,10 @@ efx_mcdi_set_evq_tmr(
        __in            uint32_t timer_ns)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_SET_EVQ_TMR_IN_LEN,
-                           MC_CMD_SET_EVQ_TMR_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_SET_EVQ_TMR_IN_LEN,
+               MC_CMD_SET_EVQ_TMR_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_SET_EVQ_TMR;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_SET_EVQ_TMR_IN_LEN;
@@ -123,9 +122,9 @@ efx_mcdi_init_evq(
        __in            boolean_t low_latency)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[
-           MAX(MC_CMD_INIT_EVQ_IN_LEN(EFX_EVQ_NBUFS(EFX_EVQ_MAXNEVS)),
-               MC_CMD_INIT_EVQ_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload,
+               MC_CMD_INIT_EVQ_IN_LEN(EFX_EVQ_NBUFS(EFX_EVQ_MAXNEVS)),
+               MC_CMD_INIT_EVQ_OUT_LEN);
        efx_qword_t *dma_addr;
        uint64_t addr;
        int npages;
@@ -140,7 +139,6 @@ efx_mcdi_init_evq(
                goto fail1;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_INIT_EVQ;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_INIT_EVQ_IN_LEN(npages);
@@ -260,9 +258,9 @@ efx_mcdi_init_evq_v2(
        __in            uint32_t flags)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[
-               MAX(MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_EVQ_NBUFS(EFX_EVQ_MAXNEVS)),
-                   MC_CMD_INIT_EVQ_V2_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload,
+               MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_EVQ_NBUFS(EFX_EVQ_MAXNEVS)),
+               MC_CMD_INIT_EVQ_V2_OUT_LEN);
        boolean_t interrupting;
        unsigned int evq_type;
        efx_qword_t *dma_addr;
@@ -277,7 +275,6 @@ efx_mcdi_init_evq_v2(
                goto fail1;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_INIT_EVQ;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_INIT_EVQ_V2_IN_LEN(npages);
@@ -384,11 +381,10 @@ efx_mcdi_fini_evq(
        __in            uint32_t instance)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_FINI_EVQ_IN_LEN,
-                           MC_CMD_FINI_EVQ_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_FINI_EVQ_IN_LEN,
+               MC_CMD_FINI_EVQ_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_FINI_EVQ;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_FINI_EVQ_IN_LEN;
@@ -603,8 +599,8 @@ efx_mcdi_driver_event(
        __in            efx_qword_t data)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_DRIVER_EVENT_IN_LEN,
-                           MC_CMD_DRIVER_EVENT_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_DRIVER_EVENT_IN_LEN,
+               MC_CMD_DRIVER_EVENT_OUT_LEN);
        efx_rc_t rc;
 
        req.emr_cmd = MC_CMD_DRIVER_EVENT;
@@ -867,8 +863,9 @@ ef10_ev_rx(
 
        EFX_EV_QSTAT_INCR(eep, EV_RX);
 
-       /* Discard events after RXQ/TXQ errors */
-       if (enp->en_reset_flags & (EFX_RESET_RXQ_ERR | EFX_RESET_TXQ_ERR))
+       /* Discard events after RXQ/TXQ errors, or hardware not available */
+       if (enp->en_reset_flags &
+           (EFX_RESET_RXQ_ERR | EFX_RESET_TXQ_ERR | EFX_RESET_HW_UNAVAIL))
                return (B_FALSE);
 
        /* Basic packet information */
@@ -1068,8 +1065,9 @@ ef10_ev_tx(
 
        EFX_EV_QSTAT_INCR(eep, EV_TX);
 
-       /* Discard events after RXQ/TXQ errors */
-       if (enp->en_reset_flags & (EFX_RESET_RXQ_ERR | EFX_RESET_TXQ_ERR))
+       /* Discard events after RXQ/TXQ errors, or hardware not available */
+       if (enp->en_reset_flags &
+           (EFX_RESET_RXQ_ERR | EFX_RESET_TXQ_ERR | EFX_RESET_HW_UNAVAIL))
                return (B_FALSE);
 
        if (EFX_QWORD_FIELD(*eqp, ESF_DZ_TX_DROP_EVENT) != 0) {
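The recurring EFX_MCDI_DECLARE_BUF() conversions in this file replace a
MAX()-sized payload array plus an explicit memset(); a minimal sketch of
what the macro is assumed to expand to (the real definition in the sfc
base headers may differ, e.g. in alignment):

    /* Zero-initialized MCDI payload sized for both request and response */
    #define EFX_MCDI_DECLARE_BUF(_name, _in_len, _out_len)  \
            uint8_t _name[MAX(_in_len, _out_len)] = {0}
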
index ae87285..afe4064 100644 (file)
@@ -172,12 +172,11 @@ efx_mcdi_filter_op_add(
        __inout         ef10_filter_handle_t *handle)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_FILTER_OP_V3_IN_LEN,
-                           MC_CMD_FILTER_OP_EXT_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_FILTER_OP_V3_IN_LEN,
+               MC_CMD_FILTER_OP_EXT_OUT_LEN);
        efx_filter_match_flags_t match_flags;
        efx_rc_t rc;
 
-       memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_FILTER_OP;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_FILTER_OP_V3_IN_LEN;
@@ -376,11 +375,10 @@ efx_mcdi_filter_op_delete(
        __inout         ef10_filter_handle_t *handle)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_FILTER_OP_EXT_IN_LEN,
-                           MC_CMD_FILTER_OP_EXT_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_FILTER_OP_EXT_IN_LEN,
+               MC_CMD_FILTER_OP_EXT_OUT_LEN);
        efx_rc_t rc;
 
-       memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_FILTER_OP;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_FILTER_OP_EXT_IN_LEN;
@@ -950,13 +948,12 @@ efx_mcdi_get_parser_disp_info(
        __out                           size_t *list_lengthp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_PARSER_DISP_INFO_IN_LEN,
-                           MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMAX)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_PARSER_DISP_INFO_IN_LEN,
+               MC_CMD_GET_PARSER_DISP_INFO_OUT_LENMAX);
        size_t matches_count;
        size_t list_size;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_PARSER_DISP_INFO;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_PARSER_DISP_INFO_IN_LEN;
@@ -1144,12 +1141,15 @@ ef10_filter_insert_unicast(
        efx_filter_spec_init_rx(&spec, EFX_FILTER_PRI_AUTO,
            filter_flags,
            eftp->eft_default_rxq);
-       efx_filter_spec_set_eth_local(&spec, EFX_FILTER_SPEC_VID_UNSPEC, addr);
+       rc = efx_filter_spec_set_eth_local(&spec, EFX_FILTER_SPEC_VID_UNSPEC,
+           addr);
+       if (rc != 0)
+               goto fail1;
 
        rc = ef10_filter_add_internal(enp, &spec, B_TRUE,
            &eftp->eft_unicst_filter_indexes[eftp->eft_unicst_filter_count]);
        if (rc != 0)
-               goto fail1;
+               goto fail2;
 
        eftp->eft_unicst_filter_count++;
        EFSYS_ASSERT(eftp->eft_unicst_filter_count <=
@@ -1157,6 +1157,8 @@ ef10_filter_insert_unicast(
 
        return (0);
 
+fail2:
+       EFSYS_PROBE(fail2);
 fail1:
        EFSYS_PROBE1(fail1, efx_rc_t, rc);
        return (rc);
@@ -1175,11 +1177,13 @@ ef10_filter_insert_all_unicast(
        efx_filter_spec_init_rx(&spec, EFX_FILTER_PRI_AUTO,
            filter_flags,
            eftp->eft_default_rxq);
-       efx_filter_spec_set_uc_def(&spec);
+       rc = efx_filter_spec_set_uc_def(&spec);
+       if (rc != 0)
+               goto fail1;
        rc = ef10_filter_add_internal(enp, &spec, B_TRUE,
            &eftp->eft_unicst_filter_indexes[eftp->eft_unicst_filter_count]);
        if (rc != 0)
-               goto fail1;
+               goto fail2;
 
        eftp->eft_unicst_filter_count++;
        EFSYS_ASSERT(eftp->eft_unicst_filter_count <=
@@ -1187,6 +1191,8 @@ ef10_filter_insert_all_unicast(
 
        return (0);
 
+fail2:
+       EFSYS_PROBE(fail2);
 fail1:
        EFSYS_PROBE1(fail1, efx_rc_t, rc);
        return (rc);
@@ -1228,9 +1234,21 @@ ef10_filter_insert_multicast_list(
                    filter_flags,
                    eftp->eft_default_rxq);
 
-               efx_filter_spec_set_eth_local(&spec,
+               rc = efx_filter_spec_set_eth_local(&spec,
                    EFX_FILTER_SPEC_VID_UNSPEC,
                    &addrs[i * EFX_MAC_ADDR_LEN]);
+               if (rc != 0) {
+                       if (rollback == B_TRUE) {
+                               /* Only stop upon failure if told to rollback */
+                               goto rollback;
+                       } else {
+                               /*
+                                * Don't try to add a filter with a corrupt
+                                * specification.
+                                */
+                               continue;
+                       }
+               }
 
                rc = ef10_filter_add_internal(enp, &spec, B_TRUE,
                                            &filter_index);
@@ -1253,8 +1271,12 @@ ef10_filter_insert_multicast_list(
                    eftp->eft_default_rxq);
 
                EFX_MAC_BROADCAST_ADDR_SET(addr);
-               efx_filter_spec_set_eth_local(&spec, EFX_FILTER_SPEC_VID_UNSPEC,
-                   addr);
+               rc = efx_filter_spec_set_eth_local(&spec,
+                   EFX_FILTER_SPEC_VID_UNSPEC, addr);
+               if ((rc != 0) && (rollback == B_TRUE)) {
+                       /* Only stop upon failure if told to rollback */
+                       goto rollback;
+               }
 
                rc = ef10_filter_add_internal(enp, &spec, B_TRUE,
                                            &filter_index);
@@ -1302,12 +1324,14 @@ ef10_filter_insert_all_multicast(
        efx_filter_spec_init_rx(&spec, EFX_FILTER_PRI_AUTO,
            filter_flags,
            eftp->eft_default_rxq);
-       efx_filter_spec_set_mc_def(&spec);
+       rc = efx_filter_spec_set_mc_def(&spec);
+       if (rc != 0)
+               goto fail1;
 
        rc = ef10_filter_add_internal(enp, &spec, B_TRUE,
            &eftp->eft_mulcst_filter_indexes[0]);
        if (rc != 0)
-               goto fail1;
+               goto fail2;
 
        eftp->eft_mulcst_filter_count = 1;
        eftp->eft_using_all_mulcst = B_TRUE;
@@ -1318,6 +1342,8 @@ ef10_filter_insert_all_multicast(
 
        return (0);
 
+fail2:
+       EFSYS_PROBE(fail2);
 fail1:
        EFSYS_PROBE1(fail1, efx_rc_t, rc);
 
@@ -1552,7 +1578,7 @@ ef10_filter_reconfigure(
        /*
         * Insert or renew unicast filters.
         *
-        * Frimware does not perform chaining on unicast filters. As traffic is
+        * Firmware does not perform chaining on unicast filters. As traffic is
         * therefore only delivered to the first matching filter, we should
         * always insert the specific filter for our MAC address, to try and
         * ensure we get that traffic.
index 6fb7e47..c035e0d 100644 (file)
@@ -577,7 +577,8 @@ fail1:
 
        __checkReturn   efx_rc_t
 efx_build_signed_image_write_buffer(
-       __out           uint8_t                 *bufferp,
+       __out_bcount(buffer_size)
+                       uint8_t                 *bufferp,
        __in            uint32_t                buffer_size,
        __in            efx_image_info_t        *infop,
        __out           efx_image_header_t      **headerpp)
@@ -704,7 +705,7 @@ efx_build_signed_image_write_buffer(
         * results in the layout used for the data chunks and chunk headers.
         */
        /* END CSTYLED */
-       memset(bufferp, buffer_size, 0xFF);
+       memset(bufferp, 0xFF, buffer_size);
 
        EFX_STATIC_ASSERT(sizeof (chunk_hdr) == SIGNED_IMAGE_CHUNK_HDR_LEN);
        memset(&chunk_hdr, 0, SIGNED_IMAGE_CHUNK_HDR_LEN);
index 4751faf..f971063 100644 (file)
@@ -190,6 +190,14 @@ extern     __checkReturn   efx_rc_t
 ef10_nic_init(
        __in            efx_nic_t *enp);
 
+extern __checkReturn   boolean_t
+ef10_nic_hw_unavailable(
+       __in            efx_nic_t *enp);
+
+extern                 void
+ef10_nic_set_hw_unavailable(
+       __in            efx_nic_t *enp);
+
 #if EFSYS_OPT_DIAG
 
 extern __checkReturn   efx_rc_t
@@ -453,7 +461,7 @@ ef10_nvram_partn_write(
        __in                    efx_nic_t *enp,
        __in                    uint32_t partn,
        __in                    unsigned int offset,
-       __out_bcount(size)      caddr_t data,
+       __in_bcount(size)       caddr_t data,
        __in                    size_t size);
 
 extern __checkReturn           efx_rc_t
@@ -477,17 +485,21 @@ ef10_nvram_partn_set_version(
 
 extern __checkReturn           efx_rc_t
 ef10_nvram_buffer_validate(
-       __in                    efx_nic_t *enp,
        __in                    uint32_t partn,
        __in_bcount(buffer_size)
                                caddr_t bufferp,
        __in                    size_t buffer_size);
 
+extern                 void
+ef10_nvram_buffer_init(
+       __out_bcount(buffer_size)
+                               caddr_t bufferp,
+       __in                    size_t buffer_size);
+
 extern __checkReturn           efx_rc_t
 ef10_nvram_buffer_create(
-       __in                    efx_nic_t *enp,
-       __in                    uint16_t partn_type,
-       __in_bcount(buffer_size)
+       __in                    uint32_t partn_type,
+       __out_bcount(buffer_size)
                                caddr_t bufferp,
        __in                    size_t buffer_size);
 
@@ -515,6 +527,16 @@ ef10_nvram_buffer_find_item(
        __out                   uint32_t *startp,
        __out                   uint32_t *lengthp);
 
+extern __checkReturn           efx_rc_t
+ef10_nvram_buffer_peek_item(
+       __in_bcount(buffer_size)
+                               caddr_t bufferp,
+       __in                    size_t buffer_size,
+       __in                    uint32_t offset,
+       __out                   uint32_t *tagp,
+       __out                   uint32_t *lengthp,
+       __out                   uint32_t *value_offsetp);
+
 extern __checkReturn           efx_rc_t
 ef10_nvram_buffer_get_item(
        __in_bcount(buffer_size)
@@ -522,9 +544,10 @@ ef10_nvram_buffer_get_item(
        __in                    size_t buffer_size,
        __in                    uint32_t offset,
        __in                    uint32_t length,
-       __out_bcount_part(item_max_size, *lengthp)
-                               caddr_t itemp,
-       __in                    size_t item_max_size,
+       __out                   uint32_t *tagp,
+       __out_bcount_part(value_max_size, *lengthp)
+                               caddr_t valuep,
+       __in                    size_t value_max_size,
        __out                   uint32_t *lengthp);
 
 extern __checkReturn           efx_rc_t
@@ -533,7 +556,19 @@ ef10_nvram_buffer_insert_item(
                                caddr_t bufferp,
        __in                    size_t buffer_size,
        __in                    uint32_t offset,
-       __in_bcount(length)     caddr_t keyp,
+       __in                    uint32_t tag,
+       __in_bcount(length)     caddr_t valuep,
+       __in                    uint32_t length,
+       __out                   uint32_t *lengthp);
+
+extern __checkReturn           efx_rc_t
+ef10_nvram_buffer_modify_item(
+       __in_bcount(buffer_size)
+                               caddr_t bufferp,
+       __in                    size_t buffer_size,
+       __in                    uint32_t offset,
+       __in                    uint32_t tag,
+       __in_bcount(length)     caddr_t valuep,
        __in                    uint32_t length,
        __out                   uint32_t *lengthp);
 
@@ -558,10 +593,7 @@ ef10_nvram_buffer_finish(
 /* PHY */
 
 typedef struct ef10_link_state_s {
-       uint32_t                els_adv_cap_mask;
-       uint32_t                els_lp_cap_mask;
-       unsigned int            els_fcntl;
-       efx_link_mode_t         els_link_mode;
+       efx_phy_link_state_t    epls;
 #if EFSYS_OPT_LOOPBACK
        efx_loopback_type_t     els_loopback;
 #endif
@@ -597,6 +629,11 @@ ef10_phy_oui_get(
        __in            efx_nic_t *enp,
        __out           uint32_t *ouip);
 
+extern __checkReturn   efx_rc_t
+ef10_phy_link_state_get(
+       __in            efx_nic_t *enp,
+       __out           efx_phy_link_state_t *eplsp);
+
 #if EFSYS_OPT_PHY_STATS
 
 extern __checkReturn                   efx_rc_t
@@ -1128,11 +1165,12 @@ extern  __checkReturn   efx_rc_t
 efx_mcdi_get_port_modes(
        __in            efx_nic_t *enp,
        __out           uint32_t *modesp,
-       __out_opt       uint32_t *current_modep);
+       __out_opt       uint32_t *current_modep,
+       __out_opt       uint32_t *default_modep);
 
 extern __checkReturn   efx_rc_t
 ef10_nic_get_port_mode_bandwidth(
-       __in            uint32_t port_mode,
+       __in            efx_nic_t *enp,
        __out           uint32_t *bandwidth_mbpsp);
 
 extern __checkReturn   efx_rc_t
index 1ffe266..efa1571 100644 (file)
@@ -51,8 +51,8 @@ efx_mcdi_trigger_interrupt(
        __in            unsigned int level)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_TRIGGER_INTERRUPT_IN_LEN,
-                           MC_CMD_TRIGGER_INTERRUPT_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_TRIGGER_INTERRUPT_IN_LEN,
+               MC_CMD_TRIGGER_INTERRUPT_OUT_LEN);
        efx_rc_t rc;
 
        EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON ||
@@ -64,7 +64,6 @@ efx_mcdi_trigger_interrupt(
                goto fail1;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_TRIGGER_INTERRUPT;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_TRIGGER_INTERRUPT_IN_LEN;
index 1031e83..9f10f6f 100644 (file)
@@ -22,10 +22,10 @@ ef10_mac_poll(
        if ((rc = ef10_phy_get_link(enp, &els)) != 0)
                goto fail1;
 
-       epp->ep_adv_cap_mask = els.els_adv_cap_mask;
-       epp->ep_fcntl = els.els_fcntl;
+       epp->ep_adv_cap_mask = els.epls.epls_adv_cap_mask;
+       epp->ep_fcntl = els.epls.epls_fcntl;
 
-       *link_modep = els.els_link_mode;
+       *link_modep = els.epls.epls_link_mode;
 
        return (0);
 
@@ -75,11 +75,10 @@ efx_mcdi_vadapter_set_mac(
 {
        efx_port_t *epp = &(enp->en_port);
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_VADAPTOR_SET_MAC_IN_LEN,
-                           MC_CMD_VADAPTOR_SET_MAC_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_VADAPTOR_SET_MAC_IN_LEN,
+               MC_CMD_VADAPTOR_SET_MAC_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_VADAPTOR_SET_MAC;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_VADAPTOR_SET_MAC_IN_LEN;
@@ -141,11 +140,10 @@ efx_mcdi_mtu_set(
        __in            uint32_t mtu)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_SET_MAC_EXT_IN_LEN,
-                           MC_CMD_SET_MAC_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_SET_MAC_EXT_IN_LEN,
+               MC_CMD_SET_MAC_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_SET_MAC;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_SET_MAC_EXT_IN_LEN;
@@ -178,11 +176,10 @@ efx_mcdi_mtu_get(
        __out           size_t *mtu)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_SET_MAC_EXT_IN_LEN,
-                           MC_CMD_SET_MAC_V2_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_SET_MAC_EXT_IN_LEN,
+               MC_CMD_SET_MAC_V2_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_SET_MAC;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_SET_MAC_EXT_IN_LEN;
@@ -274,11 +271,10 @@ ef10_mac_reconfigure(
 {
        efx_port_t *epp = &(enp->en_port);
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_SET_MAC_IN_LEN,
-                           MC_CMD_SET_MAC_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_SET_MAC_IN_LEN,
+               MC_CMD_SET_MAC_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_SET_MAC;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_SET_MAC_IN_LEN;
@@ -412,7 +408,7 @@ ef10_mac_filter_default_rxq_clear(
 
        ef10_filter_default_rxq_clear(enp);
 
-       efx_filter_reconfigure(enp, epp->ep_mac_addr,
+       (void) efx_filter_reconfigure(enp, epp->ep_mac_addr,
                                    epp->ep_all_unicst, epp->ep_mulcst,
                                    epp->ep_all_mulcst, epp->ep_brdcst,
                                    epp->ep_mulcst_addr_list,
@@ -654,7 +650,7 @@ ef10_mac_stats_update(
        EF10_MAC_STAT_READ(esmp, MC_CMD_MAC_TX_LT64_PKTS, &value);
        EFSYS_STAT_SET_QWORD(&(stat[EFX_MAC_TX_LE_64_PKTS]), &value);
        EF10_MAC_STAT_READ(esmp, MC_CMD_MAC_TX_64_PKTS, &value);
-       EFSYS_STAT_SET_QWORD(&(stat[EFX_MAC_TX_LE_64_PKTS]), &value);
+       EFSYS_STAT_INCR_QWORD(&(stat[EFX_MAC_TX_LE_64_PKTS]), &value);
 
        EF10_MAC_STAT_READ(esmp, MC_CMD_MAC_TX_65_TO_127_PKTS, &value);
        EFSYS_STAT_SET_QWORD(&(stat[EFX_MAC_TX_65_TO_127_PKTS]), &value);
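
The SET-to-INCR change above is a genuine statistics fix: both MC_CMD_MAC_TX_LT64_PKTS and MC_CMD_MAC_TX_64_PKTS feed the single EFX_MAC_TX_LE_64_PKTS bucket, so the second read must accumulate instead of overwrite. Conceptually (variable names hypothetical):

        uint64_t mc_tx_lt64_pkts = 0, mc_tx_64_pkts = 0; /* from DMA buffer */
        uint64_t tx_le_64_pkts;

        tx_le_64_pkts  = mc_tx_lt64_pkts;   /* EFSYS_STAT_SET_QWORD  */
        tx_le_64_pkts += mc_tx_64_pkts;     /* EFSYS_STAT_INCR_QWORD */
        /* The old code did a second SET here, discarding the LT64 count. */
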
index 7dbf843..50e23b7 100644 (file)
@@ -20,15 +20,14 @@ efx_mcdi_get_port_assignment(
        __out           uint32_t *portp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_PORT_ASSIGNMENT_IN_LEN,
-                           MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_PORT_ASSIGNMENT_IN_LEN,
+               MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN);
        efx_rc_t rc;
 
        EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON ||
            enp->en_family == EFX_FAMILY_MEDFORD ||
            enp->en_family == EFX_FAMILY_MEDFORD2);
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_PORT_ASSIGNMENT;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_PORT_ASSIGNMENT_IN_LEN;
@@ -63,18 +62,18 @@ fail1:
 efx_mcdi_get_port_modes(
        __in            efx_nic_t *enp,
        __out           uint32_t *modesp,
-       __out_opt       uint32_t *current_modep)
+       __out_opt       uint32_t *current_modep,
+       __out_opt       uint32_t *default_modep)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_PORT_MODES_IN_LEN,
-                           MC_CMD_GET_PORT_MODES_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_PORT_MODES_IN_LEN,
+               MC_CMD_GET_PORT_MODES_OUT_LEN);
        efx_rc_t rc;
 
        EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON ||
            enp->en_family == EFX_FAMILY_MEDFORD ||
            enp->en_family == EFX_FAMILY_MEDFORD2);
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_PORT_MODES;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_PORT_MODES_IN_LEN;
@@ -110,6 +109,11 @@ efx_mcdi_get_port_modes(
                                            GET_PORT_MODES_OUT_CURRENT_MODE);
        }
 
+       if (default_modep != NULL) {
+               *default_modep = MCDI_OUT_DWORD(req,
+                                           GET_PORT_MODES_OUT_DEFAULT_MODE);
+       }
+
        return (0);
 
 fail3:
@@ -124,44 +128,99 @@ fail1:
 
        __checkReturn   efx_rc_t
 ef10_nic_get_port_mode_bandwidth(
-       __in            uint32_t port_mode,
+       __in            efx_nic_t *enp,
        __out           uint32_t *bandwidth_mbpsp)
 {
+       uint32_t port_modes;
+       uint32_t current_mode;
+       efx_port_t *epp = &(enp->en_port);
+
+       uint32_t single_lane;
+       uint32_t dual_lane;
+       uint32_t quad_lane;
        uint32_t bandwidth;
        efx_rc_t rc;
 
-       switch (port_mode) {
-       case TLV_PORT_MODE_10G:
-               bandwidth = 10000;
+       if ((rc = efx_mcdi_get_port_modes(enp, &port_modes,
+                                   &current_mode, NULL)) != 0) {
+               /* No port mode info available. */
+               goto fail1;
+       }
+
+       if (epp->ep_phy_cap_mask & (1 << EFX_PHY_CAP_25000FDX))
+               single_lane = 25000;
+       else
+               single_lane = 10000;
+
+       if (epp->ep_phy_cap_mask & (1 << EFX_PHY_CAP_50000FDX))
+               dual_lane = 50000;
+       else
+               dual_lane = 20000;
+
+       if (epp->ep_phy_cap_mask & (1 << EFX_PHY_CAP_100000FDX))
+               quad_lane = 100000;
+       else
+               quad_lane = 40000;
+
+       switch (current_mode) {
+       case TLV_PORT_MODE_1x1_NA:                      /* mode 0 */
+               bandwidth = single_lane;
+               break;
+       case TLV_PORT_MODE_1x2_NA:                      /* mode 10 */
+       case TLV_PORT_MODE_NA_1x2:                      /* mode 11 */
+               bandwidth = dual_lane;
+               break;
+       case TLV_PORT_MODE_1x1_1x1:                     /* mode 2 */
+               bandwidth = single_lane + single_lane;
+               break;
+       case TLV_PORT_MODE_4x1_NA:                      /* mode 4 */
+       case TLV_PORT_MODE_NA_4x1:                      /* mode 8 */
+               bandwidth = 4 * single_lane;
+               break;
+       case TLV_PORT_MODE_2x1_2x1:                     /* mode 5 */
+               bandwidth = (2 * single_lane) + (2 * single_lane);
+               break;
+       case TLV_PORT_MODE_1x2_1x2:                     /* mode 12 */
+               bandwidth = dual_lane + dual_lane;
+               break;
+       case TLV_PORT_MODE_1x2_2x1:                     /* mode 17 */
+       case TLV_PORT_MODE_2x1_1x2:                     /* mode 18 */
+               bandwidth = dual_lane + (2 * single_lane);
                break;
-       case TLV_PORT_MODE_10G_10G:
-               bandwidth = 10000 * 2;
+       /* Legacy Medford-only mode. Do not use (see bug63270) */
+       case TLV_PORT_MODE_10G_10G_10G_10G_Q1_Q2:       /* mode 9 */
+               bandwidth = 4 * single_lane;
                break;
-       case TLV_PORT_MODE_10G_10G_10G_10G:
-       case TLV_PORT_MODE_10G_10G_10G_10G_Q:
-       case TLV_PORT_MODE_10G_10G_10G_10G_Q1_Q2:
-       case TLV_PORT_MODE_10G_10G_10G_10G_Q2:
-               bandwidth = 10000 * 4;
+       case TLV_PORT_MODE_1x4_NA:                      /* mode 1 */
+       case TLV_PORT_MODE_NA_1x4:                      /* mode 22 */
+               bandwidth = quad_lane;
                break;
-       case TLV_PORT_MODE_40G:
-               bandwidth = 40000;
+       case TLV_PORT_MODE_2x2_NA:                      /* mode 13 */
+       case TLV_PORT_MODE_NA_2x2:                      /* mode 14 */
+               bandwidth = 2 * dual_lane;
                break;
-       case TLV_PORT_MODE_40G_40G:
-               bandwidth = 40000 * 2;
+       case TLV_PORT_MODE_1x4_2x1:                     /* mode 6 */
+       case TLV_PORT_MODE_2x1_1x4:                     /* mode 7 */
+               bandwidth = quad_lane + (2 * single_lane);
                break;
-       case TLV_PORT_MODE_40G_10G_10G:
-       case TLV_PORT_MODE_10G_10G_40G:
-               bandwidth = 40000 + (10000 * 2);
+       case TLV_PORT_MODE_1x4_1x2:                     /* mode 15 */
+       case TLV_PORT_MODE_1x2_1x4:                     /* mode 16 */
+               bandwidth = quad_lane + dual_lane;
+               break;
+       case TLV_PORT_MODE_1x4_1x4:                     /* mode 3 */
+               bandwidth = quad_lane + quad_lane;
                break;
        default:
                rc = EINVAL;
-               goto fail1;
+               goto fail2;
        }
 
        *bandwidth_mbpsp = bandwidth;
 
        return (0);
 
+fail2:
+       EFSYS_PROBE(fail2);
 fail1:
        EFSYS_PROBE1(fail1, efx_rc_t, rc);
 
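
The rewritten helper derives per-lane speed from the PHY capability mask instead of hard-coding 10G/40G figures, so one port-mode table now covers 10/25/40/50/100G parts. A self-contained illustration of the arithmetic, assuming a 25G-capable PHY in mode 5 (TLV_PORT_MODE_2x1_2x1, two cages of two single lanes each):

        #include <stdint.h>

        /* Hypothetical example of the lane-based bandwidth computation. */
        static uint32_t
        example_mode5_bandwidth(int phy_is_25g_capable)
        {
                uint32_t single_lane = phy_is_25g_capable ? 25000 : 10000;

                /* TLV_PORT_MODE_2x1_2x1: (2 * single) + (2 * single) */
                return ((2 * single_lane) + (2 * single_lane)); /* Mbps */
        }
        /* example_mode5_bandwidth(1) == 100000; (0) == 40000 */
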
@@ -174,13 +233,12 @@ efx_mcdi_vadaptor_alloc(
        __in                    uint32_t port_id)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_VADAPTOR_ALLOC_IN_LEN,
-                           MC_CMD_VADAPTOR_ALLOC_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_VADAPTOR_ALLOC_IN_LEN,
+               MC_CMD_VADAPTOR_ALLOC_OUT_LEN);
        efx_rc_t rc;
 
        EFSYS_ASSERT3U(enp->en_vport_id, ==, EVB_PORT_ID_NULL);
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_VADAPTOR_ALLOC;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_VADAPTOR_ALLOC_IN_LEN;
@@ -213,11 +271,10 @@ efx_mcdi_vadaptor_free(
        __in                    uint32_t port_id)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_VADAPTOR_FREE_IN_LEN,
-                           MC_CMD_VADAPTOR_FREE_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_VADAPTOR_FREE_IN_LEN,
+               MC_CMD_VADAPTOR_FREE_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_VADAPTOR_FREE;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_VADAPTOR_FREE_IN_LEN;
@@ -247,15 +304,14 @@ efx_mcdi_get_mac_address_pf(
        __out_ecount_opt(6)     uint8_t mac_addrp[6])
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_MAC_ADDRESSES_IN_LEN,
-                           MC_CMD_GET_MAC_ADDRESSES_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_MAC_ADDRESSES_IN_LEN,
+               MC_CMD_GET_MAC_ADDRESSES_OUT_LEN);
        efx_rc_t rc;
 
        EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON ||
            enp->en_family == EFX_FAMILY_MEDFORD ||
            enp->en_family == EFX_FAMILY_MEDFORD2);
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_MAC_ADDRESSES;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_MAC_ADDRESSES_IN_LEN;
@@ -306,15 +362,14 @@ efx_mcdi_get_mac_address_vf(
        __out_ecount_opt(6)     uint8_t mac_addrp[6])
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_VPORT_GET_MAC_ADDRESSES_IN_LEN,
-                           MC_CMD_VPORT_GET_MAC_ADDRESSES_OUT_LENMAX)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_VPORT_GET_MAC_ADDRESSES_IN_LEN,
+               MC_CMD_VPORT_GET_MAC_ADDRESSES_OUT_LENMAX);
        efx_rc_t rc;
 
        EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON ||
            enp->en_family == EFX_FAMILY_MEDFORD ||
            enp->en_family == EFX_FAMILY_MEDFORD2);
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_VPORT_GET_MAC_ADDRESSES;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_VPORT_GET_MAC_ADDRESSES_IN_LEN;
@@ -371,15 +426,14 @@ efx_mcdi_get_clock(
        __out           uint32_t *dpcpu_freqp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_CLOCK_IN_LEN,
-                           MC_CMD_GET_CLOCK_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_CLOCK_IN_LEN,
+               MC_CMD_GET_CLOCK_OUT_LEN);
        efx_rc_t rc;
 
        EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON ||
            enp->en_family == EFX_FAMILY_MEDFORD ||
            enp->en_family == EFX_FAMILY_MEDFORD2);
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_CLOCK;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_CLOCK_IN_LEN;
@@ -429,12 +483,11 @@ efx_mcdi_get_rxdp_config(
        __out           uint32_t *end_paddingp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_RXDP_CONFIG_IN_LEN,
-                           MC_CMD_GET_RXDP_CONFIG_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_RXDP_CONFIG_IN_LEN,
+               MC_CMD_GET_RXDP_CONFIG_OUT_LEN);
        uint32_t end_padding;
        efx_rc_t rc;
 
-       memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_RXDP_CONFIG;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_RXDP_CONFIG_IN_LEN;
@@ -489,11 +542,10 @@ efx_mcdi_get_vector_cfg(
        __out_opt       uint32_t *vf_nvecp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_VECTOR_CFG_IN_LEN,
-                           MC_CMD_GET_VECTOR_CFG_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_VECTOR_CFG_IN_LEN,
+               MC_CMD_GET_VECTOR_CFG_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_VECTOR_CFG;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_VECTOR_CFG_IN_LEN;
@@ -539,8 +591,8 @@ efx_mcdi_alloc_vis(
        __out           uint32_t *vi_shiftp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_ALLOC_VIS_IN_LEN,
-                           MC_CMD_ALLOC_VIS_EXT_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_ALLOC_VIS_IN_LEN,
+               MC_CMD_ALLOC_VIS_EXT_OUT_LEN);
        efx_rc_t rc;
 
        if (vi_countp == NULL) {
@@ -548,7 +600,6 @@ efx_mcdi_alloc_vis(
                goto fail1;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_ALLOC_VIS;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_ALLOC_VIS_IN_LEN;
@@ -631,8 +682,8 @@ efx_mcdi_alloc_piobuf(
        __out           efx_piobuf_handle_t *handlep)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_ALLOC_PIOBUF_IN_LEN,
-                           MC_CMD_ALLOC_PIOBUF_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_ALLOC_PIOBUF_IN_LEN,
+               MC_CMD_ALLOC_PIOBUF_OUT_LEN);
        efx_rc_t rc;
 
        if (handlep == NULL) {
@@ -640,7 +691,6 @@ efx_mcdi_alloc_piobuf(
                goto fail1;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_ALLOC_PIOBUF;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_ALLOC_PIOBUF_IN_LEN;
@@ -679,11 +729,10 @@ efx_mcdi_free_piobuf(
        __in            efx_piobuf_handle_t handle)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_FREE_PIOBUF_IN_LEN,
-                           MC_CMD_FREE_PIOBUF_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_FREE_PIOBUF_IN_LEN,
+               MC_CMD_FREE_PIOBUF_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_FREE_PIOBUF;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_FREE_PIOBUF_IN_LEN;
@@ -714,11 +763,10 @@ efx_mcdi_link_piobuf(
        __in            efx_piobuf_handle_t handle)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_LINK_PIOBUF_IN_LEN,
-                           MC_CMD_LINK_PIOBUF_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_LINK_PIOBUF_IN_LEN,
+               MC_CMD_LINK_PIOBUF_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_LINK_PIOBUF;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_LINK_PIOBUF_IN_LEN;
@@ -749,11 +797,10 @@ efx_mcdi_unlink_piobuf(
        __in            uint32_t vi_index)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_UNLINK_PIOBUF_IN_LEN,
-                           MC_CMD_UNLINK_PIOBUF_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_UNLINK_PIOBUF_IN_LEN,
+               MC_CMD_UNLINK_PIOBUF_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_UNLINK_PIOBUF;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_UNLINK_PIOBUF_IN_LEN;
@@ -806,7 +853,7 @@ fail1:
        for (i = 0; i < enp->en_arch.ef10.ena_piobuf_count; i++) {
                handlep = &enp->en_arch.ef10.ena_piobuf_handle[i];
 
-               efx_mcdi_free_piobuf(enp, *handlep);
+               (void) efx_mcdi_free_piobuf(enp, *handlep);
                *handlep = EFX_PIOBUF_HANDLE_INVALID;
        }
        enp->en_arch.ef10.ena_piobuf_count = 0;
@@ -823,7 +870,7 @@ ef10_nic_free_piobufs(
        for (i = 0; i < enp->en_arch.ef10.ena_piobuf_count; i++) {
                handlep = &enp->en_arch.ef10.ena_piobuf_handle[i];
 
-               efx_mcdi_free_piobuf(enp, *handlep);
+               (void) efx_mcdi_free_piobuf(enp, *handlep);
                *handlep = EFX_PIOBUF_HANDLE_INVALID;
        }
        enp->en_arch.ef10.ena_piobuf_count = 0;
@@ -951,11 +998,10 @@ ef10_mcdi_get_pf_count(
        __out           uint32_t *pf_countp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_PF_COUNT_IN_LEN,
-                           MC_CMD_GET_PF_COUNT_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_PF_COUNT_IN_LEN,
+               MC_CMD_GET_PF_COUNT_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_PF_COUNT;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_PF_COUNT_IN_LEN;
@@ -995,15 +1041,14 @@ ef10_get_datapath_caps(
 {
        efx_nic_cfg_t *encp = &(enp->en_nic_cfg);
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_CAPABILITIES_IN_LEN,
-                           MC_CMD_GET_CAPABILITIES_V5_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_CAPABILITIES_IN_LEN,
+               MC_CMD_GET_CAPABILITIES_V5_OUT_LEN);
        efx_rc_t rc;
 
        if ((rc = ef10_mcdi_get_pf_count(enp, &encp->enc_hw_pf_count)) != 0)
                goto fail1;
 
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_CAPABILITIES;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_CAPABILITIES_IN_LEN;
@@ -1041,11 +1086,13 @@ ef10_get_datapath_caps(
        }
        encp->enc_rx_prefix_size = 14;
 
+#if EFSYS_OPT_RX_SCALE
        /* Check if the firmware supports additional RSS modes */
        if (CAP_FLAGS1(req, ADDITIONAL_RSS_MODES))
                encp->enc_rx_scale_additional_modes_supported = B_TRUE;
        else
                encp->enc_rx_scale_additional_modes_supported = B_FALSE;
+#endif /* EFSYS_OPT_RX_SCALE */
 
        /* Check if the firmware supports TSO */
        if (CAP_FLAGS1(req, TX_TSO))
@@ -1251,6 +1298,7 @@ ef10_get_datapath_caps(
        else
                encp->enc_hlb_counters = B_FALSE;
 
+#if EFSYS_OPT_RX_SCALE
        if (CAP_FLAGS1(req, RX_RSS_LIMITED)) {
                /* Only one exclusive RSS context is available per port. */
                encp->enc_rx_scale_max_exclusive_contexts = 1;
@@ -1300,6 +1348,8 @@ ef10_get_datapath_caps(
                 */
                encp->enc_rx_scale_l4_hash_supported = B_TRUE;
        }
+#endif /* EFSYS_OPT_RX_SCALE */
+
        /* Check if the firmware supports "FLAG" and "MARK" filter actions */
        if (CAP_FLAGS2(req, FILTER_ACTION_FLAG))
                encp->enc_filter_action_flag_supported = B_TRUE;
@@ -1323,8 +1373,10 @@ ef10_get_datapath_caps(
 
        return (0);
 
+#if EFSYS_OPT_RX_SCALE
 fail5:
        EFSYS_PROBE(fail5);
+#endif /* EFSYS_OPT_RX_SCALE */
 fail4:
        EFSYS_PROBE(fail4);
 fail3:
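
The new #if EFSYS_OPT_RX_SCALE guards around fail5 mirror the guards added to the code that jumps to it: with RX scaling compiled out, nothing references the label, and an unreferenced label typically draws a -Wunused-label warning. A minimal illustration of the pattern (names hypothetical):

        /* Hypothetical: the label is compiled out with its only user. */
        static int
        example(int cond)
        {
        #if EXAMPLE_OPT
                if (cond)
                        goto fail;
        #endif
                return (0);

        #if EXAMPLE_OPT
        fail:
                return (-1);
        #endif
        }
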
@@ -1478,8 +1530,8 @@ static struct ef10_external_port_map_s {
         */
        {
                EFX_FAMILY_MEDFORD,
-               (1U << TLV_PORT_MODE_10G) |                     /* mode 0 */
-               (1U << TLV_PORT_MODE_10G_10G),                  /* mode 2 */
+               (1U << TLV_PORT_MODE_1x1_NA) |                  /* mode 0 */
+               (1U << TLV_PORT_MODE_1x1_1x1),                  /* mode 2 */
                1,      /* ports per cage */
                1       /* first cage */
        },
@@ -1493,10 +1545,10 @@ static struct ef10_external_port_map_s {
         */
        {
                EFX_FAMILY_MEDFORD,
-               (1U << TLV_PORT_MODE_40G) |                     /* mode 1 */
-               (1U << TLV_PORT_MODE_40G_40G) |                 /* mode 3 */
-               (1U << TLV_PORT_MODE_40G_10G_10G) |             /* mode 6 */
-               (1U << TLV_PORT_MODE_10G_10G_40G) |             /* mode 7 */
+               (1U << TLV_PORT_MODE_1x4_NA) |                  /* mode 1 */
+               (1U << TLV_PORT_MODE_1x4_1x4) |                 /* mode 3 */
+               (1U << TLV_PORT_MODE_1x4_2x1) |                 /* mode 6 */
+               (1U << TLV_PORT_MODE_2x1_1x4) |                 /* mode 7 */
                /* Do not use 10G_10G_10G_10G_Q1_Q2 (see bug63270) */
                (1U << TLV_PORT_MODE_10G_10G_10G_10G_Q1_Q2),    /* mode 9 */
                2,      /* ports per cage */
@@ -1512,9 +1564,9 @@ static struct ef10_external_port_map_s {
         */
        {
                EFX_FAMILY_MEDFORD,
-               (1U << TLV_PORT_MODE_10G_10G_10G_10G_Q) |       /* mode 5 */
+               (1U << TLV_PORT_MODE_2x1_2x1) |                 /* mode 5 */
                /* Do not use 10G_10G_10G_10G_Q1 (see bug63270) */
-               (1U << TLV_PORT_MODE_10G_10G_10G_10G_Q1),       /* mode 4 */
+               (1U << TLV_PORT_MODE_4x1_NA),                   /* mode 4 */
                4,      /* ports per cage */
                1       /* first cage */
        },
@@ -1528,7 +1580,7 @@ static struct ef10_external_port_map_s {
         */
        {
                EFX_FAMILY_MEDFORD,
-               (1U << TLV_PORT_MODE_10G_10G_10G_10G_Q2),       /* mode 8 */
+               (1U << TLV_PORT_MODE_NA_4x1),                   /* mode 8 */
                4,      /* ports per cage */
                2       /* first cage */
        },
@@ -1635,13 +1687,14 @@ ef10_external_port_mapping(
        int32_t count = 1; /* Default 1-1 mapping */
        int32_t offset = 1; /* Default starting external port number */
 
-       if ((rc = efx_mcdi_get_port_modes(enp, &port_modes, &current)) != 0) {
+       if ((rc = efx_mcdi_get_port_modes(enp, &port_modes, &current,
+                   NULL)) != 0) {
                /*
                 * No current port mode information (i.e. Huntington)
                 * - infer mapping from available modes
                 */
                if ((rc = efx_mcdi_get_port_modes(enp,
-                           &port_modes, NULL)) != 0) {
+                           &port_modes, NULL, NULL)) != 0) {
                        /*
                         * No port mode information available
                         * - use default mapping
@@ -1781,11 +1834,26 @@ ef10_nic_board_cfg(
        if ((rc = efx_mcdi_get_phy_cfg(enp)) != 0)
                goto fail6;
 
+       /*
+        * Firmware with support for *_FEC capability bits does not
+        * report that the corresponding *_FEC_REQUESTED bits are supported.
+        * Add them here so that drivers understand that they are supported.
+        */
+       if (epp->ep_phy_cap_mask & (1u << EFX_PHY_CAP_BASER_FEC))
+               epp->ep_phy_cap_mask |=
+                   (1u << EFX_PHY_CAP_BASER_FEC_REQUESTED);
+       if (epp->ep_phy_cap_mask & (1u << EFX_PHY_CAP_RS_FEC))
+               epp->ep_phy_cap_mask |=
+                   (1u << EFX_PHY_CAP_RS_FEC_REQUESTED);
+       if (epp->ep_phy_cap_mask & (1u << EFX_PHY_CAP_25G_BASER_FEC))
+               epp->ep_phy_cap_mask |=
+                   (1u << EFX_PHY_CAP_25G_BASER_FEC_REQUESTED);
+
        /* Obtain the default PHY advertised capabilities */
        if ((rc = ef10_phy_get_link(enp, &els)) != 0)
                goto fail7;
-       epp->ep_default_adv_cap_mask = els.els_adv_cap_mask;
-       epp->ep_adv_cap_mask = els.els_adv_cap_mask;
+       epp->ep_default_adv_cap_mask = els.epls.epls_adv_cap_mask;
+       epp->ep_adv_cap_mask = els.epls.epls_adv_cap_mask;
 
        /* Check capabilities of running datapath firmware */
        if ((rc = ef10_get_datapath_caps(enp)) != 0)
@@ -2039,8 +2107,8 @@ ef10_nic_reset(
        __in            efx_nic_t *enp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_ENTITY_RESET_IN_LEN,
-                           MC_CMD_ENTITY_RESET_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_ENTITY_RESET_IN_LEN,
+               MC_CMD_ENTITY_RESET_OUT_LEN);
        efx_rc_t rc;
 
        /* ef10_nic_reset() is called to recover from BADASSERT failures. */
@@ -2049,7 +2117,6 @@ ef10_nic_reset(
        if ((rc = efx_mcdi_exit_assertion_handler(enp)) != 0)
                goto fail2;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_ENTITY_RESET;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_ENTITY_RESET_IN_LEN;
@@ -2314,6 +2381,36 @@ fail1:
        return (rc);
 }
 
+       __checkReturn   boolean_t
+ef10_nic_hw_unavailable(
+       __in            efx_nic_t *enp)
+{
+       efx_dword_t dword;
+
+       if (enp->en_reset_flags & EFX_RESET_HW_UNAVAIL)
+               return (B_TRUE);
+
+       EFX_BAR_READD(enp, ER_DZ_BIU_MC_SFT_STATUS_REG, &dword, B_FALSE);
+       if (EFX_DWORD_FIELD(dword, EFX_DWORD_0) == 0xffffffff)
+               goto unavail;
+
+       return (B_FALSE);
+
+unavail:
+       ef10_nic_set_hw_unavailable(enp);
+
+       return (B_TRUE);
+}
+
+                       void
+ef10_nic_set_hw_unavailable(
+       __in            efx_nic_t *enp)
+{
+       EFSYS_PROBE(hw_unavail);
+       enp->en_reset_flags |= EFX_RESET_HW_UNAVAIL;
+}
+
+
                        void
 ef10_nic_fini(
        __in            efx_nic_t *enp)
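
ef10_nic_hw_unavailable() relies on the PCI convention that reads from a dead or surprise-removed function return all-ones, and it latches the result in en_reset_flags so later callers can skip the register read entirely. A sketch of how a polling path might consult the new helpers (caller code assumed, not part of this patch):

        /* Hypothetical caller: fail fast once the device has gone away. */
        static efx_rc_t
        example_poll(efx_nic_t *enp)
        {
                if (efx_nic_hw_unavailable(enp))
                        return (ENODEV);    /* skip MCDI/register traffic */

                /* ... normal event/register polling ... */
                return (0);
        }
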
@@ -2386,11 +2483,10 @@ efx_mcdi_get_nic_global(
        __out           uint32_t *valuep)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_NIC_GLOBAL_IN_LEN,
-                           MC_CMD_GET_NIC_GLOBAL_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_NIC_GLOBAL_IN_LEN,
+               MC_CMD_GET_NIC_GLOBAL_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_NIC_GLOBAL;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_NIC_GLOBAL_IN_LEN;
@@ -2430,10 +2526,9 @@ efx_mcdi_set_nic_global(
        __in            uint32_t value)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MC_CMD_SET_NIC_GLOBAL_IN_LEN];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_SET_NIC_GLOBAL_IN_LEN, 0);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_SET_NIC_GLOBAL;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_SET_NIC_GLOBAL_IN_LEN;
index 2883ec8..8d1b64f 100644 (file)
@@ -203,14 +203,14 @@ tlv_validate_state(
 
        if (tlv_tag(cursor) != TLV_TAG_END) {
                /* Check current item has space for tag and length */
-               if (cursor->current > (cursor->limit - 2)) {
+               if (cursor->current > (cursor->limit - 1)) {
                        cursor->current = NULL;
                        rc = EFAULT;
                        goto fail3;
                }
 
-               /* Check we have value data for current item and another tag */
-               if (tlv_next_item_ptr(cursor) > (cursor->limit - 1)) {
+               /* Check we have value data for current item and an END tag */
+               if (tlv_next_item_ptr(cursor) > cursor->limit) {
                        cursor->current = NULL;
                        rc = EFAULT;
                        goto fail4;
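
Both bounds checks above were off by one word: with cursor->limit pointing at the last valid uint32_t, a minimal item needs its tag at current and its length at current + 1 (hence current <= limit - 1), and after the item's value there must still be room for a terminating END tag at tlv_next_item_ptr() (hence next <= limit). A sketch of the geometry the fixed checks assume (helper hypothetical):

        /*
         * Word (uint32_t) layout assumed by the checks:
         *   current -> [ tag ][ length ][ value ... ][ END tag ]
         *                                              ^ at most at limit
         */
        static int
        tlv_item_fits(const uint32_t *current, const uint32_t *next,
            const uint32_t *limit)
        {
                int have_tag_and_length = (current <= (limit - 1));
                int have_value_and_end = (next <= limit);

                return (have_tag_and_length && have_value_and_end);
        }
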
@@ -635,7 +635,6 @@ fail1:
 /* Validate buffer contents (before writing to flash) */
        __checkReturn           efx_rc_t
 ef10_nvram_buffer_validate(
-       __in                    efx_nic_t *enp,
        __in                    uint32_t partn,
        __in_bcount(partn_size) caddr_t partn_data,
        __in                    size_t partn_size)
@@ -648,7 +647,6 @@ ef10_nvram_buffer_validate(
        int pos;
        efx_rc_t rc;
 
-       _NOTE(ARGUNUSED(enp, partn))
        EFX_STATIC_ASSERT(sizeof (*header) <= EF10_NVRAM_CHUNK);
 
        if ((partn_data == NULL) || (partn_size == 0)) {
@@ -675,26 +673,32 @@ ef10_nvram_buffer_validate(
                goto fail4;
        }
 
+       /* Check partition header matches partn */
+       if (__LE_TO_CPU_16(header->type_id) != partn) {
+               rc = EINVAL;
+               goto fail5;
+       }
+
        /* Check partition ends with PARTITION_TRAILER and END tags */
        if ((rc = tlv_find(&cursor, TLV_TAG_PARTITION_TRAILER)) != 0) {
                rc = EINVAL;
-               goto fail5;
+               goto fail6;
        }
        trailer = (struct tlv_partition_trailer *)tlv_item(&cursor);
 
        if ((rc = tlv_advance(&cursor)) != 0) {
                rc = EINVAL;
-               goto fail6;
+               goto fail7;
        }
        if (tlv_tag(&cursor) != TLV_TAG_END) {
                rc = EINVAL;
-               goto fail7;
+               goto fail8;
        }
 
        /* Check generation counts are consistent */
        if (trailer->generation != header->generation) {
                rc = EINVAL;
-               goto fail8;
+               goto fail9;
        }
 
        /* Verify partition checksum */
@@ -704,11 +708,13 @@ ef10_nvram_buffer_validate(
        }
        if (cksum != 0) {
                rc = EINVAL;
-               goto fail9;
+               goto fail10;
        }
 
        return (0);
 
+fail10:
+       EFSYS_PROBE(fail10);
 fail9:
        EFSYS_PROBE(fail9);
 fail8:
@@ -731,13 +737,24 @@ fail1:
        return (rc);
 }
 
+                       void
+ef10_nvram_buffer_init(
+       __out_bcount(buffer_size)
+                               caddr_t bufferp,
+       __in                    size_t buffer_size)
+{
+       uint32_t *buf = (uint32_t *)bufferp;
+
+       memset(buf, 0xff, buffer_size);
 
+       tlv_init_block(buf);
+}
 
        __checkReturn           efx_rc_t
 ef10_nvram_buffer_create(
-       __in                    efx_nic_t *enp,
-       __in                    uint16_t partn_type,
-       __in_bcount(partn_size) caddr_t partn_data,
+       __in                    uint32_t partn_type,
+       __out_bcount(partn_size)
+                               caddr_t partn_data,
        __in                    size_t partn_size)
 {
        uint32_t *buf = (uint32_t *)partn_data;
@@ -753,9 +770,8 @@ ef10_nvram_buffer_create(
                goto fail1;
        }
 
-       memset(buf, 0xff, partn_size);
+       ef10_nvram_buffer_init(partn_data, partn_size);
 
-       tlv_init_block(buf);
        if ((rc = tlv_init_cursor(&cursor, buf,
            (uint32_t *)((uint8_t *)buf + partn_size),
            buf)) != 0) {
@@ -787,7 +803,7 @@ ef10_nvram_buffer_create(
                goto fail6;
 
        /* Check that the partition is valid. */
-       if ((rc = ef10_nvram_buffer_validate(enp, partn_type,
+       if ((rc = ef10_nvram_buffer_validate(partn_type,
            partn_data, partn_size)) != 0)
                goto fail7;
 
@@ -958,6 +974,48 @@ ef10_nvram_buffer_find_item(
        return (B_FALSE);
 }
 
+       __checkReturn           efx_rc_t
+ef10_nvram_buffer_peek_item(
+       __in_bcount(buffer_size)
+                               caddr_t bufferp,
+       __in                    size_t buffer_size,
+       __in                    uint32_t offset,
+       __out                   uint32_t *tagp,
+       __out                   uint32_t *lengthp,
+       __out                   uint32_t *value_offsetp)
+{
+       efx_rc_t rc;
+       tlv_cursor_t cursor;
+       uint32_t tag;
+
+       if ((rc = tlv_init_cursor_at_offset(&cursor, (uint8_t *)bufferp,
+                       buffer_size, offset)) != 0) {
+               goto fail1;
+       }
+
+       tag = tlv_tag(&cursor);
+       *tagp = tag;
+       if (tag == TLV_TAG_END) {
+               /*
+                * To allow stepping over the END tag, report the full tag
+                * length and a zero length value.
+                */
+               *lengthp = sizeof (tag);
+               *value_offsetp = sizeof (tag);
+       } else {
+               *lengthp = byte_offset(tlv_next_item_ptr(&cursor),
+                           cursor.current);
+               *value_offsetp = byte_offset((uint32_t *)tlv_value(&cursor),
+                           cursor.current);
+       }
+       return (0);
+
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
+}
+
        __checkReturn           efx_rc_t
 ef10_nvram_buffer_get_item(
        __in_bcount(buffer_size)
@@ -965,16 +1023,17 @@ ef10_nvram_buffer_get_item(
        __in                    size_t buffer_size,
        __in                    uint32_t offset,
        __in                    uint32_t length,
-       __out_bcount_part(item_max_size, *lengthp)
-                               caddr_t itemp,
-       __in                    size_t item_max_size,
+       __out                   uint32_t *tagp,
+       __out_bcount_part(value_max_size, *lengthp)
+                               caddr_t valuep,
+       __in                    size_t value_max_size,
        __out                   uint32_t *lengthp)
 {
        efx_rc_t rc;
        tlv_cursor_t cursor;
-       uint32_t item_length;
+       uint32_t value_length;
 
-       if (item_max_size < length) {
+       if (buffer_size < (offset + length)) {
                rc = ENOSPC;
                goto fail1;
        }
@@ -984,14 +1043,15 @@ ef10_nvram_buffer_get_item(
                goto fail2;
        }
 
-       item_length = tlv_length(&cursor);
-       if (length < item_length) {
+       value_length = tlv_length(&cursor);
+       if (value_max_size < value_length) {
                rc = ENOSPC;
                goto fail3;
        }
-       memcpy(itemp, tlv_value(&cursor), item_length);
+       memcpy(valuep, tlv_value(&cursor), value_length);
 
-       *lengthp = item_length;
+       *tagp = tlv_tag(&cursor);
+       *lengthp = value_length;
 
        return (0);
 
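
ef10_nvram_buffer_peek_item(), added above, reports the full item length (header plus value plus padding) and the value's offset within the item, and deliberately fakes a sizeof(tag) length for TLV_TAG_END so that callers can step over the terminator. A hedged sketch of walking a buffer with it (loop body hypothetical):

        uint32_t offset = 0;
        uint32_t tag, length, value_offset;

        for (;;) {
                if (ef10_nvram_buffer_peek_item(bufferp, buffer_size, offset,
                    &tag, &length, &value_offset) != 0)
                        break;          /* malformed buffer or offset */
                if (tag == TLV_TAG_END)
                        break;          /* end of partition contents */

                /* Value bytes live at bufferp + offset + value_offset. */
                offset += length;       /* length spans the whole item */
        }
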
@@ -1011,7 +1071,45 @@ ef10_nvram_buffer_insert_item(
                                caddr_t bufferp,
        __in                    size_t buffer_size,
        __in                    uint32_t offset,
-       __in_bcount(length)     caddr_t keyp,
+       __in                    uint32_t tag,
+       __in_bcount(length)     caddr_t valuep,
+       __in                    uint32_t length,
+       __out                   uint32_t *lengthp)
+{
+       efx_rc_t rc;
+       tlv_cursor_t cursor;
+
+       if ((rc = tlv_init_cursor_at_offset(&cursor, (uint8_t *)bufferp,
+                       buffer_size, offset)) != 0) {
+               goto fail1;
+       }
+
+       rc = tlv_insert(&cursor, tag, (uint8_t *)valuep, length);
+
+       if (rc != 0)
+               goto fail2;
+
+       *lengthp = byte_offset(tlv_next_item_ptr(&cursor),
+                   cursor.current);
+
+       return (0);
+
+fail2:
+       EFSYS_PROBE(fail2);
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
+}
+
+       __checkReturn           efx_rc_t
+ef10_nvram_buffer_modify_item(
+       __in_bcount(buffer_size)
+                               caddr_t bufferp,
+       __in                    size_t buffer_size,
+       __in                    uint32_t offset,
+       __in                    uint32_t tag,
+       __in_bcount(length)     caddr_t valuep,
        __in                    uint32_t length,
        __out                   uint32_t *lengthp)
 {
@@ -1023,7 +1121,7 @@ ef10_nvram_buffer_insert_item(
                goto fail1;
        }
 
-       rc = tlv_insert(&cursor, TLV_TAG_LICENSE, (uint8_t *)keyp, length);
+       rc = tlv_modify(&cursor, tag, (uint8_t *)valuep, length);
 
        if (rc != 0) {
                goto fail2;
@@ -1042,6 +1140,7 @@ fail1:
        return (rc);
 }
 
+
        __checkReturn           efx_rc_t
 ef10_nvram_buffer_delete_item(
        __in_bcount(buffer_size)
@@ -1808,7 +1907,7 @@ ef10_nvram_partn_write_segment_tlv(
                goto fail7;
 
        /* Unlock the partition */
-       ef10_nvram_partn_unlock(enp, partn, NULL);
+       (void) ef10_nvram_partn_unlock(enp, partn, NULL);
 
        EFSYS_KMEM_FREE(enp->en_esip, partn_size, partn_data);
 
@@ -1823,7 +1922,7 @@ fail5:
 fail4:
        EFSYS_PROBE(fail4);
 
-       ef10_nvram_partn_unlock(enp, partn, NULL);
+       (void) ef10_nvram_partn_unlock(enp, partn, NULL);
 fail3:
        EFSYS_PROBE(fail3);
 
@@ -2000,7 +2099,7 @@ ef10_nvram_partn_write(
        __in                    efx_nic_t *enp,
        __in                    uint32_t partn,
        __in                    unsigned int offset,
-       __out_bcount(size)      caddr_t data,
+       __in_bcount(size)       caddr_t data,
        __in                    size_t size)
 {
        size_t chunk;
@@ -2168,6 +2267,8 @@ static ef10_parttbl_entry_t medford2_parttbl[] = {
        PARTN_MAP_ENTRY(LICENSE,                ALL,    LICENSE),
        PARTN_MAP_ENTRY(EXPANSION_UEFI,         ALL,    UEFIROM),
        PARTN_MAP_ENTRY(MUM_FIRMWARE,           ALL,    MUM_FIRMWARE),
+       PARTN_MAP_ENTRY(DYNCONFIG_DEFAULTS,     ALL,    DYNCONFIG_DEFAULTS),
+       PARTN_MAP_ENTRY(ROMCONFIG_DEFAULTS,     ALL,    ROMCONFIG_DEFAULTS),
 };
 
 static __checkReturn           efx_rc_t
index 84acb70..84ccdde 100644 (file)
@@ -98,8 +98,10 @@ mcdi_phy_decode_link_mode(
        __in            uint32_t link_flags,
        __in            unsigned int speed,
        __in            unsigned int fcntl,
+       __in            uint32_t fec,
        __out           efx_link_mode_t *link_modep,
-       __out           unsigned int *fcntlp)
+       __out           unsigned int *fcntlp,
+       __out           efx_phy_fec_type_t *fecp)
 {
        boolean_t fd = !!(link_flags &
                    (1 << MC_CMD_GET_LINK_OUT_FULL_DUPLEX_LBN));
@@ -141,6 +143,22 @@ mcdi_phy_decode_link_mode(
                EFSYS_PROBE1(mc_pcol_error, int, fcntl);
                *fcntlp = 0;
        }
+
+       switch (fec) {
+       case MC_CMD_FEC_NONE:
+               *fecp = EFX_PHY_FEC_NONE;
+               break;
+       case MC_CMD_FEC_BASER:
+               *fecp = EFX_PHY_FEC_BASER;
+               break;
+       case MC_CMD_FEC_RS:
+               *fecp = EFX_PHY_FEC_RS;
+               break;
+       default:
+               EFSYS_PROBE1(mc_pcol_error, int, fec);
+               *fecp = EFX_PHY_FEC_NONE;
+               break;
+       }
 }
 
 
@@ -154,6 +172,7 @@ ef10_phy_link_ev(
        unsigned int link_flags;
        unsigned int speed;
        unsigned int fcntl;
+       efx_phy_fec_type_t fec = MC_CMD_FEC_NONE;
        efx_link_mode_t link_mode;
        uint32_t lp_cap_mask;
 
@@ -191,7 +210,8 @@ ef10_phy_link_ev(
        link_flags = MCDI_EV_FIELD(eqp, LINKCHANGE_LINK_FLAGS);
        mcdi_phy_decode_link_mode(enp, link_flags, speed,
                                    MCDI_EV_FIELD(eqp, LINKCHANGE_FCNTL),
-                                   &link_mode, &fcntl);
+                                   MC_CMD_FEC_NONE, &link_mode,
+                                   &fcntl, &fec);
        mcdi_phy_decode_cap(MCDI_EV_FIELD(eqp, LINKCHANGE_LP_CAP),
                            &lp_cap_mask);
 
@@ -242,16 +262,16 @@ ef10_phy_get_link(
        __out           ef10_link_state_t *elsp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_LINK_IN_LEN,
-                           MC_CMD_GET_LINK_OUT_LEN)];
+       uint32_t fec;
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_LINK_IN_LEN,
+               MC_CMD_GET_LINK_OUT_V2_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_LINK;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_LINK_IN_LEN;
        req.emr_out_buf = payload;
-       req.emr_out_length = MC_CMD_GET_LINK_OUT_LEN;
+       req.emr_out_length = MC_CMD_GET_LINK_OUT_V2_LEN;
 
        efx_mcdi_execute(enp, &req);
 
@@ -266,14 +286,28 @@ ef10_phy_get_link(
        }
 
        mcdi_phy_decode_cap(MCDI_OUT_DWORD(req, GET_LINK_OUT_CAP),
-                           &elsp->els_adv_cap_mask);
+                           &elsp->epls.epls_adv_cap_mask);
        mcdi_phy_decode_cap(MCDI_OUT_DWORD(req, GET_LINK_OUT_LP_CAP),
-                           &elsp->els_lp_cap_mask);
+                           &elsp->epls.epls_lp_cap_mask);
+
+       if (req.emr_out_length_used < MC_CMD_GET_LINK_OUT_V2_LEN)
+               fec = MC_CMD_FEC_NONE;
+       else
+               fec = MCDI_OUT_DWORD(req, GET_LINK_OUT_V2_FEC_TYPE);
 
        mcdi_phy_decode_link_mode(enp, MCDI_OUT_DWORD(req, GET_LINK_OUT_FLAGS),
                            MCDI_OUT_DWORD(req, GET_LINK_OUT_LINK_SPEED),
                            MCDI_OUT_DWORD(req, GET_LINK_OUT_FCNTL),
-                           &elsp->els_link_mode, &elsp->els_fcntl);
+                           fec, &elsp->epls.epls_link_mode,
+                           &elsp->epls.epls_fcntl, &elsp->epls.epls_fec);
+
+       if (req.emr_out_length_used < MC_CMD_GET_LINK_OUT_V2_LEN) {
+               elsp->epls.epls_ld_cap_mask = 0;
+       } else {
+               mcdi_phy_decode_cap(MCDI_OUT_DWORD(req, GET_LINK_OUT_V2_LD_CAP),
+                                   &elsp->epls.epls_ld_cap_mask);
+       }
+
 
 #if EFSYS_OPT_LOOPBACK
        /*
@@ -301,8 +335,8 @@ ef10_phy_reconfigure(
 {
        efx_port_t *epp = &(enp->en_port);
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_SET_LINK_IN_LEN,
-                           MC_CMD_SET_LINK_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_SET_LINK_IN_LEN,
+               MC_CMD_SET_LINK_OUT_LEN);
        uint32_t cap_mask;
 #if EFSYS_OPT_PHY_LED_CONTROL
        unsigned int led_mode;
@@ -316,7 +350,6 @@ ef10_phy_reconfigure(
        if (supported == B_FALSE)
                goto out;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_SET_LINK;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_SET_LINK_IN_LEN;
@@ -464,12 +497,11 @@ ef10_phy_verify(
        __in            efx_nic_t *enp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_PHY_STATE_IN_LEN,
-                           MC_CMD_GET_PHY_STATE_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_PHY_STATE_IN_LEN,
+               MC_CMD_GET_PHY_STATE_OUT_LEN);
        uint32_t state;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_PHY_STATE;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_PHY_STATE_IN_LEN;
@@ -518,6 +550,29 @@ ef10_phy_oui_get(
        return (ENOTSUP);
 }
 
+       __checkReturn   efx_rc_t
+ef10_phy_link_state_get(
+       __in            efx_nic_t *enp,
+       __out           efx_phy_link_state_t  *eplsp)
+{
+       efx_rc_t rc;
+       ef10_link_state_t els;
+
+       /* Obtain the active link state */
+       if ((rc = ef10_phy_get_link(enp, &els)) != 0)
+               goto fail1;
+
+       *eplsp = els.epls;
+
+       return (0);
+
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
+}
+
+
 #if EFSYS_OPT_PHY_STATS
 
        __checkReturn                           efx_rc_t
@@ -583,22 +638,34 @@ ef10_bist_poll(
        unsigned long *valuesp,
        __in                    size_t count)
 {
+       /*
+        * MCDI_CTL_SDU_LEN_MAX_V1 is large enough to cover all BIST results,
+        * whilst not wasting stack space.
+        */
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_POLL_BIST_IN_LEN,
+               MCDI_CTL_SDU_LEN_MAX_V1);
        efx_nic_cfg_t *encp = &(enp->en_nic_cfg);
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_POLL_BIST_IN_LEN,
-                           MCDI_CTL_SDU_LEN_MAX)];
        uint32_t value_mask = 0;
        uint32_t result;
        efx_rc_t rc;
 
+       EFX_STATIC_ASSERT(MC_CMD_POLL_BIST_OUT_LEN <=
+           MCDI_CTL_SDU_LEN_MAX_V1);
+       EFX_STATIC_ASSERT(MC_CMD_POLL_BIST_OUT_SFT9001_LEN <=
+           MCDI_CTL_SDU_LEN_MAX_V1);
+       EFX_STATIC_ASSERT(MC_CMD_POLL_BIST_OUT_MRSFP_LEN <=
+           MCDI_CTL_SDU_LEN_MAX_V1);
+       EFX_STATIC_ASSERT(MC_CMD_POLL_BIST_OUT_MEM_LEN <=
+           MCDI_CTL_SDU_LEN_MAX_V1);
+
        _NOTE(ARGUNUSED(type))
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_POLL_BIST;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_POLL_BIST_IN_LEN;
        req.emr_out_buf = payload;
-       req.emr_out_length = MCDI_CTL_SDU_LEN_MAX;
+       req.emr_out_length = MCDI_CTL_SDU_LEN_MAX_V1;
 
        efx_mcdi_execute(enp, &req);
 
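
Using MCDI_CTL_SDU_LEN_MAX_V1 instead of MCDI_CTL_SDU_LEN_MAX shrinks the on-stack buffer to the V1 SDU limit, and the EFX_STATIC_ASSERTs turn the "large enough" claim into a compile-time guarantee. Such asserts are commonly implemented with the negative-array-size trick; a sketch (the actual efx definition may differ):

        /* Compile-time assertion: the build fails if _cond is false. */
        #define EXAMPLE_STATIC_ASSERT(_cond) \
                ((void)sizeof (char[(_cond) ? 1 : -1]))
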
index 313a369..3c8f4f3 100644 (file)
@@ -29,8 +29,8 @@ efx_mcdi_init_rxq(
 {
        efx_nic_cfg_t *encp = &(enp->en_nic_cfg);
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_INIT_RXQ_V3_IN_LEN,
-                           MC_CMD_INIT_RXQ_V3_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_INIT_RXQ_V3_IN_LEN,
+               MC_CMD_INIT_RXQ_V3_OUT_LEN);
        int npages = EFX_RXQ_NBUFS(ndescs);
        int i;
        efx_qword_t *dma_addr;
@@ -73,7 +73,6 @@ efx_mcdi_init_rxq(
                want_outer_classes = B_FALSE;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_INIT_RXQ;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_INIT_RXQ_V3_IN_LEN;
@@ -146,11 +145,10 @@ efx_mcdi_fini_rxq(
        __in            uint32_t instance)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_FINI_RXQ_IN_LEN,
-                           MC_CMD_FINI_RXQ_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_FINI_RXQ_IN_LEN,
+               MC_CMD_FINI_RXQ_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_FINI_RXQ;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_FINI_RXQ_IN_LEN;
@@ -188,8 +186,8 @@ efx_mcdi_rss_context_alloc(
        __out           uint32_t *rss_contextp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN,
-                           MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN,
+               MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN);
        uint32_t rss_context;
        uint32_t context_type;
        efx_rc_t rc;
@@ -211,7 +209,6 @@ efx_mcdi_rss_context_alloc(
                goto fail2;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_RSS_CONTEXT_ALLOC;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN;
@@ -274,8 +271,8 @@ efx_mcdi_rss_context_free(
        __in            uint32_t rss_context)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_RSS_CONTEXT_FREE_IN_LEN,
-                           MC_CMD_RSS_CONTEXT_FREE_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_RSS_CONTEXT_FREE_IN_LEN,
+               MC_CMD_RSS_CONTEXT_FREE_OUT_LEN);
        efx_rc_t rc;
 
        if (rss_context == EF10_RSS_CONTEXT_INVALID) {
@@ -283,7 +280,6 @@ efx_mcdi_rss_context_free(
                goto fail1;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_RSS_CONTEXT_FREE;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_RSS_CONTEXT_FREE_IN_LEN;
@@ -318,14 +314,9 @@ efx_mcdi_rss_context_set_flags(
        __in            efx_rx_hash_type_t type)
 {
        efx_nic_cfg_t *encp = &enp->en_nic_cfg;
-       efx_rx_hash_type_t type_ipv4;
-       efx_rx_hash_type_t type_ipv4_tcp;
-       efx_rx_hash_type_t type_ipv6;
-       efx_rx_hash_type_t type_ipv6_tcp;
-       efx_rx_hash_type_t modes;
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN,
-                           MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN,
+               MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN);
        efx_rc_t rc;
 
        EFX_STATIC_ASSERT(EFX_RX_CLASS_IPV4_TCP_LBN ==
@@ -350,7 +341,6 @@ efx_mcdi_rss_context_set_flags(
                goto fail1;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_RSS_CONTEXT_SET_FLAGS;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN;
@@ -360,57 +350,38 @@ efx_mcdi_rss_context_set_flags(
        MCDI_IN_SET_DWORD(req, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID,
            rss_context);
 
-       type_ipv4 = EFX_RX_HASH(IPV4, 2TUPLE) | EFX_RX_HASH(IPV4_TCP, 2TUPLE) |
-                   EFX_RX_HASH(IPV4_UDP, 2TUPLE);
-       type_ipv4_tcp = EFX_RX_HASH(IPV4_TCP, 4TUPLE);
-       type_ipv6 = EFX_RX_HASH(IPV6, 2TUPLE) | EFX_RX_HASH(IPV6_TCP, 2TUPLE) |
-                   EFX_RX_HASH(IPV6_UDP, 2TUPLE);
-       type_ipv6_tcp = EFX_RX_HASH(IPV6_TCP, 4TUPLE);
-
-       /*
-        * Create a copy of the original hash type.
-        * The copy will be used to fill in RSS_MODE bits and
-        * may be cleared beforehand. The original variable
-        * and, thus, EN bits will remain unaffected.
-        */
-       modes = type;
-
        /*
         * If the firmware lacks support for additional modes, RSS_MODE
         * fields must contain zeros, otherwise the operation will fail.
         */
        if (encp->enc_rx_scale_additional_modes_supported == B_FALSE)
-               modes = 0;
-
-#define        EXTRACT_RSS_MODE(_type, _class)         \
-       (EFX_EXTRACT_NATIVE(_type, 0, 31,       \
-       EFX_LOW_BIT(EFX_RX_CLASS_##_class),     \
-       EFX_HIGH_BIT(EFX_RX_CLASS_##_class)) &  \
-       EFX_MASK32(EFX_RX_CLASS_##_class))
+               type &= EFX_RX_HASH_LEGACY_MASK;
 
        MCDI_IN_POPULATE_DWORD_10(req, RSS_CONTEXT_SET_FLAGS_IN_FLAGS,
            RSS_CONTEXT_SET_FLAGS_IN_TOEPLITZ_IPV4_EN,
-           ((type & type_ipv4) == type_ipv4) ? 1 : 0,
+           (type & EFX_RX_HASH_IPV4) ? 1 : 0,
            RSS_CONTEXT_SET_FLAGS_IN_TOEPLITZ_TCPV4_EN,
-           ((type & type_ipv4_tcp) == type_ipv4_tcp) ? 1 : 0,
+           (type & EFX_RX_HASH_TCPIPV4) ? 1 : 0,
            RSS_CONTEXT_SET_FLAGS_IN_TOEPLITZ_IPV6_EN,
-           ((type & type_ipv6) == type_ipv6) ? 1 : 0,
+           (type & EFX_RX_HASH_IPV6) ? 1 : 0,
            RSS_CONTEXT_SET_FLAGS_IN_TOEPLITZ_TCPV6_EN,
-           ((type & type_ipv6_tcp) == type_ipv6_tcp) ? 1 : 0,
+           (type & EFX_RX_HASH_TCPIPV6) ? 1 : 0,
            RSS_CONTEXT_SET_FLAGS_IN_TCP_IPV4_RSS_MODE,
-           EXTRACT_RSS_MODE(modes, IPV4_TCP),
+           (type >> EFX_RX_CLASS_IPV4_TCP_LBN) &
+           EFX_MASK32(EFX_RX_CLASS_IPV4_TCP),
            RSS_CONTEXT_SET_FLAGS_IN_UDP_IPV4_RSS_MODE,
-           EXTRACT_RSS_MODE(modes, IPV4_UDP),
+           (type >> EFX_RX_CLASS_IPV4_UDP_LBN) &
+           EFX_MASK32(EFX_RX_CLASS_IPV4_UDP),
            RSS_CONTEXT_SET_FLAGS_IN_OTHER_IPV4_RSS_MODE,
-           EXTRACT_RSS_MODE(modes, IPV4),
+           (type >> EFX_RX_CLASS_IPV4_LBN) & EFX_MASK32(EFX_RX_CLASS_IPV4),
            RSS_CONTEXT_SET_FLAGS_IN_TCP_IPV6_RSS_MODE,
-           EXTRACT_RSS_MODE(modes, IPV6_TCP),
+           (type >> EFX_RX_CLASS_IPV6_TCP_LBN) &
+           EFX_MASK32(EFX_RX_CLASS_IPV6_TCP),
            RSS_CONTEXT_SET_FLAGS_IN_UDP_IPV6_RSS_MODE,
-           EXTRACT_RSS_MODE(modes, IPV6_UDP),
+           (type >> EFX_RX_CLASS_IPV6_UDP_LBN) &
+           EFX_MASK32(EFX_RX_CLASS_IPV6_UDP),
            RSS_CONTEXT_SET_FLAGS_IN_OTHER_IPV6_RSS_MODE,
-           EXTRACT_RSS_MODE(modes, IPV6));
-
-#undef EXTRACT_RSS_MODE
+           (type >> EFX_RX_CLASS_IPV6_LBN) & EFX_MASK32(EFX_RX_CLASS_IPV6));
 
        efx_mcdi_execute(enp, &req);
 
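
The EXTRACT_RSS_MODE macro is gone in favor of direct shift-and-mask on the caller-supplied hash-type bitfield: each traffic class owns a subfield at EFX_RX_CLASS_*_LBN whose width is captured by EFX_MASK32(). A worked example with made-up field positions (the real LBN/width values come from the efx headers):

        #include <stdint.h>

        /* Hypothetical: IPV4_TCP mode field at bit 8, 4 bits wide. */
        #define EX_IPV4_TCP_LBN     8
        #define EX_IPV4_TCP_MASK    0xfu

        int
        main(void)
        {
                uint32_t type = 0x3u | (0x5u << EX_IPV4_TCP_LBN);
                uint32_t mode = (type >> EX_IPV4_TCP_LBN) & EX_IPV4_TCP_MASK;

                return (mode == 0x5u) ? 0 : 1;  /* 0 == extracted as expected */
        }
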
@@ -439,8 +410,8 @@ efx_mcdi_rss_context_set_key(
        __in            size_t n)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN,
-                           MC_CMD_RSS_CONTEXT_SET_KEY_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN,
+               MC_CMD_RSS_CONTEXT_SET_KEY_OUT_LEN);
        efx_rc_t rc;
 
        if (rss_context == EF10_RSS_CONTEXT_INVALID) {
@@ -448,7 +419,6 @@ efx_mcdi_rss_context_set_key(
                goto fail1;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_RSS_CONTEXT_SET_KEY;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_RSS_CONTEXT_SET_KEY_IN_LEN;
@@ -496,8 +466,8 @@ efx_mcdi_rss_context_set_table(
        __in            size_t n)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN,
-                           MC_CMD_RSS_CONTEXT_SET_TABLE_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN,
+               MC_CMD_RSS_CONTEXT_SET_TABLE_OUT_LEN);
        uint8_t *req_table;
        int i, rc;
 
@@ -506,7 +476,6 @@ efx_mcdi_rss_context_set_table(
                goto fail1;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_RSS_CONTEXT_SET_TABLE;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_RSS_CONTEXT_SET_TABLE_IN_LEN;
index a35d160..c25ffe2 100644 (file)
@@ -4,6 +4,14 @@
  * All rights reserved.
  */
 
+/*
+ * This is NOT the original source file. Do NOT edit it.
+ * To update the image layout headers, please edit the copy in
+ * the sfregistry repo and then, in that repo,
+ * "make layout_headers" or "make export" to
+ * regenerate and export all types of headers.
+ */
+
 /* These structures define the layouts for the signed firmware image binary
  * saved in NVRAM. The original image is in the Cryptographic message
  * syntax (CMS) format which contains the bootable firmware binary plus the
index 7d27f71..5f3df42 100644 (file)
@@ -31,8 +31,8 @@ efx_mcdi_init_txq(
        __in            efsys_mem_t *esmp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_INIT_TXQ_IN_LEN(EFX_TXQ_MAX_BUFS),
-                           MC_CMD_INIT_TXQ_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_INIT_TXQ_IN_LEN(EFX_TXQ_MAX_BUFS),
+               MC_CMD_INIT_TXQ_OUT_LEN);
        efx_qword_t *dma_addr;
        uint64_t addr;
        int npages;
@@ -53,7 +53,6 @@ efx_mcdi_init_txq(
                goto fail2;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_INIT_TXQ;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_INIT_TXQ_IN_LEN(npages);
@@ -120,11 +119,10 @@ efx_mcdi_fini_txq(
        __in            uint32_t instance)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_FINI_TXQ_IN_LEN,
-                           MC_CMD_FINI_TXQ_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_FINI_TXQ_IN_LEN,
+               MC_CMD_FINI_TXQ_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_FINI_TXQ;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_FINI_TXQ_IN_LEN;
@@ -278,7 +276,7 @@ ef10_tx_qpio_enable(
 
 fail3:
        EFSYS_PROBE(fail3);
-       ef10_nic_pio_free(enp, etp->et_pio_bufnum, etp->et_pio_blknum);
+       (void) ef10_nic_pio_free(enp, etp->et_pio_bufnum, etp->et_pio_blknum);
 fail2:
        EFSYS_PROBE(fail2);
        etp->et_pio_size = 0;
@@ -296,10 +294,12 @@ ef10_tx_qpio_disable(
 
        if (etp->et_pio_size != 0) {
                /* Unlink the piobuf from this TXQ */
-               ef10_nic_pio_unlink(enp, etp->et_index);
+               if (ef10_nic_pio_unlink(enp, etp->et_index) != 0)
+                       return;
 
                /* Free the sub-allocated PIO block */
-               ef10_nic_pio_free(enp, etp->et_pio_bufnum, etp->et_pio_blknum);
+               (void) ef10_nic_pio_free(enp, etp->et_pio_bufnum,
+                   etp->et_pio_blknum);
                etp->et_pio_size = 0;
                etp->et_pio_write_offset = 0;
        }
@@ -539,12 +539,9 @@ ef10_tx_qdesc_post(
 {
        unsigned int added = *addedp;
        unsigned int i;
-       efx_rc_t rc;
 
-       if (added - completed + ndescs > EFX_TXQ_LIMIT(etp->et_mask + 1)) {
-               rc = ENOSPC;
-               goto fail1;
-       }
+       if (added - completed + ndescs > EFX_TXQ_LIMIT(etp->et_mask + 1))
+               return (ENOSPC);
 
        for (i = 0; i < ndescs; i++) {
                efx_desc_t *edp = &ed[i];
@@ -564,11 +561,6 @@ ef10_tx_qdesc_post(
 
        *addedp = added;
        return (0);
-
-fail1:
-       EFSYS_PROBE1(fail1, efx_rc_t, rc);
-
-       return (rc);
 }
 
        void
@@ -637,22 +629,22 @@ ef10_tx_qdesc_tso2_create(
 
        EFSYS_ASSERT(count >= EFX_TX_FATSOV2_OPT_NDESCS);
 
-       EFX_POPULATE_QWORD_6(edp[0].ed_eq,
+       EFX_POPULATE_QWORD_5(edp[0].ed_eq,
                            ESF_DZ_TX_DESC_IS_OPT, 1,
                            ESF_DZ_TX_OPTION_TYPE,
                            ESE_DZ_TX_OPTION_DESC_TSO,
                            ESF_DZ_TX_TSO_OPTION_TYPE,
                            ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A,
                            ESF_DZ_TX_TSO_IP_ID, ipv4_id,
-                           ESF_DZ_TX_TSO_OUTER_IPID, outer_ipv4_id,
                            ESF_DZ_TX_TSO_TCP_SEQNO, tcp_seq);
-       EFX_POPULATE_QWORD_4(edp[1].ed_eq,
+       EFX_POPULATE_QWORD_5(edp[1].ed_eq,
                            ESF_DZ_TX_DESC_IS_OPT, 1,
                            ESF_DZ_TX_OPTION_TYPE,
                            ESE_DZ_TX_OPTION_DESC_TSO,
                            ESF_DZ_TX_TSO_OPTION_TYPE,
                            ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B,
-                           ESF_DZ_TX_TSO_TCP_MSS, tcp_mss);
+                           ESF_DZ_TX_TSO_TCP_MSS, tcp_mss,
+                           ESF_DZ_TX_TSO_OUTER_IPID, outer_ipv4_id);
 }
 
        void
index 5108b9b..8e10e89 100644 (file)
@@ -7,6 +7,7 @@
 #ifndef        _SYS_EFX_H
 #define        _SYS_EFX_H
 
+#include "efx_annote.h"
 #include "efsys.h"
 #include "efx_check.h"
 #include "efx_phy_ids.h"
@@ -154,6 +155,14 @@ extern     __checkReturn   efx_rc_t
 efx_nic_reset(
        __in            efx_nic_t *enp);
 
+extern __checkReturn   boolean_t
+efx_nic_hw_unavailable(
+       __in            efx_nic_t *enp);
+
+extern                 void
+efx_nic_set_hw_unavailable(
+       __in            efx_nic_t *enp);
+
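
A hypothetical use of the new availability hooks (illustrative only; assumes the efx.h declarations above are in scope): a client can gate hardware access once the adapter is known dead or removed.

    /* Illustrative sketch: poll paths can bail out early once the NIC
     * has been flagged, or detected, unavailable. */
    static boolean_t
    example_nic_ok(efx_nic_t *enp)
    {
            return (!efx_nic_hw_unavailable(enp));
    }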
 #if EFSYS_OPT_DIAG
 
 extern __checkReturn   efx_rc_t
@@ -661,77 +670,74 @@ efx_mon_init(
 #define        EFX_MON_STATS_PAGE_SIZE 0x100
 #define        EFX_MON_MASK_ELEMENT_SIZE 32
 
-/* START MKCONFIG GENERATED MonitorHeaderStatsBlock 400fdb0517af1fca */
+/* START MKCONFIG GENERATED MonitorHeaderStatsBlock 78b65c8d5af9747b */
 typedef enum efx_mon_stat_e {
-       EFX_MON_STAT_2_5V,
-       EFX_MON_STAT_VCCP1,
-       EFX_MON_STAT_VCC,
-       EFX_MON_STAT_5V,
-       EFX_MON_STAT_12V,
-       EFX_MON_STAT_VCCP2,
-       EFX_MON_STAT_EXT_TEMP,
-       EFX_MON_STAT_INT_TEMP,
-       EFX_MON_STAT_AIN1,
-       EFX_MON_STAT_AIN2,
-       EFX_MON_STAT_INT_COOLING,
-       EFX_MON_STAT_EXT_COOLING,
-       EFX_MON_STAT_1V,
-       EFX_MON_STAT_1_2V,
-       EFX_MON_STAT_1_8V,
-       EFX_MON_STAT_3_3V,
-       EFX_MON_STAT_1_2VA,
-       EFX_MON_STAT_VREF,
-       EFX_MON_STAT_VAOE,
+       EFX_MON_STAT_CONTROLLER_TEMP,
+       EFX_MON_STAT_PHY_COMMON_TEMP,
+       EFX_MON_STAT_CONTROLLER_COOLING,
+       EFX_MON_STAT_PHY0_TEMP,
+       EFX_MON_STAT_PHY0_COOLING,
+       EFX_MON_STAT_PHY1_TEMP,
+       EFX_MON_STAT_PHY1_COOLING,
+       EFX_MON_STAT_IN_1V0,
+       EFX_MON_STAT_IN_1V2,
+       EFX_MON_STAT_IN_1V8,
+       EFX_MON_STAT_IN_2V5,
+       EFX_MON_STAT_IN_3V3,
+       EFX_MON_STAT_IN_12V0,
+       EFX_MON_STAT_IN_1V2A,
+       EFX_MON_STAT_IN_VREF,
+       EFX_MON_STAT_OUT_VAOE,
        EFX_MON_STAT_AOE_TEMP,
        EFX_MON_STAT_PSU_AOE_TEMP,
        EFX_MON_STAT_PSU_TEMP,
-       EFX_MON_STAT_FAN0,
-       EFX_MON_STAT_FAN1,
-       EFX_MON_STAT_FAN2,
-       EFX_MON_STAT_FAN3,
-       EFX_MON_STAT_FAN4,
-       EFX_MON_STAT_VAOE_IN,
-       EFX_MON_STAT_IAOE,
-       EFX_MON_STAT_IAOE_IN,
+       EFX_MON_STAT_FAN_0,
+       EFX_MON_STAT_FAN_1,
+       EFX_MON_STAT_FAN_2,
+       EFX_MON_STAT_FAN_3,
+       EFX_MON_STAT_FAN_4,
+       EFX_MON_STAT_IN_VAOE,
+       EFX_MON_STAT_OUT_IAOE,
+       EFX_MON_STAT_IN_IAOE,
        EFX_MON_STAT_NIC_POWER,
-       EFX_MON_STAT_0_9V,
-       EFX_MON_STAT_I0_9V,
-       EFX_MON_STAT_I1_2V,
-       EFX_MON_STAT_0_9V_ADC,
-       EFX_MON_STAT_INT_TEMP2,
-       EFX_MON_STAT_VREG_TEMP,
-       EFX_MON_STAT_VREG_0_9V_TEMP,
-       EFX_MON_STAT_VREG_1_2V_TEMP,
-       EFX_MON_STAT_INT_VPTAT,
-       EFX_MON_STAT_INT_ADC_TEMP,
-       EFX_MON_STAT_EXT_VPTAT,
-       EFX_MON_STAT_EXT_ADC_TEMP,
+       EFX_MON_STAT_IN_0V9,
+       EFX_MON_STAT_IN_I0V9,
+       EFX_MON_STAT_IN_I1V2,
+       EFX_MON_STAT_IN_0V9_ADC,
+       EFX_MON_STAT_CONTROLLER_2_TEMP,
+       EFX_MON_STAT_VREG_INTERNAL_TEMP,
+       EFX_MON_STAT_VREG_0V9_TEMP,
+       EFX_MON_STAT_VREG_1V2_TEMP,
+       EFX_MON_STAT_CONTROLLER_VPTAT,
+       EFX_MON_STAT_CONTROLLER_INTERNAL_TEMP,
+       EFX_MON_STAT_CONTROLLER_VPTAT_EXTADC,
+       EFX_MON_STAT_CONTROLLER_INTERNAL_TEMP_EXTADC,
        EFX_MON_STAT_AMBIENT_TEMP,
        EFX_MON_STAT_AIRFLOW,
        EFX_MON_STAT_VDD08D_VSS08D_CSR,
        EFX_MON_STAT_VDD08D_VSS08D_CSR_EXTADC,
        EFX_MON_STAT_HOTPOINT_TEMP,
-       EFX_MON_STAT_PHY_POWER_SWITCH_PORT0,
-       EFX_MON_STAT_PHY_POWER_SWITCH_PORT1,
+       EFX_MON_STAT_PHY_POWER_PORT0,
+       EFX_MON_STAT_PHY_POWER_PORT1,
        EFX_MON_STAT_MUM_VCC,
-       EFX_MON_STAT_0V9_A,
-       EFX_MON_STAT_I0V9_A,
-       EFX_MON_STAT_0V9_A_TEMP,
-       EFX_MON_STAT_0V9_B,
-       EFX_MON_STAT_I0V9_B,
-       EFX_MON_STAT_0V9_B_TEMP,
+       EFX_MON_STAT_IN_0V9_A,
+       EFX_MON_STAT_IN_I0V9_A,
+       EFX_MON_STAT_VREG_0V9_A_TEMP,
+       EFX_MON_STAT_IN_0V9_B,
+       EFX_MON_STAT_IN_I0V9_B,
+       EFX_MON_STAT_VREG_0V9_B_TEMP,
        EFX_MON_STAT_CCOM_AVREG_1V2_SUPPLY,
-       EFX_MON_STAT_CCOM_AVREG_1V2_SUPPLY_EXT_ADC,
+       EFX_MON_STAT_CCOM_AVREG_1V2_SUPPLY_EXTADC,
        EFX_MON_STAT_CCOM_AVREG_1V8_SUPPLY,
-       EFX_MON_STAT_CCOM_AVREG_1V8_SUPPLY_EXT_ADC,
+       EFX_MON_STAT_CCOM_AVREG_1V8_SUPPLY_EXTADC,
        EFX_MON_STAT_CONTROLLER_MASTER_VPTAT,
        EFX_MON_STAT_CONTROLLER_MASTER_INTERNAL_TEMP,
-       EFX_MON_STAT_CONTROLLER_MASTER_VPTAT_EXT_ADC,
-       EFX_MON_STAT_CONTROLLER_MASTER_INTERNAL_TEMP_EXT_ADC,
+       EFX_MON_STAT_CONTROLLER_MASTER_VPTAT_EXTADC,
+       EFX_MON_STAT_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC,
        EFX_MON_STAT_CONTROLLER_SLAVE_VPTAT,
        EFX_MON_STAT_CONTROLLER_SLAVE_INTERNAL_TEMP,
-       EFX_MON_STAT_CONTROLLER_SLAVE_VPTAT_EXT_ADC,
-       EFX_MON_STAT_CONTROLLER_SLAVE_INTERNAL_TEMP_EXT_ADC,
+       EFX_MON_STAT_CONTROLLER_SLAVE_VPTAT_EXTADC,
+       EFX_MON_STAT_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC,
        EFX_MON_STAT_SODIMM_VOUT,
        EFX_MON_STAT_SODIMM_0_TEMP,
        EFX_MON_STAT_SODIMM_1_TEMP,
@@ -740,12 +746,12 @@ typedef enum efx_mon_stat_e {
        EFX_MON_STAT_CONTROLLER_TDIODE_TEMP,
        EFX_MON_STAT_BOARD_FRONT_TEMP,
        EFX_MON_STAT_BOARD_BACK_TEMP,
-       EFX_MON_STAT_I1V8,
-       EFX_MON_STAT_I2V5,
-       EFX_MON_STAT_I3V3,
-       EFX_MON_STAT_I12V0,
-       EFX_MON_STAT_1_3V,
-       EFX_MON_STAT_I1V3,
+       EFX_MON_STAT_IN_I1V8,
+       EFX_MON_STAT_IN_I2V5,
+       EFX_MON_STAT_IN_I3V3,
+       EFX_MON_STAT_IN_I12V0,
+       EFX_MON_STAT_IN_1V3,
+       EFX_MON_STAT_IN_I1V3,
        EFX_MON_NSTATS
 } efx_mon_stat_t;
 
@@ -759,11 +765,40 @@ typedef enum efx_mon_stat_state_e {
        EFX_MON_STAT_STATE_NO_READING = 4,
 } efx_mon_stat_state_t;
 
+typedef enum efx_mon_stat_unit_e {
+       EFX_MON_STAT_UNIT_UNKNOWN = 0,
+       EFX_MON_STAT_UNIT_BOOL,
+       EFX_MON_STAT_UNIT_TEMP_C,
+       EFX_MON_STAT_UNIT_VOLTAGE_MV,
+       EFX_MON_STAT_UNIT_CURRENT_MA,
+       EFX_MON_STAT_UNIT_POWER_W,
+       EFX_MON_STAT_UNIT_RPM,
+       EFX_MON_NUNITS
+} efx_mon_stat_unit_t;
+
 typedef struct efx_mon_stat_value_s {
-       uint16_t        emsv_value;
-       uint16_t        emsv_state;
+       uint16_t                emsv_value;
+       efx_mon_stat_state_t    emsv_state;
+       efx_mon_stat_unit_t     emsv_unit;
 } efx_mon_stat_value_t;
 
+typedef struct efx_mon_limit_value_s {
+       uint16_t                        emlv_warning_min;
+       uint16_t                        emlv_warning_max;
+       uint16_t                        emlv_fatal_min;
+       uint16_t                        emlv_fatal_max;
+} efx_mon_stat_limits_t;
+
+typedef enum efx_mon_stat_portmask_e {
+       EFX_MON_STAT_PORTMAP_NONE = 0,
+       EFX_MON_STAT_PORTMAP_PORT0 = 1,
+       EFX_MON_STAT_PORTMAP_PORT1 = 2,
+       EFX_MON_STAT_PORTMAP_PORT2 = 3,
+       EFX_MON_STAT_PORTMAP_PORT3 = 4,
+       EFX_MON_STAT_PORTMAP_ALL = (-1),
+       EFX_MON_STAT_PORTMAP_UNKNOWN = (-2)
+} efx_mon_stat_portmask_t;
+
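
With a unit now carried in each efx_mon_stat_value_t, consumers can label readings generically instead of hard-coding per-sensor knowledge. An illustrative helper (the string mapping is an assumption for the sketch, not driver code):

    /* Illustrative only: map the new unit enum to a printable suffix. */
    static const char *
    example_mon_stat_unit_str(efx_mon_stat_unit_t unit)
    {
            switch (unit) {
            case EFX_MON_STAT_UNIT_BOOL:            return "bool";
            case EFX_MON_STAT_UNIT_TEMP_C:          return "degC";
            case EFX_MON_STAT_UNIT_VOLTAGE_MV:      return "mV";
            case EFX_MON_STAT_UNIT_CURRENT_MA:      return "mA";
            case EFX_MON_STAT_UNIT_POWER_W:         return "W";
            case EFX_MON_STAT_UNIT_RPM:             return "RPM";
            default:                                return "?";
            }
    }

A caller that has filled a values[EFX_MON_NSTATS] array via efx_mon_stats_update() can then print values[i].emsv_value alongside example_mon_stat_unit_str(values[i].emsv_unit).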
 #if EFSYS_OPT_NAMES
 
 extern                                 const char *
@@ -771,14 +806,39 @@ efx_mon_stat_name(
        __in                            efx_nic_t *enp,
        __in                            efx_mon_stat_t id);
 
+extern                                 const char *
+efx_mon_stat_description(
+       __in                            efx_nic_t *enp,
+       __in                            efx_mon_stat_t id);
+
 #endif /* EFSYS_OPT_NAMES */
 
+extern __checkReturn                   boolean_t
+efx_mon_mcdi_to_efx_stat(
+       __in                            int mcdi_index,
+       __out                           efx_mon_stat_t *statp);
+
+extern __checkReturn                   boolean_t
+efx_mon_get_stat_unit(
+       __in                            efx_mon_stat_t stat,
+       __out                           efx_mon_stat_unit_t *unitp);
+
+extern __checkReturn                   boolean_t
+efx_mon_get_stat_portmap(
+       __in                            efx_mon_stat_t stat,
+       __out                           efx_mon_stat_portmask_t *maskp);
+
 extern __checkReturn                   efx_rc_t
 efx_mon_stats_update(
        __in                            efx_nic_t *enp,
        __in                            efsys_mem_t *esmp,
        __inout_ecount(EFX_MON_NSTATS)  efx_mon_stat_value_t *values);
 
+extern __checkReturn                   efx_rc_t
+efx_mon_limits_update(
+       __in                            efx_nic_t *enp,
+       __inout_ecount(EFX_MON_NSTATS)  efx_mon_stat_limits_t *values);
+
 #endif /* EFSYS_OPT_MON_STATS */
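
Both the readings and the new limits array are sized EFX_MON_NSTATS, so pairing them by stat index is the natural reading. A hypothetical range check (illustrative only):

    /* Illustrative sketch: flag a reading outside its fatal limits.
     * Assumes the value and limit entries share the same stat index. */
    static boolean_t
    example_stat_fatal(const efx_mon_stat_value_t *vp,
        const efx_mon_stat_limits_t *limp)
    {
            return (vp->emsv_value < limp->emlv_fatal_min ||
                vp->emsv_value > limp->emlv_fatal_max);
    }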
 
 extern         void
@@ -970,12 +1030,39 @@ efx_phy_media_type_get(
        __in            efx_nic_t *enp,
        __out           efx_phy_media_type_t *typep);
 
+/*
+ * 2-wire device address of the base information in accordance with SFF-8472
+ * Diagnostic Monitoring Interface for Optical Transceivers section
+ * 4 Memory Organization.
+ */
+#define        EFX_PHY_MEDIA_INFO_DEV_ADDR_SFP_BASE    0xA0
+
+/*
+ * 2-wire device address of the digital diagnostics monitoring interface
+ * in accordance with SFF-8472 Diagnostic Monitoring Interface for Optical
+ * Transceivers section 4 Memory Organization.
+ */
+#define        EFX_PHY_MEDIA_INFO_DEV_ADDR_SFP_DDM     0xA2
+
+/*
+ * Hard wired 2-wire device address for QSFP+ in accordance with SFF-8436
+ * QSFP+ 10 Gbs 4X PLUGGABLE TRANSCEIVER section 7.4 Device Addressing and
+ * Operation.
+ */
+#define        EFX_PHY_MEDIA_INFO_DEV_ADDR_QSFP        0xA0
+
+/*
+ * Maximum accessible data offset for PHY module information.
+ */
+#define        EFX_PHY_MEDIA_INFO_MAX_OFFSET           0x100
+
+
 extern __checkReturn           efx_rc_t
 efx_phy_module_get_info(
        __in                    efx_nic_t *enp,
        __in                    uint8_t dev_addr,
-       __in                    uint8_t offset,
-       __in                    uint8_t len,
+       __in                    size_t offset,
+       __in                    size_t len,
        __out_bcount(len)       uint8_t *data);
 
 #if EFSYS_OPT_PHY_STATS
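
The offset and len parameters widen from uint8_t to size_t, which together with EFX_PHY_MEDIA_INFO_MAX_OFFSET permits reads across the full 0x100-byte page. A hypothetical caller (byte offsets follow SFF-8472, which places the vendor name at bytes 20..35 of the base page; check the standard before relying on that detail):

    /* Hypothetical sketch: read the 16-byte vendor name from an SFP
     * module's base page (2-wire device address 0xA0). */
    static efx_rc_t
    example_read_sfp_vendor(efx_nic_t *enp, uint8_t vendor[16])
    {
            return (efx_phy_module_get_info(enp,
                EFX_PHY_MEDIA_INFO_DEV_ADDR_SFP_BASE, 20, 16, vendor));
    }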
@@ -1194,6 +1281,7 @@ typedef struct efx_nic_cfg_s {
        uint32_t                enc_rx_prefix_size;
        uint32_t                enc_rx_buf_align_start;
        uint32_t                enc_rx_buf_align_end;
+#if EFSYS_OPT_RX_SCALE
        uint32_t                enc_rx_scale_max_exclusive_contexts;
        /*
         * Mask of supported hash algorithms.
@@ -1206,6 +1294,7 @@ typedef struct efx_nic_cfg_s {
         */
        boolean_t               enc_rx_scale_l4_hash_supported;
        boolean_t               enc_rx_scale_additional_modes_supported;
+#endif /* EFSYS_OPT_RX_SCALE */
 #if EFSYS_OPT_LOOPBACK
        efx_qword_t             enc_loopback_types[EFX_LINK_NMODES];
 #endif /* EFSYS_OPT_LOOPBACK */
@@ -1240,6 +1329,7 @@ typedef struct efx_nic_cfg_s {
        boolean_t               enc_bug35388_workaround;
        boolean_t               enc_bug41750_workaround;
        boolean_t               enc_bug61265_workaround;
+       boolean_t               enc_bug61297_workaround;
        boolean_t               enc_rx_batching_enabled;
        /* Maximum number of descriptors completed in an rx event. */
        uint32_t                enc_rx_batch_max;
@@ -1483,6 +1573,8 @@ typedef enum efx_nvram_type_e {
        EFX_NVRAM_LICENSE,
        EFX_NVRAM_UEFIROM,
        EFX_NVRAM_MUM_FIRMWARE,
+       EFX_NVRAM_DYNCONFIG_DEFAULTS,
+       EFX_NVRAM_ROMCONFIG_DEFAULTS,
        EFX_NVRAM_NTYPES,
 } efx_nvram_type_t;
 
@@ -1608,6 +1700,87 @@ efx_bootcfg_write(
        __in_bcount(size)       uint8_t *data,
        __in                    size_t size);
 
+
+/*
+ * Processing routines for buffers arranged in the DHCP/BOOTP option format
+ * (see https://tools.ietf.org/html/rfc1533)
+ *
+ * Summarising the format: the buffer is a sequence of options. All options
+ * begin with a tag octet, which uniquely identifies the option.  Fixed-
+ * length options without data consist of only a tag octet.  Only options PAD
+ * (0) and END (255) are fixed length.  All other options are variable-length
+ * with a length octet following the tag octet.  The value of the length
+ * octet does not include the two octets specifying the tag and length.  The
+ * length octet is followed by "length" octets of data.
+ *
+ * Option data may be a sequence of sub-options in the same format. The data
+ * content of the encapsulating option is one or more encapsulated sub-options,
+ * with no terminating END tag required.
+ *
+ * To be valid, the top-level sequence of options should be terminated by an
+ * END tag. The buffer should be padded with the PAD byte.
+ *
+ * When stored to NVRAM, the DHCP option format buffer is preceded by a
+ * checksum octet. The full buffer (including after the END tag) contributes
+ * to the checksum, hence the need to fill the buffer to the end with PAD.
+ */
+
+#define        EFX_DHCP_END ((uint8_t)0xff)
+#define        EFX_DHCP_PAD ((uint8_t)0)
+
+#define        EFX_DHCP_ENCAP_OPT(encapsulator, encapsulated) \
+  (uint16_t)(((encapsulator) << 8) | (encapsulated))
+
+extern __checkReturn           uint8_t
+efx_dhcp_csum(
+       __in_bcount(size)       uint8_t const *data,
+       __in                    size_t size);
+
+extern __checkReturn           efx_rc_t
+efx_dhcp_verify(
+       __in_bcount(size)       uint8_t const *data,
+       __in                    size_t size,
+       __out_opt               size_t *usedp);
+
+extern __checkReturn   efx_rc_t
+efx_dhcp_find_tag(
+       __in_bcount(buffer_length)      uint8_t *bufferp,
+       __in                            size_t buffer_length,
+       __in                            uint16_t opt,
+       __deref_out                     uint8_t **valuepp,
+       __out                           size_t *value_lengthp);
+
+extern __checkReturn   efx_rc_t
+efx_dhcp_find_end(
+       __in_bcount(buffer_length)      uint8_t *bufferp,
+       __in                            size_t buffer_length,
+       __deref_out                     uint8_t **endpp);
+
+
+extern __checkReturn   efx_rc_t
+efx_dhcp_delete_tag(
+       __inout_bcount(buffer_length)   uint8_t *bufferp,
+       __in                            size_t buffer_length,
+       __in                            uint16_t opt);
+
+extern __checkReturn   efx_rc_t
+efx_dhcp_add_tag(
+       __inout_bcount(buffer_length)   uint8_t *bufferp,
+       __in                            size_t buffer_length,
+       __in                            uint16_t opt,
+       __in_bcount_opt(value_length)   uint8_t *valuep,
+       __in                            size_t value_length);
+
+extern __checkReturn   efx_rc_t
+efx_dhcp_update_tag(
+       __inout_bcount(buffer_length)   uint8_t *bufferp,
+       __in                            size_t buffer_length,
+       __in                            uint16_t opt,
+       __in                            uint8_t *value_locationp,
+       __in_bcount_opt(value_length)   uint8_t *valuep,
+       __in                            size_t value_length);
+
+
 #endif /* EFSYS_OPT_BOOTCFG */
 
 #if EFSYS_OPT_IMAGE_LAYOUT
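
The EFX_DHCP_ENCAP_OPT() encoding above packs the encapsulating tag into the high octet and the sub-option tag into the low octet, so one uint16_t names a nested option. A hypothetical lookup with the new API (tag numbers 43 and 1 are arbitrary examples, not values the driver uses):

    /* Hypothetical sketch: locate sub-option 1 inside encapsulating
     * option 43. Returns 0 with *valpp/*lenp set if found, or ENOENT
     * if the walk completed without finding it. */
    static efx_rc_t
    example_find_suboption(uint8_t *bufp, size_t buf_len,
        uint8_t **valpp, size_t *lenp)
    {
            uint16_t opt = EFX_DHCP_ENCAP_OPT(43, 1);

            return (efx_dhcp_find_tag(bufp, buf_len, opt, valpp, lenp));
    }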
@@ -1689,7 +1862,8 @@ efx_check_reflash_image(
 
 extern __checkReturn   efx_rc_t
 efx_build_signed_image_write_buffer(
-       __out           uint8_t                 *bufferp,
+       __out_bcount(buffer_size)
+                       uint8_t                 *bufferp,
        __in            uint32_t                buffer_size,
        __in            efx_image_info_t        *infop,
        __out           efx_image_header_t      **headerpp);
@@ -2122,7 +2296,7 @@ typedef enum efx_rx_hash_alg_e {
  *  - a combination of legacy flags
  *  - a combination of EFX_RX_HASH() flags
  */
-typedef unsigned int efx_rx_hash_type_t;
+typedef uint32_t efx_rx_hash_type_t;
 
 typedef enum efx_rx_hash_support_e {
        EFX_RX_HASH_UNAVAILABLE = 0,    /* Hardware hash not inserted */
@@ -2223,7 +2397,8 @@ extern    __checkReturn                           efx_rc_t
 efx_rx_scale_hash_flags_get(
        __in                                    efx_nic_t *enp,
        __in                                    efx_rx_hash_alg_t hash_alg,
-       __inout_ecount(EFX_RX_HASH_NFLAGS)      unsigned int *flags,
+       __out_ecount_part(max_nflags, *nflagsp) unsigned int *flagsp,
+       __in                                    unsigned int max_nflags,
        __out                                   unsigned int *nflagsp);
 
 extern __checkReturn   efx_rc_t
@@ -2813,9 +2988,23 @@ efx_filter_spec_set_encap_type(
        __in            efx_filter_inner_frame_match_t inner_frame_match);
 
 extern __checkReturn   efx_rc_t
-efx_filter_spec_set_vxlan_full(
+efx_filter_spec_set_vxlan(
+       __inout         efx_filter_spec_t *spec,
+       __in            const uint8_t *vni,
+       __in            const uint8_t *inner_addr,
+       __in            const uint8_t *outer_addr);
+
+extern __checkReturn   efx_rc_t
+efx_filter_spec_set_geneve(
+       __inout         efx_filter_spec_t *spec,
+       __in            const uint8_t *vni,
+       __in            const uint8_t *inner_addr,
+       __in            const uint8_t *outer_addr);
+
+extern __checkReturn   efx_rc_t
+efx_filter_spec_set_nvgre(
        __inout         efx_filter_spec_t *spec,
-       __in            const uint8_t *vxlan_id,
+       __in            const uint8_t *vsid,
        __in            const uint8_t *inner_addr,
        __in            const uint8_t *outer_addr);
 
@@ -3057,6 +3246,32 @@ efx_nic_set_fw_subvariant(
 
 #endif /* EFSYS_OPT_FW_SUBVARIANT_AWARE */
 
+typedef enum efx_phy_fec_type_e {
+       EFX_PHY_FEC_NONE = 0,
+       EFX_PHY_FEC_BASER,
+       EFX_PHY_FEC_RS
+} efx_phy_fec_type_t;
+
+extern __checkReturn   efx_rc_t
+efx_phy_fec_type_get(
+       __in            efx_nic_t *enp,
+       __out           efx_phy_fec_type_t *typep);
+
+typedef struct efx_phy_link_state_s {
+       uint32_t                epls_adv_cap_mask;
+       uint32_t                epls_lp_cap_mask;
+       uint32_t                epls_ld_cap_mask;
+       unsigned int            epls_fcntl;
+       efx_phy_fec_type_t      epls_fec;
+       efx_link_mode_t         epls_link_mode;
+} efx_phy_link_state_t;
+
+extern __checkReturn   efx_rc_t
+efx_phy_link_state_get(
+       __in            efx_nic_t *enp,
+       __out           efx_phy_link_state_t  *eplsp);
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/drivers/net/sfc/base/efx_annote.h b/drivers/net/sfc/base/efx_annote.h
new file mode 100644 (file)
index 0000000..607b43c
--- /dev/null
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2018 Solarflare Communications Inc.
+ * All rights reserved.
+ */
+
+#ifndef        _SYS_EFX_ANNOTE_H
+#define        _SYS_EFX_ANNOTE_H
+
+#if defined(_WIN32) || defined(_WIN64)
+#define        EFX_HAVE_WINDOWS_ANNOTATIONS 1
+#else
+#define        EFX_HAVE_WINDOWS_ANNOTATIONS 0
+#endif /* defined(_WIN32) || defined(_WIN64) */
+
+#if defined(__sun)
+#define        EFX_HAVE_SOLARIS_ANNOTATIONS 1
+#else
+#define        EFX_HAVE_SOLARIS_ANNOTATIONS 0
+#endif /* defined(__sun) */
+
+#if !EFX_HAVE_WINDOWS_ANNOTATIONS
+
+/* Ignore Windows SAL annotations on other platforms */
+#define        __in
+#define        __in_opt
+#define        __in_ecount(_n)
+#define        __in_ecount_opt(_n)
+#define        __in_bcount(_n)
+#define        __in_bcount_opt(_n)
+
+#define        __out
+#define        __out_opt
+#define        __out_ecount(_n)
+#define        __out_ecount_opt(_n)
+#define        __out_ecount_part(_n, _l)
+#define        __out_bcount(_n)
+#define        __out_bcount_opt(_n)
+#define        __out_bcount_part(_n, _l)
+#define        __out_bcount_part_opt(_n, _l)
+
+#define        __deref_out
+#define        __deref_inout
+
+#define        __inout
+#define        __inout_opt
+#define        __inout_ecount(_n)
+#define        __inout_ecount_opt(_n)
+#define        __inout_bcount(_n)
+#define        __inout_bcount_opt(_n)
+#define        __inout_bcount_full_opt(_n)
+
+#define        __deref_out_bcount_opt(n)
+
+#define        __checkReturn
+#define        __success(_x)
+
+#define        __drv_when(_p, _c)
+
+#endif /* !EFX_HAVE_WINDOWS_ANNOTATIONS */
+
+#if !EFX_HAVE_SOLARIS_ANNOTATIONS
+
+#if EFX_HAVE_WINDOWS_ANNOTATIONS
+
+/*
+ * Support some SunOS/Solaris style _NOTE() annotations
+ *
+ * At present with the facilities provided in the WDK and the SAL we can only
+ * easily act upon _NOTE(ARGUNUSED(arglist)) annotations.
+ *
+ * Intermediate macros to expand individual _NOTE annotation types into
+ * something the WDK or SAL can understand.  They shouldn't be used directly,
+ * for example EFX_NOTE_ARGUNUSED() is only used as an intermediate step in
+ * the transformation of _NOTE(ARGUNUSED(arg1, arg2)) into
+ * UNREFERENCED_PARAMETER((arg1, arg2));
+ */
+#define        EFX_NOTE_ALIGNMENT(_fname, _n)
+#define        EFX_NOTE_ARGUNUSED(...)         UNREFERENCED_PARAMETER((__VA_ARGS__));
+#define        EFX_NOTE_CONSTANTCONDITION
+#define        EFX_NOTE_CONSTCOND
+#define        EFX_NOTE_EMPTY
+#define        EFX_NOTE_FALLTHROUGH
+#define        EFX_NOTE_FALLTHRU
+#define        EFX_NOTE_LINTED(_msg)
+#define        EFX_NOTE_NOTREACHED
+#define        EFX_NOTE_PRINTFLIKE(_n)
+#define        EFX_NOTE_SCANFLIKE(_n)
+#define        EFX_NOTE_VARARGS(_n)
+
+#define        _NOTE(_annotation)              EFX_NOTE_ ## _annotation
+
+#else
+
+/* Ignore Solaris annotations on other platforms */
+
+#define        _NOTE(_annotation)
+
+#endif /* EFX_HAVE_WINDOWS_ANNOTATIONS */
+
+#endif /* !EFX_HAVE_SOLARIS_ANNOTATIONS */
+
+#endif /* _SYS_EFX_ANNOTE_H */
index 715e18e..3b0401e 100644 (file)
 #define        BOOTCFG_PER_PF   0x800
 #define        BOOTCFG_PF_COUNT 16
 
-#define        DHCP_END ((uint8_t)0xff)
-#define        DHCP_PAD ((uint8_t)0)
+#define        DHCP_OPT_HAS_VALUE(opt) \
+       (((opt) > EFX_DHCP_PAD) && ((opt) < EFX_DHCP_END))
+
+#define        DHCP_MAX_VALUE 255
+
+#define        DHCP_ENCAPSULATOR(encap_opt) ((encap_opt) >> 8)
+#define        DHCP_ENCAPSULATED(encap_opt) ((encap_opt) & 0xff)
+#define        DHCP_IS_ENCAP_OPT(opt) DHCP_OPT_HAS_VALUE(DHCP_ENCAPSULATOR(opt))
+
+typedef struct efx_dhcp_tag_hdr_s {
+       uint8_t         tag;
+       uint8_t         length;
+} efx_dhcp_tag_hdr_t;
+
+/*
+ * Length calculations for tags with value field. PAD and END
+ * have a fixed length of 1, with no length or value field.
+ */
+#define        DHCP_FULL_TAG_LENGTH(hdr) \
+       (sizeof (efx_dhcp_tag_hdr_t) + (hdr)->length)
+
+#define        DHCP_NEXT_TAG(hdr) \
+       ((efx_dhcp_tag_hdr_t *)(((uint8_t *)(hdr)) + \
+       DHCP_FULL_TAG_LENGTH((hdr))))
+
+#define        DHCP_CALC_TAG_LENGTH(payload_len) \
+       ((payload_len) + sizeof (efx_dhcp_tag_hdr_t))
 
 
 /* Report the layout of bootcfg sectors in NVRAM partition. */
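
To make the length macros concrete: a variable-length tag carrying an n-byte value occupies DHCP_CALC_TAG_LENGTH(n) octets on the wire, the two header octets plus the value. A worked layout with a hypothetical tag number:

    /* Hypothetical layout of a tag with a 3-byte value:
     * DHCP_CALC_TAG_LENGTH(3) == 5 octets in total, and DHCP_NEXT_TAG()
     * applied to its header advances past all five. PAD and END are the
     * fixed-length exceptions with no length or value octets. */
    static const uint8_t example_tag[] = {
            67,             /* efx_dhcp_tag_hdr_t.tag (arbitrary number) */
            3,              /* efx_dhcp_tag_hdr_t.length (value only) */
            'p', 'x', 'e',  /* value octets */
    };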
@@ -110,14 +135,11 @@ fail1:
 }
 
 
-static __checkReturn           uint8_t
-efx_bootcfg_csum(
-       __in                    efx_nic_t *enp,
+       __checkReturn           uint8_t
+efx_dhcp_csum(
        __in_bcount(size)       uint8_t const *data,
        __in                    size_t size)
 {
-       _NOTE(ARGUNUSED(enp))
-
        unsigned int pos;
        uint8_t checksum = 0;
 
@@ -126,9 +148,8 @@ efx_bootcfg_csum(
        return (checksum);
 }
 
-static __checkReturn           efx_rc_t
-efx_bootcfg_verify(
-       __in                    efx_nic_t *enp,
+       __checkReturn           efx_rc_t
+efx_dhcp_verify(
        __in_bcount(size)       uint8_t const *data,
        __in                    size_t size,
        __out_opt               size_t *usedp)
@@ -144,12 +165,12 @@ efx_bootcfg_verify(
 
                /* Consume tag */
                tag = data[offset];
-               if (tag == DHCP_END) {
+               if (tag == EFX_DHCP_END) {
                        offset++;
                        used = offset;
                        break;
                }
-               if (tag == DHCP_PAD) {
+               if (tag == EFX_DHCP_PAD) {
                        offset++;
                        continue;
                }
@@ -171,8 +192,8 @@ efx_bootcfg_verify(
                used = offset;
        }
 
-       /* Checksum the entire sector, including bytes after any DHCP_END */
-       if (efx_bootcfg_csum(enp, data, size) != 0) {
+       /* Checksum the entire sector, including bytes after any EFX_DHCP_END */
+       if (efx_dhcp_csum(data, size) != 0) {
                rc = EINVAL;
                goto fail3;
        }
@@ -192,6 +213,516 @@ fail1:
        return (rc);
 }
 
+/*
+ * Walk the entire tag set looking for the given option, which may be
+ * encapsulated. ENOENT indicates the walk completed without finding the
+ * option. If we run out of buffer during the walk the function will return
+ * ENOSPC.
+ */
+static efx_rc_t
+efx_dhcp_walk_tags(
+       __deref_inout   uint8_t **tagpp,
+       __inout         size_t *buffer_sizep,
+       __in            uint16_t opt)
+{
+       efx_rc_t rc = 0;
+       boolean_t is_encap = B_FALSE;
+
+       if (DHCP_IS_ENCAP_OPT(opt)) {
+               /*
+                * Look for the encapsulator and, if found, limit ourselves
+                * to its payload. If it's not found then the entire tag
+                * cannot be found, so the encapsulated opt search is
+                * skipped.
+                */
+               rc = efx_dhcp_walk_tags(tagpp, buffer_sizep,
+                   DHCP_ENCAPSULATOR(opt));
+               if (rc == 0) {
+                       *buffer_sizep = ((efx_dhcp_tag_hdr_t *)*tagpp)->length;
+                       (*tagpp) += sizeof (efx_dhcp_tag_hdr_t);
+               }
+               opt = DHCP_ENCAPSULATED(opt);
+               is_encap = B_TRUE;
+       }
+
+       EFSYS_ASSERT(!DHCP_IS_ENCAP_OPT(opt));
+
+       while (rc == 0) {
+               size_t size;
+
+               if (*buffer_sizep == 0) {
+                       rc = ENOSPC;
+                       goto fail1;
+               }
+
+               if (DHCP_ENCAPSULATED(**tagpp) == opt)
+                       break;
+
+               if ((**tagpp) == EFX_DHCP_END) {
+                       rc = ENOENT;
+                       break;
+               } else if ((**tagpp) == EFX_DHCP_PAD) {
+                       size = 1;
+               } else {
+                       if (*buffer_sizep < sizeof (efx_dhcp_tag_hdr_t)) {
+                               rc = ENOSPC;
+                               goto fail2;
+                       }
+
+                       size =
+                           DHCP_FULL_TAG_LENGTH((efx_dhcp_tag_hdr_t *)*tagpp);
+               }
+
+               if (size > *buffer_sizep) {
+                       rc = ENOSPC;
+                       goto fail3;
+               }
+
+               (*tagpp) += size;
+               (*buffer_sizep) -= size;
+
+               if ((*buffer_sizep == 0) && is_encap) {
+                       /* Search within encapsulator tag finished */
+                       rc = ENOENT;
+                       break;
+               }
+       }
+
+       /*
+        * Returns 0 if found; otherwise ENOENT indicates the search
+        * finished correctly without finding the option
+        */
+       return (rc);
+
+fail3:
+       EFSYS_PROBE(fail3);
+fail2:
+       EFSYS_PROBE(fail2);
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
+}
+
+/*
+ * Locate value buffer for option in the given buffer.
+ * Returns 0 if found, ENOENT indicating search finished
+ * correctly, otherwise search failed before completion.
+ */
+       __checkReturn   efx_rc_t
+efx_dhcp_find_tag(
+       __in_bcount(buffer_length)      uint8_t *bufferp,
+       __in                            size_t buffer_length,
+       __in                            uint16_t opt,
+       __deref_out                     uint8_t **valuepp,
+       __out                           size_t *value_lengthp)
+{
+       efx_rc_t rc;
+       uint8_t *tagp = bufferp;
+       size_t len = buffer_length;
+
+       rc = efx_dhcp_walk_tags(&tagp, &len, opt);
+       if (rc == 0) {
+               efx_dhcp_tag_hdr_t *hdrp;
+
+               hdrp = (efx_dhcp_tag_hdr_t *)tagp;
+               *valuepp = (uint8_t *)(&hdrp[1]);
+               *value_lengthp = hdrp->length;
+       } else if (rc != ENOENT) {
+               goto fail1;
+       }
+
+       return (rc);
+
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
+}
+
+/*
+ * Locate the end tag in the given buffer.
+ * Returns 0 if found, ENOENT indicating search finished
+ * correctly but end tag was not found; otherwise search
+ * failed before completion.
+ */
+       __checkReturn   efx_rc_t
+efx_dhcp_find_end(
+       __in_bcount(buffer_length)      uint8_t *bufferp,
+       __in                            size_t buffer_length,
+       __deref_out                     uint8_t **endpp)
+{
+       efx_rc_t rc;
+       uint8_t *endp = bufferp;
+       size_t len = buffer_length;
+
+       rc = efx_dhcp_walk_tags(&endp, &len, EFX_DHCP_END);
+       if (rc == 0)
+               *endpp = endp;
+       else if (rc != ENOENT)
+               goto fail1;
+
+       return (rc);
+
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
+}
+
+
+/*
+ * Delete the given tag from anywhere in the buffer. Copes with
+ * encapsulated tags, and updates or deletes the encapsulating opt as
+ * necessary.
+ */
+       __checkReturn   efx_rc_t
+efx_dhcp_delete_tag(
+       __inout_bcount(buffer_length)   uint8_t *bufferp,
+       __in                            size_t buffer_length,
+       __in                            uint16_t opt)
+{
+       efx_rc_t rc;
+       efx_dhcp_tag_hdr_t *hdrp;
+       size_t len;
+       uint8_t *startp;
+       uint8_t *endp;
+
+       len = buffer_length;
+       startp = bufferp;
+
+       if (!DHCP_OPT_HAS_VALUE(DHCP_ENCAPSULATED(opt))) {
+               rc = EINVAL;
+               goto fail1;
+       }
+
+       rc = efx_dhcp_walk_tags(&startp, &len, opt);
+       if (rc != 0)
+               goto fail1;
+
+       hdrp = (efx_dhcp_tag_hdr_t *)startp;
+
+       if (DHCP_IS_ENCAP_OPT(opt)) {
+               uint8_t tag_length = DHCP_FULL_TAG_LENGTH(hdrp);
+               uint8_t *encapp = bufferp;
+               efx_dhcp_tag_hdr_t *encap_hdrp;
+
+               len = buffer_length;
+               rc = efx_dhcp_walk_tags(&encapp, &len,
+                   DHCP_ENCAPSULATOR(opt));
+               if (rc != 0)
+                       goto fail2;
+
+               encap_hdrp = (efx_dhcp_tag_hdr_t *)encapp;
+               if (encap_hdrp->length > tag_length) {
+                       encap_hdrp->length = (uint8_t)(
+                           (size_t)encap_hdrp->length - tag_length);
+               } else {
+                       /* delete the encapsulating tag */
+                       hdrp = encap_hdrp;
+               }
+       }
+
+       startp = (uint8_t *)hdrp;
+       endp = (uint8_t *)DHCP_NEXT_TAG(hdrp);
+
+       if (startp < bufferp) {
+               rc = EINVAL;
+               goto fail3;
+       }
+
+       if (endp > &bufferp[buffer_length]) {
+               rc = EINVAL;
+               goto fail4;
+       }
+
+       memmove(startp, endp,
+               buffer_length - (endp - bufferp));
+
+       return (0);
+
+fail4:
+       EFSYS_PROBE(fail4);
+fail3:
+       EFSYS_PROBE(fail3);
+fail2:
+       EFSYS_PROBE(fail2);
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
+}
+
+/*
+ * Write the tag header at write_pointp and optionally copy the payload
+ * into the space that follows.
+ */
+static void
+efx_dhcp_write_tag(
+       __in            uint8_t *write_pointp,
+       __in            uint16_t opt,
+       __in_bcount_opt(value_length)
+                       uint8_t *valuep,
+       __in            size_t value_length)
+{
+       efx_dhcp_tag_hdr_t *hdrp = (efx_dhcp_tag_hdr_t *)write_pointp;
+       hdrp->tag = DHCP_ENCAPSULATED(opt);
+       hdrp->length = (uint8_t)value_length;
+       if ((value_length > 0) && (valuep != NULL))
+               memcpy(&hdrp[1], valuep, value_length);
+}
+
+/*
+ * Add the given tag to the end of the buffer. Copes with creating an
+ * encapsulated tag, and updates or creates the encapsulating opt as
+ * necessary.
+ */
+       __checkReturn   efx_rc_t
+efx_dhcp_add_tag(
+       __inout_bcount(buffer_length)   uint8_t *bufferp,
+       __in                            size_t buffer_length,
+       __in                            uint16_t opt,
+       __in_bcount_opt(value_length)   uint8_t *valuep,
+       __in                            size_t value_length)
+{
+       efx_rc_t rc;
+       efx_dhcp_tag_hdr_t *encap_hdrp = NULL;
+       uint8_t *insert_pointp = NULL;
+       uint8_t *endp;
+       size_t available_space;
+       size_t added_length;
+       size_t search_size;
+       uint8_t *searchp;
+
+       if (!DHCP_OPT_HAS_VALUE(DHCP_ENCAPSULATED(opt))) {
+               rc = EINVAL;
+               goto fail1;
+       }
+
+       if (value_length > DHCP_MAX_VALUE) {
+               rc = EINVAL;
+               goto fail2;
+       }
+
+       if ((value_length > 0) && (valuep == NULL)) {
+               rc = EINVAL;
+               goto fail3;
+       }
+
+       endp = bufferp;
+       available_space = buffer_length;
+       rc = efx_dhcp_walk_tags(&endp, &available_space, EFX_DHCP_END);
+       if (rc != 0)
+               goto fail4;
+
+       searchp = bufferp;
+       search_size = buffer_length;
+       if (DHCP_IS_ENCAP_OPT(opt)) {
+               rc = efx_dhcp_walk_tags(&searchp, &search_size,
+                   DHCP_ENCAPSULATOR(opt));
+               if (rc == 0) {
+                       encap_hdrp = (efx_dhcp_tag_hdr_t *)searchp;
+
+                       /* Check encapsulated tag is not present */
+                       search_size = encap_hdrp->length;
+                       rc = efx_dhcp_walk_tags(&searchp, &search_size,
+                           opt);
+                       if (rc != ENOENT) {
+                               rc = EINVAL;
+                               goto fail5;
+                       }
+
+                       /* Check encapsulator will not overflow */
+                       if (((size_t)encap_hdrp->length +
+                           DHCP_CALC_TAG_LENGTH(value_length)) >
+                           DHCP_MAX_VALUE) {
+                               rc = E2BIG;
+                               goto fail6;
+                       }
+
+                       /* Insert at start of existing encapsulator */
+                       insert_pointp = (uint8_t *)&encap_hdrp[1];
+                       opt = DHCP_ENCAPSULATED(opt);
+               } else if (rc == ENOENT) {
+                       encap_hdrp = NULL;
+               } else {
+                       goto fail7;
+               }
+       } else {
+               /* Check unencapsulated tag is not present */
+               rc = efx_dhcp_walk_tags(&searchp, &search_size,
+                   opt);
+               if (rc != ENOENT) {
+                       rc = EINVAL;
+                       goto fail8;
+               }
+       }
+
+       if (insert_pointp == NULL) {
+               /* Insert at end of existing tags */
+               insert_pointp = endp;
+       }
+
+       /* Includes the new encapsulator tag hdr if required */
+       added_length = DHCP_CALC_TAG_LENGTH(value_length) +
+           (DHCP_IS_ENCAP_OPT(opt) ? sizeof (efx_dhcp_tag_hdr_t) : 0);
+
+       if (available_space <= added_length) {
+               rc = ENOMEM;
+               goto fail9;
+       }
+
+       memmove(insert_pointp + added_length, insert_pointp,
+           available_space - added_length);
+
+       if (DHCP_IS_ENCAP_OPT(opt)) {
+               /* Create new encapsulator header */
+               added_length -= sizeof (efx_dhcp_tag_hdr_t);
+               efx_dhcp_write_tag(insert_pointp,
+                   DHCP_ENCAPSULATOR(opt), NULL, added_length);
+               insert_pointp += sizeof (efx_dhcp_tag_hdr_t);
+       } else if (encap_hdrp)
+               /* Modify existing encapsulator header */
+               encap_hdrp->length +=
+                   ((uint8_t)DHCP_CALC_TAG_LENGTH(value_length));
+
+       efx_dhcp_write_tag(insert_pointp, opt, valuep, value_length);
+
+       return (0);
+
+fail9:
+       EFSYS_PROBE(fail9);
+fail8:
+       EFSYS_PROBE(fail8);
+fail7:
+       EFSYS_PROBE(fail7);
+fail6:
+       EFSYS_PROBE(fail6);
+fail5:
+       EFSYS_PROBE(fail5);
+fail4:
+       EFSYS_PROBE(fail4);
+fail3:
+       EFSYS_PROBE(fail3);
+fail2:
+       EFSYS_PROBE(fail2);
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
+}
+
+/*
+ * Update an existing tag to the new value. Copes with encapsulated
+ * tags, and updates the encapsulating opt as necessary.
+ */
+       __checkReturn   efx_rc_t
+efx_dhcp_update_tag(
+       __inout_bcount(buffer_length)   uint8_t *bufferp,
+       __in                            size_t buffer_length,
+       __in                            uint16_t opt,
+       __in                            uint8_t *value_locationp,
+       __in_bcount_opt(value_length)   uint8_t *valuep,
+       __in                            size_t value_length)
+{
+       efx_rc_t rc;
+       uint8_t *write_pointp = value_locationp - sizeof (efx_dhcp_tag_hdr_t);
+       efx_dhcp_tag_hdr_t *hdrp = (efx_dhcp_tag_hdr_t *)write_pointp;
+       efx_dhcp_tag_hdr_t *encap_hdrp = NULL;
+       size_t old_length;
+
+       if (!DHCP_OPT_HAS_VALUE(DHCP_ENCAPSULATED(opt))) {
+               rc = EINVAL;
+               goto fail1;
+       }
+
+       if (value_length > DHCP_MAX_VALUE) {
+               rc = EINVAL;
+               goto fail2;
+       }
+
+       if ((value_length > 0) && (valuep == NULL)) {
+               rc = EINVAL;
+               goto fail3;
+       }
+
+       old_length = hdrp->length;
+
+       if (old_length < value_length) {
+               uint8_t *endp = bufferp;
+               size_t available_space = buffer_length;
+
+               rc = efx_dhcp_walk_tags(&endp, &available_space,
+                   EFX_DHCP_END);
+               if (rc != 0)
+                       goto fail4;
+
+               if (available_space < (value_length - old_length)) {
+                       rc = EINVAL;
+                       goto fail5;
+               }
+       }
+
+       if (DHCP_IS_ENCAP_OPT(opt)) {
+               uint8_t *encapp = bufferp;
+               size_t following_encap = buffer_length;
+               size_t new_length;
+
+               rc = efx_dhcp_walk_tags(&encapp, &following_encap,
+                   DHCP_ENCAPSULATOR(opt));
+               if (rc != 0)
+                       goto fail6;
+
+               encap_hdrp = (efx_dhcp_tag_hdr_t *)encapp;
+
+               new_length = ((size_t)encap_hdrp->length +
+                   value_length - old_length);
+               /* Check encapsulator will not overflow */
+               if (new_length > DHCP_MAX_VALUE) {
+                       rc = E2BIG;
+                       goto fail7;
+               }
+
+               encap_hdrp->length = (uint8_t)new_length;
+       }
+
+       /*
+        * Move the following data up/down to accommodate the new payload
+        * length.
+        */
+       if (old_length != value_length) {
+               uint8_t *destp = (uint8_t *)DHCP_NEXT_TAG(hdrp) +
+                   value_length - old_length;
+               size_t count = &bufferp[buffer_length] -
+                   (uint8_t *)DHCP_NEXT_TAG(hdrp);
+
+               memmove(destp, DHCP_NEXT_TAG(hdrp), count);
+       }
+
+       EFSYS_ASSERT(hdrp->tag == DHCP_ENCAPSULATED(opt));
+       efx_dhcp_write_tag(write_pointp, opt, valuep, value_length);
+
+       return (0);
+
+fail7:
+       EFSYS_PROBE(fail7);
+fail6:
+       EFSYS_PROBE(fail6);
+fail5:
+       EFSYS_PROBE(fail5);
+fail4:
+       EFSYS_PROBE(fail4);
+fail3:
+       EFSYS_PROBE(fail3);
+fail2:
+       EFSYS_PROBE(fail2);
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
+}
+
+
 /*
  * Copy bootcfg sector data to a target buffer which may differ in size.
  * Optionally correct format errors in the source buffer.
@@ -206,17 +737,19 @@ efx_bootcfg_copy_sector(
        __in                    size_t data_size,
        __in                    boolean_t handle_format_errors)
 {
+       _NOTE(ARGUNUSED(enp))
+
        size_t used_bytes;
        efx_rc_t rc;
 
-       /* Minimum buffer is checksum byte and DHCP_END terminator */
+       /* Minimum buffer is checksum byte and EFX_DHCP_END terminator */
        if (data_size < 2) {
                rc = ENOSPC;
                goto fail1;
        }
 
        /* Verify that the area is correctly formatted and checksummed */
-       rc = efx_bootcfg_verify(enp, sector, sector_length,
+       rc = efx_dhcp_verify(sector, sector_length,
                                    &used_bytes);
 
        if (!handle_format_errors) {
@@ -224,8 +757,8 @@ efx_bootcfg_copy_sector(
                        goto fail2;
 
                if ((used_bytes < 2) ||
-                   (sector[used_bytes - 1] != DHCP_END)) {
-                       /* Block too short, or DHCP_END missing */
+                   (sector[used_bytes - 1] != EFX_DHCP_END)) {
+                       /* Block too short, or EFX_DHCP_END missing */
                        rc = ENOENT;
                        goto fail3;
                }
@@ -234,24 +767,24 @@ efx_bootcfg_copy_sector(
        /* Synthesize empty format on verification failure */
        if (rc != 0 || used_bytes == 0) {
                sector[0] = 0;
-               sector[1] = DHCP_END;
+               sector[1] = EFX_DHCP_END;
                used_bytes = 2;
        }
-       EFSYS_ASSERT(used_bytes >= 2);  /* checksum and DHCP_END */
+       EFSYS_ASSERT(used_bytes >= 2);  /* checksum and EFX_DHCP_END */
        EFSYS_ASSERT(used_bytes <= sector_length);
        EFSYS_ASSERT(sector_length >= 2);
 
        /*
-        * Legacy bootcfg sectors don't terminate with a DHCP_END character.
-        * Modify the returned payload so it does.
+        * Legacy bootcfg sectors don't terminate with an EFX_DHCP_END
+        * character. Modify the returned payload so it does.
         * Reinitialise the sector if there isn't room for the character.
         */
-       if (sector[used_bytes - 1] != DHCP_END) {
+       if (sector[used_bytes - 1] != EFX_DHCP_END) {
                if (used_bytes >= sector_length) {
                        sector[0] = 0;
                        used_bytes = 1;
                }
-               sector[used_bytes] = DHCP_END;
+               sector[used_bytes] = EFX_DHCP_END;
                ++used_bytes;
        }
 
@@ -274,10 +807,11 @@ efx_bootcfg_copy_sector(
                (void) memset(data + used_bytes, 0, data_size - used_bytes);
 
        /*
-        * The checksum includes trailing data after any DHCP_END character,
-        * which we've just modified (by truncation or appending DHCP_END).
+        * The checksum includes trailing data after any EFX_DHCP_END
+        * character, which we've just modified (by truncation or appending
+        * EFX_DHCP_END).
         */
-       data[0] -= efx_bootcfg_csum(enp, data, data_size);
+       data[0] -= efx_dhcp_csum(data, data_size);
 
        return (0);
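
The data[0] -= efx_dhcp_csum(...) idiom used here and below works because the checksum is a plain mod-256 sum of every octet: subtracting the current sum from the checksum octet drives the whole-buffer sum to zero, which is exactly the condition efx_dhcp_verify() checks. As a sketch:

    /* Illustrative only: after this fix-up,
     * efx_dhcp_csum(data, size) == 0. */
    static void
    example_fix_checksum(uint8_t *data, size_t size)
    {
            data[0] -= efx_dhcp_csum(data, size);
    }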
 
@@ -307,7 +841,7 @@ efx_bootcfg_read(
        efx_rc_t rc;
        uint32_t sector_number;
 
-       /* Minimum buffer is checksum byte and DHCP_END terminator */
+       /* Minimum buffer is checksum byte and EFX_DHCP_END terminator */
        if (size < 2) {
                rc = ENOSPC;
                goto fail1;
@@ -343,10 +877,10 @@ efx_bootcfg_read(
        }
 
        /*
-        * We need to read the entire BOOTCFG sector to ensure we read all the
-        * tags, because legacy bootcfg sectors are not guaranteed to end with
-        * a DHCP_END character. If the user hasn't supplied a sufficiently
-        * large buffer then use our own buffer.
+        * We need to read the entire BOOTCFG sector to ensure we read all
+        * tags, because legacy bootcfg sectors are not guaranteed to end
+        * with an EFX_DHCP_END character. If the user hasn't supplied a
+        * sufficiently large buffer then use our own buffer.
         */
        if (sector_length > size) {
                EFSYS_KMEM_ALLOC(enp->en_esip, sector_length, payload);
@@ -370,28 +904,29 @@ efx_bootcfg_read(
                goto fail9;
 
        /* Verify that the area is correctly formatted and checksummed */
-       rc = efx_bootcfg_verify(enp, payload, sector_length,
+       rc = efx_dhcp_verify(payload, sector_length,
            &used_bytes);
        if (rc != 0 || used_bytes == 0) {
                payload[0] = 0;
-               payload[1] = DHCP_END;
+               payload[1] = EFX_DHCP_END;
                used_bytes = 2;
        }
 
-       EFSYS_ASSERT(used_bytes >= 2);  /* checksum and DHCP_END */
+       EFSYS_ASSERT(used_bytes >= 2);  /* checksum and EFX_DHCP_END */
        EFSYS_ASSERT(used_bytes <= sector_length);
 
        /*
-        * Legacy bootcfg sectors don't terminate with a DHCP_END character.
-        * Modify the returned payload so it does. BOOTCFG_MAX_SIZE is by
-        * definition large enough for any valid (per-port) bootcfg sector,
-        * so reinitialise the sector if there isn't room for the character.
+        * Legacy bootcfg sectors don't terminate with an EFX_DHCP_END
+        * character. Modify the returned payload so it does.
+        * BOOTCFG_MAX_SIZE is by definition large enough for any valid
+        * (per-port) bootcfg sector, so reinitialise the sector if there
+        * isn't room for the character.
         */
-       if (payload[used_bytes - 1] != DHCP_END) {
+       if (payload[used_bytes - 1] != EFX_DHCP_END) {
                if (used_bytes >= sector_length)
                        used_bytes = 1;
 
-               payload[used_bytes] = DHCP_END;
+               payload[used_bytes] = EFX_DHCP_END;
                ++used_bytes;
        }
 
@@ -417,10 +952,10 @@ efx_bootcfg_read(
                (void) memset(data + used_bytes, 0, size - used_bytes);
 
        /*
-        * The checksum includes trailing data after any DHCP_END character,
-        * which we've just modified (by truncation or appending DHCP_END).
+        * The checksum includes trailing data after any EFX_DHCP_END character,
+        * which we've just modified (by truncation or appending EFX_DHCP_END).
         */
-       data[0] -= efx_bootcfg_csum(enp, data, size);
+       data[0] -= efx_dhcp_csum(data, size);
 
        return (0);
 
@@ -490,12 +1025,16 @@ efx_bootcfg_write(
                goto fail3;
        }
 
-       if ((rc = efx_bootcfg_verify(enp, data, size, &used_bytes)) != 0)
+       if ((rc = efx_dhcp_verify(data, size, &used_bytes)) != 0)
                goto fail4;
 
-       /* The caller *must* terminate their block with a DHCP_END character */
-       if ((used_bytes < 2) || ((uint8_t)data[used_bytes - 1] != DHCP_END)) {
-               /* Block too short or DHCP_END missing */
+       /*
+        * The caller *must* terminate their block with an EFX_DHCP_END
+        * character.
+        */
+       if ((used_bytes < 2) || ((uint8_t)data[used_bytes - 1] !=
+           EFX_DHCP_END)) {
+               /* Block too short or EFX_DHCP_END missing */
                rc = ENOENT;
                goto fail5;
        }
@@ -528,13 +1067,13 @@ efx_bootcfg_write(
                goto fail9;
 
        /*
-        * Insert the BOOTCFG sector into the partition, Zero out all data after
-        * the DHCP_END tag, and adjust the checksum.
+        * Insert the BOOTCFG sector into the partition, zero out all data
+        * after the EFX_DHCP_END tag, and adjust the checksum.
         */
        (void) memset(partn_data + sector_offset, 0x0, sector_length);
        (void) memcpy(partn_data + sector_offset, data, used_bytes);
 
-       checksum = efx_bootcfg_csum(enp, data, used_bytes);
+       checksum = efx_dhcp_csum(data, used_bytes);
        partn_data[sector_offset] -= checksum;
 
        if ((rc = efx_nvram_erase(enp, EFX_NVRAM_BOOTROM_CFG)) != 0)
index 412298a..a7523b3 100644 (file)
@@ -490,27 +490,42 @@ fail1:
 }
 
 /*
- * Specify inner and outer Ethernet address and VXLAN ID in filter
+ * Specify inner and outer Ethernet address and VNI or VSID in tunnel filter
  * specification.
  */
-       __checkReturn   efx_rc_t
-efx_filter_spec_set_vxlan_full(
-       __inout         efx_filter_spec_t *spec,
-       __in            const uint8_t *vxlan_id,
+static __checkReturn   efx_rc_t
+efx_filter_spec_set_tunnel(
+       __inout efx_filter_spec_t *spec,
+       __in            efx_tunnel_protocol_t encap_type,
+       __in            const uint8_t *vni_or_vsid,
        __in            const uint8_t *inner_addr,
        __in            const uint8_t *outer_addr)
 {
+       efx_rc_t rc;
+
        EFSYS_ASSERT3P(spec, !=, NULL);
-       EFSYS_ASSERT3P(vxlan_id, !=, NULL);
+       EFSYS_ASSERT3P(vni_or_vsid, !=, NULL);
        EFSYS_ASSERT3P(inner_addr, !=, NULL);
        EFSYS_ASSERT3P(outer_addr, !=, NULL);
 
-       if ((inner_addr == NULL) && (outer_addr == NULL))
-               return (EINVAL);
+       switch (encap_type) {
+       case EFX_TUNNEL_PROTOCOL_VXLAN:
+       case EFX_TUNNEL_PROTOCOL_GENEVE:
+       case EFX_TUNNEL_PROTOCOL_NVGRE:
+               break;
+       default:
+               rc = EINVAL;
+               goto fail1;
+       }
+
+       if ((inner_addr == NULL) && (outer_addr == NULL)) {
+               rc = EINVAL;
+               goto fail2;
+       }
 
-       if (vxlan_id != NULL) {
+       if (vni_or_vsid != NULL) {
                spec->efs_match_flags |= EFX_FILTER_MATCH_VNI_OR_VSID;
-               memcpy(spec->efs_vni_or_vsid, vxlan_id, EFX_VNI_OR_VSID_LEN);
+               memcpy(spec->efs_vni_or_vsid, vni_or_vsid, EFX_VNI_OR_VSID_LEN);
        }
        if (outer_addr != NULL) {
                spec->efs_match_flags |= EFX_FILTER_MATCH_LOC_MAC;
@@ -520,10 +535,63 @@ efx_filter_spec_set_vxlan_full(
                spec->efs_match_flags |= EFX_FILTER_MATCH_IFRM_LOC_MAC;
                memcpy(spec->efs_ifrm_loc_mac, inner_addr, EFX_MAC_ADDR_LEN);
        }
+
        spec->efs_match_flags |= EFX_FILTER_MATCH_ENCAP_TYPE;
-       spec->efs_encap_type = EFX_TUNNEL_PROTOCOL_VXLAN;
+       spec->efs_encap_type = encap_type;
 
        return (0);
+
+fail2:
+       EFSYS_PROBE(fail2);
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
+}
+
+/*
+ * Specify inner and outer Ethernet address and VNI in VXLAN filter
+ * specification.
+ */
+__checkReturn          efx_rc_t
+efx_filter_spec_set_vxlan(
+       __inout         efx_filter_spec_t *spec,
+       __in            const uint8_t *vni,
+       __in            const uint8_t *inner_addr,
+       __in            const uint8_t *outer_addr)
+{
+       return efx_filter_spec_set_tunnel(spec, EFX_TUNNEL_PROTOCOL_VXLAN,
+           vni, inner_addr, outer_addr);
+}
+
+/*
+ * Specify inner and outer Ethernet address and VNI in Geneve filter
+ * specification.
+ */
+__checkReturn          efx_rc_t
+efx_filter_spec_set_geneve(
+       __inout         efx_filter_spec_t *spec,
+       __in            const uint8_t *vni,
+       __in            const uint8_t *inner_addr,
+       __in            const uint8_t *outer_addr)
+{
+       return efx_filter_spec_set_tunnel(spec, EFX_TUNNEL_PROTOCOL_GENEVE,
+           vni, inner_addr, outer_addr);
+}
+
+/*
+ * Specify inner and outer Ethernet address and VSID in NVGRE filter
+ * specification.
+ */
+__checkReturn          efx_rc_t
+efx_filter_spec_set_nvgre(
+       __inout         efx_filter_spec_t *spec,
+       __in            const uint8_t *vsid,
+       __in            const uint8_t *inner_addr,
+       __in            const uint8_t *outer_addr)
+{
+       return efx_filter_spec_set_tunnel(spec, EFX_TUNNEL_PROTOCOL_NVGRE,
+           vsid, inner_addr, outer_addr);
 }
 
 #if EFSYS_OPT_RX_SCALE
index 548834f..bad23f8 100644 (file)
@@ -59,6 +59,7 @@ extern "C" {
 #define        EFX_RESET_PHY           0x00000001
 #define        EFX_RESET_RXQ_ERR       0x00000002
 #define        EFX_RESET_TXQ_ERR       0x00000004
+#define        EFX_RESET_HW_UNAVAIL    0x00000008
 
 typedef enum efx_mac_type_e {
        EFX_MAC_INVALID = 0,
@@ -223,6 +224,7 @@ typedef struct efx_phy_ops_s {
        efx_rc_t        (*epo_reconfigure)(efx_nic_t *);
        efx_rc_t        (*epo_verify)(efx_nic_t *);
        efx_rc_t        (*epo_oui_get)(efx_nic_t *, uint32_t *);
+       efx_rc_t        (*epo_link_state_get)(efx_nic_t *, efx_phy_link_state_t *);
 #if EFSYS_OPT_PHY_STATS
        efx_rc_t        (*epo_stats_update)(efx_nic_t *, efsys_mem_t *,
                                            uint32_t *);
@@ -317,6 +319,8 @@ typedef struct efx_mon_ops_s {
 #if EFSYS_OPT_MON_STATS
        efx_rc_t        (*emo_stats_update)(efx_nic_t *, efsys_mem_t *,
                                            efx_mon_stat_value_t *);
+       efx_rc_t        (*emo_limits_update)(efx_nic_t *,
+                                            efx_mon_stat_limits_t *);
 #endif /* EFSYS_OPT_MON_STATS */
 } efx_mon_ops_t;
 
@@ -354,6 +358,8 @@ typedef struct efx_nic_ops_s {
        efx_rc_t        (*eno_get_vi_pool)(efx_nic_t *, uint32_t *);
        efx_rc_t        (*eno_get_bar_region)(efx_nic_t *, efx_nic_region_t,
                                        uint32_t *, size_t *);
+       boolean_t       (*eno_hw_unavailable)(efx_nic_t *);
+       void            (*eno_set_hw_unavailable)(efx_nic_t *);
 #if EFSYS_OPT_DIAG
        efx_rc_t        (*eno_register_test)(efx_nic_t *);
 #endif /* EFSYS_OPT_DIAG */
@@ -507,7 +513,7 @@ typedef struct efx_nvram_ops_s {
                                            uint32_t *, uint16_t *);
        efx_rc_t        (*envo_partn_set_version)(efx_nic_t *, uint32_t,
                                            uint16_t *);
-       efx_rc_t        (*envo_buffer_validate)(efx_nic_t *, uint32_t,
+       efx_rc_t        (*envo_buffer_validate)(uint32_t,
                                            caddr_t, size_t);
 } efx_nvram_ops_t;
 #endif /* EFSYS_OPT_NVRAM */
@@ -583,7 +589,7 @@ efx_mcdi_nvram_write(
        __in                    efx_nic_t *enp,
        __in                    uint32_t partn,
        __in                    uint32_t offset,
-       __out_bcount(size)      caddr_t data,
+       __in_bcount(size)       caddr_t data,
        __in                    size_t size);
 
        __checkReturn           efx_rc_t
index 49c0034..4081aef 100644 (file)
@@ -301,12 +301,11 @@ efx_mcdi_fc_license_update_license(
        __in            efx_nic_t *enp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MC_CMD_FC_IN_LICENSE_LEN];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_FC_IN_LICENSE_LEN, 0);
        efx_rc_t rc;
 
        EFSYS_ASSERT(enp->en_family == EFX_FAMILY_SIENA);
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_FC;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_FC_IN_LICENSE_LEN;
@@ -347,13 +346,12 @@ efx_mcdi_fc_license_get_key_stats(
        __out           efx_key_stats_t *eksp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_FC_IN_LICENSE_LEN,
-                           MC_CMD_FC_OUT_LICENSE_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_FC_IN_LICENSE_LEN,
+               MC_CMD_FC_OUT_LICENSE_LEN);
        efx_rc_t rc;
 
        EFSYS_ASSERT(enp->en_family == EFX_FAMILY_SIENA);
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_FC;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_FC_IN_LICENSE_LEN;
@@ -663,8 +661,8 @@ efx_mcdi_licensed_app_state(
        __out           boolean_t *licensedp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_LICENSED_APP_STATE_IN_LEN,
-                           MC_CMD_GET_LICENSED_APP_STATE_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_LICENSED_APP_STATE_IN_LEN,
+               MC_CMD_GET_LICENSED_APP_STATE_OUT_LEN);
        uint32_t app_state;
        efx_rc_t rc;
 
@@ -676,7 +674,6 @@ efx_mcdi_licensed_app_state(
                goto fail1;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_LICENSED_APP_STATE;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_LICENSED_APP_STATE_IN_LEN;
@@ -722,12 +719,11 @@ efx_mcdi_licensing_update_licenses(
        __in            efx_nic_t *enp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MC_CMD_LICENSING_IN_LEN];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_LICENSING_IN_LEN, 0);
        efx_rc_t rc;
 
        EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON);
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_LICENSING;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_LICENSING_IN_LEN;
@@ -765,13 +761,12 @@ efx_mcdi_licensing_get_key_stats(
        __out           efx_key_stats_t *eksp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_LICENSING_IN_LEN,
-                           MC_CMD_LICENSING_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_LICENSING_IN_LEN,
+               MC_CMD_LICENSING_OUT_LEN);
        efx_rc_t rc;
 
        EFSYS_ASSERT(enp->en_family == EFX_FAMILY_HUNTINGTON);
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_LICENSING;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_LICENSING_IN_LEN;
@@ -829,13 +824,12 @@ efx_mcdi_licensing_v3_update_licenses(
        __in            efx_nic_t *enp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MC_CMD_LICENSING_V3_IN_LEN];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_LICENSING_V3_IN_LEN, 0);
        efx_rc_t rc;
 
        EFSYS_ASSERT((enp->en_family == EFX_FAMILY_MEDFORD) ||
            (enp->en_family == EFX_FAMILY_MEDFORD2));
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_LICENSING_V3;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_LICENSING_V3_IN_LEN;
@@ -866,14 +860,13 @@ efx_mcdi_licensing_v3_report_license(
        __out           efx_key_stats_t *eksp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_LICENSING_V3_IN_LEN,
-                           MC_CMD_LICENSING_V3_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_LICENSING_V3_IN_LEN,
+               MC_CMD_LICENSING_V3_OUT_LEN);
        efx_rc_t rc;
 
        EFSYS_ASSERT((enp->en_family == EFX_FAMILY_MEDFORD) ||
            (enp->en_family == EFX_FAMILY_MEDFORD2));
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_LICENSING_V3;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_LICENSING_V3_IN_LEN;
@@ -930,15 +923,14 @@ efx_mcdi_licensing_v3_app_state(
        __out           boolean_t *licensedp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_LICENSED_V3_APP_STATE_IN_LEN,
-                           MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_LICENSED_V3_APP_STATE_IN_LEN,
+               MC_CMD_GET_LICENSED_V3_APP_STATE_OUT_LEN);
        uint32_t app_state;
        efx_rc_t rc;
 
        EFSYS_ASSERT((enp->en_family == EFX_FAMILY_MEDFORD) ||
            (enp->en_family == EFX_FAMILY_MEDFORD2));
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_LICENSED_V3_APP_STATE;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_LICENSED_V3_APP_STATE_IN_LEN;
@@ -990,28 +982,15 @@ efx_mcdi_licensing_v3_get_id(
                        uint8_t *bufferp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_LICENSING_GET_ID_V3_IN_LEN,
-                           MC_CMD_LICENSING_GET_ID_V3_OUT_LENMIN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_LICENSING_GET_ID_V3_IN_LEN,
+               MC_CMD_LICENSING_GET_ID_V3_OUT_LENMAX);
        efx_rc_t rc;
 
        req.emr_cmd = MC_CMD_LICENSING_GET_ID_V3;
-
-       if (bufferp == NULL) {
-               /* Request id type and length only */
-               req.emr_in_buf = bufferp;
-               req.emr_in_length = MC_CMD_LICENSING_GET_ID_V3_IN_LEN;
-               req.emr_out_buf = bufferp;
-               req.emr_out_length = MC_CMD_LICENSING_GET_ID_V3_OUT_LENMIN;
-               (void) memset(payload, 0, sizeof (payload));
-       } else {
-               /* Request full buffer */
-               req.emr_in_buf = bufferp;
-               req.emr_in_length = MC_CMD_LICENSING_GET_ID_V3_IN_LEN;
-               req.emr_out_buf = bufferp;
-               req.emr_out_length =
-                   MIN(buffer_size, MC_CMD_LICENSING_GET_ID_V3_OUT_LENMAX);
-               (void) memset(bufferp, 0, req.emr_out_length);
-       }
+       req.emr_in_buf = payload;
+       req.emr_in_length = MC_CMD_LICENSING_GET_ID_V3_IN_LEN;
+       req.emr_out_buf = payload;
+       req.emr_out_length = MC_CMD_LICENSING_GET_ID_V3_OUT_LENMAX;
 
        efx_mcdi_execute_quiet(enp, &req);
 
@@ -1029,19 +1008,10 @@ efx_mcdi_licensing_v3_get_id(
        *lengthp =
            MCDI_OUT_DWORD(req, LICENSING_GET_ID_V3_OUT_LICENSE_ID_LENGTH);
 
-       if (bufferp == NULL) {
-               /*
-                * Modify length requirements to indicate to caller the extra
-                * buffering needed to read the complete output.
-                */
-               *lengthp += MC_CMD_LICENSING_GET_ID_V3_OUT_LENMIN;
-       } else {
-               /* Shift ID down to start of buffer */
-               memmove(bufferp,
-                   bufferp + MC_CMD_LICENSING_GET_ID_V3_OUT_LICENSE_ID_OFST,
-                   *lengthp);
-               memset(bufferp + (*lengthp), 0,
-                   MC_CMD_LICENSING_GET_ID_V3_OUT_LICENSE_ID_OFST);
+       if (bufferp != NULL) {
+               memcpy(bufferp,
+                   payload + MC_CMD_LICENSING_GET_ID_V3_OUT_LICENSE_ID_OFST,
+                   MIN(buffer_size, *lengthp));
        }
 
        return (0);
@@ -1158,10 +1128,12 @@ efx_lic_v3_read_key(
        __in                    size_t key_max_size,
        __out                   uint32_t *lengthp)
 {
+       uint32_t tag;
+
        _NOTE(ARGUNUSED(enp))
 
        return ef10_nvram_buffer_get_item(bufferp, buffer_size,
-                   offset, length, keyp, key_max_size, lengthp);
+                   offset, length, &tag, keyp, key_max_size, lengthp);
 }
 
        __checkReturn           efx_rc_t
@@ -1179,7 +1151,7 @@ efx_lic_v3_write_key(
        EFSYS_ASSERT(length <= EFX_LICENSE_V3_KEY_LENGTH_MAX);
 
        return ef10_nvram_buffer_insert_item(bufferp, buffer_size,
-                   offset, keyp, length, lengthp);
+                   offset, TLV_TAG_LICENSE, keyp, length, lengthp);
 }
 
        __checkReturn           efx_rc_t
@@ -1221,8 +1193,10 @@ efx_lic_v3_create_partition(
 {
        efx_rc_t rc;
 
+       _NOTE(ARGUNUSED(enp))
+
        /* Construct empty partition */
-       if ((rc = ef10_nvram_buffer_create(enp,
+       if ((rc = ef10_nvram_buffer_create(
            NVRAM_PARTITION_TYPE_LICENSE,
            bufferp, buffer_size)) != 0) {
                rc = EFAULT;
@@ -1246,13 +1220,16 @@ efx_lic_v3_finish_partition(
 {
        efx_rc_t rc;
 
+       _NOTE(ARGUNUSED(enp))
+
        if ((rc = ef10_nvram_buffer_finish(bufferp,
                        buffer_size)) != 0) {
                goto fail1;
        }
 
        /* Validate completed partition */
-       if ((rc = ef10_nvram_buffer_validate(enp, NVRAM_PARTITION_TYPE_LICENSE,
+       if ((rc = ef10_nvram_buffer_validate(
+                                       NVRAM_PARTITION_TYPE_LICENSE,
                                        bufferp, buffer_size)) != 0) {
                goto fail2;
        }
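The reworked efx_mcdi_licensing_v3_get_id() above always stages the MCDI response in its local payload buffer and copies out at most MIN(buffer_size, *lengthp) bytes, so a caller can size its buffer with a first probe call. A minimal caller sketch, assuming the upstream parameter order (enp, buffer_size, typep, lengthp, bufferp); the allocator and consumer names are illustrative, not upstream code:

	uint32_t type;
	uint32_t length;
	uint8_t *buf;

	/* Probe: with bufferp == NULL, only the ID type and length are
	 * reported; nothing is copied. Assumes <stdlib.h> for malloc(). */
	if (efx_mcdi_licensing_v3_get_id(enp, 0, &type, &length, NULL) != 0)
		return;

	if ((buf = malloc(length)) == NULL)
		return;

	/* Fetch: the ID is copied out of the staged payload, truncated to
	 * the caller's buffer_size if that is smaller than the ID. */
	if (efx_mcdi_licensing_v3_get_id(enp, length, &type, &length,
	    buf) == 0)
		consume_license_id(buf, length);	/* illustrative */
	free(buf);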
index d4ebcf2..c896aa0 100644
@@ -496,6 +496,12 @@ efx_mcdi_request_poll(
        EFSYS_ASSERT(!emip->emi_ev_cpl);
        emrp = emip->emi_pending_req;
 
+       /* Check if hardware is unavailable */
+       if (efx_nic_hw_unavailable(enp)) {
+               EFSYS_UNLOCK(enp->en_eslp, state);
+               return (B_FALSE);
+       }
+
        /* Check for reboot atomically w.r.t efx_mcdi_request_start */
        if (emip->emi_poll_cnt++ == 0) {
                if ((rc = efx_mcdi_poll_reboot(enp)) != 0) {
@@ -900,10 +906,10 @@ efx_mcdi_version(
        __out_opt               efx_mcdi_boot_t *statusp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MAX(MC_CMD_GET_VERSION_IN_LEN,
-                               MC_CMD_GET_VERSION_OUT_LEN),
-                           MAX(MC_CMD_GET_BOOT_STATUS_IN_LEN,
-                               MC_CMD_GET_BOOT_STATUS_OUT_LEN))];
+       EFX_MCDI_DECLARE_BUF(payload,
+               MAX(MC_CMD_GET_VERSION_IN_LEN, MC_CMD_GET_BOOT_STATUS_IN_LEN),
+               MAX(MC_CMD_GET_VERSION_OUT_LEN,
+                       MC_CMD_GET_BOOT_STATUS_OUT_LEN));
        efx_word_t *ver_words;
        uint16_t version[4];
        uint32_t build;
@@ -912,7 +918,6 @@ efx_mcdi_version(
 
        EFSYS_ASSERT3U(enp->en_features, &, EFX_FEATURE_MCDI);
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_VERSION;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_VERSION_IN_LEN;
@@ -1018,12 +1023,11 @@ efx_mcdi_get_capabilities(
        __out_opt       uint32_t *tso2ncp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_CAPABILITIES_IN_LEN,
-                           MC_CMD_GET_CAPABILITIES_V2_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_CAPABILITIES_IN_LEN,
+               MC_CMD_GET_CAPABILITIES_V2_OUT_LEN);
        boolean_t v2_capable;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_CAPABILITIES;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_CAPABILITIES_IN_LEN;
@@ -1086,7 +1090,8 @@ efx_mcdi_do_reboot(
        __in            efx_nic_t *enp,
        __in            boolean_t after_assertion)
 {
-       uint8_t payload[MAX(MC_CMD_REBOOT_IN_LEN, MC_CMD_REBOOT_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_REBOOT_IN_LEN,
+               MC_CMD_REBOOT_OUT_LEN);
        efx_mcdi_req_t req;
        efx_rc_t rc;
 
@@ -1099,7 +1104,6 @@ efx_mcdi_do_reboot(
         */
        EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_REBOOT;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_REBOOT_IN_LEN;
@@ -1150,8 +1154,8 @@ efx_mcdi_read_assertion(
        __in            efx_nic_t *enp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_ASSERTS_IN_LEN,
-                           MC_CMD_GET_ASSERTS_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_ASSERTS_IN_LEN,
+               MC_CMD_GET_ASSERTS_OUT_LEN);
        const char *reason;
        unsigned int flags;
        unsigned int index;
@@ -1252,11 +1256,10 @@ efx_mcdi_drv_attach(
        __in            boolean_t attach)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_DRV_ATTACH_IN_LEN,
-                           MC_CMD_DRV_ATTACH_EXT_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_DRV_ATTACH_IN_LEN,
+               MC_CMD_DRV_ATTACH_EXT_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_DRV_ATTACH;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_DRV_ATTACH_IN_LEN;
@@ -1311,11 +1314,10 @@ efx_mcdi_get_board_cfg(
 {
        efx_mcdi_iface_t *emip = &(enp->en_mcdi.em_emip);
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_BOARD_CFG_IN_LEN,
-                           MC_CMD_GET_BOARD_CFG_OUT_LENMIN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_BOARD_CFG_IN_LEN,
+               MC_CMD_GET_BOARD_CFG_OUT_LENMIN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_BOARD_CFG;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_BOARD_CFG_IN_LEN;
@@ -1391,11 +1393,10 @@ efx_mcdi_get_resource_limits(
        __out_opt       uint32_t *ntxqp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_RESOURCE_LIMITS_IN_LEN,
-                           MC_CMD_GET_RESOURCE_LIMITS_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_RESOURCE_LIMITS_IN_LEN,
+               MC_CMD_GET_RESOURCE_LIMITS_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_RESOURCE_LIMITS;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_RESOURCE_LIMITS_IN_LEN;
@@ -1438,8 +1439,8 @@ efx_mcdi_get_phy_cfg(
        efx_port_t *epp = &(enp->en_port);
        efx_nic_cfg_t *encp = &(enp->en_nic_cfg);
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_PHY_CFG_IN_LEN,
-                           MC_CMD_GET_PHY_CFG_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_PHY_CFG_IN_LEN,
+               MC_CMD_GET_PHY_CFG_OUT_LEN);
 #if EFSYS_OPT_NAMES
        const char *namep;
        size_t namelen;
@@ -1447,7 +1448,6 @@ efx_mcdi_get_phy_cfg(
        uint32_t phy_media_type;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_PHY_CFG;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_PHY_CFG_IN_LEN;
@@ -1686,11 +1686,10 @@ efx_mcdi_bist_start(
        __in                    efx_bist_type_t type)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_START_BIST_IN_LEN,
-                           MC_CMD_START_BIST_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_START_BIST_IN_LEN,
+               MC_CMD_START_BIST_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_START_BIST;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_START_BIST_IN_LEN;
@@ -1749,11 +1748,10 @@ efx_mcdi_log_ctrl(
        __in            efx_nic_t *enp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_LOG_CTRL_IN_LEN,
-                           MC_CMD_LOG_CTRL_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_LOG_CTRL_IN_LEN,
+               MC_CMD_LOG_CTRL_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_LOG_CTRL;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_LOG_CTRL_IN_LEN;
@@ -1798,8 +1796,8 @@ efx_mcdi_mac_stats(
        __in            uint16_t period_ms)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_MAC_STATS_IN_LEN,
-                           MC_CMD_MAC_STATS_V2_OUT_DMA_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_MAC_STATS_IN_LEN,
+               MC_CMD_MAC_STATS_V2_OUT_DMA_LEN);
        int clear = (action == EFX_STATS_CLEAR);
        int upload = (action == EFX_STATS_UPLOAD);
        int enable = (action == EFX_STATS_ENABLE_NOEVENTS);
@@ -1807,7 +1805,6 @@ efx_mcdi_mac_stats(
        int disable = (action == EFX_STATS_DISABLE);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_MAC_STATS;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_MAC_STATS_IN_LEN;
@@ -1979,11 +1976,10 @@ efx_mcdi_get_function_info(
        __out_opt               uint32_t *vfp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_FUNCTION_INFO_IN_LEN,
-                           MC_CMD_GET_FUNCTION_INFO_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_FUNCTION_INFO_IN_LEN,
+               MC_CMD_GET_FUNCTION_INFO_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_FUNCTION_INFO;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_FUNCTION_INFO_IN_LEN;
@@ -2024,11 +2020,10 @@ efx_mcdi_privilege_mask(
        __out                   uint32_t *maskp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_PRIVILEGE_MASK_IN_LEN,
-                           MC_CMD_PRIVILEGE_MASK_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_PRIVILEGE_MASK_IN_LEN,
+               MC_CMD_PRIVILEGE_MASK_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_PRIVILEGE_MASK;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_PRIVILEGE_MASK_IN_LEN;
@@ -2073,11 +2068,10 @@ efx_mcdi_set_workaround(
        __out_opt               uint32_t *flagsp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_WORKAROUND_IN_LEN,
-                           MC_CMD_WORKAROUND_EXT_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_WORKAROUND_IN_LEN,
+               MC_CMD_WORKAROUND_EXT_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_WORKAROUND;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_WORKAROUND_IN_LEN;
@@ -2117,10 +2111,9 @@ efx_mcdi_get_workarounds(
        __out_opt               uint32_t *enabledp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MC_CMD_GET_WORKAROUNDS_OUT_LEN];
+       EFX_MCDI_DECLARE_BUF(payload, 0, MC_CMD_GET_WORKAROUNDS_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_WORKAROUNDS;
        req.emr_in_buf = NULL;
        req.emr_in_length = 0;
@@ -2157,6 +2150,14 @@ fail1:
  */
 #define        EFX_PHY_MEDIA_INFO_PAGE_SIZE            0x80
 
+/*
+ * Transceiver identifiers from SFF-8024 Table 4-1.
+ */
+#define        EFX_SFF_TRANSCEIVER_ID_SFP              0x03 /* SFP/SFP+/SFP28 */
+#define        EFX_SFF_TRANSCEIVER_ID_QSFP             0x0c /* QSFP */
+#define        EFX_SFF_TRANSCEIVER_ID_QSFP_PLUS        0x0d /* QSFP+ or later */
+#define        EFX_SFF_TRANSCEIVER_ID_QSFP28           0x11 /* QSFP28 or later */
+
 static __checkReturn           efx_rc_t
 efx_mcdi_get_phy_media_info(
        __in                    efx_nic_t *enp,
@@ -2166,14 +2167,13 @@ efx_mcdi_get_phy_media_info(
        __out_bcount(len)       uint8_t *data)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_PHY_MEDIA_INFO_IN_LEN,
-                           MC_CMD_GET_PHY_MEDIA_INFO_OUT_LEN(
-                               EFX_PHY_MEDIA_INFO_PAGE_SIZE))];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_PHY_MEDIA_INFO_IN_LEN,
+               MC_CMD_GET_PHY_MEDIA_INFO_OUT_LEN(
+                       EFX_PHY_MEDIA_INFO_PAGE_SIZE));
        efx_rc_t rc;
 
        EFSYS_ASSERT((uint32_t)offset + len <= EFX_PHY_MEDIA_INFO_PAGE_SIZE);
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_PHY_MEDIA_INFO;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_PHY_MEDIA_INFO_IN_LEN;
@@ -2218,39 +2218,19 @@ fail1:
        return (rc);
 }
 
-/*
- * 2-wire device address of the base information in accordance with SFF-8472
- * Diagnostic Monitoring Interface for Optical Transceivers section
- * 4 Memory Organization.
- */
-#define        EFX_PHY_MEDIA_INFO_DEV_ADDR_SFP_BASE    0xA0
-
-/*
- * 2-wire device address of the digital diagnostics monitoring interface
- * in accordance with SFF-8472 Diagnostic Monitoring Interface for Optical
- * Transceivers section 4 Memory Organization.
- */
-#define        EFX_PHY_MEDIA_INFO_DEV_ADDR_SFP_DDM     0xA2
-
-/*
- * Hard wired 2-wire device address for QSFP+ in accordance with SFF-8436
- * QSFP+ 10 Gbs 4X PLUGGABLE TRANSCEIVER section 7.4 Device Addressing and
- * Operation.
- */
-#define        EFX_PHY_MEDIA_INFO_DEV_ADDR_QSFP        0xA0
-
        __checkReturn           efx_rc_t
 efx_mcdi_phy_module_get_info(
        __in                    efx_nic_t *enp,
        __in                    uint8_t dev_addr,
-       __in                    uint8_t offset,
-       __in                    uint8_t len,
+       __in                    size_t offset,
+       __in                    size_t len,
        __out_bcount(len)       uint8_t *data)
 {
        efx_port_t *epp = &(enp->en_port);
        efx_rc_t rc;
        uint32_t mcdi_lower_page;
        uint32_t mcdi_upper_page;
+       uint8_t id;
 
        EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_PROBE);
 
@@ -2264,6 +2244,26 @@ efx_mcdi_phy_module_get_info(
         */
        switch (epp->ep_fixed_port_type) {
        case EFX_PHY_MEDIA_SFP_PLUS:
+       case EFX_PHY_MEDIA_QSFP_PLUS:
+               /* Port type supports modules */
+               break;
+       default:
+               rc = ENOTSUP;
+               goto fail1;
+       }
+
+       /*
+        * For all supported port types, MCDI page 0 offset 0 holds the
+        * transceiver identifier. Probe to determine the data layout.
+        * Definitions from SFF-8024 Table 4-1.
+        */
+       rc = efx_mcdi_get_phy_media_info(enp,
+                   0, 0, sizeof(id), &id);
+       if (rc != 0)
+               goto fail2;
+
+       switch (id) {
+       case EFX_SFF_TRANSCEIVER_ID_SFP:
                /*
                 * In accordance with SFF-8472 Diagnostic Monitoring
                 * Interface for Optical Transceivers section 4 Memory
@@ -2298,10 +2298,12 @@ efx_mcdi_phy_module_get_info(
                        break;
                default:
                        rc = ENOTSUP;
-                       goto fail1;
+                       goto fail3;
                }
                break;
-       case EFX_PHY_MEDIA_QSFP_PLUS:
+       case EFX_SFF_TRANSCEIVER_ID_QSFP:
+       case EFX_SFF_TRANSCEIVER_ID_QSFP_PLUS:
+       case EFX_SFF_TRANSCEIVER_ID_QSFP28:
                switch (dev_addr) {
                case EFX_PHY_MEDIA_INFO_DEV_ADDR_QSFP:
                        /*
@@ -2317,22 +2319,24 @@ efx_mcdi_phy_module_get_info(
                        break;
                default:
                        rc = ENOTSUP;
-                       goto fail1;
+                       goto fail3;
                }
                break;
        default:
                rc = ENOTSUP;
-               goto fail1;
+               goto fail3;
        }
 
+       EFX_STATIC_ASSERT(EFX_PHY_MEDIA_INFO_PAGE_SIZE <= 0xFF);
+
        if (offset < EFX_PHY_MEDIA_INFO_PAGE_SIZE) {
-               uint8_t read_len =
+               size_t read_len =
                    MIN(len, EFX_PHY_MEDIA_INFO_PAGE_SIZE - offset);
 
                rc = efx_mcdi_get_phy_media_info(enp,
-                   mcdi_lower_page, offset, read_len, data);
+                   mcdi_lower_page, (uint8_t)offset, (uint8_t)read_len, data);
                if (rc != 0)
-                       goto fail2;
+                       goto fail4;
 
                data += read_len;
                len -= read_len;
@@ -2347,13 +2351,17 @@ efx_mcdi_phy_module_get_info(
                EFSYS_ASSERT3U(offset, <, EFX_PHY_MEDIA_INFO_PAGE_SIZE);
 
                rc = efx_mcdi_get_phy_media_info(enp,
-                   mcdi_upper_page, offset, len, data);
+                   mcdi_upper_page, (uint8_t)offset, (uint8_t)len, data);
                if (rc != 0)
-                       goto fail3;
+                       goto fail5;
        }
 
        return (0);
 
+fail5:
+       EFSYS_PROBE(fail5);
+fail4:
+       EFSYS_PROBE(fail4);
 fail3:
        EFSYS_PROBE(fail3);
 fail2:
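The module-info rework above no longer infers the EEPROM layout from the fixed port type alone: byte 0 of MCDI page 0 carries the SFF-8024 identifier, and the added switch selects SFP (SFF-8472, 0xA0 base plus 0xA2 DDM) or QSFP (SFF-8436, single 0xA0 device) addressing from it. A condensed sketch of that decision, using the identifiers defined above (the helper itself is illustrative, not upstream code):

	/* Classify an SFF-8024 identifier byte read from page 0 offset 0. */
	static boolean_t
	sff8024_id_is_qsfp(uint8_t id)
	{
		switch (id) {
		case EFX_SFF_TRANSCEIVER_ID_QSFP:
		case EFX_SFF_TRANSCEIVER_ID_QSFP_PLUS:
		case EFX_SFF_TRANSCEIVER_ID_QSFP28:
			return (B_TRUE);	/* SFF-8436: one 0xA0 device */
		default:
			return (B_FALSE);	/* SFF-8472: 0xA0 base, 0xA2 DDM */
		}
	}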
index 253a9e6..ddf91c1 100644
 #include "efx.h"
 #include "efx_regs_mcdi.h"
 
+#if EFSYS_OPT_NAMES
+#include "mc_driver_pcol_strs.h"
+#endif /* EFSYS_OPT_NAMES */
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -215,8 +219,8 @@ extern      __checkReturn   efx_rc_t
 efx_mcdi_phy_module_get_info(
        __in                    efx_nic_t *enp,
        __in                    uint8_t dev_addr,
-       __in                    uint8_t offset,
-       __in                    uint8_t len,
+       __in                    size_t offset,
+       __in                    size_t len,
        __out_bcount(len)       uint8_t *data);
 
 #define        MCDI_IN(_emr, _type, _ofst)                                     \
@@ -380,6 +384,17 @@ efx_mcdi_phy_module_get_info(
        (((mask) & (MC_CMD_PRIVILEGE_MASK_IN_GRP_ ## priv)) ==          \
        (MC_CMD_PRIVILEGE_MASK_IN_GRP_ ## priv))
 
+/*
+ * The buffer size must be a multiple of a dword to ensure that MCDI works
+ * properly with Siena-based boards (which use an on-chip buffer). It must
+ * also be at least two dwords, to leave space for extended error responses
+ * when the request/response buffer sizes are smaller.
+ */
+#define EFX_MCDI_DECLARE_BUF(_name, _in_len, _out_len)                 \
+       uint8_t _name[P2ROUNDUP(MAX(MAX(_in_len, _out_len),             \
+                                   (2 * sizeof (efx_dword_t))),        \
+                               sizeof (efx_dword_t))] = {0}
+
 typedef enum efx_mcdi_feature_id_e {
        EFX_MCDI_FEATURE_FW_UPDATE = 0,
        EFX_MCDI_FEATURE_LINK_CONTROL,
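Every MCDI request in this patch is converted to this macro: the array is sized to the dword-rounded maximum of the in/out lengths, never smaller than two dwords (room for an extended error response), and the "= {0}" initializer replaces the explicit memset() at each call site. The resulting declaration pattern, as used throughout the hunks above (MC_CMD_GET_VERSION shown as one example):

	efx_mcdi_req_t req;
	EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_VERSION_IN_LEN,
		MC_CMD_GET_VERSION_OUT_LEN);

	/* No memset() needed: the initializer zeroes the whole buffer. */
	req.emr_cmd = MC_CMD_GET_VERSION;
	req.emr_in_buf = payload;
	req.emr_in_length = MC_CMD_GET_VERSION_IN_LEN;
	req.emr_out_buf = payload;
	req.emr_out_length = MC_CMD_GET_VERSION_OUT_LEN;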
index 9fc268e..f28775d 100644
@@ -38,7 +38,8 @@ efx_mon_name(
 #if EFSYS_OPT_MON_MCDI
 static const efx_mon_ops_t     __efx_mon_mcdi_ops = {
 #if EFSYS_OPT_MON_STATS
-       mcdi_mon_stats_update           /* emo_stats_update */
+       mcdi_mon_stats_update,          /* emo_stats_update */
+       mcdi_mon_limits_update,         /* emo_limits_update */
 #endif /* EFSYS_OPT_MON_STATS */
 };
 #endif
@@ -99,77 +100,74 @@ fail1:
 
 #if EFSYS_OPT_NAMES
 
-/* START MKCONFIG GENERATED MonitorStatNamesBlock 8150a068198c0f96 */
+/* START MKCONFIG GENERATED MonitorStatNamesBlock 277c17eda1a6d1a4 */
 static const char * const __mon_stat_name[] = {
-       "value_2_5v",
-       "value_vccp1",
-       "value_vcc",
-       "value_5v",
-       "value_12v",
-       "value_vccp2",
-       "value_ext_temp",
-       "value_int_temp",
-       "value_ain1",
-       "value_ain2",
+       "controller_temp",
+       "phy_common_temp",
        "controller_cooling",
-       "ext_cooling",
-       "1v",
-       "1_2v",
-       "1_8v",
-       "3_3v",
-       "1_2va",
-       "vref",
-       "vaoe",
-       "aoe_temperature",
-       "psu_aoe_temperature",
-       "psu_temperature",
-       "fan0",
-       "fan1",
-       "fan2",
-       "fan3",
-       "fan4",
-       "vaoe_in",
-       "iaoe",
-       "iaoe_in",
+       "phy0_temp",
+       "phy0_cooling",
+       "phy1_temp",
+       "phy1_cooling",
+       "in_1v0",
+       "in_1v2",
+       "in_1v8",
+       "in_2v5",
+       "in_3v3",
+       "in_12v0",
+       "in_1v2a",
+       "in_vref",
+       "out_vaoe",
+       "aoe_temp",
+       "psu_aoe_temp",
+       "psu_temp",
+       "fan_0",
+       "fan_1",
+       "fan_2",
+       "fan_3",
+       "fan_4",
+       "in_vaoe",
+       "out_iaoe",
+       "in_iaoe",
        "nic_power",
-       "0_9v",
-       "i0_9v",
-       "i1_2v",
-       "0_9v_adc",
-       "controller_temperature2",
-       "vreg_temperature",
-       "vreg_0_9v_temperature",
-       "vreg_1_2v_temperature",
-       "int_vptat",
-       "controller_internal_adc_temperature",
-       "ext_vptat",
-       "controller_external_adc_temperature",
-       "ambient_temperature",
+       "in_0v9",
+       "in_i0v9",
+       "in_i1v2",
+       "in_0v9_adc",
+       "controller_2_temp",
+       "vreg_internal_temp",
+       "vreg_0v9_temp",
+       "vreg_1v2_temp",
+       "controller_vptat",
+       "controller_internal_temp",
+       "controller_vptat_extadc",
+       "controller_internal_temp_extadc",
+       "ambient_temp",
        "airflow",
        "vdd08d_vss08d_csr",
        "vdd08d_vss08d_csr_extadc",
-       "hotpoint_temperature",
-       "phy_power_switch_port0",
-       "phy_power_switch_port1",
+       "hotpoint_temp",
+       "phy_power_port0",
+       "phy_power_port1",
        "mum_vcc",
-       "0v9_a",
-       "i0v9_a",
-       "0v9_a_temp",
-       "0v9_b",
-       "i0v9_b",
-       "0v9_b_temp",
+       "in_0v9_a",
+       "in_i0v9_a",
+       "vreg_0v9_a_temp",
+       "in_0v9_b",
+       "in_i0v9_b",
+       "vreg_0v9_b_temp",
        "ccom_avreg_1v2_supply",
-       "ccom_avreg_1v2_supply_ext_adc",
+       "ccom_avreg_1v2_supply_extadc",
        "ccom_avreg_1v8_supply",
-       "ccom_avreg_1v8_supply_ext_adc",
+       "ccom_avreg_1v8_supply_extadc",
        "controller_master_vptat",
        "controller_master_internal_temp",
-       "controller_master_vptat_ext_adc",
-       "controller_master_internal_temp_ext_adc",
+       "controller_master_vptat_extadc",
+       "controller_master_internal_temp_extadc",
        "controller_slave_vptat",
        "controller_slave_internal_temp",
-       "controller_slave_vptat_ext_adc",
-       "controller_slave_internal_temp_ext_adc",
+       "controller_slave_vptat_extadc",
+       "controller_slave_internal_temp_extadc",
        "sodimm_vout",
        "sodimm_0_temp",
        "sodimm_1_temp",
@@ -178,17 +176,17 @@ static const char * const __mon_stat_name[] = {
        "controller_tdiode_temp",
        "board_front_temp",
        "board_back_temp",
-       "i1v8",
-       "i2v5",
-       "i3v3",
-       "i12v0",
-       "1v3",
-       "i1v3",
+       "in_i1v8",
+       "in_i2v5",
+       "in_i3v3",
+       "in_i12v0",
+       "in_1v3",
+       "in_i1v3",
 };
 
 /* END MKCONFIG GENERATED MonitorStatNamesBlock */
 
-extern                                 const char *
+                                       const char *
 efx_mon_stat_name(
        __in                            efx_nic_t *enp,
        __in                            efx_mon_stat_t id)
@@ -200,8 +198,609 @@ efx_mon_stat_name(
        return (__mon_stat_name[id]);
 }
 
+typedef struct _stat_description_t {
+       efx_mon_stat_t  stat;
+       const char      *desc;
+} stat_description_t;
+
+/* START MKCONFIG GENERATED MonitorStatDescriptionsBlock f072138f16d2e1f8 */
+static const char *__mon_stat_description[] = {
+       MC_CMD_SENSOR_CONTROLLER_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_PHY_COMMON_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_COOLING_ENUM_STR,
+       MC_CMD_SENSOR_PHY0_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_PHY0_COOLING_ENUM_STR,
+       MC_CMD_SENSOR_PHY1_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_PHY1_COOLING_ENUM_STR,
+       MC_CMD_SENSOR_IN_1V0_ENUM_STR,
+       MC_CMD_SENSOR_IN_1V2_ENUM_STR,
+       MC_CMD_SENSOR_IN_1V8_ENUM_STR,
+       MC_CMD_SENSOR_IN_2V5_ENUM_STR,
+       MC_CMD_SENSOR_IN_3V3_ENUM_STR,
+       MC_CMD_SENSOR_IN_12V0_ENUM_STR,
+       MC_CMD_SENSOR_IN_1V2A_ENUM_STR,
+       MC_CMD_SENSOR_IN_VREF_ENUM_STR,
+       MC_CMD_SENSOR_OUT_VAOE_ENUM_STR,
+       MC_CMD_SENSOR_AOE_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_PSU_AOE_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_PSU_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_FAN_0_ENUM_STR,
+       MC_CMD_SENSOR_FAN_1_ENUM_STR,
+       MC_CMD_SENSOR_FAN_2_ENUM_STR,
+       MC_CMD_SENSOR_FAN_3_ENUM_STR,
+       MC_CMD_SENSOR_FAN_4_ENUM_STR,
+       MC_CMD_SENSOR_IN_VAOE_ENUM_STR,
+       MC_CMD_SENSOR_OUT_IAOE_ENUM_STR,
+       MC_CMD_SENSOR_IN_IAOE_ENUM_STR,
+       MC_CMD_SENSOR_NIC_POWER_ENUM_STR,
+       MC_CMD_SENSOR_IN_0V9_ENUM_STR,
+       MC_CMD_SENSOR_IN_I0V9_ENUM_STR,
+       MC_CMD_SENSOR_IN_I1V2_ENUM_STR,
+       MC_CMD_SENSOR_IN_0V9_ADC_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_2_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_VREG_INTERNAL_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_VREG_0V9_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_VREG_1V2_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_VPTAT_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_VPTAT_EXTADC_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP_EXTADC_ENUM_STR,
+       MC_CMD_SENSOR_AMBIENT_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_AIRFLOW_ENUM_STR,
+       MC_CMD_SENSOR_VDD08D_VSS08D_CSR_ENUM_STR,
+       MC_CMD_SENSOR_VDD08D_VSS08D_CSR_EXTADC_ENUM_STR,
+       MC_CMD_SENSOR_HOTPOINT_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_PHY_POWER_PORT0_ENUM_STR,
+       MC_CMD_SENSOR_PHY_POWER_PORT1_ENUM_STR,
+       MC_CMD_SENSOR_MUM_VCC_ENUM_STR,
+       MC_CMD_SENSOR_IN_0V9_A_ENUM_STR,
+       MC_CMD_SENSOR_IN_I0V9_A_ENUM_STR,
+       MC_CMD_SENSOR_VREG_0V9_A_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_IN_0V9_B_ENUM_STR,
+       MC_CMD_SENSOR_IN_I0V9_B_ENUM_STR,
+       MC_CMD_SENSOR_VREG_0V9_B_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY_ENUM_STR,
+       MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY_EXTADC_ENUM_STR,
+       MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY_ENUM_STR,
+       MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY_EXTADC_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT_EXTADC_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT_EXTADC_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC_ENUM_STR,
+       MC_CMD_SENSOR_SODIMM_VOUT_ENUM_STR,
+       MC_CMD_SENSOR_SODIMM_0_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_SODIMM_1_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_PHY0_VCC_ENUM_STR,
+       MC_CMD_SENSOR_PHY1_VCC_ENUM_STR,
+       MC_CMD_SENSOR_CONTROLLER_TDIODE_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_BOARD_FRONT_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_BOARD_BACK_TEMP_ENUM_STR,
+       MC_CMD_SENSOR_IN_I1V8_ENUM_STR,
+       MC_CMD_SENSOR_IN_I2V5_ENUM_STR,
+       MC_CMD_SENSOR_IN_I3V3_ENUM_STR,
+       MC_CMD_SENSOR_IN_I12V0_ENUM_STR,
+       MC_CMD_SENSOR_IN_1V3_ENUM_STR,
+       MC_CMD_SENSOR_IN_I1V3_ENUM_STR,
+};
+
+/* END MKCONFIG GENERATED MonitorStatDescriptionsBlock */
+
+                                       const char *
+efx_mon_stat_description(
+       __in                            efx_nic_t *enp,
+       __in                            efx_mon_stat_t id)
+{
+       _NOTE(ARGUNUSED(enp))
+       EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
+
+       EFSYS_ASSERT3U(id, <, EFX_MON_NSTATS);
+       return (__mon_stat_description[id]);
+}
+
 #endif /* EFSYS_OPT_NAMES */
 
+/* START MKCONFIG GENERATED MonitorMcdiMappingBlock 173eee0a5599996a */
+       __checkReturn                   boolean_t
+efx_mon_mcdi_to_efx_stat(
+       __in                            int mcdi_index,
+       __out                           efx_mon_stat_t *statp)
+{
+
+       if ((mcdi_index % (MC_CMD_SENSOR_PAGE0_NEXT + 1)) ==
+           MC_CMD_SENSOR_PAGE0_NEXT) {
+               *statp = EFX_MON_NSTATS;
+               return (B_FALSE);
+       }
+
+       switch (mcdi_index) {
+       case MC_CMD_SENSOR_IN_I0V9:
+               *statp = EFX_MON_STAT_IN_I0V9;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT_EXTADC:
+               *statp = EFX_MON_STAT_CONTROLLER_SLAVE_VPTAT_EXTADC;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT:
+               *statp = EFX_MON_STAT_CONTROLLER_SLAVE_VPTAT;
+               break;
+       case MC_CMD_SENSOR_PSU_TEMP:
+               *statp = EFX_MON_STAT_PSU_TEMP;
+               break;
+       case MC_CMD_SENSOR_FAN_2:
+               *statp = EFX_MON_STAT_FAN_2;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP_EXTADC:
+               *statp = EFX_MON_STAT_CONTROLLER_INTERNAL_TEMP_EXTADC;
+               break;
+       case MC_CMD_SENSOR_BOARD_BACK_TEMP:
+               *statp = EFX_MON_STAT_BOARD_BACK_TEMP;
+               break;
+       case MC_CMD_SENSOR_IN_1V3:
+               *statp = EFX_MON_STAT_IN_1V3;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_TDIODE_TEMP:
+               *statp = EFX_MON_STAT_CONTROLLER_TDIODE_TEMP;
+               break;
+       case MC_CMD_SENSOR_IN_2V5:
+               *statp = EFX_MON_STAT_IN_2V5;
+               break;
+       case MC_CMD_SENSOR_PHY_COMMON_TEMP:
+               *statp = EFX_MON_STAT_PHY_COMMON_TEMP;
+               break;
+       case MC_CMD_SENSOR_PHY1_TEMP:
+               *statp = EFX_MON_STAT_PHY1_TEMP;
+               break;
+       case MC_CMD_SENSOR_VREG_INTERNAL_TEMP:
+               *statp = EFX_MON_STAT_VREG_INTERNAL_TEMP;
+               break;
+       case MC_CMD_SENSOR_IN_1V0:
+               *statp = EFX_MON_STAT_IN_1V0;
+               break;
+       case MC_CMD_SENSOR_FAN_1:
+               *statp = EFX_MON_STAT_FAN_1;
+               break;
+       case MC_CMD_SENSOR_IN_1V2:
+               *statp = EFX_MON_STAT_IN_1V2;
+               break;
+       case MC_CMD_SENSOR_FAN_3:
+               *statp = EFX_MON_STAT_FAN_3;
+               break;
+       case MC_CMD_SENSOR_IN_1V2A:
+               *statp = EFX_MON_STAT_IN_1V2A;
+               break;
+       case MC_CMD_SENSOR_SODIMM_0_TEMP:
+               *statp = EFX_MON_STAT_SODIMM_0_TEMP;
+               break;
+       case MC_CMD_SENSOR_IN_1V8:
+               *statp = EFX_MON_STAT_IN_1V8;
+               break;
+       case MC_CMD_SENSOR_IN_VREF:
+               *statp = EFX_MON_STAT_IN_VREF;
+               break;
+       case MC_CMD_SENSOR_SODIMM_VOUT:
+               *statp = EFX_MON_STAT_SODIMM_VOUT;
+               break;
+       case MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY:
+               *statp = EFX_MON_STAT_CCOM_AVREG_1V2_SUPPLY;
+               break;
+       case MC_CMD_SENSOR_IN_I1V2:
+               *statp = EFX_MON_STAT_IN_I1V2;
+               break;
+       case MC_CMD_SENSOR_IN_I1V3:
+               *statp = EFX_MON_STAT_IN_I1V3;
+               break;
+       case MC_CMD_SENSOR_AIRFLOW:
+               *statp = EFX_MON_STAT_AIRFLOW;
+               break;
+       case MC_CMD_SENSOR_HOTPOINT_TEMP:
+               *statp = EFX_MON_STAT_HOTPOINT_TEMP;
+               break;
+       case MC_CMD_SENSOR_VDD08D_VSS08D_CSR:
+               *statp = EFX_MON_STAT_VDD08D_VSS08D_CSR;
+               break;
+       case MC_CMD_SENSOR_AOE_TEMP:
+               *statp = EFX_MON_STAT_AOE_TEMP;
+               break;
+       case MC_CMD_SENSOR_IN_I1V8:
+               *statp = EFX_MON_STAT_IN_I1V8;
+               break;
+       case MC_CMD_SENSOR_IN_I2V5:
+               *statp = EFX_MON_STAT_IN_I2V5;
+               break;
+       case MC_CMD_SENSOR_PHY1_COOLING:
+               *statp = EFX_MON_STAT_PHY1_COOLING;
+               break;
+       case MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY_EXTADC:
+               *statp = EFX_MON_STAT_CCOM_AVREG_1V8_SUPPLY_EXTADC;
+               break;
+       case MC_CMD_SENSOR_IN_0V9_ADC:
+               *statp = EFX_MON_STAT_IN_0V9_ADC;
+               break;
+       case MC_CMD_SENSOR_VREG_0V9_A_TEMP:
+               *statp = EFX_MON_STAT_VREG_0V9_A_TEMP;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT:
+               *statp = EFX_MON_STAT_CONTROLLER_MASTER_VPTAT;
+               break;
+       case MC_CMD_SENSOR_PHY0_VCC:
+               *statp = EFX_MON_STAT_PHY0_VCC;
+               break;
+       case MC_CMD_SENSOR_PHY0_COOLING:
+               *statp = EFX_MON_STAT_PHY0_COOLING;
+               break;
+       case MC_CMD_SENSOR_PSU_AOE_TEMP:
+               *statp = EFX_MON_STAT_PSU_AOE_TEMP;
+               break;
+       case MC_CMD_SENSOR_VREG_0V9_TEMP:
+               *statp = EFX_MON_STAT_VREG_0V9_TEMP;
+               break;
+       case MC_CMD_SENSOR_IN_I0V9_A:
+               *statp = EFX_MON_STAT_IN_I0V9_A;
+               break;
+       case MC_CMD_SENSOR_IN_I3V3:
+               *statp = EFX_MON_STAT_IN_I3V3;
+               break;
+       case MC_CMD_SENSOR_BOARD_FRONT_TEMP:
+               *statp = EFX_MON_STAT_BOARD_FRONT_TEMP;
+               break;
+       case MC_CMD_SENSOR_OUT_VAOE:
+               *statp = EFX_MON_STAT_OUT_VAOE;
+               break;
+       case MC_CMD_SENSOR_VDD08D_VSS08D_CSR_EXTADC:
+               *statp = EFX_MON_STAT_VDD08D_VSS08D_CSR_EXTADC;
+               break;
+       case MC_CMD_SENSOR_IN_I12V0:
+               *statp = EFX_MON_STAT_IN_I12V0;
+               break;
+       case MC_CMD_SENSOR_PHY_POWER_PORT1:
+               *statp = EFX_MON_STAT_PHY_POWER_PORT1;
+               break;
+       case MC_CMD_SENSOR_PHY_POWER_PORT0:
+               *statp = EFX_MON_STAT_PHY_POWER_PORT0;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC:
+               *statp = EFX_MON_STAT_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP:
+               *statp = EFX_MON_STAT_CONTROLLER_MASTER_INTERNAL_TEMP;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_TEMP:
+               *statp = EFX_MON_STAT_CONTROLLER_TEMP;
+               break;
+       case MC_CMD_SENSOR_IN_IAOE:
+               *statp = EFX_MON_STAT_IN_IAOE;
+               break;
+       case MC_CMD_SENSOR_IN_VAOE:
+               *statp = EFX_MON_STAT_IN_VAOE;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT_EXTADC:
+               *statp = EFX_MON_STAT_CONTROLLER_MASTER_VPTAT_EXTADC;
+               break;
+       case MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY:
+               *statp = EFX_MON_STAT_CCOM_AVREG_1V8_SUPPLY;
+               break;
+       case MC_CMD_SENSOR_PHY1_VCC:
+               *statp = EFX_MON_STAT_PHY1_VCC;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_COOLING:
+               *statp = EFX_MON_STAT_CONTROLLER_COOLING;
+               break;
+       case MC_CMD_SENSOR_AMBIENT_TEMP:
+               *statp = EFX_MON_STAT_AMBIENT_TEMP;
+               break;
+       case MC_CMD_SENSOR_IN_3V3:
+               *statp = EFX_MON_STAT_IN_3V3;
+               break;
+       case MC_CMD_SENSOR_PHY0_TEMP:
+               *statp = EFX_MON_STAT_PHY0_TEMP;
+               break;
+       case MC_CMD_SENSOR_SODIMM_1_TEMP:
+               *statp = EFX_MON_STAT_SODIMM_1_TEMP;
+               break;
+       case MC_CMD_SENSOR_MUM_VCC:
+               *statp = EFX_MON_STAT_MUM_VCC;
+               break;
+       case MC_CMD_SENSOR_VREG_0V9_B_TEMP:
+               *statp = EFX_MON_STAT_VREG_0V9_B_TEMP;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP:
+               *statp = EFX_MON_STAT_CONTROLLER_SLAVE_INTERNAL_TEMP;
+               break;
+       case MC_CMD_SENSOR_FAN_4:
+               *statp = EFX_MON_STAT_FAN_4;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_2_TEMP:
+               *statp = EFX_MON_STAT_CONTROLLER_2_TEMP;
+               break;
+       case MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY_EXTADC:
+               *statp = EFX_MON_STAT_CCOM_AVREG_1V2_SUPPLY_EXTADC;
+               break;
+       case MC_CMD_SENSOR_IN_0V9_A:
+               *statp = EFX_MON_STAT_IN_0V9_A;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_VPTAT_EXTADC:
+               *statp = EFX_MON_STAT_CONTROLLER_VPTAT_EXTADC;
+               break;
+       case MC_CMD_SENSOR_IN_0V9:
+               *statp = EFX_MON_STAT_IN_0V9;
+               break;
+       case MC_CMD_SENSOR_IN_I0V9_B:
+               *statp = EFX_MON_STAT_IN_I0V9_B;
+               break;
+       case MC_CMD_SENSOR_NIC_POWER:
+               *statp = EFX_MON_STAT_NIC_POWER;
+               break;
+       case MC_CMD_SENSOR_IN_12V0:
+               *statp = EFX_MON_STAT_IN_12V0;
+               break;
+       case MC_CMD_SENSOR_OUT_IAOE:
+               *statp = EFX_MON_STAT_OUT_IAOE;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_VPTAT:
+               *statp = EFX_MON_STAT_CONTROLLER_VPTAT;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC:
+               *statp = EFX_MON_STAT_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC;
+               break;
+       case MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP:
+               *statp = EFX_MON_STAT_CONTROLLER_INTERNAL_TEMP;
+               break;
+       case MC_CMD_SENSOR_FAN_0:
+               *statp = EFX_MON_STAT_FAN_0;
+               break;
+       case MC_CMD_SENSOR_VREG_1V2_TEMP:
+               *statp = EFX_MON_STAT_VREG_1V2_TEMP;
+               break;
+       case MC_CMD_SENSOR_IN_0V9_B:
+               *statp = EFX_MON_STAT_IN_0V9_B;
+               break;
+       default:
+               *statp = EFX_MON_NSTATS;
+               break;
+       };
+
+       if (*statp == EFX_MON_NSTATS)
+               goto fail1;
+
+       return (B_TRUE);
+
+fail1:
+       EFSYS_PROBE1(fail1, boolean_t, B_TRUE);
+       return (B_FALSE);
+};
+
+/* END MKCONFIG GENERATED MonitorMcdiMappingBlock */
+
+/* START MKCONFIG GENERATED MonitorStatisticUnitsBlock 2d447c656cc2d01d */
+       __checkReturn                   boolean_t
+efx_mon_get_stat_unit(
+       __in                            efx_mon_stat_t stat,
+       __out                           efx_mon_stat_unit_t *unitp)
+{
+       switch (stat) {
+       case EFX_MON_STAT_IN_1V0:
+       case EFX_MON_STAT_IN_1V2:
+       case EFX_MON_STAT_IN_1V8:
+       case EFX_MON_STAT_IN_2V5:
+       case EFX_MON_STAT_IN_3V3:
+       case EFX_MON_STAT_IN_12V0:
+       case EFX_MON_STAT_IN_1V2A:
+       case EFX_MON_STAT_IN_VREF:
+       case EFX_MON_STAT_OUT_VAOE:
+       case EFX_MON_STAT_IN_VAOE:
+       case EFX_MON_STAT_IN_0V9:
+       case EFX_MON_STAT_IN_0V9_ADC:
+       case EFX_MON_STAT_CONTROLLER_VPTAT_EXTADC:
+       case EFX_MON_STAT_VDD08D_VSS08D_CSR:
+       case EFX_MON_STAT_VDD08D_VSS08D_CSR_EXTADC:
+       case EFX_MON_STAT_MUM_VCC:
+       case EFX_MON_STAT_IN_0V9_A:
+       case EFX_MON_STAT_IN_0V9_B:
+       case EFX_MON_STAT_CCOM_AVREG_1V2_SUPPLY:
+       case EFX_MON_STAT_CCOM_AVREG_1V2_SUPPLY_EXTADC:
+       case EFX_MON_STAT_CCOM_AVREG_1V8_SUPPLY:
+       case EFX_MON_STAT_CCOM_AVREG_1V8_SUPPLY_EXTADC:
+       case EFX_MON_STAT_CONTROLLER_MASTER_VPTAT:
+       case EFX_MON_STAT_CONTROLLER_MASTER_VPTAT_EXTADC:
+       case EFX_MON_STAT_CONTROLLER_SLAVE_VPTAT:
+       case EFX_MON_STAT_CONTROLLER_SLAVE_VPTAT_EXTADC:
+       case EFX_MON_STAT_SODIMM_VOUT:
+       case EFX_MON_STAT_PHY0_VCC:
+       case EFX_MON_STAT_PHY1_VCC:
+       case EFX_MON_STAT_IN_1V3:
+               *unitp = EFX_MON_STAT_UNIT_VOLTAGE_MV;
+               break;
+       case EFX_MON_STAT_CONTROLLER_TEMP:
+       case EFX_MON_STAT_PHY_COMMON_TEMP:
+       case EFX_MON_STAT_PHY0_TEMP:
+       case EFX_MON_STAT_PHY1_TEMP:
+       case EFX_MON_STAT_AOE_TEMP:
+       case EFX_MON_STAT_PSU_AOE_TEMP:
+       case EFX_MON_STAT_PSU_TEMP:
+       case EFX_MON_STAT_CONTROLLER_2_TEMP:
+       case EFX_MON_STAT_VREG_INTERNAL_TEMP:
+       case EFX_MON_STAT_VREG_0V9_TEMP:
+       case EFX_MON_STAT_VREG_1V2_TEMP:
+       case EFX_MON_STAT_CONTROLLER_VPTAT:
+       case EFX_MON_STAT_CONTROLLER_INTERNAL_TEMP:
+       case EFX_MON_STAT_CONTROLLER_INTERNAL_TEMP_EXTADC:
+       case EFX_MON_STAT_AMBIENT_TEMP:
+       case EFX_MON_STAT_HOTPOINT_TEMP:
+       case EFX_MON_STAT_VREG_0V9_A_TEMP:
+       case EFX_MON_STAT_VREG_0V9_B_TEMP:
+       case EFX_MON_STAT_CONTROLLER_MASTER_INTERNAL_TEMP:
+       case EFX_MON_STAT_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC:
+       case EFX_MON_STAT_CONTROLLER_SLAVE_INTERNAL_TEMP:
+       case EFX_MON_STAT_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC:
+       case EFX_MON_STAT_SODIMM_0_TEMP:
+       case EFX_MON_STAT_SODIMM_1_TEMP:
+       case EFX_MON_STAT_CONTROLLER_TDIODE_TEMP:
+       case EFX_MON_STAT_BOARD_FRONT_TEMP:
+       case EFX_MON_STAT_BOARD_BACK_TEMP:
+               *unitp = EFX_MON_STAT_UNIT_TEMP_C;
+               break;
+       case EFX_MON_STAT_CONTROLLER_COOLING:
+       case EFX_MON_STAT_PHY0_COOLING:
+       case EFX_MON_STAT_PHY1_COOLING:
+       case EFX_MON_STAT_AIRFLOW:
+       case EFX_MON_STAT_PHY_POWER_PORT0:
+       case EFX_MON_STAT_PHY_POWER_PORT1:
+               *unitp = EFX_MON_STAT_UNIT_BOOL;
+               break;
+       case EFX_MON_STAT_NIC_POWER:
+               *unitp = EFX_MON_STAT_UNIT_POWER_W;
+               break;
+       case EFX_MON_STAT_OUT_IAOE:
+       case EFX_MON_STAT_IN_IAOE:
+       case EFX_MON_STAT_IN_I0V9:
+       case EFX_MON_STAT_IN_I1V2:
+       case EFX_MON_STAT_IN_I0V9_A:
+       case EFX_MON_STAT_IN_I0V9_B:
+       case EFX_MON_STAT_IN_I1V8:
+       case EFX_MON_STAT_IN_I2V5:
+       case EFX_MON_STAT_IN_I3V3:
+       case EFX_MON_STAT_IN_I12V0:
+       case EFX_MON_STAT_IN_I1V3:
+               *unitp = EFX_MON_STAT_UNIT_CURRENT_MA;
+               break;
+       case EFX_MON_STAT_FAN_0:
+       case EFX_MON_STAT_FAN_1:
+       case EFX_MON_STAT_FAN_2:
+       case EFX_MON_STAT_FAN_3:
+       case EFX_MON_STAT_FAN_4:
+               *unitp = EFX_MON_STAT_UNIT_RPM;
+               break;
+       default:
+               *unitp = EFX_MON_STAT_UNIT_UNKNOWN;
+               break;
+       };
+
+       if (*unitp == EFX_MON_STAT_UNIT_UNKNOWN)
+               goto fail1;
+
+       return (B_TRUE);
+
+fail1:
+       EFSYS_PROBE1(fail1, boolean_t, B_TRUE);
+       return (B_FALSE);
+};
+
+/* END MKCONFIG GENERATED MonitorStatisticUnitsBlock */
+
+/* START MKCONFIG GENERATED MonitorStatisticPortsBlock 1719b751d842534f */
+       __checkReturn                   boolean_t
+efx_mon_get_stat_portmap(
+       __in                            efx_mon_stat_t stat,
+       __out                           efx_mon_stat_portmask_t *maskp)
+{
+
+       switch (stat) {
+       case EFX_MON_STAT_PHY1_TEMP:
+       case EFX_MON_STAT_PHY1_COOLING:
+       case EFX_MON_STAT_PHY_POWER_PORT1:
+               *maskp = EFX_MON_STAT_PORTMAP_PORT1;
+               break;
+       case EFX_MON_STAT_CONTROLLER_TEMP:
+       case EFX_MON_STAT_PHY_COMMON_TEMP:
+       case EFX_MON_STAT_CONTROLLER_COOLING:
+       case EFX_MON_STAT_IN_1V0:
+       case EFX_MON_STAT_IN_1V2:
+       case EFX_MON_STAT_IN_1V8:
+       case EFX_MON_STAT_IN_2V5:
+       case EFX_MON_STAT_IN_3V3:
+       case EFX_MON_STAT_IN_12V0:
+       case EFX_MON_STAT_IN_1V2A:
+       case EFX_MON_STAT_IN_VREF:
+       case EFX_MON_STAT_OUT_VAOE:
+       case EFX_MON_STAT_AOE_TEMP:
+       case EFX_MON_STAT_PSU_AOE_TEMP:
+       case EFX_MON_STAT_PSU_TEMP:
+       case EFX_MON_STAT_FAN_0:
+       case EFX_MON_STAT_FAN_1:
+       case EFX_MON_STAT_FAN_2:
+       case EFX_MON_STAT_FAN_3:
+       case EFX_MON_STAT_FAN_4:
+       case EFX_MON_STAT_IN_VAOE:
+       case EFX_MON_STAT_OUT_IAOE:
+       case EFX_MON_STAT_IN_IAOE:
+       case EFX_MON_STAT_NIC_POWER:
+       case EFX_MON_STAT_IN_0V9:
+       case EFX_MON_STAT_IN_I0V9:
+       case EFX_MON_STAT_IN_I1V2:
+       case EFX_MON_STAT_IN_0V9_ADC:
+       case EFX_MON_STAT_CONTROLLER_2_TEMP:
+       case EFX_MON_STAT_VREG_INTERNAL_TEMP:
+       case EFX_MON_STAT_VREG_0V9_TEMP:
+       case EFX_MON_STAT_VREG_1V2_TEMP:
+       case EFX_MON_STAT_CONTROLLER_VPTAT:
+       case EFX_MON_STAT_CONTROLLER_INTERNAL_TEMP:
+       case EFX_MON_STAT_CONTROLLER_VPTAT_EXTADC:
+       case EFX_MON_STAT_CONTROLLER_INTERNAL_TEMP_EXTADC:
+       case EFX_MON_STAT_AMBIENT_TEMP:
+       case EFX_MON_STAT_AIRFLOW:
+       case EFX_MON_STAT_VDD08D_VSS08D_CSR:
+       case EFX_MON_STAT_VDD08D_VSS08D_CSR_EXTADC:
+       case EFX_MON_STAT_HOTPOINT_TEMP:
+       case EFX_MON_STAT_MUM_VCC:
+       case EFX_MON_STAT_IN_0V9_A:
+       case EFX_MON_STAT_IN_I0V9_A:
+       case EFX_MON_STAT_VREG_0V9_A_TEMP:
+       case EFX_MON_STAT_IN_0V9_B:
+       case EFX_MON_STAT_IN_I0V9_B:
+       case EFX_MON_STAT_VREG_0V9_B_TEMP:
+       case EFX_MON_STAT_CCOM_AVREG_1V2_SUPPLY:
+       case EFX_MON_STAT_CCOM_AVREG_1V2_SUPPLY_EXTADC:
+       case EFX_MON_STAT_CCOM_AVREG_1V8_SUPPLY:
+       case EFX_MON_STAT_CCOM_AVREG_1V8_SUPPLY_EXTADC:
+       case EFX_MON_STAT_CONTROLLER_MASTER_VPTAT:
+       case EFX_MON_STAT_CONTROLLER_MASTER_INTERNAL_TEMP:
+       case EFX_MON_STAT_CONTROLLER_MASTER_VPTAT_EXTADC:
+       case EFX_MON_STAT_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC:
+       case EFX_MON_STAT_CONTROLLER_SLAVE_VPTAT:
+       case EFX_MON_STAT_CONTROLLER_SLAVE_INTERNAL_TEMP:
+       case EFX_MON_STAT_CONTROLLER_SLAVE_VPTAT_EXTADC:
+       case EFX_MON_STAT_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC:
+       case EFX_MON_STAT_SODIMM_VOUT:
+       case EFX_MON_STAT_SODIMM_0_TEMP:
+       case EFX_MON_STAT_SODIMM_1_TEMP:
+       case EFX_MON_STAT_PHY0_VCC:
+       case EFX_MON_STAT_PHY1_VCC:
+       case EFX_MON_STAT_CONTROLLER_TDIODE_TEMP:
+       case EFX_MON_STAT_BOARD_FRONT_TEMP:
+       case EFX_MON_STAT_BOARD_BACK_TEMP:
+       case EFX_MON_STAT_IN_I1V8:
+       case EFX_MON_STAT_IN_I2V5:
+       case EFX_MON_STAT_IN_I3V3:
+       case EFX_MON_STAT_IN_I12V0:
+       case EFX_MON_STAT_IN_1V3:
+       case EFX_MON_STAT_IN_I1V3:
+               *maskp = EFX_MON_STAT_PORTMAP_ALL;
+               break;
+       case EFX_MON_STAT_PHY0_TEMP:
+       case EFX_MON_STAT_PHY0_COOLING:
+       case EFX_MON_STAT_PHY_POWER_PORT0:
+               *maskp = EFX_MON_STAT_PORTMAP_PORT0;
+               break;
+       default:
+               *maskp = EFX_MON_STAT_PORTMAP_UNKNOWN;
+               break;
+       };
+
+       if (*maskp == EFX_MON_STAT_PORTMAP_UNKNOWN)
+               goto fail1;
+
+       return (B_TRUE);
+
+fail1:
+       EFSYS_PROBE1(fail1, boolean_t, B_TRUE);
+       return (B_FALSE);
+};
+
+/* END MKCONFIG GENERATED MonitorStatisticPortsBlock */
+
        __checkReturn                   efx_rc_t
 efx_mon_stats_update(
        __in                            efx_nic_t *enp,
@@ -217,6 +816,20 @@ efx_mon_stats_update(
        return (emop->emo_stats_update(enp, esmp, values));
 }
 
+       __checkReturn                   efx_rc_t
+efx_mon_limits_update(
+       __in                            efx_nic_t *enp,
+       __inout_ecount(EFX_MON_NSTATS)  efx_mon_stat_limits_t *values)
+{
+       efx_mon_t *emp = &(enp->en_mon);
+       const efx_mon_ops_t *emop = emp->em_emop;
+
+       EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
+       EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_MON);
+
+       return (emop->emo_limits_update(enp, values));
+}
+
 #endif /* EFSYS_OPT_MON_STATS */
 
                void
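Together, the generated blocks above give a complete decode path for one sensor reading: efx_mon_mcdi_to_efx_stat() maps the raw MCDI sensor index (skipping the per-page NEXT entries) to an efx_mon_stat_t, and the unit, port-map, name and description lookups then describe it. A minimal caller sketch, assuming EFSYS_OPT_NAMES is set and using stdio printf purely for illustration:

	efx_mon_stat_t stat;
	efx_mon_stat_unit_t unit;

	if (!efx_mon_mcdi_to_efx_stat(mcdi_index, &stat))
		return;		/* page-NEXT entry or unknown sensor */
	if (!efx_mon_get_stat_unit(stat, &unit))
		unit = EFX_MON_STAT_UNIT_UNKNOWN;

	printf("%s: %s (unit %d)\n",
	    efx_mon_stat_name(enp, stat),
	    efx_mon_stat_description(enp, stat),
	    (int)unit);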
index 6c162e0..cea32b7 100644
@@ -100,6 +100,8 @@ static const efx_nic_ops_t  __efx_nic_siena_ops = {
        siena_nic_init,                 /* eno_init */
        NULL,                           /* eno_get_vi_pool */
        NULL,                           /* eno_get_bar_region */
+       NULL,                           /* eno_hw_unavailable */
+       NULL,                           /* eno_set_hw_unavailable */
 #if EFSYS_OPT_DIAG
        siena_nic_register_test,        /* eno_register_test */
 #endif /* EFSYS_OPT_DIAG */
@@ -119,6 +121,8 @@ static const efx_nic_ops_t  __efx_nic_hunt_ops = {
        ef10_nic_init,                  /* eno_init */
        ef10_nic_get_vi_pool,           /* eno_get_vi_pool */
        ef10_nic_get_bar_region,        /* eno_get_bar_region */
+       ef10_nic_hw_unavailable,        /* eno_hw_unavailable */
+       ef10_nic_set_hw_unavailable,    /* eno_set_hw_unavailable */
 #if EFSYS_OPT_DIAG
        ef10_nic_register_test,         /* eno_register_test */
 #endif /* EFSYS_OPT_DIAG */
@@ -138,6 +142,8 @@ static const efx_nic_ops_t  __efx_nic_medford_ops = {
        ef10_nic_init,                  /* eno_init */
        ef10_nic_get_vi_pool,           /* eno_get_vi_pool */
        ef10_nic_get_bar_region,        /* eno_get_bar_region */
+       ef10_nic_hw_unavailable,        /* eno_hw_unavailable */
+       ef10_nic_set_hw_unavailable,    /* eno_set_hw_unavailable */
 #if EFSYS_OPT_DIAG
        ef10_nic_register_test,         /* eno_register_test */
 #endif /* EFSYS_OPT_DIAG */
@@ -157,6 +163,8 @@ static const efx_nic_ops_t  __efx_nic_medford2_ops = {
        ef10_nic_init,                  /* eno_init */
        ef10_nic_get_vi_pool,           /* eno_get_vi_pool */
        ef10_nic_get_bar_region,        /* eno_get_bar_region */
+       ef10_nic_hw_unavailable,        /* eno_hw_unavailable */
+       ef10_nic_set_hw_unavailable,    /* eno_set_hw_unavailable */
 #if EFSYS_OPT_DIAG
        ef10_nic_register_test,         /* eno_register_test */
 #endif /* EFSYS_OPT_DIAG */
@@ -549,7 +557,7 @@ efx_nic_reset(
        EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
        EFSYS_ASSERT(enp->en_mod_flags & EFX_MOD_PROBE);
        /*
-        * All modules except the MCDI, PROBE, NVRAM, VPD, MON
+        * All modules except the MCDI, PROBE, NVRAM, VPD, MON, TUNNEL
         * (which we do not reset here) must have been shut down or never
         * initialized.
         *
@@ -559,7 +567,10 @@ efx_nic_reset(
         */
        mod_flags = enp->en_mod_flags;
        mod_flags &= ~(EFX_MOD_MCDI | EFX_MOD_PROBE | EFX_MOD_NVRAM |
-                   EFX_MOD_VPD | EFX_MOD_MON);
+           EFX_MOD_VPD | EFX_MOD_MON);
+#if EFSYS_OPT_TUNNEL
+       mod_flags &= ~EFX_MOD_TUNNEL;
+#endif /* EFSYS_OPT_TUNNEL */
        EFSYS_ASSERT3U(mod_flags, ==, 0);
        if (mod_flags != 0) {
                rc = EINVAL;
@@ -584,6 +595,7 @@ efx_nic_cfg_get(
        __in            efx_nic_t *enp)
 {
        EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
+       EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_PROBE);
 
        return (&(enp->en_nic_cfg));
 }
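
A note on the assertion added above: EFSYS_ASSERT3U(_x, _op, _y) presumably asserts that the expression (_x _op _y) is nonzero, so passing & as the operator verifies that the EFX_MOD_PROBE bit is set in en_mod_flags before the configuration pointer is handed out.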
@@ -649,6 +661,39 @@ fail1:
        return (rc);
 }
 
+       __checkReturn   boolean_t
+efx_nic_hw_unavailable(
+       __in            efx_nic_t *enp)
+{
+       const efx_nic_ops_t *enop = enp->en_enop;
+
+       EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
+       /* NOTE: can be used by MCDI before NIC probe */
+
+       if (enop->eno_hw_unavailable != NULL) {
+               if ((enop->eno_hw_unavailable)(enp) != B_FALSE)
+                       goto unavail;
+       }
+
+       return (B_FALSE);
+
+unavail:
+       return (B_TRUE);
+}
+
+                       void
+efx_nic_set_hw_unavailable(
+       __in            efx_nic_t *enp)
+{
+       const efx_nic_ops_t *enop = enp->en_enop;
+
+       EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
+
+       if (enop->eno_set_hw_unavailable != NULL)
+               enop->eno_set_hw_unavailable(enp);
+}
+
 #if EFSYS_OPT_DIAG
 
        __checkReturn   efx_rc_t
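
Both new entry points are optional per-family hooks: a NULL eno_hw_unavailable (as in the Siena op vector above) means the hardware is assumed reachable and efx_nic_hw_unavailable() reports B_FALSE. A hypothetical caller-side use, not taken from this patch:

	/* Fail fast if the device has vanished (e.g. surprise removal). */
	if (efx_nic_hw_unavailable(enp) != B_FALSE)
		return (ENETDOWN);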
@@ -786,13 +831,12 @@ efx_mcdi_get_loopback_modes(
 {
        efx_nic_cfg_t *encp = &(enp->en_nic_cfg);
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_LOOPBACK_MODES_IN_LEN,
-                           MC_CMD_GET_LOOPBACK_MODES_OUT_V2_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_LOOPBACK_MODES_IN_LEN,
+               MC_CMD_GET_LOOPBACK_MODES_OUT_V2_LEN);
        efx_qword_t mask;
        efx_qword_t modes;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_LOOPBACK_MODES;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_LOOPBACK_MODES_IN_LEN;
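
This hunk shows a refactoring repeated throughout the rest of the patch: the open-coded on-stack payload array sized with MAX(), plus its explicit memset(), is replaced by EFX_MCDI_DECLARE_BUF. The macro definition is not part of this excerpt; a plausible shape, assuming it lives in efx_mcdi.h, is:

	/*
	 * Presumed expansion (check efx_mcdi.h for the authoritative
	 * definition): declare a payload buffer sized for the larger of
	 * request and response, zero-initialized at declaration so the
	 * memset() calls become redundant and can be dropped.
	 */
	#define	EFX_MCDI_DECLARE_BUF(_name, _in_len, _out_len)		\
		uint8_t _name[MAX(_in_len, _out_len)] = {0}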
index be409c3..5296c59 100644 (file)
@@ -468,7 +468,7 @@ efx_nvram_validate(
                goto fail1;
 
        if (envop->envo_buffer_validate != NULL) {
-               if ((rc = envop->envo_buffer_validate(enp, partn,
+               if ((rc = envop->envo_buffer_validate(partn,
                            partn_data, partn_size)) != 0)
                        goto fail2;
        }
@@ -514,12 +514,11 @@ efx_mcdi_nvram_partitions(
        __out                   unsigned int *npartnp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_NVRAM_PARTITIONS_IN_LEN,
-                           MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_NVRAM_PARTITIONS_IN_LEN,
+               MC_CMD_NVRAM_PARTITIONS_OUT_LENMAX);
        unsigned int npartn;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_NVRAM_PARTITIONS;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_NVRAM_PARTITIONS_IN_LEN;
@@ -577,11 +576,10 @@ efx_mcdi_nvram_metadata(
        __in                    size_t size)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_NVRAM_METADATA_IN_LEN,
-                           MC_CMD_NVRAM_METADATA_OUT_LENMAX)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_NVRAM_METADATA_IN_LEN,
+               MC_CMD_NVRAM_METADATA_OUT_LENMAX);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_NVRAM_METADATA;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_NVRAM_METADATA_IN_LEN;
@@ -667,12 +665,11 @@ efx_mcdi_nvram_info(
        __out_opt               uint32_t *erase_sizep,
        __out_opt               uint32_t *write_sizep)
 {
-       uint8_t payload[MAX(MC_CMD_NVRAM_INFO_IN_LEN,
-                           MC_CMD_NVRAM_INFO_V2_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_NVRAM_INFO_IN_LEN,
+               MC_CMD_NVRAM_INFO_V2_OUT_LEN);
        efx_mcdi_req_t req;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_NVRAM_INFO;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_NVRAM_INFO_IN_LEN;
@@ -728,12 +725,11 @@ efx_mcdi_nvram_update_start(
        __in                    efx_nic_t *enp,
        __in                    uint32_t partn)
 {
-       uint8_t payload[MAX(MC_CMD_NVRAM_UPDATE_START_V2_IN_LEN,
-                           MC_CMD_NVRAM_UPDATE_START_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_NVRAM_UPDATE_START_V2_IN_LEN,
+               MC_CMD_NVRAM_UPDATE_START_OUT_LEN);
        efx_mcdi_req_t req;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_NVRAM_UPDATE_START;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_NVRAM_UPDATE_START_V2_IN_LEN;
@@ -770,8 +766,8 @@ efx_mcdi_nvram_read(
        __in                    uint32_t mode)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_NVRAM_READ_IN_V2_LEN,
-                           MC_CMD_NVRAM_READ_OUT_LENMAX)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_NVRAM_READ_IN_V2_LEN,
+               MC_CMD_NVRAM_READ_OUT_LENMAX);
        efx_rc_t rc;
 
        if (size > MC_CMD_NVRAM_READ_OUT_LENMAX) {
@@ -779,7 +775,6 @@ efx_mcdi_nvram_read(
                goto fail1;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_NVRAM_READ;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_NVRAM_READ_IN_V2_LEN;
@@ -825,11 +820,10 @@ efx_mcdi_nvram_erase(
        __in                    size_t size)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_NVRAM_ERASE_IN_LEN,
-                           MC_CMD_NVRAM_ERASE_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_NVRAM_ERASE_IN_LEN,
+               MC_CMD_NVRAM_ERASE_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_NVRAM_ERASE;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_NVRAM_ERASE_IN_LEN;
@@ -865,27 +859,31 @@ efx_mcdi_nvram_write(
        __in                    efx_nic_t *enp,
        __in                    uint32_t partn,
        __in                    uint32_t offset,
-       __out_bcount(size)      caddr_t data,
+       __in_bcount(size)       caddr_t data,
        __in                    size_t size)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MCDI_CTL_SDU_LEN_MAX_V1,
-                           MCDI_CTL_SDU_LEN_MAX_V2)];
+       uint8_t *payload;
        efx_rc_t rc;
        size_t max_data_size;
+       size_t payload_len = enp->en_nic_cfg.enc_mcdi_max_payload_length;
 
-       max_data_size = enp->en_nic_cfg.enc_mcdi_max_payload_length
-           - MC_CMD_NVRAM_WRITE_IN_LEN(0);
-       EFSYS_ASSERT3U(enp->en_nic_cfg.enc_mcdi_max_payload_length, >, 0);
-       EFSYS_ASSERT3U(max_data_size, <,
-                   enp->en_nic_cfg.enc_mcdi_max_payload_length);
+       max_data_size = payload_len - MC_CMD_NVRAM_WRITE_IN_LEN(0);
+       EFSYS_ASSERT3U(payload_len, >, 0);
+       EFSYS_ASSERT3U(max_data_size, <, payload_len);
 
        if (size > max_data_size) {
                rc = EINVAL;
                goto fail1;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
+       EFSYS_KMEM_ALLOC(enp->en_esip, payload_len, payload);
+       if (payload == NULL) {
+               rc = ENOMEM;
+               goto fail2;
+       }
+
+       (void) memset(payload, 0, payload_len);
        req.emr_cmd = MC_CMD_NVRAM_WRITE;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_NVRAM_WRITE_IN_LEN(size);
@@ -903,11 +901,16 @@ efx_mcdi_nvram_write(
 
        if (req.emr_rc != 0) {
                rc = req.emr_rc;
-               goto fail2;
+               goto fail3;
        }
 
+       EFSYS_KMEM_FREE(enp->en_esip, payload_len, payload);
+
        return (0);
 
+fail3:
+       EFSYS_PROBE(fail3);
+       EFSYS_KMEM_FREE(enp->en_esip, payload_len, payload);
 fail2:
        EFSYS_PROBE(fail2);
 fail1:
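
The surrounding error handling follows the libefx goto-fail convention that the new fail3 label extends: each failure point gets a numbered label, and the labels unwind in reverse order so that each one releases only what was acquired before it. A generic sketch of the pattern, with hypothetical step_a/step_b helpers:

	if ((rc = step_a(enp)) != 0)
		goto fail1;
	if ((rc = step_b(enp)) != 0)
		goto fail2;

	return (0);

fail2:
	EFSYS_PROBE(fail2);
	/* release whatever step_a acquired */
fail1:
	EFSYS_PROBE1(fail1, efx_rc_t, rc);

	return (rc);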
@@ -930,12 +933,11 @@ efx_mcdi_nvram_update_finish(
 {
        const efx_nic_cfg_t *encp = &enp->en_nic_cfg;
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_LEN,
-                           MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_LEN,
+               MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN);
        uint32_t verify_result = MC_CMD_NVRAM_VERIFY_RC_UNKNOWN;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_NVRAM_UPDATE_FINISH;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_LEN;
@@ -1001,12 +1003,11 @@ efx_mcdi_nvram_test(
        __in                    uint32_t partn)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_NVRAM_TEST_IN_LEN,
-                           MC_CMD_NVRAM_TEST_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_NVRAM_TEST_IN_LEN,
+               MC_CMD_NVRAM_TEST_OUT_LEN);
        int result;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_NVRAM_TEST;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_NVRAM_TEST_IN_LEN;
index ba2f51c..36a7bbd 100644 (file)
@@ -15,6 +15,7 @@ static const efx_phy_ops_t    __efx_phy_siena_ops = {
        siena_phy_reconfigure,          /* epo_reconfigure */
        siena_phy_verify,               /* epo_verify */
        siena_phy_oui_get,              /* epo_oui_get */
+       NULL,                           /* epo_link_state_get */
 #if EFSYS_OPT_PHY_STATS
        siena_phy_stats_update,         /* epo_stats_update */
 #endif /* EFSYS_OPT_PHY_STATS */
@@ -34,6 +35,7 @@ static const efx_phy_ops_t    __efx_phy_ef10_ops = {
        ef10_phy_reconfigure,           /* epo_reconfigure */
        ef10_phy_verify,                /* epo_verify */
        ef10_phy_oui_get,               /* epo_oui_get */
+       ef10_phy_link_state_get,        /* epo_link_state_get */
 #if EFSYS_OPT_PHY_STATS
        ef10_phy_stats_update,          /* epo_stats_update */
 #endif /* EFSYS_OPT_PHY_STATS */
@@ -286,8 +288,8 @@ efx_phy_media_type_get(
 efx_phy_module_get_info(
        __in                    efx_nic_t *enp,
        __in                    uint8_t dev_addr,
-       __in                    uint8_t offset,
-       __in                    uint8_t len,
+       __in                    size_t offset,
+       __in                    size_t len,
        __out_bcount(len)       uint8_t *data)
 {
        efx_rc_t rc;
@@ -295,7 +297,8 @@ efx_phy_module_get_info(
        EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
        EFSYS_ASSERT(data != NULL);
 
-       if ((uint32_t)offset + len > 0xff) {
+       if ((offset > EFX_PHY_MEDIA_INFO_MAX_OFFSET) ||
+           ((offset + len) > EFX_PHY_MEDIA_INFO_MAX_OFFSET)) {
                rc = EINVAL;
                goto fail1;
        }
@@ -306,6 +309,57 @@ efx_phy_module_get_info(
 
        return (0);
 
+fail2:
+       EFSYS_PROBE(fail2);
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
+}
+
+       __checkReturn           efx_rc_t
+efx_phy_fec_type_get(
+       __in            efx_nic_t *enp,
+       __out           efx_phy_fec_type_t *typep)
+{
+       efx_rc_t rc;
+       efx_phy_link_state_t epls;
+
+       if ((rc = efx_phy_link_state_get(enp, &epls)) != 0)
+               goto fail1;
+
+       *typep = epls.epls_fec;
+
+       return (0);
+
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
+}
+
+       __checkReturn           efx_rc_t
+efx_phy_link_state_get(
+       __in            efx_nic_t *enp,
+       __out           efx_phy_link_state_t *eplsp)
+{
+       efx_port_t *epp = &(enp->en_port);
+       const efx_phy_ops_t *epop = epp->ep_epop;
+       efx_rc_t rc;
+
+       EFSYS_ASSERT3U(enp->en_magic, ==, EFX_NIC_MAGIC);
+       EFSYS_ASSERT3U(enp->en_mod_flags, &, EFX_MOD_PROBE);
+
+       if (epop->epo_link_state_get == NULL) {
+               rc = ENOTSUP;
+               goto fail1;
+       }
+
+       if ((rc = epop->epo_link_state_get(enp, eplsp)) != 0)
+               goto fail2;
+
+       return (0);
+
 fail2:
        EFSYS_PROBE(fail2);
 fail1:
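
A hypothetical call site for the new FEC query (variable names are illustrative; EFX_PHY_FEC_NONE is assumed to be the enum's "no FEC" member):

	efx_phy_fec_type_t fec;

	/* Reads the current link state and extracts the FEC mode. */
	if (efx_phy_fec_type_get(enp, &fec) != 0)
		fec = EFX_PHY_FEC_NONE;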
index 33a1a08..5fff932 100644 (file)
@@ -37,7 +37,7 @@ efx_port_init(
        epp->ep_emop->emo_reconfigure(enp);
 
        /* Pick up current phy capabilities */
-       efx_port_poll(enp, NULL);
+       (void) efx_port_poll(enp, NULL);
 
        /*
         * Turn on the PHY if available, otherwise reset it, and
index 4fd73ba..04bc7ae 100644 (file)
@@ -298,84 +298,104 @@ fail1:
 efx_rx_scale_hash_flags_get(
        __in                                    efx_nic_t *enp,
        __in                                    efx_rx_hash_alg_t hash_alg,
-       __inout_ecount(EFX_RX_HASH_NFLAGS)      unsigned int *flags,
+       __out_ecount_part(max_nflags, *nflagsp) unsigned int *flagsp,
+       __in                                    unsigned int max_nflags,
        __out                                   unsigned int *nflagsp)
 {
        efx_nic_cfg_t *encp = &enp->en_nic_cfg;
-       boolean_t l4;
-       boolean_t additional_modes;
-       unsigned int *entryp = flags;
+       unsigned int nflags = 0;
        efx_rc_t rc;
 
-       if (flags == NULL || nflagsp == NULL) {
+       if (flagsp == NULL || nflagsp == NULL) {
                rc = EINVAL;
                goto fail1;
        }
 
-       l4 = encp->enc_rx_scale_l4_hash_supported;
-       additional_modes = encp->enc_rx_scale_additional_modes_supported;
+       if ((encp->enc_rx_scale_hash_alg_mask & (1U << hash_alg)) == 0) {
+               nflags = 0;
+               goto done;
+       }
 
-#define        LIST_FLAGS(_entryp, _class, _l4_hashing, _additional_modes)     \
-       do {                                                            \
-               if (_l4_hashing) {                                      \
-                       *(_entryp++) = EFX_RX_HASH(_class, 4TUPLE);     \
-                                                                       \
-                       if (_additional_modes) {                        \
-                               *(_entryp++) =                          \
-                                   EFX_RX_HASH(_class, 2TUPLE_DST);    \
-                               *(_entryp++) =                          \
-                                   EFX_RX_HASH(_class, 2TUPLE_SRC);    \
-                       }                                               \
-               }                                                       \
-                                                                       \
-               *(_entryp++) = EFX_RX_HASH(_class, 2TUPLE);             \
-                                                                       \
-               if (_additional_modes) {                                \
-                       *(_entryp++) = EFX_RX_HASH(_class, 1TUPLE_DST); \
-                       *(_entryp++) = EFX_RX_HASH(_class, 1TUPLE_SRC); \
-               }                                                       \
-                                                                       \
-               *(_entryp++) = EFX_RX_HASH(_class, DISABLE);            \
-                                                                       \
-               _NOTE(CONSTANTCONDITION)                                \
+       /* Helper to add a flags word to the flags array without overflow */
+#define        INSERT_FLAGS(_flags)                    \
+       do {                                    \
+               if (nflags >= max_nflags) {     \
+                       rc = E2BIG;             \
+                       goto fail2;             \
+               }                               \
+               *(flagsp + nflags) = (_flags);  \
+               nflags++;                       \
+                                               \
+               _NOTE(CONSTANTCONDITION)        \
        } while (B_FALSE)
 
-       switch (hash_alg) {
-       case EFX_RX_HASHALG_PACKED_STREAM:
-               if ((encp->enc_rx_scale_hash_alg_mask & (1U << hash_alg)) == 0)
-                       break;
-               /* FALLTHRU */
-       case EFX_RX_HASHALG_TOEPLITZ:
-               if ((encp->enc_rx_scale_hash_alg_mask & (1U << hash_alg)) == 0)
-                       break;
+       if (encp->enc_rx_scale_l4_hash_supported != B_FALSE) {
+               INSERT_FLAGS(EFX_RX_HASH(IPV4_TCP, 4TUPLE));
+               INSERT_FLAGS(EFX_RX_HASH(IPV6_TCP, 4TUPLE));
+       }
 
-               LIST_FLAGS(entryp, IPV4_TCP, l4, additional_modes);
-               LIST_FLAGS(entryp, IPV6_TCP, l4, additional_modes);
+       if ((encp->enc_rx_scale_l4_hash_supported != B_FALSE) &&
+           (encp->enc_rx_scale_additional_modes_supported != B_FALSE)) {
+               INSERT_FLAGS(EFX_RX_HASH(IPV4_TCP, 2TUPLE_DST));
+               INSERT_FLAGS(EFX_RX_HASH(IPV4_TCP, 2TUPLE_SRC));
 
-               if (additional_modes) {
-                       LIST_FLAGS(entryp, IPV4_UDP, l4, additional_modes);
-                       LIST_FLAGS(entryp, IPV6_UDP, l4, additional_modes);
-               }
+               INSERT_FLAGS(EFX_RX_HASH(IPV6_TCP, 2TUPLE_DST));
+               INSERT_FLAGS(EFX_RX_HASH(IPV6_TCP, 2TUPLE_SRC));
 
-               LIST_FLAGS(entryp, IPV4, B_FALSE, additional_modes);
-               LIST_FLAGS(entryp, IPV6, B_FALSE, additional_modes);
-               break;
+               INSERT_FLAGS(EFX_RX_HASH(IPV4_UDP, 4TUPLE));
+               INSERT_FLAGS(EFX_RX_HASH(IPV4_UDP, 2TUPLE_DST));
+               INSERT_FLAGS(EFX_RX_HASH(IPV4_UDP, 2TUPLE_SRC));
 
-       default:
-               rc = EINVAL;
-               goto fail2;
+               INSERT_FLAGS(EFX_RX_HASH(IPV6_UDP, 4TUPLE));
+               INSERT_FLAGS(EFX_RX_HASH(IPV6_UDP, 2TUPLE_DST));
+               INSERT_FLAGS(EFX_RX_HASH(IPV6_UDP, 2TUPLE_SRC));
+       }
+
+       INSERT_FLAGS(EFX_RX_HASH(IPV4_TCP, 2TUPLE));
+       INSERT_FLAGS(EFX_RX_HASH(IPV6_TCP, 2TUPLE));
+
+       INSERT_FLAGS(EFX_RX_HASH(IPV4, 2TUPLE));
+       INSERT_FLAGS(EFX_RX_HASH(IPV6, 2TUPLE));
+
+       if (encp->enc_rx_scale_additional_modes_supported != B_FALSE) {
+               INSERT_FLAGS(EFX_RX_HASH(IPV4_TCP, 1TUPLE_DST));
+               INSERT_FLAGS(EFX_RX_HASH(IPV4_TCP, 1TUPLE_SRC));
+
+               INSERT_FLAGS(EFX_RX_HASH(IPV6_TCP, 1TUPLE_DST));
+               INSERT_FLAGS(EFX_RX_HASH(IPV6_TCP, 1TUPLE_SRC));
+
+               INSERT_FLAGS(EFX_RX_HASH(IPV4_UDP, 2TUPLE));
+               INSERT_FLAGS(EFX_RX_HASH(IPV4_UDP, 1TUPLE_DST));
+               INSERT_FLAGS(EFX_RX_HASH(IPV4_UDP, 1TUPLE_SRC));
+
+               INSERT_FLAGS(EFX_RX_HASH(IPV6_UDP, 2TUPLE));
+               INSERT_FLAGS(EFX_RX_HASH(IPV6_UDP, 1TUPLE_DST));
+               INSERT_FLAGS(EFX_RX_HASH(IPV6_UDP, 1TUPLE_SRC));
+
+               INSERT_FLAGS(EFX_RX_HASH(IPV4, 1TUPLE_DST));
+               INSERT_FLAGS(EFX_RX_HASH(IPV4, 1TUPLE_SRC));
+
+               INSERT_FLAGS(EFX_RX_HASH(IPV6, 1TUPLE_DST));
+               INSERT_FLAGS(EFX_RX_HASH(IPV6, 1TUPLE_SRC));
        }
 
-#undef LIST_FLAGS
+       INSERT_FLAGS(EFX_RX_HASH(IPV4_TCP, DISABLE));
+       INSERT_FLAGS(EFX_RX_HASH(IPV6_TCP, DISABLE));
+
+       INSERT_FLAGS(EFX_RX_HASH(IPV4_UDP, DISABLE));
+       INSERT_FLAGS(EFX_RX_HASH(IPV6_UDP, DISABLE));
 
-       *nflagsp = (unsigned int)(entryp - flags);
-       EFSYS_ASSERT3U(*nflagsp, <=, EFX_RX_HASH_NFLAGS);
+       INSERT_FLAGS(EFX_RX_HASH(IPV4, DISABLE));
+       INSERT_FLAGS(EFX_RX_HASH(IPV6, DISABLE));
 
+#undef INSERT_FLAGS
+
+done:
+       *nflagsp = nflags;
        return (0);
 
 fail2:
        EFSYS_PROBE(fail2);
-
 fail1:
        EFSYS_PROBE1(fail1, efx_rc_t, rc);
 
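
The reworked getter takes the array capacity explicitly and fails with E2BIG rather than overrunning the caller's buffer. A minimal caller sketch, mirroring the use made of it in efx_rx_scale_mode_set below:

	unsigned int flags[EFX_RX_HASH_NFLAGS];
	unsigned int nflags;
	efx_rc_t rc;

	rc = efx_rx_scale_hash_flags_get(enp, EFX_RX_HASHALG_TOEPLITZ,
	    flags, EFX_ARRAY_SIZE(flags), &nflags);
	/* On success, flags[0..nflags - 1] lists every supported mode. */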
@@ -512,9 +532,8 @@ efx_rx_scale_mode_set(
        __in            efx_rx_hash_type_t type,
        __in            boolean_t insert)
 {
+       efx_nic_cfg_t *encp = &enp->en_nic_cfg;
        const efx_rx_ops_t *erxop = enp->en_erxop;
-       unsigned int type_flags[EFX_RX_HASH_NFLAGS];
-       unsigned int type_nflags;
        efx_rx_hash_type_t type_check;
        unsigned int i;
        efx_rc_t rc;
@@ -533,46 +552,60 @@ efx_rx_scale_mode_set(
        }
 
        /*
-        * Translate legacy flags to the new representation
-        * so that chip-specific handlers will consider the
-        * new flags only.
+        * If the RSS hash type is represented by additional bits
+        * in the value, those bits need to be verified, since not
+        * all bit combinations are valid RSS modes. Also, depending
+        * on the firmware, some valid combinations may be
+        * unsupported. Single out the additional bits in the type
+        * value and try to recognise valid combinations. If any
+        * bits remain unrecognised, report an error.
         */
-       if (type & EFX_RX_HASH_IPV4) {
-               type |= EFX_RX_HASH(IPV4, 2TUPLE);
-               type |= EFX_RX_HASH(IPV4_TCP, 2TUPLE);
-               type |= EFX_RX_HASH(IPV4_UDP, 2TUPLE);
-       }
-
-       if (type & EFX_RX_HASH_TCPIPV4)
-               type |= EFX_RX_HASH(IPV4_TCP, 4TUPLE);
+       type_check = type & ~EFX_RX_HASH_LEGACY_MASK;
+       if (type_check != 0) {
+               unsigned int type_flags[EFX_RX_HASH_NFLAGS];
+               unsigned int type_nflags;
 
-       if (type & EFX_RX_HASH_IPV6) {
-               type |= EFX_RX_HASH(IPV6, 2TUPLE);
-               type |= EFX_RX_HASH(IPV6_TCP, 2TUPLE);
-               type |= EFX_RX_HASH(IPV6_UDP, 2TUPLE);
-       }
+               rc = efx_rx_scale_hash_flags_get(enp, alg, type_flags,
+                                   EFX_ARRAY_SIZE(type_flags), &type_nflags);
+               if (rc != 0)
+                       goto fail2;
 
-       if (type & EFX_RX_HASH_TCPIPV6)
-               type |= EFX_RX_HASH(IPV6_TCP, 4TUPLE);
+               for (i = 0; i < type_nflags; ++i) {
+                       if ((type_check & type_flags[i]) == type_flags[i])
+                               type_check &= ~(type_flags[i]);
+               }
 
-       type &= ~EFX_RX_HASH_LEGACY_MASK;
-       type_check = type;
+               if (type_check != 0) {
+                       rc = EINVAL;
+                       goto fail3;
+               }
+       }
 
        /*
-        * Get the list of supported hash flags and sanitise the input.
+        * Translate EFX_RX_HASH() flags to their legacy counterparts
+        * provided that the FW claims no support for additional modes.
         */
-       rc = efx_rx_scale_hash_flags_get(enp, alg, type_flags, &type_nflags);
-       if (rc != 0)
-               goto fail2;
-
-       for (i = 0; i < type_nflags; ++i) {
-               if ((type_check & type_flags[i]) == type_flags[i])
-                       type_check &= ~(type_flags[i]);
-       }
+       if (encp->enc_rx_scale_additional_modes_supported == B_FALSE) {
+               efx_rx_hash_type_t t_ipv4 = EFX_RX_HASH(IPV4, 2TUPLE) |
+                                           EFX_RX_HASH(IPV4_TCP, 2TUPLE);
+               efx_rx_hash_type_t t_ipv6 = EFX_RX_HASH(IPV6, 2TUPLE) |
+                                           EFX_RX_HASH(IPV6_TCP, 2TUPLE);
+               efx_rx_hash_type_t t_ipv4_tcp = EFX_RX_HASH(IPV4_TCP, 4TUPLE);
+               efx_rx_hash_type_t t_ipv6_tcp = EFX_RX_HASH(IPV6_TCP, 4TUPLE);
+
+               if ((type & t_ipv4) == t_ipv4)
+                       type |= EFX_RX_HASH_IPV4;
+               if ((type & t_ipv6) == t_ipv6)
+                       type |= EFX_RX_HASH_IPV6;
+
+               if (encp->enc_rx_scale_l4_hash_supported == B_TRUE) {
+                       if ((type & t_ipv4_tcp) == t_ipv4_tcp)
+                               type |= EFX_RX_HASH_TCPIPV4;
+                       if ((type & t_ipv6_tcp) == t_ipv6_tcp)
+                               type |= EFX_RX_HASH_TCPIPV6;
+               }
 
-       if (type_check != 0) {
-               rc = EINVAL;
-               goto fail3;
+               type &= EFX_RX_HASH_LEGACY_MASK;
        }
 
        if (erxop->erxo_scale_mode_set != NULL) {
@@ -831,7 +864,7 @@ efx_rx_qcreate_packed_stream(
 {
        efx_rxq_type_data_t type_data;
 
-       memset(&type_data, 0, sizeof(type_data));
+       memset(&type_data, 0, sizeof (type_data));
 
        type_data.ertd_packed_stream.eps_buf_size = ps_buf_size;
 
@@ -867,7 +900,7 @@ efx_rx_qcreate_es_super_buffer(
                goto fail1;
        }
 
-       memset(&type_data, 0, sizeof(type_data));
+       memset(&type_data, 0, sizeof (type_data));
 
        type_data.ertd_es_super_buffer.eessb_bufs_per_desc = n_bufs_per_desc;
        type_data.ertd_es_super_buffer.eessb_max_dma_len = max_dma_len;
@@ -1090,10 +1123,6 @@ siena_rx_scale_mode_set(
        __in            efx_rx_hash_type_t type,
        __in            boolean_t insert)
 {
-       efx_rx_hash_type_t type_ipv4 = EFX_RX_HASH(IPV4, 2TUPLE);
-       efx_rx_hash_type_t type_ipv4_tcp = EFX_RX_HASH(IPV4_TCP, 4TUPLE);
-       efx_rx_hash_type_t type_ipv6 = EFX_RX_HASH(IPV6, 2TUPLE);
-       efx_rx_hash_type_t type_ipv6_tcp = EFX_RX_HASH(IPV6_TCP, 4TUPLE);
        efx_rc_t rc;
 
        if (rss_context != EFX_RSS_CONTEXT_DEFAULT) {
@@ -1108,12 +1137,12 @@ siena_rx_scale_mode_set(
 
        case EFX_RX_HASHALG_TOEPLITZ:
                EFX_RX_TOEPLITZ_IPV4_HASH(enp, insert,
-                   (type & type_ipv4) == type_ipv4,
-                   (type & type_ipv4_tcp) == type_ipv4_tcp);
+                   (type & EFX_RX_HASH_IPV4) ? B_TRUE : B_FALSE,
+                   (type & EFX_RX_HASH_TCPIPV4) ? B_TRUE : B_FALSE);
 
                EFX_RX_TOEPLITZ_IPV6_HASH(enp,
-                   (type & type_ipv6) == type_ipv6,
-                   (type & type_ipv6_tcp) == type_ipv6_tcp,
+                   (type & EFX_RX_HASH_IPV6) ? B_TRUE : B_FALSE,
+                   (type & EFX_RX_HASH_TCPIPV6) ? B_TRUE : B_FALSE,
                    rc);
                if (rc != 0)
                        goto fail2;
index 399fd54..edb6be0 100644 (file)
@@ -40,8 +40,9 @@ efx_mcdi_set_tunnel_encap_udp_ports(
        __out                   boolean_t *resetting)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_LENMAX,
-                           MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload,
+               MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_IN_LENMAX,
+               MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS_OUT_LEN);
        efx_word_t flags;
        efx_rc_t rc;
        unsigned int i;
@@ -52,7 +53,6 @@ efx_mcdi_set_tunnel_encap_udp_ports(
        else
                entries_num = etcp->etc_udp_entries_num;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_SET_TUNNEL_ENCAP_UDP_PORTS;
        req.emr_in_buf = payload;
        req.emr_in_length =
index da37580..bf1180a 100644 (file)
@@ -572,19 +572,10 @@ efx_tx_qdesc_post(
 {
        efx_nic_t *enp = etp->et_enp;
        const efx_tx_ops_t *etxop = enp->en_etxop;
-       efx_rc_t rc;
 
        EFSYS_ASSERT3U(etp->et_magic, ==, EFX_TXQ_MAGIC);
 
-       if ((rc = etxop->etxo_qdesc_post(etp, ed,
-           ndescs, completed, addedp)) != 0)
-               goto fail1;
-
-       return (0);
-
-fail1:
-       EFSYS_PROBE1(fail1, efx_rc_t, rc);
-       return (rc);
+       return (etxop->etxo_qdesc_post(etp, ed, ndescs, completed, addedp));
 }
 
        void
@@ -763,10 +754,9 @@ siena_tx_qpost(
 {
        unsigned int added = *addedp;
        unsigned int i;
-       int rc = ENOSPC;
 
        if (added - completed + ndescs > EFX_TXQ_LIMIT(etp->et_mask + 1))
-               goto fail1;
+               return (ENOSPC);
 
        for (i = 0; i < ndescs; i++) {
                efx_buffer_t *ebp = &eb[i];
@@ -788,11 +778,6 @@ siena_tx_qpost(
 
        *addedp = added;
        return (0);
-
-fail1:
-       EFSYS_PROBE1(fail1, efx_rc_t, rc);
-
-       return (rc);
 }
 
 static         void
index 16ea81d..ca30e90 100644 (file)
@@ -20,7 +20,6 @@ hunt_nic_get_required_pcie_bandwidth(
        __out           uint32_t *bandwidth_mbpsp)
 {
        uint32_t port_modes;
-       uint32_t max_port_mode;
        uint32_t bandwidth;
        efx_rc_t rc;
 
@@ -30,7 +29,8 @@ hunt_nic_get_required_pcie_bandwidth(
         * capable mode is in use.
         */
 
-       if ((rc = efx_mcdi_get_port_modes(enp, &port_modes, NULL)) != 0) {
+       if ((rc = efx_mcdi_get_port_modes(enp, &port_modes,
+                   NULL, NULL)) != 0) {
                /* No port mode info available */
                bandwidth = 0;
                goto out;
@@ -46,17 +46,13 @@ hunt_nic_get_required_pcie_bandwidth(
                        goto fail1;
        } else {
                if (port_modes & (1U << TLV_PORT_MODE_40G)) {
-                       max_port_mode = TLV_PORT_MODE_40G;
+                       bandwidth = 40000;
                } else if (port_modes & (1U << TLV_PORT_MODE_10G_10G_10G_10G)) {
-                       max_port_mode = TLV_PORT_MODE_10G_10G_10G_10G;
+                       bandwidth = 4 * 10000;
                } else {
                        /* Assume two 10G ports */
-                       max_port_mode = TLV_PORT_MODE_10G_10G;
+                       bandwidth = 2 * 10000;
                }
-
-               if ((rc = ef10_nic_get_port_mode_bandwidth(max_port_mode,
-                                                           &bandwidth)) != 0)
-                       goto fail2;
        }
 
 out:
@@ -64,8 +60,6 @@ out:
 
        return (0);
 
-fail2:
-       EFSYS_PROBE(fail2);
 fail1:
        EFSYS_PROBE1(fail1, efx_rc_t, rc);
 
@@ -189,6 +183,9 @@ hunt_board_cfg(
 
        encp->enc_bug61265_workaround = B_FALSE; /* Medford only */
 
+       /* Checksums for TSO sends can be incorrect on Huntington. */
+       encp->enc_bug61297_workaround = B_TRUE;
+
        /* Alignment for receive packet DMA buffers */
        encp->enc_rx_buf_align_start = 1;
        encp->enc_rx_buf_align_end = 64; /* RX DMA end padding */
diff --git a/drivers/net/sfc/base/mc_driver_pcol_strs.h b/drivers/net/sfc/base/mc_driver_pcol_strs.h
new file mode 100644 (file)
index 0000000..73d633c
--- /dev/null
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright 2008-2018 Solarflare Communications Inc.
+ * All rights reserved.
+ */
+
+/*
+ * This file is automatically generated. DO NOT EDIT IT.
+ * To make changes, edit the .yml files under firmwaresrc doc/mcdi/ and
+ * rebuild this file with "make -C doc mcdiheaders".
+ *
+ * This version of the file contains only the MCDI strings actually used in libefx.
+ */
+
+#ifndef _MC_DRIVER_PCOL_STRS_H
+#define _MC_DRIVER_PCOL_STRS_H
+
+#define MC_CMD_SENSOR_CONTROLLER_TEMP_ENUM_STR "Controller temperature: degC"
+#define MC_CMD_SENSOR_PHY_COMMON_TEMP_ENUM_STR "Phy common temperature: degC"
+#define MC_CMD_SENSOR_CONTROLLER_COOLING_ENUM_STR "Controller cooling: bool"
+#define MC_CMD_SENSOR_PHY0_TEMP_ENUM_STR "Phy 0 temperature: degC"
+#define MC_CMD_SENSOR_PHY0_COOLING_ENUM_STR "Phy 0 cooling: bool"
+#define MC_CMD_SENSOR_PHY1_TEMP_ENUM_STR "Phy 1 temperature: degC"
+#define MC_CMD_SENSOR_PHY1_COOLING_ENUM_STR "Phy 1 cooling: bool"
+#define MC_CMD_SENSOR_IN_1V0_ENUM_STR "1.0v power: mV"
+#define MC_CMD_SENSOR_IN_1V2_ENUM_STR "1.2v power: mV"
+#define MC_CMD_SENSOR_IN_1V8_ENUM_STR "1.8v power: mV"
+#define MC_CMD_SENSOR_IN_2V5_ENUM_STR "2.5v power: mV"
+#define MC_CMD_SENSOR_IN_3V3_ENUM_STR "3.3v power: mV"
+#define MC_CMD_SENSOR_IN_12V0_ENUM_STR "12v power: mV"
+#define MC_CMD_SENSOR_IN_1V2A_ENUM_STR "1.2v analogue power: mV"
+#define MC_CMD_SENSOR_IN_VREF_ENUM_STR "reference voltage: mV"
+#define MC_CMD_SENSOR_OUT_VAOE_ENUM_STR "AOE FPGA power: mV"
+#define MC_CMD_SENSOR_AOE_TEMP_ENUM_STR "AOE FPGA temperature: degC"
+#define MC_CMD_SENSOR_PSU_AOE_TEMP_ENUM_STR "AOE FPGA PSU temperature: degC"
+#define MC_CMD_SENSOR_PSU_TEMP_ENUM_STR "AOE PSU temperature: degC"
+#define MC_CMD_SENSOR_FAN_0_ENUM_STR "Fan 0 speed: RPM"
+#define MC_CMD_SENSOR_FAN_1_ENUM_STR "Fan 1 speed: RPM"
+#define MC_CMD_SENSOR_FAN_2_ENUM_STR "Fan 2 speed: RPM"
+#define MC_CMD_SENSOR_FAN_3_ENUM_STR "Fan 3 speed: RPM"
+#define MC_CMD_SENSOR_FAN_4_ENUM_STR "Fan 4 speed: RPM"
+#define MC_CMD_SENSOR_IN_VAOE_ENUM_STR "AOE FPGA input power: mV"
+#define MC_CMD_SENSOR_OUT_IAOE_ENUM_STR "AOE FPGA current: mA"
+#define MC_CMD_SENSOR_IN_IAOE_ENUM_STR "AOE FPGA input current: mA"
+#define MC_CMD_SENSOR_NIC_POWER_ENUM_STR "NIC power consumption: W"
+#define MC_CMD_SENSOR_IN_0V9_ENUM_STR "0.9v power voltage: mV"
+#define MC_CMD_SENSOR_IN_I0V9_ENUM_STR "0.9v power current: mA"
+#define MC_CMD_SENSOR_IN_I1V2_ENUM_STR "1.2v power current: mA"
+#define MC_CMD_SENSOR_PAGE0_NEXT_ENUM_STR "Not a sensor: reserved for the next page flag"
+#define MC_CMD_SENSOR_IN_0V9_ADC_ENUM_STR "0.9v power voltage (at ADC): mV"
+#define MC_CMD_SENSOR_CONTROLLER_2_TEMP_ENUM_STR "Controller temperature 2: degC"
+#define MC_CMD_SENSOR_VREG_INTERNAL_TEMP_ENUM_STR "Voltage regulator internal temperature: degC"
+#define MC_CMD_SENSOR_VREG_0V9_TEMP_ENUM_STR "0.9V voltage regulator temperature: degC"
+#define MC_CMD_SENSOR_VREG_1V2_TEMP_ENUM_STR "1.2V voltage regulator temperature: degC"
+#define MC_CMD_SENSOR_CONTROLLER_VPTAT_ENUM_STR "controller internal temperature sensor voltage (internal ADC): mV"
+#define MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP_ENUM_STR "controller internal temperature (internal ADC): degC"
+#define MC_CMD_SENSOR_CONTROLLER_VPTAT_EXTADC_ENUM_STR "controller internal temperature sensor voltage (external ADC): mV"
+#define MC_CMD_SENSOR_CONTROLLER_INTERNAL_TEMP_EXTADC_ENUM_STR "controller internal temperature (external ADC): degC"
+#define MC_CMD_SENSOR_AMBIENT_TEMP_ENUM_STR "ambient temperature: degC"
+#define MC_CMD_SENSOR_AIRFLOW_ENUM_STR "air flow: bool"
+#define MC_CMD_SENSOR_VDD08D_VSS08D_CSR_ENUM_STR "voltage between VSS08D and VSS08D at CSR: mV"
+#define MC_CMD_SENSOR_VDD08D_VSS08D_CSR_EXTADC_ENUM_STR "voltage between VSS08D and VSS08D at CSR (external ADC): mV"
+#define MC_CMD_SENSOR_HOTPOINT_TEMP_ENUM_STR "Hotpoint temperature: degC"
+#define MC_CMD_SENSOR_PHY_POWER_PORT0_ENUM_STR "Port 0 PHY power switch over-current: bool"
+#define MC_CMD_SENSOR_PHY_POWER_PORT1_ENUM_STR "Port 1 PHY power switch over-current: bool"
+#define MC_CMD_SENSOR_MUM_VCC_ENUM_STR "Mop-up microcontroller reference voltage: mV"
+#define MC_CMD_SENSOR_IN_0V9_A_ENUM_STR "0.9v power phase A voltage: mV"
+#define MC_CMD_SENSOR_IN_I0V9_A_ENUM_STR "0.9v power phase A current: mA"
+#define MC_CMD_SENSOR_VREG_0V9_A_TEMP_ENUM_STR "0.9V voltage regulator phase A temperature: degC"
+#define MC_CMD_SENSOR_IN_0V9_B_ENUM_STR "0.9v power phase B voltage: mV"
+#define MC_CMD_SENSOR_IN_I0V9_B_ENUM_STR "0.9v power phase B current: mA"
+#define MC_CMD_SENSOR_VREG_0V9_B_TEMP_ENUM_STR "0.9V voltage regulator phase B temperature: degC"
+#define MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY_ENUM_STR "CCOM AVREG 1v2 supply (interval ADC): mV"
+#define MC_CMD_SENSOR_CCOM_AVREG_1V2_SUPPLY_EXTADC_ENUM_STR "CCOM AVREG 1v2 supply (external ADC): mV"
+#define MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY_ENUM_STR "CCOM AVREG 1v8 supply (interval ADC): mV"
+#define MC_CMD_SENSOR_CCOM_AVREG_1V8_SUPPLY_EXTADC_ENUM_STR "CCOM AVREG 1v8 supply (external ADC): mV"
+#define MC_CMD_SENSOR_CONTROLLER_RTS_ENUM_STR "CCOM RTS temperature: degC"
+#define MC_CMD_SENSOR_PAGE1_NEXT_ENUM_STR "Not a sensor: reserved for the next page flag"
+#define MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT_ENUM_STR "controller internal temperature sensor voltage on master core (internal ADC): mV"
+#define MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP_ENUM_STR "controller internal temperature on master core (internal ADC): degC"
+#define MC_CMD_SENSOR_CONTROLLER_MASTER_VPTAT_EXTADC_ENUM_STR "controller internal temperature sensor voltage on master core (external ADC): mV"
+#define MC_CMD_SENSOR_CONTROLLER_MASTER_INTERNAL_TEMP_EXTADC_ENUM_STR "controller internal temperature on master core (external ADC): degC"
+#define MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT_ENUM_STR "controller internal temperature on slave core sensor voltage (internal ADC): mV"
+#define MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP_ENUM_STR "controller internal temperature on slave core (internal ADC): degC"
+#define MC_CMD_SENSOR_CONTROLLER_SLAVE_VPTAT_EXTADC_ENUM_STR "controller internal temperature on slave core sensor voltage (external ADC): mV"
+#define MC_CMD_SENSOR_CONTROLLER_SLAVE_INTERNAL_TEMP_EXTADC_ENUM_STR "controller internal temperature on slave core (external ADC): degC"
+#define MC_CMD_SENSOR_SODIMM_VOUT_ENUM_STR "Voltage supplied to the SODIMMs from their power supply: mV"
+#define MC_CMD_SENSOR_SODIMM_0_TEMP_ENUM_STR "Temperature of SODIMM 0 (if installed): degC"
+#define MC_CMD_SENSOR_SODIMM_1_TEMP_ENUM_STR "Temperature of SODIMM 1 (if installed): degC"
+#define MC_CMD_SENSOR_PHY0_VCC_ENUM_STR "Voltage supplied to the QSFP #0 from their power supply: mV"
+#define MC_CMD_SENSOR_PHY1_VCC_ENUM_STR "Voltage supplied to the QSFP #1 from their power supply: mV"
+#define MC_CMD_SENSOR_CONTROLLER_TDIODE_TEMP_ENUM_STR "Controller die temperature (TDIODE): degC"
+#define MC_CMD_SENSOR_BOARD_FRONT_TEMP_ENUM_STR "Board temperature (front): degC"
+#define MC_CMD_SENSOR_BOARD_BACK_TEMP_ENUM_STR "Board temperature (back): degC"
+#define MC_CMD_SENSOR_IN_I1V8_ENUM_STR "1.8v power current: mA"
+#define MC_CMD_SENSOR_IN_I2V5_ENUM_STR "2.5v power current: mA"
+#define MC_CMD_SENSOR_IN_I3V3_ENUM_STR "3.3v power current: mA"
+#define MC_CMD_SENSOR_IN_I12V0_ENUM_STR "12v power current: mA"
+#define MC_CMD_SENSOR_IN_1V3_ENUM_STR "1.3v power: mV"
+#define MC_CMD_SENSOR_IN_I1V3_ENUM_STR "1.3v power current: mA"
+
+#endif /* _MC_DRIVER_PCOL_STRS_H */
index 940bd02..b53de0d 100644 (file)
 
 #include "efx.h"
 #include "efx_impl.h"
+#include "mcdi_mon.h"
 
 #if EFSYS_OPT_MON_MCDI
 
 #if EFSYS_OPT_MON_STATS
 
-#define        MCDI_MON_NEXT_PAGE  ((uint16_t)0xfffe)
-#define        MCDI_MON_INVALID_SENSOR ((uint16_t)0xfffd)
-#define        MCDI_MON_PAGE_SIZE 0x20
-
-/* Bitmasks of valid port(s) for each sensor */
-#define        MCDI_MON_PORT_NONE      (0x00)
-#define        MCDI_MON_PORT_P1        (0x01)
-#define        MCDI_MON_PORT_P2        (0x02)
-#define        MCDI_MON_PORT_P3        (0x04)
-#define        MCDI_MON_PORT_P4        (0x08)
-#define        MCDI_MON_PORT_Px        (0xFFFF)
-
 /* Get port mask from one-based MCDI port number */
 #define        MCDI_MON_PORT_MASK(_emip) (1U << ((_emip)->emi_port - 1))
 
-/* Entry for MCDI sensor in sensor map */
-#define        STAT(portmask, stat)    \
-       { (MCDI_MON_PORT_##portmask), (EFX_MON_STAT_##stat) }
-
-/* Entry for sensor next page flag in sensor map */
-#define        STAT_NEXT_PAGE()        \
-       { MCDI_MON_PORT_NONE, MCDI_MON_NEXT_PAGE }
-
-/* Placeholder for gaps in the array */
-#define        STAT_NO_SENSOR()        \
-       { MCDI_MON_PORT_NONE, MCDI_MON_INVALID_SENSOR }
-
-/* Map from MC sensors to monitor statistics */
-static const struct mcdi_sensor_map_s {
-       uint16_t        msm_port_mask;
-       uint16_t        msm_stat;
-} mcdi_sensor_map[] = {
-       /* Sensor page 0                MC_CMD_SENSOR_xxx */
-       STAT(Px, INT_TEMP),             /* 0x00 CONTROLLER_TEMP */
-       STAT(Px, EXT_TEMP),             /* 0x01 PHY_COMMON_TEMP */
-       STAT(Px, INT_COOLING),          /* 0x02 CONTROLLER_COOLING */
-       STAT(P1, EXT_TEMP),             /* 0x03 PHY0_TEMP */
-       STAT(P1, EXT_COOLING),          /* 0x04 PHY0_COOLING */
-       STAT(P2, EXT_TEMP),             /* 0x05 PHY1_TEMP */
-       STAT(P2, EXT_COOLING),          /* 0x06 PHY1_COOLING */
-       STAT(Px, 1V),                   /* 0x07 IN_1V0 */
-       STAT(Px, 1_2V),                 /* 0x08 IN_1V2 */
-       STAT(Px, 1_8V),                 /* 0x09 IN_1V8 */
-       STAT(Px, 2_5V),                 /* 0x0a IN_2V5 */
-       STAT(Px, 3_3V),                 /* 0x0b IN_3V3 */
-       STAT(Px, 12V),                  /* 0x0c IN_12V0 */
-       STAT(Px, 1_2VA),                /* 0x0d IN_1V2A */
-       STAT(Px, VREF),                 /* 0x0e IN_VREF */
-       STAT(Px, VAOE),                 /* 0x0f OUT_VAOE */
-       STAT(Px, AOE_TEMP),             /* 0x10 AOE_TEMP */
-       STAT(Px, PSU_AOE_TEMP),         /* 0x11 PSU_AOE_TEMP */
-       STAT(Px, PSU_TEMP),             /* 0x12 PSU_TEMP */
-       STAT(Px, FAN0),                 /* 0x13 FAN_0 */
-       STAT(Px, FAN1),                 /* 0x14 FAN_1 */
-       STAT(Px, FAN2),                 /* 0x15 FAN_2 */
-       STAT(Px, FAN3),                 /* 0x16 FAN_3 */
-       STAT(Px, FAN4),                 /* 0x17 FAN_4 */
-       STAT(Px, VAOE_IN),              /* 0x18 IN_VAOE */
-       STAT(Px, IAOE),                 /* 0x19 OUT_IAOE */
-       STAT(Px, IAOE_IN),              /* 0x1a IN_IAOE */
-       STAT(Px, NIC_POWER),            /* 0x1b NIC_POWER */
-       STAT(Px, 0_9V),                 /* 0x1c IN_0V9 */
-       STAT(Px, I0_9V),                /* 0x1d IN_I0V9 */
-       STAT(Px, I1_2V),                /* 0x1e IN_I1V2 */
-       STAT_NEXT_PAGE(),               /* 0x1f Next page flag (not a sensor) */
-
-       /* Sensor page 1                MC_CMD_SENSOR_xxx */
-       STAT(Px, 0_9V_ADC),             /* 0x20 IN_0V9_ADC */
-       STAT(Px, INT_TEMP2),            /* 0x21 CONTROLLER_2_TEMP */
-       STAT(Px, VREG_TEMP),            /* 0x22 VREG_INTERNAL_TEMP */
-       STAT(Px, VREG_0_9V_TEMP),       /* 0x23 VREG_0V9_TEMP */
-       STAT(Px, VREG_1_2V_TEMP),       /* 0x24 VREG_1V2_TEMP */
-       STAT(Px, INT_VPTAT),            /* 0x25 CTRLR. VPTAT */
-       STAT(Px, INT_ADC_TEMP),         /* 0x26 CTRLR. INTERNAL_TEMP */
-       STAT(Px, EXT_VPTAT),            /* 0x27 CTRLR. VPTAT_EXTADC */
-       STAT(Px, EXT_ADC_TEMP),         /* 0x28 CTRLR. INTERNAL_TEMP_EXTADC */
-       STAT(Px, AMBIENT_TEMP),         /* 0x29 AMBIENT_TEMP */
-       STAT(Px, AIRFLOW),              /* 0x2a AIRFLOW */
-       STAT(Px, VDD08D_VSS08D_CSR),    /* 0x2b VDD08D_VSS08D_CSR */
-       STAT(Px, VDD08D_VSS08D_CSR_EXTADC), /* 0x2c VDD08D_VSS08D_CSR_EXTADC */
-       STAT(Px, HOTPOINT_TEMP),        /* 0x2d HOTPOINT_TEMP */
-       STAT(P1, PHY_POWER_SWITCH_PORT0),   /* 0x2e PHY_POWER_SWITCH_PORT0 */
-       STAT(P2, PHY_POWER_SWITCH_PORT1),   /* 0x2f PHY_POWER_SWITCH_PORT1 */
-       STAT(Px, MUM_VCC),              /* 0x30 MUM_VCC */
-       STAT(Px, 0V9_A),                /* 0x31 0V9_A */
-       STAT(Px, I0V9_A),               /* 0x32 I0V9_A */
-       STAT(Px, 0V9_A_TEMP),           /* 0x33 0V9_A_TEMP */
-       STAT(Px, 0V9_B),                /* 0x34 0V9_B */
-       STAT(Px, I0V9_B),               /* 0x35 I0V9_B */
-       STAT(Px, 0V9_B_TEMP),           /* 0x36 0V9_B_TEMP */
-       STAT(Px, CCOM_AVREG_1V2_SUPPLY),  /* 0x37 CCOM_AVREG_1V2_SUPPLY */
-       STAT(Px, CCOM_AVREG_1V2_SUPPLY_EXT_ADC),
-                                       /* 0x38 CCOM_AVREG_1V2_SUPPLY_EXT_ADC */
-       STAT(Px, CCOM_AVREG_1V8_SUPPLY),  /* 0x39 CCOM_AVREG_1V8_SUPPLY */
-       STAT(Px, CCOM_AVREG_1V8_SUPPLY_EXT_ADC),
-                                       /* 0x3a CCOM_AVREG_1V8_SUPPLY_EXT_ADC */
-       STAT_NO_SENSOR(),               /* 0x3b (no sensor) */
-       STAT_NO_SENSOR(),               /* 0x3c (no sensor) */
-       STAT_NO_SENSOR(),               /* 0x3d (no sensor) */
-       STAT_NO_SENSOR(),               /* 0x3e (no sensor) */
-       STAT_NEXT_PAGE(),               /* 0x3f Next page flag (not a sensor) */
-
-       /* Sensor page 2                MC_CMD_SENSOR_xxx */
-       STAT(Px, CONTROLLER_MASTER_VPTAT),         /* 0x40 MASTER_VPTAT */
-       STAT(Px, CONTROLLER_MASTER_INTERNAL_TEMP), /* 0x41 MASTER_INT_TEMP */
-       STAT(Px, CONTROLLER_MASTER_VPTAT_EXT_ADC), /* 0x42 MAST_VPTAT_EXT_ADC */
-       STAT(Px, CONTROLLER_MASTER_INTERNAL_TEMP_EXT_ADC),
-                                       /* 0x43 MASTER_INTERNAL_TEMP_EXT_ADC */
-       STAT(Px, CONTROLLER_SLAVE_VPTAT),         /* 0x44 SLAVE_VPTAT */
-       STAT(Px, CONTROLLER_SLAVE_INTERNAL_TEMP), /* 0x45 SLAVE_INTERNAL_TEMP */
-       STAT(Px, CONTROLLER_SLAVE_VPTAT_EXT_ADC), /* 0x46 SLAVE_VPTAT_EXT_ADC */
-       STAT(Px, CONTROLLER_SLAVE_INTERNAL_TEMP_EXT_ADC),
-                                       /* 0x47 SLAVE_INTERNAL_TEMP_EXT_ADC */
-       STAT_NO_SENSOR(),               /* 0x48 (no sensor) */
-       STAT(Px, SODIMM_VOUT),          /* 0x49 SODIMM_VOUT */
-       STAT(Px, SODIMM_0_TEMP),        /* 0x4a SODIMM_0_TEMP */
-       STAT(Px, SODIMM_1_TEMP),        /* 0x4b SODIMM_1_TEMP */
-       STAT(Px, PHY0_VCC),             /* 0x4c PHY0_VCC */
-       STAT(Px, PHY1_VCC),             /* 0x4d PHY1_VCC */
-       STAT(Px, CONTROLLER_TDIODE_TEMP), /* 0x4e CONTROLLER_TDIODE_TEMP */
-       STAT(Px, BOARD_FRONT_TEMP),     /* 0x4f BOARD_FRONT_TEMP */
-       STAT(Px, BOARD_BACK_TEMP),      /* 0x50 BOARD_BACK_TEMP */
-       STAT(Px, I1V8),                 /* 0x51 IN_I1V8 */
-       STAT(Px, I2V5),                 /* 0x52 IN_I2V5 */
-       STAT(Px, I3V3),                 /* 0x53 IN_I3V3 */
-       STAT(Px, I12V0),                /* 0x54 IN_I12V0 */
-       STAT(Px, 1_3V),                 /* 0x55 IN_1V3 */
-       STAT(Px, I1V3),                 /* 0x56 IN_I1V3 */
-};
-
 #define        MCDI_STATIC_SENSOR_ASSERT(_field)                               \
        EFX_STATIC_ASSERT(MC_CMD_SENSOR_STATE_ ## _field                \
                            == EFX_MON_STAT_STATE_ ## _field)
@@ -155,10 +29,10 @@ mcdi_mon_decode_stats(
        __inout_ecount_opt(EFX_MON_NSTATS)      efx_mon_stat_value_t *stat)
 {
        efx_mcdi_iface_t *emip = &(enp->en_mcdi.em_emip);
-       uint16_t port_mask;
+       efx_mon_stat_portmask_t port_mask;
        uint16_t sensor;
        size_t sensor_max;
-       uint32_t stat_mask[(EFX_ARRAY_SIZE(mcdi_sensor_map) + 31) / 32];
+       uint32_t stat_mask[(EFX_MON_NSTATS + 31) / 32];
        uint32_t idx = 0;
        uint32_t page = 0;
 
@@ -169,13 +43,10 @@ mcdi_mon_decode_stats(
        MCDI_STATIC_SENSOR_ASSERT(BROKEN);
        MCDI_STATIC_SENSOR_ASSERT(NO_READING);
 
-       EFX_STATIC_ASSERT(sizeof (stat_mask[0]) * 8 ==
-           EFX_MON_MASK_ELEMENT_SIZE);
-       sensor_max =
-           MIN((8 * sensor_mask_size), EFX_ARRAY_SIZE(mcdi_sensor_map));
+       sensor_max = 8 * sensor_mask_size;
 
        EFSYS_ASSERT(emip->emi_port > 0); /* MCDI port number is one-based */
-       port_mask = MCDI_MON_PORT_MASK(emip);
+       port_mask = (efx_mon_stat_portmask_t)MCDI_MON_PORT_MASK(emip);
 
        memset(stat_mask, 0, sizeof (stat_mask));
 
@@ -190,19 +61,36 @@ mcdi_mon_decode_stats(
         * does not understand.
         */
        for (sensor = 0; sensor < sensor_max; ++sensor) {
-               efx_mon_stat_t id = mcdi_sensor_map[sensor].msm_stat;
+               efx_mon_stat_t id;
+               efx_mon_stat_portmask_t stat_portmask = 0;
+               boolean_t decode_ok;
+               efx_mon_stat_unit_t stat_unit;
 
-               if ((sensor % MCDI_MON_PAGE_SIZE) == MC_CMD_SENSOR_PAGE0_NEXT) {
-                       EFSYS_ASSERT3U(id, ==, MCDI_MON_NEXT_PAGE);
+               if ((sensor % (MC_CMD_SENSOR_PAGE0_NEXT + 1)) ==
+                   MC_CMD_SENSOR_PAGE0_NEXT) {
+                       /* This sensor is one of the page boundary bits. */
                        page++;
                        continue;
                }
+
                if (~(sensor_mask[page]) & (1U << sensor))
                        continue;
+               /* This sensor is not in the DMA buffer. */
+
                idx++;
+               /*
+                * A valid stat is present in the DMA buffer, so the index
+                * must be advanced even if the id lookup below fails.
+                */
+
+               decode_ok = efx_mon_mcdi_to_efx_stat(sensor, &id);
+               decode_ok =
+                   decode_ok && efx_mon_get_stat_portmap(id, &stat_portmask);
 
-               if ((port_mask & mcdi_sensor_map[sensor].msm_port_mask) == 0)
+               if (!(decode_ok && (stat_portmask & port_mask)))
                        continue;
+               /* Either the decode failed, or the stat's port is unknown. */
+
                EFSYS_ASSERT(id < EFX_MON_NSTATS);
 
                /*
@@ -228,6 +116,10 @@ mcdi_mon_decode_stats(
 
                        stat[id].emsv_state = (uint16_t)EFX_DWORD_FIELD(dword,
                            MC_CMD_SENSOR_VALUE_ENTRY_TYPEDEF_STATE);
+
+                       stat[id].emsv_unit =
+                           efx_mon_get_stat_unit(id, &stat_unit) ?
+                           stat_unit : EFX_MON_STAT_UNIT_UNKNOWN;
                }
        }
 
@@ -244,7 +136,7 @@ mcdi_mon_ev(
        __out                           efx_mon_stat_value_t *valuep)
 {
        efx_mcdi_iface_t *emip = &(enp->en_mcdi.em_emip);
-       uint16_t port_mask;
+       efx_mon_stat_portmask_t port_mask, sensor_port_mask;
        uint16_t sensor;
        uint16_t state;
        uint16_t value;
@@ -261,20 +153,22 @@ mcdi_mon_ev(
        /* Hardware must support this MCDI sensor */
        EFSYS_ASSERT3U(sensor, <,
            (8 * enp->en_nic_cfg.enc_mcdi_sensor_mask_size));
-       EFSYS_ASSERT((sensor % MCDI_MON_PAGE_SIZE) != MC_CMD_SENSOR_PAGE0_NEXT);
+       EFSYS_ASSERT((sensor % (MC_CMD_SENSOR_PAGE0_NEXT + 1)) !=
+           MC_CMD_SENSOR_PAGE0_NEXT);
        EFSYS_ASSERT(enp->en_nic_cfg.enc_mcdi_sensor_maskp != NULL);
-       EFSYS_ASSERT(
-           (enp->en_nic_cfg.enc_mcdi_sensor_maskp[sensor/MCDI_MON_PAGE_SIZE] &
-           (1U << (sensor % MCDI_MON_PAGE_SIZE))) != 0);
+       EFSYS_ASSERT((enp->en_nic_cfg.enc_mcdi_sensor_maskp[
+                   sensor / (MC_CMD_SENSOR_PAGE0_NEXT + 1)] &
+               (1U << (sensor % (MC_CMD_SENSOR_PAGE0_NEXT + 1)))) != 0);
 
-       /* But we don't have to understand it */
-       if (sensor >= EFX_ARRAY_SIZE(mcdi_sensor_map)) {
+       /* And we need to understand it, to get port-map */
+       if (!efx_mon_mcdi_to_efx_stat(sensor, &id)) {
                rc = ENOTSUP;
                goto fail1;
        }
-       id = mcdi_sensor_map[sensor].msm_stat;
-       if ((port_mask & mcdi_sensor_map[sensor].msm_port_mask) == 0)
+       if (!(efx_mon_get_stat_portmap(id, &sensor_port_mask) &&
+               (port_mask && sensor_port_mask))) {
                return (ENODEV);
+       }
        EFSYS_ASSERT(id < EFX_MON_NSTATS);
 
        *idp = id;
@@ -297,9 +191,15 @@ efx_mcdi_read_sensors(
        __in            uint32_t size)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_READ_SENSORS_EXT_IN_LEN,
-                           MC_CMD_READ_SENSORS_EXT_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_READ_SENSORS_EXT_IN_LEN,
+               MC_CMD_READ_SENSORS_EXT_OUT_LEN);
        uint32_t addr_lo, addr_hi;
+       efx_rc_t rc;
+
+       if (EFSYS_MEM_SIZE(esmp) < size) {
+               rc = EINVAL;
+               goto fail1;
+       }
 
        req.emr_cmd = MC_CMD_READ_SENSORS;
        req.emr_in_buf = payload;
@@ -317,6 +217,11 @@ efx_mcdi_read_sensors(
        efx_mcdi_execute(enp, &req);
 
        return (req.emr_rc);
+
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
 }
 
 static __checkReturn   efx_rc_t
@@ -325,8 +230,8 @@ efx_mcdi_sensor_info_npages(
        __out           uint32_t *npagesp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_SENSOR_INFO_EXT_IN_LEN,
-                           MC_CMD_SENSOR_INFO_OUT_LENMAX)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_SENSOR_INFO_EXT_IN_LEN,
+               MC_CMD_SENSOR_INFO_OUT_LENMAX);
        int page;
        efx_rc_t rc;
 
@@ -369,8 +274,8 @@ efx_mcdi_sensor_info(
        __in                    size_t npages)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_SENSOR_INFO_EXT_IN_LEN,
-                           MC_CMD_SENSOR_INFO_OUT_LENMAX)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_SENSOR_INFO_EXT_IN_LEN,
+               MC_CMD_SENSOR_INFO_OUT_LENMAX);
        uint32_t page;
        efx_rc_t rc;
 
@@ -429,6 +334,86 @@ fail1:
        return (rc);
 }
 
+static __checkReturn           efx_rc_t
+efx_mcdi_sensor_info_page(
+       __in                    efx_nic_t *enp,
+       __in                    uint32_t page,
+       __out                   uint32_t *mask_part,
+       __out_ecount((sizeof (*mask_part) * 8) - 1)
+                               efx_mon_stat_limits_t *limits)
+{
+       efx_mcdi_req_t req;
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_SENSOR_INFO_EXT_IN_LEN,
+               MC_CMD_SENSOR_INFO_OUT_LENMAX);
+       efx_rc_t rc;
+       uint32_t mask_copy;
+       efx_dword_t *maskp;
+       efx_qword_t *limit_info;
+
+       EFSYS_ASSERT(mask_part != NULL);
+       EFSYS_ASSERT(limits != NULL);
+
+       memset(limits, 0,
+           ((sizeof (*mask_part) * 8) - 1) * sizeof (efx_mon_stat_limits_t));
+
+       req.emr_cmd = MC_CMD_SENSOR_INFO;
+       req.emr_in_buf = payload;
+       req.emr_in_length = MC_CMD_SENSOR_INFO_EXT_IN_LEN;
+       req.emr_out_buf = payload;
+       req.emr_out_length = MC_CMD_SENSOR_INFO_OUT_LENMAX;
+
+       MCDI_IN_SET_DWORD(req, SENSOR_INFO_EXT_IN_PAGE, page);
+
+       efx_mcdi_execute(enp, &req);
+
+       rc = req.emr_rc;
+
+       if (rc != 0)
+               goto fail1;
+
+       EFSYS_ASSERT(sizeof (*limit_info) ==
+           MC_CMD_SENSOR_INFO_ENTRY_TYPEDEF_LEN);
+       maskp = MCDI_OUT2(req, efx_dword_t, SENSOR_INFO_OUT_MASK);
+       limit_info = (efx_qword_t *)(maskp + 1);
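+       /*
+        * The response is laid out as the sensor mask dword followed by
+        * one limits entry per set mask bit (the next-page flag has no
+        * entry), which the copy loop below relies on.
+        */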
+
+       *mask_part = maskp->ed_u32[0];
+       mask_copy = *mask_part;
+
+       /* Copy out a limits entry for every set bit except the next-page flag. */
+       while (mask_copy) {
+
+               if (mask_copy == (1U << MC_CMD_SENSOR_PAGE0_NEXT)) {
+                       /* Only next page bit set. */
+                       mask_copy = 0;
+               } else {
+                       /* Clear lowest bit */
+                       mask_copy = mask_copy & ~(mask_copy ^ (mask_copy - 1));
+                       /* And copy out limit entry into buffer */
+                       limits->emlv_warning_min = EFX_QWORD_FIELD(*limit_info,
+                           MC_CMD_SENSOR_INFO_ENTRY_TYPEDEF_MIN1);
+
+                       limits->emlv_warning_max = EFX_QWORD_FIELD(*limit_info,
+                           MC_CMD_SENSOR_INFO_ENTRY_TYPEDEF_MAX1);
+
+                       limits->emlv_fatal_min = EFX_QWORD_FIELD(*limit_info,
+                           MC_CMD_SENSOR_INFO_ENTRY_TYPEDEF_MIN2);
+
+                       limits->emlv_fatal_max = EFX_QWORD_FIELD(*limit_info,
+                           MC_CMD_SENSOR_INFO_ENTRY_TYPEDEF_MAX2);
+
+                       limits++;
+                       limit_info++;
+               }
+       }
+
+       return (rc);
+
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
+}
+
        __checkReturn                   efx_rc_t
 mcdi_mon_stats_update(
        __in                            efx_nic_t *enp,
@@ -451,6 +436,96 @@ mcdi_mon_stats_update(
 
        return (0);
 
+fail1:
+       EFSYS_PROBE1(fail1, efx_rc_t, rc);
+
+       return (rc);
+}
+
+static         void
+lowest_set_bit(
+       __in    uint32_t input_mask,
+       __out   uint32_t *lowest_bit_mask,
+       __out   uint32_t *lowest_bit_num
+)
+{
+       uint32_t x;
+       uint32_t set_bit, bit_index;
+
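+       /*
+        * input_mask ^ (input_mask - 1) yields a mask of the lowest set
+        * bit and all bits below it; (x + 1) >> 1 then isolates the
+        * lowest set bit itself. The addition wraps to 0 when bit 31 is
+        * the lowest set bit, which the check below repairs.
+        */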
+       x = (input_mask ^ (input_mask - 1));
+       set_bit = (x + 1) >> 1;
+       if (!set_bit)
+               set_bit = (1U << 31U);
+
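+       /* Compute log2(set_bit) by testing 16/8/4/2/1-bit group masks. */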
+       bit_index = 0;
+       if (set_bit & 0xFFFF0000)
+               bit_index += 16;
+       if (set_bit & 0xFF00FF00)
+               bit_index += 8;
+       if (set_bit & 0xF0F0F0F0)
+               bit_index += 4;
+       if (set_bit & 0xCCCCCCCC)
+               bit_index += 2;
+       if (set_bit & 0xAAAAAAAA)
+               bit_index += 1;
+
+       *lowest_bit_mask = set_bit;
+       *lowest_bit_num = bit_index;
+}
+
+       __checkReturn                   efx_rc_t
+mcdi_mon_limits_update(
+       __in                            efx_nic_t *enp,
+       __inout_ecount(EFX_MON_NSTATS)  efx_mon_stat_limits_t *values)
+{
+       efx_rc_t rc;
+       uint32_t page;
+       uint32_t page_mask;
+       uint32_t limit_index;
+       efx_mon_stat_limits_t limits[sizeof (page_mask) * 8];
+       efx_mon_stat_t stat;
+
+       page = 0;
+       do {
+               rc = efx_mcdi_sensor_info_page(enp, page, &page_mask, limits);
+               if (rc != 0)
+                       goto fail1;
+
+               limit_index = 0;
+               while (page_mask) {
+                       uint32_t set_bit;
+                       uint32_t page_index;
+                       uint32_t mcdi_index;
+
+                       if (page_mask == (1U << MC_CMD_SENSOR_PAGE0_NEXT))
+                               break;
+
+                       lowest_set_bit(page_mask, &set_bit, &page_index);
+                       page_mask = page_mask & ~set_bit;
+
+                       mcdi_index =
+                           page_index + (sizeof (page_mask) * 8 * page);
+
+                       /*
+                        * This can fail if MCDI reports newer stats than
+                        * the driver understands, or if the bit is the
+                        * next-page bit. The driver needs to be tolerant
+                        * of this.
+                        */
+                       if (!efx_mon_mcdi_to_efx_stat(mcdi_index, &stat))
+                               continue;
+
+                       values[stat] = limits[limit_index];
+                       limit_index++;
+               }
+
+       } while (page_mask & (1U << MC_CMD_SENSOR_PAGE0_NEXT));
+
+       return (rc);
+
 fail1:
        EFSYS_PROBE1(fail1, efx_rc_t, rc);
 
index 5aa6a6a..5eba090 100644 (file)
@@ -39,6 +39,11 @@ mcdi_mon_stats_update(
        __in                            efsys_mem_t *esmp,
        __inout_ecount(EFX_MON_NSTATS)  efx_mon_stat_value_t *values);
 
+extern __checkReturn                   efx_rc_t
+mcdi_mon_limits_update(
+       __in                            efx_nic_t *enp,
+       __inout_ecount(EFX_MON_NSTATS)  efx_mon_stat_limits_t *values);
+
 #endif /* EFSYS_OPT_MON_STATS */
 
 #endif /* EFSYS_OPT_MON_MCDI */
index 7f5ad17..6bc1e87 100644 (file)
@@ -15,25 +15,15 @@ medford2_nic_get_required_pcie_bandwidth(
        __in            efx_nic_t *enp,
        __out           uint32_t *bandwidth_mbpsp)
 {
-       uint32_t port_modes;
-       uint32_t current_mode;
        uint32_t bandwidth;
        efx_rc_t rc;
 
        /* FIXME: support new Medford2 dynamic port modes */
 
-       if ((rc = efx_mcdi_get_port_modes(enp, &port_modes,
-                                   &current_mode)) != 0) {
-               /* No port mode info available. */
-               bandwidth = 0;
-               goto out;
-       }
-
-       if ((rc = ef10_nic_get_port_mode_bandwidth(current_mode,
+       if ((rc = ef10_nic_get_port_mode_bandwidth(enp,
                                                    &bandwidth)) != 0)
                goto fail1;
 
-out:
        *bandwidth_mbpsp = bandwidth;
 
        return (0);
@@ -96,6 +86,9 @@ medford2_board_cfg(
        else
                goto fail1;
 
+       /* Checksums for TSO sends should always be correct on Medford2. */
+       encp->enc_bug61297_workaround = B_FALSE;
+
        /* Get clock frequencies (in MHz). */
        if ((rc = efx_mcdi_get_clock(enp, &sysclk, &dpcpu_clk)) != 0)
                goto fail2;
index 6dc895f..bfe01ca 100644 (file)
@@ -15,23 +15,13 @@ medford_nic_get_required_pcie_bandwidth(
        __in            efx_nic_t *enp,
        __out           uint32_t *bandwidth_mbpsp)
 {
-       uint32_t port_modes;
-       uint32_t current_mode;
        uint32_t bandwidth;
        efx_rc_t rc;
 
-       if ((rc = efx_mcdi_get_port_modes(enp, &port_modes,
-                                   &current_mode)) != 0) {
-               /* No port mode info available. */
-               bandwidth = 0;
-               goto out;
-       }
-
-       if ((rc = ef10_nic_get_port_mode_bandwidth(current_mode,
+       if ((rc = ef10_nic_get_port_mode_bandwidth(enp,
                                                    &bandwidth)) != 0)
                goto fail1;
 
-out:
        *bandwidth_mbpsp = bandwidth;
 
        return (0);
@@ -94,6 +84,9 @@ medford_board_cfg(
        else
                goto fail1;
 
+       /* Checksums for TSO sends can be incorrect on Medford. */
+       encp->enc_bug61297_workaround = B_TRUE;
+
        /* Get clock frequencies (in MHz). */
        if ((rc = efx_mcdi_get_clock(enp, &sysclk, &dpcpu_clk)) != 0)
                goto fail2;
index da2bf44..ab66f32 100644 (file)
@@ -58,6 +58,9 @@ extra_flags = [
 ]
 
 c_args = cflags
+if allow_experimental_apis
+       c_args += '-DALLOW_EXPERIMENTAL_API'
+endif
 foreach flag: extra_flags
        if cc.has_argument(flag)
                c_args += flag
index f8857cd..928dfc3 100644 (file)
@@ -68,14 +68,13 @@ siena_mac_reconfigure(
        efx_port_t *epp = &(enp->en_port);
        efx_oword_t multicast_hash[2];
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MAX(MC_CMD_SET_MAC_IN_LEN,
-                               MC_CMD_SET_MAC_OUT_LEN),
-                           MAX(MC_CMD_SET_MCAST_HASH_IN_LEN,
-                               MC_CMD_SET_MCAST_HASH_OUT_LEN))];
+       EFX_MCDI_DECLARE_BUF(payload,
+               MAX(MC_CMD_SET_MAC_IN_LEN, MC_CMD_SET_MCAST_HASH_IN_LEN),
+               MAX(MC_CMD_SET_MAC_OUT_LEN, MC_CMD_SET_MCAST_HASH_OUT_LEN));
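+       /*
+        * EFX_MCDI_DECLARE_BUF declares a zero-initialised buffer large
+        * enough for both request and response, so the explicit memset
+        * previously done here is no longer needed.
+        */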
+
        unsigned int fcntl;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_SET_MAC;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_SET_MAC_IN_LEN;
index 31eef80..fca1717 100644 (file)
@@ -18,11 +18,10 @@ siena_nic_get_partn_mask(
        __out                   unsigned int *maskp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_NVRAM_TYPES_IN_LEN,
-                           MC_CMD_NVRAM_TYPES_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_NVRAM_TYPES_IN_LEN,
+               MC_CMD_NVRAM_TYPES_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_NVRAM_TYPES;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_NVRAM_TYPES_IN_LEN;
@@ -115,6 +114,7 @@ siena_board_cfg(
        /* Alignment for WPTR updates */
        encp->enc_rx_push_align = 1;
 
+#if EFSYS_OPT_RX_SCALE
        /* There is one RSS context per function */
        encp->enc_rx_scale_max_exclusive_contexts = 1;
 
@@ -129,6 +129,7 @@ siena_board_cfg(
 
        /* There is no support for additional RSS modes */
        encp->enc_rx_scale_additional_modes_supported = B_FALSE;
+#endif /* EFSYS_OPT_RX_SCALE */
 
        encp->enc_tx_dma_desc_size_max = EFX_MASK32(FSF_AZ_TX_KER_BYTE_COUNT);
        /* Fragments must not span 4k boundaries. */
index 8cdd2df..b8ea8a7 100644 (file)
@@ -418,12 +418,11 @@ siena_nvram_get_subtype(
        __out                   uint32_t *subtypep)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_BOARD_CFG_IN_LEN,
-                           MC_CMD_GET_BOARD_CFG_OUT_LENMAX)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_BOARD_CFG_IN_LEN,
+               MC_CMD_GET_BOARD_CFG_OUT_LENMAX);
        efx_word_t *fw_list;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_BOARD_CFG;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_BOARD_CFG_IN_LEN;
index 4b2190d..7eec9c7 100644 (file)
@@ -169,11 +169,10 @@ siena_phy_get_link(
        __out           siena_link_state_t *slsp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_LINK_IN_LEN,
-                           MC_CMD_GET_LINK_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_LINK_IN_LEN,
+               MC_CMD_GET_LINK_OUT_LEN);
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_LINK;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_LINK_IN_LEN;
@@ -244,10 +243,9 @@ siena_phy_reconfigure(
 {
        efx_port_t *epp = &(enp->en_port);
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MAX(MC_CMD_SET_ID_LED_IN_LEN,
-                               MC_CMD_SET_ID_LED_OUT_LEN),
-                           MAX(MC_CMD_SET_LINK_IN_LEN,
-                               MC_CMD_SET_LINK_OUT_LEN))];
+       EFX_MCDI_DECLARE_BUF(payload,
+               MAX(MC_CMD_SET_ID_LED_IN_LEN, MC_CMD_SET_LINK_IN_LEN),
+               MAX(MC_CMD_SET_ID_LED_OUT_LEN, MC_CMD_SET_LINK_OUT_LEN));
        uint32_t cap_mask;
 #if EFSYS_OPT_PHY_LED_CONTROL
        unsigned int led_mode;
@@ -255,7 +253,6 @@ siena_phy_reconfigure(
        unsigned int speed;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_SET_LINK;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_SET_LINK_IN_LEN;
@@ -361,12 +358,11 @@ siena_phy_verify(
        __in            efx_nic_t *enp)
 {
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_GET_PHY_STATE_IN_LEN,
-                           MC_CMD_GET_PHY_STATE_OUT_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_GET_PHY_STATE_IN_LEN,
+               MC_CMD_GET_PHY_STATE_OUT_LEN);
        uint32_t state;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_GET_PHY_STATE;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_GET_PHY_STATE_IN_LEN;
@@ -530,8 +526,8 @@ siena_phy_stats_update(
        uint32_t vmask = encp->enc_mcdi_phy_stat_mask;
        uint64_t smask;
        efx_mcdi_req_t req;
-       uint8_t payload[MAX(MC_CMD_PHY_STATS_IN_LEN,
-                           MC_CMD_PHY_STATS_OUT_DMA_LEN)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_PHY_STATS_IN_LEN,
+               MC_CMD_PHY_STATS_OUT_DMA_LEN);
        efx_rc_t rc;
 
        if ((esmp == NULL) || (EFSYS_MEM_SIZE(esmp) < EFX_PHY_STATS_SIZE)) {
@@ -539,7 +535,6 @@ siena_phy_stats_update(
                goto fail1;
        }
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_PHY_STATS;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_PHY_STATS_IN_LEN;
@@ -626,14 +621,13 @@ siena_phy_bist_poll(
        __in                    size_t count)
 {
        efx_nic_cfg_t *encp = &(enp->en_nic_cfg);
-       uint8_t payload[MAX(MC_CMD_POLL_BIST_IN_LEN,
-                           MCDI_CTL_SDU_LEN_MAX)];
+       EFX_MCDI_DECLARE_BUF(payload, MC_CMD_POLL_BIST_IN_LEN,
+               MCDI_CTL_SDU_LEN_MAX);
        uint32_t value_mask = 0;
        efx_mcdi_req_t req;
        uint32_t result;
        efx_rc_t rc;
 
-       (void) memset(payload, 0, sizeof (payload));
        req.emr_cmd = MC_CMD_POLL_BIST;
        req.emr_in_buf = payload;
        req.emr_in_length = MC_CMD_POLL_BIST_IN_LEN;
index b9d2df5..0b4795d 100644 (file)
@@ -48,10 +48,6 @@ extern "C" {
 #include "efx_types.h"
 
 
-#ifndef _NOTE
-#define _NOTE(s)
-#endif
-
 typedef bool boolean_t;
 
 #ifndef B_FALSE
@@ -106,40 +102,6 @@ prefetch_read_once(const volatile void *addr)
        rte_prefetch_non_temporal(addr);
 }
 
-/* Modifiers used for Windows builds */
-#define __in
-#define __in_opt
-#define __in_ecount(_n)
-#define __in_ecount_opt(_n)
-#define __in_bcount(_n)
-#define __in_bcount_opt(_n)
-
-#define __out
-#define __out_opt
-#define __out_ecount(_n)
-#define __out_ecount_opt(_n)
-#define __out_bcount(_n)
-#define __out_bcount_opt(_n)
-#define __out_bcount_part(_n, _l)
-#define __out_bcount_part_opt(_n, _l)
-
-#define __deref_out
-
-#define __inout
-#define __inout_opt
-#define __inout_ecount(_n)
-#define __inout_ecount_opt(_n)
-#define __inout_bcount(_n)
-#define __inout_bcount_opt(_n)
-#define __inout_bcount_full_opt(_n)
-
-#define __deref_out_bcount_opt(n)
-
-#define __checkReturn
-#define __success(_x)
-
-#define __drv_when(_p, _c)
-
 /* Code inclusion options */
 
 
index eda9676..c246871 100644 (file)
@@ -57,6 +57,11 @@ struct sfc_dp_tx_qcreate_info {
        volatile void           *mem_bar;
        /** VI window size shift */
        unsigned int            vi_window_shift;
+       /**
+        * Maximum offset (in bytes from the start of the packet) at which
+        * the TCP header may begin for the hardware to apply TSO packet
+        * edits.
+        */
+       uint16_t                tso_tcp_header_offset_limit;
 };
 
 /**
index 81c8f7f..a24f54e 100644 (file)
@@ -123,14 +123,22 @@ static struct rte_mbuf *
 sfc_ef10_essb_next_mbuf(const struct sfc_ef10_essb_rxq *rxq,
                        struct rte_mbuf *mbuf)
 {
-       return (struct rte_mbuf *)((uintptr_t)mbuf + rxq->buf_stride);
+       struct rte_mbuf *m;
+
+       m = (struct rte_mbuf *)((uintptr_t)mbuf + rxq->buf_stride);
+       MBUF_RAW_ALLOC_CHECK(m);
+       return m;
 }
 
 static struct rte_mbuf *
 sfc_ef10_essb_mbuf_by_index(const struct sfc_ef10_essb_rxq *rxq,
                            struct rte_mbuf *mbuf, unsigned int idx)
 {
-       return (struct rte_mbuf *)((uintptr_t)mbuf + idx * rxq->buf_stride);
+       struct rte_mbuf *m;
+
+       m = (struct rte_mbuf *)((uintptr_t)mbuf + idx * rxq->buf_stride);
+       MBUF_RAW_ALLOC_CHECK(m);
+       return m;
 }
 
 static struct rte_mbuf *
@@ -324,7 +332,7 @@ sfc_ef10_essb_rx_get_pending(struct sfc_ef10_essb_rxq *rxq,
 
                        /* Buffers to be discarded have 0 in packet type */
                        if (unlikely(m->packet_type == 0)) {
-                               rte_mempool_put(rxq->refill_mb_pool, m);
+                               rte_mbuf_raw_free(m);
                                goto next_buf;
                        }
 
@@ -687,7 +695,7 @@ sfc_ef10_essb_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
                m = sfc_ef10_essb_mbuf_by_index(rxq, rxd->first_mbuf,
                                rxq->block_size - rxq->left_in_completed);
                while (rxq->left_in_completed > 0) {
-                       rte_mempool_put(rxq->refill_mb_pool, m);
+                       rte_mbuf_raw_free(m);
                        m = sfc_ef10_essb_next_mbuf(rxq, m);
                        rxq->left_in_completed--;
                }
index 6a5052b..77ca580 100644 (file)
@@ -57,12 +57,13 @@ struct sfc_ef10_rxq {
 #define SFC_EF10_RXQ_EXCEPTION         0x4
 #define SFC_EF10_RXQ_RSS_HASH          0x8
        unsigned int                    ptr_mask;
-       unsigned int                    prepared;
+       unsigned int                    pending;
        unsigned int                    completed;
        unsigned int                    evq_read_ptr;
        efx_qword_t                     *evq_hw_ring;
        struct sfc_ef10_rx_sw_desc      *sw_ring;
        uint64_t                        rearm_data;
+       struct rte_mbuf                 *scatter_pkt;
        uint16_t                        prefix_size;
 
        /* Used on refill */
@@ -133,6 +134,8 @@ sfc_ef10_rx_qrefill(struct sfc_ef10_rxq *rxq)
                        struct sfc_ef10_rx_sw_desc *rxd;
                        rte_iova_t phys_addr;
 
+                       MBUF_RAW_ALLOC_CHECK(m);
+
                        SFC_ASSERT((id & ~ptr_mask) == 0);
                        rxd = &rxq->sw_ring[id];
                        rxd->mbuf = m;
@@ -184,21 +187,26 @@ sfc_ef10_rx_prefetch_next(struct sfc_ef10_rxq *rxq, unsigned int next_id)
        }
 }
 
-static uint16_t
-sfc_ef10_rx_prepared(struct sfc_ef10_rxq *rxq, struct rte_mbuf **rx_pkts,
-                    uint16_t nb_pkts)
+static struct rte_mbuf **
+sfc_ef10_rx_pending(struct sfc_ef10_rxq *rxq, struct rte_mbuf **rx_pkts,
+                   uint16_t nb_pkts)
 {
-       uint16_t n_rx_pkts = RTE_MIN(nb_pkts, rxq->prepared);
-       unsigned int completed = rxq->completed;
-       unsigned int i;
+       uint16_t n_rx_pkts = RTE_MIN(nb_pkts, rxq->pending - rxq->completed);
 
-       rxq->prepared -= n_rx_pkts;
-       rxq->completed = completed + n_rx_pkts;
+       SFC_ASSERT(rxq->pending == rxq->completed || rxq->scatter_pkt == NULL);
 
-       for (i = 0; i < n_rx_pkts; ++i, ++completed)
-               rx_pkts[i] = rxq->sw_ring[completed & rxq->ptr_mask].mbuf;
+       if (n_rx_pkts != 0) {
+               unsigned int completed = rxq->completed;
+
+               rxq->completed = completed + n_rx_pkts;
+
+               do {
+                       *rx_pkts++ =
+                               rxq->sw_ring[completed++ & rxq->ptr_mask].mbuf;
+               } while (completed != rxq->completed);
+       }
 
-       return n_rx_pkts;
+       return rx_pkts;
 }
 
 static uint16_t
@@ -213,47 +221,80 @@ sfc_ef10_rx_pseudo_hdr_get_hash(const uint8_t *pseudo_hdr)
        return rte_le_to_cpu_32(*(const uint32_t *)pseudo_hdr);
 }
 
-static uint16_t
+static struct rte_mbuf **
 sfc_ef10_rx_process_event(struct sfc_ef10_rxq *rxq, efx_qword_t rx_ev,
-                         struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+                         struct rte_mbuf **rx_pkts,
+                         struct rte_mbuf ** const rx_pkts_end)
 {
        const unsigned int ptr_mask = rxq->ptr_mask;
-       unsigned int completed = rxq->completed;
+       unsigned int pending = rxq->pending;
        unsigned int ready;
        struct sfc_ef10_rx_sw_desc *rxd;
        struct rte_mbuf *m;
        struct rte_mbuf *m0;
-       uint16_t n_rx_pkts;
        const uint8_t *pseudo_hdr;
-       uint16_t pkt_len;
+       uint16_t seg_len;
 
-       ready = (EFX_QWORD_FIELD(rx_ev, ESF_DZ_RX_DSC_PTR_LBITS) - completed) &
+       ready = (EFX_QWORD_FIELD(rx_ev, ESF_DZ_RX_DSC_PTR_LBITS) - pending) &
                EFX_MASK32(ESF_DZ_RX_DSC_PTR_LBITS);
-       SFC_ASSERT(ready > 0);
+
+       if (ready == 0) {
+               /* Rx abort - there were not enough descriptors for the Rx packet */
+               rte_pktmbuf_free(rxq->scatter_pkt);
+               rxq->scatter_pkt = NULL;
+               return rx_pkts;
+       }
+
+       rxq->pending = pending + ready;
 
        if (rx_ev.eq_u64[0] &
            rte_cpu_to_le_64((1ull << ESF_DZ_RX_ECC_ERR_LBN) |
                             (1ull << ESF_DZ_RX_ECRC_ERR_LBN))) {
-               SFC_ASSERT(rxq->prepared == 0);
-               rxq->completed += ready;
-               while (ready-- > 0) {
-                       rxd = &rxq->sw_ring[completed++ & ptr_mask];
-                       rte_mempool_put(rxq->refill_mb_pool, rxd->mbuf);
-               }
-               return 0;
+               SFC_ASSERT(rxq->completed == pending);
+               do {
+                       rxd = &rxq->sw_ring[pending++ & ptr_mask];
+                       rte_mbuf_raw_free(rxd->mbuf);
+               } while (pending != rxq->pending);
+               rxq->completed = pending;
+               return rx_pkts;
        }
 
-       n_rx_pkts = RTE_MIN(ready, nb_pkts);
-       rxq->prepared = ready - n_rx_pkts;
-       rxq->completed += n_rx_pkts;
+       /* If scattered packet is in progress */
+       if (rxq->scatter_pkt != NULL) {
+               /* Events for scattered packet frags are not merged */
+               SFC_ASSERT(ready == 1);
+               SFC_ASSERT(rxq->completed == pending);
 
-       rxd = &rxq->sw_ring[completed++ & ptr_mask];
+               /* There is no pseudo-header in scatter segments. */
+               seg_len = EFX_QWORD_FIELD(rx_ev, ESF_DZ_RX_BYTES);
 
-       sfc_ef10_rx_prefetch_next(rxq, completed & ptr_mask);
+               rxd = &rxq->sw_ring[pending++ & ptr_mask];
+               m = rxd->mbuf;
 
-       m = rxd->mbuf;
+               MBUF_RAW_ALLOC_CHECK(m);
+
+               m->data_off = RTE_PKTMBUF_HEADROOM;
+               rte_pktmbuf_data_len(m) = seg_len;
+               rte_pktmbuf_pkt_len(m) = seg_len;
 
-       *rx_pkts++ = m;
+               rxq->scatter_pkt->nb_segs++;
+               rte_pktmbuf_pkt_len(rxq->scatter_pkt) += seg_len;
+               rte_pktmbuf_lastseg(rxq->scatter_pkt)->next = m;
+
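+               /* A clear CONT bit marks the final segment of the packet */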
+               if (~rx_ev.eq_u64[0] &
+                   rte_cpu_to_le_64(1ull << ESF_DZ_RX_CONT_LBN)) {
+                       *rx_pkts++ = rxq->scatter_pkt;
+                       rxq->scatter_pkt = NULL;
+               }
+               rxq->completed = pending;
+               return rx_pkts;
+       }
+
+       rxd = &rxq->sw_ring[pending++ & ptr_mask];
+
+       sfc_ef10_rx_prefetch_next(rxq, pending & ptr_mask);
+
+       m = rxd->mbuf;
 
        RTE_BUILD_BUG_ON(sizeof(m->rearm_data[0]) != sizeof(rxq->rearm_data));
        m->rearm_data[0] = rxq->rearm_data;
@@ -275,27 +316,40 @@ sfc_ef10_rx_process_event(struct sfc_ef10_rxq *rxq, efx_qword_t rx_ev,
        m->hash.rss = sfc_ef10_rx_pseudo_hdr_get_hash(pseudo_hdr);
 
        if (ready == 1)
-               pkt_len = EFX_QWORD_FIELD(rx_ev, ESF_DZ_RX_BYTES) -
+               seg_len = EFX_QWORD_FIELD(rx_ev, ESF_DZ_RX_BYTES) -
                        rxq->prefix_size;
        else
-               pkt_len = sfc_ef10_rx_pseudo_hdr_get_len(pseudo_hdr);
-       SFC_ASSERT(pkt_len > 0);
-       rte_pktmbuf_data_len(m) = pkt_len;
-       rte_pktmbuf_pkt_len(m) = pkt_len;
+               seg_len = sfc_ef10_rx_pseudo_hdr_get_len(pseudo_hdr);
+       SFC_ASSERT(seg_len > 0);
+       rte_pktmbuf_data_len(m) = seg_len;
+       rte_pktmbuf_pkt_len(m) = seg_len;
 
        SFC_ASSERT(m->next == NULL);
 
+       if (~rx_ev.eq_u64[0] & rte_cpu_to_le_64(1ull << ESF_DZ_RX_CONT_LBN)) {
+               *rx_pkts++ = m;
+               rxq->completed = pending;
+       } else {
+               /* Events with CONT bit are not merged */
+               SFC_ASSERT(ready == 1);
+               rxq->scatter_pkt = m;
+               rxq->completed = pending;
+               return rx_pkts;
+       }
+
        /* Remember the mbuf from which to copy offload flags and packet type */
        m0 = m;
-       for (--ready; ready > 0; --ready) {
-               rxd = &rxq->sw_ring[completed++ & ptr_mask];
+       while (pending != rxq->pending) {
+               rxd = &rxq->sw_ring[pending++ & ptr_mask];
 
-               sfc_ef10_rx_prefetch_next(rxq, completed & ptr_mask);
+               sfc_ef10_rx_prefetch_next(rxq, pending & ptr_mask);
 
                m = rxd->mbuf;
 
-               if (ready > rxq->prepared)
+               if (rx_pkts != rx_pkts_end) {
                        *rx_pkts++ = m;
+                       rxq->completed = pending;
+               }
 
                RTE_BUILD_BUG_ON(sizeof(m->rearm_data[0]) !=
                                 sizeof(rxq->rearm_data));
@@ -315,15 +369,15 @@ sfc_ef10_rx_process_event(struct sfc_ef10_rxq *rxq, efx_qword_t rx_ev,
                 */
                m->hash.rss = sfc_ef10_rx_pseudo_hdr_get_hash(pseudo_hdr);
 
-               pkt_len = sfc_ef10_rx_pseudo_hdr_get_len(pseudo_hdr);
-               SFC_ASSERT(pkt_len > 0);
-               rte_pktmbuf_data_len(m) = pkt_len;
-               rte_pktmbuf_pkt_len(m) = pkt_len;
+               seg_len = sfc_ef10_rx_pseudo_hdr_get_len(pseudo_hdr);
+               SFC_ASSERT(seg_len > 0);
+               rte_pktmbuf_data_len(m) = seg_len;
+               rte_pktmbuf_pkt_len(m) = seg_len;
 
                SFC_ASSERT(m->next == NULL);
        }
 
-       return n_rx_pkts;
+       return rx_pkts;
 }
 
 static bool
@@ -355,26 +409,25 @@ static uint16_t
 sfc_ef10_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
        struct sfc_ef10_rxq *rxq = sfc_ef10_rxq_by_dp_rxq(rx_queue);
+       struct rte_mbuf ** const rx_pkts_end = &rx_pkts[nb_pkts];
        unsigned int evq_old_read_ptr;
-       uint16_t n_rx_pkts;
        efx_qword_t rx_ev;
 
+       rx_pkts = sfc_ef10_rx_pending(rxq, rx_pkts, nb_pkts);
+
        if (unlikely(rxq->flags &
                     (SFC_EF10_RXQ_NOT_RUNNING | SFC_EF10_RXQ_EXCEPTION)))
-               return 0;
-
-       n_rx_pkts = sfc_ef10_rx_prepared(rxq, rx_pkts, nb_pkts);
+               goto done;
 
        evq_old_read_ptr = rxq->evq_read_ptr;
-       while (n_rx_pkts != nb_pkts && sfc_ef10_rx_get_event(rxq, &rx_ev)) {
+       while (rx_pkts != rx_pkts_end && sfc_ef10_rx_get_event(rxq, &rx_ev)) {
                /*
                 * DROP_EVENT is internal to the NIC; software should
                 * never see it and, therefore, may ignore it.
                 */
 
-               n_rx_pkts += sfc_ef10_rx_process_event(rxq, rx_ev,
-                                                      rx_pkts + n_rx_pkts,
-                                                      nb_pkts - n_rx_pkts);
+               rx_pkts = sfc_ef10_rx_process_event(rxq, rx_ev,
+                                                   rx_pkts, rx_pkts_end);
        }
 
        sfc_ef10_ev_qclear(rxq->evq_hw_ring, rxq->ptr_mask, evq_old_read_ptr,
@@ -383,7 +436,8 @@ sfc_ef10_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
        /* It is not a problem if we refill in the case of exception */
        sfc_ef10_rx_qrefill(rxq);
 
-       return n_rx_pkts;
+done:
+       return nb_pkts - (rx_pkts_end - rx_pkts);
 }
 
 const uint32_t *
@@ -446,21 +500,53 @@ sfc_ef10_supported_ptypes_get(uint32_t tunnel_encaps)
 
 static sfc_dp_rx_qdesc_npending_t sfc_ef10_rx_qdesc_npending;
 static unsigned int
-sfc_ef10_rx_qdesc_npending(__rte_unused struct sfc_dp_rxq *dp_rxq)
+sfc_ef10_rx_qdesc_npending(struct sfc_dp_rxq *dp_rxq)
 {
+       struct sfc_ef10_rxq *rxq = sfc_ef10_rxq_by_dp_rxq(dp_rxq);
+       efx_qword_t rx_ev;
+       const unsigned int evq_old_read_ptr = rxq->evq_read_ptr;
+       unsigned int pending = rxq->pending;
+       unsigned int ready;
+
+       if (unlikely(rxq->flags &
+                    (SFC_EF10_RXQ_NOT_RUNNING | SFC_EF10_RXQ_EXCEPTION)))
+               goto done;
+
+       while (sfc_ef10_rx_get_event(rxq, &rx_ev)) {
+               ready = (EFX_QWORD_FIELD(rx_ev, ESF_DZ_RX_DSC_PTR_LBITS) -
+                        pending) &
+                       EFX_MASK32(ESF_DZ_RX_DSC_PTR_LBITS);
+               pending += ready;
+       }
+
        /*
-        * Correct implementation requires EvQ polling and events
-        * processing (keeping all ready mbufs in prepared).
+        * The function does not process events, so return the event queue
+        * read pointer to its original position to allow the events that
+        * were read to be processed later.
         */
-       return -ENOTSUP;
+       rxq->evq_read_ptr = evq_old_read_ptr;
+
+done:
+       return pending - rxq->completed;
 }
 
 static sfc_dp_rx_qdesc_status_t sfc_ef10_rx_qdesc_status;
 static int
-sfc_ef10_rx_qdesc_status(__rte_unused struct sfc_dp_rxq *dp_rxq,
-                        __rte_unused uint16_t offset)
+sfc_ef10_rx_qdesc_status(struct sfc_dp_rxq *dp_rxq, uint16_t offset)
 {
-       return -ENOTSUP;
+       struct sfc_ef10_rxq *rxq = sfc_ef10_rxq_by_dp_rxq(dp_rxq);
+       unsigned int npending = sfc_ef10_rx_qdesc_npending(dp_rxq);
+
+       if (unlikely(offset > rxq->ptr_mask))
+               return -EINVAL;
+
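+       /*
+        * Offsets in [0, npending) hold received packets (DONE); offsets
+        * below added - completed are posted to the NIC and still awaiting
+        * packets (AVAIL); the remainder are not posted (UNAVAIL).
+        */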
+       if (offset < npending)
+               return RTE_ETH_RX_DESC_DONE;
+
+       if (offset < (rxq->added - rxq->completed))
+               return RTE_ETH_RX_DESC_AVAIL;
+
+       return RTE_ETH_RX_DESC_UNAVAIL;
 }
 
 
@@ -594,8 +680,9 @@ sfc_ef10_rx_qstart(struct sfc_dp_rxq *dp_rxq, unsigned int evq_read_ptr)
 {
        struct sfc_ef10_rxq *rxq = sfc_ef10_rxq_by_dp_rxq(dp_rxq);
 
-       rxq->prepared = 0;
-       rxq->completed = rxq->added = 0;
+       SFC_ASSERT(rxq->completed == 0);
+       SFC_ASSERT(rxq->pending == 0);
+       SFC_ASSERT(rxq->added == 0);
 
        sfc_ef10_rx_qrefill(rxq);
 
@@ -642,12 +729,17 @@ sfc_ef10_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
        unsigned int i;
        struct sfc_ef10_rx_sw_desc *rxd;
 
+       rte_pktmbuf_free(rxq->scatter_pkt);
+       rxq->scatter_pkt = NULL;
+
        for (i = rxq->completed; i != rxq->added; ++i) {
                rxd = &rxq->sw_ring[i & rxq->ptr_mask];
-               rte_mempool_put(rxq->refill_mb_pool, rxd->mbuf);
+               rte_mbuf_raw_free(rxd->mbuf);
                rxd->mbuf = NULL;
        }
 
+       rxq->completed = rxq->pending = rxq->added = 0;
+
        rxq->flags &= ~SFC_EF10_RXQ_STARTED;
 }
 
@@ -657,7 +749,8 @@ struct sfc_dp_rx sfc_ef10_rx = {
                .type           = SFC_DP_RX,
                .hw_fw_caps     = SFC_DP_HW_FW_CAP_EF10,
        },
-       .features               = SFC_DP_RX_FEAT_MULTI_PROCESS |
+       .features               = SFC_DP_RX_FEAT_SCATTER |
+                                 SFC_DP_RX_FEAT_MULTI_PROCESS |
                                  SFC_DP_RX_FEAT_TUNNELS |
                                  SFC_DP_RX_FEAT_CHECKSUM,
        .get_dev_info           = sfc_ef10_rx_get_dev_info,
index d0daa3b..bcd3153 100644 (file)
@@ -11,6 +11,8 @@
 
 #include <rte_mbuf.h>
 #include <rte_io.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
 
 #include "efx.h"
 #include "efx_types.h"
@@ -21,6 +23,7 @@
 #include "sfc_tweak.h"
 #include "sfc_kvargs.h"
 #include "sfc_ef10.h"
+#include "sfc_tso.h"
 
 #define sfc_ef10_tx_err(dpq, ...) \
        SFC_DP_LOG(SFC_KVARG_DATAPATH_EF10, ERR, dpq, __VA_ARGS__)
@@ -62,6 +65,9 @@ struct sfc_ef10_txq {
        efx_qword_t                     *txq_hw_ring;
        volatile void                   *doorbell;
        efx_qword_t                     *evq_hw_ring;
+       uint8_t                         *tsoh;
+       rte_iova_t                      tsoh_iova;
+       uint16_t                        tso_tcp_header_offset_limit;
 
        /* Datapath transmit queue anchor */
        struct sfc_dp_txq               dp;
@@ -184,6 +190,30 @@ sfc_ef10_tx_qdesc_dma_create(rte_iova_t addr, uint16_t size, bool eop,
                             ESF_DZ_TX_KER_BUF_ADDR, addr);
 }
 
+static void
+sfc_ef10_tx_qdesc_tso2_create(struct sfc_ef10_txq * const txq,
+                             unsigned int added, uint16_t ipv4_id,
+                             uint16_t outer_ipv4_id, uint32_t tcp_seq,
+                             uint16_t tcp_mss)
+{
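+       /*
+        * FATSO2 uses a pair of option descriptors: the first (FATSO2A)
+        * carries the inner IPv4 ID and TCP sequence number, the second
+        * (FATSO2B) the TCP MSS and the outer IPv4 ID used for tunnels.
+        */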
+       EFX_POPULATE_QWORD_5(txq->txq_hw_ring[added & txq->ptr_mask],
+                           ESF_DZ_TX_DESC_IS_OPT, 1,
+                           ESF_DZ_TX_OPTION_TYPE,
+                           ESE_DZ_TX_OPTION_DESC_TSO,
+                           ESF_DZ_TX_TSO_OPTION_TYPE,
+                           ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A,
+                           ESF_DZ_TX_TSO_IP_ID, ipv4_id,
+                           ESF_DZ_TX_TSO_TCP_SEQNO, tcp_seq);
+       EFX_POPULATE_QWORD_5(txq->txq_hw_ring[(added + 1) & txq->ptr_mask],
+                           ESF_DZ_TX_DESC_IS_OPT, 1,
+                           ESF_DZ_TX_OPTION_TYPE,
+                           ESE_DZ_TX_OPTION_DESC_TSO,
+                           ESF_DZ_TX_TSO_OPTION_TYPE,
+                           ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B,
+                           ESF_DZ_TX_TSO_TCP_MSS, tcp_mss,
+                           ESF_DZ_TX_TSO_OUTER_IPID, outer_ipv4_id);
+}
+
 static inline void
 sfc_ef10_tx_qpush(struct sfc_ef10_txq *txq, unsigned int added,
                  unsigned int pushed)
@@ -263,6 +293,252 @@ sfc_ef10_tx_pkt_descs_max(const struct rte_mbuf *m)
                                    extra_descs_per_pkt);
 }
 
+static bool
+sfc_ef10_try_reap(struct sfc_ef10_txq * const txq, unsigned int added,
+                 unsigned int needed_desc, unsigned int *dma_desc_space,
+                 bool *reap_done)
+{
+       if (*reap_done)
+               return false;
+
+       if (added != txq->added) {
+               sfc_ef10_tx_qpush(txq, added, txq->added);
+               txq->added = added;
+       }
+
+       sfc_ef10_tx_reap(txq);
+       *reap_done = true;
+
+       /*
+        * Recalculate DMA descriptor space since Tx reap may change
+        * the number of completed descriptors
+        */
+       *dma_desc_space = txq->max_fill_level -
+               (added - txq->completed);
+
+       return (needed_desc <= *dma_desc_space);
+}
+
+static int
+sfc_ef10_xmit_tso_pkt(struct sfc_ef10_txq * const txq, struct rte_mbuf *m_seg,
+                     unsigned int *added, unsigned int *dma_desc_space,
+                     bool *reap_done)
+{
+       size_t iph_off = m_seg->l2_len;
+       size_t tcph_off = m_seg->l2_len + m_seg->l3_len;
+       size_t header_len = m_seg->l2_len + m_seg->l3_len + m_seg->l4_len;
+       /* Offset of the payload in the last segment that contains the header */
+       size_t in_off = 0;
+       const struct tcp_hdr *th;
+       uint16_t packet_id;
+       uint32_t sent_seq;
+       uint8_t *hdr_addr;
+       rte_iova_t hdr_iova;
+       struct rte_mbuf *first_m_seg = m_seg;
+       unsigned int pkt_start = *added;
+       unsigned int needed_desc;
+       struct rte_mbuf *m_seg_to_free_up_to = first_m_seg;
+       bool eop;
+
+       /* Both checks may be needed, so use bitwise OR to have only one branch */
+       if (unlikely((header_len > SFC_TSOH_STD_LEN) |
+                    (tcph_off > txq->tso_tcp_header_offset_limit)))
+               return EMSGSIZE;
+
+       /*
+        * Preliminary estimate of the number of DMA descriptors required,
+        * including the extra descriptor for the TSO header that is needed
+        * when the header is separated from the payload in one segment.
+        * It does not include the extra descriptors that may appear when a
+        * big segment is split across several descriptors.
+        */
+       needed_desc = m_seg->nb_segs +
+                       (unsigned int)SFC_TSO_OPT_DESCS_NUM +
+                       (unsigned int)SFC_TSO_HDR_DESCS_NUM;
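+       /*
+        * For example, a packet with 3 segments is estimated to need
+        * 3 + 2 (SFC_TSO_OPT_DESCS_NUM) + 1 (SFC_TSO_HDR_DESCS_NUM) = 6
+        * DMA descriptors.
+        */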
+
+       if (needed_desc > *dma_desc_space &&
+           !sfc_ef10_try_reap(txq, pkt_start, needed_desc,
+                              dma_desc_space, reap_done)) {
+               /*
+                * If a future Tx reap may increase available DMA descriptor
+                * space, do not try to send the packet.
+                */
+               if (txq->completed != pkt_start)
+                       return ENOSPC;
+               /*
+                * Do not allow the packet to be sent if the maximum DMA
+                * descriptor space is not sufficient to hold the TSO
+                * option descriptors, the header descriptor and at least
+                * one segment descriptor.
+                */
+               if (*dma_desc_space < SFC_TSO_OPT_DESCS_NUM +
+                               SFC_TSO_HDR_DESCS_NUM + 1)
+                       return EMSGSIZE;
+       }
+
+       /* Check if the header is not fragmented */
+       if (rte_pktmbuf_data_len(m_seg) >= header_len) {
+               hdr_addr = rte_pktmbuf_mtod(m_seg, uint8_t *);
+               hdr_iova = rte_mbuf_data_iova(m_seg);
+               if (rte_pktmbuf_data_len(m_seg) == header_len) {
+                       /*
+                        * Associate the header mbuf with the header
+                        * descriptor, which is located after the TSO option
+                        * descriptors.
+                        */
+                       txq->sw_ring[(pkt_start + SFC_TSO_OPT_DESCS_NUM) &
+                                    txq->ptr_mask].mbuf = m_seg;
+                       m_seg = m_seg->next;
+                       in_off = 0;
+
+                       /*
+                        * If there is no payload offset (payload starts at the
+                        * beginning of a segment) then an extra descriptor
+                        * for the separated header is not needed.
+                        */
+                       needed_desc--;
+               } else {
+                       in_off = header_len;
+               }
+       } else {
+               unsigned int copied_segs;
+               unsigned int hdr_addr_off = (*added & txq->ptr_mask) *
+                               SFC_TSOH_STD_LEN;
+
+               hdr_addr = txq->tsoh + hdr_addr_off;
+               hdr_iova = txq->tsoh_iova + hdr_addr_off;
+               copied_segs = sfc_tso_prepare_header(hdr_addr, header_len,
+                                                    &m_seg, &in_off);
+
+               m_seg_to_free_up_to = m_seg;
+               /*
+                * Reduce the number of needed descriptors by the number of
+                * segments that entirely consist of header data.
+                */
+               needed_desc -= copied_segs;
+
+               /* An extra descriptor for the separated header is not needed */
+               if (in_off == 0)
+                       needed_desc--;
+       }
+
+       switch (first_m_seg->ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)) {
+       case PKT_TX_IPV4: {
+               const struct ipv4_hdr *iphe4;
+
+               iphe4 = (const struct ipv4_hdr *)(hdr_addr + iph_off);
+               rte_memcpy(&packet_id, &iphe4->packet_id, sizeof(uint16_t));
+               packet_id = rte_be_to_cpu_16(packet_id);
+               break;
+       }
+       case PKT_TX_IPV6:
+               packet_id = 0;
+               break;
+       default:
+               return EINVAL;
+       }
+
+       th = (const struct tcp_hdr *)(hdr_addr + tcph_off);
+       rte_memcpy(&sent_seq, &th->sent_seq, sizeof(uint32_t));
+       sent_seq = rte_be_to_cpu_32(sent_seq);
+
+       sfc_ef10_tx_qdesc_tso2_create(txq, *added, packet_id, 0, sent_seq,
+                       first_m_seg->tso_segsz);
+       (*added) += SFC_TSO_OPT_DESCS_NUM;
+
+       sfc_ef10_tx_qdesc_dma_create(hdr_iova, header_len, false,
+                       &txq->txq_hw_ring[(*added) & txq->ptr_mask]);
+       (*added)++;
+
+       do {
+               rte_iova_t next_frag = rte_mbuf_data_iova(m_seg);
+               unsigned int seg_len = rte_pktmbuf_data_len(m_seg);
+               unsigned int id;
+
+               next_frag += in_off;
+               seg_len -= in_off;
+               in_off = 0;
+
+               do {
+                       rte_iova_t frag_addr = next_frag;
+                       size_t frag_len;
+
+                       frag_len = RTE_MIN(seg_len,
+                                          SFC_EF10_TX_DMA_DESC_LEN_MAX);
+
+                       next_frag += frag_len;
+                       seg_len -= frag_len;
+
+                       eop = (seg_len == 0 && m_seg->next == NULL);
+
+                       id = (*added) & txq->ptr_mask;
+                       (*added)++;
+
+                       /*
+                        * Initially we assume that one DMA descriptor is needed
+                        * for every segment. When a segment is split across
+                        * several DMA descriptors, increase the estimate.
+                        */
+                       needed_desc += (seg_len != 0);
+
+                       /*
+                        * Handle the case when no more descriptors can be
+                        * added but not all segments have been processed.
+                        */
+                       if (*added - pkt_start == *dma_desc_space &&
+                           !eop &&
+                           !sfc_ef10_try_reap(txq, pkt_start, needed_desc,
+                                               dma_desc_space, reap_done)) {
+                               struct rte_mbuf *m;
+                               struct rte_mbuf *m_next;
+
+                               if (txq->completed != pkt_start) {
+                                       unsigned int i;
+
+                                       /*
+                                        * Reset mbuf associations with added
+                                        * descriptors.
+                                        */
+                                       for (i = pkt_start; i != *added; i++) {
+                                               id = i & txq->ptr_mask;
+                                               txq->sw_ring[id].mbuf = NULL;
+                                       }
+                                       return ENOSPC;
+                               }
+
+                               /* Free the segments that cannot be sent */
+                               for (m = m_seg->next; m != NULL; m = m_next) {
+                                       m_next = m->next;
+                                       rte_pktmbuf_free_seg(m);
+                               }
+                               eop = true;
+                               /* Ignore the rest of the segment */
+                               seg_len = 0;
+                       }
+
+                       sfc_ef10_tx_qdesc_dma_create(frag_addr, frag_len,
+                                       eop, &txq->txq_hw_ring[id]);
+
+               } while (seg_len != 0);
+
+               txq->sw_ring[id].mbuf = m_seg;
+
+               m_seg = m_seg->next;
+       } while (!eop);
+
+       /*
+        * Free the segments whose content was entirely copied to the TSO
+        * header memory space of the Tx queue.
+        */
+       for (m_seg = first_m_seg; m_seg != m_seg_to_free_up_to;) {
+               struct rte_mbuf *seg_to_free = m_seg;
+
+               m_seg = m_seg->next;
+               rte_pktmbuf_free_seg(seg_to_free);
+       }
+
+       return 0;
+}
+
 static uint16_t
 sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
@@ -296,6 +572,30 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
                if (likely(pktp + 1 != pktp_end))
                        rte_mbuf_prefetch_part1(pktp[1]);
 
+               if (m_seg->ol_flags & PKT_TX_TCP_SEG) {
+                       int rc;
+
+                       rc = sfc_ef10_xmit_tso_pkt(txq, m_seg, &added,
+                                       &dma_desc_space, &reap_done);
+                       if (rc != 0) {
+                               added = pkt_start;
+
+                               /* The packet can be sent in a following xmit call */
+                               if (likely(rc == ENOSPC))
+                                       break;
+
+                               /*
+                                * The packet cannot be sent, so report it to
+                                * RTE as sent, but actually drop it and
+                                * continue with the next packet.
+                                */
+                               rte_pktmbuf_free(*pktp);
+                               continue;
+                       }
+
+                       goto dma_desc_space_update;
+               }
+
                if (sfc_ef10_tx_pkt_descs_max(m_seg) > dma_desc_space) {
                        if (reap_done)
                                break;
@@ -349,6 +649,7 @@ sfc_ef10_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 
                } while ((m_seg = m_seg->next) != 0);
 
+dma_desc_space_update:
                dma_desc_space -= (added - pkt_start);
        }
 
@@ -524,6 +825,18 @@ sfc_ef10_tx_qcreate(uint16_t port_id, uint16_t queue_id,
        if (txq->sw_ring == NULL)
                goto fail_sw_ring_alloc;
 
+       if (info->offloads & DEV_TX_OFFLOAD_TCP_TSO) {
+               txq->tsoh = rte_calloc_socket("sfc-ef10-txq-tsoh",
+                                             info->txq_entries,
+                                             SFC_TSOH_STD_LEN,
+                                             RTE_CACHE_LINE_SIZE,
+                                             socket_id);
+               if (txq->tsoh == NULL)
+                       goto fail_tsoh_alloc;
+
+               txq->tsoh_iova = rte_malloc_virt2iova(txq->tsoh);
+       }
+
        txq->flags = SFC_EF10_TXQ_NOT_RUNNING;
        txq->ptr_mask = info->txq_entries - 1;
        txq->max_fill_level = info->max_fill_level;
@@ -533,10 +846,14 @@ sfc_ef10_tx_qcreate(uint16_t port_id, uint16_t queue_id,
                        ER_DZ_TX_DESC_UPD_REG_OFST +
                        (info->hw_index << info->vi_window_shift);
        txq->evq_hw_ring = info->evq_hw_ring;
+       txq->tso_tcp_header_offset_limit = info->tso_tcp_header_offset_limit;
 
        *dp_txqp = &txq->dp;
        return 0;
 
+fail_tsoh_alloc:
+       rte_free(txq->sw_ring);
+
 fail_sw_ring_alloc:
        rte_free(txq);
 
@@ -551,6 +868,7 @@ sfc_ef10_tx_qdestroy(struct sfc_dp_txq *dp_txq)
 {
        struct sfc_ef10_txq *txq = sfc_ef10_txq_by_dp_txq(dp_txq);
 
+       rte_free(txq->tsoh);
        rte_free(txq->sw_ring);
        rte_free(txq);
 }
@@ -618,12 +936,49 @@ sfc_ef10_tx_qreap(struct sfc_dp_txq *dp_txq)
        txq->flags &= ~SFC_EF10_TXQ_STARTED;
 }
 
+static unsigned int
+sfc_ef10_tx_qdesc_npending(struct sfc_ef10_txq *txq)
+{
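+       /*
+        * Tx events report the index of the most recently completed
+        * descriptor; compare against the last reaped descriptor
+        * (completed - 1) to count completed but not yet reaped descriptors.
+        */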
+       const unsigned int curr_done = txq->completed - 1;
+       unsigned int anew_done = curr_done;
+       efx_qword_t tx_ev;
+       const unsigned int evq_old_read_ptr = txq->evq_read_ptr;
+
+       if (unlikely(txq->flags &
+                    (SFC_EF10_TXQ_NOT_RUNNING | SFC_EF10_TXQ_EXCEPTION)))
+               return 0;
+
+       while (sfc_ef10_tx_get_event(txq, &tx_ev))
+               anew_done = EFX_QWORD_FIELD(tx_ev, ESF_DZ_TX_DESCR_INDX);
+
+       /*
+        * The function does not process events, so return the event queue
+        * read pointer to its original position to allow the events that
+        * were read to be processed later.
+        */
+       txq->evq_read_ptr = evq_old_read_ptr;
+
+       return (anew_done - curr_done) & txq->ptr_mask;
+}
+
 static sfc_dp_tx_qdesc_status_t sfc_ef10_tx_qdesc_status;
 static int
-sfc_ef10_tx_qdesc_status(__rte_unused struct sfc_dp_txq *dp_txq,
-                        __rte_unused uint16_t offset)
+sfc_ef10_tx_qdesc_status(struct sfc_dp_txq *dp_txq,
+                        uint16_t offset)
 {
-       return -ENOTSUP;
+       struct sfc_ef10_txq *txq = sfc_ef10_txq_by_dp_txq(dp_txq);
+       unsigned int npending = sfc_ef10_tx_qdesc_npending(txq);
+
+       if (unlikely(offset > txq->ptr_mask))
+               return -EINVAL;
+
+       if (unlikely(offset >= txq->max_fill_level))
+               return RTE_ETH_TX_DESC_UNAVAIL;
+
+       if (unlikely(offset < npending))
+               return RTE_ETH_TX_DESC_FULL;
+
+       return RTE_ETH_TX_DESC_DONE;
 }
 
 struct sfc_dp_tx sfc_ef10_tx = {
@@ -632,7 +987,8 @@ struct sfc_dp_tx sfc_ef10_tx = {
                .type           = SFC_DP_TX,
                .hw_fw_caps     = SFC_DP_HW_FW_CAP_EF10,
        },
-       .features               = SFC_DP_TX_FEAT_MULTI_SEG |
+       .features               = SFC_DP_TX_FEAT_TSO |
+                                 SFC_DP_TX_FEAT_MULTI_SEG |
                                  SFC_DP_TX_FEAT_MULTI_POOL |
                                  SFC_DP_TX_FEAT_REFCNT |
                                  SFC_DP_TX_FEAT_MULTI_PROCESS,
index 9decbf5..3886daf 100644 (file)
@@ -171,6 +171,9 @@ sfc_dev_infos_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                sa->dp_rx->get_dev_info(dev_info);
        if (sa->dp_tx->get_dev_info != NULL)
                sa->dp_tx->get_dev_info(dev_info);
+
+       dev_info->dev_capa = RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP |
+                            RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP;
 }
 
 static const uint32_t *
@@ -441,8 +444,6 @@ sfc_rx_queue_release(void *queue)
 
        sfc_log_init(sa, "RxQ=%u", sw_index);
 
-       sa->eth_dev->data->rx_queues[sw_index] = NULL;
-
        sfc_rx_qfini(sa, sw_index);
 
        sfc_adapter_unlock(sa);
@@ -497,9 +498,6 @@ sfc_tx_queue_release(void *queue)
 
        sfc_adapter_lock(sa);
 
-       SFC_ASSERT(sw_index < sa->eth_dev->data->nb_tx_queues);
-       sa->eth_dev->data->tx_queues[sw_index] = NULL;
-
        sfc_tx_qfini(sa, sw_index);
 
        sfc_adapter_unlock(sa);
@@ -1143,6 +1141,9 @@ sfc_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
        if (sa->state != SFC_ADAPTER_STARTED)
                goto fail_not_started;
 
+       if (sa->rxq_info[rx_queue_id].rxq == NULL)
+               goto fail_not_setup;
+
        rc = sfc_rx_qstart(sa, rx_queue_id);
        if (rc != 0)
                goto fail_rx_qstart;
@@ -1154,6 +1155,7 @@ sfc_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id)
        return 0;
 
 fail_rx_qstart:
+fail_not_setup:
 fail_not_started:
        sfc_adapter_unlock(sa);
        SFC_ASSERT(rc > 0);
@@ -1191,6 +1193,9 @@ sfc_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
        if (sa->state != SFC_ADAPTER_STARTED)
                goto fail_not_started;
 
+       if (sa->txq_info[tx_queue_id].txq == NULL)
+               goto fail_not_setup;
+
        rc = sfc_tx_qstart(sa, tx_queue_id);
        if (rc != 0)
                goto fail_tx_qstart;
@@ -1202,6 +1207,7 @@ sfc_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id)
 
 fail_tx_qstart:
 
+fail_not_setup:
 fail_not_started:
        sfc_adapter_unlock(sa);
        SFC_ASSERT(rc > 0);
@@ -1348,14 +1354,10 @@ sfc_dev_rss_hash_conf_get(struct rte_eth_dev *dev,
 {
        struct sfc_adapter *sa = dev->data->dev_private;
        struct sfc_rss *rss = &sa->rss;
-       struct sfc_port *port = &sa->port;
 
-       if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE || port->isolated)
+       if (rss->context_type != EFX_RX_SCALE_EXCLUSIVE)
                return -ENOTSUP;
 
-       if (rss->channels == 0)
-               return -EINVAL;
-
        sfc_adapter_lock(sa);
 
        /*
@@ -2033,9 +2035,6 @@ sfc_eth_dev_uninit(struct rte_eth_dev *dev)
        sfc_detach(sa);
        sfc_unprobe(sa);
 
-       rte_free(dev->data->mac_addrs);
-       dev->data->mac_addrs = NULL;
-
        sfc_kvargs_cleanup(sa);
 
        sfc_adapter_unlock(sa);
index d8503e2..c792e0b 100644 (file)
@@ -96,13 +96,12 @@ sfc_efx_rx_qrefill(struct sfc_efx_rxq *rxq)
                     ++i, id = (id + 1) & rxq->ptr_mask) {
                        m = objs[i];
 
+                       MBUF_RAW_ALLOC_CHECK(m);
+
                        rxd = &rxq->sw_desc[id];
                        rxd->mbuf = m;
 
-                       SFC_ASSERT(rte_mbuf_refcnt_read(m) == 1);
                        m->data_off = RTE_PKTMBUF_HEADROOM;
-                       SFC_ASSERT(m->next == NULL);
-                       SFC_ASSERT(m->nb_segs == 1);
                        m->port = port_id;
 
                        addr[i] = rte_pktmbuf_iova(m);
@@ -296,7 +295,7 @@ sfc_efx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 
 discard:
                discard_next = ((desc_flags & EFX_PKT_CONT) != 0);
-               rte_mempool_put(rxq->refill_mb_pool, m);
+               rte_mbuf_raw_free(m);
                rxd->mbuf = NULL;
        }
 
@@ -498,7 +497,7 @@ sfc_efx_rx_qpurge(struct sfc_dp_rxq *dp_rxq)
 
        for (i = rxq->completed; i != rxq->added; ++i) {
                rxd = &rxq->sw_desc[i & rxq->ptr_mask];
-               rte_mempool_put(rxq->refill_mb_pool, rxd->mbuf);
+               rte_mbuf_raw_free(rxd->mbuf);
                rxd->mbuf = NULL;
                /* Packed stream relies on 0 in inactive SW desc.
                 * Rx queue stop is not performance critical, so
@@ -673,6 +672,7 @@ sfc_rx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
 
        rxq_info = &sa->rxq_info[sw_index];
        rxq = rxq_info->rxq;
+       SFC_ASSERT(rxq != NULL);
        SFC_ASSERT(rxq->state == SFC_RXQ_INITIALIZED);
 
        evq = rxq->evq;
@@ -763,7 +763,7 @@ sfc_rx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
        rxq_info = &sa->rxq_info[sw_index];
        rxq = rxq_info->rxq;
 
-       if (rxq->state == SFC_RXQ_INITIALIZED)
+       if (rxq == NULL || rxq->state == SFC_RXQ_INITIALIZED)
                return;
        SFC_ASSERT(rxq->state & SFC_RXQ_STARTED);
 
@@ -792,7 +792,6 @@ sfc_rx_get_dev_offload_caps(struct sfc_adapter *sa)
        uint64_t caps = 0;
 
        caps |= DEV_RX_OFFLOAD_JUMBO_FRAME;
-       caps |= DEV_RX_OFFLOAD_CRC_STRIP;
 
        if (sa->dp_rx->features & SFC_DP_RX_FEAT_CHECKSUM) {
                caps |= DEV_RX_OFFLOAD_IPV4_CKSUM;
@@ -1103,6 +1102,7 @@ sfc_rx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
        struct sfc_rxq *rxq;
 
        SFC_ASSERT(sw_index < sa->rxq_count);
+       sa->eth_dev->data->rx_queues[sw_index] = NULL;
 
        rxq_info = &sa->rxq_info[sw_index];
 
@@ -1126,7 +1126,7 @@ sfc_rx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
 /*
  * Mapping between RTE RSS hash functions and their EFX counterparts.
  */
-struct sfc_rss_hf_rte_to_efx sfc_rss_hf_map[] = {
+static const struct sfc_rss_hf_rte_to_efx sfc_rss_hf_map[] = {
        { ETH_RSS_NONFRAG_IPV4_TCP,
          EFX_RX_HASH(IPV4_TCP, 4TUPLE) },
        { ETH_RSS_NONFRAG_IPV4_UDP,
@@ -1200,7 +1200,7 @@ sfc_rx_hash_init(struct sfc_adapter *sa)
                return EINVAL;
 
        rc = efx_rx_scale_hash_flags_get(sa->nic, alg, flags_supp,
-                                        &nb_flags_supp);
+                                        RTE_DIM(flags_supp), &nb_flags_supp);
        if (rc != 0)
                return rc;
 
@@ -1363,7 +1363,8 @@ sfc_rx_start(struct sfc_adapter *sa)
                goto fail_rss_config;
 
        for (sw_index = 0; sw_index < sa->rxq_count; ++sw_index) {
-               if ((!sa->rxq_info[sw_index].deferred_start ||
+               if (sa->rxq_info[sw_index].rxq != NULL &&
+                   (!sa->rxq_info[sw_index].deferred_start ||
                     sa->rxq_info[sw_index].deferred_started)) {
                        rc = sfc_rx_qstart(sa, sw_index);
                        if (rc != 0)
@@ -1439,14 +1440,6 @@ sfc_rx_check_mode(struct sfc_adapter *sa, struct rte_eth_rxmode *rxmode)
                rc = EINVAL;
        }
 
-       /* KEEP_CRC offload flag is not supported by PMD
-        * can remove the below block when DEV_RX_OFFLOAD_CRC_STRIP removed
-        */
-       if (rte_eth_dev_must_keep_crc(rxmode->offloads)) {
-               sfc_warn(sa, "FCS stripping cannot be disabled - always on");
-               rxmode->offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
-       }
-
        /*
         * Requested offloads are validated against supported by ethdev,
         * so unsupported offloads cannot be added as the result of
@@ -1511,7 +1504,7 @@ sfc_rx_configure(struct sfc_adapter *sa)
                goto fail_check_mode;
 
        if (nb_rx_queues == sa->rxq_count)
-               goto done;
+               goto configure_rss;
 
        if (sa->rxq_info == NULL) {
                rc = ENOMEM;
@@ -1548,6 +1541,7 @@ sfc_rx_configure(struct sfc_adapter *sa)
                sa->rxq_count++;
        }
 
+configure_rss:
        rss->channels = (dev_conf->rxmode.mq_mode == ETH_MQ_RX_RSS) ?
                         MIN(sa->rxq_count, EFX_MAXRSS) : 0;
 
@@ -1564,7 +1558,6 @@ sfc_rx_configure(struct sfc_adapter *sa)
                        goto fail_rx_process_adv_conf_rss;
        }
 
-done:
        return 0;
 
 fail_rx_process_adv_conf_rss:
index effe985..076a25d 100644 (file)
 #include "sfc_debug.h"
 #include "sfc_tx.h"
 #include "sfc_ev.h"
-
-/** Standard TSO header length */
-#define SFC_TSOH_STD_LEN        256
-
-/** The number of TSO option descriptors that precede the packet descriptors */
-#define SFC_TSO_OPDESCS_IDX_SHIFT      2
+#include "sfc_tso.h"
 
 int
 sfc_efx_tso_alloc_tsoh_objs(struct sfc_efx_tx_sw_desc *sw_ring,
@@ -57,13 +52,14 @@ sfc_efx_tso_free_tsoh_objs(struct sfc_efx_tx_sw_desc *sw_ring,
        }
 }
 
-static void
-sfc_efx_tso_prepare_header(struct sfc_efx_txq *txq, struct rte_mbuf **in_seg,
-                          size_t *in_off, unsigned int idx, size_t bytes_left)
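+/*
+ * Copy the packet header, which may span several mbuf segments, into the
+ * contiguous tsoh buffer; returns the number of segments whose content was
+ * entirely consumed, and updates *in_seg/*in_off to the start of the payload.
+ */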
+unsigned int
+sfc_tso_prepare_header(uint8_t *tsoh, size_t header_len,
+                      struct rte_mbuf **in_seg, size_t *in_off)
 {
        struct rte_mbuf *m = *in_seg;
        size_t bytes_to_copy = 0;
-       uint8_t *tsoh = txq->sw_ring[idx & txq->ptr_mask].tsoh;
+       size_t bytes_left = header_len;
+       unsigned int segments_copied = 0;
 
        do {
                bytes_to_copy = MIN(bytes_left, m->data_len);
@@ -77,16 +73,20 @@ sfc_efx_tso_prepare_header(struct sfc_efx_txq *txq, struct rte_mbuf **in_seg,
                if (bytes_left > 0) {
                        m = m->next;
                        SFC_ASSERT(m != NULL);
+                       segments_copied++;
                }
        } while (bytes_left > 0);
 
        if (bytes_to_copy == m->data_len) {
                *in_seg = m->next;
                *in_off = 0;
+               segments_copied++;
        } else {
                *in_seg = m;
                *in_off = bytes_to_copy;
        }
+
+       return segments_copied;
 }
 
 int
@@ -105,7 +105,7 @@ sfc_efx_tso_do(struct sfc_efx_txq *txq, unsigned int idx,
        size_t header_len = m->l2_len + m->l3_len + m->l4_len;
        const efx_nic_cfg_t *encp = efx_nic_cfg_get(txq->evq->sa->nic);
 
-       idx += SFC_TSO_OPDESCS_IDX_SHIFT;
+       idx += SFC_TSO_OPT_DESCS_NUM;
 
        /* Packets which have too big headers should be discarded */
        if (unlikely(header_len > SFC_TSOH_STD_LEN))
@@ -129,9 +129,8 @@ sfc_efx_tso_do(struct sfc_efx_txq *txq, unsigned int idx,
         * limitations on address boundaries crossing by DMA descriptor data.
         */
        if (m->data_len < header_len) {
-               sfc_efx_tso_prepare_header(txq, in_seg, in_off, idx,
-                                          header_len);
                tsoh = txq->sw_ring[idx & txq->ptr_mask].tsoh;
+               sfc_tso_prepare_header(tsoh, header_len, in_seg, in_off);
 
                header_paddr = rte_malloc_virt2iova((void *)tsoh);
        } else {
diff --git a/drivers/net/sfc/sfc_tso.h b/drivers/net/sfc/sfc_tso.h
new file mode 100644 (file)
index 0000000..3d2faf5
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2018 Solarflare Communications Inc.
+ * All rights reserved.
+ *
+ * This software was jointly developed between OKTET Labs (under contract
+ * for Solarflare) and Solarflare Communications, Inc.
+ */
+
+/** Standard TSO header length */
+#define SFC_TSOH_STD_LEN       256
+
+/** The number of TSO option descriptors that precede the packet descriptors */
+#define SFC_TSO_OPT_DESCS_NUM  2
+
+/**
+ * The number of DMA descriptors for the TSO header, which may or may not
+ * precede the packet's payload descriptors
+ */
+#define SFC_TSO_HDR_DESCS_NUM  1
+
+unsigned int sfc_tso_prepare_header(uint8_t *tsoh, size_t header_len,
+                                   struct rte_mbuf **in_seg, size_t *in_off);
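+
The refactoring above turns TSO header linearisation into a reusable helper that also reports how many mbuf segments the header fully consumed. A minimal caller sketch in C, assuming a valid mbuf chain m with l2/l3/l4 lengths set; only sfc_tso_prepare_header() and the two macros above come from the patch, the surrounding names are illustrative:

    /* Sketch: copy a (possibly multi-segment) TSO header into a linear
     * buffer and advance the segment/offset cursor to the payload start. */
    uint8_t tsoh[SFC_TSOH_STD_LEN];
    struct rte_mbuf *seg = m;      /* first segment of the packet */
    size_t in_off = 0;             /* byte offset inside 'seg' */
    size_t header_len = m->l2_len + m->l3_len + m->l4_len;

    if (header_len <= SFC_TSOH_STD_LEN) {
            unsigned int nb_hdr_segs;

            nb_hdr_segs = sfc_tso_prepare_header(tsoh, header_len,
                                                 &seg, &in_off);
            /* 'seg'/'in_off' now address the first payload byte;
             * 'nb_hdr_segs' segments were consumed entirely by the header. */
    }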
index 6d42a1a..147f933 100644 (file)
@@ -190,6 +190,8 @@ sfc_tx_qinit(struct sfc_adapter *sa, unsigned int sw_index,
        info.hw_index = txq->hw_index;
        info.mem_bar = sa->mem_bar.esb_base;
        info.vi_window_shift = encp->enc_vi_window_shift;
+       info.tso_tcp_header_offset_limit =
+               encp->enc_tx_tso_tcp_header_offset_limit;
 
        rc = sa->dp_tx->qcreate(sa->eth_dev->data->port_id, sw_index,
                                &RTE_ETH_DEV_TO_PCI(sa->eth_dev)->addr,
@@ -233,6 +235,8 @@ sfc_tx_qfini(struct sfc_adapter *sa, unsigned int sw_index)
        sfc_log_init(sa, "TxQ = %u", sw_index);
 
        SFC_ASSERT(sw_index < sa->txq_count);
+       sa->eth_dev->data->tx_queues[sw_index] = NULL;
+
        txq_info = &sa->txq_info[sw_index];
 
        txq = txq_info->txq;
@@ -421,6 +425,7 @@ sfc_tx_qstart(struct sfc_adapter *sa, unsigned int sw_index)
 
        txq = txq_info->txq;
 
+       SFC_ASSERT(txq != NULL);
        SFC_ASSERT(txq->state == SFC_TXQ_INITIALIZED);
 
        evq = txq->evq;
@@ -501,7 +506,7 @@ sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index)
 
        txq = txq_info->txq;
 
-       if (txq->state == SFC_TXQ_INITIALIZED)
+       if (txq == NULL || txq->state == SFC_TXQ_INITIALIZED)
                return;
 
        SFC_ASSERT(txq->state & SFC_TXQ_STARTED);
@@ -578,8 +583,9 @@ sfc_tx_start(struct sfc_adapter *sa)
                goto fail_efx_tx_init;
 
        for (sw_index = 0; sw_index < sa->txq_count; ++sw_index) {
-               if (!(sa->txq_info[sw_index].deferred_start) ||
-                   sa->txq_info[sw_index].deferred_started) {
+               if (sa->txq_info[sw_index].txq != NULL &&
+                   (!(sa->txq_info[sw_index].deferred_start) ||
+                    sa->txq_info[sw_index].deferred_started)) {
                        rc = sfc_tx_qstart(sa, sw_index);
                        if (rc != 0)
                                goto fail_tx_qstart;
index ea9b65f..484e76c 100644 (file)
@@ -14,6 +14,7 @@ CFLAGS += $(WERROR_FLAGS)
 LDLIBS += -lrte_pipeline -lrte_port -lrte_table
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_sched
+LDLIBS += -lrte_cryptodev
 LDLIBS += -lrte_bus_vdev
 
 EXPORT_MAP := rte_pmd_softnic_version.map
@@ -33,6 +34,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += rte_eth_softnic_action.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += rte_eth_softnic_pipeline.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += rte_eth_softnic_thread.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += rte_eth_softnic_cli.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += rte_eth_softnic_flow.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += rte_eth_softnic_meter.c
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += rte_eth_softnic_cryptodev.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += parser.c
 SRCS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += conn.c
 
index 990cf40..8b66580 100644 (file)
@@ -8,7 +8,6 @@
 #include <unistd.h>
 #include <sys/types.h>
 
-#define __USE_GNU
 #include <sys/socket.h>
 
 #include <sys/epoll.h>
diff --git a/drivers/net/softnic/hash_func.h b/drivers/net/softnic/hash_func.h
deleted file mode 100644 (file)
index 198d2b2..0000000
+++ /dev/null
@@ -1,359 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2018 Intel Corporation
- */
-
-#ifndef __INCLUDE_HASH_FUNC_H__
-#define __INCLUDE_HASH_FUNC_H__
-
-#include <rte_common.h>
-
-static inline uint64_t
-hash_xor_key8(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0;
-
-       xor0 = seed ^ (k[0] & m[0]);
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-static inline uint64_t
-hash_xor_key16(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0;
-
-       xor0 = ((k[0] & m[0]) ^ seed) ^ (k[1] & m[1]);
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-static inline uint64_t
-hash_xor_key24(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0;
-
-       xor0 = ((k[0] & m[0]) ^ seed) ^ (k[1] & m[1]);
-
-       xor0 ^= k[2] & m[2];
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-static inline uint64_t
-hash_xor_key32(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0, xor1;
-
-       xor0 = ((k[0] & m[0]) ^ seed) ^ (k[1] & m[1]);
-       xor1 = (k[2] & m[2]) ^ (k[3] & m[3]);
-
-       xor0 ^= xor1;
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-static inline uint64_t
-hash_xor_key40(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0, xor1;
-
-       xor0 = ((k[0] & m[0]) ^ seed) ^ (k[1] & m[1]);
-       xor1 = (k[2] & m[2]) ^ (k[3] & m[3]);
-
-       xor0 ^= xor1;
-
-       xor0 ^= k[4] & m[4];
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-static inline uint64_t
-hash_xor_key48(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0, xor1, xor2;
-
-       xor0 = ((k[0] & m[0]) ^ seed) ^ (k[1] & m[1]);
-       xor1 = (k[2] & m[2]) ^ (k[3] & m[3]);
-       xor2 = (k[4] & m[4]) ^ (k[5] & m[5]);
-
-       xor0 ^= xor1;
-
-       xor0 ^= xor2;
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-static inline uint64_t
-hash_xor_key56(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0, xor1, xor2;
-
-       xor0 = ((k[0] & m[0]) ^ seed) ^ (k[1] & m[1]);
-       xor1 = (k[2] & m[2]) ^ (k[3] & m[3]);
-       xor2 = (k[4] & m[4]) ^ (k[5] & m[5]);
-
-       xor0 ^= xor1;
-       xor2 ^= k[6] & m[6];
-
-       xor0 ^= xor2;
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-static inline uint64_t
-hash_xor_key64(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0, xor1, xor2, xor3;
-
-       xor0 = ((k[0] & m[0]) ^ seed) ^ (k[1] & m[1]);
-       xor1 = (k[2] & m[2]) ^ (k[3] & m[3]);
-       xor2 = (k[4] & m[4]) ^ (k[5] & m[5]);
-       xor3 = (k[6] & m[6]) ^ (k[7] & m[7]);
-
-       xor0 ^= xor1;
-       xor2 ^= xor3;
-
-       xor0 ^= xor2;
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-#if defined(RTE_ARCH_X86_64)
-
-#include <x86intrin.h>
-
-static inline uint64_t
-hash_crc_key8(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t crc0;
-
-       crc0 = _mm_crc32_u64(seed, k[0] & m[0]);
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key16(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t k0, crc0, crc1;
-
-       k0 = k[0] & m[0];
-
-       crc0 = _mm_crc32_u64(k0, seed);
-       crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key24(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t k0, k2, crc0, crc1;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-
-       crc0 = _mm_crc32_u64(k0, seed);
-       crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);
-
-       crc0 = _mm_crc32_u64(crc0, k2);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key32(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t k0, k2, crc0, crc1, crc2, crc3;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-
-       crc0 = _mm_crc32_u64(k0, seed);
-       crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);
-
-       crc2 = _mm_crc32_u64(k2, k[3] & m[3]);
-       crc3 = k2 >> 32;
-
-       crc0 = _mm_crc32_u64(crc0, crc1);
-       crc1 = _mm_crc32_u64(crc2, crc3);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key40(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t k0, k2, crc0, crc1, crc2, crc3;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-
-       crc0 = _mm_crc32_u64(k0, seed);
-       crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);
-
-       crc2 = _mm_crc32_u64(k2, k[3] & m[3]);
-       crc3 = _mm_crc32_u64(k2 >> 32, k[4] & m[4]);
-
-       crc0 = _mm_crc32_u64(crc0, crc1);
-       crc1 = _mm_crc32_u64(crc2, crc3);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key48(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t k0, k2, k5, crc0, crc1, crc2, crc3;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-       k5 = k[5] & m[5];
-
-       crc0 = _mm_crc32_u64(k0, seed);
-       crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);
-
-       crc2 = _mm_crc32_u64(k2, k[3] & m[3]);
-       crc3 = _mm_crc32_u64(k2 >> 32, k[4] & m[4]);
-
-       crc0 = _mm_crc32_u64(crc0, (crc1 << 32) ^ crc2);
-       crc1 = _mm_crc32_u64(crc3, k5);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key56(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-       k5 = k[5] & m[5];
-
-       crc0 = _mm_crc32_u64(k0, seed);
-       crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);
-
-       crc2 = _mm_crc32_u64(k2, k[3] & m[3]);
-       crc3 = _mm_crc32_u64(k2 >> 32, k[4] & m[4]);
-
-       crc4 = _mm_crc32_u64(k5, k[6] & m[6]);
-       crc5 = k5 >> 32;
-
-       crc0 = _mm_crc32_u64(crc0, (crc1 << 32) ^ crc2);
-       crc1 = _mm_crc32_u64(crc3, (crc4 << 32) ^ crc5);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key64(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-       k5 = k[5] & m[5];
-
-       crc0 = _mm_crc32_u64(k0, seed);
-       crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);
-
-       crc2 = _mm_crc32_u64(k2, k[3] & m[3]);
-       crc3 = _mm_crc32_u64(k2 >> 32, k[4] & m[4]);
-
-       crc4 = _mm_crc32_u64(k5, k[6] & m[6]);
-       crc5 = _mm_crc32_u64(k5 >> 32, k[7] & m[7]);
-
-       crc0 = _mm_crc32_u64(crc0, (crc1 << 32) ^ crc2);
-       crc1 = _mm_crc32_u64(crc3, (crc4 << 32) ^ crc5);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-#define hash_default_key8                      hash_crc_key8
-#define hash_default_key16                     hash_crc_key16
-#define hash_default_key24                     hash_crc_key24
-#define hash_default_key32                     hash_crc_key32
-#define hash_default_key40                     hash_crc_key40
-#define hash_default_key48                     hash_crc_key48
-#define hash_default_key56                     hash_crc_key56
-#define hash_default_key64                     hash_crc_key64
-
-#elif defined(RTE_ARCH_ARM64)
-#include "hash_func_arm64.h"
-#else
-
-#define hash_default_key8                      hash_xor_key8
-#define hash_default_key16                     hash_xor_key16
-#define hash_default_key24                     hash_xor_key24
-#define hash_default_key32                     hash_xor_key32
-#define hash_default_key40                     hash_xor_key40
-#define hash_default_key48                     hash_xor_key48
-#define hash_default_key56                     hash_xor_key56
-#define hash_default_key64                     hash_xor_key64
-
-#endif
-
-#endif
diff --git a/drivers/net/softnic/hash_func_arm64.h b/drivers/net/softnic/hash_func_arm64.h
deleted file mode 100644 (file)
index ae6c0f4..0000000
+++ /dev/null
@@ -1,261 +0,0 @@
-/*-
- *   BSD LICENSE
- *
- *   Copyright(c) 2017 Linaro Limited. All rights reserved.
- *   All rights reserved.
- *
- *   Redistribution and use in source and binary forms, with or without
- *   modification, are permitted provided that the following conditions
- *   are met:
- *
- *     * Redistributions of source code must retain the above copyright
- *       notice, this list of conditions and the following disclaimer.
- *     * Redistributions in binary form must reproduce the above copyright
- *       notice, this list of conditions and the following disclaimer in
- *       the documentation and/or other materials provided with the
- *       distribution.
- *     * Neither the name of Intel Corporation nor the names of its
- *       contributors may be used to endorse or promote products derived
- *       from this software without specific prior written permission.
- *
- *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef __HASH_FUNC_ARM64_H__
-#define __HASH_FUNC_ARM64_H__
-
-#define _CRC32CX(crc, val)     \
-       __asm__("crc32cx %w[c], %w[c], %x[v]":[c] "+r" (crc):[v] "r" (val))
-
-static inline uint64_t
-hash_crc_key8(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint32_t crc0;
-
-       crc0 = seed;
-       _CRC32CX(crc0, k[0] & m[0]);
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key16(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key, k0;
-       uint64_t *m = mask;
-       uint32_t crc0, crc1;
-
-       k0 = k[0] & m[0];
-
-       crc0 = k0;
-       _CRC32CX(crc0, seed);
-       crc1 = k0 >> 32;
-       _CRC32CX(crc1, k[1] & m[1]);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key24(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key, k0, k2;
-       uint64_t *m = mask;
-       uint32_t crc0, crc1;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-
-       crc0 = k0;
-       _CRC32CX(crc0, seed);
-       crc1 = k0 >> 32;
-       _CRC32CX(crc1, k[1] & m[1]);
-
-       _CRC32CX(crc0, k2);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key32(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key, k0, k2;
-       uint64_t *m = mask;
-       uint32_t crc0, crc1, crc2, crc3;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-
-       crc0 = k0;
-       _CRC32CX(crc0, seed);
-       crc1 = k0 >> 32;
-       _CRC32CX(crc1, k[1] & m[1]);
-
-       crc2 = k2;
-       _CRC32CX(crc2, k[3] & m[3]);
-       crc3 = k2 >> 32;
-
-       _CRC32CX(crc0, crc1);
-       _CRC32CX(crc2, crc3);
-
-       crc0 ^= crc2;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key40(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key, k0, k2;
-       uint64_t *m = mask;
-       uint32_t crc0, crc1, crc2, crc3;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-
-       crc0 = k0;
-       _CRC32CX(crc0, seed);
-       crc1 = k0 >> 32;
-       _CRC32CX(crc1, k[1] & m[1]);
-
-       crc2 = k2;
-       _CRC32CX(crc2, k[3] & m[3]);
-       crc3 = k2 >> 32;
-       _CRC32CX(crc3, k[4] & m[4]);
-
-       _CRC32CX(crc0, crc1);
-       _CRC32CX(crc2, crc3);
-
-       crc0 ^= crc2;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key48(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key, k0, k2, k5;
-       uint64_t *m = mask;
-       uint32_t crc0, crc1, crc2, crc3;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-       k5 = k[5] & m[5];
-
-       crc0 = k0;
-       _CRC32CX(crc0, seed);
-       crc1 = k0 >> 32;
-       _CRC32CX(crc1, k[1] & m[1]);
-
-       crc2 = k2;
-       _CRC32CX(crc2, k[3] & m[3]);
-       crc3 = k2 >> 32;
-       _CRC32CX(crc3, k[4] & m[4]);
-
-       _CRC32CX(crc0, ((uint64_t)crc1 << 32) ^ crc2);
-       _CRC32CX(crc3, k5);
-
-       crc0 ^= crc3;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key56(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key, k0, k2, k5;
-       uint64_t *m = mask;
-       uint32_t crc0, crc1, crc2, crc3, crc4, crc5;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-       k5 = k[5] & m[5];
-
-       crc0 = k0;
-       _CRC32CX(crc0, seed);
-       crc1 = k0 >> 32;
-       _CRC32CX(crc1, k[1] & m[1]);
-
-       crc2 = k2;
-       _CRC32CX(crc2, k[3] & m[3]);
-       crc3 = k2 >> 32;
-       _CRC32CX(crc3, k[4] & m[4]);
-
-       crc4 = k5;
-        _CRC32CX(crc4, k[6] & m[6]);
-       crc5 = k5 >> 32;
-
-       _CRC32CX(crc0, ((uint64_t)crc1 << 32) ^ crc2);
-       _CRC32CX(crc3, ((uint64_t)crc4 << 32) ^ crc5);
-
-       crc0 ^= crc3;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key64(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key, k0, k2, k5;
-       uint64_t *m = mask;
-       uint32_t crc0, crc1, crc2, crc3, crc4, crc5;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-       k5 = k[5] & m[5];
-
-       crc0 = k0;
-       _CRC32CX(crc0, seed);
-       crc1 = k0 >> 32;
-       _CRC32CX(crc1, k[1] & m[1]);
-
-       crc2 = k2;
-       _CRC32CX(crc2, k[3] & m[3]);
-       crc3 = k2 >> 32;
-       _CRC32CX(crc3, k[4] & m[4]);
-
-       crc4 = k5;
-        _CRC32CX(crc4, k[6] & m[6]);
-       crc5 = k5 >> 32;
-       _CRC32CX(crc5, k[7] & m[7]);
-
-       _CRC32CX(crc0, ((uint64_t)crc1 << 32) ^ crc2);
-       _CRC32CX(crc3, ((uint64_t)crc4 << 32) ^ crc5);
-
-       crc0 ^= crc3;
-
-       return crc0;
-}
-
-#define hash_default_key8                      hash_crc_key8
-#define hash_default_key16                     hash_crc_key16
-#define hash_default_key24                     hash_crc_key24
-#define hash_default_key32                     hash_crc_key32
-#define hash_default_key40                     hash_crc_key40
-#define hash_default_key48                     hash_crc_key48
-#define hash_default_key56                     hash_crc_key56
-#define hash_default_key64                     hash_crc_key64
-
-#endif
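
Both private hash_func headers are dropped because librte_table now exports equivalent helpers; the include swap to rte_table_hash_func.h appears in the action-profile diff below. A minimal sketch of the shared helpers' contract, assuming a 16-byte key and the experimental table API enabled at build time (key/mask values are illustrative):

    #include <stdint.h>
    #include <rte_table_hash_func.h>

    /* All rte_table_hash_crc_key*() helpers share one signature:
     * f(key, mask, key_size, seed) -> 64-bit hash signature. */
    uint64_t key[2]  = { 0x0123456789abcdefULL, 0xfedcba9876543210ULL };
    uint64_t mask[2] = { UINT64_MAX, UINT64_MAX };
    uint64_t sig = rte_table_hash_crc_key16(key, mask, 16, 0 /* seed */);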
index ff98227..da249c0 100644 (file)
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2018 Intel Corporation
 
+if host_machine.system() != 'linux'
+        build = false
+endif
 allow_experimental_apis = true
 install_headers('rte_eth_softnic.h')
 sources = files('rte_eth_softnic_tm.c',
@@ -13,6 +16,9 @@ sources = files('rte_eth_softnic_tm.c',
        'rte_eth_softnic_pipeline.c',
        'rte_eth_softnic_thread.c',
        'rte_eth_softnic_cli.c',
+       'rte_eth_softnic_flow.c',
+       'rte_eth_softnic_meter.c',
+       'rte_eth_softnic_cryptodev.c',
        'parser.c',
        'conn.c')
-deps += ['pipeline', 'port', 'table', 'sched']
+deps += ['pipeline', 'port', 'table', 'sched', 'cryptodev']
index 30fb395..743a7c5 100644 (file)
@@ -14,6 +14,7 @@
 #include <rte_errno.h>
 #include <rte_ring.h>
 #include <rte_tm_driver.h>
+#include <rte_mtr_driver.h>
 
 #include "rte_eth_softnic.h"
 #include "rte_eth_softnic_internals.h"
@@ -27,7 +28,7 @@
 #define PMD_PARAM_TM_QSIZE2                                "tm_qsize2"
 #define PMD_PARAM_TM_QSIZE3                                "tm_qsize3"
 
-static const char *pmd_valid_args[] = {
+static const char * const pmd_valid_args[] = {
        PMD_PARAM_FIRMWARE,
        PMD_PARAM_CONN_PORT,
        PMD_PARAM_CPU_ID,
@@ -46,7 +47,7 @@ static const char welcome[] =
 
 static const char prompt[] = "softnic> ";
 
-struct softnic_conn_params conn_params_default = {
+static const struct softnic_conn_params conn_params_default = {
        .welcome = welcome,
        .prompt = prompt,
        .addr = "0.0.0.0",
@@ -73,7 +74,6 @@ static const struct rte_eth_dev_info pmd_dev_info = {
                .nb_min = 0,
                .nb_align = 1,
        },
-       .rx_offload_capa = DEV_RX_OFFLOAD_CRC_STRIP,
 };
 
 static int pmd_softnic_logtype;
@@ -190,6 +190,7 @@ pmd_dev_stop(struct rte_eth_dev *dev)
        softnic_mempool_free(p);
 
        tm_hierarchy_free(p);
+       softnic_mtr_free(p);
 }
 
 static void
@@ -205,6 +206,21 @@ pmd_link_update(struct rte_eth_dev *dev __rte_unused,
        return 0;
 }
 
+static int
+pmd_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
+               enum rte_filter_type filter_type,
+               enum rte_filter_op filter_op,
+               void *arg)
+{
+       if (filter_type == RTE_ETH_FILTER_GENERIC &&
+                       filter_op == RTE_ETH_FILTER_GET) {
+               *(const void **)arg = &pmd_flow_ops;
+               return 0;
+       }
+
+       return -ENOTSUP;
+}
+
 static int
 pmd_tm_ops_get(struct rte_eth_dev *dev __rte_unused, void *arg)
 {
@@ -213,6 +229,14 @@ pmd_tm_ops_get(struct rte_eth_dev *dev __rte_unused, void *arg)
        return 0;
 }
 
+static int
+pmd_mtr_ops_get(struct rte_eth_dev *dev __rte_unused, void *arg)
+{
+       *(const struct rte_mtr_ops **)arg = &pmd_mtr_ops;
+
+       return 0;
+}
+
 static const struct eth_dev_ops pmd_ops = {
        .dev_configure = pmd_dev_configure,
        .dev_start = pmd_dev_start,
@@ -222,7 +246,9 @@ static const struct eth_dev_ops pmd_ops = {
        .dev_infos_get = pmd_dev_infos_get,
        .rx_queue_setup = pmd_rx_queue_setup,
        .tx_queue_setup = pmd_tx_queue_setup,
+       .filter_ctrl = pmd_filter_ctrl,
        .tm_ops_get = pmd_tm_ops_get,
+       .mtr_ops_get = pmd_mtr_ops_get,
 };
 
 static uint16_t
@@ -265,12 +291,14 @@ pmd_init(struct pmd_params *params)
 
        /* Resources */
        tm_hierarchy_init(p);
+       softnic_mtr_init(p);
 
        softnic_mempool_init(p);
        softnic_swq_init(p);
        softnic_link_init(p);
        softnic_tmgr_init(p);
        softnic_tap_init(p);
+       softnic_cryptodev_init(p);
        softnic_port_in_action_profile_init(p);
        softnic_table_action_profile_init(p);
        softnic_pipeline_init(p);
@@ -319,6 +347,7 @@ pmd_free(struct pmd_internals *p)
        softnic_mempool_free(p);
 
        tm_hierarchy_free(p);
+       softnic_mtr_free(p);
 
        rte_free(p);
 }
@@ -528,7 +557,6 @@ static int
 pmd_remove(struct rte_vdev_device *vdev)
 {
        struct rte_eth_dev *dev = NULL;
-       struct pmd_internals *p;
 
        if (!vdev)
                return -EINVAL;
@@ -539,12 +567,12 @@ pmd_remove(struct rte_vdev_device *vdev)
        dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
        if (dev == NULL)
                return -ENODEV;
-       p = dev->data->dev_private;
 
        /* Free device data structures*/
-       rte_free(dev->data);
+       pmd_free(dev->data->dev_private);
+       dev->data->dev_private = NULL; /* already freed */
+       dev->data->mac_addrs = NULL; /* statically allocated */
        rte_eth_dev_release_port(dev);
-       pmd_free(p);
 
        return 0;
 }
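
Registering pmd_filter_ctrl above is what lets the generic flow API discover the driver's flow ops. A sketch of the retrieval path an application (or librte_ethdev itself, on behalf of rte_flow calls) goes through, assuming a configured softnic port_id (the variable is hypothetical):

    #include <rte_ethdev.h>
    #include <rte_flow_driver.h>

    const struct rte_flow_ops *ops = NULL;

    /* rte_flow requests reduce to this generic-filter query: */
    if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
                                RTE_ETH_FILTER_GET, &ops) == 0 && ops != NULL) {
            /* 'ops' now points at the pmd_flow_ops table exported above */
    }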
index c25f4dd..92c744d 100644 (file)
@@ -7,8 +7,8 @@
 #include <string.h>
 
 #include <rte_string_fns.h>
+#include <rte_table_hash_func.h>
 
-#include "hash_func.h"
 #include "rte_eth_softnic_internals.h"
 
 /**
@@ -72,35 +72,35 @@ softnic_port_in_action_profile_create(struct pmd_internals *p,
                params->lb.f_hash == NULL) {
                switch (params->lb.key_size) {
                case  8:
-                       params->lb.f_hash = hash_default_key8;
+                       params->lb.f_hash = rte_table_hash_crc_key8;
                        break;
 
                case 16:
-                       params->lb.f_hash = hash_default_key16;
+                       params->lb.f_hash = rte_table_hash_crc_key16;
                        break;
 
                case 24:
-                       params->lb.f_hash = hash_default_key24;
+                       params->lb.f_hash = rte_table_hash_crc_key24;
                        break;
 
                case 32:
-                       params->lb.f_hash = hash_default_key32;
+                       params->lb.f_hash = rte_table_hash_crc_key32;
                        break;
 
                case 40:
-                       params->lb.f_hash = hash_default_key40;
+                       params->lb.f_hash = rte_table_hash_crc_key40;
                        break;
 
                case 48:
-                       params->lb.f_hash = hash_default_key48;
+                       params->lb.f_hash = rte_table_hash_crc_key48;
                        break;
 
                case 56:
-                       params->lb.f_hash = hash_default_key56;
+                       params->lb.f_hash = rte_table_hash_crc_key56;
                        break;
 
                case 64:
-                       params->lb.f_hash = hash_default_key64;
+                       params->lb.f_hash = rte_table_hash_crc_key64;
                        break;
 
                default:
@@ -223,35 +223,35 @@ softnic_table_action_profile_create(struct pmd_internals *p,
                params->lb.f_hash == NULL) {
                switch (params->lb.key_size) {
                case 8:
-                       params->lb.f_hash = hash_default_key8;
+                       params->lb.f_hash = rte_table_hash_crc_key8;
                        break;
 
                case 16:
-                       params->lb.f_hash = hash_default_key16;
+                       params->lb.f_hash = rte_table_hash_crc_key16;
                        break;
 
                case 24:
-                       params->lb.f_hash = hash_default_key24;
+                       params->lb.f_hash = rte_table_hash_crc_key24;
                        break;
 
                case 32:
-                       params->lb.f_hash = hash_default_key32;
+                       params->lb.f_hash = rte_table_hash_crc_key32;
                        break;
 
                case 40:
-                       params->lb.f_hash = hash_default_key40;
+                       params->lb.f_hash = rte_table_hash_crc_key40;
                        break;
 
                case 48:
-                       params->lb.f_hash = hash_default_key48;
+                       params->lb.f_hash = rte_table_hash_crc_key48;
                        break;
 
                case 56:
-                       params->lb.f_hash = hash_default_key56;
+                       params->lb.f_hash = rte_table_hash_crc_key56;
                        break;
 
                case 64:
-                       params->lb.f_hash = hash_default_key64;
+                       params->lb.f_hash = rte_table_hash_crc_key64;
                        break;
 
                default:
@@ -364,6 +364,39 @@ softnic_table_action_profile_create(struct pmd_internals *p,
                }
        }
 
+       if (params->action_mask & (1LLU << RTE_TABLE_ACTION_TAG)) {
+               status = rte_table_action_profile_action_register(ap,
+                       RTE_TABLE_ACTION_TAG,
+                       NULL);
+
+               if (status) {
+                       rte_table_action_profile_free(ap);
+                       return NULL;
+               }
+       }
+
+       if (params->action_mask & (1LLU << RTE_TABLE_ACTION_DECAP)) {
+               status = rte_table_action_profile_action_register(ap,
+                       RTE_TABLE_ACTION_DECAP,
+                       NULL);
+
+               if (status) {
+                       rte_table_action_profile_free(ap);
+                       return NULL;
+               }
+       }
+
+       if (params->action_mask & (1LLU << RTE_TABLE_ACTION_SYM_CRYPTO)) {
+               status = rte_table_action_profile_action_register(ap,
+                       RTE_TABLE_ACTION_SYM_CRYPTO,
+                       &params->sym_crypto);
+
+               if (status) {
+                       rte_table_action_profile_free(ap);
+                       return NULL;
+               }
+       }
+
        status = rte_table_action_profile_freeze(ap);
        if (status) {
                rte_table_action_profile_free(ap);
index 0c7448c..c6640d6 100644 (file)
@@ -9,6 +9,8 @@
 
 #include <rte_common.h>
 #include <rte_cycles.h>
+#include <rte_string_fns.h>
+#include <rte_cryptodev.h>
 
 #include "rte_eth_softnic_internals.h"
 #include "parser.h"
@@ -1088,6 +1090,67 @@ cmd_tap(struct pmd_internals *softnic,
        }
 }
 
+/**
+ * cryptodev <cryptodev_name> dev <device_name> | dev_id <device_id>
+ * queue <n_queues> <queue_size>
+ */
+
+static void
+cmd_cryptodev(struct pmd_internals *softnic,
+               char **tokens,
+               uint32_t n_tokens,
+               char *out,
+               size_t out_size)
+{
+       struct softnic_cryptodev_params params;
+       char *name;
+
+       memset(&params, 0, sizeof(params));
+       if (n_tokens != 7) {
+               snprintf(out, out_size, MSG_ARG_MISMATCH, tokens[0]);
+               return;
+       }
+
+       name = tokens[1];
+
+       if (strcmp(tokens[2], "dev") == 0)
+               params.dev_name = tokens[3];
+       else if (strcmp(tokens[2], "dev_id") == 0) {
+               if (softnic_parser_read_uint32(&params.dev_id, tokens[3]) < 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID,
+                               "dev_id");
+                       return;
+               }
+       } else {
+               snprintf(out, out_size, MSG_ARG_INVALID,
+                       "cryptodev");
+               return;
+       }
+
+       if (strcmp(tokens[4], "queue")) {
+               snprintf(out, out_size, MSG_ARG_NOT_FOUND,
+                       "4");
+               return;
+       }
+
+       if (softnic_parser_read_uint32(&params.n_queues, tokens[5]) < 0) {
+               snprintf(out, out_size, MSG_ARG_INVALID,
+                       "q");
+               return;
+       }
+
+       if (softnic_parser_read_uint32(&params.queue_size, tokens[6]) < 0) {
+               snprintf(out, out_size, MSG_ARG_INVALID,
+                       "queue_size");
+               return;
+       }
+
+       if (softnic_cryptodev_create(softnic, name, &params) == NULL) {
+               snprintf(out, out_size, MSG_CMD_FAIL, tokens[0]);
+               return;
+       }
+}
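+
For reference, one concrete command this parser accepts (object and PMD instance names, queue count and size are illustrative):

    cryptodev CRYPTO0 dev crypto_aesni_mb0 queue 2 1024

which attaches softnic object CRYPTO0 to the crypto PMD instance crypto_aesni_mb0 with 2 queue pairs of 1024 descriptors each; the dev_id form takes a numeric cryptodev id instead of a name.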
+
 /**
  * port in action profile <profile_name>
  *  [filter match | mismatch offset <key_offset> mask <key_mask> key <key_value> port <port_id>]
@@ -1272,13 +1335,17 @@ cmd_port_in_action_profile(struct pmd_internals *softnic,
  *      tc <n_tc>
  *      stats none | pkts | bytes | both]
  *  [tm spp <n_subports_per_port> pps <n_pipes_per_subport>]
- *  [encap ether | vlan | qinq | mpls | pppoe]
+ *  [encap ether | vlan | qinq | mpls | pppoe |
+ *      vxlan offset <ether_offset> ipv4 | ipv6 vlan on | off]
  *  [nat src | dst
  *      proto udp | tcp]
  *  [ttl drop | fwd
  *      stats none | pkts]
  *  [stats pkts | bytes | both]
  *  [time]
+ *  [tag]
+ *  [decap]
+ *
  */
 static void
 cmd_table_action_profile(struct pmd_internals *softnic,
@@ -1478,6 +1545,8 @@ cmd_table_action_profile(struct pmd_internals *softnic,
 
        if (t0 < n_tokens &&
                (strcmp(tokens[t0], "encap") == 0)) {
+               uint32_t n_extra_tokens = 0;
+
                if (n_tokens < t0 + 2) {
                        snprintf(out, out_size, MSG_ARG_MISMATCH,
                                "action profile encap");
@@ -1494,13 +1563,61 @@ cmd_table_action_profile(struct pmd_internals *softnic,
                        p.encap.encap_mask = 1LLU << RTE_TABLE_ACTION_ENCAP_MPLS;
                } else if (strcmp(tokens[t0 + 1], "pppoe") == 0) {
                        p.encap.encap_mask = 1LLU << RTE_TABLE_ACTION_ENCAP_PPPOE;
+               } else if (strcmp(tokens[t0 + 1], "vxlan") == 0) {
+                       if (n_tokens < t0 + 2 + 5) {
+                               snprintf(out, out_size, MSG_ARG_MISMATCH,
+                                       "action profile encap vxlan");
+                               return;
+                       }
+
+                       if (strcmp(tokens[t0 + 2], "offset") != 0) {
+                               snprintf(out, out_size, MSG_ARG_NOT_FOUND,
+                                       "vxlan: offset");
+                               return;
+                       }
+
+                       if (softnic_parser_read_uint32(&p.encap.vxlan.data_offset,
+                               tokens[t0 + 2 + 1]) != 0) {
+                               snprintf(out, out_size, MSG_ARG_INVALID,
+                                       "vxlan: ether_offset");
+                               return;
+                       }
+
+                       if (strcmp(tokens[t0 + 2 + 2], "ipv4") == 0)
+                               p.encap.vxlan.ip_version = 1;
+                       else if (strcmp(tokens[t0 + 2 + 2], "ipv6") == 0)
+                               p.encap.vxlan.ip_version = 0;
+                       else {
+                               snprintf(out, out_size, MSG_ARG_INVALID,
+                                       "vxlan: ipv4 or ipv6");
+                               return;
+                       }
+
+                       if (strcmp(tokens[t0 + 2 + 3], "vlan") != 0) {
+                               snprintf(out, out_size, MSG_ARG_NOT_FOUND,
+                                       "vxlan: vlan");
+                               return;
+                       }
+
+                       if (strcmp(tokens[t0 + 2 + 4], "on") == 0)
+                               p.encap.vxlan.vlan = 1;
+                       else if (strcmp(tokens[t0 + 2 + 4], "off") == 0)
+                               p.encap.vxlan.vlan = 0;
+                       else {
+                               snprintf(out, out_size, MSG_ARG_INVALID,
+                                       "vxlan: on or off");
+                               return;
+                       }
+
+                       p.encap.encap_mask = 1LLU << RTE_TABLE_ACTION_ENCAP_VXLAN;
+                       n_extra_tokens = 5;
+
                } else {
                        snprintf(out, out_size, MSG_ARG_MISMATCH, "encap");
                        return;
                }
-
                p.action_mask |= 1LLU << RTE_TABLE_ACTION_ENCAP;
-               t0 += 2;
+               t0 += 2 + n_extra_tokens;
        } /* encap */
 
        if (t0 < n_tokens &&
@@ -1610,6 +1727,18 @@ cmd_table_action_profile(struct pmd_internals *softnic,
                t0 += 1;
        } /* time */
 
+       if (t0 < n_tokens &&
+               (strcmp(tokens[t0], "tag") == 0)) {
+               p.action_mask |= 1LLU << RTE_TABLE_ACTION_TAG;
+               t0 += 1;
+       } /* tag */
+
+       if (t0 < n_tokens &&
+               (strcmp(tokens[t0], "decap") == 0)) {
+               p.action_mask |= 1LLU << RTE_TABLE_ACTION_DECAP;
+               t0 += 1;
+       } /* decap */
+
        if (t0 < n_tokens) {
                snprintf(out, out_size, MSG_ARG_MISMATCH, tokens[0]);
                return;
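
Putting the new tokens together, an action profile exercising the vxlan encap plus the new tag/decap actions could look like this (profile name and offsets are illustrative, and the leading ipv4/offset/fwd part follows the mandatory prefix documented for this command outside this hunk):

    table action profile AP0 ipv4 offset 270 fwd encap vxlan offset 0 ipv4 vlan off tag decap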
@@ -1682,6 +1811,7 @@ cmd_pipeline(struct pmd_internals *softnic,
  *  | tmgr <tmgr_name>
  *  | tap <tap_name> mempool <mempool_name> mtu <mtu>
  *  | source mempool <mempool_name> file <file_name> bpp <n_bytes_per_pkt>
+ *  | cryptodev <cryptodev_name> rxq <queue_id>
  *  [action <port_in_action_profile_name>]
  *  [disabled]
  */
@@ -1697,6 +1827,8 @@ cmd_pipeline_port_in(struct pmd_internals *softnic,
        uint32_t t0;
        int enabled, status;
 
+       memset(&p, 0, sizeof(p));
+
        if (n_tokens < 7) {
                snprintf(out, out_size, MSG_ARG_MISMATCH, tokens[0]);
                return;
@@ -1735,7 +1867,7 @@ cmd_pipeline_port_in(struct pmd_internals *softnic,
 
                p.type = PORT_IN_RXQ;
 
-               p.dev_name = tokens[t0 + 1];
+               strlcpy(p.dev_name, tokens[t0 + 1], sizeof(p.dev_name));
 
                if (strcmp(tokens[t0 + 2], "rxq") != 0) {
                        snprintf(out, out_size, MSG_ARG_NOT_FOUND, "rxq");
@@ -1758,7 +1890,7 @@ cmd_pipeline_port_in(struct pmd_internals *softnic,
 
                p.type = PORT_IN_SWQ;
 
-               p.dev_name = tokens[t0 + 1];
+               strlcpy(p.dev_name, tokens[t0 + 1], sizeof(p.dev_name));
 
                t0 += 2;
        } else if (strcmp(tokens[t0], "tmgr") == 0) {
@@ -1770,7 +1902,7 @@ cmd_pipeline_port_in(struct pmd_internals *softnic,
 
                p.type = PORT_IN_TMGR;
 
-               p.dev_name = tokens[t0 + 1];
+               strlcpy(p.dev_name, tokens[t0 + 1], sizeof(p.dev_name));
 
                t0 += 2;
        } else if (strcmp(tokens[t0], "tap") == 0) {
@@ -1782,7 +1914,7 @@ cmd_pipeline_port_in(struct pmd_internals *softnic,
 
                p.type = PORT_IN_TAP;
 
-               p.dev_name = tokens[t0 + 1];
+               strlcpy(p.dev_name, tokens[t0 + 1], sizeof(p.dev_name));
 
                if (strcmp(tokens[t0 + 2], "mempool") != 0) {
                        snprintf(out, out_size, MSG_ARG_NOT_FOUND,
@@ -1814,8 +1946,6 @@ cmd_pipeline_port_in(struct pmd_internals *softnic,
 
                p.type = PORT_IN_SOURCE;
 
-               p.dev_name = NULL;
-
                if (strcmp(tokens[t0 + 1], "mempool") != 0) {
                        snprintf(out, out_size, MSG_ARG_NOT_FOUND,
                                "mempool");
@@ -1846,12 +1976,32 @@ cmd_pipeline_port_in(struct pmd_internals *softnic,
                }
 
                t0 += 7;
+       } else if (strcmp(tokens[t0], "cryptodev") == 0) {
+               if (n_tokens < t0 + 4) {
+                       snprintf(out, out_size, MSG_ARG_MISMATCH,
+                               "pipeline port in cryptodev");
+                       return;
+               }
+
+               p.type = PORT_IN_CRYPTODEV;
+
+               strlcpy(p.dev_name, tokens[t0 + 1], sizeof(p.dev_name));
+               if (softnic_parser_read_uint16(&p.rxq.queue_id,
+                               tokens[t0 + 3]) != 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID,
+                               "rxq");
+                       return;
+               }
+
+               p.cryptodev.arg_callback = NULL;
+               p.cryptodev.f_callback = NULL;
+
+               t0 += 4;
        } else {
                snprintf(out, out_size, MSG_ARG_INVALID, tokens[0]);
                return;
        }
 
-       p.action_profile_name = NULL;
        if (n_tokens > t0 &&
                (strcmp(tokens[t0], "action") == 0)) {
                if (n_tokens < t0 + 2) {
@@ -1859,7 +2009,7 @@ cmd_pipeline_port_in(struct pmd_internals *softnic,
                        return;
                }
 
-               p.action_profile_name = tokens[t0 + 1];
+               strlcpy(p.action_profile_name, tokens[t0 + 1],
+                       sizeof(p.action_profile_name));
 
                t0 += 2;
        }
@@ -1895,6 +2045,7 @@ cmd_pipeline_port_in(struct pmd_internals *softnic,
  *  | tmgr <tmgr_name>
  *  | tap <tap_name>
  *  | sink [file <file_name> pkts <max_n_pkts>]
+ *  | cryptodev <cryptodev_name> txq <txq_id> offset <crypto_op_offset>
  */
 static void
 cmd_pipeline_port_out(struct pmd_internals *softnic,
@@ -1945,7 +2096,7 @@ cmd_pipeline_port_out(struct pmd_internals *softnic,
 
                p.type = PORT_OUT_TXQ;
 
-               p.dev_name = tokens[7];
+               strlcpy(p.dev_name, tokens[7], sizeof(p.dev_name));
 
                if (strcmp(tokens[8], "txq") != 0) {
                        snprintf(out, out_size, MSG_ARG_NOT_FOUND, "txq");
@@ -1966,7 +2117,7 @@ cmd_pipeline_port_out(struct pmd_internals *softnic,
 
                p.type = PORT_OUT_SWQ;
 
-               p.dev_name = tokens[7];
+               strlcpy(p.dev_name, tokens[7], sizeof(p.dev_name));
        } else if (strcmp(tokens[6], "tmgr") == 0) {
                if (n_tokens != 8) {
                        snprintf(out, out_size, MSG_ARG_MISMATCH,
@@ -1976,7 +2127,7 @@ cmd_pipeline_port_out(struct pmd_internals *softnic,
 
                p.type = PORT_OUT_TMGR;
 
-               p.dev_name = tokens[7];
+               strlcpy(p.dev_name, tokens[7], sizeof(p.dev_name));
        } else if (strcmp(tokens[6], "tap") == 0) {
                if (n_tokens != 8) {
                        snprintf(out, out_size, MSG_ARG_MISMATCH,
@@ -1986,7 +2137,7 @@ cmd_pipeline_port_out(struct pmd_internals *softnic,
 
                p.type = PORT_OUT_TAP;
 
-               p.dev_name = tokens[7];
+               strlcpy(p.dev_name, tokens[7], sizeof(p.dev_name));
        } else if (strcmp(tokens[6], "sink") == 0) {
                if ((n_tokens != 7) && (n_tokens != 11)) {
                        snprintf(out, out_size, MSG_ARG_MISMATCH,
@@ -1996,8 +2147,6 @@ cmd_pipeline_port_out(struct pmd_internals *softnic,
 
                p.type = PORT_OUT_SINK;
 
-               p.dev_name = NULL;
-
                if (n_tokens == 7) {
                        p.sink.file_name = NULL;
                        p.sink.max_n_pkts = 0;
@@ -2021,6 +2170,40 @@ cmd_pipeline_port_out(struct pmd_internals *softnic,
                                return;
                        }
                }
+       } else if (strcmp(tokens[6], "cryptodev") == 0) {
+               if (n_tokens != 12) {
+                       snprintf(out, out_size, MSG_ARG_MISMATCH,
+                               "pipeline port out cryptodev");
+                       return;
+               }
+
+               p.type = PORT_OUT_CRYPTODEV;
+
+               strlcpy(p.dev_name, tokens[7], sizeof(p.dev_name));
+
+               if (strcmp(tokens[8], "txq")) {
+                       snprintf(out, out_size, MSG_ARG_MISMATCH,
+                               "pipeline port out cryptodev");
+                       return;
+               }
+
+               if (softnic_parser_read_uint16(&p.cryptodev.queue_id, tokens[9])
+                               != 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID, "queue_id");
+                       return;
+               }
+
+               if (strcmp(tokens[10], "offset")) {
+                       snprintf(out, out_size, MSG_ARG_MISMATCH,
+                               "pipeline port out cryptodev");
+                       return;
+               }
+
+               if (softnic_parser_read_uint32(&p.cryptodev.op_offset,
+                               tokens[11]) != 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID, "queue_id");
+                       return;
+               }
        } else {
                snprintf(out, out_size, MSG_ARG_INVALID, tokens[0]);
                return;
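
Concrete shapes of the two new cryptodev port commands, as parsed above (all names and numbers are illustrative):

    pipeline PIPELINE0 port in bsz 32 cryptodev CRYPTO0 rxq 0
    pipeline PIPELINE0 port out bsz 32 cryptodev CRYPTO0 txq 0 offset 192

For the out port, offset names the location of the rte_crypto_op associated with each packet (the <crypto_op_offset> parameter in the syntax comment above).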
@@ -2064,12 +2247,13 @@ cmd_pipeline_table(struct pmd_internals *softnic,
        char *out,
        size_t out_size)
 {
-       uint8_t key_mask[TABLE_RULE_MATCH_SIZE_MAX];
        struct softnic_table_params p;
        char *pipeline_name;
        uint32_t t0;
        int status;
 
+       memset(&p, 0, sizeof(p));
+
        if (n_tokens < 5) {
                snprintf(out, out_size, MSG_ARG_MISMATCH, tokens[0]);
                return;
@@ -2203,12 +2387,11 @@ cmd_pipeline_table(struct pmd_internals *softnic,
                }
 
                if ((softnic_parse_hex_string(tokens[t0 + 5],
-                       key_mask, &key_mask_size) != 0) ||
+                       p.match.hash.key_mask, &key_mask_size) != 0) ||
                        key_mask_size != p.match.hash.key_size) {
                        snprintf(out, out_size, MSG_ARG_INVALID, "key_mask");
                        return;
                }
-               p.match.hash.key_mask = key_mask;
 
                if (strcmp(tokens[t0 + 6], "offset") != 0) {
                        snprintf(out, out_size, MSG_ARG_NOT_FOUND, "offset");
@@ -2295,7 +2478,6 @@ cmd_pipeline_table(struct pmd_internals *softnic,
                return;
        }
 
-       p.action_profile_name = NULL;
        if (n_tokens > t0 &&
                (strcmp(tokens[t0], "action") == 0)) {
                if (n_tokens < t0 + 2) {
@@ -2303,7 +2485,7 @@ cmd_pipeline_table(struct pmd_internals *softnic,
                        return;
                }
 
-               p.action_profile_name = tokens[t0 + 1];
+               strlcpy(p.action_profile_name, tokens[t0 + 1],
+                       sizeof(p.action_profile_name));
 
                t0 += 2;
        }
@@ -3176,10 +3358,30 @@ parse_match(char **tokens,
  *          [label2 <label> <tc> <ttl>
  *          [label3 <label> <tc> <ttl>]]]
  *       | pppoe <da> <sa> <session_id>]
+ *       | vxlan ether <da> <sa>
+ *          [vlan <pcp> <dei> <vid>]
+ *          ipv4 <sa> <da> <dscp> <ttl>
+ *          | ipv6 <sa> <da> <flow_label> <dscp> <hop_limit>
+ *          udp <sp> <dp>
+ *          vxlan <vni>]
  *    [nat ipv4 | ipv6 <addr> <port>]
  *    [ttl dec | keep]
  *    [stats]
  *    [time]
+ *    [tag <tag>]
+ *    [decap <n>]
+ *    [sym_crypto
+ *       encrypt | decrypt
+ *       type
+ *       | cipher
+ *          cipher_algo <algo> cipher_key <key> cipher_iv <iv>
+ *       | cipher_auth
+ *          cipher_algo <algo> cipher_key <key> cipher_iv <iv>
+ *          auth_algo <algo> auth_key <key> digest_size <size>
+ *       | aead
+ *          aead_algo <algo> aead_key <key> aead_iv <iv> aead_aad <aad>
+ *          digest_size <size>
+ *       data_offset <data_offset>]
  *
  * where:
  *    <pa> ::= g | y | r | drop
@@ -3575,6 +3777,122 @@ parse_table_action_encap(char **tokens,
                return 1 + 4;
        }
 
+       /* vxlan */
+       if (n_tokens && (strcmp(tokens[0], "vxlan") == 0)) {
+               uint32_t n = 0;
+
+               n_tokens--;
+               tokens++;
+               n++;
+
+               /* ether <da> <sa> */
+               if ((n_tokens < 3) ||
+                       strcmp(tokens[0], "ether") ||
+                       softnic_parse_mac_addr(tokens[1], &a->encap.vxlan.ether.da) ||
+                       softnic_parse_mac_addr(tokens[2], &a->encap.vxlan.ether.sa))
+                       return 0;
+
+               n_tokens -= 3;
+               tokens += 3;
+               n += 3;
+
+               /* [vlan <pcp> <dei> <vid>] */
+               if (strcmp(tokens[0], "vlan") == 0) {
+                       uint32_t pcp, dei, vid;
+
+                       if ((n_tokens < 4) ||
+                               softnic_parser_read_uint32(&pcp, tokens[1]) ||
+                               (pcp > 7) ||
+                               softnic_parser_read_uint32(&dei, tokens[2]) ||
+                               (dei > 1) ||
+                               softnic_parser_read_uint32(&vid, tokens[3]) ||
+                               (vid > 0xFFF))
+                               return 0;
+
+                       a->encap.vxlan.vlan.pcp = pcp;
+                       a->encap.vxlan.vlan.dei = dei;
+                       a->encap.vxlan.vlan.vid = vid;
+
+                       n_tokens -= 4;
+                       tokens += 4;
+                       n += 4;
+               }
+
+               /* ipv4 <sa> <da> <dscp> <ttl>
+                  | ipv6 <sa> <da> <flow_label> <dscp> <hop_limit> */
+               if (strcmp(tokens[0], "ipv4") == 0) {
+                       struct in_addr sa, da;
+                       uint8_t dscp, ttl;
+
+                       if ((n_tokens < 5) ||
+                               softnic_parse_ipv4_addr(tokens[1], &sa) ||
+                               softnic_parse_ipv4_addr(tokens[2], &da) ||
+                               softnic_parser_read_uint8(&dscp, tokens[3]) ||
+                               (dscp > 0x3F) ||
+                               softnic_parser_read_uint8(&ttl, tokens[4]))
+                               return 0;
+
+                       a->encap.vxlan.ipv4.sa = rte_be_to_cpu_32(sa.s_addr);
+                       a->encap.vxlan.ipv4.da = rte_be_to_cpu_32(da.s_addr);
+                       a->encap.vxlan.ipv4.dscp = dscp;
+                       a->encap.vxlan.ipv4.ttl = ttl;
+
+                       n_tokens -= 5;
+                       tokens += 5;
+                       n += 5;
+               } else if (strcmp(tokens[0], "ipv6") == 0) {
+                       struct in6_addr sa, da;
+                       uint32_t flow_label;
+                       uint8_t dscp, hop_limit;
+
+                       if ((n_tokens < 6) ||
+                               softnic_parse_ipv6_addr(tokens[1], &sa) ||
+                               softnic_parse_ipv6_addr(tokens[2], &da) ||
+                               softnic_parser_read_uint32(&flow_label, tokens[3]) ||
+                               softnic_parser_read_uint8(&dscp, tokens[4]) ||
+                               (dscp > 0x3F) ||
+                               softnic_parser_read_uint8(&hop_limit, tokens[5]))
+                               return 0;
+
+                       memcpy(a->encap.vxlan.ipv6.sa, sa.s6_addr, 16);
+                       memcpy(a->encap.vxlan.ipv6.da, da.s6_addr, 16);
+                       a->encap.vxlan.ipv6.flow_label = flow_label;
+                       a->encap.vxlan.ipv6.dscp = dscp;
+                       a->encap.vxlan.ipv6.hop_limit = hop_limit;
+
+                       n_tokens -= 6;
+                       tokens += 6;
+                       n += 6;
+               } else
+                       return 0;
+
+               /* udp <sp> <dp> */
+               if ((n_tokens < 3) ||
+                       strcmp(tokens[0], "udp") ||
+                       softnic_parser_read_uint16(&a->encap.vxlan.udp.sp, tokens[1]) ||
+                       softnic_parser_read_uint16(&a->encap.vxlan.udp.dp, tokens[2]))
+                       return 0;
+
+               n_tokens -= 3;
+               tokens += 3;
+               n += 3;
+
+               /* vxlan <vni> */
+               if ((n_tokens < 2) ||
+                       strcmp(tokens[0], "vxlan") ||
+                       softnic_parser_read_uint32(&a->encap.vxlan.vxlan.vni, tokens[1]) ||
+                       (a->encap.vxlan.vxlan.vni > 0xFFFFFF))
+                       return 0;
+
+               n_tokens -= 2;
+               tokens += 2;
+               n += 2;
+
+               a->encap.type = RTE_TABLE_ACTION_ENCAP_VXLAN;
+               a->action_mask |= 1 << RTE_TABLE_ACTION_ENCAP;
+               return 1 + n;
+       }
+
        return 0;
 }
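
A rule action exercising the new vxlan branch end-to-end might read (addresses, ports and VNI are illustrative):

    encap vxlan ether 00:11:22:33:44:55 00:aa:bb:cc:dd:ee ipv4 10.0.0.1 10.0.0.2 0 64 udp 4789 4789 vxlan 100

i.e. outer Ethernet with the optional VLAN omitted, IPv4 with DSCP 0 and TTL 64, UDP 4789/4789 and VNI 100, matching the grammar in the comment block above.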
 
@@ -3669,6 +3987,400 @@ parse_table_action_time(char **tokens,
        return 1;
 }
 
+static void
+parse_free_sym_crypto_param_data(struct rte_table_action_sym_crypto_params *p)
+{
+       struct rte_crypto_sym_xform *xform[2] = {NULL};
+       uint32_t i;
+
+       xform[0] = p->xform;
+       if (xform[0])
+               xform[1] = xform[0]->next;
+
+       for (i = 0; i < 2; i++) {
+               if (xform[i] == NULL)
+                       continue;
+
+               switch (xform[i]->type) {
+               case RTE_CRYPTO_SYM_XFORM_CIPHER:
+                       if (xform[i]->cipher.key.data)
+                               free(xform[i]->cipher.key.data);
+                       if (p->cipher_auth.cipher_iv.val)
+                               free(p->cipher_auth.cipher_iv.val);
+                       if (p->cipher_auth.cipher_iv_update.val)
+                               free(p->cipher_auth.cipher_iv_update.val);
+                       break;
+               case RTE_CRYPTO_SYM_XFORM_AUTH:
+                       if (xform[i]->auth.key.data)
+                               free(xform[i]->auth.key.data);
+                       if (p->cipher_auth.auth_iv.val)
+                               free(p->cipher_auth.auth_iv.val);
+                       if (p->cipher_auth.auth_iv_update.val)
+                               free(p->cipher_auth.auth_iv_update.val);
+                       break;
+               case RTE_CRYPTO_SYM_XFORM_AEAD:
+                       if (xform[i]->aead.key.data)
+                               free(xform[i]->aead.key.data);
+                       if (p->aead.iv.val)
+                               free(p->aead.iv.val);
+                       if (p->aead.aad.val)
+                               free(p->aead.aad.val);
+                       break;
+               default:
+                       continue;
+               }
+       }
+
+}
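+
All heap buffers the xform parsers below allocate (keys, IVs, AAD) are reclaimed through the helper above on failure paths. A sym_crypto action string the parsers accept might look like this (algorithm names follow the rte_cryptodev string conventions; the hex key/IV/AAD values are illustrative):

    sym_crypto encrypt type aead aead_algo aes-gcm aead_key 000102030405060708090a0b0c0d0e0f aead_iv 000102030405060708090a0b aead_aad 0001020304050607 digest_size 16 data_offset 290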
+
+static struct rte_crypto_sym_xform *
+parse_table_action_cipher(struct rte_table_action_sym_crypto_params *p,
+               char **tokens, uint32_t n_tokens, uint32_t encrypt,
+               uint32_t *used_n_tokens)
+{
+       struct rte_crypto_sym_xform *xform_cipher;
+       int status;
+       size_t len;
+
+       if (n_tokens < 7 || strcmp(tokens[1], "cipher_algo") ||
+                       strcmp(tokens[3], "cipher_key") ||
+                       strcmp(tokens[5], "cipher_iv"))
+               return NULL;
+
+       xform_cipher = calloc(1, sizeof(*xform_cipher));
+       if (xform_cipher == NULL)
+               return NULL;
+
+       xform_cipher->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+       xform_cipher->cipher.op = encrypt ? RTE_CRYPTO_CIPHER_OP_ENCRYPT :
+                       RTE_CRYPTO_CIPHER_OP_DECRYPT;
+
+       /* cipher_algo */
+       status = rte_cryptodev_get_cipher_algo_enum(
+                       &xform_cipher->cipher.algo, tokens[2]);
+       if (status < 0)
+               goto error_exit;
+
+       /* cipher_key */
+       len = strlen(tokens[4]);
+       xform_cipher->cipher.key.data = calloc(1, len / 2 + 1);
+       if (xform_cipher->cipher.key.data == NULL)
+               goto error_exit;
+
+       status = softnic_parse_hex_string(tokens[4],
+                       xform_cipher->cipher.key.data,
+                       (uint32_t *)&len);
+       if (status < 0)
+               goto error_exit;
+
+       xform_cipher->cipher.key.length = (uint16_t)len;
+
+       /* cipher_iv */
+       len = strlen(tokens[6]);
+
+       p->cipher_auth.cipher_iv.val = calloc(1, len / 2 + 1);
+       if (p->cipher_auth.cipher_iv.val == NULL)
+               goto error_exit;
+
+       status = softnic_parse_hex_string(tokens[6],
+                       p->cipher_auth.cipher_iv.val,
+                       (uint32_t *)&len);
+       if (status < 0)
+               goto error_exit;
+
+       xform_cipher->cipher.iv.length = (uint16_t)len;
+       xform_cipher->cipher.iv.offset = RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET;
+       p->cipher_auth.cipher_iv.length = (uint32_t)len;
+       *used_n_tokens = 7;
+
+       return xform_cipher;
+
+error_exit:
+       if (xform_cipher->cipher.key.data)
+               free(xform_cipher->cipher.key.data);
+
+       if (p->cipher_auth.cipher_iv.val) {
+               free(p->cipher_auth.cipher_iv.val);
+               p->cipher_auth.cipher_iv.val = NULL;
+       }
+
+       free(xform_cipher);
+
+       return NULL;
+}
+
+static struct rte_crypto_sym_xform *
+parse_table_action_cipher_auth(struct rte_table_action_sym_crypto_params *p,
+               char **tokens, uint32_t n_tokens, uint32_t encrypt,
+               uint32_t *used_n_tokens)
+{
+       struct rte_crypto_sym_xform *xform_cipher;
+       struct rte_crypto_sym_xform *xform_auth;
+       int status;
+       size_t len;
+
+       if (n_tokens < 13 ||
+                       strcmp(tokens[7], "auth_algo") ||
+                       strcmp(tokens[9], "auth_key") ||
+                       strcmp(tokens[11], "digest_size"))
+               return NULL;
+
+       xform_auth = calloc(1, sizeof(*xform_auth));
+       if (xform_auth == NULL)
+               return NULL;
+
+       xform_auth->type = RTE_CRYPTO_SYM_XFORM_AUTH;
+       xform_auth->auth.op = encrypt ? RTE_CRYPTO_AUTH_OP_GENERATE :
+                       RTE_CRYPTO_AUTH_OP_VERIFY;
+
+       /* auth_algo */
+       status = rte_cryptodev_get_auth_algo_enum(&xform_auth->auth.algo,
+                       tokens[8]);
+       if (status < 0)
+               goto error_exit;
+
+       /* auth_key */
+       len = strlen(tokens[10]);
+       xform_auth->auth.key.data = calloc(1, len / 2 + 1);
+       if (xform_auth->auth.key.data == NULL)
+               goto error_exit;
+
+       status = softnic_parse_hex_string(tokens[10],
+                       xform_auth->auth.key.data, (uint32_t *)&len);
+       if (status < 0)
+               goto error_exit;
+
+       xform_auth->auth.key.length = (uint16_t)len;
+
+       if (strcmp(tokens[11], "digest_size"))
+               goto error_exit;
+
+       status = softnic_parser_read_uint16(&xform_auth->auth.digest_length,
+                       tokens[12]);
+       if (status < 0)
+               goto error_exit;
+
+       xform_cipher = parse_table_action_cipher(p, tokens, 7, encrypt,
+                       used_n_tokens);
+       if (xform_cipher == NULL)
+               goto error_exit;
+
+       *used_n_tokens += 6;
+
+       if (encrypt) {
+               xform_cipher->next = xform_auth;
+               return xform_cipher;
+       } else {
+               xform_auth->next = xform_cipher;
+               return xform_auth;
+       }
+
+error_exit:
+       if (xform_auth->auth.key.data)
+               free(xform_auth->auth.key.data);
+       if (p->cipher_auth.auth_iv.val) {
+               free(p->cipher_auth.auth_iv.val);
+               p->cipher_auth.auth_iv.val = NULL;
+       }
+
+       free(xform_auth);
+
+       return NULL;
+}
+
+static struct rte_crypto_sym_xform *
+parse_table_action_aead(struct rte_table_action_sym_crypto_params *p,
+               char **tokens, uint32_t n_tokens, uint32_t encrypt,
+               uint32_t *used_n_tokens)
+{
+       struct rte_crypto_sym_xform *xform_aead;
+       int status;
+       size_t len;
+
+       if (n_tokens < 11 || strcmp(tokens[1], "aead_algo") ||
+                       strcmp(tokens[3], "aead_key") ||
+                       strcmp(tokens[5], "aead_iv") ||
+                       strcmp(tokens[7], "aead_aad") ||
+                       strcmp(tokens[9], "digest_size"))
+               return NULL;
+
+       xform_aead = calloc(1, sizeof(*xform_aead));
+       if (xform_aead == NULL)
+               return NULL;
+
+       xform_aead->type = RTE_CRYPTO_SYM_XFORM_AEAD;
+       xform_aead->aead.op = encrypt ? RTE_CRYPTO_AEAD_OP_ENCRYPT :
+                       RTE_CRYPTO_AEAD_OP_DECRYPT;
+
+       /* aead_algo */
+       status = rte_cryptodev_get_aead_algo_enum(&xform_aead->aead.algo,
+                       tokens[2]);
+       if (status < 0)
+               goto error_exit;
+
+       /* aead_key */
+       len = strlen(tokens[4]);
+       xform_aead->aead.key.data = calloc(1, len / 2 + 1);
+       if (xform_aead->aead.key.data == NULL)
+               goto error_exit;
+
+       status = softnic_parse_hex_string(tokens[4], xform_aead->aead.key.data,
+                       (uint32_t *)&len);
+       if (status < 0)
+               goto error_exit;
+
+       xform_aead->aead.key.length = (uint16_t)len;
+
+       /* aead_iv */
+       len = strlen(tokens[6]);
+       p->aead.iv.val = calloc(1, len / 2 + 1);
+       if (p->aead.iv.val == NULL)
+               goto error_exit;
+
+       status = softnic_parse_hex_string(tokens[6], p->aead.iv.val,
+                       (uint32_t *)&len);
+       if (status < 0)
+               goto error_exit;
+
+       xform_aead->aead.iv.length = (uint16_t)len;
+       xform_aead->aead.iv.offset = RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET;
+       p->aead.iv.length = (uint32_t)len;
+
+       /* aead_aad */
+       len = strlen(tokens[8]);
+       p->aead.aad.val = calloc(1, len / 2 + 1);
+       if (p->aead.aad.val == NULL)
+               goto error_exit;
+
+       status = softnic_parse_hex_string(tokens[8], p->aead.aad.val,
+                       (uint32_t *)&len);
+       if (status < 0)
+               goto error_exit;
+
+       xform_aead->aead.aad_length = (uint16_t)len;
+       p->aead.aad.length = (uint32_t)len;
+
+       /* digest_size */
+       status = softnic_parser_read_uint16(&xform_aead->aead.digest_length,
+                       tokens[10]);
+       if (status < 0)
+               goto error_exit;
+
+       *used_n_tokens = 11;
+
+       return xform_aead;
+
+error_exit:
+       if (xform_aead->aead.key.data)
+               free(xform_aead->aead.key.data);
+       if (p->aead.iv.val) {
+               free(p->aead.iv.val);
+               p->aead.iv.val = NULL;
+       }
+       if (p->aead.aad.val) {
+               free(p->aead.aad.val);
+               p->aead.aad.val = NULL;
+       }
+
+       free(xform_aead);
+
+       return NULL;
+}
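The AEAD variant follows the same pattern with eleven tokens (again a single
CLI line, split for readability; "aes-gcm" and the hex values are
illustrative):

    aead aead_algo aes-gcm
        aead_key 000102030405060708090a0b0c0d0e0f
        aead_iv 000102030405060708090a0b
        aead_aad 0001020304050607
        digest_size 16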
+
+static uint32_t
+parse_table_action_sym_crypto(char **tokens,
+       uint32_t n_tokens,
+       struct softnic_table_rule_action *a)
+{
+       struct rte_table_action_sym_crypto_params *p = &a->sym_crypto;
+       struct rte_crypto_sym_xform *xform = NULL;
+       uint32_t used_n_tokens;
+       uint32_t encrypt;
+       int status;
+
+       if ((n_tokens < 12) ||
+               strcmp(tokens[0], "sym_crypto") ||
+               strcmp(tokens[2], "type"))
+               return 0;
+
+       memset(p, 0, sizeof(*p));
+
+       if (strcmp(tokens[1], "encrypt") == 0)
+               encrypt = 1;
+       else
+               encrypt = 0;
+
+       status = softnic_parser_read_uint32(&p->data_offset, tokens[n_tokens - 1]);
+       if (status < 0)
+               return 0;
+
+       if (strcmp(tokens[3], "cipher") == 0) {
+               tokens += 3;
+               n_tokens -= 3;
+
+               xform = parse_table_action_cipher(p, tokens, n_tokens, encrypt,
+                               &used_n_tokens);
+       } else if (strcmp(tokens[3], "cipher_auth") == 0) {
+               tokens += 3;
+               n_tokens -= 3;
+
+               xform = parse_table_action_cipher_auth(p, tokens, n_tokens,
+                               encrypt, &used_n_tokens);
+       } else if (strcmp(tokens[3], "aead") == 0) {
+               tokens += 3;
+               n_tokens -= 3;
+
+               xform = parse_table_action_aead(p, tokens, n_tokens, encrypt,
+                               &used_n_tokens);
+       }
+
+       if (xform == NULL)
+               return 0;
+
+       p->xform = xform;
+
+       if (strcmp(tokens[used_n_tokens], "data_offset")) {
+               parse_free_sym_crypto_param_data(p);
+               return 0;
+       }
+
+       a->action_mask |= 1 << RTE_TABLE_ACTION_SYM_CRYPTO;
+
+       return used_n_tokens + 5;
+}
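Putting the pieces together, an illustrative sym_crypto action as consumed by
this dispatcher:

    sym_crypto encrypt type cipher cipher_algo aes-cbc
        cipher_key 000102030405060708090a0b0c0d0e0f
        cipher_iv 000102030405060708090a0b0c0d0e0f
        data_offset 290

The return value accounts for the three prefix tokens ("sym_crypto encrypt
type"), the used_n_tokens consumed by the sub-parser, and the two trailing
"data_offset <value>" tokens, hence used_n_tokens + 5.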
+
+static uint32_t
+parse_table_action_tag(char **tokens,
+       uint32_t n_tokens,
+       struct softnic_table_rule_action *a)
+{
+       if (n_tokens < 2 ||
+               strcmp(tokens[0], "tag"))
+               return 0;
+
+       if (softnic_parser_read_uint32(&a->tag.tag, tokens[1]))
+               return 0;
+
+       a->action_mask |= 1 << RTE_TABLE_ACTION_TAG;
+       return 2;
+}
+
+static uint32_t
+parse_table_action_decap(char **tokens,
+       uint32_t n_tokens,
+       struct softnic_table_rule_action *a)
+{
+       if (n_tokens < 2 ||
+               strcmp(tokens[0], "decap"))
+               return 0;
+
+       if (softnic_parser_read_uint16(&a->decap.n, tokens[1]))
+               return 0;
+
+       a->action_mask |= 1 << RTE_TABLE_ACTION_DECAP;
+       return 2;
+}
+
 static uint32_t
 parse_table_action(char **tokens,
        uint32_t n_tokens,
@@ -3813,6 +4525,47 @@ parse_table_action(char **tokens,
                n_tokens -= n;
        }
 
+       if (n_tokens && (strcmp(tokens[0], "tag") == 0)) {
+               uint32_t n;
+
+               n = parse_table_action_tag(tokens, n_tokens, a);
+               if (n == 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID,
+                               "action tag");
+                       return 0;
+               }
+
+               tokens += n;
+               n_tokens -= n;
+       }
+
+       if (n_tokens && (strcmp(tokens[0], "decap") == 0)) {
+               uint32_t n;
+
+               n = parse_table_action_decap(tokens, n_tokens, a);
+               if (n == 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID,
+                               "action decap");
+                       return 0;
+               }
+
+               tokens += n;
+               n_tokens -= n;
+       }
+
+       if (n_tokens && (strcmp(tokens[0], "sym_crypto") == 0)) {
+               uint32_t n;
+
+               n = parse_table_action_sym_crypto(tokens, n_tokens, a);
+               if (n == 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID,
+                               "action sym_crypto");
+                       return 0;
+               }
+
+               tokens += n;
+               n_tokens -= n;
+       }
+
        if (n_tokens0 - n_tokens == 1) {
                snprintf(out, out_size, MSG_ARG_INVALID, "action");
                return 0;
@@ -4797,6 +5550,81 @@ cmd_softnic_thread_pipeline_disable(struct pmd_internals *softnic,
        }
 }
 
+/**
+ * flowapi map
+ *  group <group_id>
+ *  ingress | egress
+ *  pipeline <pipeline_name>
+ *  table <table_id>
+ */
+static void
+cmd_softnic_flowapi_map(struct pmd_internals *softnic,
+               char **tokens,
+               uint32_t n_tokens,
+               char *out,
+               size_t out_size)
+{
+       char *pipeline_name;
+       uint32_t group_id, table_id;
+       int ingress, status;
+
+       if (n_tokens != 9) {
+               snprintf(out, out_size, MSG_ARG_MISMATCH, tokens[0]);
+               return;
+       }
+
+       if (strcmp(tokens[1], "map") != 0) {
+               snprintf(out, out_size, MSG_ARG_NOT_FOUND, "map");
+               return;
+       }
+
+       if (strcmp(tokens[2], "group") != 0) {
+               snprintf(out, out_size, MSG_ARG_NOT_FOUND, "group");
+               return;
+       }
+
+       if (softnic_parser_read_uint32(&group_id, tokens[3]) != 0) {
+               snprintf(out, out_size, MSG_ARG_INVALID, "group_id");
+               return;
+       }
+
+       if (strcmp(tokens[4], "ingress") == 0) {
+               ingress = 1;
+       } else if (strcmp(tokens[4], "egress") == 0) {
+               ingress = 0;
+       } else {
+               snprintf(out, out_size, MSG_ARG_NOT_FOUND, "ingress | egress");
+               return;
+       }
+
+       if (strcmp(tokens[5], "pipeline") != 0) {
+               snprintf(out, out_size, MSG_ARG_NOT_FOUND, "pipeline");
+               return;
+       }
+
+       pipeline_name = tokens[6];
+
+       if (strcmp(tokens[7], "table") != 0) {
+               snprintf(out, out_size, MSG_ARG_NOT_FOUND, "table");
+               return;
+       }
+
+       if (softnic_parser_read_uint32(&table_id, tokens[8]) != 0) {
+               snprintf(out, out_size, MSG_ARG_INVALID, "table_id");
+               return;
+       }
+
+       status = flow_attr_map_set(softnic,
+                       group_id,
+                       ingress,
+                       pipeline_name,
+                       table_id);
+       if (status) {
+               snprintf(out, out_size, MSG_CMD_FAIL, tokens[0]);
+               return;
+       }
+}
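An illustrative invocation matching the nine-token layout validated above
(the pipeline name is hypothetical):

    flowapi map group 0 ingress pipeline PIPELINE0 table 2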
+
 void
 softnic_cli_process(char *in, char *out, size_t out_size, void *arg)
 {
@@ -4877,6 +5705,11 @@ softnic_cli_process(char *in, char *out, size_t out_size, void *arg)
                return;
        }
 
+       if (strcmp(tokens[0], "cryptodev") == 0) {
+               cmd_cryptodev(softnic, tokens, n_tokens, out, out_size);
+               return;
+       }
+
        if (strcmp(tokens[0], "port") == 0) {
                cmd_port_in_action_profile(softnic, tokens, n_tokens, out, out_size);
                return;
@@ -5089,6 +5922,12 @@ softnic_cli_process(char *in, char *out, size_t out_size, void *arg)
                }
        }
 
+       if (strcmp(tokens[0], "flowapi") == 0) {
+               cmd_softnic_flowapi_map(softnic, tokens, n_tokens, out,
+                                       out_size);
+               return;
+       }
+
        snprintf(out, out_size, MSG_CMD_UNKNOWN, tokens[0]);
 }
 
diff --git a/drivers/net/softnic/rte_eth_softnic_cryptodev.c b/drivers/net/softnic/rte_eth_softnic_cryptodev.c
new file mode 100644 (file)
index 0000000..1480f6d
--- /dev/null
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <rte_cryptodev.h>
+#include <rte_cryptodev_pmd.h>
+#include <rte_string_fns.h>
+
+#include "rte_eth_softnic_internals.h"
+
+int
+softnic_cryptodev_init(struct pmd_internals *p)
+{
+       TAILQ_INIT(&p->cryptodev_list);
+
+       return 0;
+}
+
+void
+softnic_cryptodev_free(struct pmd_internals *p)
+{
+       for ( ; ; ) {
+               struct softnic_cryptodev *cryptodev;
+
+               cryptodev = TAILQ_FIRST(&p->cryptodev_list);
+               if (cryptodev == NULL)
+                       break;
+
+               TAILQ_REMOVE(&p->cryptodev_list, cryptodev, node);
+               free(cryptodev);
+       }
+}
+
+struct softnic_cryptodev *
+softnic_cryptodev_find(struct pmd_internals *p,
+       const char *name)
+{
+       struct softnic_cryptodev *cryptodev;
+
+       if (name == NULL)
+               return NULL;
+
+       TAILQ_FOREACH(cryptodev, &p->cryptodev_list, node)
+               if (strcmp(cryptodev->name, name) == 0)
+                       return cryptodev;
+
+       return NULL;
+}
+
+struct softnic_cryptodev *
+softnic_cryptodev_create(struct pmd_internals *p,
+       const char *name,
+       struct softnic_cryptodev_params *params)
+{
+       struct rte_cryptodev_info dev_info;
+       struct rte_cryptodev_config dev_conf;
+       struct rte_cryptodev_qp_conf queue_conf;
+       struct softnic_cryptodev *cryptodev;
+       uint32_t dev_id, i;
+       uint32_t socket_id;
+       int status;
+
+       /* Check input params */
+       if ((name == NULL) ||
+               softnic_cryptodev_find(p, name) ||
+               (params->n_queues == 0) ||
+               (params->queue_size == 0))
+               return NULL;
+
+       if (params->dev_name) {
+               status = rte_cryptodev_get_dev_id(params->dev_name);
+               if (status == -1)
+                       return NULL;
+
+               dev_id = (uint32_t)status;
+       } else {
+               if (rte_cryptodev_pmd_is_valid_dev(params->dev_id) == 0)
+                       return NULL;
+
+               dev_id = params->dev_id;
+       }
+
+       socket_id = rte_cryptodev_socket_id(dev_id);
+       rte_cryptodev_info_get(dev_id, &dev_info);
+
+       if (dev_info.max_nb_queue_pairs < params->n_queues)
+               return NULL;
+       if (dev_info.feature_flags & RTE_CRYPTODEV_FF_HW_ACCELERATED)
+               return NULL;
+
+       dev_conf.socket_id = socket_id;
+       dev_conf.nb_queue_pairs = params->n_queues;
+
+       status = rte_cryptodev_configure(dev_id, &dev_conf);
+       if (status < 0)
+               return NULL;
+
+       queue_conf.nb_descriptors = params->queue_size;
+       for (i = 0; i < params->n_queues; i++) {
+               status = rte_cryptodev_queue_pair_setup(dev_id, i,
+                               &queue_conf, socket_id, NULL);
+               if (status < 0)
+                       return NULL;
+       }
+
+       if (rte_cryptodev_start(dev_id) < 0)
+               return NULL;
+
+       cryptodev = calloc(1, sizeof(struct softnic_cryptodev));
+       if (cryptodev == NULL) {
+               rte_cryptodev_stop(dev_id);
+               return NULL;
+       }
+
+       strlcpy(cryptodev->name, name, sizeof(cryptodev->name));
+       cryptodev->dev_id = dev_id;
+       cryptodev->n_queues = params->n_queues;
+
+       TAILQ_INSERT_TAIL(&p->cryptodev_list, cryptodev, node);
+
+       return cryptodev;
+}
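A minimal usage sketch, assuming the softnic_cryptodev_params fields implied
by the checks above; the crypto vdev name and the wrapper function are
hypothetical:

    static int
    example_attach_cryptodev(struct pmd_internals *p)
    {
        struct softnic_cryptodev_params params = {
            .dev_name = "crypto_aesni_mb0", /* hypothetical vdev name */
            .n_queues = 2,
            .queue_size = 128,
        };

        /* NULL on bad params, unknown device, or configure/start failure */
        return (softnic_cryptodev_create(p, "CRYPTO0", &params) != NULL) ?
            0 : -1;
    }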
diff --git a/drivers/net/softnic/rte_eth_softnic_flow.c b/drivers/net/softnic/rte_eth_softnic_flow.c
new file mode 100644 (file)
index 0000000..285af46
--- /dev/null
@@ -0,0 +1,2287 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+#include <rte_flow.h>
+#include <rte_flow_driver.h>
+#include <rte_tailq.h>
+
+#include "rte_eth_softnic_internals.h"
+#include "rte_eth_softnic.h"
+
+#define rte_htons rte_cpu_to_be_16
+#define rte_htonl rte_cpu_to_be_32
+
+#define rte_ntohs rte_be_to_cpu_16
+#define rte_ntohl rte_be_to_cpu_32
+
+static struct rte_flow *
+softnic_flow_find(struct softnic_table *table,
+       struct softnic_table_rule_match *rule_match)
+{
+       struct rte_flow *flow;
+
+       TAILQ_FOREACH(flow, &table->flows, node)
+               if (memcmp(&flow->match, rule_match, sizeof(*rule_match)) == 0)
+                       return flow;
+
+       return NULL;
+}
+
+int
+flow_attr_map_set(struct pmd_internals *softnic,
+               uint32_t group_id,
+               int ingress,
+               const char *pipeline_name,
+               uint32_t table_id)
+{
+       struct pipeline *pipeline;
+       struct flow_attr_map *map;
+
+       if (group_id >= SOFTNIC_FLOW_MAX_GROUPS ||
+                       pipeline_name == NULL)
+               return -1;
+
+       pipeline = softnic_pipeline_find(softnic, pipeline_name);
+       if (pipeline == NULL ||
+                       table_id >= pipeline->n_tables)
+               return -1;
+
+       map = (ingress) ? &softnic->flow.ingress_map[group_id] :
+               &softnic->flow.egress_map[group_id];
+       strlcpy(map->pipeline_name, pipeline_name,
+               sizeof(map->pipeline_name));
+       map->table_id = table_id;
+       map->valid = 1;
+
+       return 0;
+}
+
+struct flow_attr_map *
+flow_attr_map_get(struct pmd_internals *softnic,
+               uint32_t group_id,
+               int ingress)
+{
+       if (group_id >= SOFTNIC_FLOW_MAX_GROUPS)
+               return NULL;
+
+       return (ingress) ? &softnic->flow.ingress_map[group_id] :
+               &softnic->flow.egress_map[group_id];
+}
+
+static int
+flow_pipeline_table_get(struct pmd_internals *softnic,
+               const struct rte_flow_attr *attr,
+               const char **pipeline_name,
+               uint32_t *table_id,
+               struct rte_flow_error *error)
+{
+       struct flow_attr_map *map;
+
+       if (attr == NULL)
+               return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ATTR,
+                               NULL,
+                               "Null attr");
+
+       if (!attr->ingress && !attr->egress)
+               return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+                               attr,
+                               "Ingress/egress not specified");
+
+       if (attr->ingress && attr->egress)
+               return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
+                               attr,
+                               "Setting both ingress and egress is not allowed");
+
+       map = flow_attr_map_get(softnic,
+                       attr->group,
+                       attr->ingress);
+       if (map == NULL ||
+                       map->valid == 0)
+               return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
+                               attr,
+                               "Invalid group ID");
+
+       if (pipeline_name)
+               *pipeline_name = map->pipeline_name;
+
+       if (table_id)
+               *table_id = map->table_id;
+
+       return 0;
+}
+
+union flow_item {
+       uint8_t raw[TABLE_RULE_MATCH_SIZE_MAX];
+       struct rte_flow_item_eth eth;
+       struct rte_flow_item_vlan vlan;
+       struct rte_flow_item_ipv4 ipv4;
+       struct rte_flow_item_ipv6 ipv6;
+       struct rte_flow_item_icmp icmp;
+       struct rte_flow_item_udp udp;
+       struct rte_flow_item_tcp tcp;
+       struct rte_flow_item_sctp sctp;
+       struct rte_flow_item_vxlan vxlan;
+       struct rte_flow_item_e_tag e_tag;
+       struct rte_flow_item_nvgre nvgre;
+       struct rte_flow_item_mpls mpls;
+       struct rte_flow_item_gre gre;
+       struct rte_flow_item_gtp gtp;
+       struct rte_flow_item_esp esp;
+       struct rte_flow_item_geneve geneve;
+       struct rte_flow_item_vxlan_gpe vxlan_gpe;
+       struct rte_flow_item_arp_eth_ipv4 arp_eth_ipv4;
+       struct rte_flow_item_ipv6_ext ipv6_ext;
+       struct rte_flow_item_icmp6 icmp6;
+       struct rte_flow_item_icmp6_nd_ns icmp6_nd_ns;
+       struct rte_flow_item_icmp6_nd_na icmp6_nd_na;
+       struct rte_flow_item_icmp6_nd_opt icmp6_nd_opt;
+       struct rte_flow_item_icmp6_nd_opt_sla_eth icmp6_nd_opt_sla_eth;
+       struct rte_flow_item_icmp6_nd_opt_tla_eth icmp6_nd_opt_tla_eth;
+};
+
+static const union flow_item flow_item_raw_mask;
+
+static int
+flow_item_is_proto(enum rte_flow_item_type type,
+       const void **mask,
+       size_t *size)
+{
+       switch (type) {
+       case RTE_FLOW_ITEM_TYPE_RAW:
+               *mask = &flow_item_raw_mask;
+               *size = sizeof(flow_item_raw_mask);
+               return 1; /* TRUE */
+
+       case RTE_FLOW_ITEM_TYPE_ETH:
+               *mask = &rte_flow_item_eth_mask;
+               *size = sizeof(struct rte_flow_item_eth);
+               return 1; /* TRUE */
+
+       case RTE_FLOW_ITEM_TYPE_VLAN:
+               *mask = &rte_flow_item_vlan_mask;
+               *size = sizeof(struct rte_flow_item_vlan);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_IPV4:
+               *mask = &rte_flow_item_ipv4_mask;
+               *size = sizeof(struct rte_flow_item_ipv4);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_IPV6:
+               *mask = &rte_flow_item_ipv6_mask;
+               *size = sizeof(struct rte_flow_item_ipv6);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_ICMP:
+               *mask = &rte_flow_item_icmp_mask;
+               *size = sizeof(struct rte_flow_item_icmp);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_UDP:
+               *mask = &rte_flow_item_udp_mask;
+               *size = sizeof(struct rte_flow_item_udp);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_TCP:
+               *mask = &rte_flow_item_tcp_mask;
+               *size = sizeof(struct rte_flow_item_tcp);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_SCTP:
+               *mask = &rte_flow_item_sctp_mask;
+               *size = sizeof(struct rte_flow_item_sctp);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_VXLAN:
+               *mask = &rte_flow_item_vxlan_mask;
+               *size = sizeof(struct rte_flow_item_vxlan);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_E_TAG:
+               *mask = &rte_flow_item_e_tag_mask;
+               *size = sizeof(struct rte_flow_item_e_tag);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_NVGRE:
+               *mask = &rte_flow_item_nvgre_mask;
+               *size = sizeof(struct rte_flow_item_nvgre);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_MPLS:
+               *mask = &rte_flow_item_mpls_mask;
+               *size = sizeof(struct rte_flow_item_mpls);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_GRE:
+               *mask = &rte_flow_item_gre_mask;
+               *size = sizeof(struct rte_flow_item_gre);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_GTP:
+       case RTE_FLOW_ITEM_TYPE_GTPC:
+       case RTE_FLOW_ITEM_TYPE_GTPU:
+               *mask = &rte_flow_item_gtp_mask;
+               *size = sizeof(struct rte_flow_item_gtp);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_ESP:
+               *mask = &rte_flow_item_esp_mask;
+               *size = sizeof(struct rte_flow_item_esp);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_GENEVE:
+               *mask = &rte_flow_item_geneve_mask;
+               *size = sizeof(struct rte_flow_item_geneve);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
+               *mask = &rte_flow_item_vxlan_gpe_mask;
+               *size = sizeof(struct rte_flow_item_vxlan_gpe);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_ARP_ETH_IPV4:
+               *mask = &rte_flow_item_arp_eth_ipv4_mask;
+               *size = sizeof(struct rte_flow_item_arp_eth_ipv4);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_IPV6_EXT:
+               *mask = &rte_flow_item_ipv6_ext_mask;
+               *size = sizeof(struct rte_flow_item_ipv6_ext);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_ICMP6:
+               *mask = &rte_flow_item_icmp6_mask;
+               *size = sizeof(struct rte_flow_item_icmp6);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_ICMP6_ND_NS:
+               *mask = &rte_flow_item_icmp6_nd_ns_mask;
+               *size = sizeof(struct rte_flow_item_icmp6_nd_ns);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_ICMP6_ND_NA:
+               *mask = &rte_flow_item_icmp6_nd_na_mask;
+               *size = sizeof(struct rte_flow_item_icmp6_nd_na);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT:
+               *mask = &rte_flow_item_icmp6_nd_opt_mask;
+               *size = sizeof(struct rte_flow_item_icmp6_nd_opt);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_SLA_ETH:
+               *mask = &rte_flow_item_icmp6_nd_opt_sla_eth_mask;
+               *size = sizeof(struct rte_flow_item_icmp6_nd_opt_sla_eth);
+               return 1;
+
+       case RTE_FLOW_ITEM_TYPE_ICMP6_ND_OPT_TLA_ETH:
+               *mask = &rte_flow_item_icmp6_nd_opt_tla_eth_mask;
+               *size = sizeof(struct rte_flow_item_icmp6_nd_opt_tla_eth);
+               return 1;
+
+       default: return 0; /* FALSE */
+       }
+}
+
+static int
+flow_item_raw_preprocess(const struct rte_flow_item *item,
+       union flow_item *item_spec,
+       union flow_item *item_mask,
+       size_t *item_size,
+       int *item_disabled,
+       struct rte_flow_error *error)
+{
+       const struct rte_flow_item_raw *item_raw_spec = item->spec;
+       const struct rte_flow_item_raw *item_raw_mask = item->mask;
+       const uint8_t *pattern;
+       const uint8_t *pattern_mask;
+       uint8_t *spec = (uint8_t *)item_spec;
+       uint8_t *mask = (uint8_t *)item_mask;
+       size_t pattern_length, pattern_offset, i;
+       int disabled;
+
+       if (!item->spec)
+               return rte_flow_error_set(error,
+                       ENOTSUP,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item,
+                       "RAW: Null specification");
+
+       if (item->last)
+               return rte_flow_error_set(error,
+                       ENOTSUP,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item,
+                       "RAW: Range not allowed (last must be NULL)");
+
+       if (item_raw_spec->relative == 0)
+               return rte_flow_error_set(error,
+                       ENOTSUP,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item,
+                       "RAW: Absolute offset not supported");
+
+       if (item_raw_spec->search)
+               return rte_flow_error_set(error,
+                       ENOTSUP,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item,
+                       "RAW: Search not supported");
+
+       if (item_raw_spec->offset < 0)
+               return rte_flow_error_set(error,
+                       ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
+                       item,
+                       "RAW: Negative offset not supported");
+
+       if (item_raw_spec->length == 0)
+               return rte_flow_error_set(error,
+                       ENOTSUP,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item,
+                       "RAW: Zero pattern length");
+
+       if (item_raw_spec->offset + item_raw_spec->length >
+               TABLE_RULE_MATCH_SIZE_MAX)
+               return rte_flow_error_set(error,
+                       ENOTSUP,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item,
+                       "RAW: Item too big");
+
+       if (!item_raw_spec->pattern && item_raw_mask && item_raw_mask->pattern)
+               return rte_flow_error_set(error,
+                       ENOTSUP,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item,
+                       "RAW: Non-NULL pattern mask not allowed with NULL pattern");
+
+       pattern = item_raw_spec->pattern;
+       pattern_mask = (item_raw_mask) ? item_raw_mask->pattern : NULL;
+       pattern_length = (size_t)item_raw_spec->length;
+       pattern_offset = (size_t)item_raw_spec->offset;
+
+       disabled = 0;
+       if (pattern_mask == NULL)
+               disabled = 1;
+       else
+               for (i = 0; i < pattern_length; i++)
+                       if ((pattern)[i])
+                               disabled = 1;
+
+       memset(spec, 0, TABLE_RULE_MATCH_SIZE_MAX);
+       if (pattern)
+               memcpy(&spec[pattern_offset], pattern, pattern_length);
+
+       memset(mask, 0, TABLE_RULE_MATCH_SIZE_MAX);
+       if (pattern_mask)
+               memcpy(&mask[pattern_offset], pattern_mask, pattern_length);
+
+       *item_size = pattern_offset + pattern_length;
+       *item_disabled = disabled;
+
+       return 0;
+}
+
+static int
+flow_item_proto_preprocess(const struct rte_flow_item *item,
+       union flow_item *item_spec,
+       union flow_item *item_mask,
+       size_t *item_size,
+       int *item_disabled,
+       struct rte_flow_error *error)
+{
+       const void *mask_default;
+       uint8_t *spec = (uint8_t *)item_spec;
+       uint8_t *mask = (uint8_t *)item_mask;
+       size_t size, i;
+
+       if (!flow_item_is_proto(item->type, &mask_default, &size))
+               return rte_flow_error_set(error,
+                       ENOTSUP,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item,
+                       "Item type not supported");
+
+       if (item->type == RTE_FLOW_ITEM_TYPE_RAW)
+               return flow_item_raw_preprocess(item,
+                       item_spec,
+                       item_mask,
+                       item_size,
+                       item_disabled,
+                       error);
+
+       /* spec */
+       if (!item->spec) {
+               /* If spec is NULL, then last and mask also have to be NULL. */
+               if (item->last || item->mask)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "Invalid item (NULL spec with non-NULL last or mask)");
+
+               memset(item_spec, 0, size);
+               memset(item_mask, 0, size);
+               *item_size = size;
+               *item_disabled = 1; /* TRUE */
+               return 0;
+       }
+
+       memcpy(spec, item->spec, size);
+       *item_size = size;
+
+       /* mask */
+       if (item->mask)
+               memcpy(mask, item->mask, size);
+       else
+               memcpy(mask, mask_default, size);
+
+       /* disabled */
+       for (i = 0; i < size; i++)
+               if (mask[i])
+                       break;
+       *item_disabled = (i == size) ? 1 : 0;
+
+       /* Apply mask over spec. */
+       for (i = 0; i < size; i++)
+               spec[i] &= mask[i];
+
+       /* last */
+       if (item->last) {
+               uint8_t last[size];
+
+               /* init last */
+               memcpy(last, item->last, size);
+               for (i = 0; i < size; i++)
+                       last[i] &= mask[i];
+
+               /* check for range */
+               for (i = 0; i < size; i++)
+                       if (last[i] != spec[i])
+                               return rte_flow_error_set(error,
+                                       ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_ITEM,
+                                       item,
+                                       "Range not supported");
+       }
+
+       return 0;
+}
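As a concrete illustration, an IPv4 item this preprocessor accepts without a
range error (field values are illustrative; last is left NULL, so no range is
described):

    struct rte_flow_item_ipv4 spec = {
        .hdr.src_addr = RTE_BE32(0x0a000001), /* 10.0.0.1 */
    };
    struct rte_flow_item_ipv4 mask = {
        .hdr.src_addr = RTE_BE32(0xffffffff),
    };
    struct rte_flow_item item = {
        .type = RTE_FLOW_ITEM_TYPE_IPV4,
        .spec = &spec,
        .mask = &mask,
        .last = NULL, /* last != spec after masking would be rejected */
    };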
+
+/***
+ * Skip disabled protocol items and VOID items
+ * until any of the mutually exclusive conditions
+ * from the list below takes place:
+ *    (A) A protocol present in the proto_mask
+ *        is met (either ENABLED or DISABLED);
+ *    (B) A protocol NOT present in the proto_mask is met in ENABLED state;
+ *    (C) The END item is met.
+ */
+static int
+flow_item_skip_disabled_protos(const struct rte_flow_item **item,
+       uint64_t proto_mask,
+       size_t *length,
+       struct rte_flow_error *error)
+{
+       size_t len = 0;
+
+       for ( ; (*item)->type != RTE_FLOW_ITEM_TYPE_END; (*item)++) {
+               union flow_item spec, mask;
+               size_t size;
+               int disabled = 0, status;
+
+               if ((*item)->type == RTE_FLOW_ITEM_TYPE_VOID)
+                       continue;
+
+               status = flow_item_proto_preprocess(*item,
+                               &spec,
+                               &mask,
+                               &size,
+                               &disabled,
+                               error);
+               if (status)
+                       return status;
+
+               if ((proto_mask & (1LLU << (*item)->type)) ||
+                               !disabled)
+                       break;
+
+               len += size;
+       }
+
+       if (length)
+               *length = len;
+
+       return 0;
+}
+
+#define FLOW_ITEM_PROTO_IP \
+       ((1LLU << RTE_FLOW_ITEM_TYPE_IPV4) | \
+        (1LLU << RTE_FLOW_ITEM_TYPE_IPV6))
+
+static void
+flow_item_skip_void(const struct rte_flow_item **item)
+{
+       for ( ; ; (*item)++)
+               if ((*item)->type != RTE_FLOW_ITEM_TYPE_VOID)
+                       return;
+}
+
+#define IP_PROTOCOL_TCP 0x06
+#define IP_PROTOCOL_UDP 0x11
+#define IP_PROTOCOL_SCTP 0x84
+
+static int
+mask_to_depth(uint64_t mask,
+               uint32_t *depth)
+{
+       uint64_t n;
+
+       if (mask == UINT64_MAX) {
+               if (depth)
+                       *depth = 64;
+
+               return 0;
+       }
+
+       mask = ~mask;
+
+       if (mask & (mask + 1))
+               return -1;
+
+       n = __builtin_popcountll(mask);
+       if (depth)
+               *depth = (uint32_t)(64 - n);
+
+       return 0;
+}
+
+static int
+ipv4_mask_to_depth(uint32_t mask,
+               uint32_t *depth)
+{
+       uint32_t d;
+       int status;
+
+       status = mask_to_depth(mask | (UINT64_MAX << 32), &d);
+       if (status)
+               return status;
+
+       d -= 32;
+       if (depth)
+               *depth = d;
+
+       return 0;
+}
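A worked example of the contiguity test above: for the mask 255.255.255.0 the
padded 64-bit value is 0xffffffffffffff00, its complement is 0xff, and
0xff & 0x100 == 0, so the mask is a valid prefix of depth
64 - popcount(0xff) - 32 = 24. A standalone sketch:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint32_t mask = 0xffffff00; /* 255.255.255.0 */
        uint64_t inv = ~(mask | (UINT64_MAX << 32));

        if (inv & (inv + 1))
            return 1; /* not a contiguous prefix mask */

        printf("depth = %u\n",
            (uint32_t)(64 - __builtin_popcountll(inv) - 32)); /* 24 */
        return 0;
    }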
+
+static int
+ipv6_mask_to_depth(uint8_t *mask,
+       uint32_t *depth)
+{
+       uint64_t *m = (uint64_t *)mask;
+       uint64_t m0 = rte_be_to_cpu_64(m[0]);
+       uint64_t m1 = rte_be_to_cpu_64(m[1]);
+       uint32_t d0, d1;
+       int status;
+
+       status = mask_to_depth(m0, &d0);
+       if (status)
+               return status;
+
+       status = mask_to_depth(m1, &d1);
+       if (status)
+               return status;
+
+       if (d0 < 64 && d1)
+               return -1;
+
+       if (depth)
+               *depth = d0 + d1;
+
+       return 0;
+}
+
+static int
+port_mask_to_range(uint16_t port,
+       uint16_t port_mask,
+       uint16_t *port0,
+       uint16_t *port1)
+{
+       int status;
+       uint16_t p0, p1;
+
+       status = mask_to_depth(port_mask | (UINT64_MAX << 16), NULL);
+       if (status)
+               return -1;
+
+       p0 = port & port_mask;
+       p1 = p0 | ~port_mask;
+
+       if (port0)
+               *port0 = p0;
+
+       if (port1)
+               *port1 = p1;
+
+       return 0;
+}
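For example, port 0x1200 with mask 0xff00 passes the contiguity check and
expands to the range [0x1200, 0x12ff]:

    uint16_t port = 0x1200, port_mask = 0xff00;
    uint16_t p0 = port & port_mask;          /* 0x1200 */
    uint16_t p1 = p0 | (uint16_t)~port_mask; /* 0x12ff */

A mask such as 0x00ff is rejected, since matching on the low byte alone
cannot be expressed as a single contiguous port range.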
+
+static int
+flow_rule_match_acl_get(struct pmd_internals *softnic __rte_unused,
+               struct pipeline *pipeline __rte_unused,
+               struct softnic_table *table __rte_unused,
+               const struct rte_flow_attr *attr,
+               const struct rte_flow_item *item,
+               struct softnic_table_rule_match *rule_match,
+               struct rte_flow_error *error)
+{
+       union flow_item spec, mask;
+       size_t size, length = 0;
+       int disabled = 0, status;
+       uint8_t ip_proto, ip_proto_mask;
+
+       memset(rule_match, 0, sizeof(*rule_match));
+       rule_match->match_type = TABLE_ACL;
+       rule_match->match.acl.priority = attr->priority;
+
+       /* VOID or disabled protos only, if any. */
+       status = flow_item_skip_disabled_protos(&item,
+                       FLOW_ITEM_PROTO_IP, &length, error);
+       if (status)
+               return status;
+
+       /* IP only. */
+       status = flow_item_proto_preprocess(item, &spec, &mask,
+                       &size, &disabled, error);
+       if (status)
+               return status;
+
+       switch (item->type) {
+       case RTE_FLOW_ITEM_TYPE_IPV4:
+       {
+               uint32_t sa_depth, da_depth;
+
+               status = ipv4_mask_to_depth(rte_ntohl(mask.ipv4.hdr.src_addr),
+                               &sa_depth);
+               if (status)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "ACL: Illegal IPv4 header source address mask");
+
+               status = ipv4_mask_to_depth(rte_ntohl(mask.ipv4.hdr.dst_addr),
+                               &da_depth);
+               if (status)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "ACL: Illegal IPv4 header destination address mask");
+
+               ip_proto = spec.ipv4.hdr.next_proto_id;
+               ip_proto_mask = mask.ipv4.hdr.next_proto_id;
+
+               rule_match->match.acl.ip_version = 1;
+               rule_match->match.acl.ipv4.sa =
+                       rte_ntohl(spec.ipv4.hdr.src_addr);
+               rule_match->match.acl.ipv4.da =
+                       rte_ntohl(spec.ipv4.hdr.dst_addr);
+               rule_match->match.acl.sa_depth = sa_depth;
+               rule_match->match.acl.da_depth = da_depth;
+               rule_match->match.acl.proto = ip_proto;
+               rule_match->match.acl.proto_mask = ip_proto_mask;
+               break;
+       } /* RTE_FLOW_ITEM_TYPE_IPV4 */
+
+       case RTE_FLOW_ITEM_TYPE_IPV6:
+       {
+               uint32_t sa_depth, da_depth;
+
+               status = ipv6_mask_to_depth(mask.ipv6.hdr.src_addr, &sa_depth);
+               if (status)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "ACL: Illegal IPv6 header source address mask");
+
+               status = ipv6_mask_to_depth(mask.ipv6.hdr.dst_addr, &da_depth);
+               if (status)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "ACL: Illegal IPv6 header destination address mask");
+
+               ip_proto = spec.ipv6.hdr.proto;
+               ip_proto_mask = mask.ipv6.hdr.proto;
+
+               rule_match->match.acl.ip_version = 0;
+               memcpy(rule_match->match.acl.ipv6.sa,
+                       spec.ipv6.hdr.src_addr,
+                       sizeof(spec.ipv6.hdr.src_addr));
+               memcpy(rule_match->match.acl.ipv6.da,
+                       spec.ipv6.hdr.dst_addr,
+                       sizeof(spec.ipv6.hdr.dst_addr));
+               rule_match->match.acl.sa_depth = sa_depth;
+               rule_match->match.acl.da_depth = da_depth;
+               rule_match->match.acl.proto = ip_proto;
+               rule_match->match.acl.proto_mask = ip_proto_mask;
+               break;
+       } /* RTE_FLOW_ITEM_TYPE_IPV6 */
+
+       default:
+               return rte_flow_error_set(error,
+                       ENOTSUP,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item,
+                       "ACL: IP protocol required");
+       } /* switch */
+
+       if (ip_proto_mask != UINT8_MAX)
+               return rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item,
+                       "ACL: Illegal IP protocol mask");
+
+       item++;
+
+       /* VOID only, if any. */
+       flow_item_skip_void(&item);
+
+       /* TCP/UDP/SCTP only. */
+       status = flow_item_proto_preprocess(item, &spec, &mask,
+                       &size, &disabled, error);
+       if (status)
+               return status;
+
+       switch (item->type) {
+       case RTE_FLOW_ITEM_TYPE_TCP:
+       {
+               uint16_t sp0, sp1, dp0, dp1;
+
+               if (ip_proto != IP_PROTOCOL_TCP)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "ACL: Item type is TCP, but IP protocol is not");
+
+               status = port_mask_to_range(rte_ntohs(spec.tcp.hdr.src_port),
+                               rte_ntohs(mask.tcp.hdr.src_port),
+                               &sp0,
+                               &sp1);
+
+               if (status)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "ACL: Illegal TCP source port mask");
+
+               status = port_mask_to_range(rte_ntohs(spec.tcp.hdr.dst_port),
+                               rte_ntohs(mask.tcp.hdr.dst_port),
+                               &dp0,
+                               &dp1);
+
+               if (status)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "ACL: Illegal TCP destination port mask");
+
+               rule_match->match.acl.sp0 = sp0;
+               rule_match->match.acl.sp1 = sp1;
+               rule_match->match.acl.dp0 = dp0;
+               rule_match->match.acl.dp1 = dp1;
+
+               break;
+       } /* RTE_FLOW_ITEM_TYPE_TCP */
+
+       case RTE_FLOW_ITEM_TYPE_UDP:
+       {
+               uint16_t sp0, sp1, dp0, dp1;
+
+               if (ip_proto != IP_PROTOCOL_UDP)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "ACL: Item type is UDP, but IP protocol is not");
+
+               status = port_mask_to_range(rte_ntohs(spec.udp.hdr.src_port),
+                       rte_ntohs(mask.udp.hdr.src_port),
+                       &sp0,
+                       &sp1);
+               if (status)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "ACL: Illegal UDP source port mask");
+
+               status = port_mask_to_range(rte_ntohs(spec.udp.hdr.dst_port),
+                       rte_ntohs(mask.udp.hdr.dst_port),
+                       &dp0,
+                       &dp1);
+               if (status)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "ACL: Illegal UDP destination port mask");
+
+               rule_match->match.acl.sp0 = sp0;
+               rule_match->match.acl.sp1 = sp1;
+               rule_match->match.acl.dp0 = dp0;
+               rule_match->match.acl.dp1 = dp1;
+
+               break;
+       } /* RTE_FLOW_ITEM_TYPE_UDP */
+
+       case RTE_FLOW_ITEM_TYPE_SCTP:
+       {
+               uint16_t sp0, sp1, dp0, dp1;
+
+               if (ip_proto != IP_PROTOCOL_SCTP)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "ACL: Item type is SCTP, but IP protocol is not");
+
+               status = port_mask_to_range(rte_ntohs(spec.sctp.hdr.src_port),
+                       rte_ntohs(mask.sctp.hdr.src_port),
+                       &sp0,
+                       &sp1);
+
+               if (status)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "ACL: Illegal SCTP source port mask");
+
+               status = port_mask_to_range(rte_ntohs(spec.sctp.hdr.dst_port),
+                       rte_ntohs(mask.sctp.hdr.dst_port),
+                       &dp0,
+                       &dp1);
+               if (status)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "ACL: Illegal SCTP destination port mask");
+
+               rule_match->match.acl.sp0 = sp0;
+               rule_match->match.acl.sp1 = sp1;
+               rule_match->match.acl.dp0 = dp0;
+               rule_match->match.acl.dp1 = dp1;
+
+               break;
+       } /* RTE_FLOW_ITEM_TYPE_SCTP */
+
+       default:
+               return rte_flow_error_set(error,
+                       ENOTSUP,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item,
+                       "ACL: TCP/UDP/SCTP required");
+       } /* switch */
+
+       item++;
+
+       /* VOID or disabled protos only, if any. */
+       status = flow_item_skip_disabled_protos(&item, 0, NULL, error);
+       if (status)
+               return status;
+
+       /* END only. */
+       if (item->type != RTE_FLOW_ITEM_TYPE_END)
+               return rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item,
+                       "ACL: Expecting END item");
+
+       return 0;
+}
+
+/***
+ * Both *tmask* and *fmask* are byte arrays of size *tsize* and *fsize*
+ * respectively.
+ * They are located within a larger buffer at offsets *toffset* and *foffset*
+ * respectively. Both *tmask* and *fmask* represent bitmasks for the larger
+ * buffer.
+ * Question: are the two masks equivalent?
+ *
+ * Notes:
+ * 1. Offset basically indicates that the first offset bytes in the buffer
+ *    are "don't care", so offset is equivalent to pre-pending an "all-zeros"
+ *    array of *offset* bytes to the *mask*.
+ * 2. Each *mask* might contain a number of zero bytes at the beginning or
+ *    at the end.
+ * 3. Bytes in the larger buffer after the end of the *mask* are also considered
+ *    "don't care", so they are equivalent to appending an "all-zeros" array of
+ *    bytes to the *mask*.
+ *
+ * Example:
+ * Buffer = [xx xx xx xx xx xx xx xx], buffer size = 8 bytes
+ * tmask = [00 22 00 33 00], toffset = 2, tsize = 5
+ *    => buffer mask = [00 00 00 22 00 33 00 00]
+ * fmask = [22 00 33], foffset = 3, fsize = 3
+ *    => buffer mask = [00 00 00 22 00 33 00 00]
+ * Therefore, the tmask and fmask from this example are equivalent.
+ */
+static int
+hash_key_mask_is_same(uint8_t *tmask,
+       size_t toffset,
+       size_t tsize,
+       uint8_t *fmask,
+       size_t foffset,
+       size_t fsize,
+       size_t *toffset_plus,
+       size_t *foffset_plus)
+{
+       size_t tpos; /* Position of first non-zero byte in the tmask buffer. */
+       size_t fpos; /* Position of first non-zero byte in the fmask buffer. */
+
+       /* Compute tpos and fpos. */
+       for (tpos = 0; tmask[tpos] == 0; tpos++)
+               ;
+       for (fpos = 0; fmask[fpos] == 0; fpos++)
+               ;
+
+       if (toffset + tpos != foffset + fpos)
+               return 0; /* FALSE */
+
+       tsize -= tpos;
+       fsize -= fpos;
+
+       if (tsize < fsize) {
+               size_t i;
+
+               for (i = 0; i < tsize; i++)
+                       if (tmask[tpos + i] != fmask[fpos + i])
+                               return 0; /* FALSE */
+
+               for ( ; i < fsize; i++)
+                       if (fmask[fpos + i])
+                               return 0; /* FALSE */
+       } else {
+               size_t i;
+
+               for (i = 0; i < fsize; i++)
+                       if (tmask[tpos + i] != fmask[fpos + i])
+                               return 0; /* FALSE */
+
+               for ( ; i < tsize; i++)
+                       if (tmask[tpos + i])
+                               return 0; /* FALSE */
+       }
+
+       if (toffset_plus)
+               *toffset_plus = tpos;
+
+       if (foffset_plus)
+               *foffset_plus = fpos;
+
+       return 1; /* TRUE */
+}
+
+static int
+flow_rule_match_hash_get(struct pmd_internals *softnic __rte_unused,
+       struct pipeline *pipeline __rte_unused,
+       struct softnic_table *table,
+       const struct rte_flow_attr *attr __rte_unused,
+       const struct rte_flow_item *item,
+       struct softnic_table_rule_match *rule_match,
+       struct rte_flow_error *error)
+{
+       struct softnic_table_rule_match_hash key, key_mask;
+       struct softnic_table_hash_params *params = &table->params.match.hash;
+       size_t offset = 0, length = 0, tpos, fpos;
+       int status;
+
+       memset(&key, 0, sizeof(key));
+       memset(&key_mask, 0, sizeof(key_mask));
+
+       /* VOID or disabled protos only, if any. */
+       status = flow_item_skip_disabled_protos(&item, 0, &offset, error);
+       if (status)
+               return status;
+
+       if (item->type == RTE_FLOW_ITEM_TYPE_END)
+               return rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       item,
+                       "HASH: END detected too early");
+
+       /* VOID or any protocols (enabled or disabled). */
+       for ( ; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
+               union flow_item spec, mask;
+               size_t size;
+               int disabled, status;
+
+               if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
+                       continue;
+
+               status = flow_item_proto_preprocess(item,
+                       &spec,
+                       &mask,
+                       &size,
+                       &disabled,
+                       error);
+               if (status)
+                       return status;
+
+               if (length + size > sizeof(key)) {
+                       if (disabled)
+                               break;
+
+                       return rte_flow_error_set(error,
+                               ENOTSUP,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "HASH: Item too big");
+               }
+
+               memcpy(&key.key[length], &spec, size);
+               memcpy(&key_mask.key[length], &mask, size);
+               length += size;
+       }
+
+       if (item->type != RTE_FLOW_ITEM_TYPE_END) {
+               /* VOID or disabled protos only, if any. */
+               status = flow_item_skip_disabled_protos(&item, 0, NULL, error);
+               if (status)
+                       return status;
+
+               /* END only. */
+               if (item->type != RTE_FLOW_ITEM_TYPE_END)
+                       return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               item,
+                               "HASH: Expecting END item");
+       }
+
+       /* Compare flow key mask against table key mask. */
+       offset += sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM;
+
+       if (!hash_key_mask_is_same(params->key_mask,
+               params->key_offset,
+               params->key_size,
+               key_mask.key,
+               offset,
+               length,
+               &tpos,
+               &fpos))
+               return rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "HASH: Item list does not match the table key mask");
+
+       /* Rule match. */
+       memset(rule_match, 0, sizeof(*rule_match));
+       rule_match->match_type = TABLE_HASH;
+       memcpy(&rule_match->match.hash.key[tpos],
+               &key.key[fpos],
+               RTE_MIN(sizeof(rule_match->match.hash.key) - tpos,
+                       length - fpos));
+
+       return 0;
+}
+
+static int
+flow_rule_match_get(struct pmd_internals *softnic,
+               struct pipeline *pipeline,
+               struct softnic_table *table,
+               const struct rte_flow_attr *attr,
+               const struct rte_flow_item *item,
+               struct softnic_table_rule_match *rule_match,
+               struct rte_flow_error *error)
+{
+       switch (table->params.match_type) {
+       case TABLE_ACL:
+               return flow_rule_match_acl_get(softnic,
+                       pipeline,
+                       table,
+                       attr,
+                       item,
+                       rule_match,
+                       error);
+
+       case TABLE_HASH:
+               return flow_rule_match_hash_get(softnic,
+                       pipeline,
+                       table,
+                       attr,
+                       item,
+                       rule_match,
+                       error);
+
+       default:
+               return rte_flow_error_set(error,
+                       ENOTSUP,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "Unsupported pipeline table match type");
+       }
+}
+
+static int
+flow_rule_action_get(struct pmd_internals *softnic,
+       struct pipeline *pipeline,
+       struct softnic_table *table,
+       const struct rte_flow_attr *attr,
+       const struct rte_flow_action *action,
+       struct softnic_table_rule_action *rule_action,
+       struct rte_flow_error *error)
+{
+       struct softnic_table_action_profile *profile;
+       struct softnic_table_action_profile_params *params;
+       int n_jump_queue_rss_drop = 0;
+       int n_count = 0;
+       int n_mark = 0;
+       int n_vxlan_decap = 0;
+
+       profile = softnic_table_action_profile_find(softnic,
+               table->params.action_profile_name);
+       if (profile == NULL)
+               return rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       action,
+                       "Table action profile not found");
+
+       params = &profile->params;
+
+       for ( ; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
+               if (action->type == RTE_FLOW_ACTION_TYPE_VOID)
+                       continue;
+
+               switch (action->type) {
+               case RTE_FLOW_ACTION_TYPE_JUMP:
+               {
+                       const struct rte_flow_action_jump *conf = action->conf;
+                       struct flow_attr_map *map;
+
+                       if (conf == NULL)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "JUMP: Null configuration");
+
+                       if (n_jump_queue_rss_drop)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "Only one termination action is"
+                                       " allowed per flow");
+
+                       if ((params->action_mask &
+                               (1LLU << RTE_TABLE_ACTION_FWD)) == 0)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       "JUMP action not enabled for this table");
+
+                       n_jump_queue_rss_drop = 1;
+
+                       map = flow_attr_map_get(softnic,
+                               conf->group,
+                               attr->ingress);
+                       if (map == NULL || map->valid == 0)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       "JUMP: Invalid group mapping");
+
+                       if (strcmp(pipeline->name, map->pipeline_name) != 0)
+                               return rte_flow_error_set(error,
+                                       ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       "JUMP: Jump to table in different pipeline");
+
+                       /* RTE_TABLE_ACTION_FWD */
+                       rule_action->fwd.action = RTE_PIPELINE_ACTION_TABLE;
+                       rule_action->fwd.id = map->table_id;
+                       rule_action->action_mask |= 1 << RTE_TABLE_ACTION_FWD;
+                       break;
+               } /* RTE_FLOW_ACTION_TYPE_JUMP */
+
+               case RTE_FLOW_ACTION_TYPE_QUEUE:
+               {
+                       char name[NAME_SIZE];
+                       struct rte_eth_dev *dev;
+                       const struct rte_flow_action_queue *conf = action->conf;
+                       uint32_t port_id;
+                       int status;
+
+                       if (conf == NULL)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "QUEUE: Null configuration");
+
+                       if (n_jump_queue_rss_drop)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "Only one termination action is allowed"
+                                       " per flow");
+
+                       if ((params->action_mask &
+                               (1LLU << RTE_TABLE_ACTION_FWD)) == 0)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       "QUEUE action not enabled for this table");
+
+                       n_jump_queue_rss_drop = 1;
+
+                       dev = ETHDEV(softnic);
+                       if (dev == NULL ||
+                               conf->index >= dev->data->nb_rx_queues)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "QUEUE: Invalid RX queue ID");
+
+                       snprintf(name, sizeof(name), "RXQ%u",
+                               (uint32_t)conf->index);
+
+                       status = softnic_pipeline_port_out_find(softnic,
+                               pipeline->name,
+                               name,
+                               &port_id);
+                       if (status)
+                               return rte_flow_error_set(error,
+                                       ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "QUEUE: RX queue not accessible from this pipeline");
+
+                       /* RTE_TABLE_ACTION_FWD */
+                       rule_action->fwd.action = RTE_PIPELINE_ACTION_PORT;
+                       rule_action->fwd.id = port_id;
+                       rule_action->action_mask |= 1 << RTE_TABLE_ACTION_FWD;
+                       break;
+               } /* RTE_FLOW_ACTION_TYPE_QUEUE */
+
+               case RTE_FLOW_ACTION_TYPE_RSS:
+               {
+                       const struct rte_flow_action_rss *conf = action->conf;
+                       uint32_t i;
+
+                       if (conf == NULL)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "RSS: Null configuration");
+
+                       if (!rte_is_power_of_2(conf->queue_num))
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                       conf,
+                                       "RSS: Number of queues must be a power of 2");
+
+                       if (conf->queue_num > RTE_DIM(rule_action->lb.out))
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                       conf,
+                                       "RSS: Number of queues too big");
+
+                       if (n_jump_queue_rss_drop)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "Only one termination action is allowed per flow");
+
+                       if (((params->action_mask &
+                               (1LLU << RTE_TABLE_ACTION_FWD)) == 0) ||
+                               ((params->action_mask &
+                               (1LLU << RTE_TABLE_ACTION_LB)) == 0))
+                               return rte_flow_error_set(error,
+                                       ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       "RSS action not supported by this table");
+
+                       if (params->lb.out_offset !=
+                               pipeline->params.offset_port_id)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       "RSS action not supported by this pipeline");
+
+                       n_jump_queue_rss_drop = 1;
+
+                       /* RTE_TABLE_ACTION_LB */
+                       for (i = 0; i < conf->queue_num; i++) {
+                               char name[NAME_SIZE];
+                               struct rte_eth_dev *dev;
+                               uint32_t port_id;
+                               int status;
+
+                               dev = ETHDEV(softnic);
+                               if (dev == NULL ||
+                                       conf->queue[i] >=
+                                               dev->data->nb_rx_queues)
+                                       return rte_flow_error_set(error,
+                                               EINVAL,
+                                               RTE_FLOW_ERROR_TYPE_ACTION,
+                                               action,
+                                               "RSS: Invalid RX queue ID");
+
+                               snprintf(name, sizeof(name), "RXQ%u",
+                                       (uint32_t)conf->queue[i]);
+
+                               status = softnic_pipeline_port_out_find(softnic,
+                                       pipeline->name,
+                                       name,
+                                       &port_id);
+                               if (status)
+                                       return rte_flow_error_set(error,
+                                               ENOTSUP,
+                                               RTE_FLOW_ERROR_TYPE_ACTION,
+                                               action,
+                                               "RSS: RX queue not accessible from this pipeline");
+
+                               rule_action->lb.out[i] = port_id;
+                       }
+
+                       for ( ; i < RTE_DIM(rule_action->lb.out); i++)
+                               rule_action->lb.out[i] =
+                                       rule_action->lb.out[i % conf->queue_num];
+
+                       rule_action->action_mask |= 1 << RTE_TABLE_ACTION_LB;
+
+                       /* RTE_TABLE_ACTION_FWD */
+                       rule_action->fwd.action = RTE_PIPELINE_ACTION_PORT_META;
+                       rule_action->action_mask |= 1 << RTE_TABLE_ACTION_FWD;
+                       break;
+               } /* RTE_FLOW_ACTION_TYPE_RSS */
+
+               case RTE_FLOW_ACTION_TYPE_DROP:
+               {
+                       const void *conf = action->conf;
+
+                       if (conf != NULL)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "DROP: No configuration required");
+
+                       if (n_jump_queue_rss_drop)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "Only one termination action is allowed per flow");
+                       if ((params->action_mask &
+                               (1LLU << RTE_TABLE_ACTION_FWD)) == 0)
+                               return rte_flow_error_set(error,
+                                       ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       "DROP action not supported by this table");
+
+                       n_jump_queue_rss_drop = 1;
+
+                       /* RTE_TABLE_ACTION_FWD */
+                       rule_action->fwd.action = RTE_PIPELINE_ACTION_DROP;
+                       rule_action->action_mask |= 1 << RTE_TABLE_ACTION_FWD;
+                       break;
+               } /* RTE_FLOW_ACTION_TYPE_DROP */
+
+               case RTE_FLOW_ACTION_TYPE_COUNT:
+               {
+                       const struct rte_flow_action_count *conf = action->conf;
+
+                       if (conf == NULL)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "COUNT: Null configuration");
+
+                       if (conf->shared)
+                               return rte_flow_error_set(error,
+                                       ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                       conf,
+                                       "COUNT: Shared counters not supported");
+
+                       if (n_count)
+                               return rte_flow_error_set(error,
+                                       ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "Only one COUNT action per flow");
+
+                       if ((params->action_mask &
+                               (1LLU << RTE_TABLE_ACTION_STATS)) == 0)
+                               return rte_flow_error_set(error,
+                                       ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       "COUNT action not supported by this table");
+
+                       n_count = 1;
+
+                       /* RTE_TABLE_ACTION_STATS */
+                       rule_action->stats.n_packets = 0;
+                       rule_action->stats.n_bytes = 0;
+                       rule_action->action_mask |= 1 << RTE_TABLE_ACTION_STATS;
+                       break;
+               } /* RTE_FLOW_ACTION_TYPE_COUNT */
+
+               case RTE_FLOW_ACTION_TYPE_MARK:
+               {
+                       const struct rte_flow_action_mark *conf = action->conf;
+
+                       if (conf == NULL)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "MARK: Null configuration");
+
+                       if (n_mark)
+                               return rte_flow_error_set(error,
+                                       ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "Only one MARK action per flow");
+
+                       if ((params->action_mask &
+                               (1LLU << RTE_TABLE_ACTION_TAG)) == 0)
+                               return rte_flow_error_set(error,
+                                       ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       "MARK action not supported by this table");
+
+                       n_mark = 1;
+
+                       /* RTE_TABLE_ACTION_TAG */
+                       rule_action->tag.tag = conf->id;
+                       rule_action->action_mask |= 1 << RTE_TABLE_ACTION_TAG;
+                       break;
+               } /* RTE_FLOW_ACTION_TYPE_MARK */
+
+               case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
+               {
+                       const void *conf = action->conf;
+
+                       if (conf)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "VXLAN DECAP: Non-null configuration");
+
+                       if (n_vxlan_decap)
+                               return rte_flow_error_set(error,
+                                       ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "Only one VXLAN DECAP action per flow");
+
+                       if ((params->action_mask &
+                               (1LLU << RTE_TABLE_ACTION_DECAP)) == 0)
+                               return rte_flow_error_set(error,
+                                       ENOTSUP,
+                                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       "VXLAN DECAP action not supported by this table");
+
+                       n_vxlan_decap = 1;
+
+                       /* RTE_TABLE_ACTION_DECAP */
+                       /* Ether (14) + IPv4 (20) + UDP (8) + VXLAN (8) = 50 bytes */
+                       rule_action->decap.n = 50;
+                       rule_action->action_mask |= 1 << RTE_TABLE_ACTION_DECAP;
+                       break;
+               } /* RTE_FLOW_ACTION_TYPE_VXLAN_DECAP */
+
+               case RTE_FLOW_ACTION_TYPE_METER:
+               {
+                       const struct rte_flow_action_meter *conf = action->conf;
+                       struct softnic_mtr_meter_profile *mp;
+                       struct softnic_mtr *m;
+                       uint32_t table_id = table - pipeline->table;
+                       uint32_t meter_profile_id;
+                       int status;
+
+                       if ((params->action_mask & (1LLU << RTE_TABLE_ACTION_MTR)) == 0)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       "METER: Table action not supported");
+
+                       if (params->mtr.n_tc != 1)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       "METER: Multiple TCs not supported");
+
+                       if (conf == NULL)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "METER: Null configuration");
+
+                       m = softnic_mtr_find(softnic, conf->mtr_id);
+
+                       if (m == NULL)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                       NULL,
+                                       "METER: Invalid meter ID");
+
+                       if (m->flow)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                       NULL,
+                                       "METER: Meter already attached to a flow");
+
+                       meter_profile_id = m->params.meter_profile_id;
+                       mp = softnic_mtr_meter_profile_find(softnic, meter_profile_id);
+                       if (mp == NULL)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION_CONF,
+                                       NULL,
+                                       "METER: Invalid meter profile ID");
+
+                       /* Add meter profile to pipeline table */
+                       if (!softnic_pipeline_table_meter_profile_find(table,
+                                       meter_profile_id)) {
+                               struct rte_table_action_meter_profile profile;
+
+                               memset(&profile, 0, sizeof(profile));
+                               profile.alg = RTE_TABLE_ACTION_METER_TRTCM;
+                               profile.trtcm.cir = mp->params.trtcm_rfc2698.cir;
+                               profile.trtcm.pir = mp->params.trtcm_rfc2698.pir;
+                               profile.trtcm.cbs = mp->params.trtcm_rfc2698.cbs;
+                               profile.trtcm.pbs = mp->params.trtcm_rfc2698.pbs;
+
+                               status = softnic_pipeline_table_mtr_profile_add(softnic,
+                                               pipeline->name,
+                                               table_id,
+                                               meter_profile_id,
+                                               &profile);
+                               if (status)
+                                       return rte_flow_error_set(error,
+                                               EINVAL,
+                                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                               NULL,
+                                               "METER: Table meter profile add failed");
+                       }
+
+                       /* RTE_TABLE_ACTION_METER */
+                       rule_action->mtr.mtr[0].meter_profile_id = meter_profile_id;
+                       rule_action->mtr.mtr[0].policer[e_RTE_METER_GREEN] =
+                               (enum rte_table_action_policer)m->params.action[RTE_MTR_GREEN];
+                       rule_action->mtr.mtr[0].policer[e_RTE_METER_YELLOW] =
+                               (enum rte_table_action_policer)m->params.action[RTE_MTR_YELLOW];
+                       rule_action->mtr.mtr[0].policer[e_RTE_METER_RED] =
+                               (enum rte_table_action_policer)m->params.action[RTE_MTR_RED];
+                       rule_action->mtr.tc_mask = 1;
+                       rule_action->action_mask |= 1 << RTE_TABLE_ACTION_MTR;
+                       break;
+               } /* RTE_FLOW_ACTION_TYPE_METER */
+
+               case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
+               {
+                       const struct rte_flow_action_vxlan_encap *conf =
+                               action->conf;
+                       const struct rte_flow_item *item;
+                       union flow_item spec, mask;
+                       int disabled = 0, status;
+                       size_t size;
+
+                       if (conf == NULL)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "VXLAN ENCAP: Null configuration");
+
+                       item = conf->definition;
+                       if (item == NULL)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ACTION,
+                                       action,
+                                       "VXLAN ENCAP: Null configuration definition");
+
+                       if (!(params->action_mask &
+                                       (1LLU << RTE_TABLE_ACTION_ENCAP)))
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       "VXLAN ENCAP: Encap action not enabled for this table");
+
+                       /* Check for Ether. */
+                       flow_item_skip_void(&item);
+                       status = flow_item_proto_preprocess(item, &spec, &mask,
+                               &size, &disabled, error);
+                       if (status)
+                               return status;
+
+                       if (item->type != RTE_FLOW_ITEM_TYPE_ETH) {
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ITEM,
+                                       item,
+                                       "VXLAN ENCAP: first encap item should be ether");
+                       }
+                       ether_addr_copy(&spec.eth.dst,
+                                       &rule_action->encap.vxlan.ether.da);
+                       ether_addr_copy(&spec.eth.src,
+                                       &rule_action->encap.vxlan.ether.sa);
+
+                       item++;
+
+                       /* Check for VLAN. */
+                       flow_item_skip_void(&item);
+                       status = flow_item_proto_preprocess(item, &spec, &mask,
+                                       &size, &disabled, error);
+                       if (status)
+                               return status;
+
+                       if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
+                               if (!params->encap.vxlan.vlan)
+                                       return rte_flow_error_set(error,
+                                               ENOTSUP,
+                                               RTE_FLOW_ERROR_TYPE_ITEM,
+                                               item,
+                                               "VXLAN ENCAP: vlan encap not supported by table");
+
+                               uint16_t tci = rte_ntohs(spec.vlan.tci);
+                               rule_action->encap.vxlan.vlan.pcp =
+                                       tci >> 13;
+                               rule_action->encap.vxlan.vlan.dei =
+                                       (tci >> 12) & 0x1;
+                               rule_action->encap.vxlan.vlan.vid =
+                                       tci & 0xfff;
+
+                               item++;
+
+                               flow_item_skip_void(&item);
+                               status = flow_item_proto_preprocess(item, &spec,
+                                               &mask, &size, &disabled, error);
+                               if (status)
+                                       return status;
+                       } else {
+                               if (params->encap.vxlan.vlan)
+                                       return rte_flow_error_set(error,
+                                               ENOTSUP,
+                                               RTE_FLOW_ERROR_TYPE_ITEM,
+                                               item,
+                                               "VXLAN ENCAP: expecting vlan encap item");
+                       }
+
+                       /* Check for IPV4/IPV6. */
+                       switch (item->type) {
+                       case RTE_FLOW_ITEM_TYPE_IPV4:
+                       {
+                               rule_action->encap.vxlan.ipv4.sa =
+                                       rte_ntohl(spec.ipv4.hdr.src_addr);
+                               rule_action->encap.vxlan.ipv4.da =
+                                       rte_ntohl(spec.ipv4.hdr.dst_addr);
+                               rule_action->encap.vxlan.ipv4.dscp =
+                                       spec.ipv4.hdr.type_of_service >> 2;
+                               rule_action->encap.vxlan.ipv4.ttl =
+                                       spec.ipv4.hdr.time_to_live;
+                               break;
+                       }
+                       case RTE_FLOW_ITEM_TYPE_IPV6:
+                       {
+                               uint32_t vtc_flow;
+
+                               memcpy(&rule_action->encap.vxlan.ipv6.sa,
+                                               &spec.ipv6.hdr.src_addr,
+                                               sizeof(spec.ipv6.hdr.src_addr));
+                               memcpy(&rule_action->encap.vxlan.ipv6.da,
+                                               &spec.ipv6.hdr.dst_addr,
+                                               sizeof(spec.ipv6.hdr.dst_addr));
+                               vtc_flow = rte_ntohl(spec.ipv6.hdr.vtc_flow);
+                               rule_action->encap.vxlan.ipv6.flow_label =
+                                               vtc_flow & 0xfffff;
+                               rule_action->encap.vxlan.ipv6.dscp =
+                                               (vtc_flow >> 22) & 0x3f;
+                               rule_action->encap.vxlan.ipv6.hop_limit =
+                                       spec.ipv6.hdr.hop_limits;
+                               break;
+                       }
+                       default:
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ITEM,
+                                       item,
+                                       "VXLAN ENCAP: encap item after ether should be ipv4/ipv6");
+                       }
+
+                       item++;
+
+                       /* Check for UDP. */
+                       flow_item_skip_void(&item);
+                       status = flow_item_proto_preprocess(item, &spec, &mask,
+                                       &size, &disabled, error);
+                       if (status)
+                               return status;
+
+                       if (item->type != RTE_FLOW_ITEM_TYPE_UDP) {
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ITEM,
+                                       item,
+                                       "VXLAN ENCAP: encap item after ipv4/ipv6 should be udp");
+                       }
+                       rule_action->encap.vxlan.udp.sp =
+                               rte_ntohs(spec.udp.hdr.src_port);
+                       rule_action->encap.vxlan.udp.dp =
+                               rte_ntohs(spec.udp.hdr.dst_port);
+
+                       item++;
+
+                       /* Check for VXLAN. */
+                       flow_item_skip_void(&item);
+                       status = flow_item_proto_preprocess(item, &spec, &mask,
+                                       &size, &disabled, error);
+                       if (status)
+                               return status;
+
+                       if (item->type != RTE_FLOW_ITEM_TYPE_VXLAN) {
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ITEM,
+                                       item,
+                                       "VXLAN ENCAP: encap item after udp should be vxlan");
+                       }
+                       rule_action->encap.vxlan.vxlan.vni =
+                               (spec.vxlan.vni[0] << 16U) |
+                               (spec.vxlan.vni[1] << 8U) |
+                               spec.vxlan.vni[2];
+
+                       item++;
+
+                       /* Check for END. */
+                       flow_item_skip_void(&item);
+
+                       if (item->type != RTE_FLOW_ITEM_TYPE_END)
+                               return rte_flow_error_set(error,
+                                       EINVAL,
+                                       RTE_FLOW_ERROR_TYPE_ITEM,
+                                       item,
+                                       "VXLAN ENCAP: expecting END item");
+
+                       rule_action->encap.type = RTE_TABLE_ACTION_ENCAP_VXLAN;
+                       rule_action->action_mask |= 1 << RTE_TABLE_ACTION_ENCAP;
+                       break;
+               } /* RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP */
+
+               default:
+                       return -ENOTSUP;
+               }
+       }
+
+       if (n_jump_queue_rss_drop == 0)
+               return rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ACTION,
+                       action,
+                       "Flow does not have any terminating action");
+
+       return 0;
+}
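
For illustration, an action list that this parser accepts: exactly one terminating action (JUMP, QUEUE, RSS or DROP) plus optional COUNT/MARK/METER/encap/decap actions. A minimal sketch, assuming RX queue 3 exists and the table's action profile enables RTE_TABLE_ACTION_FWD and RTE_TABLE_ACTION_STATS:

#include <rte_flow.h>

static void
build_actions_example(void)
{
	/* Hypothetical values: RX queue 3, counter id 0. */
	static const struct rte_flow_action_queue queue = { .index = 3 };
	static const struct rte_flow_action_count count = { .shared = 0, .id = 0 };
	static const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_COUNT, .conf = &count },
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue }, /* terminating */
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	(void)actions; /* hand to rte_flow_validate()/rte_flow_create() */
}
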
+
+static int
+pmd_flow_validate(struct rte_eth_dev *dev,
+               const struct rte_flow_attr *attr,
+               const struct rte_flow_item item[],
+               const struct rte_flow_action action[],
+               struct rte_flow_error *error)
+{
+       struct softnic_table_rule_match rule_match;
+       struct softnic_table_rule_action rule_action;
+
+       struct pmd_internals *softnic = dev->data->dev_private;
+       struct pipeline *pipeline;
+       struct softnic_table *table;
+       const char *pipeline_name = NULL;
+       uint32_t table_id = 0;
+       int status;
+
+       /* Check input parameters. */
+       if (attr == NULL)
+               return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ATTR,
+                               NULL, "Null attr");
+
+       if (item == NULL)
+               return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ITEM,
+                               NULL,
+                               "Null item");
+
+       if (action == NULL)
+               return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_ACTION,
+                               NULL,
+                               "Null action");
+
+       /* Identify the pipeline table to add this flow to. */
+       status = flow_pipeline_table_get(softnic, attr, &pipeline_name,
+                                       &table_id, error);
+       if (status)
+               return status;
+
+       pipeline = softnic_pipeline_find(softnic, pipeline_name);
+       if (pipeline == NULL)
+               return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                               NULL,
+                               "Invalid pipeline name");
+
+       if (table_id >= pipeline->n_tables)
+               return rte_flow_error_set(error,
+                               EINVAL,
+                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                               NULL,
+                               "Invalid pipeline table ID");
+
+       table = &pipeline->table[table_id];
+
+       /* Rule match. */
+       memset(&rule_match, 0, sizeof(rule_match));
+       status = flow_rule_match_get(softnic,
+                       pipeline,
+                       table,
+                       attr,
+                       item,
+                       &rule_match,
+                       error);
+       if (status)
+               return status;
+
+       /* Rule action. */
+       memset(&rule_action, 0, sizeof(rule_action));
+       status = flow_rule_action_get(softnic,
+               pipeline,
+               table,
+               attr,
+               action,
+               &rule_action,
+               error);
+       if (status)
+               return status;
+
+       return 0;
+}
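
A hedged sketch of exercising the validate path from an application; the port_id parameter and group 0 are assumptions (the group must have been mapped to a pipeline table via flow_attr_map_set() at device configuration time):

#include <stdio.h>
#include <rte_flow.h>

static int
validate_example(uint16_t port_id,
	const struct rte_flow_item *pattern,
	const struct rte_flow_action *actions)
{
	struct rte_flow_attr attr = { .group = 0, .ingress = 1 }; /* assumed */
	struct rte_flow_error err = { 0 };
	int ret = rte_flow_validate(port_id, &attr, pattern, actions, &err);

	if (ret)
		printf("flow rejected: %s\n",
			err.message ? err.message : "(no message)");
	return ret;
}
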
+
+static struct softnic_mtr *
+flow_action_meter_get(struct pmd_internals *softnic,
+       const struct rte_flow_action *action)
+{
+       for ( ; action->type != RTE_FLOW_ACTION_TYPE_END; action++)
+               if (action->type == RTE_FLOW_ACTION_TYPE_METER) {
+                       const struct rte_flow_action_meter *conf = action->conf;
+
+                       if (conf == NULL)
+                               return NULL;
+
+                       return softnic_mtr_find(softnic, conf->mtr_id);
+               }
+
+       return NULL;
+}
+
+static void
+flow_meter_owner_reset(struct pmd_internals *softnic,
+       struct rte_flow *flow)
+{
+       struct softnic_mtr_list *ml = &softnic->mtr.mtrs;
+       struct softnic_mtr *m;
+
+       TAILQ_FOREACH(m, ml, node)
+               if (m->flow == flow) {
+                       m->flow = NULL;
+                       break;
+               }
+}
+
+static void
+flow_meter_owner_set(struct pmd_internals *softnic,
+       struct rte_flow *flow,
+       struct softnic_mtr *mtr)
+{
+       /* Reset the current flow meter */
+       flow_meter_owner_reset(softnic, flow);
+
+       /* Set new flow meter */
+       mtr->flow = flow;
+}
+
+static int
+is_meter_action_enable(struct pmd_internals *softnic,
+       struct softnic_table *table)
+{
+       struct softnic_table_action_profile *profile =
+               softnic_table_action_profile_find(softnic,
+                       table->params.action_profile_name);
+
+       /* A table always references a profile, but guard the lookup anyway. */
+       if (profile == NULL)
+               return 0;
+
+       return (profile->params.action_mask &
+               (1LLU << RTE_TABLE_ACTION_MTR)) ? 1 : 0;
+}
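
The bit test above follows the convention used throughout this file: each RTE_TABLE_ACTION_* enum value indexes one bit of the profile's 64-bit action_mask. A minimal sketch of the same check in isolation:

#include <stdint.h>
#include <rte_table_action.h>

static int
mask_has_meter(uint64_t action_mask)
{
	/* Bit i is set iff table action i is enabled in the profile. */
	return (action_mask & (1LLU << RTE_TABLE_ACTION_MTR)) ? 1 : 0;
}
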
+
+static struct rte_flow *
+pmd_flow_create(struct rte_eth_dev *dev,
+       const struct rte_flow_attr *attr,
+       const struct rte_flow_item item[],
+       const struct rte_flow_action action[],
+       struct rte_flow_error *error)
+{
+       struct softnic_table_rule_match rule_match;
+       struct softnic_table_rule_action rule_action;
+       void *rule_data;
+
+       struct pmd_internals *softnic = dev->data->dev_private;
+       struct pipeline *pipeline;
+       struct softnic_table *table;
+       struct rte_flow *flow;
+       struct softnic_mtr *mtr;
+       const char *pipeline_name = NULL;
+       uint32_t table_id = 0;
+       int new_flow, status;
+
+       /* Check input parameters. */
+       if (attr == NULL) {
+               rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ATTR,
+                       NULL,
+                       "Null attr");
+               return NULL;
+       }
+
+       if (item == NULL) {
+               rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ITEM,
+                       NULL,
+                       "Null item");
+               return NULL;
+       }
+
+       if (action == NULL) {
+               rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_ACTION,
+                       NULL,
+                       "Null action");
+               return NULL;
+       }
+
+       /* Identify the pipeline table to add this flow to. */
+       status = flow_pipeline_table_get(softnic, attr, &pipeline_name,
+                                       &table_id, error);
+       if (status)
+               return NULL;
+
+       pipeline = softnic_pipeline_find(softnic, pipeline_name);
+       if (pipeline == NULL) {
+               rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "Invalid pipeline name");
+               return NULL;
+       }
+
+       if (table_id >= pipeline->n_tables) {
+               rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "Invalid pipeline table ID");
+               return NULL;
+       }
+
+       table = &pipeline->table[table_id];
+
+       /* Rule match. */
+       memset(&rule_match, 0, sizeof(rule_match));
+       status = flow_rule_match_get(softnic,
+               pipeline,
+               table,
+               attr,
+               item,
+               &rule_match,
+               error);
+       if (status)
+               return NULL;
+
+       /* Rule action. */
+       memset(&rule_action, 0, sizeof(rule_action));
+       status = flow_rule_action_get(softnic,
+               pipeline,
+               table,
+               attr,
+               action,
+               &rule_action,
+               error);
+       if (status)
+               return NULL;
+
+       /* Flow find/allocate. */
+       new_flow = 0;
+       flow = softnic_flow_find(table, &rule_match);
+       if (flow == NULL) {
+               new_flow = 1;
+               flow = calloc(1, sizeof(struct rte_flow));
+               if (flow == NULL) {
+                       rte_flow_error_set(error,
+                               ENOMEM,
+                               RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                               NULL,
+                               "Not enough memory for new flow");
+                       return NULL;
+               }
+       }
+
+       /* Rule add. */
+       status = softnic_pipeline_table_rule_add(softnic,
+               pipeline_name,
+               table_id,
+               &rule_match,
+               &rule_action,
+               &rule_data);
+       if (status) {
+               if (new_flow)
+                       free(flow);
+
+               rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "Pipeline table rule add failed");
+               return NULL;
+       }
+
+       /* Flow fill in. */
+       memcpy(&flow->match, &rule_match, sizeof(rule_match));
+       memcpy(&flow->action, &rule_action, sizeof(rule_action));
+       flow->data = rule_data;
+       flow->pipeline = pipeline;
+       flow->table_id = table_id;
+
+       mtr = flow_action_meter_get(softnic, action);
+       if (mtr)
+               flow_meter_owner_set(softnic, flow, mtr);
+
+       /* Flow add to list. */
+       if (new_flow)
+               TAILQ_INSERT_TAIL(&table->flows, flow, node);
+
+       return flow;
+}
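
A usage sketch for the create path (port and rule arguments assumed already built). Note the semantics above: a flow whose match already exists in the table is updated in place and the existing handle is returned, rather than a second handle being allocated:

#include <stdio.h>
#include <rte_flow.h>

static struct rte_flow *
create_example(uint16_t port_id,
	const struct rte_flow_attr *attr,
	const struct rte_flow_item *pattern,
	const struct rte_flow_action *actions)
{
	struct rte_flow_error err = { 0 };
	struct rte_flow *flow;

	flow = rte_flow_create(port_id, attr, pattern, actions, &err);
	if (flow == NULL)
		printf("create failed: %s\n",
			err.message ? err.message : "(no message)");
	return flow;
}
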
+
+static int
+pmd_flow_destroy(struct rte_eth_dev *dev,
+       struct rte_flow *flow,
+       struct rte_flow_error *error)
+{
+       struct pmd_internals *softnic = dev->data->dev_private;
+       struct softnic_table *table;
+       int status;
+
+       /* Check input parameters. */
+       if (flow == NULL)
+               return rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_HANDLE,
+                       NULL,
+                       "Null flow");
+
+       table = &flow->pipeline->table[flow->table_id];
+
+       /* Rule delete. */
+       status = softnic_pipeline_table_rule_delete(softnic,
+               flow->pipeline->name,
+               flow->table_id,
+               &flow->match);
+       if (status)
+               return rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "Pipeline table rule delete failed");
+
+       /* Update dependencies */
+       if (is_meter_action_enable(softnic, table))
+               flow_meter_owner_reset(softnic, flow);
+
+       /* Flow delete. */
+       TAILQ_REMOVE(&table->flows, flow, node);
+       free(flow);
+
+       return 0;
+}
+
+static int
+pmd_flow_flush(struct rte_eth_dev *dev,
+       struct rte_flow_error *error)
+{
+       struct pmd_internals *softnic = dev->data->dev_private;
+       struct pipeline *pipeline;
+       int fail_to_del_rule = 0;
+       uint32_t i;
+
+       TAILQ_FOREACH(pipeline, &softnic->pipeline_list, node) {
+               /* Remove all the flows added to the tables. */
+               for (i = 0; i < pipeline->n_tables; i++) {
+                       struct softnic_table *table = &pipeline->table[i];
+                       struct rte_flow *flow;
+                       void *temp;
+                       int status;
+
+                       TAILQ_FOREACH_SAFE(flow, &table->flows, node, temp) {
+                               /* Rule delete. */
+                               status = softnic_pipeline_table_rule_delete
+                                               (softnic,
+                                               pipeline->name,
+                                               i,
+                                               &flow->match);
+                               if (status)
+                                       fail_to_del_rule = 1;
+                               /* Update dependencies */
+                               if (is_meter_action_enable(softnic, table))
+                                       flow_meter_owner_reset(softnic, flow);
+
+                               /* Flow delete. */
+                               TAILQ_REMOVE(&table->flows, flow, node);
+                               free(flow);
+                       }
+               }
+       }
+
+       if (fail_to_del_rule)
+               return rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "Some of the rules could not be deleted");
+
+       return 0;
+}
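
The flush loop walks each flow list with TAILQ_FOREACH_SAFE because its body frees the current node; plain TAILQ_FOREACH would read freed memory to advance. The _SAFE variant is a BSD extension that glibc's sys/queue.h omits (DPDK carries its own definition), so this sketch includes a fallback macro; the node type is illustrative:

#include <stdlib.h>
#include <sys/queue.h>

#ifndef TAILQ_FOREACH_SAFE /* glibc's sys/queue.h lacks the BSD extension */
#define TAILQ_FOREACH_SAFE(var, head, field, tvar)		\
	for ((var) = TAILQ_FIRST(head);				\
	    (var) && ((tvar) = TAILQ_NEXT(var, field), 1);	\
	    (var) = (tvar))
#endif

struct node {
	TAILQ_ENTRY(node) link;
};

TAILQ_HEAD(node_list, node);

static void
drain(struct node_list *head)
{
	struct node *n, *tmp;

	TAILQ_FOREACH_SAFE(n, head, link, tmp) {
		TAILQ_REMOVE(head, n, link); /* unlink before free */
		free(n);
	}
}
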
+
+static int
+pmd_flow_query(struct rte_eth_dev *dev __rte_unused,
+       struct rte_flow *flow,
+       const struct rte_flow_action *action __rte_unused,
+       void *data,
+       struct rte_flow_error *error)
+{
+       struct rte_table_action_stats_counters stats;
+       struct softnic_table *table;
+       struct rte_flow_query_count *flow_stats = data;
+       int status;
+
+       /* Check input parameters. */
+       if (flow == NULL)
+               return rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_HANDLE,
+                       NULL,
+                       "Null flow");
+
+       if (data == NULL)
+               return rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "Null data");
+
+       table = &flow->pipeline->table[flow->table_id];
+
+       /* Rule stats read. */
+       status = rte_table_action_stats_read(table->a,
+               flow->data,
+               &stats,
+               flow_stats->reset);
+       if (status)
+               return rte_flow_error_set(error,
+                       EINVAL,
+                       RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "Pipeline table rule stats read failed");
+
+       /* Fill in flow stats. */
+       flow_stats->hits_set =
+               (table->ap->params.stats.n_packets_enabled) ? 1 : 0;
+       flow_stats->bytes_set =
+               (table->ap->params.stats.n_bytes_enabled) ? 1 : 0;
+       flow_stats->hits = stats.n_packets;
+       flow_stats->bytes = stats.n_bytes;
+
+       return 0;
+}
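
A sketch of reading these counters through the public API: the flow must carry a COUNT action and the table's action profile must enable RTE_TABLE_ACTION_STATS; reset = 1 asks the driver to clear the counters after the read:

#include <inttypes.h>
#include <stdio.h>
#include <rte_flow.h>

static int
query_example(uint16_t port_id, struct rte_flow *flow)
{
	struct rte_flow_query_count cnt = { .reset = 1 };
	const struct rte_flow_action action = {
		.type = RTE_FLOW_ACTION_TYPE_COUNT,
	};
	struct rte_flow_error err = { 0 };
	int ret = rte_flow_query(port_id, flow, &action, &cnt, &err);

	if (ret == 0 && cnt.hits_set)
		printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
			cnt.hits, cnt.bytes_set ? cnt.bytes : 0);
	return ret;
}
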
+
+const struct rte_flow_ops pmd_flow_ops = {
+       .validate = pmd_flow_validate,
+       .create = pmd_flow_create,
+       .destroy = pmd_flow_destroy,
+       .flush = pmd_flow_flush,
+       .query = pmd_flow_query,
+       .isolate = NULL,
+};
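
As a hedged sketch of how these callbacks are reached in the 18.11-era dispatch: the generic rte_flow layer obtains the driver's rte_flow_ops through the filter_ctrl device op with RTE_ETH_FILTER_GENERIC/RTE_ETH_FILTER_GET, then calls through the returned table:

#include <rte_ethdev_driver.h>
#include <rte_flow_driver.h>

static const struct rte_flow_ops *
flow_ops_example(uint16_t port_id)
{
	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
	const struct rte_flow_ops *ops = NULL;

	if (dev->dev_ops->filter_ctrl == NULL ||
	    dev->dev_ops->filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
			RTE_ETH_FILTER_GET, &ops) != 0)
		return NULL;
	return ops; /* &pmd_flow_ops for a softnic port */
}
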
index a25eb87..e12b8ae 100644
 #include <rte_table_action.h>
 #include <rte_pipeline.h>
 
+#include <rte_ethdev_core.h>
 #include <rte_ethdev_driver.h>
 #include <rte_tm_driver.h>
+#include <rte_flow_driver.h>
+#include <rte_mtr_driver.h>
 
 #include "rte_eth_softnic.h"
 #include "conn.h"
@@ -43,6 +46,57 @@ struct pmd_params {
        } tm;
 };
 
+/**
+ * Ethdev Flow API
+ */
+struct rte_flow;
+
+TAILQ_HEAD(flow_list, rte_flow);
+
+struct flow_attr_map {
+       char pipeline_name[NAME_SIZE];
+       uint32_t table_id;
+       int valid;
+};
+
+#ifndef SOFTNIC_FLOW_MAX_GROUPS
+#define SOFTNIC_FLOW_MAX_GROUPS                            64
+#endif
+
+struct flow_internals {
+       struct flow_attr_map ingress_map[SOFTNIC_FLOW_MAX_GROUPS];
+       struct flow_attr_map egress_map[SOFTNIC_FLOW_MAX_GROUPS];
+};
+
+/**
+ * Meter
+ */
+
+/* MTR meter profile */
+struct softnic_mtr_meter_profile {
+       TAILQ_ENTRY(softnic_mtr_meter_profile) node;
+       uint32_t meter_profile_id;
+       struct rte_mtr_meter_profile params;
+       uint32_t n_users;
+};
+
+TAILQ_HEAD(softnic_mtr_meter_profile_list, softnic_mtr_meter_profile);
+
+/* MTR meter object */
+struct softnic_mtr {
+       TAILQ_ENTRY(softnic_mtr) node;
+       uint32_t mtr_id;
+       struct rte_mtr_params params;
+       struct rte_flow *flow;
+};
+
+TAILQ_HEAD(softnic_mtr_list, softnic_mtr);
+
+struct mtr_internals {
+       struct softnic_mtr_meter_profile_list meter_profiles;
+       struct softnic_mtr_list mtrs;
+};
+
 /**
  * MEMPOOL
  */
@@ -224,6 +278,25 @@ struct softnic_tap {
 
 TAILQ_HEAD(softnic_tap_list, softnic_tap);
 
+/**
+ * Cryptodev
+ */
+struct softnic_cryptodev_params {
+       const char *dev_name;
+       uint32_t dev_id; /**< Valid only when *dev_name* is NULL. */
+       uint32_t n_queues;
+       uint32_t queue_size;
+};
+
+struct softnic_cryptodev {
+       TAILQ_ENTRY(softnic_cryptodev) node;
+       char name[NAME_SIZE];
+       uint16_t dev_id;
+       uint32_t n_queues;
+};
+
+TAILQ_HEAD(softnic_cryptodev_list, softnic_cryptodev);
+
 /**
  * Input port action
  */
@@ -255,6 +328,7 @@ struct softnic_table_action_profile_params {
        struct rte_table_action_nat_config nat;
        struct rte_table_action_ttl_config ttl;
        struct rte_table_action_stats_config stats;
+       struct rte_table_action_sym_crypto_config sym_crypto;
 };
 
 struct softnic_table_action_profile {
@@ -266,6 +340,15 @@ struct softnic_table_action_profile {
 
 TAILQ_HEAD(softnic_table_action_profile_list, softnic_table_action_profile);
 
+struct softnic_table_meter_profile {
+       TAILQ_ENTRY(softnic_table_meter_profile) node;
+       uint32_t meter_profile_id;
+       struct rte_table_action_meter_profile profile;
+};
+
+TAILQ_HEAD(softnic_table_meter_profile_list,
+       softnic_table_meter_profile);
+
 /**
  * Pipeline
  */
@@ -280,12 +363,13 @@ enum softnic_port_in_type {
        PORT_IN_TMGR,
        PORT_IN_TAP,
        PORT_IN_SOURCE,
+       PORT_IN_CRYPTODEV,
 };
 
 struct softnic_port_in_params {
        /* Read */
        enum softnic_port_in_type type;
-       const char *dev_name;
+       char dev_name[NAME_SIZE];
        union {
                struct {
                        uint16_t queue_id;
@@ -301,11 +385,17 @@ struct softnic_port_in_params {
                        const char *file_name;
                        uint32_t n_bytes_per_pkt;
                } source;
+
+               struct {
+                       uint16_t queue_id;
+                       void *f_callback;
+                       void *arg_callback;
+               } cryptodev;
        };
        uint32_t burst_size;
 
        /* Action */
-       const char *action_profile_name;
+       char action_profile_name[NAME_SIZE];
 };
 
 enum softnic_port_out_type {
@@ -314,11 +404,12 @@ enum softnic_port_out_type {
        PORT_OUT_TMGR,
        PORT_OUT_TAP,
        PORT_OUT_SINK,
+       PORT_OUT_CRYPTODEV,
 };
 
 struct softnic_port_out_params {
        enum softnic_port_out_type type;
-       const char *dev_name;
+       char dev_name[NAME_SIZE];
        union {
                struct {
                        uint16_t queue_id;
@@ -328,6 +419,11 @@ struct softnic_port_out_params {
                        const char *file_name;
                        uint32_t max_n_pkts;
                } sink;
+
+               struct {
+                       uint16_t queue_id;
+                       uint32_t op_offset;
+               } cryptodev;
        };
        uint32_t burst_size;
        int retry;
@@ -353,11 +449,15 @@ struct softnic_table_array_params {
        uint32_t key_offset;
 };
 
+#ifndef TABLE_RULE_MATCH_SIZE_MAX
+#define TABLE_RULE_MATCH_SIZE_MAX                          256
+#endif
+
 struct softnic_table_hash_params {
        uint32_t n_keys;
        uint32_t key_offset;
        uint32_t key_size;
-       uint8_t *key_mask;
+       uint8_t key_mask[TABLE_RULE_MATCH_SIZE_MAX];
        uint32_t n_buckets;
        int extendable_bucket;
 };
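
Changing key_mask from a borrowed pointer to an inline TABLE_RULE_MATCH_SIZE_MAX array (like the dev_name/action_profile_name string changes above) lets these params structs be copied by value without dangling references. A tiny sketch of why, with the struct reduced to the two relevant fields:

#include <stdint.h>
#include <string.h>

#define TABLE_RULE_MATCH_SIZE_MAX 256

struct hash_params {                     /* simplified stand-in */
	uint32_t key_size;
	uint8_t key_mask[TABLE_RULE_MATCH_SIZE_MAX];
};

static struct hash_params
snapshot(const uint8_t *user_mask, uint32_t key_size)
{
	struct hash_params p = { .key_size = key_size };

	/* Deep copy: p stays valid after user_mask's buffer is freed. */
	memcpy(p.key_mask, user_mask, key_size);
	return p;
}
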
@@ -379,7 +479,7 @@ struct softnic_table_params {
        } match;
 
        /* Action */
-       const char *action_profile_name;
+       char action_profile_name[NAME_SIZE];
 };
 
 struct softnic_port_in {
@@ -388,10 +488,17 @@ struct softnic_port_in {
        struct rte_port_in_action *a;
 };
 
+struct softnic_port_out {
+       struct softnic_port_out_params params;
+};
+
 struct softnic_table {
        struct softnic_table_params params;
        struct softnic_table_action_profile *ap;
        struct rte_table_action *a;
+       struct flow_list flows;
+       struct rte_table_action_dscp_table dscp_table;
+       struct softnic_table_meter_profile_list meter_profiles;
 };
 
 struct pipeline {
@@ -399,7 +506,9 @@ struct pipeline {
        char name[NAME_SIZE];
 
        struct rte_pipeline *p;
+       struct pipeline_params params;
        struct softnic_port_in port_in[RTE_PIPELINE_PORT_IN_MAX];
+       struct softnic_port_out port_out[RTE_PIPELINE_PORT_OUT_MAX];
        struct softnic_table table[RTE_PIPELINE_TABLE_MAX];
        uint32_t n_ports_in;
        uint32_t n_ports_out;
@@ -489,12 +598,16 @@ struct pmd_internals {
                struct tm_internals tm; /**< Traffic Management */
        } soft;
 
+       struct flow_internals flow;
+       struct mtr_internals mtr;
+
        struct softnic_conn *conn;
        struct softnic_mempool_list mempool_list;
        struct softnic_swq_list swq_list;
        struct softnic_link_list link_list;
        struct softnic_tmgr_port_list tmgr_port_list;
        struct softnic_tap_list tap_list;
+       struct softnic_cryptodev_list cryptodev_list;
        struct softnic_port_in_action_profile_list port_in_action_profile_list;
        struct softnic_table_action_profile_list table_action_profile_list;
        struct pipeline_list pipeline_list;
@@ -502,6 +615,58 @@ struct pmd_internals {
        struct softnic_thread_data thread_data[RTE_MAX_LCORE];
 };
 
+static inline struct rte_eth_dev *
+ETHDEV(struct pmd_internals *softnic)
+{
+       uint16_t port_id;
+       int status;
+
+       if (softnic == NULL)
+               return NULL;
+
+       status = rte_eth_dev_get_port_by_name(softnic->params.name, &port_id);
+       if (status)
+               return NULL;
+
+       return &rte_eth_devices[port_id];
+}
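
ETHDEV() resolves the softnic back to its own ethdev so handlers like QUEUE/RSS can bound-check queue indices against dev->data->nb_rx_queues. A standalone sketch of the same name-based lookup; "net_softnic0" is a hypothetical vdev name:

#include <rte_ethdev.h>

static struct rte_eth_dev *
lookup_example(void)
{
	uint16_t port_id;

	if (rte_eth_dev_get_port_by_name("net_softnic0", &port_id) != 0)
		return NULL; /* no port registered under that name */
	return &rte_eth_devices[port_id];
}
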
+
+/**
+ * Ethdev Flow API
+ */
+int
+flow_attr_map_set(struct pmd_internals *softnic,
+               uint32_t group_id,
+               int ingress,
+               const char *pipeline_name,
+               uint32_t table_id);
+
+struct flow_attr_map *
+flow_attr_map_get(struct pmd_internals *softnic,
+               uint32_t group_id,
+               int ingress);
+
+extern const struct rte_flow_ops pmd_flow_ops;
+
+/**
+ * Meter
+ */
+int
+softnic_mtr_init(struct pmd_internals *p);
+
+void
+softnic_mtr_free(struct pmd_internals *p);
+
+struct softnic_mtr *
+softnic_mtr_find(struct pmd_internals *p,
+       uint32_t mtr_id);
+
+struct softnic_mtr_meter_profile *
+softnic_mtr_meter_profile_find(struct pmd_internals *p,
+       uint32_t meter_profile_id);
+
+extern const struct rte_mtr_ops pmd_mtr_ops;
+
 /**
  * MEMPOOL
  */
@@ -609,6 +774,24 @@ struct softnic_tap *
 softnic_tap_create(struct pmd_internals *p,
        const char *name);
 
+/**
+ * Sym Crypto
+ */
+int
+softnic_cryptodev_init(struct pmd_internals *p);
+
+void
+softnic_cryptodev_free(struct pmd_internals *p);
+
+struct softnic_cryptodev *
+softnic_cryptodev_find(struct pmd_internals *p,
+       const char *name);
+
+struct softnic_cryptodev *
+softnic_cryptodev_create(struct pmd_internals *p,
+       const char *name,
+       struct softnic_cryptodev_params *params);
+
 /**
  * Input port action
  */
@@ -682,11 +865,21 @@ softnic_pipeline_port_out_create(struct pmd_internals *p,
        const char *pipeline_name,
        struct softnic_port_out_params *params);
 
+int
+softnic_pipeline_port_out_find(struct pmd_internals *softnic,
+               const char *pipeline_name,
+               const char *name,
+               uint32_t *port_id);
+
 int
 softnic_pipeline_table_create(struct pmd_internals *p,
        const char *pipeline_name,
        struct softnic_table_params *params);
 
+struct softnic_table_meter_profile *
+softnic_pipeline_table_meter_profile_find(struct softnic_table *table,
+       uint32_t meter_profile_id);
+
 struct softnic_table_rule_match_acl {
        int ip_version;
 
@@ -718,10 +911,6 @@ struct softnic_table_rule_match_array {
        uint32_t pos;
 };
 
-#ifndef TABLE_RULE_MATCH_SIZE_MAX
-#define TABLE_RULE_MATCH_SIZE_MAX                          256
-#endif
-
 struct softnic_table_rule_match_hash {
        uint8_t key[TABLE_RULE_MATCH_SIZE_MAX];
 };
@@ -760,6 +949,18 @@ struct softnic_table_rule_action {
        struct rte_table_action_ttl_params ttl;
        struct rte_table_action_stats_params stats;
        struct rte_table_action_time_params time;
+       struct rte_table_action_tag_params tag;
+       struct rte_table_action_decap_params decap;
+       struct rte_table_action_sym_crypto_params sym_crypto;
+};
+
+struct rte_flow {
+       TAILQ_ENTRY(rte_flow) node;
+       struct softnic_table_rule_match match;
+       struct softnic_table_rule_action action;
+       void *data;
+       struct pipeline *pipeline;
+       uint32_t table_id;
 };
 
 int
diff --git a/drivers/net/softnic/rte_eth_softnic_meter.c b/drivers/net/softnic/rte_eth_softnic_meter.c
new file mode 100644 (file)
index 0000000..73ecf3b
--- /dev/null
@@ -0,0 +1,728 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_mtr.h>
+#include <rte_mtr_driver.h>
+
+#include "rte_eth_softnic_internals.h"
+
+int
+softnic_mtr_init(struct pmd_internals *p)
+{
+       /* Initialize meter profiles list */
+       TAILQ_INIT(&p->mtr.meter_profiles);
+
+       /* Initialize MTR objects list */
+       TAILQ_INIT(&p->mtr.mtrs);
+
+       return 0;
+}
+
+void
+softnic_mtr_free(struct pmd_internals *p)
+{
+       /* Remove MTR objects */
+       for ( ; ; ) {
+               struct softnic_mtr *m;
+
+               m = TAILQ_FIRST(&p->mtr.mtrs);
+               if (m == NULL)
+                       break;
+
+               TAILQ_REMOVE(&p->mtr.mtrs, m, node);
+               free(m);
+       }
+
+       /* Remove meter profiles */
+       for ( ; ; ) {
+               struct softnic_mtr_meter_profile *mp;
+
+               mp = TAILQ_FIRST(&p->mtr.meter_profiles);
+               if (mp == NULL)
+                       break;
+
+               TAILQ_REMOVE(&p->mtr.meter_profiles, mp, node);
+               free(mp);
+       }
+}
+
+struct softnic_mtr_meter_profile *
+softnic_mtr_meter_profile_find(struct pmd_internals *p,
+       uint32_t meter_profile_id)
+{
+       struct softnic_mtr_meter_profile_list *mpl = &p->mtr.meter_profiles;
+       struct softnic_mtr_meter_profile *mp;
+
+       TAILQ_FOREACH(mp, mpl, node)
+               if (meter_profile_id == mp->meter_profile_id)
+                       return mp;
+
+       return NULL;
+}
+
+static int
+meter_profile_check(struct rte_eth_dev *dev,
+       uint32_t meter_profile_id,
+       struct rte_mtr_meter_profile *profile,
+       struct rte_mtr_error *error)
+{
+       struct pmd_internals *p = dev->data->dev_private;
+       struct softnic_mtr_meter_profile *mp;
+
+       /* Meter profile ID must be valid. */
+       if (meter_profile_id == UINT32_MAX)
+               return -rte_mtr_error_set(error,
+                       EINVAL,
+                       RTE_MTR_ERROR_TYPE_METER_PROFILE_ID,
+                       NULL,
+                       "Meter profile id not valid");
+
+       /* Meter profile must not exist. */
+       mp = softnic_mtr_meter_profile_find(p, meter_profile_id);
+       if (mp)
+               return -rte_mtr_error_set(error,
+                       EEXIST,
+                       RTE_MTR_ERROR_TYPE_METER_PROFILE_ID,
+                       NULL,
+                       "Meter prfile already exists");
+
+       /* Profile must not be NULL. */
+       if (profile == NULL)
+               return -rte_mtr_error_set(error,
+                       EINVAL,
+                       RTE_MTR_ERROR_TYPE_METER_PROFILE,
+                       NULL,
+                       "profile null");
+
+       /* Traffic metering algorithm: only TRTCM_RFC2698 is supported */
+       if (profile->alg != RTE_MTR_TRTCM_RFC2698)
+               return -rte_mtr_error_set(error,
+                       EINVAL,
+                       RTE_MTR_ERROR_TYPE_METER_PROFILE,
+                       NULL,
+                       "Metering alg not supported");
+
+       return 0;
+}
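+
+/*
+ * Only the RFC 2698 two-rate three-color marker passes the check above.
+ * A minimal profile sketch (rates and bursts are placeholder values):
+ *
+ *	struct rte_mtr_meter_profile profile = {
+ *		.alg = RTE_MTR_TRTCM_RFC2698,
+ *		.trtcm_rfc2698 = {
+ *			.cir = 1000000, .pir = 2000000,
+ *			.cbs = 2048, .pbs = 4096,
+ *		},
+ *	};
+ */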
+
+/* MTR meter profile add */
+static int
+pmd_mtr_meter_profile_add(struct rte_eth_dev *dev,
+       uint32_t meter_profile_id,
+       struct rte_mtr_meter_profile *profile,
+       struct rte_mtr_error *error)
+{
+       struct pmd_internals *p = dev->data->dev_private;
+       struct softnic_mtr_meter_profile_list *mpl = &p->mtr.meter_profiles;
+       struct softnic_mtr_meter_profile *mp;
+       int status;
+
+       /* Check input params */
+       status = meter_profile_check(dev, meter_profile_id, profile, error);
+       if (status)
+               return status;
+
+       /* Memory allocation */
+       mp = calloc(1, sizeof(struct softnic_mtr_meter_profile));
+       if (mp == NULL)
+               return -rte_mtr_error_set(error,
+                       ENOMEM,
+                       RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "Memory alloc failed");
+
+       /* Fill in */
+       mp->meter_profile_id = meter_profile_id;
+       memcpy(&mp->params, profile, sizeof(mp->params));
+
+       /* Add to list */
+       TAILQ_INSERT_TAIL(mpl, mp, node);
+
+       return 0;
+}
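+
+/*
+ * Reached through the generic rte_mtr API, e.g. (illustrative, error
+ * handling elided, using the example profile sketched above):
+ *
+ *	struct rte_mtr_error err;
+ *
+ *	rte_mtr_meter_profile_add(port_id, 0, &profile, &err);
+ */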
+
+/* MTR meter profile delete */
+static int
+pmd_mtr_meter_profile_delete(struct rte_eth_dev *dev,
+       uint32_t meter_profile_id,
+       struct rte_mtr_error *error)
+{
+       struct pmd_internals *p = dev->data->dev_private;
+       struct softnic_mtr_meter_profile *mp;
+
+       /* Meter profile must exist */
+       mp = softnic_mtr_meter_profile_find(p, meter_profile_id);
+       if (mp == NULL)
+               return -rte_mtr_error_set(error,
+                       EINVAL,
+                       RTE_MTR_ERROR_TYPE_METER_PROFILE_ID,
+                       NULL,
+                       "Meter profile id invalid");
+
+       /* Profile must not be in use */
+       if (mp->n_users)
+               return -rte_mtr_error_set(error,
+                       EBUSY,
+                       RTE_MTR_ERROR_TYPE_METER_PROFILE_ID,
+                       NULL,
+                       "Meter profile in use");
+
+       /* Remove from list */
+       TAILQ_REMOVE(&p->mtr.meter_profiles, mp, node);
+       free(mp);
+
+       return 0;
+}
+
+struct softnic_mtr *
+softnic_mtr_find(struct pmd_internals *p, uint32_t mtr_id)
+{
+       struct softnic_mtr_list *ml = &p->mtr.mtrs;
+       struct softnic_mtr *m;
+
+       TAILQ_FOREACH(m, ml, node)
+               if (m->mtr_id == mtr_id)
+                       return m;
+
+       return NULL;
+}
+
+static int
+mtr_check(struct pmd_internals *p,
+       uint32_t mtr_id,
+       struct rte_mtr_params *params,
+       int shared,
+       struct rte_mtr_error *error)
+{
+       /* MTR object must not already exist */
+       if (softnic_mtr_find(p, mtr_id))
+               return -rte_mtr_error_set(error,
+                       EEXIST,
+                       RTE_MTR_ERROR_TYPE_MTR_ID,
+                       NULL,
+                       "MTR object already exists");
+
+       /* MTR params must not be NULL */
+       if (params == NULL)
+               return -rte_mtr_error_set(error,
+                       EINVAL,
+                       RTE_MTR_ERROR_TYPE_MTR_PARAMS,
+                       NULL,
+                       "MTR object params null");
+
+       /* Previous meter color not supported */
+       if (params->use_prev_mtr_color)
+               return -rte_mtr_error_set(error,
+                       EINVAL,
+                       RTE_MTR_ERROR_TYPE_MTR_PARAMS,
+                       NULL,
+                       "Previous meter color not supported");
+
+       /* Shared MTR object not supported */
+       if (shared)
+               return -rte_mtr_error_set(error,
+                       EINVAL,
+                       RTE_MTR_ERROR_TYPE_SHARED,
+                       NULL,
+                       "Shared MTR object not supported");
+
+       return 0;
+}
+
+/* MTR object create */
+static int
+pmd_mtr_create(struct rte_eth_dev *dev,
+       uint32_t mtr_id,
+       struct rte_mtr_params *params,
+       int shared,
+       struct rte_mtr_error *error)
+{
+       struct pmd_internals *p = dev->data->dev_private;
+       struct softnic_mtr_list *ml = &p->mtr.mtrs;
+       struct softnic_mtr_meter_profile *mp;
+       struct softnic_mtr *m;
+       int status;
+
+       /* Check parameters */
+       status = mtr_check(p, mtr_id, params, shared, error);
+       if (status)
+               return status;
+
+       /* Meter profile must exist */
+       mp = softnic_mtr_meter_profile_find(p, params->meter_profile_id);
+       if (mp == NULL)
+               return -rte_mtr_error_set(error,
+                       EINVAL,
+                       RTE_MTR_ERROR_TYPE_METER_PROFILE_ID,
+                       NULL,
+                       "Meter profile id not valid");
+
+       /* Memory allocation */
+       m = calloc(1, sizeof(struct softnic_mtr));
+       if (m == NULL)
+               return -rte_mtr_error_set(error,
+                       ENOMEM,
+                       RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "Memory alloc failed");
+
+       /* Fill in */
+       m->mtr_id = mtr_id;
+       memcpy(&m->params, params, sizeof(m->params));
+
+       /* Add to list */
+       TAILQ_INSERT_TAIL(ml, m, node);
+
+       /* Update dependencies */
+       mp->n_users++;
+
+       return 0;
+}
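+
+/*
+ * Matching application-level call (sketch; ids are placeholders and the
+ * shared flag must be 0, as enforced by mtr_check()):
+ *
+ *	struct rte_mtr_params mtr_params = {
+ *		.meter_profile_id = 0,
+ *		.meter_enable = 1,
+ *		.stats_mask = UINT64_MAX,
+ *	};
+ *	struct rte_mtr_error err;
+ *
+ *	rte_mtr_create(port_id, 0, &mtr_params, 0, &err);
+ */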
+
+/* MTR object destroy */
+static int
+pmd_mtr_destroy(struct rte_eth_dev *dev,
+       uint32_t mtr_id,
+       struct rte_mtr_error *error)
+{
+       struct pmd_internals *p = dev->data->dev_private;
+       struct softnic_mtr_list *ml = &p->mtr.mtrs;
+       struct softnic_mtr_meter_profile *mp;
+       struct softnic_mtr *m;
+
+       /* MTR object must exist */
+       m = softnic_mtr_find(p, mtr_id);
+       if (m == NULL)
+               return -rte_mtr_error_set(error,
+                       ENOENT,
+                       RTE_MTR_ERROR_TYPE_MTR_ID,
+                       NULL,
+                       "MTR object id not valid");
+
+       /* MTR object must not have any owner */
+       if (m->flow != NULL)
+               return -rte_mtr_error_set(error,
+                       EBUSY,
+                       RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "MTR object is being used");
+
+       /* Get meter profile */
+       mp = softnic_mtr_meter_profile_find(p, m->params.meter_profile_id);
+       if (mp == NULL)
+               return -rte_mtr_error_set(error,
+                       EINVAL,
+                       RTE_MTR_ERROR_TYPE_METER_PROFILE_ID,
+                       NULL,
+                       "MTR object meter profile invalid");
+
+       /* Update dependencies */
+       mp->n_users--;
+
+       /* Remove from list */
+       TAILQ_REMOVE(ml, m, node);
+       free(m);
+
+       return 0;
+}
+
+/* MTR object meter profile update */
+static int
+pmd_mtr_meter_profile_update(struct rte_eth_dev *dev,
+       uint32_t mtr_id,
+       uint32_t meter_profile_id,
+       struct rte_mtr_error *error)
+{
+       struct pmd_internals *p = dev->data->dev_private;
+       struct softnic_mtr_meter_profile *mp_new, *mp_old;
+       struct softnic_mtr *m;
+       int status;
+
+       /* MTR object id must be valid */
+       m = softnic_mtr_find(p, mtr_id);
+       if (m == NULL)
+               return -rte_mtr_error_set(error,
+                       ENOENT,
+                       RTE_MTR_ERROR_TYPE_MTR_ID,
+                       NULL,
+                       "MTR object id not valid");
+
+       /* Meter profile id must be valid */
+       mp_new = softnic_mtr_meter_profile_find(p, meter_profile_id);
+       if (mp_new == NULL)
+               return -rte_mtr_error_set(error,
+                       EINVAL,
+                       RTE_MTR_ERROR_TYPE_METER_PROFILE_ID,
+                       NULL,
+                       "Meter profile not valid");
+
+       /* MTR object already set to meter profile id */
+       if (m->params.meter_profile_id == meter_profile_id)
+               return 0;
+
+       /* If a flow owns this MTR object, update its table meter action */
+       if (m->flow) {
+               uint32_t table_id = m->flow->table_id;
+               struct softnic_table *table = &m->flow->pipeline->table[table_id];
+               struct softnic_table_rule_action action;
+
+               if (!softnic_pipeline_table_meter_profile_find(table,
+                       meter_profile_id)) {
+                       struct rte_table_action_meter_profile profile;
+
+                       memset(&profile, 0, sizeof(profile));
+
+                       profile.alg = RTE_TABLE_ACTION_METER_TRTCM;
+                       profile.trtcm.cir = mp_new->params.trtcm_rfc2698.cir;
+                       profile.trtcm.pir = mp_new->params.trtcm_rfc2698.pir;
+                       profile.trtcm.cbs = mp_new->params.trtcm_rfc2698.cbs;
+                       profile.trtcm.pbs = mp_new->params.trtcm_rfc2698.pbs;
+
+                       /* Add meter profile to pipeline table */
+                       status = softnic_pipeline_table_mtr_profile_add(p,
+                                       m->flow->pipeline->name,
+                                       table_id,
+                                       meter_profile_id,
+                                       &profile);
+                       if (status)
+                               return -rte_mtr_error_set(error,
+                                       EINVAL,
+                                       RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       "Table meter profile add failed");
+               }
+
+               /* Set meter action */
+               memcpy(&action, &m->flow->action, sizeof(action));
+
+               action.mtr.mtr[0].meter_profile_id = meter_profile_id;
+
+               /* Re-add rule */
+               status = softnic_pipeline_table_rule_add(p,
+                       m->flow->pipeline->name,
+                       table_id,
+                       &m->flow->match,
+                       &action,
+                       &m->flow->data);
+               if (status)
+                       return -rte_mtr_error_set(error,
+                               EINVAL,
+                               RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                               NULL,
+                               "Pipeline table rule add failed");
+
+               /* Flow: update meter action */
+               memcpy(&m->flow->action, &action, sizeof(m->flow->action));
+       }
+
+       mp_old = softnic_mtr_meter_profile_find(p, m->params.meter_profile_id);
+
+       /* Meter: Set meter profile */
+       m->params.meter_profile_id = meter_profile_id;
+
+       /* Update dependencies */
+       mp_old->n_users--;
+       mp_new->n_users++;
+
+       return 0;
+}
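+
+/*
+ * Note on the rule re-add above: adding a rule whose match is already
+ * present updates the action of the existing table entry in place, so
+ * the owner flow keeps its table position while its meter action is
+ * switched to the new profile.
+ */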
+
+/* MTR object meter DSCP table update */
+static int
+pmd_mtr_meter_dscp_table_update(struct rte_eth_dev *dev,
+       uint32_t mtr_id,
+       enum rte_mtr_color *dscp_table,
+       struct rte_mtr_error *error)
+{
+       struct pmd_internals *p = dev->data->dev_private;
+       struct rte_table_action_dscp_table dt;
+       struct pipeline *pipeline;
+       struct softnic_table *table;
+       struct softnic_mtr *m;
+       uint32_t table_id, i;
+       int status;
+
+       /* MTR object id must be valid */
+       m = softnic_mtr_find(p, mtr_id);
+       if (m == NULL)
+               return -rte_mtr_error_set(error,
+                       ENOENT,
+                       RTE_MTR_ERROR_TYPE_MTR_ID,
+                       NULL,
+                       "MTR object id not valid");
+
+       /* Nothing to update if no flow owns this MTR object */
+       if (m->flow == NULL)
+               return 0;
+
+       pipeline = m->flow->pipeline;
+       table_id = m->flow->table_id;
+       table = &pipeline->table[table_id];
+
+       memcpy(&dt, &table->dscp_table, sizeof(dt));
+       for (i = 0; i < RTE_DIM(dt.entry); i++)
+               dt.entry[i].color = (enum rte_meter_color)dscp_table[i];
+
+       /* Update table */
+       status = softnic_pipeline_table_dscp_table_update(p,
+                       pipeline->name,
+                       table_id,
+                       UINT64_MAX,
+                       &dt);
+       if (status)
+               return -rte_mtr_error_set(error,
+                       EINVAL,
+                       RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "Table action dscp table update failed");
+
+       return 0;
+}
+
+/* MTR object policer action update */
+static int
+pmd_mtr_policer_actions_update(struct rte_eth_dev *dev,
+       uint32_t mtr_id,
+       uint32_t action_mask,
+       enum rte_mtr_policer_action *actions,
+       struct rte_mtr_error *error)
+{
+       struct pmd_internals *p = dev->data->dev_private;
+       struct softnic_mtr *m;
+       uint32_t i;
+       int status;
+
+       /* MTR object id must be valid */
+       m = softnic_mtr_find(p, mtr_id);
+       if (m == NULL)
+               return -rte_mtr_error_set(error,
+                       EEXIST,
+                       RTE_MTR_ERROR_TYPE_MTR_ID,
+                       NULL,
+                       "MTR object id not valid");
+
+       /* Valid policer actions */
+       if (actions == NULL)
+               return -rte_mtr_error_set(error,
+                       EINVAL,
+                       RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "Invalid actions");
+
+       for (i = 0; i < RTE_MTR_COLORS; i++) {
+               if (action_mask & (1 << i)) {
+                       if (actions[i] != MTR_POLICER_ACTION_COLOR_GREEN  &&
+                               actions[i] != MTR_POLICER_ACTION_COLOR_YELLOW &&
+                               actions[i] != MTR_POLICER_ACTION_COLOR_RED &&
+                               actions[i] != MTR_POLICER_ACTION_DROP) {
+                               return -rte_mtr_error_set(error,
+                                       EINVAL,
+                                       RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                                       NULL,
+                                       " Invalid action value");
+                       }
+               }
+       }
+
+       /* If a flow owns this MTR object, update the policer action in its table rule */
+       if (m->flow) {
+               struct pipeline *pipeline = m->flow->pipeline;
+               struct softnic_table *table = &pipeline->table[m->flow->table_id];
+               struct softnic_table_rule_action action;
+
+               memcpy(&action, &m->flow->action, sizeof(action));
+
+               /* Set action */
+               for (i = 0; i < RTE_MTR_COLORS; i++)
+                       if (action_mask & (1 << i))
+                               action.mtr.mtr[0].policer[i] =
+                                       (enum rte_table_action_policer)actions[i];
+
+               /* Re-add the rule */
+               status = softnic_pipeline_table_rule_add(p,
+                       pipeline->name,
+                       m->flow->table_id,
+                       &m->flow->match,
+                       &action,
+                       &m->flow->data);
+               if (status)
+                       return -rte_mtr_error_set(error,
+                               EINVAL,
+                               RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                               NULL,
+                               "Pipeline table rule re-add failed");
+
+               /* Flow: Update meter action */
+               memcpy(&m->flow->action, &action, sizeof(m->flow->action));
+
+               /* Reset the meter stats */
+               rte_table_action_meter_read(table->a, m->flow->data,
+                       1, NULL, 1);
+       }
+
+       /* Meter: Update policer actions */
+       for (i = 0; i < RTE_MTR_COLORS; i++)
+               if (action_mask & (1 << i))
+                       m->params.action[i] = actions[i];
+
+       return 0;
+}
+
+#define MTR_STATS_PKTS_DEFAULT (RTE_MTR_STATS_N_PKTS_GREEN | \
+                               RTE_MTR_STATS_N_PKTS_YELLOW | \
+                               RTE_MTR_STATS_N_PKTS_RED | \
+                               RTE_MTR_STATS_N_PKTS_DROPPED)
+
+#define MTR_STATS_BYTES_DEFAULT (RTE_MTR_STATS_N_BYTES_GREEN | \
+                               RTE_MTR_STATS_N_BYTES_YELLOW | \
+                               RTE_MTR_STATS_N_BYTES_RED | \
+                               RTE_MTR_STATS_N_BYTES_DROPPED)
+
+/* Convert table action meter counters to MTR object stats */
+static void
+mtr_stats_convert(struct softnic_mtr *m,
+       struct rte_table_action_mtr_counters_tc *in,
+       struct rte_mtr_stats *out,
+       uint64_t *out_mask)
+{
+       memset(&out, 0, sizeof(out));
+       *out_mask = 0;
+
+       if (in->n_packets_valid) {
+               uint32_t i;
+
+               for (i = 0; i < RTE_MTR_COLORS; i++) {
+                       if (m->params.action[i] == MTR_POLICER_ACTION_COLOR_GREEN)
+                               out->n_pkts[RTE_MTR_GREEN] += in->n_packets[i];
+
+                       if (m->params.action[i] == MTR_POLICER_ACTION_COLOR_YELLOW)
+                               out->n_pkts[RTE_MTR_YELLOW] += in->n_packets[i];
+
+                       if (m->params.action[i] == MTR_POLICER_ACTION_COLOR_RED)
+                               out->n_pkts[RTE_MTR_RED] += in->n_packets[i];
+
+                       if (m->params.action[i] == MTR_POLICER_ACTION_DROP)
+                               out->n_pkts_dropped += in->n_packets[i];
+               }
+
+               *out_mask |= MTR_STATS_PKTS_DEFAULT;
+       }
+
+       if (in->n_bytes_valid) {
+               uint32_t i;
+
+               for (i = 0; i < RTE_MTR_COLORS; i++) {
+                       if (m->params.action[i] == MTR_POLICER_ACTION_COLOR_GREEN)
+                               out->n_bytes[RTE_MTR_GREEN] += in->n_bytes[i];
+
+                       if (m->params.action[i] == MTR_POLICER_ACTION_COLOR_YELLOW)
+                               out->n_bytes[RTE_MTR_YELLOW] += in->n_bytes[i];
+
+                       if (m->params.action[i] == MTR_POLICER_ACTION_COLOR_RED)
+                               out->n_bytes[RTE_MTR_RED] += in->n_bytes[i];
+
+                       if (m->params.action[i] == MTR_POLICER_ACTION_DROP)
+                               out->n_bytes_dropped += in->n_bytes[i];
+               }
+
+               *out_mask |= MTR_STATS_BYTES_DEFAULT;
+       }
+}
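+
+/*
+ * The conversion above buckets counters by configured policer action
+ * rather than by input color: e.g. with action[GREEN] = COLOR_GREEN and
+ * action[YELLOW] = action[RED] = DROP, both yellow and red counters are
+ * accumulated into out->n_pkts_dropped / out->n_bytes_dropped.
+ */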
+
+/* MTR object stats read */
+static int
+pmd_mtr_stats_read(struct rte_eth_dev *dev,
+       uint32_t mtr_id,
+       struct rte_mtr_stats *stats,
+       uint64_t *stats_mask,
+       int clear,
+       struct rte_mtr_error *error)
+{
+       struct pmd_internals *p = dev->data->dev_private;
+       struct rte_table_action_mtr_counters counters;
+       struct pipeline *pipeline;
+       struct softnic_table *table;
+       struct softnic_mtr *m;
+       int status;
+
+       /* MTR object id must be valid */
+       m = softnic_mtr_find(p, mtr_id);
+       if (m == NULL)
+               return -rte_mtr_error_set(error,
+                       ENOENT,
+                       RTE_MTR_ERROR_TYPE_MTR_ID,
+                       NULL,
+                       "MTR object id not valid");
+
+       /* No owner flow: report zeroed stats with the default mask */
+       if (m->flow == NULL) {
+               if (stats != NULL)
+                       memset(stats, 0, sizeof(*stats));
+
+               if (stats_mask)
+                       *stats_mask = MTR_STATS_PKTS_DEFAULT |
+                               MTR_STATS_BYTES_DEFAULT;
+
+               return 0;
+       }
+
+       pipeline = m->flow->pipeline;
+       table = &pipeline->table[m->flow->table_id];
+
+       /* Meter stats read. */
+       status = rte_table_action_meter_read(table->a,
+               m->flow->data,
+               1,
+               &counters,
+               clear);
+       if (status)
+               return -rte_mtr_error_set(error,
+                       EINVAL,
+                       RTE_MTR_ERROR_TYPE_UNSPECIFIED,
+                       NULL,
+                       "Meter stats read failed");
+
+       /* Stats format conversion. */
+       if (stats || stats_mask) {
+               struct rte_mtr_stats s;
+               uint64_t s_mask = 0;
+
+               mtr_stats_convert(m,
+                       &counters.stats[0],
+                       &s,
+                       &s_mask);
+
+               if (stats)
+                       memcpy(stats, &s, sizeof(*stats));
+
+               if (stats_mask)
+                       *stats_mask = s_mask;
+       }
+
+       return 0;
+}
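+
+/*
+ * Application-side read (illustrative; clear = 1 resets the counters):
+ *
+ *	struct rte_mtr_stats stats;
+ *	struct rte_mtr_error err;
+ *	uint64_t mask;
+ *
+ *	rte_mtr_stats_read(port_id, mtr_id, &stats, &mask, 1, &err);
+ */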
+
+const struct rte_mtr_ops pmd_mtr_ops = {
+       .capabilities_get = NULL,
+
+       .meter_profile_add = pmd_mtr_meter_profile_add,
+       .meter_profile_delete = pmd_mtr_meter_profile_delete,
+
+       .create = pmd_mtr_create,
+       .destroy = pmd_mtr_destroy,
+       .meter_enable = NULL,
+       .meter_disable = NULL,
+
+       .meter_profile_update = pmd_mtr_meter_profile_update,
+       .meter_dscp_table_update = pmd_mtr_meter_dscp_table_update,
+       .policer_actions_update = pmd_mtr_policer_actions_update,
+       .stats_update = NULL,
+
+       .stats_read = pmd_mtr_stats_read,
+};
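+
+/*
+ * Handlers left NULL above are reported as unsupported (ENOSYS) by the
+ * rte_mtr layer, which reaches this table through the ethdev
+ * mtr_ops_get callback.
+ */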
index 45136a4..5e180f8 100644 (file)
 #include <rte_port_source_sink.h>
 #include <rte_port_fd.h>
 #include <rte_port_sched.h>
+#include <rte_port_sym_crypto.h>
 
 #include <rte_table_acl.h>
 #include <rte_table_array.h>
 #include <rte_table_hash.h>
+#include <rte_table_hash_func.h>
 #include <rte_table_lpm.h>
 #include <rte_table_lpm_ipv6.h>
 #include <rte_table_stub.h>
 
 #include "rte_eth_softnic_internals.h"
 
-#include "hash_func.h"
-
 #ifndef PIPELINE_MSGQ_SIZE
 #define PIPELINE_MSGQ_SIZE                                 64
 #endif
@@ -43,17 +43,52 @@ softnic_pipeline_init(struct pmd_internals *p)
        return 0;
 }
 
+static void
+softnic_pipeline_table_free(struct softnic_table *table)
+{
+       for ( ; ; ) {
+               struct rte_flow *flow;
+
+               flow = TAILQ_FIRST(&table->flows);
+               if (flow == NULL)
+                       break;
+
+               TAILQ_REMOVE(&table->flows, flow, node);
+               free(flow);
+       }
+
+       for ( ; ; ) {
+               struct softnic_table_meter_profile *mp;
+
+               mp = TAILQ_FIRST(&table->meter_profiles);
+               if (mp == NULL)
+                       break;
+
+               TAILQ_REMOVE(&table->meter_profiles, mp, node);
+               free(mp);
+       }
+}
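+
+/*
+ * The two loops above follow the usual TAILQ teardown pattern: detach
+ * the head element first, then free it, so the list is never walked
+ * over freed memory.
+ */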
+
 void
 softnic_pipeline_free(struct pmd_internals *p)
 {
        for ( ; ; ) {
                struct pipeline *pipeline;
+               uint32_t table_id;
 
                pipeline = TAILQ_FIRST(&p->pipeline_list);
                if (pipeline == NULL)
                        break;
 
                TAILQ_REMOVE(&p->pipeline_list, pipeline, node);
+
+               for (table_id = 0; table_id < pipeline->n_tables; table_id++) {
+                       struct softnic_table *table =
+                               &pipeline->table[table_id];
+
+                       softnic_pipeline_table_free(table);
+               }
+
                rte_ring_free(pipeline->msgq_req);
                rte_ring_free(pipeline->msgq_rsp);
                rte_pipeline_free(pipeline->p);
@@ -160,6 +195,7 @@ softnic_pipeline_create(struct pmd_internals *softnic,
        /* Node fill in */
        strlcpy(pipeline->name, name, sizeof(pipeline->name));
        pipeline->p = p;
+       memcpy(&pipeline->params, params, sizeof(*params));
        pipeline->n_ports_in = 0;
        pipeline->n_ports_out = 0;
        pipeline->n_tables = 0;
@@ -189,6 +225,7 @@ softnic_pipeline_port_in_create(struct pmd_internals *softnic,
                struct rte_port_sched_reader_params sched;
                struct rte_port_fd_reader_params fd;
                struct rte_port_source_params source;
+               struct rte_port_sym_crypto_reader_params cryptodev;
        } pp;
 
        struct pipeline *pipeline;
@@ -213,7 +250,7 @@ softnic_pipeline_port_in_create(struct pmd_internals *softnic,
                return -1;
 
        ap = NULL;
-       if (params->action_profile_name) {
+       if (strlen(params->action_profile_name)) {
                ap = softnic_port_in_action_profile_find(softnic,
                        params->action_profile_name);
                if (ap == NULL)
@@ -306,6 +343,23 @@ softnic_pipeline_port_in_create(struct pmd_internals *softnic,
                break;
        }
 
+       case PORT_IN_CRYPTODEV:
+       {
+               struct softnic_cryptodev *cryptodev;
+
+               cryptodev = softnic_cryptodev_find(softnic, params->dev_name);
+               if (cryptodev == NULL)
+                       return -1;
+
+               pp.cryptodev.cryptodev_id = cryptodev->dev_id;
+               pp.cryptodev.queue_id = params->cryptodev.queue_id;
+               pp.cryptodev.f_callback = params->cryptodev.f_callback;
+               pp.cryptodev.arg_callback = params->cryptodev.arg_callback;
+               p.ops = &rte_port_sym_crypto_reader_ops;
+               p.arg_create = &pp.cryptodev;
+               break;
+       }
+
        default:
                return -1;
        }
@@ -392,15 +446,18 @@ softnic_pipeline_port_out_create(struct pmd_internals *softnic,
                struct rte_port_sched_writer_params sched;
                struct rte_port_fd_writer_params fd;
                struct rte_port_sink_params sink;
+               struct rte_port_sym_crypto_writer_params cryptodev;
        } pp;
 
        union {
                struct rte_port_ethdev_writer_nodrop_params ethdev;
                struct rte_port_ring_writer_nodrop_params ring;
                struct rte_port_fd_writer_nodrop_params fd;
+               struct rte_port_sym_crypto_writer_nodrop_params cryptodev;
        } pp_nodrop;
 
        struct pipeline *pipeline;
+       struct softnic_port_out *port_out;
        uint32_t port_id;
        int status;
 
@@ -526,6 +583,40 @@ softnic_pipeline_port_out_create(struct pmd_internals *softnic,
                break;
        }
 
+       case PORT_OUT_CRYPTODEV:
+       {
+               struct softnic_cryptodev *cryptodev;
+
+               cryptodev = softnic_cryptodev_find(softnic, params->dev_name);
+               if (cryptodev == NULL)
+                       return -1;
+
+               if (params->cryptodev.queue_id >= cryptodev->n_queues)
+                       return -1;
+
+               pp.cryptodev.cryptodev_id = cryptodev->dev_id;
+               pp.cryptodev.queue_id = params->cryptodev.queue_id;
+               pp.cryptodev.tx_burst_sz = params->burst_size;
+               pp.cryptodev.crypto_op_offset = params->cryptodev.op_offset;
+
+               pp_nodrop.cryptodev.cryptodev_id = cryptodev->dev_id;
+               pp_nodrop.cryptodev.queue_id = params->cryptodev.queue_id;
+               pp_nodrop.cryptodev.tx_burst_sz = params->burst_size;
+               pp_nodrop.cryptodev.n_retries = params->retry;
+               pp_nodrop.cryptodev.crypto_op_offset =
+                               params->cryptodev.op_offset;
+
+               if (params->retry == 0) {
+                       p.ops = &rte_port_sym_crypto_writer_ops;
+                       p.arg_create = &pp.cryptodev;
+               } else {
+                       p.ops = &rte_port_sym_crypto_writer_nodrop_ops;
+                       p.arg_create = &pp_nodrop.cryptodev;
+               }
+
+               break;
+       }
+
        default:
                return -1;
        }
@@ -542,6 +633,8 @@ softnic_pipeline_port_out_create(struct pmd_internals *softnic,
                return -1;
 
        /* Pipeline */
+       port_out = &pipeline->port_out[pipeline->n_ports_out];
+       memcpy(&port_out->params, params, sizeof(*params));
        pipeline->n_ports_out++;
 
        return 0;
@@ -730,7 +823,7 @@ softnic_pipeline_table_create(struct pmd_internals *softnic,
                return -1;
 
        ap = NULL;
-       if (params->action_profile_name) {
+       if (strlen(params->action_profile_name)) {
                ap = softnic_table_action_profile_find(softnic,
                        params->action_profile_name);
                if (ap == NULL)
@@ -797,28 +890,28 @@ softnic_pipeline_table_create(struct pmd_internals *softnic,
 
                switch (params->match.hash.key_size) {
                case  8:
-                       f_hash = hash_default_key8;
+                       f_hash = rte_table_hash_crc_key8;
                        break;
                case 16:
-                       f_hash = hash_default_key16;
+                       f_hash = rte_table_hash_crc_key16;
                        break;
                case 24:
-                       f_hash = hash_default_key24;
+                       f_hash = rte_table_hash_crc_key24;
                        break;
                case 32:
-                       f_hash = hash_default_key32;
+                       f_hash = rte_table_hash_crc_key32;
                        break;
                case 40:
-                       f_hash = hash_default_key40;
+                       f_hash = rte_table_hash_crc_key40;
                        break;
                case 48:
-                       f_hash = hash_default_key48;
+                       f_hash = rte_table_hash_crc_key48;
                        break;
                case 56:
-                       f_hash = hash_default_key56;
+                       f_hash = rte_table_hash_crc_key56;
                        break;
                case 64:
-                       f_hash = hash_default_key64;
+                       f_hash = rte_table_hash_crc_key64;
                        break;
                default:
                        return -1;
@@ -960,7 +1053,51 @@ softnic_pipeline_table_create(struct pmd_internals *softnic,
        memcpy(&table->params, params, sizeof(*params));
        table->ap = ap;
        table->a = action;
+       TAILQ_INIT(&table->flows);
+       TAILQ_INIT(&table->meter_profiles);
+       memset(&table->dscp_table, 0, sizeof(table->dscp_table));
        pipeline->n_tables++;
 
        return 0;
 }
+
+int
+softnic_pipeline_port_out_find(struct pmd_internals *softnic,
+               const char *pipeline_name,
+               const char *name,
+               uint32_t *port_id)
+{
+       struct pipeline *pipeline;
+       uint32_t i;
+
+       if (softnic == NULL ||
+                       pipeline_name == NULL ||
+                       name == NULL ||
+                       port_id == NULL)
+               return -1;
+
+       pipeline = softnic_pipeline_find(softnic, pipeline_name);
+       if (pipeline == NULL)
+               return -1;
+
+       for (i = 0; i < pipeline->n_ports_out; i++)
+               if (strcmp(pipeline->port_out[i].params.dev_name, name) == 0) {
+                       *port_id = i;
+                       return 0;
+               }
+
+       return -1;
+}
+
+struct softnic_table_meter_profile *
+softnic_pipeline_table_meter_profile_find(struct softnic_table *table,
+       uint32_t meter_profile_id)
+{
+       struct softnic_table_meter_profile *mp;
+
+       TAILQ_FOREACH(mp, &table->meter_profiles, node)
+               if (mp->meter_profile_id == meter_profile_id)
+                       return mp;
+
+       return NULL;
+}
index 8a15090..4572adf 100644 (file)
@@ -1680,6 +1680,8 @@ softnic_pipeline_table_mtr_profile_add(struct pmd_internals *softnic,
        struct pipeline *p;
        struct pipeline_msg_req *req;
        struct pipeline_msg_rsp *rsp;
+       struct softnic_table *table;
+       struct softnic_table_meter_profile *mp;
        int status;
 
        /* Check input params */
@@ -1692,20 +1694,40 @@ softnic_pipeline_table_mtr_profile_add(struct pmd_internals *softnic,
                table_id >= p->n_tables)
                return -1;
 
-       if (!pipeline_is_running(p)) {
-               struct rte_table_action *a = p->table[table_id].a;
+       table = &p->table[table_id];
+       mp = softnic_pipeline_table_meter_profile_find(table, meter_profile_id);
+       if (mp)
+               return -1;
 
-               status = rte_table_action_meter_profile_add(a,
+       /* Resource Allocation */
+       mp = calloc(1, sizeof(struct softnic_table_meter_profile));
+       if (mp == NULL)
+               return -1;
+
+       mp->meter_profile_id = meter_profile_id;
+       memcpy(&mp->profile, profile, sizeof(mp->profile));
+
+       if (!pipeline_is_running(p)) {
+               status = rte_table_action_meter_profile_add(table->a,
                        meter_profile_id,
                        profile);
+               if (status) {
+                       free(mp);
+                       return status;
+               }
+
+               /* Add profile to the table. */
+               TAILQ_INSERT_TAIL(&table->meter_profiles, mp, node);
 
                return status;
        }
 
        /* Allocate request */
        req = pipeline_msg_alloc();
-       if (req == NULL)
+       if (req == NULL) {
+               free(mp);
                return -1;
+       }
 
        /* Write request */
        req->type = PIPELINE_REQ_TABLE_MTR_PROFILE_ADD;
@@ -1715,11 +1737,17 @@ softnic_pipeline_table_mtr_profile_add(struct pmd_internals *softnic,
 
        /* Send request and wait for response */
        rsp = pipeline_msg_send_recv(p, req);
-       if (rsp == NULL)
+       if (rsp == NULL) {
+               free(mp);
                return -1;
+       }
 
        /* Read response */
        status = rsp->status;
+       if (status == 0)
+               TAILQ_INSERT_TAIL(&table->meter_profiles, mp, node);
+       else
+               free(mp);
 
        /* Free response */
        pipeline_msg_free(rsp);
@@ -1874,6 +1902,11 @@ softnic_pipeline_table_dscp_table_update(struct pmd_internals *softnic,
                                dscp_mask,
                                dscp_table);
 
+               /* Update table dscp table */
+               if (!status)
+                       memcpy(&p->table[table_id].dscp_table, dscp_table,
+                               sizeof(p->table[table_id].dscp_table));
+
                return status;
        }
 
@@ -1897,6 +1930,11 @@ softnic_pipeline_table_dscp_table_update(struct pmd_internals *softnic,
        /* Read response */
        status = rsp->status;
 
+       /* Update table dscp table */
+       if (!status)
+               memcpy(&p->table[table_id].dscp_table, dscp_table,
+                       sizeof(p->table[table_id].dscp_table));
+
        /* Free response */
        pipeline_msg_free(rsp);
 
@@ -2202,29 +2240,37 @@ match_convert(struct softnic_table_rule_match *mh,
                                ml->acl_add.field_value[0].mask_range.u8 =
                                        mh->match.acl.proto_mask;
 
-                               ml->acl_add.field_value[1].value.u32 = sa32[0];
+                               ml->acl_add.field_value[1].value.u32 =
+                                       rte_be_to_cpu_32(sa32[0]);
                                ml->acl_add.field_value[1].mask_range.u32 =
                                        sa32_depth[0];
-                               ml->acl_add.field_value[2].value.u32 = sa32[1];
+                               ml->acl_add.field_value[2].value.u32 =
+                                       rte_be_to_cpu_32(sa32[1]);
                                ml->acl_add.field_value[2].mask_range.u32 =
                                        sa32_depth[1];
-                               ml->acl_add.field_value[3].value.u32 = sa32[2];
+                               ml->acl_add.field_value[3].value.u32 =
+                                       rte_be_to_cpu_32(sa32[2]);
                                ml->acl_add.field_value[3].mask_range.u32 =
                                        sa32_depth[2];
-                               ml->acl_add.field_value[4].value.u32 = sa32[3];
+                               ml->acl_add.field_value[4].value.u32 =
+                                       rte_be_to_cpu_32(sa32[3]);
                                ml->acl_add.field_value[4].mask_range.u32 =
                                        sa32_depth[3];
 
-                               ml->acl_add.field_value[5].value.u32 = da32[0];
+                               ml->acl_add.field_value[5].value.u32 =
+                                       rte_be_to_cpu_32(da32[0]);
                                ml->acl_add.field_value[5].mask_range.u32 =
                                        da32_depth[0];
-                               ml->acl_add.field_value[6].value.u32 = da32[1];
+                               ml->acl_add.field_value[6].value.u32 =
+                                       rte_be_to_cpu_32(da32[1]);
                                ml->acl_add.field_value[6].mask_range.u32 =
                                        da32_depth[1];
-                               ml->acl_add.field_value[7].value.u32 = da32[2];
+                               ml->acl_add.field_value[7].value.u32 =
+                                       rte_be_to_cpu_32(da32[2]);
                                ml->acl_add.field_value[7].mask_range.u32 =
                                        da32_depth[2];
-                               ml->acl_add.field_value[8].value.u32 = da32[3];
+                               ml->acl_add.field_value[8].value.u32 =
+                                       rte_be_to_cpu_32(da32[3]);
                                ml->acl_add.field_value[8].mask_range.u32 =
                                        da32_depth[3];
 
@@ -2264,36 +2310,36 @@ match_convert(struct softnic_table_rule_match *mh,
                                        mh->match.acl.proto_mask;
 
                                ml->acl_delete.field_value[1].value.u32 =
-                                       sa32[0];
+                                       rte_be_to_cpu_32(sa32[0]);
                                ml->acl_delete.field_value[1].mask_range.u32 =
                                        sa32_depth[0];
                                ml->acl_delete.field_value[2].value.u32 =
-                                       sa32[1];
+                                       rte_be_to_cpu_32(sa32[1]);
                                ml->acl_delete.field_value[2].mask_range.u32 =
                                        sa32_depth[1];
                                ml->acl_delete.field_value[3].value.u32 =
-                                       sa32[2];
+                                       rte_be_to_cpu_32(sa32[2]);
                                ml->acl_delete.field_value[3].mask_range.u32 =
                                        sa32_depth[2];
                                ml->acl_delete.field_value[4].value.u32 =
-                                       sa32[3];
+                                       rte_be_to_cpu_32(sa32[3]);
                                ml->acl_delete.field_value[4].mask_range.u32 =
                                        sa32_depth[3];
 
                                ml->acl_delete.field_value[5].value.u32 =
-                                       da32[0];
+                                       rte_be_to_cpu_32(da32[0]);
                                ml->acl_delete.field_value[5].mask_range.u32 =
                                        da32_depth[0];
                                ml->acl_delete.field_value[6].value.u32 =
-                                       da32[1];
+                                       rte_be_to_cpu_32(da32[1]);
                                ml->acl_delete.field_value[6].mask_range.u32 =
                                        da32_depth[1];
                                ml->acl_delete.field_value[7].value.u32 =
-                                       da32[2];
+                                       rte_be_to_cpu_32(da32[2]);
                                ml->acl_delete.field_value[7].mask_range.u32 =
                                        da32_depth[2];
                                ml->acl_delete.field_value[8].value.u32 =
-                                       da32[3];
+                                       rte_be_to_cpu_32(da32[3]);
                                ml->acl_delete.field_value[8].mask_range.u32 =
                                        da32_depth[3];
 
@@ -2432,6 +2478,36 @@ action_convert(struct rte_table_action *a,
                        return status;
        }
 
+       if (action->action_mask & (1LLU << RTE_TABLE_ACTION_TAG)) {
+               status = rte_table_action_apply(a,
+                       data,
+                       RTE_TABLE_ACTION_TAG,
+                       &action->tag);
+
+               if (status)
+                       return status;
+       }
+
+       if (action->action_mask & (1LLU << RTE_TABLE_ACTION_DECAP)) {
+               status = rte_table_action_apply(a,
+                       data,
+                       RTE_TABLE_ACTION_DECAP,
+                       &action->decap);
+
+               if (status)
+                       return status;
+       }
+
+       if (action->action_mask & (1LLU << RTE_TABLE_ACTION_SYM_CRYPTO)) {
+               status = rte_table_action_apply(a,
+                       data,
+                       RTE_TABLE_ACTION_SYM_CRYPTO,
+                       &action->sym_crypto);
+
+               if (status)
+                       return status;
+       }
+
        return 0;
 }
 
index 1d20cb5..88448ef 100644 (file)
@@ -1056,8 +1056,7 @@ eth_dev_info(struct rte_eth_dev *dev,
        dev_info->max_rx_queues = internals->max_rx_queues;
        dev_info->max_tx_queues = internals->max_tx_queues;
        dev_info->min_rx_bufsize = 0;
-       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_SCATTER |
-                                   DEV_RX_OFFLOAD_CRC_STRIP;
+       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_SCATTER;
        dev_info->tx_offload_capa = 0;
        dev_info->rx_queue_offload_capa = 0;
        dev_info->tx_queue_offload_capa = 0;
@@ -1475,7 +1474,7 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev, struct port_info *pi)
        PMD_INIT_FUNC_TRACE();
 
        PMD_INIT_LOG(INFO, "Initializing eth_dev %s (driver %s)", data->name,
-                       dev->device->driver->name);
+                       RTE_STR(RTE_SZEDATA2_DRIVER_NAME));
 
        /* Fill internal private structure. */
        internals->dev = dev;
@@ -1526,7 +1525,7 @@ rte_szedata2_eth_dev_init(struct rte_eth_dev *dev, struct port_info *pi)
        ether_addr_copy(&eth_addr, data->mac_addrs);
 
        PMD_INIT_LOG(INFO, "%s device %s successfully initialized",
-                       dev->device->driver->name, data->name);
+                       RTE_STR(RTE_SZEDATA2_DRIVER_NAME), data->name);
 
        return 0;
 }
@@ -1545,10 +1544,9 @@ rte_szedata2_eth_dev_uninit(struct rte_eth_dev *dev)
        PMD_INIT_FUNC_TRACE();
 
        free(internals->sze_dev_path);
-       rte_free(dev->data->mac_addrs);
 
        PMD_DRV_LOG(INFO, "%s device %s successfully uninitialized",
-                       dev->device->driver->name, dev->data->name);
+                       RTE_STR(RTE_SZEDATA2_DRIVER_NAME), dev->data->name);
 
        return 0;
 }
index 3243365..7748283 100644 (file)
@@ -22,6 +22,7 @@ CFLAGS += -O3
 CFLAGS += -I$(SRCDIR)
 CFLAGS += -I.
 CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
 LDLIBS += -lrte_bus_vdev -lrte_gso
diff --git a/drivers/net/tap/meson.build b/drivers/net/tap/meson.build
new file mode 100644 (file)
index 0000000..9cb7142
--- /dev/null
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright 2018 Luca Boccassi <bluca@debian.org>
+
+if host_machine.system() != 'linux'
+        build = false
+endif
+sources = files(
+       'rte_eth_tap.c',
+       'tap_bpf_api.c',
+       'tap_flow.c',
+       'tap_intr.c',
+       'tap_netlink.c',
+       'tap_tcmsgs.c',
+)
+
+deps = ['bus_vdev', 'gso', 'hash']
+
+cflags += '-DTAP_MAX_QUEUES=16'
+
+# To maintain the compatibility with the make build system
+# tap_autoconf.h file is still generated.
+# input array for meson symbol search:
+# [ "MACRO to define if found", "header for the search",
+#   "enum/define", "symbol to search" ]
+#
+args = [
+       [ 'HAVE_TC_FLOWER', 'linux/pkt_cls.h',
+         'TCA_FLOWER_UNSPEC' ],
+       [ 'HAVE_TC_VLAN_ID', 'linux/pkt_cls.h',
+         'TCA_FLOWER_KEY_VLAN_PRIO' ],
+       [ 'HAVE_TC_BPF', 'linux/pkt_cls.h',
+         'TCA_BPF_UNSPEC' ],
+       [ 'HAVE_TC_BPF_FD', 'linux/pkt_cls.h',
+         'TCA_BPF_FD' ],
+       [ 'HAVE_TC_ACT_BPF', 'linux/tc_act/tc_bpf.h',
+         'TCA_ACT_BPF_UNSPEC' ],
+       [ 'HAVE_TC_ACT_BPF_FD', 'linux/tc_act/tc_bpf.h',
+         'TCA_ACT_BPF_FD' ],
+]
+config = configuration_data()
+allow_experimental_apis = true
+foreach arg:args
+       config.set(arg[0], cc.has_header_symbol(arg[1], arg[2]))
+endforeach
+configure_file(output : 'tap_autoconf.h', configuration : config)
index feb92b4..e7817e8 100644 (file)
@@ -16,6 +16,8 @@
 #include <rte_debug.h>
 #include <rte_ip.h>
 #include <rte_string_fns.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>
 
 #include <assert.h>
 #include <sys/types.h>
 #define TAP_GSO_MBUFS_NUM \
        (TAP_GSO_MBUFS_PER_CORE * TAP_GSO_MBUF_CACHE_SIZE)
 
+/* IPC key for queue fds sync */
+#define TAP_MP_KEY "tap_mp_sync_queues"
+
+static int tap_devices_count;
 static struct rte_vdev_driver pmd_tap_drv;
 static struct rte_vdev_driver pmd_tun_drv;
 
@@ -100,6 +106,17 @@ enum ioctl_mode {
        REMOTE_ONLY,
 };
 
+/* Message header to synchronize queues via IPC */
+struct ipc_queues {
+       char port_name[RTE_DEV_NAME_MAX_LEN];
+       int rxq_count;
+       int txq_count;
+       /*
+        * The file descriptors are in the dedicated part
+        * of the Unix message to be translated by the kernel.
+        */
+};
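+
+/*
+ * The fds travel as SCM_RIGHTS ancillary data attached to the EAL
+ * multi-process message, e.g. (sketch of filling a reply):
+ *
+ *	struct rte_mp_msg reply;
+ *
+ *	reply.num_fds = rxq_count + txq_count;
+ *	reply.fds[0] = process_private->rxq_fds[0];
+ */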
+
 static int tap_intr_handle_set(struct rte_eth_dev *dev, int set);
 
 /**
@@ -305,8 +322,7 @@ tap_rx_offload_get_queue_capa(void)
        return DEV_RX_OFFLOAD_SCATTER |
               DEV_RX_OFFLOAD_IPV4_CKSUM |
               DEV_RX_OFFLOAD_UDP_CKSUM |
-              DEV_RX_OFFLOAD_TCP_CKSUM |
-              DEV_RX_OFFLOAD_CRC_STRIP;
+              DEV_RX_OFFLOAD_TCP_CKSUM;
 }
 
 /* Callback to handle the rx burst of packets to the correct interface and
@@ -316,6 +332,7 @@ static uint16_t
 pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
        struct rx_queue *rxq = queue;
+       struct pmd_process_private *process_private;
        uint16_t num_rx;
        unsigned long num_rx_bytes = 0;
        uint32_t trigger = tap_trigger;
@@ -324,6 +341,7 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                return 0;
        if (trigger)
                rxq->trigger_seen = trigger;
+       process_private = rte_eth_devices[rxq->in_port].process_private;
        rte_compiler_barrier();
        for (num_rx = 0; num_rx < nb_pkts; ) {
                struct rte_mbuf *mbuf = rxq->pool;
@@ -332,9 +350,9 @@ pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
                uint16_t data_off = rte_pktmbuf_headroom(mbuf);
                int len;
 
-               len = readv(rxq->fd, *rxq->iovecs,
-                           1 +
-                           (rxq->rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ?
+               len = readv(process_private->rxq_fds[rxq->queue_id],
+                       *rxq->iovecs,
+                       1 + (rxq->rxmode->offloads & DEV_RX_OFFLOAD_SCATTER ?
                             rxq->nb_rx_desc : 1));
                if (len < (int)sizeof(struct tun_pi))
                        break;
@@ -495,6 +513,9 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 {
        int i;
        uint16_t l234_hlen;
+       struct pmd_process_private *process_private;
+
+       process_private = rte_eth_devices[txq->out_port].process_private;
 
        for (i = 0; i < num_mbufs; i++) {
                struct rte_mbuf *mbuf = pmbufs[i];
@@ -596,7 +617,7 @@ tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
                        tap_tx_l4_cksum(l4_cksum, l4_phdr_cksum, l4_raw_cksum);
 
                /* copy the tx frame data */
-               n = writev(txq->fd, iovecs, j);
+               n = writev(process_private->txq_fds[txq->queue_id], iovecs, j);
                if (n <= 0)
                        break;
                (*num_packets)++;
@@ -686,7 +707,7 @@ pmd_tx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
        txq->stats.errs += nb_pkts - num_tx;
        txq->stats.obytes += num_tx_bytes;
 
-       return num_tx;
+       return num_packets;
 }
 
 static const char *
@@ -971,19 +992,20 @@ tap_dev_close(struct rte_eth_dev *dev)
 {
        int i;
        struct pmd_internals *internals = dev->data->dev_private;
+       struct pmd_process_private *process_private = dev->process_private;
 
        tap_link_set_down(dev);
        tap_flow_flush(dev, NULL);
        tap_flow_implicit_flush(internals, NULL);
 
        for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-               if (internals->rxq[i].fd != -1) {
-                       close(internals->rxq[i].fd);
-                       internals->rxq[i].fd = -1;
+               if (process_private->rxq_fds[i] != -1) {
+                       close(process_private->rxq_fds[i]);
+                       process_private->rxq_fds[i] = -1;
                }
-               if (internals->txq[i].fd != -1) {
-                       close(internals->txq[i].fd);
-                       internals->txq[i].fd = -1;
+               if (process_private->txq_fds[i] != -1) {
+                       close(process_private->txq_fds[i]);
+                       process_private->txq_fds[i] = -1;
                }
        }
 
@@ -1007,10 +1029,14 @@ static void
 tap_rx_queue_release(void *queue)
 {
        struct rx_queue *rxq = queue;
+       struct pmd_process_private *process_private;
 
-       if (rxq && (rxq->fd > 0)) {
-               close(rxq->fd);
-               rxq->fd = -1;
+       if (!rxq)
+               return;
+       process_private = rte_eth_devices[rxq->in_port].process_private;
+       if (process_private->rxq_fds[rxq->queue_id] > 0) {
+               close(process_private->rxq_fds[rxq->queue_id]);
+               process_private->rxq_fds[rxq->queue_id] = -1;
                rte_pktmbuf_free(rxq->pool);
                rte_free(rxq->iovecs);
                rxq->pool = NULL;
@@ -1022,10 +1048,15 @@ static void
 tap_tx_queue_release(void *queue)
 {
        struct tx_queue *txq = queue;
+       struct pmd_process_private *process_private;
 
-       if (txq && (txq->fd > 0)) {
-               close(txq->fd);
-               txq->fd = -1;
+       if (!txq)
+               return;
+       process_private = rte_eth_devices[txq->out_port].process_private;
+
+       if (process_private->txq_fds[txq->queue_id] > 0) {
+               close(process_private->txq_fds[txq->queue_id]);
+               process_private->txq_fds[txq->queue_id] = -1;
        }
 }
 
@@ -1210,18 +1241,19 @@ tap_setup_queue(struct rte_eth_dev *dev,
        int *other_fd;
        const char *dir;
        struct pmd_internals *pmd = dev->data->dev_private;
+       struct pmd_process_private *process_private = dev->process_private;
        struct rx_queue *rx = &internals->rxq[qid];
        struct tx_queue *tx = &internals->txq[qid];
        struct rte_gso_ctx *gso_ctx;
 
        if (is_rx) {
-               fd = &rx->fd;
-               other_fd = &tx->fd;
+               fd = &process_private->rxq_fds[qid];
+               other_fd = &process_private->txq_fds[qid];
                dir = "rx";
                gso_ctx = NULL;
        } else {
-               fd = &tx->fd;
-               other_fd = &rx->fd;
+               fd = &process_private->txq_fds[qid];
+               other_fd = &process_private->rxq_fds[qid];
                dir = "tx";
                gso_ctx = &tx->gso_ctx;
        }
@@ -1274,6 +1306,7 @@ tap_rx_queue_setup(struct rte_eth_dev *dev,
                   struct rte_mempool *mp)
 {
        struct pmd_internals *internals = dev->data->dev_private;
+       struct pmd_process_private *process_private = dev->process_private;
        struct rx_queue *rxq = &internals->rxq[rx_queue_id];
        struct rte_mbuf **tmp = &rxq->pool;
        long iov_max = sysconf(_SC_IOV_MAX);
@@ -1294,6 +1327,7 @@ tap_rx_queue_setup(struct rte_eth_dev *dev,
        rxq->mp = mp;
        rxq->trigger_seen = 1; /* force initial burst */
        rxq->in_port = dev->data->port_id;
+       rxq->queue_id = rx_queue_id;
        rxq->nb_rx_desc = nb_desc;
        iovecs = rte_zmalloc_socket(dev->device->name, sizeof(*iovecs), 0,
                                    socket_id);
@@ -1332,7 +1366,8 @@ tap_rx_queue_setup(struct rte_eth_dev *dev,
        }
 
        TAP_LOG(DEBUG, "  RX TUNTAP device name %s, qid %d on fd %d",
-               internals->name, rx_queue_id, internals->rxq[rx_queue_id].fd);
+               internals->name, rx_queue_id,
+               process_private->rxq_fds[rx_queue_id]);
 
        return 0;
 
@@ -1352,6 +1387,7 @@ tap_tx_queue_setup(struct rte_eth_dev *dev,
                   const struct rte_eth_txconf *tx_conf)
 {
        struct pmd_internals *internals = dev->data->dev_private;
+       struct pmd_process_private *process_private = dev->process_private;
        struct tx_queue *txq;
        int ret;
        uint64_t offloads;
@@ -1360,6 +1396,8 @@ tap_tx_queue_setup(struct rte_eth_dev *dev,
                return -1;
        dev->data->tx_queues[tx_queue_id] = &internals->txq[tx_queue_id];
        txq = dev->data->tx_queues[tx_queue_id];
+       txq->out_port = dev->data->port_id;
+       txq->queue_id = tx_queue_id;
 
        offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
        txq->csum = !!(offloads &
@@ -1372,7 +1410,8 @@ tap_tx_queue_setup(struct rte_eth_dev *dev,
                return -1;
        TAP_LOG(DEBUG,
                "  TX TUNTAP device name %s, qid %d on fd %d csum %s",
-               internals->name, tx_queue_id, internals->txq[tx_queue_id].fd,
+               internals->name, tx_queue_id,
+               process_private->txq_fds[tx_queue_id],
                txq->csum ? "on" : "off");
 
        return 0;
@@ -1620,6 +1659,7 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name,
        int numa_node = rte_socket_id();
        struct rte_eth_dev *dev;
        struct pmd_internals *pmd;
+       struct pmd_process_private *process_private;
        struct rte_eth_dev_data *data;
        struct ifreq ifr;
        int i;
@@ -1634,7 +1674,16 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name,
                goto error_exit_nodev;
        }
 
+       process_private = (struct pmd_process_private *)
+               rte_zmalloc_socket(tap_name, sizeof(struct pmd_process_private),
+                       RTE_CACHE_LINE_SIZE, dev->device->numa_node);
+
+       if (process_private == NULL) {
+               TAP_LOG(ERR, "Failed to alloc memory for process private");
+               return -1;
+       }
        pmd = dev->data->dev_private;
+       dev->process_private = process_private;
        pmd->dev = dev;
        snprintf(pmd->name, sizeof(pmd->name), "%s", tap_name);
        pmd->type = type;
@@ -1670,8 +1719,8 @@ eth_dev_tap_create(struct rte_vdev_device *vdev, char *tap_name,
        /* Preset the fds to -1 to mark them as invalid */
        pmd->ka_fd = -1;
        for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-               pmd->rxq[i].fd = -1;
-               pmd->txq[i].fd = -1;
+               process_private->rxq_fds[i] = -1;
+               process_private->txq_fds[i] = -1;
        }
 
        if (pmd->type == ETH_TUNTAP_TYPE_TAP) {
@@ -1809,6 +1858,8 @@ error_remote:
 error_exit:
        if (pmd->ioctl_sock > 0)
                close(pmd->ioctl_sock);
+       /* mac_addrs must not be freed alone because it is part of dev_private */
+       dev->data->mac_addrs = NULL;
        rte_eth_dev_release_port(dev);
 
 error_exit_nodev:
@@ -1974,6 +2025,102 @@ leave:
        return ret;
 }
 
+/* Request queue file descriptors from the primary process (secondary side). */
+static int
+tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
+{
+       int ret;
+       struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
+       struct rte_mp_msg request, *reply;
+       struct rte_mp_reply replies;
+       struct ipc_queues *request_param = (struct ipc_queues *)request.param;
+       struct ipc_queues *reply_param;
+       struct pmd_process_private *process_private = dev->process_private;
+       int queue, fd_iterator;
+
+       /* Prepare the request */
+       strlcpy(request.name, TAP_MP_KEY, sizeof(request.name));
+       strlcpy(request_param->port_name, port_name,
+               sizeof(request_param->port_name));
+       request.len_param = sizeof(*request_param);
+       /* Send request and receive reply */
+       ret = rte_mp_request_sync(&request, &replies, &timeout);
+       if (ret < 0) {
+               TAP_LOG(ERR, "Failed to request queues from primary: %d",
+                       rte_errno);
+               return -1;
+       }
+       reply = &replies.msgs[0];
+       reply_param = (struct ipc_queues *)reply->param;
+       TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
+
+       /* Attach the queues from received file descriptors */
+       dev->data->nb_rx_queues = reply_param->rxq_count;
+       dev->data->nb_tx_queues = reply_param->txq_count;
+       fd_iterator = 0;
+       for (queue = 0; queue < reply_param->rxq_count; queue++)
+               process_private->rxq_fds[queue] = reply->fds[fd_iterator++];
+       for (queue = 0; queue < reply_param->txq_count; queue++)
+               process_private->txq_fds[queue] = reply->fds[fd_iterator++];
+
+       return 0;
+}
+
+/* Send the queue file descriptors from the primary process to a secondary. */
+static int
+tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
+{
+       struct rte_eth_dev *dev;
+       struct pmd_process_private *process_private;
+       struct rte_mp_msg reply;
+       const struct ipc_queues *request_param =
+               (const struct ipc_queues *)request->param;
+       struct ipc_queues *reply_param =
+               (struct ipc_queues *)reply.param;
+       uint16_t port_id;
+       int queue;
+       int ret;
+
+       /* Get requested port */
+       TAP_LOG(DEBUG, "Received IPC request for %s", request_param->port_name);
+       ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
+       if (ret) {
+               TAP_LOG(ERR, "Failed to get port id for %s",
+                       request_param->port_name);
+               return -1;
+       }
+       dev = &rte_eth_devices[port_id];
+       process_private = dev->process_private;
+
+       /* Fill file descriptors for all queues */
+       reply.num_fds = 0;
+       reply_param->rxq_count = 0;
+       for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
+               reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
+               reply_param->rxq_count++;
+       }
+
+       reply_param->txq_count = 0;
+       for (queue = 0; queue < dev->data->nb_tx_queues; queue++) {
+               reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
+               reply_param->txq_count++;
+       }
+       RTE_ASSERT(reply_param->rxq_count == dev->data->nb_rx_queues);
+       RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
+       RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
+
+       /* Send reply */
+       strlcpy(reply.name, request->name, sizeof(reply.name));
+       strlcpy(reply_param->port_name, request_param->port_name,
+               sizeof(reply_param->port_name));
+       reply.len_param = sizeof(*reply_param);
+       if (rte_mp_reply(&reply, peer) < 0) {
+               TAP_LOG(ERR, "Failed to reply to an IPC request to sync queues");
+               return -1;
+       }
+       return 0;
+}
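
The pair of handlers above is an instance of the generic rte_mp request/reply facility: file descriptors placed in msg->fds travel as SCM_RIGHTS ancillary data, so the numbers a secondary receives are valid in its own process. A minimal sketch of that pattern, with hypothetical names (MY_IPC_KEY, my_handler, my_request) and assuming the primary has called rte_mp_action_register(MY_IPC_KEY, my_handler) first:

#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <rte_eal.h>
#include <rte_string_fns.h>

#define MY_IPC_KEY "my_ipc_key"	/* hypothetical action name */

/* Primary side: answer a request, attaching an fd to the reply. */
static int
my_handler(const struct rte_mp_msg *request, const void *peer)
{
	struct rte_mp_msg reply;

	memset(&reply, 0, sizeof(reply));
	strlcpy(reply.name, request->name, sizeof(reply.name));
	reply.fds[reply.num_fds++] = 0;	/* any fd open in this process */
	return rte_mp_reply(&reply, peer);
}

/* Secondary side: synchronous request; the fds come back re-mapped
 * into this process's descriptor table. */
static int
my_request(void)
{
	struct timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
	struct rte_mp_msg request;
	struct rte_mp_reply replies;

	memset(&request, 0, sizeof(request));
	strlcpy(request.name, MY_IPC_KEY, sizeof(request.name));
	if (rte_mp_request_sync(&request, &replies, &ts) < 0)
		return -1;
	/* replies.msgs[0].fds[] now holds fds valid in this process */
	free(replies.msgs);	/* the caller owns the reply array */
	return 0;
}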
+
 /* Open a TAP interface device.
  */
 static int
@@ -1987,22 +2134,41 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
        char remote_iface[RTE_ETH_NAME_MAX_LEN];
        struct ether_addr user_mac = { .addr_bytes = {0} };
        struct rte_eth_dev *eth_dev;
+       int tap_devices_count_increased = 0;
 
        strcpy(tuntap_name, "TAP");
 
        name = rte_vdev_device_name(dev);
        params = rte_vdev_device_args(dev);
 
-       if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
-           strlen(params) == 0) {
+       if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                eth_dev = rte_eth_dev_attach_secondary(name);
                if (!eth_dev) {
                        TAP_LOG(ERR, "Failed to probe %s", name);
                        return -1;
                }
-               /* TODO: request info from primary to set up Rx and Tx */
                eth_dev->dev_ops = &ops;
                eth_dev->device = &dev->device;
+               eth_dev->rx_pkt_burst = pmd_rx_burst;
+               eth_dev->tx_pkt_burst = pmd_tx_burst;
+               if (!rte_eal_primary_proc_alive(NULL)) {
+                       TAP_LOG(ERR, "Primary process is missing");
+                       return -1;
+               }
+               eth_dev->process_private = (struct pmd_process_private *)
+                       rte_zmalloc_socket(name,
+                               sizeof(struct pmd_process_private),
+                               RTE_CACHE_LINE_SIZE,
+                               eth_dev->device->numa_node);
+               if (eth_dev->process_private == NULL) {
+                       TAP_LOG(ERR,
+                               "Failed to alloc memory for process private");
+                       return -1;
+               }
+
+               ret = tap_mp_attach_queues(name, eth_dev);
+               if (ret != 0)
+                       return -1;
                rte_eth_dev_probing_finish(eth_dev);
                return 0;
        }
@@ -2050,6 +2216,17 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
        TAP_LOG(NOTICE, "Initializing pmd_tap for %s as %s",
                name, tap_name);
 
+       /* Register IPC feed callback */
+       if (!tap_devices_count) {
+               ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
+               if (ret < 0) {
+                       TAP_LOG(ERR, "%s: Failed to register IPC callback: %s",
+                               tuntap_name, strerror(rte_errno));
+                       goto leave;
+               }
+       }
+       tap_devices_count++;
+       tap_devices_count_increased = 1;
        ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
                ETH_TUNTAP_TYPE_TAP);
 
@@ -2057,6 +2234,11 @@ leave:
        if (ret == -1) {
                TAP_LOG(ERR, "Failed to create pmd for %s as %s",
                        name, tap_name);
+               if (tap_devices_count_increased == 1) {
+                       if (tap_devices_count == 1)
+                               rte_mp_action_unregister(TAP_MP_KEY);
+                       tap_devices_count--;
+               }
                tap_unit--;             /* Restore the unit number */
        }
        rte_kvargs_free(kvlist);
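
The tap_devices_count bookkeeping above amounts to refcounted registration of a process-wide resource: the IPC action is registered with the first TAP device and unregistered when the last one is torn down, both in this error path and in rte_pmd_tap_remove() below. The same idiom in isolation, as a sketch with hypothetical names:

#include <rte_eal.h>

static int drv_handler(const struct rte_mp_msg *m, const void *peer);

static unsigned int drv_refcnt;

/* Register the shared action only for the first device. */
static int
drv_ref(void)
{
	if (drv_refcnt == 0 &&
	    rte_mp_action_register("drv_key", drv_handler) < 0)
		return -1;
	drv_refcnt++;
	return 0;
}

/* Unregister it when the last device goes away. */
static void
drv_unref(void)
{
	if (--drv_refcnt == 0)
		rte_mp_action_unregister("drv_key");
}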
@@ -2071,14 +2253,22 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev)
 {
        struct rte_eth_dev *eth_dev = NULL;
        struct pmd_internals *internals;
+       struct pmd_process_private *process_private;
        int i;
 
        /* find the ethdev entry */
        eth_dev = rte_eth_dev_allocated(rte_vdev_device_name(dev));
        if (!eth_dev)
-               return 0;
+               return -ENODEV;
+
+       /* mac_addrs must not be freed alone because it is part of dev_private */
+       eth_dev->data->mac_addrs = NULL;
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return rte_eth_dev_release_port(eth_dev);
 
        internals = eth_dev->data->dev_private;
+       process_private = eth_dev->process_private;
 
        TAP_LOG(DEBUG, "Closing %s Ethernet device on numa %u",
                (internals->type == ETH_TUNTAP_TYPE_TAP) ? "TAP" : "TUN",
@@ -2090,18 +2280,21 @@ rte_pmd_tap_remove(struct rte_vdev_device *dev)
                tap_nl_final(internals->nlsk_fd);
        }
        for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-               if (internals->rxq[i].fd != -1) {
-                       close(internals->rxq[i].fd);
-                       internals->rxq[i].fd = -1;
+               if (process_private->rxq_fds[i] != -1) {
+                       close(process_private->rxq_fds[i]);
+                       process_private->rxq_fds[i] = -1;
                }
-               if (internals->txq[i].fd != -1) {
-                       close(internals->txq[i].fd);
-                       internals->txq[i].fd = -1;
+               if (process_private->txq_fds[i] != -1) {
+                       close(process_private->txq_fds[i]);
+                       process_private->txq_fds[i] = -1;
                }
        }
 
        close(internals->ioctl_sock);
-       rte_free(eth_dev->data->dev_private);
+       rte_free(eth_dev->process_private);
+       if (tap_devices_count == 1)
+               rte_mp_action_unregister(TAP_MP_KEY);
+       tap_devices_count--;
        rte_eth_dev_release_port(eth_dev);
 
        if (internals->ka_fd != -1) {
index 44e2773..dc3579a 100644 (file)
@@ -46,7 +46,7 @@ struct rx_queue {
        struct rte_mempool *mp;         /* Mempool for RX packets */
        uint32_t trigger_seen;          /* Last seen Rx trigger value */
        uint16_t in_port;               /* Port ID */
-       int fd;
+       uint16_t queue_id;              /* queue ID */
        struct pkt_stats stats;         /* Stats for this RX queue */
        uint16_t nb_rx_desc;            /* max number of mbufs available */
        struct rte_eth_rxmode *rxmode;  /* RX features */
@@ -56,12 +56,13 @@ struct rx_queue {
 };
 
 struct tx_queue {
-       int fd;
        int type;                       /* Type field - TUN|TAP */
        uint16_t *mtu;                  /* Pointer to MTU from dev_data */
        uint16_t csum:1;                /* Enable checksum offloading */
        struct pkt_stats stats;         /* Stats for this TX queue */
        struct rte_gso_ctx gso_ctx;     /* GSO context */
+       uint16_t out_port;              /* Port ID */
+       uint16_t queue_id;              /* queue ID */
 };
 
 struct pmd_internals {
@@ -92,6 +93,11 @@ struct pmd_internals {
        int ka_fd;                        /* keep-alive file descriptor */
 };
 
+struct pmd_process_private {
+       int rxq_fds[RTE_PMD_TAP_MAX_QUEUES];
+       int txq_fds[RTE_PMD_TAP_MAX_QUEUES];
+};
+
 /* tap_intr.c */
 
 int tap_rx_intr_vec_set(struct rte_eth_dev *dev, int set);
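
With the fds moved out of the shared dev_private area into pmd_process_private, anything on the data path must resolve them through the calling process's dev->process_private, keyed by the in_port/out_port and queue_id fields added above. A sketch of that lookup (the helper name is ours; it mirrors what the queue release paths do):

#include <rte_ethdev_driver.h>

/* Hypothetical helper: fetch a queue's fd from this process's view. */
static inline int
tap_rxq_fd(const struct rx_queue *rxq)
{
	struct pmd_process_private *pp =
		rte_eth_devices[rxq->in_port].process_private;

	return pp->rxq_fds[rxq->queue_id];
}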
index 79e3e66..1a91bba 100644 (file)
@@ -5,7 +5,7 @@
 #include <tap_bpf.h>
 
 /* bpf_insn array matching cls_q section. See tap_bpf_program.c file */
-struct bpf_insn cls_q_insns[] = {
+static struct bpf_insn cls_q_insns[] = {
        {0x61,    2,    1,       52, 0x00000000},
        {0x18,    3,    0,        0, 0xdeadbeef},
        {0x00,    0,    0,        0, 0x00000000},
@@ -24,7 +24,7 @@ struct bpf_insn cls_q_insns[] = {
 };
 
 /* bpf_insn array matching l3_l4 section. See tap_bpf_program.c file */
-struct bpf_insn l3_l4_hash_insns[] = {
+static struct bpf_insn l3_l4_hash_insns[] = {
        {0xbf,    7,    1,        0, 0x00000000},
        {0x61,    8,    7,       16, 0x00000000},
        {0x61,    6,    7,       76, 0x00000000},
index 0e01af6..d155618 100644 (file)
@@ -1567,6 +1567,7 @@ tap_flow_isolate(struct rte_eth_dev *dev,
                 struct rte_flow_error *error __rte_unused)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
+       struct pmd_process_private *process_private = dev->process_private;
 
        /* normalize 'set' variable to contain 0 or 1 values */
        if (set)
@@ -1580,7 +1581,7 @@ tap_flow_isolate(struct rte_eth_dev *dev,
         * If netdevice is there, setup appropriate flow rules immediately.
         * Otherwise it will be set when bringing up the netdevice (tun_alloc).
         */
-       if (!pmd->rxq[0].fd)
+       if (!process_private->rxq_fds[0])
                return 0;
        if (set) {
                struct rte_flow *remote_flow;
@@ -1810,7 +1811,7 @@ tap_flow_implicit_flush(struct pmd_internals *pmd, struct rte_flow_error *error)
 #define KEY_IDX_OFFSET (3 * MAX_RSS_KEYS)
 #define SEC_NAME_CLS_Q "cls_q"
 
-const char *sec_name[SEC_MAX] = {
+static const char *sec_name[SEC_MAX] = {
        [SEC_L3_L4] = "l3_l4",
 };
 
index fc59018..7af0010 100644 (file)
@@ -51,6 +51,7 @@ static int
 tap_rx_intr_vec_install(struct rte_eth_dev *dev)
 {
        struct pmd_internals *pmd = dev->data->dev_private;
+       struct pmd_process_private *process_private = dev->process_private;
        unsigned int rxqs_n = pmd->dev->data->nb_rx_queues;
        struct rte_intr_handle *intr_handle = &pmd->intr_handle;
        unsigned int n = RTE_MIN(rxqs_n, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID);
@@ -71,7 +72,7 @@ tap_rx_intr_vec_install(struct rte_eth_dev *dev)
                struct rx_queue *rxq = pmd->dev->data->rx_queues[i];
 
                /* Skip queues that cannot request interrupts. */
-               if (!rxq || rxq->fd <= 0) {
+               if (!rxq || process_private->rxq_fds[i] <= 0) {
                        /* Use invalid intr_vec[] index to disable entry. */
                        intr_handle->intr_vec[i] =
                                RTE_INTR_VEC_RXTX_OFFSET +
@@ -79,7 +80,7 @@ tap_rx_intr_vec_install(struct rte_eth_dev *dev)
                        continue;
                }
                intr_handle->intr_vec[i] = RTE_INTR_VEC_RXTX_OFFSET + count;
-               intr_handle->efds[count] = rxq->fd;
+               intr_handle->efds[count] = process_private->rxq_fds[i];
                count++;
        }
        if (!count)
index c9d5a8f..bf4e860 100644 (file)
@@ -7,8 +7,12 @@ sources = [
        'nicvf_bsvf.c'
 ]
 
+c_args = cflags
+if allow_experimental_apis
+       c_args += '-DALLOW_EXPERIMENTAL_API'
+endif
 base_lib = static_library('nicvf_base', sources,
-       c_args: cflags,
+       c_args: c_args,
        dependencies: static_rte_ethdev
 )
 
index a55c3ca..879d889 100644 (file)
@@ -1431,7 +1431,6 @@ nicvf_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        dev_info->default_rxconf = (struct rte_eth_rxconf) {
                .rx_free_thresh = NICVF_DEFAULT_RX_FREE_THRESH,
                .rx_drop_en = 0,
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        };
 
        dev_info->default_txconf = (struct rte_eth_txconf) {
@@ -1916,14 +1915,6 @@ nicvf_dev_configure(struct rte_eth_dev *dev)
                return -EINVAL;
        }
 
-       /* KEEP_CRC offload flag is not supported by PMD
-        * can remove the below block when DEV_RX_OFFLOAD_CRC_STRIP removed
-        */
-       if (rte_eth_dev_must_keep_crc(rxmode->offloads)) {
-               PMD_INIT_LOG(NOTICE, "Can't disable hw crc strip");
-               rxmode->offloads |= DEV_RX_OFFLOAD_CRC_STRIP;
-       }
-
        if (txmode->mq_mode) {
                PMD_INIT_LOG(INFO, "Tx mq_mode DCB or VMDq not supported");
                return -EINVAL;
index ae440fe..c0bfbf8 100644 (file)
@@ -40,7 +40,6 @@
 #define NICVF_RX_OFFLOAD_CAPA ( \
        DEV_RX_OFFLOAD_CHECKSUM    | \
        DEV_RX_OFFLOAD_VLAN_STRIP  | \
-       DEV_RX_OFFLOAD_CRC_STRIP   | \
        DEV_RX_OFFLOAD_JUMBO_FRAME | \
        DEV_RX_OFFLOAD_SCATTER)
 
diff --git a/drivers/net/vdev_netvsc/meson.build b/drivers/net/vdev_netvsc/meson.build
new file mode 100644 (file)
index 0000000..d3ada87
--- /dev/null
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+if host_machine.system() != 'linux'
+        build = false
+endif
+sources = files('vdev_netvsc.c')
+
+allow_experimental_apis = true
+
+cflags_options = [
+        '-Wall',
+        '-Wextra',
+        '-D_BSD_SOURCE',
+        '-D_DEFAULT_SOURCE',
+        '-D_XOPEN_SOURCE=600'
+]
+foreach option:cflags_options
+        if cc.has_argument(option)
+                cflags += option
+        endif
+endforeach
index 48717f2..16303ef 100644 (file)
@@ -789,7 +789,7 @@ RTE_PMD_REGISTER_PARAM_STRING(net_vdev_netvsc,
 /** Initialize driver log type. */
 RTE_INIT(vdev_netvsc_init_log)
 {
-       vdev_netvsc_logtype = rte_log_register("pmd.vdev_netvsc");
+       vdev_netvsc_logtype = rte_log_register("pmd.net.vdev_netvsc");
        if (vdev_netvsc_logtype >= 0)
                rte_log_set_level(vdev_netvsc_logtype, RTE_LOG_NOTICE);
 }
index e58f322..b38a4b6 100644 (file)
@@ -30,6 +30,7 @@ enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
 #define ETH_VHOST_CLIENT_ARG           "client"
 #define ETH_VHOST_DEQUEUE_ZERO_COPY    "dequeue-zero-copy"
 #define ETH_VHOST_IOMMU_SUPPORT                "iommu-support"
+#define ETH_VHOST_POSTCOPY_SUPPORT     "postcopy-support"
 #define VHOST_MAX_PKT_BURST 32
 
 static const char *valid_arguments[] = {
@@ -38,6 +39,7 @@ static const char *valid_arguments[] = {
        ETH_VHOST_CLIENT_ARG,
        ETH_VHOST_DEQUEUE_ZERO_COPY,
        ETH_VHOST_IOMMU_SUPPORT,
+       ETH_VHOST_POSTCOPY_SUPPORT,
        NULL
 };
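
For reference, the new knob is passed like the existing devargs at vdev creation time; an illustrative invocation (socket path hypothetical):

        --vdev 'net_vhost0,iface=/tmp/vhost0.sock,postcopy-support=1'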
 
@@ -1070,8 +1072,7 @@ eth_dev_info(struct rte_eth_dev *dev,
 
        dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS |
                                DEV_TX_OFFLOAD_VLAN_INSERT;
-       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP |
-                                   DEV_RX_OFFLOAD_CRC_STRIP;
+       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
 }
 
 static int
@@ -1221,10 +1222,12 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
        eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internal));
        if (eth_dev == NULL)
                goto error;
+       data = eth_dev->data;
 
        eth_addr = rte_zmalloc_socket(name, sizeof(*eth_addr), 0, numa_node);
        if (eth_addr == NULL)
                goto error;
+       data->mac_addrs = eth_addr;
        *eth_addr = base_eth_addr;
        eth_addr->addr_bytes[5] = eth_dev->data->port_id;
 
@@ -1254,13 +1257,11 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
        rte_spinlock_init(&vring_state->lock);
        vring_states[eth_dev->data->port_id] = vring_state;
 
-       data = eth_dev->data;
        data->nb_rx_queues = queues;
        data->nb_tx_queues = queues;
        internal->max_queues = queues;
        internal->vid = -1;
        data->dev_link = pmd_link;
-       data->mac_addrs = eth_addr;
        data->dev_flags = RTE_ETH_DEV_INTR_LSC;
 
        eth_dev->dev_ops = &ops;
@@ -1292,10 +1293,7 @@ error:
                free(internal->dev_name);
        }
        rte_free(vring_state);
-       rte_free(eth_addr);
-       if (eth_dev)
-               rte_eth_dev_release_port(eth_dev);
-       rte_free(internal);
+       rte_eth_dev_release_port(eth_dev);
        rte_free(list);
 
        return -1;
@@ -1340,13 +1338,13 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
        int client_mode = 0;
        int dequeue_zero_copy = 0;
        int iommu_support = 0;
+       int postcopy_support = 0;
        struct rte_eth_dev *eth_dev;
        const char *name = rte_vdev_device_name(dev);
 
        VHOST_LOG(INFO, "Initializing pmd_vhost for %s\n", name);
 
-       if (rte_eal_process_type() == RTE_PROC_SECONDARY &&
-           strlen(rte_vdev_device_args(dev)) == 0) {
+       if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                eth_dev = rte_eth_dev_attach_secondary(name);
                if (!eth_dev) {
                        VHOST_LOG(ERR, "Failed to probe %s\n", name);
@@ -1412,6 +1410,16 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev)
                        flags |= RTE_VHOST_USER_IOMMU_SUPPORT;
        }
 
+       if (rte_kvargs_count(kvlist, ETH_VHOST_POSTCOPY_SUPPORT) == 1) {
+               ret = rte_kvargs_process(kvlist, ETH_VHOST_POSTCOPY_SUPPORT,
+                                        &open_int, &postcopy_support);
+               if (ret < 0)
+                       goto out_free;
+
+               if (postcopy_support)
+                       flags |= RTE_VHOST_USER_POSTCOPY_SUPPORT;
+       }
+
        if (dev->device.numa_node == SOCKET_ID_ANY)
                dev->device.numa_node = rte_socket_id();
 
@@ -1437,6 +1445,9 @@ rte_pmd_vhost_remove(struct rte_vdev_device *dev)
        if (eth_dev == NULL)
                return -ENODEV;
 
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+               return rte_eth_dev_release_port(eth_dev);
+
        eth_dev_close(eth_dev);
 
        rte_free(vring_states[eth_dev->data->port_id]);
index 614357d..10a7e3f 100644 (file)
@@ -1697,7 +1697,7 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
        PMD_INIT_FUNC_TRACE();
 
        if (rte_eal_process_type() == RTE_PROC_SECONDARY)
-               return -EPERM;
+               return 0;
 
        virtio_dev_stop(eth_dev);
        virtio_dev_close(eth_dev);
@@ -1706,9 +1706,6 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
        eth_dev->tx_pkt_burst = NULL;
        eth_dev->rx_pkt_burst = NULL;
 
-       rte_free(eth_dev->data->mac_addrs);
-       eth_dev->data->mac_addrs = NULL;
-
        /* reset interrupt callback  */
        if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
                rte_intr_callback_unregister(eth_dev->intr_handle,
@@ -2166,8 +2163,7 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
        dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS;
 
        host_features = VTPCI_OPS(hw)->get_features(hw);
-       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP |
-                                   DEV_RX_OFFLOAD_CRC_STRIP;
+       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
        if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
                dev_info->rx_offload_capa |=
                        DEV_RX_OFFLOAD_TCP_CKSUM |
index b726ad1..e0f80e5 100644 (file)
        (VIRTIO_PMD_DEFAULT_GUEST_FEATURES |    \
         1u << VIRTIO_NET_F_GUEST_CSUM     |    \
         1u << VIRTIO_NET_F_GUEST_TSO4     |    \
-        1u << VIRTIO_NET_F_GUEST_TSO6)
+        1u << VIRTIO_NET_F_GUEST_TSO6     |    \
+        1u << VIRTIO_NET_F_CSUM           |    \
+        1u << VIRTIO_NET_F_HOST_TSO4      |    \
+        1u << VIRTIO_NET_F_HOST_TSO6)
 
 /*
  * CQ function prototype
index 6bd22e5..b6a3c80 100644 (file)
@@ -567,16 +567,18 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
        }
 
        ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
-       if (ret < 0) {
-               PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
+       if (ret != 1) {
+               PMD_INIT_LOG(DEBUG,
+                            "failed to read pci capability list, ret %d", ret);
                return -1;
        }
 
        while (pos) {
-               ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
-               if (ret < 0) {
-                       PMD_INIT_LOG(ERR,
-                               "failed to read pci cap at pos: %x", pos);
+               ret = rte_pci_read_config(dev, &cap, 2, pos);
+               if (ret != 2) {
+                       PMD_INIT_LOG(DEBUG,
+                                    "failed to read pci cap at pos: %x ret %d",
+                                    pos, ret);
                        break;
                }
 
@@ -586,7 +588,16 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
                         * 1st byte is cap ID; 2nd byte is the position of next
                         * cap; next two bytes are the flags.
                         */
-                       uint16_t flags = ((uint16_t *)&cap)[1];
+                       uint16_t flags;
+
+                       ret = rte_pci_read_config(dev, &flags, sizeof(flags),
+                                       pos + 2);
+                       if (ret != sizeof(flags)) {
+                               PMD_INIT_LOG(DEBUG,
+                                            "failed to read pci cap at pos: %x ret %d",
+                                            pos, ret);
+                               break;
+                       }
 
                        if (flags & PCI_MSIX_ENABLE)
                                hw->use_msix = VIRTIO_MSIX_ENABLED;
@@ -601,6 +612,14 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
                        goto next;
                }
 
+               ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
+               if (ret != sizeof(cap)) {
+                       PMD_INIT_LOG(DEBUG,
+                                    "failed to read pci cap at pos: %x ret %d",
+                                    pos, ret);
+                       break;
+               }
+
                PMD_INIT_LOG(DEBUG,
                        "[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
                        pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
@@ -689,25 +708,37 @@ enum virtio_msix_status
 vtpci_msix_detect(struct rte_pci_device *dev)
 {
        uint8_t pos;
-       struct virtio_pci_cap cap;
        int ret;
 
        ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
-       if (ret < 0) {
-               PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
+       if (ret != 1) {
+               PMD_INIT_LOG(DEBUG,
+                            "failed to read pci capability list, ret %d", ret);
                return VIRTIO_MSIX_NONE;
        }
 
        while (pos) {
-               ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos);
-               if (ret < 0) {
-                       PMD_INIT_LOG(ERR,
-                               "failed to read pci cap at pos: %x", pos);
+               uint8_t cap[2];
+
+               ret = rte_pci_read_config(dev, cap, sizeof(cap), pos);
+               if (ret != sizeof(cap)) {
+                       PMD_INIT_LOG(DEBUG,
+                                    "failed to read pci cap at pos: %x ret %d",
+                                    pos, ret);
                        break;
                }
 
-               if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
-                       uint16_t flags = ((uint16_t *)&cap)[1];
+               if (cap[0] == PCI_CAP_ID_MSIX) {
+                       uint16_t flags;
+
+                       ret = rte_pci_read_config(dev, &flags, sizeof(flags),
+                                       pos + sizeof(cap));
+                       if (ret != sizeof(flags)) {
+                               PMD_INIT_LOG(DEBUG,
+                                            "failed to read pci cap at pos: %x ret %d",
+                                            pos + 2, ret);
+                               break;
+                       }
 
                        if (flags & PCI_MSIX_ENABLE)
                                return VIRTIO_MSIX_ENABLED;
@@ -715,7 +746,7 @@ vtpci_msix_detect(struct rte_pci_device *dev)
                                return VIRTIO_MSIX_DISABLED;
                }
 
-               pos = cap.cap_next;
+               pos = cap[1];
        }
 
        return VIRTIO_MSIX_NONE;
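
Both loops above rely on the same correction: rte_pci_read_config() returns the number of bytes read, so success must be tested against the requested length rather than merely ret < 0, and the MSI-X flags are now fetched with a dedicated short read instead of aliasing a struct. A small wrapper capturing the rule (a sketch; the name is ours):

#include <rte_bus_pci.h>

/* Hypothetical helper: read exactly len bytes of PCI config space,
 * treating a short read as failure. */
static int
pci_read_exact(const struct rte_pci_device *dev, void *buf,
	       size_t len, off_t offset)
{
	int ret = rte_pci_read_config(dev, buf, len, offset);

	return ret == (int)len ? 0 : -1;
}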
index 31e565b..f8bcbaa 100644 (file)
@@ -47,7 +47,7 @@ virtio_rxq_vec_setup(struct virtnet_rx *rxq)
 }
 
 /* Stub for linkage when arch specific implementation is not available */
-uint16_t __attribute__((weak))
+__rte_weak uint16_t
 virtio_recv_pkts_vec(void *rx_queue __rte_unused,
                     struct rte_mbuf **rx_pkts __rte_unused,
                     uint16_t nb_pkts __rte_unused)
index 668cc99..83a85cc 100644 (file)
@@ -88,7 +88,7 @@ struct virtio_user_backend_ops {
                         int enable);
 };
 
-struct virtio_user_backend_ops ops_user;
-struct virtio_user_backend_ops ops_kernel;
+extern struct virtio_user_backend_ops virtio_ops_user;
+extern struct virtio_user_backend_ops virtio_ops_kernel;
 
 #endif
index b244409..6b19180 100644 (file)
@@ -70,41 +70,44 @@ static uint64_t vhost_req_user_to_kernel[] = {
        [VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE,
 };
 
-struct walk_arg {
-       struct vhost_memory_kernel *vm;
-       uint32_t region_nr;
-};
 static int
-add_memory_region(const struct rte_memseg_list *msl __rte_unused,
-               const struct rte_memseg *ms, size_t len, void *arg)
+add_memseg_list(const struct rte_memseg_list *msl, void *arg)
 {
-       struct walk_arg *wa = arg;
+       struct vhost_memory_kernel *vm = arg;
        struct vhost_memory_region *mr;
        void *start_addr;
+       uint64_t len;
 
-       if (wa->region_nr >= max_regions)
+       if (msl->external)
+               return 0;
+
+       if (vm->nregions >= max_regions)
                return -1;
 
-       mr = &wa->vm->regions[wa->region_nr++];
-       start_addr = ms->addr;
+       start_addr = msl->base_va;
+       len = msl->page_sz * msl->memseg_arr.len;
+
+       mr = &vm->regions[vm->nregions++];
 
        mr->guest_phys_addr = (uint64_t)(uintptr_t)start_addr;
        mr->userspace_addr = (uint64_t)(uintptr_t)start_addr;
        mr->memory_size = len;
-       mr->mmap_offset = 0;
+       mr->mmap_offset = 0; /* flags_padding in the vhost-kernel struct */
+
+       PMD_DRV_LOG(DEBUG, "index=%u addr=%p len=%" PRIu64,
+                       vm->nregions - 1, start_addr, len);
 
        return 0;
 }
 
-/* By default, vhost kernel module allows 64 regions, but DPDK allows
- * 256 segments. As a relief, below function merges those virtually
- * adjacent memsegs into one region.
+/* By default, the vhost kernel module allows 64 regions, but DPDK may
+ * have many more memory regions. The function below treats each
+ * contiguous memory space reserved by DPDK as one region.
  */
 static struct vhost_memory_kernel *
 prepare_vhost_memory_kernel(void)
 {
        struct vhost_memory_kernel *vm;
-       struct walk_arg wa;
 
        vm = malloc(sizeof(struct vhost_memory_kernel) +
                        max_regions *
@@ -112,16 +115,18 @@ prepare_vhost_memory_kernel(void)
        if (!vm)
                return NULL;
 
-       wa.region_nr = 0;
-       wa.vm = vm;
+       vm->nregions = 0;
+       vm->padding = 0;
 
-       if (rte_memseg_contig_walk(add_memory_region, &wa) < 0) {
+       /*
+        * The memory lock has already been taken by memory subsystem
+        * or virtio_user_start_device().
+        */
+       if (rte_memseg_list_walk_thread_unsafe(add_memseg_list, vm) < 0) {
                free(vm);
                return NULL;
        }
 
-       vm->nregions = wa.region_nr;
-       vm->padding = 0;
        return vm;
 }
 
@@ -147,8 +152,8 @@ prepare_vhost_memory_kernel(void)
         (1ULL << VIRTIO_NET_F_HOST_TSO6) |     \
         (1ULL << VIRTIO_NET_F_CSUM))
 
-static int
-tap_supporte_mq(void)
+static unsigned int
+tap_support_features(void)
 {
        int tapfd;
        unsigned int tap_features;
@@ -167,7 +172,7 @@ tap_supporte_mq(void)
        }
 
        close(tapfd);
-       return tap_features & IFF_MULTI_QUEUE;
+       return tap_features;
 }
 
 static int
@@ -181,6 +186,7 @@ vhost_kernel_ioctl(struct virtio_user_dev *dev,
        struct vhost_memory_kernel *vm = NULL;
        int vhostfd;
        unsigned int queue_sel;
+       unsigned int features;
 
        PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]);
 
@@ -234,17 +240,20 @@ vhost_kernel_ioctl(struct virtio_user_dev *dev,
        }
 
        if (!ret && req_kernel == VHOST_GET_FEATURES) {
+               features = tap_support_features();
                /* with tap as the backend, all these features are supported
                 * but not claimed by vhost-net, so we add them back when
                 * reporting to upper layer.
                 */
-               *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
-               *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
+               if (features & IFF_VNET_HDR) {
+                       *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK;
+                       *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK;
+               }
 
                /* vhost_kernel will not declare this feature, but it does
                 * support multi-queue.
                 */
-               if (tap_supporte_mq())
+               if (features & IFF_MULTI_QUEUE)
                        *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ);
        }
 
@@ -339,7 +348,7 @@ vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev,
                hdr_size = sizeof(struct virtio_net_hdr);
 
        tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq,
-                        (char *)dev->mac_addr);
+                        (char *)dev->mac_addr, dev->features);
        if (tapfd < 0) {
                PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel");
                return -1;
@@ -355,7 +364,7 @@ vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev,
        return 0;
 }
 
-struct virtio_user_backend_ops ops_kernel = {
+struct virtio_user_backend_ops virtio_ops_kernel = {
        .setup = vhost_kernel_setup,
        .send_request = vhost_kernel_ioctl,
        .enable_qp = vhost_kernel_enable_queue_pair
index 9ea7ade..a3faf1d 100644 (file)
 
 #include "vhost_kernel_tap.h"
 #include "../virtio_logs.h"
+#include "../virtio_pci.h"
+
+static int
+vhost_kernel_tap_set_offload(int fd, uint64_t features)
+{
+       unsigned int offload = 0;
+
+       if (features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) {
+               offload |= TUN_F_CSUM;
+               if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO4))
+                       offload |= TUN_F_TSO4;
+               if (features & (1ULL << VIRTIO_NET_F_GUEST_TSO6))
+                       offload |= TUN_F_TSO6;
+               if (features & ((1ULL << VIRTIO_NET_F_GUEST_TSO4) |
+                       (1ULL << VIRTIO_NET_F_GUEST_TSO6)) &&
+                       (features & (1ULL << VIRTIO_NET_F_GUEST_ECN)))
+                       offload |= TUN_F_TSO_ECN;
+               if (features & (1ULL << VIRTIO_NET_F_GUEST_UFO))
+                       offload |= TUN_F_UFO;
+       }
+
+       if (offload != 0) {
+               /* Check if our kernel supports TUNSETOFFLOAD */
+               if (ioctl(fd, TUNSETOFFLOAD, 0) != 0 && errno == EINVAL) {
+                       PMD_DRV_LOG(ERR, "Kernel doesn't support TUNSETOFFLOAD");
+                       return -ENOTSUP;
+               }
+
+               if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
+                       offload &= ~TUN_F_UFO;
+                       if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) {
+                               PMD_DRV_LOG(ERR, "TUNSETOFFLOAD ioctl() failed: %s",
+                                       strerror(errno));
+                               return -1;
+                       }
+               }
+       }
+
+       return 0;
+}
 
 int
 vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq,
-                        const char *mac)
+                        const char *mac, uint64_t features)
 {
        unsigned int tap_features;
        int sndbuf = INT_MAX;
        struct ifreq ifr;
        int tapfd;
-       unsigned int offload =
-                       TUN_F_CSUM |
-                       TUN_F_TSO4 |
-                       TUN_F_TSO6 |
-                       TUN_F_TSO_ECN |
-                       TUN_F_UFO;
 
        /* TODO:
         * 1. verify we can get/set vnet_hdr_len, tap_probe_vnet_hdr_len
@@ -90,13 +124,7 @@ vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq,
                goto error;
        }
 
-       /* TODO: before set the offload capabilities, we'd better (1) check
-        * negotiated features to see if necessary to offload; (2) query tap
-        * to see if it supports the offload capabilities.
-        */
-       if (ioctl(tapfd, TUNSETOFFLOAD, offload) != 0)
-               PMD_DRV_LOG(ERR, "TUNSETOFFLOAD ioctl() failed: %s",
-                          strerror(errno));
+       vhost_kernel_tap_set_offload(tapfd, features);
 
        memset(&ifr, 0, sizeof(ifr));
        ifr.ifr_hwaddr.sa_family = ARPHRD_ETHER;
index 01a026f..e0e95b4 100644 (file)
@@ -36,4 +36,4 @@
 #define PATH_NET_TUN   "/dev/net/tun"
 
 int vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq,
-                        const char *mac);
+                        const char *mac, uint64_t features);
index ef6e43d..2c6eba0 100644 (file)
@@ -11,6 +11,9 @@
 #include <string.h>
 #include <errno.h>
 
+#include <rte_fbarray.h>
+#include <rte_eal_memconfig.h>
+
 #include "vhost.h"
 #include "virtio_user_dev.h"
 
@@ -121,133 +124,103 @@ fail:
        return -1;
 }
 
-struct hugepage_file_info {
-       uint64_t addr;            /**< virtual addr */
-       size_t   size;            /**< the file size */
-       char     path[PATH_MAX];  /**< path to backing file */
+struct walk_arg {
+       struct vhost_memory *vm;
+       int *fds;
+       int region_nr;
 };
 
-/* Two possible options:
- * 1. Match HUGEPAGE_INFO_FMT to find the file storing struct hugepage_file
- * array. This is simple but cannot be used in secondary process because
- * secondary process will close and munmap that file.
- * 2. Match HUGEFILE_FMT to find hugepage files directly.
- *
- * We choose option 2.
- */
 static int
-get_hugepage_file_info(struct hugepage_file_info huges[], int max)
+update_memory_region(const struct rte_memseg_list *msl __rte_unused,
+               const struct rte_memseg *ms, void *arg)
 {
-       int idx, k, exist;
-       FILE *f;
-       char buf[BUFSIZ], *tmp, *tail;
-       char *str_underline, *str_start;
-       int huge_index;
-       uint64_t v_start, v_end;
-       struct stat stats;
-
-       f = fopen("/proc/self/maps", "r");
-       if (!f) {
-               PMD_DRV_LOG(ERR, "cannot open /proc/self/maps");
+       struct walk_arg *wa = arg;
+       struct vhost_memory_region *mr;
+       uint64_t start_addr, end_addr;
+       size_t offset;
+       int i, fd;
+
+       fd = rte_memseg_get_fd_thread_unsafe(ms);
+       if (fd < 0) {
+               PMD_DRV_LOG(ERR, "Failed to get fd, ms=%p rte_errno=%d",
+                       ms, rte_errno);
                return -1;
        }
 
-       idx = 0;
-       while (fgets(buf, sizeof(buf), f) != NULL) {
-               if (sscanf(buf, "%" PRIx64 "-%" PRIx64, &v_start, &v_end) < 2) {
-                       PMD_DRV_LOG(ERR, "Failed to parse address");
-                       goto error;
-               }
+       if (rte_memseg_get_fd_offset_thread_unsafe(ms, &offset) < 0) {
+               PMD_DRV_LOG(ERR, "Failed to get offset, ms=%p rte_errno=%d",
+                       ms, rte_errno);
+               return -1;
+       }
 
-               tmp = strchr(buf, ' ') + 1; /** skip address */
-               tmp = strchr(tmp, ' ') + 1; /** skip perm */
-               tmp = strchr(tmp, ' ') + 1; /** skip offset */
-               tmp = strchr(tmp, ' ') + 1; /** skip dev */
-               tmp = strchr(tmp, ' ') + 1; /** skip inode */
-               while (*tmp == ' ')         /** skip spaces */
-                       tmp++;
-               tail = strrchr(tmp, '\n');  /** remove newline if exists */
-               if (tail)
-                       *tail = '\0';
-
-               /* Match HUGEFILE_FMT, aka "%s/%smap_%d",
-                * which is defined in eal_filesystem.h
-                */
-               str_underline = strrchr(tmp, '_');
-               if (!str_underline)
-                       continue;
+       start_addr = (uint64_t)(uintptr_t)ms->addr;
+       end_addr = start_addr + ms->len;
 
-               str_start = str_underline - strlen("map");
-               if (str_start < tmp)
+       for (i = 0; i < wa->region_nr; i++) {
+               if (wa->fds[i] != fd)
                        continue;
 
-               if (sscanf(str_start, "map_%d", &huge_index) != 1)
-                       continue;
+               mr = &wa->vm->regions[i];
 
-               /* skip duplicated file which is mapped to different regions */
-               for (k = 0, exist = -1; k < idx; ++k) {
-                       if (!strcmp(huges[k].path, tmp)) {
-                               exist = k;
-                               break;
-                       }
-               }
-               if (exist >= 0)
-                       continue;
+               if (mr->userspace_addr + mr->memory_size < end_addr)
+                       mr->memory_size = end_addr - mr->userspace_addr;
 
-               if (idx >= max) {
-                       PMD_DRV_LOG(ERR, "Exceed maximum of %d", max);
-                       goto error;
+               if (mr->userspace_addr > start_addr) {
+                       mr->userspace_addr = start_addr;
+                       mr->guest_phys_addr = start_addr;
                }
 
-               huges[idx].addr = v_start;
-               huges[idx].size = v_end - v_start; /* To be corrected later */
-               snprintf(huges[idx].path, PATH_MAX, "%s", tmp);
-               idx++;
+               if (mr->mmap_offset > offset)
+                       mr->mmap_offset = offset;
+
+               PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
+                       " addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
+                       mr->mmap_offset, mr->userspace_addr,
+                       mr->memory_size);
+
+               return 0;
        }
 
-       /* correct the size for files who have many regions */
-       for (k = 0; k < idx; ++k) {
-               if (stat(huges[k].path, &stats) < 0) {
-                       PMD_DRV_LOG(ERR, "Failed to stat %s, %s\n",
-                                   huges[k].path, strerror(errno));
-                       continue;
-               }
-               huges[k].size = stats.st_size;
-               PMD_DRV_LOG(INFO, "file %s, size %zx\n",
-                           huges[k].path, huges[k].size);
+       if (i >= VHOST_MEMORY_MAX_NREGIONS) {
+               PMD_DRV_LOG(ERR, "Too many memory regions");
+               return -1;
        }
 
-       fclose(f);
-       return idx;
+       mr = &wa->vm->regions[i];
+       wa->fds[i] = fd;
 
-error:
-       fclose(f);
-       return -1;
+       mr->guest_phys_addr = start_addr;
+       mr->userspace_addr = start_addr;
+       mr->memory_size = ms->len;
+       mr->mmap_offset = offset;
+
+       PMD_DRV_LOG(DEBUG, "index=%d fd=%d offset=0x%" PRIx64
+               " addr=0x%" PRIx64 " len=%" PRIu64, i, fd,
+               mr->mmap_offset, mr->userspace_addr,
+               mr->memory_size);
+
+       wa->region_nr++;
+
+       return 0;
 }
 
 static int
 prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[])
 {
-       int i, num;
-       struct hugepage_file_info huges[VHOST_MEMORY_MAX_NREGIONS];
-       struct vhost_memory_region *mr;
+       struct walk_arg wa;
 
-       num = get_hugepage_file_info(huges, VHOST_MEMORY_MAX_NREGIONS);
-       if (num < 0) {
-               PMD_INIT_LOG(ERR, "Failed to prepare memory for vhost-user");
-               return -1;
-       }
+       wa.region_nr = 0;
+       wa.vm = &msg->payload.memory;
+       wa.fds = fds;
 
-       for (i = 0; i < num; ++i) {
-               mr = &msg->payload.memory.regions[i];
-               mr->guest_phys_addr = huges[i].addr; /* use vaddr! */
-               mr->userspace_addr = huges[i].addr;
-               mr->memory_size = huges[i].size;
-               mr->mmap_offset = 0;
-               fds[i] = open(huges[i].path, O_RDWR);
-       }
+       /*
+        * The memory lock has already been taken by memory subsystem
+        * or virtio_user_start_device().
+        */
+       if (rte_memseg_walk_thread_unsafe(update_memory_region, &wa) < 0)
+               return -1;
 
-       msg->payload.memory.nregions = num;
+       msg->payload.memory.nregions = wa.region_nr;
        msg->payload.memory.padding = 0;
 
        return 0;
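
update_memory_region() replaces the old /proc/self/maps parsing with an fd-keyed merge: every memseg whose backing fd matches an existing region simply widens that region's bounds, so the region count tracks distinct hugepage files rather than individual memsegs. The merge in isolation, as a stripped-down sketch independent of the DPDK types:

#include <stdint.h>

struct region { int fd; uint64_t start, end; };

/* Fold a segment into an existing region sharing its fd, or append a
 * new region; returns the new region count, or -1 if the table is full. */
static int
merge_seg(struct region *r, int nr, int max,
	  int fd, uint64_t start, uint64_t len)
{
	int i;

	for (i = 0; i < nr; i++) {
		if (r[i].fd != fd)
			continue;
		if (start < r[i].start)
			r[i].start = start;
		if (start + len > r[i].end)
			r[i].end = start + len;
		return nr;
	}
	if (nr == max)
		return -1;
	r[nr] = (struct region){ .fd = fd, .start = start,
				 .end = start + len };
	return nr + 1;
}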
@@ -280,7 +253,7 @@ vhost_user_sock(struct virtio_user_dev *dev,
        int need_reply = 0;
        int fds[VHOST_MEMORY_MAX_NREGIONS];
        int fd_num = 0;
-       int i, len;
+       int len;
        int vhostfd = dev->vhostfd;
 
        RTE_SET_USED(m);
@@ -364,10 +337,6 @@ vhost_user_sock(struct virtio_user_dev *dev,
                return -1;
        }
 
-       if (req == VHOST_USER_SET_MEM_TABLE)
-               for (i = 0; i < fd_num; ++i)
-                       close(fds[i]);
-
        if (need_reply) {
                if (vhost_user_read(vhostfd, &msg) < 0) {
                        PMD_DRV_LOG(ERR, "Received msg failed: %s",
@@ -497,7 +466,7 @@ vhost_user_enable_queue_pair(struct virtio_user_dev *dev,
        return 0;
 }
 
-struct virtio_user_backend_ops ops_user = {
+struct virtio_user_backend_ops virtio_ops_user = {
        .setup = vhost_user_setup,
        .send_request = vhost_user_sock,
        .enable_qp = vhost_user_enable_queue_pair
index 7df600b..b4997ee 100644 (file)
@@ -13,6 +13,8 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 
+#include <rte_eal_memconfig.h>
+
 #include "vhost.h"
 #include "virtio_user_dev.h"
 #include "../virtio_ethdev.h"
@@ -109,9 +111,24 @@ is_vhost_user_by_type(const char *path)
 int
 virtio_user_start_device(struct virtio_user_dev *dev)
 {
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
        uint64_t features;
        int ret;
 
+       /*
+        * XXX workaround!
+        *
+        * We need to make sure that the locks will be
+        * taken in the correct order to avoid deadlocks.
+        *
+        * Before releasing this lock, this thread should
+        * not trigger any memory hotplug events.
+        *
+        * This is a temporary workaround, and should be
+        * replaced when we get proper support from the
+        * memory subsystem in the future.
+        */
+       rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
        pthread_mutex_lock(&dev->mutex);
 
        if (is_vhost_user_by_type(dev->path) && dev->vhostfd < 0)
@@ -152,10 +169,12 @@ virtio_user_start_device(struct virtio_user_dev *dev)
 
        dev->started = true;
        pthread_mutex_unlock(&dev->mutex);
+       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
 
        return 0;
 error:
        pthread_mutex_unlock(&dev->mutex);
+       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
        /* TODO: free resource here or caller to check */
        return -1;
 }
@@ -282,8 +301,14 @@ virtio_user_mem_event_cb(enum rte_mem_event type __rte_unused,
                                                 void *arg)
 {
        struct virtio_user_dev *dev = arg;
+       struct rte_memseg_list *msl;
        uint16_t i;
 
+       /* ignore externally allocated memory */
+       msl = rte_mem_virt2memseg_list(addr);
+       if (msl == NULL || msl->external)
+               return;
+
        pthread_mutex_lock(&dev->mutex);
 
        if (dev->started == false)
@@ -319,12 +344,12 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
                        PMD_DRV_LOG(ERR, "Server mode doesn't support vhost-kernel!");
                        return -1;
                }
-               dev->ops = &ops_user;
+               dev->ops = &virtio_ops_user;
        } else {
                if (is_vhost_user_by_type(dev->path)) {
-                       dev->ops = &ops_user;
+                       dev->ops = &virtio_ops_user;
                } else {
-                       dev->ops = &ops_kernel;
+                       dev->ops = &virtio_ops_kernel;
 
                        dev->vhostfds = malloc(dev->max_queue_pairs *
                                               sizeof(int));
@@ -530,13 +555,11 @@ virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs)
        /* Server mode can't enable queue pairs if vhostfd is invalid,
         * always return 0 in this case.
         */
-       if (dev->vhostfd >= 0) {
+       if (!dev->is_server || dev->vhostfd >= 0) {
                for (i = 0; i < q_pairs; ++i)
                        ret |= dev->ops->enable_qp(dev, i, 1);
                for (i = q_pairs; i < dev->max_queue_pairs; ++i)
                        ret |= dev->ops->enable_qp(dev, i, 0);
-       } else if (!dev->is_server) {
-               ret = ~0;
        }
        dev->queue_pairs = q_pairs;
 
index 525d16c..b51cbc8 100644 (file)
@@ -422,7 +422,6 @@ virtio_user_eth_dev_alloc(struct rte_vdev_device *vdev)
        if (!dev) {
                PMD_INIT_LOG(ERR, "malloc virtio_user_dev failed");
                rte_eth_dev_release_port(eth_dev);
-               rte_free(hw);
                return NULL;
        }
 
@@ -449,7 +448,6 @@ virtio_user_eth_dev_free(struct rte_eth_dev *eth_dev)
        struct virtio_hw *hw = data->dev_private;
 
        rte_free(hw->virtio_user_dev);
-       rte_free(hw);
        rte_eth_dev_release_port(eth_dev);
 }
 
@@ -637,7 +635,6 @@ end:
        return ret;
 }
 
-/** Called by rte_eth_dev_detach() */
 static int
 virtio_user_pmd_remove(struct rte_vdev_device *vdev)
 {
@@ -662,7 +659,6 @@ virtio_user_pmd_remove(struct rte_vdev_device *vdev)
        dev = hw->virtio_user_dev;
        virtio_user_dev_uninit(dev);
 
-       rte_free(eth_dev->data->dev_private);
        rte_eth_dev_release_port(eth_dev);
 
        return 0;
diff --git a/drivers/net/vmxnet3/meson.build b/drivers/net/vmxnet3/meson.build
new file mode 100644 (file)
index 0000000..a92bd28
--- /dev/null
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+allow_experimental_apis = true
+sources += files(
+       'vmxnet3_ethdev.c',
+       'vmxnet3_rxtx.c',
+)
+
+error_cflags = [
+        '-Wno-unused-parameter', '-Wno-unused-value',
+        '-Wno-strict-aliasing', '-Wno-format-extra-args',
+]
+foreach flag: error_cflags
+        if cc.has_argument(flag)
+                cflags += flag
+        endif
+endforeach
index 2613cd1..41bcd45 100644 (file)
@@ -57,8 +57,7 @@
         DEV_RX_OFFLOAD_UDP_CKSUM |     \
         DEV_RX_OFFLOAD_TCP_CKSUM |     \
         DEV_RX_OFFLOAD_TCP_LRO |       \
-        DEV_RX_OFFLOAD_JUMBO_FRAME |   \
-        DEV_RX_OFFLOAD_CRC_STRIP)
+        DEV_RX_OFFLOAD_JUMBO_FRAME)
 
 static int eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev);
 static int eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev);
@@ -78,6 +77,7 @@ static int vmxnet3_dev_link_update(struct rte_eth_dev *dev,
 static void vmxnet3_hw_stats_save(struct vmxnet3_hw *hw);
 static int vmxnet3_dev_stats_get(struct rte_eth_dev *dev,
                                  struct rte_eth_stats *stats);
+static void vmxnet3_dev_stats_reset(struct rte_eth_dev *dev);
 static int vmxnet3_dev_xstats_get_names(struct rte_eth_dev *dev,
                                        struct rte_eth_xstat_name *xstats,
                                        unsigned int n);
@@ -120,6 +120,7 @@ static const struct eth_dev_ops vmxnet3_eth_dev_ops = {
        .stats_get            = vmxnet3_dev_stats_get,
        .xstats_get_names     = vmxnet3_dev_xstats_get_names,
        .xstats_get           = vmxnet3_dev_xstats_get,
+       .stats_reset          = vmxnet3_dev_stats_reset,
        .mac_addr_set         = vmxnet3_mac_addr_set,
        .dev_infos_get        = vmxnet3_dev_info_get,
        .dev_supported_ptypes_get = vmxnet3_dev_supported_ptypes_get,
@@ -160,8 +161,8 @@ gpa_zone_reserve(struct rte_eth_dev *dev, uint32_t size,
        char z_name[RTE_MEMZONE_NAMESIZE];
        const struct rte_memzone *mz;
 
-       snprintf(z_name, sizeof(z_name), "%s_%d_%s",
-                dev->device->driver->name, dev->data->port_id, post_string);
+       snprintf(z_name, sizeof(z_name), "eth_p%d_%s",
+                       dev->data->port_id, post_string);
 
        mz = rte_memzone_lookup(z_name);
        if (!reuse) {
@@ -335,6 +336,10 @@ eth_vmxnet3_dev_init(struct rte_eth_dev *eth_dev)
        memset(hw->saved_tx_stats, 0, sizeof(hw->saved_tx_stats));
        memset(hw->saved_rx_stats, 0, sizeof(hw->saved_rx_stats));
 
+       /* clear snapshot stats */
+       memset(hw->snapshot_tx_stats, 0, sizeof(hw->snapshot_tx_stats));
+       memset(hw->snapshot_rx_stats, 0, sizeof(hw->snapshot_rx_stats));
+
        /* set the initial link status */
        memset(&link, 0, sizeof(link));
        link.link_duplex = ETH_LINK_FULL_DUPLEX;
@@ -363,9 +368,6 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev)
        eth_dev->tx_pkt_burst = NULL;
        eth_dev->tx_pkt_prepare = NULL;
 
-       rte_free(eth_dev->data->mac_addrs);
-       eth_dev->data->mac_addrs = NULL;
-
        return 0;
 }
 
@@ -890,7 +892,49 @@ vmxnet3_hw_rx_stats_get(struct vmxnet3_hw *hw, unsigned int q,
        VMXNET3_UPDATE_RX_STAT(hw, q, pktsRxError, res);
        VMXNET3_UPDATE_RX_STAT(hw, q, pktsRxOutOfBuf, res);
 
-#undef VMXNET3_UPDATE_RX_STATS
+#undef VMXNET3_UPDATE_RX_STAT
+}
+
+static void
+vmxnet3_tx_stats_get(struct vmxnet3_hw *hw, unsigned int q,
+                                       struct UPT1_TxStats *res)
+{
+       vmxnet3_hw_tx_stats_get(hw, q, res);
+
+#define VMXNET3_REDUCE_SNAPSHOT_TX_STAT(h, i, f, r)    \
+               ((r)->f -= (h)->snapshot_tx_stats[(i)].f)
+
+       VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, ucastPktsTxOK, res);
+       VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, mcastPktsTxOK, res);
+       VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, bcastPktsTxOK, res);
+       VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, ucastBytesTxOK, res);
+       VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, mcastBytesTxOK, res);
+       VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, bcastBytesTxOK, res);
+       VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, pktsTxError, res);
+       VMXNET3_REDUCE_SNAPSHOT_TX_STAT(hw, q, pktsTxDiscard, res);
+
+#undef VMXNET3_REDUCE_SNAPSHOT_TX_STAT
+}
+
+static void
+vmxnet3_rx_stats_get(struct vmxnet3_hw *hw, unsigned int q,
+                                       struct UPT1_RxStats *res)
+{
+               vmxnet3_hw_rx_stats_get(hw, q, res);
+
+#define VMXNET3_REDUCE_SNAPSHOT_RX_STAT(h, i, f, r)    \
+               ((r)->f -= (h)->snapshot_rx_stats[(i)].f)
+
+       VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, ucastPktsRxOK, res);
+       VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, mcastPktsRxOK, res);
+       VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, bcastPktsRxOK, res);
+       VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, ucastBytesRxOK, res);
+       VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, mcastBytesRxOK, res);
+       VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, bcastBytesRxOK, res);
+       VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, pktsRxError, res);
+       VMXNET3_REDUCE_SNAPSHOT_RX_STAT(hw, q, pktsRxOutOfBuf, res);
+
+#undef VMXNET3_REDUCE_SNAPSHOT_RX_STAT
 }
 
 static void
@@ -1005,7 +1049,7 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
        RTE_BUILD_BUG_ON(RTE_ETHDEV_QUEUE_STAT_CNTRS < VMXNET3_MAX_TX_QUEUES);
        for (i = 0; i < hw->num_tx_queues; i++) {
-               vmxnet3_hw_tx_stats_get(hw, i, &txStats);
+               vmxnet3_tx_stats_get(hw, i, &txStats);
 
                stats->q_opackets[i] = txStats.ucastPktsTxOK +
                        txStats.mcastPktsTxOK +
@@ -1022,7 +1066,7 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
 
        RTE_BUILD_BUG_ON(RTE_ETHDEV_QUEUE_STAT_CNTRS < VMXNET3_MAX_RX_QUEUES);
        for (i = 0; i < hw->num_rx_queues; i++) {
-               vmxnet3_hw_rx_stats_get(hw, i, &rxStats);
+               vmxnet3_rx_stats_get(hw, i, &rxStats);
 
                stats->q_ipackets[i] = rxStats.ucastPktsRxOK +
                        rxStats.mcastPktsRxOK +
@@ -1043,6 +1087,30 @@ vmxnet3_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
        return 0;
 }
 
+static void
+vmxnet3_dev_stats_reset(struct rte_eth_dev *dev)
+{
+       unsigned int i;
+       struct vmxnet3_hw *hw = dev->data->dev_private;
+       struct UPT1_TxStats txStats;
+       struct UPT1_RxStats rxStats;
+
+       VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_GET_STATS);
+
+       RTE_BUILD_BUG_ON(RTE_ETHDEV_QUEUE_STAT_CNTRS < VMXNET3_MAX_TX_QUEUES);
+
+       for (i = 0; i < hw->num_tx_queues; i++) {
+               vmxnet3_hw_tx_stats_get(hw, i, &txStats);
+               memcpy(&hw->snapshot_tx_stats[i], &txStats,
+                       sizeof(hw->snapshot_tx_stats[0]));
+       }
+       for (i = 0; i < hw->num_rx_queues; i++) {
+               vmxnet3_hw_rx_stats_get(hw, i, &rxStats);
+               memcpy(&hw->snapshot_rx_stats[i], &rxStats,
+                       sizeof(hw->snapshot_rx_stats[0]));
+       }
+}
+
 static void
 vmxnet3_dev_info_get(struct rte_eth_dev *dev __rte_unused,
                     struct rte_eth_dev_info *dev_info)
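
The stats_reset added above is implemented in software because the device has no counter-clear command: reset snapshots the current hardware counters, and vmxnet3_tx/rx_stats_get() subtract that snapshot on every later read. The same snapshot-and-subtract idiom in isolation (types and names are illustrative):

    #include <stdint.h>

    /* Sketch: software "reset" for counters the hardware cannot clear. */
    struct sw_counter {
            uint64_t snapshot;      /* hw value at the last reset */
    };

    static void
    counter_reset(struct sw_counter *c, uint64_t hw_value)
    {
            c->snapshot = hw_value;
    }

    static uint64_t
    counter_read(const struct sw_counter *c, uint64_t hw_value)
    {
            return hw_value - c->snapshot;  /* unsigned math handles wrap */
    }
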
index d3f2b35..5bc3a84 100644
@@ -98,6 +98,9 @@ struct vmxnet3_hw {
 #define VMXNET3_VFT_TABLE_SIZE     (VMXNET3_VFT_SIZE * sizeof(uint32_t))
        UPT1_TxStats          saved_tx_stats[VMXNET3_MAX_TX_QUEUES];
        UPT1_RxStats          saved_rx_stats[VMXNET3_MAX_RX_QUEUES];
+
+       UPT1_TxStats          snapshot_tx_stats[VMXNET3_MAX_TX_QUEUES];
+       UPT1_RxStats          snapshot_rx_stats[VMXNET3_MAX_RX_QUEUES];
 };
 
 #define VMXNET3_REV_3          2               /* Vmxnet3 Rev. 3 */
index 9b863dd..9bd5ff2 100644
@@ -21,10 +21,11 @@ LDLIBS += -lrte_eal
 LDLIBS += -lrte_kvargs
 LDLIBS += -lrte_mempool_dpaa2
 LDLIBS += -lrte_rawdev
+LDLIBS += -lrte_common_dpaax
 
 EXPORT_MAP := rte_pmd_dpaa2_cmdif_version.map
 
-LIBABIVER := 1
+LIBABIVER := 2
 
 #
 # all sources are stored in SRCS-y
index 1d14687..37bb24a 100644
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright 2018 NXP
 
+version = 2
+
 build = dpdk_conf.has('RTE_LIBRTE_DPAA2_MEMPOOL')
 deps += ['rawdev', 'mempool_dpaa2', 'bus_vdev']
 sources = files('dpaa2_cmdif.c')
index d88809e..bdd99c9 100644
@@ -22,10 +22,11 @@ LDLIBS += -lrte_mempool
 LDLIBS += -lrte_mempool_dpaa2
 LDLIBS += -lrte_rawdev
 LDLIBS += -lrte_ring
+LDLIBS += -lrte_common_dpaax
 
 EXPORT_MAP := rte_pmd_dpaa2_qdma_version.map
 
-LIBABIVER := 1
+LIBABIVER := 2
 
 #
 # all sources are stored in SRCS-y
index 2787d30..f474442 100644
@@ -34,10 +34,10 @@ static struct qdma_hw_queue_list qdma_queue_list
        = TAILQ_HEAD_INITIALIZER(qdma_queue_list);
 
 /* QDMA Virtual Queues */
-struct qdma_virt_queue *qdma_vqs;
+static struct qdma_virt_queue *qdma_vqs;
 
 /* QDMA per core data */
-struct qdma_per_core_info qdma_core_info[RTE_MAX_LCORE];
+static struct qdma_per_core_info qdma_core_info[RTE_MAX_LCORE];
 
 static struct qdma_hw_queue *
 alloc_hw_queue(uint32_t lcore_id)
@@ -805,7 +805,7 @@ dpaa2_dpdmai_dev_uninit(struct rte_rawdev *rawdev)
                DPAA2_QDMA_ERR("dmdmai disable failed");
 
        /* Free the DQRR storage for Rx */
-       for (i = 0; i < DPDMAI_PRIO_NUM; i++) {
+       for (i = 0; i < dpdmai_dev->num_queues; i++) {
                struct dpaa2_queue *rxq = &(dpdmai_dev->rx_queue[i]);
 
                if (rxq->q_storage) {
@@ -856,17 +856,17 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
                               ret);
                goto init_err;
        }
-       dpdmai_dev->num_queues = attr.num_of_priorities;
+       dpdmai_dev->num_queues = attr.num_of_queues;
 
        /* Set up Rx Queues */
-       for (i = 0; i < attr.num_of_priorities; i++) {
+       for (i = 0; i < dpdmai_dev->num_queues; i++) {
                struct dpaa2_queue *rxq;
 
                memset(&rx_queue_cfg, 0, sizeof(struct dpdmai_rx_queue_cfg));
                ret = dpdmai_set_rx_queue(&dpdmai_dev->dpdmai,
                                          CMD_PRI_LOW,
                                          dpdmai_dev->token,
-                                         i, &rx_queue_cfg);
+                                         i, 0, &rx_queue_cfg);
                if (ret) {
                        DPAA2_QDMA_ERR("Setting Rx queue failed with err: %d",
                                       ret);
@@ -893,9 +893,9 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
        }
 
        /* Get Rx and Tx queues FQID's */
-       for (i = 0; i < DPDMAI_PRIO_NUM; i++) {
+       for (i = 0; i < dpdmai_dev->num_queues; i++) {
                ret = dpdmai_get_rx_queue(&dpdmai_dev->dpdmai, CMD_PRI_LOW,
-                                         dpdmai_dev->token, i, &rx_attr);
+                                         dpdmai_dev->token, i, 0, &rx_attr);
                if (ret) {
                        DPAA2_QDMA_ERR("Reading device failed with err: %d",
                                       ret);
@@ -904,7 +904,7 @@ dpaa2_dpdmai_dev_init(struct rte_rawdev *rawdev, int dpdmai_id)
                dpdmai_dev->rx_queue[i].fqid = rx_attr.fqid;
 
                ret = dpdmai_get_tx_queue(&dpdmai_dev->dpdmai, CMD_PRI_LOW,
-                                         dpdmai_dev->token, i, &tx_attr);
+                                         dpdmai_dev->token, i, 0, &tx_attr);
                if (ret) {
                        DPAA2_QDMA_ERR("Reading device failed with err: %d",
                                       ret);
index c6a0578..0cbe902 100644
@@ -11,6 +11,8 @@ struct qdma_io_meta;
 #define DPAA2_QDMA_MAX_FLE 3
 #define DPAA2_QDMA_MAX_SDD 2
 
+#define DPAA2_DPDMAI_MAX_QUEUES        8
+
 /** FLE pool size: 3 Frame list + 2 source/destination descriptor */
 #define QDMA_FLE_POOL_SIZE (sizeof(struct qdma_io_meta) + \
                sizeof(struct qbman_fle) * DPAA2_QDMA_MAX_FLE + \
@@ -142,9 +144,9 @@ struct dpaa2_dpdmai_dev {
        /** Number of queues in this DPDMAI device */
        uint8_t num_queues;
        /** RX queues */
-       struct dpaa2_queue rx_queue[DPDMAI_PRIO_NUM];
+       struct dpaa2_queue rx_queue[DPAA2_DPDMAI_MAX_QUEUES];
        /** TX queues */
-       struct dpaa2_queue tx_queue[DPDMAI_PRIO_NUM];
+       struct dpaa2_queue tx_queue[DPAA2_DPDMAI_MAX_QUEUES];
 };
 
 #endif /* __DPAA2_QDMA_H__ */
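
The arrays above are now sized by a driver-defined worst case (DPAA2_DPDMAI_MAX_QUEUES) while the runtime loops iterate over attr.num_of_queues reported by the hardware, instead of assuming the priority count equals the queue count. A sketch of that bounded-query pattern; all names here are hypothetical stand-ins, not driver API:

    #include <stdint.h>

    #define MAX_QUEUES 8    /* compile-time worst case */

    struct queue_state {
            uint32_t fqid;
    };

    /* Sketch: iterate only over the queues the device reports, and fail
     * loudly if that exceeds the static bound. */
    static int
    setup_queues(uint8_t reported, struct queue_state qs[MAX_QUEUES])
    {
            uint8_t i;

            if (reported > MAX_QUEUES)
                    return -1;      /* would overflow the arrays */
            for (i = 0; i < reported; i++)
                    qs[i].fqid = 0; /* placeholder per-queue init */
            return 0;
    }
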
index b6a081f..2a4b69c 100644
@@ -1,6 +1,8 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright 2018 NXP
 
+version = 2
+
 build = dpdk_conf.has('RTE_LIBRTE_DPAA2_MEMPOOL')
 deps += ['rawdev', 'mempool_dpaa2', 'ring']
 sources = files('dpaa2_qdma.c')
index f0939dc..848e518 100644
@@ -104,14 +104,14 @@ static struct feature_info port_features[] = {
                .resource_size = sizeof(struct feature_port_header),
                .feature_index = PORT_FEATURE_ID_HEADER,
                .revision_id = PORT_HEADER_REVISION,
-               .ops = &port_hdr_ops,
+               .ops = &ifpga_rawdev_port_hdr_ops,
        },
        {
                .name = PORT_FEATURE_ERR,
                .resource_size = sizeof(struct feature_port_error),
                .feature_index = PORT_FEATURE_ID_ERROR,
                .revision_id = PORT_ERR_REVISION,
-               .ops = &port_error_ops,
+               .ops = &ifpga_rawdev_port_error_ops,
        },
        {
                .name = PORT_FEATURE_UMSG,
@@ -124,14 +124,14 @@ static struct feature_info port_features[] = {
                .resource_size = sizeof(struct feature_port_uint),
                .feature_index = PORT_FEATURE_ID_UINT,
                .revision_id = PORT_UINT_REVISION,
-               .ops = &port_uint_ops,
+               .ops = &ifpga_rawdev_port_uint_ops,
        },
        {
                .name = PORT_FEATURE_STP,
                .resource_size = PORT_FEATURE_STP_REGION_SIZE,
                .feature_index = PORT_FEATURE_ID_STP,
                .revision_id = PORT_STP_REVISION,
-               .ops = &port_stp_ops,
+               .ops = &ifpga_rawdev_port_stp_ops,
        },
        {
                .name = PORT_FEATURE_UAFU,
index 7a39a58..4391f2f 100644
@@ -156,10 +156,10 @@ struct fpga_uafu_irq_set {
 
 int port_set_irq(struct ifpga_port_hw *port, u32 feature_id, void *irq_set);
 
-extern struct feature_ops port_hdr_ops;
-extern struct feature_ops port_error_ops;
-extern struct feature_ops port_stp_ops;
-extern struct feature_ops port_uint_ops;
+extern struct feature_ops ifpga_rawdev_port_hdr_ops;
+extern struct feature_ops ifpga_rawdev_port_error_ops;
+extern struct feature_ops ifpga_rawdev_port_stp_ops;
+extern struct feature_ops ifpga_rawdev_port_uint_ops;
 
 /* help functions for feature ops */
 int fpga_msix_set_block(struct feature *feature, unsigned int start,
index a962f5b..8b5668d 100644
@@ -326,7 +326,7 @@ static int port_hdr_set_prop(struct feature *feature, struct feature_prop *prop)
        return -ENOENT;
 }
 
-struct feature_ops port_hdr_ops = {
+struct feature_ops ifpga_rawdev_port_hdr_ops = {
        .init = port_hdr_init,
        .uinit = port_hdr_uinit,
        .get_prop = port_hdr_get_prop,
@@ -354,7 +354,7 @@ static void port_stp_uinit(struct feature *feature)
        dev_info(NULL, "port stp uinit.\n");
 }
 
-struct feature_ops port_stp_ops = {
+struct feature_ops ifpga_rawdev_port_stp_ops = {
        .init = port_stp_init,
        .uinit = port_stp_uinit,
 };
@@ -382,7 +382,7 @@ static void port_uint_uinit(struct feature *feature)
        dev_info(NULL, "PORT UINT UInit.\n");
 }
 
-struct feature_ops port_uint_ops = {
+struct feature_ops ifpga_rawdev_port_uint_ops = {
        .init = port_uint_init,
        .uinit = port_uint_uinit,
 };
index 23db562..9dd1cf5 100644
@@ -136,7 +136,7 @@ static int port_error_set_prop(struct feature *feature,
        return -ENOENT;
 }
 
-struct feature_ops port_error_ops = {
+struct feature_ops ifpga_rawdev_port_error_ops = {
        .init = port_error_init,
        .uinit = port_error_uinit,
        .get_prop = port_error_get_prop,
index cb65535..03f5112 100644
@@ -18,8 +18,8 @@ sources = [
 ]
 
 error_cflags = ['-Wno-sign-compare', '-Wno-unused-value',
-               '-Wno-format', '-Wno-unused-but-set-variable',
-               '-Wno-strict-aliasing'
+               '-Wno-format', '-Wno-error=format-security',
+               '-Wno-strict-aliasing', '-Wno-unused-but-set-variable'
 ]
 c_args = cflags
 foreach flag: error_cflags
index 3fed057..32e318f 100644
@@ -542,6 +542,7 @@ ifpga_cfg_probe(struct rte_vdev_device *dev)
        int port;
        char *name = NULL;
        char dev_name[RTE_RAWDEV_NAME_MAX_LEN];
+       int ret = -1;
 
        devargs = dev->device.devargs;
 
@@ -583,7 +584,7 @@ ifpga_cfg_probe(struct rte_vdev_device *dev)
        snprintf(dev_name, RTE_RAWDEV_NAME_MAX_LEN, "%d|%s",
        port, name);
 
-       rte_eal_hotplug_add(RTE_STR(IFPGA_BUS_NAME),
+       ret = rte_eal_hotplug_add(RTE_STR(IFPGA_BUS_NAME),
                        dev_name, devargs->args);
 end:
        if (kvlist)
@@ -591,7 +592,7 @@ end:
        if (name)
                free(name);
 
-       return 0;
+       return ret;
 }
 
 static int
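
The probe fix above stops swallowing the hotplug result: ret now carries the return value of rte_eal_hotplug_add() back to the caller instead of a hard-coded 0. The pattern in isolation; the bus and helper names are placeholders, not the driver's identifiers:

    #include <rte_dev.h>

    /* Sketch: propagate the hotplug result instead of returning 0. */
    static int
    probe_one(const char *devname, const char *devargs)
    {
            int ret = rte_eal_hotplug_add("ifpga", devname, devargs);

            if (ret < 0)
                    return ret;     /* caller sees the real error */
            return 0;
    }
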
index 6518a2d..d7630fc 100644
@@ -32,7 +32,7 @@
 int skeleton_pmd_logtype;
 
 /* Count of instances */
-uint16_t skeldev_init_once;
+static uint16_t skeldev_init_once;
 
 /**< Rawdev Skeleton dummy driver name */
 #define SKELETON_PMD_RAWDEV_NAME rawdev_skeleton
index 3405b89..359c9e2 100644
@@ -294,13 +294,14 @@ test_rawdev_attr_set_get(void)
                              "Attribute (Test1) not set correctly (%" PRIu64 ")",
                              ret_value);
 
+       free(dummy_value);
+
        ret_value = 0;
        ret = rte_rawdev_get_attr(TEST_DEV_ID, "Test2", &ret_value);
        RTE_TEST_ASSERT_EQUAL(*((int *)(uintptr_t)ret_value), 200,
                              "Attribute (Test2) not set correctly (%" PRIu64 ")",
                              ret_value);
 
-       free(dummy_value);
        return TEST_SUCCESS;
 }
 
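
Rawdev attributes travel as plain 64-bit values, so this test stores a heap pointer in the uint64_t; moving free(dummy_value) up releases it right after its last dereference, so an assertion failure on the second attribute no longer leaks it. The pointer-in-uint64 round trip, sketched with hypothetical names:

    #include <stdint.h>
    #include <stdlib.h>

    /* Sketch: recover a pointer stored in a uint64_t attribute slot,
     * check the value, and free it exactly once after the last use. */
    static int
    read_and_release(uint64_t slot, int expected)
    {
            int *value = (int *)(uintptr_t)slot;
            int ok = (*value == expected);

            free(value);            /* last use above: safe to free */
            return ok ? 0 : -1;
    }
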
index 481720c..356fcb1 100644
@@ -65,7 +65,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_HASH),y)
 DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += tep_termination
 endif
 DIRS-$(CONFIG_RTE_LIBRTE_TIMER) += timer
-DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost vhost_scsi
+DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost vhost_scsi vdpa
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
 DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_crypto
 endif
index 045a190..d68c06a 100644
@@ -64,7 +64,6 @@ static const struct rte_eth_conf port_conf = {
                .mq_mode = ETH_MQ_RX_NONE,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
index 23d0981..b282e68 100644
@@ -122,7 +122,6 @@ static struct rte_eth_conf port_conf = {
                .mq_mode = ETH_MQ_RX_NONE,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        },
        .rx_adv_conf = {
                .rss_conf = {
index 7893c85..a617cce 100644
@@ -56,7 +56,6 @@ SRCS-y := main.c commands.c parse_obj_list.c
 
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
-CFLAGS_parse_obj_list.o := -D_GNU_SOURCE
 
 include $(RTE_SDK)/mk/rte.extapp.mk
 
index 4cd9efd..9ecfc0b 100644
@@ -16,7 +16,7 @@ APP = ethtool
 # all source are stored in SRCS-y
 SRCS-y := main.c ethapp.c
 
-CFLAGS += -O3 -D_GNU_SOURCE -pthread -I$(SRCDIR)/../lib
+CFLAGS += -O3 -pthread -I$(SRCDIR)/../lib
 CFLAGS += $(WERROR_FLAGS)
 
 LDLIBS += -L$(subst ethtool-app,lib,$(RTE_OUTPUT))/lib
index 700bc69..92e08bc 100644
@@ -26,20 +26,6 @@ core_in_use(unsigned int lcore_id) {
                fdata->tx_core[lcore_id] || fdata->worker_core[lcore_id]);
 }
 
-static void
-eth_tx_buffer_retry(struct rte_mbuf **pkts, uint16_t unsent,
-                       void *userdata)
-{
-       int port_id = (uintptr_t) userdata;
-       unsigned int _sent = 0;
-
-       do {
-               /* Note: hard-coded TX queue */
-               _sent += rte_eth_tx_burst(port_id, 0, &pkts[_sent],
-                                         unsent - _sent);
-       } while (_sent != unsent);
-}
-
 /*
  * Parse the coremask given as argument (hexadecimal string) and fill
  * the global configuration (core role and core count) with the parsed
@@ -263,6 +249,7 @@ parse_app_args(int argc, char **argv)
 static inline int
 port_init(uint8_t port, struct rte_mempool *mbuf_pool)
 {
+       struct rte_eth_rxconf rx_conf;
        static const struct rte_eth_conf port_conf_default = {
                .rxmode = {
                        .mq_mode = ETH_MQ_RX_RSS,
@@ -291,6 +278,8 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool)
        if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
                port_conf.txmode.offloads |=
                        DEV_TX_OFFLOAD_MBUF_FAST_FREE;
+       rx_conf = dev_info.default_rxconf;
+       rx_conf.offloads = port_conf.rxmode.offloads;
 
        port_conf.rx_adv_conf.rss_conf.rss_hf &=
                dev_info.flow_type_rss_offloads;
@@ -311,7 +300,8 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool)
        /* Allocate and set up 1 RX queue per Ethernet port. */
        for (q = 0; q < rx_rings; q++) {
                retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
-                               rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+                               rte_eth_dev_socket_id(port), &rx_conf,
+                               mbuf_pool);
                if (retval < 0)
                        return retval;
        }
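
Passing &rx_conf instead of NULL matters in 18.11 because the per-queue offloads must agree with the port-level configuration; starting from dev_info.default_rxconf keeps the PMD's tuned thresholds and only overrides the offload mask. The same derivation in isolation; the ring size and helper name are illustrative:

    #include <rte_ethdev.h>

    /* Sketch: derive the queue config from the driver defaults plus the
     * port-level Rx offloads. */
    static int
    setup_rx_queue(uint16_t port_id, uint16_t queue_id,
                   const struct rte_eth_conf *port_conf,
                   struct rte_mempool *mp)
    {
            struct rte_eth_dev_info dev_info;
            struct rte_eth_rxconf rxq_conf;

            rte_eth_dev_info_get(port_id, &dev_info);
            rxq_conf = dev_info.default_rxconf;     /* PMD defaults */
            rxq_conf.offloads = port_conf->rxmode.offloads;
            return rte_eth_rx_queue_setup(port_id, queue_id, 512,
                            rte_eth_dev_socket_id(port_id), &rxq_conf, mp);
    }
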
@@ -350,7 +340,7 @@ port_init(uint8_t port, struct rte_mempool *mbuf_pool)
 static int
 init_ports(uint16_t num_ports)
 {
-       uint16_t portid, i;
+       uint16_t portid;
 
        if (!cdata.num_mbuf)
                cdata.num_mbuf = 16384 * num_ports;
@@ -367,36 +357,26 @@ init_ports(uint16_t num_ports)
                        rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu16 "\n",
                                        portid);
 
-       RTE_ETH_FOREACH_DEV(i) {
-               void *userdata = (void *)(uintptr_t) i;
-               fdata->tx_buf[i] =
-                       rte_malloc(NULL, RTE_ETH_TX_BUFFER_SIZE(32), 0);
-               if (fdata->tx_buf[i] == NULL)
-                       rte_panic("Out of memory\n");
-               rte_eth_tx_buffer_init(fdata->tx_buf[i], 32);
-               rte_eth_tx_buffer_set_err_callback(fdata->tx_buf[i],
-                                                  eth_tx_buffer_retry,
-                                                  userdata);
-       }
-
        return 0;
 }
 
 static void
 do_capability_setup(uint8_t eventdev_id)
 {
+       int ret;
        uint16_t i;
-       uint8_t mt_unsafe = 0;
+       uint8_t generic_pipeline = 0;
        uint8_t burst = 0;
 
        RTE_ETH_FOREACH_DEV(i) {
-               struct rte_eth_dev_info dev_info;
-               memset(&dev_info, 0, sizeof(struct rte_eth_dev_info));
-
-               rte_eth_dev_info_get(i, &dev_info);
-               /* Check if it is safe ask worker to tx. */
-               mt_unsafe |= !(dev_info.tx_offload_capa &
-                               DEV_TX_OFFLOAD_MT_LOCKFREE);
+               uint32_t caps = 0;
+
+               ret = rte_event_eth_tx_adapter_caps_get(eventdev_id, i, &caps);
+               if (ret)
+                       rte_exit(EXIT_FAILURE,
+                               "Invalid capability for Tx adptr port %d\n", i);
+               generic_pipeline |= !(caps &
+                               RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT);
        }
 
        struct rte_event_dev_info eventdev_info;
@@ -406,21 +386,42 @@ do_capability_setup(uint8_t eventdev_id)
        burst = eventdev_info.event_dev_cap & RTE_EVENT_DEV_CAP_BURST_MODE ? 1 :
                0;
 
-       if (mt_unsafe)
+       if (generic_pipeline)
                set_worker_generic_setup_data(&fdata->cap, burst);
        else
-               set_worker_tx_setup_data(&fdata->cap, burst);
+               set_worker_tx_enq_setup_data(&fdata->cap, burst);
 }
 
 static void
 signal_handler(int signum)
 {
+       static uint8_t once;
+       uint16_t portid;
+
        if (fdata->done)
                rte_exit(1, "Exiting on signal %d\n", signum);
-       if (signum == SIGINT || signum == SIGTERM) {
+       if ((signum == SIGINT || signum == SIGTERM) && !once) {
                printf("\n\nSignal %d received, preparing to exit...\n",
                                signum);
+               if (cdata.dump_dev)
+                       rte_event_dev_dump(0, stdout);
+               once = 1;
                fdata->done = 1;
+               rte_smp_wmb();
+
+               RTE_ETH_FOREACH_DEV(portid) {
+                       rte_event_eth_rx_adapter_stop(portid);
+                       rte_event_eth_tx_adapter_stop(portid);
+                       rte_eth_dev_stop(portid);
+               }
+
+               rte_eal_mp_wait_lcore();
+
+               RTE_ETH_FOREACH_DEV(portid) {
+                       rte_eth_dev_close(portid);
+               }
+
+               rte_event_dev_close(0);
        }
        if (signum == SIGTSTP)
                rte_event_dev_dump(0, stdout);
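
The enlarged handler tears the pipeline down in dependency order on the first SIGINT/SIGTERM: publish done behind a write barrier, stop the Rx/Tx adapters and Ethernet ports, wait for the worker lcores to drain, then close the ports and the event device; the once flag lets a second signal fall through to the hard exit at the top. A condensed sketch of that order, assuming event device 0 and the sample's one-adapter-per-port numbering:

    #include <rte_atomic.h>
    #include <rte_ethdev.h>
    #include <rte_eventdev.h>
    #include <rte_event_eth_rx_adapter.h>
    #include <rte_event_eth_tx_adapter.h>
    #include <rte_launch.h>

    /* Sketch: stop producers, wait for consumers, then close devices. */
    static void
    pipeline_teardown(volatile int *done)
    {
            uint16_t port;

            *done = 1;
            rte_smp_wmb();          /* make 'done' visible to workers */
            RTE_ETH_FOREACH_DEV(port) {
                    rte_event_eth_rx_adapter_stop(port);
                    rte_event_eth_tx_adapter_stop(port);
                    rte_eth_dev_stop(port);
            }
            rte_eal_mp_wait_lcore(); /* workers observe 'done' and return */
            RTE_ETH_FOREACH_DEV(port)
                    rte_eth_dev_close(port);
            rte_event_dev_close(0);
    }
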
@@ -499,7 +500,7 @@ main(int argc, char **argv)
        if (worker_data == NULL)
                rte_panic("rte_calloc failed\n");
 
-       int dev_id = fdata->cap.evdev_setup(&cons_data, worker_data);
+       int dev_id = fdata->cap.evdev_setup(worker_data);
        if (dev_id < 0)
                rte_exit(EXIT_FAILURE, "Error setting up eventdev\n");
 
@@ -524,8 +525,8 @@ main(int argc, char **argv)
 
                if (fdata->tx_core[lcore_id])
                        printf(
-                               "[%s()] lcore %d executing NIC Tx, and using eventdev port %u\n",
-                               __func__, lcore_id, cons_data.port_id);
+                               "[%s()] lcore %d executing NIC Tx\n",
+                               __func__, lcore_id);
 
                if (fdata->sched_core[lcore_id])
                        printf("[%s()] lcore %d executing scheduler\n",
@@ -555,9 +556,6 @@ main(int argc, char **argv)
 
        rte_eal_mp_wait_lcore();
 
-       if (cdata.dump_dev)
-               rte_event_dev_dump(dev_id, stdout);
-
        if (!cdata.quiet && (port_stat(dev_id, worker_data[0].port_id) !=
                        (uint64_t)-ENOTSUP)) {
                printf("\nPort Workload distribution:\n");
index 9703396..a6cc912 100644
@@ -16,6 +16,7 @@
 #include <rte_ethdev.h>
 #include <rte_eventdev.h>
 #include <rte_event_eth_rx_adapter.h>
+#include <rte_event_eth_tx_adapter.h>
 #include <rte_service.h>
 #include <rte_service_component.h>
 
 #define BATCH_SIZE 16
 #define MAX_NUM_CORE 64
 
-struct cons_data {
-       uint8_t dev_id;
-       uint8_t port_id;
-       uint8_t release;
-} __rte_cache_aligned;
-
 struct worker_data {
        uint8_t dev_id;
        uint8_t port_id;
 } __rte_cache_aligned;
 
 typedef int (*worker_loop)(void *);
-typedef int (*consumer_loop)(void);
 typedef void (*schedule_loop)(unsigned int);
-typedef int (*eventdev_setup)(struct cons_data *, struct worker_data *);
-typedef void (*rx_adapter_setup)(uint16_t nb_ports);
+typedef int (*eventdev_setup)(struct worker_data *);
+typedef void (*adapter_setup)(uint16_t nb_ports);
 typedef void (*opt_check)(void);
 
 struct setup_data {
        worker_loop worker;
-       consumer_loop consumer;
        schedule_loop scheduler;
        eventdev_setup evdev_setup;
-       rx_adapter_setup adptr_setup;
+       adapter_setup adptr_setup;
        opt_check check_opt;
 };
 
 struct fastpath_data {
        volatile int done;
-       uint32_t tx_lock;
        uint32_t evdev_service_id;
        uint32_t rxadptr_service_id;
+       uint32_t txadptr_service_id;
        bool rx_single;
        bool tx_single;
        bool sched_single;
@@ -62,7 +55,6 @@ struct fastpath_data {
        unsigned int tx_core[MAX_NUM_CORE];
        unsigned int sched_core[MAX_NUM_CORE];
        unsigned int worker_core[MAX_NUM_CORE];
-       struct rte_eth_dev_tx_buffer *tx_buf[RTE_MAX_ETHPORTS];
        struct setup_data cap;
 } __rte_cache_aligned;
 
@@ -88,6 +80,8 @@ struct config_data {
        int16_t next_qid[MAX_NUM_STAGES+2];
        int16_t qid[MAX_NUM_STAGES];
        uint8_t rx_adapter_id;
+       uint8_t tx_adapter_id;
+       uint8_t tx_queue_id;
        uint64_t worker_lcore_mask;
        uint64_t rx_lcore_mask;
        uint64_t tx_lcore_mask;
@@ -99,8 +93,6 @@ struct port_link {
        uint8_t priority;
 };
 
-struct cons_data cons_data;
-
 struct fastpath_data *fdata;
 struct config_data cdata;
 
@@ -142,12 +134,11 @@ schedule_devices(unsigned int lcore_id)
                }
        }
 
-       if (fdata->tx_core[lcore_id] && (fdata->tx_single ||
-                        rte_atomic32_cmpset(&(fdata->tx_lock), 0, 1))) {
-               fdata->cap.consumer();
-               rte_atomic32_clear((rte_atomic32_t *)&(fdata->tx_lock));
+       if (fdata->tx_core[lcore_id]) {
+               rte_service_run_iter_on_app_lcore(fdata->txadptr_service_id,
+                               !fdata->tx_single);
        }
 }
 
 void set_worker_generic_setup_data(struct setup_data *caps, bool burst);
-void set_worker_tx_setup_data(struct setup_data *caps, bool burst);
+void set_worker_tx_enq_setup_data(struct setup_data *caps, bool burst);
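
setup_data acts as a small capability-driven vtable: main.c selects the generic pipeline (Tx through an adapter service) or the tx-enq pipeline (workers enqueue straight to an internal-port Tx adapter) by probing RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT, and the rest of the app only calls through these pointers. The dispatch in isolation; choose_pipeline is a hypothetical wrapper around the two setup functions declared above:

    #include <stdbool.h>
    #include <rte_ethdev.h>
    #include <rte_event_eth_tx_adapter.h>

    /* Sketch: one port without an internal event port forces the
     * generic, service-driven pipeline. */
    static void
    choose_pipeline(uint8_t evdev_id, struct setup_data *cap, bool burst)
    {
            uint16_t port;
            uint32_t caps;
            uint8_t generic = 0;

            RTE_ETH_FOREACH_DEV(port) {
                    if (rte_event_eth_tx_adapter_caps_get(evdev_id, port,
                                            &caps) == 0 &&
                        !(caps & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT))
                            generic = 1;
            }
            if (generic)
                    set_worker_generic_setup_data(cap, burst);
            else
                    set_worker_tx_enq_setup_data(cap, burst);
    }
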
index 2215e9e..1690649 100644
@@ -119,153 +119,13 @@ worker_generic_burst(void *arg)
        return 0;
 }
 
-static __rte_always_inline int
-consumer(void)
-{
-       const uint64_t freq_khz = rte_get_timer_hz() / 1000;
-       struct rte_event packet;
-
-       static uint64_t received;
-       static uint64_t last_pkts;
-       static uint64_t last_time;
-       static uint64_t start_time;
-       int i;
-       uint8_t dev_id = cons_data.dev_id;
-       uint8_t port_id = cons_data.port_id;
-
-       do {
-               uint16_t n = rte_event_dequeue_burst(dev_id, port_id,
-                               &packet, 1, 0);
-
-               if (n == 0) {
-                       RTE_ETH_FOREACH_DEV(i)
-                               rte_eth_tx_buffer_flush(i, 0, fdata->tx_buf[i]);
-                       return 0;
-               }
-               if (start_time == 0)
-                       last_time = start_time = rte_get_timer_cycles();
-
-               received++;
-               uint8_t outport = packet.mbuf->port;
-
-               exchange_mac(packet.mbuf);
-               rte_eth_tx_buffer(outport, 0, fdata->tx_buf[outport],
-                               packet.mbuf);
-
-               if (cons_data.release)
-                       rte_event_enqueue_burst(dev_id, port_id,
-                                                               &packet, n);
-
-               /* Print out mpps every 1<22 packets */
-               if (!cdata.quiet && received >= last_pkts + (1<<22)) {
-                       const uint64_t now = rte_get_timer_cycles();
-                       const uint64_t total_ms = (now - start_time) / freq_khz;
-                       const uint64_t delta_ms = (now - last_time) / freq_khz;
-                       uint64_t delta_pkts = received - last_pkts;
-
-                       printf("# %s RX=%"PRIu64", time %"PRIu64 "ms, "
-                                       "avg %.3f mpps [current %.3f mpps]\n",
-                                       __func__,
-                                       received,
-                                       total_ms,
-                                       received / (total_ms * 1000.0),
-                                       delta_pkts / (delta_ms * 1000.0));
-                       last_pkts = received;
-                       last_time = now;
-               }
-
-               cdata.num_packets--;
-               if (cdata.num_packets <= 0)
-                       fdata->done = 1;
-       /* Be stuck in this loop if single. */
-       } while (!fdata->done && fdata->tx_single);
-
-       return 0;
-}
-
-static __rte_always_inline int
-consumer_burst(void)
-{
-       const uint64_t freq_khz = rte_get_timer_hz() / 1000;
-       struct rte_event packets[BATCH_SIZE];
-
-       static uint64_t received;
-       static uint64_t last_pkts;
-       static uint64_t last_time;
-       static uint64_t start_time;
-       unsigned int i, j;
-       uint8_t dev_id = cons_data.dev_id;
-       uint8_t port_id = cons_data.port_id;
-
-       do {
-               uint16_t n = rte_event_dequeue_burst(dev_id, port_id,
-                               packets, RTE_DIM(packets), 0);
-
-               if (n == 0) {
-                       RTE_ETH_FOREACH_DEV(j)
-                               rte_eth_tx_buffer_flush(j, 0, fdata->tx_buf[j]);
-                       return 0;
-               }
-               if (start_time == 0)
-                       last_time = start_time = rte_get_timer_cycles();
-
-               received += n;
-               for (i = 0; i < n; i++) {
-                       uint8_t outport = packets[i].mbuf->port;
-
-                       exchange_mac(packets[i].mbuf);
-                       rte_eth_tx_buffer(outport, 0, fdata->tx_buf[outport],
-                                       packets[i].mbuf);
-
-                       packets[i].op = RTE_EVENT_OP_RELEASE;
-               }
-
-               if (cons_data.release) {
-                       uint16_t nb_tx;
-
-                       nb_tx = rte_event_enqueue_burst(dev_id, port_id,
-                                                               packets, n);
-                       while (nb_tx < n)
-                               nb_tx += rte_event_enqueue_burst(dev_id,
-                                               port_id, packets + nb_tx,
-                                               n - nb_tx);
-               }
-
-               /* Print out mpps every 1<22 packets */
-               if (!cdata.quiet && received >= last_pkts + (1<<22)) {
-                       const uint64_t now = rte_get_timer_cycles();
-                       const uint64_t total_ms = (now - start_time) / freq_khz;
-                       const uint64_t delta_ms = (now - last_time) / freq_khz;
-                       uint64_t delta_pkts = received - last_pkts;
-
-                       printf("# consumer RX=%"PRIu64", time %"PRIu64 "ms, "
-                                       "avg %.3f mpps [current %.3f mpps]\n",
-                                       received,
-                                       total_ms,
-                                       received / (total_ms * 1000.0),
-                                       delta_pkts / (delta_ms * 1000.0));
-                       last_pkts = received;
-                       last_time = now;
-               }
-
-               cdata.num_packets -= n;
-               if (cdata.num_packets <= 0)
-                       fdata->done = 1;
-       /* Be stuck in this loop if single. */
-       } while (!fdata->done && fdata->tx_single);
-
-       return 0;
-}
-
 static int
-setup_eventdev_generic(struct cons_data *cons_data,
-               struct worker_data *worker_data)
+setup_eventdev_generic(struct worker_data *worker_data)
 {
        const uint8_t dev_id = 0;
        /* +1 stages is for a SINGLE_LINK TX stage */
        const uint8_t nb_queues = cdata.num_stages + 1;
-       /* + 1 is one port for consumer */
-       const uint8_t nb_ports = cdata.num_workers + 1;
+       const uint8_t nb_ports = cdata.num_workers;
        struct rte_event_dev_config config = {
                        .nb_event_queues = nb_queues,
                        .nb_event_ports = nb_ports,
@@ -285,11 +145,6 @@ setup_eventdev_generic(struct cons_data *cons_data,
                        .nb_atomic_flows = 1024,
                .nb_atomic_order_sequences = 1024,
        };
-       struct rte_event_port_conf tx_p_conf = {
-                       .dequeue_depth = 128,
-                       .enqueue_depth = 128,
-                       .new_event_threshold = 4096,
-       };
        struct rte_event_queue_conf tx_q_conf = {
                        .priority = RTE_EVENT_DEV_PRIORITY_HIGHEST,
                        .event_queue_cfg = RTE_EVENT_QUEUE_CFG_SINGLE_LINK,
@@ -297,7 +152,6 @@ setup_eventdev_generic(struct cons_data *cons_data,
 
        struct port_link worker_queues[MAX_NUM_STAGES];
        uint8_t disable_implicit_release;
-       struct port_link tx_queue;
        unsigned int i;
 
        int ret, ndev = rte_event_dev_count();
@@ -314,7 +168,6 @@ setup_eventdev_generic(struct cons_data *cons_data,
                        RTE_EVENT_DEV_CAP_IMPLICIT_RELEASE_DISABLE);
 
        wkr_p_conf.disable_implicit_release = disable_implicit_release;
-       tx_p_conf.disable_implicit_release = disable_implicit_release;
 
        if (dev_info.max_event_port_dequeue_depth <
                        config.nb_event_port_dequeue_depth)
@@ -372,8 +225,7 @@ setup_eventdev_generic(struct cons_data *cons_data,
                printf("%d: error creating qid %d\n", __LINE__, i);
                return -1;
        }
-       tx_queue.queue_id = i;
-       tx_queue.priority = RTE_EVENT_DEV_PRIORITY_HIGHEST;
+       cdata.tx_queue_id = i;
 
        if (wkr_p_conf.dequeue_depth > config.nb_event_port_dequeue_depth)
                wkr_p_conf.dequeue_depth = config.nb_event_port_dequeue_depth;
@@ -403,26 +255,6 @@ setup_eventdev_generic(struct cons_data *cons_data,
                w->port_id = i;
        }
 
-       if (tx_p_conf.dequeue_depth > config.nb_event_port_dequeue_depth)
-               tx_p_conf.dequeue_depth = config.nb_event_port_dequeue_depth;
-       if (tx_p_conf.enqueue_depth > config.nb_event_port_enqueue_depth)
-               tx_p_conf.enqueue_depth = config.nb_event_port_enqueue_depth;
-
-       /* port for consumer, linked to TX queue */
-       if (rte_event_port_setup(dev_id, i, &tx_p_conf) < 0) {
-               printf("Error setting up port %d\n", i);
-               return -1;
-       }
-       if (rte_event_port_link(dev_id, i, &tx_queue.queue_id,
-                               &tx_queue.priority, 1) != 1) {
-               printf("%d: error creating link for port %d\n",
-                               __LINE__, i);
-               return -1;
-       }
-       *cons_data = (struct cons_data){.dev_id = dev_id,
-                                       .port_id = i,
-                                       .release = disable_implicit_release };
-
        ret = rte_event_dev_service_id_get(dev_id,
                                &fdata->evdev_service_id);
        if (ret != -ESRCH && ret != 0) {
@@ -431,76 +263,107 @@ setup_eventdev_generic(struct cons_data *cons_data,
        }
        rte_service_runstate_set(fdata->evdev_service_id, 1);
        rte_service_set_runstate_mapped_check(fdata->evdev_service_id, 0);
-       if (rte_event_dev_start(dev_id) < 0) {
-               printf("Error starting eventdev\n");
-               return -1;
-       }
 
        return dev_id;
 }
 
 static void
-init_rx_adapter(uint16_t nb_ports)
+init_adapters(uint16_t nb_ports)
 {
        int i;
        int ret;
+       uint8_t tx_port_id = 0;
        uint8_t evdev_id = 0;
        struct rte_event_dev_info dev_info;
 
        ret = rte_event_dev_info_get(evdev_id, &dev_info);
 
-       struct rte_event_port_conf rx_p_conf = {
-               .dequeue_depth = 8,
-               .enqueue_depth = 8,
-               .new_event_threshold = 1200,
+       struct rte_event_port_conf adptr_p_conf = {
+               .dequeue_depth = cdata.worker_cq_depth,
+               .enqueue_depth = 64,
+               .new_event_threshold = 4096,
        };
 
-       if (rx_p_conf.dequeue_depth > dev_info.max_event_port_dequeue_depth)
-               rx_p_conf.dequeue_depth = dev_info.max_event_port_dequeue_depth;
-       if (rx_p_conf.enqueue_depth > dev_info.max_event_port_enqueue_depth)
-               rx_p_conf.enqueue_depth = dev_info.max_event_port_enqueue_depth;
+       if (adptr_p_conf.dequeue_depth > dev_info.max_event_port_dequeue_depth)
+               adptr_p_conf.dequeue_depth =
+                       dev_info.max_event_port_dequeue_depth;
+       if (adptr_p_conf.enqueue_depth > dev_info.max_event_port_enqueue_depth)
+               adptr_p_conf.enqueue_depth =
+                       dev_info.max_event_port_enqueue_depth;
 
        /* Create one adapter for all the ethernet ports. */
        ret = rte_event_eth_rx_adapter_create(cdata.rx_adapter_id, evdev_id,
-                       &rx_p_conf);
+                       &adptr_p_conf);
        if (ret)
                rte_exit(EXIT_FAILURE, "failed to create rx adapter[%d]",
                                cdata.rx_adapter_id);
 
+       ret = rte_event_eth_tx_adapter_create(cdata.tx_adapter_id, evdev_id,
+                       &adptr_p_conf);
+       if (ret)
+               rte_exit(EXIT_FAILURE, "failed to create tx adapter[%d]",
+                               cdata.tx_adapter_id);
+
        struct rte_event_eth_rx_adapter_queue_conf queue_conf;
        memset(&queue_conf, 0, sizeof(queue_conf));
        queue_conf.ev.sched_type = cdata.queue_type;
        queue_conf.ev.queue_id = cdata.qid[0];
 
        for (i = 0; i < nb_ports; i++) {
-               uint32_t cap;
-
-               ret = rte_event_eth_rx_adapter_caps_get(evdev_id, i, &cap);
-               if (ret)
-                       rte_exit(EXIT_FAILURE,
-                                       "failed to get event rx adapter "
-                                       "capabilities");
-
                ret = rte_event_eth_rx_adapter_queue_add(cdata.rx_adapter_id, i,
                                -1, &queue_conf);
                if (ret)
                        rte_exit(EXIT_FAILURE,
                                        "Failed to add queues to Rx adapter");
+
+               ret = rte_event_eth_tx_adapter_queue_add(cdata.tx_adapter_id, i,
+                               -1);
+               if (ret)
+                       rte_exit(EXIT_FAILURE,
+                                       "Failed to add queues to Tx adapter");
        }
 
+       ret = rte_event_eth_tx_adapter_event_port_get(cdata.tx_adapter_id,
+                       &tx_port_id);
+       if (ret)
+               rte_exit(EXIT_FAILURE,
+                               "Failed to get Tx adapter port id");
+       ret = rte_event_port_link(evdev_id, tx_port_id, &cdata.tx_queue_id,
+                       NULL, 1);
+       if (ret != 1)
+               rte_exit(EXIT_FAILURE,
+                               "Unable to link Tx adapter port to Tx queue");
+
        ret = rte_event_eth_rx_adapter_service_id_get(cdata.rx_adapter_id,
                                &fdata->rxadptr_service_id);
        if (ret != -ESRCH && ret != 0) {
                rte_exit(EXIT_FAILURE,
-                       "Error getting the service ID for sw eventdev\n");
+                       "Error getting the service ID for Rx adapter\n");
        }
        rte_service_runstate_set(fdata->rxadptr_service_id, 1);
        rte_service_set_runstate_mapped_check(fdata->rxadptr_service_id, 0);
 
+       ret = rte_event_eth_tx_adapter_service_id_get(cdata.tx_adapter_id,
+                               &fdata->txadptr_service_id);
+       if (ret != -ESRCH && ret != 0) {
+               rte_exit(EXIT_FAILURE,
+                       "Error getting the service ID for Tx adapter\n");
+       }
+       rte_service_runstate_set(fdata->txadptr_service_id, 1);
+       rte_service_set_runstate_mapped_check(fdata->txadptr_service_id, 0);
+
        ret = rte_event_eth_rx_adapter_start(cdata.rx_adapter_id);
        if (ret)
                rte_exit(EXIT_FAILURE, "Rx adapter[%d] start failed",
                                cdata.rx_adapter_id);
+
+       ret = rte_event_eth_tx_adapter_start(cdata.tx_adapter_id);
+       if (ret)
+               rte_exit(EXIT_FAILURE, "Tx adapter[%d] start failed",
+                               cdata.tx_adapter_id);
+
+       if (rte_event_dev_start(evdev_id) < 0)
+               rte_exit(EXIT_FAILURE, "Error starting eventdev");
 }
 
 static void
@@ -510,6 +373,7 @@ generic_opt_check(void)
        int ret;
        uint32_t cap = 0;
        uint8_t rx_needed = 0;
+       uint8_t sched_needed = 0;
        struct rte_event_dev_info eventdev_info;
 
        memset(&eventdev_info, 0, sizeof(struct rte_event_dev_info));
@@ -519,6 +383,8 @@ generic_opt_check(void)
                                RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES))
                rte_exit(EXIT_FAILURE,
                                "Event dev doesn't support all type queues\n");
+       sched_needed = !(eventdev_info.event_dev_cap &
+               RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED);
 
        RTE_ETH_FOREACH_DEV(i) {
                ret = rte_event_eth_rx_adapter_caps_get(0, i, &cap);
@@ -531,9 +397,8 @@ generic_opt_check(void)
 
        if (cdata.worker_lcore_mask == 0 ||
                        (rx_needed && cdata.rx_lcore_mask == 0) ||
-                       cdata.tx_lcore_mask == 0 || (cdata.sched_lcore_mask == 0
-                               && !(eventdev_info.event_dev_cap &
-                                       RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED))) {
+                       (cdata.tx_lcore_mask == 0) ||
+                       (sched_needed && cdata.sched_lcore_mask == 0)) {
                printf("Core part of pipeline was not assigned any cores. "
                        "This will stall the pipeline, please check core masks "
                        "(use -h for details on setting core masks):\n"
@@ -545,23 +410,24 @@ generic_opt_check(void)
                rte_exit(-1, "Fix core masks\n");
        }
 
-       if (eventdev_info.event_dev_cap & RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED)
+       if (!sched_needed)
                memset(fdata->sched_core, 0,
                                sizeof(unsigned int) * MAX_NUM_CORE);
+       if (!rx_needed)
+               memset(fdata->rx_core, 0,
+                               sizeof(unsigned int) * MAX_NUM_CORE);
 }
 
 void
 set_worker_generic_setup_data(struct setup_data *caps, bool burst)
 {
        if (burst) {
-               caps->consumer = consumer_burst;
                caps->worker = worker_generic_burst;
        } else {
-               caps->consumer = consumer;
                caps->worker = worker_generic;
        }
 
-       caps->adptr_setup = init_rx_adapter;
+       caps->adptr_setup = init_adapters;
        caps->scheduler = schedule_devices;
        caps->evdev_setup = setup_eventdev_generic;
        caps->check_opt = generic_opt_check;
index 3dbde92..85eb075 100644
@@ -36,10 +36,11 @@ worker_event_enqueue_burst(const uint8_t dev, const uint8_t port,
 }
 
 static __rte_always_inline void
-worker_tx_pkt(struct rte_mbuf *mbuf)
+worker_tx_pkt(const uint8_t dev, const uint8_t port, struct rte_event *ev)
 {
-       exchange_mac(mbuf);
-       while (rte_eth_tx_burst(mbuf->port, 0, &mbuf, 1) != 1)
+       exchange_mac(ev->mbuf);
+       rte_event_eth_tx_adapter_txq_set(ev->mbuf, 0);
+       while (!rte_event_eth_tx_adapter_enqueue(dev, port, ev, 1))
                rte_pause();
 }
 
@@ -64,15 +65,15 @@ worker_do_tx_single(void *arg)
                received++;
 
                if (ev.sched_type == RTE_SCHED_TYPE_ATOMIC) {
-                       worker_tx_pkt(ev.mbuf);
+                       worker_tx_pkt(dev, port, &ev);
                        tx++;
-                       continue;
+               } else {
+                       work();
+                       ev.queue_id++;
+                       worker_fwd_event(&ev, RTE_SCHED_TYPE_ATOMIC);
+                       worker_event_enqueue(dev, port, &ev);
+                       fwd++;
                }
-               work();
-               ev.queue_id++;
-               worker_fwd_event(&ev, RTE_SCHED_TYPE_ATOMIC);
-               worker_event_enqueue(dev, port, &ev);
-               fwd++;
        }
 
        if (!cdata.quiet)
@@ -100,14 +101,14 @@ worker_do_tx_single_atq(void *arg)
                received++;
 
                if (ev.sched_type == RTE_SCHED_TYPE_ATOMIC) {
-                       worker_tx_pkt(ev.mbuf);
+                       worker_tx_pkt(dev, port, &ev);
                        tx++;
-                       continue;
+               } else {
+                       work();
+                       worker_fwd_event(&ev, RTE_SCHED_TYPE_ATOMIC);
+                       worker_event_enqueue(dev, port, &ev);
+                       fwd++;
                }
-               work();
-               worker_fwd_event(&ev, RTE_SCHED_TYPE_ATOMIC);
-               worker_event_enqueue(dev, port, &ev);
-               fwd++;
        }
 
        if (!cdata.quiet)
@@ -141,7 +142,7 @@ worker_do_tx_single_burst(void *arg)
                        rte_prefetch0(ev[i + 1].mbuf);
                        if (ev[i].sched_type == RTE_SCHED_TYPE_ATOMIC) {
 
-                               worker_tx_pkt(ev[i].mbuf);
+                               worker_tx_pkt(dev, port, &ev[i]);
                                ev[i].op = RTE_EVENT_OP_RELEASE;
                                tx++;
 
@@ -188,7 +189,7 @@ worker_do_tx_single_burst_atq(void *arg)
                        rte_prefetch0(ev[i + 1].mbuf);
                        if (ev[i].sched_type == RTE_SCHED_TYPE_ATOMIC) {
 
-                               worker_tx_pkt(ev[i].mbuf);
+                               worker_tx_pkt(dev, port, &ev[i]);
                                ev[i].op = RTE_EVENT_OP_RELEASE;
                                tx++;
                        } else
@@ -232,7 +233,7 @@ worker_do_tx(void *arg)
 
                if (cq_id >= lst_qid) {
                        if (ev.sched_type == RTE_SCHED_TYPE_ATOMIC) {
-                               worker_tx_pkt(ev.mbuf);
+                               worker_tx_pkt(dev, port, &ev);
                                tx++;
                                continue;
                        }
@@ -280,7 +281,7 @@ worker_do_tx_atq(void *arg)
 
                if (cq_id == lst_qid) {
                        if (ev.sched_type == RTE_SCHED_TYPE_ATOMIC) {
-                               worker_tx_pkt(ev.mbuf);
+                               worker_tx_pkt(dev, port, &ev);
                                tx++;
                                continue;
                        }
@@ -330,7 +331,7 @@ worker_do_tx_burst(void *arg)
 
                        if (cq_id >= lst_qid) {
                                if (ev[i].sched_type == RTE_SCHED_TYPE_ATOMIC) {
-                                       worker_tx_pkt(ev[i].mbuf);
+                                       worker_tx_pkt(dev, port, &ev[i]);
                                        tx++;
                                        ev[i].op = RTE_EVENT_OP_RELEASE;
                                        continue;
@@ -387,7 +388,7 @@ worker_do_tx_burst_atq(void *arg)
 
                        if (cq_id == lst_qid) {
                                if (ev[i].sched_type == RTE_SCHED_TYPE_ATOMIC) {
-                                       worker_tx_pkt(ev[i].mbuf);
+                                       worker_tx_pkt(dev, port, &ev[i]);
                                        tx++;
                                        ev[i].op = RTE_EVENT_OP_RELEASE;
                                        continue;
@@ -413,10 +414,8 @@ worker_do_tx_burst_atq(void *arg)
 }
 
 static int
-setup_eventdev_worker_tx(struct cons_data *cons_data,
-               struct worker_data *worker_data)
+setup_eventdev_worker_tx_enq(struct worker_data *worker_data)
 {
-       RTE_SET_USED(cons_data);
        uint8_t i;
        const uint8_t atq = cdata.all_type_queues ? 1 : 0;
        const uint8_t dev_id = 0;
@@ -575,10 +574,9 @@ setup_eventdev_worker_tx(struct cons_data *cons_data,
        }
        rte_service_runstate_set(fdata->evdev_service_id, 1);
        rte_service_set_runstate_mapped_check(fdata->evdev_service_id, 0);
-       if (rte_event_dev_start(dev_id) < 0) {
-               printf("Error starting eventdev\n");
-               return -1;
-       }
+
+       if (rte_event_dev_start(dev_id) < 0)
+               rte_exit(EXIT_FAILURE, "Error starting eventdev");
 
        return dev_id;
 }
@@ -602,7 +600,7 @@ service_rx_adapter(void *arg)
 }
 
 static void
-init_rx_adapter(uint16_t nb_ports)
+init_adapters(uint16_t nb_ports)
 {
        int i;
        int ret;
@@ -613,17 +611,18 @@ init_rx_adapter(uint16_t nb_ports)
        ret = rte_event_dev_info_get(evdev_id, &dev_info);
        adptr_services = rte_zmalloc(NULL, sizeof(struct rx_adptr_services), 0);
 
-       struct rte_event_port_conf rx_p_conf = {
-               .dequeue_depth = 8,
-               .enqueue_depth = 8,
-               .new_event_threshold = 1200,
+       struct rte_event_port_conf adptr_p_conf = {
+               .dequeue_depth = cdata.worker_cq_depth,
+               .enqueue_depth = 64,
+               .new_event_threshold = 4096,
        };
 
-       if (rx_p_conf.dequeue_depth > dev_info.max_event_port_dequeue_depth)
-               rx_p_conf.dequeue_depth = dev_info.max_event_port_dequeue_depth;
-       if (rx_p_conf.enqueue_depth > dev_info.max_event_port_enqueue_depth)
-               rx_p_conf.enqueue_depth = dev_info.max_event_port_enqueue_depth;
-
+       if (adptr_p_conf.dequeue_depth > dev_info.max_event_port_dequeue_depth)
+               adptr_p_conf.dequeue_depth =
+                       dev_info.max_event_port_dequeue_depth;
+       if (adptr_p_conf.enqueue_depth > dev_info.max_event_port_enqueue_depth)
+               adptr_p_conf.enqueue_depth =
+                       dev_info.max_event_port_enqueue_depth;
 
        struct rte_event_eth_rx_adapter_queue_conf queue_conf;
        memset(&queue_conf, 0, sizeof(queue_conf));
@@ -633,11 +632,11 @@ init_rx_adapter(uint16_t nb_ports)
                uint32_t cap;
                uint32_t service_id;
 
-               ret = rte_event_eth_rx_adapter_create(i, evdev_id, &rx_p_conf);
+               ret = rte_event_eth_rx_adapter_create(i, evdev_id,
+                               &adptr_p_conf);
                if (ret)
                        rte_exit(EXIT_FAILURE,
-                                       "failed to create rx adapter[%d]",
-                                       cdata.rx_adapter_id);
+                                       "failed to create rx adapter[%d]", i);
 
                ret = rte_event_eth_rx_adapter_caps_get(evdev_id, i, &cap);
                if (ret)
@@ -654,7 +653,6 @@ init_rx_adapter(uint16_t nb_ports)
                        rte_exit(EXIT_FAILURE,
                                        "Failed to add queues to Rx adapter");
 
-
                /* Producer needs to be scheduled. */
                if (!(cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT)) {
                        ret = rte_event_eth_rx_adapter_service_id_get(i,
@@ -680,9 +678,29 @@ init_rx_adapter(uint16_t nb_ports)
                ret = rte_event_eth_rx_adapter_start(i);
                if (ret)
                        rte_exit(EXIT_FAILURE, "Rx adapter[%d] start failed",
-                                       cdata.rx_adapter_id);
+                                       i);
        }
 
+       /* We already know that the Tx adapter has the INTERNAL port capability */
+       ret = rte_event_eth_tx_adapter_create(cdata.tx_adapter_id, evdev_id,
+                       &adptr_p_conf);
+       if (ret)
+               rte_exit(EXIT_FAILURE, "failed to create tx adapter[%d]",
+                               cdata.tx_adapter_id);
+
+       for (i = 0; i < nb_ports; i++) {
+               ret = rte_event_eth_tx_adapter_queue_add(cdata.tx_adapter_id, i,
+                               -1);
+               if (ret)
+                       rte_exit(EXIT_FAILURE,
+                                       "Failed to add queues to Tx adapter");
+       }
+
+       ret = rte_event_eth_tx_adapter_start(cdata.tx_adapter_id);
+       if (ret)
+               rte_exit(EXIT_FAILURE, "Tx adapter[%d] start failed",
+                               cdata.tx_adapter_id);
+
        if (adptr_services->nb_rx_adptrs) {
                struct rte_service_spec service;
 
@@ -695,8 +713,7 @@ init_rx_adapter(uint16_t nb_ports)
                                &fdata->rxadptr_service_id);
                if (ret)
                        rte_exit(EXIT_FAILURE,
-                               "Rx adapter[%d] service register failed",
-                               cdata.rx_adapter_id);
+                               "Rx adapter service register failed");
 
                rte_service_runstate_set(fdata->rxadptr_service_id, 1);
                rte_service_component_runstate_set(fdata->rxadptr_service_id,
@@ -708,23 +725,19 @@ init_rx_adapter(uint16_t nb_ports)
                rte_free(adptr_services);
        }
 
-       if (!adptr_services->nb_rx_adptrs && fdata->cap.consumer == NULL &&
-                       (dev_info.event_dev_cap &
+       if (!adptr_services->nb_rx_adptrs && (dev_info.event_dev_cap &
                         RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED))
                fdata->cap.scheduler = NULL;
-
-       if (dev_info.event_dev_cap & RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED)
-               memset(fdata->sched_core, 0,
-                               sizeof(unsigned int) * MAX_NUM_CORE);
 }
 
 static void
-worker_tx_opt_check(void)
+worker_tx_enq_opt_check(void)
 {
        int i;
        int ret;
        uint32_t cap = 0;
        uint8_t rx_needed = 0;
+       uint8_t sched_needed = 0;
        struct rte_event_dev_info eventdev_info;
 
        memset(&eventdev_info, 0, sizeof(struct rte_event_dev_info));
@@ -734,32 +747,38 @@ worker_tx_opt_check(void)
                                RTE_EVENT_DEV_CAP_QUEUE_ALL_TYPES))
                rte_exit(EXIT_FAILURE,
                                "Event dev doesn't support all type queues\n");
+       sched_needed = !(eventdev_info.event_dev_cap &
+               RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED);
 
        RTE_ETH_FOREACH_DEV(i) {
                ret = rte_event_eth_rx_adapter_caps_get(0, i, &cap);
                if (ret)
                        rte_exit(EXIT_FAILURE,
-                                       "failed to get event rx adapter "
-                                       "capabilities");
+                               "failed to get event rx adapter capabilities");
                rx_needed |=
                        !(cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_INTERNAL_PORT);
        }
 
        if (cdata.worker_lcore_mask == 0 ||
                        (rx_needed && cdata.rx_lcore_mask == 0) ||
-                       (cdata.sched_lcore_mask == 0 &&
-                        !(eventdev_info.event_dev_cap &
-                                RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED))) {
+                       (sched_needed && cdata.sched_lcore_mask == 0)) {
                printf("Core part of pipeline was not assigned any cores. "
                        "This will stall the pipeline, please check core masks "
                        "(use -h for details on setting core masks):\n"
-                       "\trx: %"PRIu64"\n\ttx: %"PRIu64"\n\tsched: %"PRIu64
-                       "\n\tworkers: %"PRIu64"\n",
-                       cdata.rx_lcore_mask, cdata.tx_lcore_mask,
-                       cdata.sched_lcore_mask,
-                       cdata.worker_lcore_mask);
+                       "\trx: %"PRIu64"\n\tsched: %"PRIu64
+                       "\n\tworkers: %"PRIu64"\n", cdata.rx_lcore_mask,
+                       cdata.sched_lcore_mask, cdata.worker_lcore_mask);
                rte_exit(-1, "Fix core masks\n");
        }
+
+       if (!sched_needed)
+               memset(fdata->sched_core, 0,
+                               sizeof(unsigned int) * MAX_NUM_CORE);
+       if (!rx_needed)
+               memset(fdata->rx_core, 0,
+                               sizeof(unsigned int) * MAX_NUM_CORE);
+
+       memset(fdata->tx_core, 0, sizeof(unsigned int) * MAX_NUM_CORE);
 }
 
 static worker_loop
@@ -821,18 +840,15 @@ get_worker_multi_stage(bool burst)
 }
 
 void
-set_worker_tx_setup_data(struct setup_data *caps, bool burst)
+set_worker_tx_enq_setup_data(struct setup_data *caps, bool burst)
 {
        if (cdata.num_stages == 1)
                caps->worker = get_worker_single_stage(burst);
        else
                caps->worker = get_worker_multi_stage(burst);
 
-       memset(fdata->tx_core, 0, sizeof(unsigned int) * MAX_NUM_CORE);
-
-       caps->check_opt = worker_tx_opt_check;
-       caps->consumer = NULL;
+       caps->check_opt = worker_tx_enq_opt_check;
        caps->scheduler = schedule_devices;
-       caps->evdev_setup = setup_eventdev_worker_tx;
-       caps->adptr_setup = init_rx_adapter;
+       caps->evdev_setup = setup_eventdev_worker_tx_enq;
+       caps->adptr_setup = init_adapters;
 }
index 440422b..4180a86 100644 (file)
@@ -87,9 +87,6 @@
 
 /* Options for configuring ethernet port */
 static struct rte_eth_conf port_conf = {
-       .rxmode = {
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
-       },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
        },
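
This hunk (and the similar ones below) drops DEV_RX_OFFLOAD_CRC_STRIP because
CRC stripping became the default Rx behaviour and the flag was removed in
18.11. An application that still needs the FCS now opts in the other way; a
sketch (not part of this patch, and only valid where the port advertises the
offload in rx_offload_capa):

    #include <rte_ethdev.h>

    static const struct rte_eth_conf keep_crc_conf = {
        .rxmode = {
            /* Opt back in to keeping the CRC; stripping no longer
             * needs to be requested explicitly. */
            .offloads = DEV_RX_OFFLOAD_KEEP_CRC,
        },
    };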
index ce91e8a..a73d120 100644 (file)
@@ -121,7 +121,6 @@ init_port(void)
        struct rte_eth_conf port_conf = {
                .rxmode = {
                        .split_hdr_size = 0,
-                       .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
                },
                .txmode = {
                        .offloads =
@@ -132,22 +131,6 @@ init_port(void)
                                DEV_TX_OFFLOAD_SCTP_CKSUM  |
                                DEV_TX_OFFLOAD_TCP_TSO,
                },
-               /*
-                * Initialize fdir_conf of rte_eth_conf.
-                * Fdir is used in flow filtering for I40e,
-                * so rte_flow rules involve some fdir
-                * configurations. In long term it's better
-                * that drivers don't require any fdir
-                * configuration for rte_flow, but we need to
-                * get this workaround so that sample app can
-                * run on I40e.
-                */
-               .fdir_conf = {
-                       .mode = RTE_FDIR_MODE_PERFECT,
-                       .pballoc = RTE_FDIR_PBALLOC_64K,
-                       .status = RTE_FDIR_REPORT_STATUS,
-                       .drop_queue = 127,
-               },
        };
        struct rte_eth_txconf txq_conf;
        struct rte_eth_rxconf rxq_conf;
index 5306d76..17a877d 100644 (file)
@@ -141,8 +141,7 @@ static struct rte_eth_conf port_conf = {
                .max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE,
                .split_hdr_size = 0,
                .offloads = (DEV_RX_OFFLOAD_CHECKSUM |
-                            DEV_RX_OFFLOAD_JUMBO_FRAME |
-                            DEV_RX_OFFLOAD_CRC_STRIP),
+                            DEV_RX_OFFLOAD_JUMBO_FRAME),
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
index 3fb98ce..41ba7df 100644 (file)
@@ -18,6 +18,7 @@ SRCS-y += swq.c
 SRCS-y += tap.c
 SRCS-y += thread.c
 SRCS-y += tmgr.c
+SRCS-y += cryptodev.c
 
 # Build using pkg-config variables if possible
 $(shell pkg-config --exists libdpdk)
index a29c2b3..d2104aa 100644 (file)
@@ -7,9 +7,9 @@
 #include <string.h>
 
 #include <rte_string_fns.h>
+#include <rte_table_hash_func.h>
 
 #include "action.h"
-#include "hash_func.h"
 
 /**
  * Input port
@@ -57,35 +57,35 @@ port_in_action_profile_create(const char *name,
                (params->lb.f_hash == NULL)) {
                switch (params->lb.key_size) {
                case  8:
-                       params->lb.f_hash = hash_default_key8;
+                       params->lb.f_hash = rte_table_hash_crc_key8;
                        break;
 
                case 16:
-                       params->lb.f_hash = hash_default_key16;
+                       params->lb.f_hash = rte_table_hash_crc_key16;
                        break;
 
                case 24:
-                       params->lb.f_hash = hash_default_key24;
+                       params->lb.f_hash = rte_table_hash_crc_key24;
                        break;
 
                case 32:
-                       params->lb.f_hash = hash_default_key32;
+                       params->lb.f_hash = rte_table_hash_crc_key32;
                        break;
 
                case 40:
-                       params->lb.f_hash = hash_default_key40;
+                       params->lb.f_hash = rte_table_hash_crc_key40;
                        break;
 
                case 48:
-                       params->lb.f_hash = hash_default_key48;
+                       params->lb.f_hash = rte_table_hash_crc_key48;
                        break;
 
                case 56:
-                       params->lb.f_hash = hash_default_key56;
+                       params->lb.f_hash = rte_table_hash_crc_key56;
                        break;
 
                case 64:
-                       params->lb.f_hash = hash_default_key64;
+                       params->lb.f_hash = rte_table_hash_crc_key64;
                        break;
 
                default:
@@ -192,35 +192,35 @@ table_action_profile_create(const char *name,
                (params->lb.f_hash == NULL)) {
                switch (params->lb.key_size) {
                case 8:
-                       params->lb.f_hash = hash_default_key8;
+                       params->lb.f_hash = rte_table_hash_crc_key8;
                        break;
 
                case 16:
-                       params->lb.f_hash = hash_default_key16;
+                       params->lb.f_hash = rte_table_hash_crc_key16;
                        break;
 
                case 24:
-                       params->lb.f_hash = hash_default_key24;
+                       params->lb.f_hash = rte_table_hash_crc_key24;
                        break;
 
                case 32:
-                       params->lb.f_hash = hash_default_key32;
+                       params->lb.f_hash = rte_table_hash_crc_key32;
                        break;
 
                case 40:
-                       params->lb.f_hash = hash_default_key40;
+                       params->lb.f_hash = rte_table_hash_crc_key40;
                        break;
 
                case 48:
-                       params->lb.f_hash = hash_default_key48;
+                       params->lb.f_hash = rte_table_hash_crc_key48;
                        break;
 
                case 56:
-                       params->lb.f_hash = hash_default_key56;
+                       params->lb.f_hash = rte_table_hash_crc_key56;
                        break;
 
                case 64:
-                       params->lb.f_hash = hash_default_key64;
+                       params->lb.f_hash = rte_table_hash_crc_key64;
                        break;
 
                default:
@@ -333,6 +333,39 @@ table_action_profile_create(const char *name,
                }
        }
 
+       if (params->action_mask & (1LLU << RTE_TABLE_ACTION_SYM_CRYPTO)) {
+               status = rte_table_action_profile_action_register(ap,
+                       RTE_TABLE_ACTION_SYM_CRYPTO,
+                       &params->sym_crypto);
+
+               if (status) {
+                       rte_table_action_profile_free(ap);
+                       return NULL;
+               }
+       }
+
+       if (params->action_mask & (1LLU << RTE_TABLE_ACTION_TAG)) {
+               status = rte_table_action_profile_action_register(ap,
+                       RTE_TABLE_ACTION_TAG,
+                       NULL);
+
+               if (status) {
+                       rte_table_action_profile_free(ap);
+                       return NULL;
+               }
+       }
+
+       if (params->action_mask & (1LLU << RTE_TABLE_ACTION_DECAP)) {
+               status = rte_table_action_profile_action_register(ap,
+                       RTE_TABLE_ACTION_DECAP,
+                       NULL);
+
+               if (status) {
+                       rte_table_action_profile_free(ap);
+                       return NULL;
+               }
+       }
+
        status = rte_table_action_profile_freeze(ap);
        if (status) {
                rte_table_action_profile_free(ap);
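
The hash_default_key* helpers came from the app-local hash_func.h, which is
deleted at the end of this patch; the rte_table_hash_crc_key* replacements in
librte_table keep the same (key, mask, key_size, seed) signature. A minimal
usage sketch for a 16-byte key (all-ones mask and zero seed are assumed for
illustration):

    #include <stdint.h>
    #include <rte_table_hash_func.h>

    static uint64_t
    hash16(void *key)
    {
        static uint64_t mask[2] = {UINT64_MAX, UINT64_MAX};

        /* Drop-in equivalent of the removed hash_default_key16. */
        return rte_table_hash_crc_key16(key, mask, 16, 0);
    }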
index 417200e..cde17e6 100644 (file)
@@ -53,6 +53,7 @@ struct table_action_profile_params {
        struct rte_table_action_nat_config nat;
        struct rte_table_action_ttl_config ttl;
        struct rte_table_action_stats_config stats;
+       struct rte_table_action_sym_crypto_config sym_crypto;
 };
 
 struct table_action_profile {
index 102a1d6..d1e5540 100644 (file)
@@ -12,6 +12,8 @@
 #include <rte_ethdev.h>
 
 #include "cli.h"
+
+#include "cryptodev.h"
 #include "kni.h"
 #include "link.h"
 #include "mempool.h"
@@ -785,6 +787,65 @@ cmd_kni(char **tokens,
        }
 }
 
+static const char cmd_cryptodev_help[] =
+"cryptodev <cryptodev_name>\n"
+"   dev <device_name> | dev_id <device_id>\n"
+"   queue <n_queues> <queue_size>\n";
+
+static void
+cmd_cryptodev(char **tokens,
+       uint32_t n_tokens,
+       char *out,
+       size_t out_size)
+{
+       struct cryptodev_params params;
+       char *name;
+
+       memset(&params, 0, sizeof(params));
+       if (n_tokens != 7) {
+               snprintf(out, out_size, MSG_ARG_MISMATCH, tokens[0]);
+               return;
+       }
+
+       name = tokens[1];
+
+       if (strcmp(tokens[2], "dev") == 0)
+               params.dev_name = tokens[3];
+       else if (strcmp(tokens[2], "dev_id") == 0) {
+               if (parser_read_uint32(&params.dev_id, tokens[3]) < 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID,
+                               "dev_id");
+                       return;
+               }
+       } else {
+               snprintf(out, out_size, MSG_ARG_INVALID,
+                       "cryptodev");
+               return;
+       }
+
+       if (strcmp(tokens[4], "queue")) {
+               snprintf(out, out_size, MSG_ARG_NOT_FOUND,
+                       "4");
+               return;
+       }
+
+       if (parser_read_uint32(&params.n_queues, tokens[5]) < 0) {
+               snprintf(out, out_size, MSG_ARG_INVALID,
+                       "q");
+               return;
+       }
+
+       if (parser_read_uint32(&params.queue_size, tokens[6]) < 0) {
+               snprintf(out, out_size, MSG_ARG_INVALID,
+                       "queue_size");
+               return;
+       }
+
+       if (cryptodev_create(name, &params) == NULL) {
+               snprintf(out, out_size, MSG_CMD_FAIL, tokens[0]);
+               return;
+       }
+}
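
Per the help text above, the new command takes either a device name or a
numeric id; for example (device names are hypothetical):

    cryptodev CRYPTO0 dev crypto_aesni_gcm0 queue 1 1024
    cryptodev CRYPTO1 dev_id 0 queue 2 512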
 
 static const char cmd_port_in_action_profile_help[] =
 "port in action profile <profile_name>\n"
@@ -961,13 +1022,19 @@ static const char cmd_table_action_profile_help[] =
 "       tc <n_tc>\n"
 "       stats none | pkts | bytes | both]\n"
 "   [tm spp <n_subports_per_port> pps <n_pipes_per_subport>]\n"
-"   [encap ether | vlan | qinq | mpls | pppoe]\n"
+"   [encap ether | vlan | qinq | mpls | pppoe |\n"
+"       vxlan offset <ether_offset> ipv4 | ipv6 vlan on | off]\n"
 "   [nat src | dst\n"
 "       proto udp | tcp]\n"
 "   [ttl drop | fwd\n"
 "       stats none | pkts]\n"
 "   [stats pkts | bytes | both]\n"
-"   [time]\n";
+"   [time]\n"
+"   [sym_crypto dev <CRYPTODEV_NAME> offset <op_offset> "
+"       mempool_create <mempool_name>\n"
+"       mempool_init <mempool_name>]\n"
+"   [tag]\n"
+"   [decap]\n";
 
 static void
 cmd_table_action_profile(char **tokens,
@@ -1157,6 +1224,8 @@ cmd_table_action_profile(char **tokens,
        } /* tm */
 
        if ((t0 < n_tokens) && (strcmp(tokens[t0], "encap") == 0)) {
+               uint32_t n_extra_tokens = 0;
+
                if (n_tokens < t0 + 2) {
                        snprintf(out, out_size, MSG_ARG_MISMATCH,
                                "action profile encap");
@@ -1173,13 +1242,61 @@ cmd_table_action_profile(char **tokens,
                        p.encap.encap_mask = 1LLU << RTE_TABLE_ACTION_ENCAP_MPLS;
                else if (strcmp(tokens[t0 + 1], "pppoe") == 0)
                        p.encap.encap_mask = 1LLU << RTE_TABLE_ACTION_ENCAP_PPPOE;
-               else {
+               else if (strcmp(tokens[t0 + 1], "vxlan") == 0) {
+                       if (n_tokens < t0 + 2 + 5) {
+                               snprintf(out, out_size, MSG_ARG_MISMATCH,
+                                       "action profile encap vxlan");
+                               return;
+                       }
+
+                       if (strcmp(tokens[t0 + 2], "offset") != 0) {
+                               snprintf(out, out_size, MSG_ARG_NOT_FOUND,
+                                       "vxlan: offset");
+                               return;
+                       }
+
+                       if (parser_read_uint32(&p.encap.vxlan.data_offset,
+                               tokens[t0 + 2 + 1]) != 0) {
+                               snprintf(out, out_size, MSG_ARG_INVALID,
+                                       "vxlan: ether_offset");
+                               return;
+                       }
+
+                       if (strcmp(tokens[t0 + 2 + 2], "ipv4") == 0)
+                               p.encap.vxlan.ip_version = 1;
+                       else if (strcmp(tokens[t0 + 2 + 2], "ipv6") == 0)
+                               p.encap.vxlan.ip_version = 0;
+                       else {
+                               snprintf(out, out_size, MSG_ARG_INVALID,
+                                       "vxlan: ipv4 or ipv6");
+                               return;
+                       }
+
+                       if (strcmp(tokens[t0 + 2 + 3], "vlan") != 0) {
+                               snprintf(out, out_size, MSG_ARG_NOT_FOUND,
+                                       "vxlan: vlan");
+                               return;
+                       }
+
+                       if (strcmp(tokens[t0 + 2 + 4], "on") == 0)
+                               p.encap.vxlan.vlan = 1;
+                       else if (strcmp(tokens[t0 + 2 + 4], "off") == 0)
+                               p.encap.vxlan.vlan = 0;
+                       else {
+                               snprintf(out, out_size, MSG_ARG_INVALID,
+                                       "vxlan: on or off");
+                               return;
+                       }
+
+                       p.encap.encap_mask = 1LLU << RTE_TABLE_ACTION_ENCAP_VXLAN;
+                       n_extra_tokens = 5;
+               } else {
                        snprintf(out, out_size, MSG_ARG_MISMATCH, "encap");
                        return;
                }
 
                p.action_mask |= 1LLU << RTE_TABLE_ACTION_ENCAP;
-               t0 += 2;
+               t0 += 2 + n_extra_tokens;
        } /* encap */
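
With the vxlan branch above, a profile can now request VXLAN encapsulation,
for example (offsets are illustrative):

    table action profile AP0 ipv4 offset 270 fwd encap vxlan offset 250 ipv4 vlan off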
 
        if ((t0 < n_tokens) && (strcmp(tokens[t0], "nat") == 0)) {
@@ -1285,6 +1402,67 @@ cmd_table_action_profile(char **tokens,
                t0 += 1;
        } /* time */
 
+       if ((t0 < n_tokens) && (strcmp(tokens[t0], "sym_crypto") == 0)) {
+               struct cryptodev *cryptodev;
+               struct mempool *mempool;
+
+               if (n_tokens < t0 + 9 ||
+                               strcmp(tokens[t0 + 1], "dev") ||
+                               strcmp(tokens[t0 + 3], "offset") ||
+                               strcmp(tokens[t0 + 5], "mempool_create") ||
+                               strcmp(tokens[t0 + 7], "mempool_init")) {
+                       snprintf(out, out_size, MSG_ARG_MISMATCH,
+                               "table action profile sym_crypto");
+                       return;
+               }
+
+               cryptodev = cryptodev_find(tokens[t0 + 2]);
+               if (cryptodev == NULL) {
+                       snprintf(out, out_size, MSG_ARG_INVALID,
+                               "table action profile sym_crypto");
+                       return;
+               }
+
+               p.sym_crypto.cryptodev_id = cryptodev->dev_id;
+
+               if (parser_read_uint32(&p.sym_crypto.op_offset,
+                               tokens[t0 + 4]) != 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID,
+                                       "table action profile sym_crypto");
+                       return;
+               }
+
+               mempool = mempool_find(tokens[t0 + 6]);
+               if (mempool == NULL) {
+                       snprintf(out, out_size, MSG_ARG_INVALID,
+                               "table action profile sym_crypto");
+                       return;
+               }
+               p.sym_crypto.mp_create = mempool->m;
+
+               mempool = mempool_find(tokens[t0 + 8]);
+               if (mempool == NULL) {
+                       snprintf(out, out_size, MSG_ARG_INVALID,
+                               "table action profile sym_crypto");
+                       return;
+               }
+               p.sym_crypto.mp_init = mempool->m;
+
+               p.action_mask |= 1LLU << RTE_TABLE_ACTION_SYM_CRYPTO;
+
+               t0 += 9;
+       } /* sym_crypto */
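
The sym_crypto clause parsed above is exercised by the bundled flow_crypto.cli
(included later in this patch):

    table action profile AP0 ipv4 offset 270 fwd sym_crypto dev CRYPTO0 offset 1792 mempool_create MEMPOOL_SESSION0 mempool_init MEMPOOL_SESSION0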
+
+       if ((t0 < n_tokens) && (strcmp(tokens[t0], "tag") == 0)) {
+               p.action_mask |= 1LLU << RTE_TABLE_ACTION_TAG;
+               t0 += 1;
+       } /* tag */
+
+       if ((t0 < n_tokens) && (strcmp(tokens[t0], "decap") == 0)) {
+               p.action_mask |= 1LLU << RTE_TABLE_ACTION_DECAP;
+               t0 += 1;
+       } /* decap */
+
        if (t0 < n_tokens) {
                snprintf(out, out_size, MSG_ARG_MISMATCH, tokens[0]);
                return;
@@ -1366,6 +1544,7 @@ static const char cmd_pipeline_port_in_help[] =
 "   | tap <tap_name> mempool <mempool_name> mtu <mtu>\n"
 "   | kni <kni_name>\n"
 "   | source mempool <mempool_name> file <file_name> bpp <n_bytes_per_pkt>\n"
+"   | cryptodev <cryptodev_name> rxq <queue_id>\n"
 "   [action <port_in_action_profile_name>]\n"
 "   [disabled]\n";
 
@@ -1538,6 +1717,26 @@ cmd_pipeline_port_in(char **tokens,
                }
 
                t0 += 7;
+       } else if (strcmp(tokens[t0], "cryptodev") == 0) {
+               if (n_tokens < t0 + 4) {
+                       snprintf(out, out_size, MSG_ARG_MISMATCH,
+                               "pipeline port in cryptodev");
+                       return;
+               }
+
+               p.type = PORT_IN_CRYPTODEV;
+
+               p.dev_name = tokens[t0 + 1];
+               if (parser_read_uint16(&p.rxq.queue_id, tokens[t0 + 3]) != 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID,
+                               "rxq");
+                       return;
+               }
+
+               p.cryptodev.arg_callback = NULL;
+               p.cryptodev.f_callback = NULL;
+
+               t0 += 4;
        } else {
                snprintf(out, out_size, MSG_ARG_INVALID, tokens[0]);
                return;
@@ -1584,7 +1783,8 @@ static const char cmd_pipeline_port_out_help[] =
 "   | tmgr <tmgr_name>\n"
 "   | tap <tap_name>\n"
 "   | kni <kni_name>\n"
-"   | sink [file <file_name> pkts <max_n_pkts>]\n";
+"   | sink [file <file_name> pkts <max_n_pkts>]\n"
+"   | cryptodev <cryptodev_name> txq <txq_id> offset <crypto_op_offset>\n";
 
 static void
 cmd_pipeline_port_out(char **tokens,
@@ -1718,6 +1918,41 @@ cmd_pipeline_port_out(char **tokens,
                                return;
                        }
                }
+
+       } else if (strcmp(tokens[6], "cryptodev") == 0) {
+               if (n_tokens != 12) {
+                       snprintf(out, out_size, MSG_ARG_MISMATCH,
+                               "pipeline port out cryptodev");
+                       return;
+               }
+
+               p.type = PORT_OUT_CRYPTODEV;
+
+               p.dev_name = tokens[7];
+
+               if (strcmp(tokens[8], "txq")) {
+                       snprintf(out, out_size, MSG_ARG_MISMATCH,
+                               "pipeline port out cryptodev");
+                       return;
+               }
+
+               if (parser_read_uint16(&p.cryptodev.queue_id, tokens[9])
+                               != 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID, "queue_id");
+                       return;
+               }
+
+               if (strcmp(tokens[10], "offset")) {
+                       snprintf(out, out_size, MSG_ARG_MISMATCH,
+                               "pipeline port out cryptodev");
+                       return;
+               }
+
+               if (parser_read_uint32(&p.cryptodev.op_offset, tokens[11])
+                               != 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID, "queue_id");
+                       return;
+               }
        } else {
                snprintf(out, out_size, MSG_ARG_INVALID, tokens[0]);
                return;
@@ -2861,11 +3096,31 @@ parse_match(char **tokens,
  *          [label1 <label> <tc> <ttl>
  *          [label2 <label> <tc> <ttl>
  *          [label3 <label> <tc> <ttl>]]]
- *       | pppoe <da> <sa> <session_id>]
+ *       | pppoe <da> <sa> <session_id>
+ *       | vxlan ether <da> <sa>
+ *          [vlan <pcp> <dei> <vid>]
+ *          ipv4 <sa> <da> <dscp> <ttl>
+ *          | ipv6 <sa> <da> <flow_label> <dscp> <hop_limit>
+ *          udp <sp> <dp>
+ *          vxlan <vni>]
  *    [nat ipv4 | ipv6 <addr> <port>]
  *    [ttl dec | keep]
  *    [stats]
  *    [time]
+ *    [sym_crypto
+ *       encrypt | decrypt
+ *       type
+ *       | cipher
+ *          cipher_algo <algo> cipher_key <key> cipher_iv <iv>
+ *       | cipher_auth
+ *          cipher_algo <algo> cipher_key <key> cipher_iv <iv>
+ *          auth_algo <algo> auth_key <key> digest_size <size>
+ *       | aead
+ *          aead_algo <algo> aead_key <key> aead_iv <iv> aead_aad <aad>
+ *          digest_size <size>
+ *       data_offset <data_offset>]
+ *    [tag <tag>]
+ *    [decap <n>]
  *
  * where:
  *    <pa> ::= g | y | r | drop
@@ -3254,6 +3509,122 @@ parse_table_action_encap(char **tokens,
                return 1 + 4;
        }
 
+       /* vxlan */
+       if (n_tokens && (strcmp(tokens[0], "vxlan") == 0)) {
+               uint32_t n = 0;
+
+               n_tokens--;
+               tokens++;
+               n++;
+
+               /* ether <da> <sa> */
+               if ((n_tokens < 3) ||
+                       strcmp(tokens[0], "ether") ||
+                       parse_mac_addr(tokens[1], &a->encap.vxlan.ether.da) ||
+                       parse_mac_addr(tokens[2], &a->encap.vxlan.ether.sa))
+                       return 0;
+
+               n_tokens -= 3;
+               tokens += 3;
+               n += 3;
+
+               /* [vlan <pcp> <dei> <vid>] */
+               if (strcmp(tokens[0], "vlan") == 0) {
+                       uint32_t pcp, dei, vid;
+
+                       if ((n_tokens < 4) ||
+                               parser_read_uint32(&pcp, tokens[1]) ||
+                               (pcp > 7) ||
+                               parser_read_uint32(&dei, tokens[2]) ||
+                               (dei > 1) ||
+                               parser_read_uint32(&vid, tokens[3]) ||
+                               (vid > 0xFFF))
+                               return 0;
+
+                       a->encap.vxlan.vlan.pcp = pcp;
+                       a->encap.vxlan.vlan.dei = dei;
+                       a->encap.vxlan.vlan.vid = vid;
+
+                       n_tokens -= 4;
+                       tokens += 4;
+                       n += 4;
+               }
+
+               /* ipv4 <sa> <da> <dscp> <ttl>
+                  | ipv6 <sa> <da> <flow_label> <dscp> <hop_limit> */
+               if (strcmp(tokens[0], "ipv4") == 0) {
+                       struct in_addr sa, da;
+                       uint8_t dscp, ttl;
+
+                       if ((n_tokens < 5) ||
+                               parse_ipv4_addr(tokens[1], &sa) ||
+                               parse_ipv4_addr(tokens[2], &da) ||
+                               parser_read_uint8(&dscp, tokens[3]) ||
+                               (dscp > 0x3F) ||
+                               parser_read_uint8(&ttl, tokens[4]))
+                               return 0;
+
+                       a->encap.vxlan.ipv4.sa = rte_be_to_cpu_32(sa.s_addr);
+                       a->encap.vxlan.ipv4.da = rte_be_to_cpu_32(da.s_addr);
+                       a->encap.vxlan.ipv4.dscp = dscp;
+                       a->encap.vxlan.ipv4.ttl = ttl;
+
+                       n_tokens -= 5;
+                       tokens += 5;
+                       n += 5;
+               } else if (strcmp(tokens[0], "ipv6") == 0) {
+                       struct in6_addr sa, da;
+                       uint32_t flow_label;
+                       uint8_t dscp, hop_limit;
+
+                       if ((n_tokens < 6) ||
+                               parse_ipv6_addr(tokens[1], &sa) ||
+                               parse_ipv6_addr(tokens[2], &da) ||
+                               parser_read_uint32(&flow_label, tokens[3]) ||
+                               parser_read_uint8(&dscp, tokens[4]) ||
+                               (dscp > 0x3F) ||
+                               parser_read_uint8(&hop_limit, tokens[5]))
+                               return 0;
+
+                       memcpy(a->encap.vxlan.ipv6.sa, sa.s6_addr, 16);
+                       memcpy(a->encap.vxlan.ipv6.da, da.s6_addr, 16);
+                       a->encap.vxlan.ipv6.flow_label = flow_label;
+                       a->encap.vxlan.ipv6.dscp = dscp;
+                       a->encap.vxlan.ipv6.hop_limit = hop_limit;
+
+                       n_tokens -= 6;
+                       tokens += 6;
+                       n += 6;
+               } else
+                       return 0;
+
+               /* udp <sp> <dp> */
+               if ((n_tokens < 3) ||
+                       strcmp(tokens[0], "udp") ||
+                       parser_read_uint16(&a->encap.vxlan.udp.sp, tokens[1]) ||
+                       parser_read_uint16(&a->encap.vxlan.udp.dp, tokens[2]))
+                       return 0;
+
+               n_tokens -= 3;
+               tokens += 3;
+               n += 3;
+
+               /* vxlan <vni> */
+               if ((n_tokens < 2) ||
+                       strcmp(tokens[0], "vxlan") ||
+                       parser_read_uint32(&a->encap.vxlan.vxlan.vni, tokens[1]) ||
+                       (a->encap.vxlan.vxlan.vni > 0xFFFFFF))
+                       return 0;
+
+               n_tokens -= 2;
+               tokens += 2;
+               n += 2;
+
+               a->encap.type = RTE_TABLE_ACTION_ENCAP_VXLAN;
+               a->action_mask |= 1 << RTE_TABLE_ACTION_ENCAP;
+               return 1 + n;
+       }
+
        return 0;
 }
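
A rule using the vxlan encap grammar above might look as follows (MAC and IP
addresses, ports and VNI are illustrative; 4789 is the standard VXLAN UDP
port):

    pipeline PIPELINE0 table 0 rule add match hash ipv4_addr 100.0.0.10 action fwd port 0 encap vxlan ether 3c:fd:fe:b4:f8:30 3c:fd:fe:b4:f8:31 ipv4 10.0.0.1 10.0.0.2 0 64 udp 4789 4789 vxlan 100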
 
@@ -3348,6 +3719,400 @@ parse_table_action_time(char **tokens,
        return 1;
 }
 
+static void
+parse_free_sym_crypto_param_data(struct rte_table_action_sym_crypto_params *p)
+{
+       struct rte_crypto_sym_xform *xform[2] = {NULL};
+       uint32_t i;
+
+       xform[0] = p->xform;
+       if (xform[0])
+               xform[1] = xform[0]->next;
+
+       for (i = 0; i < 2; i++) {
+               if (xform[i] == NULL)
+                       continue;
+
+               switch (xform[i]->type) {
+               case RTE_CRYPTO_SYM_XFORM_CIPHER:
+                       if (xform[i]->cipher.key.data)
+                               free(xform[i]->cipher.key.data);
+                       if (p->cipher_auth.cipher_iv.val)
+                               free(p->cipher_auth.cipher_iv.val);
+                       if (p->cipher_auth.cipher_iv_update.val)
+                               free(p->cipher_auth.cipher_iv_update.val);
+                       break;
+               case RTE_CRYPTO_SYM_XFORM_AUTH:
+                       if (xform[i]->auth.key.data)
+                               free(xform[i]->auth.key.data);
+                       if (p->cipher_auth.auth_iv.val)
+                               free(p->cipher_auth.auth_iv.val);
+                       if (p->cipher_auth.auth_iv_update.val)
+                               free(p->cipher_auth.auth_iv_update.val);
+                       break;
+               case RTE_CRYPTO_SYM_XFORM_AEAD:
+                       if (xform[i]->aead.key.data)
+                               free(xform[i]->aead.key.data);
+                       if (p->aead.iv.val)
+                               free(p->aead.iv.val);
+                       if (p->aead.aad.val)
+                               free(p->aead.aad.val);
+                       break;
+               default:
+                       continue;
+               }
+       }
+
+}
+
+static struct rte_crypto_sym_xform *
+parse_table_action_cipher(struct rte_table_action_sym_crypto_params *p,
+               char **tokens, uint32_t n_tokens, uint32_t encrypt,
+               uint32_t *used_n_tokens)
+{
+       struct rte_crypto_sym_xform *xform_cipher;
+       int status;
+       size_t len;
+
+       if (n_tokens < 7 || strcmp(tokens[1], "cipher_algo") ||
+                       strcmp(tokens[3], "cipher_key") ||
+                       strcmp(tokens[5], "cipher_iv"))
+               return NULL;
+
+       xform_cipher = calloc(1, sizeof(*xform_cipher));
+       if (xform_cipher == NULL)
+               return NULL;
+
+       xform_cipher->type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+       xform_cipher->cipher.op = encrypt ? RTE_CRYPTO_CIPHER_OP_ENCRYPT :
+                       RTE_CRYPTO_CIPHER_OP_DECRYPT;
+
+       /* cipher_algo */
+       status = rte_cryptodev_get_cipher_algo_enum(
+                       &xform_cipher->cipher.algo, tokens[2]);
+       if (status < 0)
+               goto error_exit;
+
+       /* cipher_key */
+       len = strlen(tokens[4]);
+       xform_cipher->cipher.key.data = calloc(1, len / 2 + 1);
+       if (xform_cipher->cipher.key.data == NULL)
+               goto error_exit;
+
+       status = parse_hex_string(tokens[4],
+                       xform_cipher->cipher.key.data,
+                       (uint32_t *)&len);
+       if (status < 0)
+               goto error_exit;
+
+       xform_cipher->cipher.key.length = (uint16_t)len;
+
+       /* cipher_iv */
+       len = strlen(tokens[6]);
+
+       p->cipher_auth.cipher_iv.val = calloc(1, len / 2 + 1);
+       if (p->cipher_auth.cipher_iv.val == NULL)
+               goto error_exit;
+
+       status = parse_hex_string(tokens[6],
+                       p->cipher_auth.cipher_iv.val,
+                       (uint32_t *)&len);
+       if (status < 0)
+               goto error_exit;
+
+       xform_cipher->cipher.iv.length = (uint16_t)len;
+       xform_cipher->cipher.iv.offset = RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET;
+       p->cipher_auth.cipher_iv.length = (uint32_t)len;
+       *used_n_tokens = 7;
+
+       return xform_cipher;
+
+error_exit:
+       if (xform_cipher->cipher.key.data)
+               free(xform_cipher->cipher.key.data);
+
+       if (p->cipher_auth.cipher_iv.val) {
+               free(p->cipher_auth.cipher_iv.val);
+               p->cipher_auth.cipher_iv.val = NULL;
+       }
+
+       free(xform_cipher);
+
+       return NULL;
+}
+
+static struct rte_crypto_sym_xform *
+parse_table_action_cipher_auth(struct rte_table_action_sym_crypto_params *p,
+               char **tokens, uint32_t n_tokens, uint32_t encrypt,
+               uint32_t *used_n_tokens)
+{
+       struct rte_crypto_sym_xform *xform_cipher;
+       struct rte_crypto_sym_xform *xform_auth;
+       int status;
+       size_t len;
+
+       if (n_tokens < 13 ||
+                       strcmp(tokens[7], "auth_algo") ||
+                       strcmp(tokens[9], "auth_key") ||
+                       strcmp(tokens[11], "digest_size"))
+               return NULL;
+
+       xform_auth = calloc(1, sizeof(*xform_auth));
+       if (xform_auth == NULL)
+               return NULL;
+
+       xform_auth->type = RTE_CRYPTO_SYM_XFORM_AUTH;
+       xform_auth->auth.op = encrypt ? RTE_CRYPTO_AUTH_OP_GENERATE :
+                       RTE_CRYPTO_AUTH_OP_VERIFY;
+
+       /* auth_algo */
+       status = rte_cryptodev_get_auth_algo_enum(&xform_auth->auth.algo,
+                       tokens[8]);
+       if (status < 0)
+               goto error_exit;
+
+       /* auth_key */
+       len = strlen(tokens[10]);
+       xform_auth->auth.key.data = calloc(1, len / 2 + 1);
+       if (xform_auth->auth.key.data == NULL)
+               goto error_exit;
+
+       status = parse_hex_string(tokens[10],
+                       xform_auth->auth.key.data, (uint32_t *)&len);
+       if (status < 0)
+               goto error_exit;
+
+       xform_auth->auth.key.length = (uint16_t)len;
+
+       if (strcmp(tokens[11], "digest_size"))
+               goto error_exit;
+
+       status = parser_read_uint16(&xform_auth->auth.digest_length,
+                       tokens[12]);
+       if (status < 0)
+               goto error_exit;
+
+       xform_cipher = parse_table_action_cipher(p, tokens, 7, encrypt,
+                       used_n_tokens);
+       if (xform_cipher == NULL)
+               goto error_exit;
+
+       *used_n_tokens += 6;
+
+       if (encrypt) {
+               xform_cipher->next = xform_auth;
+               return xform_cipher;
+       } else {
+               xform_auth->next = xform_cipher;
+               return xform_auth;
+       }
+
+error_exit:
+       if (xform_auth->auth.key.data)
+               free(xform_auth->auth.key.data);
+       if (p->cipher_auth.auth_iv.val) {
+               free(p->cipher_auth.auth_iv.val);
+               p->cipher_auth.auth_iv.val = NULL;
+       }
+
+       free(xform_auth);
+
+       return NULL;
+}
+
+static struct rte_crypto_sym_xform *
+parse_table_action_aead(struct rte_table_action_sym_crypto_params *p,
+               char **tokens, uint32_t n_tokens, uint32_t encrypt,
+               uint32_t *used_n_tokens)
+{
+       struct rte_crypto_sym_xform *xform_aead;
+       int status;
+       size_t len;
+
+       if (n_tokens < 11 || strcmp(tokens[1], "aead_algo") ||
+                       strcmp(tokens[3], "aead_key") ||
+                       strcmp(tokens[5], "aead_iv") ||
+                       strcmp(tokens[7], "aead_aad") ||
+                       strcmp(tokens[9], "digest_size"))
+               return NULL;
+
+       xform_aead = calloc(1, sizeof(*xform_aead));
+       if (xform_aead == NULL)
+               return NULL;
+
+       xform_aead->type = RTE_CRYPTO_SYM_XFORM_AEAD;
+       xform_aead->aead.op = encrypt ? RTE_CRYPTO_AEAD_OP_ENCRYPT :
+                       RTE_CRYPTO_AEAD_OP_DECRYPT;
+
+       /* aead_algo */
+       status = rte_cryptodev_get_aead_algo_enum(&xform_aead->aead.algo,
+                       tokens[2]);
+       if (status < 0)
+               goto error_exit;
+
+       /* aead_key */
+       len = strlen(tokens[4]);
+       xform_aead->aead.key.data = calloc(1, len / 2 + 1);
+       if (xform_aead->aead.key.data == NULL)
+               goto error_exit;
+
+       status = parse_hex_string(tokens[4], xform_aead->aead.key.data,
+                       (uint32_t *)&len);
+       if (status < 0)
+               goto error_exit;
+
+       xform_aead->aead.key.length = (uint16_t)len;
+
+       /* aead_iv */
+       len = strlen(tokens[6]);
+       p->aead.iv.val = calloc(1, len / 2 + 1);
+       if (p->aead.iv.val == NULL)
+               goto error_exit;
+
+       status = parse_hex_string(tokens[6], p->aead.iv.val,
+                       (uint32_t *)&len);
+       if (status < 0)
+               goto error_exit;
+
+       xform_aead->aead.iv.length = (uint16_t)len;
+       xform_aead->aead.iv.offset = RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET;
+       p->aead.iv.length = (uint32_t)len;
+
+       /* aead_aad */
+       len = strlen(tokens[8]);
+       p->aead.aad.val = calloc(1, len / 2 + 1);
+       if (p->aead.aad.val == NULL)
+               goto error_exit;
+
+       status = parse_hex_string(tokens[8], p->aead.aad.val, (uint32_t *)&len);
+       if (status < 0)
+               goto error_exit;
+
+       xform_aead->aead.aad_length = (uint16_t)len;
+       p->aead.aad.length = (uint32_t)len;
+
+       /* digest_size */
+       status = parser_read_uint16(&xform_aead->aead.digest_length,
+                       tokens[10]);
+       if (status < 0)
+               goto error_exit;
+
+       *used_n_tokens = 11;
+
+       return xform_aead;
+
+error_exit:
+       if (xform_aead->aead.key.data)
+               free(xform_aead->aead.key.data);
+       if (p->aead.iv.val) {
+               free(p->aead.iv.val);
+               p->aead.iv.val = NULL;
+       }
+       if (p->aead.aad.val) {
+               free(p->aead.aad.val);
+               p->aead.aad.val = NULL;
+       }
+
+       free(xform_aead);
+
+       return NULL;
+}
+
+
+static uint32_t
+parse_table_action_sym_crypto(char **tokens,
+       uint32_t n_tokens,
+       struct table_rule_action *a)
+{
+       struct rte_table_action_sym_crypto_params *p = &a->sym_crypto;
+       struct rte_crypto_sym_xform *xform = NULL;
+       uint32_t used_n_tokens;
+       uint32_t encrypt;
+       int status;
+
+       if ((n_tokens < 12) ||
+               strcmp(tokens[0], "sym_crypto") ||
+               strcmp(tokens[2], "type"))
+               return 0;
+
+       memset(p, 0, sizeof(*p));
+
+       if (strcmp(tokens[1], "encrypt") == 0)
+               encrypt = 1;
+       else
+               encrypt = 0;
+
+       if (strcmp(tokens[3], "cipher") == 0) {
+               tokens += 3;
+               n_tokens -= 3;
+
+               xform = parse_table_action_cipher(p, tokens, n_tokens, encrypt,
+                               &used_n_tokens);
+       } else if (strcmp(tokens[3], "cipher_auth") == 0) {
+               tokens += 3;
+               n_tokens -= 3;
+
+               xform = parse_table_action_cipher_auth(p, tokens, n_tokens,
+                               encrypt, &used_n_tokens);
+       } else if (strcmp(tokens[3], "aead") == 0) {
+               tokens += 3;
+               n_tokens -= 3;
+
+               xform = parse_table_action_aead(p, tokens, n_tokens, encrypt,
+                               &used_n_tokens);
+       }
+
+       if (xform == NULL)
+               return 0;
+
+       p->xform = xform;
+
+       if (strcmp(tokens[used_n_tokens], "data_offset")) {
+               parse_free_sym_crypto_param_data(p);
+               return 0;
+       }
+
+       a->action_mask |= 1 << RTE_TABLE_ACTION_SYM_CRYPTO;
+
+       return used_n_tokens + 5;
+}
+
+static uint32_t
+parse_table_action_tag(char **tokens,
+       uint32_t n_tokens,
+       struct table_rule_action *a)
+{
+       if ((n_tokens < 2) ||
+               strcmp(tokens[0], "tag"))
+               return 0;
+
+       if (parser_read_uint32(&a->tag.tag, tokens[1]))
+               return 0;
+
+       a->action_mask |= 1 << RTE_TABLE_ACTION_TAG;
+       return 2;
+}
+
+static uint32_t
+parse_table_action_decap(char **tokens,
+       uint32_t n_tokens,
+       struct table_rule_action *a)
+{
+       if ((n_tokens < 2) ||
+               strcmp(tokens[0], "decap"))
+               return 0;
+
+       if (parser_read_uint16(&a->decap.n, tokens[1]))
+               return 0;
+
+       a->action_mask |= 1 << RTE_TABLE_ACTION_DECAP;
+       return 2;
+}
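
Both of the new rule actions take a single numeric argument: tag sets a 32-bit
packet tag, decap strips the first <n> bytes of the packet. An illustrative
rule combining them:

    pipeline PIPELINE0 table 0 rule add match default action fwd port 1 tag 100 decap 50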
+
 static uint32_t
 parse_table_action(char **tokens,
        uint32_t n_tokens,
@@ -3492,6 +4257,47 @@ parse_table_action(char **tokens,
                n_tokens -= n;
        }
 
+       if (n_tokens && (strcmp(tokens[0], "sym_crypto") == 0)) {
+               uint32_t n;
+
+               n = parse_table_action_sym_crypto(tokens, n_tokens, a);
+               if (n == 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID,
+                               "action sym_crypto");
+                       return 0;
+               }
+
+               tokens += n;
+               n_tokens -= n;
+       }
+
+       if (n_tokens && (strcmp(tokens[0], "tag") == 0)) {
+               uint32_t n;
+
+               n = parse_table_action_tag(tokens, n_tokens, a);
+               if (n == 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID,
+                               "action tag");
+                       return 0;
+               }
+
+               tokens += n;
+               n_tokens -= n;
+       }
+
+       if (n_tokens && (strcmp(tokens[0], "decap") == 0)) {
+               uint32_t n;
+
+               n = parse_table_action_decap(tokens, n_tokens, a);
+               if (n == 0) {
+                       snprintf(out, out_size, MSG_ARG_INVALID,
+                               "action decap");
+                       return 0;
+               }
+
+               tokens += n;
+               n_tokens -= n;
+       }
+
        if (n_tokens0 - n_tokens == 1) {
                snprintf(out, out_size, MSG_ARG_INVALID, "action");
                return 0;
@@ -3579,6 +4385,9 @@ cmd_pipeline_table_rule_add(char **tokens,
                snprintf(out, out_size, MSG_CMD_FAIL, tokens[0]);
                return;
        }
+
+       if (a.action_mask & 1 << RTE_TABLE_ACTION_SYM_CRYPTO)
+               parse_free_sym_crypto_param_data(&a.sym_crypto);
 }
 
 
@@ -4570,6 +5379,11 @@ cmd_help(char **tokens, uint32_t n_tokens, char *out, size_t out_size)
                return;
        }
 
+       if (strcmp(tokens[0], "cryptodev") == 0) {
+               snprintf(out, out_size, "\n%s\n", cmd_cryptodev_help);
+               return;
+       }
+
        if ((n_tokens == 4) &&
                (strcmp(tokens[0], "port") == 0) &&
                (strcmp(tokens[1], "in") == 0) &&
@@ -4860,6 +5674,11 @@ cli_process(char *in, char *out, size_t out_size)
                return;
        }
 
+       if (strcmp(tokens[0], "cryptodev") == 0) {
+               cmd_cryptodev(tokens, n_tokens, out, out_size);
+               return;
+       }
+
        if (strcmp(tokens[0], "port") == 0) {
                cmd_port_in_action_profile(tokens, n_tokens, out, out_size);
                return;
index 6b08e9e..30fca80 100644 (file)
@@ -8,7 +8,6 @@
 #include <unistd.h>
 #include <sys/types.h>
 
-#define __USE_GNU
 #include <sys/socket.h>
 
 #include <sys/epoll.h>
diff --git a/examples/ip_pipeline/cryptodev.c b/examples/ip_pipeline/cryptodev.c
new file mode 100644 (file)
index 0000000..c4ba72b
--- /dev/null
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <rte_cryptodev.h>
+#include <rte_cryptodev_pmd.h>
+#include <rte_string_fns.h>
+
+#include "cryptodev.h"
+
+static struct cryptodev_list cryptodev_list;
+
+int
+cryptodev_init(void)
+{
+       TAILQ_INIT(&cryptodev_list);
+
+       return 0;
+}
+
+struct cryptodev *
+cryptodev_find(const char *name)
+{
+       struct cryptodev *cryptodev;
+
+       if (name == NULL)
+               return NULL;
+
+       TAILQ_FOREACH(cryptodev, &cryptodev_list, node)
+               if (strcmp(cryptodev->name, name) == 0)
+                       return cryptodev;
+
+       return NULL;
+}
+
+struct cryptodev *
+cryptodev_next(struct cryptodev *cryptodev)
+{
+       return (cryptodev == NULL) ?
+                       TAILQ_FIRST(&cryptodev_list) :
+                       TAILQ_NEXT(cryptodev, node);
+}
+
+struct cryptodev *
+cryptodev_create(const char *name, struct cryptodev_params *params)
+{
+       struct rte_cryptodev_info dev_info;
+       struct rte_cryptodev_config dev_conf;
+       struct rte_cryptodev_qp_conf queue_conf;
+       struct cryptodev *cryptodev;
+       uint32_t dev_id, i;
+       uint32_t socket_id;
+       int status;
+
+       /* Check input params */
+       if ((name == NULL) ||
+               cryptodev_find(name) ||
+               (params->n_queues == 0) ||
+               (params->queue_size == 0))
+               return NULL;
+
+       if (params->dev_name) {
+               status = rte_cryptodev_get_dev_id(params->dev_name);
+               if (status == -1)
+                       return NULL;
+
+               dev_id = (uint32_t)status;
+       } else {
+               if (rte_cryptodev_pmd_is_valid_dev(params->dev_id) == 0)
+                       return NULL;
+
+               dev_id = params->dev_id;
+       }
+
+       socket_id = rte_cryptodev_socket_id(dev_id);
+       rte_cryptodev_info_get(dev_id, &dev_info);
+
+       if (dev_info.max_nb_queue_pairs < params->n_queues)
+               return NULL;
+       if (dev_info.feature_flags & RTE_CRYPTODEV_FF_HW_ACCELERATED)
+               return NULL;
+
+       dev_conf.socket_id = socket_id;
+       dev_conf.nb_queue_pairs = params->n_queues;
+
+       status = rte_cryptodev_configure(dev_id, &dev_conf);
+       if (status < 0)
+               return NULL;
+
+       queue_conf.nb_descriptors = params->queue_size;
+       for (i = 0; i < params->n_queues; i++) {
+               status = rte_cryptodev_queue_pair_setup(dev_id, i,
+                               &queue_conf, socket_id, NULL);
+               if (status < 0)
+                       return NULL;
+       }
+
+       if (rte_cryptodev_start(dev_id) < 0)
+               return NULL;
+
+       cryptodev = calloc(1, sizeof(struct cryptodev));
+       if (cryptodev == NULL) {
+               rte_cryptodev_stop(dev_id);
+               return NULL;
+       }
+
+       strlcpy(cryptodev->name, name, sizeof(cryptodev->name));
+       cryptodev->dev_id = dev_id;
+       cryptodev->n_queues = params->n_queues;
+
+       TAILQ_INSERT_TAIL(&cryptodev_list, cryptodev, node);
+
+       return cryptodev;
+}
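
A minimal usage sketch of the new API (the vdev name is hypothetical, and
cryptodev_init() is assumed to have run), mirroring what the "cryptodev" CLI
command does:

    #include "cryptodev.h"

    static struct cryptodev *
    create_crypto0(void)
    {
        struct cryptodev_params params = {
            .dev_name = "crypto_aesni_gcm0",
            .n_queues = 2,
            .queue_size = 1024,
        };

        return cryptodev_create("CRYPTO0", &params);
    }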
diff --git a/examples/ip_pipeline/cryptodev.h b/examples/ip_pipeline/cryptodev.h
new file mode 100644 (file)
index 0000000..d06b3f2
--- /dev/null
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _INCLUDE_SYM_C_H_
+#define _INCLUDE_SYM_C_H_
+
+#include <stdint.h>
+#include <sys/queue.h>
+
+#include <rte_cryptodev.h>
+
+#include "common.h"
+
+struct cryptodev {
+       TAILQ_ENTRY(cryptodev) node;
+       char name[NAME_SIZE];
+       uint16_t dev_id;
+       uint32_t n_queues;
+};
+
+TAILQ_HEAD(cryptodev_list, cryptodev);
+
+int
+cryptodev_init(void);
+
+struct cryptodev *
+cryptodev_find(const char *name);
+
+struct cryptodev *
+cryptodev_next(struct cryptodev *cryptodev);
+
+struct cryptodev_params {
+       const char *dev_name;
+       uint32_t dev_id; /**< Valid only when *dev_name* is NULL. */
+       uint32_t n_queues;
+       uint32_t queue_size;
+};
+
+struct cryptodev *
+cryptodev_create(const char *name, struct cryptodev_params *params);
+
+#endif
diff --git a/examples/ip_pipeline/examples/flow_crypto.cli b/examples/ip_pipeline/examples/flow_crypto.cli
new file mode 100644 (file)
index 0000000..9b639de
--- /dev/null
@@ -0,0 +1,58 @@
+; SPDX-License-Identifier: BSD-3-Clause
+; Copyright(c) 2018 Intel Corporation
+
+;                 ________________
+; LINK0 RXQ0 --->|                |---> CRYPTO0 TXQ0
+;                |      Flow      |
+; CRYPTO0 RXQ0-->| Classification |---> LINK0 TXQ0
+;                |________________|
+;                        |
+;                        +-----------> SINK0 (flow lookup miss)
+;
+; Input packet: Ethernet/IPv4
+;
+; Packet buffer layout:
+; #   Field Name       Offset (Bytes)   Size (Bytes)
+; 0   Mbuf             0                128
+; 1   Headroom         128              128
+; 2   Ethernet header  256              14
+; 3   IPv4 header      280              20
+; 4   Packet           256              1536
+; 5   Crypto Operation 1792             160
+
+mempool MEMPOOL0 buffer 2304 pool 32K cache 256 cpu 1
+mempool MEMPOOL_SESSION0 buffer 1024 pool 1024 cache 128 cpu 1
+
+link LINK0 dev 0000:81:00.0 rxq 1 128 MEMPOOL0 txq 1 512 promiscuous on
+
+#Cryptodev
+cryptodev CRYPTO0 dev crypto_aesni_gcm0 queue 1 1024
+
+table action profile AP0 ipv4 offset 270 fwd sym_crypto dev CRYPTO0 offset 1792 mempool_create MEMPOOL_SESSION0 mempool_init MEMPOOL_SESSION0
+table action profile AP1 ipv4 offset 270 fwd
+
+pipeline PIPELINE0 period 10 offset_port_id 0 cpu 1
+
+pipeline PIPELINE0 port in bsz 32 link LINK0 rxq 0
+pipeline PIPELINE0 port in bsz 32 cryptodev CRYPTO0 rxq 0
+
+pipeline PIPELINE0 port out bsz 32 cryptodev CRYPTO0 txq 0 offset 1792
+pipeline PIPELINE0 port out bsz 32 link LINK0 txq 0
+pipeline PIPELINE0 port out bsz 32 sink
+
+pipeline PIPELINE0 table match hash ext key 8 mask FFFFFFFF00000000 offset 282 buckets 1K size 4K action AP0
+pipeline PIPELINE0 table match stub action AP1
+
+pipeline PIPELINE0 port in 0 table 0
+pipeline PIPELINE0 port in 1 table 1
+
+thread 24 pipeline PIPELINE0 enable
+
+pipeline PIPELINE0 table 0 rule add match default action fwd port 2
+
+#AES-GCM encrypt
+pipeline PIPELINE0 table 0 rule add match hash ipv4_addr 100.0.0.10 action fwd port 0 sym_crypto encrypt type aead aead_algo aes-gcm aead_key 000102030405060708090a0b0c0d0e0f aead_iv 000102030405060708090a0b aead_aad 000102030405060708090a0b0c0d0e0f digest_size 8 data_offset 290
+#AES-GCM decrypt
+#pipeline PIPELINE0 table 0 rule add match hash ipv4_addr 100.0.0.10 action fwd port 0 sym_crypto decrypt type aead aead_algo aes-gcm aead_key 000102030405060708090a0b0c0d0e0f aead_iv 000102030405060708090a0b aead_aad 000102030405060708090a0b0c0d0e0f digest_size 8 data_offset 290
+
+pipeline PIPELINE0 table 1 rule add match default action fwd port 1
diff --git a/examples/ip_pipeline/hash_func.h b/examples/ip_pipeline/hash_func.h
deleted file mode 100644 (file)
index f1b9d94..0000000
+++ /dev/null
@@ -1,357 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2018 Intel Corporation
- */
-
-#ifndef __INCLUDE_HASH_FUNC_H__
-#define __INCLUDE_HASH_FUNC_H__
-
-static inline uint64_t
-hash_xor_key8(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0;
-
-       xor0 = seed ^ (k[0] & m[0]);
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-static inline uint64_t
-hash_xor_key16(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0;
-
-       xor0 = ((k[0] & m[0]) ^ seed) ^ (k[1] & m[1]);
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-static inline uint64_t
-hash_xor_key24(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0;
-
-       xor0 = ((k[0] & m[0]) ^ seed) ^ (k[1] & m[1]);
-
-       xor0 ^= k[2] & m[2];
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-static inline uint64_t
-hash_xor_key32(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0, xor1;
-
-       xor0 = ((k[0] & m[0]) ^ seed) ^ (k[1] & m[1]);
-       xor1 = (k[2] & m[2]) ^ (k[3] & m[3]);
-
-       xor0 ^= xor1;
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-static inline uint64_t
-hash_xor_key40(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0, xor1;
-
-       xor0 = ((k[0] & m[0]) ^ seed) ^ (k[1] & m[1]);
-       xor1 = (k[2] & m[2]) ^ (k[3] & m[3]);
-
-       xor0 ^= xor1;
-
-       xor0 ^= k[4] & m[4];
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-static inline uint64_t
-hash_xor_key48(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0, xor1, xor2;
-
-       xor0 = ((k[0] & m[0]) ^ seed) ^ (k[1] & m[1]);
-       xor1 = (k[2] & m[2]) ^ (k[3] & m[3]);
-       xor2 = (k[4] & m[4]) ^ (k[5] & m[5]);
-
-       xor0 ^= xor1;
-
-       xor0 ^= xor2;
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-static inline uint64_t
-hash_xor_key56(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0, xor1, xor2;
-
-       xor0 = ((k[0] & m[0]) ^ seed) ^ (k[1] & m[1]);
-       xor1 = (k[2] & m[2]) ^ (k[3] & m[3]);
-       xor2 = (k[4] & m[4]) ^ (k[5] & m[5]);
-
-       xor0 ^= xor1;
-       xor2 ^= k[6] & m[6];
-
-       xor0 ^= xor2;
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-static inline uint64_t
-hash_xor_key64(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t xor0, xor1, xor2, xor3;
-
-       xor0 = ((k[0] & m[0]) ^ seed) ^ (k[1] & m[1]);
-       xor1 = (k[2] & m[2]) ^ (k[3] & m[3]);
-       xor2 = (k[4] & m[4]) ^ (k[5] & m[5]);
-       xor3 = (k[6] & m[6]) ^ (k[7] & m[7]);
-
-       xor0 ^= xor1;
-       xor2 ^= xor3;
-
-       xor0 ^= xor2;
-
-       return (xor0 >> 32) ^ xor0;
-}
-
-#if defined(RTE_ARCH_X86_64)
-
-#include <x86intrin.h>
-
-static inline uint64_t
-hash_crc_key8(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t crc0;
-
-       crc0 = _mm_crc32_u64(seed, k[0] & m[0]);
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key16(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t k0, crc0, crc1;
-
-       k0 = k[0] & m[0];
-
-       crc0 = _mm_crc32_u64(k0, seed);
-       crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key24(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t k0, k2, crc0, crc1;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-
-       crc0 = _mm_crc32_u64(k0, seed);
-       crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);
-
-       crc0 = _mm_crc32_u64(crc0, k2);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key32(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t k0, k2, crc0, crc1, crc2, crc3;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-
-       crc0 = _mm_crc32_u64(k0, seed);
-       crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);
-
-       crc2 = _mm_crc32_u64(k2, k[3] & m[3]);
-       crc3 = k2 >> 32;
-
-       crc0 = _mm_crc32_u64(crc0, crc1);
-       crc1 = _mm_crc32_u64(crc2, crc3);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key40(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t k0, k2, crc0, crc1, crc2, crc3;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-
-       crc0 = _mm_crc32_u64(k0, seed);
-       crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);
-
-       crc2 = _mm_crc32_u64(k2, k[3] & m[3]);
-       crc3 = _mm_crc32_u64(k2 >> 32, k[4] & m[4]);
-
-       crc0 = _mm_crc32_u64(crc0, crc1);
-       crc1 = _mm_crc32_u64(crc2, crc3);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key48(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t k0, k2, k5, crc0, crc1, crc2, crc3;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-       k5 = k[5] & m[5];
-
-       crc0 = _mm_crc32_u64(k0, seed);
-       crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);
-
-       crc2 = _mm_crc32_u64(k2, k[3] & m[3]);
-       crc3 = _mm_crc32_u64(k2 >> 32, k[4] & m[4]);
-
-       crc0 = _mm_crc32_u64(crc0, (crc1 << 32) ^ crc2);
-       crc1 = _mm_crc32_u64(crc3, k5);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key56(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-       k5 = k[5] & m[5];
-
-       crc0 = _mm_crc32_u64(k0, seed);
-       crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);
-
-       crc2 = _mm_crc32_u64(k2, k[3] & m[3]);
-       crc3 = _mm_crc32_u64(k2 >> 32, k[4] & m[4]);
-
-       crc4 = _mm_crc32_u64(k5, k[6] & m[6]);
-       crc5 = k5 >> 32;
-
-       crc0 = _mm_crc32_u64(crc0, (crc1 << 32) ^ crc2);
-       crc1 = _mm_crc32_u64(crc3, (crc4 << 32) ^ crc5);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key64(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-       k5 = k[5] & m[5];
-
-       crc0 = _mm_crc32_u64(k0, seed);
-       crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);
-
-       crc2 = _mm_crc32_u64(k2, k[3] & m[3]);
-       crc3 = _mm_crc32_u64(k2 >> 32, k[4] & m[4]);
-
-       crc4 = _mm_crc32_u64(k5, k[6] & m[6]);
-       crc5 = _mm_crc32_u64(k5 >> 32, k[7] & m[7]);
-
-       crc0 = _mm_crc32_u64(crc0, (crc1 << 32) ^ crc2);
-       crc1 = _mm_crc32_u64(crc3, (crc4 << 32) ^ crc5);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-#define hash_default_key8                      hash_crc_key8
-#define hash_default_key16                     hash_crc_key16
-#define hash_default_key24                     hash_crc_key24
-#define hash_default_key32                     hash_crc_key32
-#define hash_default_key40                     hash_crc_key40
-#define hash_default_key48                     hash_crc_key48
-#define hash_default_key56                     hash_crc_key56
-#define hash_default_key64                     hash_crc_key64
-
-#elif defined(RTE_ARCH_ARM64)
-#include "hash_func_arm64.h"
-#else
-
-#define hash_default_key8                      hash_xor_key8
-#define hash_default_key16                     hash_xor_key16
-#define hash_default_key24                     hash_xor_key24
-#define hash_default_key32                     hash_xor_key32
-#define hash_default_key40                     hash_xor_key40
-#define hash_default_key48                     hash_xor_key48
-#define hash_default_key56                     hash_xor_key56
-#define hash_default_key64                     hash_xor_key64
-
-#endif
-
-#endif
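
The x86 helpers deleted above all follow one pattern: mask each 64-bit key
word, feed the words through two independent CRC32-C chains so the CPU can
execute them in parallel, then XOR the lanes together. A minimal standalone
sketch of the 16-byte case (illustrative only; requires SSE4.2, e.g. build
with -msse4.2):

#include <stdint.h>
#include <nmmintrin.h>	/* _mm_crc32_u64 (SSE4.2) */

/* Two-lane CRC32-C fold over a masked 16-byte key. */
static inline uint64_t
crc_fold_key16(const uint64_t k[2], const uint64_t m[2], uint64_t seed)
{
	uint64_t k0 = k[0] & m[0];
	/* Independent chains give instruction-level parallelism. */
	uint64_t crc0 = _mm_crc32_u64(k0, seed);
	uint64_t crc1 = _mm_crc32_u64(k0 >> 32, k[1] & m[1]);

	return crc0 ^ crc1;
}
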
diff --git a/examples/ip_pipeline/hash_func_arm64.h b/examples/ip_pipeline/hash_func_arm64.h
deleted file mode 100644 (file)
index 50df816..0000000
+++ /dev/null
@@ -1,232 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2017-2018 Linaro Limited.
- */
-#ifndef __HASH_FUNC_ARM64_H__
-#define __HASH_FUNC_ARM64_H__
-
-#define _CRC32CX(crc, val)     \
-       __asm__("crc32cx %w[c], %w[c], %x[v]":[c] "+r" (crc):[v] "r" (val))
-
-static inline uint64_t
-hash_crc_key8(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key;
-       uint64_t *m = mask;
-       uint32_t crc0;
-
-       crc0 = seed;
-       _CRC32CX(crc0, k[0] & m[0]);
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key16(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key, k0;
-       uint64_t *m = mask;
-       uint32_t crc0, crc1;
-
-       k0 = k[0] & m[0];
-
-       crc0 = k0;
-       _CRC32CX(crc0, seed);
-       crc1 = k0 >> 32;
-       _CRC32CX(crc1, k[1] & m[1]);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key24(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key, k0, k2;
-       uint64_t *m = mask;
-       uint32_t crc0, crc1;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-
-       crc0 = k0;
-       _CRC32CX(crc0, seed);
-       crc1 = k0 >> 32;
-       _CRC32CX(crc1, k[1] & m[1]);
-
-       _CRC32CX(crc0, k2);
-
-       crc0 ^= crc1;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key32(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key, k0, k2;
-       uint64_t *m = mask;
-       uint32_t crc0, crc1, crc2, crc3;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-
-       crc0 = k0;
-       _CRC32CX(crc0, seed);
-       crc1 = k0 >> 32;
-       _CRC32CX(crc1, k[1] & m[1]);
-
-       crc2 = k2;
-       _CRC32CX(crc2, k[3] & m[3]);
-       crc3 = k2 >> 32;
-
-       _CRC32CX(crc0, crc1);
-       _CRC32CX(crc2, crc3);
-
-       crc0 ^= crc2;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key40(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key, k0, k2;
-       uint64_t *m = mask;
-       uint32_t crc0, crc1, crc2, crc3;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-
-       crc0 = k0;
-       _CRC32CX(crc0, seed);
-       crc1 = k0 >> 32;
-       _CRC32CX(crc1, k[1] & m[1]);
-
-       crc2 = k2;
-       _CRC32CX(crc2, k[3] & m[3]);
-       crc3 = k2 >> 32;
-       _CRC32CX(crc3, k[4] & m[4]);
-
-       _CRC32CX(crc0, crc1);
-       _CRC32CX(crc2, crc3);
-
-       crc0 ^= crc2;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key48(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key, k0, k2, k5;
-       uint64_t *m = mask;
-       uint32_t crc0, crc1, crc2, crc3;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-       k5 = k[5] & m[5];
-
-       crc0 = k0;
-       _CRC32CX(crc0, seed);
-       crc1 = k0 >> 32;
-       _CRC32CX(crc1, k[1] & m[1]);
-
-       crc2 = k2;
-       _CRC32CX(crc2, k[3] & m[3]);
-       crc3 = k2 >> 32;
-       _CRC32CX(crc3, k[4] & m[4]);
-
-       _CRC32CX(crc0, ((uint64_t)crc1 << 32) ^ crc2);
-       _CRC32CX(crc3, k5);
-
-       crc0 ^= crc3;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key56(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key, k0, k2, k5;
-       uint64_t *m = mask;
-       uint32_t crc0, crc1, crc2, crc3, crc4, crc5;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-       k5 = k[5] & m[5];
-
-       crc0 = k0;
-       _CRC32CX(crc0, seed);
-       crc1 = k0 >> 32;
-       _CRC32CX(crc1, k[1] & m[1]);
-
-       crc2 = k2;
-       _CRC32CX(crc2, k[3] & m[3]);
-       crc3 = k2 >> 32;
-       _CRC32CX(crc3, k[4] & m[4]);
-
-       crc4 = k5;
-        _CRC32CX(crc4, k[6] & m[6]);
-       crc5 = k5 >> 32;
-
-       _CRC32CX(crc0, ((uint64_t)crc1 << 32) ^ crc2);
-       _CRC32CX(crc3, ((uint64_t)crc4 << 32) ^ crc5);
-
-       crc0 ^= crc3;
-
-       return crc0;
-}
-
-static inline uint64_t
-hash_crc_key64(void *key, void *mask, __rte_unused uint32_t key_size,
-       uint64_t seed)
-{
-       uint64_t *k = key, k0, k2, k5;
-       uint64_t *m = mask;
-       uint32_t crc0, crc1, crc2, crc3, crc4, crc5;
-
-       k0 = k[0] & m[0];
-       k2 = k[2] & m[2];
-       k5 = k[5] & m[5];
-
-       crc0 = k0;
-       _CRC32CX(crc0, seed);
-       crc1 = k0 >> 32;
-       _CRC32CX(crc1, k[1] & m[1]);
-
-       crc2 = k2;
-       _CRC32CX(crc2, k[3] & m[3]);
-       crc3 = k2 >> 32;
-       _CRC32CX(crc3, k[4] & m[4]);
-
-       crc4 = k5;
-        _CRC32CX(crc4, k[6] & m[6]);
-       crc5 = k5 >> 32;
-       _CRC32CX(crc5, k[7] & m[7]);
-
-       _CRC32CX(crc0, ((uint64_t)crc1 << 32) ^ crc2);
-       _CRC32CX(crc3, ((uint64_t)crc4 << 32) ^ crc5);
-
-       crc0 ^= crc3;
-
-       return crc0;
-}
-
-#define hash_default_key8                      hash_crc_key8
-#define hash_default_key16                     hash_crc_key16
-#define hash_default_key24                     hash_crc_key24
-#define hash_default_key32                     hash_crc_key32
-#define hash_default_key40                     hash_crc_key40
-#define hash_default_key48                     hash_crc_key48
-#define hash_default_key56                     hash_crc_key56
-#define hash_default_key64                     hash_crc_key64
-
-#endif
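
The arm64 twin above open-codes the CRC32CX instruction through inline
assembly. The same operation is available as the ACLE intrinsic __crc32cd
from <arm_acle.h> when CRC support is enabled (e.g. -march=armv8-a+crc);
a hedged equivalent of the 8-byte case for comparison:

#include <stdint.h>
#include <arm_acle.h>	/* __crc32cd, needs -march=armv8-a+crc */

static inline uint64_t
crc_key8_acle(const uint64_t *k, const uint64_t *m, uint64_t seed)
{
	/* CRC32CX: 32-bit accumulator, 64-bit data word. */
	return __crc32cd((uint32_t)seed, k[0] & m[0]);
}
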
index 392a890..787eb86 100644 (file)
@@ -48,7 +48,6 @@ static struct rte_eth_conf port_conf_default = {
                .mq_mode = ETH_MQ_RX_NONE,
                .max_rx_pkt_len = 9000, /* Jumbo frame max packet len */
                .split_hdr_size = 0, /* Header split buffer size */
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        },
        .rx_adv_conf = {
                .rss_conf = {
index a69face..97d1e91 100644 (file)
@@ -14,6 +14,7 @@
 #include "cli.h"
 #include "conn.h"
 #include "kni.h"
+#include "cryptodev.h"
 #include "link.h"
 #include "mempool.h"
 #include "pipeline.h"
@@ -210,6 +211,14 @@ main(int argc, char **argv)
                return status;
        }
 
+       /* Sym Crypto */
+       status = cryptodev_init();
+       if (status) {
+               printf("Error: Cryptodev initialization failed (%d)\n",
+                               status);
+               return status;
+       }
+
        /* Action */
        status = port_in_action_profile_init();
        if (status) {
index a9f2ea4..5e5fe64 100644 (file)
@@ -21,5 +21,6 @@ sources = files(
        'swq.c',
        'tap.c',
        'thread.c',
-       'tmgr.c'
+       'tmgr.c',
+       'cryptodev.c'
 )
index 43fe867..b23d6c0 100644 (file)
 #include <rte_port_source_sink.h>
 #include <rte_port_fd.h>
 #include <rte_port_sched.h>
+#include <rte_port_sym_crypto.h>
 
 #include <rte_table_acl.h>
 #include <rte_table_array.h>
 #include <rte_table_hash.h>
+#include <rte_table_hash_func.h>
 #include <rte_table_lpm.h>
 #include <rte_table_lpm_ipv6.h>
 #include <rte_table_stub.h>
@@ -35,8 +37,7 @@
 #include "tap.h"
 #include "tmgr.h"
 #include "swq.h"
-
-#include "hash_func.h"
+#include "cryptodev.h"
 
 #ifndef PIPELINE_MSGQ_SIZE
 #define PIPELINE_MSGQ_SIZE                                 64
@@ -163,6 +164,7 @@ pipeline_port_in_create(const char *pipeline_name,
                struct rte_port_kni_reader_params kni;
 #endif
                struct rte_port_source_params source;
+               struct rte_port_sym_crypto_reader_params sym_crypto;
        } pp;
 
        struct pipeline *pipeline;
@@ -296,6 +298,27 @@ pipeline_port_in_create(const char *pipeline_name,
                break;
        }
 
+       case PORT_IN_CRYPTODEV:
+       {
+               struct cryptodev *cryptodev;
+
+               cryptodev = cryptodev_find(params->dev_name);
+               if (cryptodev == NULL)
+                       return -1;
+
+               if (params->cryptodev.queue_id >= cryptodev->n_queues)
+                       return -1;
+
+               pp.sym_crypto.cryptodev_id = cryptodev->dev_id;
+               pp.sym_crypto.queue_id = params->cryptodev.queue_id;
+               pp.sym_crypto.f_callback = params->cryptodev.f_callback;
+               pp.sym_crypto.arg_callback = params->cryptodev.arg_callback;
+               p.ops = &rte_port_sym_crypto_reader_ops;
+               p.arg_create = &pp.sym_crypto;
+
+               break;
+       }
+
        default:
                return -1;
        }
@@ -385,6 +408,7 @@ pipeline_port_out_create(const char *pipeline_name,
                struct rte_port_kni_writer_params kni;
 #endif
                struct rte_port_sink_params sink;
+               struct rte_port_sym_crypto_writer_params sym_crypto;
        } pp;
 
        union {
@@ -394,6 +418,7 @@ pipeline_port_out_create(const char *pipeline_name,
 #ifdef RTE_LIBRTE_KNI
                struct rte_port_kni_writer_nodrop_params kni;
 #endif
+               struct rte_port_sym_crypto_writer_nodrop_params sym_crypto;
        } pp_nodrop;
 
        struct pipeline *pipeline;
@@ -549,6 +574,40 @@ pipeline_port_out_create(const char *pipeline_name,
                break;
        }
 
+       case PORT_OUT_CRYPTODEV:
+       {
+               struct cryptodev *cryptodev;
+
+               cryptodev = cryptodev_find(params->dev_name);
+               if (cryptodev == NULL)
+                       return -1;
+
+               if (params->cryptodev.queue_id >= cryptodev->n_queues)
+                       return -1;
+
+               pp.sym_crypto.cryptodev_id = cryptodev->dev_id;
+               pp.sym_crypto.queue_id = params->cryptodev.queue_id;
+               pp.sym_crypto.tx_burst_sz = params->burst_size;
+               pp.sym_crypto.crypto_op_offset = params->cryptodev.op_offset;
+
+               pp_nodrop.sym_crypto.cryptodev_id = cryptodev->dev_id;
+               pp_nodrop.sym_crypto.queue_id = params->cryptodev.queue_id;
+               pp_nodrop.sym_crypto.tx_burst_sz = params->burst_size;
+               pp_nodrop.sym_crypto.n_retries = params->retry;
+               pp_nodrop.sym_crypto.crypto_op_offset =
+                               params->cryptodev.op_offset;
+
+               if (params->retry == 0) {
+                       p.ops = &rte_port_sym_crypto_writer_ops;
+                       p.arg_create = &pp.sym_crypto;
+               } else {
+                       p.ops = &rte_port_sym_crypto_writer_nodrop_ops;
+                       p.arg_create = &pp_nodrop.sym_crypto;
+               }
+
+               break;
+       }
+
        default:
                return -1;
        }
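
The retry parameter above selects between two writer flavours: the plain
writer drops whatever it cannot enqueue, while the nodrop variant retries up
to n_retries times. The underlying idea, sketched against the cryptodev
burst API (an illustration, not the actual port implementation):

#include <rte_cryptodev.h>

/* Retry a partial enqueue before giving up. */
static inline uint16_t
enqueue_nodrop(uint8_t dev_id, uint16_t qp_id,
	struct rte_crypto_op **ops, uint16_t n, uint32_t n_retries)
{
	uint16_t sent = rte_cryptodev_enqueue_burst(dev_id, qp_id, ops, n);

	while (sent < n && n_retries-- > 0)
		sent += rte_cryptodev_enqueue_burst(dev_id, qp_id,
				ops + sent, n - sent);
	return sent;
}
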
@@ -818,28 +877,28 @@ pipeline_table_create(const char *pipeline_name,
 
                switch (params->match.hash.key_size) {
                case  8:
-                       f_hash = hash_default_key8;
+                       f_hash = rte_table_hash_crc_key8;
                        break;
                case 16:
-                       f_hash = hash_default_key16;
+                       f_hash = rte_table_hash_crc_key16;
                        break;
                case 24:
-                       f_hash = hash_default_key24;
+                       f_hash = rte_table_hash_crc_key24;
                        break;
                case 32:
-                       f_hash = hash_default_key32;
+                       f_hash = rte_table_hash_crc_key32;
                        break;
                case 40:
-                       f_hash = hash_default_key40;
+                       f_hash = rte_table_hash_crc_key40;
                        break;
                case 48:
-                       f_hash = hash_default_key48;
+                       f_hash = rte_table_hash_crc_key48;
                        break;
                case 56:
-                       f_hash = hash_default_key56;
+                       f_hash = rte_table_hash_crc_key56;
                        break;
                case 64:
-                       f_hash = hash_default_key64;
+                       f_hash = rte_table_hash_crc_key64;
                        break;
                default:
                        return -1;
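
With the application-private hash_func.h gone, the dispatch above selects
the equivalent public helpers from rte_table_hash_func.h. A sketch of how
the chosen function plugs into a hash table configuration (field names as
found in rte_table_hash.h for this release; the sizes are placeholders):

#include <rte_table_hash.h>
#include <rte_table_hash_func.h>

static struct rte_table_hash_params flow_table_params = {
	.name = "flow_table",
	.key_size = 16,
	.key_offset = 0,
	.key_mask = NULL,	/* NULL: hash every key byte */
	.n_keys = 64 * 1024,
	.n_buckets = 16 * 1024,
	.f_hash = rte_table_hash_crc_key16,	/* as selected above */
	.seed = 0,
};
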
index a953a29..e5b1d5d 100644 (file)
@@ -27,6 +27,7 @@ enum port_in_type {
        PORT_IN_TAP,
        PORT_IN_KNI,
        PORT_IN_SOURCE,
+       PORT_IN_CRYPTODEV,
 };
 
 struct port_in_params {
@@ -48,6 +49,12 @@ struct port_in_params {
                        const char *file_name;
                        uint32_t n_bytes_per_pkt;
                } source;
+
+               struct {
+                       uint16_t queue_id;
+                       void *f_callback;
+                       void *arg_callback;
+               } cryptodev;
        };
        uint32_t burst_size;
 
@@ -62,6 +69,7 @@ enum port_out_type {
        PORT_OUT_TAP,
        PORT_OUT_KNI,
        PORT_OUT_SINK,
+       PORT_OUT_CRYPTODEV,
 };
 
 struct port_out_params {
@@ -76,6 +84,11 @@ struct port_out_params {
                        const char *file_name;
                        uint32_t max_n_pkts;
                } sink;
+
+               struct {
+                       uint16_t queue_id;
+                       uint32_t op_offset;
+               } cryptodev;
        };
        uint32_t burst_size;
        int retry;
@@ -268,6 +281,9 @@ struct table_rule_action {
        struct rte_table_action_ttl_params ttl;
        struct rte_table_action_stats_params stats;
        struct rte_table_action_time_params time;
+       struct rte_table_action_sym_crypto_params sym_crypto;
+       struct rte_table_action_tag_params tag;
+       struct rte_table_action_decap_params decap;
 };
 
 int
index 7fc0333..4bd971f 100644 (file)
@@ -2244,29 +2244,37 @@ match_convert(struct table_rule_match *mh,
                                ml->acl_add.field_value[0].mask_range.u8 =
                                        mh->match.acl.proto_mask;
 
-                               ml->acl_add.field_value[1].value.u32 = sa32[0];
+                               ml->acl_add.field_value[1].value.u32 =
+                                       rte_be_to_cpu_32(sa32[0]);
                                ml->acl_add.field_value[1].mask_range.u32 =
                                        sa32_depth[0];
-                               ml->acl_add.field_value[2].value.u32 = sa32[1];
+                               ml->acl_add.field_value[2].value.u32 =
+                                       rte_be_to_cpu_32(sa32[1]);
                                ml->acl_add.field_value[2].mask_range.u32 =
                                        sa32_depth[1];
-                               ml->acl_add.field_value[3].value.u32 = sa32[2];
+                               ml->acl_add.field_value[3].value.u32 =
+                                       rte_be_to_cpu_32(sa32[2]);
                                ml->acl_add.field_value[3].mask_range.u32 =
                                        sa32_depth[2];
-                               ml->acl_add.field_value[4].value.u32 = sa32[3];
+                               ml->acl_add.field_value[4].value.u32 =
+                                       rte_be_to_cpu_32(sa32[3]);
                                ml->acl_add.field_value[4].mask_range.u32 =
                                        sa32_depth[3];
 
-                               ml->acl_add.field_value[5].value.u32 = da32[0];
+                               ml->acl_add.field_value[5].value.u32 =
+                                       rte_be_to_cpu_32(da32[0]);
                                ml->acl_add.field_value[5].mask_range.u32 =
                                        da32_depth[0];
-                               ml->acl_add.field_value[6].value.u32 = da32[1];
+                               ml->acl_add.field_value[6].value.u32 =
+                                       rte_be_to_cpu_32(da32[1]);
                                ml->acl_add.field_value[6].mask_range.u32 =
                                        da32_depth[1];
-                               ml->acl_add.field_value[7].value.u32 = da32[2];
+                               ml->acl_add.field_value[7].value.u32 =
+                                       rte_be_to_cpu_32(da32[2]);
                                ml->acl_add.field_value[7].mask_range.u32 =
                                        da32_depth[2];
-                               ml->acl_add.field_value[8].value.u32 = da32[3];
+                               ml->acl_add.field_value[8].value.u32 =
+                                       rte_be_to_cpu_32(da32[3]);
                                ml->acl_add.field_value[8].mask_range.u32 =
                                        da32_depth[3];
 
@@ -2308,36 +2316,36 @@ match_convert(struct table_rule_match *mh,
                                        mh->match.acl.proto_mask;
 
                                ml->acl_delete.field_value[1].value.u32 =
-                                       sa32[0];
+                                       rte_be_to_cpu_32(sa32[0]);
                                ml->acl_delete.field_value[1].mask_range.u32 =
                                        sa32_depth[0];
                                ml->acl_delete.field_value[2].value.u32 =
-                                       sa32[1];
+                                       rte_be_to_cpu_32(sa32[1]);
                                ml->acl_delete.field_value[2].mask_range.u32 =
                                        sa32_depth[1];
                                ml->acl_delete.field_value[3].value.u32 =
-                                       sa32[2];
+                                       rte_be_to_cpu_32(sa32[2]);
                                ml->acl_delete.field_value[3].mask_range.u32 =
                                        sa32_depth[2];
                                ml->acl_delete.field_value[4].value.u32 =
-                                       sa32[3];
+                                       rte_be_to_cpu_32(sa32[3]);
                                ml->acl_delete.field_value[4].mask_range.u32 =
                                        sa32_depth[3];
 
                                ml->acl_delete.field_value[5].value.u32 =
-                                       da32[0];
+                                       rte_be_to_cpu_32(da32[0]);
                                ml->acl_delete.field_value[5].mask_range.u32 =
                                        da32_depth[0];
                                ml->acl_delete.field_value[6].value.u32 =
-                                       da32[1];
+                                       rte_be_to_cpu_32(da32[1]);
                                ml->acl_delete.field_value[6].mask_range.u32 =
                                        da32_depth[1];
                                ml->acl_delete.field_value[7].value.u32 =
-                                       da32[2];
+                                       rte_be_to_cpu_32(da32[2]);
                                ml->acl_delete.field_value[7].mask_range.u32 =
                                        da32_depth[2];
                                ml->acl_delete.field_value[8].value.u32 =
-                                       da32[3];
+                                       rte_be_to_cpu_32(da32[3]);
                                ml->acl_delete.field_value[8].mask_range.u32 =
                                        da32_depth[3];
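
The rte_be_to_cpu_32() wrapping added above matters because librte_acl
compares its 32-bit fields as host-order integers, while the IPv6 address
words arrive in network byte order; on a little-endian CPU a raw load has
its bytes reversed. A tiny illustration (the value is an example):

#include <stdio.h>
#include <rte_byteorder.h>

int main(void)
{
	rte_be32_t wire = RTE_BE32(0x0a000001);	/* 10.0.0.1 on the wire */

	/* On x86 prints 0x0100000a: wrong significance for range matching. */
	printf("raw load:  0x%08x\n", wire);
	/* Prints 0x0a000001: what the ACL field comparison expects. */
	printf("converted: 0x%08x\n", rte_be_to_cpu_32(wire));
	return 0;
}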
 
@@ -2476,6 +2484,36 @@ action_convert(struct rte_table_action *a,
                        return status;
        }
 
+       if (action->action_mask & (1LLU << RTE_TABLE_ACTION_SYM_CRYPTO)) {
+               status = rte_table_action_apply(a,
+                       data,
+                       RTE_TABLE_ACTION_SYM_CRYPTO,
+                       &action->sym_crypto);
+
+               if (status)
+                       return status;
+       }
+
+       if (action->action_mask & (1LLU << RTE_TABLE_ACTION_TAG)) {
+               status = rte_table_action_apply(a,
+                       data,
+                       RTE_TABLE_ACTION_TAG,
+                       &action->tag);
+
+               if (status)
+                       return status;
+       }
+
+       if (action->action_mask & (1LLU << RTE_TABLE_ACTION_DECAP)) {
+               status = rte_table_action_apply(a,
+                       data,
+                       RTE_TABLE_ACTION_DECAP,
+                       &action->decap);
+
+               if (status)
+                       return status;
+       }
+
        return 0;
 }
 
index b830f67..17b55d4 100644 (file)
@@ -165,8 +165,7 @@ static struct rte_eth_conf port_conf = {
                .max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE,
                .split_hdr_size = 0,
                .offloads = (DEV_RX_OFFLOAD_CHECKSUM |
-                            DEV_RX_OFFLOAD_JUMBO_FRAME |
-                            DEV_RX_OFFLOAD_CRC_STRIP),
+                            DEV_RX_OFFLOAD_JUMBO_FRAME),
        },
        .rx_adv_conf = {
                        .rss_conf = {
index ee9e590..e33232c 100644 (file)
@@ -96,6 +96,7 @@ esp_inbound(struct rte_mbuf *m, struct ipsec_sa *sa,
 
                switch (sa->cipher_algo) {
                case RTE_CRYPTO_CIPHER_NULL:
+               case RTE_CRYPTO_CIPHER_3DES_CBC:
                case RTE_CRYPTO_CIPHER_AES_CBC:
                        /* Copy IV at the end of crypto operation */
                        rte_memcpy(iv_ptr, iv, sa->iv_len);
@@ -326,6 +327,7 @@ esp_outbound(struct rte_mbuf *m, struct ipsec_sa *sa,
        } else {
                switch (sa->cipher_algo) {
                case RTE_CRYPTO_CIPHER_NULL:
+               case RTE_CRYPTO_CIPHER_3DES_CBC:
                case RTE_CRYPTO_CIPHER_AES_CBC:
                        memset(iv, 0, sa->iv_len);
                        break;
@@ -387,6 +389,7 @@ esp_outbound(struct rte_mbuf *m, struct ipsec_sa *sa,
        } else {
                switch (sa->cipher_algo) {
                case RTE_CRYPTO_CIPHER_NULL:
+               case RTE_CRYPTO_CIPHER_3DES_CBC:
                case RTE_CRYPTO_CIPHER_AES_CBC:
                        sym_cop->cipher.data.offset = ip_hdr_len +
                                sizeof(struct esp_hdr);
index b45b87b..1bc0b5b 100644 (file)
@@ -54,7 +54,7 @@
 #define NB_MBUF        (32000)
 
 #define CDEV_QUEUE_DESC 2048
-#define CDEV_MAP_ENTRIES 1024
+#define CDEV_MAP_ENTRIES 16384
 #define CDEV_MP_NB_OBJS 2048
 #define CDEV_MP_CACHE_SZ 64
 #define MAX_QUEUE_PAIRS 1
@@ -197,8 +197,7 @@ static struct rte_eth_conf port_conf = {
                .mq_mode        = ETH_MQ_RX_RSS,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
-               .offloads = DEV_RX_OFFLOAD_CHECKSUM |
-                           DEV_RX_OFFLOAD_CRC_STRIP,
+               .offloads = DEV_RX_OFFLOAD_CHECKSUM,
        },
        .rx_adv_conf = {
                .rss_conf = {
@@ -1392,9 +1391,27 @@ cryptodevs_init(void)
 
        uint32_t max_sess_sz = 0, sess_sz;
        for (cdev_id = 0; cdev_id < rte_cryptodev_count(); cdev_id++) {
+               void *sec_ctx;
+
+               /* Get crypto priv session size */
                sess_sz = rte_cryptodev_sym_get_private_session_size(cdev_id);
                if (sess_sz > max_sess_sz)
                        max_sess_sz = sess_sz;
+
+               /*
+                * If crypto device is security capable, need to check the
+                * size of security session as well.
+                */
+
+               /* Get security context of the crypto device */
+               sec_ctx = rte_cryptodev_get_sec_ctx(cdev_id);
+               if (sec_ctx == NULL)
+                       continue;
+
+               /* Get size of security session */
+               sess_sz = rte_security_session_get_size(sec_ctx);
+               if (sess_sz > max_sess_sz)
+                       max_sess_sz = sess_sz;
        }
        RTE_ETH_FOREACH_DEV(port_id) {
                void *sec_ctx;
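
The loop above enforces a simple sizing rule: a session mempool element must
be at least as large as the biggest session any device may allocate, whether
a plain symmetric crypto session or an rte_security session. The same rule
as a small helper (a sketch; error handling omitted):

#include <rte_common.h>
#include <rte_cryptodev.h>
#include <rte_security.h>

/* Largest private session size one crypto device can require. */
static uint32_t
max_session_size(uint8_t cdev_id)
{
	uint32_t sz = rte_cryptodev_sym_get_private_session_size(cdev_id);
	void *sec_ctx = rte_cryptodev_get_sec_ctx(cdev_id);

	if (sec_ctx != NULL)
		sz = RTE_MAX(sz,
			(uint32_t)rte_security_session_get_size(sec_ctx));
	return sz;
}
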
index 4ab8e09..d2d3550 100644 (file)
@@ -81,6 +81,13 @@ const struct supported_cipher_algo cipher_algos[] = {
                .iv_len = 8,
                .block_size = 16, /* XXX AESNI MB limitation, should be 4 */
                .key_len = 20
+       },
+       {
+               .keyword = "3des-cbc",
+               .algo = RTE_CRYPTO_CIPHER_3DES_CBC,
+               .iv_len = 8,
+               .block_size = 8,
+               .key_len = 24
        }
 };
 
@@ -327,7 +334,8 @@ parse_sa_tokens(char **tokens, uint32_t n_tokens,
                        if (status->status < 0)
                                return;
 
-                       if (algo->algo == RTE_CRYPTO_CIPHER_AES_CBC)
+                       if (algo->algo == RTE_CRYPTO_CIPHER_AES_CBC ||
+                               algo->algo == RTE_CRYPTO_CIPHER_3DES_CBC)
                                rule->salt = (uint32_t)rte_rand();
 
                        if (algo->algo == RTE_CRYPTO_CIPHER_AES_CTR) {
@@ -810,6 +818,7 @@ sa_add_rules(struct sa_ctx *sa_ctx, const struct ipsec_sa entries[],
                } else {
                        switch (sa->cipher_algo) {
                        case RTE_CRYPTO_CIPHER_NULL:
+                       case RTE_CRYPTO_CIPHER_3DES_CBC:
                        case RTE_CRYPTO_CIPHER_AES_CBC:
                                iv_length = sa->iv_len;
                                break;
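
With the parser entries above in place, an SA rule in the ipsec-secgw
configuration file can name the new cipher. A plausible rule line following
the grammar of the shipped sample configs (SPI, keys and addresses are
made-up placeholders; 3DES-CBC takes a 24-byte key and an 8-byte IV):

sa out 1001 cipher_algo 3des-cbc \
cipher_key 01:23:45:67:89:ab:cd:ef:01:23:45:67:89:ab:cd:ef:01:23:45:67:89:ab:cd:ef \
auth_algo sha1-hmac \
auth_key 00:01:02:03:04:05:06:07:08:09:0a:0b:0c:0d:0e:0f:10:11:12:13 \
mode ipsec-tunnel src 172.16.1.5 dst 172.16.2.5
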
index 331c32e..4073a49 100644 (file)
@@ -109,8 +109,7 @@ static struct rte_eth_conf port_conf = {
        .rxmode = {
                .max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE,
                .split_hdr_size = 0,
-               .offloads = (DEV_RX_OFFLOAD_JUMBO_FRAME |
-                            DEV_RX_OFFLOAD_CRC_STRIP),
+               .offloads = DEV_RX_OFFLOAD_JUMBO_FRAME,
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
@@ -774,7 +773,7 @@ main(int argc, char **argv)
                        qconf->tx_queue_id[portid] = queueid;
                        queueid++;
                }
-
+               rte_eth_allmulticast_enable(portid);
                /* Start device */
                ret = rte_eth_dev_start(portid);
                if (ret < 0)
index 7e19d2e..dd90d7d 100644 (file)
@@ -20,6 +20,7 @@ static: build/$(APP)-static
 
 PC_FILE := $(shell pkg-config --path libdpdk)
 CFLAGS += -O3 $(shell pkg-config --cflags libdpdk)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 LDFLAGS_SHARED = $(shell pkg-config --libs libdpdk)
 LDFLAGS_STATIC = -Wl,-Bstatic $(shell pkg-config --static --libs libdpdk)
 
@@ -54,6 +55,7 @@ please change the definition of the RTE_TARGET environment variable)
 endif
 
 CFLAGS += -O3
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS)
 
 include $(RTE_SDK)/mk/rte.extapp.mk
index 8133608..e37b1ad 100644 (file)
@@ -94,9 +94,6 @@ static struct kni_port_params *kni_port_params_array[RTE_MAX_ETHPORTS];
 
 /* Options for configuring ethernet port */
 static struct rte_eth_conf port_conf = {
-       .rxmode = {
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
-       },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
        },
@@ -109,6 +106,8 @@ static struct rte_mempool * pktmbuf_pool = NULL;
 static uint32_t ports_mask = 0;
 /* Ports set in promiscuous mode off by default. */
 static int promiscuous_on = 0;
+/* Monitor link status continually; off by default. */
+static int monitor_links;
 
 /* Structure type for recording kni interface specific stats */
 struct kni_interface_stats {
@@ -172,14 +171,13 @@ signal_handler(int signum)
        /* When we receive a USR2 signal, reset stats */
        if (signum == SIGUSR2) {
                memset(&kni_stats, 0, sizeof(kni_stats));
-               printf("\n**Statistics have been reset**\n");
+               printf("\n** Statistics have been reset **\n");
                return;
        }
 
        /* When we receive a RTMIN or SIGINT signal, stop kni processing */
        if (signum == SIGRTMIN || signum == SIGINT){
-               printf("SIGRTMIN is received, and the KNI processing is "
-                                                       "going to stop\n");
+               printf("\nSIGRTMIN/SIGINT received. KNI processing stopping.\n");
                rte_atomic32_inc(&kni_stop);
                return;
         }
@@ -225,7 +223,8 @@ kni_ingress(struct kni_port_params *p)
                }
                /* Burst tx to kni */
                num = rte_kni_tx_burst(p->kni[i], pkts_burst, nb_rx);
-               kni_stats[port_id].rx_packets += num;
+               if (num)
+                       kni_stats[port_id].rx_packets += num;
 
                rte_kni_handle_request(p->kni[i]);
                if (unlikely(num < nb_rx)) {
@@ -262,7 +261,8 @@ kni_egress(struct kni_port_params *p)
                }
                /* Burst tx to eth */
                nb_tx = rte_eth_tx_burst(port_id, 0, pkts_burst, (uint16_t)num);
-               kni_stats[port_id].tx_packets += nb_tx;
+               if (nb_tx)
+                       kni_stats[port_id].tx_packets += nb_tx;
                if (unlikely(nb_tx < num)) {
                        /* Free mbufs not tx to NIC */
                        kni_burst_free_mbufs(&pkts_burst[nb_tx], num - nb_tx);
@@ -328,11 +328,12 @@ main_loop(__rte_unused void *arg)
 static void
 print_usage(const char *prgname)
 {
-       RTE_LOG(INFO, APP, "\nUsage: %s [EAL options] -- -p PORTMASK -P "
+       RTE_LOG(INFO, APP, "\nUsage: %s [EAL options] -- -p PORTMASK -P -m "
                   "[--config (port,lcore_rx,lcore_tx,lcore_kthread...)"
                   "[,(port,lcore_rx,lcore_tx,lcore_kthread...)]]\n"
                   "    -p PORTMASK: hex bitmask of ports to use\n"
                   "    -P : enable promiscuous mode\n"
+                  "    -m : enable monitoring of port carrier state\n"
                   "    --config (port,lcore_rx,lcore_tx,lcore_kthread...): "
                   "port and lcore configurations\n",
                   prgname);
@@ -513,7 +514,7 @@ parse_args(int argc, char **argv)
        opterr = 0;
 
        /* Parse command line */
-       while ((opt = getopt_long(argc, argv, "p:P", longopts,
+       while ((opt = getopt_long(argc, argv, "p:Pm", longopts,
                                                &longindex)) != EOF) {
                switch (opt) {
                case 'p':
@@ -522,6 +523,9 @@ parse_args(int argc, char **argv)
                case 'P':
                        promiscuous_on = 1;
                        break;
+               case 'm':
+                       monitor_links = 1;
+                       break;
                case 0:
                        if (!strncmp(longopts[longindex].name,
                                     CMDLINE_OPT_CONFIG,
@@ -677,6 +681,55 @@ check_all_ports_link_status(uint32_t port_mask)
        }
 }
 
+static void
+log_link_state(struct rte_kni *kni, int prev, struct rte_eth_link *link)
+{
+       if (kni == NULL || link == NULL)
+               return;
+
+       if (prev == ETH_LINK_DOWN && link->link_status == ETH_LINK_UP) {
+               RTE_LOG(INFO, APP, "%s NIC Link is Up %d Mbps %s %s.\n",
+                       rte_kni_get_name(kni),
+                       link->link_speed,
+                       link->link_autoneg ?  "(AutoNeg)" : "(Fixed)",
+                       link->link_duplex ?  "Full Duplex" : "Half Duplex");
+       } else if (prev == ETH_LINK_UP && link->link_status == ETH_LINK_DOWN) {
+               RTE_LOG(INFO, APP, "%s NIC Link is Down.\n",
+                       rte_kni_get_name(kni));
+       }
+}
+
+/*
+ * Monitor the link status of all ports and update the
+ * corresponding KNI interface(s)
+ */
+static void *
+monitor_all_ports_link_status(void *arg)
+{
+       uint16_t portid;
+       struct rte_eth_link link;
+       unsigned int i;
+       struct kni_port_params **p = kni_port_params_array;
+       int prev;
+       (void) arg;
+
+       while (monitor_links) {
+               rte_delay_ms(500);
+               RTE_ETH_FOREACH_DEV(portid) {
+                       if ((ports_mask & (1 << portid)) == 0)
+                               continue;
+                       memset(&link, 0, sizeof(link));
+                       rte_eth_link_get_nowait(portid, &link);
+                       for (i = 0; i < p[portid]->nb_kni; i++) {
+                               prev = rte_kni_update_link(p[portid]->kni[i],
+                                               link.link_status);
+                               log_link_state(p[portid]->kni[i], prev, &link);
+                       }
+               }
+       }
+       return NULL;
+}
+
 /* Callback for request of changing MTU */
 static int
 kni_change_mtu(uint16_t port_id, unsigned int new_mtu)
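
The monitor loop above is meant to run off the data path; a later hunk in
this file starts it with rte_ctrl_thread_create(), which places the helper
on the EAL control-thread cpuset rather than on a polling lcore. The generic
pattern, as a sketch:

#include <pthread.h>
#include <rte_lcore.h>	/* rte_ctrl_thread_create() */

static void *
monitor_fn(void *arg)
{
	(void)arg;
	/* poll link state here until told to stop */
	return NULL;
}

/* Spawn a named helper thread outside the data-path lcores. */
static int
spawn_monitor(pthread_t *tid)
{
	return rte_ctrl_thread_create(tid, "link-monitor", NULL,
			monitor_fn, NULL);
}
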
@@ -896,6 +949,9 @@ main(int argc, char** argv)
        int ret;
        uint16_t nb_sys_ports, port;
        unsigned i;
+       void *retval;
+       pthread_t kni_link_tid;
+       int pid;
 
        /* Associate signal_handler function with USR signals */
        signal(SIGUSR1, signal_handler);
@@ -952,12 +1008,31 @@ main(int argc, char** argv)
        }
        check_all_ports_link_status(ports_mask);
 
+       pid = getpid();
+       RTE_LOG(INFO, APP, "========================\n");
+       RTE_LOG(INFO, APP, "KNI Running\n");
+       RTE_LOG(INFO, APP, "kill -SIGUSR1 %d\n", pid);
+       RTE_LOG(INFO, APP, "    Show KNI Statistics.\n");
+       RTE_LOG(INFO, APP, "kill -SIGUSR2 %d\n", pid);
+       RTE_LOG(INFO, APP, "    Zero KNI Statistics.\n");
+       RTE_LOG(INFO, APP, "========================\n");
+       fflush(stdout);
+
+       ret = rte_ctrl_thread_create(&kni_link_tid,
+                                    "KNI link status check", NULL,
+                                    monitor_all_ports_link_status, NULL);
+       if (ret < 0)
+               rte_exit(EXIT_FAILURE,
+                       "Could not create link status thread!\n");
+
        /* Launch per-lcore function on every lcore */
        rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
        RTE_LCORE_FOREACH_SLAVE(i) {
                if (rte_eal_wait_lcore(i) < 0)
                        return -1;
        }
+       monitor_links = 0;
+       pthread_join(kni_link_tid, &retval);
 
        /* Release resources */
        RTE_ETH_FOREACH_DEV(port) {
index 7913163..fd6ae44 100644 (file)
@@ -12,3 +12,4 @@ deps += ['kni', 'bus_pci']
 sources = files(
        'main.c'
 )
+allow_experimental_apis = true
index aec770c..b6eeabd 100644 (file)
@@ -23,7 +23,6 @@ CFLAGS += -O3 $(shell pkg-config --cflags libdpdk)
 LDFLAGS_SHARED = $(shell pkg-config --libs libdpdk)
 LDFLAGS_STATIC = -Wl,-Bstatic $(shell pkg-config --static --libs libdpdk)
 
-CFLAGS += -D_GNU_SOURCE
 LDFLAGS += -lpqos
 
 build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
@@ -66,7 +65,6 @@ endif
 EXTRA_CFLAGS += -O3 -g -Wfatal-errors
 
 CFLAGS += -I$(PQOS_INSTALL_PATH)/../include
-CFLAGS_cat.o := -D_GNU_SOURCE
 
 LDLIBS += -L$(PQOS_INSTALL_PATH)
 LDLIBS += -lpqos
index 1234e7b..4e2777a 100644 (file)
@@ -9,7 +9,6 @@
 pqos = cc.find_library('pqos', required: false)
 build = pqos.found()
 ext_deps += pqos
-cflags += '-D_GNU_SOURCE'
 cflags += '-I/usr/local/include' # assume pqos lib installed in /usr/local
 sources = files(
        'cat.c', 'l2fwd-cat.c'
index 6061b75..f12fd26 100644 (file)
@@ -213,7 +213,6 @@ static struct rte_eth_conf port_conf = {
                .mq_mode = ETH_MQ_RX_NONE,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
index af54233..a4d28e1 100644 (file)
@@ -90,7 +90,6 @@ struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
 static struct rte_eth_conf port_conf = {
        .rxmode = {
                .split_hdr_size = 0,
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
index 2d8b4d1..0bf2b53 100644 (file)
@@ -81,7 +81,6 @@ struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
 static struct rte_eth_conf port_conf = {
        .rxmode = {
                .split_hdr_size = 0,
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
index 9bb4c5b..6c23215 100644 (file)
@@ -82,7 +82,6 @@ static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
 static struct rte_eth_conf port_conf = {
        .rxmode = {
                .split_hdr_size = 0,
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
index 7c063a8..a322ce4 100644 (file)
@@ -127,8 +127,7 @@ static struct rte_eth_conf port_conf = {
                .mq_mode        = ETH_MQ_RX_RSS,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
-               .offloads = (DEV_RX_OFFLOAD_CRC_STRIP |
-                            DEV_RX_OFFLOAD_CHECKSUM),
+               .offloads = DEV_RX_OFFLOAD_CHECKSUM,
        },
        .rx_adv_conf = {
                .rss_conf = {
index d7e39a3..772ec7b 100644 (file)
@@ -23,6 +23,8 @@ CFLAGS += -O3 $(shell pkg-config --cflags libdpdk)
 LDFLAGS_SHARED = $(shell pkg-config --libs libdpdk)
 LDFLAGS_STATIC = -Wl,-Bstatic $(shell pkg-config --static --libs libdpdk)
 
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
 build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
        $(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
 
@@ -54,6 +56,7 @@ please change the definition of the RTE_TARGET environment variable)
 all:
 else
 
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
 
index d15cd52..0b3f8fe 100644 (file)
@@ -43,6 +43,7 @@
 #include <rte_timer.h>
 #include <rte_power.h>
 #include <rte_spinlock.h>
+#include <rte_power_empty_poll.h>
 
 #include "perf_core.h"
 #include "main.h"
@@ -55,6 +56,8 @@
 
 /* 100 ms interval */
 #define TIMER_NUMBER_PER_SECOND           10
+/* 10 ms interval */
+#define INTERVALS_PER_SECOND             100
 /* 100000 us */
 #define SCALING_PERIOD                    (1000000/TIMER_NUMBER_PER_SECOND)
 #define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25
  */
 #define RTE_TEST_RX_DESC_DEFAULT 1024
 #define RTE_TEST_TX_DESC_DEFAULT 1024
+
+/*
+ * These two thresholds were decided on by running the training algorithm on
+ * a 2.5GHz Xeon. These defaults can be overridden by supplying non-zero values
+ * for the med_threshold and high_threshold parameters on the command line.
+ */
+#define EMPTY_POLL_MED_THRESHOLD 350000UL
+#define EMPTY_POLL_HGH_THRESHOLD 580000UL
+
 static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
 static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
 
@@ -132,6 +146,14 @@ static uint32_t enabled_port_mask = 0;
 static int promiscuous_on = 0;
 /* NUMA is enabled by default. */
 static int numa_on = 1;
+/* emptypoll is disabled by default. */
+static bool empty_poll_on;
+static bool empty_poll_train;
+volatile bool empty_poll_stop;
+static struct  ep_params *ep_params;
+static struct  ep_policy policy;
+static long  ep_med_edpi, ep_hgh_edpi;
+
 static int parse_ptype; /**< Parse packet type using rx callback, and */
                        /**< disabled by default */
 
@@ -180,8 +202,7 @@ static struct rte_eth_conf port_conf = {
                .mq_mode        = ETH_MQ_RX_RSS,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
-               .offloads = (DEV_RX_OFFLOAD_CRC_STRIP |
-                            DEV_RX_OFFLOAD_CHECKSUM),
+               .offloads = DEV_RX_OFFLOAD_CHECKSUM,
        },
        .rx_adv_conf = {
                .rss_conf = {
@@ -331,6 +352,19 @@ static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
 static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \
                unsigned int lcore_id, uint16_t port_id, uint16_t queue_id);
 
+
+/*
+ * These defaults are using the max frequency index (1), a medium index (9)
+ * and a typical low frequency index (14). These can be adjusted to use
+ * different indexes using the relevant command line parameters.
+ */
+static uint8_t  freq_tlb[] = {14, 9, 1};
+
+static int is_done(void)
+{
+       return empty_poll_stop;
+}
+
 /* exit signal handler */
 static void
 signal_exit_now(int sigtype)
@@ -340,6 +374,10 @@ signal_exit_now(int sigtype)
        int ret;
 
        if (sigtype == SIGINT) {
+               if (empty_poll_on)
+                       empty_poll_stop = true;
+
                for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
                        if (rte_lcore_is_enabled(lcore_id) == 0)
                                continue;
@@ -352,16 +390,19 @@ signal_exit_now(int sigtype)
                                                        "core%u\n", lcore_id);
                }
 
-               RTE_ETH_FOREACH_DEV(portid) {
-                       if ((enabled_port_mask & (1 << portid)) == 0)
-                               continue;
+               if (!empty_poll_on) {
+                       RTE_ETH_FOREACH_DEV(portid) {
+                               if ((enabled_port_mask & (1 << portid)) == 0)
+                                       continue;
 
-                       rte_eth_dev_stop(portid);
-                       rte_eth_dev_close(portid);
+                               rte_eth_dev_stop(portid);
+                               rte_eth_dev_close(portid);
+                       }
                }
        }
 
-       rte_exit(EXIT_SUCCESS, "User forced exit\n");
+       if (!empty_poll_on)
+               rte_exit(EXIT_SUCCESS, "User forced exit\n");
 }
 
 /* Frequency scale down timer callback */
@@ -826,7 +867,110 @@ static int event_register(struct lcore_conf *qconf)
 
        return 0;
 }
+/* main processing loop */
+static int
+main_empty_poll_loop(__attribute__((unused)) void *dummy)
+{
+       struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+       unsigned int lcore_id;
+       uint64_t prev_tsc, diff_tsc, cur_tsc;
+       int i, j, nb_rx;
+       uint8_t queueid;
+       uint16_t portid;
+       struct lcore_conf *qconf;
+       struct lcore_rx_queue *rx_queue;
+
+       const uint64_t drain_tsc =
+               (rte_get_tsc_hz() + US_PER_S - 1) /
+               US_PER_S * BURST_TX_DRAIN_US;
+
+       prev_tsc = 0;
+
+       lcore_id = rte_lcore_id();
+       qconf = &lcore_conf[lcore_id];
+
+       if (qconf->n_rx_queue == 0) {
+               RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n",
+                       lcore_id);
+               return 0;
+       }
+
+       for (i = 0; i < qconf->n_rx_queue; i++) {
+               portid = qconf->rx_queue_list[i].port_id;
+               queueid = qconf->rx_queue_list[i].queue_id;
+               RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%u "
+                               "rxqueueid=%hhu\n", lcore_id, portid, queueid);
+       }
+
+       while (!is_done()) {
+               stats[lcore_id].nb_iteration_looped++;
+
+               cur_tsc = rte_rdtsc();
+               /*
+                * TX burst queue drain
+                */
+               diff_tsc = cur_tsc - prev_tsc;
+               if (unlikely(diff_tsc > drain_tsc)) {
+                       for (i = 0; i < qconf->n_tx_port; ++i) {
+                               portid = qconf->tx_port_id[i];
+                               rte_eth_tx_buffer_flush(portid,
+                                               qconf->tx_queue_id[portid],
+                                               qconf->tx_buffer[portid]);
+                       }
+                       prev_tsc = cur_tsc;
+               }
+
+               /*
+                * Read packet from RX queues
+                */
+               for (i = 0; i < qconf->n_rx_queue; ++i) {
+                       rx_queue = &(qconf->rx_queue_list[i]);
+                       rx_queue->idle_hint = 0;
+                       portid = rx_queue->port_id;
+                       queueid = rx_queue->queue_id;
+
+                       nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
+                                       MAX_PKT_BURST);
+
+                       stats[lcore_id].nb_rx_processed += nb_rx;
+
+                       if (nb_rx == 0) {
+
+                               rte_power_empty_poll_stat_update(lcore_id);
+
+                               continue;
+                       } else {
+                               rte_power_poll_stat_update(lcore_id, nb_rx);
+                       }
 
+
+                       /* Prefetch first packets */
+                       for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
+                               rte_prefetch0(rte_pktmbuf_mtod(
+                                                       pkts_burst[j], void *));
+                       }
+
+                       /* Prefetch and forward already prefetched packets */
+                       for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+                               rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
+                                                       j + PREFETCH_OFFSET],
+                                                       void *));
+                               l3fwd_simple_forward(pkts_burst[j], portid,
+                                               qconf);
+                       }
+
+                       /* Forward remaining prefetched packets */
+                       for (; j < nb_rx; j++) {
+                               l3fwd_simple_forward(pkts_burst[j], portid,
+                                               qconf);
+                       }
+
+               }
+
+       }
+
+       return 0;
+}
 /* main processing loop */
 static int
 main_loop(__attribute__((unused)) void *dummy)
@@ -1128,7 +1272,9 @@ print_usage(const char *prgname)
                "  --no-numa: optional, disable numa awareness\n"
                "  --enable-jumbo: enable jumbo frame"
                " which max packet len is PKTLEN in decimal (64-9600)\n"
-               "  --parse-ptype: parse packet type by software\n",
+               "  --parse-ptype: parse packet type by software\n"
+               "  --empty-poll: enable empty poll detection;"
+               " followed by (training_flag, med_threshold, high_threshold)\n",
                prgname);
 }
 
@@ -1221,7 +1367,55 @@ parse_config(const char *q_arg)
 
        return 0;
 }
+static int
+parse_ep_config(const char *q_arg)
+{
+       char s[256];
+       const char *p = q_arg;
+       char *end;
+       int  num_arg;
+
+       char *str_fld[3];
+
+       int training_flag;
+       int med_edpi;
+       int hgh_edpi;
+
+       ep_med_edpi = EMPTY_POLL_MED_THRESHOLD;
+       ep_hgh_edpi = EMPTY_POLL_HGH_THRESHOLD;
+
+       snprintf(s, sizeof(s), "%s", p);
+
+       num_arg = rte_strsplit(s, sizeof(s), str_fld, 3, ',');
+
+       empty_poll_train = false;
+
+       if (num_arg == 0)
+               return 0;
+
+       if (num_arg == 3) {
+
+               training_flag = strtoul(str_fld[0], &end, 0);
+               med_edpi = strtoul(str_fld[1], &end, 0);
+               hgh_edpi = strtoul(str_fld[2], &end, 0);
+
+               if (training_flag == 1)
+                       empty_poll_train = true;
+
+               if (med_edpi > 0)
+                       ep_med_edpi = med_edpi;
+
+               if (hgh_edpi > 0)
+                       ep_hgh_edpi = hgh_edpi;
+
+       } else {
+
+               return -1;
+       }
+
+       return 0;
 
+}
 #define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
 
 /* Parse the argument given in the command line of the application */
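
Given the parser above (training flag first, then the medium and high
thresholds), a plausible invocation with pre-trained thresholds could look
like the following; the core/port layout and threshold values are
illustrative, with the thresholds matching the built-in defaults:

./build/l3fwd-power -l 1-2 -n 4 -- -p 0x1 -P \
	--config="(0,0,1)" --empty-poll="0,350000,580000"
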
@@ -1231,6 +1425,7 @@ parse_args(int argc, char **argv)
        int opt, ret;
        char **argvopt;
        int option_index;
+       uint32_t limit;
        char *prgname = argv[0];
        static struct option lgopts[] = {
                {"config", 1, 0, 0},
@@ -1238,13 +1433,14 @@ parse_args(int argc, char **argv)
                {"high-perf-cores", 1, 0, 0},
                {"no-numa", 0, 0, 0},
                {"enable-jumbo", 0, 0, 0},
+               {"empty-poll", 1, 0, 0},
                {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
                {NULL, 0, 0, 0}
        };
 
        argvopt = argv;
 
-       while ((opt = getopt_long(argc, argvopt, "p:P",
+       while ((opt = getopt_long(argc, argvopt, "p:l:m:h:P",
                                lgopts, &option_index)) != EOF) {
 
                switch (opt) {
@@ -1261,7 +1457,18 @@ parse_args(int argc, char **argv)
                        printf("Promiscuous mode selected\n");
                        promiscuous_on = 1;
                        break;
-
+               case 'l':
+                       limit = parse_max_pkt_len(optarg);
+                       freq_tlb[LOW] = limit;
+                       break;
+               case 'm':
+                       limit = parse_max_pkt_len(optarg);
+                       freq_tlb[MED] = limit;
+                       break;
+               case 'h':
+                       limit = parse_max_pkt_len(optarg);
+                       freq_tlb[HGH] = limit;
+                       break;
                /* long options */
                case 0:
                        if (!strncmp(lgopts[option_index].name, "config", 6)) {
@@ -1299,6 +1506,20 @@ parse_args(int argc, char **argv)
                                numa_on = 0;
                        }
 
+                       if (!strncmp(lgopts[option_index].name,
+                                               "empty-poll", 10)) {
+                               printf("empty-poll is enabled\n");
+                               empty_poll_on = true;
+                               ret = parse_ep_config(optarg);
+
+                               if (ret) {
+                                       printf("invalid empty poll config\n");
+                                       print_usage(prgname);
+                                       return -1;
+                               }
+
+                       }
+
                        if (!strncmp(lgopts[option_index].name,
                                        "enable-jumbo", 12)) {
                                struct option lenopts =
@@ -1647,6 +1868,59 @@ init_power_library(void)
        }
        return ret;
 }
+static void
+empty_poll_setup_timer(void)
+{
+       int lcore_id = rte_lcore_id();
+       uint64_t hz = rte_get_timer_hz();
+
+       struct  ep_params *ep_ptr = ep_params;
+
+       ep_ptr->interval_ticks = hz / INTERVALS_PER_SECOND;
+
+       rte_timer_reset_sync(&ep_ptr->timer0,
+                       ep_ptr->interval_ticks,
+                       PERIODICAL,
+                       lcore_id,
+                       rte_empty_poll_detection,
+                       (void *)ep_ptr);
+
+}
+static int
+launch_timer(unsigned int lcore_id)
+{
+       int64_t prev_tsc = 0, cur_tsc, diff_tsc, cycles_10ms;
+
+       RTE_SET_USED(lcore_id);
+
+
+       if (rte_get_master_lcore() != lcore_id) {
+               rte_panic("timer on lcore:%d which is not master core:%d\n",
+                               lcore_id,
+                               rte_get_master_lcore());
+       }
+
+       RTE_LOG(INFO, POWER, "Bring up the Timer\n");
+
+       empty_poll_setup_timer();
+
+       cycles_10ms = rte_get_timer_hz() / 100;
+
+       while (!is_done()) {
+               cur_tsc = rte_rdtsc();
+               diff_tsc = cur_tsc - prev_tsc;
+               if (diff_tsc > cycles_10ms) {
+                       rte_timer_manage();
+                       prev_tsc = cur_tsc;
+                       cycles_10ms = rte_get_timer_hz() / 100;
+               }
+       }
+
+       RTE_LOG(INFO, POWER, "Timer subsystem is done\n");
+
+       return 0;
+}
+
 
 int
 main(int argc, char **argv)
@@ -1829,13 +2103,15 @@ main(int argc, char **argv)
                if (rte_lcore_is_enabled(lcore_id) == 0)
                        continue;
 
-               /* init timer structures for each enabled lcore */
-               rte_timer_init(&power_timers[lcore_id]);
-               hz = rte_get_timer_hz();
-               rte_timer_reset(&power_timers[lcore_id],
-                       hz/TIMER_NUMBER_PER_SECOND, SINGLE, lcore_id,
-                                               power_timer_cb, NULL);
-
+               if (empty_poll_on == false) {
+                       /* init timer structures for each enabled lcore */
+                       rte_timer_init(&power_timers[lcore_id]);
+                       hz = rte_get_timer_hz();
+                       rte_timer_reset(&power_timers[lcore_id],
+                                       hz/TIMER_NUMBER_PER_SECOND,
+                                       SINGLE, lcore_id,
+                                       power_timer_cb, NULL);
+               }
                qconf = &lcore_conf[lcore_id];
                printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
                fflush(stdout);
@@ -1906,12 +2182,43 @@ main(int argc, char **argv)
 
        check_all_ports_link_status(enabled_port_mask);
 
+       if (empty_poll_on == true) {
+
+               if (empty_poll_train) {
+                       policy.state = TRAINING;
+               } else {
+                       policy.state = MED_NORMAL;
+                       policy.med_base_edpi = ep_med_edpi;
+                       policy.hgh_base_edpi = ep_hgh_edpi;
+               }
+
+               ret = rte_power_empty_poll_stat_init(&ep_params,
+                               freq_tlb,
+                               &policy);
+               if (ret < 0)
+                       rte_exit(EXIT_FAILURE, "empty poll init failed\n");
+       }
+
+
        /* launch per-lcore init on every lcore */
-       rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+       if (empty_poll_on == false) {
+               rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+       } else {
+               empty_poll_stop = false;
+               rte_eal_mp_remote_launch(main_empty_poll_loop, NULL,
+                               SKIP_MASTER);
+       }
+
+       if (empty_poll_on == true)
+               launch_timer(rte_lcore_id());
+
        RTE_LCORE_FOREACH_SLAVE(lcore_id) {
                if (rte_eal_wait_lcore(lcore_id) < 0)
                        return -1;
        }
 
+       if (empty_poll_on)
+               rte_power_empty_poll_stat_free();
+
        return 0;
 }
index 20c8054..a3c5c2f 100644 (file)
@@ -9,6 +9,7 @@
 if host_machine.system() != 'linux'
        build = false
 endif
+allow_experimental_apis = true
 deps += ['power', 'timer', 'lpm', 'hash']
 sources = files(
        'main.c', 'perf_core.c'
index 5edd91a..41137f9 100644 (file)
@@ -161,8 +161,7 @@ static struct rte_eth_conf port_conf = {
                .mq_mode        = ETH_MQ_RX_RSS,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
-               .offloads = (DEV_RX_OFFLOAD_CRC_STRIP |
-                            DEV_RX_OFFLOAD_CHECKSUM),
+               .offloads = DEV_RX_OFFLOAD_CHECKSUM,
        },
        .rx_adv_conf = {
                .rss_conf = {
index ab019b9..e4b99ef 100644 (file)
@@ -120,8 +120,7 @@ static struct rte_eth_conf port_conf = {
                .mq_mode = ETH_MQ_RX_RSS,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
-               .offloads = (DEV_RX_OFFLOAD_CRC_STRIP |
-                            DEV_RX_OFFLOAD_CHECKSUM),
+               .offloads = DEV_RX_OFFLOAD_CHECKSUM,
        },
        .rx_adv_conf = {
                .rss_conf = {
index 3b73207..f3346d2 100644 (file)
@@ -79,7 +79,6 @@ struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
 static struct rte_eth_conf port_conf = {
        .rxmode = {
                .split_hdr_size = 0,
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
index fc8df71..197b019 100644 (file)
@@ -50,7 +50,6 @@ include $(RTE_SDK)/mk/rte.vars.mk
 
 CFLAGS += -O3 -g
 CFLAGS += $(WERROR_FLAGS)
-CFLAGS_config.o := -D_GNU_SOURCE
 
 # workaround for a gcc bug with noreturn attribute
 # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
index f2045f2..3ab7d02 100644 (file)
@@ -45,8 +45,7 @@ static struct rte_eth_conf port_conf = {
        .rxmode = {
                .mq_mode        = ETH_MQ_RX_RSS,
                .split_hdr_size = 0,
-               .offloads = (DEV_RX_OFFLOAD_CHECKSUM |
-                            DEV_RX_OFFLOAD_CRC_STRIP),
+               .offloads = DEV_RX_OFFLOAD_CHECKSUM,
        },
        .rx_adv_conf = {
                .rss_conf = {
index 4ee7a11..af81c76 100644 (file)
@@ -22,6 +22,10 @@ default_cflags = machine_args
 if cc.has_argument('-Wno-format-truncation')
        default_cflags += '-Wno-format-truncation'
 endif
+
+# specify -D_GNU_SOURCE unconditionally
+default_cflags += '-D_GNU_SOURCE'
+
 foreach example: examples
        name = example
        build = true
index a6708b7..b76b02f 100644 (file)
@@ -13,5 +13,6 @@ include $(RTE_SDK)/mk/rte.vars.mk
 DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += client_server_mp
 DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += simple_mp
 DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += symmetric_mp
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += hotplug_mp
 
 include $(RTE_SDK)/mk/rte.extsubdir.mk
diff --git a/examples/multi_process/hotplug_mp/Makefile b/examples/multi_process/hotplug_mp/Makefile
new file mode 100644 (file)
index 0000000..bc36aea
--- /dev/null
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = hotplug_mp
+
+# all source are stored in SRCS-y
+SRCS-y := main.c commands.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/multi_process/hotplug_mp/commands.c b/examples/multi_process/hotplug_mp/commands.c
new file mode 100644 (file)
index 0000000..b068593
--- /dev/null
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_ipaddr.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline.h>
+#include <rte_ethdev.h>
+
+/**********************************************************/
+
+struct cmd_help_result {
+       cmdline_fixed_string_t help;
+};
+
+static void cmd_help_parsed(__attribute__((unused)) void *parsed_result,
+                           struct cmdline *cl,
+                           __attribute__((unused)) void *data)
+{
+       cmdline_printf(cl,
+                      "commands:\n"
+                      "- attach <devargs>\n"
+                      "- detach <devargs>\n"
+                      "- list\n\n");
+}
+
+cmdline_parse_token_string_t cmd_help_help =
+       TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "help");
+
+cmdline_parse_inst_t cmd_help = {
+       .f = cmd_help_parsed,  /* function to call */
+       .data = NULL,      /* 2nd arg of func */
+       .help_str = "show help",
+       .tokens = {        /* token list, NULL terminated */
+               (void *)&cmd_help_help,
+               NULL,
+       },
+};
+
+/**********************************************************/
+
+struct cmd_quit_result {
+       cmdline_fixed_string_t quit;
+};
+
+static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result,
+                           struct cmdline *cl,
+                           __attribute__((unused)) void *data)
+{
+       cmdline_quit(cl);
+}
+
+cmdline_parse_token_string_t cmd_quit_quit =
+       TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit");
+
+cmdline_parse_inst_t cmd_quit = {
+       .f = cmd_quit_parsed,  /* function to call */
+       .data = NULL,      /* 2nd arg of func */
+       .help_str = "quit",
+       .tokens = {        /* token list, NULL terminated */
+               (void *)&cmd_quit_quit,
+               NULL,
+       },
+};
+
+/**********************************************************/
+
+struct cmd_list_result {
+       cmdline_fixed_string_t list;
+};
+
+static void cmd_list_parsed(__attribute__((unused)) void *parsed_result,
+                           struct cmdline *cl,
+                           __attribute__((unused)) void *data)
+{
+       uint16_t port_id;
+       char dev_name[RTE_DEV_NAME_MAX_LEN];
+
+       cmdline_printf(cl, "list all etherdev\n");
+
+       RTE_ETH_FOREACH_DEV(port_id) {
+               rte_eth_dev_get_name_by_port(port_id, dev_name);
+               if (strlen(dev_name) > 0)
+                       cmdline_printf(cl, "%d\t%s\n", port_id, dev_name);
+               else
+                       printf("empty dev_name is not expected!\n");
+       }
+}
+
+cmdline_parse_token_string_t cmd_list_list =
+       TOKEN_STRING_INITIALIZER(struct cmd_list_result, list, "list");
+
+cmdline_parse_inst_t cmd_list = {
+       .f = cmd_list_parsed,  /* function to call */
+       .data = NULL,      /* 2nd arg of func */
+       .help_str = "list all devices",
+       .tokens = {        /* token list, NULL terminated */
+               (void *)&cmd_list_list,
+               NULL,
+       },
+};
+
+/**********************************************************/
+
+struct cmd_dev_attach_result {
+       cmdline_fixed_string_t attach;
+       cmdline_fixed_string_t devargs;
+};
+
+static void cmd_dev_attach_parsed(void *parsed_result,
+                                 struct cmdline *cl,
+                                 __attribute__((unused)) void *data)
+{
+       struct cmd_dev_attach_result *res = parsed_result;
+       struct rte_devargs da;
+
+       memset(&da, 0, sizeof(da));
+
+       if (rte_devargs_parsef(&da, "%s", res->devargs)) {
+               cmdline_printf(cl, "cannot parse devargs\n");
+               if (da.args)
+                       free(da.args);
+               return;
+       }
+
+       if (!rte_eal_hotplug_add(da.bus->name, da.name, da.args))
+               cmdline_printf(cl, "attached device %s\n", da.name);
+       else
+               cmdline_printf(cl, "failed to attach device %s\n",
+                               da.name);
+}
+
+cmdline_parse_token_string_t cmd_dev_attach_attach =
+       TOKEN_STRING_INITIALIZER(struct cmd_dev_attach_result, attach,
+                                "attach");
+cmdline_parse_token_string_t cmd_dev_attach_devargs =
+       TOKEN_STRING_INITIALIZER(struct cmd_dev_attach_result, devargs, NULL);
+
+cmdline_parse_inst_t cmd_attach_device = {
+       .f = cmd_dev_attach_parsed,  /* function to call */
+       .data = NULL,      /* 2nd arg of func */
+       .help_str = "attach a device",
+       .tokens = {        /* token list, NULL terminated */
+               (void *)&cmd_dev_attach_attach,
+               (void *)&cmd_dev_attach_devargs,
+               NULL,
+       },
+};
+
+/**********************************************************/
+
+struct cmd_dev_detach_result {
+       cmdline_fixed_string_t detach;
+       cmdline_fixed_string_t devargs;
+};
+
+static void cmd_dev_detach_parsed(void *parsed_result,
+                                  struct cmdline *cl,
+                                  __attribute__((unused)) void *data)
+{
+       struct cmd_dev_detach_result *res = parsed_result;
+       struct rte_devargs da;
+
+       memset(&da, 0, sizeof(da));
+
+       if (rte_devargs_parsef(&da, "%s", res->devargs)) {
+               cmdline_printf(cl, "cannot parse devargs\n");
+               if (da.args)
+                       free(da.args);
+               return;
+       }
+
+       printf("detaching...\n");
+       if (!rte_eal_hotplug_remove(da.bus->name, da.name))
+               cmdline_printf(cl, "detached device %s\n",
+                       da.name);
+       else
+               cmdline_printf(cl, "failed to detach device %s\n",
+                       da.name);
+}
+
+cmdline_parse_token_string_t cmd_dev_detach_detach =
+       TOKEN_STRING_INITIALIZER(struct cmd_dev_detach_result, detach,
+                                "detach");
+
+cmdline_parse_token_string_t cmd_dev_detach_devargs =
+       TOKEN_STRING_INITIALIZER(struct cmd_dev_detach_result, devargs, NULL);
+
+cmdline_parse_inst_t cmd_detach_device = {
+       .f = cmd_dev_detach_parsed,  /* function to call */
+       .data = NULL,      /* 2nd arg of func */
+       .help_str = "detach a device",
+       .tokens = {        /* token list, NULL terminated */
+               (void *)&cmd_dev_detach_detach,
+               (void *)&cmd_dev_detach_devargs,
+               NULL,
+       },
+};
+
+/**********************************************************/
+/**********************************************************/
+/****** CONTEXT (list of instruction) */
+
+cmdline_parse_ctx_t main_ctx[] = {
+       (cmdline_parse_inst_t *)&cmd_help,
+       (cmdline_parse_inst_t *)&cmd_quit,
+       (cmdline_parse_inst_t *)&cmd_list,
+       (cmdline_parse_inst_t *)&cmd_attach_device,
+       (cmdline_parse_inst_t *)&cmd_detach_device,
+       NULL,
+};
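
The attach and detach handlers above reduce to a devargs parse followed by
a bus-level hotplug call. Below is a self-contained sketch of that flow;
the attach_example() helper and the "net_tap0,iface=test0" devargs string
are hypothetical, used for illustration only:

	#include <stdlib.h>
	#include <string.h>

	#include <rte_dev.h>
	#include <rte_devargs.h>

	static int
	attach_example(const char *devargs) /* e.g. "net_tap0,iface=test0" */
	{
		struct rte_devargs da;

		memset(&da, 0, sizeof(da));
		if (rte_devargs_parsef(&da, "%s", devargs)) {
			free(da.args); /* parsef may have allocated args */
			return -1;
		}
		/* 0 means the bus found and probed the new device */
		return rte_eal_hotplug_add(da.bus->name, da.name, da.args);
	}
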
diff --git a/examples/multi_process/hotplug_mp/commands.h b/examples/multi_process/hotplug_mp/commands.h
new file mode 100644 (file)
index 0000000..afcf177
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _COMMANDS_H_
+#define _COMMANDS_H_
+
+extern cmdline_parse_ctx_t main_ctx[];
+
+#endif /* _COMMANDS_H_ */
diff --git a/examples/multi_process/hotplug_mp/main.c b/examples/multi_process/hotplug_mp/main.c
new file mode 100644 (file)
index 0000000..d668580
--- /dev/null
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <termios.h>
+#include <sys/queue.h>
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_debug.h>
+
+#include "commands.h"
+
+int main(int argc, char **argv)
+{
+       int ret;
+       struct cmdline *cl;
+
+       ret = rte_eal_init(argc, argv);
+       if (ret < 0)
+               rte_panic("Cannot init EAL\n");
+
+       cl = cmdline_stdin_new(main_ctx, "example> ");
+       if (cl == NULL)
+               rte_panic("Cannot create cmdline instance\n");
+       cmdline_interact(cl);
+       cmdline_stdin_exit(cl);
+
+       rte_eal_cleanup();
+
+       return 0;
+}
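
Since the point of this sample is hot-plugging across processes, it is
typically started twice with the standard EAL option --proc-type=auto, so
that one instance becomes the primary and the other a secondary; a device
attached at one "example>" prompt should then show up in the other
process's "list" output. (This invocation is a suggestion, not something
mandated by the sample.)
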
index c6c6a53..c310e94 100644 (file)
@@ -178,8 +178,7 @@ smp_port_init(uint16_t port, struct rte_mempool *mbuf_pool,
                        .rxmode = {
                                .mq_mode        = ETH_MQ_RX_RSS,
                                .split_hdr_size = 0,
-                               .offloads = (DEV_RX_OFFLOAD_CHECKSUM |
-                                            DEV_RX_OFFLOAD_CRC_STRIP),
+                               .offloads = DEV_RX_OFFLOAD_CHECKSUM,
                        },
                        .rx_adv_conf = {
                                .rss_conf = {
index 7afca28..216e010 100644 (file)
@@ -26,7 +26,6 @@
 struct rte_eth_conf eth_conf = {
        .rxmode = {
                .split_hdr_size = 0,
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
index 5392fce..4f8747b 100644 (file)
@@ -2,8 +2,6 @@
  * Copyright(c) 2010-2016 Intel Corporation
  */
 
-#define _GNU_SOURCE
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
@@ -306,8 +304,7 @@ static struct rte_eth_conf port_conf = {
                .mq_mode = ETH_MQ_RX_RSS,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
-               .offloads = (DEV_RX_OFFLOAD_CHECKSUM |
-                            DEV_RX_OFFLOAD_CRC_STRIP),
+               .offloads = DEV_RX_OFFLOAD_CHECKSUM,
        },
        .rx_adv_conf = {
                .rss_conf = {
index 7d0d581..03ff394 100644 (file)
@@ -2,7 +2,6 @@
  * Copyright(c) 2015 Intel Corporation
  */
 
-#define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
index 53f1243..a02de06 100644 (file)
@@ -6,7 +6,6 @@
 #include <stdlib.h>
 #include <sys/types.h>
 #include <errno.h>
-#define __USE_GNU
 #include <sched.h>
 #include <dlfcn.h>
 
index 5cf4e9d..9b01124 100644 (file)
@@ -56,8 +56,7 @@ static struct rte_eth_conf port_conf = {
                .mq_mode        = ETH_MQ_RX_RSS,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
-               .offloads = (DEV_RX_OFFLOAD_CHECKSUM |
-                            DEV_RX_OFFLOAD_CRC_STRIP),
+               .offloads = DEV_RX_OFFLOAD_CHECKSUM,
        },
        .rx_adv_conf = {
                .rss_conf = {
index a7ecf97..45b0a9e 100644 (file)
@@ -57,8 +57,6 @@ else
 
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
-CFLAGS_args.o := -D_GNU_SOURCE
-CFLAGS_cfg_file.o := -D_GNU_SOURCE
 
 include $(RTE_SDK)/mk/rte.extapp.mk
 
index 94cbb26..37c2b95 100644 (file)
@@ -59,7 +59,6 @@ static struct rte_eth_conf port_conf = {
        .rxmode = {
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        },
        .txmode = {
                .mq_mode = ETH_DCB_NONE,
index 1916438..5a0f64f 100644 (file)
@@ -24,7 +24,6 @@
 static struct rte_eth_conf port_conf = {
                .rxmode = {
                        .split_hdr_size = 0,
-                       .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
                },
                .txmode = {
                        .mq_mode = ETH_DCB_NONE,
index 2cd5729..c7c7928 100644 (file)
@@ -118,6 +118,12 @@ apply_profile(int profile_id)
        struct profile *p = &profiles[profile_id];
        const uint8_t core_off = 1;
 
+       if (p->num_cores > rte_lcore_count() + 1) {
+               printf("insufficient cores to run (%s)\n",
+                       p->name);
+               return;
+       }
+
        for (i = 0; i < p->num_cores; i++) {
                uint32_t core = i + core_off;
                ret = rte_service_lcore_add(core);
index 8ec1a38..4c15643 100644 (file)
@@ -60,7 +60,6 @@ endif
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
-CFLAGS += -D_GNU_SOURCE
 
 include $(RTE_SDK)/mk/rte.extapp.mk
 endif
index b99ab97..ad7fbe9 100644 (file)
@@ -69,7 +69,6 @@ uint8_t tep_filter_type[] = {RTE_TUNNEL_FILTER_IMAC_TENID,
 static struct rte_eth_conf port_conf = {
        .rxmode = {
                .split_hdr_size = 0,
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
diff --git a/examples/vdpa/Makefile b/examples/vdpa/Makefile
new file mode 100644 (file)
index 0000000..42672a2
--- /dev/null
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+else
+
+# binary name
+APP = vdpa
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -D_GNU_SOURCE
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/vdpa/main.c b/examples/vdpa/main.c
new file mode 100644 (file)
index 0000000..d2e2cb7
--- /dev/null
@@ -0,0 +1,454 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <getopt.h>
+#include <signal.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_vhost.h>
+#include <rte_vdpa.h>
+#include <rte_pci.h>
+#include <rte_string_fns.h>
+
+#include <cmdline_parse.h>
+#include <cmdline_socket.h>
+#include <cmdline_parse_string.h>
+#include <cmdline.h>
+
+#define MAX_PATH_LEN 128
+#define MAX_VDPA_SAMPLE_PORTS 1024
+#define RTE_LOGTYPE_VDPA RTE_LOGTYPE_USER1
+
+struct vdpa_port {
+       char ifname[MAX_PATH_LEN];
+       int did;
+       int vid;
+       uint64_t flags;
+};
+
+static struct vdpa_port vports[MAX_VDPA_SAMPLE_PORTS];
+
+static char iface[MAX_PATH_LEN];
+static int dev_total;
+static int devcnt;
+static int interactive;
+static int client_mode;
+
+/* display usage */
+static void
+vdpa_usage(const char *prgname)
+{
+       printf("Usage: %s [EAL options] -- "
+                                "      --interactive|-i: run in interactive mode.\n"
+                                "      --iface <path>: specify the path prefix of the socket files, e.g. /tmp/vhost-user-.\n"
+                                "      --client: register a vhost-user socket as client mode.\n",
+                                prgname);
+}
+
+static int
+parse_args(int argc, char **argv)
+{
+       static const char *short_option = "i";
+       static struct option long_option[] = {
+               {"iface", required_argument, NULL, 0},
+               {"interactive", no_argument, &interactive, 1},
+               {"client", no_argument, &client_mode, 1},
+               {NULL, 0, 0, 0},
+       };
+       int opt, idx;
+       char *prgname = argv[0];
+
+       while ((opt = getopt_long(argc, argv, short_option, long_option, &idx))
+                       != EOF) {
+               switch (opt) {
+               case 'i':
+                       printf("Interactive-mode selected\n");
+                       interactive = 1;
+                       break;
+               /* long options */
+               case 0:
+                       if (strncmp(long_option[idx].name, "iface",
+                                               MAX_PATH_LEN) == 0) {
+                               rte_strscpy(iface, optarg, MAX_PATH_LEN);
+                               printf("iface %s\n", iface);
+                       }
+                       if (!strcmp(long_option[idx].name, "interactive")) {
+                               printf("Interactive-mode selected\n");
+                               interactive = 1;
+                       }
+                       break;
+
+               default:
+                       vdpa_usage(prgname);
+                       return -1;
+               }
+       }
+
+       if (iface[0] == '\0' && interactive == 0) {
+               vdpa_usage(prgname);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int
+new_device(int vid)
+{
+       char ifname[MAX_PATH_LEN];
+       int i;
+
+       rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
+       for (i = 0; i < MAX_VDPA_SAMPLE_PORTS; i++) {
+               if (strncmp(ifname, vports[i].ifname, MAX_PATH_LEN) == 0) {
+                       printf("\nnew port %s, did: %d\n",
+                                       ifname, vports[i].did);
+                       vports[i].vid = vid;
+                       break;
+               }
+       }
+
+       if (i >= MAX_VDPA_SAMPLE_PORTS)
+               return -1;
+
+       return 0;
+}
+
+static void
+destroy_device(int vid)
+{
+       char ifname[MAX_PATH_LEN];
+       int i;
+
+       rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
+       for (i = 0; i < MAX_VDPA_SAMPLE_PORTS; i++) {
+               if (strcmp(ifname, vports[i].ifname) == 0) {
+                       printf("\ndestroy port %s, did: %d\n",
+                                       ifname, vports[i].did);
+                       break;
+               }
+       }
+}
+
+static const struct vhost_device_ops vdpa_sample_devops = {
+       .new_device = new_device,
+       .destroy_device = destroy_device,
+};
+
+static int
+start_vdpa(struct vdpa_port *vport)
+{
+       int ret;
+       char *socket_path = vport->ifname;
+       int did = vport->did;
+
+       if (client_mode)
+               vport->flags |= RTE_VHOST_USER_CLIENT;
+
+       if (access(socket_path, F_OK) != -1 && !client_mode) {
+               RTE_LOG(ERR, VDPA,
+                       "%s exists, please remove it or specify another file and try again.\n",
+                       socket_path);
+               return -1;
+       }
+       ret = rte_vhost_driver_register(socket_path, vport->flags);
+       if (ret != 0)
+               rte_exit(EXIT_FAILURE,
+                       "register driver failed: %s\n",
+                       socket_path);
+
+       ret = rte_vhost_driver_callback_register(socket_path,
+                       &vdpa_sample_devops);
+       if (ret != 0)
+               rte_exit(EXIT_FAILURE,
+                       "register driver ops failed: %s\n",
+                       socket_path);
+
+       ret = rte_vhost_driver_attach_vdpa_device(socket_path, did);
+       if (ret != 0)
+               rte_exit(EXIT_FAILURE,
+                       "attach vdpa device failed: %s\n",
+                       socket_path);
+
+       if (rte_vhost_driver_start(socket_path) < 0)
+               rte_exit(EXIT_FAILURE,
+                       "start vhost driver failed: %s\n",
+                       socket_path);
+       return 0;
+}
+
+static void
+close_vdpa(struct vdpa_port *vport)
+{
+       int ret;
+       char *socket_path = vport->ifname;
+
+       ret = rte_vhost_driver_detach_vdpa_device(socket_path);
+       if (ret != 0)
+               RTE_LOG(ERR, VDPA,
+                               "detach vdpa device failed: %s\n",
+                               socket_path);
+
+       ret = rte_vhost_driver_unregister(socket_path);
+       if (ret != 0)
+               RTE_LOG(ERR, VDPA,
+                               "Fail to unregister vhost driver for %s.\n",
+                               socket_path);
+}
+
+static void
+vdpa_sample_quit(void)
+{
+       int i;
+       for (i = 0; i < RTE_MIN(MAX_VDPA_SAMPLE_PORTS, dev_total); i++) {
+               if (vports[i].ifname[0] != '\0')
+                       close_vdpa(&vports[i]);
+       }
+}
+
+static void
+signal_handler(int signum)
+{
+       if (signum == SIGINT || signum == SIGTERM) {
+               printf("\nSignal %d received, preparing to exit...\n", signum);
+               vdpa_sample_quit();
+               exit(0);
+       }
+}
+
+/* interactive cmds */
+
+/* *** Help command with introduction. *** */
+struct cmd_help_result {
+       cmdline_fixed_string_t help;
+};
+
+static void cmd_help_parsed(__attribute__((unused)) void *parsed_result,
+               struct cmdline *cl,
+               __attribute__((unused)) void *data)
+{
+       cmdline_printf(
+               cl,
+               "\n"
+               "The following commands are currently available:\n\n"
+               "Control:\n"
+               "    help                                      : Show interactive instructions.\n"
+               "    list                                      : list all available vdpa devices.\n"
+               "    create <socket file> <vdev addr>          : create a new vdpa port.\n"
+               "    quit                                      : exit vdpa sample app.\n"
+       );
+}
+
+cmdline_parse_token_string_t cmd_help_help =
+       TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "help");
+
+cmdline_parse_inst_t cmd_help = {
+       .f = cmd_help_parsed,
+       .data = NULL,
+       .help_str = "show help",
+       .tokens = {
+               (void *)&cmd_help_help,
+               NULL,
+       },
+};
+
+/* *** List all available vdpa devices *** */
+struct cmd_list_result {
+       cmdline_fixed_string_t action;
+};
+
+static void cmd_list_vdpa_devices_parsed(
+               __attribute__((unused)) void *parsed_result,
+               struct cmdline *cl,
+               __attribute__((unused)) void *data)
+{
+       int did;
+       uint32_t queue_num;
+       uint64_t features;
+       struct rte_vdpa_device *vdev;
+       struct rte_pci_addr addr;
+
+       cmdline_printf(cl, "device id\tdevice address\tqueue num\tsupported features\n");
+       for (did = 0; did < dev_total; did++) {
+               vdev = rte_vdpa_get_device(did);
+               if (!vdev)
+                       continue;
+               if (vdev->ops->get_queue_num(did, &queue_num) < 0) {
+                       RTE_LOG(ERR, VDPA,
+                               "failed to get vdpa queue number "
+                               "for device id %d.\n", did);
+                       continue;
+               }
+               if (vdev->ops->get_features(did, &features) < 0) {
+                       RTE_LOG(ERR, VDPA,
+                               "failed to get vdpa features "
+                               "for device id %d.\n", did);
+                       continue;
+               }
+               addr = vdev->addr.pci_addr;
+               cmdline_printf(cl,
+                       "%d\t\t" PCI_PRI_FMT "\t%" PRIu32 "\t\t0x%" PRIx64 "\n",
+                       did, addr.domain, addr.bus, addr.devid,
+                       addr.function, queue_num, features);
+       }
+}
+
+cmdline_parse_token_string_t cmd_action_list =
+       TOKEN_STRING_INITIALIZER(struct cmd_list_result, action, "list");
+
+cmdline_parse_inst_t cmd_list_vdpa_devices = {
+       .f = cmd_list_vdpa_devices_parsed,
+       .data = NULL,
+       .help_str = "list all available vdpa devices",
+       .tokens = {
+               (void *)&cmd_action_list,
+               NULL,
+       },
+};
+
+/* *** Create new vdpa port *** */
+struct cmd_create_result {
+       cmdline_fixed_string_t action;
+       cmdline_fixed_string_t socket_path;
+       cmdline_fixed_string_t bdf;
+};
+
+static void cmd_create_vdpa_port_parsed(void *parsed_result,
+               struct cmdline *cl,
+               __attribute__((unused)) void *data)
+{
+       int did;
+       struct cmd_create_result *res = parsed_result;
+       struct rte_vdpa_dev_addr addr;
+
+       rte_strscpy(vports[devcnt].ifname, res->socket_path, MAX_PATH_LEN);
+       if (rte_pci_addr_parse(res->bdf, &addr.pci_addr) != 0) {
+               cmdline_printf(cl, "Unable to parse the given bdf.\n");
+               return;
+       }
+       addr.type = PCI_ADDR;
+       did = rte_vdpa_find_device_id(&addr);
+       if (did < 0) {
+               cmdline_printf(cl, "Unable to find vdpa device id.\n");
+               return;
+       }
+
+       vports[devcnt].did = did;
+
+       if (start_vdpa(&vports[devcnt]) == 0)
+               devcnt++;
+}
+
+cmdline_parse_token_string_t cmd_action_create =
+       TOKEN_STRING_INITIALIZER(struct cmd_create_result, action, "create");
+cmdline_parse_token_string_t cmd_socket_path =
+       TOKEN_STRING_INITIALIZER(struct cmd_create_result, socket_path, NULL);
+cmdline_parse_token_string_t cmd_bdf =
+       TOKEN_STRING_INITIALIZER(struct cmd_create_result, bdf, NULL);
+
+cmdline_parse_inst_t cmd_create_vdpa_port = {
+       .f = cmd_create_vdpa_port_parsed,
+       .data = NULL,
+       .help_str = "create a new vdpa port",
+       .tokens = {
+               (void *)&cmd_action_create,
+               (void *)&cmd_socket_path,
+               (void *)&cmd_bdf,
+               NULL,
+       },
+};
+
+/* *** QUIT *** */
+struct cmd_quit_result {
+       cmdline_fixed_string_t quit;
+};
+
+static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result,
+               struct cmdline *cl,
+               __attribute__((unused)) void *data)
+{
+       vdpa_sample_quit();
+       cmdline_quit(cl);
+}
+
+cmdline_parse_token_string_t cmd_quit_quit =
+       TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit");
+
+cmdline_parse_inst_t cmd_quit = {
+       .f = cmd_quit_parsed,
+       .data = NULL,
+       .help_str = "quit: exit application",
+       .tokens = {
+               (void *)&cmd_quit_quit,
+               NULL,
+       },
+};
+cmdline_parse_ctx_t main_ctx[] = {
+       (cmdline_parse_inst_t *)&cmd_help,
+       (cmdline_parse_inst_t *)&cmd_list_vdpa_devices,
+       (cmdline_parse_inst_t *)&cmd_create_vdpa_port,
+       (cmdline_parse_inst_t *)&cmd_quit,
+       NULL,
+};
+
+int
+main(int argc, char *argv[])
+{
+       char ch;
+       int i;
+       int ret;
+       struct cmdline *cl;
+
+       ret = rte_eal_init(argc, argv);
+       if (ret < 0)
+               rte_exit(EXIT_FAILURE, "eal init failed\n");
+       argc -= ret;
+       argv += ret;
+
+       dev_total = rte_vdpa_get_device_num();
+       if (dev_total <= 0)
+               rte_exit(EXIT_FAILURE, "No available vdpa device found\n");
+
+       signal(SIGINT, signal_handler);
+       signal(SIGTERM, signal_handler);
+
+       ret = parse_args(argc, argv);
+       if (ret < 0)
+               rte_exit(EXIT_FAILURE, "invalid argument\n");
+
+       if (interactive == 1) {
+               cl = cmdline_stdin_new(main_ctx, "vdpa> ");
+               if (cl == NULL)
+                       rte_panic("Cannot create cmdline instance\n");
+               cmdline_interact(cl);
+               cmdline_stdin_exit(cl);
+       } else {
+               for (i = 0; i < RTE_MIN(MAX_VDPA_SAMPLE_PORTS, dev_total);
+                               i++) {
+                       vports[i].did = i;
+                       snprintf(vports[i].ifname, MAX_PATH_LEN, "%s%d",
+                                       iface, i);
+
+                       start_vdpa(&vports[i]);
+               }
+
+               printf("enter \'q\' to quit\n");
+               while (scanf("%c", &ch)) {
+                       if (ch == 'q')
+                               break;
+                       while (ch != '\n') {
+                               if (scanf("%c", &ch))
+                                       printf("%c", ch);
+                       }
+                       printf("enter \'q\' to quit\n");
+               }
+               vdpa_sample_quit();
+       }
+
+       return 0;
+}
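
A plausible way to exercise the sample above (socket paths and the PCI
address here are hypothetical): start it with "./vdpa -c 0x2 --
--interactive", inspect the probed devices with "list", then bind one to a
vhost-user socket with "create /tmp/vdpa-socket0 0000:06:00.3". In
non-interactive mode the --iface prefix is suffixed with the device index,
so "--iface /tmp/vdpa-socket" yields /tmp/vdpa-socket0, /tmp/vdpa-socket1,
and so on for each detected vdpa device.
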
diff --git a/examples/vdpa/meson.build b/examples/vdpa/meson.build
new file mode 100644 (file)
index 0000000..2e38a06
--- /dev/null
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+# meson file, for building this example as part of a main DPDK build.
+#
+# To build this example as a standalone application with an already-installed
+# DPDK instance, use 'make'
+
+if host_machine.system() != 'linux'
+       build = false
+endif
+deps += 'vhost'
+allow_experimental_apis = true
+sources = files(
+       'main.c'
+)
\ No newline at end of file
index a2ea97a..c696438 100644 (file)
@@ -61,7 +61,6 @@ else
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
 CFLAGS += $(WERROR_FLAGS)
-CFLAGS += -D_GNU_SOURCE
 
 include $(RTE_SDK)/mk/rte.extapp.mk
 
index 2175c11..dc9ea10 100644 (file)
@@ -58,9 +58,6 @@
 /* Max number of devices. Limited by vmdq. */
 #define MAX_DEVICES 64
 
-/* Size of buffers used for snprintfs. */
-#define MAX_PRINT_BUFF 6072
-
 /* Maximum long option length for option parsing. */
 #define MAX_LONG_OPT_SZ 64
 
@@ -121,8 +118,7 @@ static struct rte_eth_conf vmdq_conf_default = {
                 * this fixes a bug where ipv4 forwarding in the guest can't
                 * forward packets from one virtio dev to another virtio dev.
                 */
-               .offloads = (DEV_RX_OFFLOAD_CRC_STRIP |
-                            DEV_RX_OFFLOAD_VLAN_STRIP),
+               .offloads = DEV_RX_OFFLOAD_VLAN_STRIP,
        },
 
        .txmode = {
index 83d3310..a620abf 100644 (file)
@@ -25,7 +25,6 @@ SRCS-y := main.c
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
 CFLAGS += $(WERROR_FLAGS)
-CFLAGS += -D_GNU_SOURCE
 
 include $(RTE_SDK)/mk/rte.extapp.mk
 
index f334d71..cbb5e49 100644 (file)
@@ -14,6 +14,7 @@
 #include <rte_vhost.h>
 #include <rte_cryptodev.h>
 #include <rte_vhost_crypto.h>
+#include <rte_string_fns.h>
 
 #include <cmdline_rdline.h>
 #include <cmdline_parse.h>
 #define SESSION_MAP_ENTRIES            (1024)
 #define REFRESH_TIME_SEC               (3)
 
-#define MAX_NB_SOCKETS                 (32)
-#define DEF_SOCKET_FILE                        "/tmp/vhost_crypto1.socket"
+#define MAX_NB_SOCKETS                 (4)
+#define MAX_NB_WORKER_CORES            (16)
 
-struct vhost_crypto_options {
+struct lcore_option {
+       uint32_t lcore_id;
        char *socket_files[MAX_NB_SOCKETS];
        uint32_t nb_sockets;
        uint8_t cid;
        uint16_t qid;
-       uint32_t zero_copy;
-       uint32_t guest_polling;
-} options;
+};
 
 struct vhost_crypto_info {
        int vids[MAX_NB_SOCKETS];
+       uint32_t nb_vids;
        struct rte_mempool *sess_pool;
        struct rte_mempool *cop_pool;
-       uint32_t lcore_id;
        uint8_t cid;
        uint32_t qid;
-       uint32_t nb_vids;
+       uint32_t nb_inflight_ops;
        volatile uint32_t initialized[MAX_NB_SOCKETS];
+} __rte_cache_aligned;
 
-} info;
+struct vhost_crypto_options {
+       struct lcore_option los[MAX_NB_WORKER_CORES];
+       struct vhost_crypto_info *infos[MAX_NB_WORKER_CORES];
+       uint32_t nb_los;
+       uint32_t zero_copy;
+       uint32_t guest_polling;
+} options;
 
+#define CONFIG_KEYWORD         "config"
 #define SOCKET_FILE_KEYWORD    "socket-file"
-#define CRYPTODEV_ID_KEYWORD   "cdev-id"
-#define CRYPTODEV_QUEUE_KEYWORD        "cdev-queue-id"
 #define ZERO_COPY_KEYWORD      "zero-copy"
 #define POLLING_KEYWORD                "guest-polling"
 
-uint64_t vhost_cycles[2], last_v_cycles[2];
-uint64_t outpkt_amount;
+#define NB_SOCKET_FIELDS       (2)
+
+static uint32_t
+find_lo(uint32_t lcore_id)
+{
+       uint32_t i;
+
+       for (i = 0; i < options.nb_los; i++)
+               if (options.los[i].lcore_id == lcore_id)
+                       return i;
+
+       return UINT32_MAX;
+}
 
 /** support *SOCKET_FILE_PATH:CRYPTODEV_ID* format */
 static int
 parse_socket_arg(char *arg)
 {
-       uint32_t nb_sockets = options.nb_sockets;
-       size_t len = strlen(arg);
+       uint32_t nb_sockets;
+       uint32_t lcore_id;
+       char *str_fld[NB_SOCKET_FIELDS];
+       struct lcore_option *lo;
+       uint32_t idx;
+       char *end;
+
+       if (rte_strsplit(arg, strlen(arg), str_fld, NB_SOCKET_FIELDS, ',') !=
+                               NB_SOCKET_FIELDS) {
+               RTE_LOG(ERR, USER1, "Invalid socket parameter '%s'\n", arg);
+               return -EINVAL;
+       }
+
+       errno = 0;
+       lcore_id = strtoul(str_fld[0], &end, 0);
+       if (errno != 0 || end == str_fld[0] || lcore_id > 255)
+               return -EINVAL;
+
+       idx = find_lo(lcore_id);
+       if (idx == UINT32_MAX) {
+               if (options.nb_los == MAX_NB_WORKER_CORES)
+                       return -ENOMEM;
+               lo = &options.los[options.nb_los];
+               lo->lcore_id = lcore_id;
+               options.nb_los++;
+       } else
+               lo = &options.los[idx];
+
+       nb_sockets = lo->nb_sockets;
 
        if (nb_sockets >= MAX_NB_SOCKETS) {
                RTE_LOG(ERR, USER1, "Too many socket files!\n");
                return -ENOMEM;
        }
 
-       options.socket_files[nb_sockets] = rte_malloc(NULL, len, 0);
-       if (!options.socket_files[nb_sockets]) {
+       lo->socket_files[nb_sockets] = strdup(str_fld[1]);
+       if (!lo->socket_files[nb_sockets]) {
                RTE_LOG(ERR, USER1, "Insufficient memory\n");
                return -ENOMEM;
        }
 
-       rte_memcpy(options.socket_files[nb_sockets], arg, len);
-
-       options.nb_sockets++;
+       lo->nb_sockets++;
 
        return 0;
 }
 
 static int
-parse_cryptodev_id(const char *q_arg)
+parse_config(char *q_arg)
 {
-       char *end = NULL;
-       uint64_t pm;
-
-       /* parse decimal string */
-       pm = strtoul(q_arg, &end, 10);
-       if (pm > rte_cryptodev_count()) {
-               RTE_LOG(ERR, USER1, "Invalid Cryptodev ID %s\n", q_arg);
-               return -1;
-       }
+       struct lcore_option *lo;
+       char s[256];
+       const char *p, *p0 = q_arg;
+       char *end;
+       enum fieldnames {
+               FLD_LCORE = 0,
+               FLD_CID,
+               FLD_QID,
+               _NUM_FLD
+       };
+       uint32_t flds[_NUM_FLD];
+       char *str_fld[_NUM_FLD];
+       uint32_t i;
+       uint32_t size;
 
-       options.cid = (uint8_t)pm;
+       while ((p = strchr(p0, '(')) != NULL) {
+               ++p;
+               p0 = strchr(p, ')');
+               if (p0 == NULL)
+                       return -1;
 
-       return 0;
-}
+               size = p0 - p;
+               if (size >= sizeof(s))
+                       return -1;
 
-static int
-parse_cdev_queue_id(const char *q_arg)
-{
-       char *end = NULL;
-       uint64_t pm;
+               snprintf(s, sizeof(s), "%.*s", size, p);
+               if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') !=
+                               _NUM_FLD)
+                       return -1;
+               for (i = 0; i < _NUM_FLD; i++) {
+                       errno = 0;
+                       flds[i] = strtoul(str_fld[i], &end, 0);
+                       if (errno != 0 || end == str_fld[i] || flds[i] > 255)
+                               return -EINVAL;
+               }
 
-       /* parse decimal string */
-       pm = strtoul(q_arg, &end, 10);
-       if (pm == UINT64_MAX) {
-               RTE_LOG(ERR, USER1, "Invalid Cryptodev Queue ID %s\n", q_arg);
-               return -1;
+               if (flds[FLD_LCORE] > RTE_MAX_LCORE)
+                       return -EINVAL;
+
+               i = find_lo(flds[FLD_LCORE]);
+               if (i == UINT32_MAX) {
+                       if (options.nb_los == MAX_NB_WORKER_CORES)
+                               return -ENOMEM;
+                       lo = &options.los[options.nb_los];
+                       options.nb_los++;
+               } else
+                       lo = &options.los[i];
+
+               lo->lcore_id = flds[FLD_LCORE];
+               lo->cid = flds[FLD_CID];
+               lo->qid = flds[FLD_QID];
        }
 
-       options.qid = (uint16_t)pm;
-
        return 0;
 }
 
@@ -127,13 +192,12 @@ static void
 vhost_crypto_usage(const char *prgname)
 {
        printf("%s [EAL options] --\n"
-               "  --%s SOCKET-FILE-PATH\n"
-               "  --%s CRYPTODEV_ID: crypto device id\n"
-               "  --%s CDEV_QUEUE_ID: crypto device queue id\n"
+               "  --%s <lcore>,SOCKET-FILE-PATH\n"
+               "  --%s (lcore,cdev_id,queue_id)[,(lcore,cdev_id,queue_id)]\n"
                "  --%s: zero copy\n"
                "  --%s: guest polling\n",
-               prgname, SOCKET_FILE_KEYWORD, CRYPTODEV_ID_KEYWORD,
-               CRYPTODEV_QUEUE_KEYWORD, ZERO_COPY_KEYWORD, POLLING_KEYWORD);
+               prgname, SOCKET_FILE_KEYWORD, CONFIG_KEYWORD,
+               ZERO_COPY_KEYWORD, POLLING_KEYWORD);
 }
 
 static int
@@ -145,19 +209,12 @@ vhost_crypto_parse_args(int argc, char **argv)
        int option_index;
        struct option lgopts[] = {
                        {SOCKET_FILE_KEYWORD, required_argument, 0, 0},
-                       {CRYPTODEV_ID_KEYWORD, required_argument, 0, 0},
-                       {CRYPTODEV_QUEUE_KEYWORD, required_argument, 0, 0},
+                       {CONFIG_KEYWORD, required_argument, 0, 0},
                        {ZERO_COPY_KEYWORD, no_argument, 0, 0},
                        {POLLING_KEYWORD, no_argument, 0, 0},
                        {NULL, 0, 0, 0}
        };
 
-       options.cid = 0;
-       options.qid = 0;
-       options.nb_sockets = 0;
-       options.guest_polling = 0;
-       options.zero_copy = RTE_VHOST_CRYPTO_ZERO_COPY_DISABLE;
-
        argvopt = argv;
 
        while ((opt = getopt_long(argc, argvopt, "s:",
@@ -173,15 +230,8 @@ vhost_crypto_parse_args(int argc, char **argv)
                                        return ret;
                                }
                        } else if (strcmp(lgopts[option_index].name,
-                                       CRYPTODEV_ID_KEYWORD) == 0) {
-                               ret = parse_cryptodev_id(optarg);
-                               if (ret < 0) {
-                                       vhost_crypto_usage(prgname);
-                                       return ret;
-                               }
-                       } else if (strcmp(lgopts[option_index].name,
-                                       CRYPTODEV_QUEUE_KEYWORD) == 0) {
-                               ret = parse_cdev_queue_id(optarg);
+                                       CONFIG_KEYWORD) == 0) {
+                               ret = parse_config(optarg);
                                if (ret < 0) {
                                        vhost_crypto_usage(prgname);
                                        return ret;
@@ -203,22 +253,15 @@ vhost_crypto_parse_args(int argc, char **argv)
                }
        }
 
-       if (options.nb_sockets == 0) {
-               options.socket_files[0] = strdup(DEF_SOCKET_FILE);
-               options.nb_sockets = 1;
-               RTE_LOG(INFO, USER1,
-                               "VHOST-CRYPTO: use default socket file %s\n",
-                               DEF_SOCKET_FILE);
-       }
-
        return 0;
 }
 
 static int
 new_device(int vid)
 {
+       struct vhost_crypto_info *info = NULL;
        char path[PATH_MAX];
-       uint32_t idx, i;
+       uint32_t i, j;
        int ret;
 
        ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
@@ -227,23 +270,25 @@ new_device(int vid)
                return ret;
        }
 
-       for (idx = 0; idx < options.nb_sockets; idx++) {
-               if (strcmp(path, options.socket_files[idx]) == 0)
+       for (i = 0; i < options.nb_los; i++) {
+               for (j = 0; j < options.los[i].nb_sockets; j++) {
+                       if (strcmp(path, options.los[i].socket_files[j]) == 0) {
+                               info = options.infos[i];
+                               break;
+                       }
+               }
+
+               if (info)
                        break;
        }
 
-       if (idx == options.nb_sockets) {
+       if (!info) {
                RTE_LOG(ERR, USER1, "Cannot find recorded socket\n");
                return -ENOENT;
        }
 
-       for (i = 0; i < 2; i++) {
-               vhost_cycles[i] = 0;
-               last_v_cycles[i] = 0;
-       }
-
-       ret = rte_vhost_crypto_create(vid, info.cid, info.sess_pool,
-                       rte_lcore_to_socket_id(info.lcore_id));
+       ret = rte_vhost_crypto_create(vid, info->cid, info->sess_pool,
+                       rte_lcore_to_socket_id(options.los[i].lcore_id));
        if (ret) {
                RTE_LOG(ERR, USER1, "Cannot create vhost crypto\n");
                return ret;
@@ -256,8 +301,8 @@ new_device(int vid)
                return ret;
        }
 
-       info.vids[idx] = vid;
-       info.initialized[idx] = 1;
+       info->vids[j] = vid;
+       info->initialized[j] = 1;
 
        rte_wmb();
 
@@ -269,19 +314,30 @@ new_device(int vid)
 static void
 destroy_device(int vid)
 {
-       uint32_t i;
-
-       for (i = 0; i < info.nb_vids; i++) {
-               if (vid == info.vids[i])
+       struct vhost_crypto_info *info = NULL;
+       uint32_t i, j;
+
+       for (i = 0; i < options.nb_los; i++) {
+               for (j = 0; j < options.los[i].nb_sockets; j++) {
+                       if (options.infos[i]->vids[j] == vid) {
+                               info = options.infos[i];
+                               break;
+                       }
+               }
+               if (info)
                        break;
        }
 
-       if (i == info.nb_vids) {
+       if (!info) {
                RTE_LOG(ERR, USER1, "Cannot find socket file from list\n");
                return;
        }
 
-       info.initialized[i] = 0;
+       do {
+               /* busy-wait until this socket's in-flight crypto ops drain */
+       } while (info->nb_inflight_ops);
+
+       info->initialized[j] = 0;
 
        rte_wmb();
 
@@ -302,25 +358,24 @@ static void clrscr(void)
 }
 
 static int
-vhost_crypto_worker(__rte_unused void *arg)
+vhost_crypto_worker(void *arg)
 {
        struct rte_crypto_op *ops[NB_VIRTIO_QUEUES][MAX_PKT_BURST + 1];
        struct rte_crypto_op *ops_deq[NB_VIRTIO_QUEUES][MAX_PKT_BURST + 1];
-       uint32_t nb_inflight_ops = 0;
+       struct vhost_crypto_info *info = arg;
        uint16_t nb_callfds;
        int callfds[VIRTIO_CRYPTO_MAX_NUM_BURST_VQS];
        uint32_t lcore_id = rte_lcore_id();
        uint32_t burst_size = MAX_PKT_BURST;
        uint32_t i, j, k;
        uint32_t to_fetch, fetched;
-       uint64_t t_start, t_end, interval;
 
        int ret = 0;
 
        RTE_LOG(INFO, USER1, "Processing on Core %u started\n", lcore_id);
 
        for (i = 0; i < NB_VIRTIO_QUEUES; i++) {
-               if (rte_crypto_op_bulk_alloc(info.cop_pool,
+               if (rte_crypto_op_bulk_alloc(info->cop_pool,
                                RTE_CRYPTO_OP_TYPE_SYMMETRIC, ops[i],
                                burst_size) < burst_size) {
                        RTE_LOG(ERR, USER1, "Failed to alloc cops\n");
@@ -330,45 +385,38 @@ vhost_crypto_worker(__rte_unused void *arg)
        }
 
        while (1) {
-               for (i = 0; i < info.nb_vids; i++) {
-                       if (unlikely(info.initialized[i] == 0))
+               for (i = 0; i < info->nb_vids; i++) {
+                       if (unlikely(info->initialized[i] == 0))
                                continue;
 
                        for (j = 0; j < NB_VIRTIO_QUEUES; j++) {
-                               t_start = rte_rdtsc_precise();
-
                                to_fetch = RTE_MIN(burst_size,
                                                (NB_CRYPTO_DESCRIPTORS -
-                                               nb_inflight_ops));
+                                               info->nb_inflight_ops));
                                fetched = rte_vhost_crypto_fetch_requests(
-                                               info.vids[i], j, ops[j],
+                                               info->vids[i], j, ops[j],
                                                to_fetch);
-                               nb_inflight_ops += rte_cryptodev_enqueue_burst(
-                                               info.cid, info.qid, ops[j],
+                               info->nb_inflight_ops +=
+                                               rte_cryptodev_enqueue_burst(
+                                               info->cid, info->qid, ops[j],
                                                fetched);
                                if (unlikely(rte_crypto_op_bulk_alloc(
-                                               info.cop_pool,
+                                               info->cop_pool,
                                                RTE_CRYPTO_OP_TYPE_SYMMETRIC,
                                                ops[j], fetched) < fetched)) {
                                        RTE_LOG(ERR, USER1, "Failed realloc\n");
                                        return -1;
                                }
-                               t_end = rte_rdtsc_precise();
-                               interval = t_end - t_start;
-
-                               vhost_cycles[fetched > 0] += interval;
 
-                               t_start = t_end;
                                fetched = rte_cryptodev_dequeue_burst(
-                                               info.cid, info.qid,
+                                               info->cid, info->qid,
                                                ops_deq[j], RTE_MIN(burst_size,
-                                               nb_inflight_ops));
+                                               info->nb_inflight_ops));
                                fetched = rte_vhost_crypto_finalize_requests(
                                                ops_deq[j], fetched, callfds,
                                                &nb_callfds);
 
-                               nb_inflight_ops -= fetched;
-                               outpkt_amount += fetched;
+                               info->nb_inflight_ops -= fetched;
 
                                if (!options.guest_polling) {
                                        for (k = 0; k < nb_callfds; k++)
@@ -376,11 +424,8 @@ vhost_crypto_worker(__rte_unused void *arg)
                                                                (eventfd_t)1);
                                }
 
-                               rte_mempool_put_bulk(info.cop_pool,
+                               rte_mempool_put_bulk(info->cop_pool,
                                                (void **)ops_deq[j], fetched);
-                               interval = rte_rdtsc_precise() - t_start;
-
-                               vhost_cycles[fetched > 0] += interval;
                        }
                }
        }
@@ -388,17 +433,27 @@ exit:
        return ret;
 }
 
-
 static void
-unregister_drivers(int socket_num)
+free_resource(void)
 {
-       int ret;
+       uint32_t i, j;
+
+       for (i = 0; i < options.nb_los; i++) {
+               struct lcore_option *lo = &options.los[i];
+               struct vhost_crypto_info *info = options.infos[i];
+
+               rte_mempool_free(info->cop_pool);
+               rte_mempool_free(info->sess_pool);
+
+               for (j = 0; j < lo->nb_sockets; j++) {
+                       rte_vhost_driver_unregister(lo->socket_files[j]);
+                       free(lo->socket_files[j]);
+               }
+
+               rte_free(info);
+       }
 
-       ret = rte_vhost_driver_unregister(options.socket_files[socket_num]);
-       if (ret != 0)
-               RTE_LOG(ERR, USER1,
-                       "Fail to unregister vhost driver for %s.\n",
-                       options.socket_files[socket_num]);
+       memset(&options, 0, sizeof(options));
 }
 
 int
@@ -407,10 +462,8 @@ main(int argc, char *argv[])
        struct rte_cryptodev_qp_conf qp_conf = {NB_CRYPTO_DESCRIPTORS};
        struct rte_cryptodev_config config;
        struct rte_cryptodev_info dev_info;
-       uint32_t cryptodev_id;
-       uint32_t worker_lcore;
        char name[128];
-       uint32_t i = 0;
+       uint32_t i, j, lcore;
        int ret;
 
        ret = rte_eal_init(argc, argv);
@@ -423,114 +476,121 @@ main(int argc, char *argv[])
        if (ret < 0)
                rte_exit(EXIT_FAILURE, "Failed to parse arguments!\n");
 
-       info.cid = options.cid;
-       info.qid = options.qid;
+       for (i = 0; i < options.nb_los; i++) {
+               struct lcore_option *lo = &options.los[i];
+               struct vhost_crypto_info *info;
 
-       worker_lcore = rte_get_next_lcore(0, 1, 0);
-       if (worker_lcore == RTE_MAX_LCORE)
-               rte_exit(EXIT_FAILURE, "Not enough lcore\n");
-
-       cryptodev_id = info.cid;
-       rte_cryptodev_info_get(cryptodev_id, &dev_info);
-       if (dev_info.max_nb_queue_pairs < info.qid + 1) {
-               RTE_LOG(ERR, USER1, "Number of queues cannot over %u",
-                               dev_info.max_nb_queue_pairs);
-               goto error_exit;
-       }
+               info = rte_zmalloc_socket(NULL, sizeof(*info),
+                               RTE_CACHE_LINE_SIZE, rte_lcore_to_socket_id(
+                                               lo->lcore_id));
+               if (!info) {
+                       ret = -ENOMEM;
+                       goto error_exit;
+               }
 
-       config.nb_queue_pairs = dev_info.max_nb_queue_pairs;
-       config.socket_id = rte_lcore_to_socket_id(worker_lcore);
+               info->cid = lo->cid;
+               info->qid = lo->qid;
+               info->nb_vids = lo->nb_sockets;
 
-       ret = rte_cryptodev_configure(cryptodev_id, &config);
-       if (ret < 0) {
-               RTE_LOG(ERR, USER1, "Failed to configure cryptodev %u",
-                               cryptodev_id);
-               goto error_exit;
-       }
+               rte_cryptodev_info_get(info->cid, &dev_info);
+               if (dev_info.max_nb_queue_pairs < info->qid + 1) {
+                       RTE_LOG(ERR, USER1, "Number of queues cannot exceed %u",
+                                       dev_info.max_nb_queue_pairs);
+                       goto error_exit;
+               }
 
-       snprintf(name, 127, "SESS_POOL_%u", worker_lcore);
-       info.sess_pool = rte_mempool_create(name, SESSION_MAP_ENTRIES,
-                       rte_cryptodev_sym_get_private_session_size(
-                       cryptodev_id), 64, 0, NULL, NULL, NULL, NULL,
-                       rte_lcore_to_socket_id(worker_lcore), 0);
-       if (!info.sess_pool) {
-               RTE_LOG(ERR, USER1, "Failed to create mempool");
-               goto error_exit;
-       }
+               config.nb_queue_pairs = dev_info.max_nb_queue_pairs;
+               config.socket_id = rte_lcore_to_socket_id(lo->lcore_id);
 
-       snprintf(name, 127, "COPPOOL_%u", worker_lcore);
-       info.cop_pool = rte_crypto_op_pool_create(name,
-                       RTE_CRYPTO_OP_TYPE_SYMMETRIC, NB_MEMPOOL_OBJS,
-                       NB_CACHE_OBJS, 0, rte_lcore_to_socket_id(worker_lcore));
+               ret = rte_cryptodev_configure(info->cid, &config);
+               if (ret < 0) {
+                       RTE_LOG(ERR, USER1, "Failed to configure cryptodev %u",
+                                       info->cid);
+                       goto error_exit;
+               }
 
-       if (!info.cop_pool) {
-               RTE_LOG(ERR, USER1, "Lcore %u failed to create crypto pool",
-                               worker_lcore);
-               ret = -1;
-               goto error_exit;
-       }
+               snprintf(name, 127, "SESS_POOL_%u", lo->lcore_id);
+               info->sess_pool = rte_mempool_create(name, SESSION_MAP_ENTRIES,
+                               rte_cryptodev_sym_get_private_session_size(
+                               info->cid), 64, 0, NULL, NULL, NULL, NULL,
+                               rte_lcore_to_socket_id(lo->lcore_id), 0);
+               if (!info->sess_pool) {
+                       RTE_LOG(ERR, USER1, "Failed to create mempool");
+                       goto error_exit;
+               }
 
-       info.nb_vids = options.nb_sockets;
-       for (i = 0; i < MAX_NB_SOCKETS; i++)
-               info.vids[i] = -1;
+               snprintf(name, 127, "COPPOOL_%u", lo->lcore_id);
+               info->cop_pool = rte_crypto_op_pool_create(name,
+                               RTE_CRYPTO_OP_TYPE_SYMMETRIC, NB_MEMPOOL_OBJS,
+                               NB_CACHE_OBJS, 0,
+                               rte_lcore_to_socket_id(lo->lcore_id));
 
-       for (i = 0; i < dev_info.max_nb_queue_pairs; i++) {
-               ret = rte_cryptodev_queue_pair_setup(cryptodev_id, i,
-                               &qp_conf, rte_lcore_to_socket_id(worker_lcore),
-                               info.sess_pool);
-               if (ret < 0) {
-                       RTE_LOG(ERR, USER1, "Failed to configure qp %u\n",
-                                       info.cid);
+               if (!info->cop_pool) {
+                       RTE_LOG(ERR, USER1, "Failed to create crypto pool");
+                       ret = -ENOMEM;
                        goto error_exit;
                }
-       }
 
-       ret = rte_cryptodev_start(cryptodev_id);
-       if (ret < 0) {
-               RTE_LOG(ERR, USER1, "Failed to start cryptodev %u\n", info.cid);
-               goto error_exit;
+               options.infos[i] = info;
+
+               for (j = 0; j < dev_info.max_nb_queue_pairs; j++) {
+                       ret = rte_cryptodev_queue_pair_setup(info->cid, j,
+                                       &qp_conf, rte_lcore_to_socket_id(
+                                                       lo->lcore_id),
+                                       info->sess_pool);
+                       if (ret < 0) {
+                               RTE_LOG(ERR, USER1, "Failed to configure qp\n");
+                               goto error_exit;
+                       }
+               }
        }
 
-       info.cid = cryptodev_id;
-       info.lcore_id = worker_lcore;
+       for (i = 0; i < options.nb_los; i++) {
+               struct lcore_option *lo = &options.los[i];
+               struct vhost_crypto_info *info = options.infos[i];
 
-       if (rte_eal_remote_launch(vhost_crypto_worker, NULL, worker_lcore)
-                       < 0) {
-               RTE_LOG(ERR, USER1, "Failed to start worker lcore");
-               goto error_exit;
-       }
+               ret = rte_cryptodev_start(lo->cid);
+               if (ret < 0) {
+                       RTE_LOG(ERR, USER1, "Failed to start cryptodev\n");
+                       goto error_exit;
+               }
 
-       for (i = 0; i < options.nb_sockets; i++) {
-               if (rte_vhost_driver_register(options.socket_files[i],
-                               RTE_VHOST_USER_DEQUEUE_ZERO_COPY) < 0) {
-                       RTE_LOG(ERR, USER1, "socket %s already exists\n",
-                                       options.socket_files[i]);
+               if (rte_eal_remote_launch(vhost_crypto_worker, info,
+                               lo->lcore_id) < 0) {
+                       RTE_LOG(ERR, USER1, "Failed to start worker lcore");
                        goto error_exit;
                }
 
-               rte_vhost_driver_callback_register(options.socket_files[i],
+               for (j = 0; j < lo->nb_sockets; j++) {
+                       ret = rte_vhost_driver_register(lo->socket_files[j],
+                               RTE_VHOST_USER_DEQUEUE_ZERO_COPY);
+                       if (ret < 0) {
+                               RTE_LOG(ERR, USER1, "socket %s already exists\n",
+                                       lo->socket_files[j]);
+                               goto error_exit;
+                       }
+
+                       rte_vhost_driver_callback_register(lo->socket_files[j],
                                &virtio_crypto_device_ops);
 
-               if (rte_vhost_driver_start(options.socket_files[i]) < 0) {
-                       RTE_LOG(ERR, USER1, "failed to start vhost driver.\n");
-                       goto error_exit;
+                       ret = rte_vhost_driver_start(lo->socket_files[j]);
+                       if (ret < 0) {
+                               RTE_LOG(ERR, USER1, "failed to start vhost.\n");
+                               goto error_exit;
+                       }
                }
        }
 
-       RTE_LCORE_FOREACH(worker_lcore)
-               rte_eal_wait_lcore(worker_lcore);
+       RTE_LCORE_FOREACH(lcore)
+               rte_eal_wait_lcore(lcore);
 
-       rte_mempool_free(info.sess_pool);
-       rte_mempool_free(info.cop_pool);
+       free_resource();
 
        return 0;
 
 error_exit:
-       for (i = 0; i < options.nb_sockets; i++)
-               unregister_drivers(i);
 
-       rte_mempool_free(info.cop_pool);
-       rte_mempool_free(info.sess_pool);
+       free_resource();
 
        return -1;
 }
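
The restructured startup above hands each worker lcore its own vhost_crypto_info through the argument pointer of rte_eal_remote_launch(), instead of sharing one global state. A minimal sketch of that per-lcore launch pattern (struct and function names here are illustrative stand-ins, not the example's real ones):

    #include <rte_common.h>
    #include <rte_launch.h>

    struct worker_ctx {
            unsigned int lcore_id;          /* per-lcore private state */
    };

    static int
    worker_fn(void *arg)
    {
            struct worker_ctx *ctx = arg;   /* only this lcore touches it */

            RTE_SET_USED(ctx);              /* ... poll ctx's queues ... */
            return 0;
    }

    /* in main(): one context per worker, nothing shared between lcores:
     *      rte_eal_remote_launch(worker_fn, &ctx[i], ctx[i].lcore_id);
     */
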
index 0f4876f..daf19fb 100644
@@ -8,7 +8,7 @@
 
 allow_experimental_apis = true
 deps += ['vhost', 'cryptodev']
-cflags += ['-D_GNU_SOURCE','-D_FILE_OFFSET_BITS=64']
+cflags += ['-D_FILE_OFFSET_BITS=64']
 sources = files(
        'main.c'
 )
index fa0cf72..523aee0 100644
@@ -18,7 +18,7 @@ shared: build/$(APP)-shared
 static: build/$(APP)-static
        ln -sf $(APP)-static build/$(APP)
 
-CFLAGS += -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64
+CFLAGS += -D_FILE_OFFSET_BITS=64
 LDFLAGS += -pthread
 
 PC_FILE := $(shell pkg-config --path libdpdk)
@@ -57,7 +57,7 @@ please change the definition of the RTE_TARGET environment variable)
 all:
 else
 
-CFLAGS += -D_GNU_SOURCE -D_FILE_OFFSET_BITS=64
+CFLAGS += -D_FILE_OFFSET_BITS=64
 CFLAGS += -O2
 CFLAGS += $(WERROR_FLAGS)
 
index 5f92370..2303bca 100644
@@ -10,7 +10,7 @@ if host_machine.system() != 'linux'
        build = false
 endif
 deps += 'vhost'
-cflags += ['-D_GNU_SOURCE','-D_FILE_OFFSET_BITS=64']
+cflags += ['-D_FILE_OFFSET_BITS=64']
 sources = files(
        'scsi.c', 'vhost_scsi.c'
 )
index 13a5205..50147c0 100644
@@ -31,6 +31,12 @@ CFLAGS += $(WERROR_FLAGS)
 
 LDLIBS += -lvirt
 
+JANSSON := $(shell pkg-config --exists jansson; echo $$?)
+ifeq ($(JANSSON), 0)
+LDLIBS += $(shell pkg-config --libs jansson)
+CFLAGS += -DUSE_JANSSON
+endif
+
 ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),y)
 
 ifeq ($(CONFIG_RTE_LIBRTE_IXGBE_PMD),y)
index 927fc35..4fac099 100644
@@ -13,6 +13,7 @@
 
 #include <sys/queue.h>
 #include <sys/types.h>
+#include <sys/stat.h>
 #include <sys/socket.h>
 #include <sys/select.h>
 
@@ -43,7 +44,8 @@ static unsigned char *global_cpumaps;
 static virVcpuInfo *global_vircpuinfo;
 static size_t global_maplen;
 
-static unsigned global_n_host_cpus;
+static unsigned int global_n_host_cpus;
+static bool global_hypervisor_available;
 
 /*
  * Represents a single Virtual Machine
@@ -198,7 +200,11 @@ get_pcpus_mask(struct channel_info *chan_info, unsigned vcpu)
 {
        struct virtual_machine_info *vm_info =
                        (struct virtual_machine_info *)chan_info->priv_info;
-       return rte_atomic64_read(&vm_info->pcpu_mask[vcpu]);
+
+       if (global_hypervisor_available && (vm_info != NULL))
+               return rte_atomic64_read(&vm_info->pcpu_mask[vcpu]);
+       else
+               return 0;
 }
 
 static inline int
@@ -279,6 +285,38 @@ open_non_blocking_channel(struct channel_info *info)
        return 0;
 }
 
+static int
+open_host_channel(struct channel_info *info)
+{
+       int flags;
+
+       info->fd = open(info->channel_path, O_RDWR | O_RSYNC);
+       if (info->fd == -1) {
+               RTE_LOG(ERR, CHANNEL_MANAGER, "Error(%s) opening fifo for '%s'\n",
+                               strerror(errno),
+                               info->channel_path);
+               return -1;
+       }
+
+       /* Get current flags */
+       flags = fcntl(info->fd, F_GETFL, 0);
+       if (flags < 0) {
+               RTE_LOG(WARNING, CHANNEL_MANAGER, "Error(%s) fcntl get flags socket for"
+                               "'%s'\n", strerror(errno), info->channel_path);
+               return 1;
+       }
+       /* Set to Non Blocking */
+       flags |= O_NONBLOCK;
+       if (fcntl(info->fd, F_SETFL, flags) < 0) {
+               RTE_LOG(WARNING, CHANNEL_MANAGER,
+                               "Error(%s) setting non-blocking "
+                               "socket for '%s'\n",
+                               strerror(errno), info->channel_path);
+               return -1;
+       }
+       return 0;
+}
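
Because open_host_channel() switches the fifo descriptor to O_NONBLOCK, readers of this fd must treat EAGAIN as "no data yet" rather than as a failure. A small sketch of that convention, assuming plain POSIX I/O:

    #include <errno.h>
    #include <unistd.h>

    /* Returns >0 bytes read, 0 when nothing is ready yet (or the writer
     * closed the fifo), and -1 on a real error. */
    static ssize_t
    read_nonblocking(int fd, void *buf, size_t len)
    {
            ssize_t n = read(fd, buf, len);

            if (n < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
                    return 0;       /* expected on O_NONBLOCK fds */
            return n;
    }
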
+
 static int
 setup_channel_info(struct virtual_machine_info **vm_info_dptr,
                struct channel_info **chan_info_dptr, unsigned channel_num)
@@ -289,6 +327,7 @@ setup_channel_info(struct virtual_machine_info **vm_info_dptr,
        chan_info->channel_num = channel_num;
        chan_info->priv_info = (void *)vm_info;
        chan_info->status = CHANNEL_MGR_CHANNEL_DISCONNECTED;
+       chan_info->type = CHANNEL_TYPE_BINARY;
        if (open_non_blocking_channel(chan_info) < 0) {
                RTE_LOG(ERR, CHANNEL_MANAGER, "Could not open channel: "
                                "'%s' for VM '%s'\n",
@@ -311,6 +350,42 @@ setup_channel_info(struct virtual_machine_info **vm_info_dptr,
        return 0;
 }
 
+static void
+fifo_path(char *dst, unsigned int len)
+{
+       snprintf(dst, len, "%sfifo", CHANNEL_MGR_SOCKET_PATH);
+}
+
+static int
+setup_host_channel_info(struct channel_info **chan_info_dptr,
+               unsigned int channel_num)
+{
+       struct channel_info *chan_info = *chan_info_dptr;
+
+       chan_info->channel_num = channel_num;
+       chan_info->priv_info = (void *)NULL;
+       chan_info->status = CHANNEL_MGR_CHANNEL_DISCONNECTED;
+       chan_info->type = CHANNEL_TYPE_JSON;
+
+       fifo_path(chan_info->channel_path, sizeof(chan_info->channel_path));
+
+       if (open_host_channel(chan_info) < 0) {
+               RTE_LOG(ERR, CHANNEL_MANAGER, "Could not open host channel: "
+                               "'%s'\n",
+                               chan_info->channel_path);
+               return -1;
+       }
+       if (add_channel_to_monitor(&chan_info) < 0) {
+               RTE_LOG(ERR, CHANNEL_MANAGER, "Could add channel: "
+                               "'%s' to epoll ctl\n",
+                               chan_info->channel_path);
+               return -1;
+       }
+       chan_info->status = CHANNEL_MGR_CHANNEL_CONNECTED;
+       return 0;
+}
+
 int
 add_all_channels(const char *vm_name)
 {
@@ -465,6 +540,45 @@ add_channels(const char *vm_name, unsigned *channel_list,
        return num_channels_enabled;
 }
 
+int
+add_host_channel(void)
+{
+       struct channel_info *chan_info;
+       char socket_path[PATH_MAX];
+       int num_channels_enabled = 0;
+       int ret;
+
+       fifo_path(socket_path, sizeof(socket_path));
+
+       ret = mkfifo(socket_path, 0660);
+       if ((ret < 0) && (errno != EEXIST)) {
+               RTE_LOG(ERR, CHANNEL_MANAGER, "Cannot create fifo '%s' error: "
+                               "%s\n", socket_path, strerror(errno));
+               return 0;
+       }
+
+       if (access(socket_path, F_OK) < 0) {
+               RTE_LOG(ERR, CHANNEL_MANAGER, "Channel path '%s' error: "
+                               "%s\n", socket_path, strerror(errno));
+               return 0;
+       }
+       chan_info = rte_malloc(NULL, sizeof(*chan_info), 0);
+       if (chan_info == NULL) {
+               RTE_LOG(ERR, CHANNEL_MANAGER, "Error allocating memory for "
+                               "channel '%s'\n", socket_path);
+               return 0;
+       }
+       snprintf(chan_info->channel_path,
+                       sizeof(chan_info->channel_path), "%s", socket_path);
+       if (setup_host_channel_info(&chan_info, 0) < 0) {
+               rte_free(chan_info);
+               return 0;
+       }
+       num_channels_enabled++;
+
+       return num_channels_enabled;
+}
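
On the other end of this channel, a host application sends a request simply by writing a JSON document into the fifo created above. A hedged sketch of such a writer; the "/tmp/powermonitor/fifo" path is an assumption matching the default CHANNEL_MGR_SOCKET_PATH plus the "fifo" suffix from fifo_path():

    #include <fcntl.h>
    #include <string.h>
    #include <unistd.h>

    static int
    send_host_command(const char *json)
    {
            int fd = open("/tmp/powermonitor/fifo", O_WRONLY);

            if (fd < 0)
                    return -1;              /* manager not running? */
            if (write(fd, json, strlen(json)) < 0) {
                    close(fd);
                    return -1;
            }
            close(fd);
            return 0;
    }
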
+
 int
 remove_channel(struct channel_info **chan_info_dptr)
 {
@@ -559,6 +673,8 @@ get_all_vm(int *num_vm, int *num_vcpu)
                                VIR_CONNECT_LIST_DOMAINS_PERSISTENT;
        unsigned int domain_flag = VIR_DOMAIN_VCPU_CONFIG;
 
+       if (!global_hypervisor_available)
+               return;
 
        memset(global_cpumaps, 0, CHANNEL_CMDS_MAX_CPUS*global_maplen);
        if (virNodeGetInfo(global_vir_conn_ptr, &node_info)) {
@@ -768,38 +884,42 @@ connect_hypervisor(const char *path)
        }
        return 0;
 }
-
 int
-channel_manager_init(const char *path)
+channel_manager_init(const char *path __rte_unused)
 {
        virNodeInfo info;
 
        LIST_INIT(&vm_list_head);
        if (connect_hypervisor(path) < 0) {
-               RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to initialize channel manager\n");
-               return -1;
-       }
-
-       global_maplen = VIR_CPU_MAPLEN(CHANNEL_CMDS_MAX_CPUS);
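+               /* No hypervisor info available: assume a default CPU count */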
+               global_n_host_cpus = 64;
+               global_hypervisor_available = 0;
+               RTE_LOG(INFO, CHANNEL_MANAGER, "Unable to initialize channel manager\n");
+       } else {
+               global_hypervisor_available = 1;
+
+               global_maplen = VIR_CPU_MAPLEN(CHANNEL_CMDS_MAX_CPUS);
+
+               global_vircpuinfo = rte_zmalloc(NULL,
+                               sizeof(*global_vircpuinfo) *
+                               CHANNEL_CMDS_MAX_CPUS, RTE_CACHE_LINE_SIZE);
+               if (global_vircpuinfo == NULL) {
+                       RTE_LOG(ERR, CHANNEL_MANAGER, "Error allocating memory for CPU Info\n");
+                       goto error;
+               }
+               global_cpumaps = rte_zmalloc(NULL,
+                               CHANNEL_CMDS_MAX_CPUS * global_maplen,
+                               RTE_CACHE_LINE_SIZE);
+               if (global_cpumaps == NULL)
+                       goto error;
 
-       global_vircpuinfo = rte_zmalloc(NULL, sizeof(*global_vircpuinfo) *
-                       CHANNEL_CMDS_MAX_CPUS, RTE_CACHE_LINE_SIZE);
-       if (global_vircpuinfo == NULL) {
-               RTE_LOG(ERR, CHANNEL_MANAGER, "Error allocating memory for CPU Info\n");
-               goto error;
-       }
-       global_cpumaps = rte_zmalloc(NULL, CHANNEL_CMDS_MAX_CPUS * global_maplen,
-                       RTE_CACHE_LINE_SIZE);
-       if (global_cpumaps == NULL) {
-               goto error;
+               if (virNodeGetInfo(global_vir_conn_ptr, &info)) {
+                       RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to retrieve node Info\n");
+                       goto error;
+               }
+               global_n_host_cpus = (unsigned int)info.cpus;
        }
 
-       if (virNodeGetInfo(global_vir_conn_ptr, &info)) {
-               RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to retrieve node Info\n");
-               goto error;
-       }
 
-       global_n_host_cpus = (unsigned)info.cpus;
 
        if (global_n_host_cpus > CHANNEL_CMDS_MAX_CPUS) {
                RTE_LOG(WARNING, CHANNEL_MANAGER, "The number of host CPUs(%u) exceeds the "
@@ -811,7 +931,8 @@ channel_manager_init(const char *path)
 
        return 0;
 error:
-       disconnect_hypervisor();
+       if (global_hypervisor_available)
+               disconnect_hypervisor();
        return -1;
 }
 
@@ -838,7 +959,10 @@ channel_manager_exit(void)
                rte_free(vm_info);
        }
 
-       rte_free(global_cpumaps);
-       rte_free(global_vircpuinfo);
-       disconnect_hypervisor();
+       if (global_hypervisor_available) {
+               /* Only needed if hypervisor available */
+               rte_free(global_cpumaps);
+               rte_free(global_vircpuinfo);
+               disconnect_hypervisor();
+       }
 }
index 872ec61..d948b30 100644
@@ -37,7 +37,7 @@ struct sockaddr_un _sockaddr_un;
 #define UNIX_PATH_MAX sizeof(_sockaddr_un.sun_path)
 #endif
 
-#define MAX_VMS 4
+#define MAX_CLIENTS 64
 #define MAX_VCPUS 20
 
 
@@ -47,13 +47,20 @@ struct libvirt_vm_info {
        uint8_t num_cpus;
 };
 
-struct libvirt_vm_info lvm_info[MAX_VMS];
+struct libvirt_vm_info lvm_info[MAX_CLIENTS];
 /* Communication Channel Status */
 enum channel_status { CHANNEL_MGR_CHANNEL_DISCONNECTED = 0,
        CHANNEL_MGR_CHANNEL_CONNECTED,
        CHANNEL_MGR_CHANNEL_DISABLED,
        CHANNEL_MGR_CHANNEL_PROCESSING};
 
+/* Communication Channel Type */
+enum channel_type {
+       CHANNEL_TYPE_BINARY = 0,
+       CHANNEL_TYPE_INI,
+       CHANNEL_TYPE_JSON
+};
+
 /* VM libvirt(qemu/KVM) connection status */
 enum vm_status { CHANNEL_MGR_VM_INACTIVE = 0, CHANNEL_MGR_VM_ACTIVE};
 
@@ -66,6 +73,7 @@ struct channel_info {
        volatile uint32_t status;    /**< Connection status(enum channel_status) */
        int fd;                      /**< AF_UNIX socket fd */
        unsigned channel_num;        /**< CHANNEL_MGR_SOCKET_PATH/<vm_name>.channel_num */
+       enum channel_type type;      /**< Binary, ini, json, etc. */
        void *priv_info;             /**< Pointer to private info, do not modify */
 };
 
@@ -226,6 +234,15 @@ int add_all_channels(const char *vm_name);
 int add_channels(const char *vm_name, unsigned *channel_list,
                unsigned num_channels);
 
+/**
+ * Set up a fifo through which host applications can send commands and
+ * policies to the vm_power_manager.
+ *
+ * @return
+ *  - Number of channels enabled: 1 on success, 0 on failure
+ */
+int add_host_channel(void);
+
 /**
  * Remove a channel definition from the channel manager. This must only be
  * called from the channel monitor thread.
index 7fa47ba..5da5315 100644
@@ -9,11 +9,18 @@
 #include <signal.h>
 #include <errno.h>
 #include <string.h>
+#include <fcntl.h>
 #include <sys/types.h>
 #include <sys/epoll.h>
 #include <sys/queue.h>
 #include <sys/time.h>
-
+#include <sys/socket.h>
+#include <sys/select.h>
+#ifdef USE_JANSSON
+#include <jansson.h>
+#else
+#pragma message "Jansson dev libs unavailable, not including JSON parsing"
+#endif
 #include <rte_log.h>
 #include <rte_memory.h>
 #include <rte_malloc.h>
 
 uint64_t vsi_pkt_count_prev[384];
 uint64_t rdtsc_prev[384];
+#define MAX_JSON_STRING_LEN 1024
+char json_data[MAX_JSON_STRING_LEN];
 
 double time_period_ms = 1;
 static volatile unsigned run_loop = 1;
 static int global_event_fd;
 static unsigned int policy_is_set;
 static struct epoll_event *global_events_list;
-static struct policy policies[MAX_VMS];
+static struct policy policies[MAX_CLIENTS];
+
+#ifdef USE_JANSSON
+
+union PFID {
+       struct ether_addr addr;
+       uint64_t pfid;
+};
+
+static int
+str_to_ether_addr(const char *a, struct ether_addr *ether_addr)
+{
+       int i;
+       char *end;
+       unsigned long o[ETHER_ADDR_LEN];
+
+       i = 0;
+       do {
+               errno = 0;
+               o[i] = strtoul(a, &end, 16);
+               if (errno != 0 || end == a || (end[0] != ':' && end[0] != 0))
+                       return -1;
+               a = end + 1;
+       } while (++i != RTE_DIM(o) && end[0] != 0);
+
+       /* Junk at the end of line */
+       if (end[0] != 0)
+               return -1;
+
+       /* Support the format XX:XX:XX:XX:XX:XX */
+       if (i == ETHER_ADDR_LEN) {
+               while (i-- != 0) {
+                       if (o[i] > UINT8_MAX)
+                               return -1;
+                       ether_addr->addr_bytes[i] = (uint8_t)o[i];
+               }
+       /* Support the format XXXX:XXXX:XXXX */
+       } else if (i == ETHER_ADDR_LEN / 2) {
+               while (i-- != 0) {
+                       if (o[i] > UINT16_MAX)
+                               return -1;
+                       ether_addr->addr_bytes[i * 2] =
+                                       (uint8_t)(o[i] >> 8);
+                       ether_addr->addr_bytes[i * 2 + 1] =
+                                       (uint8_t)(o[i] & 0xff);
+               }
+       /* unknown format */
+       } else
+               return -1;
+
+       return 0;
+}
+
+static int
+set_policy_mac(struct channel_packet *pkt, int idx, char *mac)
+{
+       union PFID pfid;
+       int ret;
+
+       /* Use port MAC address as the vfid */
+       ret = str_to_ether_addr(mac, &pfid.addr);
+
+       if (ret != 0) {
+               RTE_LOG(ERR, CHANNEL_MONITOR,
+                       "Invalid mac address received in JSON\n");
+               pkt->vfid[idx] = 0;
+               return -1;
+       }
+
+       printf("Received MAC Address: %02" PRIx8 ":%02" PRIx8 ":%02" PRIx8 ":"
+                       "%02" PRIx8 ":%02" PRIx8 ":%02" PRIx8 "\n",
+                       pfid.addr.addr_bytes[0], pfid.addr.addr_bytes[1],
+                       pfid.addr.addr_bytes[2], pfid.addr.addr_bytes[3],
+                       pfid.addr.addr_bytes[4], pfid.addr.addr_bytes[5]);
+
+       pkt->vfid[idx] = pfid.pfid;
+       return 0;
+}
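
The PFID union above is what makes set_policy_mac() work: the six parsed MAC bytes share storage with a uint64_t, so the address itself serves as the vfid. A short usage sketch reusing the helpers defined above:

    static uint64_t
    mac_to_vfid(const char *mac)
    {
            union PFID id = { .pfid = 0 };

            if (str_to_ether_addr(mac, &id.addr) != 0)
                    return 0;       /* invalid MAC string */
            /* six address bytes plus two zero bytes, host byte order */
            return id.pfid;
    }

    /* mac_to_vfid("aa:bb:cc:dd:ee:ff") yields the same value that
     * set_policy_mac() stores into pkt->vfid[idx]. */
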
+
+
+static int
+parse_json_to_pkt(json_t *element, struct channel_packet *pkt)
+{
+       const char *key;
+       json_t *value;
+       int ret;
+
+       memset(pkt, 0, sizeof(struct channel_packet));
+
+       pkt->nb_mac_to_monitor = 0;
+       pkt->t_boost_status.tbEnabled = false;
+       pkt->workload = LOW;
+       pkt->policy_to_use = TIME;
+       pkt->command = PKT_POLICY;
+       pkt->core_type = CORE_TYPE_PHYSICAL;
+
+       json_object_foreach(element, key, value) {
+               if (!strcmp(key, "policy")) {
+                       /* Recurse into the "policy" object */
+                       ret = parse_json_to_pkt(value, pkt);
+                       if (ret)
+                               return ret;
+               } else if (!strcmp(key, "instruction")) {
+                       /* Recurse into the "instruction" object */
+                       ret = parse_json_to_pkt(value, pkt);
+                       if (ret)
+                               return ret;
+               } else if (!strcmp(key, "name")) {
+                       snprintf(pkt->vm_name, sizeof(pkt->vm_name),
+                                       "%s", json_string_value(value));
+               } else if (!strcmp(key, "command")) {
+                       char command[32];
+                       snprintf(command, 32, "%s", json_string_value(value));
+                       if (!strcmp(command, "power")) {
+                               pkt->command = CPU_POWER;
+                       } else if (!strcmp(command, "create")) {
+                               pkt->command = PKT_POLICY;
+                       } else if (!strcmp(command, "destroy")) {
+                               pkt->command = PKT_POLICY_REMOVE;
+                       } else {
+                               RTE_LOG(ERR, CHANNEL_MONITOR,
+                                       "Invalid command received in JSON\n");
+                               return -1;
+                       }
+               } else if (!strcmp(key, "policy_type")) {
+                       char command[32];
+                       snprintf(command, 32, "%s", json_string_value(value));
+                       if (!strcmp(command, "TIME")) {
+                               pkt->policy_to_use = TIME;
+                       } else if (!strcmp(command, "TRAFFIC")) {
+                               pkt->policy_to_use = TRAFFIC;
+                       } else if (!strcmp(command, "WORKLOAD")) {
+                               pkt->policy_to_use = WORKLOAD;
+                       } else if (!strcmp(command, "BRANCH_RATIO")) {
+                               pkt->policy_to_use = BRANCH_RATIO;
+                       } else {
+                               RTE_LOG(ERR, CHANNEL_MONITOR,
+                                       "Wrong policy_type received in JSON\n");
+                               return -1;
+                       }
+               } else if (!strcmp(key, "workload")) {
+                       char command[32];
+                       snprintf(command, 32, "%s", json_string_value(value));
+                       if (!strcmp(command, "HIGH")) {
+                               pkt->workload = HIGH;
+                       } else if (!strcmp(command, "MEDIUM")) {
+                               pkt->workload = MEDIUM;
+                       } else if (!strcmp(command, "LOW")) {
+                               pkt->workload = LOW;
+                       } else {
+                               RTE_LOG(ERR, CHANNEL_MONITOR,
+                                       "Wrong workload received in JSON\n");
+                               return -1;
+                       }
+               } else if (!strcmp(key, "busy_hours")) {
+                       unsigned int i;
+                       size_t size = json_array_size(value);
+
+                       for (i = 0; i < size; i++) {
+                               int hour = (int)json_integer_value(
+                                               json_array_get(value, i));
+                               pkt->timer_policy.busy_hours[i] = hour;
+                       }
+               } else if (!strcmp(key, "quiet_hours")) {
+                       unsigned int i;
+                       size_t size = json_array_size(value);
+
+                       for (i = 0; i < size; i++) {
+                               int hour = (int)json_integer_value(
+                                               json_array_get(value, i));
+                               pkt->timer_policy.quiet_hours[i] = hour;
+                       }
+               } else if (!strcmp(key, "core_list")) {
+                       unsigned int i;
+                       size_t size = json_array_size(value);
+
+                       for (i = 0; i < size; i++) {
+                               int core = (int)json_integer_value(
+                                               json_array_get(value, i));
+                               pkt->vcpu_to_control[i] = core;
+                       }
+                       pkt->num_vcpu = size;
+               } else if (!strcmp(key, "mac_list")) {
+                       unsigned int i;
+                       size_t size = json_array_size(value);
+
+                       for (i = 0; i < size; i++) {
+                               char mac[32];
+                               snprintf(mac, 32, "%s", json_string_value(
+                                               json_array_get(value, i)));
+                               set_policy_mac(pkt, i, mac);
+                       }
+                       pkt->nb_mac_to_monitor = size;
+               } else if (!strcmp(key, "avg_packet_thresh")) {
+                       pkt->traffic_policy.avg_max_packet_thresh =
+                                       (uint32_t)json_integer_value(value);
+               } else if (!strcmp(key, "max_packet_thresh")) {
+                       pkt->traffic_policy.max_max_packet_thresh =
+                                       (uint32_t)json_integer_value(value);
+               } else if (!strcmp(key, "unit")) {
+                       char unit[32];
+                       snprintf(unit, 32, "%s", json_string_value(value));
+                       if (!strcmp(unit, "SCALE_UP")) {
+                               pkt->unit = CPU_POWER_SCALE_UP;
+                       } else if (!strcmp(unit, "SCALE_DOWN")) {
+                               pkt->unit = CPU_POWER_SCALE_DOWN;
+                       } else if (!strcmp(unit, "SCALE_MAX")) {
+                               pkt->unit = CPU_POWER_SCALE_MAX;
+                       } else if (!strcmp(unit, "SCALE_MIN")) {
+                               pkt->unit = CPU_POWER_SCALE_MIN;
+                       } else if (!strcmp(unit, "ENABLE_TURBO")) {
+                               pkt->unit = CPU_POWER_ENABLE_TURBO;
+                       } else if (!strcmp(unit, "DISABLE_TURBO")) {
+                               pkt->unit = CPU_POWER_DISABLE_TURBO;
+                       } else {
+                               RTE_LOG(ERR, CHANNEL_MONITOR,
+                                       "Invalid command received in JSON\n");
+                               return -1;
+                       }
+               } else if (!strcmp(key, "resource_id")) {
+                       pkt->resource_id = (uint32_t)json_integer_value(value);
+               } else {
+                       RTE_LOG(ERR, CHANNEL_MONITOR,
+                               "Unknown key received in JSON string: %s\n",
+                               key);
+               }
+       }
+       return 0;
+}
+#endif
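
For reference, a request of the shape parse_json_to_pkt() walks; every key below is handled by the parser above, while the values are purely illustrative:

    static const char example_policy[] =
            "{ \"policy\": {"
            "   \"name\": \"vm_a\","
            "   \"command\": \"create\","
            "   \"policy_type\": \"TIME\","
            "   \"busy_hours\": [ 9, 10, 11 ],"
            "   \"quiet_hours\": [ 2, 3 ],"
            "   \"core_list\": [ 1, 2 ],"
            "   \"workload\": \"LOW\""
            "} }";

    /* json_loads(example_policy, 0, &error) followed by
     * parse_json_to_pkt(root, &pkt) fills a struct channel_packet. */
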
 
 void channel_monitor_exit(void)
 {
@@ -66,7 +303,7 @@ static void
 core_share_status(int pNo)
 {
 
-       int noVms, noVcpus, z, x, t;
+       int noVms = 0, noVcpus = 0, z, x, t;
 
        get_all_vm(&noVms, &noVcpus);
 
@@ -85,6 +322,33 @@ core_share_status(int pNo)
        }
 }
 
+
+static int
+pcpu_monitor(struct policy *pol, struct core_info *ci, int pcpu, int count)
+{
+       int ret = 0;
+
+       if (pol->pkt.policy_to_use == BRANCH_RATIO) {
+               ci->cd[pcpu].oob_enabled = 1;
+               ret = add_core_to_monitor(pcpu);
+               if (ret == 0)
+                       RTE_LOG(INFO, CHANNEL_MONITOR,
+                                       "Monitoring pcpu %d OOB for %s\n",
+                                       pcpu, pol->pkt.vm_name);
+               else
+                       RTE_LOG(ERR, CHANNEL_MONITOR,
+                                       "Error monitoring pcpu %d OOB for %s\n",
+                                       pcpu, pol->pkt.vm_name);
+
+       } else {
+               pol->core_share[count].pcpu = pcpu;
+               RTE_LOG(INFO, CHANNEL_MONITOR,
+                               "Monitoring pcpu %d for %s\n",
+                               pcpu, pol->pkt.vm_name);
+       }
+       return ret;
+}
+
 static void
 get_pcpu_to_control(struct policy *pol)
 {
@@ -94,34 +358,42 @@ get_pcpu_to_control(struct policy *pol)
        int pcpu, count;
        uint64_t mask_u64b;
        struct core_info *ci;
-       int ret;
 
        ci = get_core_info();
 
-       RTE_LOG(INFO, CHANNEL_MONITOR, "Looking for pcpu for %s\n",
-                       pol->pkt.vm_name);
-       get_info_vm(pol->pkt.vm_name, &info);
-
-       for (count = 0; count < pol->pkt.num_vcpu; count++) {
-               mask_u64b = info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
-               for (pcpu = 0; mask_u64b; mask_u64b &= ~(1ULL << pcpu++)) {
-                       if ((mask_u64b >> pcpu) & 1) {
-                               if (pol->pkt.policy_to_use == BRANCH_RATIO) {
-                                       ci->cd[pcpu].oob_enabled = 1;
-                                       ret = add_core_to_monitor(pcpu);
-                                       if (ret == 0)
-                                               printf("Monitoring pcpu %d via Branch Ratio\n",
-                                                               pcpu);
-                                       else
-                                               printf("Failed to start OOB Monitoring pcpu %d\n",
-                                                               pcpu);
-
-                               } else {
-                                       pol->core_share[count].pcpu = pcpu;
-                                       printf("Monitoring pcpu %d\n", pcpu);
-                               }
+       RTE_LOG(DEBUG, CHANNEL_MONITOR,
+                       "Looking for pcpu for %s\n", pol->pkt.vm_name);
+
+       /*
+        * Now that we handle both virtual and physical cores, we need to
+        * differentiate between them when adding them to the branch monitor:
+        * virtual cores must first be converted to physical cores.
+        */
+       if (pol->pkt.core_type == CORE_TYPE_VIRTUAL) {
+               /*
+                * If the cores in the policy are virtual, we need to map them
+                * to physical core. We look up the vm info and use that for
+                * the mapping.
+                */
+               get_info_vm(pol->pkt.vm_name, &info);
+               for (count = 0; count < pol->pkt.num_vcpu; count++) {
+                       mask_u64b =
+                               info.pcpu_mask[pol->pkt.vcpu_to_control[count]];
+                       for (pcpu = 0; mask_u64b;
+                                       mask_u64b &= ~(1ULL << pcpu++)) {
+                               if ((mask_u64b >> pcpu) & 1)
+                                       pcpu_monitor(pol, ci, pcpu, count);
                        }
                }
+       } else {
+               /*
+                * If the cores in the policy are physical, we just use
+                * those core id's directly.
+                */
+               for (count = 0; count < pol->pkt.num_vcpu; count++) {
+                       pcpu = pol->pkt.vcpu_to_control[count];
+                       pcpu_monitor(pol, ci, pcpu, count);
+               }
        }
 }
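
The virtual-core branch above walks the pcpu mask with a clear-one-bit-per-iteration idiom. The same loop isolated as a sketch, for clarity:

    #include <stdint.h>

    /* Visit every set bit of mask exactly once; bit i maps to pcpu i. */
    static void
    for_each_pcpu(uint64_t mask, void (*fn)(int pcpu))
    {
            int pcpu;

            for (pcpu = 0; mask; mask &= ~(1ULL << pcpu++)) {
                    if ((mask >> pcpu) & 1)
                            fn(pcpu);
            }
    }
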
 
@@ -160,8 +432,13 @@ update_policy(struct channel_packet *pkt)
        unsigned int updated = 0;
        int i;
 
-       for (i = 0; i < MAX_VMS; i++) {
+
+       RTE_LOG(INFO, CHANNEL_MONITOR,
+                       "Applying policy for %s\n", pkt->vm_name);
+
+       for (i = 0; i < MAX_CLIENTS; i++) {
                if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
+                       /* Copy the contents of *pkt into the policy.pkt */
                        policies[i].pkt = *pkt;
                        get_pcpu_to_control(&policies[i]);
                        if (get_pfid(&policies[i]) == -1) {
@@ -174,7 +451,7 @@ update_policy(struct channel_packet *pkt)
                }
        }
        if (!updated) {
-               for (i = 0; i < MAX_VMS; i++) {
+               for (i = 0; i < MAX_CLIENTS; i++) {
                        if (policies[i].enabled == 0) {
                                policies[i].pkt = *pkt;
                                get_pcpu_to_control(&policies[i]);
@@ -189,6 +466,24 @@ update_policy(struct channel_packet *pkt)
        return 0;
 }
 
+static int
+remove_policy(struct channel_packet *pkt __rte_unused)
+{
+       int i;
+
+       /*
+        * Disabling the policy is simply a case of setting
+        * enabled to 0
+        */
+       for (i = 0; i < MAX_CLIENTS; i++) {
+               if (strcmp(policies[i].pkt.vm_name, pkt->vm_name) == 0) {
+                       policies[i].enabled = 0;
+                       return 0;
+               }
+       }
+       return -1;
+}
+
 static uint64_t
 get_pkt_diff(struct policy *pol)
 {
@@ -233,8 +528,6 @@ apply_traffic_profile(struct policy *pol)
 
        diff = get_pkt_diff(pol);
 
-       RTE_LOG(INFO, CHANNEL_MONITOR, "Applying traffic profile\n");
-
        if (diff >= (pol->pkt.traffic_policy.max_max_packet_thresh)) {
                for (count = 0; count < pol->pkt.num_vcpu; count++) {
                        if (pol->core_share[count].status != 1)
@@ -278,9 +571,6 @@ apply_time_profile(struct policy *pol)
                                if (pol->core_share[count].status != 1) {
                                        power_manager_scale_core_max(
                                                pol->core_share[count].pcpu);
-                               RTE_LOG(INFO, CHANNEL_MONITOR,
-                                       "Scaling up core %d to max\n",
-                                       pol->core_share[count].pcpu);
                                }
                        }
                        break;
@@ -290,9 +580,6 @@ apply_time_profile(struct policy *pol)
                                if (pol->core_share[count].status != 1) {
                                        power_manager_scale_core_min(
                                                pol->core_share[count].pcpu);
-                               RTE_LOG(INFO, CHANNEL_MONITOR,
-                                       "Scaling down core %d to min\n",
-                                       pol->core_share[count].pcpu);
                        }
                }
                        break;
@@ -346,7 +633,6 @@ apply_policy(struct policy *pol)
                apply_workload_profile(pol);
 }
 
-
 static int
 process_request(struct channel_packet *pkt, struct channel_info *chan_info)
 {
@@ -362,10 +648,12 @@ process_request(struct channel_packet *pkt, struct channel_info *chan_info)
        if (pkt->command == CPU_POWER) {
                core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
                if (core_mask == 0) {
-                       RTE_LOG(ERR, CHANNEL_MONITOR, "Error get physical CPU mask for "
-                               "channel '%s' using vCPU(%u)\n", chan_info->channel_path,
-                               (unsigned)pkt->unit);
-                       return -1;
+                       /*
+                        * A core mask of 0 means no hypervisor is
+                        * available and we are running on the host,
+                        * so use the resource id itself as the mask.
+                        */
+                       core_mask = 1ULL << pkt->resource_id;
                }
                if (__builtin_popcountll(core_mask) == 1) {
 
@@ -421,12 +709,20 @@ process_request(struct channel_packet *pkt, struct channel_info *chan_info)
        }
 
        if (pkt->command == PKT_POLICY) {
-               RTE_LOG(INFO, CHANNEL_MONITOR, "\nProcessing Policy request from Guest\n");
+               RTE_LOG(INFO, CHANNEL_MONITOR, "Processing policy request %s\n",
+                               pkt->vm_name);
                update_policy(pkt);
                policy_is_set = 1;
        }
 
-       /* Return is not checked as channel status may have been set to DISABLED
+       if (pkt->command == PKT_POLICY_REMOVE) {
+               RTE_LOG(INFO, CHANNEL_MONITOR,
+                                "Removing policy %s\n", pkt->vm_name);
+               remove_policy(pkt);
+       }
+
+       /*
+        * Return is not checked as channel status may have been set to DISABLED
         * from management thread
         */
        rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
@@ -448,13 +744,16 @@ add_channel_to_monitor(struct channel_info **chan_info)
                                "to epoll\n", info->channel_path);
                return -1;
        }
+       RTE_LOG(ERR, CHANNEL_MONITOR, "Added channel '%s' "
+                       "to monitor\n", info->channel_path);
        return 0;
 }
 
 int
 remove_channel_from_monitor(struct channel_info *chan_info)
 {
-       if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL, chan_info->fd, NULL) < 0) {
+       if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL,
+                       chan_info->fd, NULL) < 0) {
                RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
                                "from epoll\n", chan_info->channel_path);
                return -1;
@@ -467,11 +766,13 @@ channel_monitor_init(void)
 {
        global_event_fd = epoll_create1(0);
        if (global_event_fd == 0) {
-               RTE_LOG(ERR, CHANNEL_MONITOR, "Error creating epoll context with "
-                               "error %s\n", strerror(errno));
+               RTE_LOG(ERR, CHANNEL_MONITOR,
+                               "Error creating epoll context with error %s\n",
+                               strerror(errno));
                return -1;
        }
-       global_events_list = rte_malloc("epoll_events", sizeof(*global_events_list)
+       global_events_list = rte_malloc("epoll_events",
+                       sizeof(*global_events_list)
                        * MAX_EVENTS, RTE_CACHE_LINE_SIZE);
        if (global_events_list == NULL) {
                RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
@@ -481,6 +782,103 @@ channel_monitor_init(void)
        return 0;
 }
 
+static void
+read_binary_packet(struct channel_info *chan_info)
+{
+       struct channel_packet pkt;
+       void *buffer = &pkt;
+       int buffer_len = sizeof(pkt);
+       int n_bytes, err = 0;
+
+       while (buffer_len > 0) {
+               n_bytes = read(chan_info->fd,
+                               buffer, buffer_len);
+               if (n_bytes == buffer_len)
+                       break;
+               if (n_bytes == -1) {
+                       err = errno;
+                       RTE_LOG(DEBUG, CHANNEL_MONITOR,
+                               "Received error on "
+                               "channel '%s' read: %s\n",
+                               chan_info->channel_path,
+                               strerror(err));
+                       remove_channel(&chan_info);
+                       break;
+               }
+               buffer = (char *)buffer + n_bytes;
+               buffer_len -= n_bytes;
+       }
+       if (!err)
+               process_request(&pkt, chan_info);
+}
+
+#ifdef USE_JANSSON
+static void
+read_json_packet(struct channel_info *chan_info)
+{
+       struct channel_packet pkt;
+       int n_bytes, ret;
+       json_t *root;
+       json_error_t error;
+
+       /* read opening brace to closing brace */
+       do {
+               int idx = 0;
+               int indent = 0;
+               do {
+                       n_bytes = read(chan_info->fd, &json_data[idx], 1);
+                       if (n_bytes == 0)
+                               break;
+                       if (json_data[idx] == '{')
+                               indent++;
+                       if (json_data[idx] == '}')
+                               indent--;
+                       if ((indent > 0) || (idx > 0))
+                               idx++;
+                       if (indent == 0)
+                               json_data[idx] = 0;
+                       if (idx >= MAX_JSON_STRING_LEN - 1)
+                               break;
+               } while (indent > 0);
+
+               if (indent > 0)
+                       /*
+                        * We've broken out of the read loop without getting
+                        * a closing brace, so throw away the data
+                        */
+                       json_data[idx] = 0;
+
+               if (strlen(json_data) == 0)
+                       continue;
+
+               printf("got [%s]\n", json_data);
+
+               root = json_loads(json_data, 0, &error);
+
+               if (root) {
+                       /*
+                        * The parsed data now lives in the json
+                        * object, so populate the channel_packet
+                        * struct from it using parse_json_to_pkt()
+                        */
+                       ret = parse_json_to_pkt(root, &pkt);
+                       json_decref(root);
+                       if (ret) {
+                               RTE_LOG(ERR, CHANNEL_MONITOR,
+                                       "Error validating JSON profile data\n");
+                               break;
+                       }
+                       process_request(&pkt, chan_info);
+               } else {
+                       RTE_LOG(ERR, CHANNEL_MONITOR,
+                                       "JSON error on line %d: %s\n",
+                                       error.line, error.text);
+               }
+       } while (n_bytes > 0);
+}
+#endif
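
read_json_packet() frames each message by tracking brace depth one byte at a time: bytes before the opening '{' are discarded, and the message ends when the top-level braces balance. The framing step pulled out as a standalone sketch, with the buffer handling simplified:

    #include <unistd.h>

    /* Read one '{'..'}'-balanced message from fd into buf.
     * Returns the message length, or -1 on EOF/error/overflow. */
    static int
    read_one_json(int fd, char *buf, int cap)
    {
            int idx = 0, depth = 0;

            do {
                    if (read(fd, &buf[idx], 1) != 1)
                            return -1;
                    if (buf[idx] == '{')
                            depth++;
                    else if (buf[idx] == '}')
                            depth--;
                    if (depth > 0 || idx > 0)
                            idx++;          /* skip leading junk */
                    if (idx >= cap - 1)
                            return -1;      /* message too large */
            } while (depth > 0 || idx == 0);
            buf[idx] = '\0';
            return idx;
    }
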
+
 void
 run_channel_monitor(void)
 {
@@ -496,7 +894,8 @@ run_channel_monitor(void)
                                        global_events_list[i].data.ptr;
                        if ((global_events_list[i].events & EPOLLERR) ||
                                (global_events_list[i].events & EPOLLHUP)) {
-                               RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
+                               RTE_LOG(INFO, CHANNEL_MONITOR,
+                                               "Remote closed connection for "
                                                "channel '%s'\n",
                                                chan_info->channel_path);
                                remove_channel(&chan_info);
@@ -504,38 +903,25 @@ run_channel_monitor(void)
                        }
                        if (global_events_list[i].events & EPOLLIN) {
 
-                               int n_bytes, err = 0;
-                               struct channel_packet pkt;
-                               void *buffer = &pkt;
-                               int buffer_len = sizeof(pkt);
-
-                               while (buffer_len > 0) {
-                                       n_bytes = read(chan_info->fd,
-                                                       buffer, buffer_len);
-                                       if (n_bytes == buffer_len)
-                                               break;
-                                       if (n_bytes == -1) {
-                                               err = errno;
-                                               RTE_LOG(DEBUG, CHANNEL_MONITOR,
-                                                       "Received error on "
-                                                       "channel '%s' read: %s\n",
-                                                       chan_info->channel_path,
-                                                       strerror(err));
-                                               remove_channel(&chan_info);
-                                               break;
-                                       }
-                                       buffer = (char *)buffer + n_bytes;
-                                       buffer_len -= n_bytes;
+                               switch (chan_info->type) {
+                               case CHANNEL_TYPE_BINARY:
+                                       read_binary_packet(chan_info);
+                                       break;
+#ifdef USE_JANSSON
+                               case CHANNEL_TYPE_JSON:
+                                       read_json_packet(chan_info);
+                                       break;
+#endif
+                               default:
+                                       break;
                                }
-                               if (!err)
-                                       process_request(&pkt, chan_info);
                        }
                }
                rte_delay_us(time_period_ms*1000);
                if (policy_is_set) {
                        int j;
 
-                       for (j = 0; j < MAX_VMS; j++) {
+                       for (j = 0; j < MAX_CLIENTS; j++) {
                                if (policies[j].enabled == 1)
                                        apply_policy(&policies[j]);
                        }
diff --git a/examples/vm_power_manager/guest_cli/meson.build b/examples/vm_power_manager/guest_cli/meson.build
new file mode 100644 (file)
index 0000000..9e821ce
--- /dev/null
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+# meson file, for building this example as part of a main DPDK build.
+#
+# To build this example as a standalone application with an already-installed
+# DPDK instance, use 'make'
+
+# Set the name explicitly: because of the way the directories are parsed,
+# the default name would conflict with the vm_power_manager app.
+name = 'guest_cli'
+
+deps += ['power']
+
+sources = files(
+       'main.c', 'parse.c', 'vm_power_cli_guest.c'
+)
+
+opt_dep = cc.find_library('virt', required : false)
+build = opt_dep.found()
+ext_deps += opt_dep
index 0db1b80..2d9e768 100644
@@ -92,6 +92,7 @@ set_policy_defaults(struct channel_packet *pkt)
        pkt->timer_policy.hours_to_use_traffic_profile[0] = 8;
        pkt->timer_policy.hours_to_use_traffic_profile[1] = 10;
 
+       pkt->core_type = CORE_TYPE_VIRTUAL;
        pkt->workload = LOW;
        pkt->policy_to_use = TIME;
        pkt->command = PKT_POLICY;
index 58c5fa4..893bf4c 100644
@@ -421,6 +421,8 @@ main(int argc, char **argv)
                return -1;
        }
 
+       add_host_channel();
+
        printf("Running core monitor on lcore id %d\n", lcore_id);
        rte_eal_remote_launch(run_core_monitor, NULL, lcore_id);
 
index c370d74..f98445b 100644
@@ -6,5 +6,38 @@
 # To build this example as a standalone application with an already-installed
 # DPDK instance, use 'make'
 
-# Example app currently unsupported by meson build
-build = false
+if dpdk_conf.has('RTE_LIBRTE_BNXT_PMD')
+       deps += ['pmd_bnxt']
+endif
+
+if dpdk_conf.has('RTE_LIBRTE_I40E_PMD')
+       deps += ['pmd_i40e']
+endif
+
+if dpdk_conf.has('RTE_LIBRTE_IXGBE_PMD')
+       deps += ['pmd_ixgbe']
+endif
+
+deps += ['power']
+
+
+sources = files(
+       'channel_manager.c', 'channel_monitor.c', 'main.c', 'parse.c', 'power_manager.c', 'vm_power_cli.c'
+)
+
+# If we're on X86, pull in the x86 code for the branch monitor algo.
+if dpdk_conf.has('RTE_ARCH_X86_64')
+       sources += files('oob_monitor_x86.c')
+else
+       sources += files('oob_monitor_nop.c')
+endif
+
+opt_dep = cc.find_library('virt', required : false)
+build = opt_dep.found()
+ext_deps += opt_dep
+
+opt_dep = dependency('jansson', required : false)
+if opt_dep.found()
+       ext_deps += opt_dep
+       cflags += '-DUSE_JANSSON'
+endif
index 3398eac..3cf394b 100644
@@ -26,8 +26,7 @@ struct rte_uio_pci_dev {
        struct uio_info info;
        struct pci_dev *pdev;
        enum rte_intr_mode mode;
-       struct mutex lock;
-       int refcnt;
+       atomic_t refcnt;
 };
 
 static int wc_activate;
@@ -320,23 +319,19 @@ igbuio_pci_open(struct uio_info *info, struct inode *inode)
        struct pci_dev *dev = udev->pdev;
        int err;
 
-       mutex_lock(&udev->lock);
-       if (++udev->refcnt > 1) {
-               mutex_unlock(&udev->lock);
+       if (atomic_inc_return(&udev->refcnt) != 1)
                return 0;
-       }
 
        /* set bus master, which was cleared by the reset function */
        pci_set_master(dev);
 
        /* enable interrupts */
        err = igbuio_pci_enable_interrupts(udev);
-       mutex_unlock(&udev->lock);
        if (err) {
+               atomic_dec(&udev->refcnt);
                dev_err(&dev->dev, "Enable interrupt fails\n");
-               return err;
        }
-       return 0;
+       return err;
 }
 
 static int
@@ -345,19 +340,14 @@ igbuio_pci_release(struct uio_info *info, struct inode *inode)
        struct rte_uio_pci_dev *udev = info->priv;
        struct pci_dev *dev = udev->pdev;
 
-       mutex_lock(&udev->lock);
-       if (--udev->refcnt > 0) {
-               mutex_unlock(&udev->lock);
-               return 0;
-       }
-
-       /* disable interrupts */
-       igbuio_pci_disable_interrupts(udev);
+       if (atomic_dec_and_test(&udev->refcnt)) {
+               /* disable interrupts */
+               igbuio_pci_disable_interrupts(udev);
 
-       /* stop the device from further DMA */
-       pci_clear_master(dev);
+               /* stop the device from further DMA */
+               pci_clear_master(dev);
+       }
 
-       mutex_unlock(&udev->lock);
        return 0;
 }
 
@@ -489,7 +479,6 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
        if (!udev)
                return -ENOMEM;
 
-       mutex_init(&udev->lock);
        /*
         * enable device: ask low-level code to enable I/O and
         * memory
@@ -529,6 +518,7 @@ igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
        udev->info.release = igbuio_pci_release;
        udev->info.priv = udev;
        udev->pdev = dev;
+       atomic_set(&udev->refcnt, 0);
 
        err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp);
        if (err != 0)
@@ -580,7 +570,8 @@ igbuio_pci_remove(struct pci_dev *dev)
 {
        struct rte_uio_pci_dev *udev = pci_get_drvdata(dev);
 
-       mutex_destroy(&udev->lock);
+       igbuio_pci_release(&udev->info, NULL);
+
        sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp);
        uio_unregister_device(&udev->info);
        igbuio_pci_release_iomem(&udev->info);
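
The mutex-plus-counter pair in igb_uio collapses into a single atomic_t: only the 0->1 transition performs the one-time device setup and only the 1->0 transition tears it down, so no lock is needed. The bare pattern as a kernel-style sketch:

    #include <linux/atomic.h>

    static atomic_t refcnt = ATOMIC_INIT(0);

    static int
    dev_open(void)
    {
            if (atomic_inc_return(&refcnt) != 1)
                    return 0;       /* first opener already set up */
            /* one-time enable: bus master, interrupts, ... */
            return 0;
    }

    static void
    dev_release(void)
    {
            if (atomic_dec_and_test(&refcnt)) {
                    /* last user gone: disable interrupts, stop DMA */
            }
    }
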
index 71ed2e7..f5a9d5c 100644
@@ -15,4 +15,6 @@ custom_target('igb_uio',
                        '/../../../lib/librte_eal/common/include',
                'modules'],
        depends: mkfile,
+       install: true,
+       install_dir: kernel_dir + '/../extra/dpdk',
        build_by_default: get_option('enable_kmods'))
diff --git a/kernel/linux/kni/Kbuild b/kernel/linux/kni/Kbuild
new file mode 100644 (file)
index 0000000..de5c27f
--- /dev/null
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+ccflags-y := $(MODULE_CFLAGS)
+obj-m := rte_kni.o
+rte_kni-y := $(patsubst $(src)/%.c,%.o,$(wildcard $(src)/*.c)) \
+ $(patsubst $(src)/%.c,%.o,$(wildcard $(src)/ethtool/ixgbe/*.c)) \
+ $(patsubst $(src)/%.c,%.o,$(wildcard $(src)/ethtool/igb/*.c))
index 002f75c..b6bddc0 100644
@@ -135,6 +135,7 @@ static const char igb_gstrings_test[][ETH_GSTRING_LEN] = {
 #define IGB_TEST_LEN (sizeof(igb_gstrings_test) / ETH_GSTRING_LEN)
 #endif /* ETHTOOL_TEST */
 
+#ifndef ETHTOOL_GLINKSETTINGS
 static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
@@ -259,7 +260,9 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
 #endif /* ETH_TP_MDI_X */
        return 0;
 }
+#endif
 
+#ifndef ETHTOOL_SLINKSETTINGS
 static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
 {
        struct igb_adapter *adapter = netdev_priv(netdev);
@@ -364,6 +367,7 @@ static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
        clear_bit(__IGB_RESETTING, &adapter->state);
        return 0;
 }
+#endif
 
 static u32 igb_get_link(struct net_device *netdev)
 {
@@ -2737,8 +2741,12 @@ static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 #endif /* ETHTOOL_GRXRINGS */
 
 static const struct ethtool_ops igb_ethtool_ops = {
+#ifndef ETHTOOL_GLINKSETTINGS
        .get_settings           = igb_get_settings,
+#endif
+#ifndef ETHTOOL_SLINKSETTINGS
        .set_settings           = igb_set_settings,
+#endif
        .get_drvinfo            = igb_get_drvinfo,
        .get_regs_len           = igb_get_regs_len,
        .get_regs               = igb_get_regs,
diff --git a/kernel/linux/kni/ethtool/igb/meson.build b/kernel/linux/kni/ethtool/igb/meson.build
new file mode 100644 (file)
index 0000000..2f796ac
--- /dev/null
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+kni_igb_sources = files(
+       'e1000_82575.c',
+       'e1000_api.c',
+       'e1000_i210.c',
+       'e1000_mac.c',
+       'e1000_manage.c',
+       'e1000_mbx.c',
+       'e1000_nvm.c',
+       'e1000_phy.c',
+       'igb_ethtool.c',
+       'igb_main.c',
+       'igb_param.c',
+       'igb_vmdq.c')
index 6ff9413..cc15ec6 100644
@@ -890,8 +890,10 @@ s32 ixgbe_dcb_hw_ets(struct ixgbe_hw *hw, struct ieee_ets *ets, int max_frame);
 #endif /* CONFIG_DCB */
 
 extern void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring);
+#ifndef ETHTOOL_GLINKSETTINGS
 extern int ixgbe_get_settings(struct net_device *netdev,
                              struct ethtool_cmd *ecmd);
+#endif
 extern int ixgbe_write_uc_addr_list(struct ixgbe_adapter *adapter,
                            struct net_device *netdev, unsigned int vfn);
 extern void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter);
index 44cdc9f..f2ded19 100644
@@ -158,6 +158,7 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
 #define IXGBE_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN)
 #endif /* ETHTOOL_TEST */
 
+#ifndef ETHTOOL_GLINKSETTINGS
 int ixgbe_get_settings(struct net_device *netdev,
                       struct ethtool_cmd *ecmd)
 {
@@ -347,7 +348,9 @@ int ixgbe_get_settings(struct net_device *netdev,
 
        return 0;
 }
+#endif
 
+#ifndef ETHTOOL_SLINKSETTINGS
 static int ixgbe_set_settings(struct net_device *netdev,
                              struct ethtool_cmd *ecmd)
 {
@@ -391,6 +394,7 @@ static int ixgbe_set_settings(struct net_device *netdev,
        }
        return err;
 }
+#endif
 
 static void ixgbe_get_pauseparam(struct net_device *netdev,
                                 struct ethtool_pauseparam *pause)
@@ -2815,8 +2819,12 @@ static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 #endif /* ETHTOOL_GRXRINGS */
 //static
 struct ethtool_ops ixgbe_ethtool_ops = {
+#ifndef ETHTOOL_GLINKSETTINGS
        .get_settings           = ixgbe_get_settings,
+#endif
+#ifndef ETHTOOL_SLINKSETTINGS
        .set_settings           = ixgbe_set_settings,
+#endif
        .get_drvinfo            = ixgbe_get_drvinfo,
        .get_regs_len           = ixgbe_get_regs_len,
        .get_regs               = ixgbe_get_regs,
diff --git a/kernel/linux/kni/ethtool/ixgbe/meson.build b/kernel/linux/kni/ethtool/ixgbe/meson.build
new file mode 100644 (file)
index 0000000..73248b1
--- /dev/null
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+kni_ixgbe_sources = files(
+       'ixgbe_82598.c',
+       'ixgbe_82599.c',
+       'ixgbe_api.c',
+       'ixgbe_common.c',
+       'ixgbe_ethtool.c',
+       'ixgbe_main.c',
+       'ixgbe_phy.c',
+       'ixgbe_x540.c',
+       'kcompat.c')
diff --git a/kernel/linux/kni/ethtool/meson.build b/kernel/linux/kni/ethtool/meson.build
new file mode 100644 (file)
index 0000000..7a8458d
--- /dev/null
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+subdir('igb')
+subdir('ixgbe')
index 6275ef2..688f574 100644
@@ -29,6 +29,9 @@
 
 #define MBUF_BURST_SZ 32
 
+/* Default carrier state for created KNI network interfaces */
+extern uint32_t dflt_carrier;
+
 /**
  * A structure describing the private information for a kni device.
  */
index a44e7d9..b1c84f8 100644
@@ -27,6 +27,8 @@ kni_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
        priv->lad_dev->ethtool_ops->get_drvinfo(priv->lad_dev, info);
 }
 
+/* ETHTOOL_GLINKSETTINGS replaces ETHTOOL_GSET */
+#ifndef ETHTOOL_GLINKSETTINGS
 static int
 kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 {
@@ -34,7 +36,10 @@ kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 
        return priv->lad_dev->ethtool_ops->get_settings(priv->lad_dev, ecmd);
 }
+#endif
 
+/* ETHTOOL_SLINKSETTINGS replaces ETHTOOL_SSET */
+#ifndef ETHTOOL_SLINKSETTINGS
 static int
 kni_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 {
@@ -42,6 +47,7 @@ kni_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 
        return priv->lad_dev->ethtool_ops->set_settings(priv->lad_dev, ecmd);
 }
+#endif
 
 static void
 kni_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
@@ -190,8 +196,12 @@ kni_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats,
 struct ethtool_ops kni_ethtool_ops = {
        .begin                  = kni_check_if_running,
        .get_drvinfo            = kni_get_drvinfo,
+#ifndef ETHTOOL_GLINKSETTINGS
        .get_settings           = kni_get_settings,
+#endif
+#ifndef ETHTOOL_SLINKSETTINGS
        .set_settings           = kni_set_settings,
+#endif
        .get_regs_len           = kni_get_regs_len,
        .get_regs               = kni_get_regs,
        .get_wol                = kni_get_wol,
index 9a4762d..3f4781c 100644 (file)
@@ -8,6 +8,14 @@
 
 #include <exec-env/rte_kni_common.h>
 
+/* Linux < 3.14 lacks these barrier macros; fall back to plain accesses */
+#ifndef smp_load_acquire
+#define smp_load_acquire(a) (*(a))
+#endif
+#ifndef smp_store_release
+#define smp_store_release(a, b) *(a) = (b)
+#endif
+
 /**
  * Adds num elements into the fifo. Return the number actually written
  */
@@ -16,7 +24,7 @@ kni_fifo_put(struct rte_kni_fifo *fifo, void **data, uint32_t num)
 {
        uint32_t i = 0;
        uint32_t fifo_write = fifo->write;
-       uint32_t fifo_read = fifo->read;
+       uint32_t fifo_read = smp_load_acquire(&fifo->read);
        uint32_t new_write = fifo_write;
 
        for (i = 0; i < num; i++) {
@@ -27,7 +35,7 @@ kni_fifo_put(struct rte_kni_fifo *fifo, void **data, uint32_t num)
                fifo->buffer[fifo_write] = data[i];
                fifo_write = new_write;
        }
-       fifo->write = fifo_write;
+       smp_store_release(&fifo->write, fifo_write);
 
        return i;
 }
@@ -40,7 +48,7 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, uint32_t num)
 {
        uint32_t i = 0;
        uint32_t new_read = fifo->read;
-       uint32_t fifo_write = fifo->write;
+       uint32_t fifo_write = smp_load_acquire(&fifo->write);
 
        for (i = 0; i < num; i++) {
                if (new_read == fifo_write)
@@ -49,7 +57,7 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, uint32_t num)
                data[i] = fifo->buffer[new_read];
                new_read = (new_read + 1) & (fifo->len - 1);
        }
-       fifo->read = new_read;
+       smp_store_release(&fifo->read, new_read);
 
        return i;
 }
@@ -60,7 +68,9 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, uint32_t num)
 static inline uint32_t
 kni_fifo_count(struct rte_kni_fifo *fifo)
 {
-       return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1);
+       uint32_t fifo_write = smp_load_acquire(&fifo->write);
+       uint32_t fifo_read = smp_load_acquire(&fifo->read);
+       return (fifo->len + fifo_write - fifo_read) & (fifo->len - 1);
 }
 
 /**
@@ -69,7 +79,9 @@ kni_fifo_count(struct rte_kni_fifo *fifo)
 static inline uint32_t
 kni_fifo_free_count(struct rte_kni_fifo *fifo)
 {
-       return (fifo->read - fifo->write - 1) & (fifo->len - 1);
+       uint32_t fifo_write = smp_load_acquire(&fifo->write);
+       uint32_t fifo_read = smp_load_acquire(&fifo->read);
+       return (fifo_read - fifo_write - 1) & (fifo->len - 1);
 }
 
 #endif /* _KNI_FIFO_H_ */
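The acquire/release pairing above makes the FIFO safe between the kernel thread and the userspace side without full barriers: the producer releases its index only after filling the slots, and the consumer acquires it before reading them. An illustrative userspace counterpart in C11 atomics (not the DPDK code; the layout is assumed to mirror rte_kni_fifo):

    #include <stdatomic.h>
    #include <stdint.h>

    struct fifo {
            _Atomic uint32_t write;  /* producer-owned index */
            _Atomic uint32_t read;   /* consumer-owned index */
            uint32_t len;            /* power of two */
            void *buffer[];
    };

    static uint32_t fifo_get(struct fifo *f, void **data, uint32_t num)
    {
            uint32_t r = atomic_load_explicit(&f->read, memory_order_relaxed);
            /* acquire: slot contents written before `write` become visible */
            uint32_t w = atomic_load_explicit(&f->write, memory_order_acquire);
            uint32_t i;

            for (i = 0; i < num && r != w; i++) {
                    data[i] = f->buffer[r];
                    r = (r + 1) & (f->len - 1);
            }
            /* release: hand the freed slots back to the producer */
            atomic_store_explicit(&f->read, r, memory_order_release);
            return i;
    }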
index fa69f8e..522ae23 100644 (file)
@@ -39,6 +39,10 @@ static char *lo_mode;
 static char *kthread_mode;
 static uint32_t multiple_kthread_on;
 
+/* Default carrier state for created KNI network interfaces */
+static char *carrier;
+uint32_t dflt_carrier;
+
 #define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */
 
 static int kni_net_id;
@@ -466,6 +470,8 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
                return -ENODEV;
        }
 
+       netif_carrier_off(net_dev);
+
        ret = kni_run_thread(knet, kni, dev_info.force_bind);
        if (ret != 0)
                return ret;
@@ -590,6 +596,24 @@ kni_parse_kthread_mode(void)
        return 0;
 }
 
+static int __init
+kni_parse_carrier_state(void)
+{
+       if (!carrier) {
+               dflt_carrier = 0;
+               return 0;
+       }
+
+       if (strcmp(carrier, "off") == 0)
+               dflt_carrier = 0;
+       else if (strcmp(carrier, "on") == 0)
+               dflt_carrier = 1;
+       else
+               return -1;
+
+       return 0;
+}
+
 static int __init
 kni_init(void)
 {
@@ -605,6 +629,16 @@ kni_init(void)
        else
                pr_debug("Multiple kernel thread mode enabled\n");
 
+       if (kni_parse_carrier_state() < 0) {
+               pr_err("Invalid parameter for carrier\n");
+               return -EINVAL;
+       }
+
+       if (dflt_carrier == 0)
+               pr_debug("Default carrier state set to off.\n");
+       else
+               pr_debug("Default carrier state set to on.\n");
+
 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
        rc = register_pernet_subsys(&kni_net_ops);
 #else
@@ -647,19 +681,27 @@ kni_exit(void)
 module_init(kni_init);
 module_exit(kni_exit);
 
-module_param(lo_mode, charp, S_IRUGO | S_IWUSR);
+module_param(lo_mode, charp, 0644);
 MODULE_PARM_DESC(lo_mode,
 "KNI loopback mode (default=lo_mode_none):\n"
-"    lo_mode_none        Kernel loopback disabled\n"
-"    lo_mode_fifo        Enable kernel loopback with fifo\n"
-"    lo_mode_fifo_skb    Enable kernel loopback with fifo and skb buffer\n"
-"\n"
+"\t\tlo_mode_none        Kernel loopback disabled\n"
+"\t\tlo_mode_fifo        Enable kernel loopback with fifo\n"
+"\t\tlo_mode_fifo_skb    Enable kernel loopback with fifo and skb buffer\n"
+"\t\t"
 );
 
-module_param(kthread_mode, charp, S_IRUGO);
+module_param(kthread_mode, charp, 0644);
 MODULE_PARM_DESC(kthread_mode,
 "Kernel thread mode (default=single):\n"
-"    single    Single kernel thread mode enabled.\n"
-"    multiple  Multiple kernel thread mode enabled.\n"
-"\n"
+"\t\tsingle    Single kernel thread mode enabled.\n"
+"\t\tmultiple  Multiple kernel thread mode enabled.\n"
+"\t\t"
+);
+
+module_param(carrier, charp, 0644);
+MODULE_PARM_DESC(carrier,
+"Default carrier state for KNI interface (default=off):\n"
+"\t\toff   Interfaces will be created with carrier state set to off.\n"
+"\t\ton    Interfaces will be created with carrier state set to on.\n"
+"\t\t"
 );
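Taken together, these hunks let the administrator pick the initial carrier state at module load time, e.g. "insmod rte_kni.ko carrier=on" (module path omitted). Any other value makes kni_parse_carrier_state() fail and kni_init() return -EINVAL, and kni_net_open() reapplies the chosen state whenever an interface is brought up.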
index 7fcfa10..7371b6d 100644 (file)
@@ -133,6 +133,10 @@ kni_net_open(struct net_device *dev)
        struct kni_dev *kni = netdev_priv(dev);
 
        netif_start_queue(dev);
+       if (dflt_carrier == 1)
+               netif_carrier_on(dev);
+       else
+               netif_carrier_off(dev);
 
        memset(&req, 0, sizeof(req));
        req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;
@@ -152,6 +156,7 @@ kni_net_release(struct net_device *dev)
        struct kni_dev *kni = netdev_priv(dev);
 
        netif_stop_queue(dev); /* can't transmit any more */
+       netif_carrier_off(dev);
 
        memset(&req, 0, sizeof(req));
        req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF;
@@ -597,7 +602,7 @@ kni_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
        pr_debug("kni_net_ioctl group:%d cmd:%d\n",
                ((struct kni_dev *)netdev_priv(dev))->group_id, cmd);
 
-       return 0;
+       return -EOPNOTSUPP;
 }
 
 static void
diff --git a/kernel/linux/kni/meson.build b/kernel/linux/kni/meson.build
new file mode 100644 (file)
index 0000000..a09af5a
--- /dev/null
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+subdir('ethtool')
+
+kni_mkfile = custom_target('rte_kni_makefile',
+       output: 'Makefile',
+       command: ['touch', '@OUTPUT@'])
+
+kni_sources = files(
+       'kni_ethtool.c',
+       'kni_misc.c',
+       'kni_net.c',
+       'Kbuild')
+
+custom_target('rte_kni',
+       input: kni_sources + kni_igb_sources + kni_ixgbe_sources,
+       output: 'rte_kni.ko',
+       command: ['make', '-j4', '-C', kernel_dir,
+               'M=' + meson.current_build_dir(),
+               'src=' + meson.current_source_dir(),
+               'MODULE_CFLAGS=-include ' + meson.source_root() + '/config/rte_config.h' +
+               ' -I' + meson.source_root() + '/lib/librte_eal/common/include' +
+               ' -I' + meson.source_root() + '/lib/librte_eal/linuxapp/eal/include' +
+               ' -I' + meson.build_root() +
+               ' -I' + meson.current_source_dir() +
+               ' -I' + meson.current_source_dir() + '/ethtool/ixgbe' +
+               ' -I' + meson.current_source_dir() + '/ethtool/igb',
+               'modules'],
+       depends: kni_mkfile,
+       console: true,
+       install: true,
+       install_dir: kernel_dir + '/../extra/dpdk',
+       build_by_default: get_option('enable_kmods'))
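The custom_target above shells out to the kernel's own kbuild (make -C kernel_dir) rather than compiling the module with meson itself, and build_by_default ties it to the enable_kmods option. Assuming the standard meson options of this release, configuration would look something like "meson build -Denable_kmods=true -Dkernel_dir=/lib/modules/$(uname -r)/build".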
index a924c7b..5b7ec06 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2018 Intel Corporation
 
-subdirs = ['igb_uio']
+subdirs = ['igb_uio', 'kni']
 
 WARN_CROSS_COMPILE='Need "kernel_dir" option for kmod compilation when cross-compiling'
 WARN_NO_HEADERS='Cannot compile kernel modules as requested - are kernel headers installed?'
index afa604e..b7370ef 100644 (file)
@@ -25,6 +25,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ethdev
 DEPDIRS-librte_ethdev := librte_net librte_eal librte_mempool librte_ring
 DEPDIRS-librte_ethdev += librte_mbuf
 DEPDIRS-librte_ethdev += librte_kvargs
+DEPDIRS-librte_ethdev += librte_cmdline
 DIRS-$(CONFIG_RTE_LIBRTE_BBDEV) += librte_bbdev
 DEPDIRS-librte_bbdev := librte_eal librte_mempool librte_mbuf
 DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += librte_cryptodev
@@ -50,7 +51,7 @@ DEPDIRS-librte_hash := librte_eal librte_ring
 DIRS-$(CONFIG_RTE_LIBRTE_EFD) += librte_efd
 DEPDIRS-librte_efd := librte_eal librte_ring librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm
-DEPDIRS-librte_lpm := librte_eal
+DEPDIRS-librte_lpm := librte_eal librte_hash
 DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl
 DEPDIRS-librte_acl := librte_eal
 DIRS-$(CONFIG_RTE_LIBRTE_MEMBER) += librte_member
@@ -71,7 +72,7 @@ DEPDIRS-librte_bitratestats := librte_eal librte_metrics librte_ethdev
 DIRS-$(CONFIG_RTE_LIBRTE_LATENCY_STATS) += librte_latencystats
 DEPDIRS-librte_latencystats := librte_eal librte_metrics librte_ethdev librte_mbuf
 DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power
-DEPDIRS-librte_power := librte_eal
+DEPDIRS-librte_power := librte_eal librte_timer
 DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter
 DEPDIRS-librte_meter := librte_eal
 DIRS-$(CONFIG_RTE_LIBRTE_FLOW_CLASSIFY) += librte_flow_classify
@@ -105,6 +106,8 @@ DEPDIRS-librte_gso := librte_eal librte_mbuf librte_ethdev librte_net
 DEPDIRS-librte_gso += librte_mempool
 DIRS-$(CONFIG_RTE_LIBRTE_BPF) += librte_bpf
 DEPDIRS-librte_bpf := librte_eal librte_mempool librte_mbuf librte_ethdev
+DIRS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += librte_telemetry
+DEPDIRS-librte_telemetry := librte_eal librte_metrics librte_ethdev
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni
index 2f1243c..db7d322 100644 (file)
@@ -16,7 +16,7 @@ EAL_REGISTER_TAILQ(rte_acl_tailq)
  * If the compiler doesn't support AVX2 instructions,
  * then the dummy one would be used instead for AVX2 classify method.
  */
-int __attribute__ ((weak))
+__rte_weak int
 rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx,
        __rte_unused const uint8_t **data,
        __rte_unused uint32_t *results,
@@ -26,7 +26,7 @@ rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx,
        return -ENOTSUP;
 }
 
-int __attribute__ ((weak))
+__rte_weak int
 rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx,
        __rte_unused const uint8_t **data,
        __rte_unused uint32_t *results,
@@ -36,7 +36,7 @@ rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx,
        return -ENOTSUP;
 }
 
-int __attribute__ ((weak))
+__rte_weak int
 rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx,
        __rte_unused const uint8_t **data,
        __rte_unused uint32_t *results,
@@ -46,7 +46,7 @@ rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx,
        return -ENOTSUP;
 }
 
-int __attribute__ ((weak))
+__rte_weak int
 rte_acl_classify_altivec(__rte_unused const struct rte_acl_ctx *ctx,
        __rte_unused const uint8_t **data,
        __rte_unused uint32_t *results,
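__rte_weak expands to __attribute__((weak)), so these stubs are used only when the corresponding SIMD object file is not linked in; a strong definition of the same symbol silently takes precedence. The pattern in isolation (symbol name hypothetical):

    #include <errno.h>
    #include <stdint.h>
    #include <rte_common.h>  /* defines __rte_weak in this release */

    /* weak default: overridden at link time by any strong definition */
    __rte_weak int
    classify_fast(const uint8_t **data, uint32_t *results, uint32_t num)
    {
            (void)data; (void)results; (void)num;
            return -ENOTSUP;
    }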
index 34c3b9c..aa22e70 100644 (file)
@@ -88,7 +88,7 @@ enum {
        RTE_ACL_TYPE_SHIFT = 29,
        RTE_ACL_MAX_INDEX = RTE_LEN2MASK(RTE_ACL_TYPE_SHIFT, uint32_t),
        RTE_ACL_MAX_PRIORITY = RTE_ACL_MAX_INDEX,
-       RTE_ACL_MIN_PRIORITY = 0,
+       RTE_ACL_MIN_PRIORITY = 1,
 };
 
 #define        RTE_ACL_MASKLEN_TO_BITMASK(v, s)        \
index 2b84fe7..d9d163b 100644 (file)
@@ -131,7 +131,7 @@ rte_bpf_load(const struct rte_bpf_prm *prm)
        return bpf;
 }
 
-__rte_experimental __attribute__ ((weak)) struct rte_bpf *
+__rte_experimental __rte_weak struct rte_bpf *
 rte_bpf_elf_load(const struct rte_bpf_prm *prm, const char *fname,
        const char *sname)
 {
index 31731e7..11d09cd 100644 (file)
@@ -75,7 +75,7 @@ rte_bpf_eth_tx_unload(uint16_t port, uint16_t queue);
  * @param prm
  *  Parameters used to create and initialise the BPF exeution context.
  * @param flags
- *  Flags that define expected expected behavior of the loaded filter
+ *  Flags that define expected behavior of the loaded filter
  *  (i.e. jited/non-jited version to use).
  * @return
  *   Zero on successful completion or negative error code otherwise.
index ddae1cf..c64142b 100644 (file)
@@ -25,7 +25,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_vt100.c
 SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_socket.c
 SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse_portlist.c
 
-CFLAGS += -D_GNU_SOURCE
 LDLIBS += -lrte_eal
 
 # install includes
index 591b78b..d9042f0 100644 (file)
@@ -126,35 +126,11 @@ cmdline_printf(const struct cmdline *cl, const char *fmt, ...)
        if (!cl || !fmt)
                return;
 
-#ifdef _GNU_SOURCE
        if (cl->s_out < 0)
                return;
        va_start(ap, fmt);
        vdprintf(cl->s_out, fmt, ap);
        va_end(ap);
-#else
-       int ret;
-       char *buf;
-
-       if (cl->s_out < 0)
-               return;
-
-       buf = malloc(BUFSIZ);
-       if (buf == NULL)
-               return;
-       va_start(ap, fmt);
-       ret = vsnprintf(buf, BUFSIZ, fmt, ap);
-       va_end(ap);
-       if (ret < 0) {
-               free(buf);
-               return;
-       }
-       if (ret >= BUFSIZ)
-               ret = BUFSIZ - 1;
-       ret = write(cl->s_out, buf, ret);
-       (void)ret;
-       free(buf);
-#endif
 }
 
 int
index 5741817..3049890 100644 (file)
@@ -1,6 +1,10 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
+# This library is processed before EAL
+includes = [global_inc]
+includes += include_directories('../librte_eal/common/include')
+
 version = 2
 sources = files('cmdline.c',
        'cmdline_cirbuf.c',
index 98ad0cf..c663be5 100644 (file)
@@ -83,8 +83,8 @@ struct rte_comp_op_pool_private {
  * @param nb_ops
  *   Number of operations to allocate
  * @return
- *   - 0: Success
- *   - -ENOENT: Not enough entries in the mempool; no ops are retrieved.
+ *   - nb_ops: Success, the nb_ops requested was allocated
+ *   - 0: Not enough entries in the mempool; no ops are retrieved.
  */
 static inline int
 rte_comp_op_raw_bulk_alloc(struct rte_mempool *mempool,
index ee9056e..395ce29 100644 (file)
@@ -448,8 +448,8 @@ rte_comp_op_alloc(struct rte_mempool *mempool);
  * @param nb_ops
  *   Number of operations to allocate
  * @return
- *   - 0: Success
- *   - -ENOENT: Not enough entries in the mempool; no ops are retrieved.
+ *   - nb_ops: Success, the nb_ops requested was allocated
+ *   - 0: Not enough entries in the mempool; no ops are retrieved.
  */
 int __rte_experimental
 rte_comp_op_bulk_alloc(struct rte_mempool *mempool,
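With the corrected contract, callers test for a zero return rather than -ENOENT; a usage sketch (pool handle and burst size illustrative):

    struct rte_comp_op *ops[32];

    if (rte_comp_op_bulk_alloc(op_pool, ops, 32) == 0) {
            /* mempool exhausted: no ops were retrieved */
    }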
index 9091dd6..10101eb 100644 (file)
 #define RTE_COMPRESSDEV_DETACHED  (0)
 #define RTE_COMPRESSDEV_ATTACHED  (1)
 
-struct rte_compressdev rte_comp_devices[RTE_COMPRESS_MAX_DEVS];
-
-struct rte_compressdev *rte_compressdevs = &rte_comp_devices[0];
+static struct rte_compressdev rte_comp_devices[RTE_COMPRESS_MAX_DEVS];
 
 static struct rte_compressdev_global compressdev_globals = {
-               .devs                   = &rte_comp_devices[0],
+               .devs                   = rte_comp_devices,
                .data                   = { NULL },
                .nb_devs                = 0,
                .max_devs               = RTE_COMPRESS_MAX_DEVS
 };
 
-struct rte_compressdev_global *rte_compressdev_globals = &compressdev_globals;
-
 const struct rte_compressdev_capabilities * __rte_experimental
 rte_compressdev_capability_get(uint8_t dev_id,
                        enum rte_comp_algorithm algo)
@@ -78,7 +74,7 @@ rte_compressdev_get_feature_name(uint64_t flag)
 static struct rte_compressdev *
 rte_compressdev_get_dev(uint8_t dev_id)
 {
-       return &rte_compressdev_globals->devs[dev_id];
+       return &compressdev_globals.devs[dev_id];
 }
 
 struct rte_compressdev * __rte_experimental
@@ -90,8 +86,8 @@ rte_compressdev_pmd_get_named_dev(const char *name)
        if (name == NULL)
                return NULL;
 
-       for (i = 0; i < rte_compressdev_globals->max_devs; i++) {
-               dev = &rte_compressdev_globals->devs[i];
+       for (i = 0; i < compressdev_globals.max_devs; i++) {
+               dev = &compressdev_globals.devs[i];
 
                if ((dev->attached == RTE_COMPRESSDEV_ATTACHED) &&
                                (strcmp(dev->data->name, name) == 0))
@@ -106,7 +102,7 @@ rte_compressdev_is_valid_dev(uint8_t dev_id)
 {
        struct rte_compressdev *dev = NULL;
 
-       if (dev_id >= rte_compressdev_globals->nb_devs)
+       if (dev_id >= compressdev_globals.nb_devs)
                return 0;
 
        dev = rte_compressdev_get_dev(dev_id);
@@ -125,10 +121,10 @@ rte_compressdev_get_dev_id(const char *name)
        if (name == NULL)
                return -1;
 
-       for (i = 0; i < rte_compressdev_globals->nb_devs; i++)
-               if ((strcmp(rte_compressdev_globals->devs[i].data->name, name)
+       for (i = 0; i < compressdev_globals.nb_devs; i++)
+               if ((strcmp(compressdev_globals.devs[i].data->name, name)
                                == 0) &&
-                               (rte_compressdev_globals->devs[i].attached ==
+                               (compressdev_globals.devs[i].attached ==
                                                RTE_COMPRESSDEV_ATTACHED))
                        return i;
 
@@ -138,7 +134,7 @@ rte_compressdev_get_dev_id(const char *name)
 uint8_t __rte_experimental
 rte_compressdev_count(void)
 {
-       return rte_compressdev_globals->nb_devs;
+       return compressdev_globals.nb_devs;
 }
 
 uint8_t __rte_experimental
@@ -146,8 +142,8 @@ rte_compressdev_devices_get(const char *driver_name, uint8_t *devices,
        uint8_t nb_devices)
 {
        uint8_t i, count = 0;
-       struct rte_compressdev *devs = rte_compressdev_globals->devs;
-       uint8_t max_devs = rte_compressdev_globals->max_devs;
+       struct rte_compressdev *devs = compressdev_globals.devs;
+       uint8_t max_devs = compressdev_globals.max_devs;
 
        for (i = 0; i < max_devs && count < nb_devices; i++) {
 
@@ -578,7 +574,7 @@ uint16_t __rte_experimental
 rte_compressdev_dequeue_burst(uint8_t dev_id, uint16_t qp_id,
                struct rte_comp_op **ops, uint16_t nb_ops)
 {
-       struct rte_compressdev *dev = &rte_compressdevs[dev_id];
+       struct rte_compressdev *dev = &rte_comp_devices[dev_id];
 
        nb_ops = (*dev->dequeue_burst)
                        (dev->data->queue_pairs[qp_id], ops, nb_ops);
@@ -590,7 +586,7 @@ uint16_t __rte_experimental
 rte_compressdev_enqueue_burst(uint8_t dev_id, uint16_t qp_id,
                struct rte_comp_op **ops, uint16_t nb_ops)
 {
-       struct rte_compressdev *dev = &rte_compressdevs[dev_id];
+       struct rte_compressdev *dev = &rte_comp_devices[dev_id];
 
        return (*dev->enqueue_burst)(
                        dev->data->queue_pairs[qp_id], ops, nb_ops);
index 7de4f33..95beb26 100644 (file)
@@ -92,24 +92,20 @@ rte_compressdev_pmd_create(const char *name,
        struct rte_compressdev *compressdev;
 
        if (params->name[0] != '\0') {
-               COMPRESSDEV_LOG(INFO, "[%s] User specified device name = %s\n",
-                               device->driver->name, params->name);
+               COMPRESSDEV_LOG(INFO, "User specified device name = %s\n",
+                               params->name);
                name = params->name;
        }
 
-       COMPRESSDEV_LOG(INFO, "[%s] - Creating compressdev %s\n",
-                       device->driver->name, name);
+       COMPRESSDEV_LOG(INFO, "Creating compressdev %s\n", name);
 
-       COMPRESSDEV_LOG(INFO,
-       "[%s] - Init parameters - name: %s, socket id: %d",
-                       device->driver->name, name,
-                       params->socket_id);
+       COMPRESSDEV_LOG(INFO, "Init parameters - name: %s, socket id: %d",
+                       name, params->socket_id);
 
        /* allocate device structure */
        compressdev = rte_compressdev_pmd_allocate(name, params->socket_id);
        if (compressdev == NULL) {
-               COMPRESSDEV_LOG(ERR, "[%s] Failed to allocate comp device %s",
-                               device->driver->name, name);
+               COMPRESSDEV_LOG(ERR, "Failed to allocate comp device %s", name);
                return NULL;
        }
 
@@ -123,8 +119,8 @@ rte_compressdev_pmd_create(const char *name,
 
                if (compressdev->data->dev_private == NULL) {
                        COMPRESSDEV_LOG(ERR,
-               "[%s] Cannot allocate memory for compressdev %s private data",
-                                       device->driver->name, name);
+                                       "Cannot allocate memory for compressdev"
+                                       " %s private data", name);
 
                        rte_compressdev_pmd_release_device(compressdev);
                        return NULL;
@@ -141,8 +137,7 @@ rte_compressdev_pmd_destroy(struct rte_compressdev *compressdev)
 {
        int retval;
 
-       COMPRESSDEV_LOG(INFO, "[%s] Closing comp device %s",
-                       compressdev->device->driver->name,
+       COMPRESSDEV_LOG(INFO, "Closing comp device %s",
                        compressdev->device->name);
 
        /* free comp device */
index 38e9ea0..043353c 100644 (file)
@@ -51,11 +51,6 @@ struct rte_compressdev_global {
        uint8_t max_devs;               /**< Max number of devices */
 };
 
-/** Pointer to global array of comp devices */
-extern struct rte_compressdev *rte_compressdevs;
-/** Pointer to global comp devices data structure */
-extern struct rte_compressdev_global *rte_compressdev_globals;
-
 /**
  * Get the rte_compressdev structure device pointer for the named device.
  *
index c114888..a8f94c0 100644 (file)
@@ -7,7 +7,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_cryptodev.a
 
 # library version
-LIBABIVER := 4
+LIBABIVER := 5
 
 # build flags
 CFLAGS += -O3
index 295f509..990dd3d 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-version = 4
+version = 5
 sources = files('rte_cryptodev.c', 'rte_cryptodev_pmd.c')
 headers = files('rte_cryptodev.h',
        'rte_cryptodev_pmd.h',
index 63ae23f..a52eaaa 100644 (file)
 
 static uint8_t nb_drivers;
 
-struct rte_cryptodev rte_crypto_devices[RTE_CRYPTO_MAX_DEVS];
+static struct rte_cryptodev rte_crypto_devices[RTE_CRYPTO_MAX_DEVS];
 
-struct rte_cryptodev *rte_cryptodevs = &rte_crypto_devices[0];
+struct rte_cryptodev *rte_cryptodevs = rte_crypto_devices;
 
 static struct rte_cryptodev_global cryptodev_globals = {
-               .devs                   = &rte_crypto_devices[0],
+               .devs                   = rte_crypto_devices,
                .data                   = { NULL },
                .nb_devs                = 0,
                .max_devs               = RTE_CRYPTO_MAX_DEVS
 };
 
-struct rte_cryptodev_global *rte_cryptodev_globals = &cryptodev_globals;
-
 /* spinlock for crypto device callbacks */
 static rte_spinlock_t rte_cryptodev_cb_lock = RTE_SPINLOCK_INITIALIZER;
 
@@ -486,7 +484,7 @@ rte_cryptodev_get_feature_name(uint64_t flag)
 struct rte_cryptodev *
 rte_cryptodev_pmd_get_dev(uint8_t dev_id)
 {
-       return &rte_cryptodev_globals->devs[dev_id];
+       return &cryptodev_globals.devs[dev_id];
 }
 
 struct rte_cryptodev *
@@ -498,8 +496,8 @@ rte_cryptodev_pmd_get_named_dev(const char *name)
        if (name == NULL)
                return NULL;
 
-       for (i = 0; i < rte_cryptodev_globals->max_devs; i++) {
-               dev = &rte_cryptodev_globals->devs[i];
+       for (i = 0; i < cryptodev_globals.max_devs; i++) {
+               dev = &cryptodev_globals.devs[i];
 
                if ((dev->attached == RTE_CRYPTODEV_ATTACHED) &&
                                (strcmp(dev->data->name, name) == 0))
@@ -514,7 +512,7 @@ rte_cryptodev_pmd_is_valid_dev(uint8_t dev_id)
 {
        struct rte_cryptodev *dev = NULL;
 
-       if (dev_id >= rte_cryptodev_globals->nb_devs)
+       if (dev_id >= cryptodev_globals.nb_devs)
                return 0;
 
        dev = rte_cryptodev_pmd_get_dev(dev_id);
@@ -533,10 +531,10 @@ rte_cryptodev_get_dev_id(const char *name)
        if (name == NULL)
                return -1;
 
-       for (i = 0; i < rte_cryptodev_globals->nb_devs; i++)
-               if ((strcmp(rte_cryptodev_globals->devs[i].data->name, name)
+       for (i = 0; i < cryptodev_globals.nb_devs; i++)
+               if ((strcmp(cryptodev_globals.devs[i].data->name, name)
                                == 0) &&
-                               (rte_cryptodev_globals->devs[i].attached ==
+                               (cryptodev_globals.devs[i].attached ==
                                                RTE_CRYPTODEV_ATTACHED))
                        return i;
 
@@ -546,7 +544,7 @@ rte_cryptodev_get_dev_id(const char *name)
 uint8_t
 rte_cryptodev_count(void)
 {
-       return rte_cryptodev_globals->nb_devs;
+       return cryptodev_globals.nb_devs;
 }
 
 uint8_t
@@ -554,9 +552,9 @@ rte_cryptodev_device_count_by_driver(uint8_t driver_id)
 {
        uint8_t i, dev_count = 0;
 
-       for (i = 0; i < rte_cryptodev_globals->max_devs; i++)
-               if (rte_cryptodev_globals->devs[i].driver_id == driver_id &&
-                       rte_cryptodev_globals->devs[i].attached ==
+       for (i = 0; i < cryptodev_globals.max_devs; i++)
+               if (cryptodev_globals.devs[i].driver_id == driver_id &&
+                       cryptodev_globals.devs[i].attached ==
                                        RTE_CRYPTODEV_ATTACHED)
                        dev_count++;
 
@@ -568,8 +566,8 @@ rte_cryptodev_devices_get(const char *driver_name, uint8_t *devices,
        uint8_t nb_devices)
 {
        uint8_t i, count = 0;
-       struct rte_cryptodev *devs = rte_cryptodev_globals->devs;
-       uint8_t max_devs = rte_cryptodev_globals->max_devs;
+       struct rte_cryptodev *devs = cryptodev_globals.devs;
+       uint8_t max_devs = cryptodev_globals.max_devs;
 
        for (i = 0; i < max_devs && count < nb_devices; i++) {
 
@@ -1477,6 +1475,9 @@ rte_crypto_op_pool_create(const char *name, enum rte_crypto_op_type type,
                elt_size += sizeof(struct rte_crypto_sym_op);
        } else if (type == RTE_CRYPTO_OP_TYPE_ASYMMETRIC) {
                elt_size += sizeof(struct rte_crypto_asym_op);
+       } else if (type == RTE_CRYPTO_OP_TYPE_UNDEFINED) {
+               elt_size += RTE_MAX(sizeof(struct rte_crypto_sym_op),
+                                   sizeof(struct rte_crypto_asym_op));
        } else {
                CDEV_LOG_ERR("Invalid op_type\n");
                return NULL;
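The new RTE_CRYPTO_OP_TYPE_UNDEFINED branch sizes each element for the larger of the two op types, so a single pool can serve both symmetric and asymmetric operations; a sketch (pool parameters illustrative):

    struct rte_mempool *pool = rte_crypto_op_pool_create("op_pool",
                    RTE_CRYPTO_OP_TYPE_UNDEFINED, 8192, 128, 0,
                    rte_socket_id());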
index 2088ac3..f03bdbd 100644 (file)
@@ -93,24 +93,20 @@ rte_cryptodev_pmd_create(const char *name,
        struct rte_cryptodev *cryptodev;
 
        if (params->name[0] != '\0') {
-               CDEV_LOG_INFO("[%s] User specified device name = %s\n",
-                               device->driver->name, params->name);
+               CDEV_LOG_INFO("User specified device name = %s\n", params->name);
                name = params->name;
        }
 
-       CDEV_LOG_INFO("[%s] - Creating cryptodev %s\n",
-                       device->driver->name, name);
+       CDEV_LOG_INFO("Creating cryptodev %s\n", name);
 
-       CDEV_LOG_INFO("[%s] - Initialisation parameters - name: %s,"
+       CDEV_LOG_INFO("Initialisation parameters - name: %s, "
                        "socket id: %d, max queue pairs: %u",
-                       device->driver->name, name,
-                       params->socket_id, params->max_nb_queue_pairs);
+                       name, params->socket_id, params->max_nb_queue_pairs);
 
        /* allocate device structure */
        cryptodev = rte_cryptodev_pmd_allocate(name, params->socket_id);
        if (cryptodev == NULL) {
-               CDEV_LOG_ERR("[%s] Failed to allocate crypto device for %s",
-                               device->driver->name, name);
+               CDEV_LOG_ERR("Failed to allocate crypto device for %s", name);
                return NULL;
        }
 
@@ -123,9 +119,8 @@ rte_cryptodev_pmd_create(const char *name,
                                                params->socket_id);
 
                if (cryptodev->data->dev_private == NULL) {
-                       CDEV_LOG_ERR("[%s] Cannot allocate memory for "
-                                       "cryptodev %s private data",
-                                       device->driver->name, name);
+                       CDEV_LOG_ERR("Cannot allocate memory for cryptodev %s"
+                                       " private data", name);
 
                        rte_cryptodev_pmd_release_device(cryptodev);
                        return NULL;
@@ -145,9 +140,7 @@ rte_cryptodev_pmd_destroy(struct rte_cryptodev *cryptodev)
 {
        int retval;
 
-       CDEV_LOG_INFO("[%s] Closing crypto device %s",
-                       cryptodev->device->driver->name,
-                       cryptodev->device->name);
+       CDEV_LOG_INFO("Closing crypto device %s", cryptodev->device->name);
 
        /* free crypto device */
        retval = rte_cryptodev_pmd_release_device(cryptodev);
index 6ff49d6..1b6cafd 100644 (file)
@@ -71,9 +71,6 @@ struct cryptodev_driver {
        uint8_t id;
 };
 
-/** pointer to global crypto devices data structure. */
-extern struct rte_cryptodev_global *rte_cryptodev_globals;
-
 /**
  * Get the rte_cryptodev structure device pointer for the device. Assumes a
  * valid device index.
index d27da3d..bfeddaa 100644 (file)
@@ -22,7 +22,7 @@ LDLIBS += -lrte_kvargs
 
 EXPORT_MAP := ../../rte_eal_version.map
 
-LIBABIVER := 8
+LIBABIVER := 9
 
 # specific to bsdapp exec-env
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
@@ -62,10 +62,12 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_fbarray.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_uuid.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += hotplug_mp.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_mp.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_option.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_service.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_reciprocal.c
 
@@ -77,11 +79,6 @@ SRCS-y += rte_cycles.c
 
 CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST)
 
-CFLAGS_eal.o := -D_GNU_SOURCE
-#CFLAGS_eal_thread.o := -D_GNU_SOURCE
-CFLAGS_eal_log.o := -D_GNU_SOURCE
-CFLAGS_eal_common_log.o := -D_GNU_SOURCE
-
 # workaround for a gcc bug with noreturn attribute
 # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
 ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
index d7ae9d6..508cbc4 100644 (file)
@@ -42,6 +42,7 @@
 #include <rte_devargs.h>
 #include <rte_version.h>
 #include <rte_vfio.h>
+#include <rte_option.h>
 #include <rte_atomic.h>
 #include <malloc_heap.h>
 
@@ -141,7 +142,7 @@ eal_create_runtime_dir(void)
 }
 
 const char *
-eal_get_runtime_dir(void)
+rte_eal_get_runtime_dir(void)
 {
        return runtime_dir;
 }
@@ -414,12 +415,20 @@ eal_parse_args(int argc, char **argv)
        argvopt = argv;
        optind = 1;
        optreset = 1;
+       opterr = 0;
 
        while ((opt = getopt_long(argc, argvopt, eal_short_options,
                                  eal_long_options, &option_index)) != EOF) {
 
-               /* getopt is not happy, stop right now */
+               /*
+                * getopt didn't recognise the option, so parse the
+                * registered options to see if the flag is valid
+                */
                if (opt == '?') {
+                       ret = rte_option_parse(argv[optind-1]);
+                       if (ret == 0)
+                               continue;
+
                        eal_usage(prgname);
                        ret = -1;
                        goto out;
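rte_option_parse() is consulted only for flags getopt rejects, so registered options extend the EAL command line without touching eal_short_options/eal_long_options. A sketch of the registration side, using the rte_option API added in this release (option string hypothetical):

    #include <rte_common.h>
    #include <rte_option.h>

    static int example_enabled;

    static int
    example_cb(void)  /* invoked from rte_option_init() when enabled */
    {
            example_enabled = 1;
            return 0;
    }

    static struct rte_option example_option = {
            .opt_str = "--example-flag",
            .cb = example_cb,
    };

    RTE_INIT(example_option_register)
    {
            rte_option_register(&example_option);
    }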
@@ -502,6 +511,9 @@ check_socket(const struct rte_memseg_list *msl, void *arg)
 {
        int *socket_id = arg;
 
+       if (msl->external)
+               return 0;
+
        if (msl->socket_id == *socket_id && msl->memseg_arr.count != 0)
                return 1;
 
@@ -607,7 +619,7 @@ rte_eal_init(int argc, char **argv)
        internal_config.legacy_mem = true;
 
        if (eal_plugins_init() < 0) {
-               rte_eal_init_alert("Cannot init plugins\n");
+               rte_eal_init_alert("Cannot init plugins");
                rte_errno = EINVAL;
                rte_atomic32_clear(&run_once);
                return -1;
@@ -622,7 +634,7 @@ rte_eal_init(int argc, char **argv)
        rte_config_init();
 
        if (rte_eal_intr_init() < 0) {
-               rte_eal_init_alert("Cannot init interrupt-handling thread\n");
+               rte_eal_init_alert("Cannot init interrupt-handling thread");
                return -1;
        }
 
@@ -630,7 +642,7 @@ rte_eal_init(int argc, char **argv)
         * bus through mp channel in the secondary process before the bus scan.
         */
        if (rte_mp_channel_init() < 0) {
-               rte_eal_init_alert("failed to init mp channel\n");
+               rte_eal_init_alert("failed to init mp channel");
                if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
                        rte_errno = EFAULT;
                        return -1;
@@ -638,14 +650,21 @@ rte_eal_init(int argc, char **argv)
        }
 
        if (rte_bus_scan()) {
-               rte_eal_init_alert("Cannot scan the buses for devices\n");
+               rte_eal_init_alert("Cannot scan the buses for devices");
                rte_errno = ENODEV;
                rte_atomic32_clear(&run_once);
                return -1;
        }
 
-       /* autodetect the iova mapping mode (default is iova_pa) */
-       rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class();
+       /* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */
+       if (internal_config.iova_mode == RTE_IOVA_DC) {
+               /* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */
+               rte_eal_get_configuration()->iova_mode =
+                       rte_bus_get_iommu_class();
+       } else {
+               rte_eal_get_configuration()->iova_mode =
+                       internal_config.iova_mode;
+       }
 
        if (internal_config.no_hugetlbfs == 0) {
                /* rte_config isn't initialized yet */
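The effect is that the new EAL flag --iova-mode=pa|va (parsed into internal_config.iova_mode, with RTE_IOVA_DC meaning "don't care") overrides the bus-driven autodetection, which remains the default behaviour.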
@@ -685,37 +704,37 @@ rte_eal_init(int argc, char **argv)
         * initialize memzones first.
         */
        if (rte_eal_memzone_init() < 0) {
-               rte_eal_init_alert("Cannot init memzone\n");
+               rte_eal_init_alert("Cannot init memzone");
                rte_errno = ENODEV;
                return -1;
        }
 
        if (rte_eal_memory_init() < 0) {
-               rte_eal_init_alert("Cannot init memory\n");
+               rte_eal_init_alert("Cannot init memory");
                rte_errno = ENOMEM;
                return -1;
        }
 
        if (rte_eal_malloc_heap_init() < 0) {
-               rte_eal_init_alert("Cannot init malloc heap\n");
+               rte_eal_init_alert("Cannot init malloc heap");
                rte_errno = ENODEV;
                return -1;
        }
 
        if (rte_eal_tailqs_init() < 0) {
-               rte_eal_init_alert("Cannot init tail queues for objects\n");
+               rte_eal_init_alert("Cannot init tail queues for objects");
                rte_errno = EFAULT;
                return -1;
        }
 
        if (rte_eal_alarm_init() < 0) {
-               rte_eal_init_alert("Cannot init interrupt-handling thread\n");
+               rte_eal_init_alert("Cannot init interrupt-handling thread");
                /* rte_eal_alarm_init sets rte_errno on failure. */
                return -1;
        }
 
        if (rte_eal_timer_init() < 0) {
-               rte_eal_init_alert("Cannot init HPET or TSC timers\n");
+               rte_eal_init_alert("Cannot init HPET or TSC timers");
                rte_errno = ENOTSUP;
                return -1;
        }
@@ -765,14 +784,14 @@ rte_eal_init(int argc, char **argv)
        /* initialize services so vdevs register service during bus_probe. */
        ret = rte_service_init();
        if (ret) {
-               rte_eal_init_alert("rte_service_init() failed\n");
+               rte_eal_init_alert("rte_service_init() failed");
                rte_errno = ENOEXEC;
                return -1;
        }
 
        /* Probe all the buses and devices/drivers on them */
        if (rte_bus_probe()) {
-               rte_eal_init_alert("Cannot probe devices\n");
+               rte_eal_init_alert("Cannot probe devices");
                rte_errno = ENOTSUP;
                return -1;
        }
@@ -788,6 +807,9 @@ rte_eal_init(int argc, char **argv)
 
        rte_eal_mcfg_complete();
 
+       /* Call each registered callback, if enabled */
+       rte_option_init();
+
        return fctret;
 }
 
index 1c6c51b..255d611 100644 (file)
@@ -19,3 +19,17 @@ rte_dev_event_monitor_stop(void)
        RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n");
        return -1;
 }
+
+int __rte_experimental
+rte_dev_hotplug_handle_enable(void)
+{
+       RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n");
+       return -1;
+}
+
+int __rte_experimental
+rte_dev_hotplug_handle_disable(void)
+{
+       RTE_LOG(ERR, EAL, "Device event is not supported for FreeBSD\n");
+       return -1;
+}
index f7f07ab..a5847f0 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <inttypes.h>
 
+#include <rte_errno.h>
 #include <rte_log.h>
 #include <rte_memory.h>
 
@@ -47,6 +48,26 @@ eal_memalloc_sync_with_primary(void)
        return -1;
 }
 
+int
+eal_memalloc_get_seg_fd(int list_idx __rte_unused, int seg_idx __rte_unused)
+{
+       return -ENOTSUP;
+}
+
+int
+eal_memalloc_set_seg_fd(int list_idx __rte_unused, int seg_idx __rte_unused,
+               int fd __rte_unused)
+{
+       return -ENOTSUP;
+}
+
+int
+eal_memalloc_get_seg_fd_offset(int list_idx __rte_unused,
+               int seg_idx __rte_unused, size_t *offset __rte_unused)
+{
+       return -ENOTSUP;
+}
+
 int
 eal_memalloc_init(void)
 {
index 16d2bc7..4b092e1 100644 (file)
@@ -79,6 +79,7 @@ rte_eal_hugepage_init(void)
                }
                msl->base_va = addr;
                msl->page_sz = page_sz;
+               msl->len = internal_config.memory;
                msl->socket_id = 0;
 
                /* populate memsegs. each memseg is 1 page long */
@@ -235,12 +236,15 @@ struct attach_walk_args {
        int seg_idx;
 };
 static int
-attach_segment(const struct rte_memseg_list *msl __rte_unused,
-               const struct rte_memseg *ms, void *arg)
+attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+               void *arg)
 {
        struct attach_walk_args *wa = arg;
        void *addr;
 
+       if (msl->external)
+               return 0;
+
        addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
                        MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
                        wa->seg_idx * EAL_PAGE_SIZE);
@@ -370,6 +374,7 @@ alloc_va_space(struct rte_memseg_list *msl)
                return -1;
        }
        msl->base_va = addr;
+       msl->len = mem_sz;
 
        return 0;
 }
index cca6882..87d8c45 100644 (file)
@@ -12,6 +12,7 @@ INC += rte_tailq.h rte_interrupts.h rte_alarm.h
 INC += rte_string_fns.h rte_version.h
 INC += rte_eal_memconfig.h rte_malloc_heap.h
 INC += rte_hexdump.h rte_devargs.h rte_bus.h rte_dev.h rte_class.h
+INC += rte_option.h
 INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
 INC += rte_malloc.h rte_keepalive.h rte_time.h
 INC += rte_service.h rte_service_component.h
index c6bd922..79731e1 100644 (file)
@@ -2,4 +2,4 @@
 # Copyright(c) 2017 Intel Corporation.
 
 eal_common_arch_sources = files('rte_cpuflags.c',
-       'rte_cycles.c')
+       'rte_cycles.c', 'rte_hypervisor.c')
diff --git a/lib/librte_eal/common/arch/ppc_64/meson.build b/lib/librte_eal/common/arch/ppc_64/meson.build
new file mode 100644 (file)
index 0000000..40b3dc5
--- /dev/null
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+eal_common_arch_sources = files('rte_cpuflags.c',
+       'rte_cycles.c', 'rte_hypervisor.c')
index 4e0f779..14bf204 100644 (file)
@@ -2,4 +2,4 @@
 # Copyright(c) 2017 Intel Corporation
 
 eal_common_arch_sources = files('rte_spinlock.c', 'rte_cpuflags.c',
-       'rte_cycles.c')
+       'rte_cycles.c', 'rte_hypervisor.c')
index 0943851..c8f1901 100644 (file)
 #include <rte_bus.h>
 #include <rte_debug.h>
 #include <rte_string_fns.h>
+#include <rte_errno.h>
 
 #include "eal_private.h"
 
-struct rte_bus_list rte_bus_list =
+static struct rte_bus_list rte_bus_list =
        TAILQ_HEAD_INITIALIZER(rte_bus_list);
 
 void
@@ -242,3 +243,45 @@ rte_bus_get_iommu_class(void)
        }
        return mode;
 }
+
+static int
+bus_handle_sigbus(const struct rte_bus *bus,
+                       const void *failure_addr)
+{
+       int ret;
+
+       if (!bus->sigbus_handler)
+               return -1;
+
+       ret = bus->sigbus_handler(failure_addr);
+
+       /* a bus handler was found but failed; make sure rte_errno is set. */
+       if (ret < 0 && rte_errno == 0)
+               rte_errno = ENOTSUP;
+
+       return ret > 0;
+}
+
+int
+rte_bus_sigbus_handler(const void *failure_addr)
+{
+       struct rte_bus *bus;
+
+       int ret = 0;
+       int old_errno = rte_errno;
+
+       rte_errno = 0;
+
+       bus = rte_bus_find(NULL, bus_handle_sigbus, failure_addr);
+       /* can not find bus. */
+       /* no bus claimed the failure address. */
+               return 1;
+       /* find bus but handle failed, pass on the new errno. */
+       /* a bus was found but its handler failed; pass on the new errno. */
+               return -1;
+
+       /* restore the old errno. */
+       rte_errno = old_errno;
+
+       return ret;
+}
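A bus opts in by filling its sigbus_handler callback; returning a positive value means the faulting address does not belong to that bus, zero means it was handled, and a negative value reports a failure through rte_errno. A bus-side sketch (helper name hypothetical):

    static int
    sketch_bus_sigbus_handler(const void *failure_addr)
    {
            if (!sketch_bus_owns_addr(failure_addr))  /* hypothetical check */
                    return 1;   /* positive: not this bus's address */
            /* re-map or quiesce the affected device here */
            return 0;           /* handled */
    }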
index 404a906..d922266 100644 (file)
@@ -9,7 +9,7 @@
 #include <rte_class.h>
 #include <rte_debug.h>
 
-struct rte_class_list rte_class_list =
+static struct rte_class_list rte_class_list =
        TAILQ_HEAD_INITIALIZER(rte_class_list);
 
 __rte_experimental void
index 678dbca..62e9ed4 100644 (file)
 #include <rte_log.h>
 #include <rte_spinlock.h>
 #include <rte_malloc.h>
+#include <rte_string_fns.h>
 
 #include "eal_private.h"
+#include "hotplug_mp.h"
 
 /**
  * The device event callback description.
@@ -74,119 +76,110 @@ static int cmp_dev_name(const struct rte_device *dev, const void *_name)
        return strcmp(dev->name, name);
 }
 
-int rte_eal_dev_attach(const char *name, const char *devargs)
+int __rte_experimental
+rte_dev_is_probed(const struct rte_device *dev)
 {
-       struct rte_bus *bus;
+       /* The field driver should be set only when the probe is successful. */
+       return dev->driver != NULL;
+}
 
-       if (name == NULL || devargs == NULL) {
-               RTE_LOG(ERR, EAL, "Invalid device or arguments provided\n");
+/* helper to build a devargs string; the caller must free the memory */
+static int
+build_devargs(const char *busname, const char *devname,
+             const char *drvargs, char **devargs)
+{
+       int length;
+
+       length = snprintf(NULL, 0, "%s:%s,%s", busname, devname, drvargs);
+       if (length < 0)
                return -EINVAL;
-       }
 
-       bus = rte_bus_find_by_device_name(name);
-       if (bus == NULL) {
-               RTE_LOG(ERR, EAL, "Unable to find a bus for the device '%s'\n",
-                       name);
+       *devargs = malloc(length + 1);
+       if (*devargs == NULL)
+               return -ENOMEM;
+
+       length = snprintf(*devargs, length + 1, "%s:%s,%s",
+                       busname, devname, drvargs);
+       if (length < 0) {
+               free(*devargs);
                return -EINVAL;
        }
-       if (strcmp(bus->name, "pci") == 0 || strcmp(bus->name, "vdev") == 0)
-               return rte_eal_hotplug_add(bus->name, name, devargs);
-
-       RTE_LOG(ERR, EAL,
-               "Device attach is only supported for PCI and vdev devices.\n");
 
-       return -ENOTSUP;
+       return 0;
 }
 
-int rte_eal_dev_detach(struct rte_device *dev)
+int
+rte_eal_hotplug_add(const char *busname, const char *devname,
+                   const char *drvargs)
 {
-       struct rte_bus *bus;
-       int ret;
 
-       if (dev == NULL) {
-               RTE_LOG(ERR, EAL, "Invalid device provided.\n");
-               return -EINVAL;
-       }
+       char *devargs;
+       int ret;
 
-       bus = rte_bus_find_by_device(dev);
-       if (bus == NULL) {
-               RTE_LOG(ERR, EAL, "Cannot find bus for device (%s)\n",
-                       dev->name);
-               return -EINVAL;
-       }
+       ret = build_devargs(busname, devname, drvargs, &devargs);
+       if (ret != 0)
+               return ret;
 
-       if (bus->unplug == NULL) {
-               RTE_LOG(ERR, EAL, "Bus function not supported\n");
-               return -ENOTSUP;
-       }
+       ret = rte_dev_probe(devargs);
+       free(devargs);
 
-       ret = bus->unplug(dev);
-       if (ret)
-               RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
-                       dev->name);
        return ret;
 }
 
-int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devname,
-                       const char *devargs)
+/* probe device at local process. */
+int
+local_dev_probe(const char *devargs, struct rte_device **new_dev)
 {
-       struct rte_bus *bus;
        struct rte_device *dev;
        struct rte_devargs *da;
        int ret;
 
-       bus = rte_bus_find_by_name(busname);
-       if (bus == NULL) {
-               RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname);
-               return -ENOENT;
-       }
-
-       if (bus->plug == NULL) {
-               RTE_LOG(ERR, EAL, "Function plug not supported by bus (%s)\n",
-                       bus->name);
-               return -ENOTSUP;
-       }
-
+       *new_dev = NULL;
        da = calloc(1, sizeof(*da));
        if (da == NULL)
                return -ENOMEM;
 
-       ret = rte_devargs_parsef(da, "%s:%s,%s",
-                                busname, devname, devargs);
+       ret = rte_devargs_parse(da, devargs);
        if (ret)
                goto err_devarg;
 
+       if (da->bus->plug == NULL) {
+               RTE_LOG(ERR, EAL, "Function plug not supported by bus (%s)\n",
+                       da->bus->name);
+               ret = -ENOTSUP;
+               goto err_devarg;
+       }
+
        ret = rte_devargs_insert(da);
        if (ret)
                goto err_devarg;
 
-       ret = bus->scan();
+       ret = da->bus->scan();
        if (ret)
                goto err_devarg;
 
-       dev = bus->find_device(NULL, cmp_dev_name, devname);
+       dev = da->bus->find_device(NULL, cmp_dev_name, da->name);
        if (dev == NULL) {
                RTE_LOG(ERR, EAL, "Cannot find device (%s)\n",
-                       devname);
+                       da->name);
                ret = -ENODEV;
                goto err_devarg;
        }
 
-       if (dev->driver != NULL) {
-               RTE_LOG(ERR, EAL, "Device is already plugged\n");
-               return -EEXIST;
-       }
-
-       ret = bus->plug(dev);
+       ret = dev->bus->plug(dev);
        if (ret) {
+               if (rte_dev_is_probed(dev)) /* if already succeeded earlier */
+                       return ret; /* no rollback */
                RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n",
                        dev->name);
                goto err_devarg;
        }
+
+       *new_dev = dev;
        return 0;
 
 err_devarg:
-       if (rte_devargs_remove(busname, devname)) {
+       if (rte_devargs_remove(da) != 0) {
                free(da->args);
                free(da);
        }
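build_devargs() above uses the classic two-pass snprintf idiom: a NULL/0 call to measure the formatted length, then a second call into an exact-size buffer. The same pattern in isolation (variables illustrative):

    int len = snprintf(NULL, 0, "%s:%s,%s", bus, dev, args); /* size pass */
    char *s = malloc(len + 1);
    if (s != NULL)
            snprintf(s, len + 1, "%s:%s,%s", bus, dev, args); /* fill pass */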
@@ -194,40 +187,235 @@ err_devarg:
 }
 
 int __rte_experimental
-rte_eal_hotplug_remove(const char *busname, const char *devname)
+rte_dev_probe(const char *devargs)
 {
-       struct rte_bus *bus;
+       struct eal_dev_mp_req req;
        struct rte_device *dev;
        int ret;
 
+       memset(&req, 0, sizeof(req));
+       req.t = EAL_DEV_REQ_TYPE_ATTACH;
+       strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN);
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+               /**
+                * If in secondary process, just send IPC request to
+                * primary process.
+                */
+               ret = eal_dev_hotplug_request_to_primary(&req);
+               if (ret != 0) {
+                       RTE_LOG(ERR, EAL,
+                               "Failed to send hotplug request to primary\n");
+                       return -ENOMSG;
+               }
+               if (req.result != 0)
+                       RTE_LOG(ERR, EAL,
+                               "Failed to hotplug add device\n");
+               return req.result;
+       }
+
+       /* attaching a shared device from the primary starts here: */
+
+       /* primary attach the new device itself. */
+       ret = local_dev_probe(devargs, &dev);
+
+       if (ret != 0) {
+               RTE_LOG(ERR, EAL,
+                       "Failed to attach device on primary process\n");
+
+               /**
+                * it is possible that a secondary process failed to attach a
+                * device that the primary process already had at
+                * initialization, so in the -EEXIST case we still need to
+                * sync with the secondary processes.
+                */
+               if (ret != -EEXIST)
+                       return ret;
+       }
+
+       /* primary send attach sync request to secondary. */
+       ret = eal_dev_hotplug_request_to_secondary(&req);
+
+       /* if any communication error, we need to rollback. */
+       if (ret != 0) {
+               RTE_LOG(ERR, EAL,
+                       "Failed to send hotplug add request to secondary\n");
+               ret = -ENOMSG;
+               goto rollback;
+       }
+
+       /**
+        * if any secondary failed to attach, we need to consider if rollback
+        * is necessary.
+        */
+       if (req.result != 0) {
+               RTE_LOG(ERR, EAL,
+                       "Failed to attach device on secondary process\n");
+               ret = req.result;
+
+               /* for -EEXIST, we don't need to rollback. */
+               if (ret == -EEXIST)
+                       return ret;
+               goto rollback;
+       }
+
+       return 0;
+
+rollback:
+       req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK;
+
+       /* primary send rollback request to secondary. */
+       if (eal_dev_hotplug_request_to_secondary(&req) != 0)
+               RTE_LOG(WARNING, EAL,
+                       "Failed to rollback device attach on secondary. "
+                       "Devices in secondary may not sync with primary\n");
+
+       /* primary rollback itself. */
+       if (local_dev_remove(dev) != 0)
+               RTE_LOG(WARNING, EAL,
+                       "Failed to rollback device attach on primary. "
+                       "Devices in secondary may not sync with primary\n");
+
+       return ret;
+}
+
+int
+rte_eal_hotplug_remove(const char *busname, const char *devname)
+{
+       struct rte_device *dev;
+       struct rte_bus *bus;
+
        bus = rte_bus_find_by_name(busname);
        if (bus == NULL) {
                RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", busname);
                return -ENOENT;
        }
 
-       if (bus->unplug == NULL) {
-               RTE_LOG(ERR, EAL, "Function unplug not supported by bus (%s)\n",
-                       bus->name);
-               return -ENOTSUP;
-       }
-
        dev = bus->find_device(NULL, cmp_dev_name, devname);
        if (dev == NULL) {
                RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", devname);
                return -EINVAL;
        }
 
-       if (dev->driver == NULL) {
-               RTE_LOG(ERR, EAL, "Device is already unplugged\n");
-               return -ENOENT;
+       return rte_dev_remove(dev);
+}
+
+/* remove device at local process. */
+int
+local_dev_remove(struct rte_device *dev)
+{
+       int ret;
+
+       if (dev->bus->unplug == NULL) {
+               RTE_LOG(ERR, EAL, "Function unplug not supported by bus (%s)\n",
+                       dev->bus->name);
+               return -ENOTSUP;
        }
 
-       ret = bus->unplug(dev);
-       if (ret)
+       ret = dev->bus->unplug(dev);
+       if (ret) {
                RTE_LOG(ERR, EAL, "Driver cannot detach the device (%s)\n",
                        dev->name);
-       rte_devargs_remove(busname, devname);
+               return ret;
+       }
+
+       return 0;
+}
+
+int __rte_experimental
+rte_dev_remove(struct rte_device *dev)
+{
+       struct eal_dev_mp_req req;
+       char *devargs;
+       int ret;
+
+       if (!rte_dev_is_probed(dev)) {
+               RTE_LOG(ERR, EAL, "Device is not probed\n");
+               return -ENOENT;
+       }
+
+       ret = build_devargs(dev->bus->name, dev->name, "", &devargs);
+       if (ret != 0)
+               return ret;
+
+       memset(&req, 0, sizeof(req));
+       req.t = EAL_DEV_REQ_TYPE_DETACH;
+       strlcpy(req.devargs, devargs, EAL_DEV_MP_DEV_ARGS_MAX_LEN);
+       free(devargs);
+
+       if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+               /**
+                * If in secondary process, just send IPC request to
+                * primary process.
+                */
+               ret = eal_dev_hotplug_request_to_primary(&req);
+               if (ret != 0) {
+                       RTE_LOG(ERR, EAL,
+                               "Failed to send hotplug request to primary\n");
+                       return -ENOMSG;
+               }
+               if (req.result != 0)
+                       RTE_LOG(ERR, EAL,
+                               "Failed to hotplug remove device\n");
+               return req.result;
+       }
+
+       /* detaching a device from the primary starts here: */
+
+       /* primary send detach sync request to secondary */
+       ret = eal_dev_hotplug_request_to_secondary(&req);
+
+       /**
+        * on a communication error we need to roll back, because some of the
+        * secondary processes may still have detached the device successfully.
+        */
+       if (ret != 0) {
+               RTE_LOG(ERR, EAL,
+                       "Failed to send device detach request to secondary\n");
+               ret = -ENOMSG;
+               goto rollback;
+       }
+
+       /**
+        * if any secondary failed to detach, we need to consider if rollback
+        * is necessary.
+        */
+       if (req.result != 0) {
+               RTE_LOG(ERR, EAL,
+                       "Failed to detach device on secondary process\n");
+               ret = req.result;
+               /**
+                * if -ENOENT, we don't need to roll back, since the device is
+                * already detached on the secondary processes.
+                */
+               if (ret != -ENOENT)
+                       goto rollback;
+       }
+
+       /* primary detach the device itself. */
+       ret = local_dev_remove(dev);
+
+       /* if primary failed, still need to consider if rollback is necessary */
+       if (ret != 0) {
+               RTE_LOG(ERR, EAL,
+                       "Failed to detach device on primary process\n");
+               /* if -ENOENT, we don't need to rollback */
+               if (ret == -ENOENT)
+                       return ret;
+               goto rollback;
+       }
+
+       return 0;
+
+rollback:
+       req.t = EAL_DEV_REQ_TYPE_DETACH_ROLLBACK;
+
+       /* the primary sends a rollback request to the secondaries. */
+       if (eal_dev_hotplug_request_to_secondary(&req) != 0)
+               RTE_LOG(WARNING, EAL,
+                       "Failed to rollback device detach on secondary."
+                       "Devices in secondary may not sync with primary\n");
+
        return ret;
 }
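
The reworked detach path above makes rte_dev_remove() multi-process aware:
the primary fans the request out over IPC and rolls back on partial failure.
A minimal caller-side sketch, assuming an initialized EAL and a device
pointer obtained from a bus lookup (the lookup itself is not shown):

    #include <errno.h>
    #include <rte_dev.h>

    static int
    detach_one(struct rte_device *dev)
    {
            if (!rte_dev_is_probed(dev))
                    return -ENOENT; /* mirrors the check in rte_dev_remove() */
            /* IPC fan-out and rollback are handled internally. */
            return rte_dev_remove(dev);
    }
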
 
@@ -342,8 +530,9 @@ rte_dev_event_callback_unregister(const char *device_name,
        return ret;
 }
 
-void
-dev_callback_process(char *device_name, enum rte_dev_event_type event)
+void __rte_experimental
+rte_dev_event_callback_process(const char *device_name,
+                              enum rte_dev_event_type event)
 {
        struct dev_event_callback *cb_lst;
 
index dac2402..b7b9cb6 100644 (file)
@@ -4,9 +4,6 @@
 
 /* This file manages the list of devices and their arguments, as given
  * by the user at startup
- *
- * Code here should not call rte_log since the EAL environment
- * may not be initialized.
  */
 
 #include <stdio.h>
 TAILQ_HEAD(rte_devargs_list, rte_devargs);
 
 /** Global list of user devices */
-struct rte_devargs_list devargs_list =
+static struct rte_devargs_list devargs_list =
        TAILQ_HEAD_INITIALIZER(devargs_list);
 
-int
-rte_eal_parse_devargs_str(const char *devargs_str,
-                       char **drvname, char **drvargs)
-{
-       char *sep;
-
-       if ((devargs_str) == NULL || (drvname) == NULL || (drvargs == NULL))
-               return -1;
-
-       *drvname = strdup(devargs_str);
-       if (*drvname == NULL)
-               return -1;
-
-       /* set the first ',' to '\0' to split name and arguments */
-       sep = strchr(*drvname, ',');
-       if (sep != NULL) {
-               sep[0] = '\0';
-               *drvargs = strdup(sep + 1);
-       } else {
-               *drvargs = strdup("");
-       }
-
-       if (*drvargs == NULL) {
-               free(*drvname);
-               *drvname = NULL;
-               return -1;
-       }
-       return 0;
-}
-
 static size_t
 devargs_layer_count(const char *s)
 {
@@ -270,6 +237,7 @@ rte_devargs_parsef(struct rte_devargs *da, const char *format, ...)
        va_list ap;
        size_t len;
        char *dev;
+       int ret;
 
        if (da == NULL)
                return -EINVAL;
@@ -288,7 +256,10 @@ rte_devargs_parsef(struct rte_devargs *da, const char *format, ...)
        vsnprintf(dev, len + 1, format, ap);
        va_end(ap);
 
-       return rte_devargs_parse(da, dev);
+       ret = rte_devargs_parse(da, dev);
+
+       free(dev);
+       return ret;
 }
 
 int __rte_experimental
@@ -296,7 +267,7 @@ rte_devargs_insert(struct rte_devargs *da)
 {
        int ret;
 
-       ret = rte_devargs_remove(da->bus->name, da->name);
+       ret = rte_devargs_remove(da);
        if (ret < 0)
                return ret;
        TAILQ_INSERT_TAIL(&devargs_list, da, next);
@@ -342,14 +313,17 @@ fail:
 }
 
 int __rte_experimental
-rte_devargs_remove(const char *busname, const char *devname)
+rte_devargs_remove(struct rte_devargs *devargs)
 {
        struct rte_devargs *d;
        void *tmp;
 
+       if (devargs == NULL || devargs->bus == NULL)
+               return -1;
+
        TAILQ_FOREACH_SAFE(d, &devargs_list, next, tmp) {
-               if (strcmp(d->bus->name, busname) == 0 &&
-                   strcmp(d->name, devname) == 0) {
+               if (strcmp(d->bus->name, devargs->bus->name) == 0 &&
+                   strcmp(d->name, devargs->name) == 0) {
                        TAILQ_REMOVE(&devargs_list, d, next);
                        free(d->args);
                        free(d);
index 43caf3c..ea0735c 100644 (file)
@@ -2,6 +2,7 @@
  * Copyright(c) 2017-2018 Intel Corporation
  */
 
+#include <fcntl.h>
 #include <inttypes.h>
 #include <limits.h>
 #include <sys/mman.h>
@@ -878,6 +879,10 @@ rte_fbarray_destroy(struct rte_fbarray *arr)
        if (ret)
                return ret;
 
+       /* with no shconf, there were never any files to begin with */
+       if (internal_config.no_shconf)
+               return 0;
+
        /* try deleting the file */
        eal_get_fbarray_path(path, sizeof(path), arr->name);
 
index fbfb1b0..12dcedf 100644 (file)
@@ -2,6 +2,7 @@
  * Copyright(c) 2010-2014 Intel Corporation
  */
 
+#include <fcntl.h>
 #include <errno.h>
 #include <stdio.h>
 #include <stdint.h>
 static void *next_baseaddr;
 static uint64_t system_page_sz;
 
+#ifdef RTE_ARCH_64
+/*
+ * The Linux kernel uses a really high address as the starting address
+ * for serving mmap calls. If there are addressing limitations and the
+ * IOVA mode is VA, this starting address is likely too high for those
+ * devices. However, it is possible to use a lower address in the process
+ * virtual address space, as with 64 bits there is a lot of available space.
+ *
+ * Currently known limitations are 39 or 40 bits. Setting the starting
+ * address at 4GB implies there are 508GB or 1020GB left for mapping the
+ * available hugepages. This is likely enough for most systems, although
+ * a device with addressing limitations should call rte_eal_check_dma_mask
+ * to ensure all memory is within the supported range.
+ */
+static uint64_t baseaddr = 0x100000000;
+#endif
+
 void *
 eal_get_virtual_area(void *requested_addr, size_t *size,
                size_t page_sz, int flags, int mmap_flags)
@@ -60,6 +78,11 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
                        rte_eal_process_type() == RTE_PROC_PRIMARY)
                next_baseaddr = (void *) internal_config.base_virtaddr;
 
+#ifdef RTE_ARCH_64
+       if (next_baseaddr == NULL && internal_config.base_virtaddr == 0 &&
+                       rte_eal_process_type() == RTE_PROC_PRIMARY)
+               next_baseaddr = (void *) baseaddr;
+#endif
        if (requested_addr == NULL && next_baseaddr != NULL) {
                requested_addr = next_baseaddr;
                requested_addr = RTE_PTR_ALIGN(requested_addr, page_sz);
@@ -91,7 +114,17 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
                                mmap_flags, -1, 0);
                if (mapped_addr == MAP_FAILED && allow_shrink)
                        *size -= page_sz;
-       } while (allow_shrink && mapped_addr == MAP_FAILED && *size > 0);
+
+               if (mapped_addr != MAP_FAILED && addr_is_hint &&
+                   mapped_addr != requested_addr) {
+                       /* hint was not used. Try with another offset */
+                       munmap(mapped_addr, map_sz);
+                       mapped_addr = MAP_FAILED;
+                       next_baseaddr = RTE_PTR_ADD(next_baseaddr, page_sz);
+                       requested_addr = next_baseaddr;
+               }
+       } while ((allow_shrink || addr_is_hint) &&
+                mapped_addr == MAP_FAILED && *size > 0);
 
        /* align resulting address - if map failed, we will ignore the value
         * anyway, so no need to add additional checks.
@@ -171,7 +204,7 @@ virt2memseg(const void *addr, const struct rte_memseg_list *msl)
 
        /* a memseg list was specified, check if it's the right one */
        start = msl->base_va;
-       end = RTE_PTR_ADD(start, (size_t)msl->page_sz * msl->memseg_arr.len);
+       end = RTE_PTR_ADD(start, msl->len);
 
        if (addr < start || addr >= end)
                return NULL;
@@ -194,8 +227,7 @@ virt2memseg_list(const void *addr)
                msl = &mcfg->memsegs[msl_idx];
 
                start = msl->base_va;
-               end = RTE_PTR_ADD(start,
-                               (size_t)msl->page_sz * msl->memseg_arr.len);
+               end = RTE_PTR_ADD(start, msl->len);
                if (addr >= start && addr < end)
                        break;
        }
@@ -273,6 +305,9 @@ physmem_size(const struct rte_memseg_list *msl, void *arg)
 {
        uint64_t *total_len = arg;
 
+       if (msl->external)
+               return 0;
+
        *total_len += msl->memseg_arr.count * msl->page_sz;
 
        return 0;
@@ -294,7 +329,7 @@ dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
                void *arg)
 {
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-       int msl_idx, ms_idx;
+       int msl_idx, ms_idx, fd;
        FILE *f = arg;
 
        msl_idx = msl - mcfg->memsegs;
@@ -305,10 +340,11 @@ dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
        if (ms_idx < 0)
                return -1;
 
+       fd = eal_memalloc_get_seg_fd(msl_idx, ms_idx);
        fprintf(f, "Segment %i-%i: IOVA:0x%"PRIx64", len:%zu, "
                        "virt:%p, socket_id:%"PRId32", "
                        "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", "
-                       "nrank:%"PRIx32"\n",
+                       "nrank:%"PRIx32" fd:%i\n",
                        msl_idx, ms_idx,
                        ms->iova,
                        ms->len,
@@ -316,7 +352,8 @@ dump_memseg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
                        ms->socket_id,
                        ms->hugepage_sz,
                        ms->nchannel,
-                       ms->nrank);
+                       ms->nrank,
+                       fd);
 
        return 0;
 }
@@ -383,6 +420,66 @@ rte_dump_physmem_layout(FILE *f)
        rte_memseg_walk(dump_memseg, f);
 }
 
+static int
+check_iova(const struct rte_memseg_list *msl __rte_unused,
+               const struct rte_memseg *ms, void *arg)
+{
+       uint64_t *mask = arg;
+       rte_iova_t iova;
+
+       /* higher address within segment */
+       iova = (ms->iova + ms->len) - 1;
+       if (!(iova & *mask))
+               return 0;
+
+       RTE_LOG(DEBUG, EAL, "memseg iova %"PRIx64", len %zx, out of range\n",
+                           ms->iova, ms->len);
+
+       RTE_LOG(DEBUG, EAL, "\tusing dma mask %"PRIx64"\n", *mask);
+       return 1;
+}
+
+#if defined(RTE_ARCH_64)
+#define MAX_DMA_MASK_BITS 63
+#else
+#define MAX_DMA_MASK_BITS 31
+#endif
+
+/* check memseg iovas are within the required range based on dma mask */
+int __rte_experimental
+rte_eal_check_dma_mask(uint8_t maskbits)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       uint64_t mask;
+
+       /* sanity check */
+       if (maskbits > MAX_DMA_MASK_BITS) {
+               RTE_LOG(ERR, EAL, "wrong dma mask size %u (Max: %u)\n",
+                                  maskbits, MAX_DMA_MASK_BITS);
+               return -1;
+       }
+
+       /* create dma mask */
+       mask = ~((1ULL << maskbits) - 1);
+
+       if (rte_memseg_walk(check_iova, &mask))
+               /*
+                * The DMA mask precludes hugepage usage.
+                * This device cannot be used and we do not need to keep
+                * the DMA mask.
+                */
+               return 1;
+
+       /*
+        * Keep the more restrictive maskbits for checking potential
+        * dynamic memory allocations in the future.
+        */
+       mcfg->dma_maskbits = mcfg->dma_maskbits == 0 ? maskbits :
+                            RTE_MIN(mcfg->dma_maskbits, maskbits);
+
+       return 0;
+}
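
The mask arithmetic is worth unpacking: for maskbits = 40,
~((1ULL << 40) - 1) keeps only bits 40..63 set, so check_iova() flags any
segment whose highest IOVA uses bit 40 or above. A sketch of how a driver
with a 40-bit addressing limit might use this (the constant and function
name are illustrative, not from the patch):

    #define DEV_DMA_BITS 40 /* hypothetical device limitation */

    static int
    dev_check_addressing(void)
    {
            /* nonzero if any hugepage lies outside the 40-bit range
             * (or the requested mask itself is invalid) */
            if (rte_eal_check_dma_mask(DEV_DMA_BITS) != 0)
                    return -1; /* device cannot safely DMA to all memory */
            return 0;
    }
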
+
 /* return the number of memory channels */
 unsigned rte_memory_get_nchannel(void)
 {
@@ -548,6 +645,105 @@ rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
        return ret;
 }
 
+int __rte_experimental
+rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       struct rte_memseg_list *msl;
+       struct rte_fbarray *arr;
+       int msl_idx, seg_idx, ret;
+
+       if (ms == NULL) {
+               rte_errno = EINVAL;
+               return -1;
+       }
+
+       msl = rte_mem_virt2memseg_list(ms->addr);
+       if (msl == NULL) {
+               rte_errno = EINVAL;
+               return -1;
+       }
+       arr = &msl->memseg_arr;
+
+       msl_idx = msl - mcfg->memsegs;
+       seg_idx = rte_fbarray_find_idx(arr, ms);
+
+       if (!rte_fbarray_is_used(arr, seg_idx)) {
+               rte_errno = ENOENT;
+               return -1;
+       }
+
+       ret = eal_memalloc_get_seg_fd(msl_idx, seg_idx);
+       if (ret < 0) {
+               rte_errno = -ret;
+               ret = -1;
+       }
+       return ret;
+}
+
+int __rte_experimental
+rte_memseg_get_fd(const struct rte_memseg *ms)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       int ret;
+
+       rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+       ret = rte_memseg_get_fd_thread_unsafe(ms);
+       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+       return ret;
+}
+
+int __rte_experimental
+rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
+               size_t *offset)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       struct rte_memseg_list *msl;
+       struct rte_fbarray *arr;
+       int msl_idx, seg_idx, ret;
+
+       if (ms == NULL || offset == NULL) {
+               rte_errno = EINVAL;
+               return -1;
+       }
+
+       msl = rte_mem_virt2memseg_list(ms->addr);
+       if (msl == NULL) {
+               rte_errno = EINVAL;
+               return -1;
+       }
+       arr = &msl->memseg_arr;
+
+       msl_idx = msl - mcfg->memsegs;
+       seg_idx = rte_fbarray_find_idx(arr, ms);
+
+       if (!rte_fbarray_is_used(arr, seg_idx)) {
+               rte_errno = ENOENT;
+               return -1;
+       }
+
+       ret = eal_memalloc_get_seg_fd_offset(msl_idx, seg_idx, offset);
+       if (ret < 0) {
+               rte_errno = -ret;
+               ret = -1;
+       }
+       return ret;
+}
+
+int __rte_experimental
+rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       int ret;
+
+       rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+       ret = rte_memseg_get_fd_offset_thread_unsafe(ms, offset);
+       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+       return ret;
+}
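
These accessors expose the file descriptor (and offset within it) backing a
memseg, which is what an application needs to share hugepage memory with an
external process, e.g. over a Unix socket. A hedged sketch, assuming 'addr'
points into DPDK-managed memory:

    const struct rte_memseg *ms = rte_mem_virt2memseg(addr, NULL);
    size_t offset;
    int fd;

    if (ms != NULL) {
            /* both calls take the memory hotplug read lock internally */
            fd = rte_memseg_get_fd(ms);
            if (fd >= 0 && rte_memseg_get_fd_offset(ms, &offset) == 0) {
                    /* (fd, offset, ms->len) fully describe the mapping */
            }
    }
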
+
 /* init memory subsystem */
 int
 rte_eal_memory_init(void)
index 7300fe0..b7081af 100644 (file)
@@ -120,13 +120,15 @@ memzone_reserve_aligned_thread_unsafe(const char *name, size_t len,
                return NULL;
        }
 
-       if ((socket_id != SOCKET_ID_ANY) &&
-           (socket_id >= RTE_MAX_NUMA_NODES || socket_id < 0)) {
+       if ((socket_id != SOCKET_ID_ANY) && socket_id < 0) {
                rte_errno = EINVAL;
                return NULL;
        }
 
-       if (!rte_eal_has_hugepages())
+       /* only set socket to SOCKET_ID_ANY if we aren't allocating for an
+        * external heap.
+        */
+       if (!rte_eal_has_hugepages() && socket_id < RTE_MAX_NUMA_NODES)
                socket_id = SOCKET_ID_ANY;
 
        contig = (flags & RTE_MEMZONE_IOVA_CONTIG) != 0;
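
The socket_id < RTE_MAX_NUMA_NODES guard exists because external-memory
heaps, new in this release, are addressed through synthetic socket ids at
or above RTE_MAX_NUMA_NODES; those must not be collapsed to SOCKET_ID_ANY
even when running without hugepages. A sketch, with a hypothetical heap
name and assuming the heap was created and populated beforehand:

    /* heap set up earlier via rte_malloc_heap_create("ext_heap") plus
     * rte_malloc_heap_memory_add(); both are experimental in 18.11 */
    const struct rte_memzone *mz;
    int sock = rte_malloc_heap_get_socket("ext_heap");

    if (sock >= 0) /* external heaps get ids >= RTE_MAX_NUMA_NODES */
            mz = rte_memzone_reserve("mz_ext", 1 << 20, sock, 0);
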
index dd5f974..b82f3dd 100644 (file)
@@ -58,6 +58,7 @@ eal_long_options[] = {
        {OPT_HELP,              0, NULL, OPT_HELP_NUM             },
        {OPT_HUGE_DIR,          1, NULL, OPT_HUGE_DIR_NUM         },
        {OPT_HUGE_UNLINK,       0, NULL, OPT_HUGE_UNLINK_NUM      },
+       {OPT_IOVA_MODE,         1, NULL, OPT_IOVA_MODE_NUM        },
        {OPT_LCORES,            1, NULL, OPT_LCORES_NUM           },
        {OPT_LOG_LEVEL,         1, NULL, OPT_LOG_LEVEL_NUM        },
        {OPT_MASTER_LCORE,      1, NULL, OPT_MASTER_LCORE_NUM     },
@@ -205,6 +206,7 @@ eal_reset_internal_config(struct internal_config *internal_cfg)
 #endif
        internal_cfg->vmware_tsc_map = 0;
        internal_cfg->create_uio_dev = 0;
+       internal_cfg->iova_mode = RTE_IOVA_DC;
        internal_cfg->user_mbuf_pool_ops_name = NULL;
        internal_cfg->init_complete = 0;
 }
@@ -1075,6 +1077,25 @@ eal_parse_proc_type(const char *arg)
        return RTE_PROC_INVALID;
 }
 
+static int
+eal_parse_iova_mode(const char *name)
+{
+       int mode;
+
+       if (name == NULL)
+               return -1;
+
+       if (!strcmp("pa", name))
+               mode = RTE_IOVA_PA;
+       else if (!strcmp("va", name))
+               mode = RTE_IOVA_VA;
+       else
+               return -1;
+
+       internal_config.iova_mode = mode;
+       return 0;
+}
+
 int
 eal_parse_common_option(int opt, const char *optarg,
                        struct internal_config *conf)
@@ -1281,6 +1302,13 @@ eal_parse_common_option(int opt, const char *optarg,
        case OPT_SINGLE_FILE_SEGMENTS_NUM:
                conf->single_file_segments = 1;
                break;
+       case OPT_IOVA_MODE_NUM:
+               if (eal_parse_iova_mode(optarg) < 0) {
+                       RTE_LOG(ERR, EAL, "invalid parameters for --"
+                               OPT_IOVA_MODE "\n");
+                       return -1;
+               }
+               break;
 
        /* don't know what to do, leave this to caller */
        default:
@@ -1384,10 +1412,16 @@ eal_check_common_options(struct internal_config *internal_cfg)
                        " is only supported in non-legacy memory mode\n");
        }
        if (internal_cfg->single_file_segments &&
-                       internal_cfg->hugepage_unlink) {
+                       internal_cfg->hugepage_unlink &&
+                       !internal_cfg->in_memory) {
                RTE_LOG(ERR, EAL, "Option --"OPT_SINGLE_FILE_SEGMENTS" is "
-                       "not compatible with neither --"OPT_IN_MEMORY" nor "
-                       "--"OPT_HUGE_UNLINK"\n");
+                       "not compatible with --"OPT_HUGE_UNLINK"\n");
+               return -1;
+       }
+       if (internal_cfg->legacy_mem &&
+                       internal_cfg->in_memory) {
+               RTE_LOG(ERR, EAL, "Option --"OPT_LEGACY_MEM" is not compatible "
+                               "with --"OPT_IN_MEMORY"\n");
                return -1;
        }
 
@@ -1428,6 +1462,8 @@ eal_common_usage(void)
               "  --"OPT_VDEV"              Add a virtual device.\n"
               "                      The argument format is <driver><id>[,key=val,...]\n"
               "                      (ex: --vdev=net_pcap0,iface=eth2).\n"
+              "  --"OPT_IOVA_MODE"   Set IOVA mode. 'pa' for IOVA_PA\n"
+              "                      'va' for IOVA_VA\n"
               "  -d LIB.so|DIR       Add a driver or driver directory\n"
               "                      (can be used multiple times)\n"
               "  --"OPT_VMWARE_TSC_MAP"    Use VMware TSC map instead of native RDTSC\n"
index 9fcb912..97663d3 100644 (file)
@@ -939,13 +939,17 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
        if (check_input(req) == false)
                return -1;
 
+       reply->nb_sent = 0;
+       reply->nb_received = 0;
+       reply->msgs = NULL;
+
        if (internal_config.no_shconf) {
                RTE_LOG(DEBUG, EAL, "No shared files mode enabled, IPC is disabled\n");
                return 0;
        }
 
        if (gettimeofday(&now, NULL) < 0) {
-               RTE_LOG(ERR, EAL, "Faile to get current time\n");
+               RTE_LOG(ERR, EAL, "Failed to get current time\n");
                rte_errno = errno;
                return -1;
        }
@@ -954,10 +958,6 @@ rte_mp_request_sync(struct rte_mp_msg *req, struct rte_mp_reply *reply,
        end.tv_sec = now.tv_sec + ts->tv_sec +
                        (now.tv_usec * 1000 + ts->tv_nsec) / 1000000000;
 
-       reply->nb_sent = 0;
-       reply->nb_received = 0;
-       reply->msgs = NULL;
-
        /* for secondary process, send request to the primary process only */
        if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
                pthread_mutex_lock(&pending_requests.lock);
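
Hoisting the reply initialization above the early no-shconf return matters
to callers: the documented pattern is to free reply.msgs unconditionally,
which previously could free uninitialized garbage. The expected caller
pattern, sketched with a request prepared elsewhere:

    struct rte_mp_msg req;   /* filled in earlier: name, param, len_param */
    struct rte_mp_reply reply;
    struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };

    if (rte_mp_request_sync(&req, &reply, &ts) == 0) {
            /* reply.msgs is either NULL or malloc'd; free() handles both */
            free(reply.msgs);
    }
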
index 6ac5f82..60c5dd6 100644 (file)
@@ -38,3 +38,29 @@ einval_error:
        errno = EINVAL;
        return -1;
 }
+
+/* Copy src string into dst.
+ *
+ * Return a negative value and NUL-terminate if dst is too short;
+ * otherwise return the number of bytes copied.
+ */
+ssize_t
+rte_strscpy(char *dst, const char *src, size_t dsize)
+{
+       size_t nleft = dsize;
+       size_t res = 0;
+
+       /* Copy as many bytes as will fit. */
+       while (nleft != 0) {
+               dst[res] = src[res];
+               if (src[res] == '\0')
+                       return res;
+               res++;
+               nleft--;
+       }
+
+       /* Not enough room in dst, set NUL and return error. */
+       if (res != 0)
+               dst[res - 1] = '\0';
+       return -E2BIG;
+}
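
Unlike strncpy(), rte_strscpy() always NUL-terminates and reports
truncation instead of silently padding. A short usage sketch:

    char buf[8];
    ssize_t n = rte_strscpy(buf, "0000:81:00.0", sizeof(buf));

    if (n < 0) {
            /* -E2BIG: truncated, but buf still holds "0000:81" + NUL */
    }
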
index 2e2b770..dcf26bf 100644 (file)
@@ -7,9 +7,11 @@
 #include <unistd.h>
 #include <inttypes.h>
 #include <sys/types.h>
+#include <time.h>
 #include <errno.h>
 
 #include <rte_common.h>
+#include <rte_compat.h>
 #include <rte_log.h>
 #include <rte_cycles.h>
 #include <rte_pause.h>
@@ -31,6 +33,28 @@ rte_delay_us_block(unsigned int us)
                rte_pause();
 }
 
+void __rte_experimental
+rte_delay_us_sleep(unsigned int us)
+{
+       struct timespec wait[2];
+       int ind = 0;
+
+       wait[0].tv_sec = 0;
+       if (us >= US_PER_S) {
+               wait[0].tv_sec = us / US_PER_S;
+               us -= wait[0].tv_sec * US_PER_S;
+       }
+       wait[0].tv_nsec = 1000 * us;
+
+       while (nanosleep(&wait[ind], &wait[1 - ind]) && errno == EINTR) {
+               /*
+                * Sleep was interrupted. Flip the index, so the 'remainder'
+                * will become the 'request' for the next call.
+                */
+               ind = 1 - ind;
+       }
+}
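
The two-element timespec array implements nanosleep()'s interrupted-sleep
protocol: on EINTR the kernel writes the remaining time into the other
slot, and flipping 'ind' re-submits that remainder. Choosing between the
two delay variants then looks like this:

    /* busy-wait: burns the core, precise for very short delays */
    rte_delay_us_block(50);

    /* sleeping variant from above: yields the core, coarser timing */
    rte_delay_us_sleep(1500);
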
+
 uint64_t
 rte_get_tsc_hz(void)
 {
index de05feb..b3e8ae5 100644 (file)
@@ -27,7 +27,7 @@ eal_create_runtime_dir(void);
 
 /* returns runtime dir */
 const char *
-eal_get_runtime_dir(void);
+rte_eal_get_runtime_dir(void);
 
 #define RUNTIME_CONFIG_FNAME "config"
 static inline const char *
@@ -35,7 +35,7 @@ eal_runtime_config_path(void)
 {
        static char buffer[PATH_MAX]; /* static so auto-zeroed */
 
-       snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+       snprintf(buffer, sizeof(buffer) - 1, "%s/%s", rte_eal_get_runtime_dir(),
                        RUNTIME_CONFIG_FNAME);
        return buffer;
 }
@@ -47,7 +47,7 @@ eal_mp_socket_path(void)
 {
        static char buffer[PATH_MAX]; /* static so auto-zeroed */
 
-       snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+       snprintf(buffer, sizeof(buffer) - 1, "%s/%s", rte_eal_get_runtime_dir(),
                        MP_SOCKET_FNAME);
        return buffer;
 }
@@ -55,7 +55,8 @@ eal_mp_socket_path(void)
 #define FBARRAY_NAME_FMT "%s/fbarray_%s"
 static inline const char *
 eal_get_fbarray_path(char *buffer, size_t buflen, const char *name) {
-       snprintf(buffer, buflen, FBARRAY_NAME_FMT, eal_get_runtime_dir(), name);
+       snprintf(buffer, buflen, FBARRAY_NAME_FMT, rte_eal_get_runtime_dir(),
+                       name);
        return buffer;
 }
 
@@ -66,7 +67,7 @@ eal_hugepage_info_path(void)
 {
        static char buffer[PATH_MAX]; /* static so auto-zeroed */
 
-       snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+       snprintf(buffer, sizeof(buffer) - 1, "%s/%s", rte_eal_get_runtime_dir(),
                        HUGEPAGE_INFO_FNAME);
        return buffer;
 }
@@ -78,7 +79,7 @@ eal_hugepage_data_path(void)
 {
        static char buffer[PATH_MAX]; /* static so auto-zeroed */
 
-       snprintf(buffer, sizeof(buffer) - 1, "%s/%s", eal_get_runtime_dir(),
+       snprintf(buffer, sizeof(buffer) - 1, "%s/%s", rte_eal_get_runtime_dir(),
                        HUGEPAGE_DATA_FNAME);
        return buffer;
 }
@@ -99,7 +100,7 @@ eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id
 static inline const char *
 eal_get_hugefile_lock_path(char *buffer, size_t buflen, int f_id)
 {
-       snprintf(buffer, buflen, HUGEFILE_LOCK_FMT, eal_get_runtime_dir(),
+       snprintf(buffer, buflen, HUGEFILE_LOCK_FMT, rte_eal_get_runtime_dir(),
                        f_id);
        buffer[buflen - 1] = '\0';
        return buffer;
index 00ee6e0..737f17e 100644 (file)
@@ -70,6 +70,7 @@ struct internal_config {
                        /**< user defined mbuf pool ops name */
        unsigned num_hugepage_sizes;      /**< how many sizes on this system */
        struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES];
+       enum rte_iova_mode iova_mode;     /**< Set IOVA mode on this system */
        volatile unsigned int init_complete;
        /**< indicates whether EAL has completed initialization */
 };
index 36bb1a0..af917c2 100644 (file)
@@ -76,6 +76,17 @@ eal_memalloc_mem_alloc_validator_unregister(const char *name, int socket_id);
 int
 eal_memalloc_mem_alloc_validate(int socket_id, size_t new_len);
 
+/* returns fd or -errno */
+int
+eal_memalloc_get_seg_fd(int list_idx, int seg_idx);
+
+/* returns 0 or -errno */
+int
+eal_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd);
+
+int
+eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset);
+
 int
 eal_memalloc_init(void);
 
index 96e1667..5271f94 100644 (file)
@@ -63,6 +63,8 @@ enum {
        OPT_LEGACY_MEM_NUM,
 #define OPT_SINGLE_FILE_SEGMENTS    "single-file-segments"
        OPT_SINGLE_FILE_SEGMENTS_NUM,
+#define OPT_IOVA_MODE          "iova-mode"
+       OPT_IOVA_MODE_NUM,
        OPT_LONG_MAX_NUM
 };
 
index 4f809a8..442c6dc 100644 (file)
@@ -258,18 +258,6 @@ struct rte_bus *rte_bus_find_by_device_name(const char *str);
 
 int rte_mp_channel_init(void);
 
-/**
- * Internal Executes all the user application registered callbacks for
- * the specific device. It is for DPDK internal user only. User
- * application should not call it directly.
- *
- * @param device_name
- *  The device name.
- * @param event
- *  the device event type.
- */
-void dev_callback_process(char *device_name, enum rte_dev_event_type event);
-
 /**
  * @internal
  * Parse a device string and store its information in an
@@ -304,4 +292,82 @@ int
 rte_devargs_layers_parse(struct rte_devargs *devargs,
                         const char *devstr);
 
+/**
+ * Probe a device in the local process.
+ *
+ * @param devargs
+ *   Device arguments including bus, class and driver properties.
+ * @param new_dev
+ *   The newly probed device, returned as output.
+ * @return
+ *   0 on success, negative on error.
+ */
+int local_dev_probe(const char *devargs, struct rte_device **new_dev);
+
+/**
+ * Hotplug remove a given device from a specific bus at local process.
+ *
+ * @param dev
+ *   Data structure of the device to remove.
+ * @return
+ *   0 on success, negative on error.
+ */
+int local_dev_remove(struct rte_device *dev);
+
+/**
+ * Iterate over all buses to find the corresponding bus to handle the sigbus
+ * error.
+ * @param failure_addr
+ *     Pointer of the fault address of the sigbus error.
+ *
+ * @return
+ *      0 if the sigbus was handled successfully.
+ *     -1 if handling the sigbus failed.
+ *      1 if no bus can handle the sigbus.
+ */
+int rte_bus_sigbus_handler(const void *failure_addr);
+
+/**
+ * @internal
+ * Register the sigbus handler.
+ *
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+dev_sigbus_handler_register(void);
+
+/**
+ * @internal
+ * Unregister the sigbus handler.
+ *
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int
+dev_sigbus_handler_unregister(void);
+
+/**
+ * Check if the option is registered.
+ *
+ * @param option
+ *  The option to be parsed.
+ *
+ * @return
+ *  0 on success
+ * @return
+ *  -1 on failure
+ */
+int
+rte_option_parse(const char *opt);
+
+/**
+ * Iterate through the registered options and execute the associated
+ * callback if enabled.
+ */
+void
+rte_option_init(void);
+
 #endif /* _EAL_PRIVATE_H_ */
diff --git a/lib/librte_eal/common/hotplug_mp.c b/lib/librte_eal/common/hotplug_mp.c
new file mode 100644 (file)
index 0000000..84f59d9
--- /dev/null
@@ -0,0 +1,426 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#include <string.h>
+
+#include <rte_eal.h>
+#include <rte_alarm.h>
+#include <rte_string_fns.h>
+#include <rte_devargs.h>
+
+#include "hotplug_mp.h"
+#include "eal_private.h"
+
+#define MP_TIMEOUT_S 5 /**< 5 second timeout */
+
+struct mp_reply_bundle {
+       struct rte_mp_msg msg;
+       void *peer;
+};
+
+static int cmp_dev_name(const struct rte_device *dev, const void *_name)
+{
+       const char *name = _name;
+
+       return strcmp(dev->name, name);
+}
+
+/**
+ * Secondary to primary request.
+ * Starts from function eal_dev_hotplug_request_to_primary.
+ *
+ * device attach on secondary:
+ * a) secondary sends a sync request to the primary.
+ * b) primary receives the request and attaches the new device;
+ *    if that fails, goto i).
+ * c) primary forwards the attach sync request to all secondaries.
+ * d) each secondary receives the request, attaches the device and
+ *    sends a reply.
+ * e) primary checks the replies; if all succeeded, goto j).
+ * f) primary sends an attach rollback sync request to all secondaries.
+ * g) each secondary receives the request, detaches the device and
+ *    sends a reply.
+ * h) primary receives the replies and detaches the device as the
+ *    rollback action.
+ * i) send attach failure to the secondary as a reply to step a), goto k).
+ * j) send attach success to the secondary as a reply to step a).
+ * k) secondary receives the reply and returns.
+ *
+ * device detach on secondary:
+ * a) secondary sends a sync request to the primary.
+ * b) primary sends a detach sync request to all secondaries.
+ * c) each secondary detaches the device and sends a reply.
+ * d) primary checks the replies; if all succeeded, goto g).
+ * e) primary sends a detach rollback sync request to all secondaries.
+ * f) each secondary receives the request and attaches the device
+ *    back, goto h).
+ * g) primary detaches the device itself; if that succeeds goto i),
+ *    else goto e).
+ * h) primary sends detach failure to the secondary as a reply to
+ *    step a), goto j).
+ * i) primary sends detach success to the secondary as a reply to step a).
+ * j) secondary receives the reply and returns.
+ */
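
This protocol is what makes the hotplug API process-agnostic: the same call
works from either process type, with the IPC fan-out and rollback happening
underneath. A hedged sketch from any process (the vdev name is illustrative):

    #include <rte_dev.h>

    /* Attaches in every process of the application; a failure in any
     * one of them rolls the attach back everywhere (steps f)-h) above). */
    int ret = rte_dev_probe("net_null0");
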
+
+static int
+send_response_to_secondary(const struct eal_dev_mp_req *req,
+                       int result,
+                       const void *peer)
+{
+       struct rte_mp_msg mp_resp;
+       struct eal_dev_mp_req *resp =
+               (struct eal_dev_mp_req *)mp_resp.param;
+       int ret;
+
+       memset(&mp_resp, 0, sizeof(mp_resp));
+       mp_resp.len_param = sizeof(*resp);
+       strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+       memcpy(resp, req, sizeof(*req));
+       resp->result = result;
+
+       ret = rte_mp_reply(&mp_resp, peer);
+       if (ret != 0)
+               RTE_LOG(ERR, EAL, "failed to send response to secondary\n");
+
+       return ret;
+}
+
+static void
+__handle_secondary_request(void *param)
+{
+       struct mp_reply_bundle *bundle = param;
+       const struct rte_mp_msg *msg = &bundle->msg;
+       const struct eal_dev_mp_req *req =
+               (const struct eal_dev_mp_req *)msg->param;
+       struct eal_dev_mp_req tmp_req;
+       struct rte_devargs *da;
+       struct rte_device *dev;
+       struct rte_bus *bus;
+       int ret = 0;
+
+       tmp_req = *req;
+
+       if (req->t == EAL_DEV_REQ_TYPE_ATTACH) {
+               ret = local_dev_probe(req->devargs, &dev);
+               if (ret != 0) {
+                       RTE_LOG(ERR, EAL, "Failed to hotplug add device on primary\n");
+                       if (ret != -EEXIST)
+                               goto finish;
+               }
+               ret = eal_dev_hotplug_request_to_secondary(&tmp_req);
+               if (ret != 0) {
+                       RTE_LOG(ERR, EAL, "Failed to send hotplug request to secondary\n");
+                       ret = -ENOMSG;
+                       goto rollback;
+               }
+               if (tmp_req.result != 0) {
+                       ret = tmp_req.result;
+                       RTE_LOG(ERR, EAL, "Failed to hotplug add device on secondary\n");
+                       if (ret != -EEXIST)
+                               goto rollback;
+               }
+       } else if (req->t == EAL_DEV_REQ_TYPE_DETACH) {
+               da = calloc(1, sizeof(*da));
+               if (da == NULL) {
+                       ret = -ENOMEM;
+                       goto finish;
+               }
+
+               ret = rte_devargs_parse(da, req->devargs);
+               if (ret != 0)
+                       goto finish;
+
+               ret = eal_dev_hotplug_request_to_secondary(&tmp_req);
+               if (ret != 0) {
+                       RTE_LOG(ERR, EAL, "Failed to send hotplug request to secondary\n");
+                       ret = -ENOMSG;
+                       goto rollback;
+               }
+
+               bus = rte_bus_find_by_name(da->bus->name);
+               if (bus == NULL) {
+                       RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", da->bus->name);
+                       ret = -ENOENT;
+                       goto finish;
+               }
+
+               dev = bus->find_device(NULL, cmp_dev_name, da->name);
+               if (dev == NULL) {
+                       RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", da->name);
+                       ret = -ENOENT;
+                       goto finish;
+               }
+
+               if (tmp_req.result != 0) {
+                       RTE_LOG(ERR, EAL, "Failed to hotplug remove device on secondary\n");
+                       ret = tmp_req.result;
+                       if (ret != -ENOENT)
+                               goto rollback;
+               }
+
+               ret = local_dev_remove(dev);
+               if (ret != 0) {
+                       RTE_LOG(ERR, EAL, "Failed to hotplug remove device on primary\n");
+                       if (ret != -ENOENT)
+                               goto rollback;
+               }
+       } else {
+               RTE_LOG(ERR, EAL, "unsupported secondary to primary request\n");
+               ret = -ENOTSUP;
+       }
+       goto finish;
+
+rollback:
+       if (req->t == EAL_DEV_REQ_TYPE_ATTACH) {
+               tmp_req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK;
+               eal_dev_hotplug_request_to_secondary(&tmp_req);
+               local_dev_remove(dev);
+       } else {
+               tmp_req.t = EAL_DEV_REQ_TYPE_DETACH_ROLLBACK;
+               eal_dev_hotplug_request_to_secondary(&tmp_req);
+       }
+
+finish:
+       ret = send_response_to_secondary(&tmp_req, ret, bundle->peer);
+       if (ret)
+               RTE_LOG(ERR, EAL, "failed to send response to secondary\n");
+
+       free(bundle->peer);
+       free(bundle);
+}
+
+static int
+handle_secondary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+       struct mp_reply_bundle *bundle;
+       const struct eal_dev_mp_req *req =
+               (const struct eal_dev_mp_req *)msg->param;
+       int ret = 0;
+
+       bundle = malloc(sizeof(*bundle));
+       if (bundle == NULL) {
+               RTE_LOG(ERR, EAL, "not enough memory\n");
+               return send_response_to_secondary(req, -ENOMEM, peer);
+       }
+
+       bundle->msg = *msg;
+       /**
+        * We need to send the reply on the interrupt thread, but peer
+        * can't be passed along directly, so this is a temporary hack,
+        * to be fixed when it is ready.
+        */
+       bundle->peer = strdup(peer);
+
+       /**
+        * We are in the IPC callback thread; sync IPC is not allowed
+        * here as it would deadlock, so we delegate the task to the
+        * interrupt thread.
+        */
+       ret = rte_eal_alarm_set(1, __handle_secondary_request, bundle);
+       if (ret != 0) {
+               RTE_LOG(ERR, EAL, "failed to add mp task\n");
+               return send_response_to_secondary(req, ret, peer);
+       }
+       return 0;
+}
+
+static void __handle_primary_request(void *param)
+{
+       struct mp_reply_bundle *bundle = param;
+       struct rte_mp_msg *msg = &bundle->msg;
+       const struct eal_dev_mp_req *req =
+               (const struct eal_dev_mp_req *)msg->param;
+       struct rte_mp_msg mp_resp;
+       struct eal_dev_mp_req *resp =
+               (struct eal_dev_mp_req *)mp_resp.param;
+       struct rte_devargs *da;
+       struct rte_device *dev;
+       struct rte_bus *bus;
+       int ret = 0;
+
+       memset(&mp_resp, 0, sizeof(mp_resp));
+
+       switch (req->t) {
+       case EAL_DEV_REQ_TYPE_ATTACH:
+       case EAL_DEV_REQ_TYPE_DETACH_ROLLBACK:
+               ret = local_dev_probe(req->devargs, &dev);
+               break;
+       case EAL_DEV_REQ_TYPE_DETACH:
+       case EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK:
+               da = calloc(1, sizeof(*da));
+               if (da == NULL) {
+                       ret = -ENOMEM;
+                       goto quit;
+               }
+
+               ret = rte_devargs_parse(da, req->devargs);
+               if (ret != 0)
+                       goto quit;
+
+               bus = rte_bus_find_by_name(da->bus->name);
+               if (bus == NULL) {
+                       RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n", da->bus->name);
+                       ret = -ENOENT;
+                       goto quit;
+               }
+
+               dev = bus->find_device(NULL, cmp_dev_name, da->name);
+               if (dev == NULL) {
+                       RTE_LOG(ERR, EAL, "Cannot find plugged device (%s)\n", da->name);
+                       ret = -ENOENT;
+                       goto quit;
+               }
+
+               ret = local_dev_remove(dev);
+quit:
+               break;
+       default:
+               ret = -EINVAL;
+       }
+
+       strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+       mp_resp.len_param = sizeof(*req);
+       memcpy(resp, req, sizeof(*resp));
+       resp->result = ret;
+       if (rte_mp_reply(&mp_resp, bundle->peer) < 0)
+               RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+
+       free(bundle->peer);
+       free(bundle);
+}
+
+static int
+handle_primary_request(const struct rte_mp_msg *msg, const void *peer)
+{
+       struct rte_mp_msg mp_resp;
+       const struct eal_dev_mp_req *req =
+               (const struct eal_dev_mp_req *)msg->param;
+       struct eal_dev_mp_req *resp =
+               (struct eal_dev_mp_req *)mp_resp.param;
+       struct mp_reply_bundle *bundle;
+       int ret = 0;
+
+       memset(&mp_resp, 0, sizeof(mp_resp));
+       strlcpy(mp_resp.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_resp.name));
+       mp_resp.len_param = sizeof(*req);
+       memcpy(resp, req, sizeof(*resp));
+
+       bundle = calloc(1, sizeof(*bundle));
+       if (bundle == NULL) {
+               resp->result = -ENOMEM;
+               ret = rte_mp_reply(&mp_resp, peer);
+               if (ret)
+                       RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+               return ret;
+       }
+
+       bundle->msg = *msg;
+       /**
+        * We need to send the reply on the interrupt thread, but peer
+        * can't be passed along directly, so this is a temporary hack,
+        * to be fixed when it is ready.
+        */
+       bundle->peer = (void *)strdup(peer);
+
+       /**
+        * We are in the IPC callback thread; sync IPC is not allowed
+        * here as it would deadlock, so we delegate the task to the
+        * interrupt thread.
+        */
+       ret = rte_eal_alarm_set(1, __handle_primary_request, bundle);
+       if (ret != 0) {
+               resp->result = ret;
+               ret = rte_mp_reply(&mp_resp, peer);
+               if (ret != 0) {
+                       RTE_LOG(ERR, EAL, "failed to send reply to primary request\n");
+                       return ret;
+               }
+       }
+       return 0;
+}
+
+int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req)
+{
+       struct rte_mp_msg mp_req;
+       struct rte_mp_reply mp_reply;
+       struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0};
+       struct eal_dev_mp_req *resp;
+       int ret;
+
+       memset(&mp_req, 0, sizeof(mp_req));
+       memcpy(mp_req.param, req, sizeof(*req));
+       mp_req.len_param = sizeof(*req);
+       strlcpy(mp_req.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name));
+
+       ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
+       if (ret || mp_reply.nb_received != 1) {
+               RTE_LOG(ERR, EAL, "cannot send request to primary");
+               if (!ret)
+                       return -1;
+               return ret;
+       }
+
+       resp = (struct eal_dev_mp_req *)mp_reply.msgs[0].param;
+       req->result = resp->result;
+
+       return ret;
+}
+
+int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req)
+{
+       struct rte_mp_msg mp_req;
+       struct rte_mp_reply mp_reply;
+       struct timespec ts = {.tv_sec = MP_TIMEOUT_S, .tv_nsec = 0};
+       int ret;
+       int i;
+
+       memset(&mp_req, 0, sizeof(mp_req));
+       memcpy(mp_req.param, req, sizeof(*req));
+       mp_req.len_param = sizeof(*req);
+       strlcpy(mp_req.name, EAL_DEV_MP_ACTION_REQUEST, sizeof(mp_req.name));
+
+       ret = rte_mp_request_sync(&mp_req, &mp_reply, &ts);
+       if (ret != 0) {
+               RTE_LOG(ERR, EAL, "rte_mp_request_sync failed\n");
+               return ret;
+       }
+
+       if (mp_reply.nb_sent != mp_reply.nb_received) {
+               RTE_LOG(ERR, EAL, "not all secondary reply\n");
+               return -1;
+       }
+
+       req->result = 0;
+       for (i = 0; i < mp_reply.nb_received; i++) {
+               struct eal_dev_mp_req *resp =
+                       (struct eal_dev_mp_req *)mp_reply.msgs[i].param;
+               if (resp->result != 0) {
+                       req->result = resp->result;
+                       if (req->t == EAL_DEV_REQ_TYPE_ATTACH &&
+                               req->result != -EEXIST)
+                               break;
+                       if (req->t == EAL_DEV_REQ_TYPE_DETACH &&
+                               req->result != -ENOENT)
+                               break;
+               }
+       }
+
+       return 0;
+}
+
+int rte_mp_dev_hotplug_init(void)
+{
+       int ret;
+
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               ret = rte_mp_action_register(EAL_DEV_MP_ACTION_REQUEST,
+                                       handle_secondary_request);
+               if (ret != 0) {
+                       RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+                               EAL_DEV_MP_ACTION_REQUEST);
+                       return ret;
+               }
+       } else {
+               ret = rte_mp_action_register(EAL_DEV_MP_ACTION_REQUEST,
+                                       handle_primary_request);
+               if (ret != 0) {
+                       RTE_LOG(ERR, EAL, "Couldn't register '%s' action\n",
+                               EAL_DEV_MP_ACTION_REQUEST);
+                       return ret;
+               }
+       }
+
+       return 0;
+}
diff --git a/lib/librte_eal/common/hotplug_mp.h b/lib/librte_eal/common/hotplug_mp.h
new file mode 100644 (file)
index 0000000..597fde3
--- /dev/null
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _HOTPLUG_MP_H_
+#define _HOTPLUG_MP_H_
+
+#include "rte_dev.h"
+#include "rte_bus.h"
+
+#define EAL_DEV_MP_ACTION_REQUEST      "eal_dev_mp_request"
+#define EAL_DEV_MP_ACTION_RESPONSE     "eal_dev_mp_response"
+
+#define EAL_DEV_MP_DEV_NAME_MAX_LEN RTE_DEV_NAME_MAX_LEN
+#define EAL_DEV_MP_BUS_NAME_MAX_LEN 32
+#define EAL_DEV_MP_DEV_ARGS_MAX_LEN 128
+
+enum eal_dev_req_type {
+       EAL_DEV_REQ_TYPE_ATTACH,
+       EAL_DEV_REQ_TYPE_DETACH,
+       EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK,
+       EAL_DEV_REQ_TYPE_DETACH_ROLLBACK,
+};
+
+struct eal_dev_mp_req {
+       enum eal_dev_req_type t;
+       char devargs[EAL_DEV_MP_DEV_ARGS_MAX_LEN];
+       int result;
+};
+
+/**
+ * This is a synchronous wrapper for the secondary process to send
+ * a request to the primary process; it is invoked when an attach
+ * or detach request is issued from a secondary process.
+ */
+int eal_dev_hotplug_request_to_primary(struct eal_dev_mp_req *req);
+
+/**
+ * This is a synchronous wrapper for the primary process to send
+ * a request to the secondary processes; it is invoked when an attach
+ * or detach request is issued from the primary process, or forwarded
+ * from a secondary process.
+ */
+int eal_dev_hotplug_request_to_secondary(struct eal_dev_mp_req *req);
+
+
+#endif /* _HOTPLUG_MP_H_ */
index c4f974f..859b097 100644 (file)
@@ -29,8 +29,8 @@ extern "C" {
 #ifndef RTE_ARM_EAL_RDTSC_USE_PMU
 
 /**
- * This call is easily portable to any ARM architecture, however,
- * it may be damn slow and inprecise for some tasks.
+ * This call is easily portable to any architecture, however,
+ * it may require a system call and be imprecise for some tasks.
  */
 static inline uint64_t
 __rte_rdtsc_syscall(void)
diff --git a/lib/librte_eal/common/include/arch/ppc_64/meson.build b/lib/librte_eal/common/include/arch/ppc_64/meson.build
new file mode 100644 (file)
index 0000000..00f9611
--- /dev/null
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Luca Boccassi <bluca@debian.org>
+
+install_headers(
+       'rte_atomic.h',
+       'rte_byteorder.h',
+       'rte_cpuflags.h',
+       'rte_cycles.h',
+       'rte_io.h',
+       'rte_memcpy.h',
+       'rte_pause.h',
+       'rte_prefetch.h',
+       'rte_rwlock.h',
+       'rte_spinlock.h',
+       'rte_vect.h',
+       subdir: get_option('include_subdir_arch'))
index 8bd8357..16e47ce 100644 (file)
@@ -9,10 +9,17 @@
 extern "C" {
 #endif
 
+#include "rte_atomic.h"
+
 #include "generic/rte_pause.h"
 
 static inline void rte_pause(void)
 {
+       /* Set hardware multi-threading low priority */
+       asm volatile("or 1,1,1");
+       /* Set hardware multi-threading medium priority */
+       asm volatile("or 2,2,2");
+       rte_compiler_barrier();
 }
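
On POWER the "or N,N,N" forms are priority-hint no-ops: "or 1,1,1" drops
the hardware thread to low SMT priority while spinning and "or 2,2,2"
restores medium priority, so a polling loop yields pipeline resources to
its SMT siblings. Typical use, sketched:

    volatile int ready; /* set by another lcore, assumed */

    while (!ready)
            rte_pause(); /* yields SMT priority on each iteration */
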
 
 #ifdef __cplusplus
index 0ff1af5..ac379e8 100644 (file)
@@ -13,6 +13,7 @@
  */
 
 #include <stdint.h>
+#include <rte_compat.h>
 #include <rte_debug.h>
 #include <rte_atomic.h>
 
@@ -157,6 +158,16 @@ rte_delay_ms(unsigned ms)
  */
 void rte_delay_us_block(unsigned int us);
 
+/**
+ * Delay function that uses system sleep.
+ * Does not block the CPU core.
+ *
+ * @param us
+ *   Number of microseconds to wait.
+ */
+void __rte_experimental
+rte_delay_us_sleep(unsigned int us);
+
 /**
  * Replace rte_delay_us with user defined function.
  *
index d9facc6..7a36ce7 100644 (file)
@@ -88,7 +88,7 @@ __rte_bitmap_index1_inc(struct rte_bitmap *bmp)
 static inline uint64_t
 __rte_bitmap_mask1_get(struct rte_bitmap *bmp)
 {
-       return (~1lu) << bmp->offset1;
+       return (~1llu) << bmp->offset1;
 }
 
 static inline void
@@ -317,7 +317,7 @@ rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos)
        index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2;
        offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK;
        slab2 = bmp->array2 + index2;
-       return (*slab2) & (1lu << offset2);
+       return (*slab2) & (1llu << offset2);
 }
 
 /**
@@ -342,8 +342,8 @@ rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos)
        slab2 = bmp->array2 + index2;
        slab1 = bmp->array1 + index1;
 
-       *slab2 |= 1lu << offset2;
-       *slab1 |= 1lu << offset1;
+       *slab2 |= 1llu << offset2;
+       *slab1 |= 1llu << offset1;
 }
 
 /**
@@ -370,7 +370,7 @@ rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab)
        slab1 = bmp->array1 + index1;
 
        *slab2 |= slab;
-       *slab1 |= 1lu << offset1;
+       *slab1 |= 1llu << offset1;
 }
 
 static inline uint64_t
@@ -408,7 +408,7 @@ rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos)
        slab2 = bmp->array2 + index2;
 
        /* Return if array2 slab is not all-zeros */
-       *slab2 &= ~(1lu << offset2);
+       *slab2 &= ~(1llu << offset2);
        if (*slab2){
                return;
        }
@@ -424,7 +424,7 @@ rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos)
        index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2);
        offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK;
        slab1 = bmp->array1 + index1;
-       *slab1 &= ~(1lu << offset1);
+       *slab1 &= ~(1llu << offset1);
 
        return;
 }
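
The 1lu -> 1llu change matters on 32-bit targets, where 'long' is 32 bits
wide: shifting a 32-bit 1 by a slab offset that can reach 63 is undefined
behaviour and silently drops the high bits. A minimal illustration:

    #include <stdint.h>

    uint64_t bad  = 1lu  << 40; /* UB on ILP32: shift exceeds width of long */
    uint64_t good = 1llu << 40; /* always evaluated in 64-bit arithmetic */
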
index b7b5b08..6be4b5c 100644 (file)
@@ -167,6 +167,35 @@ typedef int (*rte_bus_unplug_t)(struct rte_device *dev);
  */
 typedef int (*rte_bus_parse_t)(const char *name, void *addr);
 
+/**
+ * Implement a specific hot-unplug handler, which is responsible for
+ * handling the failure when a device is hot-unplugged. When a
+ * hot-unplug event is detected, this function can be called to handle
+ * the failure and avoid an application crash.
+ * @param dev
+ *     Pointer of the device structure.
+ *
+ * @return
+ *     0 on success.
+ *     !0 on error.
+ */
+typedef int (*rte_bus_hot_unplug_handler_t)(struct rte_device *dev);
+
+/**
+ * Implement a specific sigbus handler, which is responsible for handling
+ * a sigbus error, whether it is an original memory error or a memory
+ * error caused by a device being hot-unplugged. When a sigbus error is
+ * captured, this function can be called to handle it.
+ * @param failure_addr
+ *     Pointer of the fault address of the sigbus error.
+ *
+ * @return
+ *     0 if the hot-unplug sigbus was handled successfully.
+ *     1 if it was not processed, because it is a generic sigbus error.
+ *     -1 if handling the hot-unplug sigbus failed.
+ */
+typedef int (*rte_bus_sigbus_handler_t)(const void *failure_addr);
+
 /**
  * Bus scan policies
  */
@@ -212,6 +241,11 @@ struct rte_bus {
        struct rte_bus_conf conf;    /**< Bus configuration */
        rte_bus_get_iommu_class_t get_iommu_class; /**< Get iommu class */
        rte_dev_iterate_t dev_iterate; /**< Device iterator. */
+       rte_bus_hot_unplug_handler_t hot_unplug_handler;
+                               /**< handle hot-unplug failure on the bus */
+       rte_bus_sigbus_handler_t sigbus_handler;
+                               /**< handle sigbus error on the bus */
+
 };
 
 /**
index 069c13e..cba7bbc 100644 (file)
@@ -68,6 +68,11 @@ typedef uint16_t unaligned_uint16_t;
 /******* Macro to mark functions and fields scheduled for removal *****/
 #define __rte_deprecated       __attribute__((__deprecated__))
 
+/**
+ * Mark a function or variable as a weak reference.
+ */
+#define __rte_weak __attribute__((__weak__))
+
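
__rte_weak gives a portable spelling for weak symbols, typically used for
an overridable default implementation. A sketch with a hypothetical
function name:

    /* used unless another object file provides a strong definition */
    int __rte_weak
    platform_init(void)
    {
            return 0;
    }
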
 /*********** Macros to eliminate unused variable warnings ********/
 
 /**
@@ -164,6 +169,12 @@ static void __attribute__((destructor(RTE_PRIO(prio)), used)) func(void)
  */
 #define RTE_PTR_DIFF(ptr1, ptr2) ((uintptr_t)(ptr1) - (uintptr_t)(ptr2))
 
+/**
+ * Workaround to cast a const field of a structure to non-const type.
+ */
+#define RTE_CAST_FIELD(var, field, type) \
+       (*(type *)((uintptr_t)(var) + offsetof(typeof(*(var)), field)))
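
RTE_CAST_FIELD strips constness by recomputing the field's address from
its offset rather than casting the field directly. A hedged sketch with an
illustrative structure:

    struct desc {
            const char *name; /* normally immutable */
    };

    struct desc d = { .name = "old" };

    /* overwrite the const field, e.g. from test code */
    RTE_CAST_FIELD(&d, name, const char *) = "new";
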
+
 /*********** Macros/static functions for doing alignment ********/
 
 
index b80a805..cd6c187 100644 (file)
@@ -39,7 +39,7 @@ struct rte_dev_event {
        char *devname;                  /**< device name */
 };
 
-typedef void (*rte_dev_event_cb_fn)(char *device_name,
+typedef void (*rte_dev_event_cb_fn)(const char *device_name,
                                        enum rte_dev_event_type event,
                                        void *cb_arg);
 
@@ -156,63 +156,67 @@ struct rte_driver {
 struct rte_device {
        TAILQ_ENTRY(rte_device) next; /**< Next device */
        const char *name;             /**< Device name */
-       const struct rte_driver *driver;/**< Associated driver */
+       const struct rte_driver *driver; /**< Driver assigned after probing */
+       const struct rte_bus *bus;    /**< Bus handle assigned on scan */
        int numa_node;                /**< NUMA node connection */
-       struct rte_devargs *devargs;  /**< Device user arguments */
+       struct rte_devargs *devargs;  /**< Arguments for latest probing */
 };
 
 /**
- * Attach a device to a registered driver.
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
  *
- * @param name
- *   The device name, that refers to a pci device (or some private
- *   way of designating a vdev device). Based on this device name, eal
- *   will identify a driver capable of handling it and pass it to the
- *   driver probing function.
- * @param devargs
- *   Device arguments to be passed to the driver.
- * @return
- *   0 on success, negative on error.
- */
-__rte_deprecated
-int rte_eal_dev_attach(const char *name, const char *devargs);
-
-/**
- * Detach a device from its driver.
+ * Query status of a device.
  *
  * @param dev
- *   A pointer to a rte_device structure.
+ *   Generic device pointer.
  * @return
- *   0 on success, negative on error.
+ *   (int)true if already probed successfully, 0 otherwise.
  */
-__rte_deprecated
-int rte_eal_dev_detach(struct rte_device *dev);
+__rte_experimental
+int rte_dev_is_probed(const struct rte_device *dev);
 
 /**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
  * Hotplug add a given device to a specific bus.
  *
+ * In multi-process, it will request other processes to add the same device.
+ * A failure, in any process, will roll back the action.
+ *
  * @param busname
  *   The bus name the device is added to.
  * @param devname
  *   The device name. Based on this device name, eal will identify a driver
  *   capable of handling it and pass it to the driver probing function.
- * @param devargs
+ * @param drvargs
  *   Device arguments to be passed to the driver.
  * @return
  *   0 on success, negative on error.
  */
-int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devname,
-                       const char *devargs);
+int rte_eal_hotplug_add(const char *busname, const char *devname,
+                       const char *drvargs);
 
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
  *
+ * Add matching devices.
+ *
+ * In multi-process, it will request other processes to add the same device.
+ * A failure, in any process, will roll back the action.
+ *
+ * @param devargs
+ *   Device arguments including bus, class and driver properties.
+ * @return
+ *   0 on success, negative on error.
+ */
+int __rte_experimental rte_dev_probe(const char *devargs);
+
+/**
  * Hotplug remove a given device from a specific bus.
  *
+ * In multi-process, it will request other processes to remove the same device.
+ * A failure, in any process, will roll back the action.
+ *
  * @param busname
  *   The bus name the device is removed from.
  * @param devname
@@ -220,8 +224,23 @@ int __rte_experimental rte_eal_hotplug_add(const char *busname, const char *devn
  * @return
  *   0 on success, negative on error.
  */
-int __rte_experimental rte_eal_hotplug_remove(const char *busname,
-                                         const char *devname);
+int rte_eal_hotplug_remove(const char *busname, const char *devname);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Remove one device.
+ *
+ * In multi-process, it will request other processes to remove the same device.
+ * A failure, in any process, will roll back the action.
+ *
+ * @param dev
+ *   Data structure of the device to remove.
+ * @return
+ *   0 on success, negative on error.
+ */
+int __rte_experimental rte_dev_remove(struct rte_device *dev);
 
 /**
  * Device comparison function.
@@ -434,6 +453,22 @@ rte_dev_event_callback_unregister(const char *device_name,
                                  rte_dev_event_cb_fn cb_fn,
                                  void *cb_arg);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Executes all the user application registered callbacks for
+ * the specific device.
+ *
+ * @param device_name
+ *  The device name.
+ * @param event
+ *  The device event type.
+ */
+void __rte_experimental
+rte_dev_event_callback_process(const char *device_name,
+                              enum rte_dev_event_type event);
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
@@ -460,4 +495,30 @@ rte_dev_event_monitor_start(void);
 int __rte_experimental
 rte_dev_event_monitor_stop(void);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Enable hotplug handling for devices.
+ *
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int __rte_experimental
+rte_dev_hotplug_handle_enable(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Disable hotplug handling for devices.
+ *
+ * @return
+ *   - On success, zero.
+ *   - On failure, a negative value.
+ */
+int __rte_experimental
+rte_dev_hotplug_handle_disable(void);
+
 #endif /* _RTE_DEV_H_ */
index 097a4ce..b1f121f 100644 (file)
@@ -66,36 +66,6 @@ struct rte_devargs {
        const char *data; /**< Device string storage. */
 };
 
-/**
- * @deprecated
- * Parse a devargs string.
- *
- * For PCI devices, the format of arguments string is "PCI_ADDR" or
- * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0",
- * "04:00.0,arg=val".
- *
- * For virtual devices, the format of arguments string is "DRIVER_NAME*"
- * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "net_ring",
- * "net_ring0", "net_pmdAnything,arg=0:arg2=1".
- *
- * The function parses the arguments string to get driver name and driver
- * arguments.
- *
- * @param devargs_str
- *   The arguments as given by the user.
- * @param drvname
- *   The pointer to the string to store parsed driver name.
- * @param drvargs
- *   The pointer to the string to store parsed driver arguments.
- *
- * @return
- *   - 0 on success
- *   - A negative value on error
- */
-__rte_deprecated
-int rte_eal_parse_devargs_str(const char *devargs_str,
-                               char **drvname, char **drvargs);
-
 /**
  * Parse a device string.
  *
@@ -201,33 +171,13 @@ rte_devargs_insert(struct rte_devargs *da);
 __rte_experimental
 int rte_devargs_add(enum rte_devtype devtype, const char *devargs_str);
 
-/**
- * @deprecated
- * Add a device to the user device list
- * See rte_devargs_parse() for details.
- *
- * @param devtype
- *   The type of the device.
- * @param devargs_str
- *   The arguments as given by the user.
- *
- * @return
- *   - 0 on success
- *   - A negative value on error
- */
-__rte_deprecated
-int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str);
-
 /**
  * Remove a device from the user device list.
  * Its resources are freed.
  * If the devargs cannot be found, nothing happens.
  *
- * @param busname
- *   bus name of the devargs to remove.
- *
- * @param devname
- *   device name of the devargs to remove.
+ * @param devargs
+ *   The instance or a copy of devargs to remove.
  *
  * @return
  *   0 on success.
@@ -235,8 +185,7 @@ int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str);
  *   >0 if the devargs was not within the user device list.
  */
 __rte_experimental
-int rte_devargs_remove(const char *busname,
-                      const char *devname);
+int rte_devargs_remove(struct rte_devargs *devargs);
 
 /**
  * Count the number of user devices of a specified type
@@ -251,20 +200,6 @@ __rte_experimental
 unsigned int
 rte_devargs_type_count(enum rte_devtype devtype);
 
-/**
- * @deprecated
- * Count the number of user devices of a specified type
- *
- * @param devtype
- *   The type of the devices to counted.
- *
- * @return
- *   The number of devices.
- */
-__rte_deprecated
-unsigned int
-rte_eal_devargs_type_count(enum rte_devtype devtype);
-
 /**
  * This function dumps the list of user device and their arguments.
  *
@@ -274,16 +209,6 @@ rte_eal_devargs_type_count(enum rte_devtype devtype);
 __rte_experimental
 void rte_devargs_dump(FILE *f);
 
-/**
- * @deprecated
- * This function dumps the list of user device and their arguments.
- *
- * @param f
- *   A pointer to a file for output
- */
-__rte_deprecated
-void rte_eal_devargs_dump(FILE *f);
-
 /**
  * Find next rte_devargs matching the provided bus name.
  *
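
A minimal sketch of the reworked removal flow above, where rte_devargs_remove() now takes the parsed instance (or a copy) instead of bus and device names; the parse step assumes a device string such as "net_null0":

#include <string.h>
#include <rte_devargs.h>

static int
devargs_remove_sketch(const char *devstr)
{
	struct rte_devargs da;

	memset(&da, 0, sizeof(da));
	if (rte_devargs_parse(&da, devstr) < 0)
		return -1;
	/* drop the corresponding entry from the user device list */
	return rte_devargs_remove(&da);
}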
index e114dcb..a0cedd5 100644 (file)
@@ -316,7 +316,7 @@ rte_mp_sendmsg(struct rte_mp_msg *msg);
  *
  * @param reply
  *   The reply argument will be for storing all the replied messages;
- *   the caller is responsible for free reply->replies.
+ *   the caller is responsible for freeing reply->msgs.
  *
  * @param ts
  *   The ts argument specifies how long we can wait for the peer(s) to reply.
@@ -377,6 +377,15 @@ rte_mp_request_async(struct rte_mp_msg *req, const struct timespec *ts,
 int __rte_experimental
 rte_mp_reply(struct rte_mp_msg *msg, const char *peer);
 
+/**
+ * Register all mp action callbacks for hotplug.
+ *
+ * @return
+ *   0 on success, negative on error.
+ */
+int __rte_experimental
+rte_mp_dev_hotplug_init(void);
+
 /**
  * Usage function typedef used by the application usage function.
  *
@@ -498,6 +507,15 @@ enum rte_iova_mode rte_eal_iova_mode(void);
 const char *
 rte_eal_mbuf_user_pool_ops(void);
 
+/**
+ * Get the runtime directory of DPDK
+ *
+ * @return
+ *  The runtime directory path of DPDK
+ */
+const char *
+rte_eal_get_runtime_dir(void);
+
 #ifdef __cplusplus
 }
 #endif
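
Given the corrected note above, a minimal synchronous request sketch showing that the caller frees reply.msgs; the action name "example_action" is made up for illustration:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <rte_eal.h>

static int
mp_request_sketch(void)
{
	struct rte_mp_msg req;
	struct rte_mp_reply reply;
	struct timespec ts = { .tv_sec = 5, .tv_nsec = 0 };
	int i;

	memset(&req, 0, sizeof(req));
	memset(&reply, 0, sizeof(reply));
	snprintf(req.name, sizeof(req.name), "example_action");

	if (rte_mp_request_sync(&req, &reply, &ts) < 0)
		return -1;

	for (i = 0; i < reply.nb_received; i++)
		printf("reply %d from %s\n", i, reply.msgs[i].name);

	free(reply.msgs);	/* caller owns the replies array */
	return 0;
}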
index 6eb4932..9d302f4 100644 (file)
@@ -35,6 +35,7 @@ enum rte_intr_handle_type {
        RTE_INTR_HANDLE_EXT,          /**< external handler */
        RTE_INTR_HANDLE_VDEV,         /**< virtual device */
        RTE_INTR_HANDLE_DEV_EVENT,    /**< device event handle */
+       RTE_INTR_HANDLE_VFIO_REQ,     /**< VFIO request handle */
        RTE_INTR_HANDLE_MAX           /**< count of elements */
 };
 
index aff0688..84aabe3 100644 (file)
@@ -30,9 +30,11 @@ struct rte_memseg_list {
                uint64_t addr_64;
                /**< Makes sure addr is always 64-bits */
        };
-       int socket_id; /**< Socket ID for all memsegs in this list. */
        uint64_t page_sz; /**< Page size for all memsegs in this list. */
+       int socket_id; /**< Socket ID for all memsegs in this list. */
        volatile uint32_t version; /**< version number for multiprocess sync. */
+       size_t len; /**< Length of memory area covered by this memseg list. */
+       unsigned int external; /**< 1 if this list points to external memory */
        struct rte_fbarray memseg_arr;
 };
 
@@ -70,13 +72,23 @@ struct rte_mem_config {
 
        struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */
 
-       /* Heaps of Malloc per socket */
-       struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES];
+       /* Heaps of Malloc */
+       struct malloc_heap malloc_heaps[RTE_MAX_HEAPS];
+
+       /* next socket ID for external malloc heap */
+       int next_socket_id;
 
        /* address of mem_config in primary process. used to map shared config into
         * exact same address the primary process maps it.
         */
        uint64_t mem_cfg_addr;
+
+       /* legacy mem and single file segments options are shared */
+       uint32_t legacy_mem;
+       uint32_t single_file_segments;
+
+       /* keeps the more restricted dma mask */
+       uint8_t dma_maskbits;
 } __attribute__((__packed__));
 
 
index a9fb7e4..7249e6a 100644 (file)
@@ -263,6 +263,198 @@ int
 rte_malloc_get_socket_stats(int socket,
                struct rte_malloc_socket_stats *socket_stats);
 
+/**
+ * Add memory chunk to a heap with specified name.
+ *
+ * @note Multiple memory chunks can be added to the same heap
+ *
+ * @note Before accessing this memory in other processes, it needs to be
+ *   attached in each of those processes by calling
+ *   ``rte_malloc_heap_memory_attach`` in each other process.
+ *
+ * @note Memory must be previously allocated for DPDK to be able to use it as a
+ *   malloc heap. Failing to do so will result in undefined behavior, up to and
+ *   including segmentation faults.
+ *
+ * @note Calling this function will erase any contents already present at the
+ *   supplied memory address.
+ *
+ * @param heap_name
+ *   Name of the heap to add memory chunk to
+ * @param va_addr
+ *   Start of virtual area to add to the heap
+ * @param len
+ *   Length of virtual area to add to the heap
+ * @param iova_addrs
+ *   Array of page IOVA addresses corresponding to each page in this memory
+ *   area. Can be NULL, in which case page IOVA addresses will be set to
+ *   RTE_BAD_IOVA.
+ * @param n_pages
+ *   Number of elements in the iova_addrs array. Ignored if ``iova_addrs``
+ *   is NULL.
+ * @param page_sz
+ *   Page size of the underlying memory
+ *
+ * @return
+ *   - 0 on success
+ *   - -1 in case of error, with rte_errno set to one of the following:
+ *     EINVAL - one of the parameters was invalid
+ *     EPERM  - attempted to add memory to a reserved heap
+ *     ENOSPC - no more space in internal config to store a new memory chunk
+ */
+int __rte_experimental
+rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
+               rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz);
+
+/**
+ * Remove memory chunk from heap with specified name.
+ *
+ * @note Memory chunk being removed must be the same as one that was added;
+ *   partially removing memory chunks is not supported
+ *
+ * @note Memory area must not contain any allocated elements to allow its
+ *   removal from the heap
+ *
+ * @note All other processes must detach from the memory chunk prior to it being
+ *   removed from the heap.
+ *
+ * @param heap_name
+ *   Name of the heap to remove memory from
+ * @param va_addr
+ *   Virtual address to remove from the heap
+ * @param len
+ *   Length of virtual area to remove from the heap
+ *
+ * @return
+ *   - 0 on success
+ *   - -1 in case of error, with rte_errno set to one of the following:
+ *     EINVAL - one of the parameters was invalid
+ *     EPERM  - attempted to remove memory from a reserved heap
+ *     ENOENT - heap or memory chunk was not found
+ *     EBUSY  - memory chunk still contains data
+ */
+int __rte_experimental
+rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t len);
+
+/**
+ * Attach to an already existing chunk of external memory in another process.
+ *
+ * @note This function must be called before any attempt is made to use an
+ *   already existing external memory chunk. This function does *not* need to
+ *   be called if a call to ``rte_malloc_heap_memory_add`` was made in the
+ *   current process.
+ *
+ * @param heap_name
+ *   Heap name to which this chunk of memory belongs
+ * @param va_addr
+ *   Start address of memory chunk to attach to
+ * @param len
+ *   Length of memory chunk to attach to
+ * @return
+ *   0 on successful attach
+ *   -1 on unsuccessful attach, with rte_errno set to indicate cause for error:
+ *     EINVAL - one of the parameters was invalid
+ *     EPERM  - attempted to attach memory to a reserved heap
+ *     ENOENT - heap or memory chunk was not found
+ */
+int __rte_experimental
+rte_malloc_heap_memory_attach(const char *heap_name, void *va_addr, size_t len);
+
+/**
+ * Detach from a chunk of external memory in secondary process.
+ *
+ * @note This function must be called before any attempt is made to remove
+ *   external memory from the heap in another process. This function does *not*
+ *   need to be called if ``rte_malloc_heap_memory_remove`` will be called in
+ *   the current process.
+ *
+ * @param heap_name
+ *   Heap name to which this chunk of memory belongs
+ * @param va_addr
+ *   Start address of memory chunk to detach from
+ * @param len
+ *   Length of memory chunk to detach from
+ * @return
+ *   0 on successful detach
+ *   -1 on unsuccessful detach, with rte_errno set to indicate cause for error:
+ *     EINVAL - one of the parameters was invalid
+ *     EPERM  - attempted to detach memory from a reserved heap
+ *     ENOENT - heap or memory chunk was not found
+ */
+int __rte_experimental
+rte_malloc_heap_memory_detach(const char *heap_name, void *va_addr, size_t len);
+
+/**
+ * Creates a new empty malloc heap with a specified name.
+ *
+ * @note Heaps created via this call will automatically get assigned a unique
+ *   socket ID, which can be found using ``rte_malloc_heap_get_socket()``
+ *
+ * @param heap_name
+ *   Name of the heap to create.
+ *
+ * @return
+ *   - 0 on successful creation
+ *   - -1 in case of error, with rte_errno set to one of the following:
+ *     EINVAL - ``heap_name`` was NULL, empty or too long
+ *     EEXIST - heap by name of ``heap_name`` already exists
+ *     ENOSPC - no more space in internal config to store a new heap
+ */
+int __rte_experimental
+rte_malloc_heap_create(const char *heap_name);
+
+/**
+ * Destroys a previously created malloc heap with specified name.
+ *
+ * @note This function will return a failure result if not all memory allocated
+ *   from the heap has been freed back to the heap
+ *
+ * @note This function will return a failure result if not all memory segments
+ *   were removed from the heap prior to its destruction
+ *
+ * @param heap_name
+ *   Name of the heap to destroy.
+ *
+ * @return
+ *   - 0 on success
+ *   - -1 in case of error, with rte_errno set to one of the following:
+ *     EINVAL - ``heap_name`` was NULL, empty or too long
+ *     ENOENT - heap by the name of ``heap_name`` was not found
+ *     EPERM  - attempting to destroy reserved heap
+ *     EBUSY  - heap still contains data
+ */
+int __rte_experimental
+rte_malloc_heap_destroy(const char *heap_name);
+
+/**
+ * Find socket ID corresponding to a named heap.
+ *
+ * @param name
+ *   Heap name to find socket ID for
+ * @return
+ *   Socket ID in case of success (a non-negative number)
+ *   -1 in case of error, with rte_errno set to one of the following:
+ *     EINVAL - ``name`` was NULL
+ *     ENOENT - heap identified by the name ``name`` was not found
+ */
+int __rte_experimental
+rte_malloc_heap_get_socket(const char *name);
+
+/**
+ * Check if a given socket ID refers to externally allocated memory.
+ *
+ * @note Passing SOCKET_ID_ANY will return 0.
+ *
+ * @param socket_id
+ *   Socket ID to check
+ * @return
+ *   1 if socket ID refers to externally allocated memory
+ *   0 if socket ID refers to internal DPDK memory
+ *   -1 if socket ID is invalid
+ */
+int __rte_experimental
+rte_malloc_heap_socket_is_external(int socket_id);
+
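+
Tying the new heap-management calls together, a sketch of the intended workflow; the anonymous mmap() backing, heap name and sizes are illustrative, and real users of DMA-capable memory would pass genuine page IOVA addresses:

#include <sys/mman.h>
#include <rte_malloc.h>

#define EXT_LEN  (16u << 20)	/* 16 MiB of example memory */
#define EXT_PGSZ (2u << 20)	/* treated as 2 MiB "pages" */

static void *
ext_heap_sketch(void)
{
	void *va;
	int socket;

	va = mmap(NULL, EXT_LEN, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (va == MAP_FAILED)
		return NULL;

	if (rte_malloc_heap_create("ext_heap") < 0)
		goto fail;
	/* NULL iova_addrs: page IOVAs default to RTE_BAD_IOVA */
	if (rte_malloc_heap_memory_add("ext_heap", va, EXT_LEN,
			NULL, 0, EXT_PGSZ) < 0)
		goto fail;

	socket = rte_malloc_heap_get_socket("ext_heap");
	if (socket < 0)
		goto fail;
	/* regular allocation API, steered to the heap via its socket ID */
	return rte_malloc_socket("example", 4096, 0, socket);
fail:
	munmap(va, EXT_LEN);
	return NULL;
}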
 /**
  * Dump statistics.
  *
index d43fa90..4a7e0eb 100644 (file)
@@ -12,6 +12,7 @@
 
 /* Number of free lists per heap, grouped by size. */
 #define RTE_HEAP_NUM_FREELISTS  13
+#define RTE_HEAP_NAME_MAX_LEN 32
 
 /* dummy definition, for pointers */
 struct malloc_elem;
@@ -26,7 +27,9 @@ struct malloc_heap {
        struct malloc_elem *volatile last;
 
        unsigned alloc_count;
+       unsigned int socket_id;
        size_t total_size;
+       char name[RTE_HEAP_NAME_MAX_LEN];
 } __rte_cache_aligned;
 
 #endif /* _RTE_MALLOC_HEAP_H_ */
index c4b7f4c..ce93705 100644 (file)
@@ -215,6 +215,9 @@ typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
  * @note This function read-locks the memory hotplug subsystem, and thus cannot
  *       be used within memory-related callback functions.
  *
+ * @note This function will also walk through externally allocated segments. It
+ *       is up to the user to decide whether to skip these segments.
+ *
  * @param func
  *   Iterator function
  * @param arg
@@ -233,6 +236,9 @@ rte_memseg_walk(rte_memseg_walk_t func, void *arg);
  * @note This function read-locks the memory hotplug subsystem, and thus cannot
  *       be used within memory-related callback functions.
  *
+ * @note This function will also walk through externally allocated segments. It
+ *       is up to the user to decide whether to skip these segments.
+ *
  * @param func
  *   Iterator function
  * @param arg
@@ -251,6 +257,9 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
  * @note This function read-locks the memory hotplug subsystem, and thus cannot
  *       be used within memory-related callback functions.
  *
+ * @note This function will also walk through externally allocated segments. It
+ *       is up to the user to decide whether to skip these segments.
+ *
  * @param func
  *   Iterator function
  * @param arg
@@ -317,6 +326,103 @@ rte_memseg_contig_walk_thread_unsafe(rte_memseg_contig_walk_t func, void *arg);
 int __rte_experimental
 rte_memseg_list_walk_thread_unsafe(rte_memseg_list_walk_t func, void *arg);
 
+/**
+ * Return file descriptor associated with a particular memseg (if available).
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ *       be used within memory-related callback functions.
+ *
+ * @note This returns an internal file descriptor. Performing any operations on
+ *       this file descriptor is inherently dangerous, so it should be treated
+ *       as read-only for all intents and purposes.
+ *
+ * @param ms
+ *   A pointer to memseg for which to get file descriptor.
+ *
+ * @return
+ *   Valid file descriptor in case of success.
+ *   -1 in case of error, with ``rte_errno`` set to the following values:
+ *     - EINVAL  - ``ms`` pointer was NULL or did not point to a valid memseg
+ *     - ENODEV  - ``ms`` fd is not available
+ *     - ENOENT  - ``ms`` is an unused segment
+ *     - ENOTSUP - segment fd's are not supported
+ */
+int __rte_experimental
+rte_memseg_get_fd(const struct rte_memseg *ms);
+
+/**
+ * Return file descriptor associated with a particular memseg (if available).
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ *       from within memory-related callback functions.
+ *
+ * @note This returns an internal file descriptor. Performing any operations on
+ *       this file descriptor is inherently dangerous, so it should be treated
+ *       as read-only for all intents and purposes.
+ *
+ * @param ms
+ *   A pointer to memseg for which to get file descriptor.
+ *
+ * @return
+ *   Valid file descriptor in case of success.
+ *   -1 in case of error, with ``rte_errno`` set to the following values:
+ *     - EINVAL  - ``ms`` pointer was NULL or did not point to a valid memseg
+ *     - ENODEV  - ``ms`` fd is not available
+ *     - ENOENT  - ``ms`` is an unused segment
+ *     - ENOTSUP - segment fd's are not supported
+ */
+int __rte_experimental
+rte_memseg_get_fd_thread_unsafe(const struct rte_memseg *ms);
+
+/**
+ * Get offset into segment file descriptor associated with a particular memseg
+ * (if available).
+ *
+ * @note This function read-locks the memory hotplug subsystem, and thus cannot
+ *       be used within memory-related callback functions.
+ *
+ * @param ms
+ *   A pointer to memseg for which to get file descriptor.
+ * @param offset
+ *   A pointer to offset value where the result will be stored.
+ *
+ * @return
+ *   Zero in case of success.
+ *   -1 in case of error, with ``rte_errno`` set to the following values:
+ *     - EINVAL  - ``ms`` pointer was NULL or did not point to a valid memseg
+ *     - EINVAL  - ``offset`` pointer was NULL
+ *     - ENODEV  - ``ms`` fd is not available
+ *     - ENOENT  - ``ms`` is an unused segment
+ *     - ENOTSUP - segment fd's are not supported
+ */
+int __rte_experimental
+rte_memseg_get_fd_offset(const struct rte_memseg *ms, size_t *offset);
+
+/**
+ * Get offset into segment file descriptor associated with a particular memseg
+ * (if available).
+ *
+ * @note This function does not perform any locking, and is only safe to call
+ *       from within memory-related callback functions.
+ *
+ * @param ms
+ *   A pointer to memseg for which to get file descriptor.
+ * @param offset
+ *   A pointer to offset value where the result will be stored.
+ *
+ * @return
+ *   Zero in case of success.
+ *   -1 in case of error, with ``rte_errno`` set to the following values:
+ *     - EINVAL  - ``ms`` pointer was NULL or did not point to a valid memseg
+ *     - EINVAL  - ``offset`` pointer was NULL
+ *     - ENODEV  - ``ms`` fd is not available
+ *     - ENOENT  - ``ms`` is an unused segment
+ *     - ENOTSUP - segment fd's are not supported
+ */
+int __rte_experimental
+rte_memseg_get_fd_offset_thread_unsafe(const struct rte_memseg *ms,
+               size_t *offset);
+
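+
A sketch of reading segment fds from a walk callback; since rte_memseg_walk() holds the hotplug read lock for the duration of the walk, the thread-unsafe variants are used inside the callback (the printf reporting is illustrative):

#include <stdio.h>
#include <rte_memory.h>

static int
seg_fd_walk(const struct rte_memseg_list *msl,
		const struct rte_memseg *ms, void *arg)
{
	size_t offset;
	int fd;

	(void)msl;
	(void)arg;
	fd = rte_memseg_get_fd_thread_unsafe(ms);
	if (fd < 0)
		return 0;	/* fd not available for this segment */
	if (rte_memseg_get_fd_offset_thread_unsafe(ms, &offset) < 0)
		return 0;
	/* treat the fd as read-only, per the notes above */
	printf("segment %p: fd %d, offset %zu\n", ms->addr, fd, offset);
	return 0;	/* continue walking */
}

/* usage: rte_memseg_walk(seg_fd_walk, NULL); */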
 /**
  * Dump the physical memory layout to a file.
  *
@@ -357,6 +463,9 @@ unsigned rte_memory_get_nchannel(void);
  */
 unsigned rte_memory_get_nrank(void);
 
+/* Check that memseg IOVAs are within a range based on the DMA mask. */
+int __rte_experimental rte_eal_check_dma_mask(uint8_t maskbits);
+
 /**
  * Drivers based on uio will not load unless physical
  * addresses are obtainable. It is only possible to get
diff --git a/lib/librte_eal/common/include/rte_option.h b/lib/librte_eal/common/include/rte_option.h
new file mode 100644 (file)
index 0000000..8957b97
--- /dev/null
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#ifndef __INCLUDE_RTE_OPTION_H__
+#define __INCLUDE_RTE_OPTION_H__
+
+/**
+ * @file
+ *
+ * This API offers the ability to register options to the EAL command line and
+ * map those options to functions that will be executed at the end of EAL
+ * initialization. These options will be available as part of the EAL command
+ * line of applications and are dynamically managed.
+ *
+ * This is used primarily by DPDK libraries offering command line options.
+ * Currently, this API is limited to registering options that take no argument.
+ *
+ * The register API can be used to resolve circular dependency issues
+ * between EAL and the library. The library uses EAL, but is also initialized
+ * by EAL. Hence, EAL depends on the init function of the library. The API
+ * introduced in rte_option allows us to register the library init with EAL
+ * (passing a function pointer) and avoid the circular dependency.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int (*rte_option_cb)(void);
+
+/*
+ * Structure describing the EAL command line option being registered.
+ */
+struct rte_option {
+       TAILQ_ENTRY(rte_option) next; /**< Next entry in the list. */
+       char *opt_str;             /**< The option name. */
+       rte_option_cb cb;          /**< Function called when option is used. */
+       int enabled;               /**< Set when the option is used. */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register an option to the EAL command line.
+ * When recognized, the associated function will be executed at the end of EAL
+ * initialization.
+ *
+ * The associated structure must be available the whole time this option is
+ * registered (i.e. not stack memory).
+ *
+ * @param opt
+ *  Structure describing the option to parse.
+ */
+void __rte_experimental
+rte_option_register(struct rte_option *opt);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
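
The registration pattern this header enables, sketched with a made-up "--example-lib" option; per the note above, the rte_option structure must outlive the registration, hence the static storage:

#include <rte_common.h>
#include <rte_option.h>

static int
example_lib_init(void)
{
	/* hypothetical library init, run at the end of rte_eal_init() */
	return 0;
}

static struct rte_option example_option = {
	.opt_str = "--example-lib",
	.cb = example_lib_init,
	.enabled = 0,
};

RTE_INIT(example_option_register)
{
	rte_option_register(&example_option);
}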
index 97597a1..9a2a1ff 100644 (file)
@@ -16,6 +16,7 @@ extern "C" {
 #endif
 
 #include <stdio.h>
+#include <string.h>
 
 /**
  * Takes string "string" parameter and splits it at character "delim"
@@ -60,12 +61,10 @@ rte_strlcpy(char *dst, const char *src, size_t size)
 
 /* pull in a strlcpy function */
 #ifdef RTE_EXEC_ENV_BSDAPP
-#include <string.h>
 #ifndef __BSD_VISIBLE /* non-standard functions are hidden */
 #define strlcpy(dst, src, size) rte_strlcpy(dst, src, size)
 #endif
 
-
 #else /* non-BSD platforms */
 #ifdef RTE_USE_LIBBSD
 #include <bsd/string.h>
@@ -76,6 +75,29 @@ rte_strlcpy(char *dst, const char *src, size_t size)
 #endif /* RTE_USE_LIBBSD */
 #endif /* BSDAPP */
 
+/**
+ * Copy string src to buffer dst of size dsize.
+ * At most dsize-1 chars will be copied.
+ * Always NUL-terminates, unless (dsize == 0).
+ * Returns the number of bytes copied (terminating NUL byte excluded) on success;
+ * negative errno on error.
+ *
+ * @param dst
+ *   The destination string.
+ *
+ * @param src
+ *   The input string to be copied.
+ *
+ * @param dsize
+ *   Length in bytes of the destination buffer.
+ *
+ * @return
+ *   The number of bytes copied on success
+ *   -E2BIG if the destination buffer is too small.
+ */
+ssize_t
+rte_strscpy(char *dst, const char *src, size_t dsize);
+
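+
A short usage sketch: unlike strlcpy(), rte_strscpy() reports truncation through a negative errno rather than returning the source length:

#include <rte_string_fns.h>

static int
copy_name(char *dst, size_t dsize, const char *src)
{
	ssize_t ret = rte_strscpy(dst, src, dsize);

	if (ret < 0)
		return (int)ret;	/* -E2BIG: destination too small */
	return 0;			/* ret bytes copied, NUL-terminated */
}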
 #ifdef __cplusplus
 }
 #endif
index 7c6714a..412ed2d 100644 (file)
@@ -32,7 +32,7 @@ extern "C" {
 /**
  * Minor version/month number i.e. the mm in yy.mm.z
  */
-#define RTE_VER_MONTH 8
+#define RTE_VER_MONTH 11
 
 /**
  * Patch level number i.e. the z in yy.mm.z
@@ -42,14 +42,14 @@ extern "C" {
 /**
  * Extra string to be appended to version number
  */
-#define RTE_VER_SUFFIX ""
+#define RTE_VER_SUFFIX "-rc"
 
 /**
  * Patch release number
  *   0-15 = release candidates
  *   16   = release
  */
-#define RTE_VER_RELEASE 16
+#define RTE_VER_RELEASE 1
 
 /**
  * Macro to compute a version number usable for comparisons
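
For downstream code, compile-time gating against this release can use the comparison macro referenced above; a minimal sketch assuming the usual yy.mm layout of RTE_VERSION/RTE_VERSION_NUM:

#include <rte_version.h>

#if RTE_VERSION >= RTE_VERSION_NUM(18, 11, 0, 0)
/* e.g. rely on rte_dev_probe(), new in 18.11 */
#endif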
index 5ca13fc..cae96fa 100644 (file)
@@ -14,6 +14,8 @@
 extern "C" {
 #endif
 
+#include <stdint.h>
+
 /*
  * determine if VFIO is present on the system
  */
@@ -22,6 +24,9 @@ extern "C" {
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0)
 #define VFIO_PRESENT
 #endif /* kernel version >= 3.6.0 */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 0, 0)
+#define HAVE_VFIO_DEV_REQ_INTERFACE
+#endif /* kernel version >= 4.0.0 */
 #endif /* RTE_EAL_VFIO */
 
 #ifdef VFIO_PRESENT
@@ -44,6 +49,30 @@ extern "C" {
 #define RTE_VFIO_NOIOMMU 8
 #endif
 
+/*
+ * capabilities are only supported on kernel 4.6+. There were also some API
+ * changes, so add a macro to get the cap offset.
+ */
+#ifdef VFIO_REGION_INFO_FLAG_CAPS
+#define RTE_VFIO_INFO_FLAG_CAPS VFIO_REGION_INFO_FLAG_CAPS
+#define VFIO_CAP_OFFSET(x) (x->cap_offset)
+#else
+#define RTE_VFIO_INFO_FLAG_CAPS (1 << 3)
+#define VFIO_CAP_OFFSET(x) (x->resv)
+struct vfio_info_cap_header {
+       uint16_t id;
+       uint16_t version;
+       uint32_t next;
+};
+#endif
+
+/* kernels 4.16+ can map BAR containing MSI-X table */
+#ifdef VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
+#define RTE_VFIO_CAP_MSIX_MAPPABLE VFIO_REGION_INFO_CAP_MSIX_MAPPABLE
+#else
+#define RTE_VFIO_CAP_MSIX_MAPPABLE 3
+#endif
+
 #else /* not VFIO_PRESENT */
 
 /* we don't need an actual definition, only pointer is used */
@@ -227,7 +256,7 @@ rte_vfio_get_group_num(const char *sysfs_base,
                      const char *dev_addr, int *iommu_group_num);
 
 /**
- * Open VFIO container fd or get an existing one
+ * Open a new VFIO container fd
  *
  * This function is only relevant to linux and will return
  * an error on BSD.
index e0a8ed1..1a74660 100644 (file)
@@ -39,10 +39,14 @@ malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
        contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);
 
        /* if we're in IOVA as VA mode, or if we're in legacy mode with
-        * hugepages, all elements are IOVA-contiguous.
+        * hugepages, all elements are IOVA-contiguous. however, we can only
+        * make these assumptions about internal memory - externally allocated
+        * segments have to be checked.
         */
-       if (rte_eal_iova_mode() == RTE_IOVA_VA ||
-                       (internal_config.legacy_mem && rte_eal_has_hugepages()))
+       if (!elem->msl->external &&
+                       (rte_eal_iova_mode() == RTE_IOVA_VA ||
+                               (internal_config.legacy_mem &&
+                                       rte_eal_has_hugepages())))
                return RTE_PTR_DIFF(data_end, contig_seg_start);
 
        cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
index 12aaf2d..1973b6e 100644 (file)
 #include "malloc_heap.h"
 #include "malloc_mp.h"
 
+/* start external socket IDs at a very high number */
+#define CONST_MAX(a, b) ((a) > (b) ? (a) : (b)) /* RTE_MAX is not a constant */
+#define EXTERNAL_HEAP_MIN_SOCKET_ID (CONST_MAX((1 << 8), RTE_MAX_NUMA_NODES))
+
 static unsigned
 check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
 {
@@ -66,6 +70,21 @@ check_hugepage_sz(unsigned flags, uint64_t hugepage_sz)
        return check_flag & flags;
 }
 
+int
+malloc_socket_to_heap_id(unsigned int socket_id)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       int i;
+
+       for (i = 0; i < RTE_MAX_HEAPS; i++) {
+               struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+
+               if (heap->socket_id == socket_id)
+                       return i;
+       }
+       return -1;
+}
+
 /*
  * Expand the heap with a memory area.
  */
@@ -93,9 +112,17 @@ malloc_add_seg(const struct rte_memseg_list *msl,
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
        struct rte_memseg_list *found_msl;
        struct malloc_heap *heap;
-       int msl_idx;
+       int msl_idx, heap_idx;
+
+       if (msl->external)
+               return 0;
 
-       heap = &mcfg->malloc_heaps[msl->socket_id];
+       heap_idx = malloc_socket_to_heap_id(msl->socket_id);
+       if (heap_idx < 0) {
+               RTE_LOG(ERR, EAL, "Memseg list has invalid socket id\n");
+               return -1;
+       }
+       heap = &mcfg->malloc_heaps[heap_idx];
 
        /* msl is const, so find it */
        msl_idx = msl - mcfg->memsegs;
@@ -165,7 +192,9 @@ find_biggest_element(struct malloc_heap *heap, size_t *size,
                for (elem = LIST_FIRST(&heap->free_head[idx]);
                                !!elem; elem = LIST_NEXT(elem, free_list)) {
                        size_t cur_size;
-                       if (!check_hugepage_sz(flags, elem->msl->page_sz))
+                       if ((flags & RTE_MEMZONE_SIZE_HINT_ONLY) == 0 &&
+                                       !check_hugepage_sz(flags,
+                                               elem->msl->page_sz))
                                continue;
                        if (contig) {
                                cur_size =
@@ -259,11 +288,13 @@ alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
                int socket, unsigned int flags, size_t align, size_t bound,
                bool contig, struct rte_memseg **ms, int n_segs)
 {
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
        struct rte_memseg_list *msl;
        struct malloc_elem *elem = NULL;
        size_t alloc_sz;
        int allocd_pages;
        void *ret, *map_addr;
 
        alloc_sz = (size_t)pg_sz * n_segs;
 
@@ -291,6 +322,16 @@ alloc_pages_on_heap(struct malloc_heap *heap, uint64_t pg_sz, size_t elt_size,
                goto fail;
        }
 
+       if (mcfg->dma_maskbits) {
+               /* the API above takes the mask width in bits, not the mask */
+               if (rte_eal_check_dma_mask(mcfg->dma_maskbits)) {
+                       RTE_LOG(ERR, EAL,
+                               "%s(): couldn't allocate memory due to DMA mask\n",
+                               __func__);
+                       goto fail;
+               }
+       }
+
        /* add newly minted memsegs to malloc heap */
        elem = malloc_heap_add_memory(heap, msl, map_addr, alloc_sz);
 
@@ -326,11 +367,9 @@ try_expand_heap_primary(struct malloc_heap *heap, uint64_t pg_sz,
 
        /* we can't know in advance how many pages we'll need, so we malloc */
        ms = malloc(sizeof(*ms) * n_segs);
-
-       memset(ms, 0, sizeof(*ms) * n_segs);
-
        if (ms == NULL)
                return -1;
+       memset(ms, 0, sizeof(*ms) * n_segs);
 
        elem = alloc_pages_on_heap(heap, pg_sz, elt_size, socket, flags, align,
                        bound, contig, ms, n_segs);
@@ -560,12 +599,14 @@ alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
 
 /* this will try lower page sizes first */
 static void *
-heap_alloc_on_socket(const char *type, size_t size, int socket,
-               unsigned int flags, size_t align, size_t bound, bool contig)
+malloc_heap_alloc_on_heap_id(const char *type, size_t size,
+               unsigned int heap_id, unsigned int flags, size_t align,
+               size_t bound, bool contig)
 {
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-       struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+       struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
        unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
+       int socket_id;
        void *ret;
 
        rte_spinlock_lock(&(heap->lock));
@@ -583,12 +624,28 @@ heap_alloc_on_socket(const char *type, size_t size, int socket,
         * we may still be able to allocate memory from appropriate page sizes,
         * we just need to request more memory first.
         */
+
+       socket_id = rte_socket_id_by_idx(heap_id);
+       /*
+        * if socket ID is negative, we cannot find a socket ID for this heap -
+        * which means it's an external heap. those can have unexpected page
+        * sizes, so if the user asked to allocate from there - assume user
+        * knows what they're doing, and allow allocating from there with any
+        * page size flags.
+        */
+       if (socket_id < 0)
+               size_flags |= RTE_MEMZONE_SIZE_HINT_ONLY;
+
        ret = heap_alloc(heap, type, size, size_flags, align, bound, contig);
        if (ret != NULL)
                goto alloc_unlock;
 
-       if (!alloc_more_mem_on_socket(heap, size, socket, flags, align, bound,
-                       contig)) {
+       /* if socket ID is invalid, this is an external heap */
+       if (socket_id < 0)
+               goto alloc_unlock;
+
+       if (!alloc_more_mem_on_socket(heap, size, socket_id, flags, align,
+                       bound, contig)) {
                ret = heap_alloc(heap, type, size, flags, align, bound, contig);
 
                /* this should have succeeded */
@@ -604,14 +661,14 @@ void *
 malloc_heap_alloc(const char *type, size_t size, int socket_arg,
                unsigned int flags, size_t align, size_t bound, bool contig)
 {
-       int socket, i, cur_socket;
+       int socket, heap_id, i;
        void *ret;
 
        /* return NULL if size is 0 or alignment is not power-of-2 */
        if (size == 0 || (align && !rte_is_power_of_2(align)))
                return NULL;
 
-       if (!rte_eal_has_hugepages())
+       if (!rte_eal_has_hugepages() && socket_arg < RTE_MAX_NUMA_NODES)
                socket_arg = SOCKET_ID_ANY;
 
        if (socket_arg == SOCKET_ID_ANY)
@@ -619,22 +676,25 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
        else
                socket = socket_arg;
 
-       /* Check socket parameter */
-       if (socket >= RTE_MAX_NUMA_NODES)
+       /* turn socket ID into heap ID */
+       heap_id = malloc_socket_to_heap_id(socket);
+       /* if heap id is negative, socket ID was invalid */
+       if (heap_id < 0)
                return NULL;
 
-       ret = heap_alloc_on_socket(type, size, socket, flags, align, bound,
-                       contig);
+       ret = malloc_heap_alloc_on_heap_id(type, size, heap_id, flags, align,
+                       bound, contig);
        if (ret != NULL || socket_arg != SOCKET_ID_ANY)
                return ret;
 
-       /* try other heaps */
+       /* try other heaps. we are only iterating through native DPDK sockets,
+        * so external heaps won't be included.
+        */
        for (i = 0; i < (int) rte_socket_count(); i++) {
-               cur_socket = rte_socket_id_by_idx(i);
-               if (cur_socket == socket)
+               if (i == heap_id)
                        continue;
-               ret = heap_alloc_on_socket(type, size, cur_socket, flags,
-                               align, bound, contig);
+               ret = malloc_heap_alloc_on_heap_id(type, size, i, flags, align,
+                               bound, contig);
                if (ret != NULL)
                        return ret;
        }
@@ -642,11 +702,11 @@ malloc_heap_alloc(const char *type, size_t size, int socket_arg,
 }
 
 static void *
-heap_alloc_biggest_on_socket(const char *type, int socket, unsigned int flags,
-               size_t align, bool contig)
+heap_alloc_biggest_on_heap_id(const char *type, unsigned int heap_id,
+               unsigned int flags, size_t align, bool contig)
 {
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-       struct malloc_heap *heap = &mcfg->malloc_heaps[socket];
+       struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
        void *ret;
 
        rte_spinlock_lock(&(heap->lock));
@@ -664,7 +724,7 @@ void *
 malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
                size_t align, bool contig)
 {
-       int socket, i, cur_socket;
+       int socket, i, cur_socket, heap_id;
        void *ret;
 
        /* return NULL if align is not power-of-2 */
@@ -679,11 +739,13 @@ malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
        else
                socket = socket_arg;
 
-       /* Check socket parameter */
-       if (socket >= RTE_MAX_NUMA_NODES)
+       /* turn socket ID into heap ID */
+       heap_id = malloc_socket_to_heap_id(socket);
+       /* if heap id is negative, socket ID was invalid */
+       if (heap_id < 0)
                return NULL;
 
-       ret = heap_alloc_biggest_on_socket(type, socket, flags, align,
+       ret = heap_alloc_biggest_on_heap_id(type, heap_id, flags, align,
                        contig);
        if (ret != NULL || socket_arg != SOCKET_ID_ANY)
                return ret;
@@ -693,8 +755,8 @@ malloc_heap_alloc_biggest(const char *type, int socket_arg, unsigned int flags,
                cur_socket = rte_socket_id_by_idx(i);
                if (cur_socket == socket)
                        continue;
-               ret = heap_alloc_biggest_on_socket(type, cur_socket, flags,
-                               align, contig);
+               ret = heap_alloc_biggest_on_heap_id(type, i, flags, align,
+                               contig);
                if (ret != NULL)
                        return ret;
        }
@@ -756,8 +818,10 @@ malloc_heap_free(struct malloc_elem *elem)
        /* anything after this is a bonus */
        ret = 0;
 
-       /* ...of which we can't avail if we are in legacy mode */
-       if (internal_config.legacy_mem)
+       /* ...of which we can't avail if we are in legacy mode, or if this is an
+        * externally allocated segment.
+        */
+       if (internal_config.legacy_mem || (msl->external > 0))
                goto free_unlock;
 
        /* check if we can free any memory back to the system */
@@ -914,7 +978,7 @@ malloc_heap_resize(struct malloc_elem *elem, size_t size)
 }
 
 /*
- * Function to retrieve data for heap on given socket
+ * Function to retrieve data for a given heap
  */
 int
 malloc_heap_get_stats(struct malloc_heap *heap,
@@ -952,7 +1016,7 @@ malloc_heap_get_stats(struct malloc_heap *heap,
 }
 
 /*
- * Function to retrieve data for heap on given socket
+ * Function to retrieve data for a given heap
  */
 void
 malloc_heap_dump(struct malloc_heap *heap, FILE *f)
@@ -973,10 +1037,216 @@ malloc_heap_dump(struct malloc_heap *heap, FILE *f)
        rte_spinlock_unlock(&heap->lock);
 }
 
+static int
+destroy_seg(struct malloc_elem *elem, size_t len)
+{
+       struct malloc_heap *heap = elem->heap;
+       struct rte_memseg_list *msl;
+
+       msl = elem->msl;
+
+       /* notify all subscribers that a memory area is going to be removed */
+       eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE, elem, len);
+
+       /* this element can be removed */
+       malloc_elem_free_list_remove(elem);
+       malloc_elem_hide_region(elem, elem, len);
+
+       heap->total_size -= len;
+
+       memset(elem, 0, sizeof(*elem));
+
+       /* destroy the fbarray backing this memory */
+       if (rte_fbarray_destroy(&msl->memseg_arr) < 0)
+               return -1;
+
+       /* reset the memseg list */
+       memset(msl, 0, sizeof(*msl));
+
+       return 0;
+}
+
+int
+malloc_heap_add_external_memory(struct malloc_heap *heap, void *va_addr,
+               rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       char fbarray_name[RTE_FBARRAY_NAME_LEN];
+       struct rte_memseg_list *msl = NULL;
+       struct rte_fbarray *arr;
+       size_t seg_len = n_pages * page_sz;
+       unsigned int i;
+
+       /* first, find a free memseg list */
+       for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
+               struct rte_memseg_list *tmp = &mcfg->memsegs[i];
+               if (tmp->base_va == NULL) {
+                       msl = tmp;
+                       break;
+               }
+       }
+       if (msl == NULL) {
+               RTE_LOG(ERR, EAL, "Couldn't find empty memseg list\n");
+               rte_errno = ENOSPC;
+               return -1;
+       }
+
+       snprintf(fbarray_name, sizeof(fbarray_name) - 1, "%s_%p",
+                       heap->name, va_addr);
+
+       /* create the backing fbarray */
+       if (rte_fbarray_init(&msl->memseg_arr, fbarray_name, n_pages,
+                       sizeof(struct rte_memseg)) < 0) {
+               RTE_LOG(ERR, EAL, "Couldn't create fbarray backing the memseg list\n");
+               return -1;
+       }
+       arr = &msl->memseg_arr;
+
+       /* fbarray created, fill it up */
+       for (i = 0; i < n_pages; i++) {
+               struct rte_memseg *ms;
+
+               rte_fbarray_set_used(arr, i);
+               ms = rte_fbarray_get(arr, i);
+               ms->addr = RTE_PTR_ADD(va_addr, i * page_sz);
+               ms->iova = iova_addrs == NULL ? RTE_BAD_IOVA : iova_addrs[i];
+               ms->hugepage_sz = page_sz;
+               ms->len = page_sz;
+               ms->nchannel = rte_memory_get_nchannel();
+               ms->nrank = rte_memory_get_nrank();
+               ms->socket_id = heap->socket_id;
+       }
+
+       /* set up the memseg list */
+       msl->base_va = va_addr;
+       msl->page_sz = page_sz;
+       msl->socket_id = heap->socket_id;
+       msl->len = seg_len;
+       msl->version = 0;
+       msl->external = 1;
+
+       /* erase contents of new memory */
+       memset(va_addr, 0, seg_len);
+
+       /* now, add newly minted memory to the malloc heap */
+       malloc_heap_add_memory(heap, msl, va_addr, seg_len);
+
+       heap->total_size += seg_len;
+
+       /* all done! */
+       RTE_LOG(DEBUG, EAL, "Added segment for heap %s starting at %p\n",
+                       heap->name, va_addr);
+
+       /* notify all subscribers that a new memory area has been added */
+       eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
+                       va_addr, seg_len);
+
+       return 0;
+}
+
+int
+malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr,
+               size_t len)
+{
+       struct malloc_elem *elem = heap->first;
+
+       /* find element with specified va address */
+       while (elem != NULL && elem != va_addr) {
+               elem = elem->next;
+               /* stop if we've blown past our VA */
+               if (elem > (struct malloc_elem *)va_addr) {
+                       rte_errno = ENOENT;
+                       return -1;
+               }
+       }
+       /* check if element was found */
+       if (elem == NULL || elem->msl->len != len) {
+               rte_errno = ENOENT;
+               return -1;
+       }
+       /* if element's size is not equal to segment len, segment is busy */
+       if (elem->state == ELEM_BUSY || elem->size != len) {
+               rte_errno = EBUSY;
+               return -1;
+       }
+       return destroy_seg(elem, len);
+}
+
+int
+malloc_heap_create(struct malloc_heap *heap, const char *heap_name)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       uint32_t next_socket_id = mcfg->next_socket_id;
+
+       /* prevent overflow. did you really create 2 billion heaps??? */
+       if (next_socket_id > INT32_MAX) {
+               RTE_LOG(ERR, EAL, "Cannot assign new socket ID's\n");
+               rte_errno = ENOSPC;
+               return -1;
+       }
+
+       /* initialize empty heap */
+       heap->alloc_count = 0;
+       heap->first = NULL;
+       heap->last = NULL;
+       LIST_INIT(heap->free_head);
+       rte_spinlock_init(&heap->lock);
+       heap->total_size = 0;
+       heap->socket_id = next_socket_id;
+
+       /* we hold a global mem hotplug writelock, so it's safe to increment */
+       mcfg->next_socket_id++;
+
+       /* set up name */
+       strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
+       return 0;
+}
+
+int
+malloc_heap_destroy(struct malloc_heap *heap)
+{
+       if (heap->alloc_count != 0) {
+               RTE_LOG(ERR, EAL, "Heap is still in use\n");
+               rte_errno = EBUSY;
+               return -1;
+       }
+       if (heap->first != NULL || heap->last != NULL) {
+               RTE_LOG(ERR, EAL, "Heap still contains memory segments\n");
+               rte_errno = EBUSY;
+               return -1;
+       }
+       if (heap->total_size != 0)
+               RTE_LOG(ERR, EAL, "Total size not zero, heap is likely corrupt\n");
+
+       /* after this, the lock will be dropped */
+       memset(heap, 0, sizeof(*heap));
+
+       return 0;
+}
+
 int
 rte_eal_malloc_heap_init(void)
 {
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       unsigned int i;
+
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               /* assign min socket ID to external heaps */
+               mcfg->next_socket_id = EXTERNAL_HEAP_MIN_SOCKET_ID;
+
+               /* assign names to default DPDK heaps */
+               for (i = 0; i < rte_socket_count(); i++) {
+                       struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+                       char heap_name[RTE_HEAP_NAME_MAX_LEN];
+                       int socket_id = rte_socket_id_by_idx(i);
+
+                       snprintf(heap_name, sizeof(heap_name) - 1,
+                                       "socket_%i", socket_id);
+                       strlcpy(heap->name, heap_name, RTE_HEAP_NAME_MAX_LEN);
+                       heap->socket_id = socket_id;
+               }
+       }
+
 
        if (register_mp_requests()) {
                RTE_LOG(ERR, EAL, "Couldn't register malloc multiprocess actions\n");
index f52cb55..e48996d 100644 (file)
@@ -33,6 +33,20 @@ void *
 malloc_heap_alloc_biggest(const char *type, int socket, unsigned int flags,
                size_t align, bool contig);
 
+int
+malloc_heap_create(struct malloc_heap *heap, const char *heap_name);
+
+int
+malloc_heap_destroy(struct malloc_heap *heap);
+
+int
+malloc_heap_add_external_memory(struct malloc_heap *heap, void *va_addr,
+               rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz);
+
+int
+malloc_heap_remove_external_memory(struct malloc_heap *heap, void *va_addr,
+               size_t len);
+
 int
 malloc_heap_free(struct malloc_elem *elem);
 
@@ -46,6 +60,9 @@ malloc_heap_get_stats(struct malloc_heap *heap,
 void
 malloc_heap_dump(struct malloc_heap *heap, FILE *f);
 
+int
+malloc_socket_to_heap_id(unsigned int socket_id);
+
 int
 rte_eal_malloc_heap_init(void);
 
index 931c14b..5f2d4e0 100644 (file)
@@ -194,13 +194,11 @@ handle_alloc_request(const struct malloc_mp_req *m,
 
        /* we can't know in advance how many pages we'll need, so we malloc */
        ms = malloc(sizeof(*ms) * n_segs);
-
-       memset(ms, 0, sizeof(*ms) * n_segs);
-
        if (ms == NULL) {
                RTE_LOG(ERR, EAL, "Couldn't allocate memory for request state\n");
                goto fail;
        }
+       memset(ms, 0, sizeof(*ms) * n_segs);
 
        elem = alloc_pages_on_heap(heap, ar->page_sz, ar->elt_size, ar->socket,
                        ar->flags, ar->align, ar->bound, ar->contig, ms,
index 56005be..2a10d57 100644 (file)
@@ -14,6 +14,7 @@ common_sources = files(
        'eal_common_errno.c',
        'eal_common_fbarray.c',
        'eal_common_hexdump.c',
+       'eal_common_hypervisor.c',
        'eal_common_launch.c',
        'eal_common_lcore.c',
        'eal_common_log.c',
@@ -27,11 +28,13 @@ common_sources = files(
        'eal_common_thread.c',
        'eal_common_timer.c',
        'eal_common_uuid.c',
+       'hotplug_mp.c',
        'malloc_elem.c',
        'malloc_heap.c',
        'malloc_mp.c',
        'rte_keepalive.c',
        'rte_malloc.c',
+       'rte_option.c',
        'rte_reciprocal.c',
        'rte_service.c'
 )
@@ -59,6 +62,7 @@ common_headers = files(
        'include/rte_errno.h',
        'include/rte_fbarray.h',
        'include/rte_hexdump.h',
+       'include/rte_hypervisor.h',
        'include/rte_interrupts.h',
        'include/rte_keepalive.h',
        'include/rte_launch.h',
@@ -68,6 +72,7 @@ common_headers = files(
        'include/rte_malloc_heap.h',
        'include/rte_memory.h',
        'include/rte_memzone.h',
+       'include/rte_option.h',
        'include/rte_pci_dev_feature_defs.h',
        'include/rte_pci_dev_features.h',
        'include/rte_per_lcore.h',
index b51a6d1..9e61dc4 100644 (file)
@@ -8,6 +8,7 @@
 #include <string.h>
 #include <sys/queue.h>
 
+#include <rte_errno.h>
 #include <rte_memcpy.h>
 #include <rte_memory.h>
 #include <rte_eal.h>
@@ -23,6 +24,7 @@
 #include <rte_malloc.h>
 #include "malloc_elem.h"
 #include "malloc_heap.h"
+#include "eal_memalloc.h"
 
 
 /* Free the memory space back to heap */
@@ -44,13 +46,15 @@ rte_malloc_socket(const char *type, size_t size, unsigned int align,
        if (size == 0 || (align && !rte_is_power_of_2(align)))
                return NULL;
 
-       if (!rte_eal_has_hugepages())
+       /* if there are no hugepages and if we are not allocating from an
+        * external heap, use memory from any socket available. checking for
+        * socket being external may return -1 in case of invalid socket, but
+        * that's OK - if there are no hugepages, it doesn't matter.
+        */
+       if (rte_malloc_heap_socket_is_external(socket_arg) != 1 &&
+                               !rte_eal_has_hugepages())
                socket_arg = SOCKET_ID_ANY;
 
-       /* Check socket parameter */
-       if (socket_arg >= RTE_MAX_NUMA_NODES)
-               return NULL;
-
        return malloc_heap_alloc(type, size, socket_arg, 0,
                        align == 0 ? 1 : align, 0, false);
 }
@@ -152,11 +156,20 @@ rte_malloc_get_socket_stats(int socket,
                struct rte_malloc_socket_stats *socket_stats)
 {
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       int heap_idx, ret = -1;
 
-       if (socket >= RTE_MAX_NUMA_NODES || socket < 0)
-               return -1;
+       rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
+       heap_idx = malloc_socket_to_heap_id(socket);
+       if (heap_idx < 0)
+               goto unlock;
 
-       return malloc_heap_get_stats(&mcfg->malloc_heaps[socket], socket_stats);
+       ret = malloc_heap_get_stats(&mcfg->malloc_heaps[heap_idx],
+                       socket_stats);
+unlock:
+       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+       return ret;
 }
 
 /*
@@ -168,12 +181,75 @@ rte_malloc_dump_heaps(FILE *f)
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
        unsigned int idx;
 
-       for (idx = 0; idx < rte_socket_count(); idx++) {
-               unsigned int socket = rte_socket_id_by_idx(idx);
-               fprintf(f, "Heap on socket %i:\n", socket);
-               malloc_heap_dump(&mcfg->malloc_heaps[socket], f);
+       rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
+       for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+               fprintf(f, "Heap id: %u\n", idx);
+               malloc_heap_dump(&mcfg->malloc_heaps[idx], f);
+       }
+
+       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+}
+
+int
+rte_malloc_heap_get_socket(const char *name)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       struct malloc_heap *heap = NULL;
+       unsigned int idx;
+       int ret;
+
+       if (name == NULL ||
+                       strnlen(name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+                       strnlen(name, RTE_HEAP_NAME_MAX_LEN) ==
+                               RTE_HEAP_NAME_MAX_LEN) {
+               rte_errno = EINVAL;
+               return -1;
+       }
+       rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+       for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+               struct malloc_heap *tmp = &mcfg->malloc_heaps[idx];
+
+               if (!strncmp(name, tmp->name, RTE_HEAP_NAME_MAX_LEN)) {
+                       heap = tmp;
+                       break;
+               }
+       }
+
+       if (heap != NULL) {
+               ret = heap->socket_id;
+       } else {
+               rte_errno = ENOENT;
+               ret = -1;
+       }
+       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+
+       return ret;
+}
+
+int
+rte_malloc_heap_socket_is_external(int socket_id)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       unsigned int idx;
+       int ret = -1;
+
+       if (socket_id == SOCKET_ID_ANY)
+               return 0;
+
+       rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+       for (idx = 0; idx < RTE_MAX_HEAPS; idx++) {
+               struct malloc_heap *tmp = &mcfg->malloc_heaps[idx];
+
+               if ((int)tmp->socket_id == socket_id) {
+                       /* external memory always has large socket IDs */
+                       ret = tmp->socket_id >= RTE_MAX_NUMA_NODES;
+                       break;
+               }
        }
+       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
 
+       return ret;
 }
 
 /*
@@ -182,14 +258,20 @@ rte_malloc_dump_heaps(FILE *f)
 void
 rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
 {
-       unsigned int socket;
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       unsigned int heap_id;
        struct rte_malloc_socket_stats sock_stats;
+
+       rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
        /* Iterate through all initialised heaps */
-       for (socket=0; socket< RTE_MAX_NUMA_NODES; socket++) {
-               if ((rte_malloc_get_socket_stats(socket, &sock_stats) < 0))
-                       continue;
+       for (heap_id = 0; heap_id < RTE_MAX_HEAPS; heap_id++) {
+               struct malloc_heap *heap = &mcfg->malloc_heaps[heap_id];
+
+               malloc_heap_get_stats(heap, &sock_stats);
 
-               fprintf(f, "Socket:%u\n", socket);
+               fprintf(f, "Heap id:%u\n", heap_id);
+               fprintf(f, "\tHeap name:%s\n", heap->name);
                fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes);
                fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes);
                fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes);
@@ -198,6 +280,7 @@ rte_malloc_dump_stats(FILE *f, __rte_unused const char *type)
                fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count);
                fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count);
        }
+       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
        return;
 }
 
@@ -223,7 +306,7 @@ rte_malloc_virt2iova(const void *addr)
        if (elem == NULL)
                return RTE_BAD_IOVA;
 
-       if (rte_eal_iova_mode() == RTE_IOVA_VA)
+       if (!elem->msl->external && rte_eal_iova_mode() == RTE_IOVA_VA)
                return (uintptr_t) addr;
 
        ms = rte_mem_virt2memseg(addr, elem->msl);
@@ -235,3 +318,320 @@ rte_malloc_virt2iova(const void *addr)
 
        return ms->iova + RTE_PTR_DIFF(addr, ms->addr);
 }
+
+static struct malloc_heap *
+find_named_heap(const char *name)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       unsigned int i;
+
+       for (i = 0; i < RTE_MAX_HEAPS; i++) {
+               struct malloc_heap *heap = &mcfg->malloc_heaps[i];
+
+               if (!strncmp(name, heap->name, RTE_HEAP_NAME_MAX_LEN))
+                       return heap;
+       }
+       return NULL;
+}
+
+int
+rte_malloc_heap_memory_add(const char *heap_name, void *va_addr, size_t len,
+               rte_iova_t iova_addrs[], unsigned int n_pages, size_t page_sz)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       struct malloc_heap *heap = NULL;
+       unsigned int n;
+       int ret;
+
+       if (heap_name == NULL || va_addr == NULL ||
+                       page_sz == 0 || !rte_is_power_of_2(page_sz) ||
+                       strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+                       strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+                               RTE_HEAP_NAME_MAX_LEN) {
+               rte_errno = EINVAL;
+               ret = -1;
+               goto unlock;
+       }
+       rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+
+       /* find our heap */
+       heap = find_named_heap(heap_name);
+       if (heap == NULL) {
+               rte_errno = ENOENT;
+               ret = -1;
+               goto unlock;
+       }
+       if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+               /* cannot add memory to internal heaps */
+               rte_errno = EPERM;
+               ret = -1;
+               goto unlock;
+       }
+       n = len / page_sz;
+       if (n != n_pages && iova_addrs != NULL) {
+               rte_errno = EINVAL;
+               ret = -1;
+               goto unlock;
+       }
+
+       rte_spinlock_lock(&heap->lock);
+       ret = malloc_heap_add_external_memory(heap, va_addr, iova_addrs, n,
+                       page_sz);
+       rte_spinlock_unlock(&heap->lock);
+
+unlock:
+       rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+
+       return ret;
+}
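
For context, not part of the patch itself: a minimal sketch of how a primary process might drive this API, assuming the companion rte_malloc_heap_create() and rte_malloc_heap_get_socket() calls added in this release; the heap name, sizes and error handling are illustrative.

    #include <sys/mman.h>
    #include <rte_malloc.h>

    static int
    setup_external_heap(void **out_addr, size_t len)
    {
            const size_t page_sz = 4096; /* must be a power of two */
            void *addr;

            if (rte_malloc_heap_create("app_heap") < 0) /* illustrative name */
                    return -1;

            addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
            if (addr == MAP_FAILED)
                    return -1;

            /* NULL iova_addrs: the pages will not be usable for DMA */
            if (rte_malloc_heap_memory_add("app_heap", addr, len,
                            NULL, 0, page_sz) < 0)
                    return -1;

            *out_addr = addr;
            /* allocate from the new heap through its synthetic socket id */
            return rte_malloc_socket(NULL, 128, 0,
                            rte_malloc_heap_get_socket("app_heap")) != NULL ? 0 : -1;
    }
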
+
+int
+rte_malloc_heap_memory_remove(const char *heap_name, void *va_addr, size_t len)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       struct malloc_heap *heap = NULL;
+       int ret;
+
+       if (heap_name == NULL || va_addr == NULL || len == 0 ||
+                       strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+                       strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+                               RTE_HEAP_NAME_MAX_LEN) {
+               rte_errno = EINVAL;
+               return -1;
+       }
+       rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+       /* find our heap */
+       heap = find_named_heap(heap_name);
+       if (heap == NULL) {
+               rte_errno = ENOENT;
+               ret = -1;
+               goto unlock;
+       }
+       if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+               /* cannot remove memory from internal heaps */
+               rte_errno = EPERM;
+               ret = -1;
+               goto unlock;
+       }
+
+       rte_spinlock_lock(&heap->lock);
+       ret = malloc_heap_remove_external_memory(heap, va_addr, len);
+       rte_spinlock_unlock(&heap->lock);
+
+unlock:
+       rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+
+       return ret;
+}
+
+struct sync_mem_walk_arg {
+       void *va_addr;
+       size_t len;
+       int result;
+       bool attach;
+};
+
+static int
+sync_mem_walk(const struct rte_memseg_list *msl, void *arg)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       struct sync_mem_walk_arg *wa = arg;
+       size_t len = msl->page_sz * msl->memseg_arr.len;
+
+       if (msl->base_va == wa->va_addr &&
+                       len == wa->len) {
+               struct rte_memseg_list *found_msl;
+               int msl_idx, ret;
+
+               /* msl is const */
+               msl_idx = msl - mcfg->memsegs;
+               found_msl = &mcfg->memsegs[msl_idx];
+
+               if (wa->attach) {
+                       ret = rte_fbarray_attach(&found_msl->memseg_arr);
+               } else {
+                       /* notify all subscribers that a memory area is about to
+                        * be removed
+                        */
+                       eal_memalloc_mem_event_notify(RTE_MEM_EVENT_FREE,
+                                       msl->base_va, msl->len);
+                       ret = rte_fbarray_detach(&found_msl->memseg_arr);
+               }
+
+               if (ret < 0) {
+                       wa->result = -rte_errno;
+               } else {
+                       /* notify all subscribers that a new memory area was
+                        * added
+                        */
+                       if (wa->attach)
+                               eal_memalloc_mem_event_notify(
+                                               RTE_MEM_EVENT_ALLOC,
+                                               msl->base_va, msl->len);
+                       wa->result = 0;
+               }
+               return 1;
+       }
+       return 0;
+}
+
+static int
+sync_memory(const char *heap_name, void *va_addr, size_t len, bool attach)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       struct malloc_heap *heap = NULL;
+       struct sync_mem_walk_arg wa;
+       int ret;
+
+       if (heap_name == NULL || va_addr == NULL || len == 0 ||
+                       strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+                       strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+                               RTE_HEAP_NAME_MAX_LEN) {
+               rte_errno = EINVAL;
+               return -1;
+       }
+       rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
+
+       /* find our heap */
+       heap = find_named_heap(heap_name);
+       if (heap == NULL) {
+               rte_errno = ENOENT;
+               ret = -1;
+               goto unlock;
+       }
+       /* we shouldn't be able to sync to internal heaps */
+       if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+               rte_errno = EPERM;
+               ret = -1;
+               goto unlock;
+       }
+
+       /* find corresponding memseg list to sync to */
+       wa.va_addr = va_addr;
+       wa.len = len;
+       wa.result = -ENOENT; /* fail unless explicitly told to succeed */
+       wa.attach = attach;
+
+       /* we're already holding a read lock */
+       rte_memseg_list_walk_thread_unsafe(sync_mem_walk, &wa);
+
+       if (wa.result < 0) {
+               rte_errno = -wa.result;
+               ret = -1;
+       } else {
+               /* notify all subscribers that a new memory area was added */
+               if (attach)
+                       eal_memalloc_mem_event_notify(RTE_MEM_EVENT_ALLOC,
+                                       va_addr, len);
+               ret = 0;
+       }
+unlock:
+       rte_rwlock_read_unlock(&mcfg->memory_hotplug_lock);
+       return ret;
+}
+
+int
+rte_malloc_heap_memory_attach(const char *heap_name, void *va_addr, size_t len)
+{
+       return sync_memory(heap_name, va_addr, len, true);
+}
+
+int
+rte_malloc_heap_memory_detach(const char *heap_name, void *va_addr, size_t len)
+{
+       return sync_memory(heap_name, va_addr, len, false);
+}
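
The attach/detach pair exists for secondary processes: memory added in the primary is not automatically mapped elsewhere. A hedged sketch of the secondary-process side, assuming the same region was mmap()'d at the same virtual address there:

    /* secondary process; addr/len are illustrative and must exactly match
     * the region registered by the primary */
    if (rte_malloc_heap_memory_attach("app_heap", addr, len) < 0)
            rte_panic("cannot attach external heap memory\n");
    /* ... allocate/use via rte_malloc_socket() ... */
    rte_malloc_heap_memory_detach("app_heap", addr, len);
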
+
+int
+rte_malloc_heap_create(const char *heap_name)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       struct malloc_heap *heap = NULL;
+       int i, ret;
+
+       if (heap_name == NULL ||
+                       strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+                       strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+                               RTE_HEAP_NAME_MAX_LEN) {
+               rte_errno = EINVAL;
+               return -1;
+       }
+       /* check if there is space in the heap list, or if heap with this name
+        * already exists.
+        */
+       rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+
+       for (i = 0; i < RTE_MAX_HEAPS; i++) {
+               struct malloc_heap *tmp = &mcfg->malloc_heaps[i];
+               /* existing heap */
+               if (strncmp(heap_name, tmp->name,
+                               RTE_HEAP_NAME_MAX_LEN) == 0) {
+                       RTE_LOG(ERR, EAL, "Heap %s already exists\n",
+                               heap_name);
+                       rte_errno = EEXIST;
+                       ret = -1;
+                       goto unlock;
+               }
+               /* empty heap */
+               if (strnlen(tmp->name, RTE_HEAP_NAME_MAX_LEN) == 0) {
+                       heap = tmp;
+                       break;
+               }
+       }
+       if (heap == NULL) {
+               RTE_LOG(ERR, EAL, "Cannot create new heap: no space\n");
+               rte_errno = ENOSPC;
+               ret = -1;
+               goto unlock;
+       }
+
+       /* we're sure that we can create a new heap, so do it */
+       ret = malloc_heap_create(heap, heap_name);
+unlock:
+       rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+
+       return ret;
+}
+
+int
+rte_malloc_heap_destroy(const char *heap_name)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       struct malloc_heap *heap = NULL;
+       int ret;
+
+       if (heap_name == NULL ||
+                       strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) == 0 ||
+                       strnlen(heap_name, RTE_HEAP_NAME_MAX_LEN) ==
+                               RTE_HEAP_NAME_MAX_LEN) {
+               rte_errno = EINVAL;
+               return -1;
+       }
+       rte_rwlock_write_lock(&mcfg->memory_hotplug_lock);
+
+       /* find our heap */
+       heap = find_named_heap(heap_name);
+       if (heap == NULL) {
+               RTE_LOG(ERR, EAL, "Heap %s not found\n", heap_name);
+               rte_errno = ENOENT;
+               ret = -1;
+               goto unlock;
+       }
+       /* we shouldn't be able to destroy internal heaps */
+       if (heap->socket_id < RTE_MAX_NUMA_NODES) {
+               rte_errno = EPERM;
+               ret = -1;
+               goto unlock;
+       }
+       /* sanity checks done, now we can destroy the heap */
+       rte_spinlock_lock(&heap->lock);
+       ret = malloc_heap_destroy(heap);
+
+       /* on success the heap (including its lock) was wiped, so only
+        * unlock on failure
+        */
+       if (ret < 0)
+               rte_spinlock_unlock(&heap->lock);
+unlock:
+       rte_rwlock_write_unlock(&mcfg->memory_hotplug_lock);
+
+       return ret;
+}
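
Teardown mirrors setup: the memory must be free of allocations and removed before the heap itself can be destroyed. A sketch, reusing the illustrative names from the earlier example:

    /* illustrative teardown; each call can fail and set rte_errno */
    if (rte_malloc_heap_memory_remove("app_heap", addr, len) == 0 &&
                    rte_malloc_heap_destroy("app_heap") == 0)
            munmap(addr, len);
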
diff --git a/lib/librte_eal/common/rte_option.c b/lib/librte_eal/common/rte_option.c
new file mode 100644 (file)
index 0000000..02d59a8
--- /dev/null
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#include <unistd.h>
+#include <string.h>
+
+#include <rte_eal.h>
+#include <rte_option.h>
+
+#include "eal_private.h"
+
+TAILQ_HEAD(rte_option_list, rte_option);
+
+struct rte_option_list rte_option_list =
+       TAILQ_HEAD_INITIALIZER(rte_option_list);
+
+/* iterator shared by the functions below; used at init time only */
+static struct rte_option *option;
+
+int
+rte_option_parse(const char *opt)
+{
+       /* Check if the option is registered */
+       TAILQ_FOREACH(option, &rte_option_list, next) {
+               if (strcmp(opt, option->opt_str) == 0) {
+                       option->enabled = 1;
+                       return 0;
+               }
+       }
+
+       return -1;
+}
+
+void __rte_experimental
+rte_option_register(struct rte_option *opt)
+{
+       TAILQ_FOREACH(option, &rte_option_list, next) {
+               if (strcmp(opt->opt_str, option->opt_str) == 0) {
+                       RTE_LOG(INFO, EAL, "Option %s has already been registered.\n",
+                                       opt->opt_str);
+                       return;
+               }
+       }
+
+       TAILQ_INSERT_HEAD(&rte_option_list, opt, next);
+}
+
+void
+rte_option_init(void)
+{
+       TAILQ_FOREACH(option, &rte_option_list, next) {
+               if (option->enabled)
+                       option->cb();
+       }
+}
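
For reference, a library registers its option from a constructor so that it is known before EAL argument parsing runs. A hedged sketch (the option string and callback body are illustrative; in this release the telemetry library is the intended first user):

    #include <rte_common.h>
    #include <rte_option.h>

    static void
    telemetry_init_cb(void)
    {
            /* start the optional service; body is illustrative */
    }

    static struct rte_option telemetry_option = {
            .opt_str = "--telemetry",
            .cb = telemetry_init_cb,
    };

    RTE_INIT(register_telemetry_option)
    {
            rte_option_register(&telemetry_option);
    }
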
index fd92c75..51deb57 100644 (file)
@@ -10,7 +10,7 @@ ARCH_DIR ?= $(RTE_ARCH)
 EXPORT_MAP := ../../rte_eal_version.map
 VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
 
-LIBABIVER := 8
+LIBABIVER := 9
 
 VPATH += $(RTE_SDK)/lib/librte_eal/common
 
@@ -70,10 +70,12 @@ SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_proc.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_fbarray.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_uuid.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += hotplug_mp.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_mp.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c
+SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_option.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_service.c
 SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_reciprocal.c
 
@@ -85,22 +87,6 @@ SRCS-y += rte_cycles.c
 
 CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST)
 
-CFLAGS_eal.o := -D_GNU_SOURCE
-CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
-CFLAGS_eal_vfio_mp_sync.o := -D_GNU_SOURCE
-CFLAGS_eal_timer.o := -D_GNU_SOURCE
-CFLAGS_eal_lcore.o := -D_GNU_SOURCE
-CFLAGS_eal_memalloc.o := -D_GNU_SOURCE
-CFLAGS_eal_thread.o := -D_GNU_SOURCE
-CFLAGS_eal_log.o := -D_GNU_SOURCE
-CFLAGS_eal_common_log.o := -D_GNU_SOURCE
-CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE
-CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE
-CFLAGS_eal_common_options.o := -D_GNU_SOURCE
-CFLAGS_eal_common_thread.o := -D_GNU_SOURCE
-CFLAGS_eal_common_lcore.o := -D_GNU_SOURCE
-CFLAGS_rte_cycles.o := -D_GNU_SOURCE
-
 # workaround for a gcc bug with noreturn attribute
 # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
 ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
index e59ac65..361744d 100644 (file)
@@ -48,6 +48,7 @@
 #include <rte_atomic.h>
 #include <malloc_heap.h>
 #include <rte_vfio.h>
+#include <rte_option.h>
 
 #include "eal_private.h"
 #include "eal_thread.h"
@@ -149,7 +150,7 @@ eal_create_runtime_dir(void)
 }
 
 const char *
-eal_get_runtime_dir(void)
+rte_eal_get_runtime_dir(void)
 {
        return runtime_dir;
 }
@@ -263,6 +264,8 @@ rte_eal_config_create(void)
         * processes could later map the config into this exact location */
        rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr;
 
+       rte_config.mem_config->dma_maskbits = 0;
+
 }
 
 /* attach to an existing shared memory config */
@@ -352,6 +355,24 @@ eal_proc_type_detect(void)
        return ptype;
 }
 
+/* copies data from internal config to shared config */
+static void
+eal_update_mem_config(void)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       mcfg->legacy_mem = internal_config.legacy_mem;
+       mcfg->single_file_segments = internal_config.single_file_segments;
+}
+
+/* copies data from shared config to internal config */
+static void
+eal_update_internal_config(void)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       internal_config.legacy_mem = mcfg->legacy_mem;
+       internal_config.single_file_segments = mcfg->single_file_segments;
+}
+
 /* Sets up rte_config structure with the pointer to shared memory config.*/
 static void
 rte_config_init(void)
@@ -361,11 +382,13 @@ rte_config_init(void)
        switch (rte_config.process_type){
        case RTE_PROC_PRIMARY:
                rte_eal_config_create();
+               eal_update_mem_config();
                break;
        case RTE_PROC_SECONDARY:
                rte_eal_config_attach();
                rte_eal_mcfg_wait_complete(rte_config.mem_config);
                rte_eal_config_reattach();
+               eal_update_internal_config();
                break;
        case RTE_PROC_AUTO:
        case RTE_PROC_INVALID:
@@ -580,12 +603,20 @@ eal_parse_args(int argc, char **argv)
 
        argvopt = argv;
        optind = 1;
+       opterr = 0;
 
        while ((opt = getopt_long(argc, argvopt, eal_short_options,
                                  eal_long_options, &option_index)) != EOF) {
 
-               /* getopt is not happy, stop right now */
+               /*
+                * getopt didn't recognise the option; let's check the
+                * registered options to see if the flag is valid
+                */
                if (opt == '?') {
+                       ret = rte_option_parse(argv[optind-1]);
+                       if (ret == 0)
+                               continue;
+
                        eal_usage(prgname);
                        ret = -1;
                        goto out;
@@ -725,6 +756,9 @@ check_socket(const struct rte_memseg_list *msl, void *arg)
 {
        int *socket_id = arg;
 
+       if (msl->external)
+               return 0;
+
        return *socket_id == msl->socket_id;
 }
 
@@ -793,7 +827,8 @@ rte_eal_init(int argc, char **argv)
        int i, fctret, ret;
        pthread_t thread_id;
        static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0);
-       const char *logid;
+       const char *p;
+       static char logid[PATH_MAX];
        char cpuset[RTE_CPU_AFFINITY_STR_LEN];
        char thread_name[RTE_MAX_THREAD_NAME_LEN];
 
@@ -810,9 +845,8 @@ rte_eal_init(int argc, char **argv)
                return -1;
        }
 
-       logid = strrchr(argv[0], '/');
-       logid = strdup(logid ? logid + 1: argv[0]);
-
+       p = strrchr(argv[0], '/');
+       strlcpy(logid, p ? p + 1 : argv[0], sizeof(logid));
        thread_id = pthread_self();
 
        eal_reset_internal_config(&internal_config);
@@ -835,7 +869,7 @@ rte_eal_init(int argc, char **argv)
        }
 
        if (eal_plugins_init() < 0) {
-               rte_eal_init_alert("Cannot init plugins\n");
+               rte_eal_init_alert("Cannot init plugins");
                rte_errno = EINVAL;
                rte_atomic32_clear(&run_once);
                return -1;
@@ -850,7 +884,7 @@ rte_eal_init(int argc, char **argv)
        rte_config_init();
 
        if (rte_eal_intr_init() < 0) {
-               rte_eal_init_alert("Cannot init interrupt-handling thread\n");
+               rte_eal_init_alert("Cannot init interrupt-handling thread");
                return -1;
        }
 
@@ -858,30 +892,43 @@ rte_eal_init(int argc, char **argv)
         * bus through mp channel in the secondary process before the bus scan.
         */
        if (rte_mp_channel_init() < 0) {
-               rte_eal_init_alert("failed to init mp channel\n");
+               rte_eal_init_alert("failed to init mp channel");
                if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
                        rte_errno = EFAULT;
                        return -1;
                }
        }
 
+       /* register multi-process action callbacks for hotplug */
+       if (rte_mp_dev_hotplug_init() < 0) {
+               rte_eal_init_alert("failed to register mp callback for hotplug");
+               return -1;
+       }
+
        if (rte_bus_scan()) {
-               rte_eal_init_alert("Cannot scan the buses for devices\n");
+               rte_eal_init_alert("Cannot scan the buses for devices");
                rte_errno = ENODEV;
                rte_atomic32_clear(&run_once);
                return -1;
        }
 
-       /* autodetect the iova mapping mode (default is iova_pa) */
-       rte_eal_get_configuration()->iova_mode = rte_bus_get_iommu_class();
-
-       /* Workaround for KNI which requires physical address to work */
-       if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA &&
-                       rte_eal_check_module("rte_kni") == 1) {
-               rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA;
-               RTE_LOG(WARNING, EAL,
-                       "Some devices want IOVA as VA but PA will be used because.. "
-                       "KNI module inserted\n");
+       /* if no EAL option "--iova-mode=<pa|va>", use bus IOVA scheme */
+       if (internal_config.iova_mode == RTE_IOVA_DC) {
+               /* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */
+               rte_eal_get_configuration()->iova_mode =
+                       rte_bus_get_iommu_class();
+
+               /* Workaround for KNI which requires physical address to work */
+               if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA &&
+                               rte_eal_check_module("rte_kni") == 1) {
+                       rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA;
+                       RTE_LOG(WARNING, EAL,
+                               "Some devices want IOVA as VA but PA will be used because "
+                               "KNI module is inserted\n");
+               }
+       } else {
+               rte_eal_get_configuration()->iova_mode =
+                       internal_config.iova_mode;
        }
 
        if (internal_config.no_hugetlbfs == 0) {
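
The hunk above wires up the new --iova-mode EAL option: when it is given, bus IOVA autodetection (and the KNI workaround) is skipped entirely. An illustrative invocation forcing virtual addressing:

    ./testpmd -l 0-3 -n 4 --iova-mode=va -- -i
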
@@ -924,7 +971,7 @@ rte_eal_init(int argc, char **argv)
 
 #ifdef VFIO_PRESENT
        if (rte_eal_vfio_setup() < 0) {
-               rte_eal_init_alert("Cannot init VFIO\n");
+               rte_eal_init_alert("Cannot init VFIO");
                rte_errno = EAGAIN;
                rte_atomic32_clear(&run_once);
                return -1;
@@ -935,13 +982,13 @@ rte_eal_init(int argc, char **argv)
         * initialize memzones first.
         */
        if (rte_eal_memzone_init() < 0) {
-               rte_eal_init_alert("Cannot init memzone\n");
+               rte_eal_init_alert("Cannot init memzone");
                rte_errno = ENODEV;
                return -1;
        }
 
        if (rte_eal_memory_init() < 0) {
-               rte_eal_init_alert("Cannot init memory\n");
+               rte_eal_init_alert("Cannot init memory");
                rte_errno = ENOMEM;
                return -1;
        }
@@ -950,25 +997,25 @@ rte_eal_init(int argc, char **argv)
        eal_hugedirs_unlock();
 
        if (rte_eal_malloc_heap_init() < 0) {
-               rte_eal_init_alert("Cannot init malloc heap\n");
+               rte_eal_init_alert("Cannot init malloc heap");
                rte_errno = ENODEV;
                return -1;
        }
 
        if (rte_eal_tailqs_init() < 0) {
-               rte_eal_init_alert("Cannot init tail queues for objects\n");
+               rte_eal_init_alert("Cannot init tail queues for objects");
                rte_errno = EFAULT;
                return -1;
        }
 
        if (rte_eal_alarm_init() < 0) {
-               rte_eal_init_alert("Cannot init interrupt-handling thread\n");
+               rte_eal_init_alert("Cannot init interrupt-handling thread");
                /* rte_eal_alarm_init sets rte_errno on failure. */
                return -1;
        }
 
        if (rte_eal_timer_init() < 0) {
-               rte_eal_init_alert("Cannot init HPET or TSC timers\n");
+               rte_eal_init_alert("Cannot init HPET or TSC timers");
                rte_errno = ENOTSUP;
                return -1;
        }
@@ -979,8 +1026,8 @@ rte_eal_init(int argc, char **argv)
 
        ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
 
-       RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
-               rte_config.master_lcore, (int)thread_id, cpuset,
+       RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
+               rte_config.master_lcore, (uintptr_t)thread_id, cpuset,
                ret == 0 ? "" : "...");
 
        RTE_LCORE_FOREACH_SLAVE(i) {
@@ -1022,14 +1069,14 @@ rte_eal_init(int argc, char **argv)
        /* initialize services so vdevs register service during bus_probe. */
        ret = rte_service_init();
        if (ret) {
-               rte_eal_init_alert("rte_service_init() failed\n");
+               rte_eal_init_alert("rte_service_init() failed");
                rte_errno = ENOEXEC;
                return -1;
        }
 
        /* Probe all the buses and devices/drivers on them */
        if (rte_bus_probe()) {
-               rte_eal_init_alert("Cannot probe devices\n");
+               rte_eal_init_alert("Cannot probe devices");
                rte_errno = ENOTSUP;
                return -1;
        }
@@ -1051,6 +1098,9 @@ rte_eal_init(int argc, char **argv)
 
        rte_eal_mcfg_complete();
 
+       /* Call each registered callback, if enabled */
+       rte_option_init();
+
        return fctret;
 }
 
@@ -1059,7 +1109,12 @@ mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
                void *arg __rte_unused)
 {
        /* ms is const, so find this memseg */
-       struct rte_memseg *found = rte_mem_virt2memseg(ms->addr, msl);
+       struct rte_memseg *found;
+
+       if (msl->external)
+               return 0;
+
+       found = rte_mem_virt2memseg(ms->addr, msl);
 
        found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE;
 
index 1cf6aeb..d589c69 100644 (file)
@@ -4,6 +4,8 @@
 
 #include <string.h>
 #include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
 #include <sys/socket.h>
 #include <linux/netlink.h>
 
 #include <rte_malloc.h>
 #include <rte_interrupts.h>
 #include <rte_alarm.h>
+#include <rte_bus.h>
+#include <rte_eal.h>
+#include <rte_spinlock.h>
+#include <rte_errno.h>
 
 #include "eal_private.h"
 
 static struct rte_intr_handle intr_handle = {.fd = -1 };
 static bool monitor_started;
+static bool hotplug_handle;
 
 #define EAL_UEV_MSG_LEN 4096
 #define EAL_UEV_MSG_ELEM_LEN 128
 
+/*
+ * Spinlock for device hot-unplug failure handling. Any code that accesses the
+ * bus or a device, such as the SIGBUS handler or the memory-failure handling
+ * for a device, must take this lock to protect the bus and devices against
+ * race conditions.
+ */
+static rte_spinlock_t failure_handle_lock = RTE_SPINLOCK_INITIALIZER;
+
+static struct sigaction sigbus_action_old;
+
+static int sigbus_need_recover;
+
 static void dev_uev_handler(__rte_unused void *param);
 
 /* identify the system layer which reports this event. */
@@ -33,6 +52,55 @@ enum eal_dev_event_subsystem {
        EAL_DEV_EVENT_SUBSYSTEM_MAX
 };
 
+static void
+sigbus_action_recover(void)
+{
+       if (sigbus_need_recover) {
+               sigaction(SIGBUS, &sigbus_action_old, NULL);
+               sigbus_need_recover = 0;
+       }
+}
+
+static void sigbus_handler(int signum, siginfo_t *info,
+                               void *ctx __rte_unused)
+{
+       int ret;
+
+       RTE_LOG(DEBUG, EAL, "Thread[%d] caught SIGBUS, fault address: %p\n",
+               (int)pthread_self(), info->si_addr);
+
+       rte_spinlock_lock(&failure_handle_lock);
+       ret = rte_bus_sigbus_handler(info->si_addr);
+       rte_spinlock_unlock(&failure_handle_lock);
+       if (ret == -1) {
+               rte_exit(EXIT_FAILURE,
+                        "Failed to handle SIGBUS for hot-unplug "
+                        "(rte_errno: %s)!", strerror(rte_errno));
+       } else if (ret == 1) {
+               if ((sigbus_action_old.sa_flags & SA_SIGINFO)
+                   && sigbus_action_old.sa_sigaction) {
+                       (*(sigbus_action_old.sa_sigaction))(signum,
+                                                           info, ctx);
+               } else if (!(sigbus_action_old.sa_flags & SA_SIGINFO)
+                          && sigbus_action_old.sa_handler) {
+                       (*(sigbus_action_old.sa_handler))(signum);
+               } else {
+                       rte_exit(EXIT_FAILURE,
+                                "Failed to handle generic SIGBUS!");
+               }
+       }
+
+       RTE_LOG(DEBUG, EAL, "Successfully handled SIGBUS for hot-unplug!\n");
+}
+
+static int cmp_dev_name(const struct rte_device *dev,
+       const void *_name)
+{
+       const char *name = _name;
+
+       return strcmp(dev->name, name);
+}
+
 static int
 dev_uev_socket_fd_create(void)
 {
@@ -147,6 +215,9 @@ dev_uev_handler(__rte_unused void *param)
        struct rte_dev_event uevent;
        int ret;
        char buf[EAL_UEV_MSG_LEN];
+       struct rte_bus *bus;
+       struct rte_device *dev;
+       const char *busname = "";
 
        memset(&uevent, 0, sizeof(struct rte_dev_event));
        memset(buf, 0, EAL_UEV_MSG_LEN);
@@ -171,8 +242,43 @@ dev_uev_handler(__rte_unused void *param)
        RTE_LOG(DEBUG, EAL, "receive uevent(name:%s, type:%d, subsystem:%d)\n",
                uevent.devname, uevent.type, uevent.subsystem);
 
-       if (uevent.devname)
-               dev_callback_process(uevent.devname, uevent.type);
+       switch (uevent.subsystem) {
+       case EAL_DEV_EVENT_SUBSYSTEM_PCI:
+       case EAL_DEV_EVENT_SUBSYSTEM_UIO:
+               busname = "pci";
+               break;
+       default:
+               break;
+       }
+
+       if (uevent.devname) {
+               if (uevent.type == RTE_DEV_EVENT_REMOVE && hotplug_handle) {
+                       rte_spinlock_lock(&failure_handle_lock);
+                       bus = rte_bus_find_by_name(busname);
+                       if (bus == NULL) {
+                               RTE_LOG(ERR, EAL, "Cannot find bus (%s)\n",
+                                       busname);
+                               goto failure_handle_err;
+                       }
+
+                       dev = bus->find_device(NULL, cmp_dev_name,
+                                              uevent.devname);
+                       if (dev == NULL) {
+                               RTE_LOG(ERR, EAL, "Cannot find device (%s) on "
+                                       "bus (%s)\n", uevent.devname, busname);
+                               goto failure_handle_err;
+                       }
+
+                       ret = bus->hot_unplug_handler(dev);
+                       rte_spinlock_unlock(&failure_handle_lock);
+                       if (ret) {
+                               RTE_LOG(ERR, EAL, "Cannot handle hot-unplug "
+                                       "for device (%s)\n", dev->name);
+                               return;
+                       }
+               }
+               rte_dev_event_callback_process(uevent.devname, uevent.type);
+       }
+       return;
+
+failure_handle_err:
+       rte_spinlock_unlock(&failure_handle_lock);
 }
 
 int __rte_experimental
@@ -220,5 +326,67 @@ rte_dev_event_monitor_stop(void)
        close(intr_handle.fd);
        intr_handle.fd = -1;
        monitor_started = false;
+
        return 0;
 }
+
+int
+dev_sigbus_handler_register(void)
+{
+       sigset_t mask;
+       struct sigaction action;
+
+       rte_errno = 0;
+
+       if (sigbus_need_recover)
+               return 0;
+
+       sigemptyset(&mask);
+       sigaddset(&mask, SIGBUS);
+       action.sa_flags = SA_SIGINFO;
+       action.sa_mask = mask;
+       action.sa_sigaction = sigbus_handler;
+       sigbus_need_recover = !sigaction(SIGBUS, &action, &sigbus_action_old);
+
+       return rte_errno;
+}
+
+int
+dev_sigbus_handler_unregister(void)
+{
+       rte_errno = 0;
+
+       sigbus_action_recover();
+
+       return rte_errno;
+}
+
+int __rte_experimental
+rte_dev_hotplug_handle_enable(void)
+{
+       int ret = 0;
+
+       ret = dev_sigbus_handler_register();
+       if (ret < 0)
+               RTE_LOG(ERR, EAL,
+                       "Failed to register SIGBUS handler for devices.\n");
+
+       hotplug_handle = true;
+
+       return ret;
+}
+
+int __rte_experimental
+rte_dev_hotplug_handle_disable(void)
+{
+       int ret = 0;
+
+       ret = dev_sigbus_handler_unregister();
+       if (ret < 0)
+               RTE_LOG(ERR, EAL,
+                       "Failed to unregister SIGBUS handler for devices.\n");
+
+       hotplug_handle = false;
+
+       return ret;
+}
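
A sketch of how an application might opt in to hot-unplug handling; the device event monitor must be running for the REMOVE uevents handled above to arrive, and the error handling here is illustrative:

    if (rte_dev_event_monitor_start() < 0)
            rte_exit(EXIT_FAILURE, "cannot start device event monitor\n");
    if (rte_dev_hotplug_handle_enable() < 0)
            rte_exit(EXIT_FAILURE, "cannot enable hot-unplug handling\n");
    /* ... on shutdown, undo in reverse order ... */
    rte_dev_hotplug_handle_disable();
    rte_dev_event_monitor_stop();
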
index 3a7d4b2..0eab1cf 100644 (file)
@@ -6,6 +6,7 @@
 #include <sys/types.h>
 #include <sys/file.h>
 #include <dirent.h>
+#include <fcntl.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <stdio.h>
index 4076c6d..39252a8 100644 (file)
@@ -33,6 +33,7 @@
 #include <rte_errno.h>
 #include <rte_spinlock.h>
 #include <rte_pause.h>
+#include <rte_vfio.h>
 
 #include "eal_private.h"
 #include "eal_vfio.h"
@@ -308,6 +309,66 @@ vfio_disable_msix(const struct rte_intr_handle *intr_handle) {
 
        return ret;
 }
+
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+/* enable req notifier */
+static int
+vfio_enable_req(const struct rte_intr_handle *intr_handle)
+{
+       int len, ret;
+       char irq_set_buf[IRQ_SET_BUF_LEN];
+       struct vfio_irq_set *irq_set;
+       int *fd_ptr;
+
+       len = sizeof(irq_set_buf);
+
+       irq_set = (struct vfio_irq_set *) irq_set_buf;
+       irq_set->argsz = len;
+       irq_set->count = 1;
+       irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+                        VFIO_IRQ_SET_ACTION_TRIGGER;
+       irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
+       irq_set->start = 0;
+       fd_ptr = (int *) &irq_set->data;
+       *fd_ptr = intr_handle->fd;
+
+       ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+       if (ret) {
+               RTE_LOG(ERR, EAL, "Error enabling req interrupts for fd %d\n",
+                                               intr_handle->fd);
+               return -1;
+       }
+
+       return 0;
+}
+
+/* disable req notifier */
+static int
+vfio_disable_req(const struct rte_intr_handle *intr_handle)
+{
+       struct vfio_irq_set *irq_set;
+       char irq_set_buf[IRQ_SET_BUF_LEN];
+       int len, ret;
+
+       len = sizeof(struct vfio_irq_set);
+
+       irq_set = (struct vfio_irq_set *) irq_set_buf;
+       irq_set->argsz = len;
+       irq_set->count = 0;
+       irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+       irq_set->index = VFIO_PCI_REQ_IRQ_INDEX;
+       irq_set->start = 0;
+
+       ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+       if (ret)
+               RTE_LOG(ERR, EAL, "Error disabling req interrupts for fd %d\n",
+                       intr_handle->fd);
+
+       return ret;
+}
+#endif
 #endif
 
 static int
@@ -556,6 +617,12 @@ rte_intr_enable(const struct rte_intr_handle *intr_handle)
                if (vfio_enable_intx(intr_handle))
                        return -1;
                break;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+       case RTE_INTR_HANDLE_VFIO_REQ:
+               if (vfio_enable_req(intr_handle))
+                       return -1;
+               break;
+#endif
 #endif
        /* not used at this moment */
        case RTE_INTR_HANDLE_DEV_EVENT:
@@ -606,6 +673,12 @@ rte_intr_disable(const struct rte_intr_handle *intr_handle)
                if (vfio_disable_intx(intr_handle))
                        return -1;
                break;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+       case RTE_INTR_HANDLE_VFIO_REQ:
+               if (vfio_disable_req(intr_handle))
+                       return -1;
+               break;
+#endif
 #endif
        /* not used at this moment */
        case RTE_INTR_HANDLE_DEV_EVENT:
@@ -672,6 +745,12 @@ eal_intr_process_interrupts(struct epoll_event *events, int nfds)
                case RTE_INTR_HANDLE_VFIO_LEGACY:
                        bytes_read = sizeof(buf.vfio_intr_count);
                        break;
+#ifdef HAVE_VFIO_DEV_REQ_INTERFACE
+               case RTE_INTR_HANDLE_VFIO_REQ:
+                       bytes_read = 0;
+                       call = true;
+                       break;
+#endif
 #endif
                case RTE_INTR_HANDLE_VDEV:
                case RTE_INTR_HANDLE_EXT:
index aa95551..48b9c73 100644 (file)
@@ -34,6 +34,7 @@
 #include <rte_log.h>
 #include <rte_eal_memconfig.h>
 #include <rte_eal.h>
+#include <rte_errno.h>
 #include <rte_memory.h>
 #include <rte_spinlock.h>
 
@@ -51,31 +52,56 @@ const int anonymous_hugepages_supported =
 #define RTE_MAP_HUGE_SHIFT 26
 #endif
 
+/*
+ * we don't actually care if memfd itself is supported - we only need to check
+ * if memfd supports hugetlbfs, as that already implies memfd support.
+ *
+ * also, this is not a constant, because while we may be *compiled* with memfd
+ * hugetlbfs support, we might not be *running* on a system that supports memfd
+ * and/or memfd with hugetlbfs, so we need to be able to adjust this flag at
+ * runtime, and fall back to anonymous memory.
+ */
+static int memfd_create_supported =
+#ifdef MFD_HUGETLB
+#define MEMFD_SUPPORTED
+               1;
+#else
+               0;
+#endif
+
 /*
  * not all kernel version support fallocate on hugetlbfs, so fall back to
  * ftruncate and disallow deallocation if fallocate is not supported.
  */
 static int fallocate_supported = -1; /* unknown */
 
-/* for single-file segments, we need some kind of mechanism to keep track of
+/*
+ * we have two modes - single file segments, and file-per-page mode.
+ *
+ * for single-file segments, we need some kind of mechanism to keep track of
  * which hugepages can be freed back to the system, and which cannot. we cannot
  * use flock() because they don't allow locking parts of a file, and we cannot
  * use fcntl() due to issues with their semantics, so we will have to rely on a
- * bunch of lockfiles for each page.
+ * bunch of lockfiles for each page. so, we will use 'fds' array to keep track
+ * of per-page lockfiles. we will store the actual segment list fd in the
+ * 'memseg_list_fd' field.
+ *
+ * for file-per-page mode, each page will have its own fd, so 'memseg_list_fd'
+ * will be invalid (set to -1), and we'll use 'fds' to keep track of page fd's.
  *
  * we cannot know how many pages a system will have in advance, but we do know
  * that they come in lists, and we know lengths of these lists. so, simply store
  * a malloc'd array of fd's indexed by list and segment index.
  *
  * they will be initialized at startup, and filled as we allocate/deallocate
- * segments. also, use this to track memseg list proper fd.
+ * segments.
  */
 static struct {
        int *fds; /**< dynamically allocated array of segment lock fd's */
        int memseg_list_fd; /**< memseg list fd */
        int len; /**< total length of the array */
        int count; /**< entries used in an array */
-} lock_fds[RTE_MAX_MEMSEG_LISTS];
+} fd_list[RTE_MAX_MEMSEG_LISTS];
 
 /** local copy of a memory map, used to synchronize memory hotplug in MP */
 static struct rte_memseg_list local_memsegs[RTE_MAX_MEMSEG_LISTS];
@@ -182,6 +208,31 @@ get_file_size(int fd)
        return st.st_size;
 }
 
+static inline uint32_t
+bsf64(uint64_t v)
+{
+       return (uint32_t)__builtin_ctzll(v);
+}
+
+static inline uint32_t
+log2_u64(uint64_t v)
+{
+       if (v == 0)
+               return 0;
+       v = rte_align64pow2(v);
+       return bsf64(v);
+}
+
+static int
+pagesz_flags(uint64_t page_sz)
+{
+       /* as per mmap() manpage, all page sizes are log2 of page size
+        * shifted by MAP_HUGE_SHIFT
+        */
+       int log2 = log2_u64(page_sz);
+       return log2 << RTE_MAP_HUGE_SHIFT;
+}
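
Worked example: for 2 MiB hugepages, log2_u64(2 * 1024 * 1024) is 21, so pagesz_flags() returns 21 << 26, which is exactly the kernel's MAP_HUGE_2MB/MFD_HUGE_2MB encoding.
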
+
 /* returns 1 on successful lock, 0 on unsuccessful lock, -1 on error */
 static int lock(int fd, int type)
 {
@@ -209,12 +260,12 @@ static int get_segment_lock_fd(int list_idx, int seg_idx)
        char path[PATH_MAX] = {0};
        int fd;
 
-       if (list_idx < 0 || list_idx >= (int)RTE_DIM(lock_fds))
+       if (list_idx < 0 || list_idx >= (int)RTE_DIM(fd_list))
                return -1;
-       if (seg_idx < 0 || seg_idx >= lock_fds[list_idx].len)
+       if (seg_idx < 0 || seg_idx >= fd_list[list_idx].len)
                return -1;
 
-       fd = lock_fds[list_idx].fds[seg_idx];
+       fd = fd_list[list_idx].fds[seg_idx];
        /* does this lock already exist? */
        if (fd >= 0)
                return fd;
@@ -236,8 +287,8 @@ static int get_segment_lock_fd(int list_idx, int seg_idx)
                return -1;
        }
        /* store it for future reference */
-       lock_fds[list_idx].fds[seg_idx] = fd;
-       lock_fds[list_idx].count++;
+       fd_list[list_idx].fds[seg_idx] = fd;
+       fd_list[list_idx].count++;
        return fd;
 }
 
@@ -245,12 +296,12 @@ static int unlock_segment(int list_idx, int seg_idx)
 {
        int fd, ret;
 
-       if (list_idx < 0 || list_idx >= (int)RTE_DIM(lock_fds))
+       if (list_idx < 0 || list_idx >= (int)RTE_DIM(fd_list))
                return -1;
-       if (seg_idx < 0 || seg_idx >= lock_fds[list_idx].len)
+       if (seg_idx < 0 || seg_idx >= fd_list[list_idx].len)
                return -1;
 
-       fd = lock_fds[list_idx].fds[seg_idx];
+       fd = fd_list[list_idx].fds[seg_idx];
 
        /* upgrade lock to exclusive to see if we can remove the lockfile */
        ret = lock(fd, LOCK_EX);
@@ -270,25 +321,77 @@ static int unlock_segment(int list_idx, int seg_idx)
         * and remove it from list anyway.
         */
        close(fd);
-       lock_fds[list_idx].fds[seg_idx] = -1;
-       lock_fds[list_idx].count--;
+       fd_list[list_idx].fds[seg_idx] = -1;
+       fd_list[list_idx].count--;
 
        if (ret < 0)
                return -1;
        return 0;
 }
 
+static int
+get_seg_memfd(struct hugepage_info *hi __rte_unused,
+               unsigned int list_idx __rte_unused,
+               unsigned int seg_idx __rte_unused)
+{
+#ifdef MEMFD_SUPPORTED
+       int fd;
+       char segname[250]; /* as per manpage, limit is 249 bytes plus null */
+
+       if (internal_config.single_file_segments) {
+               fd = fd_list[list_idx].memseg_list_fd;
+
+               if (fd < 0) {
+                       int flags = MFD_HUGETLB | pagesz_flags(hi->hugepage_sz);
+
+                       snprintf(segname, sizeof(segname), "seg_%i", list_idx);
+                       fd = memfd_create(segname, flags);
+                       if (fd < 0) {
+                               RTE_LOG(DEBUG, EAL, "%s(): memfd create failed: %s\n",
+                                       __func__, strerror(errno));
+                               return -1;
+                       }
+                       fd_list[list_idx].memseg_list_fd = fd;
+               }
+       } else {
+               fd = fd_list[list_idx].fds[seg_idx];
+
+               if (fd < 0) {
+                       int flags = MFD_HUGETLB | pagesz_flags(hi->hugepage_sz);
+
+                       snprintf(segname, sizeof(segname), "seg_%i-%i",
+                                       list_idx, seg_idx);
+                       fd = memfd_create(segname, flags);
+                       if (fd < 0) {
+                               RTE_LOG(DEBUG, EAL, "%s(): memfd create failed: %s\n",
+                                       __func__, strerror(errno));
+                               return -1;
+                       }
+                       fd_list[list_idx].fds[seg_idx] = fd;
+               }
+       }
+       return fd;
+#endif
+       return -1;
+}
+
 static int
 get_seg_fd(char *path, int buflen, struct hugepage_info *hi,
                unsigned int list_idx, unsigned int seg_idx)
 {
        int fd;
 
+       /* for in-memory mode, we only make it here when we're sure we support
+        * memfd, and this is a special case.
+        */
+       if (internal_config.in_memory)
+               return get_seg_memfd(hi, list_idx, seg_idx);
+
        if (internal_config.single_file_segments) {
                /* create a hugepage file path */
                eal_get_hugefile_path(path, buflen, hi->hugedir, list_idx);
 
-               fd = lock_fds[list_idx].memseg_list_fd;
+               fd = fd_list[list_idx].memseg_list_fd;
 
                if (fd < 0) {
                        fd = open(path, O_CREAT | O_RDWR, 0600);
@@ -304,24 +407,30 @@ get_seg_fd(char *path, int buflen, struct hugepage_info *hi,
                                close(fd);
                                return -1;
                        }
-                       lock_fds[list_idx].memseg_list_fd = fd;
+                       fd_list[list_idx].memseg_list_fd = fd;
                }
        } else {
                /* create a hugepage file path */
                eal_get_hugefile_path(path, buflen, hi->hugedir,
                                list_idx * RTE_MAX_MEMSEG_PER_LIST + seg_idx);
-               fd = open(path, O_CREAT | O_RDWR, 0600);
+
+               fd = fd_list[list_idx].fds[seg_idx];
+
                if (fd < 0) {
-                       RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n", __func__,
-                                       strerror(errno));
-                       return -1;
-               }
-               /* take out a read lock */
-               if (lock(fd, LOCK_SH) < 0) {
-                       RTE_LOG(ERR, EAL, "%s(): lock failed: %s\n",
-                               __func__, strerror(errno));
-                       close(fd);
-                       return -1;
+                       fd = open(path, O_CREAT | O_RDWR, 0600);
+                       if (fd < 0) {
+                               RTE_LOG(DEBUG, EAL, "%s(): open failed: %s\n",
+                                       __func__, strerror(errno));
+                               return -1;
+                       }
+                       /* take out a read lock */
+                       if (lock(fd, LOCK_SH) < 0) {
+                               RTE_LOG(ERR, EAL, "%s(): lock failed: %s\n",
+                                       __func__, strerror(errno));
+                               close(fd);
+                               return -1;
+                       }
+                       fd_list[list_idx].fds[seg_idx] = fd;
                }
        }
        return fd;
@@ -332,6 +441,33 @@ resize_hugefile(int fd, char *path, int list_idx, int seg_idx,
                uint64_t fa_offset, uint64_t page_sz, bool grow)
 {
        bool again = false;
+
+       /* in-memory mode is a special case, because we don't need to perform
+        * any locking, and we can be sure that fallocate() is supported.
+        */
+       if (internal_config.in_memory) {
+               int flags = grow ? 0 : FALLOC_FL_PUNCH_HOLE |
+                               FALLOC_FL_KEEP_SIZE;
+               int ret;
+
+               /* grow or shrink the file */
+               ret = fallocate(fd, flags, fa_offset, page_sz);
+
+               if (ret < 0) {
+                       RTE_LOG(DEBUG, EAL, "%s(): fallocate() failed: %s\n",
+                                       __func__,
+                                       strerror(errno));
+                       return -1;
+               }
+               /* increase/decrease total segment count */
+               fd_list[list_idx].count += (grow ? 1 : -1);
+               if (!grow && fd_list[list_idx].count == 0) {
+                       close(fd_list[list_idx].memseg_list_fd);
+                       fd_list[list_idx].memseg_list_fd = -1;
+               }
+               return 0;
+       }
+
        do {
                if (fallocate_supported == 0) {
                        /* we cannot deallocate memory if fallocate() is not
@@ -410,9 +546,9 @@ resize_hugefile(int fd, char *path, int list_idx, int seg_idx,
                                 * page file fd, so that one of the processes
                                 * could then delete the file after shrinking.
                                 */
-                               if (ret < 1 && lock_fds[list_idx].count == 0) {
+                               if (ret < 1 && fd_list[list_idx].count == 0) {
                                        close(fd);
-                                       lock_fds[list_idx].memseg_list_fd = -1;
+                                       fd_list[list_idx].memseg_list_fd = -1;
                                }
 
                                if (ret < 0) {
@@ -448,13 +584,13 @@ resize_hugefile(int fd, char *path, int list_idx, int seg_idx,
                                 * more segments active in this segment list,
                                 * and remove the file if there aren't.
                                 */
-                               if (lock_fds[list_idx].count == 0) {
+                               if (fd_list[list_idx].count == 0) {
                                        if (unlink(path))
                                                RTE_LOG(ERR, EAL, "%s(): unlinking '%s' failed: %s\n",
                                                        __func__, path,
                                                        strerror(errno));
                                        close(fd);
-                                       lock_fds[list_idx].memseg_list_fd = -1;
+                                       fd_list[list_idx].memseg_list_fd = -1;
                                }
                        }
                }
@@ -481,26 +617,34 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
        void *new_addr;
 
        alloc_sz = hi->hugepage_sz;
-       if (!internal_config.single_file_segments &&
-                       internal_config.in_memory &&
-                       anonymous_hugepages_supported) {
-               int log2, flags;
-
-               log2 = rte_log2_u32(alloc_sz);
-               /* as per mmap() manpage, all page sizes are log2 of page size
-                * shifted by MAP_HUGE_SHIFT
-                */
-               flags = (log2 << RTE_MAP_HUGE_SHIFT) | MAP_HUGETLB | MAP_FIXED |
+
+       /* these are checked at init, but code analyzers don't know that */
+       if (internal_config.in_memory && !anonymous_hugepages_supported) {
+               RTE_LOG(ERR, EAL, "Anonymous hugepages not supported, in-memory mode cannot allocate memory\n");
+               return -1;
+       }
+       if (internal_config.in_memory && !memfd_create_supported &&
+                       internal_config.single_file_segments) {
+               RTE_LOG(ERR, EAL, "Single-file segments are not supported without memfd support\n");
+               return -1;
+       }
+
+       /* in-memory without memfd is a special case */
+       int mmap_flags;
+
+       if (internal_config.in_memory && !memfd_create_supported) {
+               int pagesz_flag, flags;
+
+               pagesz_flag = pagesz_flags(alloc_sz);
+               flags = pagesz_flag | MAP_HUGETLB | MAP_FIXED |
                                MAP_PRIVATE | MAP_ANONYMOUS;
                fd = -1;
-               va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE, flags, -1, 0);
-
-               /* single-file segments codepath will never be active because
-                * in-memory mode is incompatible with it and it's stopped at
-                * EAL initialization stage, however the compiler doesn't know
-                * that and complains about map_offset being used uninitialized
-                * on failure codepaths while having in-memory mode enabled. so,
-                * assign a value here.
+               mmap_flags = flags;
+
+               /* single-file segments codepath will never be active
+                * here because in-memory mode is incompatible with the
+                * fallback path, and it's stopped at EAL initialization
+                * stage.
                 */
                map_offset = 0;
        } else {
@@ -524,7 +668,8 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
                                        __func__, strerror(errno));
                                goto resized;
                        }
-                       if (internal_config.hugepage_unlink) {
+                       if (internal_config.hugepage_unlink &&
+                                       !internal_config.in_memory) {
                                if (unlink(path)) {
                                        RTE_LOG(DEBUG, EAL, "%s(): unlink() failed: %s\n",
                                                __func__, strerror(errno));
@@ -532,16 +677,16 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
                                }
                        }
                }
-
-               /*
-                * map the segment, and populate page tables, the kernel fills
-                * this segment with zeros if it's a new page.
-                */
-               va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE,
-                               MAP_SHARED | MAP_POPULATE | MAP_FIXED, fd,
-                               map_offset);
+               mmap_flags = MAP_SHARED | MAP_POPULATE | MAP_FIXED;
        }
 
+       /*
+        * map the segment, and populate page tables, the kernel fills
+        * this segment with zeros if it's a new page.
+        */
+       va = mmap(addr, alloc_sz, PROT_READ | PROT_WRITE, mmap_flags, fd,
+                       map_offset);
+
        if (va == MAP_FAILED) {
                RTE_LOG(DEBUG, EAL, "%s(): mmap() failed: %s\n", __func__,
                        strerror(errno));
@@ -593,10 +738,6 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
                goto mapped;
        }
 #endif
-       /* for non-single file segments that aren't in-memory, we can close fd
-        * here */
-       if (!internal_config.single_file_segments && !internal_config.in_memory)
-               close(fd);
 
        ms->addr = addr;
        ms->hugepage_sz = alloc_sz;
@@ -626,7 +767,10 @@ unmapped:
                RTE_LOG(CRIT, EAL, "Can't mmap holes in our virtual address space\n");
        }
 resized:
-       /* in-memory mode will never be single-file-segments mode */
+       /* some codepaths will return negative fd, so exit early */
+       if (fd < 0)
+               return -1;
+
        if (internal_config.single_file_segments) {
                resize_hugefile(fd, path, list_idx, seg_idx, map_offset,
                                alloc_sz, false);
@@ -638,6 +782,7 @@ resized:
                                lock(fd, LOCK_EX) == 1)
                        unlink(path);
                close(fd);
+               fd_list[list_idx].fds[seg_idx] = -1;
        }
        return -1;
 }
@@ -648,7 +793,8 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi,
 {
        uint64_t map_offset;
        char path[PATH_MAX];
-       int fd, ret;
+       int fd, ret = 0;
+       bool exit_early;
 
        /* erase page data */
        memset(ms->addr, 0, ms->len);
@@ -660,8 +806,17 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi,
                return -1;
        }
 
+       exit_early = false;
+
+       /* if we're using anonymous hugepages, nothing to be done */
+       if (internal_config.in_memory && !memfd_create_supported)
+               exit_early = true;
+
        /* if we've already unlinked the page, nothing needs to be done */
-       if (internal_config.hugepage_unlink) {
+       if (!internal_config.in_memory && internal_config.hugepage_unlink)
+               exit_early = true;
+
+       if (exit_early) {
                memset(ms, 0, sizeof(*ms));
                return 0;
        }
@@ -684,14 +839,17 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi,
                /* if we're able to take out a write lock, we're the last one
                 * holding onto this page.
                 */
-               ret = lock(fd, LOCK_EX);
-               if (ret >= 0) {
-                       /* no one else is using this page */
-                       if (ret == 1)
-                               unlink(path);
+               if (!internal_config.in_memory) {
+                       ret = lock(fd, LOCK_EX);
+                       if (ret >= 0) {
+                               /* no one else is using this page */
+                               if (ret == 1)
+                                       unlink(path);
+                       }
                }
                /* closing fd will drop the lock */
                close(fd);
+               fd_list[list_idx].fds[seg_idx] = -1;
        }
 
        memset(ms, 0, sizeof(*ms));
@@ -828,7 +986,7 @@ free_seg_walk(const struct rte_memseg_list *msl, void *arg)
        int msl_idx, seg_idx, ret, dir_fd = -1;
 
        start_addr = (uintptr_t) msl->base_va;
-       end_addr = start_addr + msl->memseg_arr.len * (size_t)msl->page_sz;
+       end_addr = start_addr + msl->len;
 
        if ((uintptr_t)wa->ms->addr < start_addr ||
                        (uintptr_t)wa->ms->addr >= end_addr)
@@ -1250,6 +1408,9 @@ sync_walk(const struct rte_memseg_list *msl, void *arg __rte_unused)
        unsigned int i;
        int msl_idx;
 
+       if (msl->external)
+               return 0;
+
        msl_idx = msl - mcfg->memsegs;
        primary_msl = &mcfg->memsegs[msl_idx];
        local_msl = &local_memsegs[msl_idx];
@@ -1298,6 +1459,9 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
        char name[PATH_MAX];
        int msl_idx, ret;
 
+       if (msl->external)
+               return 0;
+
        msl_idx = msl - mcfg->memsegs;
        primary_msl = &mcfg->memsegs[msl_idx];
        local_msl = &local_memsegs[msl_idx];
@@ -1314,50 +1478,176 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
                return -1;
        }
        local_msl->base_va = primary_msl->base_va;
+       local_msl->len = primary_msl->len;
 
        return 0;
 }
 
 static int
-secondary_lock_list_create_walk(const struct rte_memseg_list *msl,
-               void *arg __rte_unused)
+alloc_list(int list_idx, int len)
 {
-       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-       unsigned int i, len;
-       int msl_idx;
        int *data;
+       int i;
 
-       msl_idx = msl - mcfg->memsegs;
-       len = msl->memseg_arr.len;
-
-       /* ensure we have space to store lock fd per each possible segment */
+       /* ensure we have space to store fd per each possible segment */
        data = malloc(sizeof(int) * len);
        if (data == NULL) {
-               RTE_LOG(ERR, EAL, "Unable to allocate space for lock descriptors\n");
+               RTE_LOG(ERR, EAL, "Unable to allocate space for file descriptors\n");
                return -1;
        }
        /* set all fd's as invalid */
        for (i = 0; i < len; i++)
                data[i] = -1;
 
-       lock_fds[msl_idx].fds = data;
-       lock_fds[msl_idx].len = len;
-       lock_fds[msl_idx].count = 0;
-       lock_fds[msl_idx].memseg_list_fd = -1;
+       fd_list[list_idx].fds = data;
+       fd_list[list_idx].len = len;
+       fd_list[list_idx].count = 0;
+       fd_list[list_idx].memseg_list_fd = -1;
+
+       return 0;
+}
+
+static int
+fd_list_create_walk(const struct rte_memseg_list *msl,
+               void *arg __rte_unused)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+       unsigned int len;
+       int msl_idx;
+
+       if (msl->external)
+               return 0;
+
+       msl_idx = msl - mcfg->memsegs;
+       len = msl->memseg_arr.len;
+
+       return alloc_list(msl_idx, len);
+}
+
+int
+eal_memalloc_set_seg_fd(int list_idx, int seg_idx, int fd)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+       /* if list is not allocated, allocate it */
+       if (fd_list[list_idx].len == 0) {
+               int len = mcfg->memsegs[list_idx].memseg_arr.len;
+
+               if (alloc_list(list_idx, len) < 0)
+                       return -ENOMEM;
+       }
+       fd_list[list_idx].fds[seg_idx] = fd;
 
        return 0;
 }
 
+int
+eal_memalloc_get_seg_fd(int list_idx, int seg_idx)
+{
+       int fd;
+       if (internal_config.single_file_segments) {
+               fd = fd_list[list_idx].memseg_list_fd;
+       } else if (fd_list[list_idx].len == 0) {
+               /* list not initialized */
+               fd = -1;
+       } else {
+               fd = fd_list[list_idx].fds[seg_idx];
+       }
+       if (fd < 0)
+               return -ENODEV;
+       return fd;
+}
+
+static int
+test_memfd_create(void)
+{
+#ifdef MEMFD_SUPPORTED
+       unsigned int i;
+       for (i = 0; i < internal_config.num_hugepage_sizes; i++) {
+               uint64_t pagesz = internal_config.hugepage_info[i].hugepage_sz;
+               int pagesz_flag = pagesz_flags(pagesz);
+               int flags;
+
+               flags = pagesz_flag | MFD_HUGETLB;
+               int fd = memfd_create("test", flags);
+               if (fd < 0) {
+                       /* we failed - let memalloc know this isn't working */
+                       if (errno == EINVAL) {
+                               memfd_create_supported = 0;
+                               return 0; /* not supported */
+                       }
+
+                       /* we got other error - something's wrong */
+                       return -1; /* error */
+               }
+               close(fd);
+               return 1; /* supported */
+       }
+#endif
+       return 0; /* not supported */
+}
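
For reference, the same probe in standalone form. This is a sketch assuming a kernel that knows MFD_HUGETLB (4.14 or newer) and a libc exposing memfd_create() (glibc 2.27 or newer); instead of pagesz_flags(), the page size is encoded with the MFD_HUGE_2MB convenience flag:

    #define _GNU_SOURCE
    #include <errno.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/mman.h>
    #include <linux/memfd.h>

    int main(void)
    {
            int fd = memfd_create("probe", MFD_HUGETLB | MFD_HUGE_2MB);

            if (fd < 0) {
                    /* EINVAL is how older kernels report missing support */
                    printf("hugepage memfd not supported (errno %d)\n", errno);
                    return 0;
            }
            close(fd);
            printf("hugepage memfd supported\n");
            return 0;
    }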
+
+int
+eal_memalloc_get_seg_fd_offset(int list_idx, int seg_idx, size_t *offset)
+{
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+
+       /* fd_list not initialized? */
+       if (fd_list[list_idx].len == 0)
+               return -ENODEV;
+       if (internal_config.single_file_segments) {
+               size_t pgsz = mcfg->memsegs[list_idx].page_sz;
+
+               /* segment not active? */
+               if (fd_list[list_idx].memseg_list_fd < 0)
+                       return -ENOENT;
+               *offset = pgsz * seg_idx;
+       } else {
+               /* segment not active? */
+               if (fd_list[list_idx].fds[seg_idx] < 0)
+                       return -ENOENT;
+               *offset = 0;
+       }
+       return 0;
+}
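
Applications reach these internals through the thread-safe wrappers exported in this release; rte_memseg_get_fd() and rte_memseg_get_fd_offset() appear in the EAL version map further down. A minimal usage sketch, with error handling abbreviated:

    #include <stdio.h>
    #include <rte_memory.h>

    static void
    print_seg_fd(const void *addr)
    {
            struct rte_memseg *ms = rte_mem_virt2memseg(addr, NULL);
            size_t offset;
            int fd;

            if (ms == NULL)
                    return;
            fd = rte_memseg_get_fd(ms);
            if (fd < 0)
                    return; /* no fd available for this segment */
            if (rte_memseg_get_fd_offset(ms, &offset) == 0)
                    printf("segment fd %d, offset %zu\n", fd, offset);
    }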
+
 int
 eal_memalloc_init(void)
 {
        if (rte_eal_process_type() == RTE_PROC_SECONDARY)
                if (rte_memseg_list_walk(secondary_msl_create_walk, NULL) < 0)
                        return -1;
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY &&
+                       internal_config.in_memory) {
+               int mfd_res = test_memfd_create();
 
-       /* initialize all of the lock fd lists */
-       if (internal_config.single_file_segments)
-               if (rte_memseg_list_walk(secondary_lock_list_create_walk, NULL))
+               if (mfd_res < 0) {
+                       RTE_LOG(ERR, EAL, "Unable to check if memfd is supported\n");
+                       return -1;
+               }
+               if (mfd_res == 1)
+                       RTE_LOG(DEBUG, EAL, "Using memfd for anonymous memory\n");
+               else
+                       RTE_LOG(INFO, EAL, "Using memfd is not supported, falling back to anonymous hugepages\n");
+
+               /* single-file segments mode combined with in-memory mode is
+                * only supported when memfd_create supports hugetlbfs, so
+                * enforce that here using the test result from above.
+                */
+               if (internal_config.single_file_segments &&
+                               mfd_res != 1) {
+                       RTE_LOG(ERR, EAL, "Single-file segments mode cannot be used without memfd support\n");
                        return -1;
+               }
+               /* this cannot ever happen but better safe than sorry */
+               if (!anonymous_hugepages_supported) {
+                       RTE_LOG(ERR, EAL, "Using anonymous memory is not supported\n");
+                       return -1;
+               }
+       }
+
+       /* initialize all of the fd lists */
+       if (rte_memseg_list_walk(fd_list_create_walk, NULL))
+               return -1;
        return 0;
 }
index dbf1949..fce86fd 100644 (file)
@@ -5,6 +5,7 @@
 
 #define _FILE_OFFSET_BITS 64
 #include <errno.h>
+#include <fcntl.h>
 #include <stdarg.h>
 #include <stdbool.h>
 #include <stdlib.h>
@@ -17,6 +18,7 @@
 #include <sys/stat.h>
 #include <sys/queue.h>
 #include <sys/file.h>
+#include <sys/resource.h>
 #include <unistd.h>
 #include <limits.h>
 #include <sys/ioctl.h>
@@ -263,7 +265,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
        int node_id = -1;
        int essential_prev = 0;
        int oldpolicy;
-       struct bitmask *oldmask = numa_allocate_nodemask();
+       struct bitmask *oldmask = NULL;
        bool have_numa = true;
        unsigned long maxnode = 0;
 
@@ -275,6 +277,7 @@ map_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi,
 
        if (have_numa) {
                RTE_LOG(DEBUG, EAL, "Trying to obtain current memory policy.\n");
+               oldmask = numa_allocate_nodemask();
                if (get_mempolicy(&oldpolicy, oldmask->maskp,
                                  oldmask->size + 1, 0, 0) < 0) {
                        RTE_LOG(ERR, EAL,
@@ -402,7 +405,8 @@ out:
                        numa_set_localalloc();
                }
        }
-       numa_free_cpumask(oldmask);
+       if (oldmask != NULL)
+               numa_free_cpumask(oldmask);
 #endif
        return i;
 }
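
The change above makes the nodemask allocation conditional: numa_allocate_nodemask() is only called once NUMA support is confirmed, and the mask is freed only if it was actually allocated. A reduced sketch of the resulting pattern, assuming libnuma is linked:

    #include <numa.h>
    #include <numaif.h>

    static void
    with_saved_mempolicy(void)
    {
            struct bitmask *oldmask = NULL;
            int oldpolicy = MPOL_DEFAULT;

            if (numa_available() != -1) {
                    oldmask = numa_allocate_nodemask();
                    get_mempolicy(&oldpolicy, oldmask->maskp,
                                  oldmask->size + 1, 0, 0);
            }

            /* ... map hugepages, possibly binding to specific nodes ... */

            if (oldmask != NULL) {
                    set_mempolicy(oldpolicy, oldmask->maskp,
                                  oldmask->size + 1);
                    numa_free_nodemask(oldmask);
            }
    }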
@@ -584,7 +588,7 @@ unlink_hugepage_files(struct hugepage_file *hugepg_tbl,
        for (page = 0; page < nrpages; page++) {
                struct hugepage_file *hp = &hugepg_tbl[page];
 
-               if (hp->final_va != NULL && unlink(hp->filepath)) {
+               if (hp->orig_va != NULL && unlink(hp->filepath)) {
                        RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n",
                                __func__, hp->filepath, strerror(errno));
                }
@@ -771,7 +775,10 @@ remap_segment(struct hugepage_file *hugepages, int seg_start, int seg_end)
 
                rte_fbarray_set_used(arr, ms_idx);
 
-               close(fd);
+               /* store segment fd internally */
+               if (eal_memalloc_set_seg_fd(msl_idx, ms_idx, fd) < 0)
+                       RTE_LOG(ERR, EAL, "Could not store segment fd: %s\n",
+                               rte_strerror(rte_errno));
        }
        RTE_LOG(DEBUG, EAL, "Allocated %" PRIu64 "M on socket %i\n",
                        (seg_len * page_sz) >> 20, socket_id);
@@ -857,6 +864,7 @@ alloc_va_space(struct rte_memseg_list *msl)
                return -1;
        }
        msl->base_va = addr;
+       msl->len = mem_sz;
 
        return 0;
 }
@@ -1365,6 +1373,7 @@ eal_legacy_hugepage_init(void)
                msl->base_va = addr;
                msl->page_sz = page_sz;
                msl->socket_id = 0;
+               msl->len = internal_config.memory;
 
                /* populate memsegs. each memseg is one page long */
                for (cur_seg = 0; cur_seg < n_segs; cur_seg++) {
@@ -1611,7 +1620,7 @@ eal_legacy_hugepage_init(void)
                if (msl->memseg_arr.count > 0)
                        continue;
                /* this is an unused list, deallocate it */
-               mem_sz = (size_t)msl->page_sz * msl->memseg_arr.len;
+               mem_sz = msl->len;
                munmap(msl->base_va, mem_sz);
                msl->base_va = NULL;
 
@@ -1770,6 +1779,7 @@ getFileSize(int fd)
 static int
 eal_legacy_hugepage_attach(void)
 {
+       struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
        struct hugepage_file *hp = NULL;
        unsigned int num_hp = 0;
        unsigned int i = 0;
@@ -1813,6 +1823,9 @@ eal_legacy_hugepage_attach(void)
                struct hugepage_file *hf = &hp[i];
                size_t map_sz = hf->size;
                void *map_addr = hf->final_va;
+               int msl_idx, ms_idx;
+               struct rte_memseg_list *msl;
+               struct rte_memseg *ms;
 
                /* if size is zero, no more pages left */
                if (map_sz == 0)
@@ -1830,25 +1843,50 @@ eal_legacy_hugepage_attach(void)
                if (map_addr == MAP_FAILED) {
                        RTE_LOG(ERR, EAL, "Could not map %s: %s\n",
                                hf->filepath, strerror(errno));
-                       close(fd);
-                       goto error;
+                       goto fd_error;
                }
 
                /* set shared lock on the file. */
                if (flock(fd, LOCK_SH) < 0) {
                        RTE_LOG(DEBUG, EAL, "%s(): Locking file failed: %s\n",
                                __func__, strerror(errno));
-                       close(fd);
-                       goto error;
+                       goto fd_error;
                }
 
-               close(fd);
+               /* find segment data */
+               msl = rte_mem_virt2memseg_list(map_addr);
+               if (msl == NULL) {
+                       RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg list\n",
+                               __func__);
+                       goto fd_error;
+               }
+               ms = rte_mem_virt2memseg(map_addr, msl);
+               if (ms == NULL) {
+                       RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg\n",
+                               __func__);
+                       goto fd_error;
+               }
+
+               msl_idx = msl - mcfg->memsegs;
+               ms_idx = rte_fbarray_find_idx(&msl->memseg_arr, ms);
+               if (ms_idx < 0) {
+                       RTE_LOG(DEBUG, EAL, "%s(): Cannot find memseg idx\n",
+                               __func__);
+                       goto fd_error;
+               }
+
+               /* store segment fd internally */
+               if (eal_memalloc_set_seg_fd(msl_idx, ms_idx, fd) < 0)
+                       RTE_LOG(ERR, EAL, "Could not store segment fd: %s\n",
+                               rte_strerror(rte_errno));
        }
        /* unmap the hugepage config file, since we are done using it */
        munmap(hp, size);
        close(fd_hugepage);
        return 0;
 
+fd_error:
+       close(fd);
 error:
        /* map all segments into memory to make sure we get the addrs */
        cur_seg = 0;
@@ -2093,18 +2131,65 @@ static int __rte_unused
 memseg_primary_init(void)
 {
        struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
-       int i, socket_id, hpi_idx, msl_idx = 0;
+       struct memtype {
+               uint64_t page_sz;
+               int socket_id;
+       } *memtypes = NULL;
+       int i, hpi_idx, msl_idx, ret = -1; /* fail unless told to succeed */
        struct rte_memseg_list *msl;
-       uint64_t max_mem, total_mem;
+       uint64_t max_mem, max_mem_per_type;
+       unsigned int max_seglists_per_type;
+       unsigned int n_memtypes, cur_type;
 
        /* no-huge does not need this at all */
        if (internal_config.no_hugetlbfs)
                return 0;
 
-       max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
-       total_mem = 0;
+       /*
+        * figuring out the amount of memory we're going to have is a long and
+        * fairly involved process. the basic element we operate on is a
+        * memory type, defined as a combination of NUMA node ID and page size
+        * (so that e.g. 2 sockets with 2 page sizes yield 4 memory types in
+        * total).
+        *
+        * deciding the amount of memory that goes to each memory type is a
+        * balancing act between the maximum number of segments per type, the
+        * maximum memory per type, and the number of detected NUMA nodes. the
+        * goal is to make sure each memory type gets at least one memseg list.
+        *
+        * the total amount of memory is limited by RTE_MAX_MEM_MB value.
+        *
+        * the total amount of memory per type is limited by either
+        * RTE_MAX_MEM_MB_PER_TYPE, or by RTE_MAX_MEM_MB divided by the number
+        * of detected NUMA nodes. additionally, the maximum number of segments
+        * per type is limited by RTE_MAX_MEMSEG_PER_TYPE, because for smaller
+        * page sizes it can take hundreds of thousands of segments to reach
+        * the per-type memory limits specified above.
+        *
+        * additionally, each type may have multiple memseg lists associated
+        * with it, each limited by either RTE_MAX_MEM_MB_PER_LIST for bigger
+        * page sizes, or RTE_MAX_MEMSEG_PER_LIST segments for smaller ones.
+        *
+        * the number of memseg lists per type is decided based on the above
+        * limits, also taking the number of detected NUMA nodes into account,
+        * to make sure that we don't run out of memseg lists before we
+        * populate all NUMA nodes with memory.
+        *
+        * we do this in three stages. first, we collect the number of types.
+        * then, we figure out memory constraints and populate the list of
+        * would-be memseg lists. finally, we go ahead and allocate the memseg
+        * lists.
+        */
 
-       /* create memseg lists */
+       /* create space for mem types */
+       n_memtypes = internal_config.num_hugepage_sizes * rte_socket_count();
+       memtypes = calloc(n_memtypes, sizeof(*memtypes));
+       if (memtypes == NULL) {
+               RTE_LOG(ERR, EAL, "Cannot allocate space for memory types\n");
+               return -1;
+       }
+
+       /* populate mem types */
+       cur_type = 0;
        for (hpi_idx = 0; hpi_idx < (int) internal_config.num_hugepage_sizes;
                        hpi_idx++) {
                struct hugepage_info *hpi;
@@ -2113,62 +2198,114 @@ memseg_primary_init(void)
                hpi = &internal_config.hugepage_info[hpi_idx];
                hugepage_sz = hpi->hugepage_sz;
 
-               for (i = 0; i < (int) rte_socket_count(); i++) {
-                       uint64_t max_type_mem, total_type_mem = 0;
-                       int type_msl_idx, max_segs, total_segs = 0;
-
-                       socket_id = rte_socket_id_by_idx(i);
+               for (i = 0; i < (int) rte_socket_count(); i++, cur_type++) {
+                       int socket_id = rte_socket_id_by_idx(i);
 
 #ifndef RTE_EAL_NUMA_AWARE_HUGEPAGES
                        if (socket_id > 0)
                                break;
 #endif
+                       memtypes[cur_type].page_sz = hugepage_sz;
+                       memtypes[cur_type].socket_id = socket_id;
 
-                       if (total_mem >= max_mem)
-                               break;
-
-                       max_type_mem = RTE_MIN(max_mem - total_mem,
-                               (uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20);
-                       max_segs = RTE_MAX_MEMSEG_PER_TYPE;
+                       RTE_LOG(DEBUG, EAL, "Detected memory type: "
+                               "socket_id:%u hugepage_sz:%" PRIu64 "\n",
+                               socket_id, hugepage_sz);
+               }
+       }
 
-                       type_msl_idx = 0;
-                       while (total_type_mem < max_type_mem &&
-                                       total_segs < max_segs) {
-                               uint64_t cur_max_mem, cur_mem;
-                               unsigned int n_segs;
+       /* set up limits for types */
+       max_mem = (uint64_t)RTE_MAX_MEM_MB << 20;
+       max_mem_per_type = RTE_MIN((uint64_t)RTE_MAX_MEM_MB_PER_TYPE << 20,
+                       max_mem / n_memtypes);
+       /*
+        * limit maximum number of segment lists per type to ensure there's
+        * space for memseg lists for all NUMA nodes with all page sizes
+        */
+       max_seglists_per_type = RTE_MAX_MEMSEG_LISTS / n_memtypes;
 
-                               if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
-                                       RTE_LOG(ERR, EAL,
-                                               "No more space in memseg lists, please increase %s\n",
-                                               RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
-                                       return -1;
-                               }
+       if (max_seglists_per_type == 0) {
+               RTE_LOG(ERR, EAL, "Cannot accommodate all memory types, please increase %s\n",
+                       RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+               goto out;
+       }
 
-                               msl = &mcfg->memsegs[msl_idx++];
+       /* go through all mem types and create segment lists */
+       msl_idx = 0;
+       for (cur_type = 0; cur_type < n_memtypes; cur_type++) {
+               unsigned int cur_seglist, n_seglists, n_segs;
+               unsigned int max_segs_per_type, max_segs_per_list;
+               struct memtype *type = &memtypes[cur_type];
+               uint64_t max_mem_per_list, pagesz;
+               int socket_id;
 
-                               cur_max_mem = max_type_mem - total_type_mem;
+               pagesz = type->page_sz;
+               socket_id = type->socket_id;
 
-                               cur_mem = get_mem_amount(hugepage_sz,
-                                               cur_max_mem);
-                               n_segs = cur_mem / hugepage_sz;
+               /*
+                * we need to create segment lists for this type. we must take
+                * into account the following things:
+                *
+                * 1. total amount of memory we can use for this memory type
+                * 2. total amount of memory per memseg list allowed
+                * 3. number of segments needed to fit the amount of memory
+                * 4. number of segments allowed per type
+                * 5. number of segments allowed per memseg list
+                * 6. number of memseg lists we are allowed to take up
+                */
 
-                               if (alloc_memseg_list(msl, hugepage_sz, n_segs,
-                                               socket_id, type_msl_idx))
-                                       return -1;
+               /* calculate how many segments we will need in total */
+               max_segs_per_type = max_mem_per_type / pagesz;
+               /* limit number of segments to maximum allowed per type */
+               max_segs_per_type = RTE_MIN(max_segs_per_type,
+                               (unsigned int)RTE_MAX_MEMSEG_PER_TYPE);
+               /* limit number of segments to maximum allowed per list */
+               max_segs_per_list = RTE_MIN(max_segs_per_type,
+                               (unsigned int)RTE_MAX_MEMSEG_PER_LIST);
+
+               /* calculate how much memory we can have per segment list */
+               max_mem_per_list = RTE_MIN(max_segs_per_list * pagesz,
+                               (uint64_t)RTE_MAX_MEM_MB_PER_LIST << 20);
+
+               /* calculate how many segments each segment list will have */
+               n_segs = RTE_MIN(max_segs_per_list, max_mem_per_list / pagesz);
+
+               /* calculate how many segment lists we can have */
+               n_seglists = RTE_MIN(max_segs_per_type / n_segs,
+                               max_mem_per_type / max_mem_per_list);
+
+               /* limit number of segment lists according to our maximum */
+               n_seglists = RTE_MIN(n_seglists, max_seglists_per_type);
+
+               RTE_LOG(DEBUG, EAL, "Creating %i segment lists: "
+                               "n_segs:%i socket_id:%i hugepage_sz:%" PRIu64 "\n",
+                       n_seglists, n_segs, socket_id, pagesz);
+
+               /* create all segment lists */
+               for (cur_seglist = 0; cur_seglist < n_seglists; cur_seglist++) {
+                       if (msl_idx >= RTE_MAX_MEMSEG_LISTS) {
+                               RTE_LOG(ERR, EAL,
+                                       "No more space in memseg lists, please increase %s\n",
+                                       RTE_STR(CONFIG_RTE_MAX_MEMSEG_LISTS));
+                               goto out;
+                       }
+                       msl = &mcfg->memsegs[msl_idx++];
 
-                               total_segs += msl->memseg_arr.len;
-                               total_type_mem = total_segs * hugepage_sz;
-                               type_msl_idx++;
+                       if (alloc_memseg_list(msl, pagesz, n_segs,
+                                       socket_id, cur_seglist))
+                               goto out;
 
-                               if (alloc_va_space(msl)) {
-                                       RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
-                                       return -1;
-                               }
+                       if (alloc_va_space(msl)) {
+                               RTE_LOG(ERR, EAL, "Cannot allocate VA space for memseg list\n");
+                               goto out;
                        }
-                       total_mem += total_type_mem;
                }
        }
-       return 0;
+       /* we're successful */
+       ret = 0;
+out:
+       free(memtypes);
+       return ret;
 }
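
To make the arithmetic concrete, the same computation as a standalone sketch. The constants are hypothetical stand-ins for the RTE_MAX_* configuration values: 2 MB pages, 64 GB per type, 32768 segments per type, 8192 segments per list, 32 GB per list:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
            uint64_t pagesz = 2ULL << 20;            /* 2 MB pages */
            uint64_t max_mem_per_type = 64ULL << 30; /* 64 GB budget */
            uint64_t max_segs_per_type = MIN(max_mem_per_type / pagesz, 32768);
            uint64_t max_segs_per_list = MIN(max_segs_per_type, 8192);
            uint64_t max_mem_per_list = MIN(max_segs_per_list * pagesz,
                            32ULL << 30);
            uint64_t n_segs = MIN(max_segs_per_list,
                            max_mem_per_list / pagesz);
            uint64_t n_seglists = MIN(max_segs_per_type / n_segs,
                            max_mem_per_type / max_mem_per_list);

            /* with these inputs: 4 lists of 8192 segments, 16 GB per list */
            printf("%" PRIu64 " lists of %" PRIu64 " segments\n",
                            n_seglists, n_segs);
            return 0;
    }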
 
 static int
@@ -2204,6 +2341,25 @@ memseg_secondary_init(void)
 int
 rte_eal_memseg_init(void)
 {
+       /* increase rlimit to maximum */
+       struct rlimit lim;
+
+       if (getrlimit(RLIMIT_NOFILE, &lim) == 0) {
+               /* set limit to maximum */
+               lim.rlim_cur = lim.rlim_max;
+
+               if (setrlimit(RLIMIT_NOFILE, &lim) < 0) {
+                       RTE_LOG(DEBUG, EAL, "Setting maximum number of open files failed: %s\n",
+                                       strerror(errno));
+               } else {
+                       RTE_LOG(DEBUG, EAL, "Setting maximum number of open files to %"
+                                       PRIu64 "\n",
+                                       (uint64_t)lim.rlim_cur);
+               }
+       } else {
+               RTE_LOG(ERR, EAL, "Cannot get current resource limits\n");
+       }
+
        return rte_eal_process_type() == RTE_PROC_PRIMARY ?
 #ifndef RTE_ARCH_64
                        memseg_primary_init_32() :
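
The rlimit bump matters because, after the changes above, EAL keeps one descriptor open per hugepage segment for the lifetime of the process; with many small pages the typical default soft limit of 1024 is easily exhausted. The bump itself is plain POSIX:

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/resource.h>

    int main(void)
    {
            struct rlimit lim;

            if (getrlimit(RLIMIT_NOFILE, &lim) != 0)
                    return 1;
            /* raise the soft limit to the hard limit */
            lim.rlim_cur = lim.rlim_max;
            if (setrlimit(RLIMIT_NOFILE, &lim) != 0)
                    fprintf(stderr, "setrlimit: %s\n", strerror(errno));
            else
                    printf("open-file limit now %llu\n",
                                    (unsigned long long)lim.rlim_cur);
            return 0;
    }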
index b496fc7..379773b 100644 (file)
@@ -121,8 +121,8 @@ eal_thread_loop(__attribute__((unused)) void *arg)
 
        ret = eal_thread_dump_affinity(cpuset, sizeof(cpuset));
 
-       RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%x;cpuset=[%s%s])\n",
-               lcore_id, (int)thread_id, cpuset, ret == 0 ? "" : "...");
+       RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%zx;cpuset=[%s%s])\n",
+               lcore_id, (uintptr_t)thread_id, cpuset, ret == 0 ? "" : "...");
 
        /* read on our pipe to get commands */
        while (1) {
index 2766bd7..bc8f051 100644 (file)
@@ -87,7 +87,7 @@ static pthread_t msb_inc_thread_id;
  * containing the MSB of the HPET (unfortunately, we need
  * this because hpet is 32 bits by default under linux).
  */
-static void
+static void *
 hpet_msb_inc(__attribute__((unused)) void *arg)
 {
        uint32_t t;
@@ -98,6 +98,7 @@ hpet_msb_inc(__attribute__((unused)) void *arg)
                        eal_hpet_msb ++;
                sleep(10);
        }
+       return NULL;
 }
 
 uint64_t
@@ -178,7 +179,7 @@ rte_eal_hpet_init(int make_default)
        /* create a thread that will increment a global variable for
         * msb (hpet is 32 bits by default under linux) */
        ret = rte_ctrl_thread_create(&msb_inc_thread_id, "hpet-msb-inc", NULL,
-                       (void *(*)(void *))hpet_msb_inc, NULL);
+                                    hpet_msb_inc, NULL);
        if (ret != 0) {
                RTE_LOG(ERR, EAL, "ERROR: Cannot create HPET timer thread!\n");
                internal_config.no_hpet = 1;
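
The two hunks above remove an undefined-behaviour pattern: hpet_msb_inc() returned void and was cast to the pthread-style start-routine type at the call site. A start routine must genuinely have the type void *(*)(void *), as in this minimal sketch:

    #include <pthread.h>
    #include <stddef.h>

    static void *
    worker(void *arg)
    {
            (void)arg;
            /* ... periodic work ... */
            return NULL; /* a start routine must return void * */
    }

    int main(void)
    {
            pthread_t tid;

            if (pthread_create(&tid, NULL, worker, NULL) != 0)
                    return 1;
            pthread_join(tid, NULL);
            return 0;
    }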
index c68dc38..0516b15 100644 (file)
@@ -345,46 +345,13 @@ get_vfio_cfg_by_group_num(int iommu_group_num)
        return NULL;
 }
 
-static struct vfio_config *
-get_vfio_cfg_by_group_fd(int vfio_group_fd)
-{
-       struct vfio_config *vfio_cfg;
-       int i, j;
-
-       for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
-               vfio_cfg = &vfio_cfgs[i];
-               for (j = 0; j < VFIO_MAX_GROUPS; j++)
-                       if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd)
-                               return vfio_cfg;
-       }
-
-       return NULL;
-}
-
-static struct vfio_config *
-get_vfio_cfg_by_container_fd(int container_fd)
-{
-       int i;
-
-       for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
-               if (vfio_cfgs[i].vfio_container_fd == container_fd)
-                       return &vfio_cfgs[i];
-       }
-
-       return NULL;
-}
-
-int
-rte_vfio_get_group_fd(int iommu_group_num)
+static int
+vfio_get_group_fd(struct vfio_config *vfio_cfg,
+               int iommu_group_num)
 {
        int i;
        int vfio_group_fd;
        struct vfio_group *cur_grp;
-       struct vfio_config *vfio_cfg;
-
-       /* get the vfio_config it belongs to */
-       vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num);
-       vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg;
 
        /* check if we already have the group descriptor open */
        for (i = 0; i < VFIO_MAX_GROUPS; i++)
@@ -423,6 +390,47 @@ rte_vfio_get_group_fd(int iommu_group_num)
        return vfio_group_fd;
 }
 
+static struct vfio_config *
+get_vfio_cfg_by_group_fd(int vfio_group_fd)
+{
+       struct vfio_config *vfio_cfg;
+       int i, j;
+
+       for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+               vfio_cfg = &vfio_cfgs[i];
+               for (j = 0; j < VFIO_MAX_GROUPS; j++)
+                       if (vfio_cfg->vfio_groups[j].fd == vfio_group_fd)
+                               return vfio_cfg;
+       }
+
+       return NULL;
+}
+
+static struct vfio_config *
+get_vfio_cfg_by_container_fd(int container_fd)
+{
+       int i;
+
+       for (i = 0; i < VFIO_MAX_CONTAINERS; i++) {
+               if (vfio_cfgs[i].vfio_container_fd == container_fd)
+                       return &vfio_cfgs[i];
+       }
+
+       return NULL;
+}
+
+int
+rte_vfio_get_group_fd(int iommu_group_num)
+{
+       struct vfio_config *vfio_cfg;
+
+       /* get the vfio_config it belongs to */
+       vfio_cfg = get_vfio_cfg_by_group_num(iommu_group_num);
+       vfio_cfg = vfio_cfg ? vfio_cfg : default_vfio_cfg;
+
+       return vfio_get_group_fd(vfio_cfg, iommu_group_num);
+}
+
 static int
 get_vfio_group_idx(int vfio_group_fd)
 {
@@ -509,7 +517,7 @@ vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len,
        msl = rte_mem_virt2memseg_list(addr);
 
        /* for IOVA as VA mode, no need to care for IOVA addresses */
-       if (rte_eal_iova_mode() == RTE_IOVA_VA) {
+       if (rte_eal_iova_mode() == RTE_IOVA_VA && msl->external == 0) {
                uint64_t vfio_va = (uint64_t)(uintptr_t)addr;
                if (type == RTE_MEM_EVENT_ALLOC)
                        vfio_dma_mem_map(default_vfio_cfg, vfio_va, vfio_va,
@@ -523,13 +531,19 @@ vfio_mem_event_callback(enum rte_mem_event type, const void *addr, size_t len,
        /* memsegs are contiguous in memory */
        ms = rte_mem_virt2memseg(addr, msl);
        while (cur_len < len) {
+               /* some memory segments may have invalid IOVA */
+               if (ms->iova == RTE_BAD_IOVA) {
+                       RTE_LOG(DEBUG, EAL, "Memory segment at %p has bad IOVA, skipping\n",
+                                       ms->addr);
+                       goto next;
+               }
                if (type == RTE_MEM_EVENT_ALLOC)
                        vfio_dma_mem_map(default_vfio_cfg, ms->addr_64,
                                        ms->iova, ms->len, 1);
                else
                        vfio_dma_mem_map(default_vfio_cfg, ms->addr_64,
                                        ms->iova, ms->len, 0);
-
+next:
                cur_len += ms->len;
                ++ms;
        }
@@ -896,7 +910,15 @@ rte_vfio_enable(const char *modname)
                return 0;
        }
 
-       default_vfio_cfg->vfio_container_fd = rte_vfio_get_container_fd();
+       if (internal_config.process_type == RTE_PROC_PRIMARY) {
+               /* open a new container */
+               default_vfio_cfg->vfio_container_fd =
+                               rte_vfio_get_container_fd();
+       } else {
+               /* get the default container from the primary process */
+               default_vfio_cfg->vfio_container_fd =
+                               vfio_get_default_container_fd();
+       }
 
        /* check if we have VFIO driver enabled */
        if (default_vfio_cfg->vfio_container_fd != -1) {
@@ -916,6 +938,45 @@ rte_vfio_is_enabled(const char *modname)
        return default_vfio_cfg->vfio_enabled && mod_available;
 }
 
+int
+vfio_get_default_container_fd(void)
+{
+       struct rte_mp_msg mp_req, *mp_rep;
+       struct rte_mp_reply mp_reply;
+       struct timespec ts = {.tv_sec = 5, .tv_nsec = 0};
+       struct vfio_mp_param *p = (struct vfio_mp_param *)mp_req.param;
+
+       if (default_vfio_cfg->vfio_enabled)
+               return default_vfio_cfg->vfio_container_fd;
+
+       if (internal_config.process_type == RTE_PROC_PRIMARY) {
+               /* a secondary process would request the container fd from the
+                * primary; since we are the primary process, there is no one
+                * to ask, so just fail here
+                */
+               return -1;
+       }
+
+       p->req = SOCKET_REQ_DEFAULT_CONTAINER;
+       strcpy(mp_req.name, EAL_VFIO_MP);
+       mp_req.len_param = sizeof(*p);
+       mp_req.num_fds = 0;
+
+       if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) == 0 &&
+           mp_reply.nb_received == 1) {
+               mp_rep = &mp_reply.msgs[0];
+               p = (struct vfio_mp_param *)mp_rep->param;
+               if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+                       free(mp_reply.msgs);
+                       return mp_rep->fds[0];
+               }
+               free(mp_reply.msgs);
+       }
+
+       RTE_LOG(ERR, EAL, "  cannot request default container fd\n");
+       return -1;
+}
+
 const struct vfio_iommu_type *
 vfio_set_iommu_type(int vfio_container_fd)
 {
@@ -1028,8 +1089,9 @@ rte_vfio_get_container_fd(void)
                mp_rep = &mp_reply.msgs[0];
                p = (struct vfio_mp_param *)mp_rep->param;
                if (p->result == SOCKET_OK && mp_rep->num_fds == 1) {
+                       vfio_container_fd = mp_rep->fds[0];
                        free(mp_reply.msgs);
-                       return mp_rep->fds[0];
+                       return vfio_container_fd;
                }
                free(mp_reply.msgs);
        }
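
The hunk above fixes a use-after-free: the reply buffer was freed before mp_rep->fds[0], a pointer into that buffer, was read. The shape of the fix, copy out before free, in a reduced sketch with a hypothetical reply type:

    #include <stdlib.h>

    struct reply_msg { int fds[8]; };

    static int
    take_first_fd(struct reply_msg *msgs)
    {
            int fd = msgs[0].fds[0]; /* copy the value out first */
            free(msgs);              /* only then release the buffer */
            return fd;
    }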
@@ -1082,11 +1144,14 @@ rte_vfio_get_group_num(const char *sysfs_base,
 }
 
 static int
-type1_map(const struct rte_memseg_list *msl __rte_unused,
-               const struct rte_memseg *ms, void *arg)
+type1_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+               void *arg)
 {
        int *vfio_container_fd = arg;
 
+       if (msl->external)
+               return 0;
+
        return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
                        ms->len, 1);
 }
@@ -1145,8 +1210,22 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
        struct vfio_iommu_type1_dma_map dma_map;
        struct vfio_iommu_type1_dma_unmap dma_unmap;
        int ret;
+       struct vfio_iommu_spapr_register_memory reg = {
+               .argsz = sizeof(reg),
+               .flags = 0
+       };
+       reg.vaddr = (uintptr_t) vaddr;
+       reg.size = len;
 
        if (do_map != 0) {
+               ret = ioctl(vfio_container_fd,
+                               VFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);
+               if (ret) {
+                       RTE_LOG(ERR, EAL, "  cannot register vaddr for IOMMU, "
+                               "error %i (%s)\n", errno, strerror(errno));
+                       return -1;
+               }
+
                memset(&dma_map, 0, sizeof(dma_map));
                dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
                dma_map.vaddr = vaddr;
@@ -1163,13 +1242,6 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
                }
 
        } else {
-               struct vfio_iommu_spapr_register_memory reg = {
-                       .argsz = sizeof(reg),
-                       .flags = 0
-               };
-               reg.vaddr = (uintptr_t) vaddr;
-               reg.size = len;
-
                ret = ioctl(vfio_container_fd,
                                VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);
                if (ret) {
@@ -1196,12 +1268,15 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 }
 
 static int
-vfio_spapr_map_walk(const struct rte_memseg_list *msl __rte_unused,
+vfio_spapr_map_walk(const struct rte_memseg_list *msl,
                const struct rte_memseg *ms, void *arg)
 {
        int *vfio_container_fd = arg;
 
-       return vfio_spapr_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
+       if (msl->external)
+               return 0;
+
+       return vfio_spapr_dma_do_map(*vfio_container_fd, ms->addr_64, ms->iova,
                        ms->len, 1);
 }
 
@@ -1210,12 +1285,15 @@ struct spapr_walk_param {
        uint64_t hugepage_sz;
 };
 static int
-vfio_spapr_window_size_walk(const struct rte_memseg_list *msl __rte_unused,
+vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
                const struct rte_memseg *ms, void *arg)
 {
        struct spapr_walk_param *param = arg;
        uint64_t max = ms->iova + ms->len;
 
+       if (msl->external)
+               return 0;
+
        if (max > param->window_size) {
                param->hugepage_sz = ms->hugepage_sz;
                param->window_size = max;
@@ -1670,9 +1748,6 @@ int
 rte_vfio_container_group_bind(int container_fd, int iommu_group_num)
 {
        struct vfio_config *vfio_cfg;
-       struct vfio_group *cur_grp;
-       int vfio_group_fd;
-       int i;
 
        vfio_cfg = get_vfio_cfg_by_container_fd(container_fd);
        if (vfio_cfg == NULL) {
@@ -1680,36 +1755,7 @@ rte_vfio_container_group_bind(int container_fd, int iommu_group_num)
                return -1;
        }
 
-       /* Check room for new group */
-       if (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) {
-               RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n");
-               return -1;
-       }
-
-       /* Get an index for the new group */
-       for (i = 0; i < VFIO_MAX_GROUPS; i++)
-               if (vfio_cfg->vfio_groups[i].group_num == -1) {
-                       cur_grp = &vfio_cfg->vfio_groups[i];
-                       break;
-               }
-
-       /* This should not happen */
-       if (i == VFIO_MAX_GROUPS) {
-               RTE_LOG(ERR, EAL, "No VFIO group free slot found\n");
-               return -1;
-       }
-
-       vfio_group_fd = vfio_open_group_fd(iommu_group_num);
-       if (vfio_group_fd < 0) {
-               RTE_LOG(ERR, EAL, "Failed to open group %d\n", iommu_group_num);
-               return -1;
-       }
-       cur_grp->group_num = iommu_group_num;
-       cur_grp->fd = vfio_group_fd;
-       cur_grp->devices = 0;
-       vfio_cfg->vfio_active_groups++;
-
-       return vfio_group_fd;
+       return vfio_get_group_fd(vfio_cfg, iommu_group_num);
 }
 
 int
index 68d4750..63ae115 100644 (file)
@@ -115,6 +115,9 @@ struct vfio_iommu_type {
        vfio_dma_func_t dma_map_func;
 };
 
+/* get the vfio container that devices are bound to by default */
+int vfio_get_default_container_fd(void);
+
 /* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
 const struct vfio_iommu_type *
 vfio_set_iommu_type(int vfio_container_fd);
@@ -129,6 +132,7 @@ int vfio_mp_sync_setup(void);
 
 #define SOCKET_REQ_CONTAINER 0x100
 #define SOCKET_REQ_GROUP 0x200
+#define SOCKET_REQ_DEFAULT_CONTAINER 0x400
 #define SOCKET_OK 0x0
 #define SOCKET_NO_FD 0x1
 #define SOCKET_ERR 0xFF
index 680a24a..a1e8c83 100644 (file)
@@ -66,6 +66,17 @@ vfio_mp_primary(const struct rte_mp_msg *msg, const void *peer)
                        reply.fds[0] = fd;
                }
                break;
+       case SOCKET_REQ_DEFAULT_CONTAINER:
+               r->req = SOCKET_REQ_DEFAULT_CONTAINER;
+               fd = vfio_get_default_container_fd();
+               if (fd < 0)
+                       r->result = SOCKET_ERR;
+               else {
+                       r->result = SOCKET_OK;
+                       reply.num_fds = 1;
+                       reply.fds[0] = fd;
+               }
+               break;
        default:
                RTE_LOG(ERR, EAL, "vfio received invalid message!\n");
                return -1;
index cfa9448..5afa087 100644 (file)
@@ -8,6 +8,7 @@
 
 #ifdef __KERNEL__
 #include <linux/if.h>
+#include <asm/barrier.h>
 #define RTE_STD_C11
 #else
 #include <rte_common.h>
@@ -54,8 +55,13 @@ struct rte_kni_request {
  * Writing should never overwrite the read position
  */
 struct rte_kni_fifo {
+#ifdef RTE_USE_C11_MEM_MODEL
+       unsigned write;              /**< Next position to be written */
+       unsigned read;               /**< Next position to be read */
+#else
        volatile unsigned write;     /**< Next position to be written */
        volatile unsigned read;      /**< Next position to be read */
+#endif
        unsigned len;                /**< Circular buffer length */
        unsigned elem_size;          /**< Pointer size - for 32/64 bit OS */
        void *volatile buffer[];     /**< The buffer contains mbuf pointers */
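
With RTE_USE_C11_MEM_MODEL the indices lose their volatile qualifier and ordering is expressed at the access sites instead. A sketch of the accessor pattern this enables, using GCC's atomic builtins on the struct above; the helper names are illustrative, not the actual KNI functions:

    /* consumer side: acquire-load the producer's write index so that
     * the mbuf pointers stored in buffer[] are visible before use */
    static inline unsigned
    kni_fifo_load_write(const struct rte_kni_fifo *fifo)
    {
            return __atomic_load_n(&fifo->write, __ATOMIC_ACQUIRE);
    }

    /* producer side: release-store the new write index only after the
     * buffer[] slots it covers have been filled */
    static inline void
    kni_fifo_store_write(struct rte_kni_fifo *fifo, unsigned int val)
    {
            __atomic_store_n(&fifo->write, val, __ATOMIC_RELEASE);
    }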
index e1fde15..a18f3a8 100644 (file)
@@ -21,11 +21,10 @@ else
        error('unsupported system type "@0@"'.format(host_machine.system()))
 endif
 
-version = 8  # the version of the EAL API
+version = 9  # the version of the EAL API
 allow_experimental_apis = true
 deps += 'compat'
 deps += 'kvargs'
-cflags += '-D_GNU_SOURCE'
 sources = common_sources + env_sources
 objs = common_objs + env_objs
 headers = common_headers + env_headers
index 344a43d..04f6242 100644 (file)
@@ -19,9 +19,6 @@ DPDK_2.0 {
        rte_dump_tailq;
        rte_eal_alarm_cancel;
        rte_eal_alarm_set;
-       rte_eal_devargs_add;
-       rte_eal_devargs_dump;
-       rte_eal_devargs_type_count;
        rte_eal_get_configuration;
        rte_eal_get_lcore_state;
        rte_eal_get_physmem_size;
@@ -32,7 +29,6 @@ DPDK_2.0 {
        rte_eal_lcore_role;
        rte_eal_mp_remote_launch;
        rte_eal_mp_wait_lcore;
-       rte_eal_parse_devargs_str;
        rte_eal_process_type;
        rte_eal_remote_launch;
        rte_eal_tailq_lookup;
@@ -134,8 +130,6 @@ DPDK_16.11 {
 
        rte_delay_us_block;
        rte_delay_us_callback_register;
-       rte_eal_dev_attach;
-       rte_eal_dev_detach;
 
 } DPDK_16.07;
 
@@ -262,6 +256,16 @@ DPDK_18.08 {
 
 } DPDK_18.05;
 
+DPDK_18.11 {
+       global:
+
+       rte_eal_get_runtime_dir;
+       rte_eal_hotplug_add;
+       rte_eal_hotplug_remove;
+       rte_strscpy;
+
+} DPDK_18.08;
+
 EXPERIMENTAL {
        global:
 
@@ -270,12 +274,19 @@ EXPERIMENTAL {
        rte_class_register;
        rte_class_unregister;
        rte_ctrl_thread_create;
+       rte_delay_us_sleep;
+       rte_dev_event_callback_process;
        rte_dev_event_callback_register;
        rte_dev_event_callback_unregister;
        rte_dev_event_monitor_start;
        rte_dev_event_monitor_stop;
+       rte_dev_hotplug_handle_disable;
+       rte_dev_hotplug_handle_enable;
+       rte_dev_is_probed;
        rte_dev_iterator_init;
        rte_dev_iterator_next;
+       rte_dev_probe;
+       rte_dev_remove;
        rte_devargs_add;
        rte_devargs_dump;
        rte_devargs_insert;
@@ -284,9 +295,8 @@ EXPERIMENTAL {
        rte_devargs_parsef;
        rte_devargs_remove;
        rte_devargs_type_count;
+       rte_eal_check_dma_mask;
        rte_eal_cleanup;
-       rte_eal_hotplug_add;
-       rte_eal_hotplug_remove;
        rte_fbarray_attach;
        rte_fbarray_destroy;
        rte_fbarray_detach;
@@ -311,6 +321,14 @@ EXPERIMENTAL {
        rte_fbarray_set_used;
        rte_log_register_type_and_pick_level;
        rte_malloc_dump_heaps;
+       rte_malloc_heap_create;
+       rte_malloc_heap_destroy;
+       rte_malloc_heap_get_socket;
+       rte_malloc_heap_memory_add;
+       rte_malloc_heap_memory_attach;
+       rte_malloc_heap_memory_detach;
+       rte_malloc_heap_memory_remove;
+       rte_malloc_heap_socket_is_external;
        rte_mem_alloc_validator_register;
        rte_mem_alloc_validator_unregister;
        rte_mem_event_callback_register;
@@ -320,6 +338,10 @@ EXPERIMENTAL {
        rte_mem_virt2memseg_list;
        rte_memseg_contig_walk;
        rte_memseg_contig_walk_thread_unsafe;
+       rte_memseg_get_fd;
+       rte_memseg_get_fd_offset;
+       rte_memseg_get_fd_thread_unsafe;
+       rte_memseg_get_fd_offset_thread_unsafe;
        rte_memseg_list_walk;
        rte_memseg_list_walk_thread_unsafe;
        rte_memseg_walk;
@@ -330,6 +352,7 @@ EXPERIMENTAL {
        rte_mp_request_sync;
        rte_mp_request_async;
        rte_mp_sendmsg;
+       rte_option_register;
        rte_service_lcore_attr_get;
        rte_service_lcore_attr_reset_all;
        rte_service_may_be_active;
index 0935a27..3e27ae4 100644 (file)
@@ -12,13 +12,15 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
 LDLIBS += -lrte_net -lrte_eal -lrte_mempool -lrte_ring
-LDLIBS += -lrte_mbuf
+LDLIBS += -lrte_mbuf -lrte_kvargs -lrte_cmdline
 
 EXPORT_MAP := rte_ethdev_version.map
 
-LIBABIVER := 10
+LIBABIVER := 11
 
+SRCS-y += ethdev_private.c
 SRCS-y += rte_ethdev.c
+SRCS-y += rte_class_eth.c
 SRCS-y += rte_flow.c
 SRCS-y += rte_tm.c
 SRCS-y += rte_mtr.c
diff --git a/lib/librte_ethdev/ethdev_private.c b/lib/librte_ethdev/ethdev_private.c
new file mode 100644 (file)
index 0000000..162a502
--- /dev/null
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Gaëtan Rivet
+ */
+
+#include "rte_ethdev.h"
+#include "rte_ethdev_driver.h"
+#include "ethdev_private.h"
+
+uint16_t
+eth_dev_to_id(const struct rte_eth_dev *dev)
+{
+       if (dev == NULL)
+               return RTE_MAX_ETHPORTS;
+       return dev - rte_eth_devices;
+}
+
+struct rte_eth_dev *
+eth_find_device(const struct rte_eth_dev *start, rte_eth_cmp_t cmp,
+               const void *data)
+{
+       struct rte_eth_dev *edev;
+       ptrdiff_t idx;
+
+       /* Avoid Undefined Behaviour */
+       if (start != NULL &&
+           (start < &rte_eth_devices[0] ||
+            start > &rte_eth_devices[RTE_MAX_ETHPORTS]))
+               return NULL;
+       if (start != NULL)
+               idx = eth_dev_to_id(start) + 1;
+       else
+               idx = 0;
+       for (; idx < RTE_MAX_ETHPORTS; idx++) {
+               edev = &rte_eth_devices[idx];
+               if (cmp(edev, data) == 0)
+                       return edev;
+       }
+       return NULL;
+}
+
+int
+rte_eth_devargs_parse_list(char *str, rte_eth_devargs_callback_t callback,
+       void *data)
+{
+       char *str_start;
+       int state;
+       int result;
+
+       if (*str != '[')
+               /* Single element, not a list */
+               return callback(str, data);
+
+       /* Sanity check, then strip the brackets */
+       str_start = &str[strlen(str) - 1];
+       if (*str_start != ']') {
+               RTE_LOG(ERR, EAL, "(%s): List does not end with ']'\n", str);
+               return -EINVAL;
+       }
+       str++;
+       *str_start = '\0';
+
+       /* Process list elements */
+       state = 0;
+       while (1) {
+               if (state == 0) {
+                       if (*str == '\0')
+                               break;
+                       if (*str != ',') {
+                               str_start = str;
+                               state = 1;
+                       }
+               } else if (state == 1) {
+                       if (*str == ',' || *str == '\0') {
+                               if (str > str_start) {
+                                       /* Non-empty string fragment */
+                                       *str = '\0';
+                                       result = callback(str_start, data);
+                                       if (result < 0)
+                                               return result;
+                               }
+                               state = 0;
+                       }
+               }
+               str++;
+       }
+       return 0;
+}
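
A usage sketch for the parser above, expanding a bracketed devargs list into representor port IDs. The input must be writable because the parser splits it in place; struct rte_eth_devargs is assumed to come from rte_ethdev_driver.h:

    #include <string.h>
    #include <rte_ethdev_driver.h>
    #include "ethdev_private.h"

    static void
    parse_representors_example(void)
    {
            struct rte_eth_devargs da;
            char buf[] = "[1,3-5]"; /* parser writes '\0' separators */

            memset(&da, 0, sizeof(da));
            if (rte_eth_devargs_parse_list(buf,
                            rte_eth_devargs_parse_representor_ports,
                            &da) == 0) {
                    /* da.representor_ports now holds 1, 3, 4, 5 and
                     * da.nb_representor_ports == 4 */
            }
    }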
+
+static int
+rte_eth_devargs_process_range(char *str, uint16_t *list, uint16_t *len_list,
+       const uint16_t max_list)
+{
+       uint16_t lo, hi, val;
+       int result;
+
+       result = sscanf(str, "%hu-%hu", &lo, &hi);
+       if (result == 1) {
+               if (*len_list >= max_list)
+                       return -ENOMEM;
+               list[(*len_list)++] = lo;
+       } else if (result == 2) {
+               if (lo >= hi || lo > RTE_MAX_ETHPORTS || hi > RTE_MAX_ETHPORTS)
+                       return -EINVAL;
+               for (val = lo; val <= hi; val++) {
+                       if (*len_list >= max_list)
+                               return -ENOMEM;
+                       list[(*len_list)++] = val;
+               }
+       } else
+               return -EINVAL;
+       return 0;
+}
+
+int
+rte_eth_devargs_parse_representor_ports(char *str, void *data)
+{
+       struct rte_eth_devargs *eth_da = data;
+
+       return rte_eth_devargs_process_range(str, eth_da->representor_ports,
+               &eth_da->nb_representor_ports, RTE_MAX_ETHPORTS);
+}
diff --git a/lib/librte_ethdev/ethdev_private.h b/lib/librte_ethdev/ethdev_private.h
new file mode 100644 (file)
index 0000000..7b787bf
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Gaëtan Rivet
+ */
+
+#ifndef _RTE_ETH_PRIVATE_H_
+#define _RTE_ETH_PRIVATE_H_
+
+#include "rte_ethdev.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Convert rte_eth_dev pointer to port id.
+ * NULL will be translated to RTE_MAX_ETHPORTS.
+ */
+uint16_t eth_dev_to_id(const struct rte_eth_dev *dev);
+
+/* Generic rte_eth_dev comparison function. */
+typedef int (*rte_eth_cmp_t)(const struct rte_eth_dev *, const void *);
+
+/* Generic rte_eth_dev iterator. */
+struct rte_eth_dev *
+eth_find_device(const struct rte_eth_dev *_start, rte_eth_cmp_t cmp,
+               const void *data);
+
+/* Parse devargs value for representor parameter. */
+typedef int (*rte_eth_devargs_callback_t)(char *str, void *data);
+int rte_eth_devargs_parse_list(char *str, rte_eth_devargs_callback_t callback,
+       void *data);
+int rte_eth_devargs_parse_representor_ports(char *str, void *data);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETH_PRIVATE_H_ */
index 0d1dcda..a3c303f 100644 (file)
@@ -1,87 +1,33 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2017 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
  */
 
 #include "ethdev_profile.h"
 
 /**
- * This conditional block enables RX queues profiling by tracking wasted
- * iterations, i.e. iterations which yielded no RX packets. Profiling is
- * performed using the Instrumentation and Tracing Technology (ITT) API,
- * employed by the Intel (R) VTune (TM) Amplifier.
+ * This conditional block enables Ethernet device profiling with
+ * Intel (R) VTune (TM) Amplifier.
  */
-#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
-
-#include <ittnotify.h>
-
-#define ITT_MAX_NAME_LEN (100)
-
-/**
- * Auxiliary ITT structure belonging to Ethernet device and using to:
- *   -  track RX queue state to determine whether it is wasting loop iterations
- *   -  begin or end ITT task using task domain and task name (handle)
- */
-struct itt_profile_rx_data {
-       /**
-        * ITT domains for each queue.
-        */
-       __itt_domain *domains[RTE_MAX_QUEUES_PER_PORT];
-       /**
-        * ITT task names for each queue.
-        */
-       __itt_string_handle *handles[RTE_MAX_QUEUES_PER_PORT];
-       /**
-        * Flags indicating the queues state. Possible values:
-        *   1 - queue is wasting iterations,
-        *   0 - otherwise.
-        */
-       uint8_t queue_state[RTE_MAX_QUEUES_PER_PORT];
-};
-
-/**
- * The pool of *itt_profile_rx_data* structures.
- */
-struct itt_profile_rx_data itt_rx_data[RTE_MAX_ETHPORTS];
-
+#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE
 
 /**
- * This callback function manages ITT tasks collection on given port and queue.
- * It must be registered with rte_eth_add_rx_callback() to be called from
- * rte_eth_rx_burst(). To find more comments see rte_rx_callback_fn function
- * type declaration.
+ * Hook callback to trace rte_eth_rx_burst() calls.
  */
-static uint16_t
-collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id,
+uint16_t
+profile_hook_rx_burst_cb(
+       __rte_unused uint16_t port_id, __rte_unused uint16_t queue_id,
        __rte_unused struct rte_mbuf *pkts[], uint16_t nb_pkts,
        __rte_unused uint16_t max_pkts, __rte_unused void *user_param)
 {
-       if (unlikely(nb_pkts == 0)) {
-               if (!itt_rx_data[port_id].queue_state[queue_id]) {
-                       __itt_task_begin(
-                               itt_rx_data[port_id].domains[queue_id],
-                               __itt_null, __itt_null,
-                               itt_rx_data[port_id].handles[queue_id]);
-                       itt_rx_data[port_id].queue_state[queue_id] = 1;
-               }
-       } else {
-               if (unlikely(itt_rx_data[port_id].queue_state[queue_id])) {
-                       __itt_task_end(
-                               itt_rx_data[port_id].domains[queue_id]);
-                       itt_rx_data[port_id].queue_state[queue_id] = 0;
-               }
-       }
        return nb_pkts;
 }
 
 /**
- * Initialization of itt_profile_rx_data for a given Ethernet device.
+ * Set the profiling RX callback for a given Ethernet device.
  * This function must be invoked when ethernet device is being configured.
- * Result will be stored in the global array *itt_rx_data*.
  *
  * @param port_id
  *  The port identifier of the Ethernet device.
- * @param port_name
- *  The name of the Ethernet device.
  * @param rx_queue_num
  *  The number of RX queues on specified port.
  *
@@ -90,46 +36,27 @@ collect_itt_rx_burst_cb(uint16_t port_id, uint16_t queue_id,
  *  - On failure, a negative value.
  */
 static inline int
-itt_profile_rx_init(uint16_t port_id, char *port_name, uint8_t rx_queue_num)
+vtune_profile_rx_init(uint16_t port_id, uint8_t rx_queue_num)
 {
        uint16_t q_id;
 
        for (q_id = 0; q_id < rx_queue_num; ++q_id) {
-               char domain_name[ITT_MAX_NAME_LEN];
-
-               snprintf(domain_name, sizeof(domain_name),
-                       "RXBurst.WastedIterations.Port_%s.Queue_%d",
-                       port_name, q_id);
-               itt_rx_data[port_id].domains[q_id]
-                       = __itt_domain_create(domain_name);
-
-               char task_name[ITT_MAX_NAME_LEN];
-
-               snprintf(task_name, sizeof(task_name),
-                       "port id: %d; queue id: %d",
-                       port_id, q_id);
-               itt_rx_data[port_id].handles[q_id]
-                       = __itt_string_handle_create(task_name);
-
-               itt_rx_data[port_id].queue_state[q_id] = 0;
-
                if (!rte_eth_add_rx_callback(
-                       port_id, q_id, collect_itt_rx_burst_cb, NULL)) {
+                       port_id, q_id, profile_hook_rx_burst_cb, NULL)) {
                        return -rte_errno;
                }
        }
 
        return 0;
 }
-#endif /* RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS */
+#endif /* RTE_ETHDEV_PROFILE_WITH_VTUNE */
 
 int
-__rte_eth_profile_rx_init(__rte_unused uint16_t port_id,
+__rte_eth_dev_profile_init(__rte_unused uint16_t port_id,
        __rte_unused struct rte_eth_dev *dev)
 {
-#ifdef RTE_ETHDEV_PROFILE_ITT_WASTED_RX_ITERATIONS
-       return itt_profile_rx_init(
-               port_id, dev->data->name, dev->data->nb_rx_queues);
+#ifdef RTE_ETHDEV_PROFILE_WITH_VTUNE
+       return vtune_profile_rx_init(port_id, dev->data->nb_rx_queues);
 #endif
        return 0;
 }
index e5ea368..65031e6 100644 (file)
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2010-2017 Intel Corporation
+ * Copyright(c) 2010-2018 Intel Corporation
  */
 
 #ifndef _RTE_ETHDEV_PROFILE_H_
@@ -8,7 +8,7 @@
 #include "rte_ethdev.h"
 
 /**
- * Initialization of profiling RX queues for the Ethernet device.
+ * Initialize profiling for the Ethernet device.
  * Implementation of this function depends on chosen profiling method,
  * defined in configs.
  *
@@ -22,6 +22,6 @@
  *  - On failure, a negative value.
  */
 int
-__rte_eth_profile_rx_init(uint16_t port_id, struct rte_eth_dev *dev);
+__rte_eth_dev_profile_init(uint16_t port_id, struct rte_eth_dev *dev);
 
 #endif
index 596cd0f..a4d8502 100644 (file)
@@ -2,9 +2,11 @@
 # Copyright(c) 2017 Intel Corporation
 
 name = 'ethdev'
-version = 10
+version = 11
 allow_experimental_apis = true
-sources = files('ethdev_profile.c',
+sources = files('ethdev_private.c',
+       'ethdev_profile.c',
+       'rte_class_eth.c',
        'rte_ethdev.c',
        'rte_flow.c',
        'rte_mtr.c',
@@ -24,4 +26,4 @@ headers = files('rte_ethdev.h',
        'rte_tm.h',
        'rte_tm_driver.h')
 
-deps += ['net', 'kvargs']
+deps += ['net', 'kvargs', 'cmdline']
diff --git a/lib/librte_ethdev/rte_class_eth.c b/lib/librte_ethdev/rte_class_eth.c
new file mode 100644 (file)
index 0000000..cb99c92
--- /dev/null
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Gaëtan Rivet
+ */
+
+#include <string.h>
+
+#include <cmdline_parse_etheraddr.h>
+#include <rte_class.h>
+#include <rte_compat.h>
+#include <rte_errno.h>
+#include <rte_kvargs.h>
+#include <rte_log.h>
+
+#include "rte_ethdev.h"
+#include "rte_ethdev_core.h"
+#include "rte_ethdev_driver.h"
+#include "ethdev_private.h"
+
+enum eth_params {
+       RTE_ETH_PARAM_MAC,
+       RTE_ETH_PARAM_REPRESENTOR,
+       RTE_ETH_PARAM_MAX,
+};
+
+static const char * const eth_params_keys[] = {
+       [RTE_ETH_PARAM_MAC] = "mac",
+       [RTE_ETH_PARAM_REPRESENTOR] = "representor",
+       [RTE_ETH_PARAM_MAX] = NULL,
+};
+
+struct eth_dev_match_arg {
+       struct rte_device *device;
+       struct rte_kvargs *kvlist;
+};
+
+#define eth_dev_match_arg(d, k) \
+       (&(const struct eth_dev_match_arg) { \
+               .device = (d), \
+               .kvlist = (k), \
+       })
+
+static int
+eth_mac_cmp(const char *key __rte_unused,
+               const char *value, void *opaque)
+{
+       int ret;
+       struct ether_addr mac;
+       const struct rte_eth_dev_data *data = opaque;
+       struct rte_eth_dev_info dev_info;
+       uint32_t index;
+
+       /* Parse devargs MAC address. */
+       /*
+        * cannot use ether_aton_r(value, &mac)
+        * because of include conflict with rte_ether.h
+        */
+       ret = cmdline_parse_etheraddr(NULL, value, &mac, sizeof(mac));
+       if (ret < 0)
+               return -1; /* invalid devargs value */
+
+       /* Return 0 if devargs MAC is matching one of the device MACs. */
+       rte_eth_dev_info_get(data->port_id, &dev_info);
+       for (index = 0; index < dev_info.max_mac_addrs; index++)
+               if (is_same_ether_addr(&mac, &data->mac_addrs[index]))
+                       return 0;
+       return -1; /* no match */
+}
+
+static int
+eth_representor_cmp(const char *key __rte_unused,
+               const char *value, void *opaque)
+{
+       int ret;
+       char *values;
+       const struct rte_eth_dev_data *data = opaque;
+       struct rte_eth_devargs representors;
+       uint16_t index;
+
+       if ((data->dev_flags & RTE_ETH_DEV_REPRESENTOR) == 0)
+               return -1; /* not a representor port */
+
+       /* Parse devargs representor values. */
+       values = strdup(value);
+       if (values == NULL)
+               return -1;
+       memset(&representors, 0, sizeof(representors));
+       ret = rte_eth_devargs_parse_list(values,
+                       rte_eth_devargs_parse_representor_ports,
+                       &representors);
+       free(values);
+       if (ret != 0)
+               return -1; /* invalid devargs value */
+
+       /* Return 0 if the representor id matches one of the values. */
+       for (index = 0; index < representors.nb_representor_ports; index++)
+               if (data->representor_id ==
+                               representors.representor_ports[index])
+                       return 0;
+       return -1; /* no match */
+}
+
+static int
+eth_dev_match(const struct rte_eth_dev *edev,
+             const void *_arg)
+{
+       int ret;
+       const struct eth_dev_match_arg *arg = _arg;
+       const struct rte_kvargs *kvlist = arg->kvlist;
+       unsigned int pair;
+
+       if (edev->state == RTE_ETH_DEV_UNUSED)
+               return -1;
+       if (arg->device != NULL && arg->device != edev->device)
+               return -1;
+
+       ret = rte_kvargs_process(kvlist,
+                       eth_params_keys[RTE_ETH_PARAM_MAC],
+                       eth_mac_cmp, edev->data);
+       if (ret != 0)
+               return -1;
+
+       ret = rte_kvargs_process(kvlist,
+                       eth_params_keys[RTE_ETH_PARAM_REPRESENTOR],
+                       eth_representor_cmp, edev->data);
+       if (ret != 0)
+               return -1;
+       /* search for representor key */
+       for (pair = 0; pair < kvlist->count; pair++) {
+               ret = strcmp(kvlist->pairs[pair].key,
+                               eth_params_keys[RTE_ETH_PARAM_REPRESENTOR]);
+               if (ret == 0)
+                       break; /* there is a representor key */
+       }
+       /* if no representor key, default is to not match representor ports */
+       if (ret != 0)
+               if ((edev->data->dev_flags & RTE_ETH_DEV_REPRESENTOR) != 0)
+                       return -1; /* do not match any representor */
+
+       return 0;
+}
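Note the default established at the end of eth_dev_match(): representor ports are skipped unless the devargs explicitly carry a "representor" key. With illustrative filter strings (not taken from this patch):

    "class=eth,mac=00:11:22:33:44:55"   /* never matches representor ports */
    "class=eth,representor=[0-3]"       /* matches only representors 0..3  */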
+
+static void *
+eth_dev_iterate(const void *start,
+               const char *str,
+               const struct rte_dev_iterator *it)
+{
+       struct rte_kvargs *kvargs = NULL;
+       struct rte_eth_dev *edev = NULL;
+       const char * const *valid_keys = NULL;
+
+       if (str != NULL) {
+               if (str[0] == '+') /* no validation of keys */
+                       str++;
+               else
+                       valid_keys = eth_params_keys;
+               kvargs = rte_kvargs_parse(str, valid_keys);
+               if (kvargs == NULL) {
+                       RTE_LOG(ERR, EAL, "cannot parse argument list\n");
+                       rte_errno = EINVAL;
+                       return NULL;
+               }
+       }
+       edev = eth_find_device(start, eth_dev_match,
+                              eth_dev_match_arg(it->device, kvargs));
+       rte_kvargs_free(kvargs);
+       return edev;
+}
+
+static struct rte_class rte_class_eth = {
+       .dev_iterate = eth_dev_iterate,
+};
+
+RTE_REGISTER_CLASS(eth, rte_class_eth);
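RTE_REGISTER_CLASS() above hooks the new "eth" class into the generic EAL device iteration machinery. For orientation, a minimal sketch of the same registration pattern for a hypothetical class (all names below are illustrative, not part of this patch):

    #include <rte_class.h>

    /* Hypothetical class that matches nothing; shown only for the shape
     * of the dev_iterate callback and the registration macro. */
    static void *
    mycls_dev_iterate(const void *start, const char *str,
                      const struct rte_dev_iterator *it)
    {
            (void)start; (void)str; (void)it;
            return NULL; /* no matching device */
    }

    static struct rte_class rte_class_mycls = {
            .dev_iterate = mycls_dev_iterate,
    };

    RTE_REGISTER_CLASS(mycls, rte_class_mycls);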
index 4c32025..9d34813 100644 (file)
 #include <rte_spinlock.h>
 #include <rte_string_fns.h>
 #include <rte_kvargs.h>
+#include <rte_class.h>
 
 #include "rte_ether.h"
 #include "rte_ethdev.h"
 #include "rte_ethdev_driver.h"
 #include "ethdev_profile.h"
+#include "ethdev_private.h"
 
 int rte_eth_dev_logtype;
 
@@ -122,11 +124,12 @@ static const struct {
        RTE_RX_OFFLOAD_BIT2STR(VLAN_FILTER),
        RTE_RX_OFFLOAD_BIT2STR(VLAN_EXTEND),
        RTE_RX_OFFLOAD_BIT2STR(JUMBO_FRAME),
-       RTE_RX_OFFLOAD_BIT2STR(CRC_STRIP),
        RTE_RX_OFFLOAD_BIT2STR(SCATTER),
        RTE_RX_OFFLOAD_BIT2STR(TIMESTAMP),
        RTE_RX_OFFLOAD_BIT2STR(SECURITY),
        RTE_RX_OFFLOAD_BIT2STR(KEEP_CRC),
+       RTE_RX_OFFLOAD_BIT2STR(SCTP_CKSUM),
+       RTE_RX_OFFLOAD_BIT2STR(OUTER_UDP_CKSUM),
 };
 
 #undef RTE_RX_OFFLOAD_BIT2STR
@@ -156,6 +159,10 @@ static const struct {
        RTE_TX_OFFLOAD_BIT2STR(MULTI_SEGS),
        RTE_TX_OFFLOAD_BIT2STR(MBUF_FAST_FREE),
        RTE_TX_OFFLOAD_BIT2STR(SECURITY),
+       RTE_TX_OFFLOAD_BIT2STR(UDP_TNL_TSO),
+       RTE_TX_OFFLOAD_BIT2STR(IP_TNL_TSO),
+       RTE_TX_OFFLOAD_BIT2STR(OUTER_UDP_CKSUM),
+       RTE_TX_OFFLOAD_BIT2STR(MATCH_METADATA),
 };
 
 #undef RTE_TX_OFFLOAD_BIT2STR
@@ -180,6 +187,146 @@ enum {
        STAT_QMAP_RX
 };
 
+int __rte_experimental
+rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs_str)
+{
+       int ret;
+       struct rte_devargs devargs = {.args = NULL};
+       const char *bus_param_key;
+       char *bus_str = NULL;
+       char *cls_str = NULL;
+       int str_size;
+
+       memset(iter, 0, sizeof(*iter));
+
+       /*
+        * The devargs string may use various syntaxes:
+        *   - 0000:08:00.0,representor=[1-3]
+        *   - pci:0000:06:00.0,representor=[0,5]
+        *   - class=eth,mac=00:11:22:33:44:55
+        * A new syntax is in development (not yet supported):
+        *   - bus=X,paramX=x/class=Y,paramY=y/driver=Z,paramZ=z
+        */
+
+       /*
+        * Handle the pure class filter case (i.e. without any bus-level
+        * argument) from the future new syntax.
+        * rte_devargs_parse() does not yet support the new syntax;
+        * that is why this simple case is temporarily parsed here.
+        */
+#define iter_anybus_str "class=eth,"
+       if (strncmp(devargs_str, iter_anybus_str,
+                       strlen(iter_anybus_str)) == 0) {
+               iter->cls_str = devargs_str + strlen(iter_anybus_str);
+               goto end;
+       }
+
+       /* Split bus, device and parameters. */
+       ret = rte_devargs_parse(&devargs, devargs_str);
+       if (ret != 0)
+               goto error;
+
+       /*
+        * Assume parameters of the old syntax can match only at ethdev level.
+        * Extra parameters are ignored, thanks to the "+" prefix.
+        */
+       str_size = strlen(devargs.args) + 2;
+       cls_str = malloc(str_size);
+       if (cls_str == NULL) {
+               ret = -ENOMEM;
+               goto error;
+       }
+       ret = snprintf(cls_str, str_size, "+%s", devargs.args);
+       if (ret != str_size - 1) {
+               ret = -EINVAL;
+               goto error;
+       }
+       iter->cls_str = cls_str;
+       free(devargs.args); /* allocated by rte_devargs_parse() */
+       devargs.args = NULL;
+
+       iter->bus = devargs.bus;
+       if (iter->bus->dev_iterate == NULL) {
+               ret = -ENOTSUP;
+               goto error;
+       }
+
+       /* Convert bus args to the new syntax for the new dev_iterate API. */
+       if (strcmp(iter->bus->name, "vdev") == 0) {
+               bus_param_key = "name";
+       } else if (strcmp(iter->bus->name, "pci") == 0) {
+               bus_param_key = "addr";
+       } else {
+               ret = -ENOTSUP;
+               goto error;
+       }
+       str_size = strlen(bus_param_key) + strlen(devargs.name) + 2;
+       bus_str = malloc(str_size);
+       if (bus_str == NULL) {
+               ret = -ENOMEM;
+               goto error;
+       }
+       ret = snprintf(bus_str, str_size, "%s=%s",
+                       bus_param_key, devargs.name);
+       if (ret != str_size - 1) {
+               ret = -EINVAL;
+               goto error;
+       }
+       iter->bus_str = bus_str;
+
+end:
+       iter->cls = rte_class_find_by_name("eth");
+       return 0;
+
+error:
+       if (ret == -ENOTSUP)
+               RTE_LOG(ERR, EAL, "Bus %s does not support iterating.\n",
+                               iter->bus->name);
+       free(devargs.args);
+       free(bus_str);
+       free(cls_str);
+       return ret;
+}
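To make the old-to-new syntax conversion above concrete, here is the expected outcome for one sample input (values are illustrative, not taken from this patch):

    devargs_str:    "0000:08:00.0,representor=[1-3]"
    iter->bus_str:  "addr=0000:08:00.0"     /* PCI bus parameter key is "addr" */
    iter->cls_str:  "+representor=[1-3]"    /* '+' disables key validation */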
+
+uint16_t __rte_experimental
+rte_eth_iterator_next(struct rte_dev_iterator *iter)
+{
+       if (iter->cls == NULL) /* invalid ethdev iterator */
+               return RTE_MAX_ETHPORTS;
+
+       do { /* loop to try all matching rte_device */
+               /* If not pure ethdev filter and */
+               if (iter->bus != NULL &&
+                               /* not in middle of rte_eth_dev iteration, */
+                               iter->class_device == NULL) {
+                       /* get next rte_device to try. */
+                       iter->device = iter->bus->dev_iterate(
+                                       iter->device, iter->bus_str, iter);
+                       if (iter->device == NULL)
+                               break; /* no more rte_device candidate */
+               }
+               /* A device matched the bus part; now check the ethdev part. */
+               iter->class_device = iter->cls->dev_iterate(
+                               iter->class_device, iter->cls_str, iter);
+               if (iter->class_device != NULL)
+                       return eth_dev_to_id(iter->class_device); /* match */
+       } while (iter->bus != NULL); /* need to try next rte_device */
+
+       /* No more ethdev port to iterate. */
+       rte_eth_iterator_cleanup(iter);
+       return RTE_MAX_ETHPORTS;
+}
+
+void __rte_experimental
+rte_eth_iterator_cleanup(struct rte_dev_iterator *iter)
+{
+       if (iter->bus_str == NULL)
+               return; /* nothing to free in pure class filter */
+       free(RTE_CAST_FIELD(iter, bus_str, char *)); /* workaround const */
+       free(RTE_CAST_FIELD(iter, cls_str, char *)); /* workaround const */
+       memset(iter, 0, sizeof(*iter));
+}
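Together, the three functions above support an open-coded loop as well as the RTE_ETH_FOREACH_MATCHING_DEV macro declared later in this patch. A minimal sketch, assuming an illustrative devargs string:

    struct rte_dev_iterator iter;
    uint16_t port_id;

    if (rte_eth_iterator_init(&iter, "class=eth,mac=00:11:22:33:44:55") == 0) {
            for (port_id = rte_eth_iterator_next(&iter);
                 port_id != RTE_MAX_ETHPORTS;
                 port_id = rte_eth_iterator_next(&iter)) {
                    /* use port_id; on early break, cleanup is mandatory */
            }
            rte_eth_iterator_cleanup(&iter); /* no-op after full iteration */
    }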
+
 uint16_t
 rte_eth_find_next(uint16_t port_id)
 {
@@ -366,13 +513,22 @@ rte_eth_dev_release_port(struct rte_eth_dev *eth_dev)
 
        rte_eth_dev_shared_data_prepare();
 
-       _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_DESTROY, NULL);
+       if (eth_dev->state != RTE_ETH_DEV_UNUSED)
+               _rte_eth_dev_callback_process(eth_dev,
+                               RTE_ETH_EVENT_DESTROY, NULL);
 
        rte_spinlock_lock(&rte_eth_dev_shared_data->ownership_lock);
 
        eth_dev->state = RTE_ETH_DEV_UNUSED;
 
-       memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data));
+       if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+               rte_free(eth_dev->data->rx_queues);
+               rte_free(eth_dev->data->tx_queues);
+               rte_free(eth_dev->data->mac_addrs);
+               rte_free(eth_dev->data->hash_mac_addrs);
+               rte_free(eth_dev->data->dev_private);
+               memset(eth_dev->data, 0, sizeof(struct rte_eth_dev_data));
+       }
 
        rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
 
@@ -393,11 +549,8 @@ static int
 rte_eth_is_valid_owner_id(uint64_t owner_id)
 {
        if (owner_id == RTE_ETH_DEV_NO_OWNER ||
-           rte_eth_dev_shared_data->next_owner_id <= owner_id) {
-               RTE_ETHDEV_LOG(ERR, "Invalid owner_id=%016"PRIx64"\n",
-                       owner_id);
+           rte_eth_dev_shared_data->next_owner_id <= owner_id)
                return 0;
-       }
        return 1;
 }
 
@@ -444,8 +597,12 @@ _rte_eth_dev_owner_set(const uint16_t port_id, const uint64_t old_owner_id,
        }
 
        if (!rte_eth_is_valid_owner_id(new_owner->id) &&
-           !rte_eth_is_valid_owner_id(old_owner_id))
+           !rte_eth_is_valid_owner_id(old_owner_id)) {
+               RTE_ETHDEV_LOG(ERR,
+                       "Invalid owner old_id=%016"PRIx64" new_id=%016"PRIx64"\n",
+                      old_owner_id, new_owner->id);
                return -EINVAL;
+       }
 
        port_owner = &rte_eth_devices[port_id].data->owner;
        if (port_owner->id != old_owner_id) {
@@ -516,9 +673,13 @@ rte_eth_dev_owner_delete(const uint64_t owner_id)
                        if (rte_eth_devices[port_id].data->owner.id == owner_id)
                                memset(&rte_eth_devices[port_id].data->owner, 0,
                                       sizeof(struct rte_eth_dev_owner));
-               RTE_ETHDEV_LOG(ERR,
+               RTE_ETHDEV_LOG(NOTICE,
                        "All port owners owned by %016"PRIx64" identifier have removed\n",
                        owner_id);
+       } else {
+               RTE_ETHDEV_LOG(ERR,
+                              "Invalid owner id=%016"PRIx64"\n",
+                              owner_id);
        }
 
        rte_spinlock_unlock(&rte_eth_dev_shared_data->ownership_lock);
@@ -642,87 +803,6 @@ eth_err(uint16_t port_id, int ret)
        return ret;
 }
 
-/* attach the new device, then store port_id of the device */
-int
-rte_eth_dev_attach(const char *devargs, uint16_t *port_id)
-{
-       int current = rte_eth_dev_count_total();
-       struct rte_devargs da;
-       int ret = -1;
-
-       memset(&da, 0, sizeof(da));
-
-       if ((devargs == NULL) || (port_id == NULL)) {
-               ret = -EINVAL;
-               goto err;
-       }
-
-       /* parse devargs */
-       if (rte_devargs_parse(&da, devargs))
-               goto err;
-
-       ret = rte_eal_hotplug_add(da.bus->name, da.name, da.args);
-       if (ret < 0)
-               goto err;
-
-       /* no point looking at the port count if no port exists */
-       if (!rte_eth_dev_count_total()) {
-               RTE_ETHDEV_LOG(ERR, "No port found for device (%s)\n", da.name);
-               ret = -1;
-               goto err;
-       }
-
-       /* if nothing happened, there is a bug here, since some driver told us
-        * it did attach a device, but did not create a port.
-        * FIXME: race condition in case of plug-out of another device
-        */
-       if (current == rte_eth_dev_count_total()) {
-               ret = -1;
-               goto err;
-       }
-
-       *port_id = eth_dev_last_created_port;
-       ret = 0;
-
-err:
-       free(da.args);
-       return ret;
-}
-
-/* detach the device, then store the name of the device */
-int
-rte_eth_dev_detach(uint16_t port_id, char *name __rte_unused)
-{
-       struct rte_device *dev;
-       struct rte_bus *bus;
-       uint32_t dev_flags;
-       int ret = -1;
-
-       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
-
-       dev_flags = rte_eth_devices[port_id].data->dev_flags;
-       if (dev_flags & RTE_ETH_DEV_BONDED_SLAVE) {
-               RTE_ETHDEV_LOG(ERR,
-                       "Port %"PRIu16" is bonded, cannot detach\n", port_id);
-               return -ENOTSUP;
-       }
-
-       dev = rte_eth_devices[port_id].device;
-       if (dev == NULL)
-               return -EINVAL;
-
-       bus = rte_bus_find_by_device(dev);
-       if (bus == NULL)
-               return -ENOENT;
-
-       ret = rte_eal_hotplug_remove(bus->name, dev->name);
-       if (ret < 0)
-               return ret;
-
-       rte_eth_dev_release_port(&rte_eth_devices[port_id]);
-       return 0;
-}
-
 static int
 rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues)
 {
@@ -974,7 +1054,7 @@ rte_eth_speed_bitflag(uint32_t speed, int duplex)
        }
 }
 
-const char * __rte_experimental
+const char *
 rte_eth_dev_rx_offload_name(uint64_t offload)
 {
        const char *name = "UNKNOWN";
@@ -990,7 +1070,7 @@ rte_eth_dev_rx_offload_name(uint64_t offload)
        return name;
 }
 
-const char * __rte_experimental
+const char *
 rte_eth_dev_tx_offload_name(uint64_t offload)
 {
        const char *name = "UNKNOWN";
@@ -1142,14 +1222,6 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
                return -EINVAL;
        }
 
-       if ((local_conf.rxmode.offloads & DEV_RX_OFFLOAD_CRC_STRIP) &&
-                       (local_conf.rxmode.offloads & DEV_RX_OFFLOAD_KEEP_CRC)) {
-               RTE_ETHDEV_LOG(ERR,
-                       "Port id=%u not allowed to set both CRC STRIP and KEEP CRC offload flags\n",
-                       port_id);
-               return -EINVAL;
-       }
-
        /* Check that device supports requested rss hash functions. */
        if ((dev_info.flow_type_rss_offloads |
             dev_conf->rx_adv_conf.rss_conf.rss_hf) !=
@@ -1191,9 +1263,9 @@ rte_eth_dev_configure(uint16_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q,
        }
 
        /* Initialize Rx profiling if enabled at compilation time. */
-       diag = __rte_eth_profile_rx_init(port_id, dev);
+       diag = __rte_eth_dev_profile_init(port_id, dev);
        if (diag != 0) {
-               RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_profile_rx_init = %d\n",
+               RTE_ETHDEV_LOG(ERR, "Port%u __rte_eth_dev_profile_init = %d\n",
                        port_id, diag);
                rte_eth_dev_rx_queue_config(dev, 0);
                rte_eth_dev_tx_queue_config(dev, 0);
@@ -1219,19 +1291,14 @@ _rte_eth_dev_reset(struct rte_eth_dev *dev)
 }
 
 static void
-rte_eth_dev_config_restore(uint16_t port_id)
+rte_eth_dev_mac_restore(struct rte_eth_dev *dev,
+                       struct rte_eth_dev_info *dev_info)
 {
-       struct rte_eth_dev *dev;
-       struct rte_eth_dev_info dev_info;
        struct ether_addr *addr;
        uint16_t i;
        uint32_t pool = 0;
        uint64_t pool_mask;
 
-       dev = &rte_eth_devices[port_id];
-
-       rte_eth_dev_info_get(port_id, &dev_info);
-
        /* replay MAC address configuration including default MAC */
        addr = &dev->data->mac_addrs[0];
        if (*dev->dev_ops->mac_addr_set != NULL)
@@ -1240,7 +1307,7 @@ rte_eth_dev_config_restore(uint16_t port_id)
                (*dev->dev_ops->mac_addr_add)(dev, addr, 0, pool);
 
        if (*dev->dev_ops->mac_addr_add != NULL) {
-               for (i = 1; i < dev_info.max_mac_addrs; i++) {
+               for (i = 1; i < dev_info->max_mac_addrs; i++) {
                        addr = &dev->data->mac_addrs[i];
 
                        /* skip zero address */
@@ -1259,6 +1326,14 @@ rte_eth_dev_config_restore(uint16_t port_id)
                        } while (pool_mask);
                }
        }
+}
+
+static void
+rte_eth_dev_config_restore(struct rte_eth_dev *dev,
+                          struct rte_eth_dev_info *dev_info, uint16_t port_id)
+{
+       if (!(*dev_info->dev_flags & RTE_ETH_DEV_NOLIVE_MAC_ADDR))
+               rte_eth_dev_mac_restore(dev, dev_info);
 
        /* replay promiscuous configuration */
        if (rte_eth_promiscuous_get(port_id) == 1)
@@ -1277,6 +1352,7 @@ int
 rte_eth_dev_start(uint16_t port_id)
 {
        struct rte_eth_dev *dev;
+       struct rte_eth_dev_info dev_info;
        int diag;
 
        RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
@@ -1292,13 +1368,19 @@ rte_eth_dev_start(uint16_t port_id)
                return 0;
        }
 
+       rte_eth_dev_info_get(port_id, &dev_info);
+
+       /* Restore the MAC now if the device does not support live change. */
+       if (*dev_info.dev_flags & RTE_ETH_DEV_NOLIVE_MAC_ADDR)
+               rte_eth_dev_mac_restore(dev, &dev_info);
+
        diag = (*dev->dev_ops->dev_start)(dev);
        if (diag == 0)
                dev->data->dev_started = 1;
        else
                return eth_err(port_id, diag);
 
-       rte_eth_dev_config_restore(port_id);
+       rte_eth_dev_config_restore(dev, &dev_info, port_id);
 
        if (dev->data->dev_conf.intr_conf.lsc == 0) {
                RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->link_update, -ENOTSUP);
@@ -1366,6 +1448,16 @@ rte_eth_dev_close(uint16_t port_id)
        dev->data->dev_started = 0;
        (*dev->dev_ops->dev_close)(dev);
 
+       /* check behaviour flag - temporary for PMD migration */
+       if ((dev->data->dev_flags & RTE_ETH_DEV_CLOSE_REMOVE) != 0) {
+               /* new behaviour: send event + reset state + free all data */
+               rte_eth_dev_release_port(dev);
+               return;
+       }
+       RTE_ETHDEV_LOG(DEBUG, "Port closing is using an old behaviour.\n"
+                       "The driver %s should migrate to the new behaviour.\n",
+                       dev->device->driver->name);
+       /* old behaviour: only free queue arrays */
        dev->data->nb_rx_queues = 0;
        rte_free(dev->data->rx_queues);
        dev->data->rx_queues = NULL;
@@ -3425,6 +3517,43 @@ rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data)
        return 0;
 }
 
+int __rte_experimental
+rte_eth_dev_rx_intr_ctl_q_get_fd(uint16_t port_id, uint16_t queue_id)
+{
+       struct rte_intr_handle *intr_handle;
+       struct rte_eth_dev *dev;
+       unsigned int efd_idx;
+       uint32_t vec;
+       int fd;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -1);
+
+       dev = &rte_eth_devices[port_id];
+
+       if (queue_id >= dev->data->nb_rx_queues) {
+               RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id);
+               return -1;
+       }
+
+       if (!dev->intr_handle) {
+               RTE_ETHDEV_LOG(ERR, "RX Intr handle unset\n");
+               return -1;
+       }
+
+       intr_handle = dev->intr_handle;
+       if (!intr_handle->intr_vec) {
+               RTE_ETHDEV_LOG(ERR, "RX Intr vector unset\n");
+               return -1;
+       }
+
+       vec = intr_handle->intr_vec[queue_id];
+       efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ?
+               (vec - RTE_INTR_VEC_RXTX_OFFSET) : vec;
+       fd = intr_handle->efds[efd_idx];
+
+       return fd;
+}
+
 const struct rte_memzone *
 rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name,
                         uint16_t queue_id, size_t size, unsigned align,
@@ -3433,9 +3562,8 @@ rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name,
        char z_name[RTE_MEMZONE_NAMESIZE];
        const struct rte_memzone *mz;
 
-       snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d",
-                dev->device->driver->name, ring_name,
-                dev->data->port_id, queue_id);
+       snprintf(z_name, sizeof(z_name), "eth_p%d_q%d_%s",
+                dev->data->port_id, queue_id, ring_name);
 
        mz = rte_memzone_lookup(z_name);
        if (mz)
@@ -3459,10 +3587,8 @@ rte_eth_dev_create(struct rte_device *device, const char *name,
 
        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
                ethdev = rte_eth_dev_allocate(name);
-               if (!ethdev) {
-                       retval = -ENODEV;
-                       goto probe_failed;
-               }
+               if (!ethdev)
+                       return -ENODEV;
 
                if (priv_data_size) {
                        ethdev->data->dev_private = rte_zmalloc_socket(
@@ -3480,8 +3606,7 @@ rte_eth_dev_create(struct rte_device *device, const char *name,
                if (!ethdev) {
                        RTE_LOG(ERR, EAL, "secondary process attach failed, "
                                "ethdev doesn't exist");
-                       retval = -ENODEV;
-                       goto probe_failed;
+                       return -ENODEV;
                }
        }
 
@@ -3505,13 +3630,9 @@ rte_eth_dev_create(struct rte_device *device, const char *name,
        rte_eth_dev_probing_finish(ethdev);
 
        return retval;
-probe_failed:
-       /* free ports private data if primary process */
-       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-               rte_free(ethdev->data->dev_private);
 
+probe_failed:
        rte_eth_dev_release_port(ethdev);
-
        return retval;
 }
 
@@ -3532,11 +3653,6 @@ rte_eth_dev_destroy(struct rte_eth_dev *ethdev,
                        return ret;
        }
 
-       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-               rte_free(ethdev->data->dev_private);
-
-       ethdev->data->dev_private = NULL;
-
        return rte_eth_dev_release_port(ethdev);
 }
 
@@ -4195,7 +4311,7 @@ enum rte_eth_switch_domain_state {
  * RTE_MAX_ETHPORTS elements as there cannot be more active switch domains than
  * ethdev ports in a single process.
  */
-struct rte_eth_dev_switch {
+static struct rte_eth_dev_switch {
        enum rte_eth_switch_domain_state state;
 } rte_eth_switch_domains[RTE_MAX_ETHPORTS];
 
@@ -4236,8 +4352,6 @@ rte_eth_switch_domain_free(uint16_t domain_id)
        return 0;
 }
 
-typedef int (*rte_eth_devargs_callback_t)(char *str, void *data);
-
 static int
 rte_eth_devargs_tokenise(struct rte_kvargs *arglist, const char *str_in)
 {
@@ -4302,89 +4416,6 @@ rte_eth_devargs_tokenise(struct rte_kvargs *arglist, const char *str_in)
        }
 }
 
-static int
-rte_eth_devargs_parse_list(char *str, rte_eth_devargs_callback_t callback,
-       void *data)
-{
-       char *str_start;
-       int state;
-       int result;
-
-       if (*str != '[')
-               /* Single element, not a list */
-               return callback(str, data);
-
-       /* Sanity check, then strip the brackets */
-       str_start = &str[strlen(str) - 1];
-       if (*str_start != ']') {
-               RTE_LOG(ERR, EAL, "(%s): List does not end with ']'", str);
-               return -EINVAL;
-       }
-       str++;
-       *str_start = '\0';
-
-       /* Process list elements */
-       state = 0;
-       while (1) {
-               if (state == 0) {
-                       if (*str == '\0')
-                               break;
-                       if (*str != ',') {
-                               str_start = str;
-                               state = 1;
-                       }
-               } else if (state == 1) {
-                       if (*str == ',' || *str == '\0') {
-                               if (str > str_start) {
-                                       /* Non-empty string fragment */
-                                       *str = '\0';
-                                       result = callback(str_start, data);
-                                       if (result < 0)
-                                               return result;
-                               }
-                               state = 0;
-                       }
-               }
-               str++;
-       }
-       return 0;
-}
-
-static int
-rte_eth_devargs_process_range(char *str, uint16_t *list, uint16_t *len_list,
-       const uint16_t max_list)
-{
-       uint16_t lo, hi, val;
-       int result;
-
-       result = sscanf(str, "%hu-%hu", &lo, &hi);
-       if (result == 1) {
-               if (*len_list >= max_list)
-                       return -ENOMEM;
-               list[(*len_list)++] = lo;
-       } else if (result == 2) {
-               if (lo >= hi || lo > RTE_MAX_ETHPORTS || hi > RTE_MAX_ETHPORTS)
-                       return -EINVAL;
-               for (val = lo; val <= hi; val++) {
-                       if (*len_list >= max_list)
-                               return -ENOMEM;
-                       list[(*len_list)++] = val;
-               }
-       } else
-               return -EINVAL;
-       return 0;
-}
-
-
-static int
-rte_eth_devargs_parse_representor_ports(char *str, void *data)
-{
-       struct rte_eth_devargs *eth_da = data;
-
-       return rte_eth_devargs_process_range(str, eth_da->representor_ports,
-               &eth_da->nb_representor_ports, RTE_MAX_ETHPORTS);
-}
-
 int __rte_experimental
 rte_eth_devargs_parse(const char *dargs, struct rte_eth_devargs *eth_da)
 {
index 7070e9a..769a694 100644 (file)
@@ -166,6 +166,85 @@ extern int rte_eth_dev_logtype;
 
 struct rte_mbuf;
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Initializes a device iterator.
+ *
+ * This iterator allows accessing a list of devices matching some devargs.
+ *
+ * @param iter
+ *   Device iterator handle initialized by the function.
+ *   The fields bus_str and cls_str may be dynamically allocated;
+ *   they are freed by calling rte_eth_iterator_cleanup().
+ *
+ * @param devargs
+ *   Device description string.
+ *
+ * @return
+ *   0 on successful initialization, negative otherwise.
+ */
+__rte_experimental
+int rte_eth_iterator_init(struct rte_dev_iterator *iter, const char *devargs);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Iterates on devices with devargs filter.
+ * The ownership is not checked.
+ *
+ * The next port id is returned, and the iterator is updated.
+ *
+ * @param iter
+ *   Device iterator handle initialized by rte_eth_iterator_init().
+ *   The fields bus_str and cls_str are freed, via rte_eth_iterator_cleanup(),
+ *   when no more matching port is found.
+ *
+ * @return
+ *   A port id if found, RTE_MAX_ETHPORTS otherwise.
+ */
+__rte_experimental
+uint16_t rte_eth_iterator_next(struct rte_dev_iterator *iter);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Free the dynamically allocated fields of the iterator.
+ *
+ * This function is automatically called by rte_eth_iterator_next()
+ * on the last iteration (i.e. when no more matching port is found).
+ *
+ * It is safe to call this function twice; the second call is a no-op.
+ *
+ * @param iter
+ *   Device iterator handle initialized by rte_eth_iterator_init().
+ *   The fields bus_str and cls_str are freed if needed.
+ */
+__rte_experimental
+void rte_eth_iterator_cleanup(struct rte_dev_iterator *iter);
+
+/**
+ * Macro to iterate over all ethdev ports matching some devargs.
+ *
+ * If the loop is exited early with a break,
+ * rte_eth_iterator_cleanup() must be called (see the usage sketch below).
+ *
+ * @param id
+ *   Iterated port id of type uint16_t.
+ * @param devargs
+ *   Device parameters input as string of type char*.
+ * @param iter
+ *   Iterator handle of type struct rte_dev_iterator, used internally.
+ */
+#define RTE_ETH_FOREACH_MATCHING_DEV(id, devargs, iter) \
+       for (rte_eth_iterator_init(iter, devargs), \
+            id = rte_eth_iterator_next(iter); \
+            id != RTE_MAX_ETHPORTS; \
+            id = rte_eth_iterator_next(iter))
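A short usage sketch of the macro (the devargs value is illustrative; includes are omitted):

    struct rte_dev_iterator iterator;
    uint16_t id;

    RTE_ETH_FOREACH_MATCHING_DEV(id, "class=eth,representor=[0-3]", &iterator) {
            printf("matched port %u\n", id);
            if (id == 2) {
                    rte_eth_iterator_cleanup(&iterator); /* required on break */
                    break;
            }
    }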
+
 /**
  * A structure used to retrieve statistics for an Ethernet port.
  * Not all statistics fields in struct rte_eth_stats are supported
@@ -869,12 +948,6 @@ struct rte_eth_conf {
        struct rte_intr_conf intr_conf; /**< Interrupt mode configuration. */
 };
 
-/**
- * A structure used to retrieve the contextual information of
- * an Ethernet device, such as the controlling driver of the device,
- * its PCI context, etc...
- */
-
 /**
  * RX offload capabilities of a device.
  */
@@ -890,16 +963,13 @@ struct rte_eth_conf {
 #define DEV_RX_OFFLOAD_VLAN_FILTER     0x00000200
 #define DEV_RX_OFFLOAD_VLAN_EXTEND     0x00000400
 #define DEV_RX_OFFLOAD_JUMBO_FRAME     0x00000800
-#define DEV_RX_OFFLOAD_CRC_STRIP       0x00001000
 #define DEV_RX_OFFLOAD_SCATTER         0x00002000
 #define DEV_RX_OFFLOAD_TIMESTAMP       0x00004000
 #define DEV_RX_OFFLOAD_SECURITY         0x00008000
-
-/**
- * Invalid to set both DEV_RX_OFFLOAD_CRC_STRIP and DEV_RX_OFFLOAD_KEEP_CRC
- * No DEV_RX_OFFLOAD_CRC_STRIP flag means keep CRC
- */
 #define DEV_RX_OFFLOAD_KEEP_CRC                0x00010000
+#define DEV_RX_OFFLOAD_SCTP_CKSUM      0x00020000
+#define DEV_RX_OFFLOAD_OUTER_UDP_CKSUM  0x00040000
+
 #define DEV_RX_OFFLOAD_CHECKSUM (DEV_RX_OFFLOAD_IPV4_CKSUM | \
                                 DEV_RX_OFFLOAD_UDP_CKSUM | \
                                 DEV_RX_OFFLOAD_TCP_CKSUM)
@@ -953,6 +1023,13 @@ struct rte_eth_conf {
  * for tunnel TSO.
  */
 #define DEV_TX_OFFLOAD_IP_TNL_TSO       0x00080000
+/** Device supports outer UDP checksum */
+#define DEV_TX_OFFLOAD_OUTER_UDP_CKSUM  0x00100000
+/**
+ * Device supports match on metadata Tx offload.
+ * The application must set PKT_TX_METADATA and the mbuf metadata field.
+ */
+#define DEV_TX_OFFLOAD_MATCH_METADATA   0x00200000
 
 #define RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP 0x00000001
 /**< Device supports Rx queue setup after device started*/
@@ -1010,6 +1087,12 @@ struct rte_eth_switch_info {
 /**
  * Ethernet device information
  */
+
+/**
+ * A structure used to retrieve the contextual information of
+ * an Ethernet device, such as the controlling driver of the
+ * device, etc.
+ */
 struct rte_eth_dev_info {
        struct rte_device *device; /** Generic device information */
        const char *driver_name; /**< Device Driver name. */
@@ -1260,6 +1343,11 @@ struct rte_eth_dev_owner {
        char name[RTE_ETH_MAX_OWNER_NAME_LEN]; /**< The owner name. */
 };
 
+/**
+ * Port is released (i.e. totally freed and data erased) on close.
+ * Temporary flag for PMD migration to new rte_eth_dev_close() behaviour.
+ */
+#define RTE_ETH_DEV_CLOSE_REMOVE 0x0001
 /** Device supports link state interrupt */
 #define RTE_ETH_DEV_INTR_LSC     0x0002
 /** Device is a bonded slave */
@@ -1268,6 +1356,8 @@ struct rte_eth_dev_owner {
 #define RTE_ETH_DEV_INTR_RMV     0x0008
 /** Device is port representor */
 #define RTE_ETH_DEV_REPRESENTOR  0x0010
+/** Device does not support MAC change after started */
+#define RTE_ETH_DEV_NOLIVE_MAC_ADDR  0x0020
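From the PMD side, opting into the new close behaviour (and, where relevant, the no-live-MAC restriction) means setting these flags at probe time. A hedged sketch, with "dev" standing for the driver's port:

    /* In the PMD probe path (illustrative). */
    dev->data->dev_flags |= RTE_ETH_DEV_CLOSE_REMOVE;
    dev->data->dev_flags |= RTE_ETH_DEV_NOLIVE_MAC_ADDR;

    /* In dev_close: detach a field the PMD frees (or keeps) itself, so
     * rte_eth_dev_release_port() does not free it a second time. */
    dev->data->mac_addrs = NULL;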
 
 /**
  * Iterates over valid ethdev ports owned by a specific owner.
@@ -1419,37 +1509,6 @@ uint16_t rte_eth_dev_count_avail(void);
  */
 uint16_t __rte_experimental rte_eth_dev_count_total(void);
 
-/**
- * Attach a new Ethernet device specified by arguments.
- *
- * @param devargs
- *  A pointer to a strings array describing the new device
- *  to be attached. The strings should be a pci address like
- *  '0000:01:00.0' or virtual device name like 'net_pcap0'.
- * @param port_id
- *  A pointer to a port identifier actually attached.
- * @return
- *  0 on success and port_id is filled, negative on error
- */
-__rte_deprecated
-int rte_eth_dev_attach(const char *devargs, uint16_t *port_id);
-
-/**
- * Detach a Ethernet device specified by port identifier.
- * This function must be called when the device is in the
- * closed state.
- *
- * @param port_id
- *   The port identifier of the device to detach.
- * @param devname
- *   A pointer to a buffer that will be filled with the device name.
- *   This buffer must be at least RTE_DEV_NAME_MAX_LEN long.
- * @return
- *  0 on success and devname is filled, negative on error
- */
-__rte_deprecated
-int rte_eth_dev_detach(uint16_t port_id, char *devname);
-
 /**
  * Convert a numerical speed in Mbps to a bitmap flag that can be used in
  * the bitmap link_speeds of the struct rte_eth_conf
@@ -1464,9 +1523,6 @@ int rte_eth_dev_detach(uint16_t port_id, char *devname);
 uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex);
 
 /**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
  * Get DEV_RX_OFFLOAD_* flag name.
  *
  * @param offload
@@ -1474,12 +1530,9 @@ uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex);
  * @return
  *   Offload name or 'UNKNOWN' if the flag cannot be recognised.
  */
-const char * __rte_experimental rte_eth_dev_rx_offload_name(uint64_t offload);
+const char *rte_eth_dev_rx_offload_name(uint64_t offload);
 
 /**
- * @warning
- * @b EXPERIMENTAL: this API may change without prior notice
- *
  * Get DEV_TX_OFFLOAD_* flag name.
  *
  * @param offload
@@ -1487,7 +1540,7 @@ const char * __rte_experimental rte_eth_dev_rx_offload_name(uint64_t offload);
  * @return
  *   Offload name or 'UNKNOWN' if the flag cannot be recognised.
  */
-const char * __rte_experimental rte_eth_dev_tx_offload_name(uint64_t offload);
+const char *rte_eth_dev_tx_offload_name(uint64_t offload);
 
 /**
  * Configure an Ethernet device.
@@ -1750,6 +1803,10 @@ int rte_eth_dev_tx_queue_stop(uint16_t port_id, uint16_t tx_queue_id);
  * The device start step is the last one and consists of setting the configured
  * offload features and in starting the transmit and the receive units of the
  * device.
+ *
+ * The RTE_ETH_DEV_NOLIVE_MAC_ADDR device flag causes the MAC address to be
+ * set before the PMD port start callback is invoked.
+ *
  * On success, all basic functions exported by the Ethernet API (link status,
  * receive/transmit, and so on) can be invoked.
  *
@@ -1797,8 +1854,8 @@ int rte_eth_dev_set_link_down(uint16_t port_id);
 
 /**
  * Close a stopped Ethernet device. The device cannot be restarted!
- * The function frees all resources except for needed by the
- * closed state. To free these resources, call rte_eth_dev_detach().
+ * The function frees all port resources if the driver supports
+ * the flag RTE_ETH_DEV_CLOSE_REMOVE.
  *
  * @param port_id
  *   The port identifier of the Ethernet device.
@@ -2718,6 +2775,26 @@ int rte_eth_dev_rx_intr_ctl(uint16_t port_id, int epfd, int op, void *data);
 int rte_eth_dev_rx_intr_ctl_q(uint16_t port_id, uint16_t queue_id,
                              int epfd, int op, void *data);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Get interrupt fd per Rx queue.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue from which to retrieve input packets.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @return
+ *   - (>=0) the interrupt fd associated to the requested Rx queue if
+ *           successful.
+ *   - (-1) on error.
+ */
+int __rte_experimental
+rte_eth_dev_rx_intr_ctl_q_get_fd(uint16_t port_id, uint16_t queue_id);
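A usage sketch feeding the per-queue fd into a caller-managed epoll set (port and queue ids are illustrative):

    #include <sys/epoll.h>

    int efd = epoll_create1(0);
    int fd = rte_eth_dev_rx_intr_ctl_q_get_fd(0 /* port */, 0 /* queue */);
    struct epoll_event ev = { .events = EPOLLIN, .data.fd = fd };

    if (efd >= 0 && fd >= 0)
            epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev);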
+
 /**
  * Turn on the LED on the Ethernet device.
  * This function turns on the LED on the Ethernet device.
index 33d12b3..8f03f83 100644 (file)
@@ -539,7 +539,13 @@ struct rte_eth_dev {
        eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
        eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
        eth_tx_prep_t tx_pkt_prepare; /**< Pointer to PMD transmit prepare function. */
-       struct rte_eth_dev_data *data;  /**< Pointer to device data */
+       /**
+        * The next two fields are per-device data: *data is shared between
+        * the primary and secondary processes, while *process_private is
+        * private to each process and is managed by the PMD if necessary.
+        */
+       struct rte_eth_dev_data *data;  /**< Pointer to device data. */
+       void *process_private; /**< Pointer to per-process device data. */
        const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
        struct rte_device *device; /**< Backing device */
        struct rte_intr_handle *intr_handle; /**< Device interrupt handle */
@@ -579,24 +585,30 @@ struct rte_eth_dev_data {
 
        struct rte_eth_dev_sriov sriov;    /**< SRIOV data */
 
-       void *dev_private;              /**< PMD-specific private data */
-
-       struct rte_eth_link dev_link;
-       /**< Link-level information & status */
+       void *dev_private;
+                       /**< PMD-specific private data.
+                        *   @see rte_eth_dev_release_port()
+                        */
 
+       struct rte_eth_link dev_link;   /**< Link-level information & status. */
        struct rte_eth_conf dev_conf;   /**< Configuration applied to device. */
        uint16_t mtu;                   /**< Maximum Transmission Unit. */
-
        uint32_t min_rx_buf_size;
-       /**< Common rx buffer size handled by all queues */
+                       /**< Common RX buffer size handled by all queues. */
 
        uint64_t rx_mbuf_alloc_failed; /**< RX ring mbuf allocation failures. */
-       struct ether_addr* mac_addrs;/**< Device Ethernet Link address. */
+       struct ether_addr *mac_addrs;
+                       /**< Device Ethernet link address.
+                        *   @see rte_eth_dev_release_port()
+                        */
        uint64_t mac_pool_sel[ETH_NUM_RECEIVE_MAC_ADDR];
-       /** bitmap array of associating Ethernet MAC addresses to pools */
-       struct ether_addr* hash_mac_addrs;
-       /** Device Ethernet MAC addresses of hash filtering. */
+                       /**< Bitmap associating MAC addresses to pools. */
+       struct ether_addr *hash_mac_addrs;
+                       /**< Device Ethernet MAC addresses of hash filtering.
+                        *   @see rte_eth_dev_release_port()
+                        */
        uint16_t port_id;           /**< Device [external] port identifier. */
+
        __extension__
        uint8_t promiscuous   : 1, /**< RX promiscuous mode ON(1) / OFF(0). */
                scattered_rx : 1,  /**< RX of scattered packets is ON(1) / OFF(0) */
@@ -604,15 +616,19 @@ struct rte_eth_dev_data {
                dev_started : 1,   /**< Device state: STARTED(1) / STOPPED(0). */
                lro         : 1;   /**< RX LRO is ON(1) / OFF(0) */
        uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT];
-       /** Queues state: STARTED(1) / STOPPED(0) */
+                       /**< Queues state: STARTED(1) / STOPPED(0). */
        uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT];
-       /** Queues state: STARTED(1) / STOPPED(0) */
-       uint32_t dev_flags; /**< Capabilities */
-       enum rte_kernel_driver kdrv;    /**< Kernel driver passthrough */
-       int numa_node;  /**< NUMA node connection */
+                       /**< Queues state: STARTED(1) / STOPPED(0). */
+       uint32_t dev_flags;             /**< Capabilities. */
+       enum rte_kernel_driver kdrv;    /**< Kernel driver passthrough. */
+       int numa_node;                  /**< NUMA node connection. */
        struct rte_vlan_filter_conf vlan_filter_conf;
-       /**< VLAN filter configuration. */
+                       /**< VLAN filter configuration. */
        struct rte_eth_dev_owner owner; /**< The port owner. */
+       uint16_t representor_id;
+                       /**< Switch-specific identifier.
+                        *   Valid if RTE_ETH_DEV_REPRESENTOR in dev_flags.
+                        */
 } __rte_cache_aligned;
 
 /**
index c6d9bc1..c2ac263 100644 (file)
@@ -58,10 +58,17 @@ struct rte_eth_dev *rte_eth_dev_attach_secondary(const char *name);
 
 /**
  * @internal
- * Release the specified ethdev port.
+ * Notify RTE_ETH_EVENT_DESTROY and release the specified ethdev port.
+ *
+ * The following PMD-managed data fields will be freed:
+ *   - dev_private
+ *   - mac_addrs
+ *   - hash_mac_addrs
+ * If one of these fields should not be freed,
+ * the PMD must reset it to NULL, typically in its dev_close method.
  *
  * @param eth_dev
- * The *eth_dev* pointer is the address of the *rte_eth_dev* structure.
+ * Device to be released.
  * @return
  *   - 0 on success, negative on error
  */
@@ -324,32 +331,6 @@ typedef int (*ethdev_uninit_t)(struct rte_eth_dev *ethdev);
 int __rte_experimental
 rte_eth_dev_destroy(struct rte_eth_dev *ethdev, ethdev_uninit_t ethdev_uninit);
 
-/**
- * PMD helper function to check if keeping CRC is requested
- *
- * @note
- * When CRC_STRIP offload flag is removed and default behavior switch to
- * strip CRC, as planned, this helper function is not that useful and will be
- * removed. In PMDs this function will be replaced with check:
- *   if (offloads & DEV_RX_OFFLOAD_KEEP_CRC)
- *
- * @param rx_offloads
- *   offload bits to be applied
- *
- * @return
- *   Return positive if keeping CRC is requested,
- *   zero if stripping CRC is requested
- */
-static inline int
-rte_eth_dev_must_keep_crc(uint64_t rx_offloads)
-{
-       if (rx_offloads & DEV_RX_OFFLOAD_CRC_STRIP)
-               return 0;
-
-       /* no KEEP_CRC or CRC_STRIP offload flags means keep CRC */
-       return 1;
-}
-
 #ifdef __cplusplus
 }
 #endif
index f652596..23257e9 100644 (file)
@@ -135,17 +135,6 @@ rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size)
 static inline void
 rte_eth_dev_pci_release(struct rte_eth_dev *eth_dev)
 {
-       if (rte_eal_process_type() == RTE_PROC_PRIMARY)
-               rte_free(eth_dev->data->dev_private);
-
-       eth_dev->data->dev_private = NULL;
-
-       /*
-        * Secondary process will check the name to attach.
-        * Clear this field to avoid attaching a released ports.
-        */
-       eth_dev->data->name[0] = '\0';
-
        eth_dev->device = NULL;
        eth_dev->intr_handle = NULL;
 
index 38f117f..3560c28 100644 (file)
@@ -8,14 +8,12 @@ DPDK_2.2 {
        rte_eth_allmulticast_get;
        rte_eth_dev_allocate;
        rte_eth_dev_allocated;
-       rte_eth_dev_attach;
        rte_eth_dev_callback_register;
        rte_eth_dev_callback_unregister;
        rte_eth_dev_close;
        rte_eth_dev_configure;
        rte_eth_dev_count;
        rte_eth_dev_default_mac_addr_set;
-       rte_eth_dev_detach;
        rte_eth_dev_filter_supported;
        rte_eth_dev_flow_ctrl_get;
        rte_eth_dev_flow_ctrl_set;
@@ -220,6 +218,14 @@ DPDK_18.08 {
 
 } DPDK_18.05;
 
+DPDK_18.11 {
+       global:
+
+       rte_eth_dev_rx_offload_name;
+       rte_eth_dev_tx_offload_name;
+
+} DPDK_18.08;
+
 EXPERIMENTAL {
        global:
 
@@ -235,10 +241,13 @@ EXPERIMENTAL {
        rte_eth_dev_owner_new;
        rte_eth_dev_owner_set;
        rte_eth_dev_owner_unset;
-       rte_eth_dev_rx_offload_name;
-       rte_eth_dev_tx_offload_name;
+       rte_eth_dev_rx_intr_ctl_q_get_fd;
+       rte_eth_iterator_cleanup;
+       rte_eth_iterator_init;
+       rte_eth_iterator_next;
        rte_eth_switch_domain_alloc;
        rte_eth_switch_domain_free;
+       rte_flow_conv;
        rte_flow_expand_rss;
        rte_mtr_capabilities_get;
        rte_mtr_create;
index cff4b52..3277be1 100644 (file)
@@ -11,6 +11,7 @@
 #include <rte_common.h>
 #include <rte_errno.h>
 #include <rte_branch_prediction.h>
+#include <rte_string_fns.h>
 #include "rte_ethdev.h"
 #include "rte_flow_driver.h"
 #include "rte_flow.h"
@@ -50,10 +51,15 @@ static const struct rte_flow_desc_data rte_flow_desc_item[] = {
        MK_FLOW_ITEM(TCP, sizeof(struct rte_flow_item_tcp)),
        MK_FLOW_ITEM(SCTP, sizeof(struct rte_flow_item_sctp)),
        MK_FLOW_ITEM(VXLAN, sizeof(struct rte_flow_item_vxlan)),
-       MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)),
-       MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)),
        MK_FLOW_ITEM(E_TAG, sizeof(struct rte_flow_item_e_tag)),
        MK_FLOW_ITEM(NVGRE, sizeof(struct rte_flow_item_nvgre)),
+       MK_FLOW_ITEM(MPLS, sizeof(struct rte_flow_item_mpls)),
+       MK_FLOW_ITEM(GRE, sizeof(struct rte_flow_item_gre)),
+       MK_FLOW_ITEM(FUZZY, sizeof(struct rte_flow_item_fuzzy)),
+       MK_FLOW_ITEM(GTP, sizeof(struct rte_flow_item_gtp)),
+       MK_FLOW_ITEM(GTPC, sizeof(struct rte_flow_item_gtp)),
+       MK_FLOW_ITEM(GTPU, sizeof(struct rte_flow_item_gtp)),
+       MK_FLOW_ITEM(ESP, sizeof(struct rte_flow_item_esp)),
        MK_FLOW_ITEM(GENEVE, sizeof(struct rte_flow_item_geneve)),
        MK_FLOW_ITEM(VXLAN_GPE, sizeof(struct rte_flow_item_vxlan_gpe)),
        MK_FLOW_ITEM(ARP_ETH_IPV4, sizeof(struct rte_flow_item_arp_eth_ipv4)),
@@ -66,6 +72,8 @@ static const struct rte_flow_desc_data rte_flow_desc_item[] = {
                     sizeof(struct rte_flow_item_icmp6_nd_opt_sla_eth)),
        MK_FLOW_ITEM(ICMP6_ND_OPT_TLA_ETH,
                     sizeof(struct rte_flow_item_icmp6_nd_opt_tla_eth)),
+       MK_FLOW_ITEM(MARK, sizeof(struct rte_flow_item_mark)),
+       MK_FLOW_ITEM(META, sizeof(struct rte_flow_item_meta)),
 };
 
 /** Generate flow_action[] entry. */
@@ -80,6 +88,7 @@ static const struct rte_flow_desc_data rte_flow_desc_action[] = {
        MK_FLOW_ACTION(END, 0),
        MK_FLOW_ACTION(VOID, 0),
        MK_FLOW_ACTION(PASSTHRU, 0),
+       MK_FLOW_ACTION(JUMP, sizeof(struct rte_flow_action_jump)),
        MK_FLOW_ACTION(MARK, sizeof(struct rte_flow_action_mark)),
        MK_FLOW_ACTION(FLAG, 0),
        MK_FLOW_ACTION(QUEUE, sizeof(struct rte_flow_action_queue)),
@@ -90,6 +99,8 @@ static const struct rte_flow_desc_data rte_flow_desc_action[] = {
        MK_FLOW_ACTION(VF, sizeof(struct rte_flow_action_vf)),
        MK_FLOW_ACTION(PHY_PORT, sizeof(struct rte_flow_action_phy_port)),
        MK_FLOW_ACTION(PORT_ID, sizeof(struct rte_flow_action_port_id)),
+       MK_FLOW_ACTION(METER, sizeof(struct rte_flow_action_meter)),
+       MK_FLOW_ACTION(SECURITY, sizeof(struct rte_flow_action_security)),
        MK_FLOW_ACTION(OF_SET_MPLS_TTL,
                       sizeof(struct rte_flow_action_of_set_mpls_ttl)),
        MK_FLOW_ACTION(OF_DEC_MPLS_TTL, 0),
@@ -109,6 +120,29 @@ static const struct rte_flow_desc_data rte_flow_desc_action[] = {
                       sizeof(struct rte_flow_action_of_pop_mpls)),
        MK_FLOW_ACTION(OF_PUSH_MPLS,
                       sizeof(struct rte_flow_action_of_push_mpls)),
+       MK_FLOW_ACTION(VXLAN_ENCAP, sizeof(struct rte_flow_action_vxlan_encap)),
+       MK_FLOW_ACTION(VXLAN_DECAP, 0),
+       MK_FLOW_ACTION(NVGRE_ENCAP, sizeof(struct rte_flow_action_vxlan_encap)),
+       MK_FLOW_ACTION(NVGRE_DECAP, 0),
+       MK_FLOW_ACTION(RAW_ENCAP, sizeof(struct rte_flow_action_raw_encap)),
+       MK_FLOW_ACTION(RAW_DECAP, sizeof(struct rte_flow_action_raw_decap)),
+       MK_FLOW_ACTION(SET_IPV4_SRC,
+                      sizeof(struct rte_flow_action_set_ipv4)),
+       MK_FLOW_ACTION(SET_IPV4_DST,
+                      sizeof(struct rte_flow_action_set_ipv4)),
+       MK_FLOW_ACTION(SET_IPV6_SRC,
+                      sizeof(struct rte_flow_action_set_ipv6)),
+       MK_FLOW_ACTION(SET_IPV6_DST,
+                      sizeof(struct rte_flow_action_set_ipv6)),
+       MK_FLOW_ACTION(SET_TP_SRC,
+                      sizeof(struct rte_flow_action_set_tp)),
+       MK_FLOW_ACTION(SET_TP_DST,
+                      sizeof(struct rte_flow_action_set_tp)),
+       MK_FLOW_ACTION(MAC_SWAP, 0),
+       MK_FLOW_ACTION(DEC_TTL, 0),
+       MK_FLOW_ACTION(SET_TTL, sizeof(struct rte_flow_action_set_ttl)),
+       MK_FLOW_ACTION(SET_MAC_SRC, sizeof(struct rte_flow_action_set_mac)),
+       MK_FLOW_ACTION(SET_MAC_DST, sizeof(struct rte_flow_action_set_mac)),
 };
 
 static int
@@ -288,26 +322,41 @@ rte_flow_error_set(struct rte_flow_error *error,
 }
 
 /** Pattern item specification types. */
-enum item_spec_type {
-       ITEM_SPEC,
-       ITEM_LAST,
-       ITEM_MASK,
+enum rte_flow_conv_item_spec_type {
+       RTE_FLOW_CONV_ITEM_SPEC,
+       RTE_FLOW_CONV_ITEM_LAST,
+       RTE_FLOW_CONV_ITEM_MASK,
 };
 
-/** Compute storage space needed by item specification and copy it. */
+/**
+ * Copy pattern item specification.
+ *
+ * @param[out] buf
+ *   Output buffer. Can be NULL if @p size is zero.
+ * @param size
+ *   Size of @p buf in bytes.
+ * @param[in] item
+ *   Pattern item to copy specification from.
+ * @param type
+ *   Specification selector for either @p spec, @p last or @p mask.
+ *
+ * @return
+ *   Number of bytes needed to store pattern item specification regardless
+ *   of @p size. @p buf contents are truncated to @p size if not large
+ *   enough.
+ */
 static size_t
-flow_item_spec_copy(void *buf, const struct rte_flow_item *item,
-                   enum item_spec_type type)
+rte_flow_conv_item_spec(void *buf, const size_t size,
+                       const struct rte_flow_item *item,
+                       enum rte_flow_conv_item_spec_type type)
 {
-       size_t size = 0;
+       size_t off;
        const void *data =
-               type == ITEM_SPEC ? item->spec :
-               type == ITEM_LAST ? item->last :
-               type == ITEM_MASK ? item->mask :
+               type == RTE_FLOW_CONV_ITEM_SPEC ? item->spec :
+               type == RTE_FLOW_CONV_ITEM_LAST ? item->last :
+               type == RTE_FLOW_CONV_ITEM_MASK ? item->mask :
                NULL;
 
-       if (!item->spec || !data)
-               goto empty;
        switch (item->type) {
                union {
                        const struct rte_flow_item_raw *raw;
@@ -324,7 +373,7 @@ flow_item_spec_copy(void *buf, const struct rte_flow_item *item,
                union {
                        struct rte_flow_item_raw *raw;
                } dst;
-               size_t off;
+               size_t tmp;
 
        case RTE_FLOW_ITEM_TYPE_RAW:
                spec.raw = item->spec;
@@ -332,91 +381,466 @@ flow_item_spec_copy(void *buf, const struct rte_flow_item *item,
                mask.raw = item->mask ? item->mask : &rte_flow_item_raw_mask;
                src.raw = data;
                dst.raw = buf;
-               off = RTE_ALIGN_CEIL(sizeof(struct rte_flow_item_raw),
-                                    sizeof(*src.raw->pattern));
-               if (type == ITEM_SPEC ||
-                   (type == ITEM_MASK &&
+               rte_memcpy(dst.raw,
+                          (&(struct rte_flow_item_raw){
+                               .relative = src.raw->relative,
+                               .search = src.raw->search,
+                               .reserved = src.raw->reserved,
+                               .offset = src.raw->offset,
+                               .limit = src.raw->limit,
+                               .length = src.raw->length,
+                          }),
+                          size > sizeof(*dst.raw) ? sizeof(*dst.raw) : size);
+               off = sizeof(*dst.raw);
+               if (type == RTE_FLOW_CONV_ITEM_SPEC ||
+                   (type == RTE_FLOW_CONV_ITEM_MASK &&
                     ((spec.raw->length & mask.raw->length) >=
                      (last.raw->length & mask.raw->length))))
-                       size = spec.raw->length & mask.raw->length;
+                       tmp = spec.raw->length & mask.raw->length;
                else
-                       size = last.raw->length & mask.raw->length;
-               size = off + size * sizeof(*src.raw->pattern);
-               if (dst.raw) {
-                       memcpy(dst.raw, src.raw, sizeof(*src.raw));
-                       dst.raw->pattern = memcpy((uint8_t *)dst.raw + off,
-                                                 src.raw->pattern,
-                                                 size - off);
+                       tmp = last.raw->length & mask.raw->length;
+               if (tmp) {
+                       off = RTE_ALIGN_CEIL(off, sizeof(*dst.raw->pattern));
+                       if (size >= off + tmp)
+                               dst.raw->pattern = rte_memcpy
+                                       ((void *)((uintptr_t)dst.raw + off),
+                                        src.raw->pattern, tmp);
+                       off += tmp;
                }
                break;
        default:
-               size = rte_flow_desc_item[item->type].size;
-               if (buf)
-                       memcpy(buf, data, size);
+               off = rte_flow_desc_item[item->type].size;
+               rte_memcpy(buf, data, (size > off ? off : size));
                break;
        }
-empty:
-       return RTE_ALIGN_CEIL(size, sizeof(double));
+       return off;
 }
 
-/** Compute storage space needed by action configuration and copy it. */
+/**
+ * Copy action configuration.
+ *
+ * @param[out] buf
+ *   Output buffer. Can be NULL if @p size is zero.
+ * @param size
+ *   Size of @p buf in bytes.
+ * @param[in] action
+ *   Action to copy configuration from.
+ *
+ * @return
+ *   Number of bytes needed to store the action configuration regardless
+ *   of @p size. @p buf contents are truncated to @p size if not large
+ *   enough.
+ */
 static size_t
-flow_action_conf_copy(void *buf, const struct rte_flow_action *action)
+rte_flow_conv_action_conf(void *buf, const size_t size,
+                         const struct rte_flow_action *action)
 {
-       size_t size = 0;
+       size_t off;
 
-       if (!action->conf)
-               goto empty;
        switch (action->type) {
                union {
                        const struct rte_flow_action_rss *rss;
+                       const struct rte_flow_action_vxlan_encap *vxlan_encap;
+                       const struct rte_flow_action_nvgre_encap *nvgre_encap;
                } src;
                union {
                        struct rte_flow_action_rss *rss;
+                       struct rte_flow_action_vxlan_encap *vxlan_encap;
+                       struct rte_flow_action_nvgre_encap *nvgre_encap;
                } dst;
-               size_t off;
+               size_t tmp;
+               int ret;
 
        case RTE_FLOW_ACTION_TYPE_RSS:
                src.rss = action->conf;
                dst.rss = buf;
-               off = 0;
-               if (dst.rss)
-                       *dst.rss = (struct rte_flow_action_rss){
+               rte_memcpy(dst.rss,
+                          (&(struct rte_flow_action_rss){
                                .func = src.rss->func,
                                .level = src.rss->level,
                                .types = src.rss->types,
                                .key_len = src.rss->key_len,
                                .queue_num = src.rss->queue_num,
-                       };
-               off += sizeof(*src.rss);
+                          }),
+                          size > sizeof(*dst.rss) ? sizeof(*dst.rss) : size);
+               off = sizeof(*dst.rss);
                if (src.rss->key_len) {
-                       off = RTE_ALIGN_CEIL(off, sizeof(double));
-                       size = sizeof(*src.rss->key) * src.rss->key_len;
-                       if (dst.rss)
-                               dst.rss->key = memcpy
+                       off = RTE_ALIGN_CEIL(off, sizeof(*dst.rss->key));
+                       tmp = sizeof(*src.rss->key) * src.rss->key_len;
+                       if (size >= off + tmp)
+                               dst.rss->key = rte_memcpy
                                        ((void *)((uintptr_t)dst.rss + off),
-                                        src.rss->key, size);
-                       off += size;
+                                        src.rss->key, tmp);
+                       off += tmp;
                }
                if (src.rss->queue_num) {
-                       off = RTE_ALIGN_CEIL(off, sizeof(double));
-                       size = sizeof(*src.rss->queue) * src.rss->queue_num;
-                       if (dst.rss)
-                               dst.rss->queue = memcpy
+                       off = RTE_ALIGN_CEIL(off, sizeof(*dst.rss->queue));
+                       tmp = sizeof(*src.rss->queue) * src.rss->queue_num;
+                       if (size >= off + tmp)
+                               dst.rss->queue = rte_memcpy
                                        ((void *)((uintptr_t)dst.rss + off),
-                                        src.rss->queue, size);
-                       off += size;
+                                        src.rss->queue, tmp);
+                       off += tmp;
+               }
+               break;
+       case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
+       case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
+               src.vxlan_encap = action->conf;
+               dst.vxlan_encap = buf;
+               RTE_BUILD_BUG_ON(sizeof(*src.vxlan_encap) !=
+                                sizeof(*src.nvgre_encap) ||
+                                offsetof(struct rte_flow_action_vxlan_encap,
+                                         definition) !=
+                                offsetof(struct rte_flow_action_nvgre_encap,
+                                         definition));
+               off = sizeof(*dst.vxlan_encap);
+               if (src.vxlan_encap->definition) {
+                       off = RTE_ALIGN_CEIL
+                               (off, sizeof(*dst.vxlan_encap->definition));
+                       ret = rte_flow_conv
+                               (RTE_FLOW_CONV_OP_PATTERN,
+                                (void *)((uintptr_t)dst.vxlan_encap + off),
+                                size > off ? size - off : 0,
+                                src.vxlan_encap->definition, NULL);
+                       if (ret < 0)
+                               return 0;
+                       if (size >= off + ret)
+                               dst.vxlan_encap->definition =
+                                       (void *)((uintptr_t)dst.vxlan_encap +
+                                                off);
+                       off += ret;
                }
-               size = off;
                break;
        default:
-               size = rte_flow_desc_action[action->type].size;
-               if (buf)
-                       memcpy(buf, action->conf, size);
+               off = rte_flow_desc_action[action->type].size;
+               rte_memcpy(buf, action->conf, (size > off ? off : size));
                break;
        }
-empty:
-       return RTE_ALIGN_CEIL(size, sizeof(double));
+       return off;
+}
+
+/**
+ * Copy a list of pattern items.
+ *
+ * @param[out] dst
+ *   Destination buffer. Can be NULL if @p size is zero.
+ * @param size
+ *   Size of @p dst in bytes.
+ * @param[in] src
+ *   Source pattern items.
+ * @param num
+ *   Maximum number of pattern items to process from @p src or 0 to process
+ *   the entire list. In both cases, processing stops after
+ *   RTE_FLOW_ITEM_TYPE_END is encountered.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A positive value representing the number of bytes needed to store
+ *   pattern items regardless of @p size on success (@p dst contents are
+ *   truncated to @p size if not large enough), a negative errno value
+ *   otherwise and rte_errno is set.
+ */
+static int
+rte_flow_conv_pattern(struct rte_flow_item *dst,
+                     const size_t size,
+                     const struct rte_flow_item *src,
+                     unsigned int num,
+                     struct rte_flow_error *error)
+{
+       uintptr_t data = (uintptr_t)dst;
+       size_t off;
+       size_t ret;
+       unsigned int i;
+
+       for (i = 0, off = 0; !num || i != num; ++i, ++src, ++dst) {
+               if ((size_t)src->type >= RTE_DIM(rte_flow_desc_item) ||
+                   !rte_flow_desc_item[src->type].name)
+                       return rte_flow_error_set
+                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, src,
+                                "cannot convert unknown item type");
+               if (size >= off + sizeof(*dst))
+                       *dst = (struct rte_flow_item){
+                               .type = src->type,
+                       };
+               off += sizeof(*dst);
+               if (!src->type)
+                       num = i + 1;
+       }
+       num = i;
+       src -= num;
+       dst -= num;
+       do {
+               if (src->spec) {
+                       off = RTE_ALIGN_CEIL(off, sizeof(double));
+                       ret = rte_flow_conv_item_spec
+                               ((void *)(data + off),
+                                size > off ? size - off : 0, src,
+                                RTE_FLOW_CONV_ITEM_SPEC);
+                       if (size && size >= off + ret)
+                               dst->spec = (void *)(data + off);
+                       off += ret;
+               }
+               if (src->last) {
+                       off = RTE_ALIGN_CEIL(off, sizeof(double));
+                       ret = rte_flow_conv_item_spec
+                               ((void *)(data + off),
+                                size > off ? size - off : 0, src,
+                                RTE_FLOW_CONV_ITEM_LAST);
+                       if (size && size >= off + ret)
+                               dst->last = (void *)(data + off);
+                       off += ret;
+               }
+               if (src->mask) {
+                       off = RTE_ALIGN_CEIL(off, sizeof(double));
+                       ret = rte_flow_conv_item_spec
+                               ((void *)(data + off),
+                                size > off ? size - off : 0, src,
+                                RTE_FLOW_CONV_ITEM_MASK);
+                       if (size && size >= off + ret)
+                               dst->mask = (void *)(data + off);
+                       off += ret;
+               }
+               ++src;
+               ++dst;
+       } while (--num);
+       return off;
+}
+
+/**
+ * Copy a list of actions.
+ *
+ * @param[out] dst
+ *   Destination buffer. Can be NULL if @p size is zero.
+ * @param size
+ *   Size of @p dst in bytes.
+ * @param[in] src
+ *   Source actions.
+ * @param num
+ *   Maximum number of actions to process from @p src or 0 to process the
+ *   entire list. In both cases, processing stops after
+ *   RTE_FLOW_ACTION_TYPE_END is encountered.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A positive value representing the number of bytes needed to store
+ *   actions regardless of @p size on success (@p dst contents are
+ *   truncated to @p size if not large enough), a negative errno value
+ *   otherwise and rte_errno is set.
+ */
+static int
+rte_flow_conv_actions(struct rte_flow_action *dst,
+                     const size_t size,
+                     const struct rte_flow_action *src,
+                     unsigned int num,
+                     struct rte_flow_error *error)
+{
+       uintptr_t data = (uintptr_t)dst;
+       size_t off;
+       size_t ret;
+       unsigned int i;
+
+       for (i = 0, off = 0; !num || i != num; ++i, ++src, ++dst) {
+               if ((size_t)src->type >= RTE_DIM(rte_flow_desc_action) ||
+                   !rte_flow_desc_action[src->type].name)
+                       return rte_flow_error_set
+                               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+                                src, "cannot convert unknown action type");
+               if (size >= off + sizeof(*dst))
+                       *dst = (struct rte_flow_action){
+                               .type = src->type,
+                       };
+               off += sizeof(*dst);
+               if (!src->type)
+                       num = i + 1;
+       }
+       num = i;
+       src -= num;
+       dst -= num;
+       do {
+               if (src->conf) {
+                       off = RTE_ALIGN_CEIL(off, sizeof(double));
+                       ret = rte_flow_conv_action_conf
+                               ((void *)(data + off),
+                                size > off ? size - off : 0, src);
+                       if (size && size >= off + ret)
+                               dst->conf = (void *)(data + off);
+                       off += ret;
+               }
+               ++src;
+               ++dst;
+       } while (--num);
+       return off;
+}
+
+/**
+ * Copy flow rule components.
+ *
+ * This comprises the flow rule descriptor itself, attributes, pattern and
+ * actions list. NULL components in @p src are skipped.
+ *
+ * @param[out] dst
+ *   Destination buffer. Can be NULL if @p size is zero.
+ * @param size
+ *   Size of @p dst in bytes.
+ * @param[in] src
+ *   Source flow rule descriptor.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A positive value representing the number of bytes needed to store all
+ *   components including the descriptor regardless of @p size on success
+ *   (@p dst contents are truncated to @p size if not large enough), a
+ *   negative errno value otherwise and rte_errno is set.
+ */
+static int
+rte_flow_conv_rule(struct rte_flow_conv_rule *dst,
+                  const size_t size,
+                  const struct rte_flow_conv_rule *src,
+                  struct rte_flow_error *error)
+{
+       size_t off;
+       int ret;
+
+       rte_memcpy(dst,
+                  (&(struct rte_flow_conv_rule){
+                       .attr = NULL,
+                       .pattern = NULL,
+                       .actions = NULL,
+                  }),
+                  size > sizeof(*dst) ? sizeof(*dst) : size);
+       off = sizeof(*dst);
+       if (src->attr_ro) {
+               off = RTE_ALIGN_CEIL(off, sizeof(double));
+               if (size && size >= off + sizeof(*dst->attr))
+                       dst->attr = rte_memcpy
+                               ((void *)((uintptr_t)dst + off),
+                                src->attr_ro, sizeof(*dst->attr));
+               off += sizeof(*dst->attr);
+       }
+       if (src->pattern_ro) {
+               off = RTE_ALIGN_CEIL(off, sizeof(double));
+               ret = rte_flow_conv_pattern((void *)((uintptr_t)dst + off),
+                                           size > off ? size - off : 0,
+                                           src->pattern_ro, 0, error);
+               if (ret < 0)
+                       return ret;
+               if (size && size >= off + (size_t)ret)
+                       dst->pattern = (void *)((uintptr_t)dst + off);
+               off += ret;
+       }
+       if (src->actions_ro) {
+               off = RTE_ALIGN_CEIL(off, sizeof(double));
+               ret = rte_flow_conv_actions((void *)((uintptr_t)dst + off),
+                                           size > off ? size - off : 0,
+                                           src->actions_ro, 0, error);
+               if (ret < 0)
+                       return ret;
+               if (size >= off + (size_t)ret)
+                       dst->actions = (void *)((uintptr_t)dst + off);
+               off += ret;
+       }
+       return off;
+}
+
+/**
+ * Retrieve the name of a pattern item/action type.
+ *
+ * @param is_action
+ *   Nonzero when @p src represents an action type instead of a pattern item
+ *   type.
+ * @param is_ptr
+ *   Nonzero to write string address instead of contents into @p dst.
+ * @param[out] dst
+ *   Destination buffer. Can be NULL if @p size is zero.
+ * @param size
+ *   Size of @p dst in bytes.
+ * @param[in] src
+ *   Depending on @p is_action, source pattern item or action type cast as a
+ *   pointer.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   A positive value representing the number of bytes needed to store the
+ *   name or its address regardless of @p size on success (@p dst contents
+ *   are truncated to @p size if not large enough), a negative errno value
+ *   otherwise and rte_errno is set.
+ */
+static int
+rte_flow_conv_name(int is_action,
+                  int is_ptr,
+                  char *dst,
+                  const size_t size,
+                  const void *src,
+                  struct rte_flow_error *error)
+{
+       struct desc_info {
+               const struct rte_flow_desc_data *data;
+               size_t num;
+       };
+       static const struct desc_info info_rep[2] = {
+               { rte_flow_desc_item, RTE_DIM(rte_flow_desc_item), },
+               { rte_flow_desc_action, RTE_DIM(rte_flow_desc_action), },
+       };
+       const struct desc_info *const info = &info_rep[!!is_action];
+       unsigned int type = (uintptr_t)src;
+
+       if (type >= info->num)
+               return rte_flow_error_set
+                       (error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                        "unknown object type to retrieve the name of");
+       if (!is_ptr)
+               return strlcpy(dst, info->data[type].name, size);
+       if (size >= sizeof(const char **))
+               *((const char **)dst) = info->data[type].name;
+       return sizeof(const char **);
+}
+
+/** Helper function to convert flow API objects. */
+int
+rte_flow_conv(enum rte_flow_conv_op op,
+             void *dst,
+             size_t size,
+             const void *src,
+             struct rte_flow_error *error)
+{
+       switch (op) {
+               const struct rte_flow_attr *attr;
+
+       case RTE_FLOW_CONV_OP_NONE:
+               return 0;
+       case RTE_FLOW_CONV_OP_ATTR:
+               attr = src;
+               if (size > sizeof(*attr))
+                       size = sizeof(*attr);
+               rte_memcpy(dst, attr, size);
+               return sizeof(*attr);
+       case RTE_FLOW_CONV_OP_ITEM:
+               return rte_flow_conv_pattern(dst, size, src, 1, error);
+       case RTE_FLOW_CONV_OP_ACTION:
+               return rte_flow_conv_actions(dst, size, src, 1, error);
+       case RTE_FLOW_CONV_OP_PATTERN:
+               return rte_flow_conv_pattern(dst, size, src, 0, error);
+       case RTE_FLOW_CONV_OP_ACTIONS:
+               return rte_flow_conv_actions(dst, size, src, 0, error);
+       case RTE_FLOW_CONV_OP_RULE:
+               return rte_flow_conv_rule(dst, size, src, error);
+       case RTE_FLOW_CONV_OP_ITEM_NAME:
+               return rte_flow_conv_name(0, 0, dst, size, src, error);
+       case RTE_FLOW_CONV_OP_ACTION_NAME:
+               return rte_flow_conv_name(1, 0, dst, size, src, error);
+       case RTE_FLOW_CONV_OP_ITEM_NAME_PTR:
+               return rte_flow_conv_name(0, 1, dst, size, src, error);
+       case RTE_FLOW_CONV_OP_ACTION_NAME_PTR:
+               return rte_flow_conv_name(1, 1, dst, size, src, error);
+       }
+       return rte_flow_error_set
+               (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+                "unknown object conversion operation");
 }
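
Since every operation dispatched above follows the same snprintf()-like size
contract, callers normally invoke rte_flow_conv() twice: once with a
zero-sized buffer to learn the required size, then again to perform the
conversion. A minimal sketch of that pattern; dup_rule() is a hypothetical
helper, not part of this patch:

#include <stdlib.h>
#include <rte_flow.h>

/* Duplicate a complete rule description (sketch, abbreviated checks). */
static struct rte_flow_conv_rule *
dup_rule(const struct rte_flow_attr *attr,
	 const struct rte_flow_item *pattern,
	 const struct rte_flow_action *actions,
	 struct rte_flow_error *error)
{
	struct rte_flow_conv_rule rule = {
		.attr_ro = attr,
		.pattern_ro = pattern,
		.actions_ro = actions,
	};
	struct rte_flow_conv_rule *dst;
	int ret;

	/* First pass: size query, nothing is written. */
	ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, NULL, 0, &rule, error);
	if (ret < 0)
		return NULL;
	dst = malloc(ret);
	if (dst == NULL)
		return NULL;
	/* Second pass: convert into the buffer just allocated. */
	if (rte_flow_conv(RTE_FLOW_CONV_OP_RULE, dst, ret, &rule, error) < 0) {
		free(dst);
		dst = NULL;
	}
	return dst;
}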
 
 /** Store a full rte_flow description. */
@@ -426,105 +850,49 @@ rte_flow_copy(struct rte_flow_desc *desc, size_t len,
              const struct rte_flow_item *items,
              const struct rte_flow_action *actions)
 {
-       struct rte_flow_desc *fd = NULL;
-       size_t tmp;
-       size_t off1 = 0;
-       size_t off2 = 0;
-       size_t size = 0;
-
-store:
-       if (items) {
-               const struct rte_flow_item *item;
-
-               item = items;
-               if (fd)
-                       fd->items = (void *)&fd->data[off1];
-               do {
-                       struct rte_flow_item *dst = NULL;
-
-                       if ((size_t)item->type >=
-                               RTE_DIM(rte_flow_desc_item) ||
-                           !rte_flow_desc_item[item->type].name) {
-                               rte_errno = ENOTSUP;
-                               return 0;
-                       }
-                       if (fd)
-                               dst = memcpy(fd->data + off1, item,
-                                            sizeof(*item));
-                       off1 += sizeof(*item);
-                       if (item->spec) {
-                               if (fd)
-                                       dst->spec = fd->data + off2;
-                               off2 += flow_item_spec_copy
-                                       (fd ? fd->data + off2 : NULL, item,
-                                        ITEM_SPEC);
-                       }
-                       if (item->last) {
-                               if (fd)
-                                       dst->last = fd->data + off2;
-                               off2 += flow_item_spec_copy
-                                       (fd ? fd->data + off2 : NULL, item,
-                                        ITEM_LAST);
-                       }
-                       if (item->mask) {
-                               if (fd)
-                                       dst->mask = fd->data + off2;
-                               off2 += flow_item_spec_copy
-                                       (fd ? fd->data + off2 : NULL, item,
-                                        ITEM_MASK);
-                       }
-                       off2 = RTE_ALIGN_CEIL(off2, sizeof(double));
-               } while ((item++)->type != RTE_FLOW_ITEM_TYPE_END);
-               off1 = RTE_ALIGN_CEIL(off1, sizeof(double));
-       }
-       if (actions) {
-               const struct rte_flow_action *action;
-
-               action = actions;
-               if (fd)
-                       fd->actions = (void *)&fd->data[off1];
-               do {
-                       struct rte_flow_action *dst = NULL;
-
-                       if ((size_t)action->type >=
-                               RTE_DIM(rte_flow_desc_action) ||
-                           !rte_flow_desc_action[action->type].name) {
-                               rte_errno = ENOTSUP;
-                               return 0;
-                       }
-                       if (fd)
-                               dst = memcpy(fd->data + off1, action,
-                                            sizeof(*action));
-                       off1 += sizeof(*action);
-                       if (action->conf) {
-                               if (fd)
-                                       dst->conf = fd->data + off2;
-                               off2 += flow_action_conf_copy
-                                       (fd ? fd->data + off2 : NULL, action);
-                       }
-                       off2 = RTE_ALIGN_CEIL(off2, sizeof(double));
-               } while ((action++)->type != RTE_FLOW_ACTION_TYPE_END);
+       /*
+        * Overlap struct rte_flow_conv_rule with struct rte_flow_desc in
+        * order to convert the former to the latter without wasting space.
+        */
+       struct rte_flow_conv_rule *dst =
+               len ?
+               (void *)((uintptr_t)desc +
+                        (offsetof(struct rte_flow_desc, actions) -
+                         offsetof(struct rte_flow_conv_rule, actions))) :
+               NULL;
+       size_t dst_size =
+               len > sizeof(*desc) - sizeof(*dst) ?
+               len - (sizeof(*desc) - sizeof(*dst)) :
+               0;
+       struct rte_flow_conv_rule src = {
+               .attr_ro = NULL,
+               .pattern_ro = items,
+               .actions_ro = actions,
+       };
+       int ret;
+
+       RTE_BUILD_BUG_ON(sizeof(struct rte_flow_desc) <
+                        sizeof(struct rte_flow_conv_rule));
+       if (dst_size &&
+           (&dst->pattern != &desc->items ||
+            &dst->actions != &desc->actions ||
+            (uintptr_t)(dst + 1) != (uintptr_t)(desc + 1))) {
+               rte_errno = EINVAL;
+               return 0;
        }
-       if (fd != NULL)
-               return size;
-       off1 = RTE_ALIGN_CEIL(off1, sizeof(double));
-       tmp = RTE_ALIGN_CEIL(offsetof(struct rte_flow_desc, data),
-                            sizeof(double));
-       size = tmp + off1 + off2;
-       if (size > len)
-               return size;
-       fd = desc;
-       if (fd != NULL) {
-               *fd = (const struct rte_flow_desc) {
-                       .size = size,
+       ret = rte_flow_conv(RTE_FLOW_CONV_OP_RULE, dst, dst_size, &src, NULL);
+       if (ret < 0)
+               return 0;
+       ret += sizeof(*desc) - sizeof(*dst);
+       rte_memcpy(desc,
+                  (&(struct rte_flow_desc){
+                       .size = ret,
                        .attr = *attr,
-               };
-               tmp -= offsetof(struct rte_flow_desc, data);
-               off2 = tmp + off1;
-               off1 = tmp;
-               goto store;
-       }
-       return 0;
+                       .items = dst_size ? dst->pattern : NULL,
+                       .actions = dst_size ? dst->actions : NULL,
+                  }),
+                  len > sizeof(*desc) ? sizeof(*desc) : len);
+       return ret;
 }
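
The wrapper keeps rte_flow_copy()'s historical convention: when len is too
small, the required size is returned and nothing useful is written, so the
usual two-pass idiom still works. A hedged fragment, assuming attr, items
and actions are in scope:

size_t need = rte_flow_copy(NULL, 0, attr, items, actions);

if (need != 0) { /* zero indicates a conversion error */
	struct rte_flow_desc *desc = malloc(need);

	if (desc != NULL)
		rte_flow_copy(desc, need, attr, items, actions);
}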
 
 /**
index f8ba71c..c0fe879 100644 (file)
@@ -18,6 +18,7 @@
 #include <stdint.h>
 
 #include <rte_arp.h>
+#include <rte_common.h>
 #include <rte_ether.h>
 #include <rte_eth_ctrl.h>
 #include <rte_icmp.h>
@@ -413,6 +414,14 @@ enum rte_flow_item_type {
         * See struct rte_flow_item_mark.
         */
        RTE_FLOW_ITEM_TYPE_MARK,
+
+       /**
+        * [META]
+        *
+        * Matches the metadata value carried in the mbuf metadata field.
+        * See struct rte_flow_item_meta.
+        */
+       RTE_FLOW_ITEM_TYPE_META,
 };
 
 /**
@@ -1155,6 +1164,22 @@ rte_flow_item_icmp6_nd_opt_tla_eth_mask = {
 };
 #endif
 
+/**
+ * RTE_FLOW_ITEM_TYPE_META.
+ *
+ * Matches a specified metadata value.
+ */
+struct rte_flow_item_meta {
+       rte_be32_t data;
+};
+
+/** Default mask for RTE_FLOW_ITEM_TYPE_META. */
+#ifndef __cplusplus
+static const struct rte_flow_item_meta rte_flow_item_meta_mask = {
+       .data = RTE_BE32(UINT32_MAX),
+};
+#endif
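
A minimal pattern fragment using the new item; the value 42 is arbitrary
and corresponds to whatever the application writes to the mbuf metadata
field:

/* Sketch: match packets whose metadata field equals 42. */
static const struct rte_flow_item_meta meta_spec = {
	.data = RTE_BE32(42),
};
static const struct rte_flow_item pattern[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_META,
		.spec = &meta_spec,
		.mask = &rte_flow_item_meta_mask,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
};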
+
 /**
  * @warning
  * @b EXPERIMENTAL: this structure may change without prior notice
@@ -1505,6 +1530,127 @@ enum rte_flow_action_type {
         * error.
         */
        RTE_FLOW_ACTION_TYPE_NVGRE_DECAP,
+
+       /**
+        * Add an outer header whose template is provided in its data buffer.
+        *
+        * See struct rte_flow_action_raw_encap.
+        */
+       RTE_FLOW_ACTION_TYPE_RAW_ENCAP,
+
+       /**
+        * Remove an outer header whose template is provided in its data
+        * buffer.
+        *
+        * See struct rte_flow_action_raw_decap.
+        */
+       RTE_FLOW_ACTION_TYPE_RAW_DECAP,
+
+       /**
+        * Modify IPv4 source address in the outermost IPv4 header.
+        *
+        * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_IPV4,
+        * then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION error.
+        *
+        * See struct rte_flow_action_set_ipv4.
+        */
+       RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC,
+
+       /**
+        * Modify IPv4 destination address in the outermost IPv4 header.
+        *
+        * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_IPV4,
+        * then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION error.
+        *
+        * See struct rte_flow_action_set_ipv4.
+        */
+       RTE_FLOW_ACTION_TYPE_SET_IPV4_DST,
+
+       /**
+        * Modify IPv6 source address in the outermost IPv6 header.
+        *
+        * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_IPV6,
+        * then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION error.
+        *
+        * See struct rte_flow_action_set_ipv6.
+        */
+       RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC,
+
+       /**
+        * Modify IPv6 destination address in the outermost IPv6 header.
+        *
+        * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_IPV6,
+        * then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION error.
+        *
+        * See struct rte_flow_action_set_ipv6.
+        */
+       RTE_FLOW_ACTION_TYPE_SET_IPV6_DST,
+
+       /**
+        * Modify source port number in the outermost TCP/UDP header.
+        *
+        * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_TCP
+        * or RTE_FLOW_ITEM_TYPE_UDP, then the PMD should return a
+        * RTE_FLOW_ERROR_TYPE_ACTION error.
+        *
+        * See struct rte_flow_action_set_tp.
+        */
+       RTE_FLOW_ACTION_TYPE_SET_TP_SRC,
+
+       /**
+        * Modify destination port number in the outermost TCP/UDP header.
+        *
+        * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_TCP
+        * or RTE_FLOW_ITEM_TYPE_UDP, then the PMD should return a
+        * RTE_FLOW_ERROR_TYPE_ACTION error.
+        *
+        * See struct rte_flow_action_set_tp.
+        */
+       RTE_FLOW_ACTION_TYPE_SET_TP_DST,
+
+       /**
+        * Swap the source and destination MAC addresses in the outermost
+        * Ethernet header.
+        *
+        * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_ETH,
+        * then the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION error.
+        *
+        * No associated configuration structure.
+        */
+       RTE_FLOW_ACTION_TYPE_MAC_SWAP,
+
+       /**
+        * Decrease TTL value directly.
+        *
+        * No associated configuration structure.
+        */
+       RTE_FLOW_ACTION_TYPE_DEC_TTL,
+
+       /**
+        * Set TTL value.
+        *
+        * See struct rte_flow_action_set_ttl.
+        */
+       RTE_FLOW_ACTION_TYPE_SET_TTL,
+
+       /**
+        * Set source MAC address from matched flow.
+        *
+        * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_ETH,
+        * the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION error.
+        *
+        * See struct rte_flow_action_set_mac.
+        */
+       RTE_FLOW_ACTION_TYPE_SET_MAC_SRC,
+
+       /**
+        * Set destination MAC address from matched flow.
+        *
+        * If flow pattern does not define a valid RTE_FLOW_ITEM_TYPE_ETH,
+        * the PMD should return a RTE_FLOW_ERROR_TYPE_ACTION error.
+        *
+        * See struct rte_flow_action_set_mac.
+        */
+       RTE_FLOW_ACTION_TYPE_SET_MAC_DST,
 };
 
 /**
@@ -1868,6 +2014,114 @@ struct rte_flow_action_nvgre_encap {
        struct rte_flow_item *definition;
 };
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * RTE_FLOW_ACTION_TYPE_RAW_ENCAP
+ *
+ * Raw tunnel end-point encapsulation data definition.
+ *
+ * The data holds the header definitions to be applied to the packet.
+ * The data must start with the ETH header and extend up to the tunnel
+ * header itself. When used right after RAW_DECAP (e.g. to decapsulate an
+ * L3 tunnel type such as MPLSoGRE), the data holds only the layer 2
+ * header.
+ *
+ * The preserve parameter holds which bits in the packet the PMD is not
+ * allowed to change; it can be NULL, in which case the PMD may update
+ * any field.
+ *
+ * size holds the number of bytes in @p data and @p preserve.
+ */
+struct rte_flow_action_raw_encap {
+       uint8_t *data; /**< Encapsulation data. */
+       uint8_t *preserve; /**< Bit-mask of @p data to preserve on output. */
+       size_t size; /**< Size of @p data and @p preserve. */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * RTE_FLOW_ACTION_TYPE_RAW_DECAP
+ *
+ * Raw tunnel end-point decapsulation data definition.
+ *
+ * The data holds the header definitions to be removed from the packet.
+ * The data must start with the ETH header and extend up to the tunnel
+ * header itself. When used right before RAW_ENCAP (e.g. to encapsulate an
+ * L3 tunnel type such as MPLSoGRE), the data holds only the layer 2
+ * header.
+ *
+ * size holds the number of bytes in @p data.
+ */
+struct rte_flow_action_raw_decap {
+       uint8_t *data; /**< Header definitions to remove. */
+       size_t size; /**< Size of @p data. */
+};
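
The two raw actions are meant to be combined. A hedged fragment replacing
an MPLSoGRE-style outer stack with a bare Ethernet header; outer_hdr,
new_l2 and their sizes are assumed caller-built buffers:

struct rte_flow_action_raw_decap decap = {
	.data = outer_hdr,	/* ETH up to the tunnel header, to remove */
	.size = outer_hdr_size,
};
struct rte_flow_action_raw_encap encap = {
	.data = new_l2,		/* bare Ethernet header to prepend */
	.preserve = NULL,	/* PMD may rewrite any field */
	.size = new_l2_size,
};
const struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_RAW_DECAP, .conf = &decap },
	{ .type = RTE_FLOW_ACTION_TYPE_RAW_ENCAP, .conf = &encap },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};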
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC
+ * RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
+ *
+ * Allows modification of IPv4 source (RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC)
+ * and destination address (RTE_FLOW_ACTION_TYPE_SET_IPV4_DST) in the
+ * specified outermost IPv4 header.
+ */
+struct rte_flow_action_set_ipv4 {
+       rte_be32_t ipv4_addr;
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC
+ * RTE_FLOW_ACTION_TYPE_SET_IPV6_DST
+ *
+ * Allows modification of IPv6 source (RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC)
+ * and destination address (RTE_FLOW_ACTION_TYPE_SET_IPV6_DST) in the
+ * specified outermost IPv6 header.
+ */
+struct rte_flow_action_set_ipv6 {
+       uint8_t ipv6_addr[16];
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice
+ *
+ * RTE_FLOW_ACTION_TYPE_SET_TP_SRC
+ * RTE_FLOW_ACTION_TYPE_SET_TP_DST
+ *
+ * Allows modification of source (RTE_FLOW_ACTION_TYPE_SET_TP_SRC)
+ * and destination (RTE_FLOW_ACTION_TYPE_SET_TP_DST) port numbers
+ * in the specified outermost TCP/UDP header.
+ */
+struct rte_flow_action_set_tp {
+       rte_be16_t port;
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_SET_TTL
+ *
+ * Set the TTL value directly for IPv4 or IPv6.
+ */
+struct rte_flow_action_set_ttl {
+       uint8_t ttl_value;
+};
+
+/**
+ * RTE_FLOW_ACTION_TYPE_SET_MAC_SRC
+ * RTE_FLOW_ACTION_TYPE_SET_MAC_DST
+ *
+ * Allows modification of source (RTE_FLOW_ACTION_TYPE_SET_MAC_SRC) and
+ * destination (RTE_FLOW_ACTION_TYPE_SET_MAC_DST) MAC addresses in the
+ * outermost Ethernet header.
+ */
+struct rte_flow_action_set_mac {
+       uint8_t mac_addr[ETHER_ADDR_LEN];
+};
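
The header-rewrite actions compose naturally in one action list. A hedged
sketch rewriting the outermost IPv4 source address and L4 destination port
while decrementing TTL; the pattern (not shown) must contain valid IPV4
and TCP/UDP items:

static const struct rte_flow_action_set_ipv4 new_src = {
	.ipv4_addr = RTE_BE32(0x0a000001), /* 10.0.0.1 */
};
static const struct rte_flow_action_set_tp new_dport = {
	.port = RTE_BE16(4789),
};
static const struct rte_flow_action actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC, .conf = &new_src },
	{ .type = RTE_FLOW_ACTION_TYPE_SET_TP_DST, .conf = &new_dport },
	{ .type = RTE_FLOW_ACTION_TYPE_DEC_TTL },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};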
+
 /*
  * Definition of a single action.
  *
@@ -1931,6 +2185,175 @@ struct rte_flow_error {
        const char *message; /**< Human-readable error message. */
 };
 
+/**
+ * Complete flow rule description.
+ *
+ * This object type is used when converting a flow rule description.
+ *
+ * @see RTE_FLOW_CONV_OP_RULE
+ * @see rte_flow_conv()
+ */
+RTE_STD_C11
+struct rte_flow_conv_rule {
+       union {
+               const struct rte_flow_attr *attr_ro; /**< RO attributes. */
+               struct rte_flow_attr *attr; /**< Attributes. */
+       };
+       union {
+               const struct rte_flow_item *pattern_ro; /**< RO pattern. */
+               struct rte_flow_item *pattern; /**< Pattern items. */
+       };
+       union {
+               const struct rte_flow_action *actions_ro; /**< RO actions. */
+               struct rte_flow_action *actions; /**< List of actions. */
+       };
+};
+
+/**
+ * Conversion operations for flow API objects.
+ *
+ * @see rte_flow_conv()
+ */
+enum rte_flow_conv_op {
+       /**
+        * No operation to perform.
+        *
+        * rte_flow_conv() simply returns 0.
+        */
+       RTE_FLOW_CONV_OP_NONE,
+
+       /**
+        * Convert attributes structure.
+        *
+        * This is a basic copy of an attributes structure.
+        *
+        * - @p src type:
+        *   @code const struct rte_flow_attr * @endcode
+        * - @p dst type:
+        *   @code struct rte_flow_attr * @endcode
+        */
+       RTE_FLOW_CONV_OP_ATTR,
+
+       /**
+        * Convert a single item.
+        *
+        * Duplicates @p spec, @p last and @p mask but not outside objects.
+        *
+        * - @p src type:
+        *   @code const struct rte_flow_item * @endcode
+        * - @p dst type:
+        *   @code struct rte_flow_item * @endcode
+        */
+       RTE_FLOW_CONV_OP_ITEM,
+
+       /**
+        * Convert a single action.
+        *
+        * Duplicates @p conf but not outside objects.
+        *
+        * - @p src type:
+        *   @code const struct rte_flow_action * @endcode
+        * - @p dst type:
+        *   @code struct rte_flow_action * @endcode
+        */
+       RTE_FLOW_CONV_OP_ACTION,
+
+       /**
+        * Convert an entire pattern.
+        *
+        * Duplicates all pattern items at once with the same constraints as
+        * RTE_FLOW_CONV_OP_ITEM.
+        *
+        * - @p src type:
+        *   @code const struct rte_flow_item * @endcode
+        * - @p dst type:
+        *   @code struct rte_flow_item * @endcode
+        */
+       RTE_FLOW_CONV_OP_PATTERN,
+
+       /**
+        * Convert a list of actions.
+        *
+        * Duplicates the entire list of actions at once with the same
+        * constraints as RTE_FLOW_CONV_OP_ACTION.
+        *
+        * - @p src type:
+        *   @code const struct rte_flow_action * @endcode
+        * - @p dst type:
+        *   @code struct rte_flow_action * @endcode
+        */
+       RTE_FLOW_CONV_OP_ACTIONS,
+
+       /**
+        * Convert a complete flow rule description.
+        *
+        * Comprises attributes, pattern and actions together at once with
+        * the usual constraints.
+        *
+        * - @p src type:
+        *   @code const struct rte_flow_conv_rule * @endcode
+        * - @p dst type:
+        *   @code struct rte_flow_conv_rule * @endcode
+        */
+       RTE_FLOW_CONV_OP_RULE,
+
+       /**
+        * Convert item type to its name string.
+        *
+        * Writes a NUL-terminated string to @p dst. Like snprintf(), the
+        * returned value excludes the terminator which is always written
+        * nonetheless.
+        *
+        * - @p src type:
+        *   @code (const void *)enum rte_flow_item_type @endcode
+        * - @p dst type:
+        *   @code char * @endcode
+        */
+       RTE_FLOW_CONV_OP_ITEM_NAME,
+
+       /**
+        * Convert action type to its name string.
+        *
+        * Writes a NUL-terminated string to @p dst. Like snprintf(), the
+        * returned value excludes the terminator which is always written
+        * nonetheless.
+        *
+        * - @p src type:
+        *   @code (const void *)enum rte_flow_action_type @endcode
+        * - @p dst type:
+        *   @code char * @endcode
+        */
+       RTE_FLOW_CONV_OP_ACTION_NAME,
+
+       /**
+        * Convert item type to pointer to item name.
+        *
+        * Retrieves the item name pointer from its type. The string itself
+        * is not copied; instead, a unique pointer to internal static
+        * constant storage is written to @p dst.
+        *
+        * - @p src type:
+        *   @code (const void *)enum rte_flow_item_type @endcode
+        * - @p dst type:
+        *   @code const char ** @endcode
+        */
+       RTE_FLOW_CONV_OP_ITEM_NAME_PTR,
+
+       /**
+        * Convert action type to pointer to action name.
+        *
+        * Retrieves the action name pointer from its type. The string
+        * itself is not copied; instead, a unique pointer to internal
+        * static constant storage is written to @p dst.
+        *
+        * - @p src type:
+        *   @code (const void *)enum rte_flow_action_type @endcode
+        * - @p dst type:
+        *   @code const char ** @endcode
+        */
+       RTE_FLOW_CONV_OP_ACTION_NAME_PTR,
+};
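
The two name operations differ only in whether the string is copied or
referenced. A short sketch, assuming RTE_FLOW_ITEM_TYPE_VXLAN resolves to
the name "VXLAN":

char name[64];
const char *name_ptr;

/* Copies the name into name[], returns its length like snprintf(). */
rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME, name, sizeof(name),
	      (void *)(uintptr_t)RTE_FLOW_ITEM_TYPE_VXLAN, NULL);
/* Stores a pointer to the internal constant string instead. */
rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &name_ptr, sizeof(name_ptr),
	      (void *)(uintptr_t)RTE_FLOW_ITEM_TYPE_VXLAN, NULL);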
+
 /**
  * Check whether a flow rule can be created on a given port.
  *
@@ -2162,10 +2585,8 @@ rte_flow_error_set(struct rte_flow_error *error,
                   const char *message);
 
 /**
- * Generic flow representation.
- *
- * This form is sufficient to describe an rte_flow independently from any
- * PMD implementation and allows for replayability and identification.
+ * @deprecated
+ * @see rte_flow_copy()
  */
 struct rte_flow_desc {
        size_t size; /**< Allocated space including data[]. */
@@ -2176,8 +2597,14 @@ struct rte_flow_desc {
 };
 
 /**
+ * @deprecated
  * Copy an rte_flow rule description.
  *
+ * This interface is kept for compatibility with older applications but is
+ * implemented as a wrapper to rte_flow_conv(). It is deprecated due to its
+ * lack of flexibility and reliance on a type unusable with C++ programs
+ * (struct rte_flow_desc).
+ *
  * @param[in] fd
  *   Flow rule description.
  * @param[in] len
@@ -2195,12 +2622,61 @@ struct rte_flow_desc {
  *   If len is lower than the size of the flow, the number of bytes that would
  *   have been written to desc had it been sufficient. Nothing is written.
  */
+__rte_deprecated
 size_t
 rte_flow_copy(struct rte_flow_desc *fd, size_t len,
              const struct rte_flow_attr *attr,
              const struct rte_flow_item *items,
              const struct rte_flow_action *actions);
 
+/**
+ * Flow object conversion helper.
+ *
+ * This function performs conversion of various flow API objects to a
+ * pre-allocated destination buffer. See enum rte_flow_conv_op for possible
+ * operations and details about each of them.
+ *
+ * Since the destination buffer must be large enough to hold the result,
+ * this function works in a manner reminiscent of snprintf():
+ *
+ * - If @p size is 0, @p dst may be a NULL pointer, otherwise @p dst must be
+ *   non-NULL.
+ * - If positive, the returned value represents the number of bytes needed
+ *   to store the conversion of @p src to @p dst according to @p op
+ *   regardless of the @p size parameter.
+ * - Since no more than @p size bytes can be written to @p dst, output is
+ *   truncated and may be inconsistent when the returned value is larger
+ *   than that.
+ * - In case of conversion error, a negative error code is returned and
+ *   @p dst contents are unspecified.
+ *
+ * @param op
+ *   Operation to perform, related to the object type of @p dst.
+ * @param[out] dst
+ *   Destination buffer address. Must be suitably aligned by the caller.
+ * @param size
+ *   Destination buffer size in bytes.
+ * @param[in] src
+ *   Source object to copy. Depending on @p op, its type may differ from
+ *   that of @p dst.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL. Initialized in case of
+ *   error only.
+ *
+ * @return
+ *   The number of bytes required to convert @p src to @p dst on success, a
+ *   negative errno value otherwise and rte_errno is set.
+ *
+ * @see rte_flow_conv_op
+ */
+__rte_experimental
+int
+rte_flow_conv(enum rte_flow_conv_op op,
+             void *dst,
+             size_t size,
+             const void *src,
+             struct rte_flow_error *error);
+
 #ifdef __cplusplus
 }
 #endif
index 955f02f..646ef38 100644 (file)
@@ -831,10 +831,10 @@ enum rte_tm_cman_mode {
  */
 struct rte_tm_red_params {
        /** Minimum queue threshold */
-       uint32_t min_th;
+       uint64_t min_th;
 
        /** Maximum queue threshold */
-       uint32_t max_th;
+       uint64_t max_th;
 
        /** Inverse of packet marking probability maximum value (maxp), i.e.
         * maxp_inv = 1 / maxp
index 47f599a..9496187 100644 (file)
@@ -8,7 +8,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_eventdev.a
 
 # library version
-LIBABIVER := 5
+LIBABIVER := 6
 
 # build flags
 CFLAGS += -DALLOW_EXPERIMENTAL_API
@@ -28,6 +28,7 @@ SRCS-y += rte_event_ring.c
 SRCS-y += rte_event_eth_rx_adapter.c
 SRCS-y += rte_event_timer_adapter.c
 SRCS-y += rte_event_crypto_adapter.c
+SRCS-y += rte_event_eth_tx_adapter.c
 
 # export include files
 SYMLINK-y-include += rte_eventdev.h
@@ -39,6 +40,7 @@ SYMLINK-y-include += rte_event_eth_rx_adapter.h
 SYMLINK-y-include += rte_event_timer_adapter.h
 SYMLINK-y-include += rte_event_timer_adapter_pmd.h
 SYMLINK-y-include += rte_event_crypto_adapter.h
+SYMLINK-y-include += rte_event_eth_tx_adapter.h
 
 # versioning export map
 EXPORT_MAP := rte_eventdev_version.map
index 3cbaf29..6becfe8 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-version = 5
+version = 6
 allow_experimental_apis = true
 
 if host_machine.system() == 'linux'
@@ -14,7 +14,8 @@ sources = files('rte_eventdev.c',
                'rte_event_ring.c',
                'rte_event_eth_rx_adapter.c',
                'rte_event_timer_adapter.c',
-               'rte_event_crypto_adapter.c')
+               'rte_event_crypto_adapter.c',
+               'rte_event_eth_tx_adapter.c')
 headers = files('rte_eventdev.h',
                'rte_eventdev_pmd.h',
                'rte_eventdev_pmd_pci.h',
@@ -23,5 +24,6 @@ headers = files('rte_eventdev.h',
                'rte_event_eth_rx_adapter.h',
                'rte_event_timer_adapter.h',
                'rte_event_timer_adapter_pmd.h',
-               'rte_event_crypto_adapter.h')
+               'rte_event_crypto_adapter.h',
+               'rte_event_eth_tx_adapter.h')
 deps += ['ring', 'ethdev', 'hash', 'mempool', 'mbuf', 'timer', 'cryptodev']
index f5e5a0b..71d008c 100644 (file)
@@ -1125,7 +1125,6 @@ rxa_poll(struct rte_event_eth_rx_adapter *rx_adapter)
        wrr_pos = rx_adapter->wrr_pos;
        max_nb_rx = rx_adapter->max_nb_rx;
        buf = &rx_adapter->event_enqueue_buffer;
-       stats = &rx_adapter->stats;
 
        /* Iterate through a WRR sequence */
        for (num_queue = 0; num_queue < rx_adapter->wrr_len; num_queue++) {
@@ -1998,8 +1997,7 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id,
        rx_adapter->id = id;
        strcpy(rx_adapter->mem_name, mem_name);
        rx_adapter->eth_devices = rte_zmalloc_socket(rx_adapter->mem_name,
-                                       /* FIXME: incompatible with hotplug */
-                                       rte_eth_dev_count_total() *
+                                       RTE_MAX_ETHPORTS *
                                        sizeof(struct eth_device_info), 0,
                                        socket_id);
        rte_convert_rss_key((const uint32_t *)default_rss_key,
@@ -2012,7 +2010,7 @@ rte_event_eth_rx_adapter_create_ext(uint8_t id, uint8_t dev_id,
                return -ENOMEM;
        }
        rte_spinlock_init(&rx_adapter->rx_lock);
-       RTE_ETH_FOREACH_DEV(i)
+       for (i = 0; i < RTE_MAX_ETHPORTS; i++)
                rx_adapter->eth_devices[i].dev = &rte_eth_devices[i];
 
        event_eth_rx_adapter[id] = rx_adapter;
index 332ee21..863b72a 100644 (file)
  * rte_event_eth_rx_adapter_cb_register() function allows the
  * application to register a callback that selects which packets to enqueue
  * to the event device.
- *
- * Note:
- * 1) Devices created after an instance of rte_event_eth_rx_adapter_create
- *  should be added to a new instance of the rx adapter.
  */
 
 #ifdef __cplusplus
diff --git a/lib/librte_eventdev/rte_event_eth_tx_adapter.c b/lib/librte_eventdev/rte_event_eth_tx_adapter.c
new file mode 100644 (file)
index 0000000..3a21def
--- /dev/null
@@ -0,0 +1,1138 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation.
+ */
+#include <rte_spinlock.h>
+#include <rte_service_component.h>
+#include <rte_ethdev.h>
+
+#include "rte_eventdev_pmd.h"
+#include "rte_event_eth_tx_adapter.h"
+
+#define TXA_BATCH_SIZE         32
+#define TXA_SERVICE_NAME_LEN   32
+#define TXA_MEM_NAME_LEN       32
+#define TXA_FLUSH_THRESHOLD    1024
+#define TXA_RETRY_CNT          100
+#define TXA_MAX_NB_TX          128
+#define TXA_INVALID_DEV_ID     INT32_C(-1)
+#define TXA_INVALID_SERVICE_ID INT64_C(-1)
+
+#define txa_evdev(id) (&rte_eventdevs[txa_dev_id_array[(id)]])
+
+#define txa_dev_caps_get(id) txa_evdev((id))->dev_ops->eth_tx_adapter_caps_get
+
+#define txa_dev_adapter_create(t) txa_evdev(t)->dev_ops->eth_tx_adapter_create
+
+#define txa_dev_adapter_create_ext(t) \
+                               txa_evdev(t)->dev_ops->eth_tx_adapter_create
+
+#define txa_dev_adapter_free(t) txa_evdev(t)->dev_ops->eth_tx_adapter_free
+
+#define txa_dev_queue_add(id) txa_evdev(id)->dev_ops->eth_tx_adapter_queue_add
+
+#define txa_dev_queue_del(t) txa_evdev(t)->dev_ops->eth_tx_adapter_queue_del
+
+#define txa_dev_start(t) txa_evdev(t)->dev_ops->eth_tx_adapter_start
+
+#define txa_dev_stop(t) txa_evdev(t)->dev_ops->eth_tx_adapter_stop
+
+#define txa_dev_stats_reset(t) txa_evdev(t)->dev_ops->eth_tx_adapter_stats_reset
+
+#define txa_dev_stats_get(t) txa_evdev(t)->dev_ops->eth_tx_adapter_stats_get
+
+#define RTE_EVENT_ETH_TX_ADAPTER_ID_VALID_OR_ERR_RET(id, retval) \
+do { \
+       if (!txa_valid_id(id)) { \
+               RTE_EDEV_LOG_ERR("Invalid eth Rx adapter id = %d", id); \
+               return retval; \
+       } \
+} while (0)
+
+#define TXA_CHECK_OR_ERR_RET(id) \
+do {\
+       int ret; \
+       RTE_EVENT_ETH_TX_ADAPTER_ID_VALID_OR_ERR_RET((id), -EINVAL); \
+       ret = txa_init(); \
+       if (ret != 0) \
+               return ret; \
+       if (!txa_adapter_exist((id))) \
+               return -EINVAL; \
+} while (0)
+
+/* Tx retry callback structure */
+struct txa_retry {
+       /* Ethernet port id */
+       uint16_t port_id;
+       /* Tx queue */
+       uint16_t tx_queue;
+       /* Adapter ID */
+       uint8_t id;
+};
+
+/* Per queue structure */
+struct txa_service_queue_info {
+       /* Queue has been added */
+       uint8_t added;
+       /* Retry callback argument */
+       struct txa_retry txa_retry;
+       /* Tx buffer */
+       struct rte_eth_dev_tx_buffer *tx_buf;
+};
+
+/* PMD private structure */
+struct txa_service_data {
+       /* Max mbufs processed in any service function invocation */
+       uint32_t max_nb_tx;
+       /* Number of Tx queues in adapter */
+       uint32_t nb_queues;
+       /*  Synchronization with data path */
+       rte_spinlock_t tx_lock;
+       /* Event port ID */
+       uint8_t port_id;
+       /* Event device identifier */
+       uint8_t eventdev_id;
+       /* Highest port id supported + 1 */
+       uint16_t dev_count;
+       /* Loop count to flush Tx buffers */
+       int loop_cnt;
+       /* Per ethernet device structure */
+       struct txa_service_ethdev *txa_ethdev;
+       /* Statistics */
+       struct rte_event_eth_tx_adapter_stats stats;
+       /* Adapter Identifier */
+       uint8_t id;
+       /* Conf arg must be freed */
+       uint8_t conf_free;
+       /* Configuration callback */
+       rte_event_eth_tx_adapter_conf_cb conf_cb;
+       /* Configuration callback argument */
+       void *conf_arg;
+       /* socket id */
+       int socket_id;
+       /* Per adapter EAL service */
+       int64_t service_id;
+       /* Memory allocation name */
+       char mem_name[TXA_MEM_NAME_LEN];
+} __rte_cache_aligned;
+
+/* Per eth device structure */
+struct txa_service_ethdev {
+       /* Pointer to ethernet device */
+       struct rte_eth_dev *dev;
+       /* Number of queues added */
+       uint16_t nb_queues;
+       /* PMD specific queue data */
+       void *queues;
+};
+
+/* Array of adapter instances, initialized with event device id
+ * when adapter is created
+ */
+static int *txa_dev_id_array;
+
+/* Array of pointers to service implementation data */
+static struct txa_service_data **txa_service_data_array;
+
+static int32_t txa_service_func(void *args);
+static int txa_service_adapter_create_ext(uint8_t id,
+                       struct rte_eventdev *dev,
+                       rte_event_eth_tx_adapter_conf_cb conf_cb,
+                       void *conf_arg);
+static int txa_service_queue_del(uint8_t id,
+                               const struct rte_eth_dev *dev,
+                               int32_t tx_queue_id);
+
+static int
+txa_adapter_exist(uint8_t id)
+{
+       return txa_dev_id_array[id] != TXA_INVALID_DEV_ID;
+}
+
+static inline int
+txa_valid_id(uint8_t id)
+{
+       return id < RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE;
+}
+
+static void *
+txa_memzone_array_get(const char *name, unsigned int elt_size, int nb_elems)
+{
+       const struct rte_memzone *mz;
+       unsigned int sz;
+
+       sz = elt_size * nb_elems;
+       sz = RTE_ALIGN(sz, RTE_CACHE_LINE_SIZE);
+
+       mz = rte_memzone_lookup(name);
+       if (mz == NULL) {
+               mz = rte_memzone_reserve_aligned(name, sz, rte_socket_id(), 0,
+                                                RTE_CACHE_LINE_SIZE);
+               if (mz == NULL) {
+                       RTE_EDEV_LOG_ERR("failed to reserve memzone"
+                                       " name = %s err = %"
+                                       PRId32, name, rte_errno);
+                       return NULL;
+               }
+       }
+
+       return mz->addr;
+}
+
+static int
+txa_dev_id_array_init(void)
+{
+       if (txa_dev_id_array == NULL) {
+               int i;
+
+               txa_dev_id_array = txa_memzone_array_get("txa_adapter_array",
+                                       sizeof(int),
+                                       RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE);
+               if (txa_dev_id_array == NULL)
+                       return -ENOMEM;
+
+               for (i = 0; i < RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE; i++)
+                       txa_dev_id_array[i] = TXA_INVALID_DEV_ID;
+       }
+
+       return 0;
+}
+
+static int
+txa_init(void)
+{
+       return txa_dev_id_array_init();
+}
+
+static int
+txa_service_data_init(void)
+{
+       if (txa_service_data_array == NULL) {
+               txa_service_data_array =
+                               txa_memzone_array_get("txa_service_data_array",
+                                       /* array of pointers, not ints */
+                                       sizeof(*txa_service_data_array),
+                                       RTE_EVENT_ETH_TX_ADAPTER_MAX_INSTANCE);
+               if (txa_service_data_array == NULL)
+                       return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static inline struct txa_service_data *
+txa_service_id_to_data(uint8_t id)
+{
+       return txa_service_data_array[id];
+}
+
+static inline struct txa_service_queue_info *
+txa_service_queue(struct txa_service_data *txa, uint16_t port_id,
+               uint16_t tx_queue_id)
+{
+       struct txa_service_queue_info *tqi;
+
+       if (unlikely(txa->txa_ethdev == NULL || txa->dev_count < port_id + 1))
+               return NULL;
+
+       tqi = txa->txa_ethdev[port_id].queues;
+
+       return likely(tqi != NULL) ? tqi + tx_queue_id : NULL;
+}
+
+static int
+txa_service_conf_cb(uint8_t __rte_unused id, uint8_t dev_id,
+               struct rte_event_eth_tx_adapter_conf *conf, void *arg)
+{
+       int ret;
+       struct rte_eventdev *dev;
+       struct rte_event_port_conf *pc;
+       struct rte_event_dev_config dev_conf;
+       int started;
+       uint8_t port_id;
+
+       pc = arg;
+       dev = &rte_eventdevs[dev_id];
+       dev_conf = dev->data->dev_conf;
+
+       started = dev->data->dev_started;
+       if (started)
+               rte_event_dev_stop(dev_id);
+
+       port_id = dev_conf.nb_event_ports;
+       dev_conf.nb_event_ports += 1;
+
+       ret = rte_event_dev_configure(dev_id, &dev_conf);
+       if (ret) {
+               RTE_EDEV_LOG_ERR("failed to configure event dev %u",
+                                               dev_id);
+               if (started) {
+                       if (rte_event_dev_start(dev_id))
+                               return -EIO;
+               }
+               return ret;
+       }
+
+       pc->disable_implicit_release = 0;
+       ret = rte_event_port_setup(dev_id, port_id, pc);
+       if (ret) {
+               RTE_EDEV_LOG_ERR("failed to setup event port %u\n",
+                                       port_id);
+               if (started) {
+                       if (rte_event_dev_start(dev_id))
+                               return -EIO;
+               }
+               return ret;
+       }
+
+       conf->event_port_id = port_id;
+       conf->max_nb_tx = TXA_MAX_NB_TX;
+       if (started)
+               ret = rte_event_dev_start(dev_id);
+       return ret;
+}
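
Applications reach this default callback through
rte_event_eth_tx_adapter_create() rather than the _ext() variant. A hedged
control-path sketch with placeholder identifiers and error checks omitted:

struct rte_event_port_conf port_conf = {
	.new_event_threshold = 4096,
	.dequeue_depth = 32,
	.enqueue_depth = 32,
};
uint8_t id = 0, ev_dev_id = 0;	/* placeholder identifiers */
uint16_t eth_dev_id = 0;

rte_event_eth_tx_adapter_create(id, ev_dev_id, &port_conf);
/* A queue id of -1 adds every Tx queue of the device. */
rte_event_eth_tx_adapter_queue_add(id, eth_dev_id, -1);
rte_event_eth_tx_adapter_start(id);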
+
+static int
+txa_service_ethdev_alloc(struct txa_service_data *txa)
+{
+       struct txa_service_ethdev *txa_ethdev;
+       uint16_t i, dev_count;
+
+       dev_count = rte_eth_dev_count_avail();
+       if (txa->txa_ethdev && dev_count == txa->dev_count)
+               return 0;
+
+       txa_ethdev = rte_zmalloc_socket(txa->mem_name,
+                                       dev_count * sizeof(*txa_ethdev),
+                                       0,
+                                       txa->socket_id);
+       if (txa_ethdev == NULL) {
+               RTE_EDEV_LOG_ERR("Failed to alloc txa::txa_ethdev ");
+               return -ENOMEM;
+       }
+
+       if (txa->dev_count)
+               memcpy(txa_ethdev, txa->txa_ethdev,
+                       txa->dev_count * sizeof(*txa_ethdev));
+
+       RTE_ETH_FOREACH_DEV(i) {
+               if (i == dev_count)
+                       break;
+               txa_ethdev[i].dev = &rte_eth_devices[i];
+       }
+
+       txa->txa_ethdev = txa_ethdev;
+       txa->dev_count = dev_count;
+       return 0;
+}
+
+static int
+txa_service_queue_array_alloc(struct txa_service_data *txa,
+                       uint16_t port_id)
+{
+       struct txa_service_queue_info *tqi;
+       uint16_t nb_queue;
+       int ret;
+
+       ret = txa_service_ethdev_alloc(txa);
+       if (ret != 0)
+               return ret;
+
+       if (txa->txa_ethdev[port_id].queues)
+               return 0;
+
+       nb_queue = txa->txa_ethdev[port_id].dev->data->nb_tx_queues;
+       tqi = rte_zmalloc_socket(txa->mem_name,
+                               nb_queue *
+                               sizeof(struct txa_service_queue_info), 0,
+                               txa->socket_id);
+       if (tqi == NULL)
+               return -ENOMEM;
+       txa->txa_ethdev[port_id].queues = tqi;
+       return 0;
+}
+
+static void
+txa_service_queue_array_free(struct txa_service_data *txa,
+                       uint16_t port_id)
+{
+       struct txa_service_ethdev *txa_ethdev;
+       struct txa_service_queue_info *tqi;
+
+       txa_ethdev = &txa->txa_ethdev[port_id];
+       if (txa->txa_ethdev == NULL || txa_ethdev->nb_queues != 0)
+               return;
+
+       tqi = txa_ethdev->queues;
+       txa_ethdev->queues = NULL;
+       rte_free(tqi);
+
+       if (txa->nb_queues == 0) {
+               rte_free(txa->txa_ethdev);
+               txa->txa_ethdev = NULL;
+       }
+}
+
+static void
+txa_service_unregister(struct txa_service_data *txa)
+{
+       if (txa->service_id != TXA_INVALID_SERVICE_ID) {
+               rte_service_component_runstate_set(txa->service_id, 0);
+               while (rte_service_may_be_active(txa->service_id))
+                       rte_pause();
+               rte_service_component_unregister(txa->service_id);
+       }
+       txa->service_id = TXA_INVALID_SERVICE_ID;
+}
+
+static int
+txa_service_register(struct txa_service_data *txa)
+{
+       int ret;
+       struct rte_service_spec service;
+       struct rte_event_eth_tx_adapter_conf conf;
+
+       if (txa->service_id != TXA_INVALID_SERVICE_ID)
+               return 0;
+
+       memset(&service, 0, sizeof(service));
+       snprintf(service.name, TXA_SERVICE_NAME_LEN, "txa_%d", txa->id);
+       service.socket_id = txa->socket_id;
+       service.callback = txa_service_func;
+       service.callback_userdata = txa;
+       service.capabilities = RTE_SERVICE_CAP_MT_SAFE;
+       ret = rte_service_component_register(&service,
+                                       (uint32_t *)&txa->service_id);
+       if (ret) {
+               RTE_EDEV_LOG_ERR("failed to register service %s err = %"
+                                PRId32, service.name, ret);
+               return ret;
+       }
+
+       ret = txa->conf_cb(txa->id, txa->eventdev_id, &conf, txa->conf_arg);
+       if (ret) {
+               txa_service_unregister(txa);
+               return ret;
+       }
+
+       rte_service_component_runstate_set(txa->service_id, 1);
+       txa->port_id = conf.event_port_id;
+       txa->max_nb_tx = conf.max_nb_tx;
+       return 0;
+}
+
+static struct rte_eth_dev_tx_buffer *
+txa_service_tx_buf_alloc(struct txa_service_data *txa,
+                       const struct rte_eth_dev *dev)
+{
+       struct rte_eth_dev_tx_buffer *tb;
+       uint16_t port_id;
+
+       port_id = dev->data->port_id;
+       tb = rte_zmalloc_socket(txa->mem_name,
+                               RTE_ETH_TX_BUFFER_SIZE(TXA_BATCH_SIZE),
+                               0,
+                               rte_eth_dev_socket_id(port_id));
+       if (tb == NULL)
+               RTE_EDEV_LOG_ERR("Failed to allocate memory for tx buffer");
+       return tb;
+}
+
+static int
+txa_service_is_queue_added(struct txa_service_data *txa,
+                       const struct rte_eth_dev *dev,
+                       uint16_t tx_queue_id)
+{
+       struct txa_service_queue_info *tqi;
+
+       tqi = txa_service_queue(txa, dev->data->port_id, tx_queue_id);
+       return tqi && tqi->added;
+}
+
+static int
+txa_service_ctrl(uint8_t id, int start)
+{
+       int ret;
+       struct txa_service_data *txa;
+
+       txa = txa_service_id_to_data(id);
+       if (txa->service_id == TXA_INVALID_SERVICE_ID)
+               return 0;
+
+       ret = rte_service_runstate_set(txa->service_id, start);
+       if (ret == 0 && !start) {
+               while (rte_service_may_be_active(txa->service_id))
+                       rte_pause();
+       }
+       return ret;
+}
+
+static void
+txa_service_buffer_retry(struct rte_mbuf **pkts, uint16_t unsent,
+                       void *userdata)
+{
+       struct txa_retry *tr;
+       struct txa_service_data *data;
+       struct rte_event_eth_tx_adapter_stats *stats;
+       uint16_t sent = 0;
+       unsigned int retry = 0;
+       uint16_t i, n;
+
+       tr = (struct txa_retry *)(uintptr_t)userdata;
+       data = txa_service_id_to_data(tr->id);
+       stats = &data->stats;
+
+       do {
+               n = rte_eth_tx_burst(tr->port_id, tr->tx_queue,
+                              &pkts[sent], unsent - sent);
+
+               sent += n;
+       } while (sent != unsent && retry++ < TXA_RETRY_CNT);
+
+       for (i = sent; i < unsent; i++)
+               rte_pktmbuf_free(pkts[i]);
+
+       stats->tx_retry += retry;
+       stats->tx_packets += sent;
+       stats->tx_dropped += unsent - sent;
+}
+
+static void
+txa_service_tx(struct txa_service_data *txa, struct rte_event *ev,
+       uint32_t n)
+{
+       uint32_t i;
+       uint16_t nb_tx;
+       struct rte_event_eth_tx_adapter_stats *stats;
+
+       stats = &txa->stats;
+
+       nb_tx = 0;
+       for (i = 0; i < n; i++) {
+               struct rte_mbuf *m;
+               uint16_t port;
+               uint16_t queue;
+               struct txa_service_queue_info *tqi;
+
+               m = ev[i].mbuf;
+               port = m->port;
+               queue = rte_event_eth_tx_adapter_txq_get(m);
+
+               tqi = txa_service_queue(txa, port, queue);
+               if (unlikely(tqi == NULL || !tqi->added)) {
+                       rte_pktmbuf_free(m);
+                       continue;
+               }
+
+               nb_tx += rte_eth_tx_buffer(port, queue, tqi->tx_buf, m);
+       }
+
+       stats->tx_packets += nb_tx;
+}
+
+static int32_t
+txa_service_func(void *args)
+{
+       struct txa_service_data *txa = args;
+       uint8_t dev_id;
+       uint8_t port;
+       uint16_t n;
+       uint32_t nb_tx, max_nb_tx;
+       struct rte_event ev[TXA_BATCH_SIZE];
+
+       dev_id = txa->eventdev_id;
+       max_nb_tx = txa->max_nb_tx;
+       port = txa->port_id;
+
+       if (txa->nb_queues == 0)
+               return 0;
+
+       if (!rte_spinlock_trylock(&txa->tx_lock))
+               return 0;
+
+       for (nb_tx = 0; nb_tx < max_nb_tx; nb_tx += n) {
+
+               n = rte_event_dequeue_burst(dev_id, port, ev, RTE_DIM(ev), 0);
+               if (!n)
+                       break;
+               txa_service_tx(txa, ev, n);
+       }
+
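+       /* Every TXA_FLUSH_THRESHOLD service iterations (the mask below
+        * assumes a power-of-two threshold), flush the Tx buffers of all
+        * added queues so buffered packets are not held back indefinitely
+        * when event arrivals stall.
+        */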
+       if ((txa->loop_cnt++ & (TXA_FLUSH_THRESHOLD - 1)) == 0) {
+
+               struct txa_service_ethdev *tdi;
+               struct txa_service_queue_info *tqi;
+               struct rte_eth_dev *dev;
+               uint16_t i;
+
+               tdi = txa->txa_ethdev;
+               nb_tx = 0;
+
+               RTE_ETH_FOREACH_DEV(i) {
+                       uint16_t q;
+
+                       if (i == txa->dev_count)
+                               break;
+
+                       dev = tdi[i].dev;
+                       if (tdi[i].nb_queues == 0)
+                               continue;
+                       for (q = 0; q < dev->data->nb_tx_queues; q++) {
+
+                               tqi = txa_service_queue(txa, i, q);
+                               if (unlikely(tqi == NULL || !tqi->added))
+                                       continue;
+
+                               nb_tx += rte_eth_tx_buffer_flush(i, q,
+                                                       tqi->tx_buf);
+                       }
+               }
+
+               txa->stats.tx_packets += nb_tx;
+       }
+       rte_spinlock_unlock(&txa->tx_lock);
+       return 0;
+}
+
+static int
+txa_service_adapter_create(uint8_t id, struct rte_eventdev *dev,
+                       struct rte_event_port_conf *port_conf)
+{
+       struct txa_service_data *txa;
+       struct rte_event_port_conf *cb_conf;
+       int ret;
+
+       cb_conf = rte_malloc(NULL, sizeof(*cb_conf), 0);
+       if (cb_conf == NULL)
+               return -ENOMEM;
+
+       *cb_conf = *port_conf;
+       ret = txa_service_adapter_create_ext(id, dev, txa_service_conf_cb,
+                                       cb_conf);
+       if (ret) {
+               rte_free(cb_conf);
+               return ret;
+       }
+
+       txa = txa_service_id_to_data(id);
+       txa->conf_free = 1;
+       return ret;
+}
+
+static int
+txa_service_adapter_create_ext(uint8_t id, struct rte_eventdev *dev,
+                       rte_event_eth_tx_adapter_conf_cb conf_cb,
+                       void *conf_arg)
+{
+       struct txa_service_data *txa;
+       int socket_id;
+       char mem_name[TXA_SERVICE_NAME_LEN];
+       int ret;
+
+       if (conf_cb == NULL)
+               return -EINVAL;
+
+       socket_id = dev->data->socket_id;
+       snprintf(mem_name, sizeof(mem_name),
+               "rte_event_eth_txa_%d",
+               id);
+
+       ret = txa_service_data_init();
+       if (ret != 0)
+               return ret;
+
+       txa = rte_zmalloc_socket(mem_name,
+                               sizeof(*txa),
+                               RTE_CACHE_LINE_SIZE, socket_id);
+       if (txa == NULL) {
+               RTE_EDEV_LOG_ERR("failed to get mem for tx adapter");
+               return -ENOMEM;
+       }
+
+       txa->id = id;
+       txa->eventdev_id = dev->data->dev_id;
+       txa->socket_id = socket_id;
+       strncpy(txa->mem_name, mem_name, TXA_SERVICE_NAME_LEN);
+       txa->conf_cb = conf_cb;
+       txa->conf_arg = conf_arg;
+       txa->service_id = TXA_INVALID_SERVICE_ID;
+       rte_spinlock_init(&txa->tx_lock);
+       txa_service_data_array[id] = txa;
+
+       return 0;
+}
+
+static int
+txa_service_event_port_get(uint8_t id, uint8_t *port)
+{
+       struct txa_service_data *txa;
+
+       txa = txa_service_id_to_data(id);
+       if (txa->service_id == TXA_INVALID_SERVICE_ID)
+               return -ENODEV;
+
+       *port = txa->port_id;
+       return 0;
+}
+
+static int
+txa_service_adapter_free(uint8_t id)
+{
+       struct txa_service_data *txa;
+
+       txa = txa_service_id_to_data(id);
+       if (txa->nb_queues) {
+               RTE_EDEV_LOG_ERR("%" PRIu16 " Tx queues not deleted",
+                               txa->nb_queues);
+               return -EBUSY;
+       }
+
+       if (txa->conf_free)
+               rte_free(txa->conf_arg);
+       rte_free(txa);
+       return 0;
+}
+
+static int
+txa_service_queue_add(uint8_t id,
+               __rte_unused struct rte_eventdev *dev,
+               const struct rte_eth_dev *eth_dev,
+               int32_t tx_queue_id)
+{
+       struct txa_service_data *txa;
+       struct txa_service_ethdev *tdi;
+       struct txa_service_queue_info *tqi;
+       struct rte_eth_dev_tx_buffer *tb;
+       struct txa_retry *txa_retry;
+       int ret;
+
+       txa = txa_service_id_to_data(id);
+
+       if (tx_queue_id == -1) {
+               int nb_queues;
+               uint16_t i, j;
+               uint16_t *qdone;
+
+               nb_queues = eth_dev->data->nb_tx_queues;
+               if (txa->dev_count > eth_dev->data->port_id) {
+                       tdi = &txa->txa_ethdev[eth_dev->data->port_id];
+                       nb_queues -= tdi->nb_queues;
+               }
+
+               qdone = rte_zmalloc(txa->mem_name,
+                               nb_queues * sizeof(*qdone), 0);
+               if (qdone == NULL)
+                       return -ENOMEM;
+               j = 0;
+               for (i = 0; i < nb_queues; i++) {
+                       if (txa_service_is_queue_added(txa, eth_dev, i))
+                               continue;
+                       ret = txa_service_queue_add(id, dev, eth_dev, i);
+                       if (ret == 0)
+                               qdone[j++] = i;
+                       else
+                               break;
+               }
+
+               if (i != nb_queues) {
+                       for (i = 0; i < j; i++)
+                               txa_service_queue_del(id, eth_dev, qdone[i]);
+               }
+               rte_free(qdone);
+               return ret;
+       }
+
+       ret = txa_service_register(txa);
+       if (ret)
+               return ret;
+
+       rte_spinlock_lock(&txa->tx_lock);
+
+       if (txa_service_is_queue_added(txa, eth_dev, tx_queue_id)) {
+               rte_spinlock_unlock(&txa->tx_lock);
+               return 0;
+       }
+
+       ret = txa_service_queue_array_alloc(txa, eth_dev->data->port_id);
+       if (ret)
+               goto err_unlock;
+
+       tb = txa_service_tx_buf_alloc(txa, eth_dev);
+       if (tb == NULL) {
+               ret = -ENOMEM;
+               goto err_unlock;
+       }
+
+       tdi = &txa->txa_ethdev[eth_dev->data->port_id];
+       tqi = txa_service_queue(txa, eth_dev->data->port_id, tx_queue_id);
+
+       txa_retry = &tqi->txa_retry;
+       txa_retry->id = txa->id;
+       txa_retry->port_id = eth_dev->data->port_id;
+       txa_retry->tx_queue = tx_queue_id;
+
+       rte_eth_tx_buffer_init(tb, TXA_BATCH_SIZE);
+       rte_eth_tx_buffer_set_err_callback(tb,
+               txa_service_buffer_retry, txa_retry);
+
+       tqi->tx_buf = tb;
+       tqi->added = 1;
+       tdi->nb_queues++;
+       txa->nb_queues++;
+
+err_unlock:
+       if (txa->nb_queues == 0) {
+               txa_service_queue_array_free(txa,
+                                       eth_dev->data->port_id);
+               txa_service_unregister(txa);
+       }
+
+       rte_spinlock_unlock(&txa->tx_lock);
+       return ret;
+}
+
+static int
+txa_service_queue_del(uint8_t id,
+               const struct rte_eth_dev *dev,
+               int32_t tx_queue_id)
+{
+       struct txa_service_data *txa;
+       struct txa_service_queue_info *tqi;
+       struct rte_eth_dev_tx_buffer *tb;
+       uint16_t port_id;
+
+       if (tx_queue_id == -1) {
+               uint16_t i;
+               int ret = 0;
+
+               for (i = 0; i < dev->data->nb_tx_queues; i++) {
+                       ret = txa_service_queue_del(id, dev, i);
+                       if (ret != 0)
+                               break;
+               }
+               return ret;
+       }
+
+       txa = txa_service_id_to_data(id);
+       port_id = dev->data->port_id;
+
+       tqi = txa_service_queue(txa, port_id, tx_queue_id);
+       if (tqi == NULL || !tqi->added)
+               return 0;
+
+       tb = tqi->tx_buf;
+       tqi->added = 0;
+       tqi->tx_buf = NULL;
+       rte_free(tb);
+       txa->nb_queues--;
+       txa->txa_ethdev[port_id].nb_queues--;
+
+       txa_service_queue_array_free(txa, port_id);
+       return 0;
+}
+
+static int
+txa_service_id_get(uint8_t id, uint32_t *service_id)
+{
+       struct txa_service_data *txa;
+
+       txa = txa_service_id_to_data(id);
+       if (txa->service_id == TXA_INVALID_SERVICE_ID)
+               return -ESRCH;
+
+       if (service_id == NULL)
+               return -EINVAL;
+
+       *service_id = txa->service_id;
+       return 0;
+}
+
+static int
+txa_service_start(uint8_t id)
+{
+       return txa_service_ctrl(id, 1);
+}
+
+static int
+txa_service_stats_get(uint8_t id,
+               struct rte_event_eth_tx_adapter_stats *stats)
+{
+       struct txa_service_data *txa;
+
+       txa = txa_service_id_to_data(id);
+       *stats = txa->stats;
+       return 0;
+}
+
+static int
+txa_service_stats_reset(uint8_t id)
+{
+       struct txa_service_data *txa;
+
+       txa = txa_service_id_to_data(id);
+       memset(&txa->stats, 0, sizeof(txa->stats));
+       return 0;
+}
+
+static int
+txa_service_stop(uint8_t id)
+{
+       return txa_service_ctrl(id, 0);
+}
+
+
+int __rte_experimental
+rte_event_eth_tx_adapter_create(uint8_t id, uint8_t dev_id,
+                               struct rte_event_port_conf *port_conf)
+{
+       struct rte_eventdev *dev;
+       int ret;
+
+       if (port_conf == NULL)
+               return -EINVAL;
+
+       RTE_EVENT_ETH_TX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+       RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+
+       dev = &rte_eventdevs[dev_id];
+
+       ret = txa_init();
+       if (ret != 0)
+               return ret;
+
+       if (txa_adapter_exist(id))
+               return -EEXIST;
+
+       txa_dev_id_array[id] = dev_id;
+       if (txa_dev_adapter_create(id))
+               ret = txa_dev_adapter_create(id)(id, dev);
+
+       if (ret != 0) {
+               txa_dev_id_array[id] = TXA_INVALID_DEV_ID;
+               return ret;
+       }
+
+       ret = txa_service_adapter_create(id, dev, port_conf);
+       if (ret != 0) {
+               if (txa_dev_adapter_free(id))
+                       txa_dev_adapter_free(id)(id, dev);
+               txa_dev_id_array[id] = TXA_INVALID_DEV_ID;
+               return ret;
+       }
+
+       txa_dev_id_array[id] = dev_id;
+       return 0;
+}
+
+int __rte_experimental
+rte_event_eth_tx_adapter_create_ext(uint8_t id, uint8_t dev_id,
+                               rte_event_eth_tx_adapter_conf_cb conf_cb,
+                               void *conf_arg)
+{
+       struct rte_eventdev *dev;
+       int ret;
+
+       RTE_EVENT_ETH_TX_ADAPTER_ID_VALID_OR_ERR_RET(id, -EINVAL);
+       RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+
+       ret = txa_init();
+       if (ret != 0)
+               return ret;
+
+       if (txa_adapter_exist(id))
+               return -EEXIST;
+
+       dev = &rte_eventdevs[dev_id];
+
+       txa_dev_id_array[id] = dev_id;
+       if (txa_dev_adapter_create_ext(id))
+               ret = txa_dev_adapter_create_ext(id)(id, dev);
+
+       if (ret != 0) {
+               txa_dev_id_array[id] = TXA_INVALID_DEV_ID;
+               return ret;
+       }
+
+       ret = txa_service_adapter_create_ext(id, dev, conf_cb, conf_arg);
+       if (ret != 0) {
+               if (txa_dev_adapter_free(id))
+                       txa_dev_adapter_free(id)(id, dev);
+               txa_dev_id_array[id] = TXA_INVALID_DEV_ID;
+               return ret;
+       }
+
+       txa_dev_id_array[id] = dev_id;
+       return 0;
+}
+
+
+int __rte_experimental
+rte_event_eth_tx_adapter_event_port_get(uint8_t id, uint8_t *event_port_id)
+{
+       TXA_CHECK_OR_ERR_RET(id);
+
+       return txa_service_event_port_get(id, event_port_id);
+}
+
+int __rte_experimental
+rte_event_eth_tx_adapter_free(uint8_t id)
+{
+       int ret;
+
+       TXA_CHECK_OR_ERR_RET(id);
+
+       ret = txa_dev_adapter_free(id) ?
+               txa_dev_adapter_free(id)(id, txa_evdev(id)) :
+               0;
+
+       if (ret == 0)
+               ret = txa_service_adapter_free(id);
+       txa_dev_id_array[id] = TXA_INVALID_DEV_ID;
+
+       return ret;
+}
+
+int __rte_experimental
+rte_event_eth_tx_adapter_queue_add(uint8_t id,
+                               uint16_t eth_dev_id,
+                               int32_t queue)
+{
+       struct rte_eth_dev *eth_dev;
+       int ret;
+       uint32_t caps;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL);
+       TXA_CHECK_OR_ERR_RET(id);
+
+       eth_dev = &rte_eth_devices[eth_dev_id];
+       if (queue != -1 && (uint16_t)queue >= eth_dev->data->nb_tx_queues) {
+               RTE_EDEV_LOG_ERR("Invalid tx queue_id %" PRIu16,
+                               (uint16_t)queue);
+               return -EINVAL;
+       }
+
+       caps = 0;
+       if (txa_dev_caps_get(id))
+               txa_dev_caps_get(id)(txa_evdev(id), eth_dev, &caps);
+
+       if (caps & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT)
+               ret =  txa_dev_queue_add(id) ?
+                                       txa_dev_queue_add(id)(id,
+                                                       txa_evdev(id),
+                                                       eth_dev,
+                                                       queue) : 0;
+       else
+               ret = txa_service_queue_add(id, txa_evdev(id), eth_dev, queue);
+
+       return ret;
+}
+
+int __rte_experimental
+rte_event_eth_tx_adapter_queue_del(uint8_t id,
+                               uint16_t eth_dev_id,
+                               int32_t queue)
+{
+       struct rte_eth_dev *eth_dev;
+       int ret;
+       uint32_t caps;
+
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_dev_id, -EINVAL);
+       TXA_CHECK_OR_ERR_RET(id);
+
+       eth_dev = &rte_eth_devices[eth_dev_id];
+       if (queue != -1 && (uint16_t)queue >= eth_dev->data->nb_tx_queues) {
+               RTE_EDEV_LOG_ERR("Invalid tx queue_id %" PRIu16,
+                               (uint16_t)queue);
+               return -EINVAL;
+       }
+
+       caps = 0;
+
+       if (txa_dev_caps_get(id))
+               txa_dev_caps_get(id)(txa_evdev(id), eth_dev, &caps);
+
+       if (caps & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT)
+               ret =  txa_dev_queue_del(id) ?
+                                       txa_dev_queue_del(id)(id, txa_evdev(id),
+                                                       eth_dev,
+                                                       queue) : 0;
+       else
+               ret = txa_service_queue_del(id, eth_dev, queue);
+
+       return ret;
+}
+
+int __rte_experimental
+rte_event_eth_tx_adapter_service_id_get(uint8_t id, uint32_t *service_id)
+{
+       TXA_CHECK_OR_ERR_RET(id);
+
+       return txa_service_id_get(id, service_id);
+}
+
+int __rte_experimental
+rte_event_eth_tx_adapter_start(uint8_t id)
+{
+       int ret;
+
+       TXA_CHECK_OR_ERR_RET(id);
+
+       ret = txa_dev_start(id) ? txa_dev_start(id)(id, txa_evdev(id)) : 0;
+       if (ret == 0)
+               ret = txa_service_start(id);
+       return ret;
+}
+
+int __rte_experimental
+rte_event_eth_tx_adapter_stats_get(uint8_t id,
+                               struct rte_event_eth_tx_adapter_stats *stats)
+{
+       int ret;
+
+       TXA_CHECK_OR_ERR_RET(id);
+
+       if (stats == NULL)
+               return -EINVAL;
+
+       *stats = (struct rte_event_eth_tx_adapter_stats){0};
+
+       ret = txa_dev_stats_get(id) ?
+                       txa_dev_stats_get(id)(id, txa_evdev(id), stats) : 0;
+
+       if (ret == 0 && txa_service_id_get(id, NULL) != -ESRCH) {
+               if (txa_dev_stats_get(id)) {
+                       struct rte_event_eth_tx_adapter_stats service_stats;
+
+                       ret = txa_service_stats_get(id, &service_stats);
+                       if (ret == 0) {
+                               stats->tx_retry += service_stats.tx_retry;
+                               stats->tx_packets += service_stats.tx_packets;
+                               stats->tx_dropped += service_stats.tx_dropped;
+                       }
+               } else
+                       ret = txa_service_stats_get(id, stats);
+       }
+
+       return ret;
+}
+
+int __rte_experimental
+rte_event_eth_tx_adapter_stats_reset(uint8_t id)
+{
+       int ret;
+
+       TXA_CHECK_OR_ERR_RET(id);
+
+       ret = txa_dev_stats_reset(id) ?
+               txa_dev_stats_reset(id)(id, txa_evdev(id)) : 0;
+       if (ret == 0)
+               ret = txa_service_stats_reset(id);
+       return ret;
+}
+
+int __rte_experimental
+rte_event_eth_tx_adapter_stop(uint8_t id)
+{
+       int ret;
+
+       TXA_CHECK_OR_ERR_RET(id);
+
+       ret = txa_dev_stop(id) ? txa_dev_stop(id)(id, txa_evdev(id)) : 0;
+       if (ret == 0)
+               ret = txa_service_stop(id);
+       return ret;
+}
diff --git a/lib/librte_eventdev/rte_event_eth_tx_adapter.h b/lib/librte_eventdev/rte_event_eth_tx_adapter.h
new file mode 100644 (file)
index 0000000..81456d4
--- /dev/null
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#ifndef _RTE_EVENT_ETH_TX_ADAPTER_
+#define _RTE_EVENT_ETH_TX_ADAPTER_
+
+/**
+ * @file
+ *
+ * RTE Event Ethernet Tx Adapter
+ *
+ * The event ethernet Tx adapter provides configuration and data path APIs
+ * for the ethernet transmit stage of an event driven packet processing
+ * application. These APIs abstract the implementation of the transmit stage
+ * and allow the application to use eventdev PMD support or a common
+ * implementation.
+ *
+ * In the common implementation, the application enqueues mbufs to the adapter
+ * which runs as a rte_service function. The service function dequeues events
+ * from its event port and transmits the mbufs referenced by these events.
+ *
+ * The ethernet Tx event adapter APIs are:
+ *
+ *  - rte_event_eth_tx_adapter_create()
+ *  - rte_event_eth_tx_adapter_create_ext()
+ *  - rte_event_eth_tx_adapter_free()
+ *  - rte_event_eth_tx_adapter_start()
+ *  - rte_event_eth_tx_adapter_stop()
+ *  - rte_event_eth_tx_adapter_queue_add()
+ *  - rte_event_eth_tx_adapter_queue_del()
+ *  - rte_event_eth_tx_adapter_stats_get()
+ *  - rte_event_eth_tx_adapter_stats_reset()
+ *  - rte_event_eth_tx_adapter_enqueue()
+ *  - rte_event_eth_tx_adapter_event_port_get()
+ *  - rte_event_eth_tx_adapter_service_id_get()
+ *
+ * The application creates the adapter using
+ * rte_event_eth_tx_adapter_create() or rte_event_eth_tx_adapter_create_ext().
+ *
+ * The adapter will use the common implementation when the eventdev PMD
+ * does not have the #RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT capability.
+ * The common implementation uses an event port that is created using the port
+ * configuration parameter passed to rte_event_eth_tx_adapter_create(). The
+ * application can get the port identifier using
+ * rte_event_eth_tx_adapter_event_port_get() and must link an event queue to
+ * this port.
+ *
+ * If the eventdev PMD has the #RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT
+ * flag set, Tx adapter events should be enqueued using the
+ * rte_event_eth_tx_adapter_enqueue() function, else the application should
+ * use rte_event_enqueue_burst().
+ *
+ * Transmit queues can be added and deleted from the adapter using
+ * rte_event_eth_tx_adapter_queue_add()/del() APIs respectively.
+ *
+ * The application can start and stop the adapter using the
+ * rte_event_eth_tx_adapter_start/stop() calls.
+ *
+ * The common adapter implementation uses an EAL service function as described
+ * before and its execution is controlled using the rte_service APIs. The
+ * rte_event_eth_tx_adapter_service_id_get()
+ * function can be used to retrieve the adapter's service function ID.
+ *
+ * The ethernet port and transmit queue index to transmit the mbuf on are
+ * specified using the mbuf port field and the higher 16 bits of
+ * struct rte_mbuf::hash::sched::hi. The application should use the
+ * rte_event_eth_tx_adapter_txq_set() and rte_event_eth_tx_adapter_txq_get()
+ * functions to access the transmit queue index, since the transmit queue is
+ * expected to eventually be defined within struct rte_mbuf; using these
+ * functions helps minimize application impact from a future change in how
+ * the transmit queue index is specified.
+ */
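+
+/*
+ * A minimal setup sketch for the service based implementation, assuming
+ * application defined identifiers TXA_ID, DEV_ID and ETH_PORT_ID and
+ * illustrative port configuration values; error handling is omitted:
+ *
+ *     struct rte_event_port_conf port_conf = {
+ *             .new_event_threshold = 4096,
+ *             .dequeue_depth = 128,
+ *             .enqueue_depth = 128,
+ *     };
+ *     uint8_t tx_port;
+ *
+ *     rte_event_eth_tx_adapter_create(TXA_ID, DEV_ID, &port_conf);
+ *     rte_event_eth_tx_adapter_queue_add(TXA_ID, ETH_PORT_ID, -1);
+ *     rte_event_eth_tx_adapter_event_port_get(TXA_ID, &tx_port);
+ *     rte_event_port_link(DEV_ID, tx_port, NULL, NULL, 0);
+ *     rte_event_eth_tx_adapter_start(TXA_ID);
+ */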
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+
+#include "rte_eventdev.h"
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Adapter configuration structure
+ *
+ * @see rte_event_eth_tx_adapter_create_ext
+ * @see rte_event_eth_tx_adapter_conf_cb
+ */
+struct rte_event_eth_tx_adapter_conf {
+       uint8_t event_port_id;
+       /**< Event port identifier; the adapter service function dequeues mbuf
+        * events from this port.
+        * @see RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT
+        */
+       uint32_t max_nb_tx;
+       /**< The adapter can return early if it has processed at least
+        * max_nb_tx mbufs. This isn't treated as a requirement; batching may
+        * cause the adapter to process more than max_nb_tx mbufs.
+        */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Function type used for adapter configuration callback. The callback is
+ * used to fill in members of struct rte_event_eth_tx_adapter_conf; it is
+ * invoked when creating an RTE service function based adapter
+ * implementation.
+ *
+ * @param id
+ *  Adapter identifier.
+ * @param dev_id
+ *  Event device identifier.
+ * @param [out] conf
+ *  Structure that needs to be populated by this callback.
+ * @param arg
+ *  Argument to the callback. This is the same as the conf_arg passed to
+ *  rte_event_eth_tx_adapter_create_ext().
+ *
+ * @return
+ *   - 0: Success
+ *   - <0: Error code on failure
+ */
+typedef int (*rte_event_eth_tx_adapter_conf_cb) (uint8_t id, uint8_t dev_id,
+                               struct rte_event_eth_tx_adapter_conf *conf,
+                               void *arg);
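+
+/*
+ * A sketch of a configuration callback, assuming the application passes the
+ * identifier of a pre-configured event port through conf_arg (the names
+ * below are hypothetical):
+ *
+ *     static int
+ *     app_txa_conf_cb(uint8_t id, uint8_t dev_id,
+ *                     struct rte_event_eth_tx_adapter_conf *conf, void *arg)
+ *     {
+ *             RTE_SET_USED(id);
+ *             RTE_SET_USED(dev_id);
+ *             conf->event_port_id = *(uint8_t *)arg;
+ *             conf->max_nb_tx = 128;
+ *             return 0;
+ *     }
+ */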
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * A structure used to retrieve statistics for an ethernet Tx adapter instance.
+ */
+struct rte_event_eth_tx_adapter_stats {
+       uint64_t tx_retry;
+       /**< Number of transmit retries */
+       uint64_t tx_packets;
+       /**< Number of packets transmitted */
+       uint64_t tx_dropped;
+       /**< Number of packets dropped */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Create a new ethernet Tx adapter with the specified identifier.
+ *
+ * @param id
+ *  The identifier of the ethernet Tx adapter.
+ * @param dev_id
+ *  The event device identifier.
+ * @param port_config
+ *  Event port configuration, the adapter uses this configuration to
+ *  create an event port if needed.
+ * @return
+ *   - 0: Success
+ *   - <0: Error code on failure
+ */
+int __rte_experimental
+rte_event_eth_tx_adapter_create(uint8_t id, uint8_t dev_id,
+                               struct rte_event_port_conf *port_config);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Create a new ethernet Tx adapter with the specified identifier.
+ *
+ * @param id
+ *  The identifier of the ethernet Tx adapter.
+ * @param dev_id
+ *  The event device identifier.
+ * @param conf_cb
+ *  Callback function that initializes members of the
+ *  struct rte_event_eth_tx_adapter_conf struct passed into
+ *  it.
+ * @param conf_arg
+ *  Argument that is passed to the conf_cb function.
+ * @return
+ *   - 0: Success
+ *   - <0: Error code on failure
+ */
+int __rte_experimental
+rte_event_eth_tx_adapter_create_ext(uint8_t id, uint8_t dev_id,
+                               rte_event_eth_tx_adapter_conf_cb conf_cb,
+                               void *conf_arg);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Free an ethernet Tx adapter
+ *
+ * @param id
+ *  Adapter identifier.
+ * @return
+ *   - 0: Success
+ *   - <0: Error code on failure. If the adapter still has Tx queues
+ *      added to it, the function returns -EBUSY.
+ */
+int __rte_experimental
+rte_event_eth_tx_adapter_free(uint8_t id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Start ethernet Tx adapter
+ *
+ * @param id
+ *  Adapter identifier.
+ * @return
+ *  - 0: Success, Adapter started correctly.
+ *  - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_eth_tx_adapter_start(uint8_t id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Stop ethernet Tx adapter
+ *
+ * @param id
+ *  Adapter identifier.
+ * @return
+ *  - 0: Success.
+ *  - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_eth_tx_adapter_stop(uint8_t id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Add a Tx queue to the adapter.
+ * A queue value of -1 is used to indicate all
+ * queues within the device.
+ *
+ * @param id
+ *  Adapter identifier.
+ * @param eth_dev_id
+ *  Ethernet Port Identifier.
+ * @param queue
+ *  Tx queue index.
+ * @return
+ *  - 0: Success, Queues added successfully.
+ *  - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_eth_tx_adapter_queue_add(uint8_t id,
+                               uint16_t eth_dev_id,
+                               int32_t queue);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Delete a Tx queue from the adapter.
+ * A queue value of -1 is used to indicate all
+ * queues within the device that have been added to this
+ * adapter.
+ *
+ * @param id
+ *  Adapter identifier.
+ * @param eth_dev_id
+ *  Ethernet Port Identifier.
+ * @param queue
+ *  Tx queue index.
+ * @return
+ *  - 0: Success, Queues deleted successfully.
+ *  - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_eth_tx_adapter_queue_del(uint8_t id,
+                               uint16_t eth_dev_id,
+                               int32_t queue);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Set the Tx queue index in the mbuf. The adapter transmits the mbuf
+ * on this queue.
+ *
+ * @param pkt
+ *  Pointer to the mbuf.
+ * @param queue
+ *  Tx queue index.
+ */
+static __rte_always_inline void __rte_experimental
+rte_event_eth_tx_adapter_txq_set(struct rte_mbuf *pkt, uint16_t queue)
+{
+       uint16_t *p = (uint16_t *)&pkt->hash.sched.hi;
+       p[1] = queue;
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve the Tx queue index from the mbuf.
+ *
+ * @param pkt
+ *  Pointer to the mbuf.
+ * @return
+ *  Tx queue identifier.
+ *
+ * @see rte_event_eth_tx_adapter_txq_set()
+ */
+static __rte_always_inline uint16_t __rte_experimental
+rte_event_eth_tx_adapter_txq_get(struct rte_mbuf *pkt)
+{
+       uint16_t *p = (uint16_t *)&pkt->hash.sched.hi;
+       return p[1];
+}
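+
+/*
+ * A short sketch of preparing an event for the adapter, assuming *m* is a
+ * previously filled mbuf to be sent on port 2, Tx queue 1, and that
+ * txa_ev_queue is an event queue linked to the adapter's event port (the
+ * variable names are hypothetical):
+ *
+ *     struct rte_event ev;
+ *
+ *     m->port = 2;
+ *     rte_event_eth_tx_adapter_txq_set(m, 1);
+ *     ev.mbuf = m;
+ *     ev.queue_id = txa_ev_queue;
+ *     ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
+ *     ev.event_type = RTE_EVENT_TYPE_CPU;
+ *     ev.op = RTE_EVENT_OP_NEW;
+ *     rte_event_enqueue_burst(dev_id, app_port_id, &ev, 1);
+ */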
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve the adapter event port. The adapter creates an event port if
+ * the #RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT capability is not set in the
+ * ethernet Tx capabilities of the event device.
+ *
+ * @param id
+ *  Adapter Identifier.
+ * @param[out] event_port_id
+ *  Event port pointer.
+ * @return
+ *   - 0: Success.
+ *   - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_eth_tx_adapter_event_port_get(uint8_t id, uint8_t *event_port_id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Enqueue a burst of event objects, or a single event object, supplied in
+ * *rte_event* structures on the event device designated by its *dev_id*
+ * through the event port specified by *port_id*. This function is supported
+ * if the eventdev PMD
+ * has the #RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT capability flag set.
+ *
+ * The *nb_events* parameter is the number of event objects to enqueue, which
+ * are supplied in the *ev* array of *rte_event* structures.
+ *
+ * The rte_event_eth_tx_adapter_enqueue() function returns the number of
+ * event objects it actually enqueued. A return value equal to *nb_events*
+ * means that all event objects have been enqueued.
+ *
+ * @param dev_id
+ *  The identifier of the device.
+ * @param port_id
+ *  The identifier of the event port.
+ * @param ev
+ *  Points to an array of *nb_events* objects of type *rte_event* structure
+ *  which contain the event object enqueue operations to be processed.
+ * @param nb_events
+ *  The number of event objects to enqueue, typically up to the
+ *  enqueue depth configured for this port.
+ *
+ * @return
+ *   The number of event objects actually enqueued on the event device. The
+ *   return value can be less than the value of the *nb_events* parameter when
+ *   the event device's queue is full or if invalid parameters are specified in a
+ *   *rte_event*. If the return value is less than *nb_events*, the remaining
+ *   events at the end of ev[] are not consumed and the caller has to take care
+ *   of them, and rte_errno is set accordingly. Possible errno values include:
+ *   - -EINVAL  The port ID is invalid, device ID is invalid, an event's queue
+ *              ID is invalid, or an event's sched type doesn't match the
+ *              capabilities of the destination queue.
+ *   - -ENOSPC  The event port was backpressured and unable to enqueue
+ *              one or more events. This error code is only applicable to
+ *              closed systems.
+ */
+static inline uint16_t __rte_experimental
+rte_event_eth_tx_adapter_enqueue(uint8_t dev_id,
+                               uint8_t port_id,
+                               struct rte_event ev[],
+                               uint16_t nb_events)
+{
+       const struct rte_eventdev *dev = &rte_eventdevs[dev_id];
+
+#ifdef RTE_LIBRTE_EVENTDEV_DEBUG
+       if (dev_id >= RTE_EVENT_MAX_DEVS ||
+               !rte_eventdevs[dev_id].attached) {
+               rte_errno = -EINVAL;
+               return 0;
+       }
+
+       if (port_id >= dev->data->nb_ports) {
+               rte_errno = -EINVAL;
+               return 0;
+       }
+#endif
+       return dev->txa_enqueue(dev->data->ports[port_id], ev, nb_events);
+}
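+
+/*
+ * Since the return value may be less than *nb_events*, callers typically
+ * retry the unconsumed tail; a sketch (checking of rte_errno omitted):
+ *
+ *     uint16_t sent = 0;
+ *
+ *     while (sent < nb_events)
+ *             sent += rte_event_eth_tx_adapter_enqueue(dev_id, port_id,
+ *                                                      &ev[sent],
+ *                                                      nb_events - sent);
+ */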
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve statistics for an adapter
+ *
+ * @param id
+ *  Adapter identifier.
+ * @param [out] stats
+ *  A pointer to structure used to retrieve statistics for an adapter.
+ * @return
+ *  - 0: Success, statistics retrieved successfully.
+ *  - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_eth_tx_adapter_stats_get(uint8_t id,
+                               struct rte_event_eth_tx_adapter_stats *stats);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Reset statistics for an adapter.
+ *
+ * @param id
+ *  Adapter identifier.
+ * @return
+ *  - 0: Success, statistics reset successfully.
+ *  - <0: Error code on failure.
+ */
+int __rte_experimental
+rte_event_eth_tx_adapter_stats_reset(uint8_t id);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Retrieve the service ID of an adapter. If the adapter doesn't use
+ * an rte_service function, this function returns -ESRCH.
+ *
+ * @param id
+ *  Adapter identifier.
+ * @param [out] service_id
+ *  A pointer to a uint32_t, to be filled in with the service id.
+ * @return
+ *  - 0: Success
+ *  - <0: Error code on failure; if the adapter doesn't use an rte_service
+ *    function, -ESRCH is returned.
+ */
+int __rte_experimental
+rte_event_eth_tx_adapter_service_id_get(uint8_t id, uint32_t *service_id);
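+
+/*
+ * For the service based implementation, the application must map the
+ * adapter's service to a service core before the adapter can run; a sketch,
+ * assuming SERVICE_LCORE was previously added with rte_service_lcore_add():
+ *
+ *     uint32_t service_id;
+ *
+ *     if (rte_event_eth_tx_adapter_service_id_get(TXA_ID, &service_id) == 0)
+ *             rte_service_map_lcore_set(service_id, SERVICE_LCORE, 1);
+ */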
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* _RTE_EVENT_ETH_TX_ADAPTER_ */
index 801810e..ebaf308 100644 (file)
 #include "rte_eventdev.h"
 #include "rte_eventdev_pmd.h"
 
-struct rte_eventdev rte_event_devices[RTE_EVENT_MAX_DEVS];
+static struct rte_eventdev rte_event_devices[RTE_EVENT_MAX_DEVS];
 
-struct rte_eventdev *rte_eventdevs = &rte_event_devices[0];
+struct rte_eventdev *rte_eventdevs = rte_event_devices;
 
 static struct rte_eventdev_global eventdev_globals = {
        .nb_devs                = 0
 };
 
-struct rte_eventdev_global *rte_eventdev_globals = &eventdev_globals;
-
 /* Event dev north bound API implementation */
 
 uint8_t
 rte_event_dev_count(void)
 {
-       return rte_eventdev_globals->nb_devs;
+       return eventdev_globals.nb_devs;
 }
 
 int
@@ -62,7 +60,7 @@ rte_event_dev_get_dev_id(const char *name)
        if (!name)
                return -EINVAL;
 
-       for (i = 0; i < rte_eventdev_globals->nb_devs; i++) {
+       for (i = 0; i < eventdev_globals.nb_devs; i++) {
                cmp = (strncmp(rte_event_devices[i].data->name, name,
                                RTE_EVENTDEV_NAME_MAX_LEN) == 0) ||
                        (rte_event_devices[i].dev ? (strncmp(
@@ -109,7 +107,7 @@ rte_event_dev_info_get(uint8_t dev_id, struct rte_event_dev_info *dev_info)
 }
 
 int
-rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint8_t eth_port_id,
+rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint16_t eth_port_id,
                                uint32_t *caps)
 {
        struct rte_eventdev *dev;
@@ -175,6 +173,31 @@ rte_event_crypto_adapter_caps_get(uint8_t dev_id, uint8_t cdev_id,
                (dev, cdev, caps) : -ENOTSUP;
 }
 
+int __rte_experimental
+rte_event_eth_tx_adapter_caps_get(uint8_t dev_id, uint16_t eth_port_id,
+                               uint32_t *caps)
+{
+       struct rte_eventdev *dev;
+       struct rte_eth_dev *eth_dev;
+
+       RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+       RTE_ETH_VALID_PORTID_OR_ERR_RET(eth_port_id, -EINVAL);
+
+       dev = &rte_eventdevs[dev_id];
+       eth_dev = &rte_eth_devices[eth_port_id];
+
+       if (caps == NULL)
+               return -EINVAL;
+
+       *caps = 0;
+
+       return dev->dev_ops->eth_tx_adapter_caps_get ?
+                       (*dev->dev_ops->eth_tx_adapter_caps_get)(dev,
+                                                               eth_dev,
+                                                               caps)
+                       : 0;
+}
+
 static inline int
 rte_event_dev_queue_config(struct rte_eventdev *dev, uint8_t nb_queues)
 {
@@ -980,6 +1003,28 @@ rte_event_port_unlink(uint8_t dev_id, uint8_t port_id,
        return diag;
 }
 
+int __rte_experimental
+rte_event_port_unlinks_in_progress(uint8_t dev_id, uint8_t port_id)
+{
+       struct rte_eventdev *dev;
+
+       RTE_EVENTDEV_VALID_DEVID_OR_ERR_RET(dev_id, -EINVAL);
+       dev = &rte_eventdevs[dev_id];
+       if (!is_valid_port(dev, port_id)) {
+               RTE_EDEV_LOG_ERR("Invalid port_id=%" PRIu8, port_id);
+               return -EINVAL;
+       }
+
+       /* Return 0 if the PMD does not implement unlinks in progress.
+        * This allows PMDs which handle unlink synchronously to not implement
+        * this function at all.
+        */
+       RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->port_unlinks_in_progress, 0);
+
+       return (*dev->dev_ops->port_unlinks_in_progress)(dev,
+                       dev->data->ports[port_id]);
+}
+
 int
 rte_event_port_links_get(uint8_t dev_id, uint8_t port_id,
                         uint8_t queues[], uint8_t priorities[])
@@ -1275,6 +1320,15 @@ rte_eventdev_find_free_device_index(void)
        return RTE_EVENT_MAX_DEVS;
 }
 
+static uint16_t
+rte_event_tx_adapter_enqueue(__rte_unused void *port,
+                       __rte_unused struct rte_event ev[],
+                       __rte_unused uint16_t nb_events)
+{
+       rte_errno = ENOTSUP;
+       return 0;
+}
+
 struct rte_eventdev *
 rte_event_pmd_allocate(const char *name, int socket_id)
 {
@@ -1295,6 +1349,8 @@ rte_event_pmd_allocate(const char *name, int socket_id)
 
        eventdev = &rte_eventdevs[dev_id];
 
+       eventdev->txa_enqueue = rte_event_tx_adapter_enqueue;
+
        if (eventdev->data == NULL) {
                struct rte_eventdev_data *eventdev_data = NULL;
 
index b6fd6ee..d7eb69d 100644 (file)
@@ -1112,7 +1112,7 @@ struct rte_event {
  *
  */
 int
-rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint8_t eth_port_id,
+rte_event_eth_rx_adapter_caps_get(uint8_t dev_id, uint16_t eth_port_id,
                                uint32_t *caps);
 
 #define RTE_EVENT_TIMER_ADAPTER_CAP_INTERNAL_PORT (1ULL << 0)
@@ -1186,6 +1186,32 @@ int __rte_experimental
 rte_event_crypto_adapter_caps_get(uint8_t dev_id, uint8_t cdev_id,
                                  uint32_t *caps);
 
+/* Ethdev Tx adapter capability bitmap flags */
+#define RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT     0x1
+/**< This flag is returned when the PMD supports a packet transmit callback
+ */
+
+/**
+ * Retrieve the event device's eth Tx adapter capabilities
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ *
+ * @param eth_port_id
+ *   The identifier of the ethernet device.
+ *
+ * @param[out] caps
+ *   A pointer to memory filled with eth Tx adapter capabilities.
+ *
+ * @return
+ *   - 0: Success, driver provides eth Tx adapter capabilities.
+ *   - <0: Error code returned by the driver function.
+ *
+ */
+int __rte_experimental
+rte_event_eth_tx_adapter_caps_get(uint8_t dev_id, uint16_t eth_port_id,
+                               uint32_t *caps);
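+
+/*
+ * A sketch of selecting the enqueue path from this capability (variable
+ * names are illustrative):
+ *
+ *     uint32_t caps = 0;
+ *     int internal_port;
+ *
+ *     rte_event_eth_tx_adapter_caps_get(dev_id, eth_port_id, &caps);
+ *     internal_port = !!(caps & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT);
+ *
+ * When internal_port is set, events are enqueued with
+ * rte_event_eth_tx_adapter_enqueue(), otherwise with
+ * rte_event_enqueue_burst().
+ */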
+
 struct rte_eventdev_ops;
 struct rte_eventdev;
 
@@ -1204,6 +1230,10 @@ typedef uint16_t (*event_dequeue_burst_t)(void *port, struct rte_event ev[],
                uint16_t nb_events, uint64_t timeout_ticks);
 /**< @internal Dequeue burst of events from port of a device */
 
+typedef uint16_t (*event_tx_adapter_enqueue)(void *port,
+                               struct rte_event ev[], uint16_t nb_events);
+/**< @internal Enqueue burst of events on port of a device */
+
 #define RTE_EVENTDEV_NAME_MAX_LEN      (64)
 /**< @internal Max length of name of event PMD */
 
@@ -1266,7 +1296,8 @@ struct rte_eventdev {
        /**< Pointer to PMD dequeue function. */
        event_dequeue_burst_t dequeue_burst;
        /**< Pointer to PMD dequeue burst function. */
-
+       event_tx_adapter_enqueue txa_enqueue;
+       /**< Pointer to PMD eth Tx adapter enqueue function. */
        struct rte_eventdev_data *data;
        /**< Pointer to device data */
        struct rte_eventdev_ops *dev_ops;
@@ -1656,12 +1687,13 @@ rte_event_port_link(uint8_t dev_id, uint8_t port_id,
  * event port designated by its *port_id* on the event device designated
  * by its *dev_id*.
  *
- * The unlink establishment shall disable the event port *port_id* from
- * receiving events from the specified event queue *queue_id*
- *
+ * The unlink call issues an async request to disable the event port *port_id*
+ * from receiving events from the specified event queue *queue_id*.
  * Event queue(s) to event port unlink establishment can be changed at runtime
  * without re-configuring the device.
  *
+ * @see rte_event_port_unlinks_in_progress() to poll for completed unlinks.
+ *
  * @param dev_id
  *   The identifier of the device.
  *
@@ -1679,21 +1711,47 @@ rte_event_port_link(uint8_t dev_id, uint8_t port_id,
  *   NULL.
  *
  * @return
- * The number of unlinks actually established. The return value can be less
+ * The number of unlinks successfully requested. The return value can be less
  * than the value of the *nb_unlinks* parameter when the implementation has the
  * limitation on specific queue to port unlink establishment or
  * if invalid parameters are specified.
  * If the return value is less than *nb_unlinks*, the remaining queues at the
- * end of queues[] are not established, and the caller has to take care of them.
+ * end of queues[] are not unlinked, and the caller has to take care of them.
  * If return value is less than *nb_unlinks* then implementation shall update
  * the rte_errno accordingly, Possible rte_errno values are
  * (-EINVAL) Invalid parameter
- *
  */
 int
 rte_event_port_unlink(uint8_t dev_id, uint8_t port_id,
                      uint8_t queues[], uint16_t nb_unlinks);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Returns the number of unlinks in progress.
+ *
+ * This function provides the application with a method to detect when an
+ * unlink has been completed by the implementation.
+ *
+ * @see rte_event_port_unlink() to issue unlink requests.
+ *
+ * @param dev_id
+ *   The identifier of the device.
+ *
+ * @param port_id
+ *   Event port identifier to select port to check for unlinks in progress.
+ *
+ * @return
+ * The number of unlinks that are in progress. A return of zero indicates that
+ * there are no outstanding unlink requests. A positive return value indicates
+ * the number of unlinks that are in progress but not yet complete.
+ * A negative return value indicates an error; -EINVAL indicates an invalid
+ * parameter passed for *dev_id* or *port_id*.
+ */
+int __rte_experimental
+rte_event_port_unlinks_in_progress(uint8_t dev_id, uint8_t port_id);
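+
+/*
+ * A sketch of the intended usage: issue an unlink request and poll until it
+ * completes (single queue; error handling omitted):
+ *
+ *     uint8_t queue_id = 0;
+ *
+ *     rte_event_port_unlink(dev_id, port_id, &queue_id, 1);
+ *     while (rte_event_port_unlinks_in_progress(dev_id, port_id) > 0)
+ *             rte_pause();
+ */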
+
 /**
  * Retrieve the list of source event queues and its associated service priority
  * linked to the destination event port designated by its *port_id*
index 3fbb4d2..1a01326 100644 (file)
@@ -87,8 +87,6 @@ struct rte_eventdev_global {
        uint8_t nb_devs;        /**< Number of devices found */
 };
 
-extern struct rte_eventdev_global *rte_eventdev_globals;
-/** Pointer to global event devices data structure. */
 extern struct rte_eventdev *rte_eventdevs;
 /** The pool of rte_eventdev structures. */
 
@@ -332,6 +330,23 @@ typedef int (*eventdev_port_link_t)(struct rte_eventdev *dev, void *port,
 typedef int (*eventdev_port_unlink_t)(struct rte_eventdev *dev, void *port,
                uint8_t queues[], uint16_t nb_unlinks);
 
+/**
+ * Unlinks in progress. Returns the number of unlinks that the PMD is
+ * currently performing but has not yet completed.
+ *
+ * @param dev
+ *   Event device pointer
+ *
+ * @param port
+ *   Event port pointer
+ *
+ * @return
+ *   Returns the number of in-progress unlinks. Zero is returned if none are
+ *   in progress.
+ */
+typedef int (*eventdev_port_unlinks_in_progress_t)(struct rte_eventdev *dev,
+               void *port);
+
 /**
  * Converts nanoseconds to *timeout_ticks* value for rte_event_dequeue()
  *
@@ -450,7 +465,7 @@ typedef int (*eventdev_eth_rx_adapter_caps_get_t)
                                        const struct rte_eth_dev *eth_dev,
                                        uint32_t *caps);
 
-struct rte_event_eth_rx_adapter_queue_conf *queue_conf;
+struct rte_event_eth_rx_adapter_queue_conf;
 
 /**
  * Retrieve the event device's timer adapter capabilities, as well as the ops
@@ -575,7 +590,7 @@ typedef int (*eventdev_eth_rx_adapter_stop_t)
                                        (const struct rte_eventdev *dev,
                                        const struct rte_eth_dev *eth_dev);
 
-struct rte_event_eth_rx_adapter_stats *stats;
+struct rte_event_eth_rx_adapter_stats;
 
 /**
  * Retrieve ethernet Rx adapter statistics.
@@ -789,6 +804,186 @@ typedef int (*eventdev_crypto_adapter_stats_reset)
                        (const struct rte_eventdev *dev,
                         const struct rte_cryptodev *cdev);
 
+/**
+ * Retrieve the event device's eth Tx adapter capabilities.
+ *
+ * @param dev
+ *   Event device pointer
+ *
+ * @param eth_dev
+ *   Ethernet device pointer
+ *
+ * @param[out] caps
+ *   A pointer to memory filled with eth Tx adapter capabilities.
+ *
+ * @return
+ *   - 0: Success, driver provides eth Tx adapter capabilities
+ *   - <0: Error code returned by the driver function.
+ *
+ */
+typedef int (*eventdev_eth_tx_adapter_caps_get_t)
+                                       (const struct rte_eventdev *dev,
+                                       const struct rte_eth_dev *eth_dev,
+                                       uint32_t *caps);
+
+/**
+ * Create adapter callback.
+ *
+ * @param id
+ *   Adapter identifier
+ *
+ * @param dev
+ *   Event device pointer
+ *
+ * @return
+ *   - 0: Success.
+ *   - <0: Error code on failure.
+ */
+typedef int (*eventdev_eth_tx_adapter_create_t)(uint8_t id,
+                                       const struct rte_eventdev *dev);
+
+/**
+ * Free adapter callback.
+ *
+ * @param id
+ *   Adapter identifier
+ *
+ * @param dev
+ *   Event device pointer
+ *
+ * @return
+ *   - 0: Success.
+ *   - <0: Error code on failure.
+ */
+typedef int (*eventdev_eth_tx_adapter_free_t)(uint8_t id,
+                                       const struct rte_eventdev *dev);
+
+/**
+ * Add a Tx queue to the adapter.
+ * A queue value of -1 is used to indicate all
+ * queues within the device.
+ *
+ * @param id
+ *   Adapter identifier
+ *
+ * @param dev
+ *   Event device pointer
+ *
+ * @param eth_dev
+ *   Ethernet device pointer
+ *
+ * @param tx_queue_id
+ *   Transmit queue index
+ *
+ * @return
+ *   - 0: Success.
+ *   - <0: Error code on failure.
+ */
+typedef int (*eventdev_eth_tx_adapter_queue_add_t)(
+                                       uint8_t id,
+                                       const struct rte_eventdev *dev,
+                                       const struct rte_eth_dev *eth_dev,
+                                       int32_t tx_queue_id);
+
+/**
+ * Delete a Tx queue from the adapter.
+ * A queue value of -1 is used to indicate all
+ * queues within the device, that have been added to this
+ * adapter.
+ *
+ * @param id
+ *   Adapter identifier
+ *
+ * @param dev
+ *   Event device pointer
+ *
+ * @param eth_dev
+ *   Ethernet device pointer
+ *
+ * @param tx_queue_id
+ *   Transmit queue index
+ *
+ * @return
+ *  - 0: Success, Queues deleted successfully.
+ *  - <0: Error code on failure.
+ */
+typedef int (*eventdev_eth_tx_adapter_queue_del_t)(
+                                       uint8_t id,
+                                       const struct rte_eventdev *dev,
+                                       const struct rte_eth_dev *eth_dev,
+                                       int32_t tx_queue_id);
+
+/**
+ * Start the adapter.
+ *
+ * @param id
+ *   Adapter identifier
+ *
+ * @param dev
+ *   Event device pointer
+ *
+ * @return
+ *  - 0: Success, Adapter started correctly.
+ *  - <0: Error code on failure.
+ */
+typedef int (*eventdev_eth_tx_adapter_start_t)(uint8_t id,
+                                       const struct rte_eventdev *dev);
+
+/**
+ * Stop the adapter.
+ *
+ * @param id
+ *  Adapter identifier
+ *
+ * @param dev
+ *   Event device pointer
+ *
+ * @return
+ *  - 0: Success.
+ *  - <0: Error code on failure.
+ */
+typedef int (*eventdev_eth_tx_adapter_stop_t)(uint8_t id,
+                                       const struct rte_eventdev *dev);
+
+struct rte_event_eth_tx_adapter_stats;
+
+/**
+ * Retrieve statistics for an adapter
+ *
+ * @param id
+ *  Adapter identifier
+ *
+ * @param dev
+ *   Event device pointer
+ *
+ * @param [out] stats
+ *  A pointer to structure used to retrieve statistics for an adapter
+ *
+ * @return
+ *  - 0: Success, statistics retrieved successfully.
+ *  - <0: Error code on failure.
+ */
+typedef int (*eventdev_eth_tx_adapter_stats_get_t)(
+                               uint8_t id,
+                               const struct rte_eventdev *dev,
+                               struct rte_event_eth_tx_adapter_stats *stats);
+
+/**
+ * Reset statistics for an adapter
+ *
+ * @param id
+ *  Adapter identifier
+ *
+ * @param dev
+ *   Event device pointer
+ *
+ * @return
+ *  - 0: Success, statistics reset successfully.
+ *  - <0: Error code on failure.
+ */
+typedef int (*eventdev_eth_tx_adapter_stats_reset_t)(uint8_t id,
+                                       const struct rte_eventdev *dev);
+
 /** Event device operations function pointer table */
 struct rte_eventdev_ops {
        eventdev_info_get_t dev_infos_get;      /**< Get device info. */
@@ -815,6 +1010,8 @@ struct rte_eventdev_ops {
        /**< Link event queues to an event port. */
        eventdev_port_unlink_t port_unlink;
        /**< Unlink event queues from an event port. */
+       eventdev_port_unlinks_in_progress_t port_unlinks_in_progress;
+       /**< Unlinks in progress on an event port. */
        eventdev_dequeue_timeout_ticks_t timeout_ticks;
        /**< Converts ns to *timeout_ticks* value for rte_event_dequeue() */
        eventdev_dump_t dump;
@@ -862,6 +1059,26 @@ struct rte_eventdev_ops {
        eventdev_crypto_adapter_stats_reset crypto_adapter_stats_reset;
        /**< Reset crypto stats */
 
+       eventdev_eth_tx_adapter_caps_get_t eth_tx_adapter_caps_get;
+       /**< Get ethernet Tx adapter capabilities */
+
+       eventdev_eth_tx_adapter_create_t eth_tx_adapter_create;
+       /**< Create adapter callback */
+       eventdev_eth_tx_adapter_free_t eth_tx_adapter_free;
+       /**< Free adapter callback */
+       eventdev_eth_tx_adapter_queue_add_t eth_tx_adapter_queue_add;
+       /**< Add Tx queues to the eth Tx adapter */
+       eventdev_eth_tx_adapter_queue_del_t eth_tx_adapter_queue_del;
+       /**< Delete Tx queues from the eth Tx adapter */
+       eventdev_eth_tx_adapter_start_t eth_tx_adapter_start;
+       /**< Start eth Tx adapter */
+       eventdev_eth_tx_adapter_stop_t eth_tx_adapter_stop;
+       /**< Stop eth Tx adapter */
+       eventdev_eth_tx_adapter_stats_get_t eth_tx_adapter_stats_get;
+       /**< Get eth Tx adapter statistics */
+       eventdev_eth_tx_adapter_stats_reset_t eth_tx_adapter_stats_reset;
+       /**< Reset eth Tx adapter statistics */
+
        eventdev_selftest dev_selftest;
        /**< Start eventdev Selftest */
 
index 12835e9..d558d7d 100644 (file)
@@ -96,6 +96,19 @@ EXPERIMENTAL {
        rte_event_crypto_adapter_stats_reset;
        rte_event_crypto_adapter_stop;
        rte_event_eth_rx_adapter_cb_register;
+       rte_event_eth_tx_adapter_caps_get;
+       rte_event_eth_tx_adapter_create;
+       rte_event_eth_tx_adapter_create_ext;
+       rte_event_eth_tx_adapter_event_port_get;
+       rte_event_eth_tx_adapter_free;
+       rte_event_eth_tx_adapter_queue_add;
+       rte_event_eth_tx_adapter_queue_del;
+       rte_event_eth_tx_adapter_service_id_get;
+       rte_event_eth_tx_adapter_start;
+       rte_event_eth_tx_adapter_stats_get;
+       rte_event_eth_tx_adapter_stats_reset;
+       rte_event_eth_tx_adapter_stop;
+       rte_event_port_unlinks_in_progress;
        rte_event_timer_adapter_caps_get;
        rte_event_timer_adapter_create;
        rte_event_timer_adapter_create_ext;
index 4c3469d..fb652a2 100644 (file)
@@ -247,8 +247,7 @@ rte_flow_classifier_check_params(struct rte_flow_classifier_params *params)
        }
 
        /* socket */
-       if ((params->socket_id < 0) ||
-           (params->socket_id >= RTE_MAX_NUMA_NODES)) {
+       if (params->socket_id < 0) {
                RTE_FLOW_CLASSIFY_LOG(ERR,
                        "%s: Incorrect value for parameter socket_id\n",
                        __func__);
index f7b86c8..5ddcccd 100644 (file)
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2010-2016 Intel Corporation
+ * Copyright(c) 2018 Arm Limited
  */
 
 #include <string.h>
 #include <rte_spinlock.h>
 #include <rte_ring.h>
 #include <rte_compat.h>
-#include <rte_pause.h>
 
 #include "rte_hash.h"
 #include "rte_cuckoo_hash.h"
 
+#define FOR_EACH_BUCKET(CURRENT_BKT, START_BUCKET)                            \
+       for (CURRENT_BKT = START_BUCKET;                                      \
+               CURRENT_BKT != NULL;                                          \
+               CURRENT_BKT = CURRENT_BKT->next)
 
 TAILQ_HEAD(rte_hash_list, rte_tailq_entry);
 
@@ -63,6 +67,14 @@ rte_hash_find_existing(const char *name)
        return h;
 }
 
+static inline struct rte_hash_bucket *
+rte_hash_get_last_bkt(struct rte_hash_bucket *lst_bkt)
+{
+       while (lst_bkt->next != NULL)
+               lst_bkt = lst_bkt->next;
+       return lst_bkt;
+}
+
 void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func)
 {
        h->cmp_jump_table_idx = KEY_CUSTOM;
@@ -78,6 +90,36 @@ rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h)
                return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len);
 }
 
+/*
+ * We use the higher 16 bits of the hash as the signature value stored in
+ * the table, and the lower bits for the primary bucket location. The
+ * primary bucket location is then XORed with the signature to get the
+ * secondary bucket location. This is the same scheme as proposed in
+ * Bin Fan et al.'s paper "MemC3: Compact and Concurrent MemCache with
+ * Dumber Caching and Smarter Hashing". The benefit of using XOR is that
+ * the alternative bucket location can be derived from only the current
+ * bucket location and the signature.
+ */
+static inline uint16_t
+get_short_sig(const hash_sig_t hash)
+{
+       return hash >> 16;
+}
+
+static inline uint32_t
+get_prim_bucket_index(const struct rte_hash *h, const hash_sig_t hash)
+{
+       return hash & h->bucket_bitmask;
+}
+
+static inline uint32_t
+get_alt_bucket_index(const struct rte_hash *h,
+                       uint32_t cur_bkt_idx, uint16_t sig)
+{
+       return (cur_bkt_idx ^ sig) & h->bucket_bitmask;
+}
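+
+/*
+ * A worked example of the XOR scheme above (illustrative values): with
+ * bucket_bitmask 0xFF, a current bucket index of 0x2A and a signature of
+ * 0x1234, alt = (0x2A ^ 0x1234) & 0xFF = 0x1E; applying the same operation
+ * to 0x1E yields (0x1E ^ 0x1234) & 0xFF = 0x2A again, so either bucket's
+ * alternative can be derived without recomputing the full hash.
+ */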
+
 struct rte_hash *
 rte_hash_create(const struct rte_hash_parameters *params)
 {
@@ -85,14 +127,22 @@ rte_hash_create(const struct rte_hash_parameters *params)
        struct rte_tailq_entry *te = NULL;
        struct rte_hash_list *hash_list;
        struct rte_ring *r = NULL;
+       struct rte_ring *r_ext = NULL;
        char hash_name[RTE_HASH_NAMESIZE];
        void *k = NULL;
        void *buckets = NULL;
+       void *buckets_ext = NULL;
        char ring_name[RTE_RING_NAMESIZE];
+       char ext_ring_name[RTE_RING_NAMESIZE];
        unsigned num_key_slots;
        unsigned i;
-       unsigned int hw_trans_mem_support = 0, multi_writer_support = 0;
+       unsigned int hw_trans_mem_support = 0, use_local_cache = 0;
+       unsigned int ext_table_support = 0;
        unsigned int readwrite_concur_support = 0;
+       unsigned int writer_takes_lock = 0;
+       unsigned int no_free_on_del = 0;
+       uint32_t *tbl_chng_cnt = NULL;
+       unsigned int readwrite_concur_lf_support = 0;
 
        rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
 
@@ -112,20 +162,52 @@ rte_hash_create(const struct rte_hash_parameters *params)
                return NULL;
        }
 
+       /* Validate correct usage of extra options */
+       if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) &&
+           (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF)) {
+               rte_errno = EINVAL;
+               RTE_LOG(ERR, HASH, "rte_hash_create: choose rw concurrency or "
+                       "rw concurrency lock free\n");
+               return NULL;
+       }
+
+       if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&
+           (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)) {
+               rte_errno = EINVAL;
+               RTE_LOG(ERR, HASH, "rte_hash_create: extendable bucket "
+                       "feature not supported with rw concurrency "
+                       "lock free\n");
+               return NULL;
+       }
+
        /* Check extra flags field to check extra options. */
        if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT)
                hw_trans_mem_support = 1;
 
-       if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD)
-               multi_writer_support = 1;
+       if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD) {
+               use_local_cache = 1;
+               writer_takes_lock = 1;
+       }
 
        if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY) {
                readwrite_concur_support = 1;
-               multi_writer_support = 1;
+               writer_takes_lock = 1;
+       }
+
+       if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
+               ext_table_support = 1;
+
+       if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)
+               no_free_on_del = 1;
+
+       if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {
+               readwrite_concur_lf_support = 1;
+               /* Enable not freeing internal memory/index on delete */
+               no_free_on_del = 1;
        }
 
        /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
-       if (multi_writer_support)
+       if (use_local_cache)
                /*
                 * Increase number of slots by total number of indices
                 * that can be stored in the lcore caches
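
A hypothetical creation sketch for the new flags (sizes and names are invented; the flag constants and calls are the public DPDK APIs): RW_CONCURRENCY and RW_CONCURRENCY_LF are mutually exclusive, and RW_CONCURRENCY_LF rejects EXT_TABLE, so a lock-free multi-writer table would be created roughly like this:

#include <rte_hash.h>
#include <rte_jhash.h>

/* hypothetical helper; returns NULL and sets rte_errno on bad flags */
static struct rte_hash *
make_lf_table(void)
{
        struct rte_hash_parameters p = {
                .name = "lf_ht",                /* invented name */
                .entries = 1 << 16,
                .key_len = 16,
                .hash_func = rte_jhash,
                .socket_id = 0,
                .extra_flag = RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD |
                              RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF,
        };

        return rte_hash_create(&p);
}
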
@@ -145,6 +227,24 @@ rte_hash_create(const struct rte_hash_parameters *params)
                goto err;
        }
 
+       const uint32_t num_buckets = rte_align32pow2(params->entries) /
+                                               RTE_HASH_BUCKET_ENTRIES;
+
+       /* Create ring for extendable buckets. */
+       if (ext_table_support) {
+               snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
+                                                               params->name);
+               r_ext = rte_ring_create(ext_ring_name,
+                               rte_align32pow2(num_buckets + 1),
+                               params->socket_id, 0);
+
+               if (r_ext == NULL) {
+                       RTE_LOG(ERR, HASH, "ext buckets memory allocation "
+                                                               "failed\n");
+                       goto err;
+               }
+       }
+
        snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
 
        rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
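
The ring is sized to rte_align32pow2(num_buckets + 1) because a default rte_ring of size N stores at most N - 1 objects; adding 1 before rounding up guarantees that all num_buckets spare-bucket indices fit. A self-contained arithmetic sketch (align32pow2 below models rte_align32pow2):

#include <assert.h>
#include <stdint.h>

static uint32_t
align32pow2(uint32_t x)
{
        x--;
        x |= x >> 1;
        x |= x >> 2;
        x |= x >> 4;
        x |= x >> 8;
        x |= x >> 16;
        return x + 1;
}

int
main(void)
{
        uint32_t num_buckets = 1024;    /* already a power of two */
        uint32_t ring_sz = align32pow2(num_buckets + 1);

        assert(ring_sz - 1 >= num_buckets);     /* every index fits */
        return 0;
}
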
@@ -177,19 +277,37 @@ rte_hash_create(const struct rte_hash_parameters *params)
                goto err_unlock;
        }
 
-       const uint32_t num_buckets = rte_align32pow2(params->entries)
-                                       / RTE_HASH_BUCKET_ENTRIES;
-
        buckets = rte_zmalloc_socket(NULL,
                                num_buckets * sizeof(struct rte_hash_bucket),
                                RTE_CACHE_LINE_SIZE, params->socket_id);
 
        if (buckets == NULL) {
-               RTE_LOG(ERR, HASH, "memory allocation failed\n");
+               RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
                goto err_unlock;
        }
 
-       const uint32_t key_entry_size = sizeof(struct rte_hash_key) + params->key_len;
+       /* Allocate same number of extendable buckets */
+       if (ext_table_support) {
+               buckets_ext = rte_zmalloc_socket(NULL,
+                               num_buckets * sizeof(struct rte_hash_bucket),
+                               RTE_CACHE_LINE_SIZE, params->socket_id);
+               if (buckets_ext == NULL) {
+                       RTE_LOG(ERR, HASH, "ext buckets memory allocation "
+                                                       "failed\n");
+                       goto err_unlock;
+               }
+               /* Populate ext bkt ring. We reserve 0, as with the
+                * key-data slot, in case we later want to use the
+                * bucket index for the linked list, where 0 would mean
+                * NULL for the next bucket
+                */
+               for (i = 1; i <= num_buckets; i++)
+                       rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
+       }
+
+       const uint32_t key_entry_size =
+               RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,
+                         KEY_ALIGNMENT);
        const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
 
        k = rte_zmalloc_socket(NULL, key_tbl_size,
@@ -200,6 +318,14 @@ rte_hash_create(const struct rte_hash_parameters *params)
                goto err_unlock;
        }
 
+       tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t),
+                       RTE_CACHE_LINE_SIZE, params->socket_id);
+
+       if (tbl_chng_cnt == NULL) {
+               RTE_LOG(ERR, HASH, "memory allocation failed\n");
+               goto err_unlock;
+       }
+
 /*
  * If x86 architecture is used, select appropriate compare function,
  * which may use x86 intrinsics, otherwise use memcmp
@@ -239,7 +365,7 @@ rte_hash_create(const struct rte_hash_parameters *params)
        h->cmp_jump_table_idx = KEY_OTHER_BYTES;
 #endif
 
-       if (multi_writer_support) {
+       if (use_local_cache) {
                h->local_free_slots = rte_zmalloc_socket(NULL,
                                sizeof(struct lcore_cache) * RTE_MAX_LCORE,
                                RTE_CACHE_LINE_SIZE, params->socket_id);
@@ -262,27 +388,34 @@ rte_hash_create(const struct rte_hash_parameters *params)
        h->num_buckets = num_buckets;
        h->bucket_bitmask = h->num_buckets - 1;
        h->buckets = buckets;
+       h->buckets_ext = buckets_ext;
+       h->free_ext_bkts = r_ext;
        h->hash_func = (params->hash_func == NULL) ?
                default_hash_func : params->hash_func;
        h->key_store = k;
        h->free_slots = r;
+       h->tbl_chng_cnt = tbl_chng_cnt;
+       *h->tbl_chng_cnt = 0;
        h->hw_trans_mem_support = hw_trans_mem_support;
-       h->multi_writer_support = multi_writer_support;
+       h->use_local_cache = use_local_cache;
        h->readwrite_concur_support = readwrite_concur_support;
+       h->ext_table_support = ext_table_support;
+       h->writer_takes_lock = writer_takes_lock;
+       h->no_free_on_del = no_free_on_del;
+       h->readwrite_concur_lf_support = readwrite_concur_lf_support;
 
 #if defined(RTE_ARCH_X86)
-       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
-               h->sig_cmp_fn = RTE_HASH_COMPARE_AVX2;
-       else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
+       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
                h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
        else
 #endif
                h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
 
-       /* Turn on multi-writer only with explicit flag from user and TM
-        * support.
+       /* Writer threads need to take the lock when:
+        * 1) RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is enabled OR
+        * 2) RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD is enabled
         */
-       if (h->multi_writer_support) {
+       if (h->writer_takes_lock) {
                h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
                                                RTE_CACHE_LINE_SIZE);
                if (h->readwrite_lock == NULL)
@@ -304,10 +437,13 @@ err_unlock:
        rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
 err:
        rte_ring_free(r);
+       rte_ring_free(r_ext);
        rte_free(te);
        rte_free(h);
        rte_free(buckets);
+       rte_free(buckets_ext);
        rte_free(k);
+       rte_free(tbl_chng_cnt);
        return NULL;
 }
 
@@ -339,13 +475,16 @@ rte_hash_free(struct rte_hash *h)
 
        rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
 
-       if (h->multi_writer_support) {
+       if (h->use_local_cache)
                rte_free(h->local_free_slots);
+       if (h->writer_takes_lock)
                rte_free(h->readwrite_lock);
-       }
        rte_ring_free(h->free_slots);
+       rte_ring_free(h->free_ext_bkts);
        rte_free(h->key_store);
        rte_free(h->buckets);
+       rte_free(h->buckets_ext);
+       rte_free(h->tbl_chng_cnt);
        rte_free(h);
        rte_free(te);
 }
@@ -357,18 +496,6 @@ rte_hash_hash(const struct rte_hash *h, const void *key)
        return h->hash_func(key, h->key_len, h->hash_func_init_val);
 }
 
-/* Calc the secondary hash value from the primary hash value of a given key */
-static inline hash_sig_t
-rte_hash_secondary_hash(const hash_sig_t primary_hash)
-{
-       static const unsigned all_bits_shift = 12;
-       static const unsigned alt_bits_xor = 0x5bd1e995;
-
-       uint32_t tag = primary_hash >> all_bits_shift;
-
-       return primary_hash ^ ((tag + 1) * alt_bits_xor);
-}
-
 int32_t
 rte_hash_count(const struct rte_hash *h)
 {
@@ -378,7 +505,7 @@ rte_hash_count(const struct rte_hash *h)
        if (h == NULL)
                return -EINVAL;
 
-       if (h->multi_writer_support) {
+       if (h->use_local_cache) {
                tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
                                        (LCORE_CACHE_SIZE - 1);
                for (i = 0; i < RTE_MAX_LCORE; i++)
@@ -397,13 +524,12 @@ rte_hash_count(const struct rte_hash *h)
 static inline void
 __hash_rw_writer_lock(const struct rte_hash *h)
 {
-       if (h->multi_writer_support && h->hw_trans_mem_support)
+       if (h->writer_takes_lock && h->hw_trans_mem_support)
                rte_rwlock_write_lock_tm(h->readwrite_lock);
-       else if (h->multi_writer_support)
+       else if (h->writer_takes_lock)
                rte_rwlock_write_lock(h->readwrite_lock);
 }
 
-
 static inline void
 __hash_rw_reader_lock(const struct rte_hash *h)
 {
@@ -416,9 +542,9 @@ __hash_rw_reader_lock(const struct rte_hash *h)
 static inline void
 __hash_rw_writer_unlock(const struct rte_hash *h)
 {
-       if (h->multi_writer_support && h->hw_trans_mem_support)
+       if (h->writer_takes_lock && h->hw_trans_mem_support)
                rte_rwlock_write_unlock_tm(h->readwrite_lock);
-       else if (h->multi_writer_support)
+       else if (h->writer_takes_lock)
                rte_rwlock_write_unlock(h->readwrite_lock);
 }
 
@@ -443,13 +569,22 @@ rte_hash_reset(struct rte_hash *h)
        __hash_rw_writer_lock(h);
        memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket));
        memset(h->key_store, 0, h->key_entry_size * (h->entries + 1));
+       *h->tbl_chng_cnt = 0;
 
        /* clear the free ring */
        while (rte_ring_dequeue(h->free_slots, &ptr) == 0)
-               rte_pause();
+               continue;
+
+       /* clear free extendable bucket ring and memory */
+       if (h->ext_table_support) {
+               memset(h->buckets_ext, 0, h->num_buckets *
+                                               sizeof(struct rte_hash_bucket));
+               while (rte_ring_dequeue(h->free_ext_bkts, &ptr) == 0)
+                       continue;
+       }
 
        /* Repopulate the free slots ring. Entry zero is reserved for key misses */
-       if (h->multi_writer_support)
+       if (h->use_local_cache)
                tot_ring_cnt = h->entries + (RTE_MAX_LCORE - 1) *
                                        (LCORE_CACHE_SIZE - 1);
        else
@@ -458,7 +593,14 @@ rte_hash_reset(struct rte_hash *h)
        for (i = 1; i < tot_ring_cnt + 1; i++)
                rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i));
 
-       if (h->multi_writer_support) {
+       /* Repopulate the free ext bkt ring. */
+       if (h->ext_table_support) {
+               for (i = 1; i <= h->num_buckets; i++)
+                       rte_ring_sp_enqueue(h->free_ext_bkts,
+                                               (void *)((uintptr_t) i));
+       }
+
+       if (h->use_local_cache) {
                /* Reset local caches per lcore */
                for (i = 0; i < RTE_MAX_LCORE; i++)
                        h->local_free_slots[i].len = 0;
@@ -476,29 +618,35 @@ enqueue_slot_back(const struct rte_hash *h,
                struct lcore_cache *cached_free_slots,
                void *slot_id)
 {
-       if (h->multi_writer_support) {
+       if (h->use_local_cache) {
                cached_free_slots->objs[cached_free_slots->len] = slot_id;
                cached_free_slots->len++;
        } else
                rte_ring_sp_enqueue(h->free_slots, slot_id);
 }
 
-/* Search a key from bucket and update its data */
+/* Search for a key in the bucket and update its data.
+ * The writer must hold the lock before calling this.
+ */
 static inline int32_t
 search_and_update(const struct rte_hash *h, void *data, const void *key,
-       struct rte_hash_bucket *bkt, hash_sig_t sig, hash_sig_t alt_hash)
+       struct rte_hash_bucket *bkt, uint16_t sig)
 {
        int i;
        struct rte_hash_key *k, *keys = h->key_store;
 
        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-               if (bkt->sig_current[i] == sig &&
-                               bkt->sig_alt[i] == alt_hash) {
+               if (bkt->sig_current[i] == sig) {
                        k = (struct rte_hash_key *) ((char *)keys +
                                        bkt->key_idx[i] * h->key_entry_size);
                        if (rte_hash_cmp_eq(key, k->key, h) == 0) {
-                               /* Update data */
-                               k->pdata = data;
+                               /* 'pdata' acts as the synchronization point
+                                * when an existing hash entry is updated.
+                                * Key is not updated in this case.
+                                */
+                               __atomic_store_n(&k->pdata,
+                                       data,
+                                       __ATOMIC_RELEASE);
                                /*
                                 * Return index where key is stored,
                                 * subtracting the first dummy index
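
The release store above pairs with acquire loads on the reader side: pdata acts as the synchronization point, so a reader that observes the new pointer also observes the object it points to. A minimal model of the pairing, using the same GCC __atomic builtins (the struct is a stand-in, not the real key layout):

#include <stdint.h>

struct entry {
        void *pdata;
};

/* writer: all stores to *obj happen before the release store */
static void
publish(struct entry *e, void *obj)
{
        __atomic_store_n(&e->pdata, obj, __ATOMIC_RELEASE);
}

/* reader: the acquire load pairs with the release store above */
static void *
observe(struct entry *e)
{
        return __atomic_load_n(&e->pdata, __ATOMIC_ACQUIRE);
}
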
@@ -520,28 +668,31 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
                struct rte_hash_bucket *prim_bkt,
                struct rte_hash_bucket *sec_bkt,
                const struct rte_hash_key *key, void *data,
-               hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx,
+               uint16_t sig, uint32_t new_idx,
                int32_t *ret_val)
 {
        unsigned int i;
-       struct rte_hash_bucket *cur_bkt = prim_bkt;
+       struct rte_hash_bucket *cur_bkt;
        int32_t ret;
 
        __hash_rw_writer_lock(h);
        /* Check if key was inserted after last check but before this
         * protected region in case of inserting duplicated keys.
         */
-       ret = search_and_update(h, data, key, cur_bkt, sig, alt_hash);
+       ret = search_and_update(h, data, key, prim_bkt, sig);
        if (ret != -1) {
                __hash_rw_writer_unlock(h);
                *ret_val = ret;
                return 1;
        }
-       ret = search_and_update(h, data, key, sec_bkt, alt_hash, sig);
-       if (ret != -1) {
-               __hash_rw_writer_unlock(h);
-               *ret_val = ret;
-               return 1;
+
+       FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
+               ret = search_and_update(h, data, key, cur_bkt, sig);
+               if (ret != -1) {
+                       __hash_rw_writer_unlock(h);
+                       *ret_val = ret;
+                       return 1;
+               }
        }
 
        /* Insert new entry if there is room in the primary
@@ -551,8 +702,15 @@ rte_hash_cuckoo_insert_mw(const struct rte_hash *h,
                /* Check if slot is available */
                if (likely(prim_bkt->key_idx[i] == EMPTY_SLOT)) {
                        prim_bkt->sig_current[i] = sig;
-                       prim_bkt->sig_alt[i] = alt_hash;
-                       prim_bkt->key_idx[i] = new_idx;
+                       /* Key can be of arbitrary length, so it is
+                        * not possible to store it atomically.
+                        * Hence the new key element's memory stores
+                        * (key as well as data) should be complete
+                        * before it is referenced.
+                        */
+                       __atomic_store_n(&prim_bkt->key_idx[i],
+                                        new_idx,
+                                        __ATOMIC_RELEASE);
                        break;
                }
        }
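
Because keys are arbitrary length, the slot index (not the key) is the unit that gets published atomically: the writer fills the key-store slot first and only then release-stores the index into the bucket. A stand-alone sketch of that ordering (layout and names invented):

#include <stdint.h>
#include <string.h>

#define EMPTY_SLOT 0    /* index 0 doubles as "no entry" */

struct slot {
        char key[64];
        void *pdata;
};

static void
insert_publish(uint32_t *bucket_key_idx, struct slot *store,
                uint32_t idx, const char *key, void *data)
{
        /* complete the key-store slot first... */
        strncpy(store[idx].key, key, sizeof(store[idx].key) - 1);
        store[idx].key[sizeof(store[idx].key) - 1] = '\0';
        store[idx].pdata = data;
        /* ...then make the entry visible with a release store */
        __atomic_store_n(bucket_key_idx, idx, __ATOMIC_RELEASE);
}
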
@@ -576,11 +734,11 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
                        struct rte_hash_bucket *alt_bkt,
                        const struct rte_hash_key *key, void *data,
                        struct queue_node *leaf, uint32_t leaf_slot,
-                       hash_sig_t sig, hash_sig_t alt_hash, uint32_t new_idx,
+                       uint16_t sig, uint32_t new_idx,
                        int32_t *ret_val)
 {
        uint32_t prev_alt_bkt_idx;
-       struct rte_hash_bucket *cur_bkt = bkt;
+       struct rte_hash_bucket *cur_bkt;
        struct queue_node *prev_node, *curr_node = leaf;
        struct rte_hash_bucket *prev_bkt, *curr_bkt = leaf->bkt;
        uint32_t prev_slot, curr_slot = leaf_slot;
@@ -597,18 +755,20 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
        /* Check if key was inserted after last check but before this
         * protected region.
         */
-       ret = search_and_update(h, data, key, cur_bkt, sig, alt_hash);
+       ret = search_and_update(h, data, key, bkt, sig);
        if (ret != -1) {
                __hash_rw_writer_unlock(h);
                *ret_val = ret;
                return 1;
        }
 
-       ret = search_and_update(h, data, key, alt_bkt, alt_hash, sig);
-       if (ret != -1) {
-               __hash_rw_writer_unlock(h);
-               *ret_val = ret;
-               return 1;
+       FOR_EACH_BUCKET(cur_bkt, alt_bkt) {
+               ret = search_and_update(h, data, key, cur_bkt, sig);
+               if (ret != -1) {
+                       __hash_rw_writer_unlock(h);
+                       *ret_val = ret;
+                       return 1;
+               }
        }
 
        while (likely(curr_node->prev != NULL)) {
@@ -616,36 +776,73 @@ rte_hash_cuckoo_move_insert_mw(const struct rte_hash *h,
                prev_bkt = prev_node->bkt;
                prev_slot = curr_node->prev_slot;
 
-               prev_alt_bkt_idx =
-                       prev_bkt->sig_alt[prev_slot] & h->bucket_bitmask;
+               prev_alt_bkt_idx = get_alt_bucket_index(h,
+                                       prev_node->cur_bkt_idx,
+                                       prev_bkt->sig_current[prev_slot]);
 
                if (unlikely(&h->buckets[prev_alt_bkt_idx]
                                != curr_bkt)) {
                        /* revert it to empty, otherwise duplicated keys */
-                       curr_bkt->key_idx[curr_slot] = EMPTY_SLOT;
+                       __atomic_store_n(&curr_bkt->key_idx[curr_slot],
+                               EMPTY_SLOT,
+                               __ATOMIC_RELEASE);
                        __hash_rw_writer_unlock(h);
                        return -1;
                }
 
+               if (h->readwrite_concur_lf_support) {
+                       /* Announce the previous move to readers. The
+                        * current move need not be announced yet, as the
+                        * entry being moved is still present in both the
+                        * primary and the secondary bucket. Since there
+                        * is a single writer, load-acquires on
+                        * tbl_chng_cnt are not required.
+                        */
+                       __atomic_store_n(h->tbl_chng_cnt,
+                                        *h->tbl_chng_cnt + 1,
+                                        __ATOMIC_RELEASE);
+                       /* The store to sig_current should not
+                        * move above the store to tbl_chng_cnt.
+                        */
+                       __atomic_thread_fence(__ATOMIC_RELEASE);
+               }
+
                /* Need to swap current/alt sig to allow later
                 * Cuckoo insert to move elements back to its
                 * primary bucket if available
                 */
-               curr_bkt->sig_alt[curr_slot] =
-                        prev_bkt->sig_current[prev_slot];
                curr_bkt->sig_current[curr_slot] =
-                       prev_bkt->sig_alt[prev_slot];
-               curr_bkt->key_idx[curr_slot] =
-                       prev_bkt->key_idx[prev_slot];
+                       prev_bkt->sig_current[prev_slot];
+               /* Release the updated bucket entry */
+               __atomic_store_n(&curr_bkt->key_idx[curr_slot],
+                       prev_bkt->key_idx[prev_slot],
+                       __ATOMIC_RELEASE);
 
                curr_slot = prev_slot;
                curr_node = prev_node;
                curr_bkt = curr_node->bkt;
        }
 
+       if (h->readwrite_concur_lf_support) {
+               /* Announce the previous move to readers. The
+                * current move need not be announced yet, as the
+                * entry being moved is still present in both the
+                * primary and the secondary bucket. Since there
+                * is a single writer, load-acquires on
+                * tbl_chng_cnt are not required.
+                */
+               __atomic_store_n(h->tbl_chng_cnt,
+                                *h->tbl_chng_cnt + 1,
+                                __ATOMIC_RELEASE);
+               /* The store to sig_current should not
+                * move above the store to tbl_chng_cnt.
+                */
+               __atomic_thread_fence(__ATOMIC_RELEASE);
+       }
+
        curr_bkt->sig_current[curr_slot] = sig;
-       curr_bkt->sig_alt[curr_slot] = alt_hash;
-       curr_bkt->key_idx[curr_slot] = new_idx;
+       /* Release the new bucket entry */
+       __atomic_store_n(&curr_bkt->key_idx[curr_slot],
+                        new_idx,
+                        __ATOMIC_RELEASE);
 
        __hash_rw_writer_unlock(h);
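
On the writer side, the lock-free protocol is: bump tbl_chng_cnt with a release store before each displacement, then fence so that the following signature and key-index stores cannot be reordered above the counter. A compact model of that step (readers' matching retry loop appears in the lookup changes below):

#include <stdint.h>

static void
announce_then_move(uint32_t *tbl_chng_cnt,
                   uint16_t *sig_dst, const uint16_t *sig_src)
{
        /* single writer, so a plain read-modify-write is enough */
        __atomic_store_n(tbl_chng_cnt, *tbl_chng_cnt + 1,
                         __ATOMIC_RELEASE);
        /* keep the signature move below the counter bump */
        __atomic_thread_fence(__ATOMIC_RELEASE);
        *sig_dst = *sig_src;
}
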
 
@@ -662,39 +859,44 @@ rte_hash_cuckoo_make_space_mw(const struct rte_hash *h,
                        struct rte_hash_bucket *bkt,
                        struct rte_hash_bucket *sec_bkt,
                        const struct rte_hash_key *key, void *data,
-                       hash_sig_t sig, hash_sig_t alt_hash,
+                       uint16_t sig, uint32_t bucket_idx,
                        uint32_t new_idx, int32_t *ret_val)
 {
        unsigned int i;
        struct queue_node queue[RTE_HASH_BFS_QUEUE_MAX_LEN];
        struct queue_node *tail, *head;
        struct rte_hash_bucket *curr_bkt, *alt_bkt;
+       uint32_t cur_idx, alt_idx;
 
        tail = queue;
        head = queue + 1;
        tail->bkt = bkt;
        tail->prev = NULL;
        tail->prev_slot = -1;
+       tail->cur_bkt_idx = bucket_idx;
 
        /* Cuckoo bfs Search */
        while (likely(tail != head && head <
                                        queue + RTE_HASH_BFS_QUEUE_MAX_LEN -
                                        RTE_HASH_BUCKET_ENTRIES)) {
                curr_bkt = tail->bkt;
+               cur_idx = tail->cur_bkt_idx;
                for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
                        if (curr_bkt->key_idx[i] == EMPTY_SLOT) {
                                int32_t ret = rte_hash_cuckoo_move_insert_mw(h,
                                                bkt, sec_bkt, key, data,
-                                               tail, i, sig, alt_hash,
+                                               tail, i, sig,
                                                new_idx, ret_val);
                                if (likely(ret != -1))
                                        return ret;
                        }
 
                        /* Enqueue new node and keep prev node info */
-                       alt_bkt = &(h->buckets[curr_bkt->sig_alt[i]
-                                                   & h->bucket_bitmask]);
+                       alt_idx = get_alt_bucket_index(h, cur_idx,
+                                               curr_bkt->sig_current[i]);
+                       alt_bkt = &(h->buckets[alt_idx]);
                        head->bkt = alt_bkt;
+                       head->cur_bkt_idx = alt_idx;
                        head->prev = tail;
                        head->prev_slot = i;
                        head++;
@@ -709,45 +911,50 @@ static inline int32_t
 __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
                                                hash_sig_t sig, void *data)
 {
-       hash_sig_t alt_hash;
+       uint16_t short_sig;
        uint32_t prim_bucket_idx, sec_bucket_idx;
-       struct rte_hash_bucket *prim_bkt, *sec_bkt;
+       struct rte_hash_bucket *prim_bkt, *sec_bkt, *cur_bkt;
        struct rte_hash_key *new_k, *keys = h->key_store;
        void *slot_id = NULL;
-       uint32_t new_idx;
+       void *ext_bkt_id = NULL;
+       uint32_t new_idx, bkt_id;
        int ret;
        unsigned n_slots;
        unsigned lcore_id;
+       unsigned int i;
        struct lcore_cache *cached_free_slots = NULL;
        int32_t ret_val;
+       struct rte_hash_bucket *last;
 
-       prim_bucket_idx = sig & h->bucket_bitmask;
+       short_sig = get_short_sig(sig);
+       prim_bucket_idx = get_prim_bucket_index(h, sig);
+       sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
        prim_bkt = &h->buckets[prim_bucket_idx];
-       rte_prefetch0(prim_bkt);
-
-       alt_hash = rte_hash_secondary_hash(sig);
-       sec_bucket_idx = alt_hash & h->bucket_bitmask;
        sec_bkt = &h->buckets[sec_bucket_idx];
+       rte_prefetch0(prim_bkt);
        rte_prefetch0(sec_bkt);
 
        /* Check if key is already inserted in primary location */
        __hash_rw_writer_lock(h);
-       ret = search_and_update(h, data, key, prim_bkt, sig, alt_hash);
+       ret = search_and_update(h, data, key, prim_bkt, short_sig);
        if (ret != -1) {
                __hash_rw_writer_unlock(h);
                return ret;
        }
 
        /* Check if key is already inserted in secondary location */
-       ret = search_and_update(h, data, key, sec_bkt, alt_hash, sig);
-       if (ret != -1) {
-               __hash_rw_writer_unlock(h);
-               return ret;
+       FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
+               ret = search_and_update(h, data, key, cur_bkt, short_sig);
+               if (ret != -1) {
+                       __hash_rw_writer_unlock(h);
+                       return ret;
+               }
        }
+
        __hash_rw_writer_unlock(h);
 
        /* Did not find a match, so get a new slot for storing the new key */
-       if (h->multi_writer_support) {
+       if (h->use_local_cache) {
                lcore_id = rte_lcore_id();
                cached_free_slots = &h->local_free_slots[lcore_id];
                /* Try to get a free slot from the local cache */
@@ -776,12 +983,19 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
        new_idx = (uint32_t)((uintptr_t) slot_id);
        /* Copy key */
        rte_memcpy(new_k->key, key, h->key_len);
-       new_k->pdata = data;
-
+       /* Key can be of arbitrary length, so it is not possible to store
+        * it atomically. Hence the new key element's memory stores
+        * (key as well as data) should be complete before it is referenced.
+        * 'pdata' acts as the synchronization point when an existing hash
+        * entry is updated.
+        */
+       __atomic_store_n(&new_k->pdata,
+               data,
+               __ATOMIC_RELEASE);
 
        /* Find an empty slot and insert */
        ret = rte_hash_cuckoo_insert_mw(h, prim_bkt, sec_bkt, key, data,
-                                       sig, alt_hash, new_idx, &ret_val);
+                                       short_sig, new_idx, &ret_val);
        if (ret == 0)
                return new_idx - 1;
        else if (ret == 1) {
@@ -791,7 +1005,7 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 
        /* Primary bucket full, need to make space for new entry */
        ret = rte_hash_cuckoo_make_space_mw(h, prim_bkt, sec_bkt, key, data,
-                                       sig, alt_hash, new_idx, &ret_val);
+                               short_sig, prim_bucket_idx, new_idx, &ret_val);
        if (ret == 0)
                return new_idx - 1;
        else if (ret == 1) {
@@ -801,17 +1015,75 @@ __rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key,
 
        /* Also search secondary bucket to get better occupancy */
        ret = rte_hash_cuckoo_make_space_mw(h, sec_bkt, prim_bkt, key, data,
-                                       alt_hash, sig, new_idx, &ret_val);
+                               short_sig, sec_bucket_idx, new_idx, &ret_val);
 
        if (ret == 0)
                return new_idx - 1;
        else if (ret == 1) {
                enqueue_slot_back(h, cached_free_slots, slot_id);
                return ret_val;
-       } else {
+       }
+
+       /* if ext table not enabled, we failed the insertion */
+       if (!h->ext_table_support) {
                enqueue_slot_back(h, cached_free_slots, slot_id);
                return ret;
        }
+
+       /* Now we need to go through the extendable buckets. The write
+        * lock is needed to protect all extendable bucket operations.
+        */
+       __hash_rw_writer_lock(h);
+       /* Check for duplicates again; the key may have been inserted before the lock was taken */
+       ret = search_and_update(h, data, key, prim_bkt, short_sig);
+       if (ret != -1) {
+               enqueue_slot_back(h, cached_free_slots, slot_id);
+               goto failure;
+       }
+
+       FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
+               ret = search_and_update(h, data, key, cur_bkt, short_sig);
+               if (ret != -1) {
+                       enqueue_slot_back(h, cached_free_slots, slot_id);
+                       goto failure;
+               }
+       }
+
+       /* Search sec and ext buckets to find an empty entry to insert. */
+       FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
+               for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+                       /* Check if slot is available */
+                       if (likely(cur_bkt->key_idx[i] == EMPTY_SLOT)) {
+                               cur_bkt->sig_current[i] = short_sig;
+                               cur_bkt->key_idx[i] = new_idx;
+                               __hash_rw_writer_unlock(h);
+                               return new_idx - 1;
+                       }
+               }
+       }
+
+       /* Failed to get an empty entry from extendable buckets. Link a new
+        * extendable bucket. We first get a free bucket from the ring.
+        */
+       if (rte_ring_sc_dequeue(h->free_ext_bkts, &ext_bkt_id) != 0) {
+               ret = -ENOSPC;
+               goto failure;
+       }
+
+       bkt_id = (uint32_t)((uintptr_t)ext_bkt_id) - 1;
+       /* Use the first location of the new bucket */
+       (h->buckets_ext[bkt_id]).sig_current[0] = short_sig;
+       (h->buckets_ext[bkt_id]).key_idx[0] = new_idx;
+       /* Link the new bucket to sec bucket linked list */
+       last = rte_hash_get_last_bkt(sec_bkt);
+       last->next = &h->buckets_ext[bkt_id];
+       __hash_rw_writer_unlock(h);
+       return new_idx - 1;
+
+failure:
+       __hash_rw_writer_unlock(h);
+       return ret;
+
 }
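
For completeness, a hypothetical create call that opts into this fallback path (sizes invented): with EXT_TABLE set, an insert that loses the cuckoo displacement search chains a spare bucket instead of failing, and -ENOSPC is only returned once the spare-bucket ring is exhausted.

#include <rte_hash.h>
#include <rte_jhash.h>

static struct rte_hash *
make_ext_table(void)
{
        struct rte_hash_parameters p = {
                .name = "ext_ht",               /* invented name */
                .entries = 1 << 10,
                .key_len = 8,
                .hash_func = rte_jhash,
                .socket_id = 0,
                .extra_flag = RTE_HASH_EXTRA_FLAGS_EXT_TABLE,
        };

        return rte_hash_create(&p);
}
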
 
 int32_t
@@ -859,25 +1131,31 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data)
 
 /* Search one bucket to find the match key */
 static inline int32_t
-search_one_bucket(const struct rte_hash *h, const void *key, hash_sig_t sig,
+search_one_bucket(const struct rte_hash *h, const void *key, uint16_t sig,
                        void **data, const struct rte_hash_bucket *bkt)
 {
        int i;
+       uint32_t key_idx;
+       void *pdata;
        struct rte_hash_key *k, *keys = h->key_store;
 
        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-               if (bkt->sig_current[i] == sig &&
-                               bkt->key_idx[i] != EMPTY_SLOT) {
+               key_idx = __atomic_load_n(&bkt->key_idx[i],
+                                         __ATOMIC_ACQUIRE);
+               if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
                        k = (struct rte_hash_key *) ((char *)keys +
-                                       bkt->key_idx[i] * h->key_entry_size);
+                                       key_idx * h->key_entry_size);
+                       pdata = __atomic_load_n(&k->pdata,
+                                       __ATOMIC_ACQUIRE);
+
                        if (rte_hash_cmp_eq(key, k->key, h) == 0) {
                                if (data != NULL)
-                                       *data = k->pdata;
+                                       *data = pdata;
                                /*
                                 * Return index where key is stored,
                                 * subtracting the first dummy index
                                 */
-                               return bkt->key_idx[i] - 1;
+                               return key_idx - 1;
                        }
                }
        }
@@ -888,34 +1166,64 @@ static inline int32_t
 __rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key,
                                        hash_sig_t sig, void **data)
 {
-       uint32_t bucket_idx;
-       hash_sig_t alt_hash;
-       struct rte_hash_bucket *bkt;
+       uint32_t prim_bucket_idx, sec_bucket_idx;
+       struct rte_hash_bucket *bkt, *cur_bkt;
+       uint32_t cnt_b, cnt_a;
        int ret;
+       uint16_t short_sig;
 
-       bucket_idx = sig & h->bucket_bitmask;
-       bkt = &h->buckets[bucket_idx];
+       short_sig = get_short_sig(sig);
+       prim_bucket_idx = get_prim_bucket_index(h, sig);
+       sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
 
        __hash_rw_reader_lock(h);
 
-       /* Check if key is in primary location */
-       ret = search_one_bucket(h, key, sig, data, bkt);
-       if (ret != -1) {
-               __hash_rw_reader_unlock(h);
-               return ret;
-       }
-       /* Calculate secondary hash */
-       alt_hash = rte_hash_secondary_hash(sig);
-       bucket_idx = alt_hash & h->bucket_bitmask;
-       bkt = &h->buckets[bucket_idx];
+       do {
+               /* Load the table change counter before the lookup
+                * starts. Acquire semantics will make sure that
+                * loads in search_one_bucket are not hoisted.
+                */
+               cnt_b = __atomic_load_n(h->tbl_chng_cnt,
+                               __ATOMIC_ACQUIRE);
+
+               /* Check if key is in primary location */
+               bkt = &h->buckets[prim_bucket_idx];
+               ret = search_one_bucket(h, key, short_sig, data, bkt);
+               if (ret != -1) {
+                       __hash_rw_reader_unlock(h);
+                       return ret;
+               }
+               /* Calculate secondary hash */
+               bkt = &h->buckets[sec_bucket_idx];
+
+               /* Check if key is in secondary location */
+               FOR_EACH_BUCKET(cur_bkt, bkt) {
+                       ret = search_one_bucket(h, key, short_sig,
+                                               data, cur_bkt);
+                       if (ret != -1) {
+                               __hash_rw_reader_unlock(h);
+                               return ret;
+                       }
+               }
+
+               /* The loads of sig_current in search_one_bucket
+                * should not move below the load from tbl_chng_cnt.
+                */
+               __atomic_thread_fence(__ATOMIC_ACQUIRE);
+               /* Re-read the table change counter to check if the
+                * table has changed during search. If yes, re-do
+                * the search.
+                * This load should not get hoisted. The load
+                * acquires on cnt_b, key index in primary bucket
+                * and key index in secondary bucket will make sure
+                * that it does not get hoisted.
+                */
+               cnt_a = __atomic_load_n(h->tbl_chng_cnt,
+                                       __ATOMIC_ACQUIRE);
+       } while (cnt_b != cnt_a);
 
-       /* Check if key is in secondary location */
-       ret = search_one_bucket(h, key, alt_hash, data, bkt);
-       if (ret != -1) {
-               __hash_rw_reader_unlock(h);
-               return ret;
-       }
        __hash_rw_reader_unlock(h);
+
        return -ENOENT;
 }
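
The loop above is a seqlock-style read: load the change counter with acquire semantics, search, fence, re-load; if the counter moved, a writer displaced entries mid-search and the lookup repeats rather than reporting a false miss. The pattern in isolation (the search callback and context are placeholders):

#include <stdbool.h>
#include <stdint.h>

static bool
lookup_retry(const uint32_t *tbl_chng_cnt,
             bool (*search)(void *ctx), void *ctx)
{
        uint32_t cnt_b, cnt_a;

        do {
                cnt_b = __atomic_load_n(tbl_chng_cnt, __ATOMIC_ACQUIRE);
                if (search(ctx))
                        return true;
                /* keep the search loads above the counter re-check */
                __atomic_thread_fence(__ATOMIC_ACQUIRE);
                cnt_a = __atomic_load_n(tbl_chng_cnt, __ATOMIC_ACQUIRE);
        } while (cnt_b != cnt_a);

        return false;
}
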
 
@@ -955,9 +1263,7 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
        unsigned lcore_id, n_slots;
        struct lcore_cache *cached_free_slots;
 
-       bkt->sig_current[i] = NULL_SIGNATURE;
-       bkt->sig_alt[i] = NULL_SIGNATURE;
-       if (h->multi_writer_support) {
+       if (h->use_local_cache) {
                lcore_id = rte_lcore_id();
                cached_free_slots = &h->local_free_slots[lcore_id];
                /* Cache full, need to free it. */
@@ -978,31 +1284,67 @@ remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i)
        }
 }
 
-/* Search one bucket and remove the matched key */
+/* Compact the linked list by moving the key from the last entry in the
+ * linked list to the empty slot.
+ */
+static inline void
+__rte_hash_compact_ll(struct rte_hash_bucket *cur_bkt, int pos)
+{
+       int i;
+       struct rte_hash_bucket *last_bkt;
+
+       if (!cur_bkt->next)
+               return;
+
+       last_bkt = rte_hash_get_last_bkt(cur_bkt);
+
+       for (i = RTE_HASH_BUCKET_ENTRIES - 1; i >= 0; i--) {
+               if (last_bkt->key_idx[i] != EMPTY_SLOT) {
+                       cur_bkt->key_idx[pos] = last_bkt->key_idx[i];
+                       cur_bkt->sig_current[pos] = last_bkt->sig_current[i];
+                       last_bkt->sig_current[i] = NULL_SIGNATURE;
+                       last_bkt->key_idx[i] = EMPTY_SLOT;
+                       return;
+               }
+       }
+}
+
+/* Search one bucket and remove the matched key.
+ * The writer is expected to hold the lock while calling this
+ * function.
+ */
 static inline int32_t
 search_and_remove(const struct rte_hash *h, const void *key,
-                       struct rte_hash_bucket *bkt, hash_sig_t sig)
+                       struct rte_hash_bucket *bkt, uint16_t sig, int *pos)
 {
        struct rte_hash_key *k, *keys = h->key_store;
        unsigned int i;
-       int32_t ret;
+       uint32_t key_idx;
 
-       /* Check if key is in primary location */
+       /* Check if key is in bucket */
        for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
-               if (bkt->sig_current[i] == sig &&
-                               bkt->key_idx[i] != EMPTY_SLOT) {
+               key_idx = __atomic_load_n(&bkt->key_idx[i],
+                                         __ATOMIC_ACQUIRE);
+               if (bkt->sig_current[i] == sig && key_idx != EMPTY_SLOT) {
                        k = (struct rte_hash_key *) ((char *)keys +
-                                       bkt->key_idx[i] * h->key_entry_size);
+                                       key_idx * h->key_entry_size);
                        if (rte_hash_cmp_eq(key, k->key, h) == 0) {
-                               remove_entry(h, bkt, i);
+                               bkt->sig_current[i] = NULL_SIGNATURE;
+                               /* Free the key store index if
+                                * no_free_on_del is disabled.
+                                */
+                               if (!h->no_free_on_del)
+                                       remove_entry(h, bkt, i);
+
+                               __atomic_store_n(&bkt->key_idx[i],
+                                                EMPTY_SLOT,
+                                                __ATOMIC_RELEASE);
 
+                               *pos = i;
                                /*
                                 * Return index where key is stored,
                                 * subtracting the first dummy index
                                 */
-                               ret = bkt->key_idx[i] - 1;
-                               bkt->key_idx[i] = EMPTY_SLOT;
-                               return ret;
+                               return key_idx - 1;
                        }
                }
        }
@@ -1013,36 +1355,68 @@ static inline int32_t
 __rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key,
                                                hash_sig_t sig)
 {
-       uint32_t bucket_idx;
-       hash_sig_t alt_hash;
-       struct rte_hash_bucket *bkt;
-       int32_t ret;
-
-       bucket_idx = sig & h->bucket_bitmask;
-       bkt = &h->buckets[bucket_idx];
+       uint32_t prim_bucket_idx, sec_bucket_idx;
+       struct rte_hash_bucket *prim_bkt, *sec_bkt, *prev_bkt, *last_bkt;
+       struct rte_hash_bucket *cur_bkt;
+       int pos;
+       int32_t ret, i;
+       uint16_t short_sig;
+
+       short_sig = get_short_sig(sig);
+       prim_bucket_idx = get_prim_bucket_index(h, sig);
+       sec_bucket_idx = get_alt_bucket_index(h, prim_bucket_idx, short_sig);
+       prim_bkt = &h->buckets[prim_bucket_idx];
 
        __hash_rw_writer_lock(h);
        /* look for key in primary bucket */
-       ret = search_and_remove(h, key, bkt, sig);
+       ret = search_and_remove(h, key, prim_bkt, short_sig, &pos);
        if (ret != -1) {
-               __hash_rw_writer_unlock(h);
-               return ret;
+               __rte_hash_compact_ll(prim_bkt, pos);
+               last_bkt = prim_bkt->next;
+               prev_bkt = prim_bkt;
+               goto return_bkt;
        }
 
        /* Calculate secondary hash */
-       alt_hash = rte_hash_secondary_hash(sig);
-       bucket_idx = alt_hash & h->bucket_bitmask;
-       bkt = &h->buckets[bucket_idx];
+       sec_bkt = &h->buckets[sec_bucket_idx];
 
-       /* look for key in secondary bucket */
-       ret = search_and_remove(h, key, bkt, alt_hash);
-       if (ret != -1) {
+       FOR_EACH_BUCKET(cur_bkt, sec_bkt) {
+               ret = search_and_remove(h, key, cur_bkt, short_sig, &pos);
+               if (ret != -1) {
+                       __rte_hash_compact_ll(cur_bkt, pos);
+                       last_bkt = sec_bkt->next;
+                       prev_bkt = sec_bkt;
+                       goto return_bkt;
+               }
+       }
+
+       __hash_rw_writer_unlock(h);
+       return -ENOENT;
+
+/* Check whether the last bucket in the chain is empty so it can be recycled */
+return_bkt:
+       if (!last_bkt) {
                __hash_rw_writer_unlock(h);
                return ret;
        }
+       while (last_bkt->next) {
+               prev_bkt = last_bkt;
+               last_bkt = last_bkt->next;
+       }
+
+       for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
+               if (last_bkt->key_idx[i] != EMPTY_SLOT)
+                       break;
+       }
+       /* found an empty tail bucket: unlink and recycle it */
+       if (i == RTE_HASH_BUCKET_ENTRIES) {
+               prev_bkt->next = last_bkt->next = NULL;
+               uint32_t index = last_bkt - h->buckets_ext + 1;
+               rte_ring_sp_enqueue(h->free_ext_bkts, (void *)(uintptr_t)index);
+       }
 
        __hash_rw_writer_unlock(h);
-       return -ENOENT;
+       return ret;
 }
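
The recycle step above recovers the 1-based ring index from a bucket pointer by array subtraction; the +1 restores the convention that 0 means "no bucket". The arithmetic in isolation (the structure is a stand-in):

#include <assert.h>
#include <stdint.h>

struct bkt {
        struct bkt *next;
};

int
main(void)
{
        struct bkt buckets_ext[16];
        struct bkt *last_bkt = &buckets_ext[5];
        uint32_t index = (uint32_t)(last_bkt - buckets_ext) + 1;

        assert(index == 6);     /* value enqueued back on the ring */
        return 0;
}
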
 
 int32_t
@@ -1080,59 +1454,76 @@ rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
        return 0;
 }
 
+int __rte_experimental
+rte_hash_free_key_with_position(const struct rte_hash *h,
+                               const int32_t position)
+{
+       RETURN_IF_TRUE(((h == NULL) || (position == EMPTY_SLOT)), -EINVAL);
+
+       unsigned int lcore_id, n_slots;
+       struct lcore_cache *cached_free_slots;
+       const int32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
+
+       /* Out of bounds */
+       if (position >= total_entries)
+               return -EINVAL;
+
+       if (h->use_local_cache) {
+               lcore_id = rte_lcore_id();
+               cached_free_slots = &h->local_free_slots[lcore_id];
+               /* Cache full, need to free it. */
+               if (cached_free_slots->len == LCORE_CACHE_SIZE) {
+                       /* Need to enqueue the free slots in global ring. */
+                       n_slots = rte_ring_mp_enqueue_burst(h->free_slots,
+                                               cached_free_slots->objs,
+                                               LCORE_CACHE_SIZE, NULL);
+                       cached_free_slots->len -= n_slots;
+               }
+               /* Put index of new free slot in cache. */
+               cached_free_slots->objs[cached_free_slots->len] =
+                                       (void *)((uintptr_t)position);
+               cached_free_slots->len++;
+       } else {
+               rte_ring_sp_enqueue(h->free_slots,
+                               (void *)((uintptr_t)position));
+       }
+
+       return 0;
+}
+
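
A hypothetical deletion flow using the new API pair: when NO_FREE_ON_DEL is set (the lock-free flag implies it), rte_hash_del_key leaves the key-store slot allocated so in-flight readers stay safe, and the application frees it later with rte_hash_free_key_with_position once no reader can still reference it. The grace-period wait below is an application-defined placeholder, not a DPDK API:

#include <rte_hash.h>

extern void wait_for_readers_to_quiesce(void);  /* placeholder */

static void
delete_deferred(struct rte_hash *h, const void *key)
{
        int32_t pos = rte_hash_del_key(h, key);

        if (pos < 0)
                return;         /* key not found */
        wait_for_readers_to_quiesce();
        rte_hash_free_key_with_position(h, pos);
}
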
 static inline void
 compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches,
                        const struct rte_hash_bucket *prim_bkt,
                        const struct rte_hash_bucket *sec_bkt,
-                       hash_sig_t prim_hash, hash_sig_t sec_hash,
+                       uint16_t sig,
                        enum rte_hash_sig_compare_function sig_cmp_fn)
 {
        unsigned int i;
 
+       /* In the match mask, the first bit of every two-bit pair indicates a match */
        switch (sig_cmp_fn) {
-#ifdef RTE_MACHINE_CPUFLAG_AVX2
-       case RTE_HASH_COMPARE_AVX2:
-               *prim_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
-                               _mm256_load_si256(
-                                       (__m256i const *)prim_bkt->sig_current),
-                               _mm256_set1_epi32(prim_hash)));
-               *sec_hash_matches = _mm256_movemask_ps((__m256)_mm256_cmpeq_epi32(
-                               _mm256_load_si256(
-                                       (__m256i const *)sec_bkt->sig_current),
-                               _mm256_set1_epi32(sec_hash)));
-               break;
-#endif
 #ifdef RTE_MACHINE_CPUFLAG_SSE2
        case RTE_HASH_COMPARE_SSE:
-               /* Compare the first 4 signatures in the bucket */
-               *prim_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+               /* Compare all signatures in the bucket */
+               *prim_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
                                _mm_load_si128(
                                        (__m128i const *)prim_bkt->sig_current),
-                               _mm_set1_epi32(prim_hash)));
-               *prim_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
-                               _mm_load_si128(
-                                       (__m128i const *)&prim_bkt->sig_current[4]),
-                               _mm_set1_epi32(prim_hash)))) << 4;
-               /* Compare the first 4 signatures in the bucket */
-               *sec_hash_matches = _mm_movemask_ps((__m128)_mm_cmpeq_epi16(
+                               _mm_set1_epi16(sig)));
+               /* Compare all signatures in the bucket */
+               *sec_hash_matches = _mm_movemask_epi8(_mm_cmpeq_epi16(
                                _mm_load_si128(
                                        (__m128i const *)sec_bkt->sig_current),
-                               _mm_set1_epi32(sec_hash)));
-               *sec_hash_matches |= (_mm_movemask_ps((__m128)_mm_cmpeq_epi16(
-                               _mm_load_si128(
-                                       (__m128i const *)&sec_bkt->sig_current[4]),
-                               _mm_set1_epi32(sec_hash)))) << 4;
+                               _mm_set1_epi16(sig)));
                break;
 #endif
        default:
                for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) {
                        *prim_hash_matches |=
-                               ((prim_hash == prim_bkt->sig_current[i]) << i);
+                               ((sig == prim_bkt->sig_current[i]) << (i << 1));
                        *sec_hash_matches |=
-                               ((sec_hash == sec_bkt->sig_current[i]) << i);
+                               ((sig == sec_bkt->sig_current[i]) << (i << 1));
                }
        }
-
 }
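
With 16-bit signatures compared via _mm_cmpeq_epi16 and _mm_movemask_epi8, each matching entry sets a pair of adjacent bits in the mask, so an entry index is ctz(mask) >> 1 and a processed hit is cleared with ~(3 << (index << 1)), as the bulk-lookup changes below do. A self-contained check of the bit arithmetic:

#include <assert.h>
#include <stdint.h>

int
main(void)
{
        /* pretend bucket entries 2 and 5 matched: each hit is a
         * pair of set bits, as _mm_movemask_epi8 would produce */
        uint32_t mask = (3u << (2 << 1)) | (3u << (5 << 1));

        uint32_t first = (uint32_t)__builtin_ctzl(mask) >> 1;
        assert(first == 2);

        mask &= ~(3ul << (first << 1));         /* clear the hit */
        assert(((uint32_t)__builtin_ctzl(mask) >> 1) == 5);
        return 0;
}
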
 
 #define PREFETCH_OFFSET 4
@@ -1143,12 +1534,18 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
 {
        uint64_t hits = 0;
        int32_t i;
+       int32_t ret;
        uint32_t prim_hash[RTE_HASH_LOOKUP_BULK_MAX];
-       uint32_t sec_hash[RTE_HASH_LOOKUP_BULK_MAX];
+       uint32_t prim_index[RTE_HASH_LOOKUP_BULK_MAX];
+       uint32_t sec_index[RTE_HASH_LOOKUP_BULK_MAX];
+       uint16_t sig[RTE_HASH_LOOKUP_BULK_MAX];
        const struct rte_hash_bucket *primary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
        const struct rte_hash_bucket *secondary_bkt[RTE_HASH_LOOKUP_BULK_MAX];
        uint32_t prim_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
        uint32_t sec_hitmask[RTE_HASH_LOOKUP_BULK_MAX] = {0};
+       struct rte_hash_bucket *cur_bkt, *next_bkt;
+       void *pdata[RTE_HASH_LOOKUP_BULK_MAX];
+       uint32_t cnt_b, cnt_a;
 
        /* Prefetch first keys */
        for (i = 0; i < PREFETCH_OFFSET && i < num_keys; i++)
@@ -1162,10 +1559,13 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
                rte_prefetch0(keys[i + PREFETCH_OFFSET]);
 
                prim_hash[i] = rte_hash_hash(h, keys[i]);
-               sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
 
-               primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
-               secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];
+               sig[i] = get_short_sig(prim_hash[i]);
+               prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
+               sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
+
+               primary_bkt[i] = &h->buckets[prim_index[i]];
+               secondary_bkt[i] = &h->buckets[sec_index[i]];
 
                rte_prefetch0(primary_bkt[i]);
                rte_prefetch0(secondary_bkt[i]);
@@ -1174,96 +1574,178 @@ __rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys,
        /* Calculate and prefetch rest of the buckets */
        for (; i < num_keys; i++) {
                prim_hash[i] = rte_hash_hash(h, keys[i]);
-               sec_hash[i] = rte_hash_secondary_hash(prim_hash[i]);
 
-               primary_bkt[i] = &h->buckets[prim_hash[i] & h->bucket_bitmask];
-               secondary_bkt[i] = &h->buckets[sec_hash[i] & h->bucket_bitmask];
+               sig[i] = get_short_sig(prim_hash[i]);
+               prim_index[i] = get_prim_bucket_index(h, prim_hash[i]);
+               sec_index[i] = get_alt_bucket_index(h, prim_index[i], sig[i]);
+
+               primary_bkt[i] = &h->buckets[prim_index[i]];
+               secondary_bkt[i] = &h->buckets[sec_index[i]];
 
                rte_prefetch0(primary_bkt[i]);
                rte_prefetch0(secondary_bkt[i]);
        }
 
        __hash_rw_reader_lock(h);
-       /* Compare signatures and prefetch key slot of first hit */
-       for (i = 0; i < num_keys; i++) {
-               compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
+       do {
+               /* Load the table change counter before the lookup
+                * starts. Acquire semantics will make sure that
+                * loads in compare_signatures are not hoisted.
+                */
+               cnt_b = __atomic_load_n(h->tbl_chng_cnt,
+                                       __ATOMIC_ACQUIRE);
+
+               /* Compare signatures and prefetch key slot of first hit */
+               for (i = 0; i < num_keys; i++) {
+                       compare_signatures(&prim_hitmask[i], &sec_hitmask[i],
                                primary_bkt[i], secondary_bkt[i],
-                               prim_hash[i], sec_hash[i], h->sig_cmp_fn);
-
-               if (prim_hitmask[i]) {
-                       uint32_t first_hit = __builtin_ctzl(prim_hitmask[i]);
-                       uint32_t key_idx = primary_bkt[i]->key_idx[first_hit];
-                       const struct rte_hash_key *key_slot =
-                               (const struct rte_hash_key *)(
-                               (const char *)h->key_store +
-                               key_idx * h->key_entry_size);
-                       rte_prefetch0(key_slot);
-                       continue;
-               }
+                               sig[i], h->sig_cmp_fn);
+
+                       if (prim_hitmask[i]) {
+                               uint32_t first_hit =
+                                               __builtin_ctzl(prim_hitmask[i])
+                                               >> 1;
+                               uint32_t key_idx =
+                                       primary_bkt[i]->key_idx[first_hit];
+                               const struct rte_hash_key *key_slot =
+                                       (const struct rte_hash_key *)(
+                                       (const char *)h->key_store +
+                                       key_idx * h->key_entry_size);
+                               rte_prefetch0(key_slot);
+                               continue;
+                       }
 
-               if (sec_hitmask[i]) {
-                       uint32_t first_hit = __builtin_ctzl(sec_hitmask[i]);
-                       uint32_t key_idx = secondary_bkt[i]->key_idx[first_hit];
-                       const struct rte_hash_key *key_slot =
-                               (const struct rte_hash_key *)(
-                               (const char *)h->key_store +
-                               key_idx * h->key_entry_size);
-                       rte_prefetch0(key_slot);
+                       if (sec_hitmask[i]) {
+                               uint32_t first_hit =
+                                               __builtin_ctzl(sec_hitmask[i])
+                                               >> 1;
+                               uint32_t key_idx =
+                                       secondary_bkt[i]->key_idx[first_hit];
+                               const struct rte_hash_key *key_slot =
+                                       (const struct rte_hash_key *)(
+                                       (const char *)h->key_store +
+                                       key_idx * h->key_entry_size);
+                               rte_prefetch0(key_slot);
+                       }
                }
-       }
 
-       /* Compare keys, first hits in primary first */
-       for (i = 0; i < num_keys; i++) {
-               positions[i] = -ENOENT;
-               while (prim_hitmask[i]) {
-                       uint32_t hit_index = __builtin_ctzl(prim_hitmask[i]);
-
-                       uint32_t key_idx = primary_bkt[i]->key_idx[hit_index];
-                       const struct rte_hash_key *key_slot =
-                               (const struct rte_hash_key *)(
-                               (const char *)h->key_store +
-                               key_idx * h->key_entry_size);
-                       /*
-                        * If key index is 0, do not compare key,
-                        * as it is checking the dummy slot
-                        */
-                       if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
-                               if (data != NULL)
-                                       data[i] = key_slot->pdata;
+               /* Compare keys, first hits in primary first */
+               for (i = 0; i < num_keys; i++) {
+                       positions[i] = -ENOENT;
+                       while (prim_hitmask[i]) {
+                               uint32_t hit_index =
+                                               __builtin_ctzl(prim_hitmask[i])
+                                               >> 1;
+                               uint32_t key_idx =
+                               __atomic_load_n(
+                                       &primary_bkt[i]->key_idx[hit_index],
+                                       __ATOMIC_ACQUIRE);
+                               const struct rte_hash_key *key_slot =
+                                       (const struct rte_hash_key *)(
+                                       (const char *)h->key_store +
+                                       key_idx * h->key_entry_size);
+
+                               if (key_idx != EMPTY_SLOT)
+                                       pdata[i] = __atomic_load_n(
+                                                       &key_slot->pdata,
+                                                       __ATOMIC_ACQUIRE);
+                               /*
+                                * If the key index is 0, do not compare the
+                                * key: index 0 is the dummy slot.
+                                */
+                               if (!!key_idx &
+                                       !rte_hash_cmp_eq(
+                                               key_slot->key, keys[i], h)) {
+                                       if (data != NULL)
+                                               data[i] = pdata[i];
+
+                                       hits |= 1ULL << i;
+                                       positions[i] = key_idx - 1;
+                                       goto next_key;
+                               }
+                               prim_hitmask[i] &= ~(3ULL << (hit_index << 1));
+                       }
 
-                               hits |= 1ULL << i;
-                               positions[i] = key_idx - 1;
-                               goto next_key;
+                       while (sec_hitmask[i]) {
+                               uint32_t hit_index =
+                                               __builtin_ctzl(sec_hitmask[i])
+                                               >> 1;
+                               uint32_t key_idx =
+                               __atomic_load_n(
+                                       &secondary_bkt[i]->key_idx[hit_index],
+                                       __ATOMIC_ACQUIRE);
+                               const struct rte_hash_key *key_slot =
+                                       (const struct rte_hash_key *)(
+                                       (const char *)h->key_store +
+                                       key_idx * h->key_entry_size);
+
+                               if (key_idx != EMPTY_SLOT)
+                                       pdata[i] = __atomic_load_n(
+                                                       &key_slot->pdata,
+                                                       __ATOMIC_ACQUIRE);
+                               /*
+                                * If the key index is 0, do not compare the
+                                * key: index 0 is the dummy slot.
+                                */
+
+                               if (!!key_idx &
+                                       !rte_hash_cmp_eq(
+                                               key_slot->key, keys[i], h)) {
+                                       if (data != NULL)
+                                               data[i] = pdata[i];
+
+                                       hits |= 1ULL << i;
+                                       positions[i] = key_idx - 1;
+                                       goto next_key;
+                               }
+                               sec_hitmask[i] &= ~(3ULL << (hit_index << 1));
                        }
-                       prim_hitmask[i] &= ~(1 << (hit_index));
+next_key:
+                       continue;
                }
 
-               while (sec_hitmask[i]) {
-                       uint32_t hit_index = __builtin_ctzl(sec_hitmask[i]);
-
-                       uint32_t key_idx = secondary_bkt[i]->key_idx[hit_index];
-                       const struct rte_hash_key *key_slot =
-                               (const struct rte_hash_key *)(
-                               (const char *)h->key_store +
-                               key_idx * h->key_entry_size);
-                       /*
-                        * If key index is 0, do not compare key,
-                        * as it is checking the dummy slot
-                        */
-
-                       if (!!key_idx & !rte_hash_cmp_eq(key_slot->key, keys[i], h)) {
-                               if (data != NULL)
-                                       data[i] = key_slot->pdata;
+               /* The loads of sig_current in compare_signatures
+                * should not move below the load from tbl_chng_cnt.
+                */
+               __atomic_thread_fence(__ATOMIC_ACQUIRE);
+               /* Re-read the table change counter to check if the
+                * table has changed during the search. If it has,
+                * re-do the search.
+                * This load must not be hoisted; the acquire loads
+                * of cnt_b and of the primary and secondary key
+                * indexes ensure that it is not.
+                */
+               cnt_a = __atomic_load_n(h->tbl_chng_cnt,
+                                       __ATOMIC_ACQUIRE);
+       } while (cnt_b != cnt_a);
+
+       /* all found, do not need to go through ext bkt */
+       if ((hits == ((1ULL << num_keys) - 1)) || !h->ext_table_support) {
+               if (hit_mask != NULL)
+                       *hit_mask = hits;
+               __hash_rw_reader_unlock(h);
+               return;
+       }
 
+       /* need to check ext buckets for match */
+       for (i = 0; i < num_keys; i++) {
+               if ((hits & (1ULL << i)) != 0)
+                       continue;
+               next_bkt = secondary_bkt[i]->next;
+               FOR_EACH_BUCKET(cur_bkt, next_bkt) {
+                       if (data != NULL)
+                               ret = search_one_bucket(h, keys[i],
+                                               sig[i], &data[i], cur_bkt);
+                       else
+                               ret = search_one_bucket(h, keys[i],
+                                               sig[i], NULL, cur_bkt);
+                       if (ret != -1) {
+                               positions[i] = ret;
                                hits |= 1ULL << i;
-                               positions[i] = key_idx - 1;
-                               goto next_key;
+                               break;
                        }
-                       sec_hitmask[i] &= ~(1 << (hit_index));
                }
-
-next_key:
-               continue;
        }
 
        __hash_rw_reader_unlock(h);
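
The lock-free lookup above brackets the bucket scan with acquire loads of
h->tbl_chng_cnt and retries whenever a writer moved keys in between. A
minimal sketch of the same reader-side pattern, with hypothetical names
(cnt, scan_buckets):

        uint32_t cnt_before, cnt_after;

        do {
                cnt_before = __atomic_load_n(&cnt, __ATOMIC_ACQUIRE);
                scan_buckets(); /* acquire-loads of sigs/key indexes */
                /* keep the scan's loads above the second counter load */
                __atomic_thread_fence(__ATOMIC_ACQUIRE);
                cnt_after = __atomic_load_n(&cnt, __ATOMIC_ACQUIRE);
        } while (cnt_before != cnt_after);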
@@ -1308,27 +1790,30 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
 
        RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL);
 
-       const uint32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES;
-       /* Out of bounds */
-       if (*next >= total_entries)
-               return -ENOENT;
+       const uint32_t total_entries_main = h->num_buckets *
+                                                       RTE_HASH_BUCKET_ENTRIES;
+       const uint32_t total_entries = total_entries_main << 1;
+
+       /* Past the main-table buckets; continue in the extendable buckets */
+       if (*next >= total_entries_main)
+               goto extend_table;
 
        /* Calculate bucket and index of current iterator */
        bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
        idx = *next % RTE_HASH_BUCKET_ENTRIES;
 
        /* If current position is empty, go to the next one */
-       while (h->buckets[bucket_idx].key_idx[idx] == EMPTY_SLOT) {
+       while ((position = __atomic_load_n(&h->buckets[bucket_idx].key_idx[idx],
+                                       __ATOMIC_ACQUIRE)) == EMPTY_SLOT) {
                (*next)++;
                /* End of table */
-               if (*next == total_entries)
-                       return -ENOENT;
+               if (*next == total_entries_main)
+                       goto extend_table;
                bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES;
                idx = *next % RTE_HASH_BUCKET_ENTRIES;
        }
+
        __hash_rw_reader_lock(h);
-       /* Get position of entry in key table */
-       position = h->buckets[bucket_idx].key_idx[idx];
        next_key = (struct rte_hash_key *) ((char *)h->key_store +
                                position * h->key_entry_size);
        /* Return key and data */
@@ -1341,4 +1826,34 @@ rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32
        (*next)++;
 
        return position - 1;
+
+/* Begin to iterate extendable buckets */
+extend_table:
+       /* Beyond all buckets, or the ext bucket feature is not enabled */
+       if (*next >= total_entries || !h->ext_table_support)
+               return -ENOENT;
+
+       bucket_idx = (*next - total_entries_main) / RTE_HASH_BUCKET_ENTRIES;
+       idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
+
+       while ((position = h->buckets_ext[bucket_idx].key_idx[idx]) == EMPTY_SLOT) {
+               (*next)++;
+               if (*next == total_entries)
+                       return -ENOENT;
+               bucket_idx = (*next - total_entries_main) /
+                                               RTE_HASH_BUCKET_ENTRIES;
+               idx = (*next - total_entries_main) % RTE_HASH_BUCKET_ENTRIES;
+       }
+       __hash_rw_reader_lock(h);
+       next_key = (struct rte_hash_key *) ((char *)h->key_store +
+                               position * h->key_entry_size);
+       /* Return key and data */
+       *key = next_key->key;
+       *data = next_key->pdata;
+
+       __hash_rw_reader_unlock(h);
+
+       /* Increment iterator */
+       (*next)++;
+       return position - 1;
 }
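
The iterator keeps its call contract for applications; a minimal usage
sketch, assuming an existing populated table h:

        const void *key;
        void *data;
        uint32_t next = 0;

        while (rte_hash_iterate(h, &key, &data, &next) >= 0) {
                /* main-table entries are returned first, then entries
                 * that spilled into the extendable buckets
                 */
        }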
index b43f467..5dfbbc4 100644 (file)
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2016 Intel Corporation
+ * Copyright(c) 2018 Arm Limited
  */
 
 /* rte_cuckoo_hash.h
@@ -104,8 +105,6 @@ const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = {
 
 #define LCORE_CACHE_SIZE               64
 
-#define RTE_HASH_MAX_PUSHES             100
-
 #define RTE_HASH_BFS_QUEUE_MAX_LEN       1000
 
 #define RTE_XABORT_CUCKOO_PATH_INVALIDED 0x4
@@ -125,25 +124,24 @@ struct rte_hash_key {
        };
        /* Variable key size */
        char key[0];
-} __attribute__((aligned(KEY_ALIGNMENT)));
+};
 
 /* All different signature compare functions */
 enum rte_hash_sig_compare_function {
        RTE_HASH_COMPARE_SCALAR = 0,
        RTE_HASH_COMPARE_SSE,
-       RTE_HASH_COMPARE_AVX2,
        RTE_HASH_COMPARE_NUM
 };
 
 /** Bucket structure */
 struct rte_hash_bucket {
-       hash_sig_t sig_current[RTE_HASH_BUCKET_ENTRIES];
+       uint16_t sig_current[RTE_HASH_BUCKET_ENTRIES];
 
        uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES];
 
-       hash_sig_t sig_alt[RTE_HASH_BUCKET_ENTRIES];
-
        uint8_t flag[RTE_HASH_BUCKET_ENTRIES];
+
+       void *next;
 } __rte_cache_aligned;
 
 /** A hash table structure. */
@@ -164,10 +162,23 @@ struct rte_hash {
        /**< Length of hash key. */
        uint8_t hw_trans_mem_support;
        /**< If hardware transactional memory is used. */
-       uint8_t multi_writer_support;
-       /**< If multi-writer support is enabled. */
+       uint8_t use_local_cache;
+       /**< If multi-writer support is enabled, use local cache
+        * to allocate key-store slots.
+        */
        uint8_t readwrite_concur_support;
        /**< If read-write concurrency support is enabled */
+       uint8_t ext_table_support;     /**< Enable extendable bucket table */
+       uint8_t no_free_on_del;
+       /**< If set, the key index is not freed on calling rte_hash_del_xxx
+        * APIs; rte_hash_free_key_with_position must then be called to
+        * free the key index associated with the deleted entry.
+        */
+       uint8_t readwrite_concur_lf_support;
+       /**< If read-write concurrency lock free support is enabled */
+       uint8_t writer_takes_lock;
+       /**< Indicates if the writer threads need to take lock */
        rte_hash_function hash_func;    /**< Function used to calculate hash. */
        uint32_t hash_func_init_val;    /**< Init value used by hash_func. */
        rte_hash_cmp_eq_t rte_hash_custom_cmp_eq;
@@ -186,10 +197,15 @@ struct rte_hash {
         * to the key table.
         */
        rte_rwlock_t *readwrite_lock; /**< Read-write lock thread-safety. */
+       struct rte_hash_bucket *buckets_ext; /**< Extra buckets array */
+       struct rte_ring *free_ext_bkts; /**< Ring of indexes of free buckets */
+       uint32_t *tbl_chng_cnt;
+       /**< Counter indicating if the hash table changed since the last read. */
 } __rte_cache_aligned;
 
 struct queue_node {
        struct rte_hash_bucket *bkt; /* Current bucket on the bfs search */
+       uint32_t cur_bkt_idx;
 
        struct queue_node *prev;     /* Parent(bucket) in search path */
        int prev_slot;               /* Parent(slot) in search path */
index 9e7d931..c93d1a1 100644 (file)
@@ -14,6 +14,8 @@
 #include <stdint.h>
 #include <stddef.h>
 
+#include <rte_compat.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -37,7 +39,27 @@ extern "C" {
 /** Flag to support reader writer concurrency */
 #define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04
 
-/** Signature of key that is stored internally. */
+/** Flag to indicate the extendable bucket table feature should be used */
+#define RTE_HASH_EXTRA_FLAGS_EXT_TABLE 0x08
+
+/** Flag to disable freeing of key index on hash delete.
+ * Refer to rte_hash_del_xxx APIs for more details.
+ * This is enabled by default when RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF
+ * is enabled.
+ */
+#define RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL 0x10
+
+/** Flag to support lock-free reader-writer concurrency. Both single-writer
+ * and multi-writer use cases are supported.
+ * Currently, the extendable bucket table feature is not supported
+ * together with this flag.
+ */
+#define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF 0x20
+
+/**
+ * The type of the hash value of a key.
+ * It should be at least 32 bits wide with a fully random bit pattern.
+ */
 typedef uint32_t hash_sig_t;
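+
+/*
+ * Example (a minimal sketch, not part of this patch): the extra_flag
+ * bits above are passed via rte_hash_parameters at creation time.
+ * Note RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF cannot be combined with
+ * RTE_HASH_EXTRA_FLAGS_EXT_TABLE.
+ *
+ *	struct rte_hash_parameters params = {
+ *		.name = "flow_table",
+ *		.entries = 1 << 16,
+ *		.key_len = sizeof(uint32_t),
+ *		.socket_id = rte_socket_id(),
+ *		.extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF,
+ *	};
+ *	struct rte_hash *h = rte_hash_create(&params);
+ */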
 
 /** Type of function that can be used for calculating the hash value. */
@@ -119,7 +141,12 @@ void
 rte_hash_free(struct rte_hash *h);
 
 /**
- * Reset all hash structure, by zeroing all entries
+ * Reset the entire hash structure by zeroing all entries.
+ * When RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF is enabled,
+ * it is the application's responsibility to make sure that
+ * no readers are referencing the hash table
+ * while calling this API.
+ *
  * @param h
  *   Hash table to reset
  */
@@ -143,6 +170,11 @@ rte_hash_count(const struct rte_hash *h);
  * and should only be called from one thread by default.
  * Thread safety can be enabled by setting flag during
  * table creation.
+ * If the key already exists in the table, this API updates its value
+ * with the 'data' passed in. It is the responsibility of
+ * the application to manage any memory associated with the old value.
+ * Readers might still be using the old value even after this API
+ * has returned.
  *
  * @param h
  *   Hash table to add the key to.
@@ -165,6 +197,11 @@ rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data);
  * and should only be called from one thread by default.
  * Thread safety can be enabled by setting flag during
  * table creation.
+ * If the key already exists in the table, this API updates its value
+ * with the 'data' passed in. It is the responsibility of
+ * the application to manage any memory associated with the old value.
+ * Readers might still be using the old value even after this API
+ * has returned.
  *
  * @param h
  *   Hash table to add the key to.
@@ -230,6 +267,14 @@ rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t
  * and should only be called from one thread by default.
  * Thread safety can be enabled by setting flag during
  * table creation.
+ * If RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL or
+ * RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF is enabled,
+ * the key index returned by the rte_hash_add_key_xxx APIs will not be
+ * freed by this API. rte_hash_free_key_with_position must additionally
+ * be called to free the index associated with the key, and only after
+ * all readers have stopped referencing the entry corresponding to
+ * this key. RCU mechanisms can be used to determine such a state.
  *
  * @param h
  *   Hash table to remove the key from.
@@ -251,6 +296,14 @@ rte_hash_del_key(const struct rte_hash *h, const void *key);
  * and should only be called from one thread by default.
  * Thread safety can be enabled by setting flag during
  * table creation.
+ * If RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL or
+ * RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF is enabled,
+ * the key index returned by the rte_hash_add_key_xxx APIs will not be
+ * freed by this API. rte_hash_free_key_with_position must additionally
+ * be called to free the index associated with the key, and only after
+ * all readers have stopped referencing the entry corresponding to
+ * this key. RCU mechanisms can be used to determine such a state.
  *
  * @param h
  *   Hash table to remove the key from.
@@ -289,6 +342,34 @@ int
 rte_hash_get_key_with_position(const struct rte_hash *h, const int32_t position,
                               void **key);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Free a hash key in the hash table given the position
+ * of the key. This operation is not multi-thread safe and should
+ * only be called from one thread by default. Thread safety
+ * can be enabled by setting flag during table creation.
+ * If RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL or
+ * RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF is enabled,
+ * the key index returned by rte_hash_del_key_xxx APIs must be freed
+ * using this API. This API should be called after all the readers
+ * have stopped referencing the entry corresponding to this key.
+ * RCU mechanisms could be used to determine such a state.
+ * This API does not validate whether the key has already been freed.
+ *
+ * @param h
+ *   Hash table to free the key from.
+ * @param position
+ *   Position returned when the key was deleted.
+ * @return
+ *   - 0 if freed successfully
+ *   - -EINVAL if the parameters are invalid.
+ */
+int __rte_experimental
+rte_hash_free_key_with_position(const struct rte_hash *h,
+                               const int32_t position);
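+
+/*
+ * Example (sketch; quiescence detection is application-specific, and
+ * wait_for_readers is a hypothetical RCU-like barrier):
+ *
+ *	int32_t pos = rte_hash_del_key(h, &key);
+ *	if (pos >= 0) {
+ *		wait_for_readers();
+ *		rte_hash_free_key_with_position(h, pos);
+ *	}
+ */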
+
 /**
  * Find a key-value pair in the hash table.
  * This operation is multi-thread safe with regarding to other lookup threads.
index e216ac8..734ae28 100644 (file)
@@ -53,3 +53,10 @@ DPDK_18.08 {
        rte_hash_count;
 
 } DPDK_16.07;
+
+EXPERIMENTAL {
+       global:
+
+       rte_hash_free_key_with_position;
+
+};
index 197acf8..0f62e2e 100644 (file)
 #define IPv6_KEY_BYTES_FMT \
        "%08" PRIx64 "%08" PRIx64 "%08" PRIx64 "%08" PRIx64
 
+#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT
+#define        IP_FRAG_TBL_STAT_UPDATE(s, f, v)        ((s)->f += (v))
+#else
+#define        IP_FRAG_TBL_STAT_UPDATE(s, f, v)        do {} while (0)
+#endif /* RTE_LIBRTE_IP_FRAG_TBL_STAT */
+
 /* internal functions declarations */
 struct rte_mbuf * ip_frag_process(struct ip_frag_pkt *fp,
                struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb,
@@ -69,10 +75,11 @@ ip_frag_key_invalidate(struct ip_frag_key * key)
 }
 
 /* compare two keys */
-static inline int
+static inline uint64_t
 ip_frag_key_cmp(const struct ip_frag_key * k1, const struct ip_frag_key * k2)
 {
-       uint32_t i, val;
+       uint32_t i;
+       uint64_t val;
        val = k1->id ^ k2->id;
        for (i = 0; i < k1->key_len; i++)
                val |= k1->src_dst[i] ^ k2->src_dst[i];
@@ -149,4 +156,16 @@ ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms)
        fp->frags[IP_FIRST_FRAG_IDX] = zero_frag;
 }
 
+/* local frag table helper functions */
+static inline void
+ip_frag_tbl_del(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
+       struct ip_frag_pkt *fp)
+{
+       ip_frag_free(fp, dr);
+       ip_frag_key_invalidate(&fp->key);
+       TAILQ_REMOVE(&tbl->lru, fp, lru);
+       tbl->use_entries--;
+       IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1);
+}
+
 #endif /* _IP_FRAG_COMMON_H_ */
index 2560c77..97470a8 100644 (file)
 #define        IP_FRAG_TBL_POS(tbl, sig)       \
        ((tbl)->pkt + ((sig) & (tbl)->entry_mask))
 
-#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT
-#define        IP_FRAG_TBL_STAT_UPDATE(s, f, v)        ((s)->f += (v))
-#else
-#define        IP_FRAG_TBL_STAT_UPDATE(s, f, v)        do {} while (0)
-#endif /* IP_FRAG_TBL_STAT */
-
-/* local frag table helper functions */
-static inline void
-ip_frag_tbl_del(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr,
-       struct ip_frag_pkt *fp)
-{
-       ip_frag_free(fp, dr);
-       ip_frag_key_invalidate(&fp->key);
-       TAILQ_REMOVE(&tbl->lru, fp, lru);
-       tbl->use_entries--;
-       IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1);
-}
-
 static inline void
 ip_frag_tbl_add(struct rte_ip_frag_tbl *tbl,  struct ip_frag_pkt *fp,
        const struct ip_frag_key *key, uint64_t tms)
index b3f3f78..7f425f6 100644 (file)
@@ -65,10 +65,13 @@ struct ip_frag_pkt {
 
 #define IP_FRAG_DEATH_ROW_LEN 32 /**< death row size (in packets) */
 
+/* death row size in mbufs */
+#define IP_FRAG_DEATH_ROW_MBUF_LEN (IP_FRAG_DEATH_ROW_LEN * (IP_MAX_FRAG_NUM + 1))
+
 /** mbuf death row (packets to be freed) */
 struct rte_ip_frag_death_row {
        uint32_t cnt;          /**< number of mbufs currently on death row */
-       struct rte_mbuf *row[IP_FRAG_DEATH_ROW_LEN * (IP_MAX_FRAG_NUM + 1)];
+       struct rte_mbuf *row[IP_FRAG_DEATH_ROW_MBUF_LEN];
        /**< mbufs to be freed */
 };
 
@@ -325,6 +328,20 @@ void rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr,
 void
 rte_ip_frag_table_statistics_dump(FILE * f, const struct rte_ip_frag_tbl *tbl);
 
+/**
+ * Delete expired fragments
+ *
+ * @param tbl
+ *   Table to delete expired fragments from
+ * @param dr
+ *   Death row to free buffers to
+ * @param tms
+ *   Current timestamp
+ */
+void __rte_experimental
+rte_frag_table_del_expired_entries(struct rte_ip_frag_tbl *tbl,
+       struct rte_ip_frag_death_row *dr, uint64_t tms);
+
 #ifdef __cplusplus
 }
 #endif
index 659a179..a23f6f2 100644 (file)
@@ -121,3 +121,24 @@ rte_ip_frag_table_statistics_dump(FILE *f, const struct rte_ip_frag_tbl *tbl)
                fail_nospace,
                fail_total - fail_nospace);
 }
+
+/* Delete expired fragments */
+void __rte_experimental
+rte_frag_table_del_expired_entries(struct rte_ip_frag_tbl *tbl,
+       struct rte_ip_frag_death_row *dr, uint64_t tms)
+{
+       uint64_t max_cycles;
+       struct ip_frag_pkt *fp;
+
+       max_cycles = tbl->max_cycles;
+
+       TAILQ_FOREACH(fp, &tbl->lru, lru)
+               if (max_cycles + fp->start < tms) {
+                       /* check that death row has enough space */
+                       if (IP_FRAG_DEATH_ROW_MBUF_LEN - dr->cnt >= fp->last_idx)
+                               ip_frag_tbl_del(tbl, dr, fp);
+                       else
+                               return;
+               } else
+                       return;
+}
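
A periodic housekeeping call might look like the sketch below; tbl, dr
and PREFETCH_OFFSET are the application's own table, death row and
prefetch tuning value:

        rte_frag_table_del_expired_entries(tbl, &dr, rte_rdtsc());
        rte_ip_frag_free_death_row(&dr, PREFETCH_OFFSET);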
index d1acf07..d40d551 100644 (file)
@@ -18,3 +18,9 @@ DPDK_17.08 {
     rte_ip_frag_table_destroy;
 
 } DPDK_2.0;
+
+EXPERIMENTAL {
+       global:
+
+       rte_frag_table_del_expired_entries;
+};
index 65f6a2b..c9726d4 100644 (file)
@@ -18,6 +18,9 @@
 #include <rte_log.h>
 #include <rte_kni.h>
 #include <rte_memzone.h>
+#include <rte_tailq.h>
+#include <rte_rwlock.h>
+#include <rte_eal_memconfig.h>
 #include <exec-env/rte_kni_common.h>
 #include "rte_kni_fifo.h"
 
 
 #define KNI_REQUEST_MBUF_NUM_MAX      32
 
-#define KNI_MEM_CHECK(cond) do { if (cond) goto kni_fail; } while (0)
+#define KNI_MEM_CHECK(cond, fail) do { if (cond) goto fail; } while (0)
+
+#define KNI_MZ_NAME_FMT                        "kni_info_%s"
+#define KNI_TX_Q_MZ_NAME_FMT           "kni_tx_%s"
+#define KNI_RX_Q_MZ_NAME_FMT           "kni_rx_%s"
+#define KNI_ALLOC_Q_MZ_NAME_FMT                "kni_alloc_%s"
+#define KNI_FREE_Q_MZ_NAME_FMT         "kni_free_%s"
+#define KNI_REQ_Q_MZ_NAME_FMT          "kni_req_%s"
+#define KNI_RESP_Q_MZ_NAME_FMT         "kni_resp_%s"
+#define KNI_SYNC_ADDR_MZ_NAME_FMT      "kni_sync_%s"
+
+TAILQ_HEAD(rte_kni_list, rte_tailq_entry);
+
+static struct rte_tailq_elem rte_kni_tailq = {
+       .name = "RTE_KNI",
+};
+EAL_REGISTER_TAILQ(rte_kni_tailq)
 
 /**
  * KNI context
@@ -42,18 +61,26 @@ struct rte_kni {
        struct rte_mempool *pktmbuf_pool;   /**< pkt mbuf mempool */
        unsigned mbuf_size;                 /**< mbuf size */
 
+       const struct rte_memzone *m_tx_q;   /**< TX queue memzone */
+       const struct rte_memzone *m_rx_q;   /**< RX queue memzone */
+       const struct rte_memzone *m_alloc_q;/**< Alloc queue memzone */
+       const struct rte_memzone *m_free_q; /**< Free queue memzone */
+
        struct rte_kni_fifo *tx_q;          /**< TX queue */
        struct rte_kni_fifo *rx_q;          /**< RX queue */
        struct rte_kni_fifo *alloc_q;       /**< Allocated mbufs queue */
        struct rte_kni_fifo *free_q;        /**< To be freed mbufs queue */
 
+       const struct rte_memzone *m_req_q;  /**< Request queue memzone */
+       const struct rte_memzone *m_resp_q; /**< Response queue memzone */
+       const struct rte_memzone *m_sync_addr;/**< Sync addr memzone */
+
        /* For request & response */
        struct rte_kni_fifo *req_q;         /**< Request queue */
        struct rte_kni_fifo *resp_q;        /**< Response queue */
        void * sync_addr;                   /**< Req/Resp Mem address */
 
        struct rte_kni_ops ops;             /**< operations for request */
-       uint8_t in_use : 1;                 /**< kni in use */
 };
 
 enum kni_ops_status {
@@ -61,231 +88,111 @@ enum kni_ops_status {
        KNI_REQ_REGISTERED,
 };
 
-/**
- * KNI memzone pool slot
- */
-struct rte_kni_memzone_slot {
-       uint32_t id;
-       uint8_t in_use : 1;                    /**< slot in use */
-
-       /* Memzones */
-       const struct rte_memzone *m_ctx;       /**< KNI ctx */
-       const struct rte_memzone *m_tx_q;      /**< TX queue */
-       const struct rte_memzone *m_rx_q;      /**< RX queue */
-       const struct rte_memzone *m_alloc_q;   /**< Allocated mbufs queue */
-       const struct rte_memzone *m_free_q;    /**< To be freed mbufs queue */
-       const struct rte_memzone *m_req_q;     /**< Request queue */
-       const struct rte_memzone *m_resp_q;    /**< Response queue */
-       const struct rte_memzone *m_sync_addr;
-
-       /* Free linked list */
-       struct rte_kni_memzone_slot *next;     /**< Next slot link.list */
-};
-
-/**
- * KNI memzone pool
- */
-struct rte_kni_memzone_pool {
-       uint8_t initialized : 1;            /**< Global KNI pool init flag */
-
-       uint32_t max_ifaces;                /**< Max. num of KNI ifaces */
-       struct rte_kni_memzone_slot *slots;        /**< Pool slots */
-       rte_spinlock_t mutex;               /**< alloc/release mutex */
-
-       /* Free memzone slots linked-list */
-       struct rte_kni_memzone_slot *free;         /**< First empty slot */
-       struct rte_kni_memzone_slot *free_tail;    /**< Last empty slot */
-};
-
-
 static void kni_free_mbufs(struct rte_kni *kni);
 static void kni_allocate_mbufs(struct rte_kni *kni);
 
 static volatile int kni_fd = -1;
-static struct rte_kni_memzone_pool kni_memzone_pool = {
-       .initialized = 0,
-};
 
-static const struct rte_memzone *
-kni_memzone_reserve(const char *name, size_t len, int socket_id,
-                                               unsigned flags)
+/* Shall be called before any allocation happens */
+int
+rte_kni_init(unsigned int max_kni_ifaces __rte_unused)
 {
-       const struct rte_memzone *mz = rte_memzone_lookup(name);
-
-       if (mz == NULL)
-               mz = rte_memzone_reserve(name, len, socket_id, flags);
+       /* Check FD and open */
+       if (kni_fd < 0) {
+               kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
+               if (kni_fd < 0) {
+                       RTE_LOG(ERR, KNI,
+                               "Can not open /dev/%s\n", KNI_DEVICE);
+                       return -1;
+               }
+       }
 
-       return mz;
+       return 0;
 }
 
-/* Pool mgmt */
-static struct rte_kni_memzone_slot*
-kni_memzone_pool_alloc(void)
+static struct rte_kni *
+__rte_kni_get(const char *name)
 {
-       struct rte_kni_memzone_slot *slot;
+       struct rte_kni *kni;
+       struct rte_tailq_entry *te;
+       struct rte_kni_list *kni_list;
 
-       rte_spinlock_lock(&kni_memzone_pool.mutex);
+       kni_list = RTE_TAILQ_CAST(rte_kni_tailq.head, rte_kni_list);
 
-       if (!kni_memzone_pool.free) {
-               rte_spinlock_unlock(&kni_memzone_pool.mutex);
-               return NULL;
+       TAILQ_FOREACH(te, kni_list, next) {
+               kni = te->data;
+               if (strncmp(name, kni->name, RTE_KNI_NAMESIZE) == 0)
+                       break;
        }
 
-       slot = kni_memzone_pool.free;
-       kni_memzone_pool.free = slot->next;
-       slot->in_use = 1;
+       if (te == NULL)
+               kni = NULL;
 
-       if (!kni_memzone_pool.free)
-               kni_memzone_pool.free_tail = NULL;
-
-       rte_spinlock_unlock(&kni_memzone_pool.mutex);
-
-       return slot;
+       return kni;
 }
 
-static void
-kni_memzone_pool_release(struct rte_kni_memzone_slot *slot)
+static int
+kni_reserve_mz(struct rte_kni *kni)
 {
-       rte_spinlock_lock(&kni_memzone_pool.mutex);
+       char mz_name[RTE_MEMZONE_NAMESIZE];
 
-       if (kni_memzone_pool.free)
-               kni_memzone_pool.free_tail->next = slot;
-       else
-               kni_memzone_pool.free = slot;
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_TX_Q_MZ_NAME_FMT, kni->name);
+       kni->m_tx_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
+       KNI_MEM_CHECK(kni->m_tx_q == NULL, tx_q_fail);
 
-       kni_memzone_pool.free_tail = slot;
-       slot->next = NULL;
-       slot->in_use = 0;
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_RX_Q_MZ_NAME_FMT, kni->name);
+       kni->m_rx_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
+       KNI_MEM_CHECK(kni->m_rx_q == NULL, rx_q_fail);
 
-       rte_spinlock_unlock(&kni_memzone_pool.mutex);
-}
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_ALLOC_Q_MZ_NAME_FMT, kni->name);
+       kni->m_alloc_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
+       KNI_MEM_CHECK(kni->m_alloc_q == NULL, alloc_q_fail);
 
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_FREE_Q_MZ_NAME_FMT, kni->name);
+       kni->m_free_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
+       KNI_MEM_CHECK(kni->m_free_q == NULL, free_q_fail);
 
-/* Shall be called before any allocation happens */
-void
-rte_kni_init(unsigned int max_kni_ifaces)
-{
-       uint32_t i;
-       struct rte_kni_memzone_slot *it;
-       const struct rte_memzone *mz;
-#define OBJNAMSIZ 32
-       char obj_name[OBJNAMSIZ];
-       char mz_name[RTE_MEMZONE_NAMESIZE];
-
-       /* Immediately return if KNI is already initialized */
-       if (kni_memzone_pool.initialized) {
-               RTE_LOG(WARNING, KNI, "Double call to rte_kni_init()");
-               return;
-       }
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_REQ_Q_MZ_NAME_FMT, kni->name);
+       kni->m_req_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
+       KNI_MEM_CHECK(kni->m_req_q == NULL, req_q_fail);
 
-       if (max_kni_ifaces == 0) {
-               RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n",
-                                                       max_kni_ifaces);
-               RTE_LOG(ERR, KNI, "Unable to initialize KNI\n");
-               return;
-       }
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_RESP_Q_MZ_NAME_FMT, kni->name);
+       kni->m_resp_q = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
+       KNI_MEM_CHECK(kni->m_resp_q == NULL, resp_q_fail);
 
-       /* Check FD and open */
-       if (kni_fd < 0) {
-               kni_fd = open("/dev/" KNI_DEVICE, O_RDWR);
-               if (kni_fd < 0) {
-                       RTE_LOG(ERR, KNI,
-                               "Can not open /dev/%s\n", KNI_DEVICE);
-                       return;
-               }
-       }
+       snprintf(mz_name, RTE_MEMZONE_NAMESIZE, KNI_SYNC_ADDR_MZ_NAME_FMT, kni->name);
+       kni->m_sync_addr = rte_memzone_reserve(mz_name, KNI_FIFO_SIZE, SOCKET_ID_ANY, 0);
+       KNI_MEM_CHECK(kni->m_sync_addr == NULL, sync_addr_fail);
 
-       /* Allocate slot objects */
-       kni_memzone_pool.slots = (struct rte_kni_memzone_slot *)
-                                       rte_malloc(NULL,
-                                       sizeof(struct rte_kni_memzone_slot) *
-                                       max_kni_ifaces,
-                                       0);
-       KNI_MEM_CHECK(kni_memzone_pool.slots == NULL);
-
-       /* Initialize general pool variables */
-       kni_memzone_pool.initialized = 1;
-       kni_memzone_pool.max_ifaces = max_kni_ifaces;
-       kni_memzone_pool.free = &kni_memzone_pool.slots[0];
-       rte_spinlock_init(&kni_memzone_pool.mutex);
-
-       /* Pre-allocate all memzones of all the slots; panic on error */
-       for (i = 0; i < max_kni_ifaces; i++) {
-
-               /* Recover current slot */
-               it = &kni_memzone_pool.slots[i];
-               it->id = i;
-
-               /* Allocate KNI context */
-               snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%d", i);
-               mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni),
-                                       SOCKET_ID_ANY, 0);
-               KNI_MEM_CHECK(mz == NULL);
-               it->m_ctx = mz;
-
-               /* TX RING */
-               snprintf(obj_name, OBJNAMSIZ, "kni_tx_%d", i);
-               mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
-                                                       SOCKET_ID_ANY, 0);
-               KNI_MEM_CHECK(mz == NULL);
-               it->m_tx_q = mz;
-
-               /* RX RING */
-               snprintf(obj_name, OBJNAMSIZ, "kni_rx_%d", i);
-               mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
-                                                       SOCKET_ID_ANY, 0);
-               KNI_MEM_CHECK(mz == NULL);
-               it->m_rx_q = mz;
-
-               /* ALLOC RING */
-               snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%d", i);
-               mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
-                                                       SOCKET_ID_ANY, 0);
-               KNI_MEM_CHECK(mz == NULL);
-               it->m_alloc_q = mz;
-
-               /* FREE RING */
-               snprintf(obj_name, OBJNAMSIZ, "kni_free_%d", i);
-               mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
-                                                       SOCKET_ID_ANY, 0);
-               KNI_MEM_CHECK(mz == NULL);
-               it->m_free_q = mz;
-
-               /* Request RING */
-               snprintf(obj_name, OBJNAMSIZ, "kni_req_%d", i);
-               mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
-                                                       SOCKET_ID_ANY, 0);
-               KNI_MEM_CHECK(mz == NULL);
-               it->m_req_q = mz;
-
-               /* Response RING */
-               snprintf(obj_name, OBJNAMSIZ, "kni_resp_%d", i);
-               mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
-                                                       SOCKET_ID_ANY, 0);
-               KNI_MEM_CHECK(mz == NULL);
-               it->m_resp_q = mz;
-
-               /* Req/Resp sync mem area */
-               snprintf(obj_name, OBJNAMSIZ, "kni_sync_%d", i);
-               mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE,
-                                                       SOCKET_ID_ANY, 0);
-               KNI_MEM_CHECK(mz == NULL);
-               it->m_sync_addr = mz;
-
-               if ((i+1) == max_kni_ifaces) {
-                       it->next = NULL;
-                       kni_memzone_pool.free_tail = it;
-               } else
-                       it->next = &kni_memzone_pool.slots[i+1];
-       }
-
-       return;
+       return 0;
 
-kni_fail:
-       RTE_LOG(ERR, KNI, "Unable to allocate memory for max_kni_ifaces:%d."
-               "Increase the amount of hugepages memory\n", max_kni_ifaces);
+sync_addr_fail:
+       rte_memzone_free(kni->m_resp_q);
+resp_q_fail:
+       rte_memzone_free(kni->m_req_q);
+req_q_fail:
+       rte_memzone_free(kni->m_free_q);
+free_q_fail:
+       rte_memzone_free(kni->m_alloc_q);
+alloc_q_fail:
+       rte_memzone_free(kni->m_rx_q);
+rx_q_fail:
+       rte_memzone_free(kni->m_tx_q);
+tx_q_fail:
+       return -1;
 }
 
+static void
+kni_release_mz(struct rte_kni *kni)
+{
+       rte_memzone_free(kni->m_tx_q);
+       rte_memzone_free(kni->m_rx_q);
+       rte_memzone_free(kni->m_alloc_q);
+       rte_memzone_free(kni->m_free_q);
+       rte_memzone_free(kni->m_req_q);
+       rte_memzone_free(kni->m_resp_q);
+       rte_memzone_free(kni->m_sync_addr);
+}
 
 struct rte_kni *
 rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
@@ -294,41 +201,45 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 {
        int ret;
        struct rte_kni_device_info dev_info;
-       struct rte_kni *ctx;
-       char intf_name[RTE_KNI_NAMESIZE];
-       const struct rte_memzone *mz;
-       struct rte_kni_memzone_slot *slot = NULL;
+       struct rte_kni *kni;
+       struct rte_tailq_entry *te;
+       struct rte_kni_list *kni_list;
 
        if (!pktmbuf_pool || !conf || !conf->name[0])
                return NULL;
 
        /* Check if KNI subsystem has been initialized */
-       if (kni_memzone_pool.initialized != 1) {
+       if (kni_fd < 0) {
                RTE_LOG(ERR, KNI, "KNI subsystem has not been initialized. Invoke rte_kni_init() first\n");
                return NULL;
        }
 
-       /* Get an available slot from the pool */
-       slot = kni_memzone_pool_alloc();
-       if (!slot) {
-               RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; increase the number of max_kni_ifaces(current %d) or release unused ones.\n",
-                       kni_memzone_pool.max_ifaces);
-               return NULL;
+       rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+       kni = __rte_kni_get(conf->name);
+       if (kni != NULL) {
+               RTE_LOG(ERR, KNI, "KNI already exists\n");
+               goto unlock;
        }
 
-       /* Recover ctx */
-       ctx = slot->m_ctx->addr;
-       snprintf(intf_name, RTE_KNI_NAMESIZE, "%s", conf->name);
+       te = rte_zmalloc("KNI_TAILQ_ENTRY", sizeof(*te), 0);
+       if (te == NULL) {
+               RTE_LOG(ERR, KNI, "Failed to allocate tailq entry\n");
+               goto unlock;
+       }
 
-       if (ctx->in_use) {
-               RTE_LOG(ERR, KNI, "KNI %s is in use\n", ctx->name);
-               return NULL;
+       kni = rte_zmalloc("KNI", sizeof(struct rte_kni), RTE_CACHE_LINE_SIZE);
+       if (kni == NULL) {
+               RTE_LOG(ERR, KNI, "KNI memory allocation failed\n");
+               goto kni_fail;
        }
-       memset(ctx, 0, sizeof(struct rte_kni));
+
+       snprintf(kni->name, RTE_KNI_NAMESIZE, "%s", conf->name);
+
        if (ops)
-               memcpy(&ctx->ops, ops, sizeof(struct rte_kni_ops));
+               memcpy(&kni->ops, ops, sizeof(struct rte_kni_ops));
        else
-               ctx->ops.port_id = UINT16_MAX;
+               kni->ops.port_id = UINT16_MAX;
 
        memset(&dev_info, 0, sizeof(dev_info));
        dev_info.bus = conf->addr.bus;
@@ -344,72 +255,79 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 
        memcpy(dev_info.mac_addr, conf->mac_addr, ETHER_ADDR_LEN);
 
-       snprintf(ctx->name, RTE_KNI_NAMESIZE, "%s", intf_name);
-       snprintf(dev_info.name, RTE_KNI_NAMESIZE, "%s", intf_name);
+       snprintf(dev_info.name, RTE_KNI_NAMESIZE, "%s", conf->name);
 
        RTE_LOG(INFO, KNI, "pci: %02x:%02x:%02x \t %02x:%02x\n",
                dev_info.bus, dev_info.devid, dev_info.function,
                        dev_info.vendor_id, dev_info.device_id);
+
+       ret = kni_reserve_mz(kni);
+       if (ret < 0)
+               goto mz_fail;
+
        /* TX RING */
-       mz = slot->m_tx_q;
-       ctx->tx_q = mz->addr;
-       kni_fifo_init(ctx->tx_q, KNI_FIFO_COUNT_MAX);
-       dev_info.tx_phys = mz->phys_addr;
+       kni->tx_q = kni->m_tx_q->addr;
+       kni_fifo_init(kni->tx_q, KNI_FIFO_COUNT_MAX);
+       dev_info.tx_phys = kni->m_tx_q->phys_addr;
 
        /* RX RING */
-       mz = slot->m_rx_q;
-       ctx->rx_q = mz->addr;
-       kni_fifo_init(ctx->rx_q, KNI_FIFO_COUNT_MAX);
-       dev_info.rx_phys = mz->phys_addr;
+       kni->rx_q = kni->m_rx_q->addr;
+       kni_fifo_init(kni->rx_q, KNI_FIFO_COUNT_MAX);
+       dev_info.rx_phys = kni->m_rx_q->phys_addr;
 
        /* ALLOC RING */
-       mz = slot->m_alloc_q;
-       ctx->alloc_q = mz->addr;
-       kni_fifo_init(ctx->alloc_q, KNI_FIFO_COUNT_MAX);
-       dev_info.alloc_phys = mz->phys_addr;
+       kni->alloc_q = kni->m_alloc_q->addr;
+       kni_fifo_init(kni->alloc_q, KNI_FIFO_COUNT_MAX);
+       dev_info.alloc_phys = kni->m_alloc_q->phys_addr;
 
        /* FREE RING */
-       mz = slot->m_free_q;
-       ctx->free_q = mz->addr;
-       kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX);
-       dev_info.free_phys = mz->phys_addr;
+       kni->free_q = kni->m_free_q->addr;
+       kni_fifo_init(kni->free_q, KNI_FIFO_COUNT_MAX);
+       dev_info.free_phys = kni->m_free_q->phys_addr;
 
        /* Request RING */
-       mz = slot->m_req_q;
-       ctx->req_q = mz->addr;
-       kni_fifo_init(ctx->req_q, KNI_FIFO_COUNT_MAX);
-       dev_info.req_phys = mz->phys_addr;
+       kni->req_q = kni->m_req_q->addr;
+       kni_fifo_init(kni->req_q, KNI_FIFO_COUNT_MAX);
+       dev_info.req_phys = kni->m_req_q->phys_addr;
 
        /* Response RING */
-       mz = slot->m_resp_q;
-       ctx->resp_q = mz->addr;
-       kni_fifo_init(ctx->resp_q, KNI_FIFO_COUNT_MAX);
-       dev_info.resp_phys = mz->phys_addr;
+       kni->resp_q = kni->m_resp_q->addr;
+       kni_fifo_init(kni->resp_q, KNI_FIFO_COUNT_MAX);
+       dev_info.resp_phys = kni->m_resp_q->phys_addr;
 
        /* Req/Resp sync mem area */
-       mz = slot->m_sync_addr;
-       ctx->sync_addr = mz->addr;
-       dev_info.sync_va = mz->addr;
-       dev_info.sync_phys = mz->phys_addr;
+       kni->sync_addr = kni->m_sync_addr->addr;
+       dev_info.sync_va = kni->m_sync_addr->addr;
+       dev_info.sync_phys = kni->m_sync_addr->phys_addr;
 
-       ctx->pktmbuf_pool = pktmbuf_pool;
-       ctx->group_id = conf->group_id;
-       ctx->slot_id = slot->id;
-       ctx->mbuf_size = conf->mbuf_size;
+       kni->pktmbuf_pool = pktmbuf_pool;
+       kni->group_id = conf->group_id;
+       kni->mbuf_size = conf->mbuf_size;
 
        ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
-       KNI_MEM_CHECK(ret < 0);
+       if (ret < 0)
+               goto ioctl_fail;
+
+       te->data = kni;
+
+       kni_list = RTE_TAILQ_CAST(rte_kni_tailq.head, rte_kni_list);
+       TAILQ_INSERT_TAIL(kni_list, te, next);
 
-       ctx->in_use = 1;
+       rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
 
        /* Allocate mbufs and then put them into alloc_q */
-       kni_allocate_mbufs(ctx);
+       kni_allocate_mbufs(kni);
 
-       return ctx;
+       return kni;
 
+ioctl_fail:
+       kni_release_mz(kni);
+mz_fail:
+       rte_free(kni);
 kni_fail:
-       if (slot)
-               kni_memzone_pool_release(&kni_memzone_pool.slots[slot->id]);
+       rte_free(te);
+unlock:
+       rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
 
        return NULL;
 }
@@ -462,19 +380,36 @@ kni_free_fifo_phy(struct rte_mempool *mp, struct rte_kni_fifo *fifo)
 int
 rte_kni_release(struct rte_kni *kni)
 {
+       struct rte_tailq_entry *te;
+       struct rte_kni_list *kni_list;
        struct rte_kni_device_info dev_info;
-       uint32_t slot_id;
        uint32_t retry = 5;
 
-       if (!kni || !kni->in_use)
+       if (!kni)
                return -1;
 
+       kni_list = RTE_TAILQ_CAST(rte_kni_tailq.head, rte_kni_list);
+
+       rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+       TAILQ_FOREACH(te, kni_list, next) {
+               if (te->data == kni)
+                       break;
+       }
+
+       if (te == NULL)
+               goto unlock;
+
        snprintf(dev_info.name, sizeof(dev_info.name), "%s", kni->name);
        if (ioctl(kni_fd, RTE_KNI_IOCTL_RELEASE, &dev_info) < 0) {
                RTE_LOG(ERR, KNI, "Fail to release kni device\n");
-               return -1;
+               goto unlock;
        }
 
+       TAILQ_REMOVE(kni_list, te, next);
+
+       rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
        /* mbufs in all fifo should be released, except request/response */
 
        /* wait until all rxq packets processed by kernel */
@@ -488,20 +423,18 @@ rte_kni_release(struct rte_kni *kni)
        kni_free_fifo(kni->tx_q);
        kni_free_fifo(kni->free_q);
 
-       slot_id = kni->slot_id;
+       kni_release_mz(kni);
 
-       /* Memset the KNI struct */
-       memset(kni, 0, sizeof(struct rte_kni));
+       rte_free(kni);
 
-       /* Release memzone */
-       if (slot_id > kni_memzone_pool.max_ifaces) {
-               RTE_LOG(ERR, KNI, "KNI pool: corrupted slot ID: %d, max: %d\n",
-                       slot_id, kni_memzone_pool.max_ifaces);
-               return -1;
-       }
-       kni_memzone_pool_release(&kni_memzone_pool.slots[slot_id]);
+       rte_free(te);
 
        return 0;
+
+unlock:
+       rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+       return -1;
 }
 
 /* default callback for request of configuring device mac address */
@@ -711,24 +644,18 @@ kni_allocate_mbufs(struct rte_kni *kni)
 struct rte_kni *
 rte_kni_get(const char *name)
 {
-       uint32_t i;
-       struct rte_kni_memzone_slot *it;
        struct rte_kni *kni;
 
        if (name == NULL || name[0] == '\0')
                return NULL;
 
-       /* Note: could be improved perf-wise if necessary */
-       for (i = 0; i < kni_memzone_pool.max_ifaces; i++) {
-               it = &kni_memzone_pool.slots[i];
-               if (it->in_use == 0)
-                       continue;
-               kni = it->m_ctx->addr;
-               if (strncmp(kni->name, name, RTE_KNI_NAMESIZE) == 0)
-                       return kni;
-       }
+       rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK);
 
-       return NULL;
+       kni = __rte_kni_get(name);
+
+       rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+       return kni;
 }
 
 const char *
@@ -790,6 +717,47 @@ rte_kni_unregister_handlers(struct rte_kni *kni)
 
        return 0;
 }
+
+int __rte_experimental
+rte_kni_update_link(struct rte_kni *kni, unsigned int linkup)
+{
+       char path[64];
+       char old_carrier[2];
+       const char *new_carrier;
+       int old_linkup;
+       int fd, ret;
+
+       if (kni == NULL)
+               return -1;
+
+       snprintf(path, sizeof(path), "/sys/devices/virtual/net/%s/carrier",
+               kni->name);
+
+       fd = open(path, O_RDWR);
+       if (fd == -1) {
+               RTE_LOG(ERR, KNI, "Failed to open file: %s.\n", path);
+               return -1;
+       }
+
+       ret = read(fd, old_carrier, 2);
+       if (ret < 1) {
+               close(fd);
+               return -1;
+       }
+       old_linkup = (old_carrier[0] == '1');
+
+       new_carrier = linkup ? "1" : "0";
+       ret = write(fd, new_carrier, 1);
+       if (ret < 1) {
+               RTE_LOG(ERR, KNI, "Failed to write file: %s.\n", path);
+               close(fd);
+               return -1;
+       }
+
+       close(fd);
+       return old_linkup;
+}
+
 void
 rte_kni_close(void)
 {
index 99055e2..02ca43b 100644 (file)
@@ -81,8 +81,12 @@ struct rte_kni_conf {
  *
  * @param max_kni_ifaces
  *  The maximum number of KNI interfaces that can coexist concurrently
+ *
+ * @return
+ *  - 0 indicates success.
+ *  - negative value indicates failure.
  */
-void rte_kni_init(unsigned int max_kni_ifaces);
+int rte_kni_init(unsigned int max_kni_ifaces);
 
 
 /**
@@ -228,6 +232,26 @@ int rte_kni_register_handlers(struct rte_kni *kni, struct rte_kni_ops *ops);
  */
 int rte_kni_unregister_handlers(struct rte_kni *kni);
 
+/**
+ * Update link carrier state for KNI port.
+ *
+ * Update the linkup/linkdown state of a KNI interface in the kernel.
+ *
+ * @param kni
+ *  pointer to struct rte_kni.
+ * @param linkup
+ *  New link state:
+ *  0 for linkdown.
+ *  > 0 for linkup.
+ *
+ * @return
+ *  - -1 on failure.
+ *  - 0 if the previous link state was linkdown.
+ *  - 1 if the previous link state was linkup.
+ */
+int __rte_experimental
+rte_kni_update_link(struct rte_kni *kni, unsigned int linkup);
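+
+/*
+ * Example (sketch): mirror the PMD link status into the kernel
+ * interface; kni and port_id are the application's own handles.
+ *
+ *	struct rte_eth_link link;
+ *
+ *	rte_eth_link_get_nowait(port_id, &link);
+ *	rte_kni_update_link(kni, link.link_status);
+ */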
+
 /**
  *  Close KNI device.
  */
index ac26a8c..287d7de 100644 (file)
@@ -4,6 +4,36 @@
 
 
 
+/**
+ * @internal When the C11 memory model is enabled, use C11 atomic memory
+ * barriers; otherwise use rte_smp_* memory barriers.
+ *
+ * @param src
+ *   Pointer to the source data.
+ * @param dst
+ *   Pointer to the destination data.
+ * @param value
+ *   Data value.
+ */
+#ifdef RTE_USE_C11_MEM_MODEL
+#define __KNI_LOAD_ACQUIRE(src) ({                         \
+               __atomic_load_n((src), __ATOMIC_ACQUIRE);           \
+       })
+#define __KNI_STORE_RELEASE(dst, value) do {               \
+               __atomic_store_n((dst), value, __ATOMIC_RELEASE);   \
+       } while(0)
+#else
+#define __KNI_LOAD_ACQUIRE(src) ({                         \
+               typeof (*(src)) val = *(src);                       \
+               rte_smp_rmb();                                      \
+               val;                                                \
+       })
+#define __KNI_STORE_RELEASE(dst, value) do {               \
+               *(dst) = value;                                     \
+               rte_smp_wmb();                                      \
+       } while(0)
+#endif
+
 /**
  * Initializes the kni fifo structure
  */
@@ -28,8 +58,8 @@ kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num)
 {
        unsigned i = 0;
        unsigned fifo_write = fifo->write;
-       unsigned fifo_read = fifo->read;
        unsigned new_write = fifo_write;
+       unsigned fifo_read = __KNI_LOAD_ACQUIRE(&fifo->read);
 
        for (i = 0; i < num; i++) {
                new_write = (new_write + 1) & (fifo->len - 1);
@@ -39,7 +69,7 @@ kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num)
                fifo->buffer[fifo_write] = data[i];
                fifo_write = new_write;
        }
-       fifo->write = fifo_write;
+       __KNI_STORE_RELEASE(&fifo->write, fifo_write);
        return i;
 }
 
@@ -51,7 +81,8 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num)
 {
        unsigned i = 0;
        unsigned new_read = fifo->read;
-       unsigned fifo_write = fifo->write;
+       unsigned fifo_write = __KNI_LOAD_ACQUIRE(&fifo->write);
+
        for (i = 0; i < num; i++) {
                if (new_read == fifo_write)
                        break;
@@ -59,7 +90,7 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num)
                data[i] = fifo->buffer[new_read];
                new_read = (new_read + 1) & (fifo->len - 1);
        }
-       fifo->read = new_read;
+       __KNI_STORE_RELEASE(&fifo->read, new_read);
        return i;
 }
 
@@ -69,5 +100,7 @@ kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num)
 static inline uint32_t
 kni_fifo_count(struct rte_kni_fifo *fifo)
 {
-       return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1);
+       unsigned fifo_write = __KNI_LOAD_ACQUIRE(&fifo->write);
+       unsigned fifo_read = __KNI_LOAD_ACQUIRE(&fifo->read);
+       return (fifo->len + fifo_write - fifo_read) & (fifo->len - 1);
 }
index acd515e..c877dc6 100644 (file)
@@ -15,3 +15,9 @@ DPDK_2.0 {
 
        local: *;
 };
+
+EXPERIMENTAL {
+       global:
+
+       rte_kni_update_link;
+};
index a28f769..f7030c6 100644 (file)
@@ -44,6 +44,20 @@ rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params)
                    kvlist->pairs[i].value == NULL)
                        return -1;
 
+               /* Detect a list value such as [a,b] so that commas
+                * inside the brackets are not treated as pair delimiters.
+                */
+               str = kvlist->pairs[i].value;
+               if (str[0] == '[') {
+                       /* Find the end of the list. */
+                       while (str[strlen(str) - 1] != ']') {
+                               /* Restore the comma erased by strtok_r(). */
+                               str[strlen(str)] = ',';
+                               /* Parse until next comma. */
+                               str = strtok_r(NULL, RTE_KVARGS_PAIRS_DELIM, &ctx1);
+                               if (str == NULL)
+                                       return -1; /* no closing bracket */
+                       }
+               }
+
                kvlist->count++;
                str = NULL;
        }
@@ -120,6 +134,9 @@ rte_kvargs_process(const struct rte_kvargs *kvlist,
        const struct rte_kvargs_pair *pair;
        unsigned i;
 
+       if (kvlist == NULL)
+               return 0;
+
        for (i = 0; i < kvlist->count; i++) {
                pair = &kvlist->pairs[i];
                if (key_match == NULL || strcmp(pair->key, key_match) == 0) {
index fc04195..1946195 100644 (file)
@@ -110,7 +110,7 @@ struct rte_kvargs *rte_kvargs_parse_delim(const char *args,
  * rte_kvargs_parse().
  *
  * @param kvlist
- *   The rte_kvargs structure
+ *   The rte_kvargs structure. No error if NULL.
  */
 void rte_kvargs_free(struct rte_kvargs *kvlist);
 
@@ -119,11 +119,10 @@ void rte_kvargs_free(struct rte_kvargs *kvlist);
  *
  * For each key/value association that matches the given key, calls the
 * handler function, passing the value stored in the
- * dictionary for that key and a given extra argument. If *kvlist* is NULL
- * function does nothing.
+ * dictionary for that key and a given extra argument.
  *
  * @param kvlist
- *   The rte_kvargs structure
+ *   The rte_kvargs structure. No error if NULL.
  * @param key_match
  *   The key on which the handler should be called, or NULL to process handler
  *   on all associations
index 1fdec68..5715549 100644 (file)
@@ -125,8 +125,11 @@ add_time_stamps(uint16_t pid __rte_unused,
        for (i = 0; i < nb_pkts; i++) {
                diff_tsc = now - prev_tsc;
                timer_tsc += diff_tsc;
-               if (timer_tsc >= samp_intvl) {
+
+               if ((pkts[i]->ol_flags & PKT_RX_TIMESTAMP) == 0
+                               && (timer_tsc >= samp_intvl)) {
                        pkts[i]->timestamp = now;
+                       pkts[i]->ol_flags |= PKT_RX_TIMESTAMP;
                        timer_tsc = 0;
                }
                prev_tsc = now;
@@ -156,7 +159,7 @@ calc_latency(uint16_t pid __rte_unused,
 
        now = rte_rdtsc();
        for (i = 0; i < nb_pkts; i++) {
-               if (pkts[i]->timestamp)
+               if (pkts[i]->ol_flags & PKT_RX_TIMESTAMP)
                        latency[cnt++] = now - pkts[i]->timestamp;
        }
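
After this change the timestamp field is meaningful only when
PKT_RX_TIMESTAMP is set, whether the stamp came from the NIC or from the
sampling callback above. A small sketch of the resulting contract:

    #include <rte_cycles.h>
    #include <rte_mbuf.h>

    static uint64_t
    packet_age_cycles(const struct rte_mbuf *m)
    {
        if (m->ol_flags & PKT_RX_TIMESTAMP)
            return rte_rdtsc() - m->timestamp;
        return 0;  /* packet was never stamped: no latency sample */
    }
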
 
index 482bd72..a7946a1 100644
@@ -8,7 +8,7 @@ LIB = librte_lpm.a
 
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
-LDLIBS += -lrte_eal
+LDLIBS += -lrte_eal -lrte_hash
 
 EXPORT_MAP := rte_lpm_version.map
 
index 0678494..a5176d8 100644
@@ -7,3 +7,4 @@ headers = files('rte_lpm.h', 'rte_lpm6.h')
 # since header files have different names, we can install all vector headers
 # without worrying about which architecture we actually need
 headers += files('rte_lpm_altivec.h', 'rte_lpm_neon.h', 'rte_lpm_sse.h')
+deps += ['hash']
index 149677e..6212003 100644
@@ -21,6 +21,9 @@
 #include <rte_errno.h>
 #include <rte_rwlock.h>
 #include <rte_spinlock.h>
+#include <rte_hash.h>
+#include <assert.h>
+#include <rte_jhash.h>
 
 #include "rte_lpm6.h"
 
@@ -37,6 +40,9 @@
 #define BYTE_SIZE                                 8
 #define BYTES2_SIZE                              16
 
+#define RULE_HASH_TABLE_EXTRA_SPACE              64
+#define TBL24_IND                        UINT32_MAX
+
 #define lpm6_tbl8_gindex next_hop
 
 /** Flags for setting an entry as valid/invalid. */
@@ -70,6 +76,23 @@ struct rte_lpm6_rule {
        uint8_t depth; /**< Rule depth. */
 };
 
+/** Rules tbl entry key. */
+struct rte_lpm6_rule_key {
+       uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */
+       uint8_t depth; /**< Rule depth. */
+};
+
+/* Header of tbl8 */
+struct rte_lpm_tbl8_hdr {
+       uint32_t owner_tbl_ind; /**< owner table: TBL24_IND if owner is tbl24,
+                                 *  otherwise index of tbl8
+                                 */
+       uint32_t owner_entry_ind; /**< index of the owner table entry where
+                                   *  pointer to the tbl8 is stored
+                                   */
+       uint32_t ref_cnt; /**< table reference counter */
+};
+
 /** LPM6 structure. */
 struct rte_lpm6 {
        /* LPM metadata. */
@@ -77,12 +100,17 @@ struct rte_lpm6 {
        uint32_t max_rules;              /**< Max number of rules. */
        uint32_t used_rules;             /**< Used rules so far. */
        uint32_t number_tbl8s;           /**< Number of tbl8s to allocate. */
-       uint32_t next_tbl8;              /**< Next tbl8 to be used. */
 
        /* LPM Tables. */
-       struct rte_lpm6_rule *rules_tbl; /**< LPM rules. */
+       struct rte_hash *rules_tbl; /**< LPM rules. */
        struct rte_lpm6_tbl_entry tbl24[RTE_LPM6_TBL24_NUM_ENTRIES]
                        __rte_cache_aligned; /**< LPM tbl24 table. */
+
+       uint32_t *tbl8_pool; /**< pool of indexes of free tbl8s */
+       uint32_t tbl8_pool_pos; /**< current position in the tbl8 pool */
+
+       struct rte_lpm_tbl8_hdr *tbl8_hdrs; /* array of tbl8 headers */
+
        struct rte_lpm6_tbl_entry tbl8[0]
                        __rte_cache_aligned; /**< LPM tbl8 table. */
 };
@@ -93,22 +121,122 @@ struct rte_lpm6 {
  * and set the rest to 0.
  */
 static inline void
-mask_ip(uint8_t *ip, uint8_t depth)
+ip6_mask_addr(uint8_t *ip, uint8_t depth)
 {
-        int16_t part_depth, mask;
-        int i;
+       int16_t part_depth, mask;
+       int i;
 
-               part_depth = depth;
+       part_depth = depth;
 
-               for (i = 0; i < RTE_LPM6_IPV6_ADDR_SIZE; i++) {
-                       if (part_depth < BYTE_SIZE && part_depth >= 0) {
-                               mask = (uint16_t)(~(UINT8_MAX >> part_depth));
-                               ip[i] = (uint8_t)(ip[i] & mask);
-                       } else if (part_depth < 0) {
-                               ip[i] = 0;
-                       }
-                       part_depth -= BYTE_SIZE;
-               }
+       for (i = 0; i < RTE_LPM6_IPV6_ADDR_SIZE; i++) {
+               if (part_depth < BYTE_SIZE && part_depth >= 0) {
+                       mask = (uint16_t)(~(UINT8_MAX >> part_depth));
+                       ip[i] = (uint8_t)(ip[i] & mask);
+               } else if (part_depth < 0)
+                       ip[i] = 0;
+
+               part_depth -= BYTE_SIZE;
+       }
+}
+
+/* copy ipv6 address */
+static inline void
+ip6_copy_addr(uint8_t *dst, const uint8_t *src)
+{
+       rte_memcpy(dst, src, RTE_LPM6_IPV6_ADDR_SIZE);
+}
+
+/*
+ * LPM6 rule hash function
+ *
+ * It is used as the hash function for the rte_hash
+ * table that stores the rules.
+ */
+static inline uint32_t
+rule_hash(const void *data, __rte_unused uint32_t data_len,
+                 uint32_t init_val)
+{
+       return rte_jhash(data, sizeof(struct rte_lpm6_rule_key), init_val);
+}
+
+/*
+ * Init pool of free tbl8 indexes
+ */
+static void
+tbl8_pool_init(struct rte_lpm6 *lpm)
+{
+       uint32_t i;
+
+       /* put entire range of indexes to the tbl8 pool */
+       for (i = 0; i < lpm->number_tbl8s; i++)
+               lpm->tbl8_pool[i] = i;
+
+       lpm->tbl8_pool_pos = 0;
+}
+
+/*
+ * Get an index of a free tbl8 from the pool
+ */
+static inline int
+tbl8_get(struct rte_lpm6 *lpm, uint32_t *tbl8_ind)
+{
+       if (lpm->tbl8_pool_pos == lpm->number_tbl8s)
+               /* no more free tbl8 */
+               return -ENOSPC;
+
+       /* next index */
+       *tbl8_ind = lpm->tbl8_pool[lpm->tbl8_pool_pos++];
+       return 0;
+}
+
+/*
+ * Put an index of a free tbl8 back to the pool
+ */
+static inline int
+tbl8_put(struct rte_lpm6 *lpm, uint32_t tbl8_ind)
+{
+       if (lpm->tbl8_pool_pos == 0)
+               /* pool is full */
+               return -ENOSPC;
+
+       lpm->tbl8_pool[--lpm->tbl8_pool_pos] = tbl8_ind;
+       return 0;
+}
+
+/*
+ * Returns number of tbl8s available in the pool
+ */
+static inline uint32_t
+tbl8_available(struct rte_lpm6 *lpm)
+{
+       return lpm->number_tbl8s - lpm->tbl8_pool_pos;
+}
+
+/*
+ * Init a rule key.
+ *   Note that ip must already be masked.
+ */
+static inline void
+rule_key_init(struct rte_lpm6_rule_key *key, uint8_t *ip, uint8_t depth)
+{
+       ip6_copy_addr(key->ip, ip);
+       key->depth = depth;
+}
+
+/*
+ * Rebuild the entire LPM tree by reinserting all rules
+ */
+static void
+rebuild_lpm(struct rte_lpm6 *lpm)
+{
+       uint64_t next_hop;
+       struct rte_lpm6_rule_key *rule_key;
+       uint32_t iter = 0;
+
+       while (rte_hash_iterate(lpm->rules_tbl, (void *) &rule_key,
+                       (void **) &next_hop, &iter) >= 0)
+               rte_lpm6_add(lpm, rule_key->ip, rule_key->depth,
+                       (uint32_t) next_hop);
 }
 
 /*
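
The helpers above replace the old linear rules array with an rte_hash
keyed by (masked IP, depth), plus a LIFO pool of free tbl8 indexes. A
standalone sketch of the hash-based rule store (names and sizes are
illustrative, not the library's):

    #include <stdint.h>
    #include <rte_hash.h>
    #include <rte_jhash.h>

    struct rule_key { uint8_t ip[16]; uint8_t depth; };

    static struct rte_hash *
    make_rule_store(int socket_id)
    {
        struct rte_hash_parameters p = {
            .name = "rule_store",      /* illustrative name */
            .entries = 1024,           /* illustrative size */
            .key_len = sizeof(struct rule_key),
            .hash_func = rte_jhash,
            .socket_id = socket_id,
        };

        return rte_hash_create(&p);
    }

    static uint32_t
    lookup_or_add(struct rte_hash *h, const struct rule_key *k, uint32_t nh)
    {
        void *data;

        /* O(1) lookup replaces the former scan over all used rules */
        if (rte_hash_lookup_data(h, k, &data) >= 0)
            return (uint32_t)(uintptr_t)data;

        /* the value is packed into the data pointer, as rule_add() does */
        rte_hash_add_key_data(h, k, (void *)(uintptr_t)nh);
        return nh;
    }
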
@@ -121,8 +249,11 @@ rte_lpm6_create(const char *name, int socket_id,
        char mem_name[RTE_LPM6_NAMESIZE];
        struct rte_lpm6 *lpm = NULL;
        struct rte_tailq_entry *te;
-       uint64_t mem_size, rules_size;
+       uint64_t mem_size;
        struct rte_lpm6_list *lpm_list;
+       struct rte_hash *rules_tbl = NULL;
+       uint32_t *tbl8_pool = NULL;
+       struct rte_lpm_tbl8_hdr *tbl8_hdrs = NULL;
 
        lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list);
 
@@ -136,12 +267,54 @@ rte_lpm6_create(const char *name, int socket_id,
                return NULL;
        }
 
+       /* create rules hash table */
+       snprintf(mem_name, sizeof(mem_name), "LRH_%s", name);
+       struct rte_hash_parameters rule_hash_tbl_params = {
+               .entries = config->max_rules * 1.2 +
+                       RULE_HASH_TABLE_EXTRA_SPACE,
+               .key_len = sizeof(struct rte_lpm6_rule_key),
+               .hash_func = rule_hash,
+               .hash_func_init_val = 0,
+               .name = mem_name,
+               .reserved = 0,
+               .socket_id = socket_id,
+               .extra_flag = 0
+       };
+
+       rules_tbl = rte_hash_create(&rule_hash_tbl_params);
+       if (rules_tbl == NULL) {
+               RTE_LOG(ERR, LPM, "LPM rules hash table allocation failed: %s (%d)",
+                                 rte_strerror(rte_errno), rte_errno);
+               goto fail_wo_unlock;
+       }
+
+       /* allocate tbl8 indexes pool */
+       tbl8_pool = rte_malloc(NULL,
+                       sizeof(uint32_t) * config->number_tbl8s,
+                       RTE_CACHE_LINE_SIZE);
+       if (tbl8_pool == NULL) {
+               RTE_LOG(ERR, LPM, "LPM tbl8 pool allocation failed: %s (%d)",
+                                 rte_strerror(rte_errno), rte_errno);
+               rte_errno = ENOMEM;
+               goto fail_wo_unlock;
+       }
+
+       /* allocate tbl8 headers */
+       tbl8_hdrs = rte_malloc(NULL,
+                       sizeof(struct rte_lpm_tbl8_hdr) * config->number_tbl8s,
+                       RTE_CACHE_LINE_SIZE);
+       if (tbl8_hdrs == NULL) {
+               RTE_LOG(ERR, LPM, "LPM tbl8 headers allocation failed: %s (%d)",
+                                 rte_strerror(rte_errno), rte_errno);
+               rte_errno = ENOMEM;
+               goto fail_wo_unlock;
+       }
+
        snprintf(mem_name, sizeof(mem_name), "LPM_%s", name);
 
        /* Determine the amount of memory to allocate. */
        mem_size = sizeof(*lpm) + (sizeof(lpm->tbl8[0]) *
                        RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * config->number_tbl8s);
-       rules_size = sizeof(struct rte_lpm6_rule) * config->max_rules;
 
        rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
 
@@ -154,7 +327,7 @@ rte_lpm6_create(const char *name, int socket_id,
        lpm = NULL;
        if (te != NULL) {
                rte_errno = EEXIST;
-               goto exit;
+               goto fail;
        }
 
        /* allocate tailq entry */
@@ -162,7 +335,7 @@ rte_lpm6_create(const char *name, int socket_id,
        if (te == NULL) {
                RTE_LOG(ERR, LPM, "Failed to allocate tailq entry!\n");
                rte_errno = ENOMEM;
-               goto exit;
+               goto fail;
        }
 
        /* Allocate memory to store the LPM data structures. */
@@ -173,34 +346,35 @@ rte_lpm6_create(const char *name, int socket_id,
                RTE_LOG(ERR, LPM, "LPM memory allocation failed\n");
                rte_free(te);
                rte_errno = ENOMEM;
-               goto exit;
-       }
-
-       lpm->rules_tbl = rte_zmalloc_socket(NULL,
-                       (size_t)rules_size, RTE_CACHE_LINE_SIZE, socket_id);
-
-       if (lpm->rules_tbl == NULL) {
-               RTE_LOG(ERR, LPM, "LPM rules_tbl allocation failed\n");
-               rte_free(lpm);
-               lpm = NULL;
-               rte_free(te);
-               rte_errno = ENOMEM;
-               goto exit;
+               goto fail;
        }
 
        /* Save user arguments. */
        lpm->max_rules = config->max_rules;
        lpm->number_tbl8s = config->number_tbl8s;
        snprintf(lpm->name, sizeof(lpm->name), "%s", name);
+       lpm->rules_tbl = rules_tbl;
+       lpm->tbl8_pool = tbl8_pool;
+       lpm->tbl8_hdrs = tbl8_hdrs;
+
+       /* init the stack */
+       tbl8_pool_init(lpm);
 
        te->data = (void *) lpm;
 
        TAILQ_INSERT_TAIL(lpm_list, te, next);
+       rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+       return lpm;
 
-exit:
+fail:
        rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
 
-       return lpm;
+fail_wo_unlock:
+       rte_free(tbl8_hdrs);
+       rte_free(tbl8_pool);
+       rte_hash_free(rules_tbl);
+
+       return NULL;
 }
 
 /*
@@ -259,50 +433,88 @@ rte_lpm6_free(struct rte_lpm6 *lpm)
 
        rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
 
-       rte_free(lpm->rules_tbl);
+       rte_free(lpm->tbl8_hdrs);
+       rte_free(lpm->tbl8_pool);
+       rte_hash_free(lpm->rules_tbl);
        rte_free(lpm);
        rte_free(te);
 }
 
+/* Find a rule */
+static inline int
+rule_find_with_key(struct rte_lpm6 *lpm,
+                 const struct rte_lpm6_rule_key *rule_key,
+                 uint32_t *next_hop)
+{
+       uint64_t hash_val;
+       int ret;
+
+       /* lookup for a rule */
+       ret = rte_hash_lookup_data(lpm->rules_tbl, (const void *) rule_key,
+               (void **) &hash_val);
+       if (ret >= 0) {
+               *next_hop = (uint32_t) hash_val;
+               return 1;
+       }
+
+       return 0;
+}
+
+/* Find a rule */
+static int
+rule_find(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
+                 uint32_t *next_hop)
+{
+       struct rte_lpm6_rule_key rule_key;
+
+       /* init a rule key */
+       rule_key_init(&rule_key, ip, depth);
+
+       return rule_find_with_key(lpm, &rule_key, next_hop);
+}
+
 /*
  * Checks if a rule already exists in the rules table and updates
  * the nexthop if so. Otherwise it adds a new rule if enough space is available.
+ *
+ * Returns:
+ *    0 - next hop of existing rule is updated
+ *    1 - new rule successfully added
+ *   <0 - error
  */
-static inline int32_t
-rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint32_t next_hop, uint8_t depth)
+static inline int
+rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, uint32_t next_hop)
 {
-       uint32_t rule_index;
-
-       /* Scan through rule list to see if rule already exists. */
-       for (rule_index = 0; rule_index < lpm->used_rules; rule_index++) {
+       int ret, rule_exist;
+       struct rte_lpm6_rule_key rule_key;
+       uint32_t unused;
 
-               /* If rule already exists update its next_hop and return. */
-               if ((memcmp (lpm->rules_tbl[rule_index].ip, ip,
-                               RTE_LPM6_IPV6_ADDR_SIZE) == 0) &&
-                               lpm->rules_tbl[rule_index].depth == depth) {
-                       lpm->rules_tbl[rule_index].next_hop = next_hop;
+       /* init a rule key */
+       rule_key_init(&rule_key, ip, depth);
 
-                       return rule_index;
-               }
-       }
+       /* Scan through rule list to see if rule already exists. */
+       rule_exist = rule_find_with_key(lpm, &rule_key, &unused);
 
        /*
         * If rule does not exist check if there is space to add a new rule to
         * this rule group. If there is no space return error.
         */
-       if (lpm->used_rules == lpm->max_rules) {
+       if (!rule_exist && lpm->used_rules == lpm->max_rules)
                return -ENOSPC;
-       }
 
-       /* If there is space for the new rule add it. */
-       rte_memcpy(lpm->rules_tbl[rule_index].ip, ip, RTE_LPM6_IPV6_ADDR_SIZE);
-       lpm->rules_tbl[rule_index].next_hop = next_hop;
-       lpm->rules_tbl[rule_index].depth = depth;
+       /* add the rule or update rules next hop */
+       ret = rte_hash_add_key_data(lpm->rules_tbl, &rule_key,
+               (void *)(uintptr_t) next_hop);
+       if (ret < 0)
+               return ret;
 
        /* Increment the used rules counter for this rule group. */
-       lpm->used_rules++;
+       if (!rule_exist) {
+               lpm->used_rules++;
+               return 1;
+       }
 
-       return rule_index;
+       return 0;
 }
 
 /*
@@ -311,24 +523,24 @@ rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint32_t next_hop, uint8_t depth)
  * in the IP address returns a match.
  */
 static void
-expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t depth,
-               uint32_t next_hop)
+expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t old_depth,
+               uint8_t new_depth, uint32_t next_hop, uint8_t valid)
 {
        uint32_t tbl8_group_end, tbl8_gindex_next, j;
 
        tbl8_group_end = tbl8_gindex + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
 
        struct rte_lpm6_tbl_entry new_tbl8_entry = {
-               .valid = VALID,
-               .valid_group = VALID,
-               .depth = depth,
+               .valid = valid,
+               .valid_group = valid,
+               .depth = new_depth,
                .next_hop = next_hop,
                .ext_entry = 0,
        };
 
        for (j = tbl8_gindex; j < tbl8_group_end; j++) {
                if (!lpm->tbl8[j].valid || (lpm->tbl8[j].ext_entry == 0
-                               && lpm->tbl8[j].depth <= depth)) {
+                               && lpm->tbl8[j].depth <= old_depth)) {
 
                        lpm->tbl8[j] = new_tbl8_entry;
 
@@ -336,11 +548,101 @@ expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t depth,
 
                        tbl8_gindex_next = lpm->tbl8[j].lpm6_tbl8_gindex
                                        * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
-                       expand_rule(lpm, tbl8_gindex_next, depth, next_hop);
+                       expand_rule(lpm, tbl8_gindex_next, old_depth, new_depth,
+                                       next_hop, valid);
                }
        }
 }
 
+/*
+ * Init a tbl8 header
+ */
+static inline void
+init_tbl8_header(struct rte_lpm6 *lpm, uint32_t tbl_ind,
+               uint32_t owner_tbl_ind, uint32_t owner_entry_ind)
+{
+       struct rte_lpm_tbl8_hdr *tbl_hdr = &lpm->tbl8_hdrs[tbl_ind];
+       tbl_hdr->owner_tbl_ind = owner_tbl_ind;
+       tbl_hdr->owner_entry_ind = owner_entry_ind;
+       tbl_hdr->ref_cnt = 0;
+}
+
+/*
+ * Calculate index to the table based on the number and position
+ * of the bytes being inspected in this step.
+ */
+static uint32_t
+get_bitshift(const uint8_t *ip, uint8_t first_byte, uint8_t bytes)
+{
+       uint32_t entry_ind, i;
+       int8_t bitshift;
+
+       entry_ind = 0;
+       for (i = first_byte; i < (uint32_t)(first_byte + bytes); i++) {
+               bitshift = (int8_t)((bytes - i)*BYTE_SIZE);
+
+               if (bitshift < 0)
+                       bitshift = 0;
+               entry_ind = entry_ind | ip[i-1] << bitshift;
+       }
+
+       return entry_ind;
+}
+
+/*
+ * Simulate adding a new route to the LPM, counting the number
+ * of new tables that will be needed.
+ *
+ * It returns 0 on success, or 1 if
+ * the process needs to be continued by calling the function again.
+ */
+static inline int
+simulate_add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl,
+               struct rte_lpm6_tbl_entry **next_tbl, const uint8_t *ip,
+               uint8_t bytes, uint8_t first_byte, uint8_t depth,
+               uint32_t *need_tbl_nb)
+{
+       uint32_t entry_ind;
+       uint8_t bits_covered;
+       uint32_t next_tbl_ind;
+
+       /*
+        * Calculate index to the table based on the number and position
+        * of the bytes being inspected in this step.
+        */
+       entry_ind = get_bitshift(ip, first_byte, bytes);
+
+       /* Number of bits covered in this step */
+       bits_covered = (uint8_t)((bytes+first_byte-1)*BYTE_SIZE);
+
+       if (depth <= bits_covered) {
+               *need_tbl_nb = 0;
+               return 0;
+       }
+
+       if (tbl[entry_ind].valid == 0 || tbl[entry_ind].ext_entry == 0) {
+               /* from this point on a new table is needed on each level
+                * that is not covered yet
+                */
+               depth -= bits_covered;
+               uint32_t cnt = depth >> 3; /* depth / BYTE_SIZE */
+               if (depth & 7) /* 0b00000111 */
+                       /* if depth % 8 > 0 then one more table is needed
+                        * for those last bits
+                        */
+                       cnt++;
+
+               *need_tbl_nb = cnt;
+               return 0;
+       }
+
+       next_tbl_ind = tbl[entry_ind].lpm6_tbl8_gindex;
+       *next_tbl = &(lpm->tbl8[next_tbl_ind *
+               RTE_LPM6_TBL8_GROUP_NUM_ENTRIES]);
+       *need_tbl_nb = 0;
+       return 1;
+}
+
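
The counting rule above: once a step lands on an entry with no tbl8 to
descend into, every remaining 8-bit level needs a fresh table, plus one
more for a trailing partial byte. A worked sketch of just that arithmetic:

    #include <stdio.h>

    /* assumes depth > bits_covered, as guarded in simulate_add_step() */
    static unsigned
    tables_needed(unsigned depth, unsigned bits_covered)
    {
        unsigned rem = depth - bits_covered;

        return (rem >> 3) + ((rem & 7) ? 1 : 0);
    }

    int main(void)
    {
        /* a /61 route below an empty tbl24: 61 - 24 = 37 bits left,
         * i.e. 4 full tbl8 levels plus 1 partial -> 5 new tables
         */
        printf("%u\n", tables_needed(61, 24));
        return 0;
    }
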
 /*
  * Partially adds a new route to the data structure (tbl24+tbl8s).
  * It returns 0 on success, a negative number on failure, or 1 if
@@ -348,25 +650,21 @@ expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t depth,
  */
 static inline int
 add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl,
-               struct rte_lpm6_tbl_entry **tbl_next, uint8_t *ip, uint8_t bytes,
-               uint8_t first_byte, uint8_t depth, uint32_t next_hop)
+               uint32_t tbl_ind, struct rte_lpm6_tbl_entry **next_tbl,
+               uint32_t *next_tbl_ind, uint8_t *ip, uint8_t bytes,
+               uint8_t first_byte, uint8_t depth, uint32_t next_hop,
+               uint8_t is_new_rule)
 {
-       uint32_t tbl_index, tbl_range, tbl8_group_start, tbl8_group_end, i;
-       int32_t tbl8_gindex;
-       int8_t bitshift;
+       uint32_t entry_ind, tbl_range, tbl8_group_start, tbl8_group_end, i;
+       uint32_t tbl8_gindex;
        uint8_t bits_covered;
+       int ret;
 
        /*
         * Calculate index to the table based on the number and position
         * of the bytes being inspected in this step.
         */
-       tbl_index = 0;
-       for (i = first_byte; i < (uint32_t)(first_byte + bytes); i++) {
-               bitshift = (int8_t)((bytes - i)*BYTE_SIZE);
-
-               if (bitshift < 0) bitshift = 0;
-               tbl_index = tbl_index | ip[i-1] << bitshift;
-       }
+       entry_ind = get_bitshift(ip, first_byte, bytes);
 
        /* Number of bits covered in this step */
        bits_covered = (uint8_t)((bytes+first_byte-1)*BYTE_SIZE);
@@ -378,7 +676,7 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl,
        if (depth <= bits_covered) {
                tbl_range = 1 << (bits_covered - depth);
 
-               for (i = tbl_index; i < (tbl_index + tbl_range); i++) {
+               for (i = entry_ind; i < (entry_ind + tbl_range); i++) {
                        if (!tbl[i].valid || (tbl[i].ext_entry == 0 &&
                                        tbl[i].depth <= depth)) {
 
@@ -400,10 +698,15 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl,
                                 */
                                tbl8_gindex = tbl[i].lpm6_tbl8_gindex *
                                                RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
-                               expand_rule(lpm, tbl8_gindex, depth, next_hop);
+                               expand_rule(lpm, tbl8_gindex, depth, depth,
+                                               next_hop, VALID);
                        }
                }
 
+               /* update tbl8 rule reference counter */
+               if (tbl_ind != TBL24_IND && is_new_rule)
+                       lpm->tbl8_hdrs[tbl_ind].ref_cnt++;
+
                return 0;
        }
        /*
@@ -412,12 +715,24 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl,
         */
        else {
                /* If it's invalid a new tbl8 is needed */
-               if (!tbl[tbl_index].valid) {
-                       if (lpm->next_tbl8 < lpm->number_tbl8s)
-                               tbl8_gindex = (lpm->next_tbl8)++;
-                       else
+               if (!tbl[entry_ind].valid) {
+                       /* get a new table */
+                       ret = tbl8_get(lpm, &tbl8_gindex);
+                       if (ret != 0)
                                return -ENOSPC;
 
+                       /* invalidate all new tbl8 entries */
+                       tbl8_group_start = tbl8_gindex *
+                                       RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
+                       memset(&lpm->tbl8[tbl8_group_start], 0,
+                                         RTE_LPM6_TBL8_GROUP_NUM_ENTRIES *
+                                         sizeof(struct rte_lpm6_tbl_entry));
+
+                       /* init the new table's header:
+                        *   save the reference to the owner table
+                        */
+                       init_tbl8_header(lpm, tbl8_gindex, tbl_ind, entry_ind);
+
+                       /* reference to a new tbl8 */
                        struct rte_lpm6_tbl_entry new_tbl_entry = {
                                .lpm6_tbl8_gindex = tbl8_gindex,
                                .depth = 0,
@@ -426,17 +741,20 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl,
                                .ext_entry = 1,
                        };
 
-                       tbl[tbl_index] = new_tbl_entry;
+                       tbl[entry_ind] = new_tbl_entry;
+
+                       /* update the current table's reference counter */
+                       if (tbl_ind != TBL24_IND)
+                               lpm->tbl8_hdrs[tbl_ind].ref_cnt++;
                }
                /*
-                * If it's valid but not extended the rule that was stored *
+                * If it's valid but not extended the rule that was stored
                 * here needs to be moved to the next table.
                 */
-               else if (tbl[tbl_index].ext_entry == 0) {
-                       /* Search for free tbl8 group. */
-                       if (lpm->next_tbl8 < lpm->number_tbl8s)
-                               tbl8_gindex = (lpm->next_tbl8)++;
-                       else
+               else if (tbl[entry_ind].ext_entry == 0) {
+                       /* get a new tbl8 index */
+                       ret = tbl8_get(lpm, &tbl8_gindex);
+                       if (ret != 0)
                                return -ENOSPC;
 
                        tbl8_group_start = tbl8_gindex *
@@ -444,13 +762,22 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl,
                        tbl8_group_end = tbl8_group_start +
                                        RTE_LPM6_TBL8_GROUP_NUM_ENTRIES;
 
+                       struct rte_lpm6_tbl_entry tbl_entry = {
+                               .next_hop = tbl[entry_ind].next_hop,
+                               .depth = tbl[entry_ind].depth,
+                               .valid = VALID,
+                               .valid_group = VALID,
+                               .ext_entry = 0
+                       };
+
                        /* Populate new tbl8 with tbl value. */
-                       for (i = tbl8_group_start; i < tbl8_group_end; i++) {
-                               lpm->tbl8[i].valid = VALID;
-                               lpm->tbl8[i].depth = tbl[tbl_index].depth;
-                               lpm->tbl8[i].next_hop = tbl[tbl_index].next_hop;
-                               lpm->tbl8[i].ext_entry = 0;
-                       }
+                       for (i = tbl8_group_start; i < tbl8_group_end; i++)
+                               lpm->tbl8[i] = tbl_entry;
+
+                       /* init the new table's header:
+                        *   save the reference to the owner table
+                        */
+                       init_tbl8_header(lpm, tbl8_gindex, tbl_ind, entry_ind);
 
                        /*
                         * Update tbl entry to point to new tbl8 entry. Note: The
@@ -465,11 +792,16 @@ add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl,
                                .ext_entry = 1,
                        };
 
-                       tbl[tbl_index] = new_tbl_entry;
+                       tbl[entry_ind] = new_tbl_entry;
+
+                       /* update the current table's reference counter */
+                       if (tbl_ind != TBL24_IND)
+                               lpm->tbl8_hdrs[tbl_ind].ref_cnt++;
                }
 
-               *tbl_next = &(lpm->tbl8[tbl[tbl_index].lpm6_tbl8_gindex *
-                               RTE_LPM6_TBL8_GROUP_NUM_ENTRIES]);
+               *next_tbl_ind = tbl[entry_ind].lpm6_tbl8_gindex;
+               *next_tbl = &(lpm->tbl8[*next_tbl_ind *
+                                 RTE_LPM6_TBL8_GROUP_NUM_ENTRIES]);
        }
 
        return 1;
@@ -486,13 +818,56 @@ rte_lpm6_add_v20(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
 }
 VERSION_SYMBOL(rte_lpm6_add, _v20, 2.0);
 
+
+/*
+ * Simulate adding a route to LPM
+ *
+ * Returns:
+ *    0 on success
+ *    -ENOSPC not enough tbl8s left
+ */
+static int
+simulate_add(struct rte_lpm6 *lpm, const uint8_t *masked_ip, uint8_t depth)
+{
+       struct rte_lpm6_tbl_entry *tbl;
+       struct rte_lpm6_tbl_entry *tbl_next = NULL;
+       int ret, i;
+
+       /* number of new tables needed for a step */
+       uint32_t need_tbl_nb;
+       /* total number of new tables needed */
+       uint32_t total_need_tbl_nb;
+
+       /* Inspect the first three bytes through tbl24 on the first step. */
+       ret = simulate_add_step(lpm, lpm->tbl24, &tbl_next, masked_ip,
+                       ADD_FIRST_BYTE, 1, depth, &need_tbl_nb);
+       total_need_tbl_nb = need_tbl_nb;
+       /*
+        * Inspect one by one the rest of the bytes until
+        * the process is completed.
+        */
+       for (i = ADD_FIRST_BYTE; i < RTE_LPM6_IPV6_ADDR_SIZE && ret == 1; i++) {
+               tbl = tbl_next;
+               ret = simulate_add_step(lpm, tbl, &tbl_next, masked_ip, 1,
+                               (uint8_t)(i+1), depth, &need_tbl_nb);
+               total_need_tbl_nb += need_tbl_nb;
+       }
+
+       if (tbl8_available(lpm) < total_need_tbl_nb)
+               /* not enough tbl8s to add a rule */
+               return -ENOSPC;
+
+       return 0;
+}
+
 int
 rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
                uint32_t next_hop)
 {
        struct rte_lpm6_tbl_entry *tbl;
        struct rte_lpm6_tbl_entry *tbl_next = NULL;
-       int32_t rule_index;
+       /* init to avoid compiler warning */
+       uint32_t tbl_next_num = 123456;
        int status;
        uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
        int i;
@@ -502,26 +877,26 @@ rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
                return -EINVAL;
 
        /* Copy the IP and mask it to avoid modifying user's input data. */
-       memcpy(masked_ip, ip, RTE_LPM6_IPV6_ADDR_SIZE);
-       mask_ip(masked_ip, depth);
+       ip6_copy_addr(masked_ip, ip);
+       ip6_mask_addr(masked_ip, depth);
 
-       /* Add the rule to the rule table. */
-       rule_index = rule_add(lpm, masked_ip, next_hop, depth);
+       /* Simulate adding a new route */
+       int ret = simulate_add(lpm, masked_ip, depth);
+       if (ret < 0)
+               return ret;
 
+       /* Add the rule to the rule table. */
+       int is_new_rule = rule_add(lpm, masked_ip, depth, next_hop);
        /* If there is no space available for new rule return error. */
-       if (rule_index < 0) {
-               return rule_index;
-       }
+       if (is_new_rule < 0)
+               return is_new_rule;
 
        /* Inspect the first three bytes through tbl24 on the first step. */
        tbl = lpm->tbl24;
-       status = add_step (lpm, tbl, &tbl_next, masked_ip, ADD_FIRST_BYTE, 1,
-                       depth, next_hop);
-       if (status < 0) {
-               rte_lpm6_delete(lpm, masked_ip, depth);
-
-               return status;
-       }
+       status = add_step(lpm, tbl, TBL24_IND, &tbl_next, &tbl_next_num,
+                       masked_ip, ADD_FIRST_BYTE, 1, depth, next_hop,
+                       is_new_rule);
+       assert(status >= 0);
 
        /*
         * Inspect one by one the rest of the bytes until
@@ -529,13 +904,10 @@ rte_lpm6_add_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
         */
        for (i = ADD_FIRST_BYTE; i < RTE_LPM6_IPV6_ADDR_SIZE && status == 1; i++) {
                tbl = tbl_next;
-               status = add_step (lpm, tbl, &tbl_next, masked_ip, 1, (uint8_t)(i+1),
-                               depth, next_hop);
-               if (status < 0) {
-                       rte_lpm6_delete(lpm, masked_ip, depth);
-
-                       return status;
-               }
+               status = add_step(lpm, tbl, tbl_next_num, &tbl_next,
+                               &tbl_next_num, masked_ip, 1, (uint8_t)(i+1),
+                               depth, next_hop, is_new_rule);
+               assert(status >= 0);
        }
 
        return status;
@@ -610,9 +982,8 @@ rte_lpm6_lookup_v1705(const struct rte_lpm6 *lpm, uint8_t *ip,
        uint32_t tbl24_index;
 
        /* DEBUG: Check user input arguments. */
-       if ((lpm == NULL) || (ip == NULL) || (next_hop == NULL)) {
+       if ((lpm == NULL) || (ip == NULL) || (next_hop == NULL))
                return -EINVAL;
-       }
 
        first_byte = LOOKUP_FIRST_BYTE;
        tbl24_index = (ip[0] << BYTES2_SIZE) | (ip[1] << BYTE_SIZE) | ip[2];
@@ -648,9 +1019,8 @@ rte_lpm6_lookup_bulk_func_v20(const struct rte_lpm6 *lpm,
        int status;
 
        /* DEBUG: Check user input arguments. */
-       if ((lpm == NULL) || (ips == NULL) || (next_hops == NULL)) {
+       if ((lpm == NULL) || (ips == NULL) || (next_hops == NULL))
                return -EINVAL;
-       }
 
        for (i = 0; i < n; i++) {
                first_byte = LOOKUP_FIRST_BYTE;
@@ -724,30 +1094,6 @@ MAP_STATIC_SYMBOL(int rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm,
                                int32_t *next_hops, unsigned int n),
                rte_lpm6_lookup_bulk_func_v1705);
 
-/*
- * Finds a rule in rule table.
- * NOTE: Valid range for depth parameter is 1 .. 128 inclusive.
- */
-static inline int32_t
-rule_find(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
-{
-       uint32_t rule_index;
-
-       /* Scan used rules at given depth to find rule. */
-       for (rule_index = 0; rule_index < lpm->used_rules; rule_index++) {
-               /* If rule is found return the rule index. */
-               if ((memcmp (lpm->rules_tbl[rule_index].ip, ip,
-                               RTE_LPM6_IPV6_ADDR_SIZE) == 0) &&
-                               lpm->rules_tbl[rule_index].depth == depth) {
-
-                       return rule_index;
-               }
-       }
-
-       /* If rule is not found return -ENOENT. */
-       return -ENOENT;
-}
-
 /*
  * Look for a rule in the high-level rules table
  */
@@ -775,8 +1121,7 @@ int
 rte_lpm6_is_rule_present_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
                uint32_t *next_hop)
 {
-       uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE];
-       int32_t rule_index;
+       uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
 
        /* Check user arguments. */
        if ((lpm == NULL) || next_hop == NULL || ip == NULL ||
@@ -784,19 +1129,10 @@ rte_lpm6_is_rule_present_v1705(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
                return -EINVAL;
 
        /* Copy the IP and mask it to avoid modifying user's input data. */
-       memcpy(ip_masked, ip, RTE_LPM6_IPV6_ADDR_SIZE);
-       mask_ip(ip_masked, depth);
-
-       /* Look for the rule using rule_find. */
-       rule_index = rule_find(lpm, ip_masked, depth);
-
-       if (rule_index >= 0) {
-               *next_hop = lpm->rules_tbl[rule_index].next_hop;
-               return 1;
-       }
+       ip6_copy_addr(masked_ip, ip);
+       ip6_mask_addr(masked_ip, depth);
 
-       /* If rule is not found return 0. */
-       return 0;
+       return rule_find(lpm, masked_ip, depth, next_hop);
 }
 BIND_DEFAULT_SYMBOL(rte_lpm6_is_rule_present, _v1705, 17.05);
 MAP_STATIC_SYMBOL(int rte_lpm6_is_rule_present(struct rte_lpm6 *lpm,
@@ -806,133 +1142,66 @@ MAP_STATIC_SYMBOL(int rte_lpm6_is_rule_present(struct rte_lpm6 *lpm,
 /*
  * Delete a rule from the rule table.
  * NOTE: Valid range for depth parameter is 1 .. 128 inclusive.
+ * Returns:
+ *    0 on success
+ *   <0 on failure
  */
-static inline void
-rule_delete(struct rte_lpm6 *lpm, int32_t rule_index)
-{
-       /*
-        * Overwrite redundant rule with last rule in group and decrement rule
-        * counter.
-        */
-       lpm->rules_tbl[rule_index] = lpm->rules_tbl[lpm->used_rules-1];
-       lpm->used_rules--;
-}
-
-/*
- * Deletes a rule
- */
-int
-rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
+static inline int
+rule_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
 {
-       int32_t rule_to_delete_index;
-       uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE];
-       unsigned i;
-
-       /*
-        * Check input arguments.
-        */
-       if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH)) {
-               return -EINVAL;
-       }
-
-       /* Copy the IP and mask it to avoid modifying user's input data. */
-       memcpy(ip_masked, ip, RTE_LPM6_IPV6_ADDR_SIZE);
-       mask_ip(ip_masked, depth);
-
-       /*
-        * Find the index of the input rule, that needs to be deleted, in the
-        * rule table.
-        */
-       rule_to_delete_index = rule_find(lpm, ip_masked, depth);
-
-       /*
-        * Check if rule_to_delete_index was found. If no rule was found the
-        * function rule_find returns -ENOENT.
-        */
-       if (rule_to_delete_index < 0)
-               return rule_to_delete_index;
-
-       /* Delete the rule from the rule table. */
-       rule_delete(lpm, rule_to_delete_index);
+       int ret;
+       struct rte_lpm6_rule_key rule_key;
 
-       /*
-        * Set all the table entries to 0 (ie delete every rule
-        * from the data structure.
-        */
-       lpm->next_tbl8 = 0;
-       memset(lpm->tbl24, 0, sizeof(lpm->tbl24));
-       memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0])
-                       * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s);
+       /* init rule key */
+       rule_key_init(&rule_key, ip, depth);
 
-       /*
-        * Add every rule again (except for the one that was removed from
-        * the rules table).
-        */
-       for (i = 0; i < lpm->used_rules; i++) {
-               rte_lpm6_add(lpm, lpm->rules_tbl[i].ip, lpm->rules_tbl[i].depth,
-                               lpm->rules_tbl[i].next_hop);
-       }
+       /* delete the rule */
+       ret = rte_hash_del_key(lpm->rules_tbl, (void *) &rule_key);
+       if (ret >= 0)
+               lpm->used_rules--;
 
-       return 0;
+       return ret;
 }
 
 /*
  * Deletes a group of rules
+ *
+ * Note that the function rebuilds the lpm table,
+ * rather than doing incremental updates like
+ * the regular delete function
  */
 int
 rte_lpm6_delete_bulk_func(struct rte_lpm6 *lpm,
-               uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths, unsigned n)
+               uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths,
+               unsigned n)
 {
-       int32_t rule_to_delete_index;
-       uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE];
+       uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
        unsigned i;
 
-       /*
-        * Check input arguments.
-        */
-       if ((lpm == NULL) || (ips == NULL) || (depths == NULL)) {
+       /* Check input arguments. */
+       if ((lpm == NULL) || (ips == NULL) || (depths == NULL))
                return -EINVAL;
-       }
 
        for (i = 0; i < n; i++) {
-               /* Copy the IP and mask it to avoid modifying user's input data. */
-               memcpy(ip_masked, ips[i], RTE_LPM6_IPV6_ADDR_SIZE);
-               mask_ip(ip_masked, depths[i]);
-
-               /*
-                * Find the index of the input rule, that needs to be deleted, in the
-                * rule table.
-                */
-               rule_to_delete_index = rule_find(lpm, ip_masked, depths[i]);
-
-               /*
-                * Check if rule_to_delete_index was found. If no rule was found the
-                * function rule_find returns -ENOENT.
-                */
-               if (rule_to_delete_index < 0)
-                       continue;
-
-               /* Delete the rule from the rule table. */
-               rule_delete(lpm, rule_to_delete_index);
+               ip6_copy_addr(masked_ip, ips[i]);
+               ip6_mask_addr(masked_ip, depths[i]);
+               rule_delete(lpm, masked_ip, depths[i]);
        }
 
        /*
         * Set all the table entries to 0 (ie delete every rule
         * from the data structure.
         */
-       lpm->next_tbl8 = 0;
        memset(lpm->tbl24, 0, sizeof(lpm->tbl24));
        memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0])
                        * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s);
+       tbl8_pool_init(lpm);
 
        /*
         * Add every rule again (except for the ones that were removed from
         * the rules table).
         */
-       for (i = 0; i < lpm->used_rules; i++) {
-               rte_lpm6_add(lpm, lpm->rules_tbl[i].ip, lpm->rules_tbl[i].depth,
-                               lpm->rules_tbl[i].next_hop);
-       }
+       rebuild_lpm(lpm);
 
        return 0;
 }
@@ -946,9 +1215,6 @@ rte_lpm6_delete_all(struct rte_lpm6 *lpm)
        /* Zero used rules counter. */
        lpm->used_rules = 0;
 
-       /* Zero next tbl8 index. */
-       lpm->next_tbl8 = 0;
-
        /* Zero tbl24. */
        memset(lpm->tbl24, 0, sizeof(lpm->tbl24));
 
@@ -956,6 +1222,268 @@ rte_lpm6_delete_all(struct rte_lpm6 *lpm)
        memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0]) *
                        RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s);
 
+       /* init pool of free tbl8 indexes */
+       tbl8_pool_init(lpm);
+
        /* Delete all rules from the rules table. */
-       memset(lpm->rules_tbl, 0, sizeof(struct rte_lpm6_rule) * lpm->max_rules);
+       rte_hash_reset(lpm->rules_tbl);
+}
+
+/*
+ * Convert a depth to a one byte long mask
+ *   Example: 4 will be converted to 0xF0
+ */
+static uint8_t __attribute__((pure))
+depth_to_mask_1b(uint8_t depth)
+{
+       /* To calculate a mask start with a 1 on the left hand side and right
+        * shift while populating the left hand side with 1's
+        */
+       return (signed char)0x80 >> (depth - 1);
+}
+
+/*
+ * Find a less specific rule
+ */
+static int
+rule_find_less_specific(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth,
+       struct rte_lpm6_rule *rule)
+{
+       int ret;
+       uint32_t next_hop;
+       uint8_t mask;
+       struct rte_lpm6_rule_key rule_key;
+
+       if (depth == 1)
+               return 0;
+
+       rule_key_init(&rule_key, ip, depth);
+
+       while (depth > 1) {
+               depth--;
+
+               /* each iteration zero one more bit of the key */
+               mask = depth & 7; /* depth % BYTE_SIZE */
+               if (mask > 0)
+                       mask = depth_to_mask_1b(mask);
+
+               rule_key.depth = depth;
+               rule_key.ip[depth >> 3] &= mask;
+
+               ret = rule_find_with_key(lpm, &rule_key, &next_hop);
+               if (ret) {
+                       rule->depth = depth;
+                       ip6_copy_addr(rule->ip, rule_key.ip);
+                       rule->next_hop = next_hop;
+                       return 1;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Find range of tbl8 cells occupied by a rule
+ */
+static void
+rule_find_range(struct rte_lpm6 *lpm, const uint8_t *ip, uint8_t depth,
+                 struct rte_lpm6_tbl_entry **from,
+                 struct rte_lpm6_tbl_entry **to,
+                 uint32_t *out_tbl_ind)
+{
+       uint32_t ind;
+       uint32_t first_3bytes = (uint32_t)ip[0] << 16 | ip[1] << 8 | ip[2];
+
+       if (depth <= 24) {
+               /* rule is within the top level */
+               ind = first_3bytes;
+               *from = &lpm->tbl24[ind];
+               ind += (1 << (24 - depth)) - 1;
+               *to = &lpm->tbl24[ind];
+               *out_tbl_ind = TBL24_IND;
+       } else {
+               /* top level entry */
+               struct rte_lpm6_tbl_entry *tbl = &lpm->tbl24[first_3bytes];
+               assert(tbl->ext_entry == 1);
+               /* first tbl8 */
+               uint32_t tbl_ind = tbl->lpm6_tbl8_gindex;
+               tbl = &lpm->tbl8[tbl_ind *
+                               RTE_LPM6_TBL8_GROUP_NUM_ENTRIES];
+               /* current ip byte, the top level is already behind */
+               uint8_t byte = 3;
+               /* minus top level */
+               depth -= 24;
+
+               /* iterate through levels (tbl8s)
+                * until we reach the last one
+                */
+               while (depth > 8) {
+                       tbl += ip[byte];
+                       assert(tbl->ext_entry == 1);
+                       /* go to the next level/tbl8 */
+                       tbl_ind = tbl->lpm6_tbl8_gindex;
+                       tbl = &lpm->tbl8[tbl_ind *
+                                       RTE_LPM6_TBL8_GROUP_NUM_ENTRIES];
+                       byte += 1;
+                       depth -= 8;
+               }
+
+               /* last level/tbl8 */
+               ind = ip[byte] & depth_to_mask_1b(depth);
+               *from = &tbl[ind];
+               ind += (1 << (8 - depth)) - 1;
+               *to = &tbl[ind];
+               *out_tbl_ind = tbl_ind;
+       }
+}
+
+/*
+ * Remove a table from the LPM tree
+ */
+static void
+remove_tbl(struct rte_lpm6 *lpm, struct rte_lpm_tbl8_hdr *tbl_hdr,
+                 uint32_t tbl_ind, struct rte_lpm6_rule *lsp_rule)
+{
+       struct rte_lpm6_tbl_entry *owner_entry;
+
+       if (tbl_hdr->owner_tbl_ind == TBL24_IND)
+               owner_entry = &lpm->tbl24[tbl_hdr->owner_entry_ind];
+       else {
+               uint32_t owner_tbl_ind = tbl_hdr->owner_tbl_ind;
+               owner_entry = &lpm->tbl8[
+                       owner_tbl_ind * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES +
+                       tbl_hdr->owner_entry_ind];
+
+               struct rte_lpm_tbl8_hdr *owner_tbl_hdr =
+                       &lpm->tbl8_hdrs[owner_tbl_ind];
+               if (--owner_tbl_hdr->ref_cnt == 0)
+                       remove_tbl(lpm, owner_tbl_hdr, owner_tbl_ind, lsp_rule);
+       }
+
+       assert(owner_entry->ext_entry == 1);
+
+       /* unlink the table */
+       if (lsp_rule != NULL) {
+               struct rte_lpm6_tbl_entry new_tbl_entry = {
+                       .next_hop = lsp_rule->next_hop,
+                       .depth = lsp_rule->depth,
+                       .valid = VALID,
+                       .valid_group = VALID,
+                       .ext_entry = 0
+               };
+
+               *owner_entry = new_tbl_entry;
+       } else {
+               struct rte_lpm6_tbl_entry new_tbl_entry = {
+                       .next_hop = 0,
+                       .depth = 0,
+                       .valid = INVALID,
+                       .valid_group = INVALID,
+                       .ext_entry = 0
+               };
+
+               *owner_entry = new_tbl_entry;
+       }
+
+       /* return the table to the pool */
+       tbl8_put(lpm, tbl_ind);
+}
+
+/*
+ * Deletes a rule
+ */
+int
+rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth)
+{
+       uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE];
+       struct rte_lpm6_rule lsp_rule_obj;
+       struct rte_lpm6_rule *lsp_rule;
+       int ret;
+       uint32_t tbl_ind;
+       struct rte_lpm6_tbl_entry *from, *to;
+
+       /* Check input arguments. */
+       if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH))
+               return -EINVAL;
+
+       /* Copy the IP and mask it to avoid modifying user's input data. */
+       ip6_copy_addr(masked_ip, ip);
+       ip6_mask_addr(masked_ip, depth);
+
+       /* Delete the rule from the rule table. */
+       ret = rule_delete(lpm, masked_ip, depth);
+       if (ret < 0)
+               return -ENOENT;
+
+       /* find rule cells */
+       rule_find_range(lpm, masked_ip, depth, &from, &to, &tbl_ind);
+
+       /* find a less specific rule (a rule with smaller depth)
+        * note: masked_ip will be modified, don't use it anymore
+        */
+       ret = rule_find_less_specific(lpm, masked_ip, depth,
+                       &lsp_rule_obj);
+       lsp_rule = ret ? &lsp_rule_obj : NULL;
+
+       /* decrement the table rule counter,
+        * note that tbl24 doesn't have a header
+        */
+       if (tbl_ind != TBL24_IND) {
+               struct rte_lpm_tbl8_hdr *tbl_hdr = &lpm->tbl8_hdrs[tbl_ind];
+               if (--tbl_hdr->ref_cnt == 0) {
+                       /* remove the table */
+                       remove_tbl(lpm, tbl_hdr, tbl_ind, lsp_rule);
+                       return 0;
+               }
+       }
+
+       /* iterate rule cells */
+       for (; from <= to; from++)
+               if (from->ext_entry == 1) {
+                       /* reference to a more specific space
+                        * of the prefix/rule. Entries in a more
+                        * specific space that are not used by
+                        * a more specific prefix must be occupied
+                        * by the prefix
+                        */
+                       if (lsp_rule != NULL)
+                               expand_rule(lpm,
+                                       from->lpm6_tbl8_gindex *
+                                       RTE_LPM6_TBL8_GROUP_NUM_ENTRIES,
+                                       depth, lsp_rule->depth,
+                                       lsp_rule->next_hop, VALID);
+                       else
+                               /* since the prefix has no less specific prefix,
+                                * its more specific space must be invalidated
+                                */
+                               expand_rule(lpm,
+                                       from->lpm6_tbl8_gindex *
+                                       RTE_LPM6_TBL8_GROUP_NUM_ENTRIES,
+                                       depth, 0, 0, INVALID);
+               } else if (from->depth == depth) {
+                       /* entry is not a reference and belongs to the prefix */
+                       if (lsp_rule != NULL) {
+                               struct rte_lpm6_tbl_entry new_tbl_entry = {
+                                       .next_hop = lsp_rule->next_hop,
+                                       .depth = lsp_rule->depth,
+                                       .valid = VALID,
+                                       .valid_group = VALID,
+                                       .ext_entry = 0
+                               };
+
+                               *from = new_tbl_entry;
+                       } else {
+                               struct rte_lpm6_tbl_entry new_tbl_entry = {
+                                       .next_hop = 0,
+                                       .depth = 0,
+                                       .valid = INVALID,
+                                       .valid_group = INVALID,
+                                       .ext_entry = 0
+                               };
+
+                               *from = new_tbl_entry;
+                       }
+               }
+
+       return 0;
 }
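
End-to-end, the rework is invisible at the API level except in behaviour:
deletes are now incremental instead of forcing a full rebuild, and an add
that would run out of tbl8s fails up front. A usage sketch with
illustrative sizes:

    #include <stdio.h>
    #include <rte_lpm6.h>

    static void
    lpm6_example(void)
    {
        struct rte_lpm6_config cfg = {
            .max_rules = 1024,
            .number_tbl8s = 1 << 16,
            .flags = 0,
        };
        uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE] = { 0x20, 0x01, 0x0d, 0xb8 };
        uint32_t next_hop;
        struct rte_lpm6 *lpm = rte_lpm6_create("example", 0 /* socket */, &cfg);

        if (lpm == NULL)
            return;

        rte_lpm6_add(lpm, ip, 32, 5);                /* 2001:db8::/32 -> 5 */
        if (rte_lpm6_lookup(lpm, ip, &next_hop) == 0)
            printf("nh %u\n", next_hop);             /* prints "nh 5" */
        rte_lpm6_delete(lpm, ip, 32);                /* no full rebuild */
        rte_lpm6_free(lpm);
    }
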
index 45ffb0d..94d9c4c 100644
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-version = 3
+version = 4
 sources = files('rte_mbuf.c', 'rte_mbuf_ptype.c', 'rte_mbuf_pool_ops.c')
 headers = files('rte_mbuf.h', 'rte_mbuf_ptype.h', 'rte_mbuf_pool_ops.h')
 deps += ['mempool']
index e714c5a..9790b4f 100644
@@ -296,11 +296,19 @@ const char *rte_get_rx_ol_flag_name(uint64_t mask)
        case PKT_RX_VLAN_STRIPPED: return "PKT_RX_VLAN_STRIPPED";
        case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP";
        case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST";
+       case PKT_RX_FDIR_ID: return "PKT_RX_FDIR_ID";
+       case PKT_RX_FDIR_FLX: return "PKT_RX_FDIR_FLX";
        case PKT_RX_QINQ_STRIPPED: return "PKT_RX_QINQ_STRIPPED";
+       case PKT_RX_QINQ: return "PKT_RX_QINQ";
        case PKT_RX_LRO: return "PKT_RX_LRO";
        case PKT_RX_TIMESTAMP: return "PKT_RX_TIMESTAMP";
        case PKT_RX_SEC_OFFLOAD: return "PKT_RX_SEC_OFFLOAD";
        case PKT_RX_SEC_OFFLOAD_FAILED: return "PKT_RX_SEC_OFFLOAD_FAILED";
+       case PKT_RX_OUTER_L4_CKSUM_BAD: return "PKT_RX_OUTER_L4_CKSUM_BAD";
+       case PKT_RX_OUTER_L4_CKSUM_GOOD: return "PKT_RX_OUTER_L4_CKSUM_GOOD";
+       case PKT_RX_OUTER_L4_CKSUM_INVALID:
+               return "PKT_RX_OUTER_L4_CKSUM_INVALID";
+
        default: return NULL;
        }
 }
@@ -333,12 +341,21 @@ rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
                { PKT_RX_VLAN_STRIPPED, PKT_RX_VLAN_STRIPPED, NULL },
                { PKT_RX_IEEE1588_PTP, PKT_RX_IEEE1588_PTP, NULL },
                { PKT_RX_IEEE1588_TMST, PKT_RX_IEEE1588_TMST, NULL },
+               { PKT_RX_FDIR_ID, PKT_RX_FDIR_ID, NULL },
+               { PKT_RX_FDIR_FLX, PKT_RX_FDIR_FLX, NULL },
                { PKT_RX_QINQ_STRIPPED, PKT_RX_QINQ_STRIPPED, NULL },
                { PKT_RX_LRO, PKT_RX_LRO, NULL },
                { PKT_RX_TIMESTAMP, PKT_RX_TIMESTAMP, NULL },
                { PKT_RX_SEC_OFFLOAD, PKT_RX_SEC_OFFLOAD, NULL },
                { PKT_RX_SEC_OFFLOAD_FAILED, PKT_RX_SEC_OFFLOAD_FAILED, NULL },
                { PKT_RX_QINQ, PKT_RX_QINQ, NULL },
+               { PKT_RX_OUTER_L4_CKSUM_BAD, PKT_RX_OUTER_L4_CKSUM_MASK, NULL },
+               { PKT_RX_OUTER_L4_CKSUM_GOOD, PKT_RX_OUTER_L4_CKSUM_MASK,
+                 NULL },
+               { PKT_RX_OUTER_L4_CKSUM_INVALID, PKT_RX_OUTER_L4_CKSUM_MASK,
+                 NULL },
+               { PKT_RX_OUTER_L4_CKSUM_UNKNOWN, PKT_RX_OUTER_L4_CKSUM_MASK,
+                 "PKT_RX_OUTER_L4_CKSUM_UNKNOWN" },
        };
        const char *name;
        unsigned int i;
@@ -373,7 +390,7 @@ rte_get_rx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
 const char *rte_get_tx_ol_flag_name(uint64_t mask)
 {
        switch (mask) {
-       case PKT_TX_VLAN_PKT: return "PKT_TX_VLAN_PKT";
+       case PKT_TX_VLAN: return "PKT_TX_VLAN";
        case PKT_TX_IP_CKSUM: return "PKT_TX_IP_CKSUM";
        case PKT_TX_TCP_CKSUM: return "PKT_TX_TCP_CKSUM";
        case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM";
@@ -393,8 +410,12 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask)
        case PKT_TX_TUNNEL_VXLAN_GPE: return "PKT_TX_TUNNEL_VXLAN_GPE";
        case PKT_TX_TUNNEL_IP: return "PKT_TX_TUNNEL_IP";
        case PKT_TX_TUNNEL_UDP: return "PKT_TX_TUNNEL_UDP";
+       case PKT_TX_QINQ: return "PKT_TX_QINQ";
        case PKT_TX_MACSEC: return "PKT_TX_MACSEC";
        case PKT_TX_SEC_OFFLOAD: return "PKT_TX_SEC_OFFLOAD";
+       case PKT_TX_UDP_SEG: return "PKT_TX_UDP_SEG";
+       case PKT_TX_OUTER_UDP_CKSUM: return "PKT_TX_OUTER_UDP_CKSUM";
+       case PKT_TX_METADATA: return "PKT_TX_METADATA";
        default: return NULL;
        }
 }
@@ -404,7 +425,7 @@ int
 rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
 {
        const struct flag_mask tx_flags[] = {
-               { PKT_TX_VLAN_PKT, PKT_TX_VLAN_PKT, NULL },
+               { PKT_TX_VLAN, PKT_TX_VLAN, NULL },
                { PKT_TX_IP_CKSUM, PKT_TX_IP_CKSUM, NULL },
                { PKT_TX_TCP_CKSUM, PKT_TX_L4_MASK, NULL },
                { PKT_TX_SCTP_CKSUM, PKT_TX_L4_MASK, NULL },
@@ -417,24 +438,20 @@ rte_get_tx_ol_flag_list(uint64_t mask, char *buf, size_t buflen)
                { PKT_TX_OUTER_IP_CKSUM, PKT_TX_OUTER_IP_CKSUM, NULL },
                { PKT_TX_OUTER_IPV4, PKT_TX_OUTER_IPV4, NULL },
                { PKT_TX_OUTER_IPV6, PKT_TX_OUTER_IPV6, NULL },
-               { PKT_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_MASK,
-                 "PKT_TX_TUNNEL_NONE" },
-               { PKT_TX_TUNNEL_GRE, PKT_TX_TUNNEL_MASK,
-                 "PKT_TX_TUNNEL_NONE" },
-               { PKT_TX_TUNNEL_IPIP, PKT_TX_TUNNEL_MASK,
-                 "PKT_TX_TUNNEL_NONE" },
-               { PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK,
-                 "PKT_TX_TUNNEL_NONE" },
-               { PKT_TX_TUNNEL_MPLSINUDP, PKT_TX_TUNNEL_MASK,
-                 "PKT_TX_TUNNEL_NONE" },
-               { PKT_TX_TUNNEL_VXLAN_GPE, PKT_TX_TUNNEL_MASK,
-                 "PKT_TX_TUNNEL_NONE" },
-               { PKT_TX_TUNNEL_IP, PKT_TX_TUNNEL_MASK,
-                 "PKT_TX_TUNNEL_NONE" },
-               { PKT_TX_TUNNEL_UDP, PKT_TX_TUNNEL_MASK,
-                 "PKT_TX_TUNNEL_NONE" },
+               { PKT_TX_TUNNEL_VXLAN, PKT_TX_TUNNEL_MASK, NULL },
+               { PKT_TX_TUNNEL_GRE, PKT_TX_TUNNEL_MASK, NULL },
+               { PKT_TX_TUNNEL_IPIP, PKT_TX_TUNNEL_MASK, NULL },
+               { PKT_TX_TUNNEL_GENEVE, PKT_TX_TUNNEL_MASK, NULL },
+               { PKT_TX_TUNNEL_MPLSINUDP, PKT_TX_TUNNEL_MASK, NULL },
+               { PKT_TX_TUNNEL_VXLAN_GPE, PKT_TX_TUNNEL_MASK, NULL },
+               { PKT_TX_TUNNEL_IP, PKT_TX_TUNNEL_MASK, NULL },
+               { PKT_TX_TUNNEL_UDP, PKT_TX_TUNNEL_MASK, NULL },
+               { PKT_TX_QINQ, PKT_TX_QINQ, NULL },
                { PKT_TX_MACSEC, PKT_TX_MACSEC, NULL },
                { PKT_TX_SEC_OFFLOAD, PKT_TX_SEC_OFFLOAD, NULL },
+               { PKT_TX_UDP_SEG, PKT_TX_UDP_SEG, NULL },
+               { PKT_TX_OUTER_UDP_CKSUM, PKT_TX_OUTER_UDP_CKSUM, NULL },
+               { PKT_TX_METADATA, PKT_TX_METADATA, NULL },
        };
        const char *name;
        unsigned int i;
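
With the tables above filled in, the dump helpers cover the new flag bits.
A usage sketch:

    #include <stdio.h>
    #include <rte_mbuf.h>

    static void
    dump_ol_flags(const struct rte_mbuf *m)
    {
        char buf[256];

        if (rte_get_rx_ol_flag_list(m->ol_flags, buf, sizeof(buf)) == 0)
            printf("rx: %s\n", buf);
        if (rte_get_tx_ol_flag_list(m->ol_flags, buf, sizeof(buf)) == 0)
            printf("tx: %s\n", buf);
    }
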
index 9ce5d76..3dbc669 100644
@@ -140,7 +140,7 @@ extern "C" {
  * The 2 vlans have been stripped by the hardware and their tci are
  * saved in mbuf->vlan_tci (inner) and mbuf->vlan_tci_outer (outer).
  * This can only happen if vlan stripping is enabled in the RX
- * configuration of the PMD. If this flag is set,
+ * configuration of the PMD.
  * When PKT_RX_QINQ_STRIPPED is set, the flags (PKT_RX_VLAN |
  * PKT_RX_VLAN_STRIPPED | PKT_RX_QINQ) must also be set.
  */
@@ -170,17 +170,53 @@ extern "C" {
 
 /**
  * The RX packet is a double VLAN, and the outer tci has been
- * saved in in mbuf->vlan_tci_outer.
+ * saved in mbuf->vlan_tci_outer. If PKT_RX_QINQ is set, PKT_RX_VLAN
+ * should also be set and the inner tci saved in mbuf->vlan_tci.
 * If the flag PKT_RX_QINQ_STRIPPED is also present, both VLAN
  * headers have been stripped from mbuf data, else they are still
  * present.
  */
 #define PKT_RX_QINQ          (1ULL << 20)
 
+/**
+ * Mask of bits used to determine the status of outer RX L4 checksum.
+ * - PKT_RX_OUTER_L4_CKSUM_UNKNOWN: no info about the outer RX L4 checksum
+ * - PKT_RX_OUTER_L4_CKSUM_BAD: the outer L4 checksum in the packet is wrong
+ * - PKT_RX_OUTER_L4_CKSUM_GOOD: the outer L4 checksum in the packet is valid
+ * - PKT_RX_OUTER_L4_CKSUM_INVALID: invalid outer L4 checksum state.
+ *
+ * The detection of PKT_RX_OUTER_L4_CKSUM_GOOD shall be based on the given
+ * HW capability. At a minimum, the PMD should support the
+ * PKT_RX_OUTER_L4_CKSUM_UNKNOWN and PKT_RX_OUTER_L4_CKSUM_BAD states
+ * if the DEV_RX_OFFLOAD_OUTER_UDP_CKSUM offload is available.
+ */
+#define PKT_RX_OUTER_L4_CKSUM_MASK     ((1ULL << 21) | (1ULL << 22))
+
+#define PKT_RX_OUTER_L4_CKSUM_UNKNOWN  0
+#define PKT_RX_OUTER_L4_CKSUM_BAD      (1ULL << 21)
+#define PKT_RX_OUTER_L4_CKSUM_GOOD     (1ULL << 22)
+#define PKT_RX_OUTER_L4_CKSUM_INVALID  ((1ULL << 21) | (1ULL << 22))
+
 /* add new RX flags here */
 
 /* add new TX flags here */
 
+/**
+ * Indicate that the metadata field in the mbuf is in use.
+ */
+#define PKT_TX_METADATA        (1ULL << 40)
+
+/**
+ * Outer UDP checksum offload flag. This flag is used for enabling
+ * outer UDP checksum offload in the PMD. To use it, the application needs to
+ * 1) Prepare the following in the mbuf:
+ *    a) Fill outer_l2_len and outer_l3_len.
+ *    b) Set the PKT_TX_OUTER_UDP_CKSUM flag.
+ *    c) Set the PKT_TX_OUTER_IPV4 or PKT_TX_OUTER_IPV6 flag.
+ * 2) Configure the DEV_TX_OFFLOAD_OUTER_UDP_CKSUM offload flag.
+ */
+#define PKT_TX_OUTER_UDP_CKSUM     (1ULL << 41)
+
 /**
  * UDP Fragmentation Offload flag. This flag is used for enabling UDP
 * fragmentation in SW or in HW. When UFO is used, mbuf->tso_segsz is used
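A minimal sketch (not part of this patch) of the mbuf setup prescribed by
the PKT_TX_OUTER_UDP_CKSUM comment above, assuming the port was started
with DEV_TX_OFFLOAD_OUTER_UDP_CKSUM enabled and the outer headers are
Ethernet/IPv4:

	/* 1a) fill the outer header lengths */
	m->outer_l2_len = sizeof(struct ether_hdr);
	m->outer_l3_len = sizeof(struct ipv4_hdr);
	/* 1b + 1c) request outer UDP checksum over an outer IPv4 header */
	m->ol_flags |= PKT_TX_OUTER_UDP_CKSUM | PKT_TX_OUTER_IPV4;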
@@ -334,16 +370,23 @@ extern "C" {
  * which can be set for packet.
  */
 #define PKT_TX_OFFLOAD_MASK (    \
+               PKT_TX_OUTER_IPV6 |      \
+               PKT_TX_OUTER_IPV4 |      \
+               PKT_TX_OUTER_IP_CKSUM |  \
+               PKT_TX_VLAN_PKT |        \
+               PKT_TX_IPV6 |            \
+               PKT_TX_IPV4 |            \
                PKT_TX_IP_CKSUM |        \
                PKT_TX_L4_MASK |         \
-               PKT_TX_OUTER_IP_CKSUM |  \
-               PKT_TX_TCP_SEG |         \
                PKT_TX_IEEE1588_TMST |   \
+               PKT_TX_TCP_SEG |         \
                PKT_TX_QINQ_PKT |        \
-               PKT_TX_VLAN_PKT |        \
                PKT_TX_TUNNEL_MASK |     \
                PKT_TX_MACSEC |          \
-               PKT_TX_SEC_OFFLOAD)
+               PKT_TX_SEC_OFFLOAD |     \
+               PKT_TX_UDP_SEG |         \
+               PKT_TX_OUTER_UDP_CKSUM | \
+               PKT_TX_METADATA)
 
 /**
  * Mbuf having an external buffer attached. shinfo in mbuf must be filled.
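A minimal sketch (not part of this patch) of using the mask, e.g. in a
driver's Tx prepare stage, to isolate and validate the offload requests
carried in ol_flags; supported_tx_ol is a hypothetical per-port
capability mask:

	uint64_t tx_ol = m->ol_flags & PKT_TX_OFFLOAD_MASK;

	if (tx_ol & ~supported_tx_ol)
		return -ENOTSUP;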
@@ -464,7 +507,9 @@ struct rte_mbuf {
        };
        uint16_t nb_segs;         /**< Number of segments. */
 
-       /** Input port (16 bits to support more than 256 virtual ports). */
+       /** Input port (16 bits to support more than 256 virtual ports).
+        * The event eth Tx adapter uses this field to specify the output port.
+        */
        uint16_t port;
 
        uint64_t ol_flags;        /**< Offload features. */
@@ -511,28 +556,47 @@ struct rte_mbuf {
        /** VLAN TCI (CPU order), valid if PKT_RX_VLAN is set. */
        uint16_t vlan_tci;
 
+       RTE_STD_C11
        union {
-               uint32_t rss;     /**< RSS hash result if RSS enabled */
-               struct {
-                       RTE_STD_C11
-                       union {
-                               struct {
-                                       uint16_t hash;
-                                       uint16_t id;
+               union {
+                       uint32_t rss;     /**< RSS hash result if RSS enabled */
+                       struct {
+                               union {
+                                       struct {
+                                               uint16_t hash;
+                                               uint16_t id;
+                                       };
+                                       uint32_t lo;
+                                       /**< Second 4 flexible bytes */
                                };
+                               uint32_t hi;
+                               /**< First 4 flexible bytes or FD ID, dependent
+                                * on PKT_RX_FDIR_* flag in ol_flags.
+                                */
+                       } fdir; /**< Filter identifier if FDIR enabled */
+                       struct {
                                uint32_t lo;
-                               /**< Second 4 flexible bytes */
-                       };
-                       uint32_t hi;
-                       /**< First 4 flexible bytes or FD ID, dependent on
-                            PKT_RX_FDIR_* flag in ol_flags. */
-               } fdir;           /**< Filter identifier if FDIR enabled */
+                               uint32_t hi;
+                               /**< The event eth Tx adapter uses this field
+                                * to store Tx queue id.
+                                * @see rte_event_eth_tx_adapter_txq_set()
+                                */
+                       } sched;          /**< Hierarchical scheduler */
+                       /**< User defined tags. See rte_distributor_process() */
+                       uint32_t usr;
+               } hash;                   /**< hash information */
                struct {
-                       uint32_t lo;
-                       uint32_t hi;
-               } sched;          /**< Hierarchical scheduler */
-               uint32_t usr;     /**< User defined tags. See rte_distributor_process() */
-       } hash;                   /**< hash information */
+                       /**
+                        * Application specific metadata value
+                        * for egress flow rule match.
+                        * Valid if PKT_TX_METADATA is set.
+                        * Located here to allow joint use
+                        * with hash.sched.hi.
+                        */
+                       uint32_t tx_metadata;
+                       uint32_t reserved;
+               };
+       };
 
        /** Outer VLAN TCI (CPU order), valid if PKT_RX_QINQ is set. */
        uint16_t vlan_tci_outer;
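A minimal sketch (not part of this patch) of stamping egress metadata via
the new field:

	static void
	set_egress_metadata(struct rte_mbuf *m, uint32_t md)
	{
		/* first word of the union; hash.sched.hi stays usable */
		m->tx_metadata = md;
		m->ol_flags |= PKT_TX_METADATA;
	}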
@@ -1038,14 +1102,6 @@ rte_mbuf_raw_free(struct rte_mbuf *m)
        rte_mempool_put(m->pool, m);
 }
 
-/* compat with older versions */
-__rte_deprecated
-static inline void
-__rte_mbuf_raw_free(struct rte_mbuf *m)
-{
-       rte_mbuf_raw_free(m);
-}
-
 /**
  * The packet mbuf constructor.
  *
@@ -1658,14 +1714,6 @@ rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
        return NULL;
 }
 
-/* deprecated, replaced by rte_pktmbuf_prefree_seg() */
-__rte_deprecated
-static inline struct rte_mbuf *
-__rte_pktmbuf_prefree_seg(struct rte_mbuf *m)
-{
-       return rte_pktmbuf_prefree_seg(m);
-}
-
 /**
  * Free a segment of a packet mbuf into its original mempool.
  *
index d7835e2..d6f906b 100644 (file)
@@ -19,6 +19,8 @@ const char *rte_get_ptype_l2_name(uint32_t ptype)
        case RTE_PTYPE_L2_ETHER_VLAN: return "L2_ETHER_VLAN";
        case RTE_PTYPE_L2_ETHER_QINQ: return "L2_ETHER_QINQ";
        case RTE_PTYPE_L2_ETHER_PPPOE: return "L2_ETHER_PPPOE";
+       case RTE_PTYPE_L2_ETHER_FCOE: return "L2_ETHER_FCOE";
+       case RTE_PTYPE_L2_ETHER_MPLS: return "L2_ETHER_MPLS";
        default: return "L2_UNKNOWN";
        }
 }
@@ -47,6 +49,7 @@ const char *rte_get_ptype_l4_name(uint32_t ptype)
        case RTE_PTYPE_L4_SCTP: return "L4_SCTP";
        case RTE_PTYPE_L4_ICMP: return "L4_ICMP";
        case RTE_PTYPE_L4_NONFRAG: return "L4_NONFRAG";
+       case RTE_PTYPE_L4_IGMP: return "L4_IGMP";
        default: return "L4_UNKNOWN";
        }
 }
index 01acc66..23bc635 100644 (file)
@@ -130,6 +130,20 @@ extern "C" {
  * <'ether type'=[0x8863|0x8864]>
  */
 #define RTE_PTYPE_L2_ETHER_PPPOE            0x00000008
+/**
+ * FCoE packet type.
+ *
+ * Packet format:
+ * <'ether type'=[0x8906]>
+ */
+#define RTE_PTYPE_L2_ETHER_FCOE             0x00000009
+/**
+ * MPLS packet type.
+ *
+ * Packet format:
+ * <'ether type'=[0x8847|0x8848]>
+ */
+#define RTE_PTYPE_L2_ETHER_MPLS             0x0000000a
 /**
  * Mask of layer 2 packet types.
  * It is used for outer packet for tunneling cases.
@@ -286,6 +300,14 @@ extern "C" {
  * | 'version'=6, 'next header'!=[6|17|44|132|1]>
  */
 #define RTE_PTYPE_L4_NONFRAG                0x00000600
+/**
+ * IGMP (Internet Group Management Protocol) packet type.
+ *
+ * Packet format:
+ * <'ether type'=0x0800
+ * | 'version'=4, 'protocol'=2, 'MF'=0, 'frag_offset'=0>
+ */
+#define RTE_PTYPE_L4_IGMP                   0x00000700
 /**
  * Mask of layer 4 packet types.
  * It is used for outer packet for tunneling cases.
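A minimal sketch (not part of this patch) of testing for the new L4 type
after software classification; handle_igmp() is a hypothetical
application handler:

	uint32_t ptype = rte_net_get_ptype(m, NULL, RTE_PTYPE_ALL_MASK);

	if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_IGMP)
		handle_igmp(m);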
index 03e6b5f..683b216 100644 (file)
@@ -99,25 +99,44 @@ static unsigned optimize_object_size(unsigned obj_size)
        return new_obj_size * RTE_MEMPOOL_ALIGN;
 }
 
+struct pagesz_walk_arg {
+       int socket_id;
+       size_t min;
+};
+
 static int
 find_min_pagesz(const struct rte_memseg_list *msl, void *arg)
 {
-       size_t *min = arg;
+       struct pagesz_walk_arg *wa = arg;
+       bool valid;
+
+       /*
+        * We only need to look at page sizes available for a particular socket
+        * ID. So, we either need an exact match on socket ID (can match both
+        * native and external memory), or, if SOCKET_ID_ANY was specified as a
+        * socket ID argument, we must only look at native memory and ignore any
+        * page sizes associated with external memory.
+        */
+       valid = msl->socket_id == wa->socket_id;
+       valid |= wa->socket_id == SOCKET_ID_ANY && msl->external == 0;
 
-       if (msl->page_sz < *min)
-               *min = msl->page_sz;
+       if (valid && msl->page_sz < wa->min)
+               wa->min = msl->page_sz;
 
        return 0;
 }
 
 static size_t
-get_min_page_size(void)
+get_min_page_size(int socket_id)
 {
-       size_t min_pagesz = SIZE_MAX;
+       struct pagesz_walk_arg wa;
 
-       rte_memseg_list_walk(find_min_pagesz, &min_pagesz);
+       wa.min = SIZE_MAX;
+       wa.socket_id = socket_id;
 
-       return min_pagesz == SIZE_MAX ? (size_t) getpagesize() : min_pagesz;
+       rte_memseg_list_walk(find_min_pagesz, &wa);
+
+       return wa.min == SIZE_MAX ? (size_t) getpagesize() : wa.min;
 }
 
 
@@ -409,12 +428,18 @@ rte_mempool_populate_default(struct rte_mempool *mp)
        rte_iova_t iova;
        unsigned mz_id, n;
        int ret;
-       bool no_contig, try_contig, no_pageshift;
+       bool no_contig, try_contig, no_pageshift, external;
 
        ret = mempool_ops_alloc_once(mp);
        if (ret != 0)
                return ret;
 
+       /* check if we can retrieve a valid socket ID */
+       ret = rte_malloc_heap_socket_is_external(mp->socket_id);
+       if (ret < 0)
+               return -EINVAL;
+       external = ret;
+
        /* mempool must not be populated */
        if (mp->nb_mem_chunks != 0)
                return -EEXIST;
@@ -462,15 +487,25 @@ rte_mempool_populate_default(struct rte_mempool *mp)
         * in one contiguous chunk as well (otherwise we might end up wasting a
         * 1G page on a 10MB memzone). If we fail to get enough contiguous
         * memory, then we'll go and reserve space page-by-page.
+        *
+        * We also have to take into account the fact that memory that we're
+        * going to allocate from can belong to an externally allocated memory
+        * area, in which case the assumption of IOVA as VA mode being
+        * synonymous with IOVA contiguousness will not hold. We should also try
+        * to go for contiguous memory even if we're in no-huge mode, because
+        * external memory may in fact be IOVA-contiguous.
         */
-       no_pageshift = no_contig || rte_eal_iova_mode() == RTE_IOVA_VA;
-       try_contig = !no_contig && !no_pageshift && rte_eal_has_hugepages();
+       no_pageshift = no_contig ||
+                       (!external && rte_eal_iova_mode() == RTE_IOVA_VA);
+       try_contig = !no_contig && !no_pageshift &&
+                       (rte_eal_has_hugepages() || external);
 
        if (no_pageshift) {
                pg_sz = 0;
                pg_shift = 0;
        } else if (try_contig) {
-               pg_sz = get_min_page_size();
+               pg_sz = get_min_page_size(mp->socket_id);
                pg_shift = rte_bsf32(pg_sz);
        } else {
                pg_sz = getpagesize();
index 85e403f..c308206 100644 (file)
@@ -20,6 +20,6 @@ SRCS-$(CONFIG_RTE_LIBRTE_NET) += rte_arp.c
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_esp.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_sctp.h rte_icmp.h rte_arp.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_ether.h rte_gre.h rte_net.h
-SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include += rte_net_crc.h rte_mpls.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
index d3ea1fe..7d66f69 100644 (file)
@@ -13,7 +13,8 @@ headers = files('rte_ip.h',
        'rte_ether.h',
        'rte_gre.h',
        'rte_net.h',
-       'rte_net_crc.h')
+       'rte_net_crc.h',
+       'rte_mpls.h')
 
 sources = files('rte_arp.c', 'rte_net.c', 'rte_net_crc.c')
 deps += ['mbuf']
index da81524..1c7b7a5 100644 (file)
@@ -21,8 +21,8 @@ struct crc_pclmulqdq_ctx {
        __m128i rk7_rk8;
 };
 
-struct crc_pclmulqdq_ctx crc32_eth_pclmulqdq __rte_aligned(16);
-struct crc_pclmulqdq_ctx crc16_ccitt_pclmulqdq __rte_aligned(16);
+static struct crc_pclmulqdq_ctx crc32_eth_pclmulqdq __rte_aligned(16);
+static struct crc_pclmulqdq_ctx crc16_ccitt_pclmulqdq __rte_aligned(16);
 /**
  * @brief Performs one folding round
  *
index bee2b34..c2c5e24 100644 (file)
@@ -306,6 +306,8 @@ struct vxlan_hdr {
 #define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */
 #define ETHER_TYPE_TEB  0x6558 /**< Transparent Ethernet Bridging. */
 #define ETHER_TYPE_LLDP 0x88CC /**< LLDP Protocol. */
+#define ETHER_TYPE_MPLS 0x8847 /**< MPLS ethertype. */
+#define ETHER_TYPE_MPLSM 0x8848 /**< MPLS multicast ethertype. */
 
 #define ETHER_VXLAN_HLEN (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr))
 /**< VXLAN tunnel header length. */
diff --git a/lib/librte_net/rte_mpls.h b/lib/librte_net/rte_mpls.h
new file mode 100644 (file)
index 0000000..11d26ba
--- /dev/null
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2016 6WIND S.A.
+ */
+
+#ifndef _RTE_MPLS_H_
+#define _RTE_MPLS_H_
+
+/**
+ * @file
+ *
+ * MPLS-related defines
+ */
+
+#include <stdint.h>
+#include <rte_byteorder.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * MPLS header.
+ */
+struct mpls_hdr {
+       uint16_t tag_msb;   /**< Label(msb). */
+#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
+       uint8_t tag_lsb:4;  /**< Label(lsb). */
+       uint8_t tc:3;       /**< Traffic class. */
+       uint8_t bs:1;       /**< Bottom of stack. */
+#else
+       uint8_t bs:1;       /**< Bottom of stack. */
+       uint8_t tc:3;       /**< Traffic class. */
+       uint8_t tag_lsb:4;  /**< Label(lsb). */
+#endif
+       uint8_t  ttl;       /**< Time to live. */
+} __attribute__((__packed__));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_MPLS_H_ */
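A minimal sketch (not part of this patch) of recovering the 20-bit label
from the split tag fields of the header above:

	static inline uint32_t
	mpls_get_label(const struct mpls_hdr *mh)
	{
		/* tag_msb carries label bits 19..4, tag_lsb bits 3..0 */
		return ((uint32_t)rte_be_to_cpu_16(mh->tag_msb) << 4) |
			mh->tag_lsb;
	}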
index 9eb7c74..378a412 100644 (file)
@@ -13,6 +13,7 @@
 #include <rte_udp.h>
 #include <rte_sctp.h>
 #include <rte_gre.h>
+#include <rte_mpls.h>
 #include <rte_net.h>
 
 /* get l3 packet type from ip6 next protocol */
@@ -274,9 +275,27 @@ uint32_t rte_net_get_ptype(const struct rte_mbuf *m,
                off += 2 * sizeof(*vh);
                hdr_lens->l2_len += 2 * sizeof(*vh);
                proto = vh->eth_proto;
+       } else if ((proto == rte_cpu_to_be_16(ETHER_TYPE_MPLS)) ||
+               (proto == rte_cpu_to_be_16(ETHER_TYPE_MPLSM))) {
+               unsigned int i;
+               const struct mpls_hdr *mh;
+               struct mpls_hdr mh_copy;
+
+#define MAX_MPLS_HDR 5
+               for (i = 0; i < MAX_MPLS_HDR; i++) {
+                       mh = rte_pktmbuf_read(m, off + (i * sizeof(*mh)),
+                               sizeof(*mh), &mh_copy);
+                       if (unlikely(mh == NULL))
+                               return pkt_type;
+                       /* stop at the bottom-of-stack label */
+                       if (mh->bs)
+                               break;
+               }
+               if (i == MAX_MPLS_HDR)
+                       return pkt_type;
+               pkt_type = RTE_PTYPE_L2_ETHER_MPLS;
+               hdr_lens->l2_len += (sizeof(*mh) * (i + 1));
+               return pkt_type;
        }
 
- l3:
+l3:
        if ((layers & RTE_PTYPE_L3_MASK) == 0)
                return pkt_type;
 
index b6ab6e1..e59760a 100644 (file)
@@ -122,14 +122,16 @@ rte_net_intel_cksum_flags_prepare(struct rte_mbuf *m, uint64_t ol_flags)
                (ol_flags & PKT_TX_OUTER_IPV6))
                inner_l3_offset += m->outer_l2_len + m->outer_l3_len;
 
-       if ((ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
-               if (ol_flags & PKT_TX_IPV4) {
-                       ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
-                                       inner_l3_offset);
+       if (ol_flags & PKT_TX_IPV4) {
+               ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+                               inner_l3_offset);
 
-                       if (ol_flags & PKT_TX_IP_CKSUM)
-                               ipv4_hdr->hdr_checksum = 0;
+               if (ol_flags & PKT_TX_IP_CKSUM)
+                       ipv4_hdr->hdr_checksum = 0;
+       }
 
+       if ((ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
+               if (ol_flags & PKT_TX_IPV4) {
                        udp_hdr = (struct udp_hdr *)((char *)ipv4_hdr +
                                        m->l3_len);
                        udp_hdr->dgram_cksum = rte_ipv4_phdr_cksum(ipv4_hdr,
@@ -146,12 +148,6 @@ rte_net_intel_cksum_flags_prepare(struct rte_mbuf *m, uint64_t ol_flags)
        } else if ((ol_flags & PKT_TX_TCP_CKSUM) ||
                        (ol_flags & PKT_TX_TCP_SEG)) {
                if (ol_flags & PKT_TX_IPV4) {
-                       ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
-                                       inner_l3_offset);
-
-                       if (ol_flags & PKT_TX_IP_CKSUM)
-                               ipv4_hdr->hdr_checksum = 0;
-
                        /* non-TSO tcp or TSO */
                        tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr +
                                        m->l3_len);
index 0ee0fa1..b241151 100644 (file)
@@ -8,8 +8,6 @@ LIB = librte_pdump.a
 
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
-CFLAGS += -D_GNU_SOURCE
-LDLIBS += -lpthread
 LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
 
 EXPORT_MAP := rte_pdump_version.map
index 84afe98..cf26550 100644 (file)
@@ -12,7 +12,7 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
 LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_table
-LDLIBS += -lrte_port -lrte_meter -lrte_sched
+LDLIBS += -lrte_port -lrte_meter -lrte_sched -lrte_cryptodev
 
 EXPORT_MAP := rte_pipeline_version.map
 
index dc16ab4..04e5f51 100644 (file)
@@ -5,4 +5,4 @@ version = 3
 allow_experimental_apis = true
 sources = files('rte_pipeline.c', 'rte_port_in_action.c', 'rte_table_action.c')
 headers = files('rte_pipeline.h', 'rte_port_in_action.h', 'rte_table_action.h')
-deps += ['port', 'table', 'meter', 'sched']
+deps += ['port', 'table', 'meter', 'sched', 'cryptodev']
index 0cb8b80..2c047a8 100644 (file)
@@ -178,8 +178,7 @@ rte_pipeline_check_params(struct rte_pipeline_params *params)
        }
 
        /* socket */
-       if ((params->socket_id < 0) ||
-           (params->socket_id >= RTE_MAX_NUMA_NODES)) {
+       if (params->socket_id < 0) {
                RTE_LOG(ERR, PIPELINE,
                        "%s: Incorrect value for parameter socket_id\n",
                        __func__);
index d820b22..420f065 100644 (file)
@@ -72,4 +72,5 @@ EXPERIMENTAL {
        rte_table_action_stats_read;
        rte_table_action_time_read;
        rte_table_action_ttl_read;
+       rte_table_action_crypto_sym_session_get;
 };
index 83ffa5d..537e659 100644 (file)
@@ -1,7 +1,6 @@
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2010-2018 Intel Corporation
  */
-
 #include <stdlib.h>
 #include <string.h>
 
@@ -15,6 +14,8 @@
 #include <rte_esp.h>
 #include <rte_tcp.h>
 #include <rte_udp.h>
+#include <rte_cryptodev.h>
+#include <rte_cryptodev_pmd.h>
 
 #include "rte_table_action.h"
 
@@ -430,6 +431,7 @@ encap_valid(enum rte_table_action_encap_type encap)
        case RTE_TABLE_ACTION_ENCAP_QINQ:
        case RTE_TABLE_ACTION_ENCAP_MPLS:
        case RTE_TABLE_ACTION_ENCAP_PPPOE:
+       case RTE_TABLE_ACTION_ENCAP_VXLAN:
                return 1;
        default:
                return 0;
@@ -498,6 +500,38 @@ struct encap_pppoe_data {
        struct pppoe_ppp_hdr pppoe_ppp;
 } __attribute__((__packed__));
 
+#define IP_PROTO_UDP                                       17
+
+struct encap_vxlan_ipv4_data {
+       struct ether_hdr ether;
+       struct ipv4_hdr ipv4;
+       struct udp_hdr udp;
+       struct vxlan_hdr vxlan;
+} __attribute__((__packed__));
+
+struct encap_vxlan_ipv4_vlan_data {
+       struct ether_hdr ether;
+       struct vlan_hdr vlan;
+       struct ipv4_hdr ipv4;
+       struct udp_hdr udp;
+       struct vxlan_hdr vxlan;
+} __attribute__((__packed__));
+
+struct encap_vxlan_ipv6_data {
+       struct ether_hdr ether;
+       struct ipv6_hdr ipv6;
+       struct udp_hdr udp;
+       struct vxlan_hdr vxlan;
+} __attribute__((__packed__));
+
+struct encap_vxlan_ipv6_vlan_data {
+       struct ether_hdr ether;
+       struct vlan_hdr vlan;
+       struct ipv6_hdr ipv6;
+       struct udp_hdr udp;
+       struct vxlan_hdr vxlan;
+} __attribute__((__packed__));
+
 static size_t
 encap_data_size(struct rte_table_action_encap_config *encap)
 {
@@ -517,6 +551,18 @@ encap_data_size(struct rte_table_action_encap_config *encap)
        case 1LLU << RTE_TABLE_ACTION_ENCAP_PPPOE:
                return sizeof(struct encap_pppoe_data);
 
+       case 1LLU << RTE_TABLE_ACTION_ENCAP_VXLAN:
+               if (encap->vxlan.ip_version)
+                       if (encap->vxlan.vlan)
+                               return sizeof(struct encap_vxlan_ipv4_vlan_data);
+                       else
+                               return sizeof(struct encap_vxlan_ipv4_data);
+               else
+                       if (encap->vxlan.vlan)
+                               return sizeof(struct encap_vxlan_ipv6_vlan_data);
+                       else
+                               return sizeof(struct encap_vxlan_ipv6_data);
+
        default:
                return 0;
        }
@@ -550,6 +596,9 @@ encap_apply_check(struct rte_table_action_encap_params *p,
        case RTE_TABLE_ACTION_ENCAP_PPPOE:
                return 0;
 
+       case RTE_TABLE_ACTION_ENCAP_VXLAN:
+               return 0;
+
        default:
                return -EINVAL;
        }
@@ -678,6 +727,168 @@ encap_pppoe_apply(void *data,
        return 0;
 }
 
+static int
+encap_vxlan_apply(void *data,
+       struct rte_table_action_encap_params *p,
+       struct rte_table_action_encap_config *cfg)
+{
+       if ((p->vxlan.vxlan.vni > 0xFFFFFF) ||
+               (cfg->vxlan.ip_version && (p->vxlan.ipv4.dscp > 0x3F)) ||
+               (!cfg->vxlan.ip_version && (p->vxlan.ipv6.flow_label > 0xFFFFF)) ||
+               (!cfg->vxlan.ip_version && (p->vxlan.ipv6.dscp > 0x3F)) ||
+               (cfg->vxlan.vlan && (p->vxlan.vlan.vid > 0xFFF)))
+               return -1;
+
+       if (cfg->vxlan.ip_version)
+               if (cfg->vxlan.vlan) {
+                       struct encap_vxlan_ipv4_vlan_data *d = data;
+
+                       /* Ethernet */
+                       ether_addr_copy(&p->vxlan.ether.da, &d->ether.d_addr);
+                       ether_addr_copy(&p->vxlan.ether.sa, &d->ether.s_addr);
+                       d->ether.ether_type = rte_htons(ETHER_TYPE_VLAN);
+
+                       /* VLAN */
+                       d->vlan.vlan_tci = rte_htons(VLAN(p->vxlan.vlan.pcp,
+                               p->vxlan.vlan.dei,
+                               p->vxlan.vlan.vid));
+                       d->vlan.eth_proto = rte_htons(ETHER_TYPE_IPv4);
+
+                       /* IPv4 */
+                       d->ipv4.version_ihl = 0x45;
+                       d->ipv4.type_of_service = p->vxlan.ipv4.dscp << 2;
+                       d->ipv4.total_length = 0; /* not pre-computed */
+                       d->ipv4.packet_id = 0;
+                       d->ipv4.fragment_offset = 0;
+                       d->ipv4.time_to_live = p->vxlan.ipv4.ttl;
+                       d->ipv4.next_proto_id = IP_PROTO_UDP;
+                       d->ipv4.hdr_checksum = 0;
+                       d->ipv4.src_addr = rte_htonl(p->vxlan.ipv4.sa);
+                       d->ipv4.dst_addr = rte_htonl(p->vxlan.ipv4.da);
+
+                       d->ipv4.hdr_checksum = rte_ipv4_cksum(&d->ipv4);
+
+                       /* UDP */
+                       d->udp.src_port = rte_htons(p->vxlan.udp.sp);
+                       d->udp.dst_port = rte_htons(p->vxlan.udp.dp);
+                       d->udp.dgram_len = 0; /* not pre-computed */
+                       d->udp.dgram_cksum = 0;
+
+                       /* VXLAN */
+                       d->vxlan.vx_flags = rte_htonl(0x08000000);
+                       d->vxlan.vx_vni = rte_htonl(p->vxlan.vxlan.vni << 8);
+
+                       return 0;
+               } else {
+                       struct encap_vxlan_ipv4_data *d = data;
+
+                       /* Ethernet */
+                       ether_addr_copy(&p->vxlan.ether.da, &d->ether.d_addr);
+                       ether_addr_copy(&p->vxlan.ether.sa, &d->ether.s_addr);
+                       d->ether.ether_type = rte_htons(ETHER_TYPE_IPv4);
+
+                       /* IPv4 */
+                       d->ipv4.version_ihl = 0x45;
+                       d->ipv4.type_of_service = p->vxlan.ipv4.dscp << 2;
+                       d->ipv4.total_length = 0; /* not pre-computed */
+                       d->ipv4.packet_id = 0;
+                       d->ipv4.fragment_offset = 0;
+                       d->ipv4.time_to_live = p->vxlan.ipv4.ttl;
+                       d->ipv4.next_proto_id = IP_PROTO_UDP;
+                       d->ipv4.hdr_checksum = 0;
+                       d->ipv4.src_addr = rte_htonl(p->vxlan.ipv4.sa);
+                       d->ipv4.dst_addr = rte_htonl(p->vxlan.ipv4.da);
+
+                       d->ipv4.hdr_checksum = rte_ipv4_cksum(&d->ipv4);
+
+                       /* UDP */
+                       d->udp.src_port = rte_htons(p->vxlan.udp.sp);
+                       d->udp.dst_port = rte_htons(p->vxlan.udp.dp);
+                       d->udp.dgram_len = 0; /* not pre-computed */
+                       d->udp.dgram_cksum = 0;
+
+                       /* VXLAN */
+                       d->vxlan.vx_flags = rte_htonl(0x08000000);
+                       d->vxlan.vx_vni = rte_htonl(p->vxlan.vxlan.vni << 8);
+
+                       return 0;
+               }
+       else
+               if (cfg->vxlan.vlan) {
+                       struct encap_vxlan_ipv6_vlan_data *d = data;
+
+                       /* Ethernet */
+                       ether_addr_copy(&p->vxlan.ether.da, &d->ether.d_addr);
+                       ether_addr_copy(&p->vxlan.ether.sa, &d->ether.s_addr);
+                       d->ether.ether_type = rte_htons(ETHER_TYPE_VLAN);
+
+                       /* VLAN */
+                       d->vlan.vlan_tci = rte_htons(VLAN(p->vxlan.vlan.pcp,
+                               p->vxlan.vlan.dei,
+                               p->vxlan.vlan.vid));
+                       d->vlan.eth_proto = rte_htons(ETHER_TYPE_IPv6);
+
+                       /* IPv6 */
+                       d->ipv6.vtc_flow = rte_htonl((6 << 28) |
+                               (p->vxlan.ipv6.dscp << 22) |
+                               p->vxlan.ipv6.flow_label);
+                       d->ipv6.payload_len = 0; /* not pre-computed */
+                       d->ipv6.proto = IP_PROTO_UDP;
+                       d->ipv6.hop_limits = p->vxlan.ipv6.hop_limit;
+                       memcpy(d->ipv6.src_addr,
+                               p->vxlan.ipv6.sa,
+                               sizeof(p->vxlan.ipv6.sa));
+                       memcpy(d->ipv6.dst_addr,
+                               p->vxlan.ipv6.da,
+                               sizeof(p->vxlan.ipv6.da));
+
+                       /* UDP */
+                       d->udp.src_port = rte_htons(p->vxlan.udp.sp);
+                       d->udp.dst_port = rte_htons(p->vxlan.udp.dp);
+                       d->udp.dgram_len = 0; /* not pre-computed */
+                       d->udp.dgram_cksum = 0;
+
+                       /* VXLAN */
+                       d->vxlan.vx_flags = rte_htonl(0x08000000);
+                       d->vxlan.vx_vni = rte_htonl(p->vxlan.vxlan.vni << 8);
+
+                       return 0;
+               } else {
+                       struct encap_vxlan_ipv6_data *d = data;
+
+                       /* Ethernet */
+                       ether_addr_copy(&p->vxlan.ether.da, &d->ether.d_addr);
+                       ether_addr_copy(&p->vxlan.ether.sa, &d->ether.s_addr);
+                       d->ether.ether_type = rte_htons(ETHER_TYPE_IPv6);
+
+                       /* IPv6 */
+                       d->ipv6.vtc_flow = rte_htonl((6 << 28) |
+                               (p->vxlan.ipv6.dscp << 22) |
+                               p->vxlan.ipv6.flow_label);
+                       d->ipv6.payload_len = 0; /* not pre-computed */
+                       d->ipv6.proto = IP_PROTO_UDP;
+                       d->ipv6.hop_limits = p->vxlan.ipv6.hop_limit;
+                       memcpy(d->ipv6.src_addr,
+                               p->vxlan.ipv6.sa,
+                               sizeof(p->vxlan.ipv6.sa));
+                       memcpy(d->ipv6.dst_addr,
+                               p->vxlan.ipv6.da,
+                               sizeof(p->vxlan.ipv6.da));
+
+                       /* UDP */
+                       d->udp.src_port = rte_htons(p->vxlan.udp.sp);
+                       d->udp.dst_port = rte_htons(p->vxlan.udp.dp);
+                       d->udp.dgram_len = 0; /* not pre-computed */
+                       d->udp.dgram_cksum = 0;
+
+                       /* VXLAN */
+                       d->vxlan.vx_flags = rte_htonl(0x08000000);
+                       d->vxlan.vx_vni = rte_htonl(p->vxlan.vxlan.vni << 8);
+
+                       return 0;
+               }
+}
+
 static int
 encap_apply(void *data,
        struct rte_table_action_encap_params *p,
@@ -707,11 +918,31 @@ encap_apply(void *data,
        case RTE_TABLE_ACTION_ENCAP_PPPOE:
                return encap_pppoe_apply(data, p);
 
+       case RTE_TABLE_ACTION_ENCAP_VXLAN:
+               return encap_vxlan_apply(data, p, cfg);
+
        default:
                return -EINVAL;
        }
 }
 
+static __rte_always_inline uint16_t
+encap_vxlan_ipv4_checksum_update(uint16_t cksum0,
+       uint16_t total_length)
+{
+       int32_t cksum1;
+
+       cksum1 = cksum0;
+       cksum1 = ~cksum1 & 0xFFFF;
+
+       /* Add total length (one's complement logic) */
+       cksum1 += total_length;
+       cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16);
+       cksum1 = (cksum1 & 0xFFFF) + (cksum1 >> 16);
+
+       return (uint16_t)(~cksum1);
+}
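This works because encap_vxlan_apply() above computes hdr_checksum with
total_length set to 0, so the per-packet fast path only has to fold the
real length into the one's-complement sum (RFC 1624 incremental update).
A minimal usage sketch, mirroring the callers below:

	uint16_t cksum = encap_vxlan_ipv4_checksum_update(
		vxlan_tbl->ipv4.hdr_checksum,	/* precomputed with length 0 */
		rte_htons(ipv4_total_length));	/* length being folded in */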
+
 static __rte_always_inline void *
 encap(void *dst, const void *src, size_t n)
 {
@@ -719,6 +950,118 @@ encap(void *dst, const void *src, size_t n)
        return rte_memcpy(dst, src, n);
 }
 
+static __rte_always_inline void
+pkt_work_encap_vxlan_ipv4(struct rte_mbuf *mbuf,
+       struct encap_vxlan_ipv4_data *vxlan_tbl,
+       struct rte_table_action_encap_config *cfg)
+{
+       uint32_t ether_offset = cfg->vxlan.data_offset;
+       void *ether = RTE_MBUF_METADATA_UINT32_PTR(mbuf, ether_offset);
+       struct encap_vxlan_ipv4_data *vxlan_pkt;
+       uint16_t ether_length, ipv4_total_length, ipv4_hdr_cksum, udp_length;
+
+       ether_length = (uint16_t)mbuf->pkt_len;
+       ipv4_total_length = ether_length +
+               (sizeof(struct vxlan_hdr) +
+               sizeof(struct udp_hdr) +
+               sizeof(struct ipv4_hdr));
+       ipv4_hdr_cksum = encap_vxlan_ipv4_checksum_update(vxlan_tbl->ipv4.hdr_checksum,
+               rte_htons(ipv4_total_length));
+       udp_length = ether_length +
+               (sizeof(struct vxlan_hdr) +
+               sizeof(struct udp_hdr));
+
+       vxlan_pkt = encap(ether, vxlan_tbl, sizeof(*vxlan_tbl));
+       vxlan_pkt->ipv4.total_length = rte_htons(ipv4_total_length);
+       vxlan_pkt->ipv4.hdr_checksum = ipv4_hdr_cksum;
+       vxlan_pkt->udp.dgram_len = rte_htons(udp_length);
+
+       mbuf->data_off = ether_offset - (sizeof(struct rte_mbuf) + sizeof(*vxlan_pkt));
+       mbuf->pkt_len = mbuf->data_len = ether_length + sizeof(*vxlan_pkt);
+}
+
+static __rte_always_inline void
+pkt_work_encap_vxlan_ipv4_vlan(struct rte_mbuf *mbuf,
+       struct encap_vxlan_ipv4_vlan_data *vxlan_tbl,
+       struct rte_table_action_encap_config *cfg)
+{
+       uint32_t ether_offset = cfg->vxlan.data_offset;
+       void *ether = RTE_MBUF_METADATA_UINT32_PTR(mbuf, ether_offset);
+       struct encap_vxlan_ipv4_vlan_data *vxlan_pkt;
+       uint16_t ether_length, ipv4_total_length, ipv4_hdr_cksum, udp_length;
+
+       ether_length = (uint16_t)mbuf->pkt_len;
+       ipv4_total_length = ether_length +
+               (sizeof(struct vxlan_hdr) +
+               sizeof(struct udp_hdr) +
+               sizeof(struct ipv4_hdr));
+       ipv4_hdr_cksum = encap_vxlan_ipv4_checksum_update(vxlan_tbl->ipv4.hdr_checksum,
+               rte_htons(ipv4_total_length));
+       udp_length = ether_length +
+               (sizeof(struct vxlan_hdr) +
+               sizeof(struct udp_hdr));
+
+       vxlan_pkt = encap(ether, vxlan_tbl, sizeof(*vxlan_tbl));
+       vxlan_pkt->ipv4.total_length = rte_htons(ipv4_total_length);
+       vxlan_pkt->ipv4.hdr_checksum = ipv4_hdr_cksum;
+       vxlan_pkt->udp.dgram_len = rte_htons(udp_length);
+
+       mbuf->data_off = ether_offset - (sizeof(struct rte_mbuf) + sizeof(*vxlan_pkt));
+       mbuf->pkt_len = mbuf->data_len = ether_length + sizeof(*vxlan_pkt);
+}
+
+static __rte_always_inline void
+pkt_work_encap_vxlan_ipv6(struct rte_mbuf *mbuf,
+       struct encap_vxlan_ipv6_data *vxlan_tbl,
+       struct rte_table_action_encap_config *cfg)
+{
+       uint32_t ether_offset = cfg->vxlan.data_offset;
+       void *ether = RTE_MBUF_METADATA_UINT32_PTR(mbuf, ether_offset);
+       struct encap_vxlan_ipv6_data *vxlan_pkt;
+       uint16_t ether_length, ipv6_payload_length, udp_length;
+
+       ether_length = (uint16_t)mbuf->pkt_len;
+       ipv6_payload_length = ether_length +
+               (sizeof(struct vxlan_hdr) +
+               sizeof(struct udp_hdr));
+       udp_length = ether_length +
+               (sizeof(struct vxlan_hdr) +
+               sizeof(struct udp_hdr));
+
+       vxlan_pkt = encap(ether, vxlan_tbl, sizeof(*vxlan_tbl));
+       vxlan_pkt->ipv6.payload_len = rte_htons(ipv6_payload_length);
+       vxlan_pkt->udp.dgram_len = rte_htons(udp_length);
+
+       mbuf->data_off = ether_offset - (sizeof(struct rte_mbuf) + sizeof(*vxlan_pkt));
+       mbuf->pkt_len = mbuf->data_len = ether_length + sizeof(*vxlan_pkt);
+}
+
+static __rte_always_inline void
+pkt_work_encap_vxlan_ipv6_vlan(struct rte_mbuf *mbuf,
+       struct encap_vxlan_ipv6_vlan_data *vxlan_tbl,
+       struct rte_table_action_encap_config *cfg)
+{
+       uint32_t ether_offset = cfg->vxlan.data_offset;
+       void *ether = RTE_MBUF_METADATA_UINT32_PTR(mbuf, ether_offset);
+       struct encap_vxlan_ipv6_vlan_data *vxlan_pkt;
+       uint16_t ether_length, ipv6_payload_length, udp_length;
+
+       ether_length = (uint16_t)mbuf->pkt_len;
+       ipv6_payload_length = ether_length +
+               (sizeof(struct vxlan_hdr) +
+               sizeof(struct udp_hdr));
+       udp_length = ether_length +
+               (sizeof(struct vxlan_hdr) +
+               sizeof(struct udp_hdr));
+
+       vxlan_pkt = encap(ether, vxlan_tbl, sizeof(*vxlan_tbl));
+       vxlan_pkt->ipv6.payload_len = rte_htons(ipv6_payload_length);
+       vxlan_pkt->udp.dgram_len = rte_htons(udp_length);
+
+       mbuf->data_off = ether_offset - (sizeof(struct rte_mbuf) + sizeof(*vxlan_pkt));
+       mbuf->pkt_len = mbuf->data_len = ether_length + sizeof(*vxlan_pkt);
+}
+
 static __rte_always_inline void
 pkt_work_encap(struct rte_mbuf *mbuf,
        void *data,
@@ -776,6 +1119,20 @@ pkt_work_encap(struct rte_mbuf *mbuf,
                break;
        }
 
+       case 1LLU << RTE_TABLE_ACTION_ENCAP_VXLAN:
+       {
+               if (cfg->vxlan.ip_version)
+                       if (cfg->vxlan.vlan)
+                               pkt_work_encap_vxlan_ipv4_vlan(mbuf, data, cfg);
+                       else
+                               pkt_work_encap_vxlan_ipv4(mbuf, data, cfg);
+               else
+                       if (cfg->vxlan.vlan)
+                               pkt_work_encap_vxlan_ipv6_vlan(mbuf, data, cfg);
+                       else
+                               pkt_work_encap_vxlan_ipv6(mbuf, data, cfg);
+               break;
+       }
+
        default:
                break;
        }
@@ -1219,6 +1576,562 @@ pkt_work_time(struct time_data *data,
        data->time = time;
 }
 
+
+/**
+ * RTE_TABLE_ACTION_CRYPTO
+ */
+
+#define CRYPTO_OP_MASK_CIPHER  0x1
+#define CRYPTO_OP_MASK_AUTH    0x2
+#define CRYPTO_OP_MASK_AEAD    0x4
+
+struct crypto_op_sym_iv_aad {
+       struct rte_crypto_op op;
+       struct rte_crypto_sym_op sym_op;
+       union {
+               struct {
+                       uint8_t cipher_iv[
+                               RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX];
+                       uint8_t auth_iv[
+                               RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX];
+               } cipher_auth;
+
+               struct {
+                       uint8_t iv[RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX];
+                       uint8_t aad[RTE_TABLE_ACTION_SYM_CRYPTO_AAD_SIZE_MAX];
+               } aead_iv_aad;
+
+       } iv_aad;
+};
+
+struct sym_crypto_data {
+
+       union {
+               struct {
+
+                       /** Length of cipher iv. */
+                       uint16_t cipher_iv_len;
+
+                       /** Offset from start of IP header to the cipher iv. */
+                       uint16_t cipher_iv_data_offset;
+
+                       /** Length of cipher iv to be updated in the mbuf. */
+                       uint16_t cipher_iv_update_len;
+
+                       /** Offset from start of IP header to the auth iv. */
+                       uint16_t auth_iv_data_offset;
+
+                       /** Length of auth iv in the mbuf. */
+                       uint16_t auth_iv_len;
+
+                       /** Length of auth iv to be updated in the mbuf. */
+                       uint16_t auth_iv_update_len;
+
+               } cipher_auth;
+               struct {
+
+                       /** Length of iv. */
+                       uint16_t iv_len;
+
+                       /** Offset from start of IP header to the aead iv. */
+                       uint16_t iv_data_offset;
+
+                       /** Length of iv to be updated in the mbuf. */
+                       uint16_t iv_update_len;
+
+                       /** Length of aad */
+                       uint16_t aad_len;
+
+                       /** Offset from start of IP header to the aad. */
+                       uint16_t aad_data_offset;
+
+                       /** Length of aad to be updated in the mbuf. */
+                       uint16_t aad_update_len;
+
+               } aead;
+       };
+
+       /** Offset from start of IP header to the data. */
+       uint16_t data_offset;
+
+       /** Digest length. */
+       uint16_t digest_len;
+
+       /** block size */
+       uint16_t block_size;
+
+       /** Mask of crypto operation */
+       uint16_t op_mask;
+
+       /** Session pointer. */
+       struct rte_cryptodev_sym_session *session;
+
+       /** Direction of crypto, encrypt or decrypt */
+       uint16_t direction;
+
+       /** Private data used to store the cipher iv / aad. */
+       uint8_t iv_aad_data[32];
+
+} __attribute__((__packed__));
+
+static int
+sym_crypto_cfg_check(struct rte_table_action_sym_crypto_config *cfg)
+{
+       if (!rte_cryptodev_pmd_is_valid_dev(cfg->cryptodev_id))
+               return -EINVAL;
+       if (cfg->mp_create == NULL || cfg->mp_init == NULL)
+               return -EINVAL;
+
+       return 0;
+}
+
+static int
+get_block_size(const struct rte_crypto_sym_xform *xform, uint8_t cdev_id)
+{
+       struct rte_cryptodev_info dev_info;
+       const struct rte_cryptodev_capabilities *cap;
+       uint32_t i;
+
+       rte_cryptodev_info_get(cdev_id, &dev_info);
+
+       for (i = 0;; i++) {
+               cap = &dev_info.capabilities[i];
+               /* the capability array ends with an undefined-op entry */
+               if (cap->op == RTE_CRYPTO_OP_TYPE_UNDEFINED)
+                       break;
+
+               if (cap->sym.xform_type != xform->type)
+                       continue;
+
+               if ((xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER) &&
+                               (cap->sym.cipher.algo == xform->cipher.algo))
+                       return cap->sym.cipher.block_size;
+
+               if ((xform->type == RTE_CRYPTO_SYM_XFORM_AEAD) &&
+                               (cap->sym.aead.algo == xform->aead.algo))
+                       return cap->sym.aead.block_size;
+
+               if (xform->type == RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED)
+                       break;
+       }
+
+       return -1;
+}
+
+static int
+sym_crypto_apply(struct sym_crypto_data *data,
+       struct rte_table_action_sym_crypto_config *cfg,
+       struct rte_table_action_sym_crypto_params *p)
+{
+       const struct rte_crypto_cipher_xform *cipher_xform = NULL;
+       const struct rte_crypto_auth_xform *auth_xform = NULL;
+       const struct rte_crypto_aead_xform *aead_xform = NULL;
+       struct rte_crypto_sym_xform *xform = p->xform;
+       struct rte_cryptodev_sym_session *session;
+       int ret;
+
+       memset(data, 0, sizeof(*data));
+
+       while (xform) {
+               if (xform->type == RTE_CRYPTO_SYM_XFORM_CIPHER) {
+                       cipher_xform = &xform->cipher;
+
+                       if (cipher_xform->iv.length >
+                               RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX)
+                               return -ENOMEM;
+                       if (cipher_xform->iv.offset !=
+                                       RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET)
+                               return -EINVAL;
+
+                       ret = get_block_size(xform, cfg->cryptodev_id);
+                       if (ret < 0)
+                               return -1;
+                       data->block_size = (uint16_t)ret;
+                       data->op_mask |= CRYPTO_OP_MASK_CIPHER;
+
+                       data->cipher_auth.cipher_iv_len =
+                                       cipher_xform->iv.length;
+                       data->cipher_auth.cipher_iv_data_offset = (uint16_t)
+                                       p->cipher_auth.cipher_iv_update.offset;
+                       data->cipher_auth.cipher_iv_update_len = (uint16_t)
+                                       p->cipher_auth.cipher_iv_update.length;
+
+                       rte_memcpy(data->iv_aad_data,
+                                       p->cipher_auth.cipher_iv.val,
+                                       p->cipher_auth.cipher_iv.length);
+
+                       data->direction = cipher_xform->op;
+
+               } else if (xform->type == RTE_CRYPTO_SYM_XFORM_AUTH) {
+                       auth_xform = &xform->auth;
+                       if (auth_xform->iv.length >
+                               RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX)
+                               return -ENOMEM;
+                       data->op_mask |= CRYPTO_OP_MASK_AUTH;
+
+                       data->cipher_auth.auth_iv_len = auth_xform->iv.length;
+                       data->cipher_auth.auth_iv_data_offset = (uint16_t)
+                                       p->cipher_auth.auth_iv_update.offset;
+                       data->cipher_auth.auth_iv_update_len = (uint16_t)
+                                       p->cipher_auth.auth_iv_update.length;
+                       data->digest_len = auth_xform->digest_length;
+
+                       data->direction = (auth_xform->op ==
+                                       RTE_CRYPTO_AUTH_OP_GENERATE) ?
+                                       RTE_CRYPTO_CIPHER_OP_ENCRYPT :
+                                       RTE_CRYPTO_CIPHER_OP_DECRYPT;
+
+               } else if (xform->type == RTE_CRYPTO_SYM_XFORM_AEAD) {
+                       aead_xform = &xform->aead;
+
+                       if ((aead_xform->iv.length >
+                               RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX) || (
+                               aead_xform->aad_length >
+                               RTE_TABLE_ACTION_SYM_CRYPTO_AAD_SIZE_MAX))
+                               return -EINVAL;
+                       if (aead_xform->iv.offset !=
+                                       RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET)
+                               return -EINVAL;
+
+                       ret = get_block_size(xform, cfg->cryptodev_id);
+                       if (ret < 0)
+                               return -1;
+                       data->block_size = (uint16_t)ret;
+                       data->op_mask |= CRYPTO_OP_MASK_AEAD;
+
+                       data->digest_len = aead_xform->digest_length;
+                       data->aead.iv_len = aead_xform->iv.length;
+                       data->aead.aad_len = aead_xform->aad_length;
+
+                       data->aead.iv_data_offset = (uint16_t)
+                                       p->aead.iv_update.offset;
+                       data->aead.iv_update_len = (uint16_t)
+                                       p->aead.iv_update.length;
+                       data->aead.aad_data_offset = (uint16_t)
+                                       p->aead.aad_update.offset;
+                       data->aead.aad_update_len = (uint16_t)
+                                       p->aead.aad_update.length;
+
+                       rte_memcpy(data->iv_aad_data,
+                                       p->aead.iv.val,
+                                       p->aead.iv.length);
+
+                       rte_memcpy(data->iv_aad_data + p->aead.iv.length,
+                                       p->aead.aad.val,
+                                       p->aead.aad.length);
+
+                       data->direction = (aead_xform->op ==
+                                       RTE_CRYPTO_AEAD_OP_ENCRYPT) ?
+                                       RTE_CRYPTO_CIPHER_OP_ENCRYPT :
+                                       RTE_CRYPTO_CIPHER_OP_DECRYPT;
+               } else
+                       return -EINVAL;
+
+               xform = xform->next;
+       }
+
+       if (auth_xform && auth_xform->iv.length) {
+               if (cipher_xform) {
+                       if (auth_xform->iv.offset !=
+                                       RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET +
+                                       cipher_xform->iv.length)
+                               return -EINVAL;
+
+                       rte_memcpy(data->iv_aad_data + cipher_xform->iv.length,
+                                       p->cipher_auth.auth_iv.val,
+                                       p->cipher_auth.auth_iv.length);
+               } else {
+                       rte_memcpy(data->iv_aad_data,
+                                       p->cipher_auth.auth_iv.val,
+                                       p->cipher_auth.auth_iv.length);
+               }
+       }
+
+       session = rte_cryptodev_sym_session_create(cfg->mp_create);
+       if (!session)
+               return -ENOMEM;
+
+       ret = rte_cryptodev_sym_session_init(cfg->cryptodev_id, session,
+                       p->xform, cfg->mp_init);
+       if (ret < 0) {
+               rte_cryptodev_sym_session_free(session);
+               return ret;
+       }
+
+       data->data_offset = (uint16_t)p->data_offset;
+       data->session = session;
+
+       return 0;
+}
+
+static __rte_always_inline uint64_t
+pkt_work_sym_crypto(struct rte_mbuf *mbuf, struct sym_crypto_data *data,
+               struct rte_table_action_sym_crypto_config *cfg,
+               uint16_t ip_offset)
+{
+       struct crypto_op_sym_iv_aad *crypto_op = (struct crypto_op_sym_iv_aad *)
+                       RTE_MBUF_METADATA_UINT8_PTR(mbuf, cfg->op_offset);
+       struct rte_crypto_op *op = &crypto_op->op;
+       struct rte_crypto_sym_op *sym = op->sym;
+       uint32_t pkt_offset = sizeof(*mbuf) + mbuf->data_off;
+       uint32_t payload_len = pkt_offset + mbuf->data_len - data->data_offset;
+
+       op->type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
+       op->sess_type = RTE_CRYPTO_OP_WITH_SESSION;
+       op->phys_addr = mbuf->buf_iova + cfg->op_offset - sizeof(*mbuf);
+       op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED;
+       sym->m_src = mbuf;
+       sym->m_dst = NULL;
+       sym->session = data->session;
+
+       /** pad the packet */
+       if (data->direction == RTE_CRYPTO_CIPHER_OP_ENCRYPT) {
+               uint32_t append_len = RTE_ALIGN_CEIL(payload_len,
+                               data->block_size) - payload_len;
+
+               if (unlikely(rte_pktmbuf_append(mbuf, append_len +
+                               data->digest_len) == NULL))
+                       return 1;
+
+               payload_len += append_len;
+       } else
+               payload_len -= data->digest_len;
+
+       if (data->op_mask & CRYPTO_OP_MASK_CIPHER) {
+               /** prepare cipher op */
+               uint8_t *iv = crypto_op->iv_aad.cipher_auth.cipher_iv;
+
+               sym->cipher.data.length = payload_len;
+               sym->cipher.data.offset = data->data_offset - pkt_offset;
+
+               if (data->cipher_auth.cipher_iv_update_len) {
+                       uint8_t *pkt_iv = RTE_MBUF_METADATA_UINT8_PTR(mbuf,
+                               data->cipher_auth.cipher_iv_data_offset
+                               + ip_offset);
+
+                       /* For encryption, update the packet iv field;
+                        * otherwise update the stored iv_aad_data.
+                        */
+                       if (data->direction == RTE_CRYPTO_CIPHER_OP_ENCRYPT)
+                               rte_memcpy(pkt_iv, data->iv_aad_data,
+                                       data->cipher_auth.cipher_iv_update_len);
+                       else
+                               rte_memcpy(data->iv_aad_data, pkt_iv,
+                                       data->cipher_auth.cipher_iv_update_len);
+               }
+
+               /** write iv */
+               rte_memcpy(iv, data->iv_aad_data,
+                               data->cipher_auth.cipher_iv_len);
+       }
+
+       if (data->op_mask & CRYPTO_OP_MASK_AUTH) {
+               /* authentication always starts from the IP header */
+               sym->auth.data.offset = ip_offset - pkt_offset;
+               sym->auth.data.length = mbuf->data_len - sym->auth.data.offset -
+                               data->digest_len;
+               sym->auth.digest.data = rte_pktmbuf_mtod_offset(mbuf,
+                               uint8_t *, rte_pktmbuf_pkt_len(mbuf) -
+                               data->digest_len);
+               sym->auth.digest.phys_addr = rte_pktmbuf_iova_offset(mbuf,
+                               rte_pktmbuf_pkt_len(mbuf) - data->digest_len);
+
+               if (data->cipher_auth.auth_iv_update_len) {
+                       uint8_t *pkt_iv = RTE_MBUF_METADATA_UINT8_PTR(mbuf,
+                                       data->cipher_auth.auth_iv_data_offset
+                                       + ip_offset);
+                       uint8_t *data_iv = data->iv_aad_data +
+                                       data->cipher_auth.cipher_iv_len;
+
+                       if (data->direction == RTE_CRYPTO_CIPHER_OP_ENCRYPT)
+                               rte_memcpy(pkt_iv, data_iv,
+                                       data->cipher_auth.auth_iv_update_len);
+                       else
+                               rte_memcpy(data_iv, pkt_iv,
+                                       data->cipher_auth.auth_iv_update_len);
+               }
+
+               if (data->cipher_auth.auth_iv_len) {
+                       /* write the auth iv */
+                       uint8_t *iv = crypto_op->iv_aad.cipher_auth.auth_iv;
+
+                       rte_memcpy(iv, data->iv_aad_data +
+                                       data->cipher_auth.cipher_iv_len,
+                                       data->cipher_auth.auth_iv_len);
+               }
+       }
+
+       if (data->op_mask & CRYPTO_OP_MASK_AEAD) {
+               uint8_t *iv = crypto_op->iv_aad.aead_iv_aad.iv;
+               uint8_t *aad = crypto_op->iv_aad.aead_iv_aad.aad;
+
+               sym->aead.aad.data = aad;
+               sym->aead.aad.phys_addr = rte_pktmbuf_iova_offset(mbuf,
+                               aad - rte_pktmbuf_mtod(mbuf, uint8_t *));
+               sym->aead.digest.data = rte_pktmbuf_mtod_offset(mbuf,
+                               uint8_t *, rte_pktmbuf_pkt_len(mbuf) -
+                               data->digest_len);
+               sym->aead.digest.phys_addr = rte_pktmbuf_iova_offset(mbuf,
+                               rte_pktmbuf_pkt_len(mbuf) - data->digest_len);
+               sym->aead.data.offset = data->data_offset - pkt_offset;
+               sym->aead.data.length = payload_len;
+
+               if (data->aead.iv_update_len) {
+                       uint8_t *pkt_iv = RTE_MBUF_METADATA_UINT8_PTR(mbuf,
+                                       data->aead.iv_data_offset + ip_offset);
+                       uint8_t *data_iv = data->iv_aad_data;
+
+                       if (data->direction == RTE_CRYPTO_CIPHER_OP_ENCRYPT)
+                               rte_memcpy(pkt_iv, data_iv,
+                                               data->aead.iv_update_len);
+                       else
+                               rte_memcpy(data_iv, pkt_iv,
+                                       data->aead.iv_update_len);
+               }
+
+               rte_memcpy(iv, data->iv_aad_data, data->aead.iv_len);
+
+               if (data->aead.aad_update_len) {
+                       uint8_t *pkt_aad = RTE_MBUF_METADATA_UINT8_PTR(mbuf,
+                                       data->aead.aad_data_offset + ip_offset);
+                       uint8_t *data_aad = data->iv_aad_data +
+                                       data->aead.iv_len;
+
+                       if (data->direction == RTE_CRYPTO_CIPHER_OP_ENCRYPT)
+                               rte_memcpy(pkt_aad, data_aad,
+                                               data->aead.aad_update_len);
+                       else
+                               rte_memcpy(data_aad, pkt_aad,
+                                       data->aead.aad_update_len);
+               }
+
+               rte_memcpy(aad, data->iv_aad_data + data->aead.iv_len,
+                                       data->aead.aad_len);
+       }
+
+       return 0;
+}
+
+/**
+ * RTE_TABLE_ACTION_TAG
+ */
+struct tag_data {
+       uint32_t tag;
+} __attribute__((__packed__));
+
+static int
+tag_apply(struct tag_data *data,
+       struct rte_table_action_tag_params *p)
+{
+       data->tag = p->tag;
+       return 0;
+}
+
+static __rte_always_inline void
+pkt_work_tag(struct rte_mbuf *mbuf,
+       struct tag_data *data)
+{
+       mbuf->hash.fdir.hi = data->tag;
+       mbuf->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+}
+
+static __rte_always_inline void
+pkt4_work_tag(struct rte_mbuf *mbuf0,
+       struct rte_mbuf *mbuf1,
+       struct rte_mbuf *mbuf2,
+       struct rte_mbuf *mbuf3,
+       struct tag_data *data0,
+       struct tag_data *data1,
+       struct tag_data *data2,
+       struct tag_data *data3)
+{
+       mbuf0->hash.fdir.hi = data0->tag;
+       mbuf1->hash.fdir.hi = data1->tag;
+       mbuf2->hash.fdir.hi = data2->tag;
+       mbuf3->hash.fdir.hi = data3->tag;
+
+       mbuf0->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+       mbuf1->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+       mbuf2->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+       mbuf3->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID;
+}
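+
+/*
+ * Illustrative sketch (editor's note, not part of this patch): the TAG
+ * action stores the 32-bit tag in the mbuf flow director metadata, so an
+ * application can read it back after the table lookup. handle_tag() below
+ * is a hypothetical application function:
+ *
+ *	if (mbuf->ol_flags & PKT_RX_FDIR_ID)
+ *		handle_tag(mbuf->hash.fdir.hi);
+ */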
+
+/**
+ * RTE_TABLE_ACTION_DECAP
+ */
+struct decap_data {
+       uint16_t n;
+} __attribute__((__packed__));
+
+static int
+decap_apply(struct decap_data *data,
+       struct rte_table_action_decap_params *p)
+{
+       data->n = p->n;
+       return 0;
+}
+
+static __rte_always_inline void
+pkt_work_decap(struct rte_mbuf *mbuf,
+       struct decap_data *data)
+{
+       uint16_t data_off = mbuf->data_off;
+       uint16_t data_len = mbuf->data_len;
+       uint32_t pkt_len = mbuf->pkt_len;
+       uint16_t n = data->n;
+
+       mbuf->data_off = data_off + n;
+       mbuf->data_len = data_len - n;
+       mbuf->pkt_len = pkt_len - n;
+}
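+
+/*
+ * Editor's note (worked example, not part of this patch): decap strips n
+ * bytes from the packet head by adjusting mbuf metadata only, with no data
+ * copy. For data_off = 128, data_len = 1500, pkt_len = 1500 and n = 50,
+ * the result is data_off = 178, data_len = 1450, pkt_len = 1450. No bounds
+ * check is performed, so the rule must guarantee n <= data_len.
+ */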
+
+static __rte_always_inline void
+pkt4_work_decap(struct rte_mbuf *mbuf0,
+       struct rte_mbuf *mbuf1,
+       struct rte_mbuf *mbuf2,
+       struct rte_mbuf *mbuf3,
+       struct decap_data *data0,
+       struct decap_data *data1,
+       struct decap_data *data2,
+       struct decap_data *data3)
+{
+       uint16_t data_off0 = mbuf0->data_off;
+       uint16_t data_len0 = mbuf0->data_len;
+       uint32_t pkt_len0 = mbuf0->pkt_len;
+
+       uint16_t data_off1 = mbuf1->data_off;
+       uint16_t data_len1 = mbuf1->data_len;
+       uint32_t pkt_len1 = mbuf1->pkt_len;
+
+       uint16_t data_off2 = mbuf2->data_off;
+       uint16_t data_len2 = mbuf2->data_len;
+       uint32_t pkt_len2 = mbuf2->pkt_len;
+
+       uint16_t data_off3 = mbuf3->data_off;
+       uint16_t data_len3 = mbuf3->data_len;
+       uint32_t pkt_len3 = mbuf3->pkt_len;
+
+       uint16_t n0 = data0->n;
+       uint16_t n1 = data1->n;
+       uint16_t n2 = data2->n;
+       uint16_t n3 = data3->n;
+
+       mbuf0->data_off = data_off0 + n0;
+       mbuf0->data_len = data_len0 - n0;
+       mbuf0->pkt_len = pkt_len0 - n0;
+
+       mbuf1->data_off = data_off1 + n1;
+       mbuf1->data_len = data_len1 - n1;
+       mbuf1->pkt_len = pkt_len1 - n1;
+
+       mbuf2->data_off = data_off2 + n2;
+       mbuf2->data_len = data_len2 - n2;
+       mbuf2->pkt_len = pkt_len2 - n2;
+
+       mbuf3->data_off = data_off3 + n3;
+       mbuf3->data_len = data_len3 - n3;
+       mbuf3->pkt_len = pkt_len3 - n3;
+}
+
 /**
  * Action profile
  */
@@ -1235,6 +2148,9 @@ action_valid(enum rte_table_action_type action)
        case RTE_TABLE_ACTION_TTL:
        case RTE_TABLE_ACTION_STATS:
        case RTE_TABLE_ACTION_TIME:
+       case RTE_TABLE_ACTION_SYM_CRYPTO:
+       case RTE_TABLE_ACTION_TAG:
+       case RTE_TABLE_ACTION_DECAP:
                return 1;
        default:
                return 0;
@@ -1254,6 +2170,7 @@ struct ap_config {
        struct rte_table_action_nat_config nat;
        struct rte_table_action_ttl_config ttl;
        struct rte_table_action_stats_config stats;
+       struct rte_table_action_sym_crypto_config sym_crypto;
 };
 
 static size_t
@@ -1274,6 +2191,8 @@ action_cfg_size(enum rte_table_action_type action)
                return sizeof(struct rte_table_action_ttl_config);
        case RTE_TABLE_ACTION_STATS:
                return sizeof(struct rte_table_action_stats_config);
+       case RTE_TABLE_ACTION_SYM_CRYPTO:
+               return sizeof(struct rte_table_action_sym_crypto_config);
        default:
                return 0;
        }
@@ -1305,6 +2224,8 @@ action_cfg_get(struct ap_config *ap_config,
        case RTE_TABLE_ACTION_STATS:
                return &ap_config->stats;
 
+       case RTE_TABLE_ACTION_SYM_CRYPTO:
+               return &ap_config->sym_crypto;
        default:
                return NULL;
        }
@@ -1361,6 +2282,15 @@ action_data_size(enum rte_table_action_type action,
        case RTE_TABLE_ACTION_TIME:
                return sizeof(struct time_data);
 
+       case RTE_TABLE_ACTION_SYM_CRYPTO:
+               return sizeof(struct sym_crypto_data);
+
+       case RTE_TABLE_ACTION_TAG:
+               return sizeof(struct tag_data);
+
+       case RTE_TABLE_ACTION_DECAP:
+               return sizeof(struct decap_data);
+
        default:
                return 0;
        }
@@ -1460,6 +2390,10 @@ rte_table_action_profile_action_register(struct rte_table_action_profile *profil
                status = stats_cfg_check(action_config);
                break;
 
+       case RTE_TABLE_ACTION_SYM_CRYPTO:
+               status = sym_crypto_cfg_check(action_config);
+               break;
+
        default:
                status = 0;
                break;
@@ -1609,6 +2543,19 @@ rte_table_action_apply(struct rte_table_action *action,
                return time_apply(action_data,
                        action_params);
 
+       case RTE_TABLE_ACTION_SYM_CRYPTO:
+               return sym_crypto_apply(action_data,
+                               &action->cfg.sym_crypto,
+                               action_params);
+
+       case RTE_TABLE_ACTION_TAG:
+               return tag_apply(action_data,
+                       action_params);
+
+       case RTE_TABLE_ACTION_DECAP:
+               return decap_apply(action_data,
+                       action_params);
+
        default:
                return -EINVAL;
        }
@@ -1861,6 +2808,25 @@ rte_table_action_time_read(struct rte_table_action *action,
        return 0;
 }
 
+struct rte_cryptodev_sym_session *
+rte_table_action_crypto_sym_session_get(struct rte_table_action *action,
+       void *data)
+{
+       struct sym_crypto_data *sym_crypto_data;
+
+       /* Check input arguments */
+       if ((action == NULL) ||
+               ((action->cfg.action_mask &
+               (1LLU << RTE_TABLE_ACTION_SYM_CRYPTO)) == 0) ||
+               (data == NULL))
+               return NULL;
+
+       sym_crypto_data = action_data_get(data, action,
+                       RTE_TABLE_ACTION_SYM_CRYPTO);
+
+       return sym_crypto_data->session;
+}
+
 static __rte_always_inline uint64_t
 pkt_work(struct rte_mbuf *mbuf,
        struct rte_pipeline_table_entry *table_entry,
@@ -1920,6 +2886,14 @@ pkt_work(struct rte_mbuf *mbuf,
                        dscp);
        }
 
+       if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_DECAP)) {
+               void *data = action_data_get(table_entry,
+                       action,
+                       RTE_TABLE_ACTION_DECAP);
+
+               pkt_work_decap(mbuf, data);
+       }
+
        if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_ENCAP)) {
                void *data =
                        action_data_get(table_entry, action, RTE_TABLE_ACTION_ENCAP);
@@ -1966,6 +2940,22 @@ pkt_work(struct rte_mbuf *mbuf,
                pkt_work_time(data, time);
        }
 
+       if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_SYM_CRYPTO)) {
+               void *data = action_data_get(table_entry, action,
+                               RTE_TABLE_ACTION_SYM_CRYPTO);
+
+               drop_mask |= pkt_work_sym_crypto(mbuf, data, &cfg->sym_crypto,
+                               ip_offset);
+       }
+
+       if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TAG)) {
+               void *data = action_data_get(table_entry,
+                       action,
+                       RTE_TABLE_ACTION_TAG);
+
+               pkt_work_tag(mbuf, data);
+       }
+
        return drop_mask;
 }
 
@@ -2137,6 +3127,24 @@ pkt4_work(struct rte_mbuf **mbufs,
                        dscp3);
        }
 
+       if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_DECAP)) {
+               void *data0 = action_data_get(table_entry0,
+                       action,
+                       RTE_TABLE_ACTION_DECAP);
+               void *data1 = action_data_get(table_entry1,
+                       action,
+                       RTE_TABLE_ACTION_DECAP);
+               void *data2 = action_data_get(table_entry2,
+                       action,
+                       RTE_TABLE_ACTION_DECAP);
+               void *data3 = action_data_get(table_entry3,
+                       action,
+                       RTE_TABLE_ACTION_DECAP);
+
+               pkt4_work_decap(mbuf0, mbuf1, mbuf2, mbuf3,
+                       data0, data1, data2, data3);
+       }
+
        if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_ENCAP)) {
                void *data0 =
                        action_data_get(table_entry0, action, RTE_TABLE_ACTION_ENCAP);
@@ -2254,6 +3262,44 @@ pkt4_work(struct rte_mbuf **mbufs,
                pkt_work_time(data3, time);
        }
 
+       if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_SYM_CRYPTO)) {
+               void *data0 = action_data_get(table_entry0, action,
+                               RTE_TABLE_ACTION_SYM_CRYPTO);
+               void *data1 = action_data_get(table_entry1, action,
+                               RTE_TABLE_ACTION_SYM_CRYPTO);
+               void *data2 = action_data_get(table_entry2, action,
+                               RTE_TABLE_ACTION_SYM_CRYPTO);
+               void *data3 = action_data_get(table_entry3, action,
+                               RTE_TABLE_ACTION_SYM_CRYPTO);
+
+               drop_mask0 |= pkt_work_sym_crypto(mbuf0, data0, &cfg->sym_crypto,
+                               ip_offset);
+               drop_mask1 |= pkt_work_sym_crypto(mbuf1, data1, &cfg->sym_crypto,
+                               ip_offset);
+               drop_mask2 |= pkt_work_sym_crypto(mbuf2, data2, &cfg->sym_crypto,
+                               ip_offset);
+               drop_mask3 |= pkt_work_sym_crypto(mbuf3, data3, &cfg->sym_crypto,
+                               ip_offset);
+       }
+
+       if (cfg->action_mask & (1LLU << RTE_TABLE_ACTION_TAG)) {
+               void *data0 = action_data_get(table_entry0,
+                       action,
+                       RTE_TABLE_ACTION_TAG);
+               void *data1 = action_data_get(table_entry1,
+                       action,
+                       RTE_TABLE_ACTION_TAG);
+               void *data2 = action_data_get(table_entry2,
+                       action,
+                       RTE_TABLE_ACTION_TAG);
+               void *data3 = action_data_get(table_entry3,
+                       action,
+                       RTE_TABLE_ACTION_TAG);
+
+               pkt4_work_tag(mbuf0, mbuf1, mbuf2, mbuf3,
+                       data0, data1, data2, data3);
+       }
+
        return drop_mask0 |
                (drop_mask1 << 1) |
                (drop_mask2 << 2) |
index c7f751a..c960612 100644 (file)
@@ -93,6 +93,15 @@ enum rte_table_action_type {
 
        /** Timestamp. */
        RTE_TABLE_ACTION_TIME,
+
+       /** Symmetric crypto. */
+       RTE_TABLE_ACTION_SYM_CRYPTO,
+
+       /** Tag. */
+       RTE_TABLE_ACTION_TAG,
+
+       /** Packet decapsulation. */
+       RTE_TABLE_ACTION_DECAP,
 };
 
 /** Common action configuration (per table action profile). */
@@ -366,6 +375,11 @@ enum rte_table_action_encap_type {
 
        /** IP -> { Ether | PPPoE | PPP | IP } */
        RTE_TABLE_ACTION_ENCAP_PPPOE,
+
+       /** Ether -> { Ether | IP | UDP | VXLAN | Ether }
+        * Ether -> { Ether | VLAN | IP | UDP | VXLAN | Ether }
+        */
+       RTE_TABLE_ACTION_ENCAP_VXLAN,
 };
 
 /** Pre-computed Ethernet header fields for encapsulation action. */
@@ -393,6 +407,34 @@ struct rte_table_action_pppoe_hdr {
        uint16_t session_id; /**< Session ID. */
 };
 
+/** Pre-computed IPv4 header fields for encapsulation action. */
+struct rte_table_action_ipv4_header {
+       uint32_t sa; /**< Source address. */
+       uint32_t da; /**< Destination address. */
+       uint8_t dscp; /**< DiffServ Code Point (DSCP). */
+       uint8_t ttl; /**< Time To Live (TTL). */
+};
+
+/** Pre-computed IPv6 header fields for encapsulation action. */
+struct rte_table_action_ipv6_header {
+       uint8_t sa[16]; /**< Source address. */
+       uint8_t da[16]; /**< Destination address. */
+       uint32_t flow_label; /**< Flow label. */
+       uint8_t dscp; /**< DiffServ Code Point (DSCP). */
+       uint8_t hop_limit; /**< Hop Limit (HL). */
+};
+
+/** Pre-computed UDP header fields for encapsulation action. */
+struct rte_table_action_udp_header {
+       uint16_t sp; /**< Source port. */
+       uint16_t dp; /**< Destination port. */
+};
+
+/** Pre-computed VXLAN header fields for encapsulation action. */
+struct rte_table_action_vxlan_hdr {
+       uint32_t vni; /**< VXLAN Network Identifier (VNI). */
+};
+
 /** Ether encap parameters. */
 struct rte_table_action_encap_ether_params {
        struct rte_table_action_ether_hdr ether; /**< Ethernet header. */
@@ -437,6 +479,21 @@ struct rte_table_action_encap_pppoe_params {
        struct rte_table_action_pppoe_hdr pppoe; /**< PPPoE/PPP headers. */
 };
 
+/** VXLAN encap parameters. */
+struct rte_table_action_encap_vxlan_params {
+       struct rte_table_action_ether_hdr ether; /**< Ethernet header. */
+       struct rte_table_action_vlan_hdr vlan; /**< VLAN header. */
+
+       RTE_STD_C11
+       union {
+               struct rte_table_action_ipv4_header ipv4; /**< IPv4 header. */
+               struct rte_table_action_ipv6_header ipv6; /**< IPv6 header. */
+       };
+
+       struct rte_table_action_udp_header udp; /**< UDP header. */
+       struct rte_table_action_vxlan_hdr vxlan; /**< VXLAN header. */
+};
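+
+/*
+ * Illustrative sketch (editor's example, not part of this patch): VXLAN
+ * over IPv4 encap parameters for a table rule; all values are arbitrary
+ * placeholders and the VLAN header is left unused:
+ *
+ *	struct rte_table_action_encap_params p = {
+ *		.type = RTE_TABLE_ACTION_ENCAP_VXLAN,
+ *		.vxlan = {
+ *			.ether = { ... outer source/destination MACs ... },
+ *			.ipv4 = {.sa = IPv4(10, 0, 0, 1),
+ *				.da = IPv4(10, 0, 0, 2),
+ *				.dscp = 0, .ttl = 64},
+ *			.udp = {.sp = 4789, .dp = 4789},
+ *			.vxlan = {.vni = 100},
+ *		},
+ *	};
+ */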
+
 /** Encap action configuration (per table action profile). */
 struct rte_table_action_encap_config {
        /** Bit mask defining the set of packet encapsulations enabled for the
@@ -446,6 +503,30 @@ struct rte_table_action_encap_config {
         * @see enum rte_table_action_encap_type
         */
        uint64_t encap_mask;
+
+       /** Encapsulation type specific configuration. */
+       RTE_STD_C11
+       union {
+               struct {
+                       /** Input packet to be encapsulated: offset within the
+                        * input packet buffer to the start of the Ethernet
+                        * frame to be encapsulated. Offset 0 points to the
+                        * first byte of the MBUF structure.
+                        */
+                       uint32_t data_offset;
+
+                       /** Encapsulation header: non-zero when encapsulation
+                        * header includes a VLAN tag, zero otherwise.
+                        */
+                       int vlan;
+
+                       /** Encapsulation header: IP version of the IP header
+                        * within the encapsulation header. Non-zero for IPv4,
+                        * zero for IPv6.
+                        */
+                       int ip_version;
+               } vxlan; /**< VXLAN specific configuration. */
+       };
 };
 
 /** Encap action parameters (per table rule). */
@@ -469,6 +550,9 @@ struct rte_table_action_encap_params {
 
                /** Only valid when *type* is set to PPPoE. */
                struct rte_table_action_encap_pppoe_params pppoe;
+
+               /** Only valid when *type* is set to VXLAN. */
+               struct rte_table_action_encap_vxlan_params vxlan;
        };
 };
 
@@ -605,6 +689,111 @@ struct rte_table_action_time_params {
        uint64_t time;
 };
 
+/**
+ * RTE_TABLE_ACTION_SYM_CRYPTO
+ */
+#ifndef RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX
+#define RTE_TABLE_ACTION_SYM_CRYPTO_IV_SIZE_MAX                (16)
+#endif
+
+#ifndef RTE_TABLE_ACTION_SYM_CRYPTO_AAD_SIZE_MAX
+#define RTE_TABLE_ACTION_SYM_CRYPTO_AAD_SIZE_MAX       (16)
+#endif
+
+#ifndef RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET
+#define RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET                          \
+       (sizeof(struct rte_crypto_op) + sizeof(struct rte_crypto_sym_op))
+#endif
+
+/** Common structure to store an action data field's value, length, and offset. */
+struct rte_table_action_vlo {
+       uint8_t *val;
+       uint32_t length;
+       uint32_t offset;
+};
+
+/** Symmetric crypto action configuration (per table action profile). */
+struct rte_table_action_sym_crypto_config {
+       /** Target Cryptodev ID. */
+       uint8_t cryptodev_id;
+
+       /**
+        * Offset to rte_crypto_op structure within the input packet buffer.
+        * Offset 0 points to the first byte of the MBUF structure.
+        */
+       uint32_t op_offset;
+
+       /** The mempool for creating cryptodev sessions. */
+       struct rte_mempool *mp_create;
+
+       /** The mempool for initializing cryptodev sessions. */
+       struct rte_mempool *mp_init;
+};
+
+/** Symmetric Crypto action parameters (per table rule). */
+struct rte_table_action_sym_crypto_params {
+
+       /** Crypto transform (xform) chain carrying all session information. */
+       struct rte_crypto_sym_xform *xform;
+
+       /**
+        * Offset within the input packet buffer to the first byte of data
+        * to be processed by the crypto unit. Offset 0 points to the first
+        * byte of the MBUF structure.
+        */
+       uint32_t data_offset;
+
+       union {
+               struct {
+                       /** Cipher IV data. */
+                       struct rte_table_action_vlo cipher_iv;
+
+                       /** Cipher IV update data. */
+                       struct rte_table_action_vlo cipher_iv_update;
+
+                       /** Auth IV data. */
+                       struct rte_table_action_vlo auth_iv;
+
+                       /** Auth IV update data. */
+                       struct rte_table_action_vlo auth_iv_update;
+
+               } cipher_auth;
+
+               struct {
+                       /** AEAD AAD data. */
+                       struct rte_table_action_vlo aad;
+
+                       /** AEAD IV data. */
+                       struct rte_table_action_vlo iv;
+
+                       /** AEAD AAD update data. */
+                       struct rte_table_action_vlo aad_update;
+
+                       /** AEAD IV update data. */
+                       struct rte_table_action_vlo iv_update;
+
+               } aead;
+       };
+};
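+
+/*
+ * Illustrative sketch (editor's example, not part of this patch): a
+ * per-rule AES-GCM setup; the key, lengths and offsets below are
+ * placeholders chosen for illustration only:
+ *
+ *	struct rte_crypto_sym_xform xform = {
+ *		.type = RTE_CRYPTO_SYM_XFORM_AEAD,
+ *		.aead = {
+ *			.op = RTE_CRYPTO_AEAD_OP_ENCRYPT,
+ *			.algo = RTE_CRYPTO_AEAD_AES_GCM,
+ *			.key = {.data = key, .length = 16},
+ *			.iv = {.offset = RTE_TABLE_ACTION_SYM_CRYPTO_IV_OFFSET,
+ *				.length = 12},
+ *			.digest_length = 16,
+ *			.aad_length = 8,
+ *		},
+ *	};
+ *	struct rte_table_action_sym_crypto_params p = {
+ *		.xform = &xform,
+ *		.data_offset = ...,
+ *		.aead = {.iv = { ... }, .aad = { ... }},
+ *	};
+ */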
+
+/**
+ * RTE_TABLE_ACTION_TAG
+ */
+/** Tag action parameters (per table rule). */
+struct rte_table_action_tag_params {
+       /** Tag to be attached to the input packet. */
+       uint32_t tag;
+};
+
+/**
+ * RTE_TABLE_ACTION_DECAP
+ */
+/** Decap action parameters (per table rule). */
+struct rte_table_action_decap_params {
+       /** Number of bytes to be removed from the start of the packet. */
+       uint16_t n;
+};
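+
+/*
+ * Editor's note (worked example, not part of this patch): to strip an
+ * outer Ethernet/IPv4/UDP/VXLAN encapsulation, n = 14 + 20 + 8 + 8 = 50.
+ */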
+
 /**
  * Table action profile.
  */
@@ -898,6 +1087,20 @@ rte_table_action_time_read(struct rte_table_action *action,
        void *data,
        uint64_t *timestamp);
 
+/**
+ * Table action cryptodev symmetric session get.
+ *
+ * @param[in] action
+ *   Handle to table action object (needs to be valid).
+ * @param[in] data
+ *   Data byte array (typically table rule data) with sym crypto action.
+ * @return
+ *   The pointer to the session on success, NULL otherwise.
+ */
+struct rte_cryptodev_sym_session *__rte_experimental
+rte_table_action_crypto_sym_session_get(struct rte_table_action *action,
+       void *data);
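+
+/*
+ * Illustrative usage sketch (editor's example, not part of this patch):
+ *
+ *	struct rte_cryptodev_sym_session *sess =
+ *		rte_table_action_crypto_sym_session_get(action, rule_data);
+ *	if (sess == NULL)
+ *		... rule_data carries no sym crypto action ...
+ */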
+
 #ifdef __cplusplus
 }
 #endif
index 8df4864..1b83f6f 100644 (file)
@@ -11,7 +11,7 @@ ifeq ($(CONFIG_RTE_PORT_PCAP),y)
 LDLIBS += -lpcap
 endif
 LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
-LDLIBS += -lrte_ip_frag -lrte_sched
+LDLIBS += -lrte_ip_frag -lrte_sched -lrte_cryptodev
 ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
 LDLIBS += -lrte_kni
 endif
@@ -38,6 +38,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
 SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_kni.c
 endif
 SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_source_sink.c
+SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_sym_crypto.c
 
 # install includes
 SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port.h
@@ -53,5 +54,6 @@ ifeq ($(CONFIG_RTE_LIBRTE_KNI),y)
 SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_kni.h
 endif
 SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_source_sink.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_sym_crypto.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
index f3d8b44..0d11456 100644 (file)
@@ -9,7 +9,8 @@ sources = files(
        'rte_port_ras.c',
        'rte_port_ring.c',
        'rte_port_sched.c',
-       'rte_port_source_sink.c')
+       'rte_port_source_sink.c',
+       'rte_port_sym_crypto.c')
 headers = files(
        'rte_port_ethdev.h',
        'rte_port_fd.h',
@@ -18,8 +19,9 @@ headers = files(
        'rte_port.h',
        'rte_port_ring.h',
        'rte_port_sched.h',
-       'rte_port_source_sink.h')
-deps += ['ethdev', 'sched', 'ip_frag']
+       'rte_port_source_sink.h',
+       'rte_port_sym_crypto.h')
+deps += ['ethdev', 'sched', 'ip_frag', 'cryptodev']
 
 if dpdk_conf.has('RTE_LIBRTE_KNI')
        sources += files('rte_port_kni.c')
diff --git a/lib/librte_port/rte_port_sym_crypto.c b/lib/librte_port/rte_port_sym_crypto.c
new file mode 100644 (file)
index 0000000..295984d
--- /dev/null
@@ -0,0 +1,552 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+
+#include "rte_port_sym_crypto.h"
+
+/*
+ * Port Crypto Reader
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_SYM_CRYPTO_READER_STATS_PKTS_IN_ADD(port, val) \
+       (port)->stats.n_pkts_in += (val)
+#define RTE_PORT_SYM_CRYPTO_READER_STATS_PKTS_DROP_ADD(port, val) \
+       (port)->stats.n_pkts_drop += (val)
+
+#else
+
+#define RTE_PORT_SYM_CRYPTO_READER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_SYM_CRYPTO_READER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_sym_crypto_reader {
+       struct rte_port_in_stats stats;
+
+       uint8_t cryptodev_id;
+       uint16_t queue_id;
+       struct rte_crypto_op *ops[RTE_PORT_IN_BURST_SIZE_MAX];
+       rte_port_sym_crypto_reader_callback_fn f_callback;
+       void *arg_callback;
+};
+
+static void *
+rte_port_sym_crypto_reader_create(void *params, int socket_id)
+{
+       struct rte_port_sym_crypto_reader_params *conf =
+                       params;
+       struct rte_port_sym_crypto_reader *port;
+
+       /* Check input parameters */
+       if (conf == NULL) {
+               RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__);
+               return NULL;
+       }
+
+       /* Memory allocation */
+       port = rte_zmalloc_socket("PORT", sizeof(*port),
+               RTE_CACHE_LINE_SIZE, socket_id);
+       if (port == NULL) {
+               RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+               return NULL;
+       }
+
+       /* Initialization */
+       port->cryptodev_id = conf->cryptodev_id;
+       port->queue_id = conf->queue_id;
+       port->f_callback = conf->f_callback;
+       port->arg_callback = conf->arg_callback;
+
+       return port;
+}
+
+static int
+rte_port_sym_crypto_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts)
+{
+       struct rte_port_sym_crypto_reader *p =
+                       port;
+       uint16_t rx_ops_cnt, i, n = 0;
+
+       rx_ops_cnt = rte_cryptodev_dequeue_burst(p->cryptodev_id, p->queue_id,
+                       p->ops, n_pkts);
+
+       for (i = 0; i < rx_ops_cnt; i++) {
+               struct rte_crypto_op *op = p->ops[i];
+
+               /* Drop packets whose crypto ops failed */
+               if (unlikely(op->status != RTE_CRYPTO_OP_STATUS_SUCCESS)) {
+                       rte_pktmbuf_free(op->sym->m_src);
+                       continue;
+               }
+
+               pkts[n++] = op->sym->m_src;
+       }
+
+       if (p->f_callback)
+               (*p->f_callback)(pkts, n, p->arg_callback);
+
+       RTE_PORT_SYM_CRYPTO_READER_STATS_PKTS_IN_ADD(p, n);
+       RTE_PORT_SYM_CRYPTO_READER_STATS_PKTS_DROP_ADD(p, rx_ops_cnt - n);
+
+       return n;
+}
+
+static int
+rte_port_sym_crypto_reader_free(void *port)
+{
+       if (port == NULL) {
+               RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       rte_free(port);
+
+       return 0;
+}
+
+static int rte_port_sym_crypto_reader_stats_read(void *port,
+       struct rte_port_in_stats *stats, int clear)
+{
+       struct rte_port_sym_crypto_reader *p =
+                       port;
+
+       if (stats != NULL)
+               memcpy(stats, &p->stats, sizeof(p->stats));
+
+       if (clear)
+               memset(&p->stats, 0, sizeof(p->stats));
+
+       return 0;
+}
+
+/*
+ * Port crypto Writer
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_IN_ADD(port, val) \
+       (port)->stats.n_pkts_in += (val)
+#define RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_DROP_ADD(port, val) \
+       (port)->stats.n_pkts_drop += (val)
+
+#else
+
+#define RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_sym_crypto_writer {
+       struct rte_port_out_stats stats;
+
+       struct rte_crypto_op *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+
+       uint32_t tx_burst_sz;
+       uint32_t tx_buf_count;
+       uint64_t bsz_mask;
+
+       uint8_t cryptodev_id;
+       uint16_t queue_id;
+       uint16_t crypto_op_offset;
+};
+
+static void *
+rte_port_sym_crypto_writer_create(void *params, int socket_id)
+{
+       struct rte_port_sym_crypto_writer_params *conf =
+                       params;
+       struct rte_port_sym_crypto_writer *port;
+
+       /* Check input parameters */
+       if ((conf == NULL) ||
+               (conf->tx_burst_sz == 0) ||
+               (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) ||
+               (!rte_is_power_of_2(conf->tx_burst_sz))) {
+               RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__);
+               return NULL;
+       }
+
+       /* Memory allocation */
+       port = rte_zmalloc_socket("PORT", sizeof(*port),
+               RTE_CACHE_LINE_SIZE, socket_id);
+       if (port == NULL) {
+               RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+               return NULL;
+       }
+
+       /* Initialization */
+       port->tx_burst_sz = conf->tx_burst_sz;
+       port->tx_buf_count = 0;
+       port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1);
+
+       port->cryptodev_id = conf->cryptodev_id;
+       port->queue_id = conf->queue_id;
+       port->crypto_op_offset = conf->crypto_op_offset;
+
+       return port;
+}
+
+static inline void
+send_burst(struct rte_port_sym_crypto_writer *p)
+{
+       uint32_t nb_tx;
+
+       nb_tx = rte_cryptodev_enqueue_burst(p->cryptodev_id, p->queue_id,
+                       p->tx_buf, p->tx_buf_count);
+
+       RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count -
+                       nb_tx);
+       for (; nb_tx < p->tx_buf_count; nb_tx++)
+               rte_pktmbuf_free(p->tx_buf[nb_tx]->sym->m_src);
+
+       p->tx_buf_count = 0;
+}
+
+static int
+rte_port_sym_crypto_writer_tx(void *port, struct rte_mbuf *pkt)
+{
+       struct rte_port_sym_crypto_writer *p =
+                       port;
+
+       p->tx_buf[p->tx_buf_count++] = (struct rte_crypto_op *)
+                       RTE_MBUF_METADATA_UINT8_PTR(pkt, p->crypto_op_offset);
+       RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_IN_ADD(p, 1);
+       if (p->tx_buf_count >= p->tx_burst_sz)
+               send_burst(p);
+
+       return 0;
+}
+
+static int
+rte_port_sym_crypto_writer_tx_bulk(void *port,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask)
+{
+       struct rte_port_sym_crypto_writer *p =
+                       port;
+       uint64_t bsz_mask = p->bsz_mask;
+       uint32_t tx_buf_count = p->tx_buf_count;
+       uint64_t expr = (pkts_mask & (pkts_mask + 1)) |
+                                       ((pkts_mask & bsz_mask) ^ bsz_mask);
+
+       if (expr == 0) {
+               uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+               uint32_t i;
+
+               RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_IN_ADD(p, n_pkts);
+
+               for (i = 0; i < n_pkts; i++)
+                       p->tx_buf[p->tx_buf_count++] = (struct rte_crypto_op *)
+                                       RTE_MBUF_METADATA_UINT8_PTR(pkts[i],
+                                                       p->crypto_op_offset);
+
+               if (p->tx_buf_count >= p->tx_burst_sz)
+                       send_burst(p);
+       } else {
+               for (; pkts_mask;) {
+                       uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+                       uint64_t pkt_mask = 1LLU << pkt_index;
+                       struct rte_mbuf *pkt = pkts[pkt_index];
+
+                       p->tx_buf[tx_buf_count++] = (struct rte_crypto_op *)
+                                       RTE_MBUF_METADATA_UINT8_PTR(pkt,
+                                                       p->crypto_op_offset);
+
+                       RTE_PORT_SYM_CRYPTO_WRITER_STATS_PKTS_IN_ADD(p, 1);
+                       pkts_mask &= ~pkt_mask;
+               }
+
+               p->tx_buf_count = tx_buf_count;
+               if (tx_buf_count >= p->tx_burst_sz)
+                       send_burst(p);
+       }
+
+       return 0;
+}
+
+static int
+rte_port_sym_crypto_writer_flush(void *port)
+{
+       struct rte_port_sym_crypto_writer *p =
+                       port;
+
+       if (p->tx_buf_count > 0)
+               send_burst(p);
+
+       return 0;
+}
+
+static int
+rte_port_sym_crypto_writer_free(void *port)
+{
+       if (port == NULL) {
+               RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       rte_port_sym_crypto_writer_flush(port);
+       rte_free(port);
+
+       return 0;
+}
+
+static int rte_port_sym_crypto_writer_stats_read(void *port,
+       struct rte_port_out_stats *stats, int clear)
+{
+       struct rte_port_sym_crypto_writer *p =
+                       port;
+
+       if (stats != NULL)
+               memcpy(stats, &p->stats, sizeof(p->stats));
+
+       if (clear)
+               memset(&p->stats, 0, sizeof(p->stats));
+
+       return 0;
+}
+
+/*
+ * Port crypto Writer Nodrop
+ */
+#ifdef RTE_PORT_STATS_COLLECT
+
+#define RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \
+       (port)->stats.n_pkts_in += (val)
+#define RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \
+       (port)->stats.n_pkts_drop += (val)
+
+#else
+
+#define RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val)
+#define RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val)
+
+#endif
+
+struct rte_port_sym_crypto_writer_nodrop {
+       struct rte_port_out_stats stats;
+
+       struct rte_crypto_op *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX];
+       uint32_t tx_burst_sz;
+       uint32_t tx_buf_count;
+       uint64_t bsz_mask;
+       uint64_t n_retries;
+
+       uint8_t cryptodev_id;
+       uint16_t queue_id;
+       uint16_t crypto_op_offset;
+};
+
+static void *
+rte_port_sym_crypto_writer_nodrop_create(void *params, int socket_id)
+{
+       struct rte_port_sym_crypto_writer_nodrop_params *conf =
+               params;
+       struct rte_port_sym_crypto_writer_nodrop *port;
+
+       /* Check input parameters */
+       if ((conf == NULL) ||
+               (conf->tx_burst_sz == 0) ||
+               (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) ||
+               (!rte_is_power_of_2(conf->tx_burst_sz))) {
+               RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__);
+               return NULL;
+       }
+
+       /* Memory allocation */
+       port = rte_zmalloc_socket("PORT", sizeof(*port),
+               RTE_CACHE_LINE_SIZE, socket_id);
+       if (port == NULL) {
+               RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__);
+               return NULL;
+       }
+
+       /* Initialization */
+       port->cryptodev_id = conf->cryptodev_id;
+       port->queue_id = conf->queue_id;
+       port->crypto_op_offset = conf->crypto_op_offset;
+       port->tx_burst_sz = conf->tx_burst_sz;
+       port->tx_buf_count = 0;
+       port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1);
+
+       /*
+        * When n_retries is 0, every packet is retried until it is sent, no
+        * matter how many attempts it takes. To limit the number of branches
+        * in the fast path, UINT64_MAX is used instead of a dedicated check.
+        */
+       port->n_retries = (conf->n_retries == 0) ? UINT64_MAX : conf->n_retries;
+
+       return port;
+}
+
+static inline void
+send_burst_nodrop(struct rte_port_sym_crypto_writer_nodrop *p)
+{
+       uint32_t nb_tx = 0, i;
+
+       nb_tx = rte_cryptodev_enqueue_burst(p->cryptodev_id, p->queue_id,
+                       p->tx_buf, p->tx_buf_count);
+
+       /* All packets were sent on the first try */
+       if (nb_tx >= p->tx_buf_count) {
+               p->tx_buf_count = 0;
+               return;
+       }
+
+       for (i = 0; i < p->n_retries; i++) {
+               nb_tx += rte_cryptodev_enqueue_burst(p->cryptodev_id,
+                               p->queue_id, p->tx_buf + nb_tx,
+                               p->tx_buf_count - nb_tx);
+
+               /* All packets were sent after one or more retries */
+               if (nb_tx >= p->tx_buf_count) {
+                       p->tx_buf_count = 0;
+                       return;
+               }
+       }
+
+       /* Some packets were not sent within the maximum allowed attempts */
+       RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_DROP_ADD(p,
+                       p->tx_buf_count - nb_tx);
+       for ( ; nb_tx < p->tx_buf_count; nb_tx++)
+               rte_pktmbuf_free(p->tx_buf[nb_tx]->sym->m_src);
+
+       p->tx_buf_count = 0;
+}
+
+static int
+rte_port_sym_crypto_writer_nodrop_tx(void *port, struct rte_mbuf *pkt)
+{
+       struct rte_port_sym_crypto_writer_nodrop *p =
+                       port;
+
+       p->tx_buf[p->tx_buf_count++] = (struct rte_crypto_op *)
+                       RTE_MBUF_METADATA_UINT8_PTR(pkt, p->crypto_op_offset);
+       RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1);
+       if (p->tx_buf_count >= p->tx_burst_sz)
+               send_burst_nodrop(p);
+
+       return 0;
+}
+
+static int
+rte_port_sym_crypto_writer_nodrop_tx_bulk(void *port,
+       struct rte_mbuf **pkts,
+       uint64_t pkts_mask)
+{
+       struct rte_port_sym_crypto_writer_nodrop *p =
+                       port;
+
+       uint64_t bsz_mask = p->bsz_mask;
+       uint32_t tx_buf_count = p->tx_buf_count;
+       uint64_t expr = (pkts_mask & (pkts_mask + 1)) |
+                                       ((pkts_mask & bsz_mask) ^ bsz_mask);
+
+       if (expr == 0) {
+               uint64_t n_pkts = __builtin_popcountll(pkts_mask);
+               uint32_t i;
+
+               RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts);
+
+               for (i = 0; i < n_pkts; i++)
+                       p->tx_buf[p->tx_buf_count++] = (struct rte_crypto_op *)
+                                       RTE_MBUF_METADATA_UINT8_PTR(pkts[i],
+                                                       p->crypto_op_offset);
+
+               if (p->tx_buf_count >= p->tx_burst_sz)
+                       send_burst_nodrop(p);
+       } else {
+               for ( ; pkts_mask; ) {
+                       uint32_t pkt_index = __builtin_ctzll(pkts_mask);
+                       uint64_t pkt_mask = 1LLU << pkt_index;
+                       struct rte_mbuf *pkt = pkts[pkt_index];
+
+                       p->tx_buf[tx_buf_count++] = (struct rte_crypto_op *)
+                                       RTE_MBUF_METADATA_UINT8_PTR(pkt,
+                                                       p->crypto_op_offset);
+                       RTE_PORT_SYM_CRYPTO_WRITER_NODROP_STATS_PKTS_IN_ADD(p,
+                                       1);
+                       pkts_mask &= ~pkt_mask;
+               }
+
+               p->tx_buf_count = tx_buf_count;
+               if (tx_buf_count >= p->tx_burst_sz)
+                       send_burst_nodrop(p);
+       }
+
+       return 0;
+}
+
+static int
+rte_port_sym_crypto_writer_nodrop_flush(void *port)
+{
+       struct rte_port_sym_crypto_writer_nodrop *p =
+               port;
+
+       if (p->tx_buf_count > 0)
+               send_burst_nodrop(p);
+
+       return 0;
+}
+
+static int
+rte_port_sym_crypto_writer_nodrop_free(void *port)
+{
+       if (port == NULL) {
+               RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__);
+               return -EINVAL;
+       }
+
+       rte_port_sym_crypto_writer_nodrop_flush(port);
+       rte_free(port);
+
+       return 0;
+}
+
+static int rte_port_sym_crypto_writer_nodrop_stats_read(void *port,
+       struct rte_port_out_stats *stats, int clear)
+{
+       struct rte_port_sym_crypto_writer_nodrop *p =
+                       port;
+
+       if (stats != NULL)
+               memcpy(stats, &p->stats, sizeof(p->stats));
+
+       if (clear)
+               memset(&p->stats, 0, sizeof(p->stats));
+
+       return 0;
+}
+
+
+/*
+ * Summary of port operations
+ */
+struct rte_port_in_ops rte_port_sym_crypto_reader_ops = {
+       .f_create = rte_port_sym_crypto_reader_create,
+       .f_free = rte_port_sym_crypto_reader_free,
+       .f_rx = rte_port_sym_crypto_reader_rx,
+       .f_stats = rte_port_sym_crypto_reader_stats_read,
+};
+
+struct rte_port_out_ops rte_port_sym_crypto_writer_ops = {
+       .f_create = rte_port_sym_crypto_writer_create,
+       .f_free = rte_port_sym_crypto_writer_free,
+       .f_tx = rte_port_sym_crypto_writer_tx,
+       .f_tx_bulk = rte_port_sym_crypto_writer_tx_bulk,
+       .f_flush = rte_port_sym_crypto_writer_flush,
+       .f_stats = rte_port_sym_crypto_writer_stats_read,
+};
+
+struct rte_port_out_ops rte_port_sym_crypto_writer_nodrop_ops = {
+       .f_create = rte_port_sym_crypto_writer_nodrop_create,
+       .f_free = rte_port_sym_crypto_writer_nodrop_free,
+       .f_tx = rte_port_sym_crypto_writer_nodrop_tx,
+       .f_tx_bulk = rte_port_sym_crypto_writer_nodrop_tx_bulk,
+       .f_flush = rte_port_sym_crypto_writer_nodrop_flush,
+       .f_stats = rte_port_sym_crypto_writer_nodrop_stats_read,
+};
diff --git a/lib/librte_port/rte_port_sym_crypto.h b/lib/librte_port/rte_port_sym_crypto.h
new file mode 100644 (file)
index 0000000..181f6ce
--- /dev/null
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef __INCLUDE_RTE_PORT_SYM_CRYPTO_H__
+#define __INCLUDE_RTE_PORT_SYM_CRYPTO_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file
+ * RTE Port Symmetric Crypto Interface
+ *
+ * crypto_reader: input port built on top of a pre-initialized cryptodev
+ * crypto_writer: output port built on top of a pre-initialized cryptodev
+ */
+
+#include <stdint.h>
+
+#include <rte_cryptodev.h>
+
+#include "rte_port.h"
+
+/** Function prototype for reader post action. */
+typedef void (*rte_port_sym_crypto_reader_callback_fn)(struct rte_mbuf **pkts,
+               uint16_t n_pkts, void *arg);
+
+/** Crypto_reader port parameters. */
+struct rte_port_sym_crypto_reader_params {
+       /** Target cryptodev ID. */
+       uint8_t cryptodev_id;
+
+       /** Target cryptodev Queue Pair ID. */
+       uint16_t queue_id;
+
+       /** Crypto reader post callback function. */
+       rte_port_sym_crypto_reader_callback_fn f_callback;
+
+       /** Crypto reader post callback function arguments. */
+       void *arg_callback;
+};
+
+/** Crypto_reader port operations. */
+extern struct rte_port_in_ops rte_port_sym_crypto_reader_ops;
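+
+/*
+ * Illustrative sketch (editor's example, not part of this patch): creating
+ * a reader port directly through the ops table; the cryptodev/queue IDs are
+ * placeholders and the queue pair must already be configured and started:
+ *
+ *	struct rte_port_sym_crypto_reader_params rp = {
+ *		.cryptodev_id = 0,
+ *		.queue_id = 0,
+ *		.f_callback = NULL,
+ *		.arg_callback = NULL,
+ *	};
+ *	void *port = rte_port_sym_crypto_reader_ops.f_create(&rp,
+ *			SOCKET_ID_ANY);
+ */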
+
+
+/** Crypto_writer port parameters. */
+struct rte_port_sym_crypto_writer_params {
+       /** Target cryptodev ID. */
+       uint8_t cryptodev_id;
+
+       /** Target cryptodev Queue Pair ID. */
+       uint16_t queue_id;
+
+       /** Offset to the rte_crypto_op structure within the mbuf. */
+       uint16_t crypto_op_offset;
+
+       /** Burst size to crypto interface. */
+       uint32_t tx_burst_sz;
+};
+
+/** Crypto_writer port operations. */
+extern struct rte_port_out_ops rte_port_sym_crypto_writer_ops;
+
+/** Crypto_writer_nodrop port parameters. */
+struct rte_port_sym_crypto_writer_nodrop_params {
+       /** Target cryptodev ID. */
+       uint8_t cryptodev_id;
+
+       /** Target cryptodev Queue Pair ID. */
+       uint16_t queue_id;
+
+       /** Offset to the rte_crypto_op structure within the mbuf. */
+       uint16_t crypto_op_offset;
+
+       /** Burst size to crypto interface. */
+       uint32_t tx_burst_sz;
+
+       /** Maximum number of retries, 0 for no limit. */
+       uint32_t n_retries;
+};
+
+/** Crypto_writer_nodrop port operations. */
+extern struct rte_port_out_ops rte_port_sym_crypto_writer_nodrop_ops;
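+
+/*
+ * Illustrative sketch (editor's example, not part of this patch): a nodrop
+ * writer retrying indefinitely (n_retries == 0 is mapped to unlimited
+ * retries internally); crypto_op_offset below is a placeholder for
+ * wherever the application stores the rte_crypto_op in the mbuf metadata:
+ *
+ *	struct rte_port_sym_crypto_writer_nodrop_params wp = {
+ *		.cryptodev_id = 0,
+ *		.queue_id = 0,
+ *		.crypto_op_offset = sizeof(struct rte_mbuf),
+ *		.tx_burst_sz = 32,
+ *		.n_retries = 0,
+ *	};
+ *	void *port = rte_port_sym_crypto_writer_nodrop_ops.f_create(&wp,
+ *			SOCKET_ID_ANY);
+ */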
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_PORT_SYM_CRYPTO_H__ */
index 6470629..609bcec 100644 (file)
@@ -51,3 +51,12 @@ DPDK_16.11 {
        rte_port_fd_writer_nodrop_ops;
 
 } DPDK_16.07;
+
+DPDK_18.11 {
+       global:
+
+       rte_port_sym_crypto_reader_ops;
+       rte_port_sym_crypto_writer_ops;
+       rte_port_sym_crypto_writer_nodrop_ops;
+
+} DPDK_16.11;
index 6f85e88..9bec668 100644 (file)
@@ -7,7 +7,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_power.a
 
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing
-LDLIBS += -lrte_eal
+LDLIBS += -lrte_eal -lrte_timer
 
 EXPORT_MAP := rte_power_version.map
 
@@ -16,8 +16,9 @@ LIBABIVER := 1
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_POWER) := rte_power.c power_acpi_cpufreq.c
 SRCS-$(CONFIG_RTE_LIBRTE_POWER) += power_kvm_vm.c guest_channel.c
+SRCS-$(CONFIG_RTE_LIBRTE_POWER) += rte_power_empty_poll.c
 
 # install this header file
-SYMLINK-$(CONFIG_RTE_LIBRTE_POWER)-include := rte_power.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_POWER)-include := rte_power.h rte_power_empty_poll.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
index ee638ee..e7b93a7 100644 (file)
@@ -19,6 +19,7 @@ extern "C" {
 #define CPU_POWER               1
 #define CPU_POWER_CONNECT       2
 #define PKT_POLICY              3
+#define PKT_POLICY_REMOVE       4
 
 /* CPU Power Command Scaling */
 #define CPU_POWER_SCALE_UP      1
@@ -58,6 +59,9 @@ struct traffic {
        uint32_t max_max_packet_thresh;
 };
 
+#define CORE_TYPE_VIRTUAL 0
+#define CORE_TYPE_PHYSICAL 1
+
 struct channel_packet {
        uint64_t resource_id; /**< core_num, device */
        uint32_t unit;        /**< scale down/up/min/max */
@@ -70,6 +74,7 @@ struct channel_packet {
        uint8_t vcpu_to_control[MAX_VCPU_PER_VM];
        uint8_t num_vcpu;
        struct timer_profile timer_policy;
+       bool core_type;
        enum workload workload;
        enum policy_to_use policy_to_use;
        struct t_boost_status t_boost_status;
index 253173f..9ed8b56 100644 (file)
@@ -5,5 +5,7 @@ if host_machine.system() != 'linux'
        build = false
 endif
 sources = files('rte_power.c', 'power_acpi_cpufreq.c',
-               'power_kvm_vm.c', 'guest_channel.c')
-headers = files('rte_power.h')
+               'power_kvm_vm.c', 'guest_channel.c',
+               'rte_power_empty_poll.c')
+headers = files('rte_power.h', 'rte_power_empty_poll.h')
+deps += ['timer']
diff --git a/lib/librte_power/rte_power_empty_poll.c b/lib/librte_power/rte_power_empty_poll.c
new file mode 100644 (file)
index 0000000..e614546
--- /dev/null
@@ -0,0 +1,545 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#include <string.h>
+
+#include <rte_lcore.h>
+#include <rte_cycles.h>
+#include <rte_atomic.h>
+#include <rte_malloc.h>
+#include <inttypes.h>
+
+#include "rte_power.h"
+#include "rte_power_empty_poll.h"
+
+#define INTERVALS_PER_SECOND 100     /* one interval every 10 ms */
+#define SECONDS_TO_TRAIN_FOR 2
+#define DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD 70
+#define DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD 30
+#define DEFAULT_CYCLES_PER_PACKET 800
+
+static struct ep_params *ep_params;
+static uint32_t med_to_high_threshold = DEFAULT_MED_TO_HIGH_PERCENT_THRESHOLD;
+static uint32_t high_to_med_threshold = DEFAULT_HIGH_TO_MED_PERCENT_THRESHOLD;
+
+static uint32_t avail_freqs[RTE_MAX_LCORE][NUM_FREQS];
+
+static uint32_t total_avail_freqs[RTE_MAX_LCORE];
+
+static uint32_t freq_index[NUM_FREQ];
+
+static uint32_t
+get_freq_index(enum freq_val index)
+{
+       return freq_index[index];
+}
+
+
+static int
+set_power_freq(int lcore_id, enum freq_val freq, bool specific_freq)
+{
+       int err = 0;
+       uint32_t power_freq_index;
+       if (!specific_freq)
+               power_freq_index = get_freq_index(freq);
+       else
+               power_freq_index = freq;
+
+       err = rte_power_set_freq(lcore_id, power_freq_index);
+
+       return err;
+}
+
+
+static inline void __attribute__((always_inline))
+exit_training_state(struct priority_worker *poll_stats)
+{
+       RTE_SET_USED(poll_stats);
+}
+
+static inline void __attribute__((always_inline))
+enter_training_state(struct priority_worker *poll_stats)
+{
+       poll_stats->iter_counter = 0;
+       poll_stats->cur_freq = LOW;
+       poll_stats->queue_state = TRAINING;
+}
+
+static inline void __attribute__((always_inline))
+enter_normal_state(struct priority_worker *poll_stats)
+{
+       /* Clear the averaging arrays and their counters */
+       memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
+       poll_stats->ec = 0;
+       memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
+       poll_stats->pc = 0;
+
+       poll_stats->cur_freq = MED;
+       poll_stats->iter_counter = 0;
+       poll_stats->threshold_ctr = 0;
+       poll_stats->queue_state = MED_NORMAL;
+       RTE_LOG(INFO, POWER, "Set the power freq to MED\n");
+       set_power_freq(poll_stats->lcore_id, MED, false);
+
+       poll_stats->thresh[MED].threshold_percent = med_to_high_threshold;
+       poll_stats->thresh[HGH].threshold_percent = high_to_med_threshold;
+}
+
+static inline void __attribute__((always_inline))
+enter_busy_state(struct priority_worker *poll_stats)
+{
+       memset(poll_stats->edpi_av, 0, sizeof(poll_stats->edpi_av));
+       poll_stats->ec = 0;
+       memset(poll_stats->ppi_av, 0, sizeof(poll_stats->ppi_av));
+       poll_stats->pc = 0;
+
+       poll_stats->cur_freq = HGH;
+       poll_stats->iter_counter = 0;
+       poll_stats->threshold_ctr = 0;
+       poll_stats->queue_state = HGH_BUSY;
+       set_power_freq(poll_stats->lcore_id, HGH, false);
+}
+
+static inline void __attribute__((always_inline))
+enter_purge_state(struct priority_worker *poll_stats)
+{
+       poll_stats->iter_counter = 0;
+       poll_stats->queue_state = LOW_PURGE;
+}
+
+static inline void __attribute__((always_inline))
+set_state(struct priority_worker *poll_stats,
+               enum queue_state new_state)
+{
+       enum queue_state old_state = poll_stats->queue_state;
+       if (old_state != new_state) {
+
+               /* Call any old state exit functions */
+               if (old_state == TRAINING)
+                       exit_training_state(poll_stats);
+
+               /* Call any new state entry functions */
+               if (new_state == TRAINING)
+                       enter_training_state(poll_stats);
+               if (new_state == MED_NORMAL)
+                       enter_normal_state(poll_stats);
+               if (new_state == HGH_BUSY)
+                       enter_busy_state(poll_stats);
+               if (new_state == LOW_PURGE)
+                       enter_purge_state(poll_stats);
+       }
+}
+
+static inline void __attribute__((always_inline))
+set_policy(struct priority_worker *poll_stats,
+               struct ep_policy *policy)
+{
+       set_state(poll_stats, policy->state);
+
+       if (policy->state == TRAINING)
+               return;
+
+       poll_stats->thresh[MED_NORMAL].base_edpi = policy->med_base_edpi;
+       poll_stats->thresh[HGH_BUSY].base_edpi = policy->hgh_base_edpi;
+
+       poll_stats->thresh[MED_NORMAL].trained = true;
+       poll_stats->thresh[HGH_BUSY].trained = true;
+
+}
+
+static void
+update_training_stats(struct priority_worker *poll_stats,
+               uint32_t freq,
+               bool specific_freq,
+               uint32_t max_train_iter)
+{
+       RTE_SET_USED(specific_freq);
+
+       uint64_t p0_empty_deq;
+
+       if (poll_stats->cur_freq == freq &&
+                       poll_stats->thresh[freq].trained == false) {
+               if (poll_stats->thresh[freq].cur_train_iter == 0) {
+
+                       set_power_freq(poll_stats->lcore_id,
+                                       freq, specific_freq);
+
+                       poll_stats->empty_dequeues_prev =
+                               poll_stats->empty_dequeues;
+
+                       poll_stats->thresh[freq].cur_train_iter++;
+
+                       return;
+               } else if (poll_stats->thresh[freq].cur_train_iter
+                               <= max_train_iter) {
+
+                       p0_empty_deq = poll_stats->empty_dequeues -
+                               poll_stats->empty_dequeues_prev;
+
+                       poll_stats->empty_dequeues_prev =
+                               poll_stats->empty_dequeues;
+
+                       poll_stats->thresh[freq].base_edpi += p0_empty_deq;
+                       poll_stats->thresh[freq].cur_train_iter++;
+
+               } else {
+                       if (poll_stats->thresh[freq].trained == false) {
+                               poll_stats->thresh[freq].base_edpi =
+                                       poll_stats->thresh[freq].base_edpi /
+                                       max_train_iter;
+
+                               /* Add a 0.05% margin (base_edpi / 2000);
+                                * this should remove any false negatives
+                                * when the system is 0% busy.
+                                */
+                               poll_stats->thresh[freq].base_edpi +=
+                               poll_stats->thresh[freq].base_edpi / 2000;
+
+                               poll_stats->thresh[freq].trained = true;
+                               poll_stats->cur_freq++;
+
+                       }
+               }
+       }
+}
+
+static inline uint32_t __attribute__((always_inline))
+update_stats(struct priority_worker *poll_stats)
+{
+       uint64_t tot_edpi = 0, tot_ppi = 0;
+       uint32_t j, percent;
+
+       struct priority_worker *s = poll_stats;
+
+       uint64_t cur_edpi = s->empty_dequeues - s->empty_dequeues_prev;
+
+       s->empty_dequeues_prev = s->empty_dequeues;
+
+       uint64_t ppi = s->num_dequeue_pkts - s->num_dequeue_pkts_prev;
+
+       s->num_dequeue_pkts_prev = s->num_dequeue_pkts;
+
+       if (s->thresh[s->cur_freq].base_edpi < cur_edpi) {
+
+               /* edpi is the empty poll count difference per interval */
+               RTE_LOG(DEBUG, POWER, "cur_edpi is too large "
+                               "cur edpi %"PRIu64" "
+                               "base edpi %"PRIu64"\n",
+                               cur_edpi,
+                               s->thresh[s->cur_freq].base_edpi);
+               /* Return an out-of-range value so the caller treats this
+                * interval as invalid.
+                */
+               return 1000UL;
+       }
+
+       s->edpi_av[s->ec++ % BINS_AV] = cur_edpi;
+       s->ppi_av[s->pc++ % BINS_AV] = ppi;
+
+       for (j = 0; j < BINS_AV; j++) {
+               tot_edpi += s->edpi_av[j];
+               tot_ppi += s->ppi_av[j];
+       }
+
+       tot_edpi = tot_edpi / BINS_AV;
+
+       percent = 100 - (uint32_t)(((float)tot_edpi /
+                       (float)s->thresh[s->cur_freq].base_edpi) * 100);
+
+       return percent;
+}
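+
+/*
+ * Editor's note (worked example, not part of this patch): with a trained
+ * base_edpi of 1000 empty polls per interval and an averaged tot_edpi of
+ * 300, update_stats() returns 100 - (300 / 1000) * 100 = 70, i.e. the
+ * core is treated as 70% busy.
+ */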
+
+
+static inline void  __attribute__((always_inline))
+update_stats_normal(struct priority_worker *poll_stats)
+{
+       uint32_t percent;
+
+       if (poll_stats->thresh[poll_stats->cur_freq].base_edpi == 0) {
+
+               enum freq_val cur_freq = poll_stats->cur_freq;
+
+               /* edpi is the empty poll count difference per interval */
+               RTE_LOG(DEBUG, POWER, "cur freq is %d, edpi is %"PRIu64"\n",
+                               cur_freq,
+                               poll_stats->thresh[cur_freq].base_edpi);
+               return;
+       }
+
+       percent = update_stats(poll_stats);
+
+       if (percent > 100) {
+               /* An out-of-range percent means edpi exceeded the base threshold */
+               RTE_LOG(DEBUG, POWER, "Edpi is bigger than threshold\n");
+               return;
+       }
+
+       if (poll_stats->cur_freq == LOW)
+               RTE_LOG(INFO, POWER, "Purge Mode is not currently supported\n");
+       else if (poll_stats->cur_freq == MED) {
+
+               if (percent >
+                       poll_stats->thresh[MED].threshold_percent) {
+
+                       if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
+                               poll_stats->threshold_ctr++;
+                       else {
+                               set_state(poll_stats, HGH_BUSY);
+                               RTE_LOG(INFO, POWER, "MOVE to HGH\n");
+                       }
+
+               } else {
+                       /* reset */
+                       poll_stats->threshold_ctr = 0;
+               }
+
+       } else if (poll_stats->cur_freq == HGH) {
+
+               if (percent <
+                               poll_stats->thresh[HGH].threshold_percent) {
+
+                       if (poll_stats->threshold_ctr < INTERVALS_PER_SECOND)
+                               poll_stats->threshold_ctr++;
+                       else {
+                               set_state(poll_stats, MED_NORMAL);
+                               RTE_LOG(INFO, POWER, "MOVE to MED\n");
+                       }
+               } else {
+                       /* reset */
+                       poll_stats->threshold_ctr = 0;
+               }
+
+       }
+}
+
+static int
+empty_poll_training(struct priority_worker *poll_stats,
+               uint32_t max_train_iter)
+{
+
+       if (poll_stats->iter_counter < INTERVALS_PER_SECOND) {
+               poll_stats->iter_counter++;
+               return 0;
+       }
+
+
+       update_training_stats(poll_stats,
+                       LOW,
+                       false,
+                       max_train_iter);
+
+       update_training_stats(poll_stats,
+                       MED,
+                       false,
+                       max_train_iter);
+
+       update_training_stats(poll_stats,
+                       HGH,
+                       false,
+                       max_train_iter);
+
+
+       if (poll_stats->thresh[LOW].trained == true
+                       && poll_stats->thresh[MED].trained == true
+                       && poll_stats->thresh[HGH].trained == true) {
+
+               set_state(poll_stats, MED_NORMAL);
+
+               RTE_LOG(INFO, POWER, "LOW threshold is %"PRIu64"\n",
+                               poll_stats->thresh[LOW].base_edpi);
+
+               RTE_LOG(INFO, POWER, "MED threshold is %"PRIu64"\n",
+                               poll_stats->thresh[MED].base_edpi);
+
+
+               RTE_LOG(INFO, POWER, "HIGH threshold is %"PRIu64"\n",
+                               poll_stats->thresh[HGH].base_edpi);
+
+               RTE_LOG(INFO, POWER, "Training is Complete for %d\n",
+                               poll_stats->lcore_id);
+       }
+
+       return 0;
+}
+
+void __rte_experimental
+rte_empty_poll_detection(struct rte_timer *tim, void *arg)
+{
+
+       uint32_t i;
+
+       struct priority_worker *poll_stats;
+
+       RTE_SET_USED(tim);
+
+       RTE_SET_USED(arg);
+
+       for (i = 0; i < NUM_NODES; i++) {
+
+               poll_stats = &(ep_params->wrk_data.wrk_stats[i]);
+
+               if (rte_lcore_is_enabled(poll_stats->lcore_id) == 0)
+                       continue;
+
+               switch (poll_stats->queue_state) {
+               case TRAINING:
+                       empty_poll_training(poll_stats,
+                                       ep_params->max_train_iter);
+                       break;
+
+               case HGH_BUSY:
+               case MED_NORMAL:
+                       update_stats_normal(poll_stats);
+                       break;
+
+               case LOW_PURGE:
+                       break;
+               default:
+                       break;
+               }
+
+       }
+
+}
+
+int __rte_experimental
+rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
+               struct ep_policy *policy)
+{
+       uint32_t i;
+       /* Allocate the ep_params structure */
+       ep_params = rte_zmalloc_socket(NULL,
+                       sizeof(struct ep_params),
+                       0,
+                       rte_socket_id());
+
+       if (!ep_params)
+               return -1;
+
+       if (freq_tlb == NULL) {
+               freq_index[LOW] = 14;
+               freq_index[MED] = 9;
+               freq_index[HGH] = 1;
+       } else {
+               freq_index[LOW] = freq_tlb[LOW];
+               freq_index[MED] = freq_tlb[MED];
+               freq_index[HGH] = freq_tlb[HGH];
+       }
+
+       RTE_LOG(INFO, POWER, "Initialize the Empty Poll\n");
+
+       /* Train for pre-defined period */
+       ep_params->max_train_iter = INTERVALS_PER_SECOND * SECONDS_TO_TRAIN_FOR;
+
+       struct stats_data *w = &ep_params->wrk_data;
+
+       *eptr = ep_params;
+
+       /* initialize all wrk_stats state */
+       for (i = 0; i < NUM_NODES; i++) {
+
+               if (rte_lcore_is_enabled(i) == 0)
+                       continue;
+               /* init the freqs table */
+               total_avail_freqs[i] = rte_power_freqs(i,
+                               avail_freqs[i],
+                               NUM_FREQS);
+
+               RTE_LOG(INFO, POWER, "lcore %d has %d available freqs\n",
+                               i,
+                               total_avail_freqs[i]);
+
+               if (get_freq_index(LOW) > total_avail_freqs[i])
+                       return -1;
+
+               if (rte_get_master_lcore() != i) {
+                       w->wrk_stats[i].lcore_id = i;
+                       set_policy(&w->wrk_stats[i], policy);
+               }
+       }
+
+       return 0;
+}
+
+void __rte_experimental
+rte_power_empty_poll_stat_free(void)
+{
+
+       RTE_LOG(INFO, POWER, "Close the Empty Poll\n");
+
+       rte_free(ep_params);
+       ep_params = NULL;
+}
+
+int __rte_experimental
+rte_power_empty_poll_stat_update(unsigned int lcore_id)
+{
+       struct priority_worker *poll_stats;
+
+       if (lcore_id >= NUM_NODES)
+               return -1;
+
+       poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
+
+       if (poll_stats->lcore_id == 0)
+               poll_stats->lcore_id = lcore_id;
+
+       poll_stats->empty_dequeues++;
+
+       return 0;
+}
+
+int __rte_experimental
+rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt)
+{
+
+       struct priority_worker *poll_stats;
+
+       if (lcore_id >= NUM_NODES)
+               return -1;
+
+       poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
+
+       if (poll_stats->lcore_id == 0)
+               poll_stats->lcore_id = lcore_id;
+
+       poll_stats->num_dequeue_pkts += nb_pkt;
+
+       return 0;
+}
+
+
+uint64_t __rte_experimental
+rte_power_empty_poll_stat_fetch(unsigned int lcore_id)
+{
+       struct priority_worker *poll_stats;
+
+       if (lcore_id >= NUM_NODES)
+               return -1;
+
+       poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
+
+       if (poll_stats->lcore_id == 0)
+               poll_stats->lcore_id = lcore_id;
+
+       return poll_stats->empty_dequeues;
+}
+
+uint64_t __rte_experimental
+rte_power_poll_stat_fetch(unsigned int lcore_id)
+{
+       struct priority_worker *poll_stats;
+
+       if (lcore_id >= NUM_NODES)
+               return -1;
+
+       poll_stats = &(ep_params->wrk_data.wrk_stats[lcore_id]);
+
+       if (poll_stats->lcore_id == 0)
+               poll_stats->lcore_id = lcore_id;
+
+       return poll_stats->num_dequeue_pkts;
+}
diff --git a/lib/librte_power/rte_power_empty_poll.h b/lib/librte_power/rte_power_empty_poll.h
new file mode 100644 (file)
index 0000000..c1ad5c2
--- /dev/null
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#ifndef _RTE_EMPTY_POLL_H
+#define _RTE_EMPTY_POLL_H
+
+/**
+ * @file
+ * RTE Power Management: Empty Poll API
+ */
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_power.h>
+#include <rte_timer.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NUM_FREQS  RTE_MAX_LCORE_FREQS
+
+#define BINS_AV 4 /* Must be a power of 2 */
+
+#define DROP (NUM_DIRECTIONS * NUM_DEVICES)
+
+#define NUM_PRIORITIES          2
+
+#define NUM_NODES         256  /* Max number of cores */
+
+/* Processor Power State */
+enum freq_val {
+       LOW,
+       MED,
+       HGH,
+       NUM_FREQ = NUM_FREQS
+};
+
+
+/* Queue Polling State */
+enum queue_state {
+       TRAINING, /* NO TRAFFIC */
+       MED_NORMAL,   /* MED */
+       HGH_BUSY,     /* HIGH */
+       LOW_PURGE,    /* LOW */
+};
+
+/* Queue Stats */
+struct freq_threshold {
+
+       uint64_t base_edpi;
+       bool trained;
+       uint32_t threshold_percent;
+       uint32_t cur_train_iter;
+};
+
+/* Per-worker-thread empty poll stats */
+struct priority_worker {
+
+       /* Current dequeue and throughput counts.
+        * These two are written to by the worker threads,
+        * so keep them on their own cache line.
+        */
+       uint64_t empty_dequeues;
+       uint64_t num_dequeue_pkts;
+
+       enum queue_state queue_state;
+
+       uint64_t empty_dequeues_prev;
+       uint64_t num_dequeue_pkts_prev;
+
+       /* Used for training only */
+       struct freq_threshold thresh[NUM_FREQ];
+       enum freq_val cur_freq;
+
+       /* bucket arrays to calculate the averages */
+       /* edpi: empty poll count difference per interval */
+       uint64_t edpi_av[BINS_AV];
+       /* empty poll counter */
+       uint32_t ec;
+       /* ppi: valid poll count per interval */
+       uint64_t ppi_av[BINS_AV];
+       /* valid poll counter */
+       uint32_t pc;
+
+       uint32_t lcore_id;
+       uint32_t iter_counter;
+       uint32_t threshold_ctr;
+       uint32_t display_ctr;
+       uint8_t  dev_id;
+
+} __rte_cache_aligned;
+
+
+struct stats_data {
+
+       struct priority_worker wrk_stats[NUM_NODES];
+
+       /* flag to stop rx threads processing packets until training is over */
+       bool start_rx;
+
+};
+
+/* Empty Poll Parameters */
+struct ep_params {
+
+       /* Timer-related parameters */
+       uint64_t interval_ticks;
+       uint32_t max_train_iter;
+
+       struct rte_timer timer0;
+       struct stats_data wrk_data;
+};
+
+
+/* Sample app initialization information */
+struct ep_policy {
+
+       uint64_t med_base_edpi;
+       uint64_t hgh_base_edpi;
+
+       enum queue_state state;
+};
+
+
+
+/**
+ * Initialize the empty poll power management system.
+ *
+ * @param eptr
+ *   Returned pointer to the allocated empty poll configuration
+ * @param freq_tlb
+ *   The power state/frequency mapping table; NULL selects built-in defaults
+ * @param policy
+ *   The initialization policy from the sample app
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int __rte_experimental
+rte_power_empty_poll_stat_init(struct ep_params **eptr, uint8_t *freq_tlb,
+               struct ep_policy *policy);
+
+/**
+ * Free the resources held by the power management system.
+ */
+void __rte_experimental
+rte_power_empty_poll_stat_free(void);
+
+/**
+ * Update the empty poll counter of a specific core.
+ * Not thread safe.
+ *
+ * @param lcore_id
+ *  lcore id
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int __rte_experimental
+rte_power_empty_poll_stat_update(unsigned int lcore_id);
+
+/**
+ * Update the valid poll counter of a specific core. Not thread safe.
+ *
+ * @param lcore_id
+ *  lcore id.
+ * @param nb_pkt
+ *  The number of packets received in one valid poll.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int __rte_experimental
+rte_power_poll_stat_update(unsigned int lcore_id, uint8_t nb_pkt);
+
+/**
+ * Fetch the empty poll counter of a specific core.
+ *
+ * @param lcore_id
+ *  lcore id
+ *
+ * @return
+ *  Current lcore empty poll counter value.
+ */
+uint64_t __rte_experimental
+rte_power_empty_poll_stat_fetch(unsigned int lcore_id);
+
+/**
+ * Fetch the valid poll counter of a specific core.
+ *
+ * @param lcore_id
+ *  lcore id
+ *
+ * @return
+ *  Current lcore valid poll counter value.
+ */
+uint64_t __rte_experimental
+rte_power_poll_stat_fetch(unsigned int lcore_id);
+
+/**
+ * Empty poll state change detection function.
+ *
+ * @param tim
+ *  The timer structure (unused)
+ * @param arg
+ *  The user-supplied callback argument (unused)
+ */
+void __rte_experimental
+rte_empty_poll_detection(struct rte_timer *tim, void *arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
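
The declarations above pair with the implementation earlier in this patch. A minimal usage sketch, under stated assumptions: one RX lcore, the default frequency table (freq_tlb == NULL), and an application that already runs the rte_timer subsystem. The port/queue ids, burst size and function names are illustrative, not part of the API.

/* Hedged sketch of the empty poll API; not a validated configuration. */
#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_power_empty_poll.h>
#include <rte_timer.h>

static struct ep_params *ep;

static int
setup_empty_poll(void)
{
	struct ep_policy policy = {
		.state = TRAINING, /* let the library derive thresholds */
	};

	if (rte_power_empty_poll_stat_init(&ep, NULL, &policy) < 0)
		return -1;

	/* Run the state-change detection periodically from a timer. */
	rte_timer_init(&ep->timer0);
	return rte_timer_reset(&ep->timer0, ep->interval_ticks, PERIODICAL,
			rte_lcore_id(), rte_empty_poll_detection, NULL);
}

static void
rx_loop(uint16_t port, uint16_t queue)
{
	struct rte_mbuf *bufs[32];

	for (;;) {
		uint16_t nb = rte_eth_rx_burst(port, queue, bufs, 32);

		if (nb == 0)
			rte_power_empty_poll_stat_update(rte_lcore_id());
		else
			rte_power_poll_stat_update(rte_lcore_id(), nb);
		/* ... process and free bufs ... */
	}
}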
index dd587df..17a083b 100644 (file)
@@ -33,3 +33,16 @@ DPDK_18.08 {
        rte_power_get_capabilities;
 
 } DPDK_17.11;
+
+EXPERIMENTAL {
+        global:
+
+        rte_empty_poll_detection;
+        rte_power_empty_poll_stat_fetch;
+        rte_power_empty_poll_stat_free;
+        rte_power_empty_poll_stat_init;
+        rte_power_empty_poll_stat_update;
+        rte_power_poll_stat_fetch;
+        rte_power_poll_stat_update;
+
+};
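
Every symbol in the new EXPERIMENTAL node is gated at compile time: a consumer must opt in, or the build rejects calls into this API. A minimal sketch; the define can equally be supplied as -DALLOW_EXPERIMENTAL_API in CFLAGS.

/* Opt in to experimental DPDK API before including the header. */
#define ALLOW_EXPERIMENTAL_API 1
#include <rte_power_empty_poll.h>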
index 62b6b97..9f1e359 100644 (file)
 /* dynamic log identifier */
 int librawdev_logtype;
 
-struct rte_rawdev rte_rawdevices[RTE_RAWDEV_MAX_DEVS];
+static struct rte_rawdev rte_rawdevices[RTE_RAWDEV_MAX_DEVS];
 
-struct rte_rawdev *rte_rawdevs = &rte_rawdevices[0];
+struct rte_rawdev *rte_rawdevs = rte_rawdevices;
 
 static struct rte_rawdev_global rawdev_globals = {
        .nb_devs                = 0
 };
 
-struct rte_rawdev_global *rte_rawdev_globals = &rawdev_globals;
-
 /* Raw device, northbound API implementation */
 uint8_t
 rte_rawdev_count(void)
 {
-       return rte_rawdev_globals->nb_devs;
+       return rawdev_globals.nb_devs;
 }
 
 uint16_t
@@ -60,7 +58,7 @@ rte_rawdev_get_dev_id(const char *name)
        if (!name)
                return -EINVAL;
 
-       for (i = 0; i < rte_rawdev_globals->nb_devs; i++)
+       for (i = 0; i < rawdev_globals.nb_devs; i++)
                if ((strcmp(rte_rawdevices[i].name, name)
                                == 0) &&
                                (rte_rawdevices[i].attached ==
index bb9bbc3..811e51d 100644 (file)
@@ -73,8 +73,6 @@ struct rte_rawdev_global {
        uint16_t nb_devs;
 };
 
-extern struct rte_rawdev_global *rte_rawdev_globals;
-/** Pointer to global raw devices data structure. */
 extern struct rte_rawdev *rte_rawdevs;
 /** The pool of rte_rawdev structures. */
 
index ca8a435..ab8b0b4 100644 (file)
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
+version = 2
 sources = files('rte_ring.c')
 headers = files('rte_ring.h',
                'rte_ring_c11_mem.h',
index 7a731d0..af5444a 100644 (file)
@@ -303,11 +303,11 @@ void rte_ring_dump(FILE *f, const struct rte_ring *r);
  * There are two choices for users:
  * 1. use the rmb() memory barrier
  * 2. use one-direction load_acquire/store_release barriers, defined by
- * CONFIG_RTE_RING_USE_C11_MEM_MODEL=y
+ * CONFIG_RTE_USE_C11_MEM_MODEL=y
  * It depends on performance test results.
  * By default, move common functions to rte_ring_generic.h
  */
-#ifdef RTE_RING_USE_C11_MEM_MODEL
+#ifdef RTE_USE_C11_MEM_MODEL
 #include "rte_ring_c11_mem.h"
 #else
 #include "rte_ring_generic.h"
index 55d9c69..46c53ed 100644 (file)
@@ -11,8 +11,6 @@ LIB = librte_sched.a
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
 
-CFLAGS_rte_red.o := -D_GNU_SOURCE
-
 LDLIBS += -lm
 LDLIBS += -lrt
 LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_net
index 9269e5c..587d5e6 100644 (file)
@@ -329,7 +329,7 @@ rte_sched_port_check_params(struct rte_sched_port_params *params)
                return -1;
 
        /* socket */
-       if ((params->socket < 0) || (params->socket >= RTE_MAX_NUMA_NODES))
+       if (params->socket < 0)
                return -3;
 
        /* rate */
@@ -633,7 +633,8 @@ rte_sched_port_config(struct rte_sched_port_params *params)
                return NULL;
 
        /* Allocate memory to store the data structures */
-       port = rte_zmalloc("qos_params", mem_size, RTE_CACHE_LINE_SIZE);
+       port = rte_zmalloc_socket("qos_params", mem_size, RTE_CACHE_LINE_SIZE,
+               params->socket);
        if (port == NULL)
                return NULL;
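
The second hunk is the substance of this change: the port memory now lands on the NUMA node requested in params->socket instead of the node of whichever lcore calls rte_sched_port_config(). A minimal sketch of the distinction, with illustrative names:

#include <stddef.h>
#include <rte_malloc.h>
#include <rte_memory.h>

static void *
alloc_port_data(size_t mem_size, int socket)
{
	/* rte_zmalloc() would allocate on the caller's own node;
	 * rte_zmalloc_socket() pins the allocation to 'socket'.
	 */
	return rte_zmalloc_socket("qos_params", mem_size,
			RTE_CACHE_LINE_SIZE, socket);
}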
 
index 1954960..c6355de 100644 (file)
@@ -131,6 +131,10 @@ rte_security_capability_get(struct rte_security_ctx *instance,
                                        capability->ipsec.direction ==
                                                        idx->ipsec.direction)
                                        return capability;
+                       } else if (idx->protocol == RTE_SECURITY_PROTOCOL_PDCP) {
+                               if (capability->pdcp.domain ==
+                                                       idx->pdcp.domain)
+                                       return capability;
                        }
                }
        }
index b0d1b97..1431b4d 100644 (file)
@@ -206,6 +206,64 @@ struct rte_security_macsec_xform {
        int dummy;
 };
 
+/**
+ * PDCP Mode of session
+ */
+enum rte_security_pdcp_domain {
+       RTE_SECURITY_PDCP_MODE_CONTROL, /**< PDCP control plane */
+       RTE_SECURITY_PDCP_MODE_DATA,    /**< PDCP data plane */
+};
+
+/** PDCP Frame direction */
+enum rte_security_pdcp_direction {
+       RTE_SECURITY_PDCP_UPLINK,       /**< Uplink */
+       RTE_SECURITY_PDCP_DOWNLINK,     /**< Downlink */
+};
+
+/** PDCP Sequence Number Size selectors */
+enum rte_security_pdcp_sn_size {
+       /** PDCP_SN_SIZE_5: 5-bit sequence number */
+       RTE_SECURITY_PDCP_SN_SIZE_5 = 5,
+       /** PDCP_SN_SIZE_7: 7-bit sequence number */
+       RTE_SECURITY_PDCP_SN_SIZE_7 = 7,
+       /** PDCP_SN_SIZE_12: 12-bit sequence number */
+       RTE_SECURITY_PDCP_SN_SIZE_12 = 12,
+       /** PDCP_SN_SIZE_15: 15-bit sequence number */
+       RTE_SECURITY_PDCP_SN_SIZE_15 = 15,
+       /** PDCP_SN_SIZE_18: 18-bit sequence number */
+       RTE_SECURITY_PDCP_SN_SIZE_18 = 18
+};
+
+/**
+ * PDCP security association configuration data.
+ *
+ * This structure contains data required to create a PDCP security session.
+ */
+struct rte_security_pdcp_xform {
+       int8_t bearer;  /**< PDCP bearer ID */
+       /** Enable in-order delivery; this field shall be set only if the
+        * driver/HW is capable. See RTE_SECURITY_PDCP_ORDERING_CAP.
+        */
+       uint8_t en_ordering;
+       /** Notify the driver/HW to detect and remove duplicate packets.
+        * This field should be set only when the driver/HW is capable.
+        * See RTE_SECURITY_PDCP_DUP_DETECT_CAP.
+        */
+       uint8_t remove_duplicates;
+       /** PDCP mode of operation: Control or data */
+       enum rte_security_pdcp_domain domain;
+       /** PDCP frame direction: 0 = uplink, 1 = downlink */
+       enum rte_security_pdcp_direction pkt_dir;
+       /** Sequence number size in bits: 5/7/12/15/18 */
+       enum rte_security_pdcp_sn_size sn_size;
+       /** Starting Hyper Frame Number to be used together with the SN
+        * from the PDCP frames
+        */
+       uint32_t hfn;
+       /** HFN Threshold for key renegotiation */
+       uint32_t hfn_threshold;
+};
+
 /**
  * Security session action type.
  */
@@ -232,6 +290,8 @@ enum rte_security_session_protocol {
        /**< IPsec Protocol */
        RTE_SECURITY_PROTOCOL_MACSEC,
        /**< MACSec Protocol */
+       RTE_SECURITY_PROTOCOL_PDCP,
+       /**< PDCP Protocol */
 };
 
 /**
@@ -246,6 +306,7 @@ struct rte_security_session_conf {
        union {
                struct rte_security_ipsec_xform ipsec;
                struct rte_security_macsec_xform macsec;
+               struct rte_security_pdcp_xform pdcp;
        };
        /**< Configuration parameters for security session */
        struct rte_crypto_sym_xform *crypto_xform;
@@ -413,6 +474,10 @@ struct rte_security_ipsec_stats {
 
 };
 
+struct rte_security_pdcp_stats {
+       uint64_t reserved;
+};
+
 struct rte_security_stats {
        enum rte_security_session_protocol protocol;
        /**< Security protocol to be configured */
@@ -421,6 +486,7 @@ struct rte_security_stats {
        union {
                struct rte_security_macsec_stats macsec;
                struct rte_security_ipsec_stats ipsec;
+               struct rte_security_pdcp_stats pdcp;
        };
 };
 
@@ -465,6 +531,13 @@ struct rte_security_capability {
                        int dummy;
                } macsec;
                /**< MACsec capability */
+               struct {
+                       enum rte_security_pdcp_domain domain;
+                       /**< PDCP mode of operation: Control or data */
+                       uint32_t capa_flags;
+                       /**< Capability flags, see RTE_SECURITY_PDCP_* */
+               } pdcp;
+               /**< PDCP capability */
        };
 
        const struct rte_cryptodev_capabilities *crypto_capabilities;
@@ -474,6 +547,19 @@ struct rte_security_capability {
        /**< Device offload flags */
 };
 
+/** Underlying hardware/drivers which support PDCP may or may not support
+ * packet ordering. Set RTE_SECURITY_PDCP_ORDERING_CAP if they do.
+ * If it is not set, the driver/HW assumes packets are received in order
+ * and it is the application's responsibility to maintain ordering.
+ */
+#define RTE_SECURITY_PDCP_ORDERING_CAP         0x00000001
+
+/** Underlying hardware/drivers which support PDCP may or may not detect
+ * duplicate packets. Set RTE_SECURITY_PDCP_DUP_DETECT_CAP if they do.
+ * If it is not set, the driver/HW assumes no duplicate packets are received.
+ */
+
 #define RTE_SECURITY_TX_OLOAD_NEED_MDATA       0x00000001
 /**< HW needs metadata update, see rte_security_set_pkt_metadata().
  */
@@ -506,6 +592,10 @@ struct rte_security_capability_idx {
                        enum rte_security_ipsec_sa_mode mode;
                        enum rte_security_ipsec_sa_direction direction;
                } ipsec;
+               struct {
+                       enum rte_security_pdcp_domain domain;
+                       uint32_t capa_flags;
+               } pdcp;
        };
 };
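
A hedged sketch of how the new PDCP members slot into a session configuration, for a data-plane uplink session. The values are placeholders and the cipher transform is assumed to be prepared elsewhere; this is not a validated configuration for any particular driver.

#include <rte_security.h>

static void
fill_pdcp_conf(struct rte_security_session_conf *conf,
	struct rte_crypto_sym_xform *cipher)
{
	conf->protocol = RTE_SECURITY_PROTOCOL_PDCP;
	conf->pdcp = (struct rte_security_pdcp_xform) {
		.bearer = 1,			/* illustrative bearer id */
		.domain = RTE_SECURITY_PDCP_MODE_DATA,
		.pkt_dir = RTE_SECURITY_PDCP_UPLINK,
		.sn_size = RTE_SECURITY_PDCP_SN_SIZE_12,
		.hfn = 0,
		.hfn_threshold = 0xfffff,	/* placeholder threshold */
	};
	conf->crypto_xform = cipher;
}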
 
index 276d476..f935678 100644 (file)
@@ -46,6 +46,8 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_acl.h
 endif
 SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash_cuckoo.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash_func.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash_func_arm64.h
 SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru.h
 ifeq ($(CONFIG_RTE_ARCH_X86),y)
 SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru_x86.h
index 8b2f841..6ae3cd6 100644 (file)
@@ -19,6 +19,8 @@ headers = files('rte_table.h',
                'rte_table_lpm_ipv6.h',
                'rte_table_hash.h',
                'rte_table_hash_cuckoo.h',
+               'rte_table_hash_func.h',
+               'rte_table_hash_func_arm64.h',
                'rte_lru.h',
                'rte_table_array.h',
                'rte_table_stub.h')
diff --git a/lib/librte_table/rte_table_hash_func.h b/lib/librte_table/rte_table_hash_func.h
new file mode 100644 (file)
index 0000000..02296ea
--- /dev/null
@@ -0,0 +1,245 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#ifndef __INCLUDE_RTE_TABLE_HASH_FUNC_H__
+#define __INCLUDE_RTE_TABLE_HASH_FUNC_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#include <rte_compat.h>
+#include <rte_common.h>
+
+static inline uint64_t __rte_experimental
+rte_crc32_u64_generic(uint64_t crc, uint64_t value)
+{
+       int i;
+
+       crc = (crc & 0xFFFFFFFFLLU) ^ value;
+       for (i = 63; i >= 0; i--) {
+               uint64_t mask;
+
+               mask = -(crc & 1LLU);
+               crc = (crc >> 1LLU) ^ (0x82F63B78LLU & mask);
+       }
+
+       return crc;
+}
+
+#if defined(RTE_ARCH_X86_64)
+
+#include <x86intrin.h>
+
+static inline uint64_t
+rte_crc32_u64(uint64_t crc, uint64_t v)
+{
+       return _mm_crc32_u64(crc, v);
+}
+
+#elif defined(RTE_ARCH_ARM64)
+#include "rte_table_hash_func_arm64.h"
+#else
+
+static inline uint64_t
+rte_crc32_u64(uint64_t crc, uint64_t v)
+{
+       return rte_crc32_u64_generic(crc, v);
+}
+
+#endif
+
+static inline uint64_t __rte_experimental
+rte_table_hash_crc_key8(void *key, void *mask, __rte_unused uint32_t key_size,
+       uint64_t seed)
+{
+       uint64_t *k = key;
+       uint64_t *m = mask;
+       uint64_t crc0;
+
+       crc0 = rte_crc32_u64(seed, k[0] & m[0]);
+
+       return crc0;
+}
+
+static inline uint64_t __rte_experimental
+rte_table_hash_crc_key16(void *key, void *mask, __rte_unused uint32_t key_size,
+       uint64_t seed)
+{
+       uint64_t *k = key;
+       uint64_t *m = mask;
+       uint64_t k0, crc0, crc1;
+
+       k0 = k[0] & m[0];
+
+       crc0 = rte_crc32_u64(k0, seed);
+       crc1 = rte_crc32_u64(k0 >> 32, k[1] & m[1]);
+
+       crc0 ^= crc1;
+
+       return crc0;
+}
+
+static inline uint64_t __rte_experimental
+rte_table_hash_crc_key24(void *key, void *mask, __rte_unused uint32_t key_size,
+       uint64_t seed)
+{
+       uint64_t *k = key;
+       uint64_t *m = mask;
+       uint64_t k0, k2, crc0, crc1;
+
+       k0 = k[0] & m[0];
+       k2 = k[2] & m[2];
+
+       crc0 = rte_crc32_u64(k0, seed);
+       crc1 = rte_crc32_u64(k0 >> 32, k[1] & m[1]);
+
+       crc0 = rte_crc32_u64(crc0, k2);
+
+       crc0 ^= crc1;
+
+       return crc0;
+}
+
+static inline uint64_t __rte_experimental
+rte_table_hash_crc_key32(void *key, void *mask, __rte_unused uint32_t key_size,
+       uint64_t seed)
+{
+       uint64_t *k = key;
+       uint64_t *m = mask;
+       uint64_t k0, k2, crc0, crc1, crc2, crc3;
+
+       k0 = k[0] & m[0];
+       k2 = k[2] & m[2];
+
+       crc0 = rte_crc32_u64(k0, seed);
+       crc1 = rte_crc32_u64(k0 >> 32, k[1] & m[1]);
+
+       crc2 = rte_crc32_u64(k2, k[3] & m[3]);
+       crc3 = k2 >> 32;
+
+       crc0 = rte_crc32_u64(crc0, crc1);
+       crc1 = rte_crc32_u64(crc2, crc3);
+
+       crc0 ^= crc1;
+
+       return crc0;
+}
+
+static inline uint64_t __rte_experimental
+rte_table_hash_crc_key40(void *key, void *mask, __rte_unused uint32_t key_size,
+       uint64_t seed)
+{
+       uint64_t *k = key;
+       uint64_t *m = mask;
+       uint64_t k0, k2, crc0, crc1, crc2, crc3;
+
+       k0 = k[0] & m[0];
+       k2 = k[2] & m[2];
+
+       crc0 = rte_crc32_u64(k0, seed);
+       crc1 = rte_crc32_u64(k0 >> 32, k[1] & m[1]);
+
+       crc2 = rte_crc32_u64(k2, k[3] & m[3]);
+       crc3 = rte_crc32_u64(k2 >> 32, k[4] & m[4]);
+
+       crc0 = rte_crc32_u64(crc0, crc1);
+       crc1 = rte_crc32_u64(crc2, crc3);
+
+       crc0 ^= crc1;
+
+       return crc0;
+}
+
+static inline uint64_t __rte_experimental
+rte_table_hash_crc_key48(void *key, void *mask, __rte_unused uint32_t key_size,
+       uint64_t seed)
+{
+       uint64_t *k = key;
+       uint64_t *m = mask;
+       uint64_t k0, k2, k5, crc0, crc1, crc2, crc3;
+
+       k0 = k[0] & m[0];
+       k2 = k[2] & m[2];
+       k5 = k[5] & m[5];
+
+       crc0 = rte_crc32_u64(k0, seed);
+       crc1 = rte_crc32_u64(k0 >> 32, k[1] & m[1]);
+
+       crc2 = rte_crc32_u64(k2, k[3] & m[3]);
+       crc3 = rte_crc32_u64(k2 >> 32, k[4] & m[4]);
+
+       crc0 = rte_crc32_u64(crc0, (crc1 << 32) ^ crc2);
+       crc1 = rte_crc32_u64(crc3, k5);
+
+       crc0 ^= crc1;
+
+       return crc0;
+}
+
+static inline uint64_t __rte_experimental
+rte_table_hash_crc_key56(void *key, void *mask, __rte_unused uint32_t key_size,
+       uint64_t seed)
+{
+       uint64_t *k = key;
+       uint64_t *m = mask;
+       uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5;
+
+       k0 = k[0] & m[0];
+       k2 = k[2] & m[2];
+       k5 = k[5] & m[5];
+
+       crc0 = rte_crc32_u64(k0, seed);
+       crc1 = rte_crc32_u64(k0 >> 32, k[1] & m[1]);
+
+       crc2 = rte_crc32_u64(k2, k[3] & m[3]);
+       crc3 = rte_crc32_u64(k2 >> 32, k[4] & m[4]);
+
+       crc4 = rte_crc32_u64(k5, k[6] & m[6]);
+       crc5 = k5 >> 32;
+
+       crc0 = rte_crc32_u64(crc0, (crc1 << 32) ^ crc2);
+       crc1 = rte_crc32_u64(crc3, (crc4 << 32) ^ crc5);
+
+       crc0 ^= crc1;
+
+       return crc0;
+}
+
+static inline uint64_t __rte_experimental
+rte_table_hash_crc_key64(void *key, void *mask, __rte_unused uint32_t key_size,
+       uint64_t seed)
+{
+       uint64_t *k = key;
+       uint64_t *m = mask;
+       uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5;
+
+       k0 = k[0] & m[0];
+       k2 = k[2] & m[2];
+       k5 = k[5] & m[5];
+
+       crc0 = rte_crc32_u64(k0, seed);
+       crc1 = rte_crc32_u64(k0 >> 32, k[1] & m[1]);
+
+       crc2 = rte_crc32_u64(k2, k[3] & m[3]);
+       crc3 = rte_crc32_u64(k2 >> 32, k[4] & m[4]);
+
+       crc4 = rte_crc32_u64(k5, k[6] & m[6]);
+       crc5 = rte_crc32_u64(k5 >> 32, k[7] & m[7]);
+
+       crc0 = rte_crc32_u64(crc0, (crc1 << 32) ^ crc2);
+       crc1 = rte_crc32_u64(crc3, (crc4 << 32) ^ crc5);
+
+       crc0 ^= crc1;
+
+       return crc0;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
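
A minimal sketch of the mask-then-CRC scheme above for a 16-byte key; the key layout, mask and seed are illustrative.

#include <stdint.h>
#include <rte_table_hash_func.h>

static uint64_t
hash_flow_key(void)
{
	uint64_t key[2] = { 0x0a000001, 0x00500050 };	/* e.g. addr, ports */
	uint64_t mask[2] = { UINT64_MAX, UINT64_MAX };	/* hash all bits */

	/* key_size is unused by the CRC variants but kept for the
	 * common hash function signature.
	 */
	return rte_table_hash_crc_key16(key, mask, sizeof(key), 0);
}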
diff --git a/lib/librte_table/rte_table_hash_func_arm64.h b/lib/librte_table/rte_table_hash_func_arm64.h
new file mode 100644 (file)
index 0000000..eb04c1f
--- /dev/null
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2017-2018 Linaro Limited
+ */
+
+#ifndef __INCLUDE_RTE_TABLE_HASH_FUNC_ARM64_H__
+#define __INCLUDE_RTE_TABLE_HASH_FUNC_ARM64_H__
+
+#define _CRC32CX(crc, val)     \
+       __asm__("crc32cx %w[c], %w[c], %x[v]":[c] "+r" (crc):[v] "r" (val))
+
+static inline uint64_t
+rte_crc32_u64(uint64_t crc, uint64_t v)
+{
+       uint32_t crc32 = crc;
+
+       _CRC32CX(crc32, v);
+
+       return crc32;
+}
+
+#endif
diff --git a/lib/librte_telemetry/Makefile b/lib/librte_telemetry/Makefile
new file mode 100644 (file)
index 0000000..1a05069
--- /dev/null
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# library name
+LIB = librte_telemetry.a
+
+CFLAGS += -O3
+CFLAGS += -I$(SRCDIR)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
+LDLIBS += -lrte_eal -lrte_ethdev
+LDLIBS += -lrte_metrics
+LDLIBS += -lpthread
+LDLIBS += -ljansson
+
+EXPORT_MAP := rte_telemetry_version.map
+
+LIBABIVER := 1
+
+# library source files
+SRCS-$(CONFIG_RTE_LIBRTE_TELEMETRY) := rte_telemetry.c
+SRCS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += rte_telemetry_parser.c
+SRCS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += rte_telemetry_parser_test.c
+
+# export include files
+SYMLINK-$(CONFIG_RTE_LIBRTE_TELEMETRY)-include := rte_telemetry.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_telemetry/meson.build b/lib/librte_telemetry/meson.build
new file mode 100644 (file)
index 0000000..9492f54
--- /dev/null
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+sources = files('rte_telemetry.c', 'rte_telemetry_parser.c', 'rte_telemetry_parser_test.c')
+headers = files('rte_telemetry.h', 'rte_telemetry_internal.h', 'rte_telemetry_parser.h', 'rte_telemetry_parser_test.h')
+deps += ['metrics', 'ethdev']
+cflags += '-DALLOW_EXPERIMENTAL_API'
+
+jansson = cc.find_library('jansson', required: false)
+if jansson.found()
+       ext_deps += jansson
+       dpdk_app_link_libraries += ['telemetry']
+else
+       build = false
+endif
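
Once linked in, the library serves an AF_UNIX SOCK_SEQPACKET socket under the EAL runtime directory (see rte_telemetry_create_socket() in the new source below). A hedged client-side sketch: the socket path assumes the default root runtime directory, and the JSON payload is a hypothetical query, since the accepted grammar is defined by the parser rather than shown in this excerpt.

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <unistd.h>

int
main(void)
{
	struct sockaddr_un addr = { .sun_family = AF_UNIX };
	int fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);

	if (fd < 0)
		return 1;
	/* Assumed default EAL runtime dir for root; adjust as needed. */
	strncpy(addr.sun_path, "/var/run/dpdk/rte/telemetry",
			sizeof(addr.sun_path) - 1);
	if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		close(fd);
		return 1;
	}
	/* Hypothetical query; the real grammar lives in the parser. */
	const char *msg = "{\"action\":0,\"command\":\"ports\",\"data\":null}";
	if (write(fd, msg, strlen(msg)) < 0)
		perror("write");
	close(fd);
	return 0;
}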
diff --git a/lib/librte_telemetry/rte_telemetry.c b/lib/librte_telemetry/rte_telemetry.c
new file mode 100644 (file)
index 0000000..016431f
--- /dev/null
@@ -0,0 +1,1813 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <jansson.h>
+
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_metrics.h>
+#include <rte_option.h>
+#include <rte_string_fns.h>
+
+#include "rte_telemetry.h"
+#include "rte_telemetry_internal.h"
+#include "rte_telemetry_parser.h"
+#include "rte_telemetry_parser_test.h"
+#include "rte_telemetry_socket_tests.h"
+
+#define BUF_SIZE 1024
+#define ACTION_POST 1
+#define SLEEP_TIME 10
+
+#define SELFTEST_VALID_CLIENT "/var/run/dpdk/valid_client"
+#define SELFTEST_INVALID_CLIENT "/var/run/dpdk/invalid_client"
+#define SOCKET_TEST_CLIENT_PATH "/var/run/dpdk/client"
+
+static telemetry_impl *static_telemetry;
+
+struct telemetry_message_test {
+       char *test_name;
+       int (*test_func_ptr)(struct telemetry_impl *telemetry, int fd);
+};
+
+struct json_data {
+       char *status_code;
+       char *data;
+       int port;
+       char *stat_name;
+       int stat_value;
+};
+
+static void
+rte_telemetry_get_runtime_dir(char *socket_path, size_t size)
+{
+       snprintf(socket_path, size, "%s/telemetry", rte_eal_get_runtime_dir());
+}
+
+int32_t
+rte_telemetry_is_port_active(int port_id)
+{
+       int ret;
+
+       ret = rte_eth_find_next(port_id);
+       if (ret == port_id)
+               return 1;
+
+       TELEMETRY_LOG_ERR("port_id: %d is invalid, not active",
+               port_id);
+
+       return 0;
+}
+
+static int32_t
+rte_telemetry_update_metrics_ethdev(struct telemetry_impl *telemetry,
+       uint16_t port_id, int reg_start_index)
+{
+       int ret, num_xstats, i;
+       struct rte_eth_xstat *eth_xstats;
+
+       if (!rte_eth_dev_is_valid_port(port_id)) {
+               TELEMETRY_LOG_ERR("port_id: %d is invalid", port_id);
+               ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+               return -1;
+       }
+
+       ret = rte_telemetry_is_port_active(port_id);
+       if (ret < 1) {
+               ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+               return -1;
+       }
+
+       num_xstats = rte_eth_xstats_get(port_id, NULL, 0);
+       if (num_xstats < 0) {
+               TELEMETRY_LOG_ERR("rte_eth_xstats_get(%u) failed: %d", port_id,
+                               num_xstats);
+               ret = rte_telemetry_send_error_response(telemetry, -EPERM);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+               return -1;
+       }
+
+       eth_xstats = malloc(sizeof(struct rte_eth_xstat) * num_xstats);
+       if (eth_xstats == NULL) {
+               TELEMETRY_LOG_ERR("Failed to malloc memory for xstats");
+               ret = rte_telemetry_send_error_response(telemetry, -ENOMEM);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+               return -1;
+       }
+
+       ret = rte_eth_xstats_get(port_id, eth_xstats, num_xstats);
+       if (ret < 0 || ret > num_xstats) {
+               free(eth_xstats);
+               TELEMETRY_LOG_ERR("rte_eth_xstats_get(%u) len%i failed: %d",
+                               port_id, num_xstats, ret);
+               ret = rte_telemetry_send_error_response(telemetry, -EPERM);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+               return -1;
+       }
+
+       uint64_t xstats_values[num_xstats];
+       for (i = 0; i < num_xstats; i++)
+               xstats_values[i] = eth_xstats[i].value;
+
+       ret = rte_metrics_update_values(port_id, reg_start_index, xstats_values,
+                       num_xstats);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not update metrics values");
+               ret = rte_telemetry_send_error_response(telemetry, -EPERM);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+               free(eth_xstats);
+               return -1;
+       }
+
+       free(eth_xstats);
+       return 0;
+}
+
+int32_t
+rte_telemetry_write_to_socket(struct telemetry_impl *telemetry,
+       const char *json_string)
+{
+       int ret;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("TELEMETRY is not initialised");
+               return -1;
+       }
+
+       if (telemetry->request_client == NULL) {
+               TELEMETRY_LOG_ERR("No client has been chosen to write to");
+               return -1;
+       }
+
+       if (json_string == NULL) {
+               TELEMETRY_LOG_ERR("Invalid JSON string!");
+               return -1;
+       }
+
+       ret = send(telemetry->request_client->fd,
+                       json_string, strlen(json_string), 0);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Failed to write to socket for client: %s",
+                               telemetry->request_client->file_path);
+               return -1;
+       }
+
+       return 0;
+}
+
+int32_t
+rte_telemetry_send_error_response(struct telemetry_impl *telemetry,
+       int error_type)
+{
+       int ret;
+       const char *status_code, *json_buffer;
+       json_t *root;
+
+       if (error_type == -EPERM)
+               status_code = "Status Error: Unknown";
+       else if (error_type == -EINVAL)
+               status_code = "Status Error: Invalid Argument 404";
+       else if (error_type == -ENOMEM)
+               status_code = "Status Error: Memory Allocation Error";
+       else {
+               TELEMETRY_LOG_ERR("Invalid error type");
+               return -EINVAL;
+       }
+
+       root = json_object();
+
+       if (root == NULL) {
+               TELEMETRY_LOG_ERR("Could not create root JSON object");
+               return -EPERM;
+       }
+
+       ret = json_object_set_new(root, "status_code", json_string(status_code));
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Status code field cannot be set");
+               json_decref(root);
+               return -EPERM;
+       }
+
+       ret = json_object_set_new(root, "data", json_null());
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Data field cannot be set");
+               json_decref(root);
+               return -EPERM;
+       }
+
+       json_buffer = json_dumps(root, 0);
+       json_decref(root);
+
+       ret = rte_telemetry_write_to_socket(telemetry, json_buffer);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not write to socket");
+               return -EPERM;
+       }
+
+       return 0;
+}
+
+static int
+rte_telemetry_get_metrics(struct telemetry_impl *telemetry, uint32_t port_id,
+       struct rte_metric_value *metrics, struct rte_metric_name *names,
+       int num_metrics)
+{
+       int ret, num_values;
+
+       if (num_metrics < 0) {
+               TELEMETRY_LOG_ERR("Invalid metrics count");
+               goto einval_fail;
+       } else if (num_metrics == 0) {
+               TELEMETRY_LOG_ERR("No metrics to display (none have been registered)");
+               goto eperm_fail;
+       }
+
+       if (metrics == NULL) {
+               TELEMETRY_LOG_ERR("Metrics must be initialised.");
+               goto einval_fail;
+       }
+
+       if (names == NULL) {
+               TELEMETRY_LOG_ERR("Names must be initialised.");
+               goto einval_fail;
+       }
+
+       ret = rte_metrics_get_names(names, num_metrics);
+       if (ret < 0 || ret > num_metrics) {
+               TELEMETRY_LOG_ERR("Cannot get metrics names");
+               goto eperm_fail;
+       }
+
+       num_values = rte_metrics_get_values(port_id, NULL, 0);
+       if (num_values < 0) {
+               TELEMETRY_LOG_ERR("Cannot get metrics value count");
+               goto eperm_fail;
+       }
+
+       ret = rte_metrics_get_values(port_id, metrics, num_values);
+       if (ret < 0 || ret > num_values) {
+               TELEMETRY_LOG_ERR("Cannot get metrics values");
+               goto eperm_fail;
+       }
+
+       return 0;
+
+eperm_fail:
+       ret = rte_telemetry_send_error_response(telemetry, -EPERM);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -1;
+
+einval_fail:
+       ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -1;
+
+}
+
+static int32_t
+rte_telemetry_json_format_stat(struct telemetry_impl *telemetry, json_t *stats,
+       const char *metric_name, uint64_t metric_value)
+{
+       int ret;
+       json_t *stat = json_object();
+
+       if (stat == NULL) {
+               TELEMETRY_LOG_ERR("Could not create stat JSON object");
+               goto eperm_fail;
+       }
+
+       ret = json_object_set_new(stat, "name", json_string(metric_name));
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Stat Name field cannot be set");
+               goto eperm_fail;
+       }
+
+       ret = json_object_set_new(stat, "value", json_integer(metric_value));
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Stat Value field cannot be set");
+               goto eperm_fail;
+       }
+
+       ret = json_array_append_new(stats, stat);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Stat cannot be added to stats json array");
+               goto eperm_fail;
+       }
+
+       return 0;
+
+eperm_fail:
+       ret = rte_telemetry_send_error_response(telemetry, -EPERM);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -1;
+
+}
+
+static int32_t
+rte_telemetry_json_format_port(struct telemetry_impl *telemetry,
+       uint32_t port_id, json_t *ports, uint32_t *metric_ids,
+       uint32_t num_metric_ids)
+{
+       struct rte_metric_value *metrics = 0;
+       struct rte_metric_name *names = 0;
+       int num_metrics, ret, err_ret;
+       json_t *port, *stats;
+       uint32_t i;
+
+       num_metrics = rte_metrics_get_names(NULL, 0);
+       if (num_metrics < 0) {
+               TELEMETRY_LOG_ERR("Cannot get metrics count");
+               goto einval_fail;
+       } else if (num_metrics == 0) {
+               TELEMETRY_LOG_ERR("No metrics to display (none have been registered)");
+               goto eperm_fail;
+       }
+
+       metrics = malloc(sizeof(struct rte_metric_value) * num_metrics);
+       names = malloc(sizeof(struct rte_metric_name) * num_metrics);
+       if (metrics == NULL || names == NULL) {
+               TELEMETRY_LOG_ERR("Cannot allocate memory");
+               free(metrics);
+               free(names);
+
+               err_ret = rte_telemetry_send_error_response(telemetry, -ENOMEM);
+               if (err_ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+               return -1;
+       }
+
+       ret = rte_telemetry_get_metrics(telemetry, port_id, metrics, names,
+               num_metrics);
+       if (ret < 0) {
+               free(metrics);
+               free(names);
+               TELEMETRY_LOG_ERR("rte_telemetry_get_metrics failed");
+               return -1;
+       }
+
+       port = json_object();
+       stats = json_array();
+       if (port == NULL || stats == NULL) {
+               TELEMETRY_LOG_ERR("Could not create port/stats JSON objects");
+               goto eperm_fail;
+       }
+
+       ret = json_object_set_new(port, "port", json_integer(port_id));
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Port field cannot be set");
+               goto eperm_fail;
+       }
+
+       for (i = 0; i < num_metric_ids; i++) {
+               int metric_id = metric_ids[i];
+               int metric_index = -1;
+               int metric_name_key = -1;
+               int32_t j;
+               uint64_t metric_value;
+
+               if (metric_id >= num_metrics) {
+                       TELEMETRY_LOG_ERR("Metric_id: %d is not valid",
+                                       metric_id);
+                       goto einval_fail;
+               }
+
+               for (j = 0; j < num_metrics; j++) {
+                       if (metrics[j].key == metric_id) {
+                               metric_name_key = metrics[j].key;
+                               metric_index = j;
+                               break;
+                       }
+               }
+
+               if (metric_name_key < 0 || metric_index < 0) {
+                       TELEMETRY_LOG_ERR("Could not get metric name/index");
+                       goto eperm_fail;
+               }
+
+               const char *metric_name = names[metric_name_key].name;
+               metric_value = metrics[metric_index].value;
+
+               ret = rte_telemetry_json_format_stat(telemetry, stats,
+                       metric_name, metric_value);
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Format stat with id: %u failed",
+                                       metric_id);
+                       free(metrics);
+                       free(names);
+                       return -1;
+               }
+       }
+
+       if (json_array_size(stats) == 0)
+               ret = json_object_set_new(port, "stats", json_null());
+       else
+               ret = json_object_set_new(port, "stats", stats);
+
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Stats object cannot be set");
+               goto eperm_fail;
+       }
+
+       ret = json_array_append_new(ports, port);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Port object cannot be added to ports array");
+               goto eperm_fail;
+       }
+
+       free(metrics);
+       free(names);
+       return 0;
+
+eperm_fail:
+       free(metrics);
+       free(names);
+       ret = rte_telemetry_send_error_response(telemetry, -EPERM);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -1;
+
+einval_fail:
+       free(metrics);
+       free(names);
+       ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -1;
+}
+
+static int32_t
+rte_telemetry_encode_json_format(struct telemetry_impl *telemetry,
+       uint32_t *port_ids, uint32_t num_port_ids, uint32_t *metric_ids,
+       uint32_t num_metric_ids, char **json_buffer)
+{
+       int ret;
+       json_t *root, *ports;
+       uint32_t i;
+
+       if (num_port_ids == 0 || num_metric_ids == 0) {
+               TELEMETRY_LOG_ERR("Please provide port and metric ids to query");
+               goto einval_fail;
+       }
+
+       ports = json_array();
+       if (ports == NULL) {
+               TELEMETRY_LOG_ERR("Could not create ports JSON array");
+               goto eperm_fail;
+       }
+
+       for (i = 0; i < num_port_ids; i++) {
+               if (!rte_eth_dev_is_valid_port(port_ids[i])) {
+                       TELEMETRY_LOG_ERR("Port: %d invalid", port_ids[i]);
+                       goto einval_fail;
+               }
+       }
+
+       for (i = 0; i < num_port_ids; i++) {
+               ret = rte_telemetry_json_format_port(telemetry, port_ids[i],
+                       ports, metric_ids, num_metric_ids);
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Format port in JSON failed");
+                       return -1;
+               }
+       }
+
+       root = json_object();
+       if (root == NULL) {
+               TELEMETRY_LOG_ERR("Could not create root JSON object");
+               goto eperm_fail;
+       }
+
+       ret = json_object_set_new(root, "status_code",
+               json_string("Status OK: 200"));
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Status code field cannot be set");
+               goto eperm_fail;
+       }
+
+       ret = json_object_set_new(root, "data", ports);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Data field cannot be set");
+               goto eperm_fail;
+       }
+
+       *json_buffer = json_dumps(root, JSON_INDENT(2));
+       json_decref(root);
+       return 0;
+
+eperm_fail:
+       ret = rte_telemetry_send_error_response(telemetry, -EPERM);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -1;
+
+einval_fail:
+       ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -1;
+}
+
+int32_t
+rte_telemetry_send_ports_stats_values(uint32_t *metric_ids, int num_metric_ids,
+       uint32_t *port_ids, int num_port_ids, struct telemetry_impl *telemetry)
+{
+       int ret, i;
+       char *json_buffer = NULL;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Invalid telemetry argument");
+               return -1;
+       }
+
+       if (metric_ids == NULL) {
+               TELEMETRY_LOG_ERR("Invalid metric_ids array");
+               goto einval_fail;
+       }
+
+       if (num_metric_ids < 0) {
+               TELEMETRY_LOG_ERR("Invalid num_metric_ids, must not be negative");
+               goto einval_fail;
+       }
+
+       if (port_ids == NULL) {
+               TELEMETRY_LOG_ERR("Invalid port_ids array");
+               goto einval_fail;
+       }
+
+       if (num_port_ids < 0) {
+               TELEMETRY_LOG_ERR("Invalid num_port_ids, must not be negative");
+               goto einval_fail;
+       }
+
+       for (i = 0; i < num_port_ids; i++) {
+               if (!rte_eth_dev_is_valid_port(port_ids[i])) {
+                       TELEMETRY_LOG_ERR("Port: %d invalid", port_ids[i]);
+                       goto einval_fail;
+               }
+
+               ret = rte_telemetry_update_metrics_ethdev(telemetry,
+                               port_ids[i], telemetry->reg_index);
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Failed to update ethdev metrics");
+                       return -1;
+               }
+       }
+
+       ret = rte_telemetry_encode_json_format(telemetry, port_ids,
+               num_port_ids, metric_ids, num_metric_ids, &json_buffer);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("JSON encode function failed");
+               return -1;
+       }
+
+       ret = rte_telemetry_write_to_socket(telemetry, json_buffer);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not write to socket");
+               return -1;
+       }
+
+       return 0;
+
+einval_fail:
+       ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -1;
+}
+
+static int32_t
+rte_telemetry_reg_ethdev_to_metrics(uint16_t port_id)
+{
+       int ret, num_xstats, ret_val, i;
+       struct rte_eth_xstat *eth_xstats = NULL;
+       struct rte_eth_xstat_name *eth_xstats_names = NULL;
+
+       if (!rte_eth_dev_is_valid_port(port_id)) {
+               TELEMETRY_LOG_ERR("port_id: %d is invalid", port_id);
+               return -EINVAL;
+       }
+
+       num_xstats = rte_eth_xstats_get(port_id, NULL, 0);
+       if (num_xstats < 0) {
+               TELEMETRY_LOG_ERR("rte_eth_xstats_get(%u) failed: %d",
+                               port_id, num_xstats);
+               return -EPERM;
+       }
+
+       eth_xstats = malloc(sizeof(struct rte_eth_xstat) * num_xstats);
+       if (eth_xstats == NULL) {
+               TELEMETRY_LOG_ERR("Failed to malloc memory for xstats");
+               return -ENOMEM;
+       }
+
+       ret = rte_eth_xstats_get(port_id, eth_xstats, num_xstats);
+       const char *xstats_names[num_xstats];
+       eth_xstats_names = malloc(sizeof(struct rte_eth_xstat_name) * num_xstats);
+       if (ret < 0 || ret > num_xstats) {
+               TELEMETRY_LOG_ERR("rte_eth_xstats_get(%u) len%i failed: %d",
+                               port_id, num_xstats, ret);
+               ret_val = -EPERM;
+               goto free_xstats;
+       }
+
+       if (eth_xstats_names == NULL) {
+               TELEMETRY_LOG_ERR("Failed to malloc memory for xstats_names");
+               ret_val = -ENOMEM;
+               goto free_xstats;
+       }
+
+       ret = rte_eth_xstats_get_names(port_id, eth_xstats_names, num_xstats);
+       if (ret < 0 || ret > num_xstats) {
+               TELEMETRY_LOG_ERR("rte_eth_xstats_get_names(%u) len%i failed: %d",
+                               port_id, num_xstats, ret);
+               ret_val = -EPERM;
+               goto free_xstats;
+       }
+
+       for (i = 0; i < num_xstats; i++)
+               xstats_names[i] = eth_xstats_names[eth_xstats[i].id].name;
+
+       ret_val = rte_metrics_reg_names(xstats_names, num_xstats);
+       if (ret_val < 0) {
+               TELEMETRY_LOG_ERR("rte_metrics_reg_names failed - metrics may already be registered");
+               ret_val = -1;
+               goto free_xstats;
+       }
+
+free_xstats:
+       free(eth_xstats);
+       free(eth_xstats_names);
+       return ret_val;
+}
+
+static int32_t
+rte_telemetry_initial_accept(struct telemetry_impl *telemetry)
+{
+       uint16_t pid;
+       int ret;
+       int selftest = 0;
+
+       RTE_ETH_FOREACH_DEV(pid) {
+               telemetry->reg_index = rte_telemetry_reg_ethdev_to_metrics(pid);
+               break;
+       }
+
+       if (telemetry->reg_index < 0) {
+               TELEMETRY_LOG_ERR("Failed to register ethdev metrics");
+               return -1;
+       }
+
+       telemetry->metrics_register_done = 1;
+       if (selftest) {
+               ret = rte_telemetry_socket_messaging_testing(telemetry->reg_index,
+                               telemetry->server_fd);
+               if (ret < 0)
+                       return -1;
+
+               ret = rte_telemetry_parser_test(telemetry);
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Parser Tests Failed");
+                       return -1;
+               }
+
+               TELEMETRY_LOG_INFO("Success - All Parser Tests Passed");
+       }
+
+       return 0;
+}
+
+static int32_t
+rte_telemetry_read_client(struct telemetry_impl *telemetry)
+{
+       char buf[BUF_SIZE];
+       int ret, buffer_read;
+
+       buffer_read = read(telemetry->accept_fd, buf, BUF_SIZE-1);
+
+       if (buffer_read == -1) {
+               TELEMETRY_LOG_ERR("Read error");
+               return -1;
+       } else if (buffer_read == 0) {
+               goto close_socket;
+       } else {
+               buf[buffer_read] = '\0';
+               ret = rte_telemetry_parse_client_message(telemetry, buf);
+               if (ret < 0)
+                       TELEMETRY_LOG_WARN("Parse message failed");
+               goto close_socket;
+       }
+
+close_socket:
+       if (close(telemetry->accept_fd) < 0) {
+               TELEMETRY_LOG_ERR("Close TELEMETRY socket failed");
+               free(telemetry);
+               return -EPERM;
+       }
+       telemetry->accept_fd = 0;
+
+       return 0;
+}
+
+static int32_t
+rte_telemetry_accept_new_client(struct telemetry_impl *telemetry)
+{
+       int ret;
+
+       if (telemetry->accept_fd <= 0) {
+               ret = listen(telemetry->server_fd, 1);
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Listening error with server fd");
+                       return -1;
+               }
+
+               telemetry->accept_fd = accept(telemetry->server_fd, NULL, NULL);
+               if (telemetry->accept_fd >= 0 &&
+                       telemetry->metrics_register_done == 0) {
+                       ret = rte_telemetry_initial_accept(telemetry);
+                       if (ret < 0) {
+                               TELEMETRY_LOG_ERR("Failed to run initial configurations/tests");
+                               return -1;
+                       }
+               }
+       } else {
+               ret = rte_telemetry_read_client(telemetry);
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Failed to read socket buffer");
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+static int32_t
+rte_telemetry_read_client_sockets(struct telemetry_impl *telemetry)
+{
+       int ret;
+       telemetry_client *client;
+       char client_buf[BUF_SIZE];
+       int bytes;
+
+       TAILQ_FOREACH(client, &telemetry->client_list_head, client_list) {
+               bytes = read(client->fd, client_buf, BUF_SIZE-1);
+
+               if (bytes > 0) {
+                       client_buf[bytes] = '\0';
+                       telemetry->request_client = client;
+                       ret = rte_telemetry_parse(telemetry, client_buf);
+                       if (ret < 0) {
+                               TELEMETRY_LOG_WARN("Parse socket input failed: %i",
+                                               ret);
+                               return -1;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+static int32_t
+rte_telemetry_run(void *userdata)
+{
+       int ret;
+       struct telemetry_impl *telemetry = userdata;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_WARN("TELEMETRY is not initialised");
+               return -1;
+       }
+
+       ret = rte_telemetry_accept_new_client(telemetry);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Accept and read new client failed");
+               return -1;
+       }
+
+       ret = rte_telemetry_read_client_sockets(telemetry);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Client socket read failed");
+               return -1;
+       }
+
+       return 0;
+}
+
+static void *
+rte_telemetry_run_thread_func(void *userdata)
+{
+       int ret;
+       struct telemetry_impl *telemetry = userdata;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("%s passed a NULL instance", __func__);
+               pthread_exit(0);
+       }
+
+       while (telemetry->thread_status) {
+               rte_telemetry_run(telemetry);
+               ret = usleep(SLEEP_TIME);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Calling thread could not be put to sleep");
+       }
+       pthread_exit(0);
+}
+
+static int32_t
+rte_telemetry_set_socket_nonblock(int fd)
+{
+       int flags;
+
+       if (fd < 0) {
+               TELEMETRY_LOG_ERR("Invalid fd provided");
+               return -1;
+       }
+
+       flags = fcntl(fd, F_GETFL, 0);
+       if (flags < 0)
+               flags = 0;
+
+       return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+}
+
+static int32_t
+rte_telemetry_create_socket(struct telemetry_impl *telemetry)
+{
+       int ret;
+       struct sockaddr_un addr;
+       char socket_path[BUF_SIZE];
+
+       if (telemetry == NULL)
+               return -1;
+
+       telemetry->server_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+       if (telemetry->server_fd == -1) {
+               TELEMETRY_LOG_ERR("Failed to open socket");
+               return -1;
+       }
+
+       ret = rte_telemetry_set_socket_nonblock(telemetry->server_fd);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not set socket to NONBLOCK");
+               goto close_socket;
+       }
+
+       addr.sun_family = AF_UNIX;
+       rte_telemetry_get_runtime_dir(socket_path, sizeof(socket_path));
+       strlcpy(addr.sun_path, socket_path, sizeof(addr.sun_path));
+       unlink(socket_path);
+
+       if (bind(telemetry->server_fd, (struct sockaddr *)&addr,
+               sizeof(addr)) < 0) {
+               TELEMETRY_LOG_ERR("Socket binding error");
+               goto close_socket;
+       }
+
+       return 0;
+
+close_socket:
+       if (close(telemetry->server_fd) < 0) {
+               TELEMETRY_LOG_ERR("Close TELEMETRY socket failed");
+               return -EPERM;
+       }
+
+       return -1;
+}
+
+int32_t __rte_experimental
+rte_telemetry_init(void)
+{
+       int ret;
+       pthread_attr_t attr;
+       const char *telemetry_ctrl_thread = "telemetry";
+
+       if (static_telemetry) {
+               TELEMETRY_LOG_WARN("TELEMETRY structure already initialised");
+               return -EALREADY;
+       }
+
+       static_telemetry = calloc(1, sizeof(struct telemetry_impl));
+       if (static_telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Memory could not be allocated");
+               return -ENOMEM;
+       }
+
+       static_telemetry->socket_id = rte_socket_id();
+       rte_metrics_init(static_telemetry->socket_id);
+
+       ret = pthread_attr_init(&attr);
+       if (ret != 0) {
+               TELEMETRY_LOG_ERR("Pthread attribute init failed");
+               return -EPERM;
+       }
+
+       ret = rte_telemetry_create_socket(static_telemetry);
+       if (ret < 0) {
+               ret = rte_telemetry_cleanup();
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("TELEMETRY cleanup failed");
+               return -EPERM;
+       }
+       TAILQ_INIT(&static_telemetry->client_list_head);
+
+       static_telemetry->thread_status = 1;
+
+       ret = rte_ctrl_thread_create(&static_telemetry->thread_id,
+               telemetry_ctrl_thread, &attr, rte_telemetry_run_thread_func,
+               (void *)static_telemetry);
+
+       if (ret < 0) {
+               ret = rte_telemetry_cleanup();
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("TELEMETRY cleanup failed");
+               return -EPERM;
+       }
+
+       return 0;
+}
+
+static int32_t
+rte_telemetry_client_cleanup(struct telemetry_client *client)
+{
+       int ret;
+
+       ret = close(client->fd);
+       free(client->file_path);
+       free(client);
+
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Close client socket failed");
+               return -EPERM;
+       }
+
+       return 0;
+}
+
+int32_t __rte_experimental
+rte_telemetry_cleanup(void)
+{
+       int ret;
+       struct telemetry_impl *telemetry = static_telemetry;
+       telemetry_client *client, *temp_client;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_WARN("TELEMETRY is not initialised");
+               return -ENODEV;
+       }
+
+       /* Stop the run thread before tearing down the sockets it polls */
+       if (telemetry->thread_status) {
+               telemetry->thread_status = 0;
+               pthread_join(telemetry->thread_id, NULL);
+       }
+
+       TAILQ_FOREACH_SAFE(client, &telemetry->client_list_head, client_list,
+               temp_client) {
+               TAILQ_REMOVE(&telemetry->client_list_head, client, client_list);
+               ret = rte_telemetry_client_cleanup(client);
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Client cleanup failed");
+                       return -EPERM;
+               }
+       }
+
+       ret = close(telemetry->server_fd);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Close TELEMETRY socket failed");
+               free(telemetry);
+               static_telemetry = NULL;
+               return -EPERM;
+       }
+
+       free(telemetry);
+       static_telemetry = NULL;
+
+       return 0;
+}
+
+int32_t
+rte_telemetry_unregister_client(struct telemetry_impl *telemetry,
+       const char *client_path)
+{
+       int ret;
+       telemetry_client *client, *temp_client;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_WARN("TELEMETRY is not initialised");
+               return -ENODEV;
+       }
+
+       if (client_path == NULL) {
+               TELEMETRY_LOG_ERR("Invalid client path");
+               goto einval_fail;
+       }
+
+       if (TAILQ_EMPTY(&telemetry->client_list_head)) {
+               TELEMETRY_LOG_ERR("There are no clients currently registered");
+               return -EPERM;
+       }
+
+       TAILQ_FOREACH_SAFE(client, &telemetry->client_list_head, client_list,
+                       temp_client) {
+               if (strcmp(client_path, client->file_path) == 0) {
+                       TAILQ_REMOVE(&telemetry->client_list_head, client,
+                               client_list);
+                       ret = rte_telemetry_client_cleanup(client);
+
+                       if (ret < 0) {
+                               TELEMETRY_LOG_ERR("Client cleanup failed");
+                               return -EPERM;
+                       }
+
+                       return 0;
+               }
+       }
+
+       TELEMETRY_LOG_WARN("Couldn't find client, possibly not registered yet.");
+       return -1;
+
+einval_fail:
+       ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -EINVAL;
+}
+
+int32_t
+rte_telemetry_register_client(struct telemetry_impl *telemetry,
+       const char *client_path)
+{
+       int ret, fd;
+       struct sockaddr_un addrs;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("TELEMETRY is not initialised");
+               return -ENODEV;
+       }
+
+       if (client_path == NULL) {
+               TELEMETRY_LOG_ERR("Invalid client path");
+               return -EINVAL;
+       }
+
+       telemetry_client *client;
+       TAILQ_FOREACH(client, &telemetry->client_list_head, client_list) {
+               if (strcmp(client_path, client->file_path) == 0) {
+                       TELEMETRY_LOG_WARN("'%s' already registered",
+                                       client_path);
+                       return -EINVAL;
+               }
+       }
+
+       fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+       if (fd == -1) {
+               TELEMETRY_LOG_ERR("Client socket error");
+               return -EACCES;
+       }
+
+       ret = rte_telemetry_set_socket_nonblock(fd);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not set socket to NONBLOCK");
+               close(fd);
+               return -EPERM;
+       }
+
+       addrs.sun_family = AF_UNIX;
+       strlcpy(addrs.sun_path, client_path, sizeof(addrs.sun_path));
+       telemetry_client *new_client = malloc(sizeof(telemetry_client));
+       if (new_client == NULL) {
+               TELEMETRY_LOG_ERR("Failed to allocate client entry");
+               close(fd);
+               return -ENOMEM;
+       }
+       new_client->file_path = strdup(client_path);
+       new_client->fd = fd;
+
+       if (connect(fd, (struct sockaddr *)&addrs, sizeof(addrs)) == -1) {
+               TELEMETRY_LOG_ERR("TELEMETRY client connect to %s didn't work",
+                               client_path);
+               ret = rte_telemetry_client_cleanup(new_client);
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Client cleanup failed");
+                       return -EPERM;
+               }
+               return -EINVAL;
+       }
+
+       TAILQ_INSERT_HEAD(&telemetry->client_list_head, new_client, client_list);
+
+       return 0;
+}
+
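+/*
+ * A client registration message, as exercised by the socket selftest below
+ * (the client_path value here is illustrative):
+ *
+ *   {"action":1,"command":"clients","data":{"client_path":"/path/to/client"}}
+ */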
+int32_t
+rte_telemetry_parse_client_message(struct telemetry_impl *telemetry, char *buf)
+{
+       int ret, action_int;
+       json_error_t error;
+       json_t *root = json_loads(buf, 0, &error);
+
+       if (root == NULL) {
+               TELEMETRY_LOG_WARN("Could not load JSON object from data passed in : %s",
+                               error.text);
+               goto fail;
+       } else if (!json_is_object(root)) {
+               TELEMETRY_LOG_WARN("JSON Request is not a JSON object");
+               goto fail;
+       }
+
+       json_t *action = json_object_get(root, "action");
+       if (action == NULL) {
+               TELEMETRY_LOG_WARN("Request does not have action field");
+               goto fail;
+       } else if (!json_is_integer(action)) {
+               TELEMETRY_LOG_WARN("Action value is not an integer");
+               goto fail;
+       }
+
+       json_t *command = json_object_get(root, "command");
+       if (command == NULL) {
+               TELEMETRY_LOG_WARN("Request does not have command field");
+               goto fail;
+       } else if (!json_is_string(command)) {
+               TELEMETRY_LOG_WARN("Command value is not a string");
+               goto fail;
+       }
+
+       action_int = json_integer_value(action);
+       if (action_int != ACTION_POST) {
+               TELEMETRY_LOG_WARN("Invalid action code");
+               goto fail;
+       }
+
+       if (strcmp(json_string_value(command), "clients") != 0) {
+               TELEMETRY_LOG_WARN("Invalid command");
+               goto fail;
+       }
+
+       json_t *data = json_object_get(root, "data");
+       if (data == NULL) {
+               TELEMETRY_LOG_WARN("Request does not have data field");
+               goto fail;
+       }
+
+       json_t *client_path = json_object_get(data, "client_path");
+       if (client_path == NULL) {
+               TELEMETRY_LOG_WARN("Request does not have client_path field");
+               goto fail;
+       }
+
+       if (!json_is_string(client_path)) {
+               TELEMETRY_LOG_WARN("Client_path value is not a string");
+               goto fail;
+       }
+
+       ret = rte_telemetry_register_client(telemetry,
+                       json_string_value(client_path));
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not register client");
+               telemetry->register_fail_count++;
+               goto fail;
+       }
+
+       json_decref(root);
+       return 0;
+
+fail:
+       TELEMETRY_LOG_WARN("Client attempted to register with invalid message");
+       json_decref(root);
+       return -1;
+}
+
+int32_t
+rte_telemetry_dummy_client_socket(const char *valid_client_path)
+{
+       int sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+       struct sockaddr_un addr = {0};
+
+       if (sockfd < 0) {
+               TELEMETRY_LOG_ERR("Test socket creation failure");
+               return -1;
+       }
+
+       addr.sun_family = AF_UNIX;
+       strlcpy(addr.sun_path, valid_client_path, sizeof(addr.sun_path));
+       unlink(valid_client_path);
+
+       if (bind(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+               TELEMETRY_LOG_ERR("Test socket binding failure");
+               return -1;
+       }
+
+       if (listen(sockfd, 1) < 0) {
+               TELEMETRY_LOG_ERR("Listen failure");
+               return -1;
+       }
+
+       return sockfd;
+}
+
+int32_t __rte_experimental
+rte_telemetry_selftest(void)
+{
+       const char *invalid_client_path = SELFTEST_INVALID_CLIENT;
+       const char *valid_client_path = SELFTEST_VALID_CLIENT;
+       int ret, sockfd;
+
+       TELEMETRY_LOG_INFO("Selftest");
+
+       ret = rte_telemetry_init();
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Valid initialisation test failed");
+               return -1;
+       }
+
+       TELEMETRY_LOG_INFO("Success - Valid initialisation test passed");
+
+       ret = rte_telemetry_init();
+       if (ret != -EALREADY) {
+               TELEMETRY_LOG_ERR("Invalid initialisation test failed");
+               return -1;
+       }
+
+       TELEMETRY_LOG_INFO("Success - Invalid initialisation test passed");
+
+       ret = rte_telemetry_unregister_client(static_telemetry,
+                       invalid_client_path);
+       if (ret != -EPERM) {
+               TELEMETRY_LOG_ERR("Invalid unregister test failed");
+               return -1;
+       }
+
+       TELEMETRY_LOG_INFO("Success - Invalid unregister test passed");
+
+       sockfd = rte_telemetry_dummy_client_socket(valid_client_path);
+       if (sockfd < 0) {
+               TELEMETRY_LOG_ERR("Test socket creation failed");
+               return -1;
+       }
+
+       ret = rte_telemetry_register_client(static_telemetry, valid_client_path);
+       if (ret != 0) {
+               TELEMETRY_LOG_ERR("Valid register test failed: %i", ret);
+               return -1;
+       }
+
+       accept(sockfd, NULL, NULL);
+       TELEMETRY_LOG_INFO("Success - Valid register test passed");
+
+       ret = rte_telemetry_register_client(static_telemetry, valid_client_path);
+       if (ret != -EINVAL) {
+               TELEMETRY_LOG_ERR("Invalid register test failed: %i", ret);
+               return -1;
+       }
+
+       TELEMETRY_LOG_INFO("Success - Invalid register test passed");
+
+       ret = rte_telemetry_unregister_client(static_telemetry,
+               invalid_client_path);
+       if (ret != -1) {
+               TELEMETRY_LOG_ERR("Invalid unregister test failed: %i", ret);
+               return -1;
+       }
+
+       TELEMETRY_LOG_INFO("Success - Invalid unregister test passed");
+
+       ret = rte_telemetry_unregister_client(static_telemetry, valid_client_path);
+       if (ret != 0) {
+               TELEMETRY_LOG_ERR("Valid unregister test failed: %i", ret);
+               return -1;
+       }
+
+       TELEMETRY_LOG_INFO("Success - Valid unregister test passed");
+
+       ret = rte_telemetry_cleanup();
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Cleanup test failed");
+               return -1;
+       }
+
+       TELEMETRY_LOG_INFO("Success - Valid cleanup test passed");
+
+       return 0;
+}
+
+int32_t
+rte_telemetry_socket_messaging_testing(int index, int socket)
+{
+       struct telemetry_impl *telemetry = calloc(1, sizeof(telemetry_impl));
+       int fd, bad_send_fd, send_fd, bad_fd, bad_recv_fd, recv_fd, ret;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Could not initialize Telemetry API");
+               return -1;
+       }
+
+       telemetry->server_fd = socket;
+       telemetry->reg_index = index;
+       TELEMETRY_LOG_INFO("Beginning Telemetry socket message Selftest");
+       ret = rte_telemetry_socket_test_setup(telemetry, &send_fd, &recv_fd);
+       if (ret < 0) {
+               free(telemetry);
+               return -1;
+       }
+       TELEMETRY_LOG_INFO("Register valid client test");
+
+       ret = rte_telemetry_socket_register_test(telemetry, &fd, send_fd,
+               recv_fd);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Register valid client test failed!");
+               free(telemetry);
+               return -1;
+       }
+
+       TELEMETRY_LOG_INFO("Success - Register valid client test passed!");
+
+       TELEMETRY_LOG_INFO("Register invalid/same client test");
+       ret = rte_telemetry_socket_test_setup(telemetry, &bad_send_fd,
+               &bad_recv_fd);
+       if (ret < 0) {
+               free(telemetry);
+               return -1;
+       }
+       ret = rte_telemetry_socket_register_test(telemetry, &bad_fd,
+               bad_send_fd, bad_recv_fd);
+       if (!ret) {
+               TELEMETRY_LOG_ERR("Register invalid/same client test failed!");
+               free(telemetry);
+               return -1;
+       }
+
+       TELEMETRY_LOG_INFO("Success - Register invalid/same client test passed!");
+
+       ret = rte_telemetry_json_socket_message_test(telemetry, fd);
+       if (ret < 0) {
+               free(telemetry);
+               return -1;
+       }
+
+       free(telemetry);
+       return 0;
+}
+
+int32_t
+rte_telemetry_socket_register_test(struct telemetry_impl *telemetry, int *fd,
+       int send_fd, int recv_fd)
+{
+       int ret;
+       char good_req_string[BUF_SIZE];
+
+       snprintf(good_req_string, sizeof(good_req_string),
+       "{\"action\":1,\"command\":\"clients\",\"data\":{\"client_path\""
+               ":\"%s\"}}", SOCKET_TEST_CLIENT_PATH);
+
+       listen(recv_fd, 1);
+
+       ret = send(send_fd, good_req_string, strlen(good_req_string), 0);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not send message over socket");
+               return -1;
+       }
+
+       rte_telemetry_run(telemetry);
+
+       if (telemetry->register_fail_count != 0)
+               return -1;
+
+       *fd = accept(recv_fd, NULL, NULL);
+
+       return 0;
+}
+
+int32_t
+rte_telemetry_socket_test_setup(struct telemetry_impl *telemetry, int *send_fd,
+       int *recv_fd)
+{
+       int ret;
+       const char *client_path = SOCKET_TEST_CLIENT_PATH;
+       char socket_path[BUF_SIZE];
+       struct sockaddr_un addr = {0};
+       struct sockaddr_un addrs = {0};
+       *send_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+       *recv_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+
+       listen(telemetry->server_fd, 5);
+       addr.sun_family = AF_UNIX;
+       rte_telemetry_get_runtime_dir(socket_path, sizeof(socket_path));
+       strlcpy(addr.sun_path, socket_path, sizeof(addr.sun_path));
+
+       ret = connect(*send_fd, (struct sockaddr *) &addr, sizeof(addr));
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not connect socket");
+               return -1;
+       }
+
+       telemetry->accept_fd = accept(telemetry->server_fd, NULL, NULL);
+
+       addrs.sun_family = AF_UNIX;
+       strlcpy(addrs.sun_path, client_path, sizeof(addrs.sun_path));
+       unlink(client_path);
+
+       ret = bind(*recv_fd, (struct sockaddr *)&addrs, sizeof(addrs));
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not bind socket");
+               return -1;
+       }
+
+       return 0;
+}
+
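+/*
+ * Parse a JSON stats response of the shape the library sends back; sketched
+ * from the checks below rather than a formal schema:
+ *
+ *   {"status_code": "<status string>",
+ *    "data": [{"port": <int>, "stats": [{"name": <string>, "value": <int>}]}]}
+ *
+ * Note only the last entry of each array is retained in json_data_struct;
+ * the selftests send single-entry arrays, so that is sufficient here.
+ */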
+static int32_t
+rte_telemetry_stat_parse(char *buf, struct json_data *json_data_struct)
+{
+       json_error_t error;
+       json_t *root;
+       int arraylen, i;
+       json_t *status, *dataArray, *port, *stats, *name, *value, *dataArrayObj,
+              *statsArrayObj;
+
+       stats = NULL;
+       port = NULL;
+       name = NULL;
+       value = NULL;
+
+       /* Check the buffer before handing it to json_loads() */
+       if (buf == NULL) {
+               TELEMETRY_LOG_ERR("JSON message is NULL");
+               return -EINVAL;
+       }
+
+       root = json_loads(buf, 0, &error);
+       if (root == NULL) {
+               TELEMETRY_LOG_ERR("Could not load JSON object from data passed in: %s",
+                               error.text);
+               return -EPERM;
+       } else if (!json_is_object(root)) {
+               TELEMETRY_LOG_ERR("JSON response is not a JSON object");
+               json_decref(root);
+               return -EINVAL;
+       }
+
+       status = json_object_get(root, "status_code");
+       if (!status) {
+               TELEMETRY_LOG_ERR("Request does not have status field");
+               return -EINVAL;
+       } else if (!json_is_string(status)) {
+               TELEMETRY_LOG_ERR("Status value is not a string");
+               return -EINVAL;
+       }
+
+       json_data_struct->status_code = strdup(json_string_value(status));
+
+       dataArray = json_object_get(root, "data");
+       if (dataArray == NULL) {
+               TELEMETRY_LOG_ERR("Request does not have data field");
+               return -EINVAL;
+       }
+
+       arraylen = json_array_size(dataArray);
+       if (arraylen == 0) {
+               json_data_struct->data = "null";
+               return -EINVAL;
+       }
+
+       for (i = 0; i < arraylen; i++) {
+               dataArrayObj = json_array_get(dataArray, i);
+               port = json_object_get(dataArrayObj, "port");
+               stats = json_object_get(dataArrayObj, "stats");
+       }
+
+       if (port == NULL) {
+               TELEMETRY_LOG_ERR("Request does not have port field");
+               return -EINVAL;
+       }
+
+       if (!json_is_integer(port)) {
+               TELEMETRY_LOG_ERR("Port value is not an integer");
+               return -EINVAL;
+       }
+
+       json_data_struct->port = json_integer_value(port);
+
+       if (stats == NULL) {
+               TELEMETRY_LOG_ERR("Request does not have stats field");
+               return -EINVAL;
+       }
+
+       arraylen = json_array_size(stats);
+       for (i = 0; i < arraylen; i++) {
+               statsArrayObj = json_array_get(stats, i);
+               name = json_object_get(statsArrayObj, "name");
+               value = json_object_get(statsArrayObj, "value");
+       }
+
+       if (name == NULL) {
+               TELEMETRY_LOG_ERR("Request does not have name field");
+               return -EINVAL;
+       }
+
+       if (!json_is_string(name)) {
+               TELEMETRY_LOG_ERR("Stat name value is not a string");
+               return -EINVAL;
+       }
+
+       json_data_struct->stat_name = strdup(json_string_value(name));
+
+       if (value == NULL) {
+               TELEMETRY_LOG_ERR("Request does not have value field");
+               return -EINVAL;
+       }
+
+       if (!json_is_integer(value)) {
+               TELEMETRY_LOG_ERR("Stat value is not an integer");
+               return -EINVAL;
+       }
+
+       json_data_struct->stat_value = json_integer_value(value);
+
+       json_decref(root);
+       return 0;
+}
+
+static void
+rte_telemetry_free_test_data(struct json_data *data)
+{
+       free(data->status_code);
+       free(data->stat_name);
+       free(data);
+}
+
+int32_t
+rte_telemetry_valid_json_test(struct telemetry_impl *telemetry, int fd)
+{
+       int ret;
+       int port = 0;
+       int value = 0;
+       int fail_count = 0;
+       int buffer_read = 0;
+       char buf[BUF_SIZE];
+       struct json_data *data_struct;
+       errno = 0;
+       const char *status = "Status OK: 200";
+       const char *name = "rx_good_packets";
+       const char *valid_json_message = "{\"action\":0,\"command\":"
+       "\"ports_stats_values_by_name\",\"data\":{\"ports\""
+       ":[0],\"stats\":[\"rx_good_packets\"]}}";
+
+       ret = send(fd, valid_json_message, strlen(valid_json_message), 0);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not send message over socket");
+               return -1;
+       }
+
+       rte_telemetry_run(telemetry);
+       buffer_read = recv(fd, buf, BUF_SIZE-1, 0);
+
+       if (buffer_read == -1) {
+               TELEMETRY_LOG_ERR("Read error");
+               return -1;
+       }
+
+       buf[buffer_read] = '\0';
+       data_struct = calloc(1, sizeof(struct json_data));
+       ret = rte_telemetry_stat_parse(buf, data_struct);
+
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not parse stats");
+               fail_count++;
+       }
+
+       if (strcmp(data_struct->status_code, status) != 0) {
+               TELEMETRY_LOG_ERR("Status code is invalid");
+               fail_count++;
+       }
+
+       if (data_struct->port != port) {
+               TELEMETRY_LOG_ERR("Port is invalid");
+               fail_count++;
+       }
+
+       if (strcmp(data_struct->stat_name, name) != 0) {
+               TELEMETRY_LOG_ERR("Stat name is invalid");
+               fail_count++;
+       }
+
+       if (data_struct->stat_value != value) {
+               TELEMETRY_LOG_ERR("Stat value is invalid");
+               fail_count++;
+       }
+
+       rte_telemetry_free_test_data(data_struct);
+       if (fail_count > 0)
+               return -1;
+
+       TELEMETRY_LOG_INFO("Success - Passed valid JSON message test passed");
+
+       return 0;
+}
+
+int32_t
+rte_telemetry_invalid_json_test(struct telemetry_impl *telemetry, int fd)
+{
+       int ret;
+       char buf[BUF_SIZE];
+       int fail_count = 0;
+       const char *invalid_json = "{]";
+       const char *status = "Status Error: Unknown";
+       const char *data = "null";
+       struct json_data *data_struct;
+       int buffer_read = 0;
+       errno = 0;
+
+       ret = send(fd, invalid_json, strlen(invalid_json), 0);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not send message over socket");
+               return -1;
+       }
+
+       rte_telemetry_run(telemetry);
+       buffer_read = recv(fd, buf, BUF_SIZE-1, 0);
+
+       if (buffer_read == -1) {
+               TELEMETRY_LOG_ERR("Read error");
+               return -1;
+       }
+
+       buf[buffer_read] = '\0';
+
+       data_struct = calloc(1, sizeof(struct json_data));
+       ret = rte_telemetry_stat_parse(buf, data_struct);
+
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not parse stats");
+
+       if (strcmp(data_struct->status_code, status) != 0) {
+               TELEMETRY_LOG_ERR("Status code is invalid");
+               fail_count++;
+       }
+
+       if (strcmp(data_struct->data, data) != 0) {
+               TELEMETRY_LOG_ERR("Data status is invalid");
+               fail_count++;
+       }
+
+       rte_telemetry_free_test_data(data_struct);
+       if (fail_count > 0)
+               return -1;
+
+       TELEMETRY_LOG_INFO("Success - Passed invalid JSON message test");
+
+       return 0;
+}
+
+int32_t
+rte_telemetry_json_contents_test(struct telemetry_impl *telemetry, int fd)
+{
+       int ret;
+       char buf[BUF_SIZE];
+       int fail_count = 0;
+       char *status = "Status Error: Invalid Argument 404";
+       char *data = "null";
+       struct json_data *data_struct;
+       const char *invalid_contents = "{\"action\":0,\"command\":"
+       "\"ports_stats_values_by_name\",\"data\":{\"ports\""
+       ":[0],\"stats\":[\"some_invalid_param\","
+       "\"another_invalid_param\"]}}";
+       int buffer_read = 0;
+       errno = 0;
+
+       ret = send(fd, invalid_contents, strlen(invalid_contents), 0);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not send message over socket");
+               return -1;
+       }
+
+       rte_telemetry_run(telemetry);
+       buffer_read = recv(fd, buf, BUF_SIZE-1, 0);
+
+       if (buffer_read == -1) {
+               TELEMETRY_LOG_ERR("Read error");
+               return -1;
+       }
+
+       buf[buffer_read] = '\0';
+       data_struct = calloc(1, sizeof(struct json_data));
+       ret = rte_telemetry_stat_parse(buf, data_struct);
+
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not parse stats");
+
+       if (strcmp(data_struct->status_code, status) != 0) {
+               TELEMETRY_LOG_ERR("Status code is invalid");
+               fail_count++;
+       }
+
+       if (strcmp(data_struct->data, data) != 0) {
+               TELEMETRY_LOG_ERR("Data status is invalid");
+               fail_count++;
+       }
+
+       rte_telemetry_free_test_data(data_struct);
+       if (fail_count > 0)
+               return -1;
+
+       TELEMETRY_LOG_INFO("Success - Passed invalid JSON content test");
+
+       return 0;
+}
+
+int32_t
+rte_telemetry_json_empty_test(struct telemetry_impl *telemetry, int fd)
+{
+       int ret;
+       char buf[BUF_SIZE];
+       int fail_count = 0;
+       const char *status = "Status Error: Invalid Argument 404";
+       char *data = "null";
+       struct json_data *data_struct;
+       const char *empty_json  = "{}";
+       int buffer_read = 0;
+       errno = 0;
+
+       ret = send(fd, empty_json, strlen(empty_json), 0);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not send message over socket");
+               return -1;
+       }
+
+       rte_telemetry_run(telemetry);
+       buffer_read = recv(fd, buf, BUF_SIZE-1, 0);
+
+       if (buffer_read == -1) {
+               TELEMETRY_LOG_ERR("Read error");
+               return -1;
+       }
+
+       buf[buffer_read] = '\0';
+       data_struct = calloc(1, sizeof(struct json_data));
+       ret = rte_telemetry_stat_parse(buf, data_struct);
+
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not parse stats");
+
+       if (strcmp(data_struct->status_code, status) != 0) {
+               TELEMETRY_LOG_ERR("Status code is invalid");
+               fail_count++;
+       }
+
+       if (strcmp(data_struct->data, data) != 0) {
+               TELEMETRY_LOG_ERR("Data status is invalid");
+               fail_count++;
+       }
+
+       rte_telemetry_free_test_data(data_struct);
+
+       if (fail_count > 0)
+               return -1;
+
+       TELEMETRY_LOG_INFO("Success - Passed JSON empty message test");
+
+       return 0;
+}
+
+int32_t
+rte_telemetry_json_socket_message_test(struct telemetry_impl *telemetry, int fd)
+{
+       uint16_t i;
+       int ret, fail_count;
+
+       fail_count = 0;
+       struct telemetry_message_test socket_json_tests[] = {
+               {.test_name = "Invalid JSON test",
+                       .test_func_ptr = rte_telemetry_invalid_json_test},
+               {.test_name = "Valid JSON test",
+                       .test_func_ptr = rte_telemetry_valid_json_test},
+               {.test_name = "JSON contents test",
+                       .test_func_ptr = rte_telemetry_json_contents_test},
+               {.test_name = "JSON empty tests",
+                       .test_func_ptr = rte_telemetry_json_empty_test}
+               };
+
+#define NUM_TESTS RTE_DIM(socket_json_tests)
+
+       for (i = 0; i < NUM_TESTS; i++) {
+               TELEMETRY_LOG_INFO("%s", socket_json_tests[i].test_name);
+               ret = (socket_json_tests[i].test_func_ptr)
+                       (telemetry, fd);
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("%s failed",
+                                       socket_json_tests[i].test_name);
+                       fail_count++;
+               }
+       }
+
+       if (fail_count > 0) {
+               TELEMETRY_LOG_ERR("Failed %i JSON socket message test(s)",
+                               fail_count);
+               return -1;
+       }
+
+       TELEMETRY_LOG_INFO("Success - All JSON tests passed");
+
+       return 0;
+}
+
+int telemetry_log_level;
+
+static struct rte_option option = {
+       .opt_str = "--telemetry",
+       .cb = &rte_telemetry_init,
+       .enabled = 0
+};
+
+RTE_INIT(rte_telemetry_register)
+{
+       telemetry_log_level = rte_log_register("lib.telemetry");
+       if (telemetry_log_level >= 0)
+               rte_log_set_level(telemetry_log_level, RTE_LOG_ERR);
+
+       rte_option_register(&option);
+}
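+
+/*
+ * Note: with the option registered above, an application can request the
+ * telemetry thread on its command line via the "--telemetry" flag; the
+ * invocation below is illustrative only:
+ *
+ *   ./dpdk-app -l 0-1 --telemetry
+ */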
diff --git a/lib/librte_telemetry/rte_telemetry.h b/lib/librte_telemetry/rte_telemetry.h
new file mode 100644 (file)
index 0000000..119db16
--- /dev/null
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdint.h>
+
+#ifndef _RTE_TELEMETRY_H_
+#define _RTE_TELEMETRY_H_
+
+/**
+ * @file
+ * RTE Telemetry
+ *
+ * The telemetry library provides a method to retrieve statistics from
+ * DPDK by sending a JSON-encoded message over a socket. DPDK will send
+ * a JSON-encoded response containing telemetry data.
+ */
+
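+/*
+ * Example exchange (illustrative only; the stats available depend on the
+ * metrics registered at runtime). A request for one stat on port 0:
+ *
+ *   {"action":0,"command":"ports_stats_values_by_name",
+ *    "data":{"ports":[0],"stats":["rx_good_packets"]}}
+ *
+ * The response carries a status string plus a data array of per-port
+ * {"name", "value"} stat objects.
+ */
+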
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Initialize Telemetry
+ *
+ * @return
+ *  0 on successful initialisation.
+ * @return
+ *  -ENOMEM on memory allocation error
+ * @return
+ *  -EPERM on unknown failure
+ * @return
+ *  -EALREADY if Telemetry is already initialised.
+ */
+int32_t __rte_experimental
+rte_telemetry_init(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Clean up and free memory.
+ *
+ * @return
+ *  0 on success
+ * @return
+ *  -EPERM on failure
+ * @return
+ *  -ENODEV if telemetry was not initialised
+ */
+int32_t __rte_experimental
+rte_telemetry_cleanup(void);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Runs various tests to ensure telemetry initialisation and register/unregister
+ * functions are working correctly.
+ *
+ * @return
+ *  0 on success, when all tests pass
+ * @return
+ *  -1 on failure, when any test fails
+ */
+int32_t __rte_experimental
+rte_telemetry_selftest(void);
+
+#endif
diff --git a/lib/librte_telemetry/rte_telemetry_internal.h b/lib/librte_telemetry/rte_telemetry_internal.h
new file mode 100644 (file)
index 0000000..de7afda
--- /dev/null
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <rte_log.h>
+#include <rte_tailq.h>
+
+#ifndef _RTE_TELEMETRY_INTERNAL_H_
+#define _RTE_TELEMETRY_INTERNAL_H_
+
+/* Logging Macros */
+extern int telemetry_log_level;
+
+#define TELEMETRY_LOG(level, fmt, args...) \
+       rte_log(RTE_LOG_ ##level, telemetry_log_level, "%s(): "fmt "\n", \
+               __func__, ##args)
+
+#define TELEMETRY_LOG_ERR(fmt, args...) \
+       TELEMETRY_LOG(ERR, fmt, ## args)
+
+#define TELEMETRY_LOG_WARN(fmt, args...) \
+       TELEMETRY_LOG(WARNING, fmt, ## args)
+
+#define TELEMETRY_LOG_INFO(fmt, args...) \
+       TELEMETRY_LOG(INFO, fmt, ## args)
+
+typedef struct telemetry_client {
+       char *file_path;
+       int fd;
+       TAILQ_ENTRY(telemetry_client) client_list;
+} telemetry_client;
+
+typedef struct telemetry_impl {
+       int accept_fd;
+       int server_fd;
+       pthread_t thread_id;
+       int thread_status;
+       uint32_t socket_id;
+       int reg_index;
+       int metrics_register_done;
+       TAILQ_HEAD(, telemetry_client) client_list_head;
+       struct telemetry_client *request_client;
+       int register_fail_count;
+} telemetry_impl;
+
+enum rte_telemetry_parser_actions {
+       ACTION_GET = 0,
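+       /* value 1 is used as ACTION_POST by rte_telemetry.c */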
+       ACTION_DELETE = 2
+};
+
+int32_t
+rte_telemetry_parse_client_message(struct telemetry_impl *telemetry, char *buf);
+
+int32_t
+rte_telemetry_send_error_response(struct telemetry_impl *telemetry,
+       int error_type);
+
+int32_t
+rte_telemetry_register_client(struct telemetry_impl *telemetry,
+       const char *client_path);
+
+int32_t
+rte_telemetry_unregister_client(struct telemetry_impl *telemetry,
+       const char *client_path);
+
+/**
+ * This is a wrapper for the ethdev API rte_eth_find_next().
+ * If rte_eth_find_next() returns the same port id that we passed it,
+ * then we know that port is active.
+ */
+int32_t
+rte_telemetry_is_port_active(int port_id);
+
+int32_t
+rte_telemetry_send_ports_stats_values(uint32_t *metric_ids, int num_metric_ids,
+       uint32_t *port_ids, int num_port_ids, struct telemetry_impl *telemetry);
+
+int32_t
+rte_telemetry_socket_messaging_testing(int index, int socket);
+
+#endif
diff --git a/lib/librte_telemetry/rte_telemetry_parser.c b/lib/librte_telemetry/rte_telemetry_parser.c
new file mode 100644 (file)
index 0000000..03a58a2
--- /dev/null
@@ -0,0 +1,586 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include <jansson.h>
+
+#include <rte_metrics.h>
+#include <rte_common.h>
+#include <rte_ethdev.h>
+
+#include "rte_telemetry_internal.h"
+
+typedef int (*command_func)(struct telemetry_impl *, int, json_t *);
+
+struct rte_telemetry_command {
+       char *text;
+       command_func fn;
+};
+
+static int32_t
+rte_telemetry_command_clients(struct telemetry_impl *telemetry, int action,
+       json_t *data)
+{
+       int ret;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Invalid telemetry argument");
+               return -1;
+       }
+
+       if (action != ACTION_DELETE) {
+               TELEMETRY_LOG_WARN("Invalid action for this command");
+               goto einval_fail;
+       }
+
+       if (!json_is_object(data)) {
+               TELEMETRY_LOG_WARN("Invalid data provided for this command");
+               goto einval_fail;
+       }
+
+       json_t *client_path = json_object_get(data, "client_path");
+       if (!json_is_string(client_path)) {
+               TELEMETRY_LOG_WARN("Command value is not a string");
+               goto einval_fail;
+       }
+
+       ret = rte_telemetry_unregister_client(telemetry,
+                       json_string_value(client_path));
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not unregister client");
+               goto einval_fail;
+       }
+
+       return 0;
+
+einval_fail:
+       ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -1;
+}
+
+static int32_t
+rte_telemetry_command_ports(struct telemetry_impl *telemetry, int action,
+       json_t *data)
+{
+       int ret;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Invalid telemetry argument");
+               return -1;
+       }
+
+       if (!json_is_null(data)) {
+               TELEMETRY_LOG_WARN("Data should be NULL JSON object for 'ports' command");
+               goto einval_fail;
+       }
+
+       if (action != ACTION_GET) {
+               TELEMETRY_LOG_WARN("Invalid action for this command");
+               goto einval_fail;
+       }
+
+       return 0;
+
+einval_fail:
+       ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -1;
+}
+
+static int32_t
+rte_telemetry_command_ports_details(struct telemetry_impl *telemetry,
+       int action, json_t *data)
+{
+       json_t *value, *port_ids_json = json_object_get(data, "ports");
+       uint64_t num_port_ids = json_array_size(port_ids_json);
+       int ret, port_ids[num_port_ids];
+       RTE_SET_USED(port_ids);
+       size_t index;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Invalid telemetry argument");
+               return -1;
+       }
+
+       if (action != ACTION_GET) {
+               TELEMETRY_LOG_WARN("Invalid action for this command");
+               goto einval_fail;
+       }
+
+       if (!json_is_object(data)) {
+               TELEMETRY_LOG_WARN("Invalid data provided for this command");
+               goto einval_fail;
+       }
+
+       if (!json_is_array(port_ids_json)) {
+               TELEMETRY_LOG_WARN("Invalid Port ID array");
+               goto einval_fail;
+       }
+
+       json_array_foreach(port_ids_json, index, value) {
+               if (!json_is_integer(value)) {
+                       TELEMETRY_LOG_WARN("Port ID given is invalid");
+                       goto einval_fail;
+               }
+               port_ids[index] = json_integer_value(value);
+       }
+
+       return 0;
+
+einval_fail:
+       ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -1;
+}
+
+static int32_t
+rte_telemetry_command_port_stats(struct telemetry_impl *telemetry, int action,
+       json_t *data)
+{
+       int ret;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Invalid telemetry argument");
+               return -1;
+       }
+
+       if (!json_is_null(data)) {
+               TELEMETRY_LOG_WARN("Data should be NULL JSON object for 'port_stats' command");
+               goto einval_fail;
+       }
+
+       if (action != ACTION_GET) {
+               TELEMETRY_LOG_WARN("Invalid action for this command");
+               goto einval_fail;
+       }
+
+       return 0;
+
+einval_fail:
+       ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -1;
+}
+
+static int32_t
+rte_telemetry_stat_names_to_ids(struct telemetry_impl *telemetry,
+       const char * const *stat_names, uint32_t *stat_ids,
+       uint64_t num_stat_names)
+{
+       struct rte_metric_name *names;
+       int ret, num_metrics;
+       uint32_t i, k;
+
+       if (stat_names == NULL) {
+               TELEMETRY_LOG_WARN("Invalid stat_names argument");
+               goto einval_fail;
+       }
+
+       if (num_stat_names <= 0) {
+               TELEMETRY_LOG_WARN("Invalid num_stat_names argument");
+               goto einval_fail;
+       }
+
+       num_metrics = rte_metrics_get_names(NULL, 0);
+       if (num_metrics < 0) {
+               TELEMETRY_LOG_ERR("Cannot get metrics count");
+               goto eperm_fail;
+       } else if (num_metrics == 0) {
+               TELEMETRY_LOG_WARN("No metrics have been registered");
+               goto eperm_fail;
+       }
+
+       names = malloc(sizeof(struct rte_metric_name) * num_metrics);
+       if (names == NULL) {
+               TELEMETRY_LOG_ERR("Cannot allocate memory for names");
+
+               ret = rte_telemetry_send_error_response(telemetry, -ENOMEM);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+
+               return -1;
+       }
+
+       ret = rte_metrics_get_names(names, num_metrics);
+       if (ret < 0 || ret > num_metrics) {
+               TELEMETRY_LOG_ERR("Cannot get metrics names");
+               free(names);
+               goto eperm_fail;
+       }
+
+       k = 0;
+       for (i = 0; i < (uint32_t)num_stat_names; i++) {
+               uint32_t j;
+               for (j = 0; j < (uint32_t)num_metrics; j++) {
+                       if (strcmp(stat_names[i], names[j].name) == 0) {
+                               stat_ids[k] = j;
+                               k++;
+                               break;
+                       }
+               }
+       }
+
+       if (k != num_stat_names) {
+               TELEMETRY_LOG_WARN("Invalid stat names provided");
+               free(names);
+               goto einval_fail;
+       }
+
+       free(names);
+       return 0;
+
+einval_fail:
+       ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -1;
+
+eperm_fail:
+       ret = rte_telemetry_send_error_response(telemetry, -EPERM);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+       return -1;
+}
+
+int32_t
+rte_telemetry_command_ports_all_stat_values(struct telemetry_impl *telemetry,
+        int action, json_t *data)
+{
+       int ret, num_metrics, i, p;
+       struct rte_metric_name *names;
+       uint64_t num_port_ids = 0;
+       uint32_t port_ids[RTE_MAX_ETHPORTS];
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Invalid telemetry argument");
+               return -1;
+       }
+
+       if (action != ACTION_GET) {
+               TELEMETRY_LOG_WARN("Invalid action for this command");
+               ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+               return -1;
+       }
+
+       if (json_is_object(data)) {
+               TELEMETRY_LOG_WARN("Invalid data provided for this command");
+               ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+               return -1;
+       }
+
+       num_metrics = rte_metrics_get_names(NULL, 0);
+       if (num_metrics < 0) {
+               TELEMETRY_LOG_ERR("Cannot get metrics count");
+
+               ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+
+               return -1;
+       } else if (num_metrics == 0) {
+               TELEMETRY_LOG_ERR("No metrics to display (none have been registered)");
+
+               ret = rte_telemetry_send_error_response(telemetry, -EPERM);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+
+               return -1;
+       }
+
+       names = malloc(sizeof(struct rte_metric_name) * num_metrics);
+       if (names == NULL) {
+               TELEMETRY_LOG_ERR("Cannot allocate memory");
+               ret = rte_telemetry_send_error_response(telemetry,
+                        -ENOMEM);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+               return -1;
+       }
+
+       const char *stat_names[num_metrics];
+       uint32_t stat_ids[num_metrics];
+
+       RTE_ETH_FOREACH_DEV(p) {
+               port_ids[num_port_ids] = p;
+               num_port_ids++;
+       }
+
+       if (!num_port_ids) {
+               TELEMETRY_LOG_WARN("No active ports");
+
+               ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+
+               goto fail;
+       }
+
+       ret = rte_metrics_get_names(names, num_metrics);
+       for (i = 0; i < num_metrics; i++)
+               stat_names[i] = names[i].name;
+
+       ret = rte_telemetry_stat_names_to_ids(telemetry, stat_names, stat_ids,
+               num_metrics);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not convert stat names to IDs");
+               goto fail;
+       }
+
+       ret = rte_telemetry_send_ports_stats_values(stat_ids, num_metrics,
+               port_ids, num_port_ids, telemetry);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Sending ports stats values failed");
+               goto fail;
+       }
+
+       free(names);
+       return 0;
+
+fail:
+       free(names);
+       return -1;
+}
+
+int32_t
+rte_telemetry_command_ports_stats_values_by_name(struct telemetry_impl
+       *telemetry, int action, json_t *data)
+{
+       int ret;
+       json_t *port_ids_json = json_object_get(data, "ports");
+       json_t *stat_names_json = json_object_get(data, "stats");
+       uint64_t num_port_ids = json_array_size(port_ids_json);
+       uint64_t num_stat_names = json_array_size(stat_names_json);
+       const char *stat_names[num_stat_names];
+       uint32_t port_ids[num_port_ids], stat_ids[num_stat_names];
+       size_t index;
+       json_t *value;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Invalid telemetry argument");
+               return -1;
+       }
+
+       if (action != ACTION_GET) {
+               TELEMETRY_LOG_WARN("Invalid action for this command");
+               ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+               return -1;
+       }
+
+       if (!json_is_object(data)) {
+               TELEMETRY_LOG_WARN("Invalid data provided for this command");
+               ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+               return -1;
+       }
+
+       if (!json_is_array(port_ids_json) ||
+                !json_is_array(stat_names_json)) {
+               TELEMETRY_LOG_WARN("Invalid input data array(s)");
+               ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+               return -1;
+       }
+
+       json_array_foreach(port_ids_json, index, value) {
+               if (!json_is_integer(value)) {
+                       TELEMETRY_LOG_WARN("Port ID given is not valid");
+                       ret = rte_telemetry_send_error_response(telemetry,
+                               -EINVAL);
+                       if (ret < 0)
+                               TELEMETRY_LOG_ERR("Could not send error");
+                       return -1;
+               }
+               port_ids[index] = json_integer_value(value);
+               ret = rte_telemetry_is_port_active(port_ids[index]);
+               if (ret < 1) {
+                       ret = rte_telemetry_send_error_response(telemetry,
+                               -EINVAL);
+                       if (ret < 0)
+                               TELEMETRY_LOG_ERR("Could not send error");
+                       return -1;
+               }
+       }
+
+       json_array_foreach(stat_names_json, index, value) {
+               if (!json_is_string(value)) {
+                       TELEMETRY_LOG_WARN("Stat Name given is not a string");
+
+                       ret = rte_telemetry_send_error_response(telemetry,
+                                       -EINVAL);
+                       if (ret < 0)
+                               TELEMETRY_LOG_ERR("Could not send error");
+
+                       return -1;
+               }
+               stat_names[index] = json_string_value(value);
+       }
+
+       ret = rte_telemetry_stat_names_to_ids(telemetry, stat_names, stat_ids,
+               num_stat_names);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not convert stat names to IDs");
+               return -1;
+       }
+
+       ret = rte_telemetry_send_ports_stats_values(stat_ids, num_stat_names,
+               port_ids, num_port_ids, telemetry);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Sending ports stats values failed");
+               return -1;
+       }
+
+       return 0;
+}
+
+static int32_t
+rte_telemetry_parse_command(struct telemetry_impl *telemetry, int action,
+       const char *command, json_t *data)
+{
+       int ret;
+       uint32_t i;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Invalid telemetry argument");
+               return -1;
+       }
+
+       struct rte_telemetry_command commands[] = {
+               {
+                       .text = "clients",
+                       .fn = &rte_telemetry_command_clients
+               },
+               {
+                       .text = "ports",
+                       .fn = &rte_telemetry_command_ports
+               },
+               {
+                       .text = "ports_details",
+                       .fn = &rte_telemetry_command_ports_details
+               },
+               {
+                       .text = "port_stats",
+                       .fn = &rte_telemetry_command_port_stats
+               },
+               {
+                       .text = "ports_stats_values_by_name",
+                       .fn = &rte_telemetry_command_ports_stats_values_by_name
+               },
+               {
+                       .text = "ports_all_stat_values",
+                       .fn = &rte_telemetry_command_ports_all_stat_values
+               }
+       };
+
+       const uint32_t num_commands = RTE_DIM(commands);
+
+       for (i = 0; i < num_commands; i++) {
+               if (strcmp(command, commands[i].text) == 0) {
+                       ret = commands[i].fn(telemetry, action, data);
+                       if (ret < 0) {
+                               TELEMETRY_LOG_ERR("Command Function for %s failed",
+                                       commands[i].text);
+                               return -1;
+                       }
+                       return 0;
+               }
+       }
+
+       TELEMETRY_LOG_WARN("\"%s\" command not found", command);
+
+       ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+       if (ret < 0)
+               TELEMETRY_LOG_ERR("Could not send error");
+
+       return -1;
+}
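+
+/*
+ * Illustrative requests for the commands above (action 0 = GET, 2 = DELETE);
+ * the port, stat and path values are examples only:
+ *
+ *   {"action":0,"command":"ports","data":null}
+ *   {"action":0,"command":"ports_stats_values_by_name",
+ *    "data":{"ports":[0],"stats":["rx_good_packets"]}}
+ *   {"action":2,"command":"clients","data":{"client_path":"/path/to/client"}}
+ */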
+
+int32_t __rte_experimental
+rte_telemetry_parse(struct telemetry_impl *telemetry, char *socket_rx_data)
+{
+       int ret, action_int;
+       json_error_t error;
+       json_t *root, *action, *command, *data;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Invalid telemetry argument");
+               return -1;
+       }
+
+       root = json_loads(socket_rx_data, 0, &error);
+       if (root == NULL) {
+               TELEMETRY_LOG_WARN("Could not load JSON object from data passed in : %s",
+                               error.text);
+               ret = rte_telemetry_send_error_response(telemetry, -EPERM);
+               if (ret < 0)
+                       TELEMETRY_LOG_ERR("Could not send error");
+               return -EPERM;
+       } else if (!json_is_object(root)) {
+               TELEMETRY_LOG_WARN("JSON Request is not a JSON object");
+               goto einval_fail;
+       }
+
+       action = json_object_get(root, "action");
+       if (action == NULL) {
+               TELEMETRY_LOG_WARN("Request does not have action field");
+               goto einval_fail;
+       } else if (!json_is_integer(action)) {
+               TELEMETRY_LOG_WARN("Action value is not an integer");
+               goto einval_fail;
+       }
+
+       command = json_object_get(root, "command");
+       if (command == NULL) {
+               TELEMETRY_LOG_WARN("Request does not have command field");
+               goto einval_fail;
+       } else if (!json_is_string(command)) {
+               TELEMETRY_LOG_WARN("Command value is not a string");
+               goto einval_fail;
+       }
+
+       action_int = json_integer_value(action);
+       if (action_int != ACTION_GET && action_int != ACTION_DELETE) {
+               TELEMETRY_LOG_WARN("Invalid action code");
+               goto einval_fail;
+       }
+
+       const char *command_string = json_string_value(command);
+       data = json_object_get(root, "data");
+       if (data == NULL) {
+               TELEMETRY_LOG_WARN("Request does not have data field");
+               goto einval_fail;
+       }
+
+       ret = rte_telemetry_parse_command(telemetry, action_int, command_string,
+               data);
+       json_decref(root);
+       if (ret < 0) {
+               TELEMETRY_LOG_WARN("Could not parse command");
+               return -EINVAL;
+       }
+
+       return 0;
+
+einval_fail:
+       json_decref(root);
+       ret = rte_telemetry_send_error_response(telemetry, -EINVAL);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not send error");
+               return -EPERM;
+       }
+       return -EINVAL;
+}
diff --git a/lib/librte_telemetry/rte_telemetry_parser.h b/lib/librte_telemetry/rte_telemetry_parser.h
new file mode 100644 (file)
index 0000000..b705194
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include "rte_telemetry_internal.h"
+#include "rte_compat.h"
+
+#ifndef _RTE_TELEMETRY_PARSER_H_
+#define _RTE_TELEMETRY_PARSER_H_
+
+int32_t __rte_experimental
+rte_telemetry_parse(struct telemetry_impl *telemetry, char *socket_rx_data);
+
+#endif
diff --git a/lib/librte_telemetry/rte_telemetry_parser_test.c b/lib/librte_telemetry/rte_telemetry_parser_test.c
new file mode 100644 (file)
index 0000000..5fe93fa
--- /dev/null
@@ -0,0 +1,534 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include <jansson.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_tailq.h>
+#include <rte_string_fns.h>
+
+#include "rte_telemetry_parser.h"
+
+enum choices {
+       INV_ACTION_VAL,
+       INV_COMMAND_VAL,
+       INV_DATA_VAL,
+       INV_ACTION_FIELD,
+       INV_COMMAND_FIELD,
+       INV_DATA_FIELD,
+       INV_JSON_FORMAT,
+       VALID_REQ
+};
+
+
+#define TEST_CLIENT "/var/run/dpdk/test_client"
+
+int32_t
+rte_telemetry_create_test_socket(struct telemetry_impl *telemetry,
+       const char *test_client_path)
+{
+       int ret, sockfd;
+       struct sockaddr_un addr = {0};
+       struct telemetry_client *client;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Telemetry argument has not been initialised");
+               return -EINVAL;
+       }
+
+       sockfd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+       if (sockfd < 0) {
+               TELEMETRY_LOG_ERR("Test socket creation failure");
+               return -1;
+       }
+
+       addr.sun_family = AF_UNIX;
+       strlcpy(addr.sun_path, test_client_path, sizeof(addr.sun_path));
+       unlink(test_client_path);
+
+       if (bind(sockfd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+               TELEMETRY_LOG_ERR("Test socket binding failure");
+               return -1;
+       }
+
+       if (listen(sockfd, 1) < 0) {
+               TELEMETRY_LOG_ERR("Listen failure");
+               return -1;
+       }
+
+       ret = rte_telemetry_register_client(telemetry, test_client_path);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Register dummy client failed: %i", ret);
+               return -1;
+       }
+
+       ret = accept(sockfd, NULL, NULL);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Socket accept failed");
+               return -1;
+       }
+
+       TAILQ_FOREACH(client, &telemetry->client_list_head, client_list)
+               telemetry->request_client = client;
+
+       return 0;
+}
+
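+/*
+ * Build the "data" object of a request; e.g. for port_ids = {0} and
+ * stat_names = {"rx_good_packets"} (values illustrative) the result is:
+ *
+ *   {"ports":[0],"stats":["rx_good_packets"]}
+ */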
+int32_t
+rte_telemetry_format_port_stat_ids(int *port_ids, int num_port_ids,
+       const char * const *stat_names, int num_stat_names, json_t **data)
+{
+       int ret;
+       json_t *stat_names_json_array = NULL;
+       json_t *port_ids_json_array = NULL;
+       uint32_t i;
+
+       if (num_port_ids < 0) {
+               TELEMETRY_LOG_ERR("Port Ids Count invalid");
+               goto fail;
+       }
+
+       *data = json_object();
+       if (*data == NULL) {
+               TELEMETRY_LOG_ERR("Data json object creation failed");
+               goto fail;
+       }
+
+       port_ids_json_array = json_array();
+       if (port_ids_json_array == NULL) {
+               TELEMETRY_LOG_ERR("port_ids_json_array creation failed");
+               goto fail;
+       }
+
+       for (i = 0; i < (uint32_t)num_port_ids; i++) {
+               ret = json_array_append_new(port_ids_json_array,
+                               json_integer(port_ids[i]));
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("JSON array creation failed");
+                       goto fail;
+               }
+       }
+
+       ret = json_object_set_new(*data, "ports", port_ids_json_array);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Setting 'ports' value in data object failed");
+               goto fail;
+       }
+
+       if (stat_names) {
+               if (num_stat_names < 0) {
+                       TELEMETRY_LOG_ERR("Stat Names Count invalid");
+                       goto fail;
+               }
+
+               stat_names_json_array = json_array();
+               if (stat_names_json_array == NULL) {
+                       TELEMETRY_LOG_ERR("stat_names_json_array creation failed");
+                       goto fail;
+               }
+
+               for (i = 0; i < (uint32_t)num_stat_names; i++) {
+                       ret = json_array_append_new(stat_names_json_array,
+                                json_string(stat_names[i]));
+                       if (ret < 0) {
+                               TELEMETRY_LOG_ERR("JSON array creation failed");
+                               goto fail;
+                       }
+               }
+
+               ret = json_object_set_new(*data, "stats", stat_names_json_array);
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Setting 'stats' value in data object failed");
+                       goto fail;
+               }
+       }
+
+       return 0;
+
+fail:
+       if (*data)
+               json_decref(*data);
+       if (stat_names_json_array)
+               json_decref(stat_names_json_array);
+       if (port_ids_json_array)
+               json_decref(port_ids_json_array);
+       return -1;
+}
+
+int32_t
+rte_telemetry_create_json_request(int action, char *command,
+       const char *client_path, int *port_ids, int num_port_ids,
+       const char * const *stat_names, int num_stat_names, char **request,
+       int inv_choice)
+{
+       int ret;
+       json_t *root = json_object();
+       json_t *data;
+
+       if (root == NULL) {
+               TELEMETRY_LOG_ERR("Could not create root json object");
+               goto fail;
+       }
+
+       if (inv_choice == INV_ACTION_FIELD) {
+               ret = json_object_set_new(root, "ac--on", json_integer(action));
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Setting invalid action field in root object failed");
+                       goto fail;
+               }
+       } else {
+               ret = json_object_set_new(root, "action", json_integer(action));
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Setting valid action field in root object failed");
+                       goto fail;
+               }
+       }
+
+       if (inv_choice == INV_COMMAND_FIELD) {
+               ret = json_object_set_new(root, "co---nd", json_string(command));
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Setting invalid command field in root object failed");
+                       goto fail;
+               }
+       } else {
+               ret = json_object_set_new(root, "command", json_string(command));
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Setting valid command field in root object failed");
+                       goto fail;
+               }
+       }
+
+       data = json_null();
+       if (client_path) {
+               data = json_object();
+               if (data == NULL) {
+                       TELEMETRY_LOG_ERR("Data json object creation failed");
+                       goto fail;
+               }
+
+               ret = json_object_set_new(data, "client_path",
+                               json_string(client_path));
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Setting valid client_path field in data object failed");
+                       goto fail;
+               }
+
+       } else if (port_ids) {
+               ret = rte_telemetry_format_port_stat_ids(port_ids, num_port_ids,
+                               stat_names, num_stat_names, &data);
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Formatting Port/Stat arrays failed");
+                       goto fail;
+               }
+
+       }
+
+       if (inv_choice == INV_DATA_FIELD) {
+               ret = json_object_set_new(root, "d--a", data);
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Setting invalid data field in data object failed");
+                       goto fail;
+               }
+       } else {
+               ret = json_object_set_new(root, "data", data);
+               if (ret < 0) {
+                       TELEMETRY_LOG_ERR("Setting valid data field in data object failed");
+                       goto fail;
+               }
+       }
+
+       *request = json_dumps(root, 0);
+       if (*request == NULL) {
+               TELEMETRY_LOG_ERR("Converting JSON root object to char* failed");
+               goto fail;
+       }
+
+       json_decref(root);
+       return 0;
+
+fail:
+       if (root)
+               json_decref(root);
+       return -1;
+}
+
+int32_t
+rte_telemetry_send_get_ports_and_stats_request(struct telemetry_impl *telemetry,
+       int action_choice, char *command_choice, int inv_choice)
+{
+       int ret;
+       char *request;
+       char *client_path_data = NULL;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Telemetry argument has not been initialised");
+               return -EINVAL;
+       }
+
+       if (inv_choice == INV_ACTION_VAL)
+               action_choice = -1;
+       else if (inv_choice == INV_COMMAND_VAL)
+               command_choice = "INVALID_COMMAND";
+       else if (inv_choice == INV_DATA_VAL)
+               client_path_data = "INVALID_DATA";
+
+       ret = rte_telemetry_create_json_request(action_choice, command_choice,
+               client_path_data, NULL, -1, NULL, -1, &request, inv_choice);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not create JSON Request");
+               return -1;
+       }
+
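+       /* Deliberately skip the first byte so the request is no longer valid JSON */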
+       if (inv_choice == INV_JSON_FORMAT)
+               request++;
+
+       ret = rte_telemetry_parse(telemetry, request);
+       if (ret < 0) {
+               TELEMETRY_LOG_WARN("Could not parse JSON Request");
+               return -1;
+       }
+
+       return 0;
+}
+
+int32_t
+rte_telemetry_send_get_ports_details_request(struct telemetry_impl *telemetry,
+       int action_choice, int *port_ids, int num_port_ids, int inv_choice)
+{
+       int ret;
+       char *request;
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Telemetry argument has not been initialised");
+               return -EINVAL;
+       }
+
+       char *command = "ports_details";
+
+       if (inv_choice == INV_ACTION_VAL)
+               action_choice = -1;
+       else if (inv_choice == INV_COMMAND_VAL)
+               command = "INVALID_COMMAND";
+       else if (inv_choice == INV_DATA_VAL)
+               port_ids = NULL;
+
+       ret = rte_telemetry_create_json_request(action_choice, command, NULL,
+               port_ids, num_port_ids, NULL, -1, &request, inv_choice);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not create JSON Request");
+               return -1;
+       }
+
+       if (inv_choice == INV_JSON_FORMAT)
+               request++;
+
+       ret = rte_telemetry_parse(telemetry, request);
+       if (ret < 0) {
+               TELEMETRY_LOG_WARN("Could not parse JSON Request");
+               return -1;
+       }
+
+       return 0;
+}
+
+int32_t
+rte_telemetry_send_stats_values_by_name_request(struct telemetry_impl
+       *telemetry, int action_choice, int *port_ids, int num_port_ids,
+       const char * const *stat_names, int num_stat_names,
+       int inv_choice)
+{
+       int ret;
+       char *request;
+       char *command = "ports_stats_values_by_name";
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Telemetry argument has not been initialised");
+               return -EINVAL;
+       }
+
+       if (inv_choice == INV_ACTION_VAL)
+               action_choice = -1;
+       else if (inv_choice == INV_COMMAND_VAL)
+               command = "INVALID_COMMAND";
+       else if (inv_choice == INV_DATA_VAL) {
+               port_ids = NULL;
+               stat_names = NULL;
+       }
+
+       ret = rte_telemetry_create_json_request(action_choice, command, NULL,
+               port_ids, num_port_ids, stat_names, num_stat_names, &request,
+               inv_choice);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not create JSON Request");
+               return -1;
+       }
+
+       if (inv_choice == INV_JSON_FORMAT)
+               request++;
+
+       ret = rte_telemetry_parse(telemetry, request);
+       if (ret < 0) {
+               TELEMETRY_LOG_WARN("Could not parse JSON Request");
+               return -1;
+       }
+
+       return 0;
+}
+
+int32_t
+rte_telemetry_send_unreg_request(struct telemetry_impl *telemetry,
+       int action_choice, const char *client_path, int inv_choice)
+{
+       int ret;
+       char *request;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Telemetry argument has not been initialised");
+               return -EINVAL;
+       }
+
+       char *command = "clients";
+
+       if (inv_choice == INV_ACTION_VAL)
+               action_choice = -1;
+       else if (inv_choice == INV_COMMAND_VAL)
+               command = "INVALID_COMMAND";
+       else if (inv_choice == INV_DATA_VAL)
+               client_path = NULL;
+
+       ret = rte_telemetry_create_json_request(action_choice, command,
+               client_path, NULL, -1, NULL, -1, &request, inv_choice);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not create JSON Request");
+               return -1;
+       }
+
+       if (inv_choice == INV_JSON_FORMAT)
+               request++;
+
+       ret = rte_telemetry_parse(telemetry, request);
+       if (ret < 0) {
+               TELEMETRY_LOG_WARN("Could not parse JSON Request");
+               return -1;
+       }
+
+       return 0;
+}
+
+int32_t
+rte_telemetry_parser_test(struct telemetry_impl *telemetry)
+{
+       int ret;
+       const char *client_path = TEST_CLIENT;
+
+       if (telemetry == NULL) {
+               TELEMETRY_LOG_ERR("Telemetry argument has not been initialised");
+               return -EINVAL;
+       }
+
+       ret = rte_telemetry_create_test_socket(telemetry, client_path);
+       if (ret < 0) {
+               TELEMETRY_LOG_ERR("Could not create test request client socket");
+               return -1;
+       }
+
+       int port_ids[] = {0, 1};
+       int num_port_ids = RTE_DIM(port_ids);
+
+       static const char * const stat_names[] = {"tx_good_packets",
+               "rx_good_packets"};
+       int num_stat_names = RTE_DIM(stat_names);
+
+       static const char * const test_types[] = {
+               "INVALID ACTION VALUE TESTS",
+               "INVALID COMMAND VALUE TESTS",
+               "INVALID DATA VALUE TESTS",
+               "INVALID ACTION FIELD TESTS",
+               "INVALID COMMAND FIELD TESTS",
+               "INVALID DATA FIELD TESTS",
+               "INVALID JSON FORMAT TESTS",
+               "VALID TESTS"
+       };
+
+#define NUM_TEST_TYPES (sizeof(test_types)/sizeof(const char * const))
+
+       uint32_t i;
+       for (i = 0; i < NUM_TEST_TYPES; i++) {
+               TELEMETRY_LOG_INFO("%s", test_types[i]);
+
+               ret = rte_telemetry_send_get_ports_and_stats_request(telemetry,
+                       ACTION_GET, "ports", i);
+               if (ret != 0 && i == VALID_REQ) {
+                       TELEMETRY_LOG_ERR("Get ports valid test failed");
+                       return -EPERM;
+               } else if (ret != -1 && i != VALID_REQ) {
+                       TELEMETRY_LOG_ERR("Get ports invalid test failed");
+                       return -EPERM;
+               }
+
+               TELEMETRY_LOG_INFO("Success - Get ports test passed");
+
+               ret = rte_telemetry_send_get_ports_details_request(telemetry,
+                       ACTION_GET, port_ids, num_port_ids, i);
+               if (ret != 0 && i == VALID_REQ) {
+                       TELEMETRY_LOG_ERR("Get ports details valid test failed");
+                       return -EPERM;
+               } else if (ret != -1 && i != VALID_REQ) {
+                       TELEMETRY_LOG_ERR("Get ports details invalid test failed");
+                       return -EPERM;
+               }
+
+               TELEMETRY_LOG_INFO("Success - Get ports details test passed");
+
+               ret = rte_telemetry_send_get_ports_and_stats_request(telemetry,
+                       ACTION_GET, "port_stats", i);
+               if (ret != 0 && i == VALID_REQ) {
+                       TELEMETRY_LOG_ERR("Get port stats valid test failed");
+                       return -EPERM;
+               } else if (ret != -1 && i != VALID_REQ) {
+                       TELEMETRY_LOG_ERR("Get port stats invalid test failed");
+                       return -EPERM;
+               }
+
+               TELEMETRY_LOG_INFO("Success - Get port stats test passed");
+
+               ret = rte_telemetry_send_stats_values_by_name_request(telemetry,
+                       ACTION_GET, port_ids, num_port_ids, stat_names,
+                       num_stat_names, i);
+               if (ret != 0 && i == VALID_REQ) {
+                       TELEMETRY_LOG_ERR("Get ports stats values by name valid test failed");
+                       return -EPERM;
+               } else if (ret != -1 && i != VALID_REQ) {
+                       TELEMETRY_LOG_ERR("Get ports stats values by name invalid test failed");
+                       return -EPERM;
+               }
+
+               TELEMETRY_LOG_INFO("Success - Get ports stats values by name test passed");
+
+               ret = rte_telemetry_send_unreg_request(telemetry, ACTION_DELETE,
+                       client_path, i);
+               if (ret != 0 && i == VALID_REQ) {
+                       TELEMETRY_LOG_ERR("Deregister valid test failed");
+                       return -EPERM;
+               } else if (ret != -1 && i != VALID_REQ) {
+                       TELEMETRY_LOG_ERR("Deregister invalid test failed");
+                       return -EPERM;
+               }
+
+               TELEMETRY_LOG_INFO("Success - Deregister test passed");
+       }
+
+       return 0;
+}
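For reference, a sketch of the request strings these helpers generate, assuming ACTION_GET == 0 and jansson's default json_dumps() formatting (key order can vary between jansson versions):

	/* i == VALID_REQ: a well-formed request */
	const char *valid_req =
		"{\"action\": 0, \"command\": \"ports\", \"data\": null}";
	/* i == INV_ACTION_FIELD: the field name is corrupted, not the value */
	const char *inv_field_req =
		"{\"ac--on\": 0, \"command\": \"ports\", \"data\": null}";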
diff --git a/lib/librte_telemetry/rte_telemetry_parser_test.h b/lib/librte_telemetry/rte_telemetry_parser_test.h
new file mode 100644 (file)
index 0000000..6ada852
--- /dev/null
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_TELEMETRY_PARSER_TEST_H_
+#define _RTE_TELEMETRY_PARSER_TEST_H_
+
+#include <stdint.h>
+
+#include <jansson.h>
+
+#include "rte_telemetry_internal.h"
+
+int32_t
+rte_telemetry_parser_test(struct telemetry_impl *telemetry);
+
+int32_t
+rte_telemetry_format_port_stat_ids(int *port_ids, int num_port_ids,
+       const char * const *stat_names, int num_stat_names, json_t **data);
+
+int32_t
+rte_telemetry_create_json_request(int action, char *command,
+       const char *client_path, int *port_ids, int num_port_ids,
+       const char * const *stat_names, int num_stat_names, char **request,
+       int inv_choice);
+
+int32_t
+rte_telemetry_send_get_ports_and_stats_request(struct telemetry_impl *telemetry,
+       int action_choice, char *command_choice, int inv_choice);
+
+int32_t
+rte_telemetry_send_get_ports_details_request(struct telemetry_impl *telemetry,
+       int action_choice, int *port_ids, int num_port_ids, int inv_choice);
+
+int32_t
+rte_telemetry_send_stats_values_by_name_request(struct telemetry_impl
+       *telemetry, int action_choice, int *port_ids, int num_port_ids,
+       const char * const *stat_names, int num_stat_names,
+       int inv_choice);
+
+int32_t
+rte_telemetry_send_unreg_request(struct telemetry_impl *telemetry,
+       int action_choice, const char *client_path, int inv_choice);
+
+#endif
diff --git a/lib/librte_telemetry/rte_telemetry_socket_tests.h b/lib/librte_telemetry/rte_telemetry_socket_tests.h
new file mode 100644 (file)
index 0000000..db9167c
--- /dev/null
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_TELEMETRY_SOCKET_TESTING_H_
+#define _RTE_TELEMETRY_SOCKET_TESTING_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "rte_telemetry_internal.h"
+
+int32_t
+rte_telemetry_json_socket_message_test(struct telemetry_impl *telemetry,
+       int fd);
+
+int32_t
+rte_telemetry_invalid_json_test(struct telemetry_impl *telemetry, int fd);
+
+int32_t
+rte_telemetry_valid_json_test(struct telemetry_impl *telemetry, int fd);
+
+int32_t
+rte_telemetry_json_contents_test(struct telemetry_impl *telemetry, int fd);
+
+int32_t
+rte_telemetry_json_empty_test(struct telemetry_impl *telemetry, int fd);
+
+int32_t
+rte_telemetry_socket_register_test(struct telemetry_impl *telemetry, int *fd,
+       int send_fd, int recv_fd);
+
+int32_t
+rte_telemetry_socket_test_setup(struct telemetry_impl *telemetry, int *send_fd,
+       int *recv_fd);
+
+#endif
diff --git a/lib/librte_telemetry/rte_telemetry_version.map b/lib/librte_telemetry/rte_telemetry_version.map
new file mode 100644 (file)
index 0000000..fa62d77
--- /dev/null
@@ -0,0 +1,10 @@
+EXPERIMENTAL {
+       global:
+
+       rte_telemetry_cleanup;
+       rte_telemetry_init;
+       rte_telemetry_parse;
+       rte_telemetry_selftest;
+
+       local: *;
+};
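A brief sketch of the exported lifecycle; the void prototypes below are assumptions based on rte_telemetry.h in this release, which is not shown in this hunk:

	#include <rte_telemetry.h>

	int run_with_telemetry(void)
	{
		/* spawns the telemetry thread and its UNIX socket */
		if (rte_telemetry_init() < 0)
			return -1;

		/* ... application work; clients connect and send JSON
		 * requests, dispatched through rte_telemetry_parse() ... */

		return rte_telemetry_cleanup();
	}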
index de431fb..5dd3189 100644 (file)
@@ -13,13 +13,13 @@ LIBABIVER := 4
 CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64
 CFLAGS += -I vhost_user
+CFLAGS += -fno-strict-aliasing
 LDLIBS += -lpthread
 
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y)
 LDLIBS += -lnuma
 endif
-LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net \
-                                       -lrte_cryptodev -lrte_hash
+LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
@@ -30,6 +30,7 @@ SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h
 
 # only compile vhost crypto when cryptodev is enabled
 ifeq ($(CONFIG_RTE_LIBRTE_CRYPTODEV),y)
+LDLIBS += -lrte_cryptodev -lrte_hash
 SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_crypto.c
 SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost_crypto.h
 endif
index bd62e0e..e33e6fc 100644 (file)
@@ -7,8 +7,11 @@ endif
 if has_libnuma == 1
        dpdk_conf.set10('RTE_LIBRTE_VHOST_NUMA', true)
 endif
+dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY',
+             cc.has_header('linux/userfaultfd.h'))
 version = 4
 allow_experimental_apis = true
+cflags += '-fno-strict-aliasing'
 sources = files('fd_man.c', 'iotlb.c', 'socket.c', 'vdpa.c',
                'vhost.c', 'vhost_user.c',
                'virtio_net.c', 'vhost_crypto.c')
index 90465ca..a418da4 100644 (file)
@@ -21,67 +21,138 @@ enum vdpa_addr_type {
        VDPA_ADDR_MAX
 };
 
+/**
+ * vdpa device address
+ */
 struct rte_vdpa_dev_addr {
+       /** vdpa address type */
        enum vdpa_addr_type type;
+
+       /** vdpa pci address */
        union {
                uint8_t __dummy[64];
                struct rte_pci_addr pci_addr;
        };
 };
 
+/**
+ * vdpa device operations
+ */
 struct rte_vdpa_dev_ops {
-       /* Get capabilities of this device */
+       /** Get capabilities of this device */
        int (*get_queue_num)(int did, uint32_t *queue_num);
+
+       /** Get supported features of this device */
        int (*get_features)(int did, uint64_t *features);
+
+       /** Get supported protocol features of this device */
        int (*get_protocol_features)(int did, uint64_t *protocol_features);
 
-       /* Driver configure/close the device */
+       /** Driver configure/close the device */
        int (*dev_conf)(int vid);
        int (*dev_close)(int vid);
 
-       /* Enable/disable this vring */
+       /** Enable/disable this vring */
        int (*set_vring_state)(int vid, int vring, int state);
 
-       /* Set features when changed */
+       /** Set features when changed */
        int (*set_features)(int vid);
 
-       /* Destination operations when migration done */
+       /** Destination operations when migration done */
        int (*migration_done)(int vid);
 
-       /* Get the vfio group fd */
+       /** Get the vfio group fd */
        int (*get_vfio_group_fd)(int vid);
 
-       /* Get the vfio device fd */
+       /** Get the vfio device fd */
        int (*get_vfio_device_fd)(int vid);
 
-       /* Get the notify area info of the queue */
+       /** Get the notify area info of the queue */
        int (*get_notify_area)(int vid, int qid,
                        uint64_t *offset, uint64_t *size);
 
-       /* Reserved for future extension */
+       /** Reserved for future extension */
        void *reserved[5];
 };
 
+/**
+ * vdpa device structure includes device address and device operations.
+ */
 struct rte_vdpa_device {
+       /** vdpa device address */
        struct rte_vdpa_dev_addr addr;
+       /** vdpa device operations */
        struct rte_vdpa_dev_ops *ops;
 } __rte_cache_aligned;
 
-/* Register a vdpa device, return did if successful, -1 on failure */
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Register a vdpa device
+ *
+ * @param addr
+ *  the vdpa device address
+ * @param ops
+ *  the vdpa device operations
+ * @return
+ *  device id on success, -1 on failure
+ */
 int __rte_experimental
 rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr,
                struct rte_vdpa_dev_ops *ops);
 
-/* Unregister a vdpa device, return -1 on failure */
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Unregister a vdpa device
+ *
+ * @param did
+ *  vdpa device id
+ * @return
+ *  device id on success, -1 on failure
+ */
 int __rte_experimental
 rte_vdpa_unregister_device(int did);
 
-/* Find did of a vdpa device, return -1 on failure */
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Find the device id of a vdpa device
+ *
+ * @param addr
+ *  the vdpa device address
+ * @return
+ *  device id on success, -1 on failure
+ */
 int __rte_experimental
 rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr);
 
-/* Find a vdpa device based on did */
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Find a vdpa device based on device id
+ *
+ * @param did
+ *  device id
+ * @return
+ *  rte_vdpa_device on success, NULL on failure
+ */
 struct rte_vdpa_device * __rte_experimental
 rte_vdpa_get_device(int did);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * Get the number of currently available vdpa devices
+ *
+ * @return
+ *  the number of available vdpa devices
+ */
+int __rte_experimental
+rte_vdpa_get_device_num(void);
 #endif /* _RTE_VDPA_H_ */
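A minimal registration sketch against the API documented above; PCI_ADDR is assumed from the part of enum vdpa_addr_type elided by this hunk, and the my_* callbacks are hypothetical stubs:

	static int my_get_queue_num(int did, uint32_t *queue_num);
	static int my_get_features(int did, uint64_t *features);

	static struct rte_vdpa_dev_ops my_ops = {
		.get_queue_num = my_get_queue_num,
		.get_features = my_get_features,
		/* remaining callbacks omitted for brevity */
	};

	int my_vdpa_probe(const struct rte_pci_addr *pci)
	{
		struct rte_vdpa_dev_addr addr = {
			.type = PCI_ADDR,
			.pci_addr = *pci,
		};

		/* returns the new device id, or -1 on failure */
		return rte_vdpa_register_device(&addr, &my_ops);
	}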
index b02673d..d280ac4 100644 (file)
@@ -28,6 +28,7 @@ extern "C" {
 #define RTE_VHOST_USER_NO_RECONNECT    (1ULL << 1)
 #define RTE_VHOST_USER_DEQUEUE_ZERO_COPY       (1ULL << 2)
 #define RTE_VHOST_USER_IOMMU_SUPPORT   (1ULL << 3)
+#define RTE_VHOST_USER_POSTCOPY_SUPPORT                (1ULL << 4)
 
 /** Protocol features. */
 #ifndef VHOST_USER_PROTOCOL_F_MQ
@@ -58,6 +59,10 @@ extern "C" {
 #define VHOST_USER_PROTOCOL_F_CRYPTO_SESSION 7
 #endif
 
+#ifndef VHOST_USER_PROTOCOL_F_PAGEFAULT
+#define VHOST_USER_PROTOCOL_F_PAGEFAULT 8
+#endif
+
 #ifndef VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD
 #define VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD 10
 #endif
index da220dd..ae39b6e 100644 (file)
@@ -67,6 +67,7 @@ EXPERIMENTAL {
        rte_vdpa_unregister_device;
        rte_vdpa_find_device_id;
        rte_vdpa_get_device;
+       rte_vdpa_get_device_num;
        rte_vhost_driver_attach_vdpa_device;
        rte_vhost_driver_detach_vdpa_device;
        rte_vhost_driver_get_vdpa_device_id;
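Usage sketch for the newly exported rte_vdpa_get_device_num(); the upper bound on device ids here is an arbitrary illustration, since id allocation is internal to the library:

	void count_vdpa_devices(void)
	{
		int found = 0, total = rte_vdpa_get_device_num();
		int did;

		for (did = 0; found < total && did < 1024; did++) {
			struct rte_vdpa_device *dev = rte_vdpa_get_device(did);

			if (dev != NULL)
				found++; /* inspect dev->addr and dev->ops here */
		}
	}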
index d630317..01b60ff 100644 (file)
@@ -51,6 +51,8 @@ struct vhost_user_socket {
        uint64_t supported_features;
        uint64_t features;
 
+       uint64_t protocol_features;
+
        /*
         * Device id to identify a specific backend device.
         * It's set to -1 for the default software implementation.
@@ -94,18 +96,23 @@ static struct vhost_user vhost_user = {
        .mutex = PTHREAD_MUTEX_INITIALIZER,
 };
 
-/* return bytes# of read on success or negative val on failure. */
+/*
+ * Return the number of bytes read on success, or a negative value on
+ * failure. *fd_num is updated with the number of fds received.
+ */
 int
-read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
+read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds,
+               int *fd_num)
 {
        struct iovec iov;
        struct msghdr msgh;
-       size_t fdsize = fd_num * sizeof(int);
-       char control[CMSG_SPACE(fdsize)];
+       char control[CMSG_SPACE(max_fds * sizeof(int))];
        struct cmsghdr *cmsg;
        int got_fds = 0;
        int ret;
 
+       *fd_num = 0;
+
        memset(&msgh, 0, sizeof(msgh));
        iov.iov_base = buf;
        iov.iov_len  = buflen;
@@ -131,13 +138,14 @@ read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num)
                if ((cmsg->cmsg_level == SOL_SOCKET) &&
                        (cmsg->cmsg_type == SCM_RIGHTS)) {
                        got_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+                       *fd_num = got_fds;
                        memcpy(fds, CMSG_DATA(cmsg), got_fds * sizeof(int));
                        break;
                }
        }
 
        /* Clear out unused file descriptors */
-       while (got_fds < fd_num)
+       while (got_fds < max_fds)
                fds[got_fds++] = -1;
 
        return ret;
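For context, the peer that sends such an fd-carrying message has to build the matching SCM_RIGHTS control block; a self-contained sketch (vhost's own sending helper is not shown in this hunk):

	#include <string.h>
	#include <sys/socket.h>
	#include <sys/uio.h>

	static ssize_t
	send_fds(int sockfd, void *buf, size_t len, int *fds, int fd_num)
	{
		struct iovec iov = { .iov_base = buf, .iov_len = len };
		char control[CMSG_SPACE(fd_num * sizeof(int))];
		struct msghdr msgh;
		struct cmsghdr *cmsg;

		memset(&msgh, 0, sizeof(msgh));
		msgh.msg_iov = &iov;
		msgh.msg_iovlen = 1;
		msgh.msg_control = control;
		msgh.msg_controllen = sizeof(control);

		cmsg = CMSG_FIRSTHDR(&msgh);
		cmsg->cmsg_level = SOL_SOCKET;
		cmsg->cmsg_type = SCM_RIGHTS;
		cmsg->cmsg_len = CMSG_LEN(fd_num * sizeof(int));
		memcpy(CMSG_DATA(cmsg), fds, fd_num * sizeof(int));

		return sendmsg(sockfd, &msgh, 0);
	}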
@@ -720,7 +728,7 @@ rte_vhost_driver_get_protocol_features(const char *path,
        did = vsocket->vdpa_dev_id;
        vdpa_dev = rte_vdpa_get_device(did);
        if (!vdpa_dev || !vdpa_dev->ops->get_protocol_features) {
-               *protocol_features = VHOST_USER_PROTOCOL_FEATURES;
+               *protocol_features = vsocket->protocol_features;
                goto unlock_exit;
        }
 
@@ -733,7 +741,7 @@ rte_vhost_driver_get_protocol_features(const char *path,
                goto unlock_exit;
        }
 
-       *protocol_features = VHOST_USER_PROTOCOL_FEATURES
+       *protocol_features = vsocket->protocol_features
                & vdpa_protocol_features;
 
 unlock_exit:
@@ -852,11 +860,21 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
        vsocket->use_builtin_virtio_net = true;
        vsocket->supported_features = VIRTIO_NET_SUPPORTED_FEATURES;
        vsocket->features           = VIRTIO_NET_SUPPORTED_FEATURES;
+       vsocket->protocol_features  = VHOST_USER_PROTOCOL_FEATURES;
 
-       /* Dequeue zero copy can't assure descriptors returned in order */
+       /*
+        * Dequeue zero copy can't assure descriptors returned in order.
+        * Also, it requires that the guest memory is populated, which is
+        * not compatible with postcopy.
+        */
        if (vsocket->dequeue_zero_copy) {
                vsocket->supported_features &= ~(1ULL << VIRTIO_F_IN_ORDER);
                vsocket->features &= ~(1ULL << VIRTIO_F_IN_ORDER);
+
+               RTE_LOG(INFO, VHOST_CONFIG,
+                       "Dequeue zero copy requested, disabling postcopy support\n");
+               vsocket->protocol_features &=
+                       ~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
        }
 
        if (!(flags & RTE_VHOST_USER_IOMMU_SUPPORT)) {
@@ -864,6 +882,18 @@ rte_vhost_driver_register(const char *path, uint64_t flags)
                vsocket->features &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM);
        }
 
+       if (!(flags & RTE_VHOST_USER_POSTCOPY_SUPPORT)) {
+               vsocket->protocol_features &=
+                       ~(1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT);
+       } else {
+#ifndef RTE_LIBRTE_VHOST_POSTCOPY
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "Postcopy requested but support was not compiled in\n");
+               ret = -1;
+               goto out_mutex;
+#endif
+       }
+
        if ((flags & RTE_VHOST_USER_CLIENT) != 0) {
                vsocket->reconnect = !(flags & RTE_VHOST_USER_NO_RECONNECT);
                if (vsocket->reconnect && reconn_tid == 0) {
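Usage sketch for the new flag; the socket path is a placeholder and handle_error() is hypothetical:

	uint64_t flags = RTE_VHOST_USER_POSTCOPY_SUPPORT;

	/* Registration fails with -1 when DPDK was built without
	 * RTE_LIBRTE_VHOST_POSTCOPY, i.e. linux/userfaultfd.h was
	 * not available at build time. */
	if (rte_vhost_driver_register("/tmp/vhost-user.sock", flags) < 0)
		handle_error();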
index c82fd43..c2c5dff 100644 (file)
@@ -113,3 +113,9 @@ rte_vdpa_get_device(int did)
 
        return vdpa_devices[did];
 }
+
+int
+rte_vdpa_get_device_num(void)
+{
+       return vdpa_device_num;
+}
index 3c9be10..70ac6bc 100644 (file)
@@ -8,6 +8,7 @@
 #include <stdint.h>
 #include <stdlib.h>
 #ifdef RTE_LIBRTE_VHOST_NUMA
+#include <numa.h>
 #include <numaif.h>
 #endif
 
@@ -343,6 +344,7 @@ vhost_new_device(void)
        dev->flags = VIRTIO_DEV_BUILTIN_VIRTIO_NET;
        dev->slave_req_fd = -1;
        dev->vdpa_dev_id = -1;
+       dev->postcopy_ufd = -1;
        rte_spinlock_init(&dev->slave_req_lock);
 
        return i;
@@ -480,7 +482,7 @@ rte_vhost_get_numa_node(int vid)
        int numa_node;
        int ret;
 
-       if (dev == NULL)
+       if (dev == NULL || numa_available() != 0)
                return -1;
 
        ret = get_mempolicy(&numa_node, NULL, 0, dev,
@@ -646,12 +648,18 @@ rte_vhost_avail_entries(int vid, uint16_t queue_id)
 }
 
 static inline void
-vhost_enable_notify_split(struct vhost_virtqueue *vq, int enable)
+vhost_enable_notify_split(struct virtio_net *dev,
+               struct vhost_virtqueue *vq, int enable)
 {
-       if (enable)
-               vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
-       else
-               vq->used->flags |= VRING_USED_F_NO_NOTIFY;
+       if (!(dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX))) {
+               if (enable)
+                       vq->used->flags &= ~VRING_USED_F_NO_NOTIFY;
+               else
+                       vq->used->flags |= VRING_USED_F_NO_NOTIFY;
+       } else {
+               if (enable)
+                       vhost_avail_event(vq) = vq->last_avail_idx;
+       }
 }
 
 static inline void
@@ -660,8 +668,10 @@ vhost_enable_notify_packed(struct virtio_net *dev,
 {
        uint16_t flags;
 
-       if (!enable)
+       if (!enable) {
                vq->device_event->flags = VRING_EVENT_F_DISABLE;
+               return;
+       }
 
        flags = VRING_EVENT_F_ENABLE;
        if (dev->features & (1ULL << VIRTIO_RING_F_EVENT_IDX)) {
@@ -689,7 +699,7 @@ rte_vhost_enable_guest_notification(int vid, uint16_t queue_id, int enable)
        if (vq_is_packed(dev))
                vhost_enable_notify_packed(dev, vq, enable);
        else
-               vhost_enable_notify_split(vq, enable);
+               vhost_enable_notify_split(dev, vq, enable);
 
        return 0;
 }
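The event-index branch above relies on the trigger test from the virtio specification: the guest driver kicks only when the new available index steps past the event index the device published through vhost_avail_event(). A sketch of that test:

	static inline int
	vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old_idx)
	{
		/* true when event_idx lies in the half-open window
		 * (old_idx, new_idx], computed in modulo-2^16 arithmetic */
		return (uint16_t)(new_idx - event_idx - 1) <
			(uint16_t)(new_idx - old_idx);
	}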
index 760a09c..b4abad3 100644 (file)
@@ -284,6 +284,16 @@ struct guest_page {
        uint64_t size;
 };
 
+/* The possible results of a message handling function */
+enum vh_result {
+       /* Message handling failed */
+       VH_RESULT_ERR   = -1,
+       /* Message handling successful */
+       VH_RESULT_OK    =  0,
+       /* Message handling successful and reply prepared */
+       VH_RESULT_REPLY =  1,
+};
+
 /**
  * function prototype for the vhost backend to handler specific vhost user
  * messages prior to the master message handling
@@ -292,17 +302,15 @@ struct guest_page {
  *  vhost device id
  * @param msg
  *  Message pointer.
- * @param require_reply
- *  If the handler requires sending a reply, this varaible shall be written 1,
- *  otherwise 0.
  * @param skip_master
  *  If the handler requires skipping the master message handling, this variable
  *  shall be written 1, otherwise 0.
  * @return
- *  0 on success, -1 on failure
+ *  VH_RESULT_OK on success, VH_RESULT_REPLY on success with reply,
+ *  VH_RESULT_ERR on failure
  */
-typedef int (*vhost_msg_pre_handle)(int vid, void *msg,
-               uint32_t *require_reply, uint32_t *skip_master);
+typedef enum vh_result (*vhost_msg_pre_handle)(int vid, void *msg,
+               uint32_t *skip_master);
 
 /**
  * function prototype for the vhost backend to handler specific vhost user
@@ -312,14 +320,11 @@ typedef int (*vhost_msg_pre_handle)(int vid, void *msg,
  *  vhost device id
  * @param msg
  *  Message pointer.
- * @param require_reply
- *  If the handler requires sending a reply, this varaible shall be written 1,
- *  otherwise 0.
  * @return
- *  0 on success, -1 on failure
+ *  VH_RESULT_OK on success, VH_RESULT_REPLY on success with reply,
+ *  VH_RESULT_ERR on failure
  */
-typedef int (*vhost_msg_post_handle)(int vid, void *msg,
-               uint32_t *require_reply);
+typedef enum vh_result (*vhost_msg_post_handle)(int vid, void *msg);
 
 /**
  * pre and post vhost user message handlers
@@ -363,6 +368,9 @@ struct virtio_net {
        int                     slave_req_fd;
        rte_spinlock_t          slave_req_lock;
 
+       int                     postcopy_ufd;
+       int                     postcopy_listening;
+
        /*
         * Device id to identify a specific backend device.
         * It's set to -1 for the default software implementation.
@@ -648,6 +656,8 @@ vhost_iova_to_vva(struct virtio_net *dev, struct vhost_virtqueue *vq,
        return __vhost_iova_to_vva(dev, vq, iova, len, perm);
 }
 
+#define vhost_avail_event(vr) \
+       (*(volatile uint16_t*)&(vr)->used->ring[(vr)->size])
 #define vhost_used_event(vr) \
        (*(volatile uint16_t*)&(vr)->avail->ring[(vr)->size])
 
index 57341ef..9811a23 100644 (file)
@@ -425,35 +425,34 @@ vhost_crypto_close_sess(struct vhost_crypto *vcrypto, uint64_t session_id)
        return 0;
 }
 
-static int
-vhost_crypto_msg_post_handler(int vid, void *msg, uint32_t *require_reply)
+static enum vh_result
+vhost_crypto_msg_post_handler(int vid, void *msg)
 {
        struct virtio_net *dev = get_device(vid);
        struct vhost_crypto *vcrypto;
        VhostUserMsg *vmsg = msg;
-       int ret = 0;
+       enum vh_result ret = VH_RESULT_OK;
 
-       if (dev == NULL || require_reply == NULL) {
+       if (dev == NULL) {
                VC_LOG_ERR("Invalid vid %i", vid);
-               return -EINVAL;
+               return VH_RESULT_ERR;
        }
 
        vcrypto = dev->extern_data;
        if (vcrypto == NULL) {
                VC_LOG_ERR("Cannot find required data, is it initialized?");
-               return -ENOENT;
+               return VH_RESULT_ERR;
        }
 
-       *require_reply = 0;
-
        if (vmsg->request.master == VHOST_USER_CRYPTO_CREATE_SESS) {
                vhost_crypto_create_sess(vcrypto,
                                &vmsg->payload.crypto_session);
-               *require_reply = 1;
-       } else if (vmsg->request.master == VHOST_USER_CRYPTO_CLOSE_SESS)
-               ret = vhost_crypto_close_sess(vcrypto, vmsg->payload.u64);
-       else
-               ret = -EINVAL;
+               vmsg->fd_num = 0;
+               ret = VH_RESULT_REPLY;
+       } else if (vmsg->request.master == VHOST_USER_CRYPTO_CLOSE_SESS) {
+               if (vhost_crypto_close_sess(vcrypto, vmsg->payload.u64))
+                       ret = VH_RESULT_ERR;
+       }
 
        return ret;
 }
index a2d4c9f..508228a 100644 (file)
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/syscall.h>
 #include <assert.h>
 #ifdef RTE_LIBRTE_VHOST_NUMA
 #include <numaif.h>
 #endif
+#ifdef RTE_LIBRTE_VHOST_POSTCOPY
+#include <linux/userfaultfd.h>
+#endif
 
 #include <rte_common.h>
 #include <rte_malloc.h>
@@ -69,8 +75,14 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
        [VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
        [VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
        [VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
+       [VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
+       [VHOST_USER_POSTCOPY_LISTEN]  = "VHOST_USER_POSTCOPY_LISTEN",
+       [VHOST_USER_POSTCOPY_END]  = "VHOST_USER_POSTCOPY_END",
 };
 
+static int send_vhost_reply(int sockfd, struct VhostUserMsg *msg);
+static int read_vhost_message(int sockfd, struct VhostUserMsg *msg);
+
 static uint64_t
 get_blk_size(int fd)
 {
@@ -120,6 +132,13 @@ vhost_backend_cleanup(struct virtio_net *dev)
                close(dev->slave_req_fd);
                dev->slave_req_fd = -1;
        }
+
+       if (dev->postcopy_ufd >= 0) {
+               close(dev->postcopy_ufd);
+               dev->postcopy_ufd = -1;
+       }
+
+       dev->postcopy_listening = 0;
 }
 
 /*
@@ -127,51 +146,73 @@ vhost_backend_cleanup(struct virtio_net *dev)
  * the device hasn't been initialised.
  */
 static int
-vhost_user_set_owner(void)
+vhost_user_set_owner(struct virtio_net **pdev __rte_unused,
+                       struct VhostUserMsg *msg __rte_unused,
+                       int main_fd __rte_unused)
 {
-       return 0;
+       return VH_RESULT_OK;
 }
 
 static int
-vhost_user_reset_owner(struct virtio_net *dev)
+vhost_user_reset_owner(struct virtio_net **pdev,
+                       struct VhostUserMsg *msg __rte_unused,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
        vhost_destroy_device_notify(dev);
 
        cleanup_device(dev, 0);
        reset_device(dev);
-       return 0;
+       return VH_RESULT_OK;
 }
 
 /*
  * The features that we support are requested.
  */
-static uint64_t
-vhost_user_get_features(struct virtio_net *dev)
+static int
+vhost_user_get_features(struct virtio_net **pdev, struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
        uint64_t features = 0;
 
        rte_vhost_driver_get_features(dev->ifname, &features);
-       return features;
+
+       msg->payload.u64 = features;
+       msg->size = sizeof(msg->payload.u64);
+       msg->fd_num = 0;
+
+       return VH_RESULT_REPLY;
 }
 
 /*
  * The queue number that we support are requested.
  */
-static uint32_t
-vhost_user_get_queue_num(struct virtio_net *dev)
+static int
+vhost_user_get_queue_num(struct virtio_net **pdev, struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
        uint32_t queue_num = 0;
 
        rte_vhost_driver_get_queue_num(dev->ifname, &queue_num);
-       return queue_num;
+
+       msg->payload.u64 = (uint64_t)queue_num;
+       msg->size = sizeof(msg->payload.u64);
+       msg->fd_num = 0;
+
+       return VH_RESULT_REPLY;
 }
 
 /*
  * We receive the negotiated features supported by us and the virtio device.
  */
 static int
-vhost_user_set_features(struct virtio_net *dev, uint64_t features)
+vhost_user_set_features(struct virtio_net **pdev, struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
+       uint64_t features = msg->payload.u64;
        uint64_t vhost_features = 0;
        struct rte_vdpa_device *vdpa_dev;
        int did = -1;
@@ -181,12 +222,12 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features)
                RTE_LOG(ERR, VHOST_CONFIG,
                        "(%d) received invalid negotiated features.\n",
                        dev->vid);
-               return -1;
+               return VH_RESULT_ERR;
        }
 
        if (dev->flags & VIRTIO_DEV_RUNNING) {
                if (dev->features == features)
-                       return 0;
+                       return VH_RESULT_OK;
 
                /*
                 * Error out if master tries to change features while device is
@@ -197,7 +238,7 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features)
                        RTE_LOG(ERR, VHOST_CONFIG,
                                "(%d) features changed while device is running.\n",
                                dev->vid);
-                       return -1;
+                       return VH_RESULT_ERR;
                }
 
                if (dev->notify_ops->features_changed)
@@ -242,16 +283,18 @@ vhost_user_set_features(struct virtio_net *dev, uint64_t features)
        if (vdpa_dev && vdpa_dev->ops->set_features)
                vdpa_dev->ops->set_features(dev->vid);
 
-       return 0;
+       return VH_RESULT_OK;
 }
 
 /*
  * The virtio device sends us the size of the descriptor ring.
  */
 static int
-vhost_user_set_vring_num(struct virtio_net *dev,
-                        VhostUserMsg *msg)
+vhost_user_set_vring_num(struct virtio_net **pdev,
+                       struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
        struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index];
 
        vq->size = msg->payload.state.num;
@@ -264,7 +307,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
        if ((vq->size & (vq->size - 1)) || vq->size > 32768) {
                RTE_LOG(ERR, VHOST_CONFIG,
                        "invalid virtqueue size %u\n", vq->size);
-               return -1;
+               return VH_RESULT_ERR;
        }
 
        if (dev->dequeue_zero_copy) {
@@ -290,7 +333,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
                if (!vq->shadow_used_packed) {
                        RTE_LOG(ERR, VHOST_CONFIG,
                                        "failed to allocate memory for shadow used ring.\n");
-                       return -1;
+                       return VH_RESULT_ERR;
                }
 
        } else {
@@ -300,7 +343,7 @@ vhost_user_set_vring_num(struct virtio_net *dev,
                if (!vq->shadow_used_split) {
                        RTE_LOG(ERR, VHOST_CONFIG,
                                        "failed to allocate memory for shadow used ring.\n");
-                       return -1;
+                       return VH_RESULT_ERR;
                }
        }
 
@@ -310,10 +353,10 @@ vhost_user_set_vring_num(struct virtio_net *dev,
        if (!vq->batch_copy_elems) {
                RTE_LOG(ERR, VHOST_CONFIG,
                        "failed to allocate memory for batching copy.\n");
-               return -1;
+               return VH_RESULT_ERR;
        }
 
-       return 0;
+       return VH_RESULT_OK;
 }
 
 /*
@@ -357,11 +400,13 @@ numa_realloc(struct virtio_net *dev, int index)
                memcpy(vq, old_vq, sizeof(*vq));
                TAILQ_INIT(&vq->zmbuf_list);
 
-               new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
-                       sizeof(struct zcopy_mbuf), 0, newnode);
-               if (new_zmbuf) {
-                       rte_free(vq->zmbufs);
-                       vq->zmbufs = new_zmbuf;
+               if (dev->dequeue_zero_copy) {
+                       new_zmbuf = rte_malloc_socket(NULL, vq->zmbuf_size *
+                                       sizeof(struct zcopy_mbuf), 0, newnode);
+                       if (new_zmbuf) {
+                               rte_free(vq->zmbufs);
+                               vq->zmbufs = new_zmbuf;
+                       }
                }
 
                if (vq_is_packed(dev)) {
@@ -609,14 +654,15 @@ translate_ring_addresses(struct virtio_net *dev, int vq_index)
  * This function then converts these to our address space.
  */
 static int
-vhost_user_set_vring_addr(struct virtio_net **pdev, VhostUserMsg *msg)
+vhost_user_set_vring_addr(struct virtio_net **pdev, struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
        struct vhost_virtqueue *vq;
        struct vhost_vring_addr *addr = &msg->payload.addr;
-       struct virtio_net *dev = *pdev;
 
        if (dev->mem == NULL)
-               return -1;
+               return VH_RESULT_ERR;
 
        /* addr->index refers to the queue index. The txq 1, rxq is 0. */
        vq = dev->virtqueue[msg->payload.addr.index];
@@ -633,27 +679,29 @@ vhost_user_set_vring_addr(struct virtio_net **pdev, VhostUserMsg *msg)
                                (1ULL << VHOST_USER_F_PROTOCOL_FEATURES))) {
                dev = translate_ring_addresses(dev, msg->payload.addr.index);
                if (!dev)
-                       return -1;
+                       return VH_RESULT_ERR;
 
                *pdev = dev;
        }
 
-       return 0;
+       return VH_RESULT_OK;
 }
 
 /*
  * The virtio device sends us the available ring last used index.
  */
 static int
-vhost_user_set_vring_base(struct virtio_net *dev,
-                         VhostUserMsg *msg)
+vhost_user_set_vring_base(struct virtio_net **pdev,
+                       struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
        dev->virtqueue[msg->payload.state.index]->last_used_idx  =
                        msg->payload.state.num;
        dev->virtqueue[msg->payload.state.index]->last_avail_idx =
                        msg->payload.state.num;
 
-       return 0;
+       return VH_RESULT_OK;
 }
 
 static int
@@ -778,10 +826,11 @@ vhost_memory_changed(struct VhostUserMemory *new,
 }
 
 static int
-vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
+vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *msg,
+                       int main_fd)
 {
        struct virtio_net *dev = *pdev;
-       struct VhostUserMemory memory = pmsg->payload.memory;
+       struct VhostUserMemory *memory = &msg->payload.memory;
        struct rte_vhost_mem_region *reg;
        void *mmap_addr;
        uint64_t mmap_size;
@@ -791,20 +840,20 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
        int populate;
        int fd;
 
-       if (memory.nregions > VHOST_MEMORY_MAX_NREGIONS) {
+       if (memory->nregions > VHOST_MEMORY_MAX_NREGIONS) {
                RTE_LOG(ERR, VHOST_CONFIG,
-                       "too many memory regions (%u)\n", memory.nregions);
-               return -1;
+                       "too many memory regions (%u)\n", memory->nregions);
+               return VH_RESULT_ERR;
        }
 
-       if (dev->mem && !vhost_memory_changed(&memory, dev->mem)) {
+       if (dev->mem && !vhost_memory_changed(memory, dev->mem)) {
                RTE_LOG(INFO, VHOST_CONFIG,
                        "(%d) memory regions not changed\n", dev->vid);
 
-               for (i = 0; i < memory.nregions; i++)
-                       close(pmsg->fds[i]);
+               for (i = 0; i < memory->nregions; i++)
+                       close(msg->fds[i]);
 
-               return 0;
+               return VH_RESULT_OK;
        }
 
        if (dev->mem) {
@@ -828,30 +877,30 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
                                "(%d) failed to allocate memory "
                                "for dev->guest_pages\n",
                                dev->vid);
-                       return -1;
+                       return VH_RESULT_ERR;
                }
        }
 
        dev->mem = rte_zmalloc("vhost-mem-table", sizeof(struct rte_vhost_memory) +
-               sizeof(struct rte_vhost_mem_region) * memory.nregions, 0);
+               sizeof(struct rte_vhost_mem_region) * memory->nregions, 0);
        if (dev->mem == NULL) {
                RTE_LOG(ERR, VHOST_CONFIG,
                        "(%d) failed to allocate memory for dev->mem\n",
                        dev->vid);
-               return -1;
+               return VH_RESULT_ERR;
        }
-       dev->mem->nregions = memory.nregions;
+       dev->mem->nregions = memory->nregions;
 
-       for (i = 0; i < memory.nregions; i++) {
-               fd  = pmsg->fds[i];
+       for (i = 0; i < memory->nregions; i++) {
+               fd  = msg->fds[i];
                reg = &dev->mem->regions[i];
 
-               reg->guest_phys_addr = memory.regions[i].guest_phys_addr;
-               reg->guest_user_addr = memory.regions[i].userspace_addr;
-               reg->size            = memory.regions[i].memory_size;
+               reg->guest_phys_addr = memory->regions[i].guest_phys_addr;
+               reg->guest_user_addr = memory->regions[i].userspace_addr;
+               reg->size            = memory->regions[i].memory_size;
                reg->fd              = fd;
 
-               mmap_offset = memory.regions[i].mmap_offset;
+               mmap_offset = memory->regions[i].mmap_offset;
 
                /* Check for memory_size + mmap_offset overflow */
                if (mmap_offset >= -reg->size) {
@@ -920,6 +969,70 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
                        mmap_size,
                        alignment,
                        mmap_offset);
+
+               if (dev->postcopy_listening) {
+                       /*
+                        * We don't have a better way right now than sharing
+                        * DPDK's virtual addresses with QEMU, so that QEMU can
+                        * retrieve the region offset when handling userfaults.
+                        */
+                       memory->regions[i].userspace_addr =
+                               reg->host_user_addr;
+               }
+       }
+       if (dev->postcopy_listening) {
+               /* Send the addresses back to qemu */
+               msg->fd_num = 0;
+               send_vhost_reply(main_fd, msg);
+
+               /* Wait for QEMU to acknowledge that it has got the
+                * addresses; we must wait before we are allowed to
+                * generate faults.
+                */
+               VhostUserMsg ack_msg;
+               if (read_vhost_message(main_fd, &ack_msg) <= 0) {
+                       RTE_LOG(ERR, VHOST_CONFIG,
+                               "Failed to read qemu ack on postcopy set-mem-table\n");
+                       goto err_mmap;
+               }
+               if (ack_msg.request.master != VHOST_USER_SET_MEM_TABLE) {
+                       RTE_LOG(ERR, VHOST_CONFIG,
+                               "Bad qemu ack on postcopy set-mem-table (%d)\n",
+                               ack_msg.request.master);
+                       goto err_mmap;
+               }
+
+               /* Now register the regions with userfaultfd so we can use the memory */
+               for (i = 0; i < memory->nregions; i++) {
+#ifdef RTE_LIBRTE_VHOST_POSTCOPY
+                       reg = &dev->mem->regions[i];
+                       struct uffdio_register reg_struct;
+
+                       /*
+                        * Let's register all the mmap'ed area to ensure
+                        * alignment on page boundary.
+                        */
+                       reg_struct.range.start =
+                               (uint64_t)(uintptr_t)reg->mmap_addr;
+                       reg_struct.range.len = reg->mmap_size;
+                       reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
+
+                       if (ioctl(dev->postcopy_ufd, UFFDIO_REGISTER,
+                                               &reg_struct)) {
+                               RTE_LOG(ERR, VHOST_CONFIG,
+                                       "Failed to register ufd for region %d: (ufd = %d) %s\n",
+                                       i, dev->postcopy_ufd,
+                                       strerror(errno));
+                               goto err_mmap;
+                       }
+                       RTE_LOG(INFO, VHOST_CONFIG,
+                               "\t userfaultfd registered for range: %llx - %llx\n",
+                               reg_struct.range.start,
+                               reg_struct.range.start +
+                               reg_struct.range.len - 1);
+#else
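+                       /* Postcopy was requested but support was not compiled in */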
+                       goto err_mmap;
+#endif
+               }
        }
 
        for (i = 0; i < dev->nr_vring; i++) {
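For readers new to userfaultfd: the postcopy_ufd used in the UFFDIO_REGISTER calls above is created by the VHOST_USER_POSTCOPY_ADVISE handling added elsewhere in this patch, and must first complete the UFFDIO_API handshake. A standalone sketch of creating such an fd:

	#include <fcntl.h>
	#include <linux/userfaultfd.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int
	open_userfaultfd(void)
	{
		struct uffdio_api api;
		int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

		if (ufd < 0)
			return -1;

		memset(&api, 0, sizeof(api));
		api.api = UFFD_API; /* negotiate the API version */
		if (ioctl(ufd, UFFDIO_API, &api)) {
			close(ufd);
			return -1;
		}

		return ufd;
	}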
@@ -934,8 +1047,10 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
                        vring_invalidate(dev, vq);
 
                        dev = translate_ring_addresses(dev, i);
-                       if (!dev)
-                               return -1;
+                       if (!dev) {
+                               dev = *pdev;
+                               goto err_mmap;
+                       }
 
                        *pdev = dev;
                }
@@ -943,13 +1058,13 @@ vhost_user_set_mem_table(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
 
        dump_guest_pages(dev);
 
-       return 0;
+       return VH_RESULT_OK;
 
 err_mmap:
        free_mem_region(dev);
        rte_free(dev->mem);
        dev->mem = NULL;
-       return -1;
+       return VH_RESULT_ERR;
 }
 
 static bool
@@ -991,17 +1106,19 @@ virtio_is_ready(struct virtio_net *dev)
        return 1;
 }
 
-static void
-vhost_user_set_vring_call(struct virtio_net *dev, struct VhostUserMsg *pmsg)
+static int
+vhost_user_set_vring_call(struct virtio_net **pdev, struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
        struct vhost_vring_file file;
        struct vhost_virtqueue *vq;
 
-       file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
-       if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
+       file.index = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+       if (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
                file.fd = VIRTIO_INVALID_EVENTFD;
        else
-               file.fd = pmsg->fds[0];
+               file.fd = msg->fds[0];
        RTE_LOG(INFO, VHOST_CONFIG,
                "vring call idx:%d file:%d\n", file.index, file.fd);
 
@@ -1010,27 +1127,41 @@ vhost_user_set_vring_call(struct virtio_net *dev, struct VhostUserMsg *pmsg)
                close(vq->callfd);
 
        vq->callfd = file.fd;
+
+       return VH_RESULT_OK;
 }
 
-static void
-vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
+static int vhost_user_set_vring_err(struct virtio_net **pdev __rte_unused,
+                       struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       if (!(msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK))
+               close(msg->fds[0]);
+       RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
+
+       return VH_RESULT_OK;
+}
+
+static int
+vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
+{
+       struct virtio_net *dev = *pdev;
        struct vhost_vring_file file;
        struct vhost_virtqueue *vq;
-       struct virtio_net *dev = *pdev;
 
-       file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
-       if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
+       file.index = msg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
+       if (msg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)
                file.fd = VIRTIO_INVALID_EVENTFD;
        else
-               file.fd = pmsg->fds[0];
+               file.fd = msg->fds[0];
        RTE_LOG(INFO, VHOST_CONFIG,
                "vring kick idx:%d file:%d\n", file.index, file.fd);
 
        /* Interpret ring addresses only when ring is started. */
        dev = translate_ring_addresses(dev, file.index);
        if (!dev)
-               return;
+               return VH_RESULT_ERR;
 
        *pdev = dev;
 
@@ -1047,6 +1178,8 @@ vhost_user_set_vring_kick(struct virtio_net **pdev, struct VhostUserMsg *pmsg)
        if (vq->kickfd >= 0)
                close(vq->kickfd);
        vq->kickfd = file.fd;
+
+       return VH_RESULT_OK;
 }
 
 static void
@@ -1069,9 +1202,11 @@ free_zmbufs(struct vhost_virtqueue *vq)
  * when virtio is stopped, qemu will send us the GET_VRING_BASE message.
  */
 static int
-vhost_user_get_vring_base(struct virtio_net *dev,
-                         VhostUserMsg *msg)
+vhost_user_get_vring_base(struct virtio_net **pdev,
+                       struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
        struct vhost_virtqueue *vq = dev->virtqueue[msg->payload.state.index];
 
        /* We have to stop the queue (virtio) if it is running. */
@@ -1114,7 +1249,10 @@ vhost_user_get_vring_base(struct virtio_net *dev,
        rte_free(vq->batch_copy_elems);
        vq->batch_copy_elems = NULL;
 
-       return 0;
+       msg->size = sizeof(msg->payload.state);
+       msg->fd_num = 0;
+
+       return VH_RESULT_REPLY;
 }
 
 /*
@@ -1122,9 +1260,11 @@ vhost_user_get_vring_base(struct virtio_net *dev,
  * enable the virtio queue pair.
  */
 static int
-vhost_user_set_vring_enable(struct virtio_net *dev,
-                           VhostUserMsg *msg)
+vhost_user_set_vring_enable(struct virtio_net **pdev,
+                       struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
        int enable = (int)msg->payload.state.num;
        int index = (int)msg->payload.state.index;
        struct rte_vdpa_device *vdpa_dev;
@@ -1145,13 +1285,15 @@ vhost_user_set_vring_enable(struct virtio_net *dev,
 
        dev->virtqueue[index]->enabled = enable;
 
-       return 0;
+       return VH_RESULT_OK;
 }
 
-static void
-vhost_user_get_protocol_features(struct virtio_net *dev,
-                                struct VhostUserMsg *msg)
+static int
+vhost_user_get_protocol_features(struct virtio_net **pdev,
+                       struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
        uint64_t features, protocol_features;
 
        rte_vhost_driver_get_features(dev->ifname, &features);
@@ -1168,35 +1310,53 @@ vhost_user_get_protocol_features(struct virtio_net *dev,
 
        msg->payload.u64 = protocol_features;
        msg->size = sizeof(msg->payload.u64);
+       msg->fd_num = 0;
+
+       return VH_RESULT_REPLY;
 }
 
-static void
-vhost_user_set_protocol_features(struct virtio_net *dev,
-                                uint64_t protocol_features)
+static int
+vhost_user_set_protocol_features(struct virtio_net **pdev,
+                       struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
-       if (protocol_features & ~VHOST_USER_PROTOCOL_FEATURES)
-               return;
+       struct virtio_net *dev = *pdev;
+       uint64_t protocol_features = msg->payload.u64;
+       uint64_t slave_protocol_features = 0;
+
+       rte_vhost_driver_get_protocol_features(dev->ifname,
+                       &slave_protocol_features);
+       if (protocol_features & ~slave_protocol_features) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "(%d) received invalid protocol features.\n",
+                       dev->vid);
+               return VH_RESULT_ERR;
+       }
 
        dev->protocol_features = protocol_features;
+
+       return VH_RESULT_OK;
 }
 
 static int
-vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg)
+vhost_user_set_log_base(struct virtio_net **pdev, struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
        int fd = msg->fds[0];
        uint64_t size, off;
        void *addr;
 
        if (fd < 0) {
                RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd);
-               return -1;
+               return VH_RESULT_ERR;
        }
 
        if (msg->size != sizeof(VhostUserLog)) {
                RTE_LOG(ERR, VHOST_CONFIG,
                        "invalid log base msg size: %"PRId32" != %d\n",
                        msg->size, (int)sizeof(VhostUserLog));
-               return -1;
+               return VH_RESULT_ERR;
        }
 
        size = msg->payload.log.mmap_size;
@@ -1207,7 +1367,7 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg)
                RTE_LOG(ERR, VHOST_CONFIG,
                        "log offset %#"PRIx64" exceeds log size %#"PRIx64"\n",
                        off, size);
-               return -1;
+               return VH_RESULT_ERR;
        }
 
        RTE_LOG(INFO, VHOST_CONFIG,
@@ -1222,7 +1382,7 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg)
        close(fd);
        if (addr == MAP_FAILED) {
                RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
-               return -1;
+               return VH_RESULT_ERR;
        }
 
        /*
@@ -1236,7 +1396,24 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg)
        dev->log_base = dev->log_addr + off;
        dev->log_size = size;
 
-       return 0;
+       /*
+        * The spec is not clear about it (yet), but QEMU doesn't expect
+        * any payload in the reply.
+        */
+       msg->size = 0;
+       msg->fd_num = 0;
+
+       return VH_RESULT_REPLY;
+}
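
A minimal sketch of the mapping step performed above, assuming ordinary POSIX mmap() semantics: the whole size + off region is mapped from file offset 0 because mmap() offsets must be page-aligned while the QEMU-provided off need not be, and the unaligned offset is applied afterwards:

    #include <stdint.h>
    #include <sys/mman.h>

    /* map the full region, then apply the sub-page offset by hand */
    void *base = mmap(NULL, size + off, PROT_READ | PROT_WRITE,
                      MAP_SHARED, fd, 0);
    if (base != MAP_FAILED)
        log_base = (uint64_t)(uintptr_t)base + off; /* as dev->log_base above;
                                                     * 'log_base' is a local
                                                     * stand-in for illustration */
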
+
+static int vhost_user_set_log_fd(struct virtio_net **pdev __rte_unused,
+                       struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
+{
+       close(msg->fds[0]);
+       RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
+
+       return VH_RESULT_OK;
 }
 
 /*
@@ -1248,8 +1425,10 @@ vhost_user_set_log_base(struct virtio_net *dev, struct VhostUserMsg *msg)
  * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it.
  */
 static int
-vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg)
+vhost_user_send_rarp(struct virtio_net **pdev, struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
        uint8_t *mac = (uint8_t *)&msg->payload.u64;
        struct rte_vdpa_device *vdpa_dev;
        int did = -1;
@@ -1273,40 +1452,44 @@ vhost_user_send_rarp(struct virtio_net *dev, struct VhostUserMsg *msg)
        if (vdpa_dev && vdpa_dev->ops->migration_done)
                vdpa_dev->ops->migration_done(dev->vid);
 
-       return 0;
+       return VH_RESULT_OK;
 }
 
 static int
-vhost_user_net_set_mtu(struct virtio_net *dev, struct VhostUserMsg *msg)
+vhost_user_net_set_mtu(struct virtio_net **pdev, struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
        if (msg->payload.u64 < VIRTIO_MIN_MTU ||
                        msg->payload.u64 > VIRTIO_MAX_MTU) {
                RTE_LOG(ERR, VHOST_CONFIG, "Invalid MTU size (%"PRIu64")\n",
                                msg->payload.u64);
 
-               return -1;
+               return VH_RESULT_ERR;
        }
 
        dev->mtu = msg->payload.u64;
 
-       return 0;
+       return VH_RESULT_OK;
 }
 
 static int
-vhost_user_set_req_fd(struct virtio_net *dev, struct VhostUserMsg *msg)
+vhost_user_set_req_fd(struct virtio_net **pdev, struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
+       struct virtio_net *dev = *pdev;
        int fd = msg->fds[0];
 
        if (fd < 0) {
                RTE_LOG(ERR, VHOST_CONFIG,
                                "Invalid file descriptor for slave channel (%d)\n",
                                fd);
-               return -1;
+               return VH_RESULT_ERR;
        }
 
        dev->slave_req_fd = fd;
 
-       return 0;
+       return VH_RESULT_OK;
 }
 
 static int
@@ -1359,7 +1542,8 @@ is_vring_iotlb_invalidate(struct vhost_virtqueue *vq,
 }
 
 static int
-vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
+vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
 {
        struct virtio_net *dev = *pdev;
        struct vhost_iotlb_msg *imsg = &msg->payload.iotlb;
@@ -1371,7 +1555,7 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
                len = imsg->size;
                vva = qva_to_vva(dev, imsg->uaddr, &len);
                if (!vva)
-                       return -1;
+                       return VH_RESULT_ERR;
 
                for (i = 0; i < dev->nr_vring; i++) {
                        struct vhost_virtqueue *vq = dev->virtqueue[i];
@@ -1397,12 +1581,118 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
        default:
                RTE_LOG(ERR, VHOST_CONFIG, "Invalid IOTLB message type (%d)\n",
                                imsg->type);
-               return -1;
+               return VH_RESULT_ERR;
        }
 
-       return 0;
+       return VH_RESULT_OK;
 }
 
+static int
+vhost_user_set_postcopy_advise(struct virtio_net **pdev,
+                       struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
+{
+       struct virtio_net *dev = *pdev;
+#ifdef RTE_LIBRTE_VHOST_POSTCOPY
+       struct uffdio_api api_struct;
+
+       dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+
+       if (dev->postcopy_ufd == -1) {
+               RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n",
+                       strerror(errno));
+               return VH_RESULT_ERR;
+       }
+       api_struct.api = UFFD_API;
+       api_struct.features = 0;
+       if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
+               RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n",
+                       strerror(errno));
+               close(dev->postcopy_ufd);
+               dev->postcopy_ufd = -1;
+               return VH_RESULT_ERR;
+       }
+       msg->fds[0] = dev->postcopy_ufd;
+       msg->fd_num = 1;
+
+       return VH_RESULT_REPLY;
+#else
+       dev->postcopy_ufd = -1;
+       msg->fd_num = 0;
+
+       return VH_RESULT_ERR;
+#endif
+}
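
A self-contained sketch of the userfaultfd handshake that vhost_user_set_postcopy_advise() performs above, assuming a Linux kernel with userfaultfd support; the resulting descriptor is what gets handed back to QEMU in msg->fds[0]:

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/userfaultfd.h>

    static int open_userfaultfd(void)
    {
        struct uffdio_api api = { .api = UFFD_API, .features = 0 };
        int ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);

        if (ufd < 0)
            return -1;
        /* version/feature handshake; must happen before any other ioctl */
        if (ioctl(ufd, UFFDIO_API, &api) != 0) {
            close(ufd);
            return -1;
        }
        return ufd; /* passed back to the master in the reply */
    }
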
+
+static int
+vhost_user_set_postcopy_listen(struct virtio_net **pdev,
+                       struct VhostUserMsg *msg __rte_unused,
+                       int main_fd __rte_unused)
+{
+       struct virtio_net *dev = *pdev;
+
+       if (dev->mem && dev->mem->nregions) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "Regions already registered at postcopy-listen\n");
+               return VH_RESULT_ERR;
+       }
+       dev->postcopy_listening = 1;
+
+       return VH_RESULT_OK;
+}
+
+static int
+vhost_user_postcopy_end(struct virtio_net **pdev, struct VhostUserMsg *msg,
+                       int main_fd __rte_unused)
+{
+       struct virtio_net *dev = *pdev;
+
+       dev->postcopy_listening = 0;
+       if (dev->postcopy_ufd >= 0) {
+               close(dev->postcopy_ufd);
+               dev->postcopy_ufd = -1;
+       }
+
+       msg->payload.u64 = 0;
+       msg->size = sizeof(msg->payload.u64);
+       msg->fd_num = 0;
+
+       return VH_RESULT_REPLY;
+}
+
+typedef int (*vhost_message_handler_t)(struct virtio_net **pdev,
+                                       struct VhostUserMsg *msg,
+                                       int main_fd);
+static vhost_message_handler_t vhost_message_handlers[VHOST_USER_MAX] = {
+       [VHOST_USER_NONE] = NULL,
+       [VHOST_USER_GET_FEATURES] = vhost_user_get_features,
+       [VHOST_USER_SET_FEATURES] = vhost_user_set_features,
+       [VHOST_USER_SET_OWNER] = vhost_user_set_owner,
+       [VHOST_USER_RESET_OWNER] = vhost_user_reset_owner,
+       [VHOST_USER_SET_MEM_TABLE] = vhost_user_set_mem_table,
+       [VHOST_USER_SET_LOG_BASE] = vhost_user_set_log_base,
+       [VHOST_USER_SET_LOG_FD] = vhost_user_set_log_fd,
+       [VHOST_USER_SET_VRING_NUM] = vhost_user_set_vring_num,
+       [VHOST_USER_SET_VRING_ADDR] = vhost_user_set_vring_addr,
+       [VHOST_USER_SET_VRING_BASE] = vhost_user_set_vring_base,
+       [VHOST_USER_GET_VRING_BASE] = vhost_user_get_vring_base,
+       [VHOST_USER_SET_VRING_KICK] = vhost_user_set_vring_kick,
+       [VHOST_USER_SET_VRING_CALL] = vhost_user_set_vring_call,
+       [VHOST_USER_SET_VRING_ERR] = vhost_user_set_vring_err,
+       [VHOST_USER_GET_PROTOCOL_FEATURES] = vhost_user_get_protocol_features,
+       [VHOST_USER_SET_PROTOCOL_FEATURES] = vhost_user_set_protocol_features,
+       [VHOST_USER_GET_QUEUE_NUM] = vhost_user_get_queue_num,
+       [VHOST_USER_SET_VRING_ENABLE] = vhost_user_set_vring_enable,
+       [VHOST_USER_SEND_RARP] = vhost_user_send_rarp,
+       [VHOST_USER_NET_SET_MTU] = vhost_user_net_set_mtu,
+       [VHOST_USER_SET_SLAVE_REQ_FD] = vhost_user_set_req_fd,
+       [VHOST_USER_IOTLB_MSG] = vhost_user_iotlb_msg,
+       [VHOST_USER_POSTCOPY_ADVISE] = vhost_user_set_postcopy_advise,
+       [VHOST_USER_POSTCOPY_LISTEN] = vhost_user_set_postcopy_listen,
+       [VHOST_USER_POSTCOPY_END] = vhost_user_postcopy_end,
+};
+
+
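
With the big switch below replaced by this table, supporting a new master request becomes a local change. A hypothetical example (the request name and handler here are illustrative only, not part of this patch):

    /* 1. add the id to VhostUserRequest, keeping VHOST_USER_MAX last */
    /* 2. implement a handler with the common signature */
    static int
    vhost_user_handle_foo(struct virtio_net **pdev, struct VhostUserMsg *msg,
                          int main_fd __rte_unused)
    {
        return VH_RESULT_OK;
    }
    /* 3. register it: [VHOST_USER_FOO] = vhost_user_handle_foo, */
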
 /* Return the number of bytes read on success, or a negative value on failure. */
 static int
 read_vhost_message(int sockfd, struct VhostUserMsg *msg)
@@ -1410,7 +1700,7 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg)
        int ret;
 
        ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE,
-               msg->fds, VHOST_MEMORY_MAX_NREGIONS);
+               msg->fds, VHOST_MEMORY_MAX_NREGIONS, &msg->fd_num);
        if (ret <= 0)
                return ret;
 
@@ -1434,13 +1724,13 @@ read_vhost_message(int sockfd, struct VhostUserMsg *msg)
 }
 
 static int
-send_vhost_message(int sockfd, struct VhostUserMsg *msg, int *fds, int fd_num)
+send_vhost_message(int sockfd, struct VhostUserMsg *msg)
 {
        if (!msg)
                return 0;
 
        return send_fd_message(sockfd, (char *)msg,
-               VHOST_USER_HDR_SIZE + msg->size, fds, fd_num);
+               VHOST_USER_HDR_SIZE + msg->size, msg->fds, msg->fd_num);
 }
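
Carrying the descriptors in msg->fds/msg->fd_num works because only VHOST_USER_HDR_SIZE + msg->size bytes of the struct are written to the socket; the fds travel out-of-band. A sketch of the usual SCM_RIGHTS pattern that send_fd_message() in socket.c is assumed to follow, using its parameters as shown in the header change further down:

    #include <string.h>
    #include <sys/socket.h>

    struct iovec iov = { .iov_base = buf, .iov_len = buflen };
    char control[CMSG_SPACE(sizeof(int) * VHOST_MEMORY_MAX_NREGIONS)];
    struct msghdr mh = {
        .msg_iov = &iov, .msg_iovlen = 1,
        .msg_control = control,
        .msg_controllen = CMSG_SPACE(fd_num * sizeof(int)),
    };
    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&mh);

    /* real code must skip the cmsg block entirely when fd_num == 0 */
    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type = SCM_RIGHTS;
    cmsg->cmsg_len = CMSG_LEN(fd_num * sizeof(int));
    memcpy(CMSG_DATA(cmsg), fds, fd_num * sizeof(int));
    sendmsg(sockfd, &mh, 0);
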
 
 static int
@@ -1454,19 +1744,18 @@ send_vhost_reply(int sockfd, struct VhostUserMsg *msg)
        msg->flags |= VHOST_USER_VERSION;
        msg->flags |= VHOST_USER_REPLY_MASK;
 
-       return send_vhost_message(sockfd, msg, NULL, 0);
+       return send_vhost_message(sockfd, msg);
 }
 
 static int
-send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg,
-                        int *fds, int fd_num)
+send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg)
 {
        int ret;
 
        if (msg->flags & VHOST_USER_NEED_REPLY)
                rte_spinlock_lock(&dev->slave_req_lock);
 
-       ret = send_vhost_message(dev->slave_req_fd, msg, fds, fd_num);
+       ret = send_vhost_message(dev->slave_req_fd, msg);
        if (ret < 0 && (msg->flags & VHOST_USER_NEED_REPLY))
                rte_spinlock_unlock(&dev->slave_req_lock);
 
@@ -1477,7 +1766,8 @@ send_vhost_slave_message(struct virtio_net *dev, struct VhostUserMsg *msg,
  * Allocate a queue pair if it hasn't been allocated yet
  */
 static int
-vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev, VhostUserMsg *msg)
+vhost_user_check_and_alloc_queue_pair(struct virtio_net *dev,
+                       struct VhostUserMsg *msg)
 {
        uint16_t vring_idx;
 
@@ -1555,6 +1845,7 @@ vhost_user_msg_handler(int vid, int fd)
        int ret;
        int unlock_required = 0;
        uint32_t skip_master = 0;
+       int request;
 
        dev = get_device(vid);
        if (dev == NULL)
@@ -1633,132 +1924,54 @@ vhost_user_msg_handler(int vid, int fd)
        }
 
        if (dev->extern_ops.pre_msg_handle) {
-               uint32_t need_reply;
-
                ret = (*dev->extern_ops.pre_msg_handle)(dev->vid,
-                               (void *)&msg, &need_reply, &skip_master);
-               if (ret < 0)
+                               (void *)&msg, &skip_master);
+               if (ret == VH_RESULT_ERR)
                        goto skip_to_reply;
-
-               if (need_reply)
+               else if (ret == VH_RESULT_REPLY)
                        send_vhost_reply(fd, &msg);
 
                if (skip_master)
                        goto skip_to_post_handle;
        }
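
The external-ops contract changes with this patch: pre/post handlers now return the same result codes instead of filling a separate need_reply flag. A minimal sketch of a conforming pre-handler, with the signature inferred from the call site above (the handler name is illustrative):

    static int
    my_pre_handler(int vid, void *msg, uint32_t *skip_master)
    {
        (void)vid;
        (void)msg;
        *skip_master = 0;      /* also run the built-in handler */
        return VH_RESULT_OK;   /* or VH_RESULT_REPLY to have msg sent back */
    }
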
 
-       switch (msg.request.master) {
-       case VHOST_USER_GET_FEATURES:
-               msg.payload.u64 = vhost_user_get_features(dev);
-               msg.size = sizeof(msg.payload.u64);
-               send_vhost_reply(fd, &msg);
-               break;
-       case VHOST_USER_SET_FEATURES:
-               ret = vhost_user_set_features(dev, msg.payload.u64);
-               if (ret)
-                       return -1;
-               break;
-
-       case VHOST_USER_GET_PROTOCOL_FEATURES:
-               vhost_user_get_protocol_features(dev, &msg);
-               send_vhost_reply(fd, &msg);
-               break;
-       case VHOST_USER_SET_PROTOCOL_FEATURES:
-               vhost_user_set_protocol_features(dev, msg.payload.u64);
-               break;
-
-       case VHOST_USER_SET_OWNER:
-               vhost_user_set_owner();
-               break;
-       case VHOST_USER_RESET_OWNER:
-               vhost_user_reset_owner(dev);
-               break;
-
-       case VHOST_USER_SET_MEM_TABLE:
-               ret = vhost_user_set_mem_table(&dev, &msg);
-               break;
-
-       case VHOST_USER_SET_LOG_BASE:
-               vhost_user_set_log_base(dev, &msg);
-
-               /* it needs a reply */
-               msg.size = sizeof(msg.payload.u64);
-               send_vhost_reply(fd, &msg);
-               break;
-       case VHOST_USER_SET_LOG_FD:
-               close(msg.fds[0]);
-               RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
-               break;
-
-       case VHOST_USER_SET_VRING_NUM:
-               vhost_user_set_vring_num(dev, &msg);
-               break;
-       case VHOST_USER_SET_VRING_ADDR:
-               vhost_user_set_vring_addr(&dev, &msg);
-               break;
-       case VHOST_USER_SET_VRING_BASE:
-               vhost_user_set_vring_base(dev, &msg);
-               break;
-
-       case VHOST_USER_GET_VRING_BASE:
-               vhost_user_get_vring_base(dev, &msg);
-               msg.size = sizeof(msg.payload.state);
-               send_vhost_reply(fd, &msg);
-               break;
-
-       case VHOST_USER_SET_VRING_KICK:
-               vhost_user_set_vring_kick(&dev, &msg);
-               break;
-       case VHOST_USER_SET_VRING_CALL:
-               vhost_user_set_vring_call(dev, &msg);
-               break;
-
-       case VHOST_USER_SET_VRING_ERR:
-               if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK))
-                       close(msg.fds[0]);
-               RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n");
-               break;
-
-       case VHOST_USER_GET_QUEUE_NUM:
-               msg.payload.u64 = (uint64_t)vhost_user_get_queue_num(dev);
-               msg.size = sizeof(msg.payload.u64);
-               send_vhost_reply(fd, &msg);
-               break;
-
-       case VHOST_USER_SET_VRING_ENABLE:
-               vhost_user_set_vring_enable(dev, &msg);
-               break;
-       case VHOST_USER_SEND_RARP:
-               vhost_user_send_rarp(dev, &msg);
-               break;
-
-       case VHOST_USER_NET_SET_MTU:
-               ret = vhost_user_net_set_mtu(dev, &msg);
-               break;
-
-       case VHOST_USER_SET_SLAVE_REQ_FD:
-               ret = vhost_user_set_req_fd(dev, &msg);
-               break;
-
-       case VHOST_USER_IOTLB_MSG:
-               ret = vhost_user_iotlb_msg(&dev, &msg);
-               break;
+       request = msg.request.master;
+       if (request > VHOST_USER_NONE && request < VHOST_USER_MAX) {
+               if (!vhost_message_handlers[request])
+                       goto skip_to_post_handle;
+               ret = vhost_message_handlers[request](&dev, &msg, fd);
 
-       default:
-               ret = -1;
-               break;
+               switch (ret) {
+               case VH_RESULT_ERR:
+                       RTE_LOG(ERR, VHOST_CONFIG,
+                               "Processing %s failed.\n",
+                               vhost_message_str[request]);
+                       break;
+               case VH_RESULT_OK:
+                       RTE_LOG(DEBUG, VHOST_CONFIG,
+                               "Processing %s succeeded.\n",
+                               vhost_message_str[request]);
+                       break;
+               case VH_RESULT_REPLY:
+                       RTE_LOG(DEBUG, VHOST_CONFIG,
+                               "Processing %s succeeded and needs reply.\n",
+                               vhost_message_str[request]);
+                       send_vhost_reply(fd, &msg);
+                       break;
+               }
+       } else {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "Requested invalid message type %d.\n", request);
+               ret = VH_RESULT_ERR;
        }
 
 skip_to_post_handle:
-       if (dev->extern_ops.post_msg_handle) {
-               uint32_t need_reply;
-
+       if (ret != VH_RESULT_ERR && dev->extern_ops.post_msg_handle) {
                ret = (*dev->extern_ops.post_msg_handle)(
-                               dev->vid, (void *)&msg, &need_reply);
-               if (ret < 0)
+                               dev->vid, (void *)&msg);
+               if (ret == VH_RESULT_ERR)
                        goto skip_to_reply;
-
-               if (need_reply)
+               else if (ret == VH_RESULT_REPLY)
                        send_vhost_reply(fd, &msg);
        }
 
@@ -1766,10 +1979,20 @@ skip_to_reply:
        if (unlock_required)
                vhost_user_unlock_all_queue_pairs(dev);
 
+       /*
+        * If the request required a reply that was already sent,
+        * this optional reply-ack won't be sent, as the
+        * VHOST_USER_NEED_REPLY flag was cleared in send_vhost_reply().
+        */
        if (msg.flags & VHOST_USER_NEED_REPLY) {
-               msg.payload.u64 = !!ret;
+               msg.payload.u64 = ret == VH_RESULT_ERR;
                msg.size = sizeof(msg.payload.u64);
+               msg.fd_num = 0;
                send_vhost_reply(fd, &msg);
+       } else if (ret == VH_RESULT_ERR) {
+               RTE_LOG(ERR, VHOST_CONFIG,
+                       "vhost message handling failed.\n");
+               return -1;
        }
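
One behavioural consequence worth spelling out: when the master did not request a reply-ack, an error now propagates as -1 to the caller. In socket.c (outside this excerpt) that return is assumed to tear the connection down, roughly:

    ret = vhost_user_msg_handler(conn->vid, connfd); /* this function */
    if (ret < 0) {
        close(connfd);  /* drop the misbehaving master */
        /* ...followed by device cleanup... */
    }
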
 
        if (!(dev->flags & VIRTIO_DEV_RUNNING) && virtio_is_ready(dev)) {
@@ -1805,9 +2028,9 @@ skip_to_reply:
 }
 
 static int process_slave_message_reply(struct virtio_net *dev,
-                                      const VhostUserMsg *msg)
+                                      const struct VhostUserMsg *msg)
 {
-       VhostUserMsg msg_reply;
+       struct VhostUserMsg msg_reply;
        int ret;
 
        if ((msg->flags & VHOST_USER_NEED_REPLY) == 0)
@@ -1848,7 +2071,7 @@ vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm)
                },
        };
 
-       ret = send_vhost_message(dev->slave_req_fd, &msg, NULL, 0);
+       ret = send_vhost_message(dev->slave_req_fd, &msg);
        if (ret < 0) {
                RTE_LOG(ERR, VHOST_CONFIG,
                                "Failed to send IOTLB miss message (%d)\n",
@@ -1864,8 +2087,6 @@ static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev,
                                                    uint64_t offset,
                                                    uint64_t size)
 {
-       int *fdp = NULL;
-       size_t fd_num = 0;
        int ret;
        struct VhostUserMsg msg = {
                .request.slave = VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG,
@@ -1881,11 +2102,11 @@ static int vhost_user_slave_set_vring_host_notifier(struct virtio_net *dev,
        if (fd < 0)
                msg.payload.area.u64 |= VHOST_USER_VRING_NOFD_MASK;
        else {
-               fdp = &fd;
-               fd_num = 1;
+               msg.fds[0] = fd;
+               msg.fd_num = 1;
        }
 
-       ret = send_vhost_slave_message(dev, &msg, fdp, fd_num);
+       ret = send_vhost_slave_message(dev, &msg);
        if (ret < 0) {
                RTE_LOG(ERR, VHOST_CONFIG,
                        "Failed to set host notifier (%d)\n", ret);
index 42166ad..dc97be8 100644 (file)
@@ -22,7 +22,8 @@
                                         (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_REQ) | \
                                         (1ULL << VHOST_USER_PROTOCOL_F_CRYPTO_SESSION) | \
                                         (1ULL << VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD) | \
-                                        (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER))
+                                        (1ULL << VHOST_USER_PROTOCOL_F_HOST_NOTIFIER) | \
+                                        (1ULL << VHOST_USER_PROTOCOL_F_PAGEFAULT))
 
 typedef enum VhostUserRequest {
        VHOST_USER_NONE = 0,
@@ -50,7 +51,10 @@ typedef enum VhostUserRequest {
        VHOST_USER_IOTLB_MSG = 22,
        VHOST_USER_CRYPTO_CREATE_SESS = 26,
        VHOST_USER_CRYPTO_CLOSE_SESS = 27,
-       VHOST_USER_MAX = 28
+       VHOST_USER_POSTCOPY_ADVISE = 28,
+       VHOST_USER_POSTCOPY_LISTEN = 29,
+       VHOST_USER_POSTCOPY_END = 30,
+       VHOST_USER_MAX = 31
 } VhostUserRequest;
 
 typedef enum VhostUserSlaveRequest {
@@ -132,6 +136,7 @@ typedef struct VhostUserMsg {
                VhostUserVringArea area;
        } payload;
        int fds[VHOST_MEMORY_MAX_NREGIONS];
+       int fd_num;
 } __attribute((packed)) VhostUserMsg;
 
 #define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
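
Note the layout consequence of the new field: fd_num sits after fds[] at the tail of the packed struct, and since senders write only the header plus msg->size payload bytes, neither array nor counter ever crosses the socket. For example, a GET_VRING_BASE reply occupies:

    /* request + flags + size, then the state payload; fds[]/fd_num excluded */
    size_t wire_bytes = VHOST_USER_HDR_SIZE + sizeof(VhostUserVringState);
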
@@ -146,7 +151,8 @@ int vhost_user_iotlb_miss(struct virtio_net *dev, uint64_t iova, uint8_t perm);
 int vhost_user_host_notifier_ctrl(int vid, bool enable);
 
 /* socket.c */
-int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
+int read_fd_message(int sockfd, char *buf, int buflen, int *fds, int max_fds,
+               int *fd_num);
 int send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num);
 
 #endif
index 99c7afc..8ad30c9 100644 (file)
@@ -122,7 +122,7 @@ flush_shadow_used_ring_split(struct virtio_net *dev, struct vhost_virtqueue *vq)
 
 static __rte_always_inline void
 update_shadow_used_ring_split(struct vhost_virtqueue *vq,
-                        uint16_t desc_idx, uint16_t len)
+                        uint16_t desc_idx, uint32_t len)
 {
        uint16_t i = vq->shadow_used_idx++;
 
@@ -186,7 +186,7 @@ flush_shadow_used_ring_packed(struct virtio_net *dev,
 
 static __rte_always_inline void
 update_shadow_used_ring_packed(struct vhost_virtqueue *vq,
-                        uint16_t desc_idx, uint16_t len, uint16_t count)
+                        uint16_t desc_idx, uint32_t len, uint16_t count)
 {
        uint16_t i = vq->shadow_used_idx++;
 
@@ -329,7 +329,7 @@ static __rte_always_inline int
 fill_vec_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
                         uint32_t avail_idx, uint16_t *vec_idx,
                         struct buf_vector *buf_vec, uint16_t *desc_chain_head,
-                        uint16_t *desc_chain_len, uint8_t perm)
+                        uint32_t *desc_chain_len, uint8_t perm)
 {
        uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)];
        uint16_t vec_id = *vec_idx;
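
The len widening from uint16_t to uint32_t in this and the following hunks guards the per-chain byte totals: a guest may chain descriptors whose combined length exceeds 65535 bytes, and the old 16-bit accumulator wrapped silently. A worked illustration of the failure mode:

    uint16_t len16 = 0;
    uint32_t len32 = 0;
    for (int i = 0; i < 3; i++) {
        len16 += 32768; /* after 2 adds: wraps to 0; after 3: 32768 */
        len32 += 32768; /* after 3 adds: 98304, as intended */
    }
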
@@ -409,7 +409,7 @@ reserve_avail_buf_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
        uint16_t max_tries, tries = 0;
 
        uint16_t head_idx = 0;
-       uint16_t len = 0;
+       uint32_t len = 0;
 
        *num_buffers = 0;
        cur_idx  = vq->last_avail_idx;
@@ -452,7 +452,7 @@ static __rte_always_inline int
 fill_vec_buf_packed_indirect(struct virtio_net *dev,
                        struct vhost_virtqueue *vq,
                        struct vring_packed_desc *desc, uint16_t *vec_idx,
-                       struct buf_vector *buf_vec, uint16_t *len, uint8_t perm)
+                       struct buf_vector *buf_vec, uint32_t *len, uint8_t perm)
 {
        uint16_t i;
        uint32_t nr_descs;
@@ -508,7 +508,7 @@ static __rte_always_inline int
 fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
                                uint16_t avail_idx, uint16_t *desc_count,
                                struct buf_vector *buf_vec, uint16_t *vec_idx,
-                               uint16_t *buf_id, uint16_t *len, uint8_t perm)
+                               uint16_t *buf_id, uint32_t *len, uint8_t perm)
 {
        bool wrap_counter = vq->avail_wrap_counter;
        struct vring_packed_desc *descs = vq->desc_packed;
@@ -521,6 +521,7 @@ fill_vec_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
                return -1;
 
        *desc_count = 0;
+       *len = 0;
 
        while (1) {
                if (unlikely(vec_id >= BUF_VECTOR_MAX))
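
The added *len = 0 matters because this function accumulates into the out-parameter (one "*len +=" per descriptor) and reserve_avail_buf_packed() calls it repeatedly with the same variable; without the reset, each chain's reported length would include every previous chain's bytes. Roughly (fill_chain is a hypothetical stand-in for the call above):

    uint32_t len = 0;
    fill_chain(&len, 2048); /* accumulates: len == 2048 */
    fill_chain(&len, 1024); /* without "*len = 0": len == 3072 (wrong);
                             * with the reset:     len == 1024 (right) */
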
@@ -573,7 +574,7 @@ reserve_avail_buf_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
        uint16_t max_tries, tries = 0;
 
        uint16_t buf_id = 0;
-       uint16_t len = 0;
+       uint32_t len = 0;
        uint16_t desc_count;
 
        *num_buffers = 0;
@@ -888,6 +889,7 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
        struct rte_mbuf **pkts, uint32_t count)
 {
        struct vhost_virtqueue *vq;
+       uint32_t nb_tx = 0;
 
        VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
        if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->nr_vring))) {
@@ -915,9 +917,9 @@ virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id,
                goto out;
 
        if (vq_is_packed(dev))
-               count = virtio_dev_rx_packed(dev, vq, pkts, count);
+               nb_tx = virtio_dev_rx_packed(dev, vq, pkts, count);
        else
-               count = virtio_dev_rx_split(dev, vq, pkts, count);
+               nb_tx = virtio_dev_rx_split(dev, vq, pkts, count);
 
 out:
        if (dev->features & (1ULL << VIRTIO_F_IOMMU_PLATFORM))
@@ -926,7 +928,7 @@ out:
 out_access_unlock:
        rte_spinlock_unlock(&vq->access_lock);
 
-       return count;
+       return nb_tx;
 }
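
The nb_tx bookkeeping fixes the early-exit paths: when the vring is missing, disabled, or access_ok checks fail, the gotos previously returned the caller's count unchanged, overstating what was enqueued. This is observable through the public API; assuming a queue that is not yet enabled:

    uint16_t sent = rte_vhost_enqueue_burst(vid, queue_id, pkts, count);
    /* sent is now 0 on a disabled vring, instead of echoing 'count' */
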
 
 uint16_t
@@ -1358,8 +1360,10 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
                        }
                }
 
-               flush_shadow_used_ring_split(dev, vq);
-               vhost_vring_call_split(dev, vq);
+               if (likely(vq->shadow_used_idx)) {
+                       flush_shadow_used_ring_split(dev, vq);
+                       vhost_vring_call_split(dev, vq);
+               }
        }
 
        rte_prefetch0(&vq->avail->ring[vq->last_avail_idx & (vq->size - 1)]);
@@ -1378,7 +1382,8 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
        for (i = 0; i < count; i++) {
                struct buf_vector buf_vec[BUF_VECTOR_MAX];
-               uint16_t head_idx, dummy_len;
+               uint16_t head_idx;
+               uint32_t dummy_len;
                uint16_t nr_vec = 0;
                int err;
 
@@ -1437,8 +1442,10 @@ virtio_dev_tx_split(struct virtio_net *dev, struct vhost_virtqueue *vq,
                do_data_copy_dequeue(vq);
                if (unlikely(i < count))
                        vq->shadow_used_idx = i;
-               flush_shadow_used_ring_split(dev, vq);
-               vhost_vring_call_split(dev, vq);
+               if (likely(vq->shadow_used_idx)) {
+                       flush_shadow_used_ring_split(dev, vq);
+                       vhost_vring_call_split(dev, vq);
+               }
        }
 
        return i;
@@ -1473,8 +1480,10 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
                        }
                }
 
-               flush_shadow_used_ring_packed(dev, vq);
-               vhost_vring_call_packed(dev, vq);
+               if (likely(vq->shadow_used_idx)) {
+                       flush_shadow_used_ring_packed(dev, vq);
+                       vhost_vring_call_packed(dev, vq);
+               }
        }
 
        VHOST_LOG_DEBUG(VHOST_DATA, "(%d) %s\n", dev->vid, __func__);
@@ -1485,7 +1494,8 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
        for (i = 0; i < count; i++) {
                struct buf_vector buf_vec[BUF_VECTOR_MAX];
-               uint16_t buf_id, dummy_len;
+               uint16_t buf_id;
+               uint32_t dummy_len;
                uint16_t desc_count, nr_vec = 0;
                int err;
 
@@ -1551,8 +1561,10 @@ virtio_dev_tx_packed(struct virtio_net *dev, struct vhost_virtqueue *vq,
                do_data_copy_dequeue(vq);
                if (unlikely(i < count))
                        vq->shadow_used_idx = i;
-               flush_shadow_used_ring_packed(dev, vq);
-               vhost_vring_call_packed(dev, vq);
+               if (likely(vq->shadow_used_idx)) {
+                       flush_shadow_used_ring_packed(dev, vq);
+                       vhost_vring_call_packed(dev, vq);
+               }
        }
 
        return i;
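
The same guard is applied at all three flush points above; in isolation, the pattern is simply to skip the used-ring write-back and the guest notification when no shadow entries were produced, avoiding a spurious interrupt on an idle queue:

    if (likely(vq->shadow_used_idx)) {          /* anything dequeued this round? */
        flush_shadow_used_ring_packed(dev, vq); /* publish used entries */
        vhost_vring_call_packed(dev, vq);       /* then notify the guest */
    }
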
index eb91f10..bb7f443 100644 (file)
@@ -9,13 +9,14 @@
 # given as a dep, no need to mention ring. This is especially true for the
 # core libs which are widely reused, so their deps are kept to a minimum.
 libraries = [ 'compat', # just a header, used for versioning
-       'kvargs',
+       'cmdline', # ethdev depends on cmdline for parsing functions
+       'kvargs', # eal depends on kvargs
        'eal', 'ring', 'mempool', 'mbuf', 'net', 'ethdev', 'pci', # core
        'metrics', # bitrate/latency stats depends on this
        'hash',    # efd depends on this
        'timer',   # eventdev depends on this
        'acl', 'bbdev', 'bitratestats', 'cfgfile',
-       'cmdline', 'compressdev', 'cryptodev',
+       'compressdev', 'cryptodev',
        'distributor', 'efd', 'eventdev',
        'gro', 'gso', 'ip_frag', 'jobstats',
        'kni', 'latencystats', 'lpm', 'member',
@@ -24,12 +25,18 @@ libraries = [ 'compat', # just a header, used for versioning
        # add pkt framework libs which use other libs from above
        'port', 'table', 'pipeline',
        # flow_classify lib depends on pkt framework table lib
-       'flow_classify', 'bpf']
+       'flow_classify', 'bpf', 'telemetry']
 
 default_cflags = machine_args
 if cc.has_argument('-Wno-format-truncation')
        default_cflags += '-Wno-format-truncation'
 endif
+
+enabled_libs = [] # used to print summary at the end
+
+# specify -D_GNU_SOURCE unconditionally
+default_cflags += '-D_GNU_SOURCE'
+
 foreach l:libraries
        build = true
        name = l
@@ -45,18 +52,17 @@ foreach l:libraries
        # use "deps" for internal DPDK dependencies, and "ext_deps" for
        # external package/library requirements
        ext_deps = []
-       deps = ['eal']   # eal is standard dependency except for itself
-       if l == 'kvargs'
-               deps = []
-       endif
-       if l == 'eal'
-               deps = ['kvargs']
+       deps = []
+	# eal is a standard dependency once built
+       if dpdk_conf.has('RTE_LIBRTE_EAL')
+               deps += ['eal']
        endif
 
        dir_name = 'librte_' + l
        subdir(dir_name)
 
        if build
+               enabled_libs += name
                dpdk_conf.set('RTE_LIBRTE_' + name.to_upper(), 1)
                install_headers(headers)
 
@@ -87,10 +93,8 @@ foreach l:libraries
                                lib_version = '@0@.1'.format(version)
                                so_version = '@0@'.format(version)
                        else
-                               prj_ver = meson.project_version().split('.')
-                               lib_version = '@0@.@1@'.format(
-                                               prj_ver.get(0), prj_ver.get(1))
-                               so_version = lib_version
+                               lib_version = major_version
+                               so_version = major_version
                        endif
 
                        # first build static lib
@@ -126,6 +130,7 @@ foreach l:libraries
                                        dependencies: shared_deps)
 
                        dpdk_libraries = [shared_lib] + dpdk_libraries
+                       dpdk_static_libraries = [static_lib] + dpdk_static_libraries
                endif # sources.length() > 0
 
                set_variable('shared_' + libname, shared_dep)
index e718972..9f62697 100644 (file)
@@ -2,7 +2,7 @@
 # Copyright(c) 2017 Intel Corporation
 
 project('DPDK', 'C',
-       version: '18.08.0',
+       version: '18.11.0-rc1',
        license: 'BSD',
        default_options: ['buildtype=release', 'default_library=static'],
        meson_version: '>= 0.41'
@@ -12,10 +12,21 @@ project('DPDK', 'C',
 cc = meson.get_compiler('c')
 dpdk_conf = configuration_data()
 dpdk_libraries = []
+dpdk_static_libraries = []
 dpdk_drivers = []
 dpdk_extra_ldflags = []
+dpdk_app_link_libraries = []
 
-driver_install_path = join_paths(get_option('libdir'), 'dpdk/drivers')
+# set the major version, which might be used by drivers and libraries
+# depending on the configuration options
+pver = meson.project_version().split('.')
+major_version = '@0@.@1@'.format(pver.get(0), pver.get(1))
+
+pmd_subdir_opt = get_option('drivers_install_subdir')
+if pmd_subdir_opt.contains('<VERSION>')
+       pmd_subdir_opt = major_version.join(pmd_subdir_opt.split('<VERSION>'))
+endif
+driver_install_path = join_paths(get_option('libdir'), pmd_subdir_opt)
 eal_pmd_path = join_paths(get_option('prefix'), driver_install_path)
 
 # configure the build, and make sure configs here and in config folder are
@@ -34,6 +45,9 @@ subdir('usertools')
 subdir('app')
 subdir('test')
 
+# build docs
+subdir('doc')
+
 # build any examples explicitly requested - useful for developers
 if get_option('examples') != ''
        subdir('examples')
@@ -73,3 +87,39 @@ pkg.generate(name: meson.project_name(),
        subdirs: [get_option('include_subdir_arch'), '.'],
        extra_cflags: ['-include', 'rte_config.h'] + machine_args
 )
+
+# final output: list all the libs and drivers to be built
+# this does not affect any part of the build; it is for information only.
+output_message = '\n=================\nLibraries Enabled\n=================\n'
+output_message += '\nlibs:\n\t'
+output_count = 0
+foreach lib:enabled_libs
+       output_message += lib + ', '
+       output_count += 1
+       if output_count == 8
+               output_message += '\n\t'
+               output_count = 0
+       endif
+endforeach
+message(output_message + '\n')
+
+
+# prior to 0.47 set_variable didn't work with arrays, so we can't
+# track driver lists easily
+if meson.version().version_compare('>=0.47')
+       output_message = '\n===============\nDrivers Enabled\n===============\n'
+       foreach class:driver_classes
+               class_drivers = get_variable(class + '_drivers')
+               output_message += '\n' + class + ':\n\t'
+               output_count = 0
+               foreach drv:class_drivers
+                       output_message += drv + ', '
+                       output_count += 1
+                       if output_count == 8
+                               output_message += '\n\t'
+                               output_count = 0
+                       endif
+               endforeach
+       endforeach
+       message(output_message + '\n')
+endif
index c843278..10b5416 100644 (file)
@@ -1,13 +1,19 @@
 option('allow_invalid_socket_id', type: 'boolean', value: false,
 	description: 'allow out-of-range NUMA socket ids for platforms that don\'t report the value correctly')
+option('drivers_install_subdir', type: 'string', value: 'dpdk/pmds-<VERSION>',
+	description: 'Subdirectory of libdir in which to install PMDs. Defaults to a versioned subdirectory.')
+option('enable_driver_mlx_glue', type: 'boolean', value: false,
+       description: 'Enable glue library for Mellanox PMDs')
 option('enable_kmods', type: 'boolean', value: true,
        description: 'build kernel modules')
+option('enable_docs', type: 'boolean', value: false,
+       description: 'build documentation')
 option('examples', type: 'string', value: '',
        description: 'Comma-separated list of examples to build by default')
 option('include_subdir_arch', type: 'string', value: '',
        description: 'subdirectory where to install arch-dependent headers')
 option('kernel_dir', type: 'string', value: '',
-       description: 'path to the kernel for building kernel modules')
+	description: 'path to the kernel for building kernel modules; they will be installed in $DEST_DIR/$kernel_dir/../extra/dpdk')
 option('lib_musdk_dir', type: 'string', value: '',
        description: 'path to the MUSDK library installation directory')
 option('machine', type: 'string', value: 'native',
index a734cbb..0cf3791 100644 (file)
@@ -60,7 +60,7 @@ EXPERIMENTAL_CHECK = $(RTE_SDK)/buildtools/check-experimental-syms.sh
 CHECK_EXPERIMENTAL = $(EXPERIMENTAL_CHECK) $(SRCDIR)/$(EXPORT_MAP) $@
 
 PMDINFO_GEN = $(RTE_SDK_BIN)/app/dpdk-pmdinfogen $@ $@.pmd.c
-PMDINFO_CC = $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@.pmd.o $@.pmd.c
+PMDINFO_CC = $(CC) $(CPPFLAGS) $(CFLAGS) $(EXTRA_CFLAGS) -c -o $@.pmd.o $@.pmd.c
 PMDINFO_LD = $(CROSS)ld $(LDFLAGS) -r -o $@.o $@.pmd.o $@
 PMDINFO_TO_O = if grep -q 'RTE_PMD_REGISTER_.*(.*)' $<; then \
        echo "$(if $V,$(PMDINFO_GEN),  PMDINFO $@.pmd.c)" && \
index de33883..3ebc4e6 100644 (file)
@@ -50,6 +50,9 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_LPM)            += -lrte_lpm
 _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL)            += --whole-archive
 _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL)            += -lrte_acl
 _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL)            += --no-whole-archive
+_LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY)      += --whole-archive
+_LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY)      += -lrte_telemetry -ljansson
+_LDLIBS-$(CONFIG_RTE_LIBRTE_TELEMETRY)      += --no-whole-archive
 _LDLIBS-$(CONFIG_RTE_LIBRTE_JOBSTATS)       += -lrte_jobstats
 _LDLIBS-$(CONFIG_RTE_LIBRTE_METRICS)        += -lrte_metrics
 _LDLIBS-$(CONFIG_RTE_LIBRTE_BITRATE)        += -lrte_bitratestats
@@ -94,10 +97,28 @@ ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO),y)
+_LDLIBS-y += -lrte_common_cpt
+endif
+
 ifeq ($(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF)$(CONFIG_RTE_LIBRTE_OCTEONTX_MEMPOOL),yy)
 _LDLIBS-y += -lrte_common_octeontx
 endif
 
+MVEP-y := $(CONFIG_RTE_LIBRTE_MVPP2_PMD)
+MVEP-y += $(CONFIG_RTE_LIBRTE_MVNETA_PMD)
+MVEP-y += $(CONFIG_RTE_LIBRTE_PMD_MVSAM_CRYPTO)
+ifneq (,$(findstring y,$(MVEP-y)))
+_LDLIBS-y += -lrte_common_mvep -L$(LIBMUSDK_PATH)/lib -lmusdk
+endif
+
+ifeq ($(CONFIG_RTE_LIBRTE_DPAA_BUS),y)
+_LDLIBS-$(CONFIG_RTE_LIBRTE_COMMON_DPAAX)   += -lrte_common_dpaax
+endif
+ifeq ($(CONFIG_RTE_LIBRTE_FSLMC_BUS),y)
+_LDLIBS-$(CONFIG_RTE_LIBRTE_COMMON_DPAAX)   += -lrte_common_dpaax
+endif
+
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PCI_BUS)        += -lrte_bus_pci
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VDEV_BUS)       += -lrte_bus_vdev
 _LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA_BUS)       += -lrte_bus_dpaa
@@ -119,6 +140,7 @@ endif
 
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET)  += -lrte_pmd_af_packet
 _LDLIBS-$(CONFIG_RTE_LIBRTE_ARK_PMD)        += -lrte_pmd_ark
+_LDLIBS-$(CONFIG_RTE_LIBRTE_ATLANTIC_PMD)   += -lrte_pmd_atlantic
 _LDLIBS-$(CONFIG_RTE_LIBRTE_AVF_PMD)        += -lrte_pmd_avf
 _LDLIBS-$(CONFIG_RTE_LIBRTE_AVP_PMD)        += -lrte_pmd_avp
 _LDLIBS-$(CONFIG_RTE_LIBRTE_AXGBE_PMD)      += -lrte_pmd_axgbe
@@ -134,6 +156,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_DPAA2_PMD)      += -lrte_pmd_dpaa2
 endif
 _LDLIBS-$(CONFIG_RTE_LIBRTE_E1000_PMD)      += -lrte_pmd_e1000
 _LDLIBS-$(CONFIG_RTE_LIBRTE_ENA_PMD)        += -lrte_pmd_ena
+_LDLIBS-$(CONFIG_RTE_LIBRTE_ENETC_PMD)      += -lrte_pmd_enetc
 _LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD)       += -lrte_pmd_enic
 _LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD)      += -lrte_pmd_fm10k
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_FAILSAFE)   += -lrte_pmd_failsafe
@@ -153,7 +176,8 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -ldl -lmnl
 else
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -libverbs -lmlx5 -lmnl
 endif
-_LDLIBS-$(CONFIG_RTE_LIBRTE_MVPP2_PMD)      += -lrte_pmd_mvpp2 -L$(LIBMUSDK_PATH)/lib -lmusdk
+_LDLIBS-$(CONFIG_RTE_LIBRTE_MVPP2_PMD)      += -lrte_pmd_mvpp2
+_LDLIBS-$(CONFIG_RTE_LIBRTE_MVNETA_PMD)     += -lrte_pmd_mvneta
 _LDLIBS-$(CONFIG_RTE_LIBRTE_NFP_PMD)        += -lrte_pmd_nfp
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_NULL)       += -lrte_pmd_null
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lrte_pmd_pcap -lpcap
@@ -211,13 +235,17 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_ZUC)         += -L$(LIBSSO_ZUC_PATH)/build -lsso
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO)    += -lrte_pmd_armv8
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_ARMV8_CRYPTO)    += -L$(ARMV8_CRYPTO_LIB_PATH) -larmv8_crypto
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_MVSAM_CRYPTO) += -L$(LIBMUSDK_PATH)/lib -lrte_pmd_mvsam_crypto -lmusdk
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO) += -lrte_pmd_octeontx_crypto
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_CRYPTO_SCHEDULER) += -lrte_pmd_crypto_scheduler
+ifeq ($(CONFIG_RTE_LIBRTE_SECURITY),y)
 ifeq ($(CONFIG_RTE_EAL_VFIO)$(CONFIG_RTE_LIBRTE_FSLMC_BUS),yy)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_DPAA2_SEC)   += -lrte_pmd_dpaa2_sec
 endif # CONFIG_RTE_LIBRTE_FSLMC_BUS
 ifeq ($(CONFIG_RTE_LIBRTE_DPAA_BUS),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_DPAA_SEC)   += -lrte_pmd_dpaa_sec
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_CAAM_JR)   += -lrte_pmd_caam_jr
 endif # CONFIG_RTE_LIBRTE_DPAA_BUS
+endif # CONFIG_RTE_LIBRTE_SECURITY
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_VIRTIO_CRYPTO) += -lrte_pmd_virtio_crypto
 endif # CONFIG_RTE_LIBRTE_CRYPTODEV
 
@@ -236,6 +264,7 @@ endif # CONFIG_RTE_LIBRTE_COMPRESSDEV
 ifeq ($(CONFIG_RTE_LIBRTE_EVENTDEV),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SKELETON_EVENTDEV) += -lrte_pmd_skeleton_event
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SW_EVENTDEV) += -lrte_pmd_sw_event
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_DSW_EVENTDEV) += -lrte_pmd_dsw_event
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF) += -lrte_pmd_octeontx_ssovf
 ifeq ($(CONFIG_RTE_LIBRTE_DPAA_BUS),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_DPAA_EVENTDEV) += -lrte_pmd_dpaa_event
index 6071313..43ed841 100644 (file)
@@ -89,7 +89,7 @@ CPUFLAGS += VSX
 endif
 
 # ARM flags
-ifneq ($(filter $(AUTO_CPUFLAGS),__ARM_NEON),)
+ifneq ($(filter __ARM_NEON __aarch64__,$(AUTO_CPUFLAGS)),)
 CPUFLAGS += NEON
 endif
 
index bd2e576..c44db64 100644 (file)
@@ -43,15 +43,13 @@ clean: api-html-clean guides-html-clean guides-pdf-clean guides-man-clean
 api-html: $(API_EXAMPLES)
        @echo 'doxygen for API...'
        $(Q)mkdir -p $(RTE_OUTPUT)/doc/html
-       $(Q)(cat $(RTE_SDK)/doc/api/doxy-api.conf     && \
-           printf 'PROJECT_NUMBER = '                && \
-                            $(MAKE) -rRs showversion && \
-           echo INPUT           += $(API_EXAMPLES)   && \
-           echo OUTPUT_DIRECTORY = $(RTE_OUTPUT)/doc && \
-           echo HTML_OUTPUT      = html/api          && \
-           echo GENERATE_HTML    = YES               && \
-           echo GENERATE_LATEX   = NO                && \
-           echo GENERATE_MAN     = NO                )| \
+       $(Q)(sed -e "s|@VERSION@|`$(MAKE) -rRs showversion`|" \
+                -e "s|@API_EXAMPLES@|$(API_EXAMPLES)|"       \
+                -e "s|@OUTPUT@|$(RTE_OUTPUT)/doc|"           \
+                -e "s|@HTML_OUTPUT@|html/api|"               \
+                -e "s|@TOPDIR@|./|g"                         \
+                -e "s|@STRIP_FROM_PATH@|./|g"                \
+                $(RTE_SDK)/doc/api/doxy-api.conf.in)|        \
            doxygen -
        $(Q)$(RTE_SDK)/doc/api/doxy-html-custom.sh $(RTE_OUTPUT)/doc/html/api/doxygen.css
 
@@ -63,10 +61,7 @@ api-html-clean:
 
 $(API_EXAMPLES): api-html-clean
        $(Q)mkdir -p $(@D)
-       @printf '/**\n' > $(API_EXAMPLES)
-       @printf '@page examples DPDK Example Programs\n\n' >> $(API_EXAMPLES)
-       @find examples -type f -name '*.c' -printf '@example %p\n' | LC_ALL=C sort >> $(API_EXAMPLES)
-       @printf '*/\n' >> $(API_EXAMPLES)
+       $(Q)doc/api/generate_examples.sh examples $(API_EXAMPLES)
 
 guides-pdf-clean: guides-pdf-img-clean
 guides-pdf-img-clean:
index 98085cd..dd149ac 100644 (file)
@@ -108,6 +108,9 @@ CFLAGS += -include $(RTE_SDK_BIN)/include/rte_config.h
 LDFLAGS += -L$(RTE_SDK_BIN)/lib
 endif
 
+# always define _GNU_SOURCE
+CFLAGS += -D_GNU_SOURCE
+
 export CFLAGS
 export LDFLAGS
 
index 1e4434f..4490429 100644 (file)
@@ -79,11 +79,13 @@ else
                CONFIG_RTE_LIBRTE_PMD_OPDL_EVENTDEV=d
        endif
 
-       # Disable octeontx event PMD for gcc < 4.8.6
+	# Disable octeontx event PMD for gcc < 4.8.6 on arm64
+       ifeq ($(CONFIG_RTE_ARCH), arm64)
        ifeq ($(shell test $(GCC_VERSION)$(GCC_PATCHLEVEL) -lt 486 && echo 1), 1)
                CONFIG_RTE_LIBRTE_PMD_OCTEONTX_SSOVF=d
                CONFIG_RTE_LIBRTE_OCTEONTX_MEMPOOL=d
                CONFIG_RTE_LIBRTE_OCTEONTX_PMD=d
        endif
+       endif
 
 endif
index f33216c..b75688a 100644 (file)
@@ -70,7 +70,7 @@ struct app_params app = {
 static struct rte_eth_conf port_conf = {
        .rxmode = {
                .split_hdr_size = 0,
-               .offloads = DEV_RX_OFFLOAD_CHECKSUM | DEV_RX_OFFLOAD_CRC_STRIP,
+               .offloads = DEV_RX_OFFLOAD_CHECKSUM,
        },
        .rx_adv_conf = {
                .rss_conf = {
index e6967ba..ab4fec3 100644 (file)
@@ -71,6 +71,7 @@ SRCS-y += test_bitmap.c
 SRCS-y += test_reciprocal_division.c
 SRCS-y += test_reciprocal_division_perf.c
 SRCS-y += test_fbarray.c
+SRCS-y += test_external_mem.c
 
 SRCS-y += test_ring.c
 SRCS-y += test_ring_perf.c
@@ -112,9 +113,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_thash.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_perf.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_functions.c
-SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_scaling.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_multiwriter.c
 SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_readwrite.c
+SRCS-$(CONFIG_RTE_LIBRTE_HASH) += test_hash_readwrite_lf.c
 
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm.c
 SRCS-$(CONFIG_RTE_LIBRTE_LPM) += test_lpm_perf.c
@@ -183,6 +184,8 @@ SRCS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += test_cryptodev_blockcipher.c
 SRCS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += test_cryptodev.c
 SRCS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += test_cryptodev_asym.c
 
+SRCS-$(CONFIG_RTE_LIBRTE_METRICS) += test_metrics.c
+
 ifeq ($(CONFIG_RTE_COMPRESSDEV_TEST),y)
 SRCS-$(CONFIG_RTE_LIBRTE_COMPRESSDEV) += test_compressdev.c
 endif
@@ -191,6 +194,7 @@ ifeq ($(CONFIG_RTE_LIBRTE_EVENTDEV),y)
 SRCS-y += test_eventdev.c
 SRCS-y += test_event_ring.c
 SRCS-y += test_event_eth_rx_adapter.c
+SRCS-y += test_event_eth_tx_adapter.c
 SRCS-y += test_event_timer_adapter.c
 SRCS-y += test_event_crypto_adapter.c
 endif
@@ -208,8 +212,6 @@ CFLAGS += -DALLOW_EXPERIMENTAL_API
 CFLAGS += -O3
 CFLAGS += $(WERROR_FLAGS)
 
-CFLAGS += -D_GNU_SOURCE
-
 LDLIBS += -lm
 ifeq ($(CONFIG_RTE_COMPRESSDEV_TEST),y)
 ifeq ($(CONFIG_RTE_LIBRTE_COMPRESSDEV),y)
index f68d9b1..0fb7866 100644 (file)
@@ -278,6 +278,12 @@ parallel_test_list = [
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Sleep delay",
+        "Command": "delay_us_sleep_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":    "Rawdev autotest",
         "Command": "rawdev_autotest",
@@ -344,12 +350,6 @@ parallel_test_list = [
         "Func":    default_autotest,
         "Report":  None,
     },
-    {
-        "Name":    "Hash scaling autotest",
-        "Command": "hash_scaling_autotest",
-        "Func":    default_autotest,
-        "Report":  None,
-    },
     {
         "Name":    "Hash multiwriter autotest",
         "Command": "hash_multiwriter_autotest",
@@ -477,8 +477,20 @@ parallel_test_list = [
         "Report":  None,
     },
     {
-        "Name":    "Fbarray autotest",
-        "Command": "fbarray_autotest",
+       "Name":    "Fbarray autotest",
+       "Command": "fbarray_autotest",
+       "Func":    default_autotest,
+       "Report":  None,
+    },
+    {
+       "Name":    "External memory autotest",
+       "Command": "external_mem_autotest",
+       "Func":    default_autotest,
+       "Report":  None,
+    },
+    {
+        "Name":    "Metrics autotest",
+        "Command": "metrics_autotest",
         "Func":    default_autotest,
         "Report":  None,
     },
@@ -573,6 +585,18 @@ non_parallel_test_list = [
         "Func":    default_autotest,
         "Report":  None,
     },
+    {
+        "Name":    "Hash read-write concurrency autotest",
+        "Command": "hash_readwrite_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
+    {
+        "Name":    "Hash read-write lock-free concurrency autotest",
+        "Command": "hash_readwrite_lf_autotest",
+        "Func":    default_autotest,
+        "Report":  None,
+    },
     {
         "Name":       "Power autotest",
         "Command":    "power_autotest",
index b1dd6ec..554e994 100644 (file)
@@ -33,15 +33,20 @@ test_sources = files('commands.c',
        'test_efd.c',
        'test_efd_perf.c',
        'test_errno.c',
+       'test_event_crypto_adapter.c',
+       'test_event_eth_rx_adapter.c',
        'test_event_ring.c',
+       'test_event_eth_tx_adapter.c',
+       'test_event_timer_adapter.c',
        'test_eventdev.c',
        'test_func_reentrancy.c',
        'test_flow_classify.c',
        'test_hash.c',
        'test_hash_functions.c',
        'test_hash_multiwriter.c',
+       'test_hash_readwrite.c',
        'test_hash_perf.c',
-       'test_hash_scaling.c',
+       'test_hash_readwrite_lf.c',
        'test_interrupts.c',
        'test_kni.c',
        'test_kvargs.c',
@@ -63,6 +68,7 @@ test_sources = files('commands.c',
        'test_mempool_perf.c',
        'test_memzone.c',
        'test_meter.c',
+       'test_metrics.c',
        'test_mp_secondary.c',
        'test_per_lcore.c',
        'test_pmd_perf.c',
@@ -111,6 +117,7 @@ test_deps = ['acl',
        'hash',
        'lpm',
        'member',
+       'metrics',
        'pipeline',
        'port',
        'reorder',
@@ -141,8 +148,10 @@ test_names = [
        'cryptodev_sw_mvsam_autotest',
        'cryptodev_dpaa2_sec_autotest',
        'cryptodev_dpaa_sec_autotest',
+       'cryptodev_octeontx_autotest',
        'cycles_autotest',
        'debug_autotest',
+       'delay_us_sleep_autotest',
        'devargs_autotest',
        'distributor_autotest',
        'distributor_perf_autotest',
@@ -151,17 +160,24 @@ test_names = [
        'efd_autotest',
        'efd_perf_autotest',
        'errno_autotest',
+       'event_crypto_adapter_autotest',
+       'event_eth_rx_adapter_autotest',
+       'event_eth_rx_intr_adapter_autotest',
        'event_ring_autotest',
+       'event_eth_tx_adapter_autotest',
+       'event_timer_adapter_autotest',
        'eventdev_common_autotest',
        'eventdev_octeontx_autotest',
        'eventdev_sw_autotest',
+       'external_mem_autotest',
        'func_reentrancy_autotest',
        'flow_classify_autotest',
-       'hash_scaling_autotest',
        'hash_autotest',
        'hash_functions_autotest',
        'hash_multiwriter_autotest',
        'hash_perf_autotest',
+       'hash_readwrite_autotest',
+       'hash_readwrite_lf_autotest',
        'interrupt_autotest',
        'kni_autotest',
        'kvargs_autotest',
@@ -183,6 +199,7 @@ test_names = [
        'mempool_perf_autotest',
        'memzone_autotest',
        'meter_autotest',
+       'metrics_autotest',
        'multiprocess_autotest',
        'per_lcore_autotest',
        'pmd_perf_autotest',
@@ -242,6 +259,9 @@ if cc.has_argument('-Wno-format-truncation')
     cflags += '-Wno-format-truncation'
 endif
 
+# specify -D_GNU_SOURCE unconditionally
+default_cflags += '-D_GNU_SOURCE'
+
 test_dep_objs = []
 compress_test_dep = dependency('zlib', required: false)
 if compress_test_dep.found()
index 44dfe20..ffa9c36 100644 (file)
@@ -84,22 +84,29 @@ main(int argc, char **argv)
        int ret;
 
        ret = rte_eal_init(argc, argv);
-       if (ret < 0)
-               return -1;
+       if (ret < 0) {
+               ret = -1;
+               goto out;
+       }
 
 #ifdef RTE_LIBRTE_TIMER
        rte_timer_subsystem_init();
 #endif
 
-       if (commands_init() < 0)
-               return -1;
+       if (commands_init() < 0) {
+               ret = -1;
+               goto out;
+       }
 
        argv += ret;
 
        prgname = argv[0];
 
-       if ((recursive_call = getenv(RECURSIVE_ENV_VAR)) != NULL)
-               return do_recursive_call();
+       recursive_call = getenv(RECURSIVE_ENV_VAR);
+       if (recursive_call != NULL) {
+               ret = do_recursive_call();
+               goto out;
+       }
 
 #ifdef RTE_LIBEAL_USE_HPET
        if (rte_eal_hpet_init(1) < 0)
@@ -111,7 +118,8 @@ main(int argc, char **argv)
 #ifdef RTE_LIBRTE_CMDLINE
        cl = cmdline_stdin_new(main_ctx, "RTE>>");
        if (cl == NULL) {
-               return -1;
+               ret = -1;
+               goto out;
        }
 
        char *dpdk_test = getenv("DPDK_TEST");
@@ -120,18 +128,23 @@ main(int argc, char **argv)
                snprintf(buf, sizeof(buf), "%s\n", dpdk_test);
                if (cmdline_in(cl, buf, strlen(buf)) < 0) {
                        printf("error on cmdline input\n");
-                       return -1;
+                       ret = -1;
+                       goto out;
                }
 
                cmdline_stdin_exit(cl);
-               return last_test_result;
+               ret = last_test_result;
+               goto out;
        }
        /* if no DPDK_TEST env variable, go interactive */
        cmdline_interact(cl);
        cmdline_stdin_exit(cl);
 #endif
+       ret = 0;
 
-       return 0;
+out:
+       rte_eal_cleanup();
+       return ret;
 }
 
 
index c4811c8..bbb0447 100644 (file)
@@ -80,34 +80,40 @@ enum {
 struct rte_acl_ipv4vlan_rule invalid_layout_rules[] = {
                /* test src and dst address */
                {
-                               .data = {.userdata = 1, .category_mask = 1},
+                               .data = {.userdata = 1, .category_mask = 1,
+                                       .priority = 1},
                                .src_addr = IPv4(10,0,0,0),
                                .src_mask_len = 24,
                },
                {
-                               .data = {.userdata = 2, .category_mask = 1},
+                               .data = {.userdata = 2, .category_mask = 1,
+                                       .priority = 1},
                                .dst_addr = IPv4(10,0,0,0),
                                .dst_mask_len = 24,
                },
                /* test src and dst ports */
                {
-                               .data = {.userdata = 3, .category_mask = 1},
+                               .data = {.userdata = 3, .category_mask = 1,
+                                       .priority = 1},
                                .dst_port_low = 100,
                                .dst_port_high = 100,
                },
                {
-                               .data = {.userdata = 4, .category_mask = 1},
+                               .data = {.userdata = 4, .category_mask = 1,
+                                       .priority = 1},
                                .src_port_low = 100,
                                .src_port_high = 100,
                },
                /* test proto */
                {
-                               .data = {.userdata = 5, .category_mask = 1},
+                               .data = {.userdata = 5, .category_mask = 1,
+                                       .priority = 1},
                                .proto = 0xf,
                                .proto_mask = 0xf
                },
                {
-                               .data = {.userdata = 6, .category_mask = 1},
+                               .data = {.userdata = 6, .category_mask = 1,
+                                       .priority = 1},
                                .dst_port_low = 0xf,
                                .dst_port_high = 0xf,
                }
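
Every rule above gains an explicit .priority = 1 alongside the existing .userdata and .category_mask; presumably the ACL rule checks in this release reject a zero priority, so even rules that are meant to fail layout validation must carry a valid one. A minimal sketch, under that assumption, of a rule that passes the generic field checks (values are illustrative):

	#include <rte_acl.h>
	#include <rte_ip.h>	/* for the IPv4() address macro */

	static struct rte_acl_ipv4vlan_rule rule = {
		.data = {
			.userdata = 1,		/* must be non-zero */
			.category_mask = 1,	/* at least one category bit */
			.priority = 1,		/* assumed lowest accepted value */
		},
		.src_addr = IPv4(10, 0, 0, 0),
		.src_mask_len = 24,
	};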
index f566947..d1284b3 100644 (file)
@@ -178,7 +178,10 @@ static int
 test_alarm(void)
 {
        int count = 0;
-
+#ifdef RTE_EXEC_ENV_BSDAPP
+       printf("The alarm API is not supported on FreeBSD\n");
+       return 0;
+#endif
        /* check if the callback will be called */
        printf("check if the callback will be called\n");
        flag = 0;
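
The early return above compiles the alarm checks out on FreeBSD, where the EAL alarm API is not implemented. A hedged sketch of the set/cancel round trip the rest of the function exercises (the 1-second delay and the callback are illustrative, not taken from the test):

	#include <rte_alarm.h>

	static int flag;

	static void
	cb(void *arg)
	{
		*(int *)arg = 1;	/* record that the callback fired */
	}

	static int
	alarm_roundtrip(void)
	{
		/* arm a one-shot alarm ~1s out ... */
		if (rte_eal_alarm_set(1000000 /* us */, cb, &flag) < 0)
			return -1;
		/* ... then cancel anything still pending for (cb, &flag) */
		return rte_eal_alarm_cancel(cb, &flag);
	}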
index c3169e9..95c5184 100644 (file)
@@ -101,6 +101,7 @@ test_bitmap_slab_set_get(struct rte_bitmap *bmp)
 static int
 test_bitmap_set_get_clear(struct rte_bitmap *bmp)
 {
+       uint64_t val;
        int i;
 
        rte_bitmap_reset(bmp);
@@ -124,6 +125,23 @@ test_bitmap_set_get_clear(struct rte_bitmap *bmp)
                }
        }
 
+       rte_bitmap_reset(bmp);
+
+       /* Alternate slab set test */
+       for (i = 0; i < MAX_BITS; i++) {
+               if (i % RTE_BITMAP_SLAB_BIT_SIZE)
+                       rte_bitmap_set(bmp, i);
+       }
+
+       for (i = 0; i < MAX_BITS; i++) {
+               val = rte_bitmap_get(bmp, i);
+               if (((i % RTE_BITMAP_SLAB_BIT_SIZE) && !val) ||
+                   (!(i % RTE_BITMAP_SLAB_BIT_SIZE) && val)) {
+                       printf("Failed to get set bit.\n");
+                       return TEST_FAILED;
+               }
+       }
+
        return TEST_SUCCESS;
 }
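
The new loop sets every bit whose index is not a multiple of RTE_BITMAP_SLAB_BIT_SIZE (the width of one slab word), leaving exactly the slab-aligned bit of each slab clear, and the read-back insists that set and clear alternate on that boundary. A hedged restatement of the predicate being checked:

	#include <rte_bitmap.h>

	/* returns 0 when bit i of bmp matches the alternating-slab pattern */
	static int
	check_alternating(struct rte_bitmap *bmp, uint32_t i)
	{
		int expected = (i % RTE_BITMAP_SLAB_BIT_SIZE) != 0;
		int actual = rte_bitmap_get(bmp, i) != 0;	/* non-zero when set */

		return actual == expected ? 0 : -1;
	}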
 
index a6044b2..84065eb 100644 (file)
@@ -635,7 +635,7 @@ test_device_configure_invalid_queue_pair_ids(void)
 
 
        /* valid - max value queue pairs */
-       ts_params->conf.nb_queue_pairs = MAX_NUM_QPS_PER_QAT_DEVICE;
+       ts_params->conf.nb_queue_pairs = orig_nb_qps;
 
        TEST_ASSERT_SUCCESS(rte_cryptodev_configure(ts_params->valid_devs[0],
                        &ts_params->conf),
@@ -667,7 +667,7 @@ test_device_configure_invalid_queue_pair_ids(void)
 
 
        /* invalid - max value + 1 queue pairs */
-       ts_params->conf.nb_queue_pairs = MAX_NUM_QPS_PER_QAT_DEVICE + 1;
+       ts_params->conf.nb_queue_pairs = orig_nb_qps + 1;
 
        TEST_ASSERT_FAIL(rte_cryptodev_configure(ts_params->valid_devs[0],
                        &ts_params->conf),
@@ -819,7 +819,7 @@ test_queue_pair_descriptor_setup(void)
        /* test invalid queue pair id */
        qp_conf.nb_descriptors = DEFAULT_NUM_OPS_INFLIGHT;      /*valid */
 
-       qp_id = DEFAULT_NUM_QPS_PER_QAT_DEVICE;         /*invalid */
+       qp_id = ts_params->conf.nb_queue_pairs;         /*invalid */
 
        TEST_ASSERT_FAIL(rte_cryptodev_queue_pair_setup(
                        ts_params->valid_devs[0],
@@ -1876,6 +1876,64 @@ test_AES_cipheronly_virtio_all(void)
        return TEST_SUCCESS;
 }
 
+static int
+test_AES_chain_caam_jr_all(void)
+{
+       struct crypto_testsuite_params *ts_params = &testsuite_params;
+       int status;
+
+       status = test_blockcipher_all_tests(ts_params->mbuf_pool,
+               ts_params->op_mpool,
+               ts_params->session_mpool,
+               ts_params->valid_devs[0],
+               rte_cryptodev_driver_id_get(
+               RTE_STR(CRYPTODEV_NAME_CAAM_JR_PMD)),
+               BLKCIPHER_AES_CHAIN_TYPE);
+
+       TEST_ASSERT_EQUAL(status, 0, "Test failed");
+
+       return TEST_SUCCESS;
+}
+
+static int
+test_AES_cipheronly_caam_jr_all(void)
+{
+       struct crypto_testsuite_params *ts_params = &testsuite_params;
+       int status;
+
+       status = test_blockcipher_all_tests(ts_params->mbuf_pool,
+               ts_params->op_mpool,
+               ts_params->session_mpool,
+               ts_params->valid_devs[0],
+               rte_cryptodev_driver_id_get(
+               RTE_STR(CRYPTODEV_NAME_CAAM_JR_PMD)),
+               BLKCIPHER_AES_CIPHERONLY_TYPE);
+
+       TEST_ASSERT_EQUAL(status, 0, "Test failed");
+
+       return TEST_SUCCESS;
+}
+
+static int
+test_authonly_caam_jr_all(void)
+{
+       struct crypto_testsuite_params *ts_params = &testsuite_params;
+       int status;
+
+       status = test_blockcipher_all_tests(ts_params->mbuf_pool,
+               ts_params->op_mpool,
+               ts_params->session_mpool,
+               ts_params->valid_devs[0],
+               rte_cryptodev_driver_id_get(
+               RTE_STR(CRYPTODEV_NAME_CAAM_JR_PMD)),
+               BLKCIPHER_AUTHONLY_TYPE);
+
+       TEST_ASSERT_EQUAL(status, 0, "Test failed");
+
+       return TEST_SUCCESS;
+}
+
+
 static int
 test_AES_chain_dpaa_sec_all(void)
 {
@@ -2142,6 +2200,96 @@ test_3DES_cipheronly_mrvl_all(void)
        return TEST_SUCCESS;
 }
 
+static int
+test_AES_chain_octeontx_all(void)
+{
+       struct crypto_testsuite_params *ts_params = &testsuite_params;
+       int status;
+
+       status = test_blockcipher_all_tests(ts_params->mbuf_pool,
+               ts_params->op_mpool, ts_params->session_mpool,
+               ts_params->valid_devs[0],
+               rte_cryptodev_driver_id_get(
+               RTE_STR(CRYPTODEV_NAME_OCTEONTX_SYM_PMD)),
+               BLKCIPHER_AES_CHAIN_TYPE);
+
+       TEST_ASSERT_EQUAL(status, 0, "Test failed");
+
+       return TEST_SUCCESS;
+}
+
+static int
+test_AES_cipheronly_octeontx_all(void)
+{
+       struct crypto_testsuite_params *ts_params = &testsuite_params;
+       int status;
+
+       status = test_blockcipher_all_tests(ts_params->mbuf_pool,
+               ts_params->op_mpool, ts_params->session_mpool,
+               ts_params->valid_devs[0],
+               rte_cryptodev_driver_id_get(
+               RTE_STR(CRYPTODEV_NAME_OCTEONTX_SYM_PMD)),
+               BLKCIPHER_AES_CIPHERONLY_TYPE);
+
+       TEST_ASSERT_EQUAL(status, 0, "Test failed");
+
+       return TEST_SUCCESS;
+}
+
+static int
+test_3DES_chain_octeontx_all(void)
+{
+       struct crypto_testsuite_params *ts_params = &testsuite_params;
+       int status;
+
+       status = test_blockcipher_all_tests(ts_params->mbuf_pool,
+               ts_params->op_mpool, ts_params->session_mpool,
+               ts_params->valid_devs[0],
+               rte_cryptodev_driver_id_get(
+               RTE_STR(CRYPTODEV_NAME_OCTEONTX_SYM_PMD)),
+               BLKCIPHER_3DES_CHAIN_TYPE);
+
+       TEST_ASSERT_EQUAL(status, 0, "Test failed");
+
+       return TEST_SUCCESS;
+}
+
+static int
+test_3DES_cipheronly_octeontx_all(void)
+{
+       struct crypto_testsuite_params *ts_params = &testsuite_params;
+       int status;
+
+       status = test_blockcipher_all_tests(ts_params->mbuf_pool,
+               ts_params->op_mpool, ts_params->session_mpool,
+               ts_params->valid_devs[0],
+               rte_cryptodev_driver_id_get(
+               RTE_STR(CRYPTODEV_NAME_OCTEONTX_SYM_PMD)),
+               BLKCIPHER_3DES_CIPHERONLY_TYPE);
+
+       TEST_ASSERT_EQUAL(status, 0, "Test failed");
+
+       return TEST_SUCCESS;
+}
+
+static int
+test_authonly_octeontx_all(void)
+{
+       struct crypto_testsuite_params *ts_params = &testsuite_params;
+       int status;
+
+       status = test_blockcipher_all_tests(ts_params->mbuf_pool,
+               ts_params->op_mpool, ts_params->session_mpool,
+               ts_params->valid_devs[0],
+               rte_cryptodev_driver_id_get(
+               RTE_STR(CRYPTODEV_NAME_OCTEONTX_SYM_PMD)),
+               BLKCIPHER_AUTHONLY_TYPE);
+
+       TEST_ASSERT_EQUAL(status, 0, "Test failed");
+
+       return TEST_SUCCESS;
+}
+
 /* ***** SNOW 3G Tests ***** */
 static int
 create_wireless_algo_hash_session(uint8_t dev_id,
@@ -5058,6 +5206,44 @@ test_DES_docsis_mb_all(void)
        return TEST_SUCCESS;
 }
 
+static int
+test_3DES_chain_caam_jr_all(void)
+{
+       struct crypto_testsuite_params *ts_params = &testsuite_params;
+       int status;
+
+       status = test_blockcipher_all_tests(ts_params->mbuf_pool,
+               ts_params->op_mpool,
+               ts_params->session_mpool,
+               ts_params->valid_devs[0],
+               rte_cryptodev_driver_id_get(
+               RTE_STR(CRYPTODEV_NAME_CAAM_JR_PMD)),
+               BLKCIPHER_3DES_CHAIN_TYPE);
+
+       TEST_ASSERT_EQUAL(status, 0, "Test failed");
+
+       return TEST_SUCCESS;
+}
+
+static int
+test_3DES_cipheronly_caam_jr_all(void)
+{
+       struct crypto_testsuite_params *ts_params = &testsuite_params;
+       int status;
+
+       status = test_blockcipher_all_tests(ts_params->mbuf_pool,
+               ts_params->op_mpool,
+               ts_params->session_mpool,
+               ts_params->valid_devs[0],
+               rte_cryptodev_driver_id_get(
+               RTE_STR(CRYPTODEV_NAME_CAAM_JR_PMD)),
+               BLKCIPHER_3DES_CIPHERONLY_TYPE);
+
+       TEST_ASSERT_EQUAL(status, 0, "Test failed");
+
+       return TEST_SUCCESS;
+}
+
 static int
 test_3DES_chain_dpaa_sec_all(void)
 {
@@ -9042,57 +9228,6 @@ static struct unit_test_suite cryptodev_aesni_mb_testsuite  = {
        .setup = testsuite_setup,
        .teardown = testsuite_teardown,
        .unit_test_cases = {
-               TEST_CASE_ST(ut_setup, ut_teardown, test_AES_chain_mb_all),
-               TEST_CASE_ST(ut_setup, ut_teardown, test_AES_cipheronly_mb_all),
-               TEST_CASE_ST(ut_setup, ut_teardown, test_AES_docsis_mb_all),
-               TEST_CASE_ST(ut_setup, ut_teardown, test_authonly_mb_all),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                                               test_DES_cipheronly_mb_all),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                                               test_DES_docsis_mb_all),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                                               test_3DES_cipheronly_mb_all),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_CCM_authenticated_encryption_test_case_128_1),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_CCM_authenticated_decryption_test_case_128_1),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_CCM_authenticated_encryption_test_case_128_2),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_CCM_authenticated_decryption_test_case_128_2),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_CCM_authenticated_encryption_test_case_128_3),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_CCM_authenticated_decryption_test_case_128_3),
-
-               TEST_CASES_END() /**< NULL terminate unit test array */
-       }
-};
-
-static struct unit_test_suite cryptodev_openssl_testsuite  = {
-       .suite_name = "Crypto Device OPENSSL Unit Test Suite",
-       .setup = testsuite_setup,
-       .teardown = testsuite_teardown,
-       .unit_test_cases = {
-               TEST_CASE_ST(ut_setup, ut_teardown, test_multi_session),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                               test_multi_session_random_usage),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                               test_AES_chain_openssl_all),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                               test_AES_cipheronly_openssl_all),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                               test_3DES_chain_openssl_all),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                               test_3DES_cipheronly_openssl_all),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                               test_DES_cipheronly_openssl_all),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                               test_DES_docsis_openssl_all),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                               test_authonly_openssl_all),
-
-               /** AES GCM Authenticated Encryption */
                TEST_CASE_ST(ut_setup, ut_teardown,
                        test_AES_GCM_authenticated_encryption_test_case_1),
                TEST_CASE_ST(ut_setup, ut_teardown,
@@ -9124,7 +9259,6 @@ static struct unit_test_suite cryptodev_openssl_testsuite  = {
                TEST_CASE_ST(ut_setup, ut_teardown,
                        test_AES_GCM_authenticated_decryption_test_case_7),
 
-
                /** AES GCM Authenticated Encryption 192 bits key */
                TEST_CASE_ST(ut_setup, ut_teardown,
                        test_AES_GCM_auth_encryption_test_case_192_1),
@@ -9189,33 +9323,199 @@ static struct unit_test_suite cryptodev_openssl_testsuite  = {
                TEST_CASE_ST(ut_setup, ut_teardown,
                        test_AES_GCM_auth_decryption_test_case_256_7),
 
-               /** AES GMAC Authentication */
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_GMAC_authentication_test_case_1),
-               TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_GMAC_authentication_verify_test_case_1),
+               /** AES GCM Authenticated Encryption big aad size */
                TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_GMAC_authentication_test_case_2),
+                       test_AES_GCM_auth_encryption_test_case_aad_1),
                TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_GMAC_authentication_verify_test_case_2),
+                       test_AES_GCM_auth_encryption_test_case_aad_2),
+
+               /** AES GCM Authenticated Decryption big aad size */
                TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_GMAC_authentication_test_case_3),
+                       test_AES_GCM_auth_decryption_test_case_aad_1),
                TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_GMAC_authentication_verify_test_case_3),
+                       test_AES_GCM_auth_decryption_test_case_aad_2),
+
+               /** Session-less tests */
                TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_GMAC_authentication_test_case_4),
+                       test_AES_GCM_authenticated_encryption_sessionless_test_case_1),
                TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_GMAC_authentication_verify_test_case_4),
+                       test_AES_GCM_authenticated_decryption_sessionless_test_case_1),
 
-               /** AES CCM Authenticated Encryption 128 bits key */
+
+               TEST_CASE_ST(ut_setup, ut_teardown, test_AES_chain_mb_all),
+               TEST_CASE_ST(ut_setup, ut_teardown, test_AES_cipheronly_mb_all),
+               TEST_CASE_ST(ut_setup, ut_teardown, test_AES_docsis_mb_all),
+               TEST_CASE_ST(ut_setup, ut_teardown, test_authonly_mb_all),
                TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_CCM_authenticated_encryption_test_case_128_1),
+                                               test_DES_cipheronly_mb_all),
                TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_CCM_authenticated_encryption_test_case_128_2),
+                                               test_DES_docsis_mb_all),
                TEST_CASE_ST(ut_setup, ut_teardown,
-                       test_AES_CCM_authenticated_encryption_test_case_128_3),
-
-               /** AES CCM Authenticated Decryption 128 bits key*/
+                                               test_3DES_cipheronly_mb_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_CCM_authenticated_encryption_test_case_128_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_CCM_authenticated_decryption_test_case_128_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_CCM_authenticated_encryption_test_case_128_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_CCM_authenticated_decryption_test_case_128_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_CCM_authenticated_encryption_test_case_128_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_CCM_authenticated_decryption_test_case_128_3),
+
+               TEST_CASES_END() /**< NULL terminate unit test array */
+       }
+};
+
+static struct unit_test_suite cryptodev_openssl_testsuite  = {
+       .suite_name = "Crypto Device OPENSSL Unit Test Suite",
+       .setup = testsuite_setup,
+       .teardown = testsuite_teardown,
+       .unit_test_cases = {
+               TEST_CASE_ST(ut_setup, ut_teardown, test_multi_session),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                               test_multi_session_random_usage),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                               test_AES_chain_openssl_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                               test_AES_cipheronly_openssl_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                               test_3DES_chain_openssl_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                               test_3DES_cipheronly_openssl_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                               test_DES_cipheronly_openssl_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                               test_DES_docsis_openssl_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                               test_authonly_openssl_all),
+
+               /** AES GCM Authenticated Encryption */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_encryption_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_encryption_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_encryption_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_encryption_test_case_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_encryption_test_case_5),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_encryption_test_case_6),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_encryption_test_case_7),
+
+               /** AES GCM Authenticated Decryption */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_decryption_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_decryption_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_decryption_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_decryption_test_case_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_decryption_test_case_5),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_decryption_test_case_6),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_decryption_test_case_7),
+
+
+               /** AES GCM Authenticated Encryption 192 bits key */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_encryption_test_case_192_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_encryption_test_case_192_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_encryption_test_case_192_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_encryption_test_case_192_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_encryption_test_case_192_5),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_encryption_test_case_192_6),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_encryption_test_case_192_7),
+
+               /** AES GCM Authenticated Decryption 192 bits key */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_decryption_test_case_192_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_decryption_test_case_192_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_decryption_test_case_192_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_decryption_test_case_192_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_decryption_test_case_192_5),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_decryption_test_case_192_6),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_decryption_test_case_192_7),
+
+               /** AES GCM Authenticated Encryption 256 bits key */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_encryption_test_case_256_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_encryption_test_case_256_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_encryption_test_case_256_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_encryption_test_case_256_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_encryption_test_case_256_5),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_encryption_test_case_256_6),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_encryption_test_case_256_7),
+
+               /** AES GCM Authenticated Decryption 256 bits key */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_decryption_test_case_256_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_decryption_test_case_256_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_decryption_test_case_256_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_decryption_test_case_256_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_decryption_test_case_256_5),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_decryption_test_case_256_6),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_auth_decryption_test_case_256_7),
+
+               /** AES GMAC Authentication */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GMAC_authentication_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GMAC_authentication_verify_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GMAC_authentication_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GMAC_authentication_verify_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GMAC_authentication_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GMAC_authentication_verify_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GMAC_authentication_test_case_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GMAC_authentication_verify_test_case_4),
+
+               /** AES CCM Authenticated Encryption 128 bits key */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_CCM_authenticated_encryption_test_case_128_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_CCM_authenticated_encryption_test_case_128_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_CCM_authenticated_encryption_test_case_128_3),
+
+               /** AES CCM Authenticated Decryption 128 bits key*/
                TEST_CASE_ST(ut_setup, ut_teardown,
                        test_AES_CCM_authenticated_decryption_test_case_128_1),
                TEST_CASE_ST(ut_setup, ut_teardown,
@@ -9604,6 +9904,31 @@ static struct unit_test_suite cryptodev_sw_zuc_testsuite  = {
        }
 };
 
+static struct unit_test_suite cryptodev_caam_jr_testsuite  = {
+       .suite_name = "Crypto CAAM JR Unit Test Suite",
+       .setup = testsuite_setup,
+       .teardown = testsuite_teardown,
+       .unit_test_cases = {
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                            test_device_configure_invalid_dev_id),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                            test_multi_session),
+
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                            test_AES_chain_caam_jr_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                            test_3DES_chain_caam_jr_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                            test_AES_cipheronly_caam_jr_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                            test_3DES_cipheronly_caam_jr_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                            test_authonly_caam_jr_all),
+
+               TEST_CASES_END() /**< NULL terminate unit test array */
+       }
+};
+
 static struct unit_test_suite cryptodev_dpaa_sec_testsuite  = {
        .suite_name = "Crypto DPAA_SEC Unit Test Suite",
        .setup = testsuite_setup,
@@ -9951,6 +10276,218 @@ static struct unit_test_suite cryptodev_ccp_testsuite  = {
        }
 };
 
+static struct unit_test_suite cryptodev_octeontx_testsuite  = {
+       .suite_name = "Crypto Device OCTEONTX Unit Test Suite",
+       .setup = testsuite_setup,
+       .teardown = testsuite_teardown,
+       .unit_test_cases = {
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_chain_octeontx_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_cipheronly_octeontx_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_3DES_chain_octeontx_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_3DES_cipheronly_octeontx_all),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_authonly_octeontx_all),
+
+               /** AES GCM Authenticated Encryption */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_encryption_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_encryption_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_encryption_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_encryption_test_case_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_encryption_test_case_5),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_encryption_test_case_6),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_encryption_test_case_7),
+
+               /** AES GCM Authenticated Decryption */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_decryption_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_decryption_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_decryption_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_decryption_test_case_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_decryption_test_case_5),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_decryption_test_case_6),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GCM_authenticated_decryption_test_case_7),
+               /** AES GMAC Authentication */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GMAC_authentication_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GMAC_authentication_verify_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GMAC_authentication_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GMAC_authentication_verify_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GMAC_authentication_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_AES_GMAC_authentication_verify_test_case_3),
+
+               /** SNOW 3G encrypt only (UEA2) */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_encryption_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_encryption_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_encryption_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_encryption_test_case_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_encryption_test_case_5),
+
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_encryption_test_case_1_oop),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_decryption_test_case_1_oop),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_encryption_test_case_1_oop_sgl),
+
+               /** SNOW 3G decrypt only (UEA2) */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_decryption_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_decryption_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_decryption_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_decryption_test_case_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_decryption_test_case_5),
+
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_hash_generate_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_hash_generate_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_hash_generate_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_hash_verify_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_hash_verify_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_snow3g_hash_verify_test_case_3),
+
+               /** ZUC encrypt only (EEA3) */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_zuc_encryption_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_zuc_encryption_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_zuc_encryption_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_zuc_encryption_test_case_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_zuc_encryption_test_case_5),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_zuc_hash_generate_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_zuc_hash_generate_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_zuc_hash_generate_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_zuc_hash_generate_test_case_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_zuc_hash_generate_test_case_5),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_zuc_encryption_test_case_6_sgl),
+
+               /** KASUMI encrypt only (UEA1) */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_encryption_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_encryption_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_encryption_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_encryption_test_case_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_encryption_test_case_5),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_encryption_test_case_1_sgl),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_encryption_test_case_1_oop_sgl),
+               /** KASUMI decrypt only (UEA1) */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_decryption_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_decryption_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_decryption_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_decryption_test_case_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_decryption_test_case_5),
+
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_encryption_test_case_1_oop),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_decryption_test_case_1_oop),
+
+               /** KASUMI hash only (UIA1) */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_hash_generate_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_hash_generate_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_hash_generate_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_hash_generate_test_case_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_hash_generate_test_case_5),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_hash_generate_test_case_6),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_hash_verify_test_case_1),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_hash_verify_test_case_2),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_hash_verify_test_case_3),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_hash_verify_test_case_4),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_kasumi_hash_verify_test_case_5),
+
+               /** NULL tests */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_null_cipher_only_operation),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_null_auth_only_operation),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_null_cipher_auth_operation),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       test_null_auth_cipher_operation),
+
+               /** Negative tests */
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       authentication_verify_HMAC_SHA1_fail_data_corrupt),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       authentication_verify_HMAC_SHA1_fail_tag_corrupt),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       authentication_verify_AES128_GMAC_fail_data_corrupt),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       authentication_verify_AES128_GMAC_fail_tag_corrupt),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       auth_decryption_AES128CBC_HMAC_SHA1_fail_data_corrupt),
+               TEST_CASE_ST(ut_setup, ut_teardown,
+                       auth_decryption_AES128CBC_HMAC_SHA1_fail_tag_corrupt),
+               TEST_CASES_END() /**< NULL terminate unit test array */
+       }
+};
+
 static int
 test_cryptodev_qat(void /*argv __rte_unused, int argc __rte_unused*/)
 {
@@ -10203,6 +10740,37 @@ test_cryptodev_ccp(void)
        return unit_test_suite_runner(&cryptodev_ccp_testsuite);
 }
 
+static int
+test_cryptodev_octeontx(void)
+{
+       gbl_driver_id = rte_cryptodev_driver_id_get(
+                       RTE_STR(CRYPTODEV_NAME_OCTEONTX_SYM_PMD));
+       if (gbl_driver_id == -1) {
+               RTE_LOG(ERR, USER1, "OCTEONTX PMD must be loaded. Check if "
+                               "CONFIG_RTE_LIBRTE_PMD_OCTEONTX_CRYPTO is "
+                               "enabled in config file to run this "
+                               "testsuite.\n");
+               return TEST_FAILED;
+       }
+       return unit_test_suite_runner(&cryptodev_octeontx_testsuite);
+}
+
+static int
+test_cryptodev_caam_jr(void /*argv __rte_unused, int argc __rte_unused*/)
+{
+       gbl_driver_id = rte_cryptodev_driver_id_get(
+                       RTE_STR(CRYPTODEV_NAME_CAAM_JR_PMD));
+
+       if (gbl_driver_id == -1) {
+               RTE_LOG(ERR, USER1, "CAAM_JR PMD must be loaded. Check if "
+                               "CONFIG_RTE_LIBRTE_PMD_CAAM_JR is enabled "
+                               "in config file to run this testsuite.\n");
+               return TEST_FAILED;
+       }
+
+       return unit_test_suite_runner(&cryptodev_caam_jr_testsuite);
+}
+
 REGISTER_TEST_COMMAND(cryptodev_qat_autotest, test_cryptodev_qat);
 REGISTER_TEST_COMMAND(cryptodev_aesni_mb_autotest, test_cryptodev_aesni_mb);
 REGISTER_TEST_COMMAND(cryptodev_openssl_autotest, test_cryptodev_openssl);
@@ -10217,3 +10785,5 @@ REGISTER_TEST_COMMAND(cryptodev_dpaa2_sec_autotest, test_cryptodev_dpaa2_sec);
 REGISTER_TEST_COMMAND(cryptodev_dpaa_sec_autotest, test_cryptodev_dpaa_sec);
 REGISTER_TEST_COMMAND(cryptodev_ccp_autotest, test_cryptodev_ccp);
 REGISTER_TEST_COMMAND(cryptodev_virtio_autotest, test_cryptodev_virtio);
+REGISTER_TEST_COMMAND(cryptodev_octeontx_autotest, test_cryptodev_octeontx);
+REGISTER_TEST_COMMAND(cryptodev_caam_jr_autotest, test_cryptodev_caam_jr);
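
The two new REGISTER_TEST_COMMAND entries expose the suites both at the interactive "RTE>>" prompt and, via the DPDK_TEST handling in main() above, non-interactively: setting DPDK_TEST=cryptodev_caam_jr_autotest makes the binary feed that one command to the cmdline and exit with the suite's result. A hedged sketch of the registration pattern for some new PMD (foo is a placeholder name, not a real driver):

	static int
	test_cryptodev_foo(void)
	{
		gbl_driver_id = rte_cryptodev_driver_id_get(
				RTE_STR(CRYPTODEV_NAME_FOO_PMD));	/* placeholder */
		if (gbl_driver_id == -1)
			return TEST_FAILED;	/* PMD not built in or not probed */

		return unit_test_suite_runner(&cryptodev_foo_testsuite);
	}
	REGISTER_TEST_COMMAND(cryptodev_foo_autotest, test_cryptodev_foo);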
index 1bd44dc..a73a49e 100644 (file)
@@ -64,6 +64,8 @@
 #define CRYPTODEV_NAME_MVSAM_PMD               crypto_mvsam
 #define CRYPTODEV_NAME_CCP_PMD         crypto_ccp
 #define CRYPTODEV_NAME_VIRTIO_PMD      crypto_virtio
+#define CRYPTODEV_NAME_OCTEONTX_SYM_PMD        crypto_octeontx
+#define CRYPTODEV_NAME_CAAM_JR_PMD     crypto_caam_jr
 
 /**
  * Write (spread) data from buffer to mbuf data
index 1c4dc66..fff28bc 100644 (file)
@@ -1171,6 +1171,7 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP
        },
@@ -1185,6 +1186,7 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP
        },
@@ -1225,6 +1227,7 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP
        },
@@ -1239,6 +1242,7 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP
        },
@@ -1253,8 +1257,10 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA1 Encryption Digest "
@@ -1262,7 +1268,8 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                .test_data = &aes_test_data_13,
                .op_mask = BLOCKCIPHER_TEST_OP_ENC_AUTH_GEN,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_ARMV8 |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA1 Encryption Digest "
@@ -1276,7 +1283,9 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
-                       BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC
+                       BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA1 Decryption Digest "
@@ -1290,8 +1299,10 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA1 Decryption Digest "
@@ -1300,7 +1311,9 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY_DEC,
                .feature_mask = BLOCKCIPHER_TEST_FEATURE_SG,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
-                           BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC
+                           BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                           BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA1 Decryption Digest "
@@ -1308,7 +1321,8 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                .test_data = &aes_test_data_13,
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY_DEC,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_ARMV8 |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA256 Encryption Digest",
@@ -1321,8 +1335,10 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA256 Encryption Digest "
@@ -1330,7 +1346,8 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                .test_data = &aes_test_data_12,
                .op_mask = BLOCKCIPHER_TEST_OP_ENC_AUTH_GEN,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_ARMV8 |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA256 Decryption Digest "
@@ -1345,7 +1362,8 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA256 Decryption Digest "
@@ -1353,7 +1371,8 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                .test_data = &aes_test_data_12,
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY_DEC,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_ARMV8 |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA512 Encryption Digest",
@@ -1365,8 +1384,10 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA512 Encryption Digest "
@@ -1376,7 +1397,8 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                .feature_mask = BLOCKCIPHER_TEST_FEATURE_SESSIONLESS,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB |
                        BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA512 Encryption Digest "
@@ -1387,7 +1409,8 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_FEATURE_SG |
                        BLOCKCIPHER_TEST_FEATURE_OOP,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB |
-                       BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL
+                       BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA512 Decryption Digest "
@@ -1400,8 +1423,10 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA512 Decryption Digest "
@@ -1415,7 +1440,8 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
-                       BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER
+                       BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC XCBC Encryption Digest",
@@ -1443,7 +1469,9 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
-                       BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC
+                       BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA1 Decryption Digest "
@@ -1455,7 +1483,9 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
-                       BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC
+                       BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA224 Encryption Digest",
@@ -1467,7 +1497,9 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA224 Decryption Digest "
@@ -1480,7 +1512,9 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC HMAC-SHA384 Encryption Digest",
@@ -1492,6 +1526,7 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP
        },
@@ -1506,6 +1541,7 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP
        },
@@ -1517,7 +1553,8 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                .feature_mask = BLOCKCIPHER_TEST_FEATURE_SESSIONLESS,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_ARMV8 |
                        BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr =
@@ -1528,7 +1565,8 @@ static const struct blockcipher_test_case aes_chain_test_cases[] = {
                .feature_mask = BLOCKCIPHER_TEST_FEATURE_SESSIONLESS,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_ARMV8 |
                        BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
 };
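
Across these vector tables the only change is the pmd_mask: each case lists, as a bitmask, the PMDs allowed to run it, and the added CAAM_JR and OCTEONTX bits opt those drivers into the existing vectors. A hedged sketch of how such a mask is typically consumed (driver_bit and run_case are illustrative names):

	/* driver_bit: the BLOCKCIPHER_TEST_TARGET_PMD_* flag of the PMD under test */
	static int
	run_enabled_cases(const struct blockcipher_test_case *cases,
			  unsigned int n_cases, uint32_t driver_bit)
	{
		unsigned int i;
		int status = 0;

		for (i = 0; i < n_cases; i++) {
			if (!(cases[i].pmd_mask & driver_bit))
				continue;	/* case not wired for this driver */
			status |= run_case(&cases[i]);	/* hypothetical runner */
		}
		return status;
	}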
 
@@ -1543,9 +1581,11 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO
+                       BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CBC Decryption",
@@ -1557,9 +1597,11 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO
+                       BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-192-CBC Encryption",
@@ -1571,8 +1613,10 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO
+                       BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-192-CBC Encryption Scater gather",
@@ -1582,7 +1626,9 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_FEATURE_OOP,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
-                       BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC
+                       BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-192-CBC Decryption",
@@ -1594,8 +1640,10 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO
+                       BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-192-CBC Decryption Scatter Gather",
@@ -1603,7 +1651,9 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                .op_mask = BLOCKCIPHER_TEST_OP_DECRYPT,
                .feature_mask = BLOCKCIPHER_TEST_FEATURE_SG,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
-                       BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC
+                       BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-256-CBC Encryption",
@@ -1615,9 +1665,11 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO
+                       BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-256-CBC Decryption",
@@ -1629,9 +1681,11 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO
+                       BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-256-CBC OOP Encryption",
@@ -1642,6 +1696,7 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
                        BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO
        },
@@ -1654,6 +1709,7 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
                        BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO
        },
@@ -1667,8 +1723,10 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CTR Decryption",
@@ -1680,8 +1738,10 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-192-CTR Encryption",
@@ -1693,7 +1753,9 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-192-CTR Decryption",
@@ -1705,7 +1767,9 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-256-CTR Encryption",
@@ -1717,8 +1781,10 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-256-CTR Decryption",
@@ -1730,26 +1796,31 @@ static const struct blockcipher_test_case aes_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-128-CTR Encryption (12-byte IV)",
                .test_data = &aes_test_data_1_IV_12_bytes,
                .op_mask = BLOCKCIPHER_TEST_OP_ENCRYPT,
-               .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB
+               .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-192-CTR Encryption (12-byte IV)",
                .test_data = &aes_test_data_2_IV_12_bytes,
                .op_mask = BLOCKCIPHER_TEST_OP_ENCRYPT,
-               .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB
+               .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "AES-256-CTR Encryption (12-byte IV)",
                .test_data = &aes_test_data_3_IV_12_bytes,
                .op_mask = BLOCKCIPHER_TEST_OP_ENCRYPT,
-               .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB
+               .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        }
 };
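
The additions above extend each case's pmd_mask, the bitmask of PMDs expected
to pass that vector; the harness resolves the driver under test to a single
flag and skips any case whose mask lacks it. A minimal sketch of that gating,
assuming the flag names from this file (case_applies is a hypothetical helper,
not harness code):

	/* Sketch: a case runs only if the driver's flag is set in its mask. */
	static int
	case_applies(uint32_t case_pmd_mask, uint32_t target_pmd_mask)
	{
		return (case_pmd_mask & target_pmd_mask) != 0;
	}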
 
index f2701f8..1c3f29f 100644 (file)
@@ -68,10 +68,14 @@ test_blockcipher_one_case(const struct blockcipher_test_case *t,
                        RTE_STR(CRYPTODEV_NAME_DPAA2_SEC_PMD));
        int dpaa_sec_pmd = rte_cryptodev_driver_id_get(
                        RTE_STR(CRYPTODEV_NAME_DPAA_SEC_PMD));
+       int caam_jr_pmd = rte_cryptodev_driver_id_get(
+                       RTE_STR(CRYPTODEV_NAME_CAAM_JR_PMD));
        int mrvl_pmd = rte_cryptodev_driver_id_get(
                        RTE_STR(CRYPTODEV_NAME_MVSAM_PMD));
        int virtio_pmd = rte_cryptodev_driver_id_get(
                        RTE_STR(CRYPTODEV_NAME_VIRTIO_PMD));
+       int octeontx_pmd = rte_cryptodev_driver_id_get(
+                       RTE_STR(CRYPTODEV_NAME_OCTEONTX_SYM_PMD));
 
        int nb_segs = 1;
 
@@ -109,12 +113,14 @@ test_blockcipher_one_case(const struct blockcipher_test_case *t,
 
        if (driver_id == dpaa2_sec_pmd ||
                        driver_id == dpaa_sec_pmd ||
+                       driver_id == caam_jr_pmd ||
                        driver_id == qat_pmd ||
                        driver_id == openssl_pmd ||
                        driver_id == armv8_pmd ||
                        driver_id == mrvl_pmd ||
                        driver_id == ccp_pmd ||
-                       driver_id == virtio_pmd) { /* Fall through */
+                       driver_id == virtio_pmd ||
+                       driver_id == octeontx_pmd) { /* Fall through */
                digest_len = tdata->digest.len;
        } else if (driver_id == aesni_mb_pmd ||
                        driver_id == scheduler_pmd) {
@@ -628,6 +634,8 @@ test_blockcipher_all_tests(struct rte_mempool *mbuf_pool,
                        RTE_STR(CRYPTODEV_NAME_DPAA2_SEC_PMD));
        int dpaa_sec_pmd = rte_cryptodev_driver_id_get(
                        RTE_STR(CRYPTODEV_NAME_DPAA_SEC_PMD));
+       int caam_jr_pmd = rte_cryptodev_driver_id_get(
+                       RTE_STR(CRYPTODEV_NAME_CAAM_JR_PMD));
        int scheduler_pmd = rte_cryptodev_driver_id_get(
                        RTE_STR(CRYPTODEV_NAME_SCHEDULER_PMD));
        int armv8_pmd = rte_cryptodev_driver_id_get(
@@ -640,6 +648,8 @@ test_blockcipher_all_tests(struct rte_mempool *mbuf_pool,
                        RTE_STR(CRYPTODEV_NAME_MVSAM_PMD));
        int virtio_pmd = rte_cryptodev_driver_id_get(
                        RTE_STR(CRYPTODEV_NAME_VIRTIO_PMD));
+       int octeontx_pmd = rte_cryptodev_driver_id_get(
+                       RTE_STR(CRYPTODEV_NAME_OCTEONTX_SYM_PMD));
 
        switch (test_type) {
        case BLKCIPHER_AES_CHAIN_TYPE:
@@ -702,10 +712,14 @@ test_blockcipher_all_tests(struct rte_mempool *mbuf_pool,
                target_pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_CCP;
        else if (driver_id == dpaa_sec_pmd)
                target_pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC;
+       else if (driver_id == caam_jr_pmd)
+               target_pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR;
        else if (driver_id == mrvl_pmd)
                target_pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MVSAM;
        else if (driver_id == virtio_pmd)
                target_pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO;
+       else if (driver_id == octeontx_pmd)
+               target_pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX;
        else
                TEST_ASSERT(0, "Unrecognized cryptodev type");
 
index 6f7c892..f8bd858 100644 (file)
@@ -29,6 +29,8 @@
 #define BLOCKCIPHER_TEST_TARGET_PMD_MVSAM      0x0080 /* Marvell flag */
 #define BLOCKCIPHER_TEST_TARGET_PMD_CCP                0x0040 /* CCP flag */
 #define BLOCKCIPHER_TEST_TARGET_PMD_VIRTIO     0x0200 /* VIRTIO flag */
+#define BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX   0x0100 /* OCTEON TX flag */
+#define BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR    0x0400 /* CAAM_JR flag */
 
 #define BLOCKCIPHER_TEST_OP_CIPHER     (BLOCKCIPHER_TEST_OP_ENCRYPT | \
                                        BLOCKCIPHER_TEST_OP_DECRYPT)
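
The two new target flags take previously unused bit positions (0x0100 for
OCTEON TX, 0x0400 for CAAM_JR); every flag must remain a distinct single bit
so per-case masks can be OR-ed together. An illustrative compile-time guard,
assumed here and not present in the source:

	/* Hypothetical check that each flag occupies exactly one bit. */
	#define IS_SINGLE_BIT(x) ((x) != 0 && ((x) & ((x) - 1)) == 0)
	_Static_assert(IS_SINGLE_BIT(0x0100) && IS_SINGLE_BIT(0x0400),
		       "PMD target flags must not overlap");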
index 1033456..f1b8cbd 100644 (file)
@@ -1069,6 +1069,7 @@ static const struct blockcipher_test_case triple_des_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP
        },
        {
@@ -1079,6 +1080,7 @@ static const struct blockcipher_test_case triple_des_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP
        },
        {
@@ -1103,8 +1105,10 @@ static const struct blockcipher_test_case triple_des_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "3DES-192-CBC HMAC-SHA1 Decryption Digest Verify",
@@ -1114,8 +1118,10 @@ static const struct blockcipher_test_case triple_des_chain_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
-                       BLOCKCIPHER_TEST_TARGET_PMD_CCP
+                       BLOCKCIPHER_TEST_TARGET_PMD_CCP |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "3DES-192-CBC SHA1 Encryption Digest",
@@ -1193,7 +1199,8 @@ static const struct blockcipher_test_case triple_des_chain_test_cases[] = {
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
-                       BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC
+                       BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR
        },
        {
                .test_descr = "3DES-128-CBC HMAC-SHA1 Decryption Digest"
@@ -1204,7 +1211,8 @@ static const struct blockcipher_test_case triple_des_chain_test_cases[] = {
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
-                       BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC
+                       BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR
        },
        {
                .test_descr = "3DES-128-CBC HMAC-SHA1 Encryption Digest"
@@ -1252,6 +1260,7 @@ static const struct blockcipher_test_case triple_des_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
                        BLOCKCIPHER_TEST_TARGET_PMD_MB
        },
@@ -1263,6 +1272,7 @@ static const struct blockcipher_test_case triple_des_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
                        BLOCKCIPHER_TEST_TARGET_PMD_MB
        },
@@ -1274,9 +1284,11 @@ static const struct blockcipher_test_case triple_des_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MB
+                       BLOCKCIPHER_TEST_TARGET_PMD_MB |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "3DES-192-CBC Decryption",
@@ -1286,9 +1298,11 @@ static const struct blockcipher_test_case triple_des_cipheronly_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MB
+                       BLOCKCIPHER_TEST_TARGET_PMD_MB |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "3DES-128-CTR Encryption",
index cf86dbb..a02dfb3 100644 (file)
@@ -373,14 +373,16 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                .test_data = &md5_test_vector,
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "MD5 Digest Verify",
                .test_data = &md5_test_vector,
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
-                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "HMAC-MD5 Digest",
@@ -391,8 +393,10 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "HMAC-MD5 Digest Verify",
@@ -403,8 +407,10 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "SHA1 Digest",
@@ -412,7 +418,8 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                            BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "SHA1 Digest Verify",
@@ -420,7 +427,8 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                            BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "HMAC-SHA1 Digest",
@@ -431,9 +439,11 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "HMAC-SHA1 Digest Scatter Gather",
@@ -441,7 +451,9 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
                .feature_mask = BLOCKCIPHER_TEST_FEATURE_SG,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
-                           BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC
+                           BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                           BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "HMAC-SHA1 Digest Verify",
@@ -452,9 +464,11 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "HMAC-SHA1 Digest Verify Scatter Gather",
@@ -462,7 +476,9 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
                .feature_mask = BLOCKCIPHER_TEST_FEATURE_SG,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
-                           BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC
+                           BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                           BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "SHA224 Digest",
@@ -470,7 +486,8 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                            BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "SHA224 Digest Verify",
@@ -478,7 +495,8 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                            BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "HMAC-SHA224 Digest",
@@ -489,8 +507,10 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_QAT
+                       BLOCKCIPHER_TEST_TARGET_PMD_QAT |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "HMAC-SHA224 Digest Verify",
@@ -501,8 +521,10 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_QAT
+                       BLOCKCIPHER_TEST_TARGET_PMD_QAT |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "SHA256 Digest",
@@ -510,7 +532,8 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                            BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "SHA256 Digest Verify",
@@ -518,7 +541,8 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                            BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "HMAC-SHA256 Digest",
@@ -529,9 +553,11 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "HMAC-SHA256 Digest Verify",
@@ -542,9 +568,11 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "SHA384 Digest",
@@ -552,7 +580,8 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                            BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "SHA384 Digest Verify",
@@ -560,7 +589,8 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                            BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "HMAC-SHA384 Digest",
@@ -571,9 +601,11 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "HMAC-SHA384 Digest Verify",
@@ -584,9 +616,11 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "SHA512 Digest",
@@ -594,7 +628,8 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                            BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "SHA512 Digest Verify",
@@ -602,7 +637,8 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
                .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_OPENSSL |
                            BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                           BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                           BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "HMAC-SHA512 Digest",
@@ -613,9 +649,11 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "HMAC-SHA512 Digest Verify",
@@ -626,33 +664,39 @@ static const struct blockcipher_test_case hash_test_cases[] = {
                        BLOCKCIPHER_TEST_TARGET_PMD_SCHEDULER |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA2_SEC |
                        BLOCKCIPHER_TEST_TARGET_PMD_DPAA_SEC |
+                       BLOCKCIPHER_TEST_TARGET_PMD_CAAM_JR |
                        BLOCKCIPHER_TEST_TARGET_PMD_QAT |
                        BLOCKCIPHER_TEST_TARGET_PMD_CCP |
-                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM
+                       BLOCKCIPHER_TEST_TARGET_PMD_MVSAM |
+                       BLOCKCIPHER_TEST_TARGET_PMD_OCTEONTX
        },
        {
                .test_descr = "CMAC Digest 12B",
                .test_data = &cmac_test_vector_12,
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
-               .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB
+               .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB |
+                       BLOCKCIPHER_TEST_TARGET_PMD_QAT
        },
        {
                .test_descr = "CMAC Digest Verify 12B",
                .test_data = &cmac_test_vector_12,
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
-               .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB
+               .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB |
+                       BLOCKCIPHER_TEST_TARGET_PMD_QAT
        },
        {
                .test_descr = "CMAC Digest 16B",
                .test_data = &cmac_test_vector,
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_GEN,
-               .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB
+               .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB |
+                       BLOCKCIPHER_TEST_TARGET_PMD_QAT
        },
        {
                .test_descr = "CMAC Digest Verify 16B",
                .test_data = &cmac_test_vector,
                .op_mask = BLOCKCIPHER_TEST_OP_AUTH_VERIFY,
-               .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB
+               .pmd_mask = BLOCKCIPHER_TEST_TARGET_PMD_MB |
+                       BLOCKCIPHER_TEST_TARGET_PMD_QAT
        }
 };
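
Each hash vector above appears twice, once with BLOCKCIPHER_TEST_OP_AUTH_GEN
and once with BLOCKCIPHER_TEST_OP_AUTH_VERIFY, so a PMD flag added in only
one direction would leave asymmetric coverage; the CMAC entries now carry QAT
in both. A hedged sketch of checking that symmetry (has_verify_twin is an
assumed helper, not part of the suite):

	/* Sketch: every GEN case should have a VERIFY twin on the same data. */
	static int
	has_verify_twin(const struct blockcipher_test_case *cases, size_t n,
			const struct blockcipher_test_case *gen)
	{
		size_t i;

		for (i = 0; i < n; i++)
			if (cases[i].test_data == gen->test_data &&
			    (cases[i].op_mask & BLOCKCIPHER_TEST_OP_AUTH_VERIFY))
				return 1;
		return 0;
	}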
 
index 149f60b..c78e6a5 100644 (file)
  *   of cycles is correct with regard to the frequency of the timer.
  */
 
+static int
+check_wait_one_second(void)
+{
+       uint64_t cycles, prev_cycles;
+       uint64_t hz = rte_get_timer_hz();
+       uint64_t max_inc = (hz / 100); /* 10 ms max between 2 reads */
+
+       /* check that waiting 1 second is precise */
+       prev_cycles = rte_get_timer_cycles();
+       rte_delay_us(1000000);
+       cycles = rte_get_timer_cycles();
+
+       if ((uint64_t)(cycles - prev_cycles) > (hz + max_inc)) {
+               printf("delay_us is not accurate: too long\n");
+               return -1;
+       }
+       if ((uint64_t)(cycles - prev_cycles) < (hz - max_inc)) {
+               printf("delay_us is not accurate: too short\n");
+               return -1;
+       }
+
+       return 0;
+}
+
 static int
 test_cycles(void)
 {
@@ -43,24 +67,23 @@ test_cycles(void)
                prev_cycles = cycles;
        }
 
-       /* check that waiting 1 second is precise */
-       prev_cycles = rte_get_timer_cycles();
-       rte_delay_us(1000000);
-       cycles = rte_get_timer_cycles();
+       return check_wait_one_second();
+}
 
-       if ((uint64_t)(cycles - prev_cycles) > (hz + max_inc)) {
-               printf("delay_us is not accurate: too long\n");
-               return -1;
-       }
-       if ((uint64_t)(cycles - prev_cycles) < (hz - max_inc)) {
-               printf("delay_us is not accurate: too short\n");
-               return -1;
-       }
+REGISTER_TEST_COMMAND(cycles_autotest, test_cycles);
 
-       return 0;
+/*
+ * One second precision test with rte_delay_us_sleep.
+ */
+
+static int
+test_delay_us_sleep(void)
+{
+       rte_delay_us_callback_register(rte_delay_us_sleep);
+       return check_wait_one_second();
 }
 
-REGISTER_TEST_COMMAND(cycles_autotest, test_cycles);
+REGISTER_TEST_COMMAND(delay_us_sleep_autotest, test_delay_us_sleep);
 
 /*
  * rte_delay_us_callback test
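
The refactor above lets one routine validate two delay backends: the default
busy-wait and the new rte_delay_us_sleep, which blocks in the kernel instead
of spinning on the TSC. A minimal usage sketch for an application opting into
sleeping delays, using only the EAL calls shown in this hunk:

	#include <rte_cycles.h>

	static void
	use_sleeping_delays(void)
	{
		/* Route all subsequent rte_delay_us() calls through sleep. */
		rte_delay_us_callback_register(rte_delay_us_sleep);
		rte_delay_us(1000);	/* now sleeps ~1 ms instead of spinning */
	}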
index 2337e54..1d3be82 100644 (file)
@@ -32,7 +32,7 @@ struct event_eth_rx_adapter_test_params {
 static struct event_eth_rx_adapter_test_params default_params;
 
 static inline int
-port_init_common(uint8_t port, const struct rte_eth_conf *port_conf,
+port_init_common(uint16_t port, const struct rte_eth_conf *port_conf,
                struct rte_mempool *mp)
 {
        const uint16_t rx_ring_size = 512, tx_ring_size = 512;
@@ -94,12 +94,11 @@ port_init_common(uint8_t port, const struct rte_eth_conf *port_conf,
 }
 
 static inline int
-port_init_rx_intr(uint8_t port, struct rte_mempool *mp)
+port_init_rx_intr(uint16_t port, struct rte_mempool *mp)
 {
        static const struct rte_eth_conf port_conf_default = {
                .rxmode = {
-                       .mq_mode = ETH_MQ_RX_RSS,
-                       .max_rx_pkt_len = ETHER_MAX_LEN
+                       .mq_mode = ETH_MQ_RX_NONE,
                },
                .intr_conf = {
                        .rxq = 1,
@@ -110,20 +109,12 @@ port_init_rx_intr(uint8_t port, struct rte_mempool *mp)
 }
 
 static inline int
-port_init(uint8_t port, struct rte_mempool *mp)
+port_init(uint16_t port, struct rte_mempool *mp)
 {
        static const struct rte_eth_conf port_conf_default = {
                .rxmode = {
-                       .mq_mode = ETH_MQ_RX_RSS,
-                       .max_rx_pkt_len = ETHER_MAX_LEN
+                       .mq_mode = ETH_MQ_RX_NONE,
                },
-               .rx_adv_conf = {
-                       .rss_conf = {
-                               .rss_hf = ETH_RSS_IP |
-                                       ETH_RSS_TCP |
-                                       ETH_RSS_UDP,
-                       }
-               }
        };
 
        return port_init_common(port, &port_conf_default, mp);
@@ -319,6 +310,8 @@ adapter_create(void)
        struct rte_event_dev_info dev_info;
        struct rte_event_port_conf rx_p_conf;
 
+       memset(&rx_p_conf, 0, sizeof(rx_p_conf));
+
        err = rte_event_dev_info_get(TEST_DEV_ID, &dev_info);
        TEST_ASSERT(err == 0, "Expected 0 got %d", err);
 
@@ -496,14 +489,11 @@ adapter_multi_eth_add_del(void)
        err = init_ports(rte_eth_dev_count_total());
        TEST_ASSERT(err == 0, "Port initialization failed err %d\n", err);
 
-       /* creating new instance for all newly added eth devices */
-       adapter_create();
-
        /* eth_rx_adapter_queue_add for n ports */
        port_index = 0;
        for (; port_index < rte_eth_dev_count_total(); port_index += 1) {
                err = rte_event_eth_rx_adapter_queue_add(TEST_INST_ID,
-                               port_index, 0,
+                               port_index, -1,
                                &queue_config);
                TEST_ASSERT(err == 0, "Expected 0 got %d", err);
        }
@@ -512,12 +502,10 @@ adapter_multi_eth_add_del(void)
        port_index = 0;
        for (; port_index < rte_eth_dev_count_total(); port_index += 1) {
                err = rte_event_eth_rx_adapter_queue_del(TEST_INST_ID,
-                               port_index, 0);
+                               port_index, -1);
                TEST_ASSERT(err == 0, "Expected 0 got %d", err);
        }
 
-       adapter_free();
-
        return TEST_SUCCESS;
 }
 
@@ -547,11 +535,13 @@ adapter_intr_queue_add_del(void)
        /* weight = 0 => interrupt mode */
        queue_config.servicing_weight = 0;
 
-       /* add queue 0 */
-       err = rte_event_eth_rx_adapter_queue_add(TEST_INST_ID,
-                                               TEST_ETHDEV_ID, 0,
-                                               &queue_config);
-       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+       if (cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ) {
+               /* add queue 0 */
+               err = rte_event_eth_rx_adapter_queue_add(TEST_INST_ID,
+                                                       TEST_ETHDEV_ID, 0,
+                                                       &queue_config);
+               TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+       }
 
        /* add all queues */
        queue_config.servicing_weight = 0;
@@ -561,11 +551,13 @@ adapter_intr_queue_add_del(void)
                                                &queue_config);
        TEST_ASSERT(err == 0, "Expected 0 got %d", err);
 
-       /* del queue 0 */
-       err = rte_event_eth_rx_adapter_queue_del(TEST_INST_ID,
-                                               TEST_ETHDEV_ID,
-                                               0);
-       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+       if (cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ) {
+               /* del queue 0 */
+               err = rte_event_eth_rx_adapter_queue_del(TEST_INST_ID,
+                                                       TEST_ETHDEV_ID,
+                                                       0);
+               TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+       }
 
        /* del remaining queues */
        err = rte_event_eth_rx_adapter_queue_del(TEST_INST_ID,
@@ -583,11 +575,14 @@ adapter_intr_queue_add_del(void)
 
        /* intr -> poll mode queue */
        queue_config.servicing_weight = 1;
-       err = rte_event_eth_rx_adapter_queue_add(TEST_INST_ID,
-                                               TEST_ETHDEV_ID,
-                                               0,
-                                               &queue_config);
-       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       if (cap & RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ) {
+               err = rte_event_eth_rx_adapter_queue_add(TEST_INST_ID,
+                                                       TEST_ETHDEV_ID,
+                                                       0,
+                                                       &queue_config);
+               TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+       }
 
        err = rte_event_eth_rx_adapter_queue_add(TEST_INST_ID,
                                                TEST_ETHDEV_ID,
@@ -682,7 +677,8 @@ static struct unit_test_suite event_eth_rx_tests = {
                TEST_CASE_ST(NULL, NULL, adapter_create_free),
                TEST_CASE_ST(adapter_create, adapter_free,
                                        adapter_queue_add_del),
-               TEST_CASE_ST(NULL, NULL, adapter_multi_eth_add_del),
+               TEST_CASE_ST(adapter_create, adapter_free,
+                                       adapter_multi_eth_add_del),
                TEST_CASE_ST(adapter_create, adapter_free, adapter_start_stop),
                TEST_CASE_ST(adapter_create, adapter_free, adapter_stats),
                TEST_CASES_END() /**< NULL terminate unit test array */
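
Two conventions drive the edits in this file: an rx_queue_id of -1 addresses
every Rx queue of the port in a single call, and per-queue add/del is only
valid when the adapter reports RTE_EVENT_ETH_RX_ADAPTER_CAP_MULTI_EVENTQ,
which is why the queue-0 operations are now capability-gated. A hedged sketch
of the all-queues form (queue_conf setup elided):

	#include <rte_event_eth_rx_adapter.h>

	static int
	add_all_rx_queues(uint8_t id, uint16_t eth_dev_id,
		const struct rte_event_eth_rx_adapter_queue_conf *conf)
	{
		/* -1 selects every configured Rx queue of eth_dev_id. */
		return rte_event_eth_rx_adapter_queue_add(id, eth_dev_id,
							-1, conf);
	}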
diff --git a/test/test/test_event_eth_tx_adapter.c b/test/test/test_event_eth_tx_adapter.c
new file mode 100644 (file)
index 0000000..c26c515
--- /dev/null
@@ -0,0 +1,699 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <string.h>
+
+#include <rte_bus_vdev.h>
+#include <rte_common.h>
+#include <rte_ethdev.h>
+#include <rte_eth_ring.h>
+#include <rte_eventdev.h>
+#include <rte_event_eth_tx_adapter.h>
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+#include <rte_service.h>
+
+#include "test.h"
+
+#define MAX_NUM_QUEUE          RTE_PMD_RING_MAX_RX_RINGS
+#define TEST_INST_ID           0
+#define TEST_DEV_ID            0
+#define SOCKET0                        0
+#define RING_SIZE              256
+#define ETH_NAME_LEN           32
+#define NUM_ETH_PAIR           1
+#define NUM_ETH_DEV            (2 * NUM_ETH_PAIR)
+#define NB_MBUF                        512
+#define PAIR_PORT_INDEX(p)     ((p) + NUM_ETH_PAIR)
+#define PORT(p)                        default_params.port[(p)]
+#define TEST_ETHDEV_ID         PORT(0)
+#define TEST_ETHDEV_PAIR_ID    PORT(PAIR_PORT_INDEX(0))
+
+#define EDEV_RETRY             0xffff
+
+struct event_eth_tx_adapter_test_params {
+       struct rte_mempool *mp;
+       uint16_t rx_rings, tx_rings;
+       struct rte_ring *r[NUM_ETH_DEV][MAX_NUM_QUEUE];
+       int port[NUM_ETH_DEV];
+};
+
+static int event_dev_delete;
+static struct event_eth_tx_adapter_test_params default_params;
+static uint64_t eid = ~0ULL;
+static uint32_t tid;
+
+static inline int
+port_init_common(uint16_t port, const struct rte_eth_conf *port_conf,
+               struct rte_mempool *mp)
+{
+       const uint16_t rx_ring_size = RING_SIZE, tx_ring_size = RING_SIZE;
+       int retval;
+       uint16_t q;
+
+       if (!rte_eth_dev_is_valid_port(port))
+               return -1;
+
+       default_params.rx_rings = MAX_NUM_QUEUE;
+       default_params.tx_rings = MAX_NUM_QUEUE;
+
+       /* Configure the Ethernet device. */
+       retval = rte_eth_dev_configure(port, default_params.rx_rings,
+                               default_params.tx_rings, port_conf);
+       if (retval != 0)
+               return retval;
+
+       for (q = 0; q < default_params.rx_rings; q++) {
+               retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
+                               rte_eth_dev_socket_id(port), NULL, mp);
+               if (retval < 0)
+                       return retval;
+       }
+
+       for (q = 0; q < default_params.tx_rings; q++) {
+               retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
+                               rte_eth_dev_socket_id(port), NULL);
+               if (retval < 0)
+                       return retval;
+       }
+
+       /* Start the Ethernet port. */
+       retval = rte_eth_dev_start(port);
+       if (retval < 0)
+               return retval;
+
+       /* Display the port MAC address. */
+       struct ether_addr addr;
+       rte_eth_macaddr_get(port, &addr);
+       printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
+                          " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
+                       (unsigned int)port,
+                       addr.addr_bytes[0], addr.addr_bytes[1],
+                       addr.addr_bytes[2], addr.addr_bytes[3],
+                       addr.addr_bytes[4], addr.addr_bytes[5]);
+
+       /* Enable RX in promiscuous mode for the Ethernet device. */
+       rte_eth_promiscuous_enable(port);
+
+       return 0;
+}
+
+static inline int
+port_init(uint16_t port, struct rte_mempool *mp)
+{
+       struct rte_eth_conf conf = { 0 };
+       return port_init_common(port, &conf, mp);
+}
+
+#define RING_NAME_LEN  20
+#define DEV_NAME_LEN   20
+
+static int
+init_ports(void)
+{
+       char ring_name[ETH_NAME_LEN];
+       unsigned int i, j;
+       struct rte_ring * const *c1;
+       struct rte_ring * const *c2;
+       int err;
+
+       if (!default_params.mp)
+               default_params.mp = rte_pktmbuf_pool_create("mbuf_pool",
+                       NB_MBUF, 32,
+                       0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+
+       if (!default_params.mp)
+               return -ENOMEM;
+
+       for (i = 0; i < NUM_ETH_DEV; i++) {
+               for (j = 0; j < MAX_NUM_QUEUE; j++) {
+                       snprintf(ring_name, sizeof(ring_name), "R%u%u", i, j);
+                       default_params.r[i][j] = rte_ring_create(ring_name,
+                                               RING_SIZE,
+                                               SOCKET0,
+                                               RING_F_SP_ENQ | RING_F_SC_DEQ);
+                       TEST_ASSERT((default_params.r[i][j] != NULL),
+                               "Failed to allocate ring");
+               }
+       }
+
+       /*
+        * To create two pseudo-Ethernet ports where the traffic is
+        * switched between them, that is, traffic sent to port 1 is
+        * read back from port 2 and vice-versa
+        */
+       for (i = 0; i < NUM_ETH_PAIR; i++) {
+               char dev_name[DEV_NAME_LEN];
+               int p;
+
+               c1 = default_params.r[i];
+               c2 = default_params.r[PAIR_PORT_INDEX(i)];
+
+               snprintf(dev_name, DEV_NAME_LEN, "%u-%u", i, i + NUM_ETH_PAIR);
+               p = rte_eth_from_rings(dev_name, c1, MAX_NUM_QUEUE,
+                                c2, MAX_NUM_QUEUE, SOCKET0);
+               TEST_ASSERT(p >= 0, "Port creation failed %s", dev_name);
+               err = port_init(p, default_params.mp);
+               TEST_ASSERT(err == 0, "Port init failed %s", dev_name);
+               default_params.port[i] = p;
+
+               snprintf(dev_name, DEV_NAME_LEN, "%u-%u", i + NUM_ETH_PAIR, i);
+               p = rte_eth_from_rings(dev_name, c2, MAX_NUM_QUEUE,
+                               c1, MAX_NUM_QUEUE, SOCKET0);
+               TEST_ASSERT(p >= 0, "Port creation failed %s", dev_name);
+               err = port_init(p, default_params.mp);
+               TEST_ASSERT(err == 0, "Port init failed %s", dev_name);
+               default_params.port[PAIR_PORT_INDEX(i)] = p;
+       }
+
+       return 0;
+}
+
+static void
+deinit_ports(void)
+{
+       uint16_t i, j;
+       char name[ETH_NAME_LEN];
+
+       for (i = 0; i < RTE_DIM(default_params.port); i++) {
+               rte_eth_dev_stop(default_params.port[i]);
+               rte_eth_dev_get_name_by_port(default_params.port[i], name);
+               rte_vdev_uninit(name);
+               for (j = 0; j < RTE_DIM(default_params.r[i]); j++)
+                       rte_ring_free(default_params.r[i][j]);
+       }
+}
+
+static int
+testsuite_setup(void)
+{
+       const char *vdev_name = "event_sw0";
+
+       int err = init_ports();
+       TEST_ASSERT(err == 0, "Port initialization failed err %d\n", err);
+
+       if (rte_event_dev_count() == 0) {
+               printf("Failed to find a valid event device,"
+                       " testing with event_sw0 device\n");
+               err = rte_vdev_init(vdev_name, NULL);
+               TEST_ASSERT(err == 0, "vdev %s creation failed  %d\n",
+                       vdev_name, err);
+               event_dev_delete = 1;
+       }
+       return err;
+}
+
+#define DEVICE_ID_SIZE 64
+
+static void
+testsuite_teardown(void)
+{
+       deinit_ports();
+       rte_mempool_free(default_params.mp);
+       default_params.mp = NULL;
+       if (event_dev_delete)
+               rte_vdev_uninit("event_sw0");
+}
+
+static int
+tx_adapter_create(void)
+{
+       int err;
+       struct rte_event_dev_info dev_info;
+       struct rte_event_port_conf tx_p_conf;
+       uint8_t priority;
+       uint8_t queue_id;
+
+       struct rte_event_dev_config config = {
+                       .nb_event_queues = 1,
+                       .nb_event_ports = 1,
+       };
+
+       struct rte_event_queue_conf wkr_q_conf = {
+                       .schedule_type = RTE_SCHED_TYPE_ORDERED,
+                       .priority = RTE_EVENT_DEV_PRIORITY_NORMAL,
+                       .nb_atomic_flows = 1024,
+                       .nb_atomic_order_sequences = 1024,
+       };
+
+       memset(&tx_p_conf, 0, sizeof(tx_p_conf));
+       err = rte_event_dev_info_get(TEST_DEV_ID, &dev_info);
+       config.nb_event_queue_flows = dev_info.max_event_queue_flows;
+       config.nb_event_port_dequeue_depth =
+                       dev_info.max_event_port_dequeue_depth;
+       config.nb_event_port_enqueue_depth =
+                       dev_info.max_event_port_enqueue_depth;
+       config.nb_events_limit =
+                       dev_info.max_num_events;
+
+       err = rte_event_dev_configure(TEST_DEV_ID, &config);
+       TEST_ASSERT(err == 0, "Event device initialization failed err %d\n",
+                       err);
+
+       queue_id = 0;
+       err = rte_event_queue_setup(TEST_DEV_ID, 0, &wkr_q_conf);
+       TEST_ASSERT(err == 0, "Event queue setup failed %d\n", err);
+
+       err = rte_event_port_setup(TEST_DEV_ID, 0, NULL);
+       TEST_ASSERT(err == 0, "Event port setup failed %d\n", err);
+
+       priority = RTE_EVENT_DEV_PRIORITY_LOWEST;
+       err = rte_event_port_link(TEST_DEV_ID, 0, &queue_id, &priority, 1);
+       TEST_ASSERT(err == 1, "Error linking port %s\n",
+               rte_strerror(rte_errno));
+       err = rte_event_dev_info_get(TEST_DEV_ID, &dev_info);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       tx_p_conf.new_event_threshold = dev_info.max_num_events;
+       tx_p_conf.dequeue_depth = dev_info.max_event_port_dequeue_depth;
+       tx_p_conf.enqueue_depth = dev_info.max_event_port_enqueue_depth;
+       err = rte_event_eth_tx_adapter_create(TEST_INST_ID, TEST_DEV_ID,
+                                       &tx_p_conf);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       return err;
+}
+
+static void
+tx_adapter_free(void)
+{
+       rte_event_eth_tx_adapter_free(TEST_INST_ID);
+}
+
+static int
+tx_adapter_create_free(void)
+{
+       int err;
+       struct rte_event_dev_info dev_info;
+       struct rte_event_port_conf tx_p_conf;
+
+       memset(&tx_p_conf, 0, sizeof(tx_p_conf));
+       err = rte_event_dev_info_get(TEST_DEV_ID, &dev_info);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       tx_p_conf.new_event_threshold = dev_info.max_num_events;
+       tx_p_conf.dequeue_depth = dev_info.max_event_port_dequeue_depth;
+       tx_p_conf.enqueue_depth = dev_info.max_event_port_enqueue_depth;
+
+       err = rte_event_eth_tx_adapter_create(TEST_INST_ID, TEST_DEV_ID,
+                                       NULL);
+       TEST_ASSERT(err == -EINVAL, "Expected -EINVAL got %d", err);
+
+       err = rte_event_eth_tx_adapter_create(TEST_INST_ID, TEST_DEV_ID,
+                                       &tx_p_conf);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_create(TEST_INST_ID,
+                                       TEST_DEV_ID, &tx_p_conf);
+       TEST_ASSERT(err == -EEXIST, "Expected -EEXIST %d got %d", -EEXIST, err);
+
+       err = rte_event_eth_tx_adapter_free(TEST_INST_ID);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_free(TEST_INST_ID);
+       TEST_ASSERT(err == -EINVAL, "Expected -EINVAL %d got %d", -EINVAL, err);
+
+       err = rte_event_eth_tx_adapter_free(1);
+       TEST_ASSERT(err == -EINVAL, "Expected -EINVAL %d got %d", -EINVAL, err);
+
+       return TEST_SUCCESS;
+}
+
+static int
+tx_adapter_queue_add_del(void)
+{
+       int err;
+       uint32_t cap;
+
+       err = rte_event_eth_tx_adapter_caps_get(TEST_DEV_ID, TEST_ETHDEV_ID,
+                                        &cap);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_queue_add(TEST_INST_ID,
+                                               rte_eth_dev_count_total(),
+                                               -1);
+       TEST_ASSERT(err == -EINVAL, "Expected -EINVAL got %d", err);
+
+       err = rte_event_eth_tx_adapter_queue_add(TEST_INST_ID,
+                                               TEST_ETHDEV_ID,
+                                               0);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_queue_add(TEST_INST_ID,
+                                               TEST_ETHDEV_ID,
+                                               -1);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_queue_del(TEST_INST_ID,
+                                               TEST_ETHDEV_ID,
+                                               0);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_queue_del(TEST_INST_ID,
+                                               TEST_ETHDEV_ID,
+                                               -1);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_queue_del(TEST_INST_ID,
+                                               TEST_ETHDEV_ID,
+                                               -1);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_queue_add(1, TEST_ETHDEV_ID, -1);
+       TEST_ASSERT(err == -EINVAL, "Expected -EINVAL got %d", err);
+
+       err = rte_event_eth_tx_adapter_queue_del(1, TEST_ETHDEV_ID, -1);
+       TEST_ASSERT(err == -EINVAL, "Expected -EINVAL got %d", err);
+
+       return TEST_SUCCESS;
+}
+
+static int
+tx_adapter_start_stop(void)
+{
+       int err;
+
+       err = rte_event_eth_tx_adapter_queue_add(TEST_INST_ID, TEST_ETHDEV_ID,
+                                               -1);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_start(TEST_INST_ID);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_stop(TEST_INST_ID);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_queue_del(TEST_INST_ID, TEST_ETHDEV_ID,
+                                               -1);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_start(TEST_INST_ID);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_stop(TEST_INST_ID);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_start(1);
+       TEST_ASSERT(err == -EINVAL, "Expected -EINVAL got %d", err);
+
+       err = rte_event_eth_tx_adapter_stop(1);
+       TEST_ASSERT(err == -EINVAL, "Expected -EINVAL got %d", err);
+
+       return TEST_SUCCESS;
+}
+
+static int
+tx_adapter_single(uint16_t port, uint16_t tx_queue_id,
+               struct rte_mbuf *m, uint8_t qid,
+               uint8_t sched_type)
+{
+       struct rte_event event;
+       struct rte_mbuf *r;
+       int ret;
+       unsigned int l;
+
+       event.queue_id = qid;
+       event.op = RTE_EVENT_OP_NEW;
+       event.event_type = RTE_EVENT_TYPE_CPU;
+       event.sched_type = sched_type;
+       event.mbuf = m;
+
+       m->port = port;
+       rte_event_eth_tx_adapter_txq_set(m, tx_queue_id);
+
+       l = 0;
+       while (rte_event_enqueue_burst(TEST_DEV_ID, 0, &event, 1) != 1) {
+               l++;
+               if (l > EDEV_RETRY)
+                       break;
+       }
+
+       TEST_ASSERT(l < EDEV_RETRY, "Unable to enqueue to eventdev");
+       l = 0;
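+       /*
+        * eid and tid are service ids set up in tx_adapter_service(): eid is
+        * the eventdev scheduling service (where the PMD needs one) and tid
+        * is the adapter's own service.  Drive both by hand until the mbuf
+        * shows up on the paired ethdev's receive side.
+        */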
+       while (l++ < EDEV_RETRY) {
+
+               if (eid != ~0ULL) {
+                       ret = rte_service_run_iter_on_app_lcore(eid, 0);
+                       TEST_ASSERT(ret == 0, "failed to run service %d", ret);
+               }
+
+               ret = rte_service_run_iter_on_app_lcore(tid, 0);
+               TEST_ASSERT(ret == 0, "failed to run service %d", ret);
+
+               if (rte_eth_rx_burst(TEST_ETHDEV_PAIR_ID, tx_queue_id,
+                               &r, 1)) {
+                       TEST_ASSERT_EQUAL(r, m, "mbuf comparison failed"
+                                       " expected %p received %p", m, r);
+                       return 0;
+               }
+       }
+
+       TEST_ASSERT(0, "Failed to receive packet");
+       return -1;
+}
+
+static int
+tx_adapter_service(void)
+{
+       struct rte_event_eth_tx_adapter_stats stats;
+       uint32_t i;
+       int err;
+       uint8_t ev_port, ev_qid;
+       struct rte_mbuf bufs[RING_SIZE];
+       struct rte_mbuf *pbufs[RING_SIZE];
+       struct rte_event_dev_info dev_info;
+       struct rte_event_dev_config dev_conf;
+       struct rte_event_queue_conf qconf;
+       uint32_t qcnt, pcnt;
+       uint16_t q;
+       int internal_port;
+       uint32_t cap;
+
+       memset(&dev_conf, 0, sizeof(dev_conf));
+       err = rte_event_eth_tx_adapter_caps_get(TEST_DEV_ID, TEST_ETHDEV_ID,
+                                               &cap);
+       TEST_ASSERT(err == 0, "Failed to get adapter cap err %d\n", err);
+
+       internal_port = !!(cap & RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT);
+       if (internal_port)
+               return TEST_SUCCESS;
+
+       err = rte_event_eth_tx_adapter_queue_add(TEST_INST_ID, TEST_ETHDEV_ID,
+                                               -1);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_event_port_get(TEST_INST_ID,
+                                               &ev_port);
+       TEST_ASSERT_SUCCESS(err, "Failed to get event port %d", err);
+
+       err = rte_event_dev_attr_get(TEST_DEV_ID, RTE_EVENT_DEV_ATTR_PORT_COUNT,
+                                       &pcnt);
+       TEST_ASSERT_SUCCESS(err, "Port count get failed");
+
+       err = rte_event_dev_attr_get(TEST_DEV_ID,
+                               RTE_EVENT_DEV_ATTR_QUEUE_COUNT, &qcnt);
+       TEST_ASSERT_SUCCESS(err, "Queue count get failed");
+
+       err = rte_event_dev_info_get(TEST_DEV_ID, &dev_info);
+       TEST_ASSERT_SUCCESS(err, "Dev info failed");
+
+       dev_conf.nb_event_queue_flows = dev_info.max_event_queue_flows;
+       dev_conf.nb_event_port_dequeue_depth =
+                       dev_info.max_event_port_dequeue_depth;
+       dev_conf.nb_event_port_enqueue_depth =
+                       dev_info.max_event_port_enqueue_depth;
+       dev_conf.nb_events_limit =
+                       dev_info.max_num_events;
+       dev_conf.nb_event_queues = qcnt + 1;
+       dev_conf.nb_event_ports = pcnt;
+       err = rte_event_dev_configure(TEST_DEV_ID, &dev_conf);
+       TEST_ASSERT(err == 0, "Event device initialization failed err %d\n",
+                       err);
+
+       ev_qid = qcnt;
+       qconf.nb_atomic_flows = dev_info.max_event_queue_flows;
+       qconf.nb_atomic_order_sequences = 32;
+       qconf.schedule_type = RTE_SCHED_TYPE_ATOMIC;
+       qconf.priority = RTE_EVENT_DEV_PRIORITY_HIGHEST;
+       qconf.event_queue_cfg = RTE_EVENT_QUEUE_CFG_SINGLE_LINK;
+       err = rte_event_queue_setup(TEST_DEV_ID, ev_qid, &qconf);
+       TEST_ASSERT_SUCCESS(err, "Failed to setup queue %u", ev_qid);
+
+       /*
+        * Setup ports again so that the newly added queue is visible
+        * to them
+        */
+       for (i = 0; i < pcnt; i++) {
+
+               int n_links;
+               uint8_t queues[RTE_EVENT_MAX_QUEUES_PER_DEV];
+               uint8_t priorities[RTE_EVENT_MAX_QUEUES_PER_DEV];
+
+               if (i == ev_port)
+                       continue;
+
+               n_links = rte_event_port_links_get(TEST_DEV_ID, i, queues,
+                                               priorities);
+               TEST_ASSERT(n_links > 0, "Failed to get port links %d\n",
+                       n_links);
+               err = rte_event_port_setup(TEST_DEV_ID, i, NULL);
+               TEST_ASSERT(err == 0, "Failed to setup port err %d\n", err);
+               err = rte_event_port_link(TEST_DEV_ID, i, queues, priorities,
+                                       n_links);
+               TEST_ASSERT(n_links == err, "Failed to link all queues"
+                       " err %s\n", rte_strerror(rte_errno));
+       }
+
+       err = rte_event_port_link(TEST_DEV_ID, ev_port, &ev_qid, NULL, 1);
+       TEST_ASSERT(err == 1, "Failed to link queue port %u",
+                   ev_port);
+
+       err = rte_event_eth_tx_adapter_start(TEST_INST_ID);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
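+       /*
+        * When the eventdev lacks RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED, it
+        * exposes its scheduler as a service; clearing the mapped check lets
+        * rte_service_run_iter_on_app_lcore() run it (and the adapter service
+        * below) without mapping the service to a service lcore.
+        */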
+       if (!(dev_info.event_dev_cap & RTE_EVENT_DEV_CAP_DISTRIBUTED_SCHED)) {
+               err = rte_event_dev_service_id_get(TEST_DEV_ID,
+                                               (uint32_t *)&eid);
+               TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+               err = rte_service_runstate_set(eid, 1);
+               TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+               err = rte_service_set_runstate_mapped_check(eid, 0);
+               TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+       }
+
+       err = rte_event_eth_tx_adapter_service_id_get(TEST_INST_ID, &tid);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_service_runstate_set(tid, 1);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_service_set_runstate_mapped_check(tid, 0);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_dev_start(TEST_DEV_ID);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       for (q = 0; q < MAX_NUM_QUEUE; q++) {
+               for (i = 0; i < RING_SIZE; i++) {
+                       pbufs[i] = &bufs[i];
+                       err = tx_adapter_single(TEST_ETHDEV_ID, q, pbufs[i],
+                                               ev_qid,
+                                               RTE_SCHED_TYPE_ORDERED);
+                       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+               }
+               for (i = 0; i < RING_SIZE; i++) {
+                       TEST_ASSERT_EQUAL(pbufs[i], &bufs[i],
+                               "Error: received data does not match"
+                               " that transmitted");
+               }
+       }
+
+       err = rte_event_eth_tx_adapter_stats_get(TEST_INST_ID, NULL);
+       TEST_ASSERT(err == -EINVAL, "Expected -EINVAL got %d", err);
+
+       err = rte_event_eth_tx_adapter_stats_get(TEST_INST_ID, &stats);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+       TEST_ASSERT_EQUAL(stats.tx_packets, MAX_NUM_QUEUE * RING_SIZE,
+                       "stats.tx_packets expected %u got %"PRIu64,
+                       MAX_NUM_QUEUE * RING_SIZE,
+                       stats.tx_packets);
+
+       err = rte_event_eth_tx_adapter_stats_reset(TEST_INST_ID);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_stats_get(TEST_INST_ID, &stats);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+       TEST_ASSERT_EQUAL(stats.tx_packets, 0,
+                       "stats.tx_packets expected %u got %"PRIu64,
+                       0,
+                       stats.tx_packets);
+
+       err = rte_event_eth_tx_adapter_stats_get(1, &stats);
+       TEST_ASSERT(err == -EINVAL, "Expected -EINVAL got %d", err);
+
+       err = rte_event_eth_tx_adapter_queue_del(TEST_INST_ID, TEST_ETHDEV_ID,
+                                               -1);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       err = rte_event_eth_tx_adapter_free(TEST_INST_ID);
+       TEST_ASSERT(err == 0, "Expected 0 got %d", err);
+
+       rte_event_dev_stop(TEST_DEV_ID);
+
+       return TEST_SUCCESS;
+}
+
+static int
+tx_adapter_dynamic_device(void)
+{
+       uint16_t port_id = rte_eth_dev_count_avail();
+       const char *null_dev[2] = { "eth_null0", "eth_null1" };
+       struct rte_eth_conf dev_conf;
+       int ret;
+       size_t i;
+
+       memset(&dev_conf, 0, sizeof(dev_conf));
+       for (i = 0; i < RTE_DIM(null_dev); i++) {
+               ret = rte_vdev_init(null_dev[i], NULL);
+               TEST_ASSERT_SUCCESS(ret, "%s Port creation failed %d",
+                               null_dev[i], ret);
+
+               if (i == 0) {
+                       ret = tx_adapter_create();
+                       TEST_ASSERT_SUCCESS(ret, "Adapter create failed %d",
+                                       ret);
+               }
+
+               ret = rte_eth_dev_configure(port_id + i, MAX_NUM_QUEUE,
+                                       MAX_NUM_QUEUE, &dev_conf);
+               TEST_ASSERT_SUCCESS(ret, "Failed to configure device %d", ret);
+
+               ret = rte_event_eth_tx_adapter_queue_add(TEST_INST_ID,
+                                                       port_id + i, 0);
+               TEST_ASSERT_SUCCESS(ret, "Failed to add queues %d", ret);
+
+       }
+
+       for (i = 0; i < RTE_DIM(null_dev); i++) {
+               ret = rte_event_eth_tx_adapter_queue_del(TEST_INST_ID,
+                                                       port_id + i, -1);
+               TEST_ASSERT_SUCCESS(ret, "Failed to delete queues %d", ret);
+       }
+
+       tx_adapter_free();
+
+       for (i = 0; i < RTE_DIM(null_dev); i++)
+               rte_vdev_uninit(null_dev[i]);
+
+       return TEST_SUCCESS;
+}
+
+static struct unit_test_suite event_eth_tx_tests = {
+       .setup = testsuite_setup,
+       .teardown = testsuite_teardown,
+       .suite_name = "tx event eth adapter test suite",
+       .unit_test_cases = {
+               TEST_CASE_ST(NULL, NULL, tx_adapter_create_free),
+               TEST_CASE_ST(tx_adapter_create, tx_adapter_free,
+                                       tx_adapter_queue_add_del),
+               TEST_CASE_ST(tx_adapter_create, tx_adapter_free,
+                                       tx_adapter_start_stop),
+               TEST_CASE_ST(tx_adapter_create, tx_adapter_free,
+                                       tx_adapter_service),
+               TEST_CASE_ST(NULL, NULL, tx_adapter_dynamic_device),
+               TEST_CASES_END() /**< NULL terminate unit test array */
+       }
+};
+
+static int
+test_event_eth_tx_adapter_common(void)
+{
+       return unit_test_suite_runner(&event_eth_tx_tests);
+}
+
+REGISTER_TEST_COMMAND(event_eth_tx_adapter_autotest,
+               test_event_eth_tx_adapter_common);
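+
+/*
+ * Hedged sketch, not part of the suite above: tx_adapter_service() returns
+ * early for devices with RTE_EVENT_ETH_TX_ADAPTER_CAP_INTERNAL_PORT.  On
+ * such devices an application would transmit through the adapter roughly as
+ * below; the function name, the event port argument and the queue ids are
+ * assumptions for illustration only.
+ */
+static __rte_unused uint16_t
+tx_internal_port_sketch(uint8_t ev_port, struct rte_mbuf *m)
+{
+       struct rte_event ev;
+
+       ev.queue_id = 0;
+       ev.op = RTE_EVENT_OP_NEW;
+       ev.event_type = RTE_EVENT_TYPE_CPU;
+       ev.sched_type = RTE_SCHED_TYPE_ATOMIC;
+       ev.mbuf = m;
+
+       m->port = TEST_ETHDEV_ID;
+       rte_event_eth_tx_adapter_txq_set(m, 0);
+
+       /* enqueue straight to the ethdev's internal TX port */
+       return rte_event_eth_tx_adapter_enqueue(TEST_DEV_ID, ev_port, &ev, 1);
+}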
index 93471db..a45b7d1 100644 (file)
@@ -792,7 +792,7 @@ _cancel_burst_thread(void *args)
                TEST_ASSERT_EQUAL(n, ret, "Failed to cancel complete burst of "
                                  "event timers");
                rte_mempool_put_bulk(eventdev_test_mempool, (void **)ev_tim,
-                               ret);
+                               RTE_MIN(ret, MAX_BURST));
 
                cancel_count += ret;
        }
index 04bdc6b..00d7327 100644 (file)
@@ -190,15 +190,18 @@ test_eventdev_configure(void)
                 "Config negative test failed");
        TEST_ASSERT_EQUAL(-EINVAL,
                test_ethdev_config_run(&dev_conf, &info, max_event_queue_flows),
-                "Config negative test failed");
-       TEST_ASSERT_EQUAL(-EINVAL,
-               test_ethdev_config_run(&dev_conf, &info,
-                       max_event_port_dequeue_depth),
-                        "Config negative test failed");
-       TEST_ASSERT_EQUAL(-EINVAL,
-               test_ethdev_config_run(&dev_conf, &info,
-               max_event_port_enqueue_depth),
-                "Config negative test failed");
+               "Config negative test failed");
+
+       if (info.event_dev_cap & RTE_EVENT_DEV_CAP_BURST_MODE) {
+               TEST_ASSERT_EQUAL(-EINVAL,
+                               test_ethdev_config_run(&dev_conf, &info,
+                                       max_event_port_dequeue_depth),
+                               "Config negative test failed");
+               TEST_ASSERT_EQUAL(-EINVAL,
+                               test_ethdev_config_run(&dev_conf, &info,
+                                       max_event_port_enqueue_depth),
+                               "Config negative test failed");
+       }
 
        /* Positive case */
        devconf_set_default_sane_values(&dev_conf, &info);
diff --git a/test/test/test_external_mem.c b/test/test/test_external_mem.c
new file mode 100644 (file)
index 0000000..998cafa
--- /dev/null
+++ b/test/test/test_external_mem.c
@@ -0,0 +1,390 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_malloc.h>
+#include <rte_ring.h>
+#include <rte_string_fns.h>
+
+#include "test.h"
+
+#define EXTERNAL_MEM_SZ (RTE_PGSIZE_4K << 10) /* 4M of data */
+
+static int
+test_invalid_param(void *addr, size_t len, size_t pgsz, rte_iova_t *iova,
+               int n_pages)
+{
+       static const char * const names[] = {
+               NULL, /* NULL name */
+               "",   /* empty name */
+               "this heap name is definitely way too long to be valid"
+       };
+       const char *valid_name = "valid heap name";
+       unsigned int i;
+
+       /* check invalid name handling */
+       for (i = 0; i < RTE_DIM(names); i++) {
+               const char *name = names[i];
+
+               /* these calls may fail for other reasons, so check errno */
+               if (rte_malloc_heap_create(name) >= 0 || rte_errno != EINVAL) {
+                       printf("%s():%i: Created heap with invalid name\n",
+                                       __func__, __LINE__);
+                       goto fail;
+               }
+
+               if (rte_malloc_heap_destroy(name) >= 0 || rte_errno != EINVAL) {
+                       printf("%s():%i: Destroyed heap with invalid name\n",
+                                       __func__, __LINE__);
+                       goto fail;
+               }
+
+               if (rte_malloc_heap_get_socket(name) >= 0 ||
+                               rte_errno != EINVAL) {
+                       printf("%s():%i: Found socket for heap with invalid name\n",
+                                       __func__, __LINE__);
+                       goto fail;
+               }
+
+               if (rte_malloc_heap_memory_add(name, addr, len,
+                               NULL, 0, pgsz) >= 0 || rte_errno != EINVAL) {
+                       printf("%s():%i: Added memory to heap with invalid name\n",
+                                       __func__, __LINE__);
+                       goto fail;
+               }
+               if (rte_malloc_heap_memory_remove(name, addr, len) >= 0 ||
+                               rte_errno != EINVAL) {
+                       printf("%s():%i: Removed memory from heap with invalid name\n",
+                                       __func__, __LINE__);
+                       goto fail;
+               }
+
+               if (rte_malloc_heap_memory_attach(name, addr, len) >= 0 ||
+                               rte_errno != EINVAL) {
+                       printf("%s():%i: Attached memory to heap with invalid name\n",
+                               __func__, __LINE__);
+                       goto fail;
+               }
+               if (rte_malloc_heap_memory_detach(name, addr, len) >= 0 ||
+                               rte_errno != EINVAL) {
+                       printf("%s():%i: Detached memory from heap with invalid name\n",
+                               __func__, __LINE__);
+                       goto fail;
+               }
+       }
+
+       /* do same as above, but with a valid heap name */
+
+       /* skip create call */
+       if (rte_malloc_heap_destroy(valid_name) >= 0 || rte_errno != ENOENT) {
+               printf("%s():%i: Destroyed non-existent heap\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+       if (rte_malloc_heap_get_socket(valid_name) >= 0 ||
+                       rte_errno != ENOENT) {
+               printf("%s():%i: Found socket for non-existent heap\n",
+                               __func__, __LINE__);
+               goto fail;
+       }
+
+       /* these calls may fail for other reasons, so check errno */
+       if (rte_malloc_heap_memory_add(valid_name, addr, len,
+                       NULL, 0, pgsz) >= 0 || rte_errno != ENOENT) {
+               printf("%s():%i: Added memory to non-existent heap\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+       if (rte_malloc_heap_memory_remove(valid_name, addr, len) >= 0 ||
+                       rte_errno != ENOENT) {
+               printf("%s():%i: Removed memory from non-existent heap\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       if (rte_malloc_heap_memory_attach(valid_name, addr, len) >= 0 ||
+                       rte_errno != ENOENT) {
+               printf("%s():%i: Attached memory to non-existent heap\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+       if (rte_malloc_heap_memory_detach(valid_name, addr, len) >= 0 ||
+                       rte_errno != ENOENT) {
+               printf("%s():%i: Detached memory from non-existent heap\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       /* create a valid heap but test other invalid parameters */
+       if (rte_malloc_heap_create(valid_name) != 0) {
+               printf("%s():%i: Failed to create valid heap\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       /* zero length */
+       if (rte_malloc_heap_memory_add(valid_name, addr, 0,
+                       NULL, 0, pgsz) >= 0 || rte_errno != EINVAL) {
+               printf("%s():%i: Added memory with invalid parameters\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       if (rte_malloc_heap_memory_remove(valid_name, addr, 0) >= 0 ||
+                       rte_errno != EINVAL) {
+               printf("%s():%i: Removed memory with invalid parameters\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       if (rte_malloc_heap_memory_attach(valid_name, addr, 0) >= 0 ||
+                       rte_errno != EINVAL) {
+               printf("%s():%i: Attached memory with invalid parameters\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+       if (rte_malloc_heap_memory_detach(valid_name, addr, 0) >= 0 ||
+                       rte_errno != EINVAL) {
+               printf("%s():%i: Detached memory with invalid parameters\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       /* zero address */
+       if (rte_malloc_heap_memory_add(valid_name, NULL, len,
+                       NULL, 0, pgsz) >= 0 || rte_errno != EINVAL) {
+               printf("%s():%i: Added memory with invalid parameters\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       if (rte_malloc_heap_memory_remove(valid_name, NULL, len) >= 0 ||
+                       rte_errno != EINVAL) {
+               printf("%s():%i: Removed memory with invalid parameters\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       if (rte_malloc_heap_memory_attach(valid_name, NULL, len) >= 0 ||
+                       rte_errno != EINVAL) {
+               printf("%s():%i: Attached memory with invalid parameters\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+       if (rte_malloc_heap_memory_detach(valid_name, NULL, len) >= 0 ||
+                       rte_errno != EINVAL) {
+               printf("%s():%i: Detached memory with invalid parameters\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       /* wrong page count */
+       if (rte_malloc_heap_memory_add(valid_name, addr, len,
+                       iova, 0, pgsz) >= 0 || rte_errno != EINVAL) {
+               printf("%s():%i: Added memory with invalid parameters\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+       if (rte_malloc_heap_memory_add(valid_name, addr, len,
+                       iova, n_pages - 1, pgsz) >= 0 || rte_errno != EINVAL) {
+               printf("%s():%i: Added memory with invalid parameters\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+       if (rte_malloc_heap_memory_add(valid_name, addr, len,
+                       iova, n_pages + 1, pgsz) >= 0 || rte_errno != EINVAL) {
+               printf("%s():%i: Added memory with invalid parameters\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       /* tests passed, destroy heap */
+       if (rte_malloc_heap_destroy(valid_name) != 0) {
+               printf("%s():%i: Failed to destroy valid heap\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+       return 0;
+fail:
+       rte_malloc_heap_destroy(valid_name);
+       return -1;
+}
+
+static int
+test_basic(void *addr, size_t len, size_t pgsz, rte_iova_t *iova, int n_pages)
+{
+       const char *heap_name = "heap";
+       void *ptr = NULL;
+       int socket_id, i;
+       const struct rte_memzone *mz = NULL;
+
+       /* create heap */
+       if (rte_malloc_heap_create(heap_name) != 0) {
+               printf("%s():%i: Failed to create malloc heap\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       /* get socket ID corresponding to this heap */
+       socket_id = rte_malloc_heap_get_socket(heap_name);
+       if (socket_id < 0) {
+               printf("%s():%i: cannot find socket for external heap\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       /* heap is empty, so any allocation should fail */
+       ptr = rte_malloc_socket("EXTMEM", 64, 0, socket_id);
+       if (ptr != NULL) {
+               printf("%s():%i: Allocated from empty heap\n", __func__,
+                       __LINE__);
+               goto fail;
+       }
+
+       /* add memory to heap */
+       if (rte_malloc_heap_memory_add(heap_name, addr, len,
+                       iova, n_pages, pgsz) != 0) {
+               printf("%s():%i: Failed to add memory to heap\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       /* check that we can get this memory from EAL now */
+       for (i = 0; i < n_pages; i++) {
+               const struct rte_memseg *ms;
+               void *cur = RTE_PTR_ADD(addr, pgsz * i);
+
+               ms = rte_mem_virt2memseg(cur, NULL);
+               if (ms == NULL) {
+                       printf("%s():%i: Failed to retrieve memseg for external mem\n",
+                               __func__, __LINE__);
+                       goto fail;
+               }
+               if (ms->addr != cur) {
+                       printf("%s():%i: VA mismatch\n", __func__, __LINE__);
+                       goto fail;
+               }
+               if (ms->iova != iova[i]) {
+                       printf("%s():%i: IOVA mismatch\n", __func__, __LINE__);
+                       goto fail;
+               }
+       }
+
+       /* allocate - this now should succeed */
+       ptr = rte_malloc_socket("EXTMEM", 64, 0, socket_id);
+       if (ptr == NULL) {
+               printf("%s():%i: Failed to allocate from external heap\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       /* check if address is in expected range */
+       if (ptr < addr || ptr >= RTE_PTR_ADD(addr, len)) {
+               printf("%s():%i: Allocated from unexpected address space\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       /* we've allocated something - removing memory should fail */
+       if (rte_malloc_heap_memory_remove(heap_name, addr, len) >= 0 ||
+                       rte_errno != EBUSY) {
+               printf("%s():%i: Removing memory succeeded when memory is not free\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+       if (rte_malloc_heap_destroy(heap_name) >= 0 || rte_errno != EBUSY) {
+               printf("%s():%i: Destroying heap succeeded when memory is not free\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       /* try allocating an IOVA-contiguous memzone - this should succeed
+        * because we've set up a contiguous IOVA table.
+        */
+       mz = rte_memzone_reserve("heap_test", pgsz * 2, socket_id,
+                       RTE_MEMZONE_IOVA_CONTIG);
+       if (mz == NULL) {
+               printf("%s():%i: Failed to reserve memzone\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       rte_malloc_dump_stats(stdout, NULL);
+       rte_malloc_dump_heaps(stdout);
+
+       /* free memory - removing it should now succeed */
+       rte_free(ptr);
+       ptr = NULL;
+
+       rte_memzone_free(mz);
+       mz = NULL;
+
+       if (rte_malloc_heap_memory_remove(heap_name, addr, len) != 0) {
+               printf("%s():%i: Removing memory from heap failed\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+       if (rte_malloc_heap_destroy(heap_name) != 0) {
+               printf("%s():%i: Destroying heap failed\n",
+                       __func__, __LINE__);
+               goto fail;
+       }
+
+       return 0;
+fail:
+       rte_memzone_free(mz);
+       rte_free(ptr);
+       /* even if something failed, attempt to clean up */
+       rte_malloc_heap_memory_remove(heap_name, addr, len);
+       rte_malloc_heap_destroy(heap_name);
+
+       return -1;
+}
+
+/* we need to test attach/detach in secondary processes. */
+static int
+test_external_mem(void)
+{
+       size_t len = EXTERNAL_MEM_SZ;
+       size_t pgsz = RTE_PGSIZE_4K;
+       rte_iova_t iova[len / pgsz];
+       void *addr;
+       int ret, n_pages;
+       int i;
+
+       /* create external memory area */
+       n_pages = RTE_DIM(iova);
+       addr = mmap(NULL, len, PROT_WRITE | PROT_READ,
+                       MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+       if (addr == MAP_FAILED) {
+               printf("%s():%i: Failed to create dummy memory area\n",
+                       __func__, __LINE__);
+               return -1;
+       }
+       for (i = 0; i < n_pages; i++) {
+               /* arbitrary IOVA */
+               rte_iova_t tmp = 0x100000000 + i * pgsz;
+               iova[i] = tmp;
+       }
+
+       ret = test_invalid_param(addr, len, pgsz, iova, n_pages);
+       ret |= test_basic(addr, len, pgsz, iova, n_pages);
+
+       munmap(addr, len);
+
+       return ret;
+}
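+
+/*
+ * Hedged sketch only -- the suite does not spawn a secondary process yet
+ * (see the note above test_external_mem()).  A secondary would map the same
+ * VA range and then attach/detach the primary's heap; the heap name matches
+ * test_basic() and the function name is an assumption for illustration.
+ */
+static __rte_unused int
+secondary_attach_detach_sketch(void *addr, size_t len)
+{
+       /* the VA range must already be mapped at the same address here */
+       if (rte_malloc_heap_memory_attach("heap", addr, len) != 0)
+               return -1;
+       /* allocations made on this heap's socket id are now usable */
+       return rte_malloc_heap_memory_detach("heap", addr, len);
+}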
+
+REGISTER_TEST_COMMAND(external_mem_autotest, test_external_mem);
index b3db9fd..6d06eb2 100644 (file)
@@ -260,6 +260,13 @@ static void run_hash_func_tests(void)
  *     - lookup (hit)
  *     - delete
  *     - lookup (miss)
+ *
+ * Repeat the test case when 'free on delete' is disabled.
+ *     - add
+ *     - lookup (hit)
+ *     - delete
+ *     - lookup (miss)
+ *     - free
  */
 static int test_add_delete(void)
 {
@@ -295,10 +302,12 @@ static int test_add_delete(void)
 
        /* repeat test with precomputed hash functions */
        hash_sig_t hash_value;
-       int pos1, expectedPos1;
+       int pos1, expectedPos1, delPos1;
 
+       ut_params.extra_flag = RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL;
        handle = rte_hash_create(&ut_params);
        RETURN_IF_ERROR(handle == NULL, "hash creation failed");
+       ut_params.extra_flag = 0;
 
        hash_value = rte_hash_hash(handle, &keys[0]);
        pos1 = rte_hash_add_key_with_hash(handle, &keys[0], hash_value);
@@ -315,12 +324,18 @@ static int test_add_delete(void)
        print_key_info("Del", &keys[0], pos1);
        RETURN_IF_ERROR(pos1 != expectedPos1,
                        "failed to delete key (pos1=%d)", pos1);
+       delPos1 = pos1;
 
        pos1 = rte_hash_lookup_with_hash(handle, &keys[0], hash_value);
        print_key_info("Lkp", &keys[0], pos1);
        RETURN_IF_ERROR(pos1 != -ENOENT,
                        "fail: found key after deleting! (pos1=%d)", pos1);
 
+       pos1 = rte_hash_free_key_with_position(handle, delPos1);
+       print_key_info("Free", &keys[0], delPos1);
+       RETURN_IF_ERROR(pos1 != 0,
+                       "failed to free key (delPos1=%d)", delPos1);
+
        rte_hash_free(handle);
 
        return 0;
@@ -390,6 +405,84 @@ static int test_add_update_delete(void)
        return 0;
 }
 
+/*
+ * Sequence of operations for a single key with 'disable free on del' set:
+ *     - delete: miss
+ *     - add
+ *     - lookup: hit
+ *     - add: update
+ *     - lookup: hit (updated data)
+ *     - delete: hit
+ *     - delete: miss
+ *     - lookup: miss
+ *     - free: hit
+ *     - lookup: miss
+ */
+static int test_add_update_delete_free(void)
+{
+       struct rte_hash *handle;
+       int pos0, expectedPos0, delPos0, result;
+
+       ut_params.name = "test2";
+       ut_params.extra_flag = RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL;
+       handle = rte_hash_create(&ut_params);
+       RETURN_IF_ERROR(handle == NULL, "hash creation failed");
+       ut_params.extra_flag = 0;
+
+       pos0 = rte_hash_del_key(handle, &keys[0]);
+       print_key_info("Del", &keys[0], pos0);
+       RETURN_IF_ERROR(pos0 != -ENOENT,
+                       "fail: found non-existent key (pos0=%d)", pos0);
+
+       pos0 = rte_hash_add_key(handle, &keys[0]);
+       print_key_info("Add", &keys[0], pos0);
+       RETURN_IF_ERROR(pos0 < 0, "failed to add key (pos0=%d)", pos0);
+       expectedPos0 = pos0;
+
+       pos0 = rte_hash_lookup(handle, &keys[0]);
+       print_key_info("Lkp", &keys[0], pos0);
+       RETURN_IF_ERROR(pos0 != expectedPos0,
+                       "failed to find key (pos0=%d)", pos0);
+
+       pos0 = rte_hash_add_key(handle, &keys[0]);
+       print_key_info("Add", &keys[0], pos0);
+       RETURN_IF_ERROR(pos0 != expectedPos0,
+                       "failed to re-add key (pos0=%d)", pos0);
+
+       pos0 = rte_hash_lookup(handle, &keys[0]);
+       print_key_info("Lkp", &keys[0], pos0);
+       RETURN_IF_ERROR(pos0 != expectedPos0,
+                       "failed to find key (pos0=%d)", pos0);
+
+       delPos0 = rte_hash_del_key(handle, &keys[0]);
+       print_key_info("Del", &keys[0], delPos0);
+       RETURN_IF_ERROR(delPos0 != expectedPos0,
+                       "failed to delete key (pos0=%d)", delPos0);
+
+       pos0 = rte_hash_del_key(handle, &keys[0]);
+       print_key_info("Del", &keys[0], pos0);
+       RETURN_IF_ERROR(pos0 != -ENOENT,
+                       "fail: deleted already deleted key (pos0=%d)", pos0);
+
+       pos0 = rte_hash_lookup(handle, &keys[0]);
+       print_key_info("Lkp", &keys[0], pos0);
+       RETURN_IF_ERROR(pos0 != -ENOENT,
+                       "fail: found key after deleting! (pos0=%d)", pos0);
+
+       result = rte_hash_free_key_with_position(handle, delPos0);
+       print_key_info("Free", &keys[0], delPos0);
+       RETURN_IF_ERROR(result != 0,
+                       "failed to free key (pos0=%d)", delPos0);
+
+       pos0 = rte_hash_lookup(handle, &keys[0]);
+       print_key_info("Lkp", &keys[0], pos0);
+       RETURN_IF_ERROR(pos0 != -ENOENT,
+                       "fail: found key after deleting! (pos0=%d)", pos0);
+
+       rte_hash_free(handle);
+       return 0;
+}
+
 /*
  * Sequence of operations for retrieving a key with its position
  *
@@ -399,11 +492,20 @@ static int test_add_update_delete(void)
  *  - delete key
  *  - try to get the deleted key: miss
  *
+ * Repeat the test case when 'free on delete' is disabled.
+ *  - create table
+ *  - add key
+ *  - get the key with its position: hit
+ *  - delete key
+ *  - try to get the deleted key: miss (the slot is only reclaimed by free)
+ *  - free key
+ *  - try to get the deleted key: miss
+ *
  */
 static int test_hash_get_key_with_position(void)
 {
        struct rte_hash *handle = NULL;
-       int pos, expectedPos, result;
+       int pos, expectedPos, delPos, result;
        void *key;
 
        ut_params.name = "hash_get_key_w_pos";
@@ -426,6 +528,38 @@ static int test_hash_get_key_with_position(void)
        result = rte_hash_get_key_with_position(handle, pos, &key);
        RETURN_IF_ERROR(result != -ENOENT, "non valid key retrieved");
 
+       rte_hash_free(handle);
+
+       ut_params.name = "hash_get_key_w_pos";
+       ut_params.extra_flag = RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL;
+       handle = rte_hash_create(&ut_params);
+       RETURN_IF_ERROR(handle == NULL, "hash creation failed");
+       ut_params.extra_flag = 0;
+
+       pos = rte_hash_add_key(handle, &keys[0]);
+       print_key_info("Add", &keys[0], pos);
+       RETURN_IF_ERROR(pos < 0, "failed to add key (pos0=%d)", pos);
+       expectedPos = pos;
+
+       result = rte_hash_get_key_with_position(handle, pos, &key);
+       RETURN_IF_ERROR(result != 0, "error retrieving a key");
+
+       delPos = rte_hash_del_key(handle, &keys[0]);
+       print_key_info("Del", &keys[0], delPos);
+       RETURN_IF_ERROR(delPos != expectedPos,
+                       "failed to delete key (pos0=%d)", delPos);
+
+       result = rte_hash_get_key_with_position(handle, delPos, &key);
+       RETURN_IF_ERROR(result != -ENOENT, "non valid key retrieved");
+
+       result = rte_hash_free_key_with_position(handle, delPos);
+       print_key_info("Free", &keys[0], delPos);
+       RETURN_IF_ERROR(result != 0,
+                       "failed to free key (delPos=%d)", delPos);
+
+       result = rte_hash_get_key_with_position(handle, delPos, &key);
+       RETURN_IF_ERROR(result != -ENOENT, "non valid key retrieved");
+
        rte_hash_free(handle);
        return 0;
 }
@@ -660,6 +794,116 @@ static int test_full_bucket(void)
        return 0;
 }
 
+/*
+ * Similar to the test above (full bucket test), but for extendable buckets.
+ */
+static int test_extendable_bucket(void)
+{
+       struct rte_hash_parameters params_pseudo_hash = {
+               .name = "test5",
+               .entries = 64,
+               .key_len = sizeof(struct flow_key), /* 13 */
+               .hash_func = pseudo_hash,
+               .hash_func_init_val = 0,
+               .socket_id = 0,
+               .extra_flag = RTE_HASH_EXTRA_FLAGS_EXT_TABLE
+       };
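+       /*
+        * pseudo_hash() returns a constant, so every key lands in the same
+        * bucket; with RTE_HASH_EXTRA_FLAGS_EXT_TABLE the overflow is chained
+        * into extendable buckets and all 64 adds are expected to succeed.
+        */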
+       struct rte_hash *handle;
+       int pos[64];
+       int expected_pos[64];
+       unsigned int i;
+       struct flow_key rand_keys[64];
+
+       for (i = 0; i < 64; i++) {
+               rand_keys[i].port_dst = i;
+               rand_keys[i].port_src = i+1;
+       }
+
+       handle = rte_hash_create(&params_pseudo_hash);
+       RETURN_IF_ERROR(handle == NULL, "hash creation failed");
+
+       /* Fill bucket */
+       for (i = 0; i < 64; i++) {
+               pos[i] = rte_hash_add_key(handle, &rand_keys[i]);
+               print_key_info("Add", &rand_keys[i], pos[i]);
+               RETURN_IF_ERROR(pos[i] < 0,
+                       "failed to add key (pos[%u]=%d)", i, pos[i]);
+               expected_pos[i] = pos[i];
+       }
+
+       /* Lookup */
+       for (i = 0; i < 64; i++) {
+               pos[i] = rte_hash_lookup(handle, &rand_keys[i]);
+               print_key_info("Lkp", &rand_keys[i], pos[i]);
+               RETURN_IF_ERROR(pos[i] != expected_pos[i],
+                       "failed to find key (pos[%u]=%d)", i, pos[i]);
+       }
+
+       /* Add - update */
+       for (i = 0; i < 64; i++) {
+               pos[i] = rte_hash_add_key(handle, &rand_keys[i]);
+               print_key_info("Add", &rand_keys[i], pos[i]);
+               RETURN_IF_ERROR(pos[i] != expected_pos[i],
+                       "failed to add key (pos[%u]=%d)", i, pos[i]);
+       }
+
+       /* Lookup */
+       for (i = 0; i < 64; i++) {
+               pos[i] = rte_hash_lookup(handle, &rand_keys[i]);
+               print_key_info("Lkp", &rand_keys[i], pos[i]);
+               RETURN_IF_ERROR(pos[i] != expected_pos[i],
+                       "failed to find key (pos[%u]=%d)", i, pos[i]);
+       }
+
+       /* Delete 1 key, check other keys are still found */
+       pos[35] = rte_hash_del_key(handle, &rand_keys[35]);
+       print_key_info("Del", &rand_keys[35], pos[35]);
+       RETURN_IF_ERROR(pos[35] != expected_pos[35],
+                       "failed to delete key (pos[35]=%d)", pos[35]);
+       pos[20] = rte_hash_lookup(handle, &rand_keys[20]);
+       print_key_info("Lkp", &rand_keys[20], pos[20]);
+       RETURN_IF_ERROR(pos[20] != expected_pos[20],
+                       "failed lookup after deleting key from same bucket "
+                       "(pos[20]=%d)", pos[20]);
+
+       /* Go back to previous state */
+       pos[35] = rte_hash_add_key(handle, &rand_keys[35]);
+       print_key_info("Add", &rand_keys[35], pos[35]);
+       expected_pos[35] = pos[35];
+       RETURN_IF_ERROR(pos[35] < 0, "failed to add key (pos[35]=%d)", pos[35]);
+
+       /* Delete */
+       for (i = 0; i < 64; i++) {
+               pos[i] = rte_hash_del_key(handle, &rand_keys[i]);
+               print_key_info("Del", &rand_keys[i], pos[i]);
+               RETURN_IF_ERROR(pos[i] != expected_pos[i],
+                       "failed to delete key (pos[%u]=%d)", i, pos[i]);
+       }
+
+       /* Lookup */
+       for (i = 0; i < 64; i++) {
+               pos[i] = rte_hash_lookup(handle, &rand_keys[i]);
+               print_key_info("Lkp", &rand_keys[i], pos[i]);
+               RETURN_IF_ERROR(pos[i] != -ENOENT,
+                       "fail: found non-existent key (pos[%u]=%d)", i, pos[i]);
+       }
+
+       /* Add again */
+       for (i = 0; i < 64; i++) {
+               pos[i] = rte_hash_add_key(handle, &rand_keys[i]);
+               print_key_info("Add", &rand_keys[i], pos[i]);
+               RETURN_IF_ERROR(pos[i] < 0,
+                       "failed to add key (pos[%u]=%d)", i, pos[i]);
+               expected_pos[i] = pos[i];
+       }
+
+       rte_hash_free(handle);
+
+       /* Cover the NULL case. */
+       rte_hash_free(NULL);
+       return 0;
+}
+
 /******************************************************************************/
 static int
 fbk_hash_unit_test(void)
@@ -1096,7 +1340,7 @@ test_hash_creation_with_good_parameters(void)
  * Test to see the average table utilization (entries added/max entries)
  * before hitting a random entry that cannot be added
  */
-static int test_average_table_utilization(void)
+static int test_average_table_utilization(uint32_t ext_table)
 {
        struct rte_hash *handle;
        uint8_t simple_key[MAX_KEYSIZE];
@@ -1107,12 +1351,23 @@ static int test_average_table_utilization(void)
 
        printf("\n# Running test to determine average utilization"
               "\n  before adding elements begins to fail\n");
+       if (ext_table)
+               printf("ext table is enabled\n");
+       else
+               printf("ext table is disabled\n");
+
        printf("Measuring performance, please wait");
        fflush(stdout);
        ut_params.entries = 1 << 16;
        ut_params.name = "test_average_utilization";
        ut_params.hash_func = rte_jhash;
+       if (ext_table)
+               ut_params.extra_flag |= RTE_HASH_EXTRA_FLAGS_EXT_TABLE;
+       else
+               ut_params.extra_flag &= ~RTE_HASH_EXTRA_FLAGS_EXT_TABLE;
+
        handle = rte_hash_create(&ut_params);
        RETURN_IF_ERROR(handle == NULL, "hash creation failed");
 
        for (j = 0; j < ITERATIONS; j++) {
@@ -1139,6 +1394,14 @@ static int test_average_table_utilization(void)
                        rte_hash_free(handle);
                        return -1;
                }
+               if (ext_table) {
+                       if (cnt != ut_params.entries) {
+                               printf("rte_hash_count returned wrong value "
+                                       "%u, %u, %u\n", j, added_keys, cnt);
+                               rte_hash_free(handle);
+                               return -1;
+                       }
+               }
 
                average_keys_added += added_keys;
 
@@ -1161,7 +1424,7 @@ static int test_average_table_utilization(void)
 }
 
 #define NUM_ENTRIES 256
-static int test_hash_iteration(void)
+static int test_hash_iteration(uint32_t ext_table)
 {
        struct rte_hash *handle;
        unsigned i;
@@ -1177,6 +1440,11 @@ static int test_hash_iteration(void)
        ut_params.name = "test_hash_iteration";
        ut_params.hash_func = rte_jhash;
        ut_params.key_len = 16;
+       if (ext_table)
+               ut_params.extra_flag |= RTE_HASH_EXTRA_FLAGS_EXT_TABLE;
+       else
+               ut_params.extra_flag &= ~RTE_HASH_EXTRA_FLAGS_EXT_TABLE;
+
        handle = rte_hash_create(&ut_params);
        RETURN_IF_ERROR(handle == NULL, "hash creation failed");
 
@@ -1186,8 +1454,13 @@ static int test_hash_iteration(void)
                for (i = 0; i < ut_params.key_len; i++)
                        keys[added_keys][i] = rte_rand() % 255;
                ret = rte_hash_add_key_data(handle, keys[added_keys], data[added_keys]);
-               if (ret < 0)
+               if (ret < 0) {
+                       if (ext_table) {
+                               printf("Insertion failed for ext table\n");
+                               goto err;
+                       }
                        break;
+               }
        }
 
        /* Iterate through the hash table */
@@ -1470,10 +1743,14 @@ test_hash(void)
                return -1;
        if (test_add_update_delete() < 0)
                return -1;
+       if (test_add_update_delete_free() < 0)
+               return -1;
        if (test_five_keys() < 0)
                return -1;
        if (test_full_bucket() < 0)
                return -1;
+       if (test_extendable_bucket() < 0)
+               return -1;
 
        if (test_fbk_hash_find_existing() < 0)
                return -1;
@@ -1483,9 +1760,17 @@ test_hash(void)
                return -1;
        if (test_hash_creation_with_good_parameters() < 0)
                return -1;
-       if (test_average_table_utilization() < 0)
+
+       /* ext table disabled */
+       if (test_average_table_utilization(0) < 0)
+               return -1;
+       if (test_hash_iteration(0) < 0)
+               return -1;
+
+       /* ext table enabled */
+       if (test_average_table_utilization(1) < 0)
                return -1;
-       if (test_hash_iteration() < 0)
+       if (test_hash_iteration(1) < 0)
                return -1;
 
        run_hash_func_tests();
index 6a3eb10..d447f6d 100644 (file)
@@ -12,6 +12,7 @@
 #include <rte_malloc.h>
 #include <rte_random.h>
 #include <rte_spinlock.h>
+#include <rte_jhash.h>
 
 #include "test.h"
 
@@ -108,7 +109,7 @@ test_hash_multiwriter(void)
        struct rte_hash_parameters hash_params = {
                .entries = nb_entries,
                .key_len = sizeof(uint32_t),
-               .hash_func = rte_hash_crc,
+               .hash_func = rte_jhash,
                .hash_func_init_val = 0,
                .socket_id = rte_socket_id(),
        };
index 33dcb9f..5252111 100644 (file)
 #include "test.h"
 
 #define MAX_ENTRIES (1 << 19)
-#define KEYS_TO_ADD (MAX_ENTRIES * 3 / 4) /* 75% table utilization */
+#define KEYS_TO_ADD (MAX_ENTRIES)
+#define ADD_PERCENT 0.75 /* 75% table utilization */
 #define NUM_LOOKUPS (KEYS_TO_ADD * 5) /* Loop among keys added, several times */
-#define BUCKET_SIZE 4
+/* BUCKET_SIZE should be the same as RTE_HASH_BUCKET_ENTRIES in rte_hash */
+#define BUCKET_SIZE 8
 #define NUM_BUCKETS (MAX_ENTRIES / BUCKET_SIZE)
 #define MAX_KEYSIZE 64
 #define NUM_KEYSIZES 10
@@ -77,7 +79,7 @@ static struct rte_hash_parameters ut_params = {
 
 static int
 create_table(unsigned int with_data, unsigned int table_index,
-               unsigned int with_locks)
+               unsigned int with_locks, unsigned int ext)
 {
        char name[RTE_HASH_NAMESIZE];
 
@@ -95,6 +97,9 @@ create_table(unsigned int with_data, unsigned int table_index,
        else
                ut_params.extra_flag = 0;
 
+       if (ext)
+               ut_params.extra_flag |= RTE_HASH_EXTRA_FLAGS_EXT_TABLE;
+
        ut_params.name = name;
        ut_params.key_len = hashtest_key_lens[table_index];
        ut_params.socket_id = rte_socket_id();
@@ -116,15 +121,21 @@ create_table(unsigned int with_data, unsigned int table_index,
 
 /* Shuffle the keys that have been added, so lookups will be totally random */
 static void
-shuffle_input_keys(unsigned table_index)
+shuffle_input_keys(unsigned int table_index, unsigned int ext)
 {
        unsigned i;
        uint32_t swap_idx;
        uint8_t temp_key[MAX_KEYSIZE];
        hash_sig_t temp_signature;
        int32_t temp_position;
+       unsigned int keys_to_add;
+
+       if (!ext)
+               keys_to_add = KEYS_TO_ADD * ADD_PERCENT;
+       else
+               keys_to_add = KEYS_TO_ADD;
 
-       for (i = KEYS_TO_ADD - 1; i > 0; i--) {
+       for (i = keys_to_add - 1; i > 0; i--) {
                swap_idx = rte_rand() % i;
 
                memcpy(temp_key, keys[i], hashtest_key_lens[table_index]);
@@ -146,14 +157,20 @@ shuffle_input_keys(unsigned table_index)
  * ALL can fit in hash table (no errors)
  */
 static int
-get_input_keys(unsigned with_pushes, unsigned table_index)
+get_input_keys(unsigned int with_pushes, unsigned int table_index,
+                                                       unsigned int ext)
 {
        unsigned i, j;
        unsigned bucket_idx, incr, success = 1;
        uint8_t k = 0;
        int32_t ret;
        const uint32_t bucket_bitmask = NUM_BUCKETS - 1;
+       unsigned int keys_to_add;
 
+       if (!ext)
+               keys_to_add = KEYS_TO_ADD * ADD_PERCENT;
+       else
+               keys_to_add = KEYS_TO_ADD;
        /* Reset all arrays */
        for (i = 0; i < MAX_ENTRIES; i++)
                slot_taken[i] = 0;
@@ -170,7 +187,7 @@ get_input_keys(unsigned with_pushes, unsigned table_index)
         * Regardless a key has been added correctly or not (success),
         * the next one to try will be increased by 1.
         */
-       for (i = 0; i < KEYS_TO_ADD;) {
+       for (i = 0; i < keys_to_add;) {
                incr = 0;
                if (i != 0) {
                        keys[i][0] = ++k;
@@ -234,21 +251,27 @@ get_input_keys(unsigned with_pushes, unsigned table_index)
 }
 
 static int
-timed_adds(unsigned with_hash, unsigned with_data, unsigned table_index)
+timed_adds(unsigned int with_hash, unsigned int with_data,
+                               unsigned int table_index, unsigned int ext)
 {
        unsigned i;
        const uint64_t start_tsc = rte_rdtsc();
        void *data;
        int32_t ret;
+       unsigned int keys_to_add;
+
+       if (!ext)
+               keys_to_add = KEYS_TO_ADD * ADD_PERCENT;
+       else
+               keys_to_add = KEYS_TO_ADD;
 
-       for (i = 0; i < KEYS_TO_ADD; i++) {
+       for (i = 0; i < keys_to_add; i++) {
                data = (void *) ((uintptr_t) signatures[i]);
                if (with_hash && with_data) {
                        ret = rte_hash_add_key_with_hash_data(h[table_index],
                                                (const void *) keys[i],
                                                signatures[i], data);
                        if (ret < 0) {
-                               printf("Failed to add key number %u\n", ret);
+                               printf("H+D: Failed to add key number %u\n", i);
                                return -1;
                        }
                } else if (with_hash && !with_data) {
@@ -258,7 +281,7 @@ timed_adds(unsigned with_hash, unsigned with_data, unsigned table_index)
                        if (ret >= 0)
                                positions[i] = ret;
                        else {
-                               printf("Failed to add key number %u\n", ret);
+                               printf("H: Failed to add key number %u\n", i);
                                return -1;
                        }
                } else if (!with_hash && with_data) {
@@ -266,7 +289,7 @@ timed_adds(unsigned with_hash, unsigned with_data, unsigned table_index)
                                                (const void *) keys[i],
                                                data);
                        if (ret < 0) {
-                               printf("Failed to add key number %u\n", ret);
+                               printf("D: Failed to add key number %u\n", i);
                                return -1;
                        }
                } else {
@@ -274,7 +297,7 @@ timed_adds(unsigned with_hash, unsigned with_data, unsigned table_index)
                        if (ret >= 0)
                                positions[i] = ret;
                        else {
-                               printf("Failed to add key number %u\n", ret);
+                               printf("Failed to add key number %u\n", i);
                                return -1;
                        }
                }
@@ -283,22 +306,31 @@ timed_adds(unsigned with_hash, unsigned with_data, unsigned table_index)
        const uint64_t end_tsc = rte_rdtsc();
        const uint64_t time_taken = end_tsc - start_tsc;
 
-       cycles[table_index][ADD][with_hash][with_data] = time_taken/KEYS_TO_ADD;
+       cycles[table_index][ADD][with_hash][with_data] = time_taken/keys_to_add;
 
        return 0;
 }
 
 static int
-timed_lookups(unsigned with_hash, unsigned with_data, unsigned table_index)
+timed_lookups(unsigned int with_hash, unsigned int with_data,
+                               unsigned int table_index, unsigned int ext)
 {
        unsigned i, j;
        const uint64_t start_tsc = rte_rdtsc();
        void *ret_data;
        void *expected_data;
        int32_t ret;
-
-       for (i = 0; i < NUM_LOOKUPS/KEYS_TO_ADD; i++) {
-               for (j = 0; j < KEYS_TO_ADD; j++) {
+       unsigned int keys_to_add, num_lookups;
+
+       if (!ext) {
+               keys_to_add = KEYS_TO_ADD * ADD_PERCENT;
+               num_lookups = NUM_LOOKUPS * ADD_PERCENT;
+       } else {
+               keys_to_add = KEYS_TO_ADD;
+               num_lookups = NUM_LOOKUPS;
+       }
+       for (i = 0; i < num_lookups / keys_to_add; i++) {
+               for (j = 0; j < keys_to_add; j++) {
                        if (with_hash && with_data) {
                                ret = rte_hash_lookup_with_hash_data(h[table_index],
                                                        (const void *) keys[j],
@@ -351,13 +383,14 @@ timed_lookups(unsigned with_hash, unsigned with_data, unsigned table_index)
        const uint64_t end_tsc = rte_rdtsc();
        const uint64_t time_taken = end_tsc - start_tsc;
 
-       cycles[table_index][LOOKUP][with_hash][with_data] = time_taken/NUM_LOOKUPS;
+       cycles[table_index][LOOKUP][with_hash][with_data] = time_taken/num_lookups;
 
        return 0;
 }
 
 static int
-timed_lookups_multi(unsigned with_data, unsigned table_index)
+timed_lookups_multi(unsigned int with_data, unsigned int table_index,
+                                                       unsigned int ext)
 {
        unsigned i, j, k;
        int32_t positions_burst[BURST_SIZE];
@@ -366,11 +399,20 @@ timed_lookups_multi(unsigned with_data, unsigned table_index)
        void *ret_data[BURST_SIZE];
        uint64_t hit_mask;
        int ret;
+       unsigned int keys_to_add, num_lookups;
+
+       if (!ext) {
+               keys_to_add = KEYS_TO_ADD * ADD_PERCENT;
+               num_lookups = NUM_LOOKUPS * ADD_PERCENT;
+       } else {
+               keys_to_add = KEYS_TO_ADD;
+               num_lookups = NUM_LOOKUPS;
+       }
 
        const uint64_t start_tsc = rte_rdtsc();
 
-       for (i = 0; i < NUM_LOOKUPS/KEYS_TO_ADD; i++) {
-               for (j = 0; j < KEYS_TO_ADD/BURST_SIZE; j++) {
+       for (i = 0; i < num_lookups/keys_to_add; i++) {
+               for (j = 0; j < keys_to_add/BURST_SIZE; j++) {
                        for (k = 0; k < BURST_SIZE; k++)
                                keys_burst[k] = keys[j * BURST_SIZE + k];
                        if (with_data) {
@@ -418,19 +460,25 @@ timed_lookups_multi(unsigned with_data, unsigned table_index)
        const uint64_t end_tsc = rte_rdtsc();
        const uint64_t time_taken = end_tsc - start_tsc;
 
-       cycles[table_index][LOOKUP_MULTI][0][with_data] = time_taken/NUM_LOOKUPS;
+       cycles[table_index][LOOKUP_MULTI][0][with_data] = time_taken/num_lookups;
 
        return 0;
 }
 
 static int
-timed_deletes(unsigned with_hash, unsigned with_data, unsigned table_index)
+timed_deletes(unsigned int with_hash, unsigned int with_data,
+                               unsigned int table_index, unsigned int ext)
 {
        unsigned i;
        const uint64_t start_tsc = rte_rdtsc();
        int32_t ret;
+       unsigned int keys_to_add;
+       if (!ext)
+               keys_to_add = KEYS_TO_ADD * ADD_PERCENT;
+       else
+               keys_to_add = KEYS_TO_ADD;
 
-       for (i = 0; i < KEYS_TO_ADD; i++) {
+       for (i = 0; i < keys_to_add; i++) {
                /* There are no delete functions with data, so just call two functions */
                if (with_hash)
                        ret = rte_hash_del_key_with_hash(h[table_index],
@@ -442,7 +490,7 @@ timed_deletes(unsigned with_hash, unsigned with_data, unsigned table_index)
                if (ret >= 0)
                        positions[i] = ret;
                else {
-                       printf("Failed to add key number %u\n", ret);
+                       printf("Failed to delete key number %u\n", i);
                        return -1;
                }
        }
@@ -450,7 +498,7 @@ timed_deletes(unsigned with_hash, unsigned with_data, unsigned table_index)
        const uint64_t end_tsc = rte_rdtsc();
        const uint64_t time_taken = end_tsc - start_tsc;
 
-       cycles[table_index][DELETE][with_hash][with_data] = time_taken/KEYS_TO_ADD;
+       cycles[table_index][DELETE][with_hash][with_data] = time_taken/keys_to_add;
 
        return 0;
 }
@@ -468,7 +516,8 @@ reset_table(unsigned table_index)
 }
 
 static int
-run_all_tbl_perf_tests(unsigned int with_pushes, unsigned int with_locks)
+run_all_tbl_perf_tests(unsigned int with_pushes, unsigned int with_locks,
+                                               unsigned int ext)
 {
        unsigned i, j, with_data, with_hash;
 
@@ -477,25 +526,25 @@ run_all_tbl_perf_tests(unsigned int with_pushes, unsigned int with_locks)
 
        for (with_data = 0; with_data <= 1; with_data++) {
                for (i = 0; i < NUM_KEYSIZES; i++) {
-                       if (create_table(with_data, i, with_locks) < 0)
+                       if (create_table(with_data, i, with_locks, ext) < 0)
                                return -1;
 
-                       if (get_input_keys(with_pushes, i) < 0)
+                       if (get_input_keys(with_pushes, i, ext) < 0)
                                return -1;
                        for (with_hash = 0; with_hash <= 1; with_hash++) {
-                               if (timed_adds(with_hash, with_data, i) < 0)
+                               if (timed_adds(with_hash, with_data, i, ext) < 0)
                                        return -1;
 
                                for (j = 0; j < NUM_SHUFFLES; j++)
-                                       shuffle_input_keys(i);
+                                       shuffle_input_keys(i, ext);
 
-                               if (timed_lookups(with_hash, with_data, i) < 0)
+                               if (timed_lookups(with_hash, with_data, i, ext) < 0)
                                        return -1;
 
-                               if (timed_lookups_multi(with_data, i) < 0)
+                               if (timed_lookups_multi(with_data, i, ext) < 0)
                                        return -1;
 
-                               if (timed_deletes(with_hash, with_data, i) < 0)
+                               if (timed_deletes(with_hash, with_data, i, ext) < 0)
                                        return -1;
 
                                /* Print a dot to show progress on operations */
@@ -631,10 +680,16 @@ test_hash_perf(void)
                                printf("\nALL ELEMENTS IN PRIMARY LOCATION\n");
                        else
                                printf("\nELEMENTS IN PRIMARY OR SECONDARY LOCATION\n");
-                       if (run_all_tbl_perf_tests(with_pushes, with_locks) < 0)
+                       if (run_all_tbl_perf_tests(with_pushes, with_locks, 0) < 0)
                                return -1;
                }
        }
+
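+       /* Re-run the whole performance matrix with extendable buckets on */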
+       printf("\n EXTENDABLE BUCKETS PERFORMANCE\n");
+
+       if (run_all_tbl_perf_tests(1, 0, 1) < 0)
+               return -1;
+
        if (fbk_hash_perf_test() < 0)
                return -1;
 
diff --git a/test/test/test_hash_readwrite.c b/test/test/test_hash_readwrite.c
index 55ae33d..01f986c 100644
--- a/test/test/test_hash_readwrite.c
+++ b/test/test/test_hash_readwrite.c
 
 #define RTE_RWTEST_FAIL 0
 
-#define TOTAL_ENTRY (16*1024*1024)
-#define TOTAL_INSERT (15*1024*1024)
+#define TOTAL_ENTRY (5*1024*1024)
+#define TOTAL_INSERT (4.5*1024*1024)
+#define TOTAL_INSERT_EXT (5*1024*1024)
 
 #define NUM_TEST 3
 unsigned int core_cnt[NUM_TEST] = {2, 4, 8};
 
+unsigned int slave_core_ids[RTE_MAX_LCORE];
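+/* slave_core_ids[] packs the slave lcore ids densely, so workers can map
+ * their lcore id to a 0-based index (filled in test_hash_readwrite_main) */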
 struct perf {
        uint32_t single_read;
        uint32_t single_write;
@@ -58,14 +60,19 @@ test_hash_readwrite_worker(__attribute__((unused)) void *arg)
        uint64_t i, offset;
        uint32_t lcore_id = rte_lcore_id();
        uint64_t begin, cycles;
-       int ret;
+       int *ret;
 
-       offset = (lcore_id - rte_get_master_lcore())
-                       * tbl_rw_test_param.num_insert;
+       ret = rte_malloc(NULL, sizeof(int) *
+                               tbl_rw_test_param.num_insert, 0);
+       if (ret == NULL)
+               return -1;
+       for (i = 0; i < rte_lcore_count(); i++) {
+               if (slave_core_ids[i] == lcore_id)
+                       break;
+       }
+       offset = tbl_rw_test_param.num_insert * i;
 
        printf("Core #%d inserting and reading %d: %'"PRId64" - %'"PRId64"\n",
               lcore_id, tbl_rw_test_param.num_insert,
-              offset, offset + tbl_rw_test_param.num_insert);
+              offset, offset + tbl_rw_test_param.num_insert - 1);
 
        begin = rte_rdtsc_precise();
 
@@ -75,13 +82,30 @@ test_hash_readwrite_worker(__attribute__((unused)) void *arg)
                                tbl_rw_test_param.keys + i) > 0)
                        break;
 
-               ret = rte_hash_add_key(tbl_rw_test_param.h,
+               ret[i - offset] = rte_hash_add_key(tbl_rw_test_param.h,
                                     tbl_rw_test_param.keys + i);
-               if (ret < 0)
+               if (ret[i - offset] < 0)
+                       break;
+
+               /* lookup a random key */
+               uint32_t rand = rte_rand() % (i + 1 - offset);
+
+               if (rte_hash_lookup(tbl_rw_test_param.h,
+                               tbl_rw_test_param.keys + rand) != ret[rand])
+                       break;
+
+               if (rte_hash_del_key(tbl_rw_test_param.h,
+                               tbl_rw_test_param.keys + rand) != ret[rand])
+                       break;
+
+               ret[rand] = rte_hash_add_key(tbl_rw_test_param.h,
+                                       tbl_rw_test_param.keys + rand);
+               if (ret[rand] < 0)
                        break;
 
                if (rte_hash_lookup(tbl_rw_test_param.h,
-                               tbl_rw_test_param.keys + i) != ret)
+                       tbl_rw_test_param.keys + rand) != ret[rand])
                        break;
        }
 
@@ -92,11 +116,12 @@ test_hash_readwrite_worker(__attribute__((unused)) void *arg)
        for (; i < offset + tbl_rw_test_param.num_insert; i++)
                tbl_rw_test_param.keys[i] = RTE_RWTEST_FAIL;
 
+       rte_free(ret);
        return 0;
 }
 
 static int
-init_params(int use_htm, int use_jhash)
+init_params(int use_ext, int use_htm, int use_jhash)
 {
        unsigned int i;
 
@@ -118,10 +143,19 @@ init_params(int use_htm, int use_jhash)
        if (use_htm)
                hash_params.extra_flag =
                        RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT |
-                       RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY;
+                       RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY |
+                       RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
        else
                hash_params.extra_flag =
-                       RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY;
+                       RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY |
+                       RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
+
+       if (use_ext)
+               hash_params.extra_flag |=
+                       RTE_HASH_EXTRA_FLAGS_EXT_TABLE;
+       else
+               hash_params.extra_flag &=
+                      ~RTE_HASH_EXTRA_FLAGS_EXT_TABLE;
 
        hash_params.name = "tests";
 
@@ -161,7 +195,7 @@ err:
 }
 
 static int
-test_hash_readwrite_functional(int use_htm)
+test_hash_readwrite_functional(int use_ext, int use_htm)
 {
        unsigned int i;
        const void *next_key;
@@ -171,6 +205,8 @@ test_hash_readwrite_functional(int use_htm)
        uint32_t duplicated_keys = 0;
        uint32_t lost_keys = 0;
        int use_jhash = 1;
+       int slave_cnt = rte_lcore_count() - 1;
+       uint32_t tot_insert = 0;
 
        rte_atomic64_init(&gcycles);
        rte_atomic64_clear(&gcycles);
@@ -178,21 +214,26 @@ test_hash_readwrite_functional(int use_htm)
        rte_atomic64_init(&ginsertions);
        rte_atomic64_clear(&ginsertions);
 
-       if (init_params(use_htm, use_jhash) != 0)
+       if (init_params(use_ext, use_htm, use_jhash) != 0)
                goto err;
 
+       if (use_ext)
+               tot_insert = TOTAL_INSERT_EXT;
+       else
+               tot_insert = TOTAL_INSERT;
+
        tbl_rw_test_param.num_insert =
-               TOTAL_INSERT / rte_lcore_count();
+               tot_insert / slave_cnt;
 
        tbl_rw_test_param.rounded_tot_insert =
                tbl_rw_test_param.num_insert
-               * rte_lcore_count();
+               * slave_cnt;
 
        printf("++++++++Start function tests:+++++++++\n");
 
        /* Fire all threads. */
        rte_eal_mp_remote_launch(test_hash_readwrite_worker,
-                                NULL, CALL_MASTER);
+                                NULL, SKIP_MASTER);
        rte_eal_mp_wait_lcore();
 
        while (rte_hash_iterate(tbl_rw_test_param.h, &next_key,
@@ -249,7 +290,7 @@ err:
 }
 
 static int
-test_rw_reader(__attribute__((unused)) void *arg)
+test_rw_reader(void *arg)
 {
        uint64_t i;
        uint64_t begin, cycles;
@@ -276,7 +317,7 @@ test_rw_reader(__attribute__((unused)) void *arg)
 }
 
 static int
-test_rw_writer(__attribute__((unused)) void *arg)
+test_rw_writer(void *arg)
 {
        uint64_t i;
        uint32_t lcore_id = rte_lcore_id();
@@ -285,8 +326,13 @@ test_rw_writer(__attribute__((unused)) void *arg)
        uint64_t start_coreid = (uint64_t)(uintptr_t)arg;
        uint64_t offset;
 
-       offset = TOTAL_INSERT / 2 + (lcore_id - start_coreid)
-                                       * tbl_rw_test_param.num_insert;
+       for (i = 0; i < rte_lcore_count(); i++) {
+               if (slave_core_ids[i] == lcore_id)
+                       break;
+       }
+
+       offset = TOTAL_INSERT / 2 + (i - start_coreid) *
+                               tbl_rw_test_param.num_insert;
        begin = rte_rdtsc_precise();
        for (i = offset; i < offset + tbl_rw_test_param.num_insert; i++) {
                ret = rte_hash_add_key_data(tbl_rw_test_param.h,
@@ -333,7 +379,7 @@ test_hash_readwrite_perf(struct perf *perf_results, int use_htm,
        rte_atomic64_init(&gwrite_cycles);
        rte_atomic64_clear(&gwrite_cycles);
 
-       if (init_params(use_htm, use_jhash) != 0)
+       if (init_params(0, use_htm, use_jhash) != 0)
                goto err;
 
        /*
@@ -384,8 +430,8 @@ test_hash_readwrite_perf(struct perf *perf_results, int use_htm,
        perf_results->single_read = end / i;
 
        for (n = 0; n < NUM_TEST; n++) {
-               unsigned int tot_lcore = rte_lcore_count();
-               if (tot_lcore < core_cnt[n] * 2 + 1)
+               unsigned int tot_slave_lcore = rte_lcore_count() - 1;
+               if (tot_slave_lcore < core_cnt[n] * 2)
                        goto finish;
 
                rte_atomic64_clear(&greads);
@@ -415,17 +461,19 @@ test_hash_readwrite_perf(struct perf *perf_results, int use_htm,
                 */
 
                /* Test only reader cases */
-               for (i = 1; i <= core_cnt[n]; i++)
+               for (i = 0; i < core_cnt[n]; i++)
                        rte_eal_remote_launch(test_rw_reader,
-                                       (void *)(uintptr_t)read_cnt, i);
+                                       (void *)(uintptr_t)read_cnt,
+                                       slave_core_ids[i]);
 
                rte_eal_mp_wait_lcore();
 
                start_coreid = i;
                /* Test only writer cases */
-               for (; i <= core_cnt[n] * 2; i++)
+               for (; i < core_cnt[n] * 2; i++)
                        rte_eal_remote_launch(test_rw_writer,
-                                       (void *)((uintptr_t)start_coreid), i);
+                                       (void *)((uintptr_t)start_coreid),
+                                       slave_core_ids[i]);
 
                rte_eal_mp_wait_lcore();
 
@@ -464,22 +512,26 @@ test_hash_readwrite_perf(struct perf *perf_results, int use_htm,
                        }
                }
 
-               start_coreid = core_cnt[n] + 1;
+               start_coreid = core_cnt[n];
 
                if (reader_faster) {
-                       for (i = core_cnt[n] + 1; i <= core_cnt[n] * 2; i++)
+                       for (i = core_cnt[n]; i < core_cnt[n] * 2; i++)
                                rte_eal_remote_launch(test_rw_writer,
-                                       (void *)((uintptr_t)start_coreid), i);
-                       for (i = 1; i <= core_cnt[n]; i++)
+                                       (void *)((uintptr_t)start_coreid),
+                                       slave_core_ids[i]);
+                       for (i = 0; i < core_cnt[n]; i++)
                                rte_eal_remote_launch(test_rw_reader,
-                                       (void *)(uintptr_t)read_cnt, i);
+                                       (void *)(uintptr_t)read_cnt,
+                                       slave_core_ids[i]);
                } else {
-                       for (i = 1; i <= core_cnt[n]; i++)
+                       for (i = 0; i < core_cnt[n]; i++)
                                rte_eal_remote_launch(test_rw_reader,
-                                       (void *)(uintptr_t)read_cnt, i);
-                       for (; i <= core_cnt[n] * 2; i++)
+                                       (void *)(uintptr_t)read_cnt,
+                                       slave_core_ids[i]);
+                       for (; i < core_cnt[n] * 2; i++)
                                rte_eal_remote_launch(test_rw_writer,
-                                       (void *)((uintptr_t)start_coreid), i);
+                                       (void *)((uintptr_t)start_coreid),
+                                       slave_core_ids[i]);
                }
 
                rte_eal_mp_wait_lcore();
@@ -561,14 +613,20 @@ test_hash_readwrite_main(void)
         * than writer threads. This is to timing either reader threads or
         * writer threads for performance numbers.
         */
-       int use_htm, reader_faster;
+       int use_htm, use_ext, reader_faster;
+       unsigned int i = 0, core_id = 0;
 
-       if (rte_lcore_count() == 1) {
-               printf("More than one lcore is required "
+       if (rte_lcore_count() <= 2) {
+               printf("More than two lcores are required "
                        "to do read write test\n");
                return 0;
        }
 
+       RTE_LCORE_FOREACH_SLAVE(core_id) {
+               slave_core_ids[i] = core_id;
+               i++;
+       }
+
        setlocale(LC_NUMERIC, "");
 
        if (rte_tm_supported()) {
@@ -578,7 +636,13 @@ test_hash_readwrite_main(void)
                printf("Test read-write with Hardware transactional memory\n");
 
                use_htm = 1;
-               if (test_hash_readwrite_functional(use_htm) < 0)
+               use_ext = 0;
+
+               if (test_hash_readwrite_functional(use_ext, use_htm) < 0)
+                       return -1;
+
+               use_ext = 1;
+               if (test_hash_readwrite_functional(use_ext, use_htm) < 0)
                        return -1;
 
                reader_faster = 1;
@@ -597,8 +661,14 @@ test_hash_readwrite_main(void)
 
        printf("Test read-write without Hardware transactional memory\n");
        use_htm = 0;
-       if (test_hash_readwrite_functional(use_htm) < 0)
+       use_ext = 0;
+       if (test_hash_readwrite_functional(use_ext, use_htm) < 0)
                return -1;
+
+       use_ext = 1;
+       if (test_hash_readwrite_functional(use_ext, use_htm) < 0)
+               return -1;
+
        reader_faster = 1;
        if (test_hash_readwrite_perf(&non_htm_results, use_htm,
                                                        reader_faster) < 0)
@@ -610,8 +680,6 @@ test_hash_readwrite_main(void)
 
        printf("Results summary:\n");
 
-       int i;
-
        printf("single read: %u\n", htm_results.single_read);
        printf("single write: %u\n", htm_results.single_write);
        for (i = 0; i < NUM_TEST; i++) {
diff --git a/test/test/test_hash_readwrite_lf.c b/test/test/test_hash_readwrite_lf.c
new file mode 100644
index 0000000..cbfd932
--- /dev/null
+++ b/test/test/test_hash_readwrite_lf.c
@@ -0,0 +1,1220 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Arm Limited
+ */
+
+#include <inttypes.h>
+#include <locale.h>
+
+#include <rte_cycles.h>
+#include <rte_hash.h>
+#include <rte_hash_crc.h>
+#include <rte_jhash.h>
+#include <rte_launch.h>
+#include <rte_malloc.h>
+#include <rte_random.h>
+#include <rte_spinlock.h>
+
+#include "test.h"
+
+#ifndef RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF
+#define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF 0
+#endif
+
+#define BULK_LOOKUP_SIZE 32
+
+#define RUN_WITH_HTM_DISABLED 0
+
+#if (RUN_WITH_HTM_DISABLED)
+
+#define TOTAL_ENTRY (5*1024)
+#define TOTAL_INSERT (5*1024)
+
+#else
+
+#define TOTAL_ENTRY (4*1024*1024)
+#define TOTAL_INSERT (4*1024*1024)
+
+#endif
+
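+/* Read types, passed to reader threads through the remote-launch argument;
+ * each may be OR'ed with BULK_LOOKUP to exercise the bulk API. */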
+#define READ_FAIL 1
+#define READ_PASS_NO_KEY_SHIFTS 2
+#define READ_PASS_SHIFT_PATH 4
+#define READ_PASS_NON_SHIFT_PATH 8
+#define BULK_LOOKUP 16
+#define NUM_TEST 3
+unsigned int rwc_core_cnt[NUM_TEST] = {1, 2, 4};
+
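+/*
+ * Cycles-per-lookup results, indexed by [bulk-lookup?][reader-count case]
+ * (with an extra writer-count dimension for the multi-writer test).
+ */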
+struct rwc_perf {
+       uint32_t w_no_ks_r_hit[2][NUM_TEST];
+       uint32_t w_no_ks_r_miss[2][NUM_TEST];
+       uint32_t w_ks_r_hit_nsp[2][NUM_TEST];
+       uint32_t w_ks_r_hit_sp[2][NUM_TEST];
+       uint32_t w_ks_r_miss[2][NUM_TEST];
+       uint32_t multi_rw[NUM_TEST][2][NUM_TEST];
+};
+
+static struct rwc_perf rwc_lf_results, rwc_non_lf_results;
+
+struct {
+       uint32_t *keys;
+       uint32_t *keys_no_ks;
+       uint32_t *keys_ks;
+       uint32_t *keys_absent;
+       uint32_t *keys_shift_path;
+       uint32_t *keys_non_shift_path;
+       uint32_t count_keys_no_ks;
+       uint32_t count_keys_ks;
+       uint32_t count_keys_absent;
+       uint32_t count_keys_shift_path;
+       uint32_t count_keys_non_shift_path;
+       uint32_t single_insert;
+       struct rte_hash *h;
+} tbl_rwc_test_param;
+
+static rte_atomic64_t gread_cycles;
+static rte_atomic64_t greads;
+
+static volatile uint8_t writer_done;
+static volatile uint8_t multi_writer_done[4];
+
+uint16_t enabled_core_ids[RTE_MAX_LCORE];
+
+uint8_t *scanned_bkts;
+
+static inline int
+get_enabled_cores_list(void)
+{
+       uint32_t i = 0;
+       uint16_t core_id;
+       uint32_t max_cores = rte_lcore_count();
+       for (core_id = 0; core_id < RTE_MAX_LCORE && i < max_cores; core_id++) {
+               if (rte_lcore_is_enabled(core_id)) {
+                       enabled_core_ids[i] = core_id;
+                       i++;
+               }
+       }
+
+       if (i != max_cores) {
+               printf("Number of enabled cores in list is different from "
+                       "number given by rte_lcore_count()\n");
+               return -1;
+       }
+       return 0;
+}
+
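+/*
+ * Walk the bucket at bkt_idx with rte_hash_iterate(): returns 1 if 'key'
+ * is already present, -1 if all 8 entries are occupied (the bucket's keys
+ * become shift-path candidates), and 0 if a free slot remains.
+ */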
+static inline int
+check_bucket(uint32_t bkt_idx, uint32_t key)
+{
+       uint32_t iter;
+       uint32_t prev_iter;
+       uint32_t diff;
+       uint32_t count = 0;
+       const void *next_key;
+       void *next_data;
+
+       /* Temporary bucket to hold the keys */
+       uint32_t keys_in_bkt[8];
+
+       iter = bkt_idx * 8;
+       prev_iter = iter;
+       while (rte_hash_iterate(tbl_rwc_test_param.h,
+                       &next_key, &next_data, &iter) >= 0) {
+
+               /* Check for duplicate entries */
+               if (*(const uint32_t *)next_key == key)
+                       return 1;
+
+               /* Identify if there is any free entry in the bucket */
+               diff = iter - prev_iter;
+               if (diff > 1)
+                       break;
+
+               prev_iter = iter;
+               keys_in_bkt[count] = *(const uint32_t *)next_key;
+               count++;
+
+               /* All entries in the bucket are occupied */
+               if (count == 8) {
+
+                       /*
+                        * Check if bucket was not scanned before, to avoid
+                        * duplicate keys.
+                        */
+                       if (scanned_bkts[bkt_idx] == 0) {
+                               /*
+                                * Since this bucket (pointed to by bkt_idx) is
+                                * full, it is likely that key(s) in this
+                                * bucket will be on the shift path, when
+                                * collision occurs. Thus, add it to
+                                * keys_shift_path.
+                                */
+                               memcpy(tbl_rwc_test_param.keys_shift_path +
+                                       tbl_rwc_test_param.count_keys_shift_path
+                                       , keys_in_bkt, 32);
+                               tbl_rwc_test_param.count_keys_shift_path += 8;
+                               scanned_bkts[bkt_idx] = 1;
+                       }
+                       return -1;
+               }
+       }
+       return 0;
+}
+
+static int
+generate_keys(void)
+{
+       uint32_t *keys = NULL;
+       uint32_t *keys_no_ks = NULL;
+       uint32_t *keys_ks = NULL;
+       uint32_t *keys_absent = NULL;
+       uint32_t *keys_non_shift_path = NULL;
+       uint32_t *found = NULL;
+       uint32_t count_keys_no_ks = 0;
+       uint32_t count_keys_ks = 0;
+       uint32_t i;
+
+       /*
+        * keys will consist of a) keys whose addition to the hash table
+        * will result in shifting of the existing keys to their alternate
+        * locations b) keys whose addition to the hash table will not result
+        * in shifting of the existing keys.
+        */
+       keys = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_INSERT, 0);
+       if (keys == NULL) {
+               printf("RTE_MALLOC failed\n");
+               goto err;
+       }
+
+       /*
+        * keys_no_ks (no key-shifts): Subset of 'keys' - consists of keys that
+        * will NOT result in shifting of the existing keys to their alternate
+        * locations. Roughly 900K keys.
+        */
+       keys_no_ks = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_INSERT, 0);
+       if (keys_no_ks == NULL) {
+               printf("RTE_MALLOC failed\n");
+               goto err;
+       }
+
+       /*
+        * keys_ks (key-shifts): Subset of 'keys' - consists of keys that will
+        * result in shifting of the existing keys to their alternate locations.
+        * Roughly 146K keys. There may be repeated keys; filtering them out
+        * would require more code and complicate the test case.
+        */
+       keys_ks = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_INSERT, 0);
+       if (keys_ks == NULL) {
+               printf("RTE_MALLOC failed\n");
+               goto err;
+       }
+
+       /* Used to identify keys not inserted in the hash table */
+       found = rte_zmalloc(NULL, sizeof(uint32_t) * TOTAL_INSERT, 0);
+       if (found == NULL) {
+               printf("RTE_MALLOC failed\n");
+               goto err;
+       }
+
+       /*
+        * This consists of keys not inserted into the hash table.
+        * Used to test perf of lookup on keys that do not exist in the table.
+        */
+       keys_absent = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_INSERT, 0);
+       if (keys_absent == NULL) {
+               printf("RTE_MALLOC failed\n");
+               goto err;
+       }
+
+       /*
+        * This consists of keys that are likely to be on the shift
+        * path (i.e. moved to their alternate location) when a collision occurs
+        * on addition of a key to an already full primary bucket.
+        * Used to test perf of lookup on keys that are on the shift path.
+        */
+       tbl_rwc_test_param.keys_shift_path = rte_malloc(NULL, sizeof(uint32_t) *
+                                                       TOTAL_INSERT, 0);
+       if (tbl_rwc_test_param.keys_shift_path == NULL) {
+               printf("RTE_MALLOC failed\n");
+               goto err;
+       }
+
+       /*
+        * This consists of keys that are never on the shift
+        * path (i.e. moved to their alternate location) when a collision occurs
+        * on addition of a key to an already full primary bucket.
+        * Used to test perf of lookup on keys that are not on the shift path.
+        */
+       keys_non_shift_path = rte_malloc(NULL, sizeof(uint32_t) * TOTAL_INSERT,
+                                        0);
+       if (keys_non_shift_path == NULL) {
+               printf("RTE_MALLOC failed\n");
+               goto err;
+       }
+
+
+       hash_sig_t sig;
+       uint32_t prim_bucket_idx;
+       int ret;
+       uint32_t num_buckets;
+       uint32_t bucket_bitmask;
+       num_buckets = rte_align32pow2(TOTAL_ENTRY) / 8;
+       bucket_bitmask = num_buckets - 1;
+
+       /*
+        * Used to mark bkts in which at least one key was shifted to its
+        * alternate location
+        */
+       scanned_bkts = rte_malloc(NULL, sizeof(uint8_t) * num_buckets, 0);
+       if (scanned_bkts == NULL) {
+               printf("RTE_MALLOC failed\n");
+               goto err;
+       }
+
+       tbl_rwc_test_param.keys = keys;
+       tbl_rwc_test_param.keys_no_ks = keys_no_ks;
+       tbl_rwc_test_param.keys_ks = keys_ks;
+       tbl_rwc_test_param.keys_absent = keys_absent;
+       tbl_rwc_test_param.keys_non_shift_path = keys_non_shift_path;
+       /* Generate keys by adding previous two keys, neglect overflow */
+       printf("Generating keys...\n");
+       keys[0] = 0;
+       keys[1] = 1;
+       for (i = 2; i < TOTAL_INSERT; i++)
+               keys[i] = keys[i-1] + keys[i-2];
+
+       /* Segregate keys into keys_no_ks and keys_ks */
+       for (i = 0; i < TOTAL_INSERT; i++) {
+               /* Check if primary bucket has space.*/
+               sig = rte_hash_hash(tbl_rwc_test_param.h,
+                                       tbl_rwc_test_param.keys+i);
+               prim_bucket_idx = sig & bucket_bitmask;
+               ret = check_bucket(prim_bucket_idx, keys[i]);
+               if (ret < 0) {
+                       /*
+                        * Primary bucket is full, this key will result in
+                        * shifting of the keys to their alternate locations.
+                        */
+                       keys_ks[count_keys_ks] = keys[i];
+                       count_keys_ks++;
+               } else if (ret == 0) {
+                       /*
+                        * Primary bucket has space, this key will not result in
+                        * shifting of the keys. Hence, add key to the table.
+                        */
+                       ret = rte_hash_add_key_data(tbl_rwc_test_param.h,
+                                                       keys+i,
+                                                       (void *)((uintptr_t)i));
+                       if (ret < 0) {
+                               printf("writer failed %"PRIu32"\n", i);
+                               break;
+                       }
+                       keys_no_ks[count_keys_no_ks] = keys[i];
+                       count_keys_no_ks++;
+               }
+       }
+
+       for (i = 0; i < count_keys_no_ks; i++) {
+               /*
+                * Identify keys in keys_no_ks with value less than
+                * 4M (HTM enabled) OR 5K (HTM disabled)
+                */
+               if (keys_no_ks[i] < TOTAL_INSERT)
+                       found[keys_no_ks[i]]++;
+       }
+
+       for (i = 0; i < count_keys_ks; i++) {
+               /*
+                * Identify keys in keys_ks with value less than
+                * 4M (HTM enabled) OR 5K (HTM disabled)
+                */
+               if (keys_ks[i] < TOTAL_INSERT)
+                       found[keys_ks[i]]++;
+       }
+
+       uint32_t count_keys_absent = 0;
+       for (i = 0; i < TOTAL_INSERT; i++) {
+               /*
+                * Identify missing keys between 0 and
+                * 4M (HTM enabled) OR 5K (HTM disabled)
+                */
+               if (found[i] == 0)
+                       keys_absent[count_keys_absent++] = i;
+       }
+
+       /* Find keys that will not be on the shift path */
+       uint32_t iter;
+       const void *next_key;
+       void *next_data;
+       uint32_t count = 0;
+       for (i = 0; i < num_buckets; i++) {
+               /* Check bucket for no keys shifted to alternate locations */
+               if (scanned_bkts[i] == 0) {
+                       iter = i * 8;
+                       while (rte_hash_iterate(tbl_rwc_test_param.h,
+                               &next_key, &next_data, &iter) >= 0) {
+
+                               /* Check if key belongs to the current bucket */
+                               if (i >= (iter-1)/8)
+                                       keys_non_shift_path[count++]
+                                               = *(const uint32_t *)next_key;
+                               else
+                                       break;
+                       }
+               }
+       }
+
+       tbl_rwc_test_param.count_keys_no_ks = count_keys_no_ks;
+       tbl_rwc_test_param.count_keys_ks = count_keys_ks;
+       tbl_rwc_test_param.count_keys_absent = count_keys_absent;
+       tbl_rwc_test_param.count_keys_non_shift_path = count;
+
+       printf("\nCount of keys NOT causing shifting of existing keys to "
+              "alternate location: %u\n", tbl_rwc_test_param.count_keys_no_ks);
+       printf("\nCount of keys causing shifting of existing keys to alternate "
+              "locations: %u\n\n", tbl_rwc_test_param.count_keys_ks);
+       printf("Count of absent keys that will never be added to the hash "
+              "table: %u\n\n", tbl_rwc_test_param.count_keys_absent);
+       printf("Count of keys likely to be on the shift path: %u\n\n",
+              tbl_rwc_test_param.count_keys_shift_path);
+       printf("Count of keys not likely to be on the shift path: %u\n\n",
+              tbl_rwc_test_param.count_keys_non_shift_path);
+
+       rte_free(found);
+       rte_hash_free(tbl_rwc_test_param.h);
+       return 0;
+
+err:
+       rte_free(keys);
+       rte_free(keys_no_ks);
+       rte_free(keys_ks);
+       rte_free(keys_absent);
+       rte_free(found);
+       rte_free(tbl_rwc_test_param.keys_shift_path);
+       rte_free(keys_non_shift_path);
+       rte_free(scanned_bkts);
+       return -1;
+}
+
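+/*
+ * Create the test table; extra_flag selects lock-free reader-writer
+ * concurrency, HTM-backed concurrency, or plain rwlocks, always combined
+ * with multi-writer add support. A minimal caller (illustrative only):
+ *
+ *     if (init_params(1, 0, 0) == 0)
+ *             h = tbl_rwc_test_param.h;
+ */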
+static int
+init_params(int rwc_lf, int use_jhash, int htm)
+{
+       struct rte_hash *handle;
+
+       struct rte_hash_parameters hash_params = {
+               .entries = TOTAL_ENTRY,
+               .key_len = sizeof(uint32_t),
+               .hash_func_init_val = 0,
+               .socket_id = rte_socket_id(),
+       };
+
+       if (use_jhash)
+               hash_params.hash_func = rte_jhash;
+       else
+               hash_params.hash_func = rte_hash_crc;
+
+       if (rwc_lf)
+               hash_params.extra_flag =
+                       RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF |
+                       RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
+       else if (htm)
+               hash_params.extra_flag =
+                       RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT |
+                       RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY |
+                       RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
+       else
+               hash_params.extra_flag =
+                       RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY |
+                       RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD;
+
+       hash_params.name = "tests";
+
+       handle = rte_hash_create(&hash_params);
+       if (handle == NULL) {
+               printf("hash creation failed");
+               return -1;
+       }
+
+       tbl_rwc_test_param.h = handle;
+       return 0;
+}
+
+static int
+test_rwc_reader(void *arg)
+{
+       uint32_t i, j;
+       int ret;
+       uint64_t begin, cycles;
+       uint32_t loop_cnt = 0;
+       uint8_t read_type = (uint8_t)((uintptr_t)arg);
+       uint32_t read_cnt;
+       uint32_t *keys;
+       uint32_t extra_keys;
+       int32_t *pos;
+       void *temp_a[BULK_LOOKUP_SIZE];
+
+       /* Holds the positions returned by the bulk lookups */
+       pos = rte_zmalloc(NULL, sizeof(int32_t) * BULK_LOOKUP_SIZE, 0);
+       if (pos == NULL) {
+               printf("RTE_MALLOC failed\n");
+               return -1;
+       }
+
+       if (read_type & READ_FAIL) {
+               keys = tbl_rwc_test_param.keys_absent;
+               read_cnt = tbl_rwc_test_param.count_keys_absent;
+       } else if (read_type & READ_PASS_NO_KEY_SHIFTS) {
+               keys = tbl_rwc_test_param.keys_no_ks;
+               read_cnt = tbl_rwc_test_param.count_keys_no_ks;
+       } else if (read_type & READ_PASS_SHIFT_PATH) {
+               keys = tbl_rwc_test_param.keys_shift_path;
+               read_cnt = tbl_rwc_test_param.count_keys_shift_path;
+       } else {
+               keys = tbl_rwc_test_param.keys_non_shift_path;
+               read_cnt = tbl_rwc_test_param.count_keys_non_shift_path;
+       }
+
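+       /* Remainder after splitting read_cnt into full BULK_LOOKUP_SIZE bursts */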
+       extra_keys = read_cnt & (BULK_LOOKUP_SIZE - 1);
+
+       begin = rte_rdtsc_precise();
+       do {
+               if (read_type & BULK_LOOKUP) {
+                       for (i = 0; i < (read_cnt - extra_keys);
+                            i += BULK_LOOKUP_SIZE) {
+                               /* Array of pointers to the keys in this burst */
+                               for (j = 0; j < BULK_LOOKUP_SIZE; j++)
+                                       temp_a[j] = keys + i + j;
+
+                               rte_hash_lookup_bulk(tbl_rwc_test_param.h,
+                                                  (const void **)
+                                                  ((uintptr_t)temp_a),
+                                                  BULK_LOOKUP_SIZE, pos);
+                               /* Validate lookup result */
+                               for (j = 0; j < BULK_LOOKUP_SIZE; j++)
+                                       if ((read_type & READ_FAIL &&
+                                            pos[j] != -ENOENT) ||
+                                           (!(read_type & READ_FAIL) &&
+                                            pos[j] == -ENOENT)) {
+                                               printf("lookup failed! "
+                                                      "%"PRIu32"\n",
+                                                      keys[i + j]);
+                                               return -1;
+                                       }
+                       }
+                       for (j = 0; j < extra_keys; j++)
+                               temp_a[j] = keys + i + j;
+
+                       rte_hash_lookup_bulk(tbl_rwc_test_param.h,
+                                          (const void **)
+                                          ((uintptr_t)temp_a),
+                                          extra_keys, pos);
+                       for (j = 0; j < extra_keys; j++)
+                               if ((read_type & READ_FAIL &&
+                                    pos[j] != -ENOENT) ||
+                                   (!(read_type & READ_FAIL) &&
+                                    pos[j] == -ENOENT)) {
+                                       printf("lookup failed! %"PRIu32"\n",
+                                              keys[i + j]);
+                                       return -1;
+                               }
+               } else {
+                       for (i = 0; i < read_cnt; i++) {
+                               ret = rte_hash_lookup
+                                       (tbl_rwc_test_param.h, keys + i);
+                               if (((read_type & READ_FAIL) &&
+                                    (ret != -ENOENT)) ||
+                                   (!(read_type & READ_FAIL) &&
+                                       ret == -ENOENT)) {
+                                       printf("lookup failed! %"PRIu32"\n",
+                                              keys[i]);
+                                       return -1;
+                               }
+                       }
+               }
+               loop_cnt++;
+       } while (!writer_done);
+
+       cycles = rte_rdtsc_precise() - begin;
+       rte_atomic64_add(&gread_cycles, cycles);
+       rte_atomic64_add(&greads, read_cnt * loop_cnt);
+       rte_free(pos);
+       return 0;
+}
+
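+/*
+ * Insert either the key-shift or the no-key-shift subset. Adds from the
+ * key-shift set may legitimately fail once the table fills up, so only
+ * failures on the no-key-shift set are reported as errors.
+ */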
+static int
+write_keys(uint8_t key_shift)
+{
+       uint32_t i;
+       int ret;
+       uint32_t key_cnt;
+       uint32_t *keys;
+       if (key_shift) {
+               key_cnt = tbl_rwc_test_param.count_keys_ks;
+               keys = tbl_rwc_test_param.keys_ks;
+       } else {
+               key_cnt = tbl_rwc_test_param.count_keys_no_ks;
+               keys = tbl_rwc_test_param.keys_no_ks;
+       }
+       for (i = 0; i < key_cnt; i++) {
+               ret = rte_hash_add_key(tbl_rwc_test_param.h, keys + i);
+               if (!key_shift && ret < 0) {
+                       printf("writer failed %"PRIu32"\n", i);
+                       return -1;
+               }
+       }
+       return 0;
+}
+
+static int
+test_rwc_multi_writer(void *arg)
+{
+       uint32_t i, offset;
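+
+       /* Delete only the subset of keys that timed_adds() inserted */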
+       uint32_t pos_core = (uint32_t)((uintptr_t)arg);
+       offset = pos_core * tbl_rwc_test_param.single_insert;
+       for (i = offset; i < offset + tbl_rwc_test_param.single_insert; i++)
+               rte_hash_add_key(tbl_rwc_test_param.h,
+                                tbl_rwc_test_param.keys_ks + i);
+       multi_writer_done[pos_core] = 1;
+       return 0;
+}
+
+/*
+ * Test lookup perf:
+ * Reader(s) lookup keys present in the table.
+ */
+static int
+test_hash_add_no_ks_lookup_hit(struct rwc_perf *rwc_perf_results, int rwc_lf,
+                               int htm)
+{
+       unsigned int n, m;
+       uint64_t i;
+       int use_jhash = 0;
+       uint8_t key_shift = 0;
+       uint8_t read_type = READ_PASS_NO_KEY_SHIFTS;
+
+       rte_atomic64_init(&greads);
+       rte_atomic64_init(&gread_cycles);
+
+       if (init_params(rwc_lf, use_jhash, htm) != 0)
+               goto err;
+       printf("\nTest: Hash add - no key-shifts, read - hit\n");
+       for (m = 0; m < 2; m++) {
+               if (m == 1) {
+                       printf("\n** With bulk-lookup **\n");
+                       read_type |= BULK_LOOKUP;
+               }
+               for (n = 0; n < NUM_TEST; n++) {
+                       unsigned int tot_lcore = rte_lcore_count();
+                       if (tot_lcore < rwc_core_cnt[n] + 1)
+                               goto finish;
+
+                       printf("\nNumber of readers: %u\n", rwc_core_cnt[n]);
+
+                       rte_atomic64_clear(&greads);
+                       rte_atomic64_clear(&gread_cycles);
+
+                       rte_hash_reset(tbl_rwc_test_param.h);
+                       writer_done = 0;
+                       if (write_keys(key_shift) < 0)
+                               goto err;
+                       writer_done = 1;
+                       for (i = 1; i <= rwc_core_cnt[n]; i++)
+                               rte_eal_remote_launch(test_rwc_reader,
+                                               (void *)(uintptr_t)read_type,
+                                                       enabled_core_ids[i]);
+                       rte_eal_mp_wait_lcore();
+
+                       for (i = 1; i <= rwc_core_cnt[n]; i++)
+                               if (lcore_config[enabled_core_ids[i]].ret < 0)
+                                       goto err;
+
+                       unsigned long long cycles_per_lookup =
+                               rte_atomic64_read(&gread_cycles) /
+                               rte_atomic64_read(&greads);
+                       rwc_perf_results->w_no_ks_r_hit[m][n]
+                                               = cycles_per_lookup;
+                       printf("Cycles per lookup: %llu\n", cycles_per_lookup);
+               }
+       }
+
+finish:
+       rte_hash_free(tbl_rwc_test_param.h);
+       return 0;
+
+err:
+       rte_hash_free(tbl_rwc_test_param.h);
+       return -1;
+}
+
+/*
+ * Test lookup perf:
+ * Reader(s) lookup keys absent in the table while
+ * 'Main' thread adds keys with no key-shifts.
+ */
+static int
+test_hash_add_no_ks_lookup_miss(struct rwc_perf *rwc_perf_results, int rwc_lf,
+                               int htm)
+{
+       unsigned int n, m;
+       uint64_t i;
+       int use_jhash = 0;
+       uint8_t key_shift = 0;
+       uint8_t read_type = READ_FAIL;
+       int ret;
+
+       rte_atomic64_init(&greads);
+       rte_atomic64_init(&gread_cycles);
+
+       if (init_params(rwc_lf, use_jhash, htm) != 0)
+               goto err;
+       printf("\nTest: Hash add - no key-shifts, Hash lookup - miss\n");
+       for (m = 0; m < 2; m++) {
+               if (m == 1) {
+                       printf("\n** With bulk-lookup **\n");
+                       read_type |= BULK_LOOKUP;
+               }
+               for (n = 0; n < NUM_TEST; n++) {
+                       unsigned int tot_lcore = rte_lcore_count();
+                       if (tot_lcore < rwc_core_cnt[n] + 1)
+                               goto finish;
+
+                       printf("\nNumber of readers: %u\n", rwc_core_cnt[n]);
+
+                       rte_atomic64_clear(&greads);
+                       rte_atomic64_clear(&gread_cycles);
+
+                       rte_hash_reset(tbl_rwc_test_param.h);
+                       writer_done = 0;
+
+                       for (i = 1; i <= rwc_core_cnt[n]; i++)
+                               rte_eal_remote_launch(test_rwc_reader,
+                                               (void *)(uintptr_t)read_type,
+                                                       enabled_core_ids[i]);
+                       ret = write_keys(key_shift);
+                       writer_done = 1;
+                       rte_eal_mp_wait_lcore();
+
+                       if (ret < 0)
+                               goto err;
+                       for (i = 1; i <= rwc_core_cnt[n]; i++)
+                               if (lcore_config[enabled_core_ids[i]].ret < 0)
+                                       goto err;
+
+                       unsigned long long cycles_per_lookup =
+                               rte_atomic64_read(&gread_cycles) /
+                               rte_atomic64_read(&greads);
+                       rwc_perf_results->w_no_ks_r_miss[m][n]
+                                               = cycles_per_lookup;
+                       printf("Cycles per lookup: %llu\n", cycles_per_lookup);
+               }
+       }
+
+finish:
+       rte_hash_free(tbl_rwc_test_param.h);
+       return 0;
+
+err:
+       rte_hash_free(tbl_rwc_test_param.h);
+       return -1;
+}
+
+/*
+ * Test lookup perf:
+ * Reader(s) lookup keys present in the table and not likely to be on the
+ * shift path while 'Main' thread adds keys causing key-shifts.
+ */
+static int
+test_hash_add_ks_lookup_hit_non_sp(struct rwc_perf *rwc_perf_results,
+                                   int rwc_lf, int htm)
+{
+       unsigned int n, m;
+       uint64_t i;
+       int use_jhash = 0;
+       int ret;
+       uint8_t key_shift;
+       uint8_t read_type = READ_PASS_NON_SHIFT_PATH;
+
+       rte_atomic64_init(&greads);
+       rte_atomic64_init(&gread_cycles);
+
+       if (init_params(rwc_lf, use_jhash, htm) != 0)
+               goto err;
+       printf("\nTest: Hash add - key shift, Hash lookup - hit"
+              " (non-shift-path)\n");
+       for (m = 0; m < 2; m++) {
+               if (m == 1) {
+                       printf("\n** With bulk-lookup **\n");
+                       read_type |= BULK_LOOKUP;
+               }
+               for (n = 0; n < NUM_TEST; n++) {
+                       unsigned int tot_lcore = rte_lcore_count();
+                       if (tot_lcore < rwc_core_cnt[n] + 1)
+                               goto finish;
+
+                       printf("\nNumber of readers: %u\n", rwc_core_cnt[n]);
+
+                       rte_atomic64_clear(&greads);
+                       rte_atomic64_clear(&gread_cycles);
+
+                       rte_hash_reset(tbl_rwc_test_param.h);
+                       writer_done = 0;
+                       key_shift = 0;
+                       if (write_keys(key_shift) < 0)
+                               goto err;
+                       for (i = 1; i <= rwc_core_cnt[n]; i++)
+                               rte_eal_remote_launch(test_rwc_reader,
+                                               (void *)(uintptr_t)read_type,
+                                                       enabled_core_ids[i]);
+                       key_shift = 1;
+                       ret = write_keys(key_shift);
+                       writer_done = 1;
+                       rte_eal_mp_wait_lcore();
+
+                       if (ret < 0)
+                               goto err;
+                       for (i = 1; i <= rwc_core_cnt[n]; i++)
+                               if (lcore_config[enabled_core_ids[i]].ret < 0)
+                                       goto err;
+
+                       unsigned long long cycles_per_lookup =
+                               rte_atomic64_read(&gread_cycles) /
+                               rte_atomic64_read(&greads);
+                       rwc_perf_results->w_ks_r_hit_nsp[m][n]
+                                               = cycles_per_lookup;
+                       printf("Cycles per lookup: %llu\n", cycles_per_lookup);
+               }
+       }
+
+finish:
+       rte_hash_free(tbl_rwc_test_param.h);
+       return 0;
+
+err:
+       rte_hash_free(tbl_rwc_test_param.h);
+       return -1;
+}
+
+/*
+ * Test lookup perf:
+ * Reader(s) lookup keys present in the table and likely on the shift-path while
+ * 'Main' thread adds keys causing key-shifts.
+ */
+static int
+test_hash_add_ks_lookup_hit_sp(struct rwc_perf *rwc_perf_results, int rwc_lf,
+                               int htm)
+{
+       unsigned int n, m;
+       uint64_t i;
+       int use_jhash = 0;
+       int ret;
+       uint8_t key_shift;
+       uint8_t read_type = READ_PASS_SHIFT_PATH;
+
+       rte_atomic64_init(&greads);
+       rte_atomic64_init(&gread_cycles);
+
+       if (init_params(rwc_lf, use_jhash, htm) != 0)
+               goto err;
+       printf("\nTest: Hash add - key shift, Hash lookup - hit (shift-path)"
+              "\n");
+
+       for (m = 0; m < 2; m++) {
+               if (m == 1) {
+                       printf("\n** With bulk-lookup **\n");
+                       read_type |= BULK_LOOKUP;
+               }
+               for (n = 0; n < NUM_TEST; n++) {
+                       unsigned int tot_lcore = rte_lcore_count();
+                       if (tot_lcore < rwc_core_cnt[n] + 1)
+                               goto finish;
+
+                       printf("\nNumber of readers: %u\n", rwc_core_cnt[n]);
+                       rte_atomic64_clear(&greads);
+                       rte_atomic64_clear(&gread_cycles);
+
+                       rte_hash_reset(tbl_rwc_test_param.h);
+                       writer_done = 0;
+                       key_shift = 0;
+                       if (write_keys(key_shift) < 0)
+                               goto err;
+                       for (i = 1; i <= rwc_core_cnt[n]; i++)
+                               rte_eal_remote_launch(test_rwc_reader,
+                                               (void *)(uintptr_t)read_type,
+                                               enabled_core_ids[i]);
+                       key_shift = 1;
+                       ret = write_keys(key_shift);
+                       writer_done = 1;
+                       rte_eal_mp_wait_lcore();
+
+                       if (ret < 0)
+                               goto err;
+                       for (i = 1; i <= rwc_core_cnt[n]; i++)
+                               if (lcore_config[enabled_core_ids[i]].ret < 0)
+                                       goto err;
+
+                       unsigned long long cycles_per_lookup =
+                               rte_atomic64_read(&gread_cycles) /
+                               rte_atomic64_read(&greads);
+                       rwc_perf_results->w_ks_r_hit_sp[m][n]
+                                               = cycles_per_lookup;
+                       printf("Cycles per lookup: %llu\n", cycles_per_lookup);
+               }
+       }
+
+finish:
+       rte_hash_free(tbl_rwc_test_param.h);
+       return 0;
+
+err:
+       rte_hash_free(tbl_rwc_test_param.h);
+       return -1;
+}
+
+/*
+ * Test lookup perf:
+ * Reader(s) lookup keys absent in the table while
+ * 'Main' thread adds keys causing key-shifts.
+ */
+static int
+test_hash_add_ks_lookup_miss(struct rwc_perf *rwc_perf_results, int rwc_lf, int
+                            htm)
+{
+       unsigned int n, m;
+       uint64_t i;
+       int use_jhash = 0;
+       int ret;
+       uint8_t key_shift;
+       uint8_t read_type = READ_FAIL;
+
+       rte_atomic64_init(&greads);
+       rte_atomic64_init(&gread_cycles);
+
+       if (init_params(rwc_lf, use_jhash, htm) != 0)
+               goto err;
+       printf("\nTest: Hash add - key shift, Hash lookup - miss\n");
+       for (m = 0; m < 2; m++) {
+               if (m == 1) {
+                       printf("\n** With bulk-lookup **\n");
+                       read_type |= BULK_LOOKUP;
+               }
+               for (n = 0; n < NUM_TEST; n++) {
+                       unsigned int tot_lcore = rte_lcore_count();
+                       if (tot_lcore < rwc_core_cnt[n] + 1)
+                               goto finish;
+
+                       printf("\nNumber of readers: %u\n", rwc_core_cnt[n]);
+
+                       rte_atomic64_clear(&greads);
+                       rte_atomic64_clear(&gread_cycles);
+
+                       rte_hash_reset(tbl_rwc_test_param.h);
+                       writer_done = 0;
+                       key_shift = 0;
+                       if (write_keys(key_shift) < 0)
+                               goto err;
+                       for (i = 1; i <= rwc_core_cnt[n]; i++)
+                               rte_eal_remote_launch(test_rwc_reader,
+                                               (void *)(uintptr_t)read_type,
+                                                       enabled_core_ids[i]);
+                       key_shift = 1;
+                       ret = write_keys(key_shift);
+                       writer_done = 1;
+                       rte_eal_mp_wait_lcore();
+
+                       if (ret < 0)
+                               goto err;
+                       for (i = 1; i <= rwc_core_cnt[n]; i++)
+                               if (lcore_config[enabled_core_ids[i]].ret < 0)
+                                       goto err;
+
+                       unsigned long long cycles_per_lookup =
+                               rte_atomic64_read(&gread_cycles) /
+                               rte_atomic64_read(&greads);
+                       rwc_perf_results->w_ks_r_miss[m][n] = cycles_per_lookup;
+                       printf("Cycles per lookup: %llu\n", cycles_per_lookup);
+               }
+       }
+
+finish:
+       rte_hash_free(tbl_rwc_test_param.h);
+       return 0;
+
+err:
+       rte_hash_free(tbl_rwc_test_param.h);
+       return -1;
+}
+
+/*
+ * Test lookup perf for multi-writer:
+ * Reader(s) lookup keys present in the table and likely on the shift-path while
+ * writers add keys causing key-shifts.
+ * Writers run in parallel, on different data plane cores.
+ */
+static int
+test_hash_multi_add_lookup(struct rwc_perf *rwc_perf_results, int rwc_lf,
+                          int htm)
+{
+       unsigned int n, m, k;
+       uint64_t i;
+       int use_jhash = 0;
+       uint8_t key_shift;
+       uint8_t read_type = READ_PASS_SHIFT_PATH;
+
+       rte_atomic64_init(&greads);
+       rte_atomic64_init(&gread_cycles);
+
+       if (init_params(rwc_lf, use_jhash, htm) != 0)
+               goto err;
+       printf("\nTest: Multi-add-lookup\n");
+       uint8_t pos_core;
+       for (m = 1; m < NUM_TEST; m++) {
+               /* Calculate keys added by each writer */
+               tbl_rwc_test_param.single_insert =
+                       tbl_rwc_test_param.count_keys_ks / rwc_core_cnt[m];
+               for (k = 0; k < 2; k++) {
+                       if (k == 1) {
+                               printf("\n** With bulk-lookup **\n");
+                               read_type |= BULK_LOOKUP;
+                       }
+                       for (n = 0; n < NUM_TEST; n++) {
+                               unsigned int tot_lcore  = rte_lcore_count();
+                               if (tot_lcore < (rwc_core_cnt[n] +
+                                    rwc_core_cnt[m] + 1))
+                                       goto finish;
+
+                               printf("\nNumber of writers: %u",
+                                      rwc_core_cnt[m]);
+                               printf("\nNumber of readers: %u\n",
+                                      rwc_core_cnt[n]);
+
+                               rte_atomic64_clear(&greads);
+                               rte_atomic64_clear(&gread_cycles);
+
+                               rte_hash_reset(tbl_rwc_test_param.h);
+                               writer_done = 0;
+                               for (i = 0; i < 4; i++)
+                                       multi_writer_done[i] = 0;
+                               key_shift = 0;
+                               if (write_keys(key_shift) < 0)
+                                       goto err;
+
+                               /* Launch reader(s) */
+                               for (i = 1; i <= rwc_core_cnt[n]; i++)
+                                       rte_eal_remote_launch(test_rwc_reader,
+                                               (void *)(uintptr_t)read_type,
+                                               enabled_core_ids[i]);
+                               key_shift = 1;
+                               pos_core = 0;
+
+                               /* Launch writers */
+                               for (; i <= rwc_core_cnt[m]
+                                    + rwc_core_cnt[n]; i++) {
+                                       rte_eal_remote_launch
+                                               (test_rwc_multi_writer,
+                                               (void *)(uintptr_t)pos_core,
+                                               enabled_core_ids[i]);
+                                       pos_core++;
+                               }
+
+                               /* Wait for writers to complete */
+                               for (i = 0; i < rwc_core_cnt[m]; i++)
+                                       while (multi_writer_done[i] == 0)
+                                               ;
+                               writer_done = 1;
+
+                               rte_eal_mp_wait_lcore();
+
+                               for (i = 1; i <= rwc_core_cnt[n]; i++)
+                                       if (lcore_config[i].ret < 0)
+                                               goto err;
+
+                               unsigned long long cycles_per_lookup =
+                                       rte_atomic64_read(&gread_cycles)
+                                       / rte_atomic64_read(&greads);
+                               rwc_perf_results->multi_rw[m][k][n]
+                                       = cycles_per_lookup;
+                               printf("Cycles per lookup: %llu\n",
+                                      cycles_per_lookup);
+                       }
+               }
+       }
+
+finish:
+       rte_hash_free(tbl_rwc_test_param.h);
+       return 0;
+
+err:
+       rte_hash_free(tbl_rwc_test_param.h);
+       return -1;
+}
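+/*
+ * Note: each writer launched above runs test_rwc_multi_writer() on its own
+ * single_insert-sized share of the keys and then sets
+ * multi_writer_done[pos]; the main lcore spin-waits on all of those flags
+ * before raising writer_done, which lets the readers finish.
+ */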
+
+static int
+test_hash_readwrite_lf_main(void)
+{
+       /*
+        * Variables used to choose different tests.
+        * rwc_lf indicates if read-write concurrency lock-free support is
+        * enabled.
+        * htm indicates if Hardware transactional memory support is enabled.
+        */
+       int rwc_lf = 0;
+       int htm;
+       int use_jhash = 0;
+       if (rte_lcore_count() == 1) {
+               printf("More than one lcore is required "
+                       "to do the read-write lock-free concurrency test\n");
+               return -1;
+       }
+
+       setlocale(LC_NUMERIC, "");
+
+       if (rte_tm_supported())
+               htm = 1;
+       else
+               htm = 0;
+
+       if (init_params(rwc_lf, use_jhash, htm) != 0)
+               return -1;
+       if (generate_keys() != 0)
+               return -1;
+       if (get_enabled_cores_list() != 0)
+               return -1;
+
+       if (RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {
+               rwc_lf = 1;
+               printf("Test lookup with read-write concurrency lock-free support"
+                      " enabled\n");
+               if (test_hash_add_no_ks_lookup_hit(&rwc_lf_results, rwc_lf,
+                                                       htm) < 0)
+                       return -1;
+               if (test_hash_add_no_ks_lookup_miss(&rwc_lf_results, rwc_lf,
+                                                       htm) < 0)
+                       return -1;
+               if (test_hash_add_ks_lookup_hit_non_sp(&rwc_lf_results, rwc_lf,
+                                                       htm) < 0)
+                       return -1;
+               if (test_hash_add_ks_lookup_hit_sp(&rwc_lf_results, rwc_lf,
+                                                       htm) < 0)
+                       return -1;
+               if (test_hash_add_ks_lookup_miss(&rwc_lf_results, rwc_lf, htm)
+                                                       < 0)
+                       return -1;
+               if (test_hash_multi_add_lookup(&rwc_lf_results, rwc_lf, htm)
+                                                       < 0)
+                       return -1;
+       }
+       printf("\nTest lookup with read-write concurrency lock-free support"
+              " disabled\n");
+       rwc_lf = 0;
+       if (!htm) {
+               printf("With HTM Disabled\n");
+               if (!RUN_WITH_HTM_DISABLED) {
+                       printf("Enable RUN_WITH_HTM_DISABLED to test with"
+                              " lock-free disabled\n");
+                       goto results;
+               }
+       } else
+               printf("With HTM Enabled\n");
+       if (test_hash_add_no_ks_lookup_hit(&rwc_non_lf_results, rwc_lf, htm)
+                                               < 0)
+               return -1;
+       if (test_hash_add_no_ks_lookup_miss(&rwc_non_lf_results, rwc_lf, htm)
+                                               < 0)
+               return -1;
+       if (test_hash_add_ks_lookup_hit_non_sp(&rwc_non_lf_results, rwc_lf,
+                                               htm) < 0)
+               return -1;
+       if (test_hash_add_ks_lookup_hit_sp(&rwc_non_lf_results, rwc_lf, htm)
+                                               < 0)
+               return -1;
+       if (test_hash_add_ks_lookup_miss(&rwc_non_lf_results, rwc_lf, htm) < 0)
+               return -1;
+       if (test_hash_multi_add_lookup(&rwc_non_lf_results, rwc_lf, htm) < 0)
+               return -1;
+results:
+       printf("\n\t\t\t\t\t\t********** Results summary **********\n\n");
+       int i, j, k;
+       for (j = 0; j < 2; j++) {
+               if (j == 1)
+                       printf("\n\t\t\t\t\t#######********** Bulk Lookup "
+                              "**********#######\n\n");
+               printf("_______\t\t_______\t\t_________\t___\t\t_________\t\t"
+                       "\t\t\t\t_________________\n");
+               printf("Writers\t\tReaders\t\tLock-free\tHTM\t\tTest-case\t\t\t"
+                      "\t\t\tCycles per lookup\n");
+               printf("_______\t\t_______\t\t_________\t___\t\t_________\t\t\t"
+                      "\t\t\t_________________\n");
+               for (i = 0; i < NUM_TEST; i++) {
+                       printf("%u\t\t%u\t\t", 1, rwc_core_cnt[i]);
+                       printf("Enabled\t\t");
+                       printf("N/A\t\t");
+                       printf("Hash add - no key-shifts, lookup - hit\t\t\t\t"
+                               "%u\n\t\t\t\t\t\t\t\t",
+                               rwc_lf_results.w_no_ks_r_hit[j][i]);
+                       printf("Hash add - no key-shifts, lookup - miss\t\t\t\t"
+                               "%u\n\t\t\t\t\t\t\t\t",
+                               rwc_lf_results.w_no_ks_r_miss[j][i]);
+                       printf("Hash add - key-shifts, lookup - hit"
+                              "(non-shift-path)\t\t%u\n\t\t\t\t\t\t\t\t",
+                              rwc_lf_results.w_ks_r_hit_nsp[j][i]);
+                       printf("Hash add - key-shifts, lookup - hit "
+                              "(shift-path)\t\t%u\n\t\t\t\t\t\t\t\t",
+                              rwc_lf_results.w_ks_r_hit_sp[j][i]);
+                       printf("Hash add - key-shifts, Hash lookup miss\t\t\t\t"
+                               "%u\n\n\t\t\t\t",
+                               rwc_lf_results.w_ks_r_miss[j][i]);
+
+                       printf("Disabled\t");
+                       if (htm)
+                               printf("Enabled\t\t");
+                       else
+                               printf("Disabled\t");
+                       printf("Hash add - no key-shifts, lookup - hit\t\t\t\t"
+                               "%u\n\t\t\t\t\t\t\t\t",
+                               rwc_non_lf_results.w_no_ks_r_hit[j][i]);
+                       printf("Hash add - no key-shifts, lookup - miss\t\t\t\t"
+                               "%u\n\t\t\t\t\t\t\t\t",
+                               rwc_non_lf_results.w_no_ks_r_miss[j][i]);
+                       printf("Hash add - key-shifts, lookup - hit "
+                              "(non-shift-path)\t\t%u\n\t\t\t\t\t\t\t\t",
+                              rwc_non_lf_results.w_ks_r_hit_nsp[j][i]);
+                       printf("Hash add - key-shifts, lookup - hit "
+                              "(shift-path)\t\t%u\n\t\t\t\t\t\t\t\t",
+                              rwc_non_lf_results.w_ks_r_hit_sp[j][i]);
+                       printf("Hash add - key-shifts, Hash lookup miss\t\t\t\t"
+                              "%u\n", rwc_non_lf_results.w_ks_r_miss[j][i]);
+
+                       printf("_______\t\t_______\t\t_________\t___\t\t"
+                              "_________\t\t\t\t\t\t_________________\n");
+               }
+
+               for (i = 1; i < NUM_TEST; i++) {
+                       for (k = 0; k < NUM_TEST; k++) {
+                               printf("%u", rwc_core_cnt[i]);
+                               printf("\t\t%u\t\t", rwc_core_cnt[k]);
+                               printf("Enabled\t\t");
+                               printf("N/A\t\t");
+                               printf("Multi-add-lookup\t\t\t\t\t\t%u\n\n\t\t"
+                                      "\t\t",
+                                      rwc_lf_results.multi_rw[i][j][k]);
+                               printf("Disabled\t");
+                               if (htm)
+                                       printf("Enabled\t\t");
+                               else
+                                       printf("Disabled\t");
+                               printf("Multi-add-lookup\t\t\t\t\t\t%u\n",
+                                      rwc_non_lf_results.multi_rw[i][j][k]);
+
+                               printf("_______\t\t_______\t\t_________\t___"
+                                      "\t\t_________\t\t\t\t\t\t"
+                                      "_________________\n");
+                       }
+               }
+       }
+       rte_free(tbl_rwc_test_param.keys);
+       rte_free(tbl_rwc_test_param.keys_no_ks);
+       rte_free(tbl_rwc_test_param.keys_ks);
+       rte_free(tbl_rwc_test_param.keys_absent);
+       rte_free(tbl_rwc_test_param.keys_shift_path);
+       rte_free(scanned_bkts);
+       return 0;
+}
+
+REGISTER_TEST_COMMAND(hash_readwrite_lf_autotest, test_hash_readwrite_lf_main);
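For reference, enabling the lock-free read-write concurrency mode that this
test exercises only takes the new extra flag at table-creation time. A
minimal sketch, with hypothetical name and sizing (the real test builds its
parameters in init_params()):

        #include <rte_hash.h>
        #include <rte_hash_crc.h>
        #include <rte_lcore.h>

        static struct rte_hash *
        create_lf_hash(void)
        {
                struct rte_hash_parameters params = {
                        .name = "lf_hash_example",      /* hypothetical name */
                        .entries = 1024,
                        .key_len = sizeof(uint32_t),
                        .hash_func = rte_hash_crc,
                        .socket_id = rte_socket_id(),
                        .extra_flag = RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF,
                };

                return rte_hash_create(&params);
        }

Readers can then call rte_hash_lookup() concurrently with writers without
taking the table's read-write lock.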
diff --git a/test/test/test_hash_scaling.c b/test/test/test_hash_scaling.c
deleted file mode 100644
index 07765a7..0000000
+++ /dev/null
@@ -1,191 +0,0 @@
-/* SPDX-License-Identifier: BSD-3-Clause
- * Copyright(c) 2015 Intel Corporation
- */
-
-#include <stdio.h>
-
-#include <rte_cycles.h>
-#include <rte_hash.h>
-#include <rte_hash_crc.h>
-#include <rte_spinlock.h>
-#include <rte_launch.h>
-
-#include "test.h"
-
-/*
- * Check condition and return an error if true. Assumes that "handle" is the
- * name of the hash structure pointer to be freed.
- */
-#define RETURN_IF_ERROR(cond, str, ...) do {                           \
-       if (cond) {                                                     \
-               printf("ERROR line %d: " str "\n", __LINE__,            \
-                                                       ##__VA_ARGS__); \
-               if (handle)                                             \
-                       rte_hash_free(handle);                          \
-               return -1;                                              \
-       }                                                               \
-} while (0)
-
-enum locking_mode_t {
-       NORMAL_LOCK,
-       LOCK_ELISION,
-       NULL_LOCK
-};
-
-struct {
-       uint32_t num_iterations;
-       struct rte_hash *h;
-       rte_spinlock_t *lock;
-       int locking_mode;
-} tbl_scaling_test_params;
-
-static rte_atomic64_t gcycles;
-
-static int test_hash_scaling_worker(__attribute__((unused)) void *arg)
-{
-       uint64_t i, key;
-       uint32_t thr_id = rte_sys_gettid();
-       uint64_t begin, cycles = 0;
-
-       switch (tbl_scaling_test_params.locking_mode) {
-
-       case NORMAL_LOCK:
-
-               for (i = 0; i < tbl_scaling_test_params.num_iterations; i++) {
-                       /*      different threads get different keys because
-                               we use the thread-id in the key computation
-                        */
-                       key = rte_hash_crc(&i, sizeof(i), thr_id);
-                       begin = rte_rdtsc_precise();
-                       rte_spinlock_lock(tbl_scaling_test_params.lock);
-                       rte_hash_add_key(tbl_scaling_test_params.h, &key);
-                       rte_spinlock_unlock(tbl_scaling_test_params.lock);
-                       cycles += rte_rdtsc_precise() - begin;
-               }
-               break;
-
-       case LOCK_ELISION:
-
-               for (i = 0; i < tbl_scaling_test_params.num_iterations; i++) {
-                       key = rte_hash_crc(&i, sizeof(i), thr_id);
-                       begin = rte_rdtsc_precise();
-                       rte_spinlock_lock_tm(tbl_scaling_test_params.lock);
-                       rte_hash_add_key(tbl_scaling_test_params.h, &key);
-                       rte_spinlock_unlock_tm(tbl_scaling_test_params.lock);
-                       cycles += rte_rdtsc_precise() - begin;
-               }
-               break;
-
-       default:
-
-               for (i = 0; i < tbl_scaling_test_params.num_iterations; i++) {
-                       key = rte_hash_crc(&i, sizeof(i), thr_id);
-                       begin = rte_rdtsc_precise();
-                       rte_hash_add_key(tbl_scaling_test_params.h, &key);
-                       cycles += rte_rdtsc_precise() - begin;
-               }
-       }
-
-       rte_atomic64_add(&gcycles, cycles);
-
-       return 0;
-}
-
-/*
- * Do scalability perf tests.
- */
-static int
-test_hash_scaling(int locking_mode)
-{
-       static unsigned calledCount =    1;
-       uint32_t num_iterations = 1024*1024;
-       uint64_t i, key;
-       struct rte_hash_parameters hash_params = {
-               .entries = num_iterations*2,
-               .key_len = sizeof(key),
-               .hash_func = rte_hash_crc,
-               .hash_func_init_val = 0,
-               .socket_id = rte_socket_id(),
-               .extra_flag = RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT
-       };
-       struct rte_hash *handle;
-       char name[RTE_HASH_NAMESIZE];
-       rte_spinlock_t lock;
-
-       rte_spinlock_init(&lock);
-
-       snprintf(name, 32, "test%u", calledCount++);
-       hash_params.name = name;
-
-       handle = rte_hash_create(&hash_params);
-       RETURN_IF_ERROR(handle == NULL, "hash creation failed");
-
-       tbl_scaling_test_params.num_iterations =
-               num_iterations/rte_lcore_count();
-       tbl_scaling_test_params.h = handle;
-       tbl_scaling_test_params.lock = &lock;
-       tbl_scaling_test_params.locking_mode = locking_mode;
-
-       rte_atomic64_init(&gcycles);
-       rte_atomic64_clear(&gcycles);
-
-       /* fill up to initial size */
-       for (i = 0; i < num_iterations; i++) {
-               key = rte_hash_crc(&i, sizeof(i), 0xabcdabcd);
-               rte_hash_add_key(tbl_scaling_test_params.h, &key);
-       }
-
-       rte_eal_mp_remote_launch(test_hash_scaling_worker, NULL, CALL_MASTER);
-       rte_eal_mp_wait_lcore();
-
-       unsigned long long int cycles_per_operation =
-               rte_atomic64_read(&gcycles)/
-               (tbl_scaling_test_params.num_iterations*rte_lcore_count());
-       const char *lock_name;
-
-       switch (locking_mode) {
-       case NORMAL_LOCK:
-               lock_name = "normal spinlock";
-               break;
-       case LOCK_ELISION:
-               lock_name = "lock elision";
-               break;
-       default:
-               lock_name = "null lock";
-       }
-       printf("--------------------------------------------------------\n");
-       printf("Cores: %d; %s mode ->  cycles per operation: %llu\n",
-               rte_lcore_count(), lock_name, cycles_per_operation);
-       printf("--------------------------------------------------------\n");
-       /* CSV output */
-       printf(">>>%d,%s,%llu\n", rte_lcore_count(), lock_name,
-               cycles_per_operation);
-
-       rte_hash_free(handle);
-       return 0;
-}
-
-static int
-test_hash_scaling_main(void)
-{
-       int r = 0;
-
-       if (rte_lcore_count() == 1)
-               r = test_hash_scaling(NULL_LOCK);
-
-       if (r == 0)
-               r = test_hash_scaling(NORMAL_LOCK);
-
-       if (!rte_tm_supported()) {
-               printf("Hardware transactional memory (lock elision) is NOT supported\n");
-               return r;
-       }
-       printf("Hardware transactional memory (lock elision) is supported\n");
-
-       if (r == 0)
-               r = test_hash_scaling(LOCK_ELISION);
-
-       return r;
-}
-
-REGISTER_TEST_COMMAND(hash_scaling_autotest, test_hash_scaling_main);
index 1b87671..f3c19b5 100644
@@ -7,10 +7,11 @@
 #include <unistd.h>
 #include <string.h>
 #include <sys/wait.h>
+#include <dirent.h>
 
 #include "test.h"
 
-#ifndef RTE_LIBRTE_KNI
+#if !defined(RTE_EXEC_ENV_LINUXAPP) || !defined(RTE_LIBRTE_KNI)
 
 static int
 test_kni(void)
@@ -40,6 +41,8 @@ test_kni(void)
 
 #define IFCONFIG      "/sbin/ifconfig "
 #define TEST_KNI_PORT "test_kni_port"
+#define KNI_MODULE_PATH "/sys/module/rte_kni"
+#define KNI_MODULE_PARAM_LO KNI_MODULE_PATH"/parameters/lo_mode"
 #define KNI_TEST_MAX_PORTS 4
 /* The threshold number of mbufs to be transmitted or received. */
 #define KNI_NUM_MBUF_THRESHOLD 100
@@ -70,9 +73,6 @@ static const struct rte_eth_txconf tx_conf = {
 };
 
 static const struct rte_eth_conf port_conf = {
-       .rxmode = {
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
-       },
        .txmode = {
                .mq_mode = ETH_DCB_NONE,
        },
@@ -121,6 +121,79 @@ kni_change_mtu(uint16_t port_id, unsigned int new_mtu)
                                         port_id, kni_pkt_mtu);
        return 0;
 }
+
+static int
+test_kni_link_change(void)
+{
+       int ret;
+       int pid;
+
+       pid = fork();
+       if (pid < 0) {
+               printf("Error: Failed to fork a process\n");
+               return -1;
+       }
+
+       if (pid == 0) {
+               printf("Starting KNI Link status change tests.\n");
+               if (system(IFCONFIG TEST_KNI_PORT" up") == -1) {
+                       ret = -1;
+                       goto error;
+               }
+
+               ret = rte_kni_update_link(test_kni_ctx, 1);
+               if (ret < 0) {
+                       printf("Failed to change link state to Up ret=%d.\n",
+                               ret);
+                       goto error;
+               }
+               rte_delay_ms(1000);
+               printf("KNI: Set LINKUP, previous state=%d\n", ret);
+
+               ret = rte_kni_update_link(test_kni_ctx, 0);
+               if (ret != 1) {
+                       printf(
+               "Failed! Previous link state should be 1, returned %d.\n",
+                               ret);
+                       goto error;
+               }
+               rte_delay_ms(1000);
+               printf("KNI: Set LINKDOWN, previous state=%d\n", ret);
+
+               ret = rte_kni_update_link(test_kni_ctx, 1);
+               if (ret != 0) {
+                       printf(
+               "Failed! Previous link state should be 0, returned %d.\n",
+                               ret);
+                       goto error;
+               }
+               printf("KNI: Set LINKUP, previous state=%d\n", ret);
+
+               ret = 0;
+               rte_delay_ms(1000);
+
+error:
+               if (system(IFCONFIG TEST_KNI_PORT" down") == -1)
+                       ret = -1;
+
+               printf("KNI: Link status change tests: %s.\n",
+                       (ret == 0) ? "Passed" : "Failed");
+               exit(ret);
+       } else {
+               int p_ret, status;
+
+               while (1) {
+                       p_ret = waitpid(pid, &status, WNOHANG);
+                       if (p_ret != 0) {
+                               if (WIFEXITED(status))
+                                       return WEXITSTATUS(status);
+                               return -1;
+                       }
+                       rte_delay_ms(10);
+                       rte_kni_handle_request(test_kni_ctx);
+               }
+       }
+}
 /**
  * This loop fully tests the basic functions of KNI. e.g. transmitting,
  * receiving to, from kernel space, and kernel requests.
@@ -404,6 +477,10 @@ test_kni_processing(uint16_t port_id, struct rte_mempool *mp)
                goto fail_kni;
        }
 
+       ret = test_kni_link_change();
+       if (ret != 0)
+               goto fail_kni;
+
        rte_eal_mp_remote_launch(test_kni_loop, NULL, CALL_MASTER);
        RTE_LCORE_FOREACH_SLAVE(i) {
                if (rte_eal_wait_lcore(i) < 0) {
@@ -429,12 +506,6 @@ test_kni_processing(uint16_t port_id, struct rte_mempool *mp)
        }
        test_kni_ctx = NULL;
 
-       /* test of releasing a released kni device */
-       if (rte_kni_release(kni) == 0) {
-               printf("should not release a released kni device\n");
-               return -1;
-       }
-
        /* test of reusing memzone */
        kni = rte_kni_alloc(mp, &conf, &ops);
        if (!kni) {
@@ -462,7 +533,7 @@ static int
 test_kni(void)
 {
        int ret = -1;
-       uint16_t nb_ports, port_id;
+       uint16_t port_id;
        struct rte_kni *kni;
        struct rte_mempool *mp;
        struct rte_kni_conf conf;
@@ -470,6 +541,20 @@ test_kni(void)
        struct rte_kni_ops ops;
        const struct rte_pci_device *pci_dev;
        const struct rte_bus *bus;
+       FILE *fd;
+       DIR *dir;
+       char buf[16];
+
+       dir = opendir(KNI_MODULE_PATH);
+       if (!dir) {
+               if (errno == ENOENT) {
+                       printf("Cannot run test: the rte_kni kernel module is not loaded\n");
+                       return -1;
+               }
+               printf("opendir: %s\n", strerror(errno));
+               return -1;
+       }
+       closedir(dir);
 
        /* Initialize KNI subsystem */
        rte_kni_init(KNI_TEST_MAX_PORTS);
@@ -485,12 +570,6 @@ test_kni(void)
                return -1;
        }
 
-       nb_ports = rte_eth_dev_count_avail();
-       if (nb_ports == 0) {
-               printf("no supported nic port found\n");
-               return -1;
-       }
-
        /* configuring port 0 for the test is enough */
        port_id = 0;
        ret = rte_eth_dev_configure(port_id, 1, 1, &port_conf);
@@ -519,9 +598,25 @@ test_kni(void)
        rte_eth_promiscuous_enable(port_id);
 
        /* basic test of kni processing */
-       ret = test_kni_processing(port_id, mp);
-       if (ret < 0)
-               goto fail;
+       fd = fopen(KNI_MODULE_PARAM_LO, "r");
+       if (fd == NULL) {
+               printf("fopen: %s\n", strerror(errno));
+               return -1;
+       }
+       memset(&buf, 0, sizeof(buf));
+       if (fgets(buf, sizeof(buf), fd)) {
+               if (!strncmp(buf, "lo_mode_fifo", strlen("lo_mode_fifo")) ||
+                       !strncmp(buf, "lo_mode_fifo_skb",
+                                 strlen("lo_mode_fifo_skb"))) {
+                       ret = test_kni_processing(port_id, mp);
+                       if (ret < 0) {
+                               fclose(fd);
+                               goto fail;
+                       }
+               } else
+                       printf("test_kni_processing skipped because of missing rte_kni module lo_mode argument\n");
+       }
+       fclose(fd);
 
        /* test of allocating KNI with NULL mempool pointer */
        memset(&info, 0, sizeof(info));
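Note that with this change test_kni_processing() (and the link-change test it
invokes) only runs when the rte_kni kernel module was loaded with a loopback
mode, e.g. "insmod rte_kni.ko lo_mode=lo_mode_fifo"; otherwise the loopback
part of the autotest is skipped with a message.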
index e673862..a42056f 100644
@@ -137,6 +137,26 @@ static int test_valid_kvargs(void)
        }
        rte_kvargs_free(kvlist);
 
+       /* third test using list as value */
+       args = "foo=[0,1],check=value2";
+       valid_keys = valid_keys_list;
+       kvlist = rte_kvargs_parse(args, valid_keys);
+       if (kvlist == NULL) {
+               printf("rte_kvargs_parse() error");
+               goto fail;
+       }
+       if (strcmp(kvlist->pairs[0].value, "[0,1]") != 0) {
+               printf("wrong value %s", kvlist->pairs[0].value);
+               goto fail;
+       }
+       count = kvlist->count;
+       if (count != 2) {
+               printf("invalid count value %d\n", count);
+               rte_kvargs_free(kvlist);
+               goto fail;
+       }
+       rte_kvargs_free(kvlist);
+
        return 0;
 
  fail:
@@ -162,6 +182,7 @@ static int test_invalid_kvargs(void)
                "foo=1,,foo=2",    /* empty key/value */
                "foo=1,foo",       /* no value */
                "foo=1,=2",        /* no key */
+               "foo=[1,2",        /* no closing bracket in value */
                ",=",              /* also test with a smiley */
                NULL };
        const char **args;
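For reference, a minimal sketch of consuming such a bracketed list value
through the kvargs API (key names hypothetical):

        #include <stdio.h>
        #include <rte_kvargs.h>

        static int
        dump_value(const char *key, const char *value, void *opaque)
        {
                (void)opaque;
                /* for "foo=[0,1]" the whole "[0,1]" arrives as one value */
                printf("%s = %s\n", key, value);
                return 0;
        }

        static void
        parse_list_example(void)
        {
                const char *valid[] = { "foo", "check", NULL };
                struct rte_kvargs *kvlist =
                        rte_kvargs_parse("foo=[0,1],check=value2", valid);

                if (kvlist == NULL)
                        return;
                rte_kvargs_process(kvlist, "foo", dump_value, NULL);
                rte_kvargs_free(kvlist);
        }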
index 9163f63..e539f07 100644
@@ -110,7 +110,6 @@ static struct rte_eth_conf default_pmd_conf = {
                .mq_mode = ETH_MQ_RX_NONE,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
index 4b5abb4..5e52724 100644
@@ -711,6 +711,9 @@ check_socket_mem(const struct rte_memseg_list *msl, void *arg)
 {
        int32_t *socket = arg;
 
+       if (msl->external)
+               return 0;
+
        return *socket == msl->socket_id;
 }
 
index 452d7cc..9fe465e 100644
@@ -115,6 +115,9 @@ find_available_pagesz(const struct rte_memseg_list *msl, void *arg)
 {
        struct walk_arg *wa = arg;
 
+       if (msl->external)
+               return 0;
+
        if (msl->page_sz == RTE_PGSIZE_2M)
                wa->hugepage_2MB_avail = 1;
        if (msl->page_sz == RTE_PGSIZE_1G)
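Both hunks apply the same 18.11 pattern: memseg lists backed by
user-registered external memory are skipped when walking DPDK's memory.
A minimal sketch of the pattern (walker name hypothetical):

        #include <rte_memory.h>

        /* count only DPDK-managed (internal) memseg lists */
        static int
        count_internal_msl(const struct rte_memseg_list *msl, void *arg)
        {
                int *count = arg;

                if (msl->external) /* external memory: skip */
                        return 0;
                (*count)++;
                return 0;
        }

        /* usage: int n = 0; rte_memseg_list_walk(count_internal_msl, &n); */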
diff --git a/test/test/test_metrics.c b/test/test/test_metrics.c
new file mode 100644
index 0000000..94d54d7
--- /dev/null
@@ -0,0 +1,313 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+
+#include <rte_lcore.h>
+#include <rte_metrics.h>
+
+#include "test.h"
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#define        REG_METRIC_COUNT        6
+#define        METRIC_LESSER_COUNT     3
+#define        KEY     1
+#define        VALUE   1
+
+/* Initializes metric module. This function must be called
+ * from a primary process before metrics are used
+ */
+static int
+test_metrics_init(void)
+{
+       rte_metrics_init(rte_socket_id());
+       return TEST_SUCCESS;
+}
+
+/* Test Case to check failures when memzone init is not done */
+static int
+test_metrics_without_init(void)
+{
+       int err = 0;
+       const uint64_t  value[REG_METRIC_COUNT] = {0};
+       const char * const mnames[] = {
+               "mean_bits_in", "mean_bits_out",
+               "peak_bits_in", "peak_bits_out",
+       };
+
+       /* Failure Test: Checking for memzone initialization */
+       err = rte_metrics_reg_name("peak_bits_in");
+       TEST_ASSERT(err == -EIO, "%s, %d", __func__, __LINE__);
+
+       err = rte_metrics_reg_names(&mnames[0], 1);
+       TEST_ASSERT(err == -EIO, "%s, %d", __func__, __LINE__);
+
+       err = rte_metrics_update_value(RTE_METRICS_GLOBAL, KEY, VALUE);
+       TEST_ASSERT(err == -EIO, "%s, %d", __func__, __LINE__);
+
+       err = rte_metrics_update_values(RTE_METRICS_GLOBAL, KEY, &value[0], 4);
+       TEST_ASSERT(err == -EIO, "%s, %d", __func__, __LINE__);
+
+       err = rte_metrics_get_names(NULL, 0);
+       TEST_ASSERT(err == -EIO, "%s, %d", __func__, __LINE__);
+
+       err = rte_metrics_get_values(RTE_METRICS_GLOBAL, NULL, 0);
+       TEST_ASSERT(err == -EIO, "%s, %d", __func__, __LINE__);
+
+       return TEST_SUCCESS;
+}
+
+/* Test Case to validate registering a single metric */
+static int
+test_metrics_reg_name_with_validname(void)
+{
+       int err = 0;
+
+       /* Test to register the new metric name */
+       err = rte_metrics_reg_name("peak_bits_out");
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       /* Test to register the same metric name */
+       err = rte_metrics_reg_name("peak_bits_out");
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       /* Test case to validate registering an invalid metric */
+       err = rte_metrics_reg_name(NULL);
+       TEST_ASSERT(err == -EINVAL, "%s, %d", __func__, __LINE__);
+
+       return TEST_SUCCESS;
+}
+
+/* Test case to validate registering a list of valid metric names */
+static int
+test_metrics_reg_names(void)
+{
+       int err = 0;
+       const char * const mnames[] = {
+               "mean_bits_in", "mean_bits_out",
+               "peak_bits_in", "peak_bits_out",
+               };
+
+       /* Success Test: valid array and count size */
+       err = rte_metrics_reg_names(&mnames[0], ARRAY_SIZE(mnames));
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       return TEST_SUCCESS;
+}
+
+/* Test case to validate update a metric */
+static int
+test_metrics_update_value(void)
+{
+       int err = 0;
+
+       /* Successful Test: Valid port_id, key and value */
+       err = rte_metrics_update_value(RTE_METRICS_GLOBAL, KEY, VALUE);
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       /* Successful Test: Valid port_id other than RTE_METRICS_GLOBAL,
+        * key and value
+        */
+       err = rte_metrics_update_value(9, KEY, VALUE);
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       /* Failed Test: Invalid port_id with lower value */
+       err = rte_metrics_update_value(-2, KEY, VALUE);
+       TEST_ASSERT(err == -EINVAL, "%s, %d", __func__, __LINE__);
+
+       /* Failed Test: Invalid port_id with higher value */
+       err = rte_metrics_update_value(39, KEY, VALUE);
+       TEST_ASSERT(err == -EINVAL, "%s, %d", __func__, __LINE__);
+
+       /* Failed Test: valid port id, value with invalid key */
+       err = rte_metrics_update_value(RTE_METRICS_GLOBAL, KEY+12, VALUE);
+       TEST_ASSERT(err < 0, "%s, %d", __func__, __LINE__);
+
+       return TEST_SUCCESS;
+}
+
+/* Test case to validate updating a list of metrics */
+static int
+test_metrics_update_values(void)
+{
+       int err = 0;
+       const uint64_t  value[REG_METRIC_COUNT] = {1, 2, 3, 4, 5, 6};
+
+       /* Successful Test: update metrics with first set */
+       err = rte_metrics_update_values(RTE_METRICS_GLOBAL, 0,
+                       &value[0], 1);
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       /* Successful Test: update metrics with second set */
+       err = rte_metrics_update_values(RTE_METRICS_GLOBAL, 1,
+                       &value[1], 1);
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       /* Successful Test: update metrics with third set */
+       err = rte_metrics_update_values(RTE_METRICS_GLOBAL, 2,
+                       &value[2], 4);
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       /* Failed Test: Invalid count size */
+       err = rte_metrics_update_values(RTE_METRICS_GLOBAL,
+                        KEY, &value[0], 0);
+       TEST_ASSERT(err < 0, "%s, %d", __func__, __LINE__);
+
+       /* Failed Test: Invalid port_id (lower value) and valid data */
+       err = rte_metrics_update_values(-2, KEY, &value[0], ARRAY_SIZE(value));
+       TEST_ASSERT(err == -EINVAL, "%s, %d", __func__, __LINE__);
+
+       /* Failed Test: Invalid port_id (higher value) and valid data */
+       err = rte_metrics_update_values(39, 1, &value[0], ARRAY_SIZE(value));
+       TEST_ASSERT(err == -EINVAL, "%s, %d", __func__, __LINE__);
+
+       /* Failed Test: Invalid array */
+       err = rte_metrics_update_values(RTE_METRICS_GLOBAL,
+                        KEY, NULL, ARRAY_SIZE(value));
+       TEST_ASSERT(err == -EINVAL, "%s, %d", __func__, __LINE__);
+
+       return TEST_SUCCESS;
+}
+
+/* Test to validate getting the metric name-key lookup table */
+static int
+test_metrics_get_names(void)
+{
+       int err = 0;
+       struct rte_metric_name metrics[METRIC_LESSER_COUNT];
+       struct rte_metric_name success_metrics[REG_METRIC_COUNT];
+
+       /* Successful Test: NULL array is a valid query for the count */
+       err = rte_metrics_get_names(NULL, 0);
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       /* Successful Test: valid array with capacity equal to the
+        * number of registered stats
+        */
+       err = rte_metrics_get_names(success_metrics, REG_METRIC_COUNT);
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       /* Successful Test: valid array with capacity greater than the
+        * number of registered stats
+        */
+       err = rte_metrics_get_names(success_metrics, REG_METRIC_COUNT+5);
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       /* Successful Test, results not updated: valid array with
+        * capacity less than the number of registered stats
+        */
+       err = rte_metrics_get_names(metrics, METRIC_LESSER_COUNT);
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       return TEST_SUCCESS;
+}
+
+/* Test to validate getting a list of metric values */
+static int
+test_metrics_get_values(void)
+{
+       int i = 0;
+       int err = 0;
+       struct rte_metric_value getvalues[REG_METRIC_COUNT];
+
+       size_t m_size = sizeof(struct rte_metric_value);
+       for (i = 0; i < REG_METRIC_COUNT; i++)
+               memset(&getvalues[i], 0, m_size);
+
+       /* Successful Test, results not updated: valid arguments with
+        * capacity less than the number of registered stats
+        */
+       err = rte_metrics_get_values(RTE_METRICS_GLOBAL, getvalues,
+                        METRIC_LESSER_COUNT);
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       /* Successful Test, update results: valid arguments */
+       err = rte_metrics_get_values(RTE_METRICS_GLOBAL, getvalues,
+                        REG_METRIC_COUNT);
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       /* Successful Test: valid arguments with capacity greater than
+        * the number of registered stats
+        */
+       err = rte_metrics_get_values(RTE_METRICS_GLOBAL, getvalues,
+                       REG_METRIC_COUNT+2);
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       /* Failure Test: Invalid port_id (lower value) with valid values
+        * array and capacity
+        */
+       err = rte_metrics_get_values(-2, getvalues, REG_METRIC_COUNT);
+       TEST_ASSERT(err == -EINVAL, "%s, %d", __func__, __LINE__);
+
+       /* Failure Test: Invalid port_id (higher value) with valid values
+        * array and capacity
+        */
+       err = rte_metrics_get_values(33, getvalues, REG_METRIC_COUNT);
+       TEST_ASSERT(err == -EINVAL, "%s, %d", __func__, __LINE__);
+
+       /* Successful Test: valid port_id with NULL values array and
+        * valid capacity
+        */
+       err = rte_metrics_get_values(RTE_METRICS_GLOBAL, NULL,
+                        REG_METRIC_COUNT);
+       TEST_ASSERT(err >= 0, "%s, %d", __func__, __LINE__);
+
+       return TEST_SUCCESS;
+}
+
+static struct unit_test_suite metrics_testsuite  = {
+       .suite_name = "Metrics Unit Test Suite",
+       .setup = NULL,
+       .teardown = NULL,
+       .unit_test_cases = {
+               /* Test Case 1: Test to check all metric APIs without
+                * metrics init
+                */
+               TEST_CASE(test_metrics_without_init),
+
+               /* TEST CASE 2: Test to register valid metrics*/
+               TEST_CASE_ST(test_metrics_init, NULL,
+                               test_metrics_reg_name_with_validname),
+
+               /* TEST CASE 3: Test to register list of metrics with valid
+                * names and valid count size, invalid names and invalid
+                * count size
+                */
+               TEST_CASE(test_metrics_reg_names),
+
+               /* TEST CASE 4: Test to update a value with valid and
+                * invalid port ids
+                */
+               TEST_CASE(test_metrics_update_value),
+
+               /* TEST CASE 5: Test to update a list of values with valid
+                * and invalid port id, key, value and count size
+                */
+               TEST_CASE(test_metrics_update_values),
+
+               /* TEST CASE 6: Test to get metric names-key with valid
+                * array list, count size and invalid array list, count size
+                */
+               TEST_CASE(test_metrics_get_names),
+
+               /* TEST CASE 7: Test to get list of metric values with valid
+                * port id, array list, count size and invalid port id,
+                * arraylist, count size
+                */
+               TEST_CASE(test_metrics_get_values),
+               TEST_CASES_END()
+       }
+};
+
+static int
+test_metrics(void)
+{
+       return unit_test_suite_runner(&metrics_testsuite);
+}
+
+REGISTER_TEST_COMMAND(metrics_autotest, test_metrics);
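For reference, the producer-side flow exercised above is small; a minimal
sketch (metric name hypothetical):

        #include <rte_lcore.h>
        #include <rte_metrics.h>

        static void
        metrics_producer_example(void)
        {
                int key;

                /* must run in the primary process, once */
                rte_metrics_init(rte_socket_id());

                key = rte_metrics_reg_name("demo_counter");
                if (key >= 0)
                        rte_metrics_update_value(RTE_METRICS_GLOBAL, key, 42);
        }

Consumers can then size a buffer from rte_metrics_get_names(NULL, 0) and
read the values back with rte_metrics_get_values().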
index 4549965..f5095c8 100644
@@ -65,7 +65,6 @@ static struct rte_eth_conf port_conf = {
                .mq_mode = ETH_MQ_RX_NONE,
                .max_rx_pkt_len = ETHER_MAX_LEN,
                .split_hdr_size = 0,
-               .offloads = DEV_RX_OFFLOAD_CRC_STRIP,
        },
        .txmode = {
                .mq_mode = ETH_MQ_TX_NONE,
index 19d7d20..6414bbd 100644
@@ -2,20 +2,22 @@
  * Copyright(c) 2010-2015 Intel Corporation
  */
 #include "test.h"
+#include <string.h>
 
 #include <stdio.h>
 
 #include <rte_eth_ring.h>
 #include <rte_ethdev.h>
-
-static struct rte_mempool *mp;
-static int tx_porta, rx_portb, rxtx_portc, rxtx_portd, rxtx_porte;
+#include <rte_bus_vdev.h>
 
 #define SOCKET0 0
 #define RING_SIZE 256
 #define NUM_RINGS 2
 #define NB_MBUF 512
 
+static struct rte_mempool *mp;
+struct rte_ring *rxtx[NUM_RINGS];
+static int tx_porta, rx_portb, rxtx_portc, rxtx_portd, rxtx_porte;
 
 static int
 test_ethdev_configure_port(int port)
@@ -42,7 +44,7 @@ test_ethdev_configure_port(int port)
        }
 
        if (rte_eth_rx_queue_setup(port, 0, RING_SIZE, SOCKET0,
-                       NULL, mp) < 0) {
+                               NULL, mp) < 0) {
                printf("RX queue setup failed port %d\n", port);
                return -1;
        }
@@ -71,21 +73,21 @@ test_send_basic_packets(void)
 
        if (rte_eth_tx_burst(tx_porta, 0, pbufs, RING_SIZE/2) < RING_SIZE/2) {
                printf("Failed to transmit packet burst port %d\n", tx_porta);
-               return -1;
+               return TEST_FAILED;
        }
 
        if (rte_eth_rx_burst(rx_portb, 0, pbufs, RING_SIZE) != RING_SIZE/2) {
                printf("Failed to receive packet burst on port %d\n", rx_portb);
-               return -1;
+               return TEST_FAILED;
        }
 
        for (i = 0; i < RING_SIZE/2; i++)
                if (pbufs[i] != &bufs[i]) {
                        printf("Error: received data does not match that transmitted\n");
-                       return -1;
+                       return TEST_FAILED;
                }
 
-       return 0;
+       return TEST_SUCCESS;
 }
 
 static int
@@ -212,7 +214,7 @@ test_stats_reset(int port)
 }
 
 static int
-test_pmd_ring_pair_create_attach(int portd, int porte)
+test_pmd_ring_pair_create_attach(void)
 {
        struct rte_eth_stats stats, stats2;
        struct rte_mbuf buf, *pbuf = &buf;
@@ -220,185 +222,218 @@ test_pmd_ring_pair_create_attach(int portd, int porte)
 
        memset(&null_conf, 0, sizeof(struct rte_eth_conf));
 
-       if ((rte_eth_dev_configure(portd, 1, 1, &null_conf) < 0)
-               || (rte_eth_dev_configure(porte, 1, 1, &null_conf) < 0)) {
+       if ((rte_eth_dev_configure(rxtx_portd, 1, 1, &null_conf) < 0)
+                       || (rte_eth_dev_configure(rxtx_porte, 1, 1,
+                                       &null_conf) < 0)) {
                printf("Configure failed for port\n");
-               return -1;
+               return TEST_FAILED;
        }
 
-       if ((rte_eth_tx_queue_setup(portd, 0, RING_SIZE, SOCKET0, NULL) < 0)
-               || (rte_eth_tx_queue_setup(porte, 0, RING_SIZE, SOCKET0, NULL) < 0)) {
+       if ((rte_eth_tx_queue_setup(rxtx_portd, 0, RING_SIZE,
+                                       SOCKET0, NULL) < 0)
+                       || (rte_eth_tx_queue_setup(rxtx_porte, 0, RING_SIZE,
+                                       SOCKET0, NULL) < 0)) {
                printf("TX queue setup failed\n");
-               return -1;
+               return TEST_FAILED;
        }
 
-       if ((rte_eth_rx_queue_setup(portd, 0, RING_SIZE, SOCKET0, NULL, mp) < 0)
-               || (rte_eth_rx_queue_setup(porte, 0, RING_SIZE, SOCKET0, NULL, mp) < 0)) {
+       if ((rte_eth_rx_queue_setup(rxtx_portd, 0, RING_SIZE,
+                                       SOCKET0, NULL, mp) < 0)
+                       || (rte_eth_rx_queue_setup(rxtx_porte, 0, RING_SIZE,
+                                       SOCKET0, NULL, mp) < 0)) {
                printf("RX queue setup failed\n");
-               return -1;
+               return TEST_FAILED;
        }
 
-       if ((rte_eth_dev_start(portd) < 0)
-               || (rte_eth_dev_start(porte) < 0)) {
+       if ((rte_eth_dev_start(rxtx_portd) < 0)
+                       || (rte_eth_dev_start(rxtx_porte) < 0)) {
                printf("Error starting port\n");
-               return -1;
+               return TEST_FAILED;
        }
 
-       rte_eth_stats_reset(portd);
+       rte_eth_stats_reset(rxtx_portd);
        /* check stats of port, should all be zero */
-       rte_eth_stats_get(portd, &stats);
+       rte_eth_stats_get(rxtx_portd, &stats);
        if (stats.ipackets != 0 || stats.opackets != 0 ||
                        stats.ibytes != 0 || stats.obytes != 0 ||
                        stats.ierrors != 0 || stats.oerrors != 0) {
-               printf("Error: port %d stats are not zero\n", portd);
-               return -1;
+               printf("Error: port %d stats are not zero\n", rxtx_portd);
+               return TEST_FAILED;
        }
 
-       rte_eth_stats_reset(porte);
+       rte_eth_stats_reset(rxtx_porte);
        /* check stats of port, should all be zero */
-       rte_eth_stats_get(porte, &stats2);
+       rte_eth_stats_get(rxtx_porte, &stats2);
        if (stats2.ipackets != 0 || stats2.opackets != 0 ||
                        stats2.ibytes != 0 || stats2.obytes != 0 ||
                        stats2.ierrors != 0 || stats2.oerrors != 0) {
-               printf("Error: port %d stats are not zero\n", porte);
-               return -1;
+               printf("Error: port %d stats are not zero\n", rxtx_porte);
+               return TEST_FAILED;
        }
 
        /*
-        * send and receive 1 packet (portd -> porte)
+        * send and receive 1 packet (rxtx_portd -> rxtx_porte)
         * and check for stats update
         */
-       printf("Testing send and receive 1 packet (portd -> porte)\n");
-       if (rte_eth_tx_burst(portd, 0, &pbuf, 1) != 1) {
-               printf("Error sending packet to port %d\n", portd);
-               return -1;
+       printf("Testing send and receive 1 packet (rxtx_portd -> rxtx_porte)\n");
+       if (rte_eth_tx_burst(rxtx_portd, 0, &pbuf, 1) != 1) {
+               printf("Error sending packet to port %d\n", rxtx_portd);
+               return TEST_FAILED;
        }
 
-       if (rte_eth_rx_burst(porte, 0, &pbuf, 1) != 1) {
-               printf("Error receiving packet from port %d\n", porte);
-               return -1;
+       if (rte_eth_rx_burst(rxtx_porte, 0, &pbuf, 1) != 1) {
+               printf("Error receiving packet from port %d\n", rxtx_porte);
+               return TEST_FAILED;
        }
 
-       rte_eth_stats_get(portd, &stats);
-       rte_eth_stats_get(porte, &stats2);
+       rte_eth_stats_get(rxtx_portd, &stats);
+       rte_eth_stats_get(rxtx_porte, &stats2);
        if (stats.ipackets != 0 || stats.opackets != 1 ||
                        stats.ibytes != 0 || stats.obytes != 0 ||
                        stats.ierrors != 0 || stats.oerrors != 0) {
-               printf("Error: port %d stats are not as expected\n", portd);
-               return -1;
+               printf("Error: port %d stats are not as expected\n",
+                               rxtx_portd);
+               return TEST_FAILED;
        }
 
        if (stats2.ipackets != 1 || stats2.opackets != 0 ||
                        stats2.ibytes != 0 || stats2.obytes != 0 ||
                        stats2.ierrors != 0 || stats2.oerrors != 0) {
-               printf("Error: port %d stats are not as expected\n", porte);
-               return -1;
+               printf("Error: port %d stats are not as expected\n",
+                               rxtx_porte);
+               return TEST_FAILED;
        }
 
        /*
-        * send and receive 1 packet (porte -> portd)
+        * send and receive 1 packet (rxtx_porte -> rxtx_portd)
         * and check for stats update
         */
-       printf("Testing send and receive 1 packet (porte -> portd)\n");
-       if (rte_eth_tx_burst(porte, 0, &pbuf, 1) != 1) {
-               printf("Error sending packet to port %d\n", porte);
-               return -1;
+       printf("Testing send and receive 1 packet "
+                       "(rxtx_porte -> rxtx_portd)\n");
+       if (rte_eth_tx_burst(rxtx_porte, 0, &pbuf, 1) != 1) {
+               printf("Error sending packet to port %d\n", rxtx_porte);
+               return TEST_FAILED;
        }
 
-       if (rte_eth_rx_burst(portd, 0, &pbuf, 1) != 1) {
-               printf("Error receiving packet from port %d\n", portd);
-               return -1;
+       if (rte_eth_rx_burst(rxtx_portd, 0, &pbuf, 1) != 1) {
+               printf("Error receiving packet from port %d\n", rxtx_portd);
+               return TEST_FAILED;
        }
 
-       rte_eth_stats_get(portd, &stats);
-       rte_eth_stats_get(porte, &stats2);
+       rte_eth_stats_get(rxtx_portd, &stats);
+       rte_eth_stats_get(rxtx_porte, &stats2);
        if (stats.ipackets != 1 || stats.opackets != 1 ||
                        stats.ibytes != 0 || stats.obytes != 0 ||
                        stats.ierrors != 0 || stats.oerrors != 0) {
-               printf("Error: port %d stats are not as expected\n", portd);
-               return -1;
+               printf("Error: port %d stats are not as expected\n",
+                               rxtx_portd);
+               return TEST_FAILED;
        }
 
        if (stats2.ipackets != 1 || stats2.opackets != 1 ||
                        stats2.ibytes != 0 || stats2.obytes != 0 ||
                        stats2.ierrors != 0 || stats2.oerrors != 0) {
-               printf("Error: port %d stats are not as expected\n", porte);
-               return -1;
+               printf("Error: port %d stats are not as expected\n",
+                               rxtx_porte);
+               return TEST_FAILED;
        }
 
        /*
-        * send and receive 1 packet (portd -> portd)
+        * send and receive 1 packet (rxtx_portd -> rxtx_portd)
         * and check for stats update
         */
-       printf("Testing send and receive 1 packet (portd -> portd)\n");
-       if (rte_eth_tx_burst(portd, 0, &pbuf, 1) != 1) {
-               printf("Error sending packet to port %d\n", portd);
-               return -1;
+       printf("Testing send and receive 1 packet "
+                       "(rxtx_portd -> rxtx_portd)\n");
+       if (rte_eth_tx_burst(rxtx_portd, 0, &pbuf, 1) != 1) {
+               printf("Error sending packet to port %d\n", rxtx_portd);
+               return TEST_FAILED;
        }
 
-       if (rte_eth_rx_burst(portd, 0, &pbuf, 1) != 1) {
-               printf("Error receiving packet from port %d\n", porte);
-               return -1;
+       if (rte_eth_rx_burst(rxtx_portd, 0, &pbuf, 1) != 1) {
+               printf("Error receiving packet from port %d\n", rxtx_porte);
+               return TEST_FAILED;
        }
 
-       rte_eth_stats_get(portd, &stats);
-       rte_eth_stats_get(porte, &stats2);
+       rte_eth_stats_get(rxtx_portd, &stats);
+       rte_eth_stats_get(rxtx_porte, &stats2);
        if (stats.ipackets != 2 || stats.opackets != 2 ||
                        stats.ibytes != 0 || stats.obytes != 0 ||
                        stats.ierrors != 0 || stats.oerrors != 0) {
-               printf("Error: port %d stats are not as expected\n", portd);
-               return -1;
+               printf("Error: port %d stats are not as expected\n",
+                               rxtx_portd);
+               return TEST_FAILED;
        }
 
        if (stats2.ipackets != 1 || stats2.opackets != 1 ||
                        stats2.ibytes != 0 || stats2.obytes != 0 ||
                        stats2.ierrors != 0 || stats2.oerrors != 0) {
-               printf("Error: port %d stats are not as expected\n", porte);
-               return -1;
+               printf("Error: port %d stats are not as expected\n",
+                               rxtx_porte);
+               return TEST_FAILED;
        }
 
        /*
-        * send and receive 1 packet (porte -> porte)
+        * send and receive 1 packet (rxtx_porte -> rxtx_porte)
         * and check for stats update
         */
-       printf("Testing send and receive 1 packet (porte -> porte)\n");
-       if (rte_eth_tx_burst(porte, 0, &pbuf, 1) != 1) {
-               printf("Error sending packet to port %d\n", porte);
-               return -1;
+       printf("Testing send and receive 1 packet "
+                       "(rxtx_porte -> rxtx_porte)\n");
+       if (rte_eth_tx_burst(rxtx_porte, 0, &pbuf, 1) != 1) {
+               printf("Error sending packet to port %d\n", rxtx_porte);
+               return TEST_FAILED;
        }
 
-       if (rte_eth_rx_burst(porte, 0, &pbuf, 1) != 1) {
-               printf("Error receiving packet from port %d\n", porte);
-               return -1;
+       if (rte_eth_rx_burst(rxtx_porte, 0, &pbuf, 1) != 1) {
+               printf("Error receiving packet from port %d\n", rxtx_porte);
+               return TEST_FAILED;
        }
 
-       rte_eth_stats_get(portd, &stats);
-       rte_eth_stats_get(porte, &stats2);
+       rte_eth_stats_get(rxtx_portd, &stats);
+       rte_eth_stats_get(rxtx_porte, &stats2);
        if (stats.ipackets != 2 || stats.opackets != 2 ||
                        stats.ibytes != 0 || stats.obytes != 0 ||
                        stats.ierrors != 0 || stats.oerrors != 0) {
-               printf("Error: port %d stats are not as expected\n", portd);
-               return -1;
+               printf("Error: port %d stats are not as expected\n",
+                               rxtx_portd);
+               return TEST_FAILED;
        }
 
        if (stats2.ipackets != 2 || stats2.opackets != 2 ||
                        stats2.ibytes != 0 || stats2.obytes != 0 ||
                        stats2.ierrors != 0 || stats2.oerrors != 0) {
-               printf("Error: port %d stats are not as expected\n", porte);
-               return -1;
+               printf("Error: port %d stats are not as expected\n",
+                               rxtx_porte);
+               return TEST_FAILED;
        }
 
-       rte_eth_dev_stop(portd);
-       rte_eth_dev_stop(porte);
+       rte_eth_dev_stop(rxtx_portd);
+       rte_eth_dev_stop(rxtx_porte);
 
-       return 0;
+       return TEST_SUCCESS;
+}
+
+static void
+test_cleanup_resources(void)
+{
+       int itr;
+       for (itr = 0; itr < NUM_RINGS; itr++)
+               rte_ring_free(rxtx[itr]);
+
+       rte_eth_dev_stop(tx_porta);
+       rte_eth_dev_stop(rx_portb);
+       rte_eth_dev_stop(rxtx_portc);
+
+       rte_mempool_free(mp);
+       rte_vdev_uninit("net_ring_net_ringa");
+       rte_vdev_uninit("net_ring_net_ringb");
+       rte_vdev_uninit("net_ring_net_ringc");
+       rte_vdev_uninit("net_ring_net_ringd");
+       rte_vdev_uninit("net_ring_net_ringe");
 }
 
 static int
-test_pmd_ring(void)
+test_pmd_ringcreate_setup(void)
 {
-       struct rte_ring *rxtx[NUM_RINGS];
-       int port, cmdl_port0 = -1;
        uint8_t nb_ports;
 
        nb_ports = rte_eth_dev_count_avail();
@@ -431,53 +466,33 @@ test_pmd_ring(void)
                        tx_porta, rx_portb, rxtx_portc, rxtx_portd, rxtx_porte);
 
        if ((tx_porta == -1) || (rx_portb == -1) || (rxtx_portc == -1)
-               || (rxtx_portd == -1) || (rxtx_porte == -1)) {
+                       || (rxtx_portd == -1) || (rxtx_porte == -1)) {
                printf("rte_eth_from rings failed\n");
                return -1;
        }
 
        mp = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, 32,
-               0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+                       0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
        if (mp == NULL)
                return -1;
 
        if ((tx_porta >= RTE_MAX_ETHPORTS) || (rx_portb >= RTE_MAX_ETHPORTS)
-               || (rxtx_portc >= RTE_MAX_ETHPORTS)
-               || (rxtx_portd >= RTE_MAX_ETHPORTS)
-               || (rxtx_porte >= RTE_MAX_ETHPORTS)) {
+                       || (rxtx_portc >= RTE_MAX_ETHPORTS)
+                       || (rxtx_portd >= RTE_MAX_ETHPORTS)
+                       || (rxtx_porte >= RTE_MAX_ETHPORTS)) {
                printf(" port exceed max eth ports\n");
                return -1;
        }
+       return 0;
+}
 
-       if (test_ethdev_configure_port(tx_porta) < 0)
-               return -1;
-
-       if (test_ethdev_configure_port(rx_portb) < 0)
-               return -1;
-
-       if (test_ethdev_configure_port(rxtx_portc) < 0)
-               return -1;
-
-       if (test_send_basic_packets() < 0)
-               return -1;
-
-       if (test_get_stats(rxtx_portc) < 0)
-               return -1;
-
-       if (test_stats_reset(rxtx_portc) < 0)
-               return -1;
-
-       rte_eth_dev_stop(tx_porta);
-       rte_eth_dev_stop(rx_portb);
-       rte_eth_dev_stop(rxtx_portc);
-
-       if (test_pmd_ring_pair_create_attach(rxtx_portd, rxtx_porte) < 0)
-               return -1;
-
+static int
+test_command_line_ring_port(void)
+{
+       int port, cmdl_port0 = -1;
        /* find a port created with the --vdev=net_ring0 command line option */
        RTE_ETH_FOREACH_DEV(port) {
                struct rte_eth_dev_info dev_info;
-
                rte_eth_dev_info_get(port, &dev_info);
                if (!strcmp(dev_info.driver_name, "Rings PMD")) {
                        printf("found a command line ring port=%d\n", port);
@@ -486,17 +501,66 @@ test_pmd_ring(void)
                }
        }
        if (cmdl_port0 != -1) {
-               if (test_ethdev_configure_port(cmdl_port0) < 0)
-                       return -1;
-               if (test_send_basic_packets_port(cmdl_port0) < 0)
-                       return -1;
-               if (test_stats_reset(cmdl_port0) < 0)
-                       return -1;
-               if (test_get_stats(cmdl_port0) < 0)
-                       return -1;
+               TEST_ASSERT((test_ethdev_configure_port(cmdl_port0) == 0),
+                               "test ethdev configure port cmdl_port0 failed");
+               TEST_ASSERT((test_send_basic_packets_port(cmdl_port0) == 0),
+                               "test send basic packets port cmdl_port0 failed");
+               TEST_ASSERT((test_stats_reset(cmdl_port0) == 0),
+                               "test stats reset cmdl_port0 failed");
+               TEST_ASSERT((test_get_stats(cmdl_port0) == 0),
+                               "test get stats cmdl_port0 failed");
                rte_eth_dev_stop(cmdl_port0);
        }
-       return 0;
+       return TEST_SUCCESS;
+}
+
+static int
+test_ethdev_configure_ports(void)
+{
+       TEST_ASSERT((test_ethdev_configure_port(tx_porta) == 0),
+                       "test ethdev configure ports tx_porta failed");
+       TEST_ASSERT((test_ethdev_configure_port(rx_portb) == 0),
+                       "test ethdev configure ports rx_portb failed");
+       TEST_ASSERT((test_ethdev_configure_port(rxtx_portc) == 0),
+                       "test ethdev configure ports rxtx_portc failed");
+
+       return TEST_SUCCESS;
+}
+
+static int
+test_get_stats_for_port(void)
+{
+       TEST_ASSERT(test_get_stats(rxtx_portc) == 0, "test get stats failed");
+       return TEST_SUCCESS;
+}
+
+static int
+test_stats_reset_for_port(void)
+{
+       TEST_ASSERT(test_stats_reset(rxtx_portc) == 0, "test stats reset failed");
+       return TEST_SUCCESS;
+}
+
+static struct unit_test_suite test_pmd_ring_suite = {
+       .setup = test_pmd_ringcreate_setup,
+       .teardown = test_cleanup_resources,
+       .suite_name = "Test PMD Ring Unit Test Suite",
+       .unit_test_cases = {
+               TEST_CASE(test_ethdev_configure_ports),
+               TEST_CASE(test_send_basic_packets),
+               TEST_CASE(test_get_stats_for_port),
+               TEST_CASE(test_stats_reset_for_port),
+               TEST_CASE(test_pmd_ring_pair_create_attach),
+               TEST_CASE(test_command_line_ring_port),
+               TEST_CASES_END()
+       }
+};
+
+static int
+test_pmd_ring(void)
+{
+       return unit_test_suite_runner(&test_pmd_ring_suite);
 }
 
 REGISTER_TEST_COMMAND(ring_pmd_autotest, test_pmd_ring);
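For reference, the ring-backed ports used by this suite are created from
plain rte_rings; a minimal sketch using the file's RING_SIZE/SOCKET0
constants (other names hypothetical):

        #include <rte_ring.h>
        #include <rte_eth_ring.h>

        static int
        make_loopback_ring_port(void)
        {
                struct rte_ring *r = rte_ring_create("sketch_ring", RING_SIZE,
                                SOCKET0, RING_F_SP_ENQ | RING_F_SC_DEQ);

                if (r == NULL)
                        return -1;
                /* same ring for rx and tx: transmitted mbufs loop back */
                return rte_eth_from_rings("sketch_port", &r, 1, &r, 1, SOCKET0);
        }

The return value is the new port id (or -1 on failure), which can then be
configured like any other ethdev, as test_ethdev_configure_port() does.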
index 5e08f06..d29048e 100644
@@ -54,7 +54,7 @@
 
 #define BILLION (1UL << 30)
 
-#define TEST_DURATION_S 20 /* in seconds */
+#define TEST_DURATION_S 4 /* in seconds */
 #define N_TIMERS    50
 
 static struct rte_timer timer[N_TIMERS];
index 591b309..f8ddc2d 100644 (file)
@@ -91,7 +91,6 @@ virtual_ethdev_info_get(struct rte_eth_dev *dev __rte_unused,
        dev_info->max_tx_queues = (uint16_t)512;
 
        dev_info->min_rx_bufsize = 0;
-       dev_info->rx_offload_capa = DEV_RX_OFFLOAD_CRC_STRIP;
 }
 
 static int
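
The deleted capability flag tracks 18.11 making CRC stripping the default ethdev behaviour, so DEV_RX_OFFLOAD_CRC_STRIP no longer needs to be advertised. As a hedged sketch of how an application can still inspect a port's remaining Rx offload capabilities (print_rx_offload_caps is a made-up name, and this assumes rte_eal_init() has already run; 18.11's rte_eth_dev_info_get() returns void, so there is no return code to check):

    #include <inttypes.h>
    #include <stdio.h>
    #include <rte_ethdev.h>

    /* Dump the raw Rx offload capability mask of every available port. */
    static void
    print_rx_offload_caps(void)
    {
            uint16_t port;

            RTE_ETH_FOREACH_DEV(port) {
                    struct rte_eth_dev_info dev_info;

                    rte_eth_dev_info_get(port, &dev_info);
                    printf("port %u: rx_offload_capa=0x%" PRIx64 "\n",
                                    port, dev_info.rx_offload_capa);
            }
    }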
diff --git a/usertools/dpdk-telemetry-client.py b/usertools/dpdk-telemetry-client.py
new file mode 100644 (file)
index 0000000..6dcf62b
--- /dev/null
@@ -0,0 +1,116 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2018 Intel Corporation
+
+import socket
+import os
+import sys
+import time
+
+BUFFER_SIZE = 200000
+
+METRICS_REQ = "{\"action\":0,\"command\":\"ports_all_stat_values\",\"data\":null}"
+API_REG = "{\"action\":1,\"command\":\"clients\",\"data\":{\"client_path\":\""
+API_UNREG = "{\"action\":2,\"command\":\"clients\",\"data\":{\"client_path\":\""
+DEFAULT_FP = "/var/run/dpdk/default_client"
+
+class Socket:
+
+    def __init__(self):
+        self.send_fd = socket.socket(socket.AF_UNIX, socket.SOCK_SEQPACKET)
+        self.recv_fd = socket.socket(socket.AF_UNIX, socket.SOCK_SEQPACKET)
+        self.client_fd = None
+
+    def __del__(self):
+        try:
+            self.send_fd.close()
+            self.recv_fd.close()
+            self.client_fd.close()
+        except:
+            print("Error - Sockets could not be closed")
+
+class Client:
+
+    def __init__(self): # Creates a client instance
+        self.socket = Socket()
+        self.file_path = None
+        self.choice = None
+        self.unregistered = 0
+
+    def __del__(self):
+        try:
+            if self.unregistered == 0:
+                self.unregister()
+        except:
+            print("Error - Client could not be destroyed")
+
+    def getFilepath(self, file_path): # Stores the socket file path on this client instance
+        self.file_path = file_path
+
+    def register(self): # Connects a client to DPDK-instance
+        if os.path.exists(self.file_path):
+            os.unlink(self.file_path)
+        try:
+            self.socket.recv_fd.bind(self.file_path)
+        except socket.error as msg:
+            print("Error - Socket binding error: " + str(msg) + "\n")
+        self.socket.recv_fd.settimeout(2)
+        self.socket.send_fd.connect("/var/run/dpdk/rte/telemetry")
+        JSON = (API_REG + self.file_path + "\"}}")
+        self.socket.send_fd.sendall(JSON)
+        self.socket.recv_fd.listen(1)
+        self.socket.client_fd = self.socket.recv_fd.accept()[0]
+
+    def unregister(self): # Unregister a given client
+        self.socket.client_fd.send(API_UNREG + self.file_path + "\"}}")
+        self.socket.client_fd.close()
+
+    def requestMetrics(self): # Requests metrics for given client
+        self.socket.client_fd.send(METRICS_REQ)
+        data = self.socket.client_fd.recv(BUFFER_SIZE)
+        print("\nResponse: \n" + str(data))
+
+    def repeatedlyRequestMetrics(self, sleep_time): # Repeatedly requests metrics for given client
+        print("\nPlease enter the number of times you'd like to request metrics:")
+        n_requests = int(input("\n:"))
+        print("\033[F") #Removes the user input from screen, cleans it up
+        print("\033[K")
+        for i in range(n_requests):
+            self.requestMetrics()
+            time.sleep(sleep_time)
+
+    def interactiveMenu(self, sleep_time): # Creates Interactive menu within the script
+        while self.choice != 3:
+            print("\nOptions Menu")
+            print("[1] Request metrics for all ports")
+            print("[2] Request metrics for all ports repeatedly")
+            print("[3] Unregister client")
+
+            try:
+                self.choice = int(input("\n:"))
+                print("\033[F") #Removes the user input from screen, cleans it up
+                print("\033[K")
+                if self.choice == 1:
+                    self.requestMetrics()
+                elif self.choice == 2:
+                    self.repeatedlyRequestMetrics(sleep_time)
+                elif self.choice == 3:
+                    self.unregister()
+                    self.unregistered = 1
+                else:
+                    print("Error - Invalid request choice")
+            except:
+                pass
+
+if __name__ == "__main__":
+
+    sleep_time = 1
+    file_path = ""
+    if len(sys.argv) == 2:
+        file_path = sys.argv[1]
+    else:
+        print("Warning - No filepath passed, using default (" + DEFAULT_FP + ").")
+        file_path = DEFAULT_FP
+    client = Client()
+    client.getFilepath(file_path)
+    client.register()
+    client.interactiveMenu(sleep_time)
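
Usage note for the script above, assuming a DPDK application with telemetry support is already running and exposing /var/run/dpdk/rte/telemetry: invoking it as "python dpdk-telemetry-client.py /var/run/dpdk/my_client" (the client path here is only an example) binds the receive socket at the given path, registers with the telemetry service, and drives the interactive menu; with no argument it falls back to /var/run/dpdk/default_client.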